From 130fbda71b9281c373cacf046facd224564ca37b Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Sat, 27 May 2023 15:31:07 +0200
Subject: [PATCH] radv: set has_bit_test for aco

Foz-DB Navi21:
Totals from 15285 (11.52% of 132657) affected shaders:
VGPRs: 1019136 -> 1019000 (-0.01%); split: -0.02%, +0.01%
SpillSGPRs: 10889 -> 10909 (+0.18%)
SpillVGPRs: 901 -> 914 (+1.44%); split: -0.89%, +2.33%
CodeSize: 103578640 -> 103523220 (-0.05%); split: -0.08%, +0.03%
MaxWaves: 259782 -> 259820 (+0.01%)
Instrs: 19247383 -> 19223764 (-0.12%); split: -0.15%, +0.02%
Latency: 323877613 -> 323684655 (-0.06%); split: -0.10%, +0.04%
InvThroughput: 62505295 -> 62386541 (-0.19%); split: -0.21%, +0.02%
VClause: 366162 -> 366136 (-0.01%); split: -0.03%, +0.02%
SClause: 786505 -> 785527 (-0.12%); split: -0.22%, +0.10%
Copies: 1348920 -> 1349209 (+0.02%); split: -0.26%, +0.29%
Branches: 456331 -> 456324 (-0.00%); split: -0.01%, +0.00%
PreSGPRs: 849542 -> 849402 (-0.02%); split: -0.02%, +0.01%
PreVGPRs: 925300 -> 924678 (-0.07%)

Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/vulkan/radv_pipeline.c | 2 ++
 src/amd/vulkan/radv_shader.c   | 1 +
 2 files changed, 3 insertions(+)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index d2583127f89..ef2a790de06 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -363,6 +363,8 @@ lower_bit_size_callback(const nir_instr *instr, void *_)
       case nir_op_ine:
       case nir_op_ult:
       case nir_op_uge:
+      case nir_op_bitz:
+      case nir_op_bitnz:
          return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32 : 0;
       default:
          return 0;
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 72d635eb4b1..a07e766e708 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -105,6 +105,7 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s
       .has_dot_2x16 = device->rad_info.has_accelerated_dot_product && device->rad_info.gfx_level < GFX11,
       .has_find_msb_rev = true,
       .has_pack_half_2x16_rtz = true,
+      .has_bit_test = !device->use_llvm,
       .has_fmulz = true,
       .max_unroll_iterations = 32,
       .max_unroll_iterations_aggressive = 128,