From 0cc7bf63b761a489af8861ebd32a53e8d229fd99 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 10 Oct 2022 13:35:01 -0700 Subject: [PATCH] nir: intel/compiler: Move ifind_msb lowering to NIR Unlike ufind_msb, ifind_msb is only defined in NIR for 32-bit values, so no @32 annotation is required. No shader-db or fossil-db changes on any Intel platform. Reviewed-by: Kenneth Graunke Part-of: --- src/compiler/nir/nir.h | 2 + src/compiler/nir/nir_opt_algebraic.py | 22 +++++++++++ src/intel/compiler/brw_compiler.c | 1 + src/intel/compiler/brw_fs_nir.cpp | 56 ++++++-------------------- src/intel/compiler/brw_vec4_nir.cpp | 57 +++++++-------------------- 5 files changed, 52 insertions(+), 86 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 37f8d68d45a..864d4ce4329 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3411,6 +3411,8 @@ typedef struct nir_shader_compiler_options { bool lower_ifind_msb; /** Lowers ifind_msb and ufind_msb to reverse variants */ bool lower_find_msb_to_reverse; + /** Lowers ifind_msb to uclz and logic ops*/ + bool lower_ifind_msb_to_uclz; /** Lowers find_lsb to ufind_msb and logic ops */ bool lower_find_lsb; bool lower_uadd_carry; diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 60957c8c5da..e8ec4918bd9 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -2013,6 +2013,28 @@ optimizations.extend([ ('ifind_msb_rev', 'value')), 'options->lower_find_msb_to_reverse'), + # uclz of an absolute value source almost always does the right thing. + # There are a couple problem values: + # + # * 0x80000000. Since abs(0x80000000) == 0x80000000, uclz returns 0. + # However, findMSB(int(0x80000000)) == 30. + # + # * 0xffffffff. Since abs(0xffffffff) == 1, uclz returns 31. Section 8.8 + # (Integer Functions) of the GLSL 4.50 spec says: + # + # For a value of zero or negative one, -1 will be returned. 
+ # + # * Negative powers of two. uclz(abs(-(1<<x))) returns x, but + # findMSB(-(1<<x)) should return x-1. + # + # For all negative number cases, including 0x80000000 and 0xffffffff, the + # correct value is obtained from uclz if instead of negating the (already + # negative) value the logical-not is used. A conditional logical-not can + # be achieved by (x ^ (x >> 31)). + (('ifind_msb', 'value'), + ('isub', 31, ('uclz', ('ixor', 'value', ('ishr', 'value', 31)))), + 'options->lower_ifind_msb_to_uclz'), + (('ufind_msb', 'value'), ('bcsel', ('ige', ('ufind_msb_rev', 'value'), 0), ('isub', 31, ('ufind_msb_rev', 'value')), diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 7ecf630be18..879f09c0bf6 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -189,6 +189,7 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo) nir_options->lower_rotate = devinfo->ver < 11; nir_options->lower_bitfield_reverse = devinfo->ver < 7; nir_options->lower_find_lsb = devinfo->ver < 7; + nir_options->lower_ifind_msb_to_uclz = devinfo->ver < 7; nir_options->has_iadd3 = devinfo->verx10 >= 125; nir_options->has_sdot_4x8 = devinfo->ver >= 12; diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 36bbef7e87c..0215dcde42f 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -613,38 +613,11 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, static void emit_find_msb_using_lzd(const fs_builder &bld, const fs_reg &result, - const fs_reg &src, - bool is_signed) + const fs_reg &src) { fs_inst *inst; fs_reg temp = src; - if (is_signed) { - /* LZD of an absolute value source almost always does the right - * thing. There are two problem values: - * - * * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns - * 0. However, findMSB(int(0x80000000)) == 30. - * - * * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns - * 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: - * - * For a value of zero or negative one, -1 will be returned. - * - * * Negative powers of two. 
LZD(abs(-(1<dest.dest) < 64); - emit_find_msb_using_lzd(bld, result, op[0], false); + emit_find_msb_using_lzd(bld, result, op[0]); break; } @@ -1715,23 +1688,20 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, case nir_op_ifind_msb: { assert(nir_dest_bit_size(instr->dest.dest) < 64); + assert(devinfo->ver >= 7); - if (devinfo->ver < 7) { - emit_find_msb_using_lzd(bld, result, op[0], true); - } else { - bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]); + bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]); - /* FBH counts from the MSB side, while GLSL's findMSB() wants the - * count from the LSB side. If FBH didn't return an error - * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB - * count into an LSB count. - */ - bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ); + /* FBH counts from the MSB side, while GLSL's findMSB() wants the count + * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then + * subtract the result from 31 to convert the MSB count into an LSB + * count. + */ + bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ); - inst = bld.ADD(result, result, brw_imm_d(31)); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->src[0].negate = true; - } + inst = bld.ADD(result, result, brw_imm_d(31)); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->src[0].negate = true; break; } diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp index 5f402889c65..a490f010f07 100644 --- a/src/intel/compiler/brw_vec4_nir.cpp +++ b/src/intel/compiler/brw_vec4_nir.cpp @@ -832,38 +832,11 @@ vec4_visitor::optimize_predicate(nir_alu_instr *instr, static void emit_find_msb_using_lzd(const vec4_builder &bld, const dst_reg &dst, - const src_reg &src, - bool is_signed) + const src_reg &src) { vec4_instruction *inst; src_reg temp = src; - if (is_signed) { - /* LZD of an absolute value source almost always does the right - * thing. 
There are two problem values: - * - * * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns - * 0. However, findMSB(int(0x80000000)) == 30. - * - * * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns - * 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: - * - * For a value of zero or negative one, -1 will be returned. - * - * * Negative powers of two. LZD(abs(-(1<dest.dest) < 64); - emit_find_msb_using_lzd(vec4_builder(this).at_end(), dst, op[0], false); + emit_find_msb_using_lzd(vec4_builder(this).at_end(), dst, op[0]); break; case nir_op_ifind_msb: { assert(nir_dest_bit_size(instr->dest.dest) < 64); + assert(devinfo->ver >= 7); + vec4_builder bld = vec4_builder(this).at_end(); src_reg src(dst); - if (devinfo->ver < 7) { - emit_find_msb_using_lzd(bld, dst, op[0], true); - } else { - emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0])); + emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0])); - /* FBH counts from the MSB side, while GLSL's findMSB() wants the - * count from the LSB side. If FBH didn't return an error - * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB - * count into an LSB count. - */ - bld.CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ); + /* FBH counts from the MSB side, while GLSL's findMSB() wants the count + * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then + * subtract the result from 31 to convert the MSB count into an LSB + * count. + */ + bld.CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ); - inst = bld.ADD(dst, src, brw_imm_d(31)); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->src[0].negate = true; - } + inst = bld.ADD(dst, src, brw_imm_d(31)); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->src[0].negate = true; break; }