From ca2a067469ca8b4b4a4f80fc1b948919257539a9 Mon Sep 17 00:00:00 2001 From: Zach Battleman Date: Wed, 17 Jul 2024 15:14:34 -0700 Subject: [PATCH] brw: Initial bits of BFN support v2 (idr): So much rebasing. Deleted a bunch of code that we're not going to need yet. v3 (Ken): bfn inst encoding fix v4 (idr): Add BFN to brw_get_lowered_simd_width. Reviewed-by: Matt Turner Part-of: --- src/intel/compiler/brw/brw_analysis_performance.cpp | 1 + src/intel/compiler/brw/brw_disasm.c | 11 +++++++++++ src/intel/compiler/brw/brw_eu.c | 1 + src/intel/compiler/brw/brw_eu.h | 4 ++++ src/intel/compiler/brw/brw_eu_defines.h | 1 + src/intel/compiler/brw/brw_eu_emit.c | 10 ++++++++++ src/intel/compiler/brw/brw_eu_inst.h | 2 ++ src/intel/compiler/brw/brw_generator.cpp | 3 +++ src/intel/compiler/brw/brw_lower_simd_width.cpp | 1 + 9 files changed, 34 insertions(+) diff --git a/src/intel/compiler/brw/brw_analysis_performance.cpp b/src/intel/compiler/brw/brw_analysis_performance.cpp index 3c130055f92..f405a06a841 100644 --- a/src/intel/compiler/brw/brw_analysis_performance.cpp +++ b/src/intel/compiler/brw/brw_analysis_performance.cpp @@ -371,6 +371,7 @@ namespace { case BRW_OPCODE_BFE: case BRW_OPCODE_BFI2: case BRW_OPCODE_CSEL: + case BRW_OPCODE_BFN: if (devinfo->ver >= 11) return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2, 0, 10, 6 /* XXX */, 14 /* XXX */, 0, 0); diff --git a/src/intel/compiler/brw/brw_disasm.c b/src/intel/compiler/brw/brw_disasm.c index e0709bec5c0..f3e9b9a326b 100644 --- a/src/intel/compiler/brw/brw_disasm.c +++ b/src/intel/compiler/brw/brw_disasm.c @@ -2035,6 +2035,13 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa, err |= print_opcode(file, isa, opcode); + if (opcode == BRW_OPCODE_BFN) { + unsigned char table_byte = 0; + table_byte |= (inst->data[1] >> (84 - 64)) & 0xF; + table_byte |= ((inst->data[1] >> (92 - 64)) & 0xF) << 4; + format(file, "[0x%x]", table_byte); + } + if (!is_send(opcode)) err |= control(file, "saturate", saturate, 
brw_eu_inst_saturate(devinfo, inst), NULL); @@ -2062,6 +2069,10 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa, format(file, "x%d", rcount); } else if (!is_send(opcode) && + /* BFN has data in the place of the conditional modifier which + * is not a conditional modifier + */ + opcode != BRW_OPCODE_BFN && (devinfo->ver < 12 || brw_eu_inst_src0_reg_file(devinfo, inst) != IMM || brw_type_size_bytes(brw_eu_inst_src0_type(devinfo, inst)) < 8)) { diff --git a/src/intel/compiler/brw/brw_eu.c b/src/intel/compiler/brw/brw_eu.c index af62c41a0c6..24f29795adc 100644 --- a/src/intel/compiler/brw/brw_eu.c +++ b/src/intel/compiler/brw/brw_eu.c @@ -573,6 +573,7 @@ static const struct opcode_desc opcode_descs[] = { { BRW_OPCODE_OR, 102, "or", 2, 1, GFX_GE(GFX12) }, { BRW_OPCODE_XOR, 7, "xor", 2, 1, GFX_LT(GFX12) }, { BRW_OPCODE_XOR, 103, "xor", 2, 1, GFX_GE(GFX12) }, + { BRW_OPCODE_BFN, 107, "bfn", 3, 1, GFX_GE(GFX12) }, { BRW_OPCODE_SHR, 8, "shr", 2, 1, GFX_LT(GFX12) }, { BRW_OPCODE_SHR, 104, "shr", 2, 1, GFX_GE(GFX12) }, { BRW_OPCODE_SHL, 9, "shl", 2, 1, GFX_LT(GFX12) }, diff --git a/src/intel/compiler/brw/brw_eu.h b/src/intel/compiler/brw/brw_eu.h index 92c3c50e5f4..d060a7f3654 100644 --- a/src/intel/compiler/brw/brw_eu.h +++ b/src/intel/compiler/brw/brw_eu.h @@ -1499,6 +1499,10 @@ brw_eu_inst *brw_IF(struct brw_codegen *p, unsigned execute_size); void brw_ELSE(struct brw_codegen *p); void brw_ENDIF(struct brw_codegen *p); +brw_eu_inst *brw_BFN(struct brw_codegen *p, struct brw_reg dest, + struct brw_reg src0, struct brw_reg src1, + struct brw_reg src2, struct brw_reg table_byte); + /* DO/WHILE loops: */ brw_eu_inst *brw_DO(struct brw_codegen *p, unsigned execute_size); diff --git a/src/intel/compiler/brw/brw_eu_defines.h b/src/intel/compiler/brw/brw_eu_defines.h index e40f2339e6b..67a4d1b18ff 100644 --- a/src/intel/compiler/brw/brw_eu_defines.h +++ b/src/intel/compiler/brw/brw_eu_defines.h @@ -165,6 +165,7 @@ enum ENUM_PACKED opcode { BRW_OPCODE_AND, 
BRW_OPCODE_OR, BRW_OPCODE_XOR, + BRW_OPCODE_BFN, BRW_OPCODE_SHR, BRW_OPCODE_SHL, BRW_OPCODE_SMOV, diff --git a/src/intel/compiler/brw/brw_eu_emit.c b/src/intel/compiler/brw/brw_eu_emit.c index cd2cb444a61..cf64fcdfebe 100644 --- a/src/intel/compiler/brw/brw_eu_emit.c +++ b/src/intel/compiler/brw/brw_eu_emit.c @@ -1046,6 +1046,16 @@ void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func) * Comparisons, if/else/endif */ +brw_eu_inst * +brw_BFN(struct brw_codegen *p, struct brw_reg dest, + struct brw_reg src0, struct brw_reg src1, struct brw_reg src2, + struct brw_reg table_byte) +{ + brw_eu_inst *inst = brw_alu3(p, BRW_OPCODE_BFN, dest, src0, src1, src2); + brw_eu_inst_set_boolean_func_ctrl(p->devinfo, inst, table_byte.ud); + return inst; +} + brw_eu_inst * brw_JMPI(struct brw_codegen *p, struct brw_reg index, unsigned predicate_control) diff --git a/src/intel/compiler/brw/brw_eu_inst.h b/src/intel/compiler/brw/brw_eu_inst.h index e5a49b36460..c61693c209d 100644 --- a/src/intel/compiler/brw/brw_eu_inst.h +++ b/src/intel/compiler/brw/brw_eu_inst.h @@ -520,6 +520,8 @@ FF(3src_a1_src0_reg_file, /* 9+ */ 43, 43, /* 12+ */ 46, 66, .grf_or_imm = F(3src_a1_src2_is_imm, /* 9+ */ -1, -1, /* 12+ */ 47, 47) F(3src_a1_src0_is_imm, /* 9+ */ -1, -1, /* 12+ */ 46, 46) +FDC(boolean_func_ctrl, /* 9+ */ -1, -1, /* 12+ */ 95, 92, 87, 84, devinfo->verx10 >= 125) + /* Source Modifier fields same in align16 */ FFC(3src_a1_dst_reg_file, /* 9+ */ 36, 36, /* 12+ */ 50, 50, devinfo->ver >= 10, .grf_or_acc = true) FC(3src_a1_exec_type, /* 9+ */ 35, 35, /* 12+ */ 39, 39, devinfo->ver >= 10) diff --git a/src/intel/compiler/brw/brw_generator.cpp b/src/intel/compiler/brw/brw_generator.cpp index 3bfcd8525f5..20799bf7425 100644 --- a/src/intel/compiler/brw/brw_generator.cpp +++ b/src/intel/compiler/brw/brw_generator.cpp @@ -987,6 +987,9 @@ brw_generator::generate_code(const brw_shader &s, case BRW_OPCODE_NOT: brw_NOT(p, dst, src[0]); break; + case BRW_OPCODE_BFN: + brw_BFN(p, dst, src[0], 
src[1], src[2], src[3]); + break; case BRW_OPCODE_ASR: brw_ASR(p, dst, src[0], src[1]); break; diff --git a/src/intel/compiler/brw/brw_lower_simd_width.cpp b/src/intel/compiler/brw/brw_lower_simd_width.cpp index 3a7c8b2bb92..ed1dbb2f425 100644 --- a/src/intel/compiler/brw/brw_lower_simd_width.cpp +++ b/src/intel/compiler/brw/brw_lower_simd_width.cpp @@ -292,6 +292,7 @@ brw_get_lowered_simd_width(const brw_shader *shader, const brw_inst *inst) case BRW_OPCODE_CMP: case BRW_OPCODE_BFI1: case BRW_OPCODE_BFI2: + case BRW_OPCODE_BFN: return get_fpu_lowered_simd_width(shader, inst); case SHADER_OPCODE_RCP: