diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c index 943f724e60f..203280570aa 100644 --- a/src/intel/compiler/brw_eu_validate.c +++ b/src/intel/compiler/brw_eu_validate.c @@ -289,6 +289,18 @@ sources_not_null(const struct gen_device_info *devinfo, return error_msg; } +static struct string +alignment_supported(const struct gen_device_info *devinfo, + const brw_inst *inst) +{ + struct string error_msg = { .str = NULL, .len = 0 }; + + ERROR_IF(devinfo->gen >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16, + "Align16 not supported"); + + return error_msg; +} + static bool inst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst *inst) { @@ -600,17 +612,31 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); struct string error_msg = { .str = NULL, .len = 0 }; + if (devinfo->gen >= 11) { + if (num_sources == 3) { + ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 || + brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1, + "Byte data type is not supported for src1/2 register regioning. This includes " + "byte broadcast as well."); + } + if (num_sources == 2) { + ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1, + "Byte data type is not supported for src1 register regioning. This includes " + "byte broadcast as well."); + } + } + if (num_sources == 3) - return (struct string){}; + return error_msg; if (inst_is_send(devinfo, inst)) - return (struct string){}; + return error_msg; if (exec_size == 1) - return (struct string){}; + return error_msg; if (desc->ndst == 0) - return (struct string){}; + return error_msg; /* The PRMs say: * @@ -635,12 +661,9 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf if (dst_type_is_byte) { if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) { - if (!inst_is_raw_move(devinfo, inst)) { + if (!inst_is_raw_move(devinfo, inst)) ERROR("Only raw MOV supports a packed-byte destination"); - return error_msg; - } else { - return (struct string){}; - } + return error_msg; } } @@ -1823,6 +1846,7 @@ brw_validate_instructions(const struct gen_device_info *devinfo, } else { CHECK(sources_not_null); CHECK(send_restrictions); + CHECK(alignment_supported); CHECK(general_restrictions_based_on_operand_types); CHECK(general_restrictions_on_region_parameters); CHECK(special_restrictions_for_mixed_float_mode); diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index 9655c2ef554..b781f1257ba 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -322,10 +322,11 @@ namespace brw { case SHADER_OPCODE_INT_REMAINDER: return emit(instruction(opcode, dispatch_width(), dst, fix_math_operand(src0), - fix_math_operand(src1))); + fix_math_operand(fix_byte_src(src1)))); default: - return emit(instruction(opcode, dispatch_width(), dst, src0, src1)); + return emit(instruction(opcode, dispatch_width(), dst, + src0, fix_byte_src(src1))); } } @@ -344,12 +345,12 @@ namespace brw { case BRW_OPCODE_LRP: return emit(instruction(opcode, dispatch_width(), dst, fix_3src_operand(src0), - fix_3src_operand(src1), - fix_3src_operand(src2))); + fix_3src_operand(fix_byte_src(src1)), + fix_3src_operand(fix_byte_src(src2)))); default: return emit(instruction(opcode, dispatch_width(), dst, - src0, src1, src2)); + src0, fix_byte_src(src1), fix_byte_src(src2))); } } @@ -399,8 +400,11 @@ namespace brw { { assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L); - return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), - fix_unsigned_negate(src1))); + /* In some cases we can't have bytes as operand for src1, so use the + * same type for both operand. + */ + return set_condmod(mod, SEL(dst, fix_unsigned_negate(fix_byte_src(src0)), + fix_unsigned_negate(fix_byte_src(src1)))); } /** @@ -657,8 +661,8 @@ namespace brw { emit(BRW_OPCODE_CSEL, retype(dst, BRW_REGISTER_TYPE_F), retype(src0, BRW_REGISTER_TYPE_F), - retype(src1, BRW_REGISTER_TYPE_F), - src2)); + retype(fix_byte_src(src1), BRW_REGISTER_TYPE_F), + fix_byte_src(src2))); } /** @@ -719,6 +723,22 @@ namespace brw { backend_shader *shader; + /** + * Byte sized operands are not supported for src1 on Gen11+. + */ + src_reg + fix_byte_src(const src_reg &src) const + { + if ((shader->devinfo->gen < 11 && !shader->devinfo->is_geminilake) || + type_sz(src.type) != 1) + return src; + + dst_reg temp = vgrf(src.type == BRW_REGISTER_TYPE_UB ? + BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_D); + MOV(temp, src); + return src_reg(temp); + } + private: /** * Workaround for negation of UD registers. See comment in diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index e0818caae13..b9d42b6e26e 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1340,9 +1340,16 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, case nir_op_ine32: { fs_reg dest = result; + /* On Gen11 we have an additional issue being that src1 cannot be a byte + * type. So we convert both operands for the comparison. + */ + fs_reg temp_op[2]; + temp_op[0] = bld.fix_byte_src(op[0]); + temp_op[1] = bld.fix_byte_src(op[1]); + const uint32_t bit_size = nir_src_bit_size(instr->src[0].src); if (bit_size != 32) - dest = bld.vgrf(op[0].type, 1); + dest = bld.vgrf(temp_op[0].type, 1); brw_conditional_mod cond; switch (instr->op) { @@ -1363,7 +1370,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, default: unreachable("bad opcode"); } - bld.CMP(dest, op[0], op[1], cond); + bld.CMP(dest, temp_op[0], temp_op[1], cond); if (bit_size > 32) { bld.MOV(result, subscript(dest, BRW_REGISTER_TYPE_UD, 0)); diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp index 65326416064..efdae4fd79b 100644 --- a/src/intel/compiler/test_eu_validate.cpp +++ b/src/intel/compiler/test_eu_validate.cpp @@ -2372,3 +2372,124 @@ TEST_P(validation_test, qword_low_power_no_depctrl) clear_instructions(p); } } + +TEST_P(validation_test, gen11_no_byte_src_1_2) +{ + static const struct { + enum opcode opcode; + unsigned access_mode; + + enum brw_reg_type dst_type; + struct { + enum brw_reg_type type; + unsigned vstride; + unsigned width; + unsigned hstride; + } srcs[3]; + + int gen; + bool expected_result; + } inst[] = { +#define INST(opcode, access_mode, dst_type, \ + src0_type, src0_vstride, src0_width, src0_hstride, \ + src1_type, src1_vstride, src1_width, src1_hstride, \ + src2_type, \ + gen, expected_result) \ + { \ + BRW_OPCODE_##opcode, \ + BRW_ALIGN_##access_mode, \ + BRW_REGISTER_TYPE_##dst_type, \ + { \ + { \ + BRW_REGISTER_TYPE_##src0_type, \ + BRW_VERTICAL_STRIDE_##src0_vstride, \ + BRW_WIDTH_##src0_width, \ + BRW_HORIZONTAL_STRIDE_##src0_hstride, \ + }, \ + { \ + BRW_REGISTER_TYPE_##src1_type, \ + BRW_VERTICAL_STRIDE_##src1_vstride, \ + BRW_WIDTH_##src1_width, \ + BRW_HORIZONTAL_STRIDE_##src1_hstride, \ + }, \ + { \ + BRW_REGISTER_TYPE_##src2_type, \ + }, \ + }, \ + gen, \ + expected_result, \ + } + + /* Passes on < 11 */ + INST(MOV, 16, F, B, 2, 4, 0, UD, 0, 4, 0, D, 8, true ), + INST(ADD, 16, UD, F, 0, 4, 0, UB, 0, 1, 0, D, 7, true ), + INST(MAD, 16, D, B, 0, 4, 0, UB, 0, 1, 0, B, 10, true ), + + /* Fails on 11+ */ + INST(MAD, 1, UB, W, 1, 1, 0, D, 0, 4, 0, B, 11, false ), + INST(MAD, 1, UB, W, 1, 1, 1, UB, 1, 1, 0, W, 11, false ), + INST(ADD, 1, W, W, 1, 4, 1, B, 1, 1, 0, D, 11, false ), + + /* Passes on 11+ */ + INST(MOV, 1, W, B, 8, 8, 1, D, 8, 8, 1, D, 11, true ), + INST(ADD, 1, UD, B, 8, 8, 1, W, 8, 8, 1, D, 11, true ), + INST(MAD, 1, B, B, 0, 1, 0, D, 0, 4, 0, W, 11, true ), + +#undef INST + }; + + + for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { + /* Skip instruction not meant for this gen. */ + if (devinfo.gen != inst[i].gen) + continue; + + brw_push_insn_state(p); + + brw_set_default_exec_size(p, BRW_EXECUTE_8); + brw_set_default_access_mode(p, inst[i].access_mode); + + switch (inst[i].opcode) { + case BRW_OPCODE_MOV: + brw_MOV(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].srcs[0].type)); + brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); + brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); + break; + case BRW_OPCODE_ADD: + brw_ADD(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].srcs[0].type), + retype(g0, inst[i].srcs[1].type)); + brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); + brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width); + brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); + brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].srcs[1].vstride); + brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width); + brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].srcs[1].hstride); + break; + case BRW_OPCODE_MAD: + brw_MAD(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].srcs[0].type), + retype(g0, inst[i].srcs[1].type), + retype(g0, inst[i].srcs[2].type)); + brw_inst_set_3src_a1_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); + brw_inst_set_3src_a1_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); + brw_inst_set_3src_a1_src1_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); + brw_inst_set_3src_a1_src1_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); + break; + default: + unreachable("invalid opcode"); + } + + brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); + + brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width); + brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width); + + brw_pop_insn_state(p); + + EXPECT_EQ(inst[i].expected_result, validate(p)); + + clear_instructions(p); + } +}