From f321e555b6fc8bf28a616afef4eea6981320931d Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Thu, 15 Feb 2024 22:09:40 -0800 Subject: [PATCH] intel/brw: Remove Gfx8- code from EU emission Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/brw_eu.h | 47 - src/intel/compiler/brw_eu_emit.c | 1082 +++-------------------- src/intel/compiler/brw_eu_util.c | 14 - src/intel/compiler/brw_fs_generator.cpp | 11 - 4 files changed, 121 insertions(+), 1033 deletions(-) diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index ae08c4ab505..1148ad2af56 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -113,7 +113,6 @@ struct brw_codegen { */ bool automatic_exec_sizes; - bool single_program_flow; const struct brw_isa_info *isa; const struct intel_device_info *devinfo; @@ -1717,16 +1716,6 @@ brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo, SET_BITS(simd_mode, 16, 16)); } -void brw_urb_WRITE(struct brw_codegen *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - enum brw_urb_write_flags flags, - unsigned msg_length, - unsigned response_length, - unsigned offset, - unsigned swizzle); - /** * Send message to shared unit \p sfid with a possibly indirect descriptor \p * desc. If \p desc is not an immediate it will be transparently loaded to an @@ -1778,30 +1767,10 @@ brw_inst *gfx9_fb_READ(struct brw_codegen *p, unsigned response_length, bool per_sample); -void brw_SAMPLE(struct brw_codegen *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - unsigned binding_table_index, - unsigned sampler, - unsigned msg_type, - unsigned response_length, - unsigned msg_length, - unsigned header_present, - unsigned simd_mode, - unsigned return_format); - void brw_adjust_sampler_state_pointer(struct brw_codegen *p, struct brw_reg header, struct brw_reg sampler_index); -void gfx4_math(struct brw_codegen *p, - struct brw_reg dest, - unsigned function, - unsigned msg_reg_nr, - struct brw_reg src, - unsigned precision ); - void gfx6_math(struct brw_codegen *p, struct brw_reg dest, unsigned function, @@ -1946,22 +1915,6 @@ brw_memory_fence(struct brw_codegen *p, bool commit_enable, unsigned bti); -void -brw_pixel_interpolator_query(struct brw_codegen *p, - struct brw_reg dest, - struct brw_reg mrf, - bool noperspective, - bool coarse_pixel_rate, - unsigned mode, - struct brw_reg data, - unsigned msg_length, - unsigned response_length); - -void -brw_find_live_channel(struct brw_codegen *p, - struct brw_reg dst, - bool last); - void brw_broadcast(struct brw_codegen *p, struct brw_reg dst, diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 0c8bcf65620..b98a6438d07 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -48,8 +48,6 @@ gfx6_resolve_implied_move(struct brw_codegen *p, unsigned msg_reg_nr) { const struct intel_device_info *devinfo = p->devinfo; - if (devinfo->ver < 6) - return; if (src->file == BRW_MESSAGE_REGISTER_FILE) return; @@ -78,8 +76,7 @@ gfx7_convert_mrf_to_grf(struct brw_codegen *p, struct brw_reg *reg) * Since we're pretending to have 16 MRFs anyway, we may as well use the * registers required for messages with EOT. */ - const struct intel_device_info *devinfo = p->devinfo; - if (devinfo->ver >= 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) { + if (reg->file == BRW_MESSAGE_REGISTER_FILE) { reg->file = BRW_GENERAL_REGISTER_FILE; reg->nr += GFX7_MRF_HACK_START; } @@ -194,11 +191,7 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest) * exec sizes set properly when they are emitted and we can't rely on * this code to fix it. */ - bool fix_exec_size; - if (devinfo->ver >= 6) - fix_exec_size = dest.width < BRW_EXECUTE_4; - else - fix_exec_size = dest.width < BRW_EXECUTE_8; + bool fix_exec_size = dest.width < BRW_EXECUTE_4; if (fix_exec_size) brw_inst_set_exec_size(devinfo, inst, dest.width); @@ -217,11 +210,10 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) gfx7_convert_mrf_to_grf(p, ®); - if (devinfo->ver >= 6 && - (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND || - brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC || - brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS || - brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC)) { + if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC || + brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) { /* Any source modifiers or regions will be ignored, since this just * identifies the MRF/GRF to start reading the message contents from. * Check for some likely failures. @@ -321,17 +313,6 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) * descriptions for registers in align_16 as align_1: */ brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); - } else if (devinfo->verx10 == 70 && - reg.type == BRW_REGISTER_TYPE_DF && - reg.vstride == BRW_VERTICAL_STRIDE_2) { - /* From SNB PRM: - * - * "For Align16 access mode, only encodings of 0000 and 0011 - * are allowed. Other codes are reserved." - * - * Presumably the DevSNB behavior applies to IVB as well. - */ - brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); } else { brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); } @@ -428,17 +409,6 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) * descriptions for registers in align_16 as align_1: */ brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); - } else if (devinfo->verx10 == 70 && - reg.type == BRW_REGISTER_TYPE_DF && - reg.vstride == BRW_VERTICAL_STRIDE_2) { - /* From SNB PRM: - * - * "For Align16 access mode, only encodings of 0000 and 0011 - * are allowed. Other codes are reserved." - * - * Presumably the DevSNB behavior applies to IVB as well. - */ - brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); } else { brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); } @@ -536,49 +506,6 @@ static void brw_set_ff_sync_message(struct brw_codegen *p, brw_inst_set_urb_complete(devinfo, insn, 0); } -static void brw_set_urb_message( struct brw_codegen *p, - brw_inst *insn, - enum brw_urb_write_flags flags, - unsigned msg_length, - unsigned response_length, - unsigned offset, - unsigned swizzle_control ) -{ - const struct intel_device_info *devinfo = p->devinfo; - - assert(devinfo->ver < 7 || swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); - assert(devinfo->ver < 7 || !(flags & BRW_URB_WRITE_ALLOCATE)); - assert(devinfo->ver >= 7 || !(flags & BRW_URB_WRITE_PER_SLOT_OFFSET)); - - brw_set_desc(p, insn, brw_message_desc( - devinfo, msg_length, response_length, true)); - - brw_inst_set_sfid(devinfo, insn, BRW_SFID_URB); - brw_inst_set_eot(devinfo, insn, !!(flags & BRW_URB_WRITE_EOT)); - - if (flags & BRW_URB_WRITE_OWORD) { - assert(msg_length == 2); /* header + one OWORD of data */ - brw_inst_set_urb_opcode(devinfo, insn, BRW_URB_OPCODE_WRITE_OWORD); - } else { - brw_inst_set_urb_opcode(devinfo, insn, BRW_URB_OPCODE_WRITE_HWORD); - } - - brw_inst_set_urb_global_offset(devinfo, insn, offset); - brw_inst_set_urb_swizzle_control(devinfo, insn, swizzle_control); - - if (devinfo->ver < 8) { - brw_inst_set_urb_complete(devinfo, insn, !!(flags & BRW_URB_WRITE_COMPLETE)); - } - - if (devinfo->ver < 7) { - brw_inst_set_urb_allocate(devinfo, insn, !!(flags & BRW_URB_WRITE_ALLOCATE)); - brw_inst_set_urb_used(devinfo, insn, !(flags & BRW_URB_WRITE_UNUSED)); - } else { - brw_inst_set_urb_per_slot_offset(devinfo, insn, - !!(flags & BRW_URB_WRITE_PER_SLOT_OFFSET)); - } -} - static void gfx7_set_dp_scratch_message(struct brw_codegen *p, brw_inst *inst, @@ -593,9 +520,8 @@ gfx7_set_dp_scratch_message(struct brw_codegen *p, { const struct intel_device_info *devinfo = p->devinfo; assert(num_regs == 1 || num_regs == 2 || num_regs == 4 || - (devinfo->ver >= 8 && num_regs == 8)); - const unsigned block_size = (devinfo->ver >= 8 ? util_logbase2(num_regs) : - num_regs - 1); + num_regs == 8); + const unsigned block_size = util_logbase2(num_regs); brw_set_desc(p, inst, brw_message_desc( devinfo, mlen, rlen, header_present)); @@ -630,15 +556,13 @@ brw_inst_set_state(const struct brw_isa_info *isa, if (is_3src(isa, brw_inst_opcode(isa, insn)) && state->access_mode == BRW_ALIGN_16) { brw_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2); - if (devinfo->ver >= 7) - brw_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2); + brw_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2); } else { brw_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2); - if (devinfo->ver >= 7) - brw_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2); + brw_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2); } - if (devinfo->ver >= 6 && devinfo->ver < 20) + if (devinfo->ver < 20) brw_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control); } @@ -949,11 +873,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, dest.type == BRW_REGISTER_TYPE_DF || dest.type == BRW_REGISTER_TYPE_D || dest.type == BRW_REGISTER_TYPE_UD || - (dest.type == BRW_REGISTER_TYPE_HF && devinfo->ver >= 8)); - if (devinfo->ver == 6) { - brw_inst_set_3src_a16_dst_reg_file(devinfo, inst, - dest.file == BRW_MESSAGE_REGISTER_FILE); - } + dest.type == BRW_REGISTER_TYPE_HF); brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4); brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask); @@ -985,32 +905,30 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, brw_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst, src2.vstride == BRW_VERTICAL_STRIDE_0); - if (devinfo->ver >= 7) { - /* Set both the source and destination types based on dest.type, - * ignoring the source register types. The MAD and LRP emitters ensure - * that all four types are float. The BFE and BFI2 emitters, however, - * may send us mixed D and UD types and want us to ignore that and use - * the destination type. - */ - brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type); - brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type); + /* Set both the source and destination types based on dest.type, + * ignoring the source register types. The MAD and LRP emitters ensure + * that all four types are float. The BFE and BFI2 emitters, however, + * may send us mixed D and UD types and want us to ignore that and use + * the destination type. + */ + brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type); + brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type); - /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType: - * - * "Three source instructions can use operands with mixed-mode - * precision. When SrcType field is set to :f or :hf it defines - * precision for source 0 only, and fields Src1Type and Src2Type - * define precision for other source operands: - * - * 0b = :f. Single precision Float (32-bit). - * 1b = :hf. Half precision Float (16-bit)." - */ - if (src1.type == BRW_REGISTER_TYPE_HF) - brw_inst_set_3src_a16_src1_type(devinfo, inst, 1); + /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType: + * + * "Three source instructions can use operands with mixed-mode + * precision. When SrcType field is set to :f or :hf it defines + * precision for source 0 only, and fields Src1Type and Src2Type + * define precision for other source operands: + * + * 0b = :f. Single precision Float (32-bit). + * 1b = :hf. Half precision Float (16-bit)." + */ + if (src1.type == BRW_REGISTER_TYPE_HF) + brw_inst_set_3src_a16_src1_type(devinfo, inst, 1); - if (src2.type == BRW_REGISTER_TYPE_HF) - brw_inst_set_3src_a16_src2_type(devinfo, inst, 1); - } + if (src2.type == BRW_REGISTER_TYPE_HF) + brw_inst_set_3src_a16_src2_type(devinfo, inst, 1); } return inst; @@ -1176,31 +1094,7 @@ ALU1(CBIT) ALU2(ADDC) ALU2(SUBB) ALU3(ADD3) - -brw_inst * -brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0) -{ - const struct intel_device_info *devinfo = p->devinfo; - - /* When converting F->DF on IVB/BYT, every odd source channel is ignored. - * To avoid the problems that causes, we use an source region to - * read each element twice. - */ - if (devinfo->verx10 == 70 && - brw_get_default_access_mode(p) == BRW_ALIGN_1 && - dest.type == BRW_REGISTER_TYPE_DF && - (src0.type == BRW_REGISTER_TYPE_F || - src0.type == BRW_REGISTER_TYPE_D || - src0.type == BRW_REGISTER_TYPE_UD) && - !has_scalar_region(src0)) { - assert(src0.vstride == src0.width + src0.hstride); - src0.vstride = src0.hstride; - src0.width = BRW_WIDTH_2; - src0.hstride = BRW_HORIZONTAL_STRIDE_0; - } - - return brw_alu1(p, BRW_OPCODE_MOV, dest, src0); -} +ALU1(MOV) brw_inst * brw_ADD(struct brw_codegen *p, struct brw_reg dest, @@ -1311,47 +1205,6 @@ brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth, src1, src2); } -brw_inst * -brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src) -{ - assert(p->devinfo->ver == 7); - - /* The F32TO16 instruction doesn't support 32-bit destination types in - * Align1 mode. Gfx7 (only) does zero out the high 16 bits in Align16 - * mode as an undocumented feature. - */ - if (BRW_ALIGN_16 == brw_get_default_access_mode(p)) { - assert(dst.type == BRW_REGISTER_TYPE_UD); - } else { - assert(dst.type == BRW_REGISTER_TYPE_W || - dst.type == BRW_REGISTER_TYPE_UW); - } - - return brw_alu1(p, BRW_OPCODE_F32TO16, dst, src); -} - -brw_inst * -brw_F16TO32(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src) -{ - assert(p->devinfo->ver == 7); - - if (BRW_ALIGN_16 == brw_get_default_access_mode(p)) { - assert(src.type == BRW_REGISTER_TYPE_UD); - } else { - /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32: - * - * Because this instruction does not have a 16-bit floating-point - * type, the source data type must be Word (W). The destination type - * must be F (Float). - */ - assert(src.type == BRW_REGISTER_TYPE_W || - src.type == BRW_REGISTER_TYPE_UW); - } - - return brw_alu1(p, BRW_OPCODE_F16TO32, dst, src); -} - - void brw_NOP(struct brw_codegen *p) { brw_inst *insn = next_insn(p, BRW_OPCODE_NOP); @@ -1450,35 +1303,16 @@ brw_IF(struct brw_codegen *p, unsigned execute_size) /* Override the defaults for this instruction: */ - if (devinfo->ver < 6) { - brw_set_dest(p, insn, brw_ip_reg()); - brw_set_src0(p, insn, brw_ip_reg()); - brw_set_src1(p, insn, brw_imm_d(0x0)); - } else if (devinfo->ver == 6) { - brw_set_dest(p, insn, brw_imm_w(0)); - brw_inst_set_gfx6_jump_count(devinfo, insn, 0); - brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); - brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); - } else if (devinfo->ver == 7) { - brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); - brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); - brw_set_src1(p, insn, brw_imm_w(0)); - brw_inst_set_jip(devinfo, insn, 0); - brw_inst_set_uip(devinfo, insn, 0); - } else { - brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); - if (devinfo->ver < 12) - brw_set_src0(p, insn, brw_imm_d(0)); - brw_inst_set_jip(devinfo, insn, 0); - brw_inst_set_uip(devinfo, insn, 0); - } + brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); + if (devinfo->ver < 12) + brw_set_src0(p, insn, brw_imm_d(0)); + brw_inst_set_jip(devinfo, insn, 0); + brw_inst_set_uip(devinfo, insn, 0); brw_inst_set_exec_size(devinfo, insn, execute_size); brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL); brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE); - if (!p->single_program_flow && devinfo->ver < 6) - brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH); push_if_stack(p, insn); p->if_depth_in_loop[p->loop_stack_depth]++; @@ -1511,47 +1345,6 @@ gfx6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional, return insn; } -/** - * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs. - */ -static void -convert_IF_ELSE_to_ADD(struct brw_codegen *p, - brw_inst *if_inst, brw_inst *else_inst) -{ - const struct intel_device_info *devinfo = p->devinfo; - - /* The next instruction (where the ENDIF would be, if it existed) */ - brw_inst *next_inst = &p->store[p->nr_insn]; - - assert(p->single_program_flow); - assert(if_inst != NULL && brw_inst_opcode(p->isa, if_inst) == BRW_OPCODE_IF); - assert(else_inst == NULL || brw_inst_opcode(p->isa, else_inst) == BRW_OPCODE_ELSE); - assert(brw_inst_exec_size(devinfo, if_inst) == BRW_EXECUTE_1); - - /* Convert IF to an ADD instruction that moves the instruction pointer - * to the first instruction of the ELSE block. If there is no ELSE - * block, point to where ENDIF would be. Reverse the predicate. - * - * There's no need to execute an ENDIF since we don't need to do any - * stack operations, and if we're currently executing, we just want to - * continue normally. - */ - brw_inst_set_opcode(p->isa, if_inst, BRW_OPCODE_ADD); - brw_inst_set_pred_inv(devinfo, if_inst, true); - - if (else_inst != NULL) { - /* Convert ELSE to an ADD instruction that points where the ENDIF - * would be. - */ - brw_inst_set_opcode(p->isa, else_inst, BRW_OPCODE_ADD); - - brw_inst_set_imm_ud(devinfo, if_inst, (else_inst - if_inst + 1) * 16); - brw_inst_set_imm_ud(devinfo, else_inst, (next_inst - else_inst) * 16); - } else { - brw_inst_set_imm_ud(devinfo, if_inst, (next_inst - if_inst) * 16); - } -} - /** * Patch IF and ELSE instructions with appropriate jump targets. */ @@ -1561,21 +1354,6 @@ patch_IF_ELSE(struct brw_codegen *p, { const struct intel_device_info *devinfo = p->devinfo; - /* We shouldn't be patching IF and ELSE instructions in single program flow - * mode when gen < 6, because in single program flow mode on those - * platforms, we convert flow control instructions to conditional ADDs that - * operate on IP (see brw_ENDIF). - * - * However, on Gfx6, writing to IP doesn't work in single program flow mode - * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may - * not be updated by non-flow control instructions."). And on later - * platforms, there is no significant benefit to converting control flow - * instructions to conditional ADDs. So we do patch IF and ELSE - * instructions in single program flow mode on those platforms. - */ - if (devinfo->ver < 6) - assert(!p->single_program_flow); - assert(if_inst != NULL && brw_inst_opcode(p->isa, if_inst) == BRW_OPCODE_IF); assert(endif_inst != NULL); assert(else_inst == NULL || brw_inst_opcode(p->isa, else_inst) == BRW_OPCODE_ELSE); @@ -1587,76 +1365,38 @@ patch_IF_ELSE(struct brw_codegen *p, if (else_inst == NULL) { /* Patch IF -> ENDIF */ - if (devinfo->ver < 6) { - /* Turn it into an IFF, which means no mask stack operations for - * all-false and jumping past the ENDIF. - */ - brw_inst_set_opcode(p->isa, if_inst, BRW_OPCODE_IFF); - brw_inst_set_gfx4_jump_count(devinfo, if_inst, - br * (endif_inst - if_inst + 1)); - brw_inst_set_gfx4_pop_count(devinfo, if_inst, 0); - } else if (devinfo->ver == 6) { - /* As of gfx6, there is no IFF and IF must point to the ENDIF. */ - brw_inst_set_gfx6_jump_count(devinfo, if_inst, br*(endif_inst - if_inst)); - } else { - brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst)); - brw_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst)); - } + brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst)); + brw_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst)); } else { brw_inst_set_exec_size(devinfo, else_inst, brw_inst_exec_size(devinfo, if_inst)); - /* Patch IF -> ELSE */ - if (devinfo->ver < 6) { - brw_inst_set_gfx4_jump_count(devinfo, if_inst, - br * (else_inst - if_inst)); - brw_inst_set_gfx4_pop_count(devinfo, if_inst, 0); - } else if (devinfo->ver == 6) { - brw_inst_set_gfx6_jump_count(devinfo, if_inst, - br * (else_inst - if_inst + 1)); - } - /* Patch ELSE -> ENDIF */ - if (devinfo->ver < 6) { - /* BRW_OPCODE_ELSE pre-gfx6 should point just past the - * matching ENDIF. - */ - brw_inst_set_gfx4_jump_count(devinfo, else_inst, - br * (endif_inst - else_inst + 1)); - brw_inst_set_gfx4_pop_count(devinfo, else_inst, 1); - } else if (devinfo->ver == 6) { - /* BRW_OPCODE_ELSE on gfx6 should point to the matching ENDIF. */ - brw_inst_set_gfx6_jump_count(devinfo, else_inst, - br * (endif_inst - else_inst)); + /* The IF instruction's JIP should point just past the ELSE */ + brw_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1)); + /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */ + brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst)); + + if (devinfo->ver < 11) { + /* Set the ELSE instruction to use branch_ctrl with a join + * jump target pointing at the NOP inserted right before + * the ENDIF instruction in order to make sure it is + * executed in all cases, since attempting to do the same + * as on other generations could cause the EU to jump at + * the instruction immediately after the ENDIF due to + * Wa_220160235, which could cause the program to continue + * running with all channels disabled. + */ + brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst - 1)); + brw_inst_set_branch_control(devinfo, else_inst, true); } else { - /* The IF instruction's JIP should point just past the ELSE */ - brw_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1)); - /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */ - brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst)); - - if (devinfo->ver >= 8 && devinfo->ver < 11) { - /* Set the ELSE instruction to use branch_ctrl with a join - * jump target pointing at the NOP inserted right before - * the ENDIF instruction in order to make sure it is - * executed in all cases, since attempting to do the same - * as on other generations could cause the EU to jump at - * the instruction immediately after the ENDIF due to - * Wa_220160235, which could cause the program to continue - * running with all channels disabled. - */ - brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst - 1)); - brw_inst_set_branch_control(devinfo, else_inst, true); - } else { - brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst)); - } - - if (devinfo->ver >= 8) { - /* Since we don't set branch_ctrl on Gfx11+, the ELSE's - * JIP and UIP both should point to ENDIF on those - * platforms. - */ - brw_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst)); - } + brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst)); } + + /* Since we don't set branch_ctrl on Gfx11+, the ELSE's + * JIP and UIP both should point to ENDIF on those + * platforms. + */ + brw_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst)); } } @@ -1668,33 +1408,14 @@ brw_ELSE(struct brw_codegen *p) insn = next_insn(p, BRW_OPCODE_ELSE); - if (devinfo->ver < 6) { - brw_set_dest(p, insn, brw_ip_reg()); - brw_set_src0(p, insn, brw_ip_reg()); - brw_set_src1(p, insn, brw_imm_d(0x0)); - } else if (devinfo->ver == 6) { - brw_set_dest(p, insn, brw_imm_w(0)); - brw_inst_set_gfx6_jump_count(devinfo, insn, 0); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - } else if (devinfo->ver == 7) { - brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, brw_imm_w(0)); - brw_inst_set_jip(devinfo, insn, 0); - brw_inst_set_uip(devinfo, insn, 0); - } else { - brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - if (devinfo->ver < 12) - brw_set_src0(p, insn, brw_imm_d(0)); - brw_inst_set_jip(devinfo, insn, 0); - brw_inst_set_uip(devinfo, insn, 0); - } + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + if (devinfo->ver < 12) + brw_set_src0(p, insn, brw_imm_d(0)); + brw_inst_set_jip(devinfo, insn, 0); + brw_inst_set_uip(devinfo, insn, 0); brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE); - if (!p->single_program_flow && devinfo->ver < 6) - brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH); push_if_stack(p, insn); } @@ -1707,11 +1428,10 @@ brw_ENDIF(struct brw_codegen *p) brw_inst *else_inst = NULL; brw_inst *if_inst = NULL; brw_inst *tmp; - bool emit_endif = true; assert(p->if_stack_depth > 0); - if (devinfo->ver >= 8 && devinfo->ver < 11 && + if (devinfo->ver < 11 && brw_inst_opcode(p->isa, &p->store[p->if_stack[ p->if_stack_depth - 1]]) == BRW_OPCODE_ELSE) { /* Insert a NOP to be specified as join instruction within the @@ -1725,28 +1445,12 @@ brw_ENDIF(struct brw_codegen *p) brw_NOP(p); } - /* In single program flow mode, we can express IF and ELSE instructions - * equivalently as ADD instructions that operate on IP. On platforms prior - * to Gfx6, flow control instructions cause an implied thread switch, so - * this is a significant savings. - * - * However, on Gfx6, writing to IP doesn't work in single program flow mode - * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may - * not be updated by non-flow control instructions."). And on later - * platforms, there is no significant benefit to converting control flow - * instructions to conditional ADDs. So we only do this trick on Gfx4 and - * Gfx5. - */ - if (devinfo->ver < 6 && p->single_program_flow) - emit_endif = false; - /* * A single next_insn() may change the base address of instruction store * memory(p->store), so call it first before referencing the instruction * store pointer from an index */ - if (emit_endif) - insn = next_insn(p, BRW_OPCODE_ENDIF); + insn = next_insn(p, BRW_OPCODE_ENDIF); /* Pop the IF and (optional) ELSE instructions from the stack */ p->if_depth_in_loop[p->loop_stack_depth]--; @@ -1757,42 +1461,12 @@ brw_ENDIF(struct brw_codegen *p) } if_inst = tmp; - if (!emit_endif) { - /* ENDIF is useless; don't bother emitting it. */ - convert_IF_ELSE_to_ADD(p, if_inst, else_inst); - return; - } - - if (devinfo->ver < 6) { - brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, brw_imm_d(0x0)); - } else if (devinfo->ver == 6) { - brw_set_dest(p, insn, brw_imm_w(0)); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - } else if (devinfo->ver == 7) { - brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, brw_imm_w(0)); - } else { - brw_set_src0(p, insn, brw_imm_d(0)); - } + brw_set_src0(p, insn, brw_imm_d(0)); brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE); - if (devinfo->ver < 6) - brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH); - /* Also pop item off the stack in the endif instruction: */ - if (devinfo->ver < 6) { - brw_inst_set_gfx4_jump_count(devinfo, insn, 0); - brw_inst_set_gfx4_pop_count(devinfo, insn, 1); - } else if (devinfo->ver == 6) { - brw_inst_set_gfx6_jump_count(devinfo, insn, 2); - } else { - brw_inst_set_jip(devinfo, insn, 2); - } + brw_inst_set_jip(devinfo, insn, 2); patch_IF_ELSE(p, if_inst, else_inst, insn); } @@ -1803,20 +1477,8 @@ brw_BREAK(struct brw_codegen *p) brw_inst *insn; insn = next_insn(p, BRW_OPCODE_BREAK); - if (devinfo->ver >= 8) { - brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, brw_imm_d(0x0)); - } else if (devinfo->ver >= 6) { - brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, brw_imm_d(0x0)); - } else { - brw_set_dest(p, insn, brw_ip_reg()); - brw_set_src0(p, insn, brw_ip_reg()); - brw_set_src1(p, insn, brw_imm_d(0x0)); - brw_inst_set_gfx4_pop_count(devinfo, insn, - p->if_depth_in_loop[p->loop_stack_depth]); - } + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, brw_imm_d(0x0)); brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); @@ -1831,17 +1493,8 @@ brw_CONT(struct brw_codegen *p) insn = next_insn(p, BRW_OPCODE_CONTINUE); brw_set_dest(p, insn, brw_ip_reg()); - if (devinfo->ver >= 8) { - brw_set_src0(p, insn, brw_imm_d(0x0)); - } else { - brw_set_src0(p, insn, brw_ip_reg()); - brw_set_src1(p, insn, brw_imm_d(0x0)); - } + brw_set_src0(p, insn, brw_imm_d(0x0)); - if (devinfo->ver < 6) { - brw_inst_set_gfx4_pop_count(devinfo, insn, - p->if_depth_in_loop[p->loop_stack_depth]); - } brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); return insn; @@ -1855,19 +1508,7 @@ brw_HALT(struct brw_codegen *p) insn = next_insn(p, BRW_OPCODE_HALT); brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - if (devinfo->ver < 6) { - /* From the Gfx4 PRM: - * - * "IP register must be put (for example, by the assembler) at - * and locations. - */ - brw_set_dest(p, insn, brw_ip_reg()); - brw_set_src0(p, insn, brw_ip_reg()); - brw_set_src1(p, insn, brw_imm_d(0x0)); /* exitcode updated later. */ - } else if (devinfo->ver < 8) { - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */ - } else if (devinfo->ver < 12) { + if (devinfo->ver < 12) { brw_set_src0(p, insn, brw_imm_d(0x0)); } @@ -1885,70 +1526,14 @@ brw_HALT(struct brw_codegen *p) * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, * jip and no DO instruction. * - * For non-uniform control flow pre-gfx6, there's a DO instruction to - * push the mask, and a WHILE to jump back, and BREAK to get out and - * pop the mask. - * * For gfx6, there's no more mask stack, so no need for DO. WHILE * just points back to the first instruction of the loop. */ brw_inst * brw_DO(struct brw_codegen *p, unsigned execute_size) { - const struct intel_device_info *devinfo = p->devinfo; - - if (devinfo->ver >= 6 || p->single_program_flow) { - push_loop_stack(p, &p->store[p->nr_insn]); - return &p->store[p->nr_insn]; - } else { - brw_inst *insn = next_insn(p, BRW_OPCODE_DO); - - push_loop_stack(p, insn); - - /* Override the defaults for this instruction: - */ - brw_set_dest(p, insn, brw_null_reg()); - brw_set_src0(p, insn, brw_null_reg()); - brw_set_src1(p, insn, brw_null_reg()); - - brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); - brw_inst_set_exec_size(devinfo, insn, execute_size); - brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); - - return insn; - } -} - -/** - * For pre-gfx6, we patch BREAK/CONT instructions to point at the WHILE - * instruction here. - * - * For gfx6+, see brw_set_uip_jip(), which doesn't care so much about the loop - * nesting, since it can always just point to the end of the block/current loop. - */ -static void -brw_patch_break_cont(struct brw_codegen *p, brw_inst *while_inst) -{ - const struct intel_device_info *devinfo = p->devinfo; - brw_inst *do_inst = get_inner_do_insn(p); - brw_inst *inst; - unsigned br = brw_jump_scale(devinfo); - - assert(devinfo->ver < 6); - - for (inst = while_inst - 1; inst != do_inst; inst--) { - /* If the jump count is != 0, that means that this instruction has already - * been patched because it's part of a loop inside of the one we're - * patching. - */ - if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_BREAK && - brw_inst_gfx4_jump_count(devinfo, inst) == 0) { - brw_inst_set_gfx4_jump_count(devinfo, inst, br*((while_inst - inst) + 1)); - } else if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_CONTINUE && - brw_inst_gfx4_jump_count(devinfo, inst) == 0) { - brw_inst_set_gfx4_jump_count(devinfo, inst, br * (while_inst - inst)); - } - } + push_loop_stack(p, &p->store[p->nr_insn]); + return &p->store[p->nr_insn]; } brw_inst * @@ -1958,55 +1543,16 @@ brw_WHILE(struct brw_codegen *p) brw_inst *insn, *do_insn; unsigned br = brw_jump_scale(devinfo); - if (devinfo->ver >= 6) { - insn = next_insn(p, BRW_OPCODE_WHILE); - do_insn = get_inner_do_insn(p); + insn = next_insn(p, BRW_OPCODE_WHILE); + do_insn = get_inner_do_insn(p); - if (devinfo->ver >= 8) { - brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - if (devinfo->ver < 12) - brw_set_src0(p, insn, brw_imm_d(0)); - brw_inst_set_jip(devinfo, insn, br * (do_insn - insn)); - } else if (devinfo->ver == 7) { - brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, brw_imm_w(0)); - brw_inst_set_jip(devinfo, insn, br * (do_insn - insn)); - } else { - brw_set_dest(p, insn, brw_imm_w(0)); - brw_inst_set_gfx6_jump_count(devinfo, insn, br * (do_insn - insn)); - brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - } + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + if (devinfo->ver < 12) + brw_set_src0(p, insn, brw_imm_d(0)); + brw_inst_set_jip(devinfo, insn, br * (do_insn - insn)); - brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); + brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); - } else { - if (p->single_program_flow) { - insn = next_insn(p, BRW_OPCODE_ADD); - do_insn = get_inner_do_insn(p); - - brw_set_dest(p, insn, brw_ip_reg()); - brw_set_src0(p, insn, brw_ip_reg()); - brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16)); - brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); - } else { - insn = next_insn(p, BRW_OPCODE_WHILE); - do_insn = get_inner_do_insn(p); - - assert(brw_inst_opcode(p->isa, do_insn) == BRW_OPCODE_DO); - - brw_set_dest(p, insn, brw_ip_reg()); - brw_set_src0(p, insn, brw_ip_reg()); - brw_set_src1(p, insn, brw_imm_d(0)); - - brw_inst_set_exec_size(devinfo, insn, brw_inst_exec_size(devinfo, do_insn)); - brw_inst_set_gfx4_jump_count(devinfo, insn, br * (do_insn - insn + 1)); - brw_inst_set_gfx4_pop_count(devinfo, insn, 0); - - brw_patch_break_cont(p, insn); - } - } brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); p->loop_stack_depth--; @@ -2020,10 +1566,7 @@ void brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx) { const struct intel_device_info *devinfo = p->devinfo; brw_inst *jmp_insn = &p->store[jmp_insn_idx]; - unsigned jmpi = 1; - - if (devinfo->ver >= 5) - jmpi = 2; + unsigned jmpi = 2; assert(brw_inst_opcode(p->isa, jmp_insn) == BRW_OPCODE_JMPI); assert(brw_inst_src1_reg_file(devinfo, jmp_insn) == BRW_IMMEDIATE_VALUE); @@ -2049,20 +1592,6 @@ void brw_CMP(struct brw_codegen *p, brw_set_dest(p, insn, dest); brw_set_src0(p, insn, src0); brw_set_src1(p, insn, src1); - - /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds - * page says: - * "Any CMP instruction with a null destination must use a {switch}." - * - * It also applies to other Gfx7 platforms (IVB, BYT) even though it isn't - * mentioned on their work-arounds pages. - */ - if (devinfo->ver == 7) { - if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && - dest.nr == BRW_ARF_NULL) { - brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH); - } - } } void brw_CMPN(struct brw_codegen *p, @@ -2078,63 +1607,12 @@ void brw_CMPN(struct brw_codegen *p, brw_set_dest(p, insn, dest); brw_set_src0(p, insn, src0); brw_set_src1(p, insn, src1); - - /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit ISA) - * says: - * - * If the destination is the null register, the {Switch} instruction - * option must be used. - * - * Page 77 of the Haswell PRM Volume 2b contains the same text. - */ - if (devinfo->ver == 7) { - if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && - dest.nr == BRW_ARF_NULL) { - brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH); - } - } } /*********************************************************************** * Helpers for the various SEND message types: */ -/** Extended math function, float[8]. - */ -void gfx4_math(struct brw_codegen *p, - struct brw_reg dest, - unsigned function, - unsigned msg_reg_nr, - struct brw_reg src, - unsigned precision ) -{ - const struct intel_device_info *devinfo = p->devinfo; - brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); - unsigned data_type; - if (has_scalar_region(src)) { - data_type = BRW_MATH_DATA_SCALAR; - } else { - data_type = BRW_MATH_DATA_VECTOR; - } - - assert(devinfo->ver < 6); - - /* Example code doesn't set predicate_control for send - * instructions. - */ - brw_inst_set_pred_control(devinfo, insn, 0); - brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr); - - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, src); - brw_set_math_message(p, - insn, - function, - src.type == BRW_REGISTER_TYPE_D, - precision, - data_type); -} - void gfx6_math(struct brw_codegen *p, struct brw_reg dest, unsigned function, @@ -2144,16 +1622,10 @@ void gfx6_math(struct brw_codegen *p, const struct intel_device_info *devinfo = p->devinfo; brw_inst *insn = next_insn(p, BRW_OPCODE_MATH); - assert(devinfo->ver >= 6); - assert(dest.file == BRW_GENERAL_REGISTER_FILE || - (devinfo->ver >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE)); + dest.file == BRW_MESSAGE_REGISTER_FILE); assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); - if (devinfo->ver == 6) { - assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); - assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); - } if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || @@ -2161,7 +1633,7 @@ void gfx6_math(struct brw_codegen *p, assert(src0.type != BRW_REGISTER_TYPE_F); assert(src1.type != BRW_REGISTER_TYPE_F); assert(src1.file == BRW_GENERAL_REGISTER_FILE || - (devinfo->ver >= 8 && src1.file == BRW_IMMEDIATE_VALUE)); + src1.file == BRW_IMMEDIATE_VALUE); /* From BSpec 6647/47428 "[Instruction] Extended Math Function": * INT DIV function does not support source modifiers. */ @@ -2176,14 +1648,6 @@ void gfx6_math(struct brw_codegen *p, (src1.type == BRW_REGISTER_TYPE_HF && devinfo->ver >= 9)); } - /* Source modifiers are ignored for extended math instructions on Gfx6. */ - if (devinfo->ver == 6) { - assert(!src0.negate); - assert(!src0.abs); - assert(!src1.negate); - assert(!src1.abs); - } - brw_inst_set_math_function(devinfo, insn, function); brw_set_dest(p, insn, dest); @@ -2199,10 +1663,7 @@ unsigned brw_scratch_surface_idx(const struct brw_codegen *p) { /* The scratch space is thread-local so IA coherency is unnecessary. */ - if (p->devinfo->ver >= 8) - return GFX8_BTI_STATELESS_NON_COHERENT; - else - return BRW_BTI_STATELESS; + return GFX8_BTI_STATELESS_NON_COHERENT; } /** @@ -2218,15 +1679,11 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, unsigned offset) { const struct intel_device_info *devinfo = p->devinfo; - const unsigned target_cache = - (devinfo->ver >= 7 ? GFX7_SFID_DATAPORT_DATA_CACHE : - devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE : - BRW_SFID_DATAPORT_WRITE); + const unsigned target_cache = GFX7_SFID_DATAPORT_DATA_CACHE; const struct tgl_swsb swsb = brw_get_default_swsb(p); uint32_t msg_type; - if (devinfo->ver >= 6) - offset /= 16; + offset /= 16; mrf = retype(mrf, BRW_REGISTER_TYPE_UD); @@ -2273,8 +1730,6 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, src_header = vec16(src_header); assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE); - if (devinfo->ver < 6) - brw_inst_set_base_mrf(devinfo, insn, mrf.nr); /* Until gfx6, writes followed by reads from the same location * are not guaranteed to be ordered unless write_commit is set. @@ -2286,25 +1741,13 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, * protection. Our use of DP writes is all about register * spilling within a thread. */ - if (devinfo->ver >= 6) { - dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); - send_commit_msg = 0; - } else { - dest = src_header; - send_commit_msg = 1; - } + dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); + send_commit_msg = 0; brw_set_dest(p, insn, dest); - if (devinfo->ver >= 6) { - brw_set_src0(p, insn, mrf); - } else { - brw_set_src0(p, insn, brw_null_reg()); - } + brw_set_src0(p, insn, mrf); - if (devinfo->ver >= 6) - msg_type = GFX6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; - else - msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; + msg_type = GFX6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; brw_set_desc(p, insn, brw_message_desc(devinfo, mlen, send_commit_msg, true) | @@ -2332,28 +1775,20 @@ brw_oword_block_read_scratch(struct brw_codegen *p, const struct intel_device_info *devinfo = p->devinfo; const struct tgl_swsb swsb = brw_get_default_swsb(p); - if (devinfo->ver >= 6) - offset /= 16; + offset /= 16; - if (p->devinfo->ver >= 7) { - /* On gen 7 and above, we no longer have message registers and we can - * send from any register we want. By using the destination register - * for the message, we guarantee that the implied message write won't - * accidentally overwrite anything. This has been a problem because - * the MRF registers and source for the final FB write are both fixed - * and may overlap. - */ - mrf = retype(dest, BRW_REGISTER_TYPE_UD); - } else { - mrf = retype(mrf, BRW_REGISTER_TYPE_UD); - } + /* On gen 7 and above, we no longer have message registers and we can + * send from any register we want. By using the destination register + * for the message, we guarantee that the implied message write won't + * accidentally overwrite anything. This has been a problem because + * the MRF registers and source for the final FB write are both fixed + * and may overlap. + */ + mrf = retype(dest, BRW_REGISTER_TYPE_UD); dest = retype(dest, BRW_REGISTER_TYPE_UW); const unsigned rlen = num_regs; - const unsigned target_cache = - (devinfo->ver >= 7 ? GFX7_SFID_DATAPORT_DATA_CACHE : - devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE : - BRW_SFID_DATAPORT_READ); + const unsigned target_cache = GFX7_SFID_DATAPORT_DATA_CACHE; { brw_push_insn_state(p); @@ -2381,12 +1816,7 @@ brw_oword_block_read_scratch(struct brw_codegen *p, brw_inst_set_compression(devinfo, insn, false); brw_set_dest(p, insn, dest); /* UW? */ - if (devinfo->ver >= 6) { - brw_set_src0(p, insn, mrf); - } else { - brw_set_src0(p, insn, brw_null_reg()); - brw_inst_set_base_mrf(devinfo, insn, mrf.nr); - } + brw_set_src0(p, insn, mrf); brw_set_desc(p, insn, brw_message_desc(devinfo, 1, rlen, true) | @@ -2443,15 +1873,12 @@ void brw_oword_block_read(struct brw_codegen *p, uint32_t bind_table_index) { const struct intel_device_info *devinfo = p->devinfo; - const unsigned target_cache = - (devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_CONSTANT_CACHE : - BRW_SFID_DATAPORT_READ); + const unsigned target_cache = GFX6_SFID_DATAPORT_CONSTANT_CACHE; const unsigned exec_size = 1 << brw_get_default_exec_size(p); const struct tgl_swsb swsb = brw_get_default_swsb(p); /* On newer hardware, offset is in units of owords. */ - if (devinfo->ver >= 6) - offset /= 16; + offset /= 16; mrf = retype(mrf, BRW_REGISTER_TYPE_UD); @@ -2486,12 +1913,7 @@ void brw_oword_block_read(struct brw_codegen *p, dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); brw_set_dest(p, insn, dest); - if (devinfo->ver >= 6) { - brw_set_src0(p, insn, mrf); - } else { - brw_set_src0(p, insn, brw_null_reg()); - brw_inst_set_base_mrf(devinfo, insn, mrf.nr); - } + brw_set_src0(p, insn, mrf); brw_set_desc(p, insn, brw_message_desc(devinfo, 1, DIV_ROUND_UP(exec_size, 8), true) | @@ -2529,60 +1951,6 @@ gfx9_fb_READ(struct brw_codegen *p, return insn; } -/** - * Texture sample instruction. - * Note: the msg_type plus msg_length values determine exactly what kind - * of sampling operation is performed. See volume 4, page 161 of docs. - */ -void brw_SAMPLE(struct brw_codegen *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - unsigned binding_table_index, - unsigned sampler, - unsigned msg_type, - unsigned response_length, - unsigned msg_length, - unsigned header_present, - unsigned simd_mode, - unsigned return_format) -{ - const struct intel_device_info *devinfo = p->devinfo; - brw_inst *insn; - - if (msg_reg_nr != -1) - gfx6_resolve_implied_move(p, &src0, msg_reg_nr); - - insn = next_insn(p, BRW_OPCODE_SEND); - brw_inst_set_sfid(devinfo, insn, BRW_SFID_SAMPLER); - brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); /* XXX */ - - /* From the 965 PRM (volume 4, part 1, section 14.2.41): - * - * "Instruction compression is not allowed for this instruction (that - * is, send). The hardware behavior is undefined if this instruction is - * set as compressed. However, compress control can be set to "SecHalf" - * to affect the EMask generation." - * - * No similar wording is found in later PRMs, but there are examples - * utilizing send with SecHalf. More importantly, SIMD8 sampler messages - * are allowed in SIMD16 mode and they could not work without SecHalf. For - * these reasons, we allow BRW_COMPRESSION_2NDHALF here. - */ - brw_inst_set_compression(devinfo, insn, false); - - if (devinfo->ver < 6) - brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr); - - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, src0); - brw_set_desc(p, insn, - brw_message_desc(devinfo, msg_length, response_length, - header_present) | - brw_sampler_desc(devinfo, binding_table_index, sampler, - msg_type, simd_mode, return_format)); -} - /* Adjust the message header's sampler state pointer to * select the correct group of 16 samplers. */ @@ -2599,14 +1967,11 @@ void brw_adjust_sampler_state_pointer(struct brw_codegen *p, * exclusively use the offset - we have to use both. */ - const struct intel_device_info *devinfo = p->devinfo; - if (sampler_index.file == BRW_IMMEDIATE_VALUE) { const int sampler_state_size = 16; /* 16 bytes */ uint32_t sampler = sampler_index.ud; if (sampler >= 16) { - assert(devinfo->verx10 >= 75); brw_ADD(p, get_element_ud(header, 3), get_element_ud(brw_vec8_grf(0, 0), 3), @@ -2614,10 +1979,6 @@ void brw_adjust_sampler_state_pointer(struct brw_codegen *p, } } else { /* Non-const sampler array indexing case */ - if (devinfo->verx10 <= 70) { - return; - } - struct brw_reg temp = get_element_ud(header, 3); brw_push_insn_state(p); @@ -2632,58 +1993,6 @@ void brw_adjust_sampler_state_pointer(struct brw_codegen *p, } } -/* All these variables are pretty confusing - we might be better off - * using bitmasks and macros for this, in the old style. Or perhaps - * just having the caller instantiate the fields in dword3 itself. - */ -void brw_urb_WRITE(struct brw_codegen *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - enum brw_urb_write_flags flags, - unsigned msg_length, - unsigned response_length, - unsigned offset, - unsigned swizzle) -{ - const struct intel_device_info *devinfo = p->devinfo; - brw_inst *insn; - - gfx6_resolve_implied_move(p, &src0, msg_reg_nr); - - if (devinfo->ver >= 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) { - /* Enable Channel Masks in the URB_WRITE_HWORD message header */ - brw_push_insn_state(p); - brw_set_default_access_mode(p, BRW_ALIGN_1); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_exec_size(p, BRW_EXECUTE_1); - brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5), - BRW_REGISTER_TYPE_UD), - retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD), - brw_imm_ud(0xff00)); - brw_pop_insn_state(p); - } - - insn = next_insn(p, BRW_OPCODE_SEND); - - assert(msg_length < BRW_MAX_MRF(devinfo->ver)); - - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, src0); - brw_set_src1(p, insn, brw_imm_d(0)); - - if (devinfo->ver < 6) - brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr); - - brw_set_urb_message(p, - insn, - flags, - msg_length, - response_length, - offset, - swizzle); -} - void brw_send_indirect_message(struct brw_codegen *p, unsigned sfid, @@ -2928,8 +2237,7 @@ while_jumps_before_offset(const struct intel_device_info *devinfo, brw_inst *insn, int while_offset, int start_offset) { int scale = 16 / brw_jump_scale(devinfo); - int jip = devinfo->ver == 6 ? brw_inst_gfx6_jump_count(devinfo, insn) - : brw_inst_jip(devinfo, insn); + int jip = brw_inst_jip(devinfo, insn); assert(jip < 0); return while_offset + jip * scale <= start_offset; } @@ -2989,8 +2297,6 @@ brw_find_loop_end(struct brw_codegen *p, int start_offset) int offset; void *store = p->store; - assert(devinfo->ver >= 6); - /* Always start after the instruction (such as a WHILE) we're trying to fix * up. */ @@ -3020,9 +2326,6 @@ brw_set_uip_jip(struct brw_codegen *p, int start_offset) int scale = 16 / br; void *store = p->store; - if (devinfo->ver < 6) - return; - for (offset = start_offset; offset < p->next_insn_offset; offset += 16) { brw_inst *insn = store + offset; assert(brw_inst_cmpt_control(devinfo, insn) == 0); @@ -3034,8 +2337,7 @@ brw_set_uip_jip(struct brw_codegen *p, int start_offset) brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale); /* Gfx7 UIP points to WHILE; Gfx6 points just after it */ brw_inst_set_uip(devinfo, insn, - (brw_find_loop_end(p, offset) - offset + - (devinfo->ver == 6 ? 16 : 0)) / scale); + (brw_find_loop_end(p, offset) - offset) / scale); break; } @@ -3055,10 +2357,7 @@ brw_set_uip_jip(struct brw_codegen *p, int start_offset) int block_end_offset = brw_find_next_block_end(p, offset); int32_t jump = (block_end_offset == 0) ? 1 * br : (block_end_offset - offset) / scale; - if (devinfo->ver >= 7) - brw_inst_set_jip(devinfo, insn, jump); - else - brw_inst_set_gfx6_jump_count(devinfo, insn, jump); + brw_inst_set_jip(devinfo, insn, jump); break; } @@ -3099,7 +2398,6 @@ void brw_ff_sync(struct brw_codegen *p, unsigned response_length, bool eot) { - const struct intel_device_info *devinfo = p->devinfo; brw_inst *insn; gfx6_resolve_implied_move(p, &src0, msg_reg_nr); @@ -3109,9 +2407,6 @@ void brw_ff_sync(struct brw_codegen *p, brw_set_src0(p, insn, src0); brw_set_src1(p, insn, brw_imm_d(0)); - if (devinfo->ver < 6) - brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr); - brw_set_ff_sync_message(p, insn, allocate, @@ -3119,44 +2414,6 @@ void brw_ff_sync(struct brw_codegen *p, eot); } -/** - * Emit the SEND instruction necessary to generate stream output data on Gfx6 - * (for transform feedback). - * - * If send_commit_msg is true, this is the last piece of stream output data - * from this thread, so send the data as a committed write. According to the - * Sandy Bridge PRM (volume 2 part 1, section 4.5.1): - * - * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all - * writes are complete by sending the final write as a committed write." - */ -void -brw_svb_write(struct brw_codegen *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - unsigned binding_table_index, - bool send_commit_msg) -{ - const struct intel_device_info *devinfo = p->devinfo; - assert(devinfo->ver == 6); - const unsigned target_cache = GFX6_SFID_DATAPORT_RENDER_CACHE; - brw_inst *insn; - - gfx6_resolve_implied_move(p, &src0, msg_reg_nr); - - insn = next_insn(p, BRW_OPCODE_SEND); - brw_inst_set_sfid(devinfo, insn, target_cache); - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, src0); - brw_set_desc(p, insn, - brw_message_desc(devinfo, 1, send_commit_msg, true) | - brw_dp_write_desc(devinfo, binding_table_index, - 0, /* msg_control: ignored */ - GFX6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, - send_commit_msg)); /* send_commit_msg */ -} - static unsigned brw_surface_payload_size(unsigned num_channels, unsigned exec_size /**< 0 for SIMD4x2 */) @@ -3180,14 +2437,9 @@ brw_untyped_atomic(struct brw_codegen *p, bool header_present) { const struct intel_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->verx10 >= 75 ? - HSW_SFID_DATAPORT_DATA_CACHE_1 : - GFX7_SFID_DATAPORT_DATA_CACHE); + const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1; const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; - /* SIMD4x2 untyped atomic instructions only exist on HSW+ */ - const bool has_simd4x2 = devinfo->verx10 >= 75; - const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : - has_simd4x2 ? 0 : 8; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 0; const unsigned response_length = brw_surface_payload_size(response_expected, exec_size); const unsigned desc = @@ -3215,9 +2467,7 @@ brw_untyped_surface_read(struct brw_codegen *p, unsigned num_channels) { const struct intel_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->verx10 >= 75 ? - HSW_SFID_DATAPORT_DATA_CACHE_1 : - GFX7_SFID_DATAPORT_DATA_CACHE); + const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1; const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 0; const unsigned response_length = @@ -3238,21 +2488,14 @@ brw_untyped_surface_write(struct brw_codegen *p, bool header_present) { const struct intel_device_info *devinfo = p->devinfo; - const unsigned sfid = (devinfo->verx10 >= 75 ? - HSW_SFID_DATAPORT_DATA_CACHE_1 : - GFX7_SFID_DATAPORT_DATA_CACHE); + const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1; const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; - /* SIMD4x2 untyped surface write instructions only exist on HSW+ */ - const bool has_simd4x2 = devinfo->verx10 >= 75; - const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : - has_simd4x2 ? 0 : 8; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 0; const unsigned desc = brw_message_desc(devinfo, msg_length, 0, header_present) | brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, true); - /* Mask out unused components -- See comment in brw_untyped_atomic(). */ - const unsigned mask = !has_simd4x2 && !align1 ? WRITEMASK_X : WRITEMASK_XYZW; - brw_send_indirect_surface_message(p, sfid, brw_writemask(brw_null_reg(), mask), + brw_send_indirect_surface_message(p, sfid, brw_null_reg(), payload, surface, desc); } @@ -3366,87 +2609,6 @@ brw_memory_fence(struct brw_codegen *p, brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti); } -void -brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, bool last) -{ - const struct intel_device_info *devinfo = p->devinfo; - const unsigned exec_size = 1 << brw_get_default_exec_size(p); - const unsigned qtr_control = brw_get_default_group(p) / 8; - brw_inst *inst; - - assert(devinfo->ver == 7); - - brw_push_insn_state(p); - - /* The flag register is only used on Gfx7 in align1 mode, so avoid setting - * unnecessary bits in the instruction words, get the information we need - * and reset the default flag register. This allows more instructions to be - * compacted. - */ - const unsigned flag_subreg = p->current->flag_subreg; - brw_set_default_flag_reg(p, 0, 0); - - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - - const struct brw_reg flag = brw_flag_subreg(flag_subreg); - - brw_set_default_exec_size(p, BRW_EXECUTE_1); - brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); - - /* Run enough instructions returning zero with execution masking and - * a conditional modifier enabled in order to get the full execution - * mask in f1.0. We could use a single 32-wide move here if it - * weren't because of the hardware bug that causes channel enables to - * be applied incorrectly to the second half of 32-wide instructions - * on Gfx7. - */ - const unsigned lower_size = MIN2(16, exec_size); - for (unsigned i = 0; i < exec_size / lower_size; i++) { - inst = brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), - brw_imm_uw(0)); - brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE); - brw_inst_set_group(devinfo, inst, lower_size * i + 8 * qtr_control); - brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z); - brw_inst_set_exec_size(devinfo, inst, cvt(lower_size) - 1); - brw_inst_set_flag_reg_nr(devinfo, inst, flag_subreg / 2); - brw_inst_set_flag_subreg_nr(devinfo, inst, flag_subreg % 2); - } - - /* Find the first bit set in the exec_size-wide portion of the flag - * register that was updated by the last sequence of MOV - * instructions. - */ - const enum brw_reg_type type = brw_int_type(exec_size / 8, false); - brw_set_default_exec_size(p, BRW_EXECUTE_1); - if (!last) { - inst = brw_FBL(p, vec1(dst), byte_offset(retype(flag, type), qtr_control)); - } else { - inst = brw_LZD(p, vec1(dst), byte_offset(retype(flag, type), qtr_control)); - struct brw_reg neg = vec1(dst); - neg.negate = true; - inst = brw_ADD(p, vec1(dst), neg, brw_imm_uw(31)); - } - } else { - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - - /* Overwrite the destination without and with execution masking to - * find out which of the channels is active. - */ - brw_push_insn_state(p); - brw_set_default_exec_size(p, BRW_EXECUTE_4); - brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X), - brw_imm_ud(1)); - - inst = brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X), - brw_imm_ud(0)); - brw_pop_insn_state(p); - brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE); - } - - brw_pop_insn_state(p); -} - void brw_broadcast(struct brw_codegen *p, struct brw_reg dst, @@ -3603,8 +2765,6 @@ brw_barrier(struct brw_codegen *p, struct brw_reg src) const struct intel_device_info *devinfo = p->devinfo; struct brw_inst *inst; - assert(devinfo->ver >= 7); - brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); inst = next_insn(p, BRW_OPCODE_SEND); diff --git a/src/intel/compiler/brw_eu_util.c b/src/intel/compiler/brw_eu_util.c index 9fc8ff9c7c7..d680490aa43 100644 --- a/src/intel/compiler/brw_eu_util.c +++ b/src/intel/compiler/brw_eu_util.c @@ -34,20 +34,6 @@ #include "brw_eu.h" -void brw_math_invert( struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg src) -{ - gfx4_math(p, - dst, - BRW_MATH_FUNCTION_INV, - 0, - src, - BRW_MATH_PRECISION_FULL); -} - - - void brw_copy4(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src, diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index bb5784ab0c4..79dad3b45a0 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1264,12 +1264,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, assert(src[0].type == dst.type); brw_ROR(p, dst, src[0], src[1]); break; - case BRW_OPCODE_F32TO16: - brw_F32TO16(p, dst, src[0]); - break; - case BRW_OPCODE_F16TO32: - brw_F16TO32(p, dst, src[0]); - break; case BRW_OPCODE_CMP: brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); break; @@ -1513,12 +1507,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, break; case SHADER_OPCODE_FIND_LIVE_CHANNEL: - brw_find_live_channel(p, dst, false); - break; case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL: - brw_find_live_channel(p, dst, true); - break; - case SHADER_OPCODE_LOAD_LIVE_CHANNELS: unreachable("Should be lowered by lower_find_live_channel()"); break;