brw: Fix encoding of 3-src dst in Xe2+

Use the FD20 macro, which accounts for the implicit LSB zero value and is
already used for sources.  The new macro needs the entire bit-range of
the field (55-51), so remove the adjustments we used to do prior to
encoding and decoding.

Fixes an assertion in vkpeak (https://github.com/nihui/vkpeak) when running
bf16 tests on BMG.  The code will now also correctly apply the subreg_nr
to the destination; e.g. when a mad(32) got split into two pieces, the
generated code previously would not fill out the upper part of the register:

```
 mad(16)         g13<1>BF        g10<8,8,1>BF    g12<8,8,1>BF    g56<1,1,1>F { align1 1H A@5 };
-mad(16)         g13<1>BF        g10.16<8,8,1>BF g12.16<8,8,1>BF g57<1,1,1>F { align1 2H A@5 };
+mad(16)         g13.16<1>BF     g10.16<8,8,1>BF g12.16<8,8,1>BF g57<1,1,1>F { align1 2H A@5 };
```

Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37236>
This commit is contained in:
Caio Oliveira
2025-09-07 11:19:40 -07:00
committed by Marge Bot
parent f75e886bf6
commit f65fbb23e2
3 changed files with 3 additions and 3 deletions
+1 -1
View File
@@ -604,7 +604,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
brw_eu_inst_set_3src_a1_dst_reg_file(devinfo, inst, phys_file(dest));
brw_eu_inst_set_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
brw_eu_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest) / 8);
brw_eu_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));
brw_eu_inst_set_3src_a1_dst_hstride(devinfo, inst,
to_3src_align1_dst_hstride(dest.hstride));
+1 -1
View File
@@ -508,7 +508,7 @@ FC(3src_a1_src0_hstride, /* 9+ */ 70, 69, /* 12+ */ 65, 64, devinfo->ver
FDC(3src_a1_src0_vstride, /* 9+ */ 68, 67, /* 12+ */ 43, 43, 35, 35, devinfo->ver >= 10)
FC(3src_a1_src0_hw_type, /* 9+ */ 66, 64, /* 12+ */ 42, 40, devinfo->ver >= 10)
/* dst_reg_nr same in align16 */
FC(3src_a1_dst_subreg_nr, /* 9+ */ 55, 54, /* 12+ */ 55, 54, devinfo->ver >= 10)
FD20(3src_a1_dst_subreg_nr, /* 9+ */ 55, 51, /* 12+ */ 55, 51, /* 20+ */ 55, 51, -1)
FC(3src_a1_special_acc, /* 9+ */ 55, 52, /* 12+ */ 54, 51, devinfo->ver >= 10) /* aliases dst_subreg_nr */
/* Reserved 51:50 */
FC(3src_a1_dst_hstride, /* 9+ */ 49, 49, /* 12+ */ 48, 48, devinfo->ver >= 10)
+1 -1
View File
@@ -2731,7 +2731,7 @@ brw_hw_decode_inst(const struct brw_isa_info *isa,
inst->dst.file = brw_eu_inst_3src_a1_dst_reg_file(devinfo, raw);
inst->dst.type = brw_eu_inst_3src_a1_dst_type(devinfo, raw);
inst->dst.nr = brw_eu_inst_3src_dst_reg_nr(devinfo, raw);
inst->dst.subnr = brw_eu_inst_3src_a1_dst_subreg_nr(devinfo, raw) * 8;
inst->dst.subnr = brw_eu_inst_3src_a1_dst_subreg_nr(devinfo, raw);
inst->dst.hstride = DST_STRIDE_3SRC(brw_eu_inst_3src_a1_dst_hstride(devinfo, raw));
inst->src[0].file = brw_eu_inst_3src_a1_src0_reg_file(devinfo, raw);