From c194df565a4331a76871d3c7dc603687b8003f38 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Wed, 21 Feb 2024 02:00:27 -0800
Subject: [PATCH] intel/brw: Don't include unnecessary undefined values in texture results

When emitting a sampler message, we allocate a temporary destination large enough to hold 4 values (or 5 for sparse). This is the maximum size needed to hold any result. However, we shrink the size written by the sampler message to skip writing any trailing components that NIR tells us are never read. So we may not write the entire temporary.

The NIR texture instruction has a destination VGRF which is sized assuming that all components are present. We issue a LOAD_PAYLOAD instruction to copy our sampler result temporary to the NIR destination.

When we reduce the response length of the sampler message, some of these temporary components have undefined values. The correct way to indicate that is by using a BAD_FILE source. Unfortunately, we were naively reading offsets of the temporary that were never written, but are still part of a larger VGRF. This complicates things.

For example, sampling and only using RGB (not RGBA) was producing this:

   txl_logical(8) (written: 3) vgrf3+0.0:F, ...
   undef(8) (written: 4) vgrf4:UD
   load_payload(8) (written: 4) vgrf4:F, vgrf3+0.0:F, vgrf3+1.0:F, vgrf3+2.0:F, vgrf3+3.0:F

The last source, vgrf3+3.0:F, is undefined, and should be BAD_FILE. Doing so allows VGRF splitting and other optimizations to work better.

Reviewed-by: Ian Romanick Part-of: --- src/intel/compiler/brw_fs_nir.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index bec0b261947..2f0438b41af 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -8205,16 +8205,19 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, inst->has_packed_lod_ai_src = pack_lod_and_array_index; const unsigned dest_size = nir_tex_instr_dest_size(instr); + unsigned read_size = dest_size; if (instr->op != nir_texop_tg4 && instr->op != nir_texop_query_levels) { unsigned write_mask = nir_def_components_read(&instr->def); assert(write_mask != 0); /* dead code should have been eliminated */ if (instr->is_sparse) { - inst->size_written = (util_last_bit(write_mask) - 1) * - inst->dst.component_size(inst->exec_size) + - (reg_unit(devinfo) * REG_SIZE); + read_size = util_last_bit(write_mask) - 1; + inst->size_written = + read_size * inst->dst.component_size(inst->exec_size) + + (reg_unit(devinfo) * REG_SIZE); } else { - inst->size_written = util_last_bit(write_mask) * - inst->dst.component_size(inst->exec_size); + read_size = util_last_bit(write_mask); + inst->size_written = + read_size * inst->dst.component_size(inst->exec_size); } } else { inst->size_written = 4 * inst->dst.component_size(inst->exec_size) + @@ -8241,7 +8244,7 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, } fs_reg nir_dest[5]; - for (unsigned i = 0; i < dest_size; i++) + for (unsigned i = 0; i < read_size; i++) nir_dest[i] = offset(dst, bld, i); if (instr->op == nir_texop_query_levels) {