brw: handling dynamic programmable offsets pre-Xe2
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37929>
This commit is contained in:
committed by
Marge Bot
parent
d37c6ff4ed
commit
c5d313a2a8
@@ -585,6 +585,8 @@ enum tex_logical_srcs {
|
||||
TEX_LOGICAL_SRC_SURFACE,
|
||||
/** Texture sampler index */
|
||||
TEX_LOGICAL_SRC_SAMPLER,
|
||||
/** Packed offsets */
|
||||
TEX_LOGICAL_SRC_PACKED_OFFSETS,
|
||||
/** Sampler payloads */
|
||||
TEX_LOGICAL_SRC_PAYLOAD0,
|
||||
TEX_LOGICAL_SRC_PAYLOAD1,
|
||||
|
||||
@@ -5922,7 +5922,8 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||
|
||||
brw_reg tmp = ubld.vgrf(BRW_TYPE_UD, 4);
|
||||
brw_tex_inst *inst = ubld.emit(SHADER_OPCODE_SAMPLER,
|
||||
tmp, srcs, 3)->as_tex();
|
||||
tmp, srcs,
|
||||
TEX_LOGICAL_SRC_PAYLOAD0 + 1)->as_tex();
|
||||
inst->required_params = 0x1 /* LOD */;
|
||||
inst->sampler_opcode = BRW_SAMPLER_OPCODE_RESINFO;
|
||||
inst->surface_bindless = instr->intrinsic == nir_intrinsic_bindless_image_size;
|
||||
@@ -6447,7 +6448,8 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||
|
||||
brw_reg tmp = ubld.vgrf(BRW_TYPE_UD, 4);
|
||||
brw_tex_inst *inst = ubld.emit(SHADER_OPCODE_SAMPLER,
|
||||
tmp, srcs, 3)->as_tex();
|
||||
tmp, srcs,
|
||||
TEX_LOGICAL_SRC_PAYLOAD0 + 1)->as_tex();
|
||||
inst->required_params = 0x1 /* LOD */;
|
||||
inst->sampler_opcode = BRW_SAMPLER_OPCODE_RESINFO;
|
||||
inst->surface_bindless = get_nir_src_bindless(ntb, instr->src[0]);
|
||||
@@ -7511,6 +7513,12 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
|
||||
required_params |= BITFIELD_BIT(i);
|
||||
}
|
||||
|
||||
int packed_offset_idx = nir_tex_instr_src_index(instr, nir_tex_src_backend2);
|
||||
if (packed_offset_idx >= 0) {
|
||||
srcs[TEX_LOGICAL_SRC_PACKED_OFFSETS] = bld.emit_uniformize(
|
||||
get_nir_src(ntb, instr->src[packed_offset_idx].src, 0));
|
||||
}
|
||||
|
||||
brw_reg nir_def_reg = get_nir_def(ntb, instr->def);
|
||||
|
||||
const unsigned dest_size = nir_tex_instr_dest_size(instr);
|
||||
|
||||
@@ -755,6 +755,11 @@ lower_sampler_logical_send(const brw_builder &bld, brw_tex_inst *tex)
|
||||
const bool sampler_bindless = tex->sampler_bindless;
|
||||
const brw_reg surface = tex->src[TEX_LOGICAL_SRC_SURFACE];
|
||||
const brw_reg sampler = tex->src[TEX_LOGICAL_SRC_SAMPLER];
|
||||
/* Xe2+ should never use packed offsets since it has enough opcodes to
|
||||
* handle any programmable offset.
|
||||
*/
|
||||
const brw_reg packed_offsets = tex->src[TEX_LOGICAL_SRC_PACKED_OFFSETS];
|
||||
assert(packed_offsets.file == BAD_FILE || devinfo->ver < 20);
|
||||
|
||||
const unsigned payload_type_bit_size =
|
||||
get_sampler_msg_payload_type_bit_size(devinfo, tex);
|
||||
@@ -771,6 +776,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_tex_inst *tex)
|
||||
const bool needs_header =
|
||||
sampler_op_needs_header(op, devinfo) ||
|
||||
tex->has_const_offsets ||
|
||||
packed_offsets.file != BAD_FILE ||
|
||||
sampler_bindless || is_high_sampler(devinfo, sampler) ||
|
||||
tex->residency;
|
||||
|
||||
@@ -829,7 +835,11 @@ lower_sampler_logical_send(const brw_builder &bld, brw_tex_inst *tex)
|
||||
else
|
||||
ubld.MOV(header, retype(brw_vec8_grf(0, 0), BRW_TYPE_UD));
|
||||
|
||||
if (g0_2) {
|
||||
if (packed_offsets.file != BAD_FILE) {
|
||||
ubld1.OR(retype(component(header, 2), BRW_TYPE_UD),
|
||||
retype(component(packed_offsets, 0), BRW_TYPE_UD),
|
||||
brw_imm_ud(g0_2));
|
||||
} else if (g0_2) {
|
||||
ubld1.MOV(component(header, 2), brw_imm_ud(g0_2));
|
||||
} else if (devinfo->ver < 11 &&
|
||||
bld.shader->stage != MESA_SHADER_VERTEX &&
|
||||
|
||||
@@ -2080,10 +2080,15 @@ flag_fused_eu_disable_instr(nir_builder *b, nir_instr *instr, void *data)
|
||||
for (unsigned i = 0; i < tex->num_srcs; ++i) {
|
||||
nir_tex_src_type src_type = tex->src[i].src_type;
|
||||
|
||||
/* backend2 is the packed dynamically programmable offset, goes into
|
||||
* the sampler message header, so it needs to be considered for EU
|
||||
* fusion.
|
||||
*/
|
||||
if (src_type != nir_tex_src_texture_handle &&
|
||||
src_type != nir_tex_src_sampler_handle &&
|
||||
src_type != nir_tex_src_texture_offset &&
|
||||
src_type != nir_tex_src_sampler_offset)
|
||||
src_type != nir_tex_src_sampler_offset &&
|
||||
src_type != nir_tex_src_backend2)
|
||||
continue;
|
||||
|
||||
if (nir_src_is_divergent(&tex->src[i].src)) {
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "compiler/nir/nir_builtin_builder.h"
|
||||
#include "compiler/nir/nir_format_convert.h"
|
||||
#include "brw_nir.h"
|
||||
#include "brw_sampler.h"
|
||||
|
||||
@@ -314,58 +315,105 @@ pack_offset(nir_builder *b, nir_tex_instr *tex,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Sampler header offset format described in SKL PRMs Volume 7:
|
||||
* 3D-Media-GPGPU, Sampler, Message Header.
|
||||
*/
|
||||
static bool
|
||||
pack_header_offset(nir_builder *b, nir_tex_instr *tex)
|
||||
{
|
||||
nir_def *_offset = nir_steal_tex_src(tex, nir_tex_src_offset);
|
||||
if (!_offset)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_before_instr(&tex->instr);
|
||||
|
||||
static const unsigned bits4[] = { 4, 4, 4, };
|
||||
nir_def *offset = nir_iand_imm(b, nir_format_clamp_sint(b, _offset, bits4), 0xf);
|
||||
|
||||
nir_def *offuvr = nir_ishl_imm(b, nir_channel(b, offset, 0), 8);
|
||||
for (unsigned i = 1; i < MIN2(offset->num_components, 3); i++) {
|
||||
nir_def *chan = nir_channel(b, offset, i);
|
||||
offuvr = nir_ior(b, offuvr, nir_ishl_imm(b, chan, 8 - (4 * i)));
|
||||
}
|
||||
|
||||
nir_tex_instr_add_src(tex, nir_tex_src_backend2, offuvr);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
brw_nir_lower_texture_instr(nir_builder *b, nir_tex_instr *tex, void *cb_data)
|
||||
{
|
||||
enum brw_sampler_opcode sampler_opcode = tex->backend_flags;
|
||||
bool progress = false;
|
||||
|
||||
if (brw_sampler_opcode_param_index(sampler_opcode,
|
||||
BRW_SAMPLER_PAYLOAD_PARAM_LOD_AI) != -1 ||
|
||||
brw_sampler_opcode_param_index(sampler_opcode,
|
||||
BRW_SAMPLER_PAYLOAD_PARAM_BIAS_AI) != -1)
|
||||
return pack_lod_and_array_index(b, tex);
|
||||
const struct brw_sampler_payload_desc *payload_desc =
|
||||
brw_get_sampler_payload_desc(sampler_opcode);
|
||||
bool has_offset_param = false;
|
||||
|
||||
if (brw_sampler_opcode_param_index(sampler_opcode,
|
||||
BRW_SAMPLER_PAYLOAD_PARAM_BIAS_OFFUV6) != -1 ||
|
||||
brw_sampler_opcode_param_index(sampler_opcode,
|
||||
BRW_SAMPLER_PAYLOAD_PARAM_LOD_OFFUV6) != -1)
|
||||
return pack_lod_or_bias_and_offset(b, tex, 6, 2);
|
||||
for (uint32_t i = 0; payload_desc->sources[i].param != BRW_SAMPLER_PAYLOAD_PARAM_INVALID; i++) {
|
||||
#define PARAM_CASE(name) case BRW_SAMPLER_PAYLOAD_PARAM_##name
|
||||
switch (payload_desc->sources[i].param) {
|
||||
PARAM_CASE(LOD_AI):
|
||||
PARAM_CASE(BIAS_AI):
|
||||
progress |= pack_lod_and_array_index(b, tex);
|
||||
break;
|
||||
PARAM_CASE(BIAS_OFFUV6):
|
||||
PARAM_CASE(LOD_OFFUV6):
|
||||
progress |= pack_lod_or_bias_and_offset(b, tex, 6, 2);
|
||||
has_offset_param = true;
|
||||
break;
|
||||
PARAM_CASE(BIAS_OFFUVR4):
|
||||
PARAM_CASE(LOD_OFFUVR4):
|
||||
progress |= pack_lod_or_bias_and_offset(b, tex, 4, 3);
|
||||
has_offset_param = true;
|
||||
break;
|
||||
PARAM_CASE(OFFUV4_R):
|
||||
progress |= pack_offset_r(b, tex, 4, 2);
|
||||
has_offset_param = true;
|
||||
break;
|
||||
PARAM_CASE(OFFUVR4_R):
|
||||
progress |= pack_offset_r(b, tex, 4, 3);
|
||||
has_offset_param = true;
|
||||
break;
|
||||
PARAM_CASE(OFFUV6_R):
|
||||
progress |= pack_offset_r(b, tex, 6, 2);
|
||||
has_offset_param = true;
|
||||
break;
|
||||
PARAM_CASE(OFFUV4):
|
||||
progress |= pack_offset(b, tex, 4, 2);
|
||||
has_offset_param = true;
|
||||
break;
|
||||
PARAM_CASE(OFFUVR4):
|
||||
progress |= pack_offset(b, tex, 4, 3);
|
||||
has_offset_param = true;
|
||||
break;
|
||||
PARAM_CASE(OFFUV6):
|
||||
progress |= pack_offset(b, tex, 6, 2);
|
||||
has_offset_param = true;
|
||||
break;
|
||||
PARAM_CASE(OFFUVR6):
|
||||
progress |= pack_offset(b, tex, 6, 3);
|
||||
has_offset_param = true;
|
||||
break;
|
||||
PARAM_CASE(OFFU):
|
||||
PARAM_CASE(OFFV):
|
||||
has_offset_param = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
#undef PARAM_CASE
|
||||
}
|
||||
|
||||
if (brw_sampler_opcode_param_index(sampler_opcode,
|
||||
BRW_SAMPLER_PAYLOAD_PARAM_BIAS_OFFUVR4) != -1 ||
|
||||
brw_sampler_opcode_param_index(sampler_opcode,
|
||||
BRW_SAMPLER_PAYLOAD_PARAM_LOD_OFFUVR4) != -1)
|
||||
return pack_lod_or_bias_and_offset(b, tex, 4, 3);
|
||||
/* Handle pre-Xe2 dynamic programmable offsets */
|
||||
int offset_idx;
|
||||
if (!has_offset_param &&
|
||||
(offset_idx = nir_tex_instr_src_index(tex, nir_tex_src_offset)) >= 0 &&
|
||||
!brw_nir_tex_offset_in_constant_range(tex, offset_idx))
|
||||
progress |= pack_header_offset(b, tex);
|
||||
|
||||
if (brw_sampler_opcode_param_index(sampler_opcode,
|
||||
BRW_SAMPLER_PAYLOAD_PARAM_OFFUV4_R) != -1)
|
||||
return pack_offset_r(b, tex, 4, 2);
|
||||
|
||||
if (brw_sampler_opcode_param_index(sampler_opcode,
|
||||
BRW_SAMPLER_PAYLOAD_PARAM_OFFUVR4_R) != -1)
|
||||
return pack_offset_r(b, tex, 4, 3);
|
||||
|
||||
if (brw_sampler_opcode_param_index(sampler_opcode,
|
||||
BRW_SAMPLER_PAYLOAD_PARAM_OFFUV6_R) != -1)
|
||||
return pack_offset_r(b, tex, 6, 2);
|
||||
|
||||
if (brw_sampler_opcode_param_index(sampler_opcode,
|
||||
BRW_SAMPLER_PAYLOAD_PARAM_OFFUV4) != -1)
|
||||
return pack_offset(b, tex, 4, 2);
|
||||
|
||||
if (brw_sampler_opcode_param_index(sampler_opcode,
|
||||
BRW_SAMPLER_PAYLOAD_PARAM_OFFUVR4) != -1)
|
||||
return pack_offset(b, tex, 4, 3);
|
||||
|
||||
if (brw_sampler_opcode_param_index(sampler_opcode,
|
||||
BRW_SAMPLER_PAYLOAD_PARAM_OFFUV6) != -1)
|
||||
return pack_offset(b, tex, 6, 2);
|
||||
|
||||
if (brw_sampler_opcode_param_index(sampler_opcode,
|
||||
BRW_SAMPLER_PAYLOAD_PARAM_OFFUVR6) != -1)
|
||||
return pack_offset(b, tex, 6, 3);
|
||||
|
||||
return false;
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
|
||||
@@ -466,6 +466,9 @@ brw_print_instruction(const brw_shader &s, const brw_inst *inst, FILE *file, con
|
||||
case TEX_LOGICAL_SRC_SAMPLER:
|
||||
fprintf(file, "smpl: ");
|
||||
break;
|
||||
case TEX_LOGICAL_SRC_PACKED_OFFSETS:
|
||||
fprintf(file, "pk_offs: ");
|
||||
break;
|
||||
default:
|
||||
fprintf(file, "%s: ",
|
||||
brw_sampler_payload_param_name(
|
||||
|
||||
@@ -850,39 +850,76 @@ brw_get_sampler_opcode_from_tex(const struct intel_device_info *devinfo,
|
||||
#define SKIP_IF(name, cond) { if (cond) { continue; } }
|
||||
#endif
|
||||
|
||||
enum brw_sampler_opcode opcode_index = BRW_SAMPLER_OPCODE_MAX;
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(sampler_opcode_descs); i++) {
|
||||
SKIP_IF("generation requirement not met",
|
||||
opcode_filters[i] != NULL && !opcode_filters[i](tex, devinfo));
|
||||
/* The sampler payloads described in this file are contiguous sets of
|
||||
* vector registers in the register file (Xe3+ can avoid making this
|
||||
* contiguous) handed over to the sampler as input for a texture operation.
|
||||
* The formats of the payloads are described above in sampler_opcode_descs[]
|
||||
* for each of the sampler opcodes. Each payload element lives in a vector
|
||||
* register (or pair of vector registers if the message is SIMD16/SIMD32,
|
||||
* depending on pre/post Xe2). And each lane of the shader subgroup
|
||||
* occupies a slot in each of the vector registers.
|
||||
*
|
||||
* Preceding the payload we can optionally add a header (a single vector
|
||||
* register) which does not hold per lane data, but instead data that is
|
||||
* common to all the lanes. This includes the sampler handle to use,
|
||||
* potential texture offsets (again the same for all the lanes), component
|
||||
* masking, sparse residency request, etc...
|
||||
*
|
||||
* Some opcodes allow for per lane offsets, others don't. When we can't
|
||||
* use a per lane offset, we have to nir_lower_non_uniform_access texture
|
||||
* offsets like we do for sampler/texture handles and iterate through each
|
||||
* lane with the offset put into the sampler message header.
|
||||
*
|
||||
* We also have to consider the register space usage of per lane offsets.
|
||||
* In SIMD8 that's a single GRF per component, but on SIMD16 this is 2 GRFs
|
||||
* per component. So when the offset is constant or uniform across all
|
||||
* lanes, we want to put it in the header, since that will be combined with
|
||||
* other fields, reducing register usage.
|
||||
*
|
||||
* On Xe2+ platforms we can always find a sampler opcode that will
|
||||
* accommodate non constant offsets (Xe2 gained enough HW support). With the
|
||||
* opcodes ordered with per lane offsets at the bottom of the list we can
|
||||
* find the best matching opcode with one traversal.
|
||||
*
|
||||
* On pre-Xe2 platforms, we iterate through the opcodes twice, the first
|
||||
* iteration only considering the non constant offsets and the opcodes that
|
||||
* would accommodate them. The second iteration considering all the opcodes,
|
||||
* assuming the texture instructions were properly lowered with
|
||||
* nir_lower_non_uniform_access.
|
||||
*/
|
||||
const uint32_t n_iterations = devinfo->ver < 20 ? 2 : 1;
|
||||
for (uint32_t iteration = 0; iteration < n_iterations; iteration++) {
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(sampler_opcode_descs); i++) {
|
||||
SKIP_IF("generation requirement not met",
|
||||
opcode_filters[i] != NULL && !opcode_filters[i](tex, devinfo));
|
||||
|
||||
SKIP_IF("non constant offsets",
|
||||
offset_non_constant_or_non_header_range &&
|
||||
!sampler_opcode_descs[i].has_offset_payload);
|
||||
SKIP_IF("non constant offsets",
|
||||
iteration == 0 &&
|
||||
offset_non_constant_or_non_header_range &&
|
||||
!sampler_opcode_descs[i].has_offset_payload);
|
||||
|
||||
SKIP_IF("not fetch instruction",
|
||||
is_fetch != sampler_opcode_descs[i].is_fetch);
|
||||
SKIP_IF("not fetch instruction",
|
||||
is_fetch != sampler_opcode_descs[i].is_fetch);
|
||||
|
||||
SKIP_IF("not gather instruction",
|
||||
is_gather != sampler_opcode_descs[i].is_gather);
|
||||
SKIP_IF("not gather instruction",
|
||||
is_gather != sampler_opcode_descs[i].is_gather);
|
||||
|
||||
SKIP_IF("not gather implicit lod",
|
||||
tex->is_gather_implicit_lod !=
|
||||
sampler_opcode_descs[i].is_gather_implicit_lod);
|
||||
SKIP_IF("not gather implicit lod",
|
||||
tex->is_gather_implicit_lod !=
|
||||
sampler_opcode_descs[i].is_gather_implicit_lod);
|
||||
|
||||
SKIP_IF("non lod zero",
|
||||
!lod_zero && sampler_opcode_descs[i].lod_zero);
|
||||
SKIP_IF("non lod zero",
|
||||
!lod_zero && sampler_opcode_descs[i].lod_zero);
|
||||
|
||||
SKIP_IF("non matching sources",
|
||||
(sampler_opcode_descs[i].nir_src_mask & src_mask) != src_mask);
|
||||
SKIP_IF("non matching sources",
|
||||
(sampler_opcode_descs[i].nir_src_mask & src_mask) != src_mask);
|
||||
|
||||
opcode_index = i;
|
||||
#if DEBUG_SAMPLER_SELECTION
|
||||
fprintf(stderr, "selected %s\n", brw_sampler_opcode_name(opcode_index));
|
||||
fprintf(stderr, "selected %s\n", brw_sampler_opcode_name(i));
|
||||
#endif
|
||||
break;
|
||||
return (enum brw_sampler_opcode) i;
|
||||
}
|
||||
}
|
||||
|
||||
assert(opcode_index < BRW_SAMPLER_OPCODE_MAX);
|
||||
|
||||
return opcode_index;
|
||||
UNREACHABLE("Cannot match tex instruction to HW opcode");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user