pco: initial image write support

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta
2025-01-30 17:22:33 +00:00
committed by Marge Bot
parent 3aacb6731c
commit f54effa024
7 changed files with 56 additions and 172 deletions
+2 -56
View File
@@ -2532,30 +2532,7 @@ field_mappings=[
('f16', 'f16'),
('swap', 'swap'),
('cachemode_ld', 'cachemode_smp_ld'),
('w', 'smp_w', 0),
])
I_SMP_EXTB_W = bit_struct(
name='smp_extb_w',
bit_set=I_BACKEND,
field_mappings=[
('backend_op', 'backend_op', 'dma'),
('fcnorm', 'fcnorm'),
('drc', 'drc'),
('dma_op', 'dma_op', 'smp'),
('extb', 'extb', 1),
('dmn', 'dmn'),
('exta', 'exta', 0),
('chan', 'chan'),
('lodm', 'lodm'),
('rsvd3', 'rsvd3_smp'),
('f16', 'f16'),
('swap', 'swap'),
('cachemode_st', 'cachemode_smp_st'),
('w', 'smp_w', 1),
('w', 'smp_w'),
])
I_SMP_EXTAB = bit_struct(
@@ -2586,38 +2563,7 @@ field_mappings=[
('f16', 'f16'),
('swap', 'swap'),
('cachemode_ld', 'cachemode_smp_ld'),
('w', 'smp_w', 0),
])
I_SMP_EXTAB_W = bit_struct(
name='smp_extab_w',
bit_set=I_BACKEND,
field_mappings=[
('backend_op', 'backend_op', 'dma'),
('fcnorm', 'fcnorm'),
('drc', 'drc'),
('dma_op', 'dma_op', 'smp'),
('extb', 'extb', 1),
('dmn', 'dmn'),
('exta', 'exta', 1),
('chan', 'chan'),
('lodm', 'lodm'),
('pplod', 'pplod'),
('proj', 'proj'),
('sbmode', 'sbmode'),
('nncoords', 'nncoords'),
('sno', 'sno'),
('soo', 'soo'),
('tao', 'tao'),
('rsvd3', 'rsvd3_smp'),
('f16', 'f16'),
('swap', 'swap'),
('cachemode_st', 'cachemode_smp_st'),
('w', 'smp_w', 1),
('w', 'smp_w'),
])
I_ATOMIC = bit_struct(
+5 -2
View File
@@ -1369,7 +1369,8 @@ encode_map(O_SMP,
('tao', OM_TAO),
('f16', OM_F16),
('swap', OM_SCHEDSWAP),
('cachemode_ld', OM_MCU_CACHE_MODE_LD)
('cachemode_ld', OM_MCU_CACHE_MODE_LD),
('w', OM_WRT)
]),
(I_SMP_EXTA, [
('fcnorm', OM_FCNORM),
@@ -1387,7 +1388,8 @@ encode_map(O_SMP,
], [
(OM_F16, '== false'),
(OM_SCHEDSWAP, '== PCO_SCHEDSWAP_NONE'),
(OM_MCU_CACHE_MODE_LD, '== PCO_CACHEMODE_LD_NORMAL')
(OM_MCU_CACHE_MODE_LD, '== PCO_CACHEMODE_LD_NORMAL'),
(OM_WRT, '== false')
]),
(I_SMP_BRIEF, [
('fcnorm', OM_FCNORM),
@@ -1399,6 +1401,7 @@ encode_map(O_SMP,
(OM_F16, '== false'),
(OM_SCHEDSWAP, '== PCO_SCHEDSWAP_NONE'),
(OM_MCU_CACHE_MODE_LD, '== PCO_CACHEMODE_LD_NORMAL'),
(OM_WRT, '== false'),
(OM_PPLOD, '== false'),
(OM_PROJ, '== false'),
(OM_SB_MODE, '== PCO_SB_MODE_NONE'),
+1 -1
View File
@@ -429,7 +429,7 @@ static void pco_nir_opt(pco_ctx *ctx, nir_shader *nir)
};
NIR_PASS(progress, nir, nir_opt_load_store_vectorize, &vectorize_opts);
NIR_PASS(progress, nir, nir_opt_shrink_stores, true);
NIR_PASS(progress, nir, nir_opt_shrink_stores, false);
NIR_PASS(progress, nir, nir_opt_loop);
NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
+37 -111
View File
@@ -755,13 +755,13 @@ static nir_def *lower_image(nir_builder *b, nir_instr *instr, void *cb_data)
{
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
bool hw_array_support = false;
bool hw_int_support = false;
enum glsl_sampler_dim image_dim = nir_intrinsic_image_dim(intr);
bool is_array = nir_intrinsic_image_array(intr);
assert(!is_array);
enum pipe_format format = nir_intrinsic_format(intr);
nir_alu_type dest_type = nir_intrinsic_dest_type(intr);
nir_alu_type type = intr->intrinsic == nir_intrinsic_image_deref_load
? nir_intrinsic_dest_type(intr)
: nir_intrinsic_src_type(intr);
unsigned desc_set = nir_src_comp_as_uint(intr->src[0], 0);
unsigned binding = nir_src_comp_as_uint(intr->src[0], 1);
@@ -770,7 +770,15 @@ static nir_def *lower_image(nir_builder *b, nir_instr *instr, void *cb_data)
nir_def *coords = !nir_src_is_undef(intr->src[1]) ? intr->src[1].ssa : NULL;
nir_def *sample_index = !nir_src_is_undef(intr->src[2]) ? intr->src[2].ssa
: NULL;
nir_def *lod = !nir_src_is_undef(intr->src[3]) ? intr->src[3].ssa : NULL;
nir_def *write_data = intr->intrinsic == nir_intrinsic_image_deref_store
? intr->src[3].ssa
: NULL;
if (write_data) {
assert(intr->num_components == 4);
assert(write_data->num_components == 4);
}
ASSERTED bool msaa = image_dim == GLSL_SAMPLER_DIM_MS ||
image_dim == GLSL_SAMPLER_DIM_SUBPASS_MS;
@@ -788,9 +796,7 @@ static nir_def *lower_image(nir_builder *b, nir_instr *instr, void *cb_data)
.desc_set = PCO_POINT_SAMPLER,
.binding = PCO_POINT_SAMPLER);
unsigned num_coord_comps =
glsl_get_sampler_dim_coordinate_components(image_dim) + !!is_array;
unsigned num_coord_comps = nir_image_intrinsic_coord_components(intr);
if (coords)
coords = nir_trim_vector(b, coords, num_coord_comps);
@@ -807,113 +813,32 @@ static nir_def *lower_image(nir_builder *b, nir_instr *instr, void *cb_data)
&float_array_index,
&int_array_index);
nir_def *smp_data_comps[NIR_MAX_VEC_COMPONENTS];
unsigned smp_data_comp_count = 0;
pco_smp_flags smp_flags = {
.dim = to_pco_dim(image_dim),
.lod_mode = PCO_LOD_MODE_NORMAL,
pco_smp_params params = {
.tex_state = tex_state,
.smp_state = smp_state,
.dest_type = type,
.sampler_dim = image_dim,
.nncoords = true,
.coords = float_coords,
.ms_index = sample_index,
.write_data = write_data,
.sample_components = intr->intrinsic == nir_intrinsic_image_deref_load
? intr->def.num_components
: 0,
};
for (unsigned c = 0; c < num_coord_comps; ++c) {
smp_data_comps[smp_data_comp_count++] =
nir_channel(b, hw_int_support ? int_coords : float_coords, c);
}
nir_intrinsic_instr *smp = pco_emit_nir_smp(b, &params);
if (hw_array_support && int_array_index) {
smp_data_comps[smp_data_comp_count++] =
hw_int_support ? int_array_index : float_array_index;
if (intr->intrinsic == nir_intrinsic_image_deref_load)
return &smp->def;
smp_flags.array = true;
}
bool lod_set = false;
if (lod) {
lod = nir_i2f32(b, lod);
smp_data_comps[smp_data_comp_count++] = lod;
smp_flags.pplod = true;
smp_flags.lod_mode = PCO_LOD_MODE_REPLACE;
lod_set = true;
}
if (!hw_array_support && int_array_index) {
/* Set a per-pixel lod bias of 0 if none has been set yet. */
if (!lod_set) {
smp_data_comps[smp_data_comp_count++] = nir_imm_int(b, 0);
smp_flags.pplod = true;
smp_flags.lod_mode = PCO_LOD_MODE_BIAS;
lod_set = true;
}
nir_def *tex_state_word[] = {
[0] = nir_channel(b, tex_state, 0),
[1] = nir_channel(b, tex_state, 1),
[2] = nir_channel(b, tex_state, 2),
[3] = nir_channel(b, tex_state, 3),
};
nir_def *base_addr_lo;
nir_def *base_addr_hi;
unpack_base_addr(b, tex_state_word, &base_addr_lo, &base_addr_hi);
nir_def *array_index = int_array_index;
assert(array_index);
nir_def *array_size = STATE_UNPACK_ADD(b, tex_state_word, 2, 4, 11, 1);
array_index = nir_uclamp(b, array_index, nir_imm_int(b, 0), array_size);
nir_def *tex_meta = nir_load_tex_meta_pco(b,
PCO_IMAGE_META_COUNT,
elem,
.desc_set = desc_set,
.binding = binding);
nir_def *array_stride =
nir_channel(b, tex_meta, PCO_IMAGE_META_LAYER_SIZE);
nir_def *array_offset = nir_imul(b, array_index, array_stride);
nir_def *addr =
nir_uadd64_2x32_lo(b, base_addr_lo, base_addr_hi, array_offset);
smp_data_comps[smp_data_comp_count++] = nir_channel(b, addr, 0);
smp_data_comps[smp_data_comp_count++] = nir_channel(b, addr, 1);
smp_flags.tao = true;
}
if (sample_index) {
nir_def *lookup = nir_bitfield_insert(b,
nir_imm_int(b, 0),
sample_index,
nir_imm_int(b, 16),
nir_imm_int(b, 3));
smp_data_comps[smp_data_comp_count++] = lookup;
smp_flags.sno = true;
}
/* Pad out the rest of the data words. */
assert(smp_data_comp_count <= NIR_MAX_VEC_COMPONENTS);
for (unsigned c = smp_data_comp_count; c < ARRAY_SIZE(smp_data_comps); ++c)
smp_data_comps[c] = nir_imm_int(b, 0);
nir_def *smp_data = nir_vec(b, smp_data_comps, ARRAY_SIZE(smp_data_comps));
smp_flags.nncoords = true;
smp_flags.integer = hw_int_support;
smp_flags.fcnorm = nir_alu_type_get_base_type(dest_type) == nir_type_float;
return nir_smp_pco(b,
intr->def.num_components,
smp_data,
tex_state,
smp_state,
.smp_flags_pco = smp_flags._,
.range = smp_data_comp_count);
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
}
static bool is_image(const nir_instr *instr, UNUSED const void *cb_data)
@@ -924,6 +849,7 @@ static bool is_image(const nir_instr *instr, UNUSED const void *cb_data)
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_store:
return true;
default:
+2
View File
@@ -170,6 +170,7 @@ static nir_def *lower_vk(nir_builder *b, nir_instr *instr, void *cb_data)
return lower_load_vulkan_descriptor(b, intr, common);
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_store:
return lower_image_derefs(b, intr, common);
default:
@@ -207,6 +208,7 @@ static bool is_vk(const nir_instr *instr, UNUSED const void *cb_data)
switch (intr->intrinsic) {
case nir_intrinsic_load_vulkan_descriptor:
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_store:
return true;
default:
+2 -1
View File
@@ -189,6 +189,7 @@ OM_PPLOD = op_mod('pplod', BaseType.bool)
OM_TAO = op_mod('tao', BaseType.bool)
OM_SOO = op_mod('soo', BaseType.bool)
OM_SNO = op_mod('sno', BaseType.bool)
OM_WRT = op_mod('wrt', BaseType.bool)
OM_SB_MODE = op_mod_enum('sb_mode', [
('none', ''),
('rawdata', 'rawdata'),
@@ -384,7 +385,7 @@ O_ATOMIC = hw_op('atomic', [OM_OLCHK, OM_EXEC_CND, OM_END, OM_ATOM_OP], 1, 2)
O_SMP = hw_op('smp', OM_ALU_RPT1 + [OM_DIM, OM_PROJ, OM_FCNORM, OM_NNCOORDS,
OM_LOD_MODE, OM_PPLOD, OM_TAO, OM_SOO,
OM_SNO, OM_SB_MODE, OM_MCU_CACHE_MODE_LD,
OM_SNO, OM_WRT, OM_SB_MODE, OM_MCU_CACHE_MODE_LD,
OM_ARRAY, OM_INTEGER, OM_SCHEDSWAP, OM_F16], 1, 6)
O_ALPHATST = hw_op('alphatst', OM_ALU_RPT1, 1, 4)
+7 -1
View File
@@ -1010,6 +1010,10 @@ static pco_instr *lower_smp(trans_ctx *tctx,
/* Destination and chans should be correct. */
break;
case nir_intrinsic_smp_write_pco:
chans = 4;
break;
default:
UNREACHABLE("");
}
@@ -1035,7 +1039,8 @@ static pco_instr *lower_smp(trans_ctx *tctx,
.sno = smp_flags.sno,
.sb_mode = sb_mode,
.array = smp_flags.array,
.integer = smp_flags.integer);
.integer = smp_flags.integer,
.wrt = smp_flags.wrt);
return smp;
}
@@ -1287,6 +1292,7 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
case nir_intrinsic_smp_coeffs_pco:
case nir_intrinsic_smp_pco:
case nir_intrinsic_smp_write_pco:
instr = lower_smp(tctx, intr, &dest, src[0], src[1], src[2]);
break;