pvr, pco: simple end-of-tile/render nir shader gen

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta
2025-02-06 13:35:26 +00:00
committed by Marge Bot
parent 2ed5aa49a5
commit 0ff8f57392
14 changed files with 136 additions and 38 deletions
+4
View File
@@ -2731,3 +2731,7 @@ intrinsic("isp_feedback_pco", src_comp=[1, 1])
# Loads the valid mask.
intrinsic("load_savmsk_vm_pco", src_comp=[], dest_comp=1, indices=[], flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32])
# Boolean "freep" index; carried by emitpix_pco below.
index("bool", "freep")
# emitpix: takes two single-component sources and the FREEP index; no destination is declared.
intrinsic("emitpix_pco", src_comp=[1, 1], indices=[FREEP], bit_sizes=[32])
# wop: declared with no sources, destination or indices.
intrinsic("wop_pco")
+23
View File
@@ -1454,6 +1454,11 @@ encode_map(O_SAVMSK,
op_ref_maps=[('backend', [['w0', '_'], ['w1', '_']], [])]
)
# emitpix: one I_EMITPIX encoding carrying the freep op mod.
# The backend op ref map lists no entries in its first list and 's0', 's2' in its
# second (presumably dests then sources - confirm against encode_map()).
encode_map(O_EMITPIX,
encodings=[(I_EMITPIX, [('freep', OM_FREEP)])],
op_ref_maps=[('backend', [], ['s0', 's2'])]
)
encode_map(O_BBYP0BM,
encodings=[
(I_PHASE0_SRC, [
@@ -2937,6 +2942,24 @@ group_map(O_SAVMSK,
]
)
# emitpix instruction group: main header with oporg 'be' and a fixed rpt of 1,
# a single backend op, and the op's sources 0 and 1 routed to s[0] and s[2].
group_map(O_EMITPIX,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'be'),
('olchk', OM_OLCHK),
('w1p', False),
('w0p', False),
('cc', OM_EXEC_CND),
('end', OM_END),
('atom', OM_ATOM),
('rpt', 1)
]),
enc_ops=[('backend', O_EMITPIX)],
srcs=[
# Second source goes to s[2], not s[1], matching the 's0'/'s2' refs.
('s[0]', ('backend', SRC(0)), 's0'),
('s[2]', ('backend', SRC(1)), 's2')
]
)
group_map(O_MOVI32,
hdr=(I_IGRP_HDR_BITWISE, [
('opcnt', 'p0'),
+2
View File
@@ -395,6 +395,8 @@ O_DEPTHF = hw_op('depthf', OM_ALU_RPT1, 0, 2)
O_SAVMSK = hw_op('savmsk', OM_ALU_RPT1 + [OM_SAVMSK_MODE], 2)
O_EMITPIX = hw_op('emitpix', OM_ALU_RPT1 + [OM_FREEP], 0, 2)
## Bitwise.
O_MOVI32 = hw_op('movi32', OM_ALU, 1, 1)
+11
View File
@@ -1406,6 +1406,17 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
instr = trans_reg_intr(tctx, intr, dest, src[0], src[1]);
break;
case nir_intrinsic_emitpix_pco:
instr = pco_emitpix(&tctx->b,
src[0],
src[1],
.freep = nir_intrinsic_freep(intr));
break;
case nir_intrinsic_wop_pco:
instr = pco_wop(&tctx->b);
break;
default:
printf("Unsupported intrinsic: \"");
nir_print_instr(&intr->instr, stdout);
+13 -17
View File
@@ -48,6 +48,7 @@
#include "pvr_private.h"
#include "pvr_tex_state.h"
#include "pvr_types.h"
#include "pvr_usc.h"
#include "usc/pvr_uscgen.h"
#include "pvr_winsys.h"
#include "util/bitscan.h"
@@ -500,26 +501,26 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
const VkAllocationCallbacks *const allocator = &cmd_buffer->vk.pool->alloc;
struct pvr_device *const device = cmd_buffer->device;
struct pvr_suballoc_bo *usc_eot_program = NULL;
struct util_dynarray eot_program_bin;
struct pvr_eot_props props = {
.emit_count = emit_count,
.shared_words = false,
.state_words = pbe_cs_words,
};
uint32_t *staging_buffer;
uint32_t usc_temp_count;
pco_shader *eot;
VkResult result;
assert(emit_count > 0);
pvr_uscgen_eot("per-job EOT",
emit_count,
pbe_cs_words,
&usc_temp_count,
&eot_program_bin);
eot = pvr_usc_eot(cmd_buffer->device->pdevice->pco_ctx, &props);
usc_temp_count = pco_shader_data(eot)->common.temps;
result = pvr_cmd_buffer_upload_usc(cmd_buffer,
eot_program_bin.data,
eot_program_bin.size,
pco_shader_binary_data(eot),
pco_shader_binary_size(eot),
4,
&usc_eot_program);
util_dynarray_fini(&eot_program_bin);
ralloc_free(eot);
if (result != VK_SUCCESS)
return result;
@@ -556,15 +557,10 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
cmd_buffer->device->pixel_event_data_size_in_dwords,
4,
pds_upload_out);
if (result != VK_SUCCESS)
goto err_free_pixel_event_staging_buffer;
vk_free(allocator, staging_buffer);
return VK_SUCCESS;
err_free_pixel_event_staging_buffer:
vk_free(allocator, staging_buffer);
return result;
err_free_usc_pixel_program:
list_del(&usc_eot_program->link);
+16 -10
View File
@@ -38,6 +38,7 @@
#include "pvr_private.h"
#include "pvr_transfer_frag_store.h"
#include "pvr_types.h"
#include "pvr_usc.h"
#include "usc/pvr_uscgen.h"
#include "usc/programs/pvr_vdm_load_sr.h"
#include "usc/programs/pvr_vdm_store_sr.h"
@@ -1206,7 +1207,7 @@ static void pvr_transfer_ctx_ws_create_info_init(
static VkResult pvr_transfer_eot_shaders_init(struct pvr_device *device,
struct pvr_transfer_ctx *ctx)
{
uint64_t rt_pbe_regs[PVR_TRANSFER_MAX_RENDER_TARGETS];
unsigned rt_pbe_regs[PVR_TRANSFER_MAX_RENDER_TARGETS];
/* Setup start indexes of the shared registers that will contain the PBE
* state words for each render target. These must match the indexes used in
@@ -1220,26 +1221,31 @@ static VkResult pvr_transfer_eot_shaders_init(struct pvr_device *device,
* indexes and number of shared registers hard coded in
* pvr_pds_generate_pixel_event().
*/
for (uint32_t i = 0; i < ARRAY_SIZE(rt_pbe_regs); i++)
for (unsigned i = 0; i < ARRAY_SIZE(rt_pbe_regs); i++)
rt_pbe_regs[i] = i * PVR_STATE_PBE_DWORDS;
STATIC_ASSERT(ARRAY_SIZE(rt_pbe_regs) == ARRAY_SIZE(ctx->usc_eot_bos));
for (uint32_t i = 0; i < ARRAY_SIZE(ctx->usc_eot_bos); i++) {
const uint32_t cache_line_size =
rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
const unsigned rt_count = i + 1;
struct util_dynarray eot_bin;
struct pvr_eot_props props = {
.emit_count = i + 1,
.shared_words = true,
.state_regs = rt_pbe_regs,
};
pco_shader *eot;
VkResult result;
pvr_uscgen_tq_eot(rt_count, rt_pbe_regs, &eot_bin);
eot = pvr_usc_eot(device->pdevice->pco_ctx, &props);
result = pvr_gpu_upload_usc(device,
util_dynarray_begin(&eot_bin),
eot_bin.size,
pco_shader_binary_data(eot),
pco_shader_binary_size(eot),
cache_line_size,
&ctx->usc_eot_bos[i]);
util_dynarray_fini(&eot_bin);
ctx->usc_eot_usc_temps[i] = pco_shader_data(eot)->common.temps;
ralloc_free(eot);
if (result != VK_SUCCESS) {
for (uint32_t j = 0; j < i; j++)
pvr_bo_suballoc_free(ctx->usc_eot_bos[j]);
+1
View File
@@ -149,6 +149,7 @@ struct pvr_transfer_ctx {
struct pvr_transfer_frag_store frag_store;
struct pvr_suballoc_bo *usc_eot_bos[PVR_TRANSFER_MAX_RENDER_TARGETS];
unsigned usc_eot_usc_temps[PVR_TRANSFER_MAX_RENDER_TARGETS];
struct pvr_pds_upload pds_unitex_code[PVR_TRANSFER_MAX_TEXSTATE_DMA]
[PVR_TRANSFER_MAX_UNIFORM_DMA];
+1 -1
View File
@@ -1093,7 +1093,7 @@ static VkResult pvr_pbe_setup_emit(const struct pvr_transfer_cmd *transfer_cmd,
pvr_pds_setup_doutu(&program.task_control,
addr.addr,
0U,
ctx->usc_eot_usc_temps[rt_count - 1U],
ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
false);
+14 -9
View File
@@ -42,6 +42,7 @@
#include "usc/programs/pvr_static_shaders.h"
#include "pvr_tex_state.h"
#include "pvr_types.h"
#include "pvr_usc.h"
#include "usc/pvr_uscgen.h"
#include "util/bitscan.h"
#include "util/macros.h"
@@ -626,8 +627,9 @@ pvr_spm_init_eot_state(struct pvr_device *device,
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
uint32_t total_render_target_used = 0;
struct pvr_pds_upload pds_eot_program;
struct util_dynarray usc_shader_binary;
struct pvr_eot_props props;
uint32_t usc_temp_count;
pco_shader *eot;
VkResult result;
pvr_dev_addr_t next_scratch_buffer_addr =
@@ -726,20 +728,23 @@ pvr_spm_init_eot_state(struct pvr_device *device,
}
}
pvr_uscgen_eot("SPM EOT",
total_render_target_used,
pbe_state_words[0],
&usc_temp_count,
&usc_shader_binary);
props = (struct pvr_eot_props){
.emit_count = total_render_target_used,
.shared_words = false,
.state_words = pbe_state_words[0],
};
eot = pvr_usc_eot(device->pdevice->pco_ctx, &props);
usc_temp_count = pco_shader_data(eot)->common.temps;
/* TODO: Create a #define in the compiler code to replace the 16. */
result = pvr_gpu_upload_usc(device,
usc_shader_binary.data,
usc_shader_binary.size,
pco_shader_binary_data(eot),
pco_shader_binary_size(eot),
16,
&spm_eot_state->usc_eot_program);
util_dynarray_fini(&usc_shader_binary);
ralloc_free(eot);
if (result != VK_SUCCESS)
return result;
+36 -1
View File
@@ -77,7 +77,42 @@ pco_shader *pvr_usc_nop(pco_ctx *ctx, mesa_shader_stage stage)
*/
pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props)
{
   /* The last emit carries the freep flag, so at least one emit is required.
    * Check this before building anything rather than after the loop.
    */
   assert(props->emit_count > 0);

   /* Shader name encodes the emit count and where the state words come from
    * ("sh" for shared words, "imm" for immediates).
    */
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                     pco_nir_options(),
                                     "eot%u.%s",
                                     props->emit_count,
                                     props->shared_words ? "sh" : "imm");

   /* TODO: tile buffer support. */

   nir_intrinsic_instr *last_emit = NULL;
   for (unsigned u = 0; u < props->emit_count; ++u) {
      /* Every emit after the first is preceded by a wop. */
      if (u > 0)
         nir_wop_pco(&b);

      nir_def *state0;
      nir_def *state1;

      if (props->shared_words) {
         /* State words are loaded from two consecutive preamble slots starting
          * at the base supplied for this emit.
          */
         const unsigned base = props->state_regs[u];

         state0 = nir_load_preamble(&b, 1, 32, .base = base);
         state1 = nir_load_preamble(&b, 1, 32, .base = base + 1);
      } else {
         /* State words are baked in as immediates; each emit owns a group of
          * ROGUE_NUM_PBESTATE_STATE_WORDS entries, of which the first two are
          * used here.
          */
         const unsigned state_off = u * ROGUE_NUM_PBESTATE_STATE_WORDS;

         state0 = nir_imm_int(&b, props->state_words[state_off]);
         state1 = nir_imm_int(&b, props->state_words[state_off + 1]);
      }

      last_emit = nir_emitpix_pco(&b, state0, state1);
   }

   /* Only the final emit has freep set. */
   nir_intrinsic_set_freep(last_emit, true);

   /* Just return. */
   nir_jump(&b, nir_jump_return);

   return build_shader(ctx, b.shader, &(pco_data){ 0 });
}
/**
+7
View File
@@ -23,6 +23,13 @@ pco_shader *pvr_usc_nop(pco_ctx *ctx, mesa_shader_stage stage);
/* EOT shader generation. */
/* Inputs to pvr_usc_eot() describing the EOT shader to generate. */
struct pvr_eot_props {
/* Number of emitpix instructions to generate. */
unsigned emit_count;
/* Selects the active union member: true -> state_regs, false -> state_words. */
bool shared_words;
union {
/* Immediate state words; pvr_usc_eot() reads entries
 * [u * ROGUE_NUM_PBESTATE_STATE_WORDS] and the one after it for emit u.
 */
const uint32_t *state_words;
/* One base index per emit; pvr_usc_eot() loads preamble slots base and
 * base + 1 for emit u.
 */
const unsigned *state_regs;
};
};
pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props);
+2
View File
@@ -29,6 +29,7 @@
#include <stdbool.h>
#if 0
/* Expects emit_count ROGUE_NUM_PBESTATE_STATE_WORDS entries */
void pvr_uscgen_eot(const char *name,
uint32_t emit_count,
@@ -72,6 +73,7 @@ void pvr_uscgen_eot(const char *name,
ralloc_free(shader);
}
#endif
void pvr_uscgen_nop(struct util_dynarray *binary)
{
+4
View File
@@ -137,11 +137,13 @@ struct pvr_tq_frag_sh_reg_layout {
/* TODO: Shader caching (not pipeline caching) support. */
#if 0
void pvr_uscgen_eot(const char *name,
uint32_t emit_count,
const uint32_t *emit_state,
unsigned *temps_used,
struct util_dynarray *binary);
#endif
void pvr_uscgen_nop(struct util_dynarray *binary);
@@ -150,8 +152,10 @@ void pvr_uscgen_tq_frag(const struct pvr_tq_shader_properties *shader_props,
unsigned *temps_used,
struct util_dynarray *binary);
#if 0
void pvr_uscgen_tq_eot(unsigned rt_count,
const uint64_t *pbe_regs,
struct util_dynarray *binary);
#endif
#endif /* PVR_USCGEN_H */
@@ -207,6 +207,7 @@ void pvr_uscgen_tq_frag(const struct pvr_tq_shader_properties *shader_props,
ralloc_free(shader);
}
#if 0
void pvr_uscgen_tq_eot(unsigned rt_count,
const uint64_t *pbe_regs,
struct util_dynarray *binary)
@@ -240,3 +241,4 @@ void pvr_uscgen_tq_eot(unsigned rt_count,
ralloc_free(shader);
}
#endif