From 0ff8f573926e1a2d751206d484f8dfffff2109e6 Mon Sep 17 00:00:00 2001 From: Simon Perretta Date: Thu, 6 Feb 2025 13:35:26 +0000 Subject: [PATCH] pvr, pco: simple end-of-tile/render nir shader gen Signed-off-by: Simon Perretta Acked-by: Erik Faye-Lund Part-of: --- src/compiler/nir/nir_intrinsics.py | 4 +++ src/imagination/pco/pco_map.py | 23 ++++++++++++++ src/imagination/pco/pco_ops.py | 2 ++ src/imagination/pco/pco_trans_nir.c | 11 +++++++ src/imagination/vulkan/pvr_cmd_buffer.c | 30 ++++++++---------- src/imagination/vulkan/pvr_job_context.c | 26 +++++++++------ src/imagination/vulkan/pvr_job_context.h | 1 + src/imagination/vulkan/pvr_job_transfer.c | 2 +- src/imagination/vulkan/pvr_spm.c | 23 ++++++++------ src/imagination/vulkan/pvr_usc.c | 37 +++++++++++++++++++++- src/imagination/vulkan/pvr_usc.h | 7 ++++ src/imagination/vulkan/usc/pvr_uscgen.c | 2 ++ src/imagination/vulkan/usc/pvr_uscgen.h | 4 +++ src/imagination/vulkan/usc/pvr_uscgen_tq.c | 2 ++ 14 files changed, 136 insertions(+), 38 deletions(-) diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 555def78efe..c9167b85699 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2731,3 +2731,7 @@ intrinsic("isp_feedback_pco", src_comp=[1, 1]) # Loads the valid mask. intrinsic("load_savmsk_vm_pco", src_comp=[], dest_comp=1, indices=[], flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32]) + +index("bool", "freep") +intrinsic("emitpix_pco", src_comp=[1, 1], indices=[FREEP], bit_sizes=[32]) +intrinsic("wop_pco") diff --git a/src/imagination/pco/pco_map.py b/src/imagination/pco/pco_map.py index 19af1b6d7e7..d67b8b9172b 100644 --- a/src/imagination/pco/pco_map.py +++ b/src/imagination/pco/pco_map.py @@ -1454,6 +1454,11 @@ encode_map(O_SAVMSK, op_ref_maps=[('backend', [['w0', '_'], ['w1', '_']], [])] ) +encode_map(O_EMITPIX, + encodings=[(I_EMITPIX, [('freep', OM_FREEP)])], + op_ref_maps=[('backend', [], ['s0', 's2'])] +) + encode_map(O_BBYP0BM, encodings=[ (I_PHASE0_SRC, [ @@ -2937,6 +2942,24 @@ group_map(O_SAVMSK, ] ) +group_map(O_EMITPIX, + hdr=(I_IGRP_HDR_MAIN, [ + ('oporg', 'be'), + ('olchk', OM_OLCHK), + ('w1p', False), + ('w0p', False), + ('cc', OM_EXEC_CND), + ('end', OM_END), + ('atom', OM_ATOM), + ('rpt', 1) + ]), + enc_ops=[('backend', O_EMITPIX)], + srcs=[ + ('s[0]', ('backend', SRC(0)), 's0'), + ('s[2]', ('backend', SRC(1)), 's2') + ] +) + group_map(O_MOVI32, hdr=(I_IGRP_HDR_BITWISE, [ ('opcnt', 'p0'), diff --git a/src/imagination/pco/pco_ops.py b/src/imagination/pco/pco_ops.py index 8cbbfa6c6f4..aeb6e200b18 100644 --- a/src/imagination/pco/pco_ops.py +++ b/src/imagination/pco/pco_ops.py @@ -395,6 +395,8 @@ O_DEPTHF = hw_op('depthf', OM_ALU_RPT1, 0, 2) O_SAVMSK = hw_op('savmsk', OM_ALU_RPT1 + [OM_SAVMSK_MODE], 2) +O_EMITPIX = hw_op('emitpix', OM_ALU_RPT1 + [OM_FREEP], 0, 2) + ## Bitwise. O_MOVI32 = hw_op('movi32', OM_ALU, 1, 1) diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c index 1d37167ab70..e993f2a131a 100644 --- a/src/imagination/pco/pco_trans_nir.c +++ b/src/imagination/pco/pco_trans_nir.c @@ -1406,6 +1406,17 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr) instr = trans_reg_intr(tctx, intr, dest, src[0], src[1]); break; + case nir_intrinsic_emitpix_pco: + instr = pco_emitpix(&tctx->b, + src[0], + src[1], + .freep = nir_intrinsic_freep(intr)); + break; + + case nir_intrinsic_wop_pco: + instr = pco_wop(&tctx->b); + break; + default: printf("Unsupported intrinsic: \""); nir_print_instr(&intr->instr, stdout); diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index 218b6403c81..3d043fa43cb 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -48,6 +48,7 @@ #include "pvr_private.h" #include "pvr_tex_state.h" #include "pvr_types.h" +#include "pvr_usc.h" #include "usc/pvr_uscgen.h" #include "pvr_winsys.h" #include "util/bitscan.h" @@ -500,26 +501,26 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload( const VkAllocationCallbacks *const allocator = &cmd_buffer->vk.pool->alloc; struct pvr_device *const device = cmd_buffer->device; struct pvr_suballoc_bo *usc_eot_program = NULL; - struct util_dynarray eot_program_bin; + struct pvr_eot_props props = { + .emit_count = emit_count, + .shared_words = false, + .state_words = pbe_cs_words, + }; uint32_t *staging_buffer; uint32_t usc_temp_count; + pco_shader *eot; VkResult result; - assert(emit_count > 0); - - pvr_uscgen_eot("per-job EOT", - emit_count, - pbe_cs_words, - &usc_temp_count, - &eot_program_bin); + eot = pvr_usc_eot(cmd_buffer->device->pdevice->pco_ctx, &props); + usc_temp_count = pco_shader_data(eot)->common.temps; result = pvr_cmd_buffer_upload_usc(cmd_buffer, - eot_program_bin.data, - eot_program_bin.size, + pco_shader_binary_data(eot), + pco_shader_binary_size(eot), 4, &usc_eot_program); - util_dynarray_fini(&eot_program_bin); + ralloc_free(eot); if (result != VK_SUCCESS) return result; @@ -556,15 +557,10 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload( cmd_buffer->device->pixel_event_data_size_in_dwords, 4, pds_upload_out); - if (result != VK_SUCCESS) - goto err_free_pixel_event_staging_buffer; vk_free(allocator, staging_buffer); - return VK_SUCCESS; - -err_free_pixel_event_staging_buffer: - vk_free(allocator, staging_buffer); + return result; err_free_usc_pixel_program: list_del(&usc_eot_program->link); diff --git a/src/imagination/vulkan/pvr_job_context.c b/src/imagination/vulkan/pvr_job_context.c index 42a46a9b91e..f213555159a 100644 --- a/src/imagination/vulkan/pvr_job_context.c +++ b/src/imagination/vulkan/pvr_job_context.c @@ -38,6 +38,7 @@ #include "pvr_private.h" #include "pvr_transfer_frag_store.h" #include "pvr_types.h" +#include "pvr_usc.h" #include "usc/pvr_uscgen.h" #include "usc/programs/pvr_vdm_load_sr.h" #include "usc/programs/pvr_vdm_store_sr.h" @@ -1206,7 +1207,7 @@ static void pvr_transfer_ctx_ws_create_info_init( static VkResult pvr_transfer_eot_shaders_init(struct pvr_device *device, struct pvr_transfer_ctx *ctx) { - uint64_t rt_pbe_regs[PVR_TRANSFER_MAX_RENDER_TARGETS]; + unsigned rt_pbe_regs[PVR_TRANSFER_MAX_RENDER_TARGETS]; /* Setup start indexes of the shared registers that will contain the PBE * state words for each render target. These must match the indexes used in @@ -1220,26 +1221,31 @@ static VkResult pvr_transfer_eot_shaders_init(struct pvr_device *device, * indexes and number of shared registers hard coded in * pvr_pds_generate_pixel_event(). */ - for (uint32_t i = 0; i < ARRAY_SIZE(rt_pbe_regs); i++) + for (unsigned i = 0; i < ARRAY_SIZE(rt_pbe_regs); i++) rt_pbe_regs[i] = i * PVR_STATE_PBE_DWORDS; - STATIC_ASSERT(ARRAY_SIZE(rt_pbe_regs) == ARRAY_SIZE(ctx->usc_eot_bos)); - for (uint32_t i = 0; i < ARRAY_SIZE(ctx->usc_eot_bos); i++) { const uint32_t cache_line_size = rogue_get_slc_cache_line_size(&device->pdevice->dev_info); - const unsigned rt_count = i + 1; - struct util_dynarray eot_bin; + struct pvr_eot_props props = { + .emit_count = i + 1, + .shared_words = true, + .state_regs = rt_pbe_regs, + }; + pco_shader *eot; VkResult result; - pvr_uscgen_tq_eot(rt_count, rt_pbe_regs, &eot_bin); + eot = pvr_usc_eot(device->pdevice->pco_ctx, &props); result = pvr_gpu_upload_usc(device, - util_dynarray_begin(&eot_bin), - eot_bin.size, + pco_shader_binary_data(eot), + pco_shader_binary_size(eot), cache_line_size, &ctx->usc_eot_bos[i]); - util_dynarray_fini(&eot_bin); + + ctx->usc_eot_usc_temps[i] = pco_shader_data(eot)->common.temps; + ralloc_free(eot); + if (result != VK_SUCCESS) { for (uint32_t j = 0; j < i; j++) pvr_bo_suballoc_free(ctx->usc_eot_bos[j]); diff --git a/src/imagination/vulkan/pvr_job_context.h b/src/imagination/vulkan/pvr_job_context.h index 740b30a17c9..317ae295f6f 100644 --- a/src/imagination/vulkan/pvr_job_context.h +++ b/src/imagination/vulkan/pvr_job_context.h @@ -149,6 +149,7 @@ struct pvr_transfer_ctx { struct pvr_transfer_frag_store frag_store; struct pvr_suballoc_bo *usc_eot_bos[PVR_TRANSFER_MAX_RENDER_TARGETS]; + unsigned usc_eot_usc_temps[PVR_TRANSFER_MAX_RENDER_TARGETS]; struct pvr_pds_upload pds_unitex_code[PVR_TRANSFER_MAX_TEXSTATE_DMA] [PVR_TRANSFER_MAX_UNIFORM_DMA]; diff --git a/src/imagination/vulkan/pvr_job_transfer.c b/src/imagination/vulkan/pvr_job_transfer.c index 83c80dbbe65..7966d9ba300 100644 --- a/src/imagination/vulkan/pvr_job_transfer.c +++ b/src/imagination/vulkan/pvr_job_transfer.c @@ -1093,7 +1093,7 @@ static VkResult pvr_pbe_setup_emit(const struct pvr_transfer_cmd *transfer_cmd, pvr_pds_setup_doutu(&program.task_control, addr.addr, - 0U, + ctx->usc_eot_usc_temps[rt_count - 1U], ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE, false); diff --git a/src/imagination/vulkan/pvr_spm.c b/src/imagination/vulkan/pvr_spm.c index 3aa36a63031..d448324625a 100644 --- a/src/imagination/vulkan/pvr_spm.c +++ b/src/imagination/vulkan/pvr_spm.c @@ -42,6 +42,7 @@ #include "usc/programs/pvr_static_shaders.h" #include "pvr_tex_state.h" #include "pvr_types.h" +#include "pvr_usc.h" #include "usc/pvr_uscgen.h" #include "util/bitscan.h" #include "util/macros.h" @@ -626,8 +627,9 @@ pvr_spm_init_eot_state(struct pvr_device *device, const struct pvr_device_info *dev_info = &device->pdevice->dev_info; uint32_t total_render_target_used = 0; struct pvr_pds_upload pds_eot_program; - struct util_dynarray usc_shader_binary; + struct pvr_eot_props props; uint32_t usc_temp_count; + pco_shader *eot; VkResult result; pvr_dev_addr_t next_scratch_buffer_addr = @@ -726,20 +728,23 @@ pvr_spm_init_eot_state(struct pvr_device *device, } } - pvr_uscgen_eot("SPM EOT", - total_render_target_used, - pbe_state_words[0], - &usc_temp_count, - &usc_shader_binary); + props = (struct pvr_eot_props){ + .emit_count = total_render_target_used, + .shared_words = false, + .state_words = pbe_state_words[0], + }; + + eot = pvr_usc_eot(device->pdevice->pco_ctx, &props); + usc_temp_count = pco_shader_data(eot)->common.temps; /* TODO: Create a #define in the compiler code to replace the 16. */ result = pvr_gpu_upload_usc(device, - usc_shader_binary.data, - usc_shader_binary.size, + pco_shader_binary_data(eot), + pco_shader_binary_size(eot), 16, &spm_eot_state->usc_eot_program); - util_dynarray_fini(&usc_shader_binary); + ralloc_free(eot); if (result != VK_SUCCESS) return result; diff --git a/src/imagination/vulkan/pvr_usc.c b/src/imagination/vulkan/pvr_usc.c index a6680a05447..afe7f83220e 100644 --- a/src/imagination/vulkan/pvr_usc.c +++ b/src/imagination/vulkan/pvr_usc.c @@ -77,7 +77,42 @@ pco_shader *pvr_usc_nop(pco_ctx *ctx, mesa_shader_stage stage) */ pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props) { - UNREACHABLE("finishme: pvr_usc_eot"); + nir_builder b = + nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, + pco_nir_options(), + "eot%u.%s", + props->emit_count, + props->shared_words ? "sh" : "imm"); + + /* TODO: tile buffer support. */ + + nir_intrinsic_instr *last_emit = NULL; + for (unsigned u = 0; u < props->emit_count; ++u) { + if (u > 0) + nir_wop_pco(&b); + + nir_def *state0; + nir_def *state1; + if (props->shared_words) { + state0 = nir_load_preamble(&b, 1, 32, .base = props->state_regs[u]); + state1 = + nir_load_preamble(&b, 1, 32, .base = props->state_regs[u] + 1); + } else { + unsigned state_off = u * ROGUE_NUM_PBESTATE_STATE_WORDS; + state0 = nir_imm_int(&b, props->state_words[state_off]); + state1 = nir_imm_int(&b, props->state_words[state_off + 1]); + } + + last_emit = nir_emitpix_pco(&b, state0, state1); + } + + assert(last_emit); + nir_intrinsic_set_freep(last_emit, true); + + /* Just return. */ + nir_jump(&b, nir_jump_return); + + return build_shader(ctx, b.shader, &(pco_data){ 0 }); } /** diff --git a/src/imagination/vulkan/pvr_usc.h b/src/imagination/vulkan/pvr_usc.h index 6629c7ccf5a..c1ca07ec336 100644 --- a/src/imagination/vulkan/pvr_usc.h +++ b/src/imagination/vulkan/pvr_usc.h @@ -23,6 +23,13 @@ pco_shader *pvr_usc_nop(pco_ctx *ctx, mesa_shader_stage stage); /* EOT shader generation. */ struct pvr_eot_props { + unsigned emit_count; + + bool shared_words; + union { + const uint32_t *state_words; + const unsigned *state_regs; + }; }; pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props); diff --git a/src/imagination/vulkan/usc/pvr_uscgen.c b/src/imagination/vulkan/usc/pvr_uscgen.c index ee5dd453d7d..2372c7d92f5 100644 --- a/src/imagination/vulkan/usc/pvr_uscgen.c +++ b/src/imagination/vulkan/usc/pvr_uscgen.c @@ -29,6 +29,7 @@ #include +#if 0 /* Expects emit_count ROGUE_NUM_PBESTATE_STATE_WORDS entries */ void pvr_uscgen_eot(const char *name, uint32_t emit_count, @@ -72,6 +73,7 @@ void pvr_uscgen_eot(const char *name, ralloc_free(shader); } +#endif void pvr_uscgen_nop(struct util_dynarray *binary) { diff --git a/src/imagination/vulkan/usc/pvr_uscgen.h b/src/imagination/vulkan/usc/pvr_uscgen.h index ebb8af08f07..f7ce6066cdd 100644 --- a/src/imagination/vulkan/usc/pvr_uscgen.h +++ b/src/imagination/vulkan/usc/pvr_uscgen.h @@ -137,11 +137,13 @@ struct pvr_tq_frag_sh_reg_layout { /* TODO: Shader caching (not pipeline caching) support. */ +#if 0 void pvr_uscgen_eot(const char *name, uint32_t emit_count, const uint32_t *emit_state, unsigned *temps_used, struct util_dynarray *binary); +#endif void pvr_uscgen_nop(struct util_dynarray *binary); @@ -150,8 +152,10 @@ void pvr_uscgen_tq_frag(const struct pvr_tq_shader_properties *shader_props, unsigned *temps_used, struct util_dynarray *binary); +#if 0 void pvr_uscgen_tq_eot(unsigned rt_count, const uint64_t *pbe_regs, struct util_dynarray *binary); +#endif #endif /* PVR_USCGEN_H */ diff --git a/src/imagination/vulkan/usc/pvr_uscgen_tq.c b/src/imagination/vulkan/usc/pvr_uscgen_tq.c index 971010da6aa..6db53da00f8 100644 --- a/src/imagination/vulkan/usc/pvr_uscgen_tq.c +++ b/src/imagination/vulkan/usc/pvr_uscgen_tq.c @@ -207,6 +207,7 @@ void pvr_uscgen_tq_frag(const struct pvr_tq_shader_properties *shader_props, ralloc_free(shader); } +#if 0 void pvr_uscgen_tq_eot(unsigned rt_count, const uint64_t *pbe_regs, struct util_dynarray *binary) @@ -240,3 +241,4 @@ void pvr_uscgen_tq_eot(unsigned rt_count, ralloc_free(shader); } +#endif