pvr, pco: simple end-of-tile/render nir shader gen
Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
committed by
Marge Bot
parent
2ed5aa49a5
commit
0ff8f57392
@@ -2731,3 +2731,7 @@ intrinsic("isp_feedback_pco", src_comp=[1, 1])
|
||||
|
||||
# Loads the valid mask.
|
||||
intrinsic("load_savmsk_vm_pco", src_comp=[], dest_comp=1, indices=[], flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32])
|
||||
|
||||
# Boolean "freep" index; used as the only index of emitpix_pco below.
index("bool", "freep")
|
||||
# Pixel emit: two single-component sources, one FREEP index, 32-bit only.
# No destination is declared.
intrinsic("emitpix_pco", src_comp=[1, 1], indices=[FREEP], bit_sizes=[32])
|
||||
# Declared with no sources, destination or indices.
# NOTE(review): the semantics of "wop" are not visible here - confirm against
# the hardware documentation before relying on it.
intrinsic("wop_pco")
|
||||
|
||||
@@ -1454,6 +1454,11 @@ encode_map(O_SAVMSK,
|
||||
op_ref_maps=[('backend', [['w0', '_'], ['w1', '_']], [])]
|
||||
)
|
||||
|
||||
# Encoding map for the emitpix op: it uses the I_EMITPIX encoding, whose
# 'freep' field is filled from the OM_FREEP op modifier.
# The 'backend' op reference map lists no entries in its first list and
# 's0', 's2' in its second - presumably (destinations, sources); TODO confirm
# against the encode_map() definition.
encode_map(O_EMITPIX,
|
||||
encodings=[(I_EMITPIX, [('freep', OM_FREEP)])],
|
||||
op_ref_maps=[('backend', [], ['s0', 's2'])]
|
||||
)
|
||||
|
||||
encode_map(O_BBYP0BM,
|
||||
encodings=[
|
||||
(I_PHASE0_SRC, [
|
||||
@@ -2937,6 +2942,24 @@ group_map(O_SAVMSK,
|
||||
]
|
||||
)
|
||||
|
||||
# Instruction-group mapping for the emitpix op.
# Header: main group header with 'oporg' set to 'be', 'w0p'/'w1p' both False
# and 'rpt' fixed to 1; 'olchk', 'cc', 'end' and 'atom' come from the
# corresponding op modifiers.
# The group encodes a single 'backend' op (O_EMITPIX) and routes its source 0
# to s[0]/'s0' and its source 1 to s[2]/'s2'.
group_map(O_EMITPIX,
|
||||
hdr=(I_IGRP_HDR_MAIN, [
|
||||
('oporg', 'be'),
|
||||
('olchk', OM_OLCHK),
|
||||
('w1p', False),
|
||||
('w0p', False),
|
||||
('cc', OM_EXEC_CND),
|
||||
('end', OM_END),
|
||||
('atom', OM_ATOM),
|
||||
('rpt', 1)
|
||||
]),
|
||||
enc_ops=[('backend', O_EMITPIX)],
|
||||
srcs=[
|
||||
('s[0]', ('backend', SRC(0)), 's0'),
|
||||
('s[2]', ('backend', SRC(1)), 's2')
|
||||
]
|
||||
)
|
||||
|
||||
group_map(O_MOVI32,
|
||||
hdr=(I_IGRP_HDR_BITWISE, [
|
||||
('opcnt', 'p0'),
|
||||
|
||||
@@ -395,6 +395,8 @@ O_DEPTHF = hw_op('depthf', OM_ALU_RPT1, 0, 2)
|
||||
|
||||
O_SAVMSK = hw_op('savmsk', OM_ALU_RPT1 + [OM_SAVMSK_MODE], 2)
|
||||
|
||||
O_EMITPIX = hw_op('emitpix', OM_ALU_RPT1 + [OM_FREEP], 0, 2)
|
||||
|
||||
## Bitwise.
|
||||
O_MOVI32 = hw_op('movi32', OM_ALU, 1, 1)
|
||||
|
||||
|
||||
@@ -1406,6 +1406,17 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
|
||||
instr = trans_reg_intr(tctx, intr, dest, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_emitpix_pco:
|
||||
instr = pco_emitpix(&tctx->b,
|
||||
src[0],
|
||||
src[1],
|
||||
.freep = nir_intrinsic_freep(intr));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_wop_pco:
|
||||
instr = pco_wop(&tctx->b);
|
||||
break;
|
||||
|
||||
default:
|
||||
printf("Unsupported intrinsic: \"");
|
||||
nir_print_instr(&intr->instr, stdout);
|
||||
|
||||
@@ -48,6 +48,7 @@
|
||||
#include "pvr_private.h"
|
||||
#include "pvr_tex_state.h"
|
||||
#include "pvr_types.h"
|
||||
#include "pvr_usc.h"
|
||||
#include "usc/pvr_uscgen.h"
|
||||
#include "pvr_winsys.h"
|
||||
#include "util/bitscan.h"
|
||||
@@ -500,26 +501,26 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
|
||||
const VkAllocationCallbacks *const allocator = &cmd_buffer->vk.pool->alloc;
|
||||
struct pvr_device *const device = cmd_buffer->device;
|
||||
struct pvr_suballoc_bo *usc_eot_program = NULL;
|
||||
struct util_dynarray eot_program_bin;
|
||||
struct pvr_eot_props props = {
|
||||
.emit_count = emit_count,
|
||||
.shared_words = false,
|
||||
.state_words = pbe_cs_words,
|
||||
};
|
||||
uint32_t *staging_buffer;
|
||||
uint32_t usc_temp_count;
|
||||
pco_shader *eot;
|
||||
VkResult result;
|
||||
|
||||
assert(emit_count > 0);
|
||||
|
||||
pvr_uscgen_eot("per-job EOT",
|
||||
emit_count,
|
||||
pbe_cs_words,
|
||||
&usc_temp_count,
|
||||
&eot_program_bin);
|
||||
eot = pvr_usc_eot(cmd_buffer->device->pdevice->pco_ctx, &props);
|
||||
usc_temp_count = pco_shader_data(eot)->common.temps;
|
||||
|
||||
result = pvr_cmd_buffer_upload_usc(cmd_buffer,
|
||||
eot_program_bin.data,
|
||||
eot_program_bin.size,
|
||||
pco_shader_binary_data(eot),
|
||||
pco_shader_binary_size(eot),
|
||||
4,
|
||||
&usc_eot_program);
|
||||
|
||||
util_dynarray_fini(&eot_program_bin);
|
||||
ralloc_free(eot);
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
@@ -556,15 +557,10 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
|
||||
cmd_buffer->device->pixel_event_data_size_in_dwords,
|
||||
4,
|
||||
pds_upload_out);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_pixel_event_staging_buffer;
|
||||
|
||||
vk_free(allocator, staging_buffer);
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
err_free_pixel_event_staging_buffer:
|
||||
vk_free(allocator, staging_buffer);
|
||||
return result;
|
||||
|
||||
err_free_usc_pixel_program:
|
||||
list_del(&usc_eot_program->link);
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
#include "pvr_private.h"
|
||||
#include "pvr_transfer_frag_store.h"
|
||||
#include "pvr_types.h"
|
||||
#include "pvr_usc.h"
|
||||
#include "usc/pvr_uscgen.h"
|
||||
#include "usc/programs/pvr_vdm_load_sr.h"
|
||||
#include "usc/programs/pvr_vdm_store_sr.h"
|
||||
@@ -1206,7 +1207,7 @@ static void pvr_transfer_ctx_ws_create_info_init(
|
||||
static VkResult pvr_transfer_eot_shaders_init(struct pvr_device *device,
|
||||
struct pvr_transfer_ctx *ctx)
|
||||
{
|
||||
uint64_t rt_pbe_regs[PVR_TRANSFER_MAX_RENDER_TARGETS];
|
||||
unsigned rt_pbe_regs[PVR_TRANSFER_MAX_RENDER_TARGETS];
|
||||
|
||||
/* Setup start indexes of the shared registers that will contain the PBE
|
||||
* state words for each render target. These must match the indexes used in
|
||||
@@ -1220,26 +1221,31 @@ static VkResult pvr_transfer_eot_shaders_init(struct pvr_device *device,
|
||||
* indexes and number of shared registers hard coded in
|
||||
* pvr_pds_generate_pixel_event().
|
||||
*/
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(rt_pbe_regs); i++)
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(rt_pbe_regs); i++)
|
||||
rt_pbe_regs[i] = i * PVR_STATE_PBE_DWORDS;
|
||||
|
||||
STATIC_ASSERT(ARRAY_SIZE(rt_pbe_regs) == ARRAY_SIZE(ctx->usc_eot_bos));
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(ctx->usc_eot_bos); i++) {
|
||||
const uint32_t cache_line_size =
|
||||
rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
|
||||
const unsigned rt_count = i + 1;
|
||||
struct util_dynarray eot_bin;
|
||||
struct pvr_eot_props props = {
|
||||
.emit_count = i + 1,
|
||||
.shared_words = true,
|
||||
.state_regs = rt_pbe_regs,
|
||||
};
|
||||
pco_shader *eot;
|
||||
VkResult result;
|
||||
|
||||
pvr_uscgen_tq_eot(rt_count, rt_pbe_regs, &eot_bin);
|
||||
eot = pvr_usc_eot(device->pdevice->pco_ctx, &props);
|
||||
|
||||
result = pvr_gpu_upload_usc(device,
|
||||
util_dynarray_begin(&eot_bin),
|
||||
eot_bin.size,
|
||||
pco_shader_binary_data(eot),
|
||||
pco_shader_binary_size(eot),
|
||||
cache_line_size,
|
||||
&ctx->usc_eot_bos[i]);
|
||||
util_dynarray_fini(&eot_bin);
|
||||
|
||||
ctx->usc_eot_usc_temps[i] = pco_shader_data(eot)->common.temps;
|
||||
ralloc_free(eot);
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
for (uint32_t j = 0; j < i; j++)
|
||||
pvr_bo_suballoc_free(ctx->usc_eot_bos[j]);
|
||||
|
||||
@@ -149,6 +149,7 @@ struct pvr_transfer_ctx {
|
||||
struct pvr_transfer_frag_store frag_store;
|
||||
|
||||
struct pvr_suballoc_bo *usc_eot_bos[PVR_TRANSFER_MAX_RENDER_TARGETS];
|
||||
unsigned usc_eot_usc_temps[PVR_TRANSFER_MAX_RENDER_TARGETS];
|
||||
|
||||
struct pvr_pds_upload pds_unitex_code[PVR_TRANSFER_MAX_TEXSTATE_DMA]
|
||||
[PVR_TRANSFER_MAX_UNIFORM_DMA];
|
||||
|
||||
@@ -1093,7 +1093,7 @@ static VkResult pvr_pbe_setup_emit(const struct pvr_transfer_cmd *transfer_cmd,
|
||||
|
||||
pvr_pds_setup_doutu(&program.task_control,
|
||||
addr.addr,
|
||||
0U,
|
||||
ctx->usc_eot_usc_temps[rt_count - 1U],
|
||||
ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
|
||||
false);
|
||||
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
#include "usc/programs/pvr_static_shaders.h"
|
||||
#include "pvr_tex_state.h"
|
||||
#include "pvr_types.h"
|
||||
#include "pvr_usc.h"
|
||||
#include "usc/pvr_uscgen.h"
|
||||
#include "util/bitscan.h"
|
||||
#include "util/macros.h"
|
||||
@@ -626,8 +627,9 @@ pvr_spm_init_eot_state(struct pvr_device *device,
|
||||
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
||||
uint32_t total_render_target_used = 0;
|
||||
struct pvr_pds_upload pds_eot_program;
|
||||
struct util_dynarray usc_shader_binary;
|
||||
struct pvr_eot_props props;
|
||||
uint32_t usc_temp_count;
|
||||
pco_shader *eot;
|
||||
VkResult result;
|
||||
|
||||
pvr_dev_addr_t next_scratch_buffer_addr =
|
||||
@@ -726,20 +728,23 @@ pvr_spm_init_eot_state(struct pvr_device *device,
|
||||
}
|
||||
}
|
||||
|
||||
pvr_uscgen_eot("SPM EOT",
|
||||
total_render_target_used,
|
||||
pbe_state_words[0],
|
||||
&usc_temp_count,
|
||||
&usc_shader_binary);
|
||||
props = (struct pvr_eot_props){
|
||||
.emit_count = total_render_target_used,
|
||||
.shared_words = false,
|
||||
.state_words = pbe_state_words[0],
|
||||
};
|
||||
|
||||
eot = pvr_usc_eot(device->pdevice->pco_ctx, &props);
|
||||
usc_temp_count = pco_shader_data(eot)->common.temps;
|
||||
|
||||
/* TODO: Create a #define in the compiler code to replace the 16. */
|
||||
result = pvr_gpu_upload_usc(device,
|
||||
usc_shader_binary.data,
|
||||
usc_shader_binary.size,
|
||||
pco_shader_binary_data(eot),
|
||||
pco_shader_binary_size(eot),
|
||||
16,
|
||||
&spm_eot_state->usc_eot_program);
|
||||
|
||||
util_dynarray_fini(&usc_shader_binary);
|
||||
ralloc_free(eot);
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
@@ -77,7 +77,42 @@ pco_shader *pvr_usc_nop(pco_ctx *ctx, mesa_shader_stage stage)
|
||||
*/
|
||||
pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props)
|
||||
{
|
||||
/* Builds a fragment-stage NIR shader named "eot<emit_count>.<sh|imm>" that
 * issues props->emit_count emitpix_pco intrinsics, then hands it to
 * build_shader() and returns the result.
 *
 * props->emit_count must be non-zero: the assert on last_emit below fires
 * otherwise.
 */
/* NOTE(review): the UNREACHABLE() line below looks like the old stub that
 * this change removes, shown inline by the diff rendering - confirm it is
 * absent from the real source, since it would precede all the code below.
 */
UNREACHABLE("finishme: pvr_usc_eot");
|
||||
nir_builder b =
|
||||
nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
|
||||
pco_nir_options(),
|
||||
"eot%u.%s",
|
||||
props->emit_count,
|
||||
props->shared_words ? "sh" : "imm");
|
||||
|
||||
/* TODO: tile buffer support. */
|
||||
|
||||
/* Tracks the most recent emit so that freep can be set on the final one. */
nir_intrinsic_instr *last_emit = NULL;
|
||||
for (unsigned u = 0; u < props->emit_count; ++u) {
|
||||
/* A wop_pco is inserted before every emit except the first. */
if (u > 0)
|
||||
nir_wop_pco(&b);
|
||||
|
||||
nir_def *state0;
|
||||
nir_def *state1;
|
||||
if (props->shared_words) {
|
||||
/* Shared path: the two words are loaded via nir_load_preamble at
 * bases state_regs[u] and state_regs[u] + 1.
 */
state0 = nir_load_preamble(&b, 1, 32, .base = props->state_regs[u]);
|
||||
state1 =
|
||||
nir_load_preamble(&b, 1, 32, .base = props->state_regs[u] + 1);
|
||||
} else {
|
||||
/* Immediate path: state_words is indexed with a stride of
 * ROGUE_NUM_PBESTATE_STATE_WORDS per emit; only the first two words
 * of each group are read here.
 */
unsigned state_off = u * ROGUE_NUM_PBESTATE_STATE_WORDS;
|
||||
state0 = nir_imm_int(&b, props->state_words[state_off]);
|
||||
state1 = nir_imm_int(&b, props->state_words[state_off + 1]);
|
||||
}
|
||||
|
||||
last_emit = nir_emitpix_pco(&b, state0, state1);
|
||||
}
|
||||
|
||||
assert(last_emit);
|
||||
/* Only the final emit has its freep index set to true. */
nir_intrinsic_set_freep(last_emit, true);
|
||||
|
||||
/* Just return. */
|
||||
nir_jump(&b, nir_jump_return);
|
||||
|
||||
/* The pco_data passed to build_shader() is zero-initialised. */
return build_shader(ctx, b.shader, &(pco_data){ 0 });
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -23,6 +23,13 @@ pco_shader *pvr_usc_nop(pco_ctx *ctx, mesa_shader_stage stage);
|
||||
|
||||
/* EOT shader generation. */
|
||||
struct pvr_eot_props {
|
||||
/* Number of pixel emits the generated shader issues; pvr_usc_eot() asserts
 * that at least one emit was produced.
 */
unsigned emit_count;
|
||||
|
||||
/* Selects which union member pvr_usc_eot() reads: state_regs when true,
 * state_words when false.
 */
bool shared_words;
|
||||
union {
|
||||
/* Used when shared_words is false. Read at indices
 * u * ROGUE_NUM_PBESTATE_STATE_WORDS and + 1 for each emit u, and baked
 * into the shader as immediates.
 */
const uint32_t *state_words;
|
||||
/* Used when shared_words is true. One entry per emit, giving the
 * nir_load_preamble base of the first of two consecutive words.
 */
const unsigned *state_regs;
|
||||
};
|
||||
};
|
||||
|
||||
pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props);
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#if 0
|
||||
/* Expects emit_count ROGUE_NUM_PBESTATE_STATE_WORDS entries */
|
||||
void pvr_uscgen_eot(const char *name,
|
||||
uint32_t emit_count,
|
||||
@@ -72,6 +73,7 @@ void pvr_uscgen_eot(const char *name,
|
||||
|
||||
ralloc_free(shader);
|
||||
}
|
||||
#endif
|
||||
|
||||
void pvr_uscgen_nop(struct util_dynarray *binary)
|
||||
{
|
||||
|
||||
@@ -137,11 +137,13 @@ struct pvr_tq_frag_sh_reg_layout {
|
||||
|
||||
/* TODO: Shader caching (not pipeline caching) support. */
|
||||
|
||||
#if 0
|
||||
void pvr_uscgen_eot(const char *name,
|
||||
uint32_t emit_count,
|
||||
const uint32_t *emit_state,
|
||||
unsigned *temps_used,
|
||||
struct util_dynarray *binary);
|
||||
#endif
|
||||
|
||||
void pvr_uscgen_nop(struct util_dynarray *binary);
|
||||
|
||||
@@ -150,8 +152,10 @@ void pvr_uscgen_tq_frag(const struct pvr_tq_shader_properties *shader_props,
|
||||
unsigned *temps_used,
|
||||
struct util_dynarray *binary);
|
||||
|
||||
#if 0
|
||||
void pvr_uscgen_tq_eot(unsigned rt_count,
|
||||
const uint64_t *pbe_regs,
|
||||
struct util_dynarray *binary);
|
||||
#endif
|
||||
|
||||
#endif /* PVR_USCGEN_H */
|
||||
|
||||
@@ -207,6 +207,7 @@ void pvr_uscgen_tq_frag(const struct pvr_tq_shader_properties *shader_props,
|
||||
ralloc_free(shader);
|
||||
}
|
||||
|
||||
#if 0
|
||||
void pvr_uscgen_tq_eot(unsigned rt_count,
|
||||
const uint64_t *pbe_regs,
|
||||
struct util_dynarray *binary)
|
||||
@@ -240,3 +241,4 @@ void pvr_uscgen_tq_eot(unsigned rt_count,
|
||||
|
||||
ralloc_free(shader);
|
||||
}
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user