diff --git a/src/imagination/pco/pco_nir.c b/src/imagination/pco/pco_nir.c index 5e113b7cde5..6ee2a6848a1 100644 --- a/src/imagination/pco/pco_nir.c +++ b/src/imagination/pco/pco_nir.c @@ -871,9 +871,7 @@ void pco_lower_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data) NIR_PASS(_, nir, pco_nir_lower_demote_samples); } - bool backup = nir->info.fs.uses_sample_shading; NIR_PASS(_, nir, nir_lower_blend, &data->fs.blend_opts); - nir->info.fs.uses_sample_shading = backup; nir_opt_peephole_select_options peep_opts = { .limit = 0, diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c index 5eae18a6ec1..32b65e356dc 100644 --- a/src/imagination/pco/pco_trans_nir.c +++ b/src/imagination/pco/pco_trans_nir.c @@ -335,6 +335,43 @@ static inline pco_instr *build_itr(pco_builder *b, return instr; } +static pco_ref fs_is_single_sampled(trans_ctx *tctx) +{ + assert(tctx->stage == MESA_SHADER_FRAGMENT); + + /* n samples = ... + * 1 = 0b00000001 + * 2 = 0b00000011 + * 4 = 0b00001111 + * 8 = 0b11111111 + */ + pco_ref smp_rate_mask = pco_ref_new_ssa32(tctx->func); + pco_pck(&tctx->b, smp_rate_mask, pco_fone, .pck_fmt = PCO_PCK_FMT_COV); + + /* n samples = ... + * 1 = 0b00000000 + * 2 = 0b00000001 + * 4 = 0b00000111 + * 8 = 0b01111111 + */ + pco_ref smp_rate_mask_shr1 = pco_ref_new_ssa32(tctx->func); + pco_shift(&tctx->b, + smp_rate_mask_shr1, + smp_rate_mask, + pco_one, + pco_ref_null(), + .shiftop = PCO_SHIFTOP_SHR); + + pco_ref is_single_sampled = pco_ref_new_ssa32(tctx->func); + pco_tstz(&tctx->b, + is_single_sampled, + pco_ref_null(), + smp_rate_mask_shr1, + .tst_type_main = PCO_TST_TYPE_MAIN_U32); + + return is_single_sampled; +} + /** * \brief Translates a NIR fs load_input intrinsic into PCO. * @@ -412,18 +449,20 @@ static pco_instr *trans_load_input_fs(trans_ctx *tctx, /* Special case: x and y are loaded from special registers. 
*/ switch (component) { case 0: /* x */ - return pco_mov(&tctx->b, - dest, - pco_ref_hwreg(fs_data->uses.sample_shading ? PCO_SR_X_S - : PCO_SR_X_P, - PCO_REG_CLASS_SPEC)); + case 1: /* y */ { + pco_ref xy_s[] = { pco_ref_hwreg(PCO_SR_X_S, PCO_REG_CLASS_SPEC), + pco_ref_hwreg(PCO_SR_Y_S, PCO_REG_CLASS_SPEC) }; + pco_ref xy_p[] = { pco_ref_hwreg(PCO_SR_X_P, PCO_REG_CLASS_SPEC), + pco_ref_hwreg(PCO_SR_Y_P, PCO_REG_CLASS_SPEC) }; - case 1: /* y */ - return pco_mov(&tctx->b, - dest, - pco_ref_hwreg(fs_data->uses.sample_shading ? PCO_SR_Y_S - : PCO_SR_Y_P, - PCO_REG_CLASS_SPEC)); + return pco_csel(&tctx->b, + dest, + fs_is_single_sampled(tctx), + xy_p[component], + xy_s[component], + .tst_op_main = PCO_TST_OP_MAIN_GZERO, + .tst_type_main = PCO_TST_TYPE_MAIN_U32); + } case 2: assert(fs_data->uses.z); diff --git a/src/imagination/vulkan/pds/pvr_pds.c b/src/imagination/vulkan/pds/pvr_pds.c index f5aa39edb6e..19c2e37823f 100644 --- a/src/imagination/vulkan/pds/pvr_pds.c +++ b/src/imagination/vulkan/pds/pvr_pds.c @@ -3772,6 +3772,7 @@ uint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program, /* Copy the USC task control words to constants. 
*/ constant = pvr_pds_get_constants(&next_constant, 2, &dummy_count); + program->doutu_offset = constant; pvr_pds_write_wide_constant(constants, constant + 0, diff --git a/src/imagination/vulkan/pds/pvr_pds.h b/src/imagination/vulkan/pds/pvr_pds.h index 40006f75b49..3d9fac760e6 100644 --- a/src/imagination/vulkan/pds/pvr_pds.h +++ b/src/imagination/vulkan/pds/pvr_pds.h @@ -237,6 +237,8 @@ struct pvr_pds_kickusc_program { uint32_t *data_segment; struct pvr_pds_usc_task_control usc_task_control; + uint32_t doutu_offset; + uint32_t data_size; uint32_t code_size; }; diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index 325581e36a8..5919e7962d9 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -5310,6 +5310,53 @@ pvr_setup_triangle_merging_flag(struct pvr_cmd_buffer *const cmd_buffer, } } +static VkResult +setup_pds_fragment_program(struct pvr_cmd_buffer *const cmd_buffer, + struct pvr_pds_upload *pds_fragment_program) +{ + struct pvr_cmd_buffer_state *const state = &cmd_buffer->state; + const struct pvr_fragment_shader_state *const fragment_shader_state = + &state->gfx_pipeline->shader_state.fragment; + const struct vk_dynamic_graphics_state *const dynamic_state = + &cmd_buffer->vk.dynamic_graphics_state; + const struct pvr_pds_kickusc_program *program = + &fragment_shader_state->pds_fragment_program; + uint32_t *pds_fragment_program_buffer = + fragment_shader_state->pds_fragment_program_buffer; + + memset(pds_fragment_program, 0, sizeof(*pds_fragment_program)); + + if (!pds_fragment_program_buffer) + return VK_SUCCESS; + + struct ROGUE_PDSINST_DOUTU_SRC0 doutu_src; + ROGUE_PDSINST_DOUTU_SRC0_unpack( + &pds_fragment_program_buffer[program->doutu_offset], + &doutu_src); + + /* TODO: should VkPipelineMultisampleStateCreateInfo.sampleShadingEnable also select the full sample rate? */ + doutu_src.sample_rate = dynamic_state->ms.rasterization_samples > + VK_SAMPLE_COUNT_1_BIT + ?
ROGUE_PDSINST_DOUTU_SAMPLE_RATE_FULL + : ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE; + + ROGUE_PDSINST_DOUTU_SRC0_pack( + &pds_fragment_program_buffer[program->doutu_offset], + &doutu_src); + + /* FIXME: Figure out the define for alignment of 16. */ + return pvr_cmd_buffer_upload_pds( + cmd_buffer, + &pds_fragment_program_buffer[0], + program->data_size, + 16, + &pds_fragment_program_buffer[program->data_size], + program->code_size, + 16, + 16, + pds_fragment_program); +} + static VkResult setup_pds_coeff_program(struct pvr_cmd_buffer *const cmd_buffer, struct pvr_pds_upload *pds_coeff_program) @@ -5391,9 +5438,14 @@ pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer, &fragment_shader_state->descriptor_state; const struct pvr_pipeline_stage_state *fragment_state = &fragment_shader_state->stage_state; + struct pvr_pds_upload pds_fragment_program; struct pvr_pds_upload pds_coeff_program; VkResult result; + result = setup_pds_fragment_program(cmd_buffer, &pds_fragment_program); + if (result != VK_SUCCESS) + return result; + result = setup_pds_coeff_program(cmd_buffer, &pds_coeff_program); if (result != VK_SUCCESS) return result; @@ -5438,10 +5490,7 @@ pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer, pvr_csb_pack (&ppp_state->pds.pixel_shader_base, TA_STATE_PDS_SHADERBASE, shader_base) { - const struct pvr_pds_upload *const pds_upload = - &fragment_shader_state->pds_fragment_program; - - shader_base.addr = PVR_DEV_ADDR(pds_upload->data_offset); + shader_base.addr = PVR_DEV_ADDR(pds_fragment_program.data_offset); } if (descriptor_shader_state->pds_code.pvr_bo) { @@ -5949,7 +5998,8 @@ static inline bool pvr_ppp_dynamic_state_isp_faces_and_control_dirty( BITSET_TEST(dynamic_dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) || BITSET_TEST(dynamic_dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH) || BITSET_TEST(dynamic_dirty, - MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE); + MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE) || + 
BITSET_TEST(dynamic_dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES); } static inline bool diff --git a/src/imagination/vulkan/pvr_pass.c b/src/imagination/vulkan/pvr_pass.c index f64467b3f95..7464a25f52a 100644 --- a/src/imagination/vulkan/pvr_pass.c +++ b/src/imagination/vulkan/pvr_pass.c @@ -179,6 +179,72 @@ VkResult pvr_pds_unitex_state_program_create_and_upload( return VK_SUCCESS; } +static VkResult pvr_pds_fragment_program_create_and_upload( + struct pvr_device *device, + const VkAllocationCallbacks *allocator, + pco_shader *fs, + struct pvr_suballoc_bo *shader_bo, + struct pvr_pds_upload *pds_frag_prog, + bool msaa) +{ + struct pvr_pds_kickusc_program program = { 0 }; + pco_data *fs_data = pco_shader_data(fs); + uint32_t staging_buffer_size; + uint32_t *staging_buffer; + VkResult result; + + const pvr_dev_addr_t exec_addr = + PVR_DEV_ADDR_OFFSET(shader_bo->dev_addr, fs_data->common.entry_offset); + + /* Note this is not strictly required to be done before calculating the + * staging_buffer_size in this particular case. It can also be done after + * allocating the buffer. The size from pvr_pds_kick_usc() is constant. + */ + pvr_pds_setup_doutu(&program.usc_task_control, + exec_addr.addr, + fs_data->common.temps, + msaa ? ROGUE_PDSINST_DOUTU_SAMPLE_RATE_FULL + : ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE, + fs_data->fs.uses.phase_change); + + pvr_pds_kick_usc(&program, NULL, 0, false, PDS_GENERATE_SIZES); + + staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size); + + staging_buffer = vk_alloc2(&device->vk.alloc, + allocator, + staging_buffer_size, + 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!staging_buffer) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + pvr_pds_kick_usc(&program, + staging_buffer, + 0, + false, + PDS_GENERATE_CODEDATA_SEGMENTS); + + /* FIXME: Figure out the define for alignment of 16. 
*/ + result = pvr_gpu_upload_pds(device, + &staging_buffer[0], + program.data_size, + 16, + &staging_buffer[program.data_size], + program.code_size, + 16, + 16, + pds_frag_prog); + if (result != VK_SUCCESS) { + vk_free2(&device->vk.alloc, allocator, staging_buffer); + return result; + } + + vk_free2(&device->vk.alloc, allocator, staging_buffer); + + return VK_SUCCESS; +} + static VkResult pvr_load_op_shader_generate(struct pvr_device *device, const VkAllocationCallbacks *allocator, @@ -203,25 +269,17 @@ pvr_load_op_shader_generate(struct pvr_device *device, const bool msaa = load_op->clears_loads_state.unresolved_msaa_mask & load_op->clears_loads_state.rt_load_mask; - /* TODO: amend this once the hardcoded shaders have been removed. */ - struct pvr_fragment_shader_state fragment_state = { - .shader_bo = load_op->usc_frag_prog_bo, - .sample_rate = msaa ? ROGUE_PDSINST_DOUTU_SAMPLE_RATE_FULL - : ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE, - .pds_fragment_program = load_op->pds_frag_prog, - }; - - result = pvr_pds_fragment_program_create_and_upload(device, - allocator, - loadop, - &fragment_state); + result = + pvr_pds_fragment_program_create_and_upload(device, + allocator, + loadop, + load_op->usc_frag_prog_bo, + &load_op->pds_frag_prog, + msaa); load_op->temps_count = pco_shader_data(loadop)->common.temps; ralloc_free(loadop); - load_op->usc_frag_prog_bo = fragment_state.shader_bo; - load_op->pds_frag_prog = fragment_state.pds_fragment_program; - if (result != VK_SUCCESS) goto err_free_usc_frag_prog_bo; diff --git a/src/imagination/vulkan/pvr_pipeline.c b/src/imagination/vulkan/pvr_pipeline.c index 720ea18837a..e7cd5b8504d 100644 --- a/src/imagination/vulkan/pvr_pipeline.c +++ b/src/imagination/vulkan/pvr_pipeline.c @@ -110,53 +110,37 @@ static VkResult pvr_pds_coeff_program_create_and_upload( return VK_SUCCESS; } -/* FIXME: move this elsewhere since it's also called in pvr_pass.c? */ /* If allocator == NULL, the internal one will be used. 
*/ -VkResult pvr_pds_fragment_program_create_and_upload( +static VkResult pvr_pds_fragment_program_create( struct pvr_device *device, const VkAllocationCallbacks *allocator, pco_shader *fs, struct pvr_fragment_shader_state *fragment_state) { - /* TODO: remove the below + revert the pvr_pds_setup_doutu - * args and make sure fs isn't NULL instead; - * temporarily in place for hardcoded load ops in - * pvr_pass.c:pvr_generate_load_op_shader() - */ - unsigned temps = 0; - bool has_phase_rate_change = false; - unsigned entry_offset = 0; - - if (fs) { - pco_data *fs_data = pco_shader_data(fs); - temps = fs_data->common.temps; - has_phase_rate_change = fs_data->fs.uses.phase_change; - entry_offset = fs_data->common.entry_offset; - } - - struct pvr_pds_kickusc_program program = { 0 }; + struct pvr_pds_kickusc_program *program = + &fragment_state->pds_fragment_program; + pco_data *fs_data = pco_shader_data(fs); uint32_t staging_buffer_size; uint32_t *staging_buffer; - VkResult result; const pvr_dev_addr_t exec_addr = PVR_DEV_ADDR_OFFSET(fragment_state->shader_bo->dev_addr, - /* fs_data->common.entry_offset */ entry_offset); + fs_data->common.entry_offset); /* Note this is not strictly required to be done before calculating the * staging_buffer_size in this particular case. It can also be done after * allocating the buffer. The size from pvr_pds_kick_usc() is constant. 
*/ - pvr_pds_setup_doutu( - &program.usc_task_control, - exec_addr.addr, - /* fs_data->common.temps */ temps, - fragment_state->sample_rate, - /* fs_data->fs.uses.phase_change */ has_phase_rate_change); + pvr_pds_setup_doutu(&program->usc_task_control, + exec_addr.addr, + fs_data->common.temps, + ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE, + fs_data->fs.uses.phase_change); - pvr_pds_kick_usc(&program, NULL, 0, false, PDS_GENERATE_SIZES); + pvr_pds_kick_usc(program, NULL, 0, false, PDS_GENERATE_SIZES); - staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size); + staging_buffer_size = + PVR_DW_TO_BYTES(program->code_size + program->data_size); staging_buffer = vk_alloc2(&device->vk.alloc, allocator, @@ -166,28 +150,13 @@ VkResult pvr_pds_fragment_program_create_and_upload( if (!staging_buffer) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - pvr_pds_kick_usc(&program, + pvr_pds_kick_usc(program, staging_buffer, 0, false, PDS_GENERATE_CODEDATA_SEGMENTS); - /* FIXME: Figure out the define for alignment of 16. 
*/ - result = pvr_gpu_upload_pds(device, - &staging_buffer[0], - program.data_size, - 16, - &staging_buffer[program.data_size], - program.code_size, - 16, - 16, - &fragment_state->pds_fragment_program); - if (result != VK_SUCCESS) { - vk_free2(&device->vk.alloc, allocator, staging_buffer); - return result; - } - - vk_free2(&device->vk.alloc, allocator, staging_buffer); + fragment_state->pds_fragment_program_buffer = staging_buffer; return VK_SUCCESS; } @@ -1251,8 +1220,9 @@ pvr_graphics_pipeline_destroy(struct pvr_device *const device, pvr_pds_vertex_attrib_program_destroy(device, allocator, attrib_program); } - pvr_bo_suballoc_free( - gfx_pipeline->shader_state.fragment.pds_fragment_program.pvr_bo); + vk_free2(&device->vk.alloc, + allocator, + fragment_state->pds_fragment_program_buffer); vk_free2(&device->vk.alloc, allocator, fragment_state->pds_coeff_program_buffer); @@ -1295,6 +1265,7 @@ static void pvr_fragment_state_save(struct pvr_graphics_pipeline *gfx_pipeline, /* TODO: add selection for other values of pass type and sample rate. */ + /* TODO: select the pass type dynamically as well */ if (shader_data->fs.uses.depth_feedback && !shader_data->fs.uses.early_frag) fragment_state->pass_type = ROGUE_TA_PASSTYPE_DEPTH_FEEDBACK; else if (shader_data->fs.uses.discard) @@ -1304,13 +1275,6 @@ static void pvr_fragment_state_save(struct pvr_graphics_pipeline *gfx_pipeline, else fragment_state->pass_type = ROGUE_TA_PASSTYPE_OPAQUE; - fragment_state->sample_rate = ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE; - if (shader_data->fs.uses.sample_shading || - gfx_pipeline->dynamic_state.ms.rasterization_samples > - VK_SAMPLE_COUNT_1_BIT) { - fragment_state->sample_rate = ROGUE_PDSINST_DOUTU_SAMPLE_RATE_FULL; - } - /* We can't initialize it yet since we still need to generate the PDS * programs so set it to `~0` to make sure that we set this up later on.
*/ @@ -2531,9 +2495,6 @@ pvr_preprocess_shader_data(pco_data *data, data->fs.meta_present.sample_mask = true; } - data->fs.rasterization_samples = state->ms->rasterization_samples; - nir->info.fs.uses_sample_shading = state->ms->rasterization_samples > - VK_SAMPLE_COUNT_1_BIT; if (BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) || (state->cb && state->cb->color_write_enables != BITFIELD_MASK(MESA_VK_MAX_COLOR_ATTACHMENTS))) { @@ -2760,10 +2721,10 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device, if (result != VK_SUCCESS) goto err_free_fragment_bo; - result = pvr_pds_fragment_program_create_and_upload(device, - allocator, - *fs, - fragment_state); + result = pvr_pds_fragment_program_create(device, + allocator, + *fs, + fragment_state); if (result != VK_SUCCESS) goto err_free_coeff_program; @@ -2827,7 +2788,9 @@ err_free_frag_descriptor_program: allocator, &fragment_state->descriptor_state); err_free_frag_program: - pvr_bo_suballoc_free(fragment_state->pds_fragment_program.pvr_bo); + vk_free2(&device->vk.alloc, + allocator, + fragment_state->pds_fragment_program_buffer); err_free_coeff_program: vk_free2(&device->vk.alloc, allocator, diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h index 8e9315ba236..a8f0ace6c7d 100644 --- a/src/imagination/vulkan/pvr_private.h +++ b/src/imagination/vulkan/pvr_private.h @@ -894,12 +894,12 @@ struct pvr_fragment_shader_state { /* FIXME: Move this into stage_state? 
*/ struct pvr_stage_allocation_descriptor_state descriptor_state; enum ROGUE_TA_PASSTYPE pass_type; - enum ROGUE_PDSINST_DOUTU_SAMPLE_RATE sample_rate; struct pvr_pds_coeff_loading_program pds_coeff_program; uint32_t *pds_coeff_program_buffer; - struct pvr_pds_upload pds_fragment_program; + struct pvr_pds_kickusc_program pds_fragment_program; + uint32_t *pds_fragment_program_buffer; }; struct pvr_pipeline { @@ -1366,12 +1366,6 @@ pvr_cmd_buffer_set_error_unwarned(struct pvr_cmd_buffer *cmd_buffer, return error; } -VkResult pvr_pds_fragment_program_create_and_upload( - struct pvr_device *device, - const VkAllocationCallbacks *allocator, - pco_shader *fs, - struct pvr_fragment_shader_state *fragment_state); - VkResult pvr_pds_unitex_state_program_create_and_upload( struct pvr_device *device, const VkAllocationCallbacks *allocator,