From eb9069c31468504a0f3defe9ebf976358b0697fc Mon Sep 17 00:00:00 2001 From: Simon Perretta Date: Tue, 8 Apr 2025 16:23:06 +0100 Subject: [PATCH] pvr, pco: basic write without format support Signed-off-by: Simon Perretta Acked-by: Erik Faye-Lund Part-of: --- src/imagination/pco/pco_data.h | 2 +- src/imagination/pco/pco_nir_tex.c | 314 +++++++++++++++---------- src/imagination/vulkan/pvr_device.c | 4 +- src/imagination/vulkan/pvr_tex_state.c | 91 ++++++- 4 files changed, 288 insertions(+), 123 deletions(-) diff --git a/src/imagination/pco/pco_data.h b/src/imagination/pco/pco_data.h index f3272b91f1c..e694242bb7f 100644 --- a/src/imagination/pco/pco_data.h +++ b/src/imagination/pco/pco_data.h @@ -138,7 +138,7 @@ enum pco_image_meta { PCO_IMAGE_META_LAYER_SIZE, PCO_IMAGE_META_BUFFER_ELEMS, PCO_IMAGE_META_Z_SLICE, - PCO_IMAGE_META_RSVD0, + PCO_IMAGE_META_PCK_INFO, PCO_IMAGE_META_COUNT, }; diff --git a/src/imagination/pco/pco_nir_tex.c b/src/imagination/pco/pco_nir_tex.c index 2ba6d6ee970..4fc90ab7082 100644 --- a/src/imagination/pco/pco_nir_tex.c +++ b/src/imagination/pco/pco_nir_tex.c @@ -928,147 +928,223 @@ static nir_def *lower_image(nir_builder *b, nir_instr *instr, void *cb_data) assert(intr->num_components == 4); assert(write_data->num_components == 4); - /* TODO: formatless write support */ - assert(format != PIPE_FORMAT_NONE); + if (format != PIPE_FORMAT_NONE) { + const struct util_format_description *desc = + util_format_description(format); - const struct util_format_description *desc = - util_format_description(format); + enum pipe_format data_format = + nir_type_to_pipe_format(type, desc->nr_channels); - enum pipe_format data_format = - nir_type_to_pipe_format(type, desc->nr_channels); + if (format != data_format) { + enum pco_pck_format pck_format = ~0; + bool scale = false; + bool roundzero = false; + bool split = false; - if (format != data_format) { - enum pco_pck_format pck_format = ~0; - bool scale = false; - bool roundzero = false; - bool split =
false; + switch (format) { + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + pck_format = PCO_PCK_FORMAT_U8888; + scale = true; + break; - switch (format) { - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - pck_format = PCO_PCK_FORMAT_U8888; - scale = true; - break; + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8G8B8_SNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + pck_format = PCO_PCK_FORMAT_S8888; + scale = true; + break; - case PIPE_FORMAT_R8_SNORM: - case PIPE_FORMAT_R8G8_SNORM: - case PIPE_FORMAT_R8G8B8_SNORM: - case PIPE_FORMAT_R8G8B8A8_SNORM: - pck_format = PCO_PCK_FORMAT_S8888; - scale = true; - break; + case PIPE_FORMAT_R11G11B10_FLOAT: + pck_format = PCO_PCK_FORMAT_F111110; + break; - case PIPE_FORMAT_R11G11B10_FLOAT: - pck_format = PCO_PCK_FORMAT_F111110; - break; + case PIPE_FORMAT_R10G10B10A2_UNORM: + pck_format = PCO_PCK_FORMAT_U1010102; + scale = true; + break; - case PIPE_FORMAT_R10G10B10A2_UNORM: - pck_format = PCO_PCK_FORMAT_U1010102; - scale = true; - break; + case PIPE_FORMAT_R10G10B10A2_SNORM: + pck_format = PCO_PCK_FORMAT_S1010102; + scale = true; + break; - case PIPE_FORMAT_R10G10B10A2_SNORM: - pck_format = PCO_PCK_FORMAT_S1010102; - scale = true; - break; + case PIPE_FORMAT_R16_FLOAT: + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_R16G16B16_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + pck_format = PCO_PCK_FORMAT_F16F16; + split = true; + break; - case PIPE_FORMAT_R16_FLOAT: - case PIPE_FORMAT_R16G16_FLOAT: - case PIPE_FORMAT_R16G16B16_FLOAT: - case PIPE_FORMAT_R16G16B16A16_FLOAT: - pck_format = PCO_PCK_FORMAT_F16F16; - split = true; - break; + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R16G16B16_UNORM: + case PIPE_FORMAT_R16G16B16A16_UNORM: + pck_format = PCO_PCK_FORMAT_U1616; + scale = true; + split = true; +
break; - case PIPE_FORMAT_R16_UNORM: - case PIPE_FORMAT_R16G16_UNORM: - case PIPE_FORMAT_R16G16B16_UNORM: - case PIPE_FORMAT_R16G16B16A16_UNORM: - pck_format = PCO_PCK_FORMAT_U1616; - scale = true; - split = true; - break; + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16G16B16_SNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + pck_format = PCO_PCK_FORMAT_S1616; + scale = true; + split = true; + break; - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_R16G16_SNORM: - case PIPE_FORMAT_R16G16B16_SNORM: - case PIPE_FORMAT_R16G16B16A16_SNORM: - pck_format = PCO_PCK_FORMAT_S1616; - scale = true; - split = true; - break; + case PIPE_FORMAT_R8_UINT: + case PIPE_FORMAT_R8G8_UINT: + case PIPE_FORMAT_R8G8B8_UINT: + case PIPE_FORMAT_R8G8B8A8_UINT: - case PIPE_FORMAT_R8_UINT: - case PIPE_FORMAT_R8G8_UINT: - case PIPE_FORMAT_R8G8B8_UINT: - case PIPE_FORMAT_R8G8B8A8_UINT: + case PIPE_FORMAT_R8_SINT: + case PIPE_FORMAT_R8G8_SINT: + case PIPE_FORMAT_R8G8B8_SINT: + case PIPE_FORMAT_R8G8B8A8_SINT: - case PIPE_FORMAT_R8_SINT: - case PIPE_FORMAT_R8G8_SINT: - case PIPE_FORMAT_R8G8B8_SINT: - case PIPE_FORMAT_R8G8B8A8_SINT: + case PIPE_FORMAT_R10G10B10A2_UINT: + case PIPE_FORMAT_R10G10B10A2_SINT: - case PIPE_FORMAT_R10G10B10A2_UINT: - case PIPE_FORMAT_R10G10B10A2_SINT: + case PIPE_FORMAT_R16_UINT: + case PIPE_FORMAT_R16G16_UINT: + case PIPE_FORMAT_R16G16B16_UINT: + case PIPE_FORMAT_R16G16B16A16_UINT: - case PIPE_FORMAT_R16_UINT: - case PIPE_FORMAT_R16G16_UINT: - case PIPE_FORMAT_R16G16B16_UINT: - case PIPE_FORMAT_R16G16B16A16_UINT: + case PIPE_FORMAT_R16_SINT: + case PIPE_FORMAT_R16G16_SINT: + case PIPE_FORMAT_R16G16B16_SINT: + case PIPE_FORMAT_R16G16B16A16_SINT: - case PIPE_FORMAT_R16_SINT: - case PIPE_FORMAT_R16G16_SINT: - case PIPE_FORMAT_R16G16B16_SINT: - case PIPE_FORMAT_R16G16B16A16_SINT: + case PIPE_FORMAT_R32_UINT: + case PIPE_FORMAT_R32G32_UINT: + case PIPE_FORMAT_R32G32B32_UINT: + case PIPE_FORMAT_R32G32B32A32_UINT: - case PIPE_FORMAT_R32_UINT: -
case PIPE_FORMAT_R32G32_UINT: - case PIPE_FORMAT_R32G32B32_UINT: - case PIPE_FORMAT_R32G32B32A32_UINT: + case PIPE_FORMAT_R32_SINT: + case PIPE_FORMAT_R32G32_SINT: + case PIPE_FORMAT_R32G32B32_SINT: + case PIPE_FORMAT_R32G32B32A32_SINT: + /* No conversion needed: pck_format stays ~0, so the pack step guarded by (pck_format != ~0) below is not emitted. */ + break; - case PIPE_FORMAT_R32_SINT: - case PIPE_FORMAT_R32G32_SINT: - case PIPE_FORMAT_R32G32B32_SINT: - case PIPE_FORMAT_R32G32B32A32_SINT: - /* No conversion needed. */ - break; + default: + printf("Unsupported image write pack format %s.\n", + util_format_name(format)); + UNREACHABLE(""); + } - default: - printf("Unsupported image write pack format %s.\n", - util_format_name(format)); - UNREACHABLE(""); - } + if (pck_format != ~0) { + if (split) { + nir_def *lower = + nir_pck_prog_pco(b, + nir_channels(b, write_data, 0b0011), + nir_imm_int(b, pck_format), + .scale = scale, + .roundzero = roundzero); + nir_def *upper = + nir_pck_prog_pco(b, + nir_channels(b, write_data, 0b1100), + nir_imm_int(b, pck_format), + .scale = scale, + .roundzero = roundzero); - if (pck_format != ~0) { - if (split) { - nir_def *lower = - nir_pck_prog_pco(b, - nir_channels(b, write_data, 0b0011), - nir_imm_int(b, pck_format), - .scale = scale, - .roundzero = roundzero); - nir_def *upper = - nir_pck_prog_pco(b, - nir_channels(b, write_data, 0b1100), - nir_imm_int(b, pck_format), - .scale = scale, - .roundzero = roundzero); - - write_data = nir_vec4(b, - nir_channel(b, lower, 0), - nir_channel(b, lower, 1), - nir_channel(b, upper, 0), - nir_channel(b, upper, 1)); - } else { - write_data = nir_pck_prog_pco(b, - write_data, - nir_imm_int(b, pck_format), - .scale = scale, - .roundzero = roundzero); + write_data = nir_vec4(b, + nir_channel(b, lower, 0), + nir_channel(b, lower, 1), + nir_channel(b, upper, 0), + nir_channel(b, upper, 1)); + } else { + write_data = nir_pck_prog_pco(b, + write_data, + nir_imm_int(b, pck_format), + .scale = scale, + .roundzero = roundzero); + } } } + } else { + /* Formatless write: format is PIPE_FORMAT_NONE here, so the packing parameters are read from the PCO_IMAGE_META_PCK_INFO channel of the texture metadata (0xffffffff = skip packing; otherwise bits 0-4 = pack format, bit 5 = split, bit 6 = scale).
*/ + nir_def *tex_meta = nir_load_tex_meta_pco(b, + PCO_IMAGE_META_COUNT, + elem, + .desc_set = desc_set, + .binding = binding); + + nir_def *pck_info = nir_channel(b, tex_meta, PCO_IMAGE_META_PCK_INFO); + nir_def *pck_skip = nir_ieq_imm(b, pck_info, 0xffffffff); + nir_def *pck_format = nir_ubitfield_extract_imm(b, pck_info, 0, 5); + nir_def *pck_split = nir_ubitfield_extract_imm(b, pck_info, 5, 1); + pck_split = nir_ine_imm(b, pck_split, 0); + nir_def *pck_scale = nir_ubitfield_extract_imm(b, pck_info, 6, 1); + pck_scale = nir_ine_imm(b, pck_scale, 0); + /* Roundzero (bit 7) is not consumed by the pack ops below yet: nir_def *pck_roundzero = nir_ubitfield_extract_imm(b, pck_info, 7, + * 1); */ + /* pck_roundzero = nir_ine_imm(b, pck_roundzero, 0); */ + + /* All four split/scale variants are built below and one is chosen with bcsel. TODO: ideally would like for only 4 regs to be used, since only one + * of these code paths is going to be taken... look into conditional + * execution instead + */ + /* TODO: probably nicest to do this in uscgen? 
*/ + + nir_def *write_data_scale = + nir_pck_prog_pco(b, write_data, pck_format, .scale = true); + nir_def *write_data_noscale = + nir_pck_prog_pco(b, write_data, pck_format, .scale = false); + + nir_def *split_lower_scale = + nir_pck_prog_pco(b, + nir_channels(b, write_data, 0b0011), + pck_format, + .scale = true); + nir_def *split_upper_scale = + nir_pck_prog_pco(b, + nir_channels(b, write_data, 0b1100), + pck_format, + .scale = true); + nir_def *write_data_split_scale = + nir_vec4(b, + nir_channel(b, split_lower_scale, 0), + nir_channel(b, split_lower_scale, 1), + nir_channel(b, split_upper_scale, 0), + nir_channel(b, split_upper_scale, 1)); + + nir_def *split_lower_noscale = + nir_pck_prog_pco(b, + nir_channels(b, write_data, 0b0011), + pck_format, + .scale = false); + nir_def *split_upper_noscale = + nir_pck_prog_pco(b, + nir_channels(b, write_data, 0b1100), + pck_format, + .scale = false); + nir_def *write_data_split_noscale = + nir_vec4(b, + nir_channel(b, split_lower_noscale, 0), + nir_channel(b, split_lower_noscale, 1), + nir_channel(b,
split_upper_noscale, 0), + nir_channel(b, split_upper_noscale, 1)); + + nir_def *write_data_split = nir_bcsel(b, + pck_scale, + write_data_split_scale, + write_data_split_noscale); + + nir_def *write_data_unsplit = + nir_bcsel(b, pck_scale, write_data_scale, write_data_noscale); + + write_data = nir_bcsel( + b, + pck_skip, + write_data, + nir_bcsel(b, pck_split, write_data_split, write_data_unsplit)); } } diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c index cfc796c1fd5..8339016ddbb 100644 --- a/src/imagination/vulkan/pvr_device.c +++ b/src/imagination/vulkan/pvr_device.c @@ -243,8 +243,8 @@ static void pvr_physical_device_get_supported_features( .shaderImageGatherExtended = false, .shaderStorageImageExtendedFormats = false, .shaderStorageImageMultisample = false, - .shaderStorageImageReadWithoutFormat = false, - .shaderStorageImageWriteWithoutFormat = false, + .shaderStorageImageReadWithoutFormat = true, + .shaderStorageImageWriteWithoutFormat = true, .shaderUniformBufferArrayDynamicIndexing = false, .shaderSampledImageArrayDynamicIndexing = false, .shaderStorageBufferArrayDynamicIndexing = false, diff --git a/src/imagination/vulkan/pvr_tex_state.c b/src/imagination/vulkan/pvr_tex_state.c index 24e9b73d39e..ed1a827ccf4 100644 --- a/src/imagination/vulkan/pvr_tex_state.c +++ b/src/imagination/vulkan/pvr_tex_state.c @@ -25,6 +25,7 @@ #include #include "hwdef/rogue_hw_defs.h" +#include "pco/pco_common.h" #include "pvr_csb.h" #include "pvr_device_info.h" #include "pvr_formats.h" @@ -61,6 +62,94 @@ static enum ROGUE_TEXSTATE_SWIZ pvr_get_hw_swizzle(VkComponentSwizzle comp, }; } +static uint32_t setup_pck_info(VkFormat vk_format) +{ + /* Builds the PCO_IMAGE_META_PCK_INFO word for vk_format: the pack format value in the low bits, bit 5 = split, bit 6 = scale, bit 7 = roundzero; formats not listed in the switch below return ~0 (no packing). TODO NEXT: commonize this with the matching format switch in pco_nir_tex.c lower_image(). */ + enum pipe_format format = vk_format_to_pipe_format(vk_format); + enum pco_pck_format pck_format = ~0; + bool scale = false; + bool roundzero = false; + bool split = false; + + switch (format) { + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case
PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + pck_format = PCO_PCK_FORMAT_U8888; + scale = true; + break; + + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8G8B8_SNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + pck_format = PCO_PCK_FORMAT_S8888; + scale = true; + break; + + case PIPE_FORMAT_R11G11B10_FLOAT: + pck_format = PCO_PCK_FORMAT_F111110; + break; + + /* TODO: better way to do the 1x2 component. */ + case PIPE_FORMAT_R10G10B10A2_UNORM: + pck_format = PCO_PCK_FORMAT_U1010102; + scale = true; + break; + + /* TODO: better way to do the 1x2 component. */ + case PIPE_FORMAT_R10G10B10A2_SNORM: + pck_format = PCO_PCK_FORMAT_S1010102; + scale = true; + break; + + case PIPE_FORMAT_R16_FLOAT: + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_R16G16B16_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + pck_format = PCO_PCK_FORMAT_F16F16; + split = true; + break; + + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R16G16B16_UNORM: + case PIPE_FORMAT_R16G16B16A16_UNORM: + pck_format = PCO_PCK_FORMAT_U1616; + scale = true; + split = true; + break; + + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16G16B16_SNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + pck_format = PCO_PCK_FORMAT_S1616; + scale = true; + split = true; + break; + + default: + break; + } + + if (pck_format == ~0) + return pck_format; + + uint32_t pck_info = pck_format; + if (split) + pck_info |= BITFIELD_BIT(5); + + if (scale) + pck_info |= BITFIELD_BIT(6); + + if (roundzero) + pck_info |= BITFIELD_BIT(7); + + return pck_info; +} + VkResult pvr_pack_tex_state(struct pvr_device *device, const struct pvr_texture_state_info *info, struct pvr_image_descriptor *state) @@ -253,7 +342,7 @@ VkResult pvr_pack_tex_state(struct pvr_device *device, state->meta[PCO_IMAGE_META_LAYER_SIZE] = info->layer_size; state->meta[PCO_IMAGE_META_BUFFER_ELEMS] = info->buffer_elems; state->meta[PCO_IMAGE_META_Z_SLICE] =
info->z_slice; - state->meta[PCO_IMAGE_META_RSVD0] = 0; + state->meta[PCO_IMAGE_META_PCK_INFO] = setup_pck_info(info->format); return VK_SUCCESS; }