radv: use new common helpers for building buffer descriptors

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29268>
This commit is contained in:
Samuel Pitoiset
2024-05-16 17:50:57 +02:00
committed by Marge Bot
parent d3b01fd95e
commit 074f3cfe73
6 changed files with 49 additions and 152 deletions
@@ -3,6 +3,7 @@
*
* SPDX-License-Identifier: MIT
*/
#include "ac_descriptors.h"
#include "ac_shader_util.h"
#include "nir.h"
#include "nir_builder.h"
@@ -138,20 +139,11 @@ visit_load_vulkan_descriptor(nir_builder *b, apply_layout_state *state, nir_intr
/**
 * Build the 4-dword buffer descriptor for an inline buffer as a NIR vec4.
 *
 * Dwords 1-3 are emitted as immediates. Their values come from the common
 * ac_build_raw_buffer_descriptor() helper, called for a buffer whose 64-bit
 * address has state->address32_hi as its upper half (lower half zero) and
 * whose size is 0xffffffff. Dword 0 is not an immediate: it is \p rsrc.
 *
 * \param b      NIR builder used to emit the immediates and the vec4.
 * \param state  Layout state; gfx_level and address32_hi are read from it.
 * \param rsrc   SSA value placed in component 0 of the result.
 * \return       The vec4 (rsrc, desc[1], desc[2], desc[3]).
 */
static nir_def *
load_inline_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_def *rsrc)
{
   /* Widen before shifting so the 32-bit high half is not shifted out. */
   const uint64_t va_hi = (uint64_t)state->address32_hi << 32;
   uint32_t desc[4];

   /* Use the shared helper instead of open-coding the per-generation
    * format/OOB bits here, so this stays in sync with the other raw buffer
    * descriptors built by the driver.
    */
   ac_build_raw_buffer_descriptor(state->gfx_level, va_hi, 0xffffffff, desc);

   /* desc[0] is deliberately unused: component 0 is supplied by rsrc. */
   return nir_vec4(b, rsrc, nir_imm_int(b, desc[1]), nir_imm_int(b, desc[2]), nir_imm_int(b, desc[3]));
}
static nir_def *
+15 -46
View File
@@ -26,13 +26,9 @@ radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFor
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct util_format_description *desc;
unsigned stride;
unsigned num_format, data_format;
int first_non_void;
enum pipe_swizzle swizzle[4];
unsigned rsrc_word3;
desc = vk_format_description(vk_format);
first_non_void = vk_format_get_first_non_void_channel(vk_format);
stride = desc->block.bits / 8;
radv_compose_swizzle(desc, NULL, swizzle);
@@ -43,49 +39,22 @@ radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFor
range /= stride;
}
rsrc_word3 = S_008F0C_DST_SEL_X(ac_map_swizzle(swizzle[0])) | S_008F0C_DST_SEL_Y(ac_map_swizzle(swizzle[1])) |
S_008F0C_DST_SEL_Z(ac_map_swizzle(swizzle[2])) | S_008F0C_DST_SEL_W(ac_map_swizzle(swizzle[3]));
const struct ac_buffer_state ac_state = {
.va = va,
.size = range,
.format = vk_format_to_pipe_format(vk_format),
.swizzle =
{
swizzle[0],
swizzle[1],
swizzle[2],
swizzle[3],
},
.stride = stride,
.gfx10_oob_select = V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET,
};
if (pdev->info.gfx_level >= GFX10) {
const struct gfx10_format *fmt =
&ac_get_gfx10_format_table(pdev->info.gfx_level)[vk_format_to_pipe_format(vk_format)];
/* OOB_SELECT chooses the out-of-bounds check.
*
* GFX10:
* - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
* - 1: index >= NUM_RECORDS
* - 2: NUM_RECORDS == 0
* - 3: if SWIZZLE_ENABLE:
* swizzle_address >= NUM_RECORDS
* else:
* offset >= NUM_RECORDS
*
* GFX11:
* - 0: (index >= NUM_RECORDS) || (offset+payload > STRIDE)
* - 1: index >= NUM_RECORDS
* - 2: NUM_RECORDS == 0
* - 3: if SWIZZLE_ENABLE && STRIDE:
* (index >= NUM_RECORDS) || ( offset+payload > STRIDE)
* else:
* offset+payload > NUM_RECORDS
*/
rsrc_word3 |= S_008F0C_FORMAT_GFX10(fmt->img_format) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
S_008F0C_RESOURCE_LEVEL(pdev->info.gfx_level < GFX11);
} else {
num_format = radv_translate_buffer_numformat(desc, first_non_void);
data_format = radv_translate_buffer_dataformat(desc, first_non_void);
assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
rsrc_word3 |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);
}
state[0] = va;
state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
state[2] = range;
state[3] = rsrc_word3;
ac_build_buffer_descriptor(pdev->info.gfx_level, &ac_state, state);
}
void
+4 -32
View File
@@ -31,6 +31,7 @@
#include "vk_util.h"
#include "ac_debug.h"
#include "ac_descriptors.h"
#include "ac_nir.h"
#include "ac_shader_args.h"
@@ -6017,23 +6018,7 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
}
}
uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (pdev->info.gfx_level >= GFX11) {
rsrc_word3 |=
S_008F0C_FORMAT_GFX10(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (pdev->info.gfx_level >= GFX10) {
rsrc_word3 |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
} else {
rsrc_word3 |= S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
desc[2] = size;
desc[3] = rsrc_word3;
ac_build_raw_buffer_descriptor(pdev->info.gfx_level, va, size, desc);
}
desc_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
@@ -7195,22 +7180,9 @@ radv_bind_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
memset(dst, 0, 4 * 4);
} else {
uint64_t va = range->va + pBindDescriptorSetsInfo->pDynamicOffsets[dyn_idx];
dst[0] = va;
dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
dst[2] = no_dynamic_bounds ? 0xffffffffu : range->size;
dst[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
const uint32_t size = no_dynamic_bounds ? 0xffffffffu : range->size;
if (pdev->info.gfx_level >= GFX11) {
dst[3] |=
S_008F0C_FORMAT_GFX10(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (pdev->info.gfx_level >= GFX10) {
dst[3] |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
} else {
dst[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
ac_build_raw_buffer_descriptor(pdev->info.gfx_level, va, size, dst);
}
cmd_buffer->push_constant_stages |= set->header.layout->dynamic_shader_stages;
+3 -6
View File
@@ -18,6 +18,7 @@
#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "ac_debug.h"
#include "ac_descriptors.h"
#include "radv_buffer.h"
#include "radv_debug.h"
#include "radv_descriptor_set.h"
@@ -879,6 +880,7 @@ fail:
bool
radv_trap_handler_init(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_winsys *ws = device->ws;
VkResult result;
@@ -912,12 +914,7 @@ radv_trap_handler_init(struct radv_device *device)
uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
uint32_t desc[4];
desc[0] = tma_va;
desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
desc[2] = TMA_BO_SIZE;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
ac_build_raw_buffer_descriptor(pdev->info.gfx_level, tma_va, TMA_BO_SIZE, desc);
memcpy(device->tma_ptr, desc, sizeof(desc));
+2 -18
View File
@@ -10,6 +10,7 @@
#include <string.h>
#include "util/mesa-sha1.h"
#include "ac_descriptors.h"
#include "radv_buffer.h"
#include "radv_buffer_view.h"
#include "radv_cmd_buffer.h"
@@ -1077,28 +1078,11 @@ write_buffer_descriptor(struct radv_device *device, unsigned *dst, uint64_t va,
return;
}
uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (pdev->info.gfx_level >= GFX11) {
rsrc_word3 |=
S_008F0C_FORMAT_GFX10(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (pdev->info.gfx_level >= GFX10) {
rsrc_word3 |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
} else {
rsrc_word3 |=
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
dst[0] = va;
dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
/* robustBufferAccess is relaxed enough to allow this (in combination with the alignment/size
* we return from vkGetBufferMemoryRequirements) and this allows the shader compiler to create
* more efficient 8/16-bit buffer accesses.
*/
dst[2] = align(range, 4);
dst[3] = rsrc_word3;
ac_build_raw_buffer_descriptor(pdev->info.gfx_level, va, align(range, 4), dst);
}
static ALWAYS_INLINE void
+20 -37
View File
@@ -21,6 +21,7 @@
#include "vk_sync.h"
#include "ac_debug.h"
#include "ac_descriptors.h"
enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoKHR *pObj)
@@ -238,35 +239,25 @@ radv_set_ring_buffer(const struct radv_physical_device *pdev, struct radeon_wins
{
const uint8_t oob_select = oob_select_raw ? V_008F0C_OOB_SELECT_RAW : V_008F0C_OOB_SELECT_DISABLED;
const uint64_t va = radv_buffer_get_va(bo) + offset;
const struct ac_buffer_state ac_state = {
.va = va,
.size = ring_size,
.format = PIPE_FORMAT_R32_FLOAT,
.swizzle =
{
PIPE_SWIZZLE_X,
PIPE_SWIZZLE_Y,
PIPE_SWIZZLE_Z,
PIPE_SWIZZLE_W,
},
.swizzle_enable = swizzle_enable,
.element_size = element_size,
.index_stride = index_stride,
.add_tid = add_tid,
.gfx10_oob_select = oob_select,
};
uint32_t rsrc_word1 = S_008F04_BASE_ADDRESS_HI(va >> 32);
if (pdev->info.gfx_level >= GFX11) {
rsrc_word1 |= S_008F04_SWIZZLE_ENABLE_GFX11(swizzle_enable);
} else {
rsrc_word1 |= S_008F04_SWIZZLE_ENABLE_GFX6(swizzle_enable);
}
uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
S_008F0C_INDEX_STRIDE(index_stride) | S_008F0C_ADD_TID_ENABLE(add_tid);
if (pdev->info.gfx_level >= GFX11) {
rsrc_word3 |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(oob_select);
} else if (pdev->info.gfx_level >= GFX10) {
rsrc_word3 |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(oob_select) |
S_008F0C_RESOURCE_LEVEL(1);
} else {
/* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */
const uint32_t data_format = pdev->info.gfx_level >= GFX8 && add_tid ? 0 : V_008F0C_BUF_DATA_FORMAT_32;
rsrc_word3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(data_format) |
S_008F0C_ELEMENT_SIZE(element_size);
}
desc[0] = va;
desc[1] = rsrc_word1;
desc[2] = ring_size;
desc[3] = rsrc_word3;
ac_build_buffer_descriptor(pdev->info.gfx_level, &ac_state, desc);
}
static void
@@ -350,15 +341,7 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
if (attr_ring_bo) {
assert(pdev->info.gfx_level >= GFX11);
uint64_t va = radv_buffer_get_va(attr_ring_bo);
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_SWIZZLE_ENABLE_GFX11(3) /* 16B */;
desc[2] = attr_ring_size;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
S_008F0C_FORMAT_GFX10(V_008F0C_GFX11_FORMAT_32_32_32_32_FLOAT) |
S_008F0C_INDEX_STRIDE(2) /* 32 elements */;
ac_build_attr_ring_descriptor(pdev->info.gfx_level, radv_buffer_get_va(attr_ring_bo), attr_ring_size, &desc[0]);
}
desc += 4;