tu: Update LRZ FC allocation for A7XX layout

The allocation size is now determined based off the LRFC structure
rather than hardcoding in A6XX's layout.

Signed-off-by: Mark Collins <mark@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29453>
This commit is contained in:
Mark Collins
2024-03-29 11:44:48 +00:00
committed by Marge Bot
parent bf5e8fb394
commit db505ea565
3 changed files with 22 additions and 27 deletions
+18 -23
View File
@@ -23,6 +23,7 @@
#include "tu_descriptor_set.h"
#include "tu_device.h"
#include "tu_formats.h"
#include "tu_lrz.h"
#include "tu_rmv.h"
#include "tu_wsi.h"
@@ -441,6 +442,7 @@ format_list_has_swaps(const VkImageFormatListCreateInfo *fmt_list)
return false;
}
template <chip CHIP>
VkResult
tu_image_update_layout(struct tu_device *device, struct tu_image *image,
uint64_t modifier, const VkSubresourceLayout *plane_layouts)
@@ -573,41 +575,33 @@ tu_image_update_layout(struct tu_device *device, struct tu_image *image,
image->lrz_height = lrz_height;
image->lrz_pitch = lrz_pitch;
image->lrz_offset = image->total_size;
unsigned lrz_size = lrz_pitch * lrz_height * 2;
image->total_size += lrz_size;
unsigned lrz_size = lrz_pitch * lrz_height * sizeof(uint16_t);
unsigned nblocksx = DIV_ROUND_UP(DIV_ROUND_UP(width, 8), 16);
unsigned nblocksy = DIV_ROUND_UP(DIV_ROUND_UP(height, 8), 4);
/* Fast-clear buffer is 1bit/block */
image->lrz_fc_size = DIV_ROUND_UP(nblocksx * nblocksy, 8);
unsigned lrz_fc_size = DIV_ROUND_UP(nblocksx * nblocksy, 8);
/* Fast-clear buffer cannot be larger than 512 bytes (HW limitation) */
bool has_lrz_fc = image->lrz_fc_size <= 512 &&
/* Fast-clear buffer cannot be larger than 512 bytes on A6XX and 1024 bytes on A7XX (HW limitation) */
image->has_lrz_fc =
device->physical_device->info->a6xx.enable_lrz_fast_clear &&
lrz_fc_size <= tu_lrzfc_layout<CHIP>::FC_SIZE &&
!TU_DEBUG(NOLRZFC);
if (has_lrz_fc || device->physical_device->info->a6xx.has_lrz_dir_tracking) {
image->lrz_fc_offset = image->total_size;
image->total_size += 512;
if (device->physical_device->info->a6xx.has_lrz_dir_tracking) {
/* Direction tracking uses 1 byte */
image->total_size += 1;
/* GRAS_LRZ_DEPTH_VIEW needs 5 bytes: 4 for view data and 1 for padding */
image->total_size += 5;
}
if (image->has_lrz_fc || device->physical_device->info->a6xx.has_lrz_dir_tracking) {
image->lrz_fc_offset = image->total_size + lrz_size;
lrz_size += sizeof(tu_lrzfc_layout<CHIP>);
}
if (!has_lrz_fc) {
image->lrz_fc_size = 0;
}
image->total_size += lrz_size;
} else {
image->lrz_height = 0;
}
return VK_SUCCESS;
}
TU_GENX(tu_image_update_layout);
static VkResult
tu_image_init(struct tu_device *device, struct tu_image *image,
@@ -786,8 +780,8 @@ tu_CreateImage(VkDevice _device,
return VK_SUCCESS;
}
result = tu_image_update_layout(device, image, modifier,
plane_layouts);
result = TU_CALLX(device, tu_image_update_layout)(device, image, modifier,
plane_layouts);
if (result != VK_SUCCESS)
goto fail;
@@ -880,7 +874,8 @@ tu_BindImageMemory2(VkDevice _device,
return result;
}
result = tu_image_update_layout(device, image, eci.drmFormatModifier, a_plane_layouts);
result = TU_CALLX(device, tu_image_update_layout)(device, image,
eci.drmFormatModifier, a_plane_layouts);
if (result != VK_SUCCESS) {
if (status)
*status->pResult = result;
@@ -978,7 +973,7 @@ tu_GetDeviceImageMemoryRequirements(
vk_image_init(&device->vk, &image.vk, pInfo->pCreateInfo);
tu_image_init(device, &image, pInfo->pCreateInfo);
tu_image_update_layout(device, &image, DRM_FORMAT_MOD_INVALID, NULL);
TU_CALLX(device, tu_image_update_layout)(device, &image, DRM_FORMAT_MOD_INVALID, NULL);
tu_get_image_memory_requirements(device, &image, pMemoryRequirements);
}
@@ -1044,7 +1039,7 @@ tu_GetDeviceImageSubresourceLayoutKHR(VkDevice _device,
vk_image_init(&device->vk, &image.vk, pInfo->pCreateInfo);
tu_image_init(device, &image, pInfo->pCreateInfo);
tu_image_update_layout(device, &image, DRM_FORMAT_MOD_INVALID, NULL);
TU_CALLX(device, tu_image_update_layout)(device, &image, DRM_FORMAT_MOD_INVALID, NULL);
tu_get_image_subresource_layout(&image, pInfo->pSubresource, pLayout);
}
+1 -1
View File
@@ -38,7 +38,7 @@ struct tu_image
uint32_t lrz_pitch;
uint32_t lrz_offset;
uint32_t lrz_fc_offset;
uint32_t lrz_fc_size;
bool has_lrz_fc;
bool ubwc_enabled;
bool force_linear_tile;
+3 -3
View File
@@ -172,7 +172,7 @@ tu_lrz_init_state(struct tu_cmd_buffer *cmd,
/* Be optimistic and unconditionally enable fast-clear in
* secondary cmdbufs and when reusing previous LRZ state.
*/
cmd->state.lrz.fast_clear = view->image->lrz_fc_size > 0;
cmd->state.lrz.fast_clear = view->image->has_lrz_fc;
cmd->state.lrz.gpu_dir_tracking = has_gpu_tracking;
cmd->state.lrz.reuse_previous_state = !clears_depth;
@@ -367,7 +367,7 @@ tu_lrz_tiling_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
* TODO: we could avoid this if we don't store depth and don't
* expect secondary cmdbufs.
*/
if (lrz->image_view->image->lrz_fc_size) {
if (lrz->image_view->image->has_lrz_fc) {
tu6_dirty_lrz_fc<CHIP>(cmd, cs, lrz->image_view->image);
}
}
@@ -502,7 +502,7 @@ tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd,
if (!range)
return;
bool fast_clear = image->lrz_fc_size && (pDepthStencil->depth == 0.f ||
bool fast_clear = image->has_lrz_fc && (pDepthStencil->depth == 0.f ||
pDepthStencil->depth == 1.f);
tu6_emit_lrz_buffer<CHIP>(&cmd->cs, image);