diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 868f8ebd907..b9583b07288 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -1491,6 +1491,13 @@ anv_device_alloc_bo(struct anv_device *device, /* The kernel is going to give us whole pages anyway. */ size = align64(size, 4096); + const uint64_t ccs_offset = size; + if (alloc_flags & ANV_BO_ALLOC_AUX_CCS) { + assert(device->info->has_aux_map); + size += DIV_ROUND_UP(size, intel_aux_get_main_to_aux_ratio(device->aux_map_ctx)); + size = align64(size, 4096); + } + const struct intel_memory_class_instance *regions[2]; uint32_t nregions = 0; @@ -1532,6 +1539,7 @@ anv_device_alloc_bo(struct anv_device *device, .refcount = 1, .offset = -1, .size = size, + .ccs_offset = ccs_offset, .actual_size = actual_size, .flags = bo_flags, .alloc_flags = alloc_flags, @@ -1614,6 +1622,7 @@ anv_device_import_bo_from_host_ptr(struct anv_device *device, assert(!(alloc_flags & (ANV_BO_ALLOC_MAPPED | ANV_BO_ALLOC_HOST_CACHED | ANV_BO_ALLOC_HOST_COHERENT | + ANV_BO_ALLOC_AUX_CCS | ANV_BO_ALLOC_PROTECTED | ANV_BO_ALLOC_FIXED_ADDRESS))); assert(alloc_flags & ANV_BO_ALLOC_EXTERNAL); diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index cd0a04716ee..33042abcbc2 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -2298,6 +2298,8 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, !device->uses_ex_bso || driQueryOptionb(&instance->dri_options, "force_indirect_descriptors"); + device->alloc_aux_tt_mem = + device->info.has_aux_map && device->info.verx10 >= 125; /* Check if we can read the GPU timestamp register from the CPU */ uint64_t u64_ignore; device->has_reg_timestamp = intel_gem_read_render_timestamp(fd, @@ -4102,6 +4104,18 @@ VkResult anv_AllocateMemory( if (device->info->has_aux_map) alloc_flags |= ANV_BO_ALLOC_AUX_TT_ALIGNED; + /* If the allocation is not dedicated, allocate additional CCS space. 
+ * + * TODO: If we ever ship VK_EXT_descriptor_buffer (ahahah... :() we could + * drop this flag in the descriptor buffer case as we don't need any + * compression there. + * + * TODO: We could also create new memory types for allocations that don't + * need any compression. + */ + if (device->physical->alloc_aux_tt_mem && dedicated_info == NULL) + alloc_flags |= ANV_BO_ALLOC_AUX_CCS; + /* TODO: Android, ChromeOS and other applications may need another way to * allocate buffers that can be scanout to display but it should pretty easy * to catch those as Xe KMD driver will print warnings in dmesg when diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index 8d652e0c8f8..944cacd600d 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -1873,7 +1873,8 @@ void anv_GetPhysicalDeviceSparseImageFormatProperties2( VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT; isl_surf_usage_flags_t isl_usage = - anv_image_choose_isl_surf_usage(vk_create_flags, pFormatInfo->usage, + anv_image_choose_isl_surf_usage(physical_device, + vk_create_flags, pFormatInfo->usage, 0, aspect); const enum isl_surf_dim isl_surf_dim = diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 6298b9635ab..5ba5713c9e7 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -203,13 +203,23 @@ memory_range_merge(struct anv_image_memory_range *a, } isl_surf_usage_flags_t -anv_image_choose_isl_surf_usage(VkImageCreateFlags vk_create_flags, +anv_image_choose_isl_surf_usage(struct anv_physical_device *device, + VkImageCreateFlags vk_create_flags, VkImageUsageFlags vk_usage, isl_surf_usage_flags_t isl_extra_usage, VkImageAspectFlagBits aspect) { isl_surf_usage_flags_t isl_usage = isl_extra_usage; + /* On platforms like MTL, we choose to allocate additional CCS memory at the + * back of the VkDeviceMemory objects since different images can share the + * AUX-TT PTE because the HW doesn't care about the image format in the + 
* PTE. That means we can always ignore the AUX-TT alignment requirement + * from an ISL point of view. + */ + if (device->alloc_aux_tt_mem) + isl_usage |= ISL_SURF_USAGE_NO_AUX_TT_ALIGNMENT_BIT; + if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT) isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; @@ -1312,7 +1322,8 @@ add_all_surfaces_implicit_layout( VkImageUsageFlags vk_usage = vk_image_usage(&image->vk, aspect); isl_surf_usage_flags_t isl_usage = - anv_image_choose_isl_surf_usage(image->vk.create_flags, vk_usage, + anv_image_choose_isl_surf_usage(device->physical, + image->vk.create_flags, vk_usage, isl_extra_usage_flags, aspect); result = add_primary_surface(device, image, plane, plane_format, @@ -1710,8 +1721,8 @@ anv_image_init(struct anv_device *device, struct anv_image *image, devinfo, image->emu_plane_format, 0, image->vk.tiling); isl_surf_usage_flags_t isl_usage = anv_image_choose_isl_surf_usage( - image->vk.create_flags, image->vk.usage, isl_extra_usage_flags, - VK_IMAGE_ASPECT_COLOR_BIT); + device->physical, image->vk.create_flags, image->vk.usage, + isl_extra_usage_flags, VK_IMAGE_ASPECT_COLOR_BIT); r = add_primary_surface(device, image, plane, plane_format, ANV_OFFSET_IMPLICIT, 0, @@ -2261,23 +2272,65 @@ anv_image_map_aux_tt(struct anv_device *device, struct anv_bo *bo = main_addr.bo; assert(bo != NULL); - if (anv_address_allows_aux_map(device, main_addr)) { - const struct anv_address aux_addr = - anv_image_address(image, - &image->planes[plane].compr_ctrl_memory_range); - const struct isl_surf *surf = - &image->planes[plane].primary_surface.isl; + /* If the additional memory padding was added at the end of the BO for CCS + * data, map this region at the granularity of the main/CCS pages. + * + * Otherwise the image should have additional CCS data at the computed + * offset. 
+ */ + if (device->physical->alloc_aux_tt_mem && + (bo->alloc_flags & ANV_BO_ALLOC_AUX_CCS)) { + uint64_t main_aux_alignment = + intel_aux_map_get_alignment(device->aux_map_ctx); + assert(bo->offset % main_aux_alignment == 0); + const struct anv_address start_addr = (struct anv_address) { + .bo = bo, + .offset = ROUND_DOWN_TO(main_addr.offset, main_aux_alignment), + }; + const struct anv_address aux_addr = (struct anv_address) { + .bo = bo, + .offset = bo->ccs_offset + + intel_aux_main_to_aux_offset(device->aux_map_ctx, + start_addr.offset), + }; + const struct isl_surf *surf = &image->planes[plane].primary_surface.isl; const uint64_t format_bits = intel_aux_map_format_bits_for_isl_surf(surf); + /* Make sure to have the mapping cover the entire image from the aux + * aligned start. + */ + const uint64_t main_size = align( + (main_addr.offset - start_addr.offset) + surf->size_B, + main_aux_alignment); + if (intel_aux_map_add_mapping(device->aux_map_ctx, - anv_address_physical(main_addr), + anv_address_physical(start_addr), anv_address_physical(aux_addr), - surf->size_B, format_bits)) { - image->planes[plane].aux_tt.addr = anv_address_physical(main_addr); - image->planes[plane].aux_tt.size = surf->size_B; + main_size, format_bits)) { image->planes[plane].aux_tt.mapped = true; + image->planes[plane].aux_tt.addr = anv_address_physical(start_addr); + image->planes[plane].aux_tt.size = main_size; return true; } + } else { + if (anv_address_allows_aux_map(device, main_addr)) { + const struct anv_address aux_addr = + anv_image_address(image, + &image->planes[plane].compr_ctrl_memory_range); + const struct isl_surf *surf = + &image->planes[plane].primary_surface.isl; + const uint64_t format_bits = + intel_aux_map_format_bits_for_isl_surf(surf); + if (intel_aux_map_add_mapping(device->aux_map_ctx, + anv_address_physical(main_addr), + anv_address_physical(aux_addr), + surf->size_B, format_bits)) { + image->planes[plane].aux_tt.mapped = true; + image->planes[plane].aux_tt.addr 
= anv_address_physical(main_addr); + image->planes[plane].aux_tt.size = surf->size_B; + return true; + } + } } return false; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index df481f09782..19a6314c480 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -447,6 +447,13 @@ enum anv_bo_alloc_flags { /** Specify whether this BO is internal to the driver */ ANV_BO_ALLOC_INTERNAL = (1 << 19), + + /** Allocate with CCS AUX requirements + * + * This pads the BO to include CCS data mappable through the AUX-TT and + * aligned to the AUX-TT requirements. + */ + ANV_BO_ALLOC_AUX_CCS = (1 << 20), }; /** Specifies that the BO should be cached and coherent. */ @@ -486,6 +493,9 @@ struct anv_bo { /** Size of the buffer */ uint64_t size; + /** Offset at which the CCS data is stored */ + uint64_t ccs_offset; + /* Map for internally mapped BOs. * * If ANV_BO_ALLOC_MAPPED is set in flags, this is the map for the whole @@ -1018,6 +1028,25 @@ struct anv_physical_device { bool uses_ex_bso; bool always_flush_cache; + + /** True if application memory is allocated with extra AUX memory + * + * Applications quite often pool image allocations together in a single + * VkDeviceMemory object. On platforms like MTL, the alignment of images + * with compression mapped through the AUX translation tables is large : + * 1MB. This can create a lot of wasted space in the application memory + * objects. + * + * To work around this problem, we allocate CCS data at the end of + * VkDeviceMemory objects. This would not work well for TGL-like platforms + * because the AUX translation tables also contain the format of the + * images, but on MTL the HW ignores those values. So we can share the AUX + * TT entries between different images without problem. + * + * This should be only true for platforms with AUX TT. 
+ */ + bool alloc_aux_tt_mem; + /** * True if the descriptors buffers are holding one of the following : * - anv_sampled_image_descriptor @@ -5232,7 +5261,8 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, bool predicate); isl_surf_usage_flags_t -anv_image_choose_isl_surf_usage(VkImageCreateFlags vk_create_flags, +anv_image_choose_isl_surf_usage(struct anv_physical_device *device, + VkImageCreateFlags vk_create_flags, VkImageUsageFlags vk_usage, isl_surf_usage_flags_t isl_extra_usage, VkImageAspectFlagBits aspect);