diff --git a/src/nouveau/vulkan/meson.build b/src/nouveau/vulkan/meson.build index 986e6b1b4eb..09faa5b4876 100644 --- a/src/nouveau/vulkan/meson.build +++ b/src/nouveau/vulkan/meson.build @@ -6,7 +6,6 @@ nvk_files = files( 'nvk_buffer.h', 'nvk_buffer_view.c', 'nvk_buffer_view.h', - 'nvk_cmd_blit.c', 'nvk_cmd_buffer.c', 'nvk_cmd_buffer.h', 'nvk_cmd_clear.c', diff --git a/src/nouveau/vulkan/nvk_cmd_blit.c b/src/nouveau/vulkan/nvk_cmd_blit.c deleted file mode 100644 index 4ee01f7f4c6..00000000000 --- a/src/nouveau/vulkan/nvk_cmd_blit.c +++ /dev/null @@ -1,190 +0,0 @@ -#include "nvk_cmd_buffer.h" - -#include "vulkan/util/vk_format.h" - -#include "nvk_buffer.h" -#include "nvk_device_memory.h" -#include "nvk_format.h" -#include "nvk_image.h" - -#include "nouveau_bo.h" - -#include "nvtypes.h" -#include "nvk_cl902d.h" - -VKAPI_ATTR void VKAPI_CALL -nvk_CmdBlitImage2(VkCommandBuffer commandBuffer, - const VkBlitImageInfo2 *pBlitImageInfo) -{ - VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer); - VK_FROM_HANDLE(nvk_image, src, pBlitImageInfo->srcImage); - VK_FROM_HANDLE(nvk_image, dst, pBlitImageInfo->dstImage); - - struct nv_push *p = nvk_cmd_buffer_push(cmd, 16); - - assert(nvk_get_format(src->vk.format)->supports_2d_blit); - assert(nvk_get_format(dst->vk.format)->supports_2d_blit); - - P_IMMD(p, NV902D, SET_CLIP_ENABLE, V_FALSE); - P_IMMD(p, NV902D, SET_COLOR_KEY_ENABLE, V_FALSE); - P_IMMD(p, NV902D, SET_RENDER_ENABLE_C, MODE_TRUE); - - const uint32_t src_hw_format = nvk_get_format(src->vk.format)->hw_format; - const uint32_t dst_hw_format = nvk_get_format(dst->vk.format)->hw_format; - - P_IMMD(p, NV902D, SET_SRC_FORMAT, src_hw_format); - P_IMMD(p, NV902D, SET_DST_FORMAT, dst_hw_format); - - if (pBlitImageInfo->filter == VK_FILTER_NEAREST) { - P_IMMD(p, NV902D, SET_PIXELS_FROM_MEMORY_SAMPLE_MODE, { - .origin = ORIGIN_CORNER, - .filter = FILTER_POINT, - }); - } else { - P_IMMD(p, NV902D, SET_PIXELS_FROM_MEMORY_SAMPLE_MODE, { - .origin = ORIGIN_CORNER, - .filter = 
FILTER_BILINEAR, - }); - } - - /* for single channel sources we have to disable channels, we can use SRCCOPY_PREMULT: - * DST = SRC * BETA4 - * otherwise all channels of the destinations are filled - * - * NOTE: this only works for blits to 8 bit or packed formats - */ - if (vk_format_get_nr_components(src->vk.format) == 1 && - src_hw_format != dst_hw_format) { - uint8_t mask = vk_format_is_snorm(dst->vk.format) ? 0x7f : 0xff; - P_MTHD(p, NV902D, SET_BETA4); - P_NV902D_SET_BETA4(p, { - .r = mask, - .a = mask, - }); - P_IMMD(p, NV902D, SET_OPERATION, V_SRCCOPY_PREMULT); - } else { - P_IMMD(p, NV902D, SET_OPERATION, V_SRCCOPY); - } - - for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) { - const VkImageBlit2 *region = &pBlitImageInfo->pRegions[r]; - p = nvk_cmd_buffer_push(cmd, 30 + region->srcSubresource.layerCount * 10); - - unsigned x_i = region->dstOffsets[0].x < region->dstOffsets[1].x ? 0 : 1; - unsigned y_i = region->dstOffsets[0].y < region->dstOffsets[1].y ? 0 : 1; - - /* All src_* are in 32.32 fixed-point */ - int64_t src_start_x_fp = (int64_t)region->srcOffsets[x_i].x << 32; - int64_t src_start_y_fp = (int64_t)region->srcOffsets[y_i].y << 32; - int64_t src_end_x_fp = (int64_t)region->srcOffsets[1 - x_i].x << 32; - int64_t src_end_y_fp = (int64_t)region->srcOffsets[1 - y_i].y << 32; - int64_t src_width_fp = src_end_x_fp - src_start_x_fp; - int64_t src_height_fp = src_end_y_fp - src_start_y_fp; - - uint32_t dst_start_x = region->dstOffsets[x_i].x; - uint32_t dst_start_y = region->dstOffsets[y_i].y; - uint32_t dst_end_x = region->dstOffsets[1 - x_i].x; - uint32_t dst_end_y = region->dstOffsets[1 - y_i].y; - uint32_t dst_width = dst_end_x - dst_start_x; - uint32_t dst_height = dst_end_y - dst_start_y; - - int64_t scaling_x_fp = src_width_fp / dst_width; - int64_t scaling_y_fp = src_height_fp / dst_height; - - /* move the src by half a fraction. 
- * Alternatively I am sure there is a way to make that work with CENTER SAMPLE_MODE, but - * that didn't really pan out - */ - src_start_x_fp += scaling_x_fp / 2; - src_start_y_fp += scaling_y_fp / 2; - - const struct nil_image_level *src_level = - &src->nil.levels[region->srcSubresource.mipLevel]; - const VkExtent3D src_level_extent = - vk_image_mip_level_extent(&src->vk, region->srcSubresource.mipLevel); - - if (src_level->tiling.is_tiled) { - P_MTHD(p, NV902D, SET_SRC_MEMORY_LAYOUT); - P_NV902D_SET_SRC_MEMORY_LAYOUT(p, V_BLOCKLINEAR); - P_NV902D_SET_SRC_BLOCK_SIZE(p, { - .height = src_level->tiling.y_log2, - .depth = src_level->tiling.z_log2, - }); - } else { - P_IMMD(p, NV902D, SET_SRC_MEMORY_LAYOUT, V_PITCH); - } - - P_MTHD(p, NV902D, SET_SRC_DEPTH); - P_NV902D_SET_SRC_DEPTH(p, src_level_extent.depth); - - P_MTHD(p, NV902D, SET_SRC_PITCH); - P_NV902D_SET_SRC_PITCH(p, src_level->row_stride_B); - P_NV902D_SET_SRC_WIDTH(p, src_level_extent.width); - P_NV902D_SET_SRC_HEIGHT(p, src_level_extent.height); - - const struct nil_image_level *dst_level = - &dst->nil.levels[region->dstSubresource.mipLevel]; - const VkExtent3D dst_level_extent = - vk_image_mip_level_extent(&dst->vk, region->dstSubresource.mipLevel); - - if (dst_level->tiling.is_tiled) { - P_MTHD(p, NV902D, SET_DST_MEMORY_LAYOUT); - P_NV902D_SET_DST_MEMORY_LAYOUT(p, V_BLOCKLINEAR); - P_NV902D_SET_DST_BLOCK_SIZE(p, { - .height = dst_level->tiling.y_log2, - .depth = dst_level->tiling.z_log2, - }); - } else { - P_IMMD(p, NV902D, SET_DST_MEMORY_LAYOUT, V_PITCH); - } - - P_MTHD(p, NV902D, SET_DST_DEPTH); - P_NV902D_SET_DST_DEPTH(p, dst_level_extent.depth); - - P_MTHD(p, NV902D, SET_DST_PITCH); - P_NV902D_SET_DST_PITCH(p, dst_level->row_stride_B); - P_NV902D_SET_DST_WIDTH(p, dst_level_extent.width); - P_NV902D_SET_DST_HEIGHT(p, dst_level_extent.height); - - P_MTHD(p, NV902D, SET_PIXELS_FROM_MEMORY_DST_X0); - P_NV902D_SET_PIXELS_FROM_MEMORY_DST_X0(p, dst_start_x); - P_NV902D_SET_PIXELS_FROM_MEMORY_DST_Y0(p, 
dst_start_y); - P_NV902D_SET_PIXELS_FROM_MEMORY_DST_WIDTH(p, dst_width); - P_NV902D_SET_PIXELS_FROM_MEMORY_DST_HEIGHT(p, dst_height); - P_NV902D_SET_PIXELS_FROM_MEMORY_DU_DX_FRAC(p, scaling_x_fp & 0xffffffff); - P_NV902D_SET_PIXELS_FROM_MEMORY_DU_DX_INT(p, scaling_x_fp >> 32); - P_NV902D_SET_PIXELS_FROM_MEMORY_DV_DY_FRAC(p, scaling_y_fp & 0xffffffff); - P_NV902D_SET_PIXELS_FROM_MEMORY_DV_DY_INT(p, scaling_y_fp >> 32); - P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_X0_FRAC(p, src_start_x_fp & 0xffffffff); - P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_X0_INT(p, src_start_x_fp >> 32); - P_NV902D_SET_PIXELS_FROM_MEMORY_SRC_Y0_FRAC(p, src_start_y_fp & 0xffffffff); - - assert(src->vk.image_type != VK_IMAGE_TYPE_3D); - assert(dst->vk.image_type != VK_IMAGE_TYPE_3D); - for (unsigned w = 0; w < region->srcSubresource.layerCount; w++) { - const uint32_t src_layer = w + region->srcSubresource.baseArrayLayer; - const VkDeviceSize src_addr = nvk_image_base_address(src) + - src_layer * src->nil.array_stride_B + - src_level->offset_B; - - const uint32_t dst_layer = w + region->dstSubresource.baseArrayLayer; - const VkDeviceSize dst_addr = nvk_image_base_address(dst) + - dst_layer * dst->nil.array_stride_B + - dst_level->offset_B; - - P_MTHD(p, NV902D, SET_SRC_OFFSET_UPPER); - P_NV902D_SET_SRC_OFFSET_UPPER(p, src_addr >> 32); - P_NV902D_SET_SRC_OFFSET_LOWER(p, src_addr & 0xffffffff); - - P_MTHD(p, NV902D, SET_DST_OFFSET_UPPER); - P_NV902D_SET_DST_OFFSET_UPPER(p, dst_addr >> 32); - P_NV902D_SET_DST_OFFSET_LOWER(p, dst_addr & 0xffffffff); - - P_MTHD(p, NV902D, SET_DST_LAYER); - P_NV902D_SET_DST_LAYER(p, 0); - - P_MTHD(p, NV902D, PIXELS_FROM_MEMORY_SRC_Y0_INT); - P_NV902D_PIXELS_FROM_MEMORY_SRC_Y0_INT(p, src_start_y_fp >> 32); - } - } -} diff --git a/src/nouveau/vulkan/nvk_cmd_meta.c b/src/nouveau/vulkan/nvk_cmd_meta.c index de295d01b7e..9ff4989982d 100644 --- a/src/nouveau/vulkan/nvk_cmd_meta.c +++ b/src/nouveau/vulkan/nvk_cmd_meta.c @@ -119,3 +119,18 @@ nvk_meta_end(struct nvk_cmd_buffer *cmd, 
memcpy(cmd->state.gfx.descriptors.root.push, save->push, sizeof(save->push)); } + +VKAPI_ATTR void VKAPI_CALL +nvk_CmdBlitImage2(VkCommandBuffer commandBuffer, + const VkBlitImageInfo2 *pBlitImageInfo) +{ /* vkCmdBlitImage2 entry point: wraps the shared vk_meta blit between nvk_meta_begin and nvk_meta_end */ + VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer); + struct nvk_device *dev = nvk_cmd_buffer_device(cmd); + + struct nvk_meta_save save; + nvk_meta_begin(cmd, &save); /* NOTE(review): presumably captures the state that nvk_meta_end puts back -- confirm */ + + vk_meta_blit_image2(&cmd->vk, &dev->meta, pBlitImageInfo); /* the blit is delegated to the common vk_meta code; the NV902D 2D-engine path in nvk_cmd_blit.c is deleted by this patch */ + + nvk_meta_end(cmd, &save); /* restores from the save struct; its visible tail copies save->push back into the root push data */ +} diff --git a/src/nouveau/vulkan/nvk_format.c b/src/nouveau/vulkan/nvk_format.c index 99e0a823898..b719946fd4f 100644 --- a/src/nouveau/vulkan/nvk_format.c +++ b/src/nouveau/vulkan/nvk_format.c @@ -11,183 +11,6 @@ #include "vulkan/util/vk_enum_defines.h" #include "vulkan/util/vk_format.h" -/* - * nvidia names - * _: UNORM - * F: SFLOAT (and maybe UFLOAT?) 
- * L: SINT and UINT - * N: SNORM - * and for whatever reason, 8 bit format names are in BE order - * - * TODO: swizzles - * TODO: X formats - * TODO: Y formats - * TODO: Z formats - * TODO: O formats - */ - -struct nvk_format nvk_formats[] = { - { - .vk_format = VK_FORMAT_R8_UNORM, - .hw_format = 0x0, - .supports_2d_blit = false, - }, - - { - .vk_format = VK_FORMAT_A1R5G5B5_UNORM_PACK16, - .hw_format = NV902D_SET_SRC_FORMAT_V_A1R5G5B5, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_A2B10G10R10_UNORM_PACK32, - .hw_format = NV902D_SET_SRC_FORMAT_V_A2B10G10R10, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_A2R10G10B10_UNORM_PACK32, - .hw_format = NV902D_SET_SRC_FORMAT_V_A2R10G10B10, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_A8B8G8R8_SINT_PACK32, - .hw_format = NV902D_SET_SRC_FORMAT_V_A8BL8GL8RL8, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_A8B8G8R8_SNORM_PACK32, - .hw_format = NV902D_SET_SRC_FORMAT_V_AN8BN8GN8RN8, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_A8B8G8R8_UINT_PACK32, - .hw_format = NV902D_SET_SRC_FORMAT_V_A8BL8GL8RL8, - .supports_2d_blit = true, - }, - { - 
.vk_format = VK_FORMAT_A8B8G8R8_UNORM_PACK32, - .hw_format = NV902D_SET_SRC_FORMAT_V_A8B8G8R8, - .supports_2d_blit = true, - }, - - { - .vk_format = VK_FORMAT_B8G8R8A8_SINT, - .hw_format = NV902D_SET_SRC_FORMAT_V_A8RL8GL8BL8, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_B8G8R8A8_UINT, - .hw_format = NV902D_SET_SRC_FORMAT_V_A8RL8GL8BL8, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_B8G8R8A8_UNORM, - .hw_format = NV902D_SET_SRC_FORMAT_V_A8R8G8B8, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_B10G11R11_UFLOAT_PACK32, - .hw_format = NV902D_SET_SRC_FORMAT_V_BF10GF11RF11, - .supports_2d_blit = true, - }, - - { - .vk_format = VK_FORMAT_R5G6B5_UNORM_PACK16, - .hw_format = NV902D_SET_SRC_FORMAT_V_R5G6B5, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_R8G8_SNORM, - .hw_format = NV902D_SET_SRC_FORMAT_V_GN8RN8, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_R8G8_UNORM, - .hw_format = NV902D_SET_SRC_FORMAT_V_G8R8, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_R8G8B8A8_SINT, - .hw_format = NV902D_SET_SRC_FORMAT_V_A8BL8GL8RL8, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_R8G8B8A8_SNORM, - .hw_format = NV902D_SET_SRC_FORMAT_V_AN8BN8GN8RN8, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_R8G8B8A8_UINT, - .hw_format = NV902D_SET_SRC_FORMAT_V_A8BL8GL8RL8, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_R8G8B8A8_UNORM, - .hw_format = NV902D_SET_SRC_FORMAT_V_A8B8G8R8, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_R16G16_SFLOAT, - .hw_format = NV902D_SET_SRC_FORMAT_V_RF16_GF16, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_R16G16_SNORM, - .hw_format = NV902D_SET_SRC_FORMAT_V_RN16_GN16, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_R16G16_UNORM, - .hw_format = NV902D_SET_SRC_FORMAT_V_R16_G16, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_R16G16B16A16_SFLOAT, - .hw_format = 
NV902D_SET_SRC_FORMAT_V_RF16_GF16_BF16_AF16, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_R16G16B16A16_SNORM, - .hw_format = NV902D_SET_SRC_FORMAT_V_RN16_GN16_BN16_AN16, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_R16G16B16A16_UNORM, - .hw_format = NV902D_SET_SRC_FORMAT_V_R16_G16_B16_A16, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_R32G32_SFLOAT, - .hw_format = NV902D_SET_SRC_FORMAT_V_RF32_GF32, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_R32G32B32A32_SFLOAT, - .hw_format = NV902D_SET_SRC_FORMAT_V_RF32_GF32_BF32_AF32, - .supports_2d_blit = true, - }, - { - .vk_format = VK_FORMAT_R32_UINT, - .hw_format = NV90C0_SET_SU_LD_ST_TARGET_FORMAT_COLOR_RU32, - .supports_2d_blit = false, - }, - { - .vk_format = VK_FORMAT_R16_UINT, - .hw_format = NV90C0_SET_SU_LD_ST_TARGET_FORMAT_COLOR_RU16, - .supports_2d_blit = false, - }, -}; - -const struct nvk_format * -nvk_get_format(VkFormat vk_format) -{ - for (unsigned i = 0; i < ARRAY_SIZE(nvk_formats); i++) { - if (nvk_formats[i].vk_format == vk_format) - return &nvk_formats[i]; - } - - return NULL; -} - #define VA_FMT(vk_fmt, widths, swap_rb, type) \ [VK_FORMAT_##vk_fmt] = \ { NV9097_SET_VERTEX_ATTRIBUTE_A_COMPONENT_BIT_WIDTHS_##widths, \ diff --git a/src/nouveau/vulkan/nvk_format.h b/src/nouveau/vulkan/nvk_format.h index 38263754cb8..cf50581eb34 100644 --- a/src/nouveau/vulkan/nvk_format.h +++ b/src/nouveau/vulkan/nvk_format.h @@ -5,15 +5,6 @@ struct nvk_physical_device; -struct nvk_format { - VkFormat vk_format; - uint8_t hw_format; - - bool supports_2d_blit:1; -}; - -const struct nvk_format *nvk_get_format(VkFormat vk_format); - struct nvk_va_format { uint8_t bit_widths; uint8_t swap_rb:1; diff --git a/src/nouveau/vulkan/nvk_image.c b/src/nouveau/vulkan/nvk_image.c index 501128c1cd3..0451a5ad1d2 100644 --- a/src/nouveau/vulkan/nvk_image.c +++ b/src/nouveau/vulkan/nvk_image.c @@ -33,6 +33,7 @@ nvk_get_image_format_features(struct nvk_physical_device *pdevice, 
features |= VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT; features |= VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT; features |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT; + features |= VK_FORMAT_FEATURE_2_BLIT_SRC_BIT; /* BLIT_SRC is now set alongside SAMPLED_IMAGE instead of through the removed nvk_formats lookup */ if (nil_format_supports_filtering(pdevice->dev, p_format)) { features |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT; @@ -46,6 +47,7 @@ nvk_get_image_format_features(struct nvk_physical_device *pdevice, features |= VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT; if (nil_format_supports_blending(pdevice->dev, p_format)) features |= VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BLEND_BIT; + features |= VK_FORMAT_FEATURE_2_BLIT_DST_BIT; /* BLIT_DST is set in the same branch that sets COLOR_ATTACHMENT */ } if (vk_format_is_depth_or_stencil(vk_format)) { @@ -66,12 +68,6 @@ nvk_get_image_format_features(struct nvk_physical_device *pdevice, if (p_format == PIPE_FORMAT_R32_UINT) features |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_ATOMIC_BIT; - const struct nvk_format *nvk_format = nvk_get_format(vk_format); - if (nvk_format && nvk_format->supports_2d_blit) { - features |= VK_FORMAT_FEATURE_2_BLIT_SRC_BIT | - VK_FORMAT_FEATURE_2_BLIT_DST_BIT; - } - return features; } @@ -220,6 +216,11 @@ nvk_image_init(struct nvk_device *device, { vk_image_init(&device->vk, &image->vk, pCreateInfo); + if (image->vk.usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) + image->vk.usage |= VK_IMAGE_USAGE_SAMPLED_BIT; /* TRANSFER_SRC images are also given SAMPLED usage; NOTE(review): presumably because the meta blit samples its source -- confirm */ + if (image->vk.usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) + image->vk.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; /* TRANSFER_DST images are also given COLOR_ATTACHMENT usage; NOTE(review): no format check here, so this also applies to depth/stencil images -- confirm intended */ + /* TODO: Implement multisampling */ assert(pCreateInfo->samples == VK_SAMPLE_COUNT_1_BIT); diff --git a/src/vulkan/runtime/vk_meta_blit.c b/src/vulkan/runtime/vk_meta_blit.c index cb5c652eaf2..230fbf32e5d 100644 --- a/src/vulkan/runtime/vk_meta_blit.c +++ b/src/vulkan/runtime/vk_meta_blit.c @@ -124,8 +124,8 @@ build_blit_shader(const struct vk_meta_blit_key *key) nir_builder *b = &build; struct glsl_struct_field push_fields[] = { - { .type = glsl_vec4_type(), .name = "xy_xform" }, - { .type = glsl_vec4_type(), .name = "z_xform" }, + { .type = glsl_vec4_type(), 
.name = "xy_xform", .offset = 0 }, + { .type = glsl_vec4_type(), .name = "z_xform", .offset = 16 }, /* offsets are now explicit: z_xform is placed 16 bytes after xy_xform */ }; const struct glsl_type *push_iface_type = glsl_interface_type(push_fields, ARRAY_SIZE(push_fields), @@ -504,16 +504,19 @@ vk_meta_blit_image(struct vk_command_buffer *cmd, uint32_t dst_base_layer, dst_layer_count; if (src_image->image_type == VK_IMAGE_TYPE_3D) { - uint32_t start_layer, end_layer; + dst_base_layer = MIN2(regions[r].dstOffsets[0].z, + regions[r].dstOffsets[1].z); /* the smaller of the two destination z offsets becomes the base layer */ + + uint32_t layer0, layer1; compute_off_scale(src_extent.depth, regions[r].srcOffsets[0].z, regions[r].srcOffsets[1].z, - regions[r].dstOffsets[0].z, - regions[r].dstOffsets[1].z, - &start_layer, &end_layer, + regions[r].dstOffsets[0].z - dst_base_layer, + regions[r].dstOffsets[1].z - dst_base_layer, + &layer0, &layer1, &push.z_off, &push.z_scale); - dst_base_layer = start_layer; - dst_layer_count = end_layer - start_layer; + assert(layer0 == 0); /* both dst z offsets were passed relative to dst_base_layer */ + dst_layer_count = layer1; } else { dst_base_layer = regions[r].dstSubresource.baseArrayLayer; dst_layer_count = regions[r].dstSubresource.layerCount;