diff --git a/src/amd/vulkan/bvh/build_interface.h b/src/amd/vulkan/bvh/build_interface.h index e8bc1c519c7..fc05f73ac61 100644 --- a/src/amd/vulkan/bvh/build_interface.h +++ b/src/amd/vulkan/bvh/build_interface.h @@ -95,7 +95,7 @@ struct copy_args { uint32_t mode; }; -struct convert_internal_args { +struct encode_args { VOID_REF intermediate_bvh; VOID_REF output_bvh; REF(radv_ir_header) header; @@ -104,12 +104,6 @@ struct convert_internal_args { uint32_t geometry_type; }; -struct convert_leaf_args { - VOID_REF intermediate_bvh; - VOID_REF output_bvh; - uint32_t geometry_type; -}; - struct ploc_prefix_scan_partition { uint32_t aggregate; uint32_t inclusive_sum; diff --git a/src/amd/vulkan/bvh/bvh.h b/src/amd/vulkan/bvh/bvh.h index 96ec036519c..eb7f4d7da8d 100644 --- a/src/amd/vulkan/bvh/bvh.h +++ b/src/amd/vulkan/bvh/bvh.h @@ -158,7 +158,7 @@ struct radv_ir_header { int32_t min_bounds[3]; int32_t max_bounds[3]; uint32_t active_leaf_count; - /* Indirect dispatch dimensions for the internal node converter. + /* Indirect dispatch dimensions for the encoder. * ir_internal_node_count is the thread count in the X dimension, * while Y and Z are always set to 1. */ uint32_t ir_internal_node_count; diff --git a/src/amd/vulkan/bvh/converter_leaf.comp b/src/amd/vulkan/bvh/converter_leaf.comp deleted file mode 100644 index 167549f8ca1..00000000000 --- a/src/amd/vulkan/bvh/converter_leaf.comp +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright © 2022 Friedrich Vock - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#version 460 - -#extension GL_GOOGLE_include_directive : require - -#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require -#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require -#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require -#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require -#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require -#extension GL_EXT_scalar_block_layout : require -#extension GL_EXT_buffer_reference : require -#extension GL_EXT_buffer_reference2 : require - -layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; - -#include "build_helpers.h" -#include "build_interface.h" - -layout(push_constant) uniform CONSTS { - convert_leaf_args args; -}; - -uint32_t -convert_geometry_id_and_flags(uint32_t src) -{ - uint32_t flags = src >> 28; - uint32_t ret = src & 0xfffffffu; - if ((flags & VK_GEOMETRY_OPAQUE_BIT_KHR) != 0) - ret |= RADV_GEOMETRY_OPAQUE; - return ret; -} - -uint32_t -convert_sbt_offset_and_flags(uint32_t src) -{ - uint32_t flags = src >> 24; - uint32_t ret = src & 0xffffffu; - if ((flags & VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR) != 0) - ret |= RADV_INSTANCE_FORCE_OPAQUE; - if ((flags & VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR) == 0) - ret |= RADV_INSTANCE_NO_FORCE_NOT_OPAQUE; - if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR) != 0) - ret |= RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE; - if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR) != 0) - ret |= RADV_INSTANCE_TRIANGLE_FLIP_FACING; - return ret; -} - -void -main() -{ - uint32_t global_id = gl_GlobalInvocationID.x; - uint32_t dst_leaf_offset = - id_to_offset(RADV_BVH_ROOT_NODE) + SIZEOF(radv_bvh_box32_node); - - VOID_REF dst_leaves = OFFSET(args.output_bvh, dst_leaf_offset); - switch (args.geometry_type) { - case VK_GEOMETRY_TYPE_TRIANGLES_KHR: { - radv_ir_triangle_node src = - DEREF(INDEX(radv_ir_triangle_node, args.intermediate_bvh, global_id)); - REF(radv_bvh_triangle_node) dst = - INDEX(radv_bvh_triangle_node, dst_leaves, global_id); - - DEREF(dst).coords = src.coords; - DEREF(dst).triangle_id = src.triangle_id; - DEREF(dst).geometry_id_and_flags = convert_geometry_id_and_flags(src.geometry_id_and_flags); - DEREF(dst).id = src.id; - break; - } - case VK_GEOMETRY_TYPE_AABBS_KHR: { - radv_ir_aabb_node src = - DEREF(INDEX(radv_ir_aabb_node, args.intermediate_bvh, global_id)); - REF(radv_bvh_aabb_node) dst = - INDEX(radv_bvh_aabb_node, dst_leaves, global_id); - DEREF(dst).aabb = src.base.aabb; - DEREF(dst).primitive_id = src.primitive_id; - DEREF(dst).geometry_id_and_flags = convert_geometry_id_and_flags(src.geometry_id_and_flags); - break; - } - default: { /* instances */ - radv_ir_instance_node src = - DEREF(INDEX(radv_ir_instance_node, args.intermediate_bvh, global_id)); - REF(radv_bvh_instance_node) dst = - INDEX(radv_bvh_instance_node, dst_leaves, global_id); - uint32_t bvh_offset = 0; - if (src.base_ptr != 0) - bvh_offset = DEREF(REF(radv_accel_struct_header)(src.base_ptr)).bvh_offset; - - DEREF(dst).bvh_ptr = addr_to_node(src.base_ptr + bvh_offset); - DEREF(dst).custom_instance_and_mask = src.custom_instance_and_mask; - DEREF(dst).sbt_offset_and_flags = convert_sbt_offset_and_flags(src.sbt_offset_and_flags); - DEREF(dst).instance_id = src.instance_id; - DEREF(dst).bvh_offset = bvh_offset; - - mat4 transform = mat4(src.otw_matrix); - - mat4 inv_transform = transpose(inverse(transpose(transform))); - DEREF(dst).wto_matrix = mat3x4(inv_transform); - DEREF(dst).otw_matrix = mat3x4(transform); - - break; - } - } -} diff --git a/src/amd/vulkan/bvh/converter_internal.comp b/src/amd/vulkan/bvh/encode.comp similarity index 76% rename from src/amd/vulkan/bvh/converter_internal.comp rename to src/amd/vulkan/bvh/encode.comp index 2eb8427fbec..f77283ea191 100644 --- a/src/amd/vulkan/bvh/converter_internal.comp +++ b/src/amd/vulkan/bvh/encode.comp @@ -41,7 +41,7 @@ layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; #include "build_interface.h" layout(push_constant) uniform CONSTS { - convert_internal_args args; + encode_args args; }; void set_parent(uint32_t child, uint32_t parent) @@ -50,6 +50,76 @@ void set_parent(uint32_t child, uint32_t parent) DEREF(REF(uint32_t)(addr)) = parent; } +uint32_t +encode_geometry_id_and_flags(uint32_t src) +{ + uint32_t flags = src >> 28; + uint32_t ret = src & 0xfffffffu; + if ((flags & VK_GEOMETRY_OPAQUE_BIT_KHR) != 0) + ret |= RADV_GEOMETRY_OPAQUE; + return ret; +} + +uint32_t +encode_sbt_offset_and_flags(uint32_t src) +{ + uint32_t flags = src >> 24; + uint32_t ret = src & 0xffffffu; + if ((flags & VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR) != 0) + ret |= RADV_INSTANCE_FORCE_OPAQUE; + if ((flags & VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR) == 0) + ret |= RADV_INSTANCE_NO_FORCE_NOT_OPAQUE; + if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR) != 0) + ret |= RADV_INSTANCE_TRIANGLE_FACING_CULL_DISABLE; + if ((flags & VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR) != 0) + ret |= RADV_INSTANCE_TRIANGLE_FLIP_FACING; + return ret; +} + +void +encode_leaf_node(uint32_t type, uint64_t src_node, uint64_t dst_node) +{ + switch (type) { + case radv_ir_node_triangle: { + radv_ir_triangle_node src = DEREF(REF(radv_ir_triangle_node)(src_node)); + REF(radv_bvh_triangle_node) dst = REF(radv_bvh_triangle_node)(dst_node); + + DEREF(dst).coords = src.coords; + DEREF(dst).triangle_id = src.triangle_id; + DEREF(dst).geometry_id_and_flags = encode_geometry_id_and_flags(src.geometry_id_and_flags); + DEREF(dst).id = src.id; + break; + } + case radv_ir_node_aabb: { + radv_ir_aabb_node src = DEREF(REF(radv_ir_aabb_node)(src_node)); + REF(radv_bvh_aabb_node) dst = REF(radv_bvh_aabb_node)(dst_node); + + DEREF(dst).aabb = src.base.aabb; + DEREF(dst).primitive_id = src.primitive_id; + DEREF(dst).geometry_id_and_flags = encode_geometry_id_and_flags(src.geometry_id_and_flags); + break; + } + case radv_ir_node_instance: { + radv_ir_instance_node src = DEREF(REF(radv_ir_instance_node)(src_node)); + REF(radv_bvh_instance_node) dst = REF(radv_bvh_instance_node)(dst_node); + uint32_t bvh_offset = DEREF(REF(radv_accel_struct_header)(src.base_ptr)).bvh_offset; + + DEREF(dst).bvh_ptr = addr_to_node(src.base_ptr + bvh_offset); + DEREF(dst).custom_instance_and_mask = src.custom_instance_and_mask; + DEREF(dst).sbt_offset_and_flags = encode_sbt_offset_and_flags(src.sbt_offset_and_flags); + DEREF(dst).instance_id = src.instance_id; + DEREF(dst).bvh_offset = bvh_offset; + + mat4 transform = mat4(src.otw_matrix); + + mat4 inv_transform = transpose(inverse(transpose(transform))); + DEREF(dst).wto_matrix = mat3x4(inv_transform); + DEREF(dst).otw_matrix = mat3x4(transform); + break; + } + } +} + void main() { @@ -165,6 +235,8 @@ main() if (offset < intermediate_leaf_nodes_size) { uint32_t child_index = offset / intermediate_leaf_node_size; dst_offset = dst_leaf_offset + child_index * output_leaf_node_size; + + encode_leaf_node(type, args.intermediate_bvh + offset, args.output_bvh + dst_offset); } else { uint32_t offset_in_internal_nodes = offset - intermediate_leaf_nodes_size; uint32_t child_index = offset_in_internal_nodes / SIZEOF(radv_ir_box_node); diff --git a/src/amd/vulkan/bvh/lbvh_generate_ir.comp b/src/amd/vulkan/bvh/lbvh_generate_ir.comp index 3956509d449..e800cad2753 100644 --- a/src/amd/vulkan/bvh/lbvh_generate_ir.comp +++ b/src/amd/vulkan/bvh/lbvh_generate_ir.comp @@ -72,7 +72,7 @@ main(void) break; /* We allocate nodes on demand with the atomic here to ensure children come before their - * parents, which is a requirement of the converter. + * parents, which is a requirement of the encoder. */ uint32_t dst_idx = atomicAdd(DEREF(REF(radv_ir_header)(args.header)).ir_internal_node_count, 1); diff --git a/src/amd/vulkan/bvh/meson.build b/src/amd/vulkan/bvh/meson.build index 1d3674bf7fa..bcd8bb6738f 100644 --- a/src/amd/vulkan/bvh/meson.build +++ b/src/amd/vulkan/bvh/meson.build @@ -25,6 +25,11 @@ bvh_shaders = [ 'copy', [], ], + [ + 'encode.comp', + 'encode', + [], + ], [ 'lbvh_generate_ir.comp', 'lbvh_generate_ir', @@ -55,16 +60,6 @@ bvh_shaders = [ 'ploc_internal_extended', ['EXTENDED_SAH=1'], ], - [ - 'converter_internal.comp', - 'converter_internal', - [], - ], - [ - 'converter_leaf.comp', - 'converter_leaf', - [], - ], ] bvh_include_dir = dir_source_root + '/src/amd/vulkan/bvh' diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c index c1c43ad42f3..f69a64eeb55 100644 --- a/src/amd/vulkan/radv_acceleration_structure.c +++ b/src/amd/vulkan/radv_acceleration_structure.c @@ -61,12 +61,8 @@ static const uint32_t copy_spv[] = { #include "bvh/copy.spv.h" }; -static const uint32_t convert_leaf_spv[] = { -#include "bvh/converter_leaf.spv.h" -}; - -static const uint32_t convert_internal_spv[] = { -#include "bvh/converter_internal.spv.h" +static const uint32_t encode_spv[] = { +#include "bvh/encode.spv.h" }; #define KEY_ID_PAIR_SIZE 8 @@ -357,10 +353,8 @@ radv_device_finish_accel_struct_build_state(struct radv_device *device) &state->alloc); radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.leaf_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->accel_struct_build.convert_leaf_pipeline, &state->alloc); - radv_DestroyPipeline(radv_device_to_handle(device), - state->accel_struct_build.convert_internal_pipeline, &state->alloc); + radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.encode_pipeline, + &state->alloc); radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.morton_pipeline, &state->alloc); radv_DestroyPipelineLayout(radv_device_to_handle(device), @@ -374,9 +368,7 @@ radv_device_finish_accel_struct_build_state(struct radv_device *device) radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.leaf_p_layout, &state->alloc); radv_DestroyPipelineLayout(radv_device_to_handle(device), - state->accel_struct_build.convert_leaf_p_layout, &state->alloc); - radv_DestroyPipelineLayout(radv_device_to_handle(device), - state->accel_struct_build.convert_internal_p_layout, &state->alloc); + state->accel_struct_build.encode_p_layout, &state->alloc); radv_DestroyPipelineLayout(radv_device_to_handle(device), state->accel_struct_build.morton_p_layout, &state->alloc); @@ -622,18 +614,10 @@ radv_device_init_accel_struct_build_state(struct radv_device *device) if (result != VK_SUCCESS) return result; - result = create_build_pipeline_spv(device, convert_leaf_spv, sizeof(convert_leaf_spv), - sizeof(struct convert_leaf_args), - &device->meta_state.accel_struct_build.convert_leaf_pipeline, - &device->meta_state.accel_struct_build.convert_leaf_p_layout); - if (result != VK_SUCCESS) - return result; - result = - create_build_pipeline_spv(device, convert_internal_spv, sizeof(convert_internal_spv), - sizeof(struct convert_internal_args), - &device->meta_state.accel_struct_build.convert_internal_pipeline, - &device->meta_state.accel_struct_build.convert_internal_p_layout); + create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args), + &device->meta_state.accel_struct_build.encode_pipeline, + &device->meta_state.accel_struct_build.encode_p_layout); if (result != VK_SUCCESS) return result; @@ -947,43 +931,13 @@ ploc_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount, } static void -convert_leaf_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount, - const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, - struct bvh_state *bvh_states) +encode_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, + struct bvh_state *bvh_states) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); radv_CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, - cmd_buffer->device->meta_state.accel_struct_build.convert_leaf_pipeline); - for (uint32_t i = 0; i < infoCount; ++i) { - if (!pInfos[i].geometryCount) - continue; - - RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct, - pInfos[i].dstAccelerationStructure); - - const struct convert_leaf_args args = { - .intermediate_bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset, - .output_bvh = accel_struct->va + bvh_states[i].accel_struct.bvh_offset, - .geometry_type = pInfos[i].pGeometries ? pInfos[i].pGeometries[0].geometryType - : pInfos[i].ppGeometries[0]->geometryType, - }; - radv_CmdPushConstants(commandBuffer, - cmd_buffer->device->meta_state.accel_struct_build.convert_leaf_p_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args); - radv_unaligned_dispatch(cmd_buffer, bvh_states[i].leaf_node_count, 1, 1); - } - /* This is the final access to the leaf nodes, no need to flush */ -} - -static void -convert_internal_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount, - const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, - struct bvh_state *bvh_states) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - radv_CmdBindPipeline( - commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, - cmd_buffer->device->meta_state.accel_struct_build.convert_internal_pipeline); + cmd_buffer->device->meta_state.accel_struct_build.encode_pipeline); for (uint32_t i = 0; i < infoCount; ++i) { RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure); @@ -997,7 +951,7 @@ convert_internal_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount, geometry_type = pInfos[i].pGeometries ? pInfos[i].pGeometries[0].geometryType : pInfos[i].ppGeometries[0]->geometryType; - const struct convert_internal_args args = { + const struct encode_args args = { .intermediate_bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset, .output_bvh = accel_struct->va + bvh_states[i].accel_struct.bvh_offset, .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset, @@ -1005,9 +959,9 @@ convert_internal_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount, .leaf_node_count = bvh_states[i].leaf_node_count, .geometry_type = geometry_type, }; - radv_CmdPushConstants( - commandBuffer, cmd_buffer->device->meta_state.accel_struct_build.convert_internal_p_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args); + radv_CmdPushConstants(commandBuffer, + cmd_buffer->device->meta_state.accel_struct_build.encode_p_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args); radv_indirect_unaligned_dispatch(cmd_buffer, NULL, pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset + @@ -1084,9 +1038,7 @@ radv_CmdBuildAccelerationStructuresKHR( cmd_buffer->state.flush_bits |= flush_bits; - convert_leaf_nodes(commandBuffer, infoCount, pInfos, bvh_states); - - convert_internal_nodes(commandBuffer, infoCount, pInfos, bvh_states); + encode_nodes(commandBuffer, infoCount, pInfos, bvh_states); for (uint32_t i = 0; i < infoCount; ++i) { RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct, diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index f35257e917c..e3bc9cbc1d1 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -689,10 +689,8 @@ struct radv_meta_state { VkPipelineLayout ploc_p_layout; VkPipeline ploc_pipeline; VkPipeline ploc_extended_pipeline; - VkPipelineLayout convert_leaf_p_layout; - VkPipeline convert_leaf_pipeline; - VkPipelineLayout convert_internal_p_layout; - VkPipeline convert_internal_pipeline; + VkPipelineLayout encode_p_layout; + VkPipeline encode_pipeline; VkPipelineLayout copy_p_layout; VkPipeline copy_pipeline;