radv: Add new LBVH shaders.
Contrary to the previous implementation, this actually implements an LBVH builder. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19891>
This commit is contained in:
committed by
Marge Bot
parent
f531f671ef
commit
ea159e47a5
@@ -65,6 +65,34 @@ struct lbvh_internal_args {
|
||||
uint32_t src_count;
|
||||
};
|
||||
|
||||
/* Encoding of lbvh_node_info::parent: the low bits hold the index of the parent
 * entry, and this flag is ORed in when the node hangs off the parent's
 * children[1] slot instead of children[0].
 */
#define LBVH_RIGHT_CHILD_BIT_SHIFT 29
#define LBVH_RIGHT_CHILD_BIT       (1 << LBVH_RIGHT_CHILD_BIT_SHIFT)
|
||||
|
||||
/* Bookkeeping record for one internal node of the LBVH. The lbvh_main shader
 * fills these in and the lbvh_generate_ir shader consumes them.
 */
struct lbvh_node_info {
   /* Arrival counter for the lbvh_generate_ir pass. lbvh_main seeds it with the
    * number of children that are leaves, and lbvh_generate_ir bumps it
    * atomically each time a path reaches this node.
    */
   uint32_t path_count;

   /* Node IDs of the left ([0]) and right ([1]) child. RADV_BVH_INVALID_NODE
    * marks a missing child.
    */
   uint32_t children[2];

   /* Index of the parent's lbvh_node_info entry, combined with
    * LBVH_RIGHT_CHILD_BIT when this node is the parent's right child.
    * RADV_BVH_INVALID_NODE when there is no parent to walk up to.
    */
   uint32_t parent;
};
|
||||
|
||||
struct lbvh_main_args {
|
||||
VOID_REF bvh;
|
||||
REF(key_id_pair) src_ids;
|
||||
VOID_REF node_info;
|
||||
uint32_t id_count;
|
||||
uint32_t internal_node_base;
|
||||
};
|
||||
|
||||
struct lbvh_generate_ir_args {
|
||||
VOID_REF bvh;
|
||||
VOID_REF node_info;
|
||||
VOID_REF header;
|
||||
uint32_t internal_node_base;
|
||||
};
|
||||
|
||||
/* Mode selectors. NOTE(review): presumably consumed by the copy shader's
 * arguments, which are not visible here -- confirm.
 */
#define RADV_COPY_MODE_COPY        0
#define RADV_COPY_MODE_SERIALIZE   1
#define RADV_COPY_MODE_DESERIALIZE 2
|
||||
|
||||
@@ -0,0 +1,138 @@
|
||||
/*
|
||||
* Copyright © 2022 Bas Nieuwenhuizen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#version 460
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
|
||||
#extension GL_EXT_scalar_block_layout : require
|
||||
#extension GL_EXT_buffer_reference : require
|
||||
#extension GL_EXT_buffer_reference2 : require
|
||||
#extension GL_KHR_memory_scope_semantics : require
|
||||
|
||||
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
#include "build_interface.h"
|
||||
|
||||
TYPE(lbvh_node_info, 4);
|
||||
|
||||
/* Shader arguments, delivered as push constants. */
layout(push_constant) uniform CONSTS
{
   lbvh_generate_ir_args args;
};
|
||||
|
||||
void
main(void)
{
   /* Each invocation starts at the node_info entry matching its global index and
    * then follows parent links for as long as it is the last path to arrive at
    * a node.
    */
   uint32_t node_index = gl_GlobalInvocationID.x;

   /* ID and bounds of the node written by the previous loop iteration. Before
    * the first iteration this is an invalid ID paired with an inverted box.
    */
   uint32_t last_id = RADV_BVH_INVALID_NODE;
   radv_aabb last_bounds;
   last_bounds.min = vec3(INFINITY);
   last_bounds.max = vec3(-INFINITY);

   while (true) {
      /* Register this path at the node. Only the invocation that reads back 2
       * (all children accounted for) carries on; every earlier arrival stops.
       */
      uint32_t arrivals = atomicAdd(
         DEREF(INDEX(lbvh_node_info, args.node_info, node_index)).path_count, 1, gl_ScopeDevice,
         gl_StorageSemanticsBuffer,
         gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
      if (arrivals != 2)
         break;

      /* Output slots are handed out on demand so that children are always
       * allocated before their parents, as the converter requires.
       */
      uint32_t slot_index =
         atomicAdd(DEREF(REF(radv_ir_header)(args.header)).ir_internal_node_count, 1);

      uint32_t out_offset = args.internal_node_base + slot_index * SIZEOF(radv_ir_box_node);
      uint32_t out_id = pack_ir_node_id(out_offset, radv_ir_node_internal);
      REF(radv_ir_box_node) out_node = REF(radv_ir_box_node)(OFFSET(args.bvh, out_offset));

      /* Start from the bounds carried over from the previous iteration. */
      radv_aabb merged = last_bounds;

      lbvh_node_info info = DEREF(INDEX(lbvh_node_info, args.node_info, node_index));
      uint32_t children[2] = info.children;

      /* If the node emitted last iteration is one of the children, its bounds
       * are already part of `merged` and need not be fetched again.
       */
      int32_t cached_slot = last_id == children[0] ? 0 : (last_id == children[1] ? 1 : -1);

      /* Fold in every valid child that is not already covered by the cache. */
      for (int32_t slot = 0; slot < 2; slot++) {
         if (slot == cached_slot)
            continue;
         if (children[slot] == RADV_BVH_INVALID_NODE)
            continue;

         uint32_t child_offset = ir_id_to_offset(children[slot]);
         REF(radv_ir_node) child = REF(radv_ir_node)(OFFSET(args.bvh, child_offset));
         radv_aabb child_bounds = DEREF(child).aabb;
         merged.min = min(merged.min, child_bounds.min);
         merged.max = max(merged.max, child_bounds.max);
      }

      radv_ir_box_node out_value;
      out_value.base.aabb = merged;
      out_value.in_final_tree = FINAL_TREE_UNKNOWN;
      out_value.children = children;
      DEREF(out_node) = out_value;

      /* No parent link: nothing left to walk up to. */
      if (info.parent == RADV_BVH_INVALID_NODE)
         break;

      /* Move to the parent and patch the child slot we came from so that it
       * refers to the node that was just written.
       */
      uint32_t parent_slot = (info.parent >> LBVH_RIGHT_CHILD_BIT_SHIFT) & 1;
      node_index = info.parent & ~LBVH_RIGHT_CHILD_BIT;
      DEREF(INDEX(lbvh_node_info, args.node_info, node_index)).children[parent_slot] = out_id;

      last_id = out_id;
      last_bounds = merged;

      memoryBarrierBuffer();
   }
}
|
||||
@@ -0,0 +1,157 @@
|
||||
/*
|
||||
* Copyright © 2022 Bas Nieuwenhuizen
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#version 460
|
||||
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
|
||||
#extension GL_EXT_scalar_block_layout : require
|
||||
#extension GL_EXT_buffer_reference : require
|
||||
#extension GL_EXT_buffer_reference2 : require
|
||||
|
||||
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
#include "build_interface.h"
|
||||
|
||||
TYPE(lbvh_node_info, 4);
|
||||
|
||||
/* Shader arguments, delivered as push constants. */
layout(push_constant) uniform CONSTS
{
   lbvh_main_args args;
};
|
||||
|
||||
/* Length of the common prefix shared by entries i and j of args.src_ids.
 *
 * key_i is the (already loaded) key of entry i. Returns -1 when j lies outside
 * of [0, args.id_count). When both keys are equal, the indices themselves are
 * used as a tie-breaker and the result is offset by 32.
 */
int32_t
longest_common_prefix(int32_t i, uint32_t key_i, int32_t j)
{
   if (j < 0 || j >= args.id_count)
      return -1;

   uint32_t key_j = DEREF(INDEX(key_id_pair, args.src_ids, j)).key;

   /* Different keys: count the leading bits on which they agree. */
   if (key_i != key_j)
      return 31 - findMSB(key_i ^ key_j);

   /* Identical keys: all 32 key bits match, continue with the index bits. */
   uint32_t index_diff = i ^ j;
   return 32 + 31 - findMSB(index_diff);
}
|
||||
|
||||
/*
|
||||
* The LBVH algorithm constructs a radix tree of the sorted nodes according to their key.
|
||||
*
|
||||
* We do this by making the following decision:
|
||||
*
|
||||
* Node N always either starts or ends at leaf N.
|
||||
*
|
||||
* From there it follows that we always have to extend it into the direction which has
|
||||
* a longer common prefix with the direct neighbour. Then we try to make the node cover
|
||||
* as many leaves as possible without including the other neighbour.
|
||||
*
|
||||
* For finding the split point we compute the longest common prefix of all the leaves within the
|
||||
* node, and look for the first leaf whose common prefix with leaf N has exactly that length.
|
||||
*
|
||||
* To give an example: leaves=[000,001,010,011,100,101,110,111], node=5 (with value 101)
|
||||
*
|
||||
* lcp(101, 100) = 2 and lcp(101, 110) = 1, so we extend down.
|
||||
* lcp(101, 011) = 0, so the range of the node is [4,5] with values [100, 101]
|
||||
*
|
||||
* the lcp of all the leaves in the range is the same as the lcp of the first and last leaf, in this
|
||||
* case that is lcp(101, 100) = 2. Then we have lcp(101, 101) = 3 and lcp(101, 100) = 2, so the first
|
||||
* leaf whose lcp is not longer than that is 4. Hence the two children of this node have range [4,4] and [5,5]
|
||||
*/
|
||||
void
main()
{
   /* With at most one entry there is no radix tree to build: write a single
    * node_info record that has no parent and whose path_count is already 2.
    */
   if (args.id_count <= 1) {
      REF(lbvh_node_info) root = REF(lbvh_node_info)(args.node_info);
      DEREF(root).parent = RADV_BVH_INVALID_NODE;
      DEREF(root).path_count = 2;
      DEREF(root).children[0] =
         args.id_count == 1 ? DEREF(INDEX(key_id_pair, args.src_ids, 0)).id : RADV_BVH_INVALID_NODE;
      DEREF(root).children[1] = RADV_BVH_INVALID_NODE;
      return;
   }

   int32_t self = int32_t(gl_GlobalInvocationID.x);
   uint32_t self_key = DEREF(INDEX(key_id_pair, args.src_ids, self)).key;

   /* Grow towards the neighbour that shares the longer prefix with us. */
   int32_t lcp_prev = longest_common_prefix(self, self_key, self - 1);
   int32_t lcp_next = longest_common_prefix(self, self_key, self + 1);
   int32_t step = -1;
   if (lcp_next > lcp_prev)
      step = 1;
   int32_t lcp_floor = min(lcp_prev, lcp_next);

   /* Find an upper bound for the length of the range owned by this node by
    * doubling until the prefix no longer beats the other neighbour's.
    */
   int32_t search_limit = 128;
   while (longest_common_prefix(self, self_key, self + step * search_limit) > lcp_floor)
      search_limit *= 2;

   /* Binary search for the exact length of the range. */
   int32_t span = 0;
   for (int32_t probe = search_limit / 2; probe >= 1; probe /= 2) {
      int32_t candidate = self + (span + probe) * step;
      if (longest_common_prefix(self, self_key, candidate) > lcp_floor)
         span += probe;
   }
   int32_t far_end = self + span * step;

   /* Prefix length shared by every element of the range. */
   int32_t lcp_range = longest_common_prefix(self, self_key, far_end);

   /* Binary search for how far the prefix stays longer than lcp_range; that is
    * where the range is split between the two children.
    */
   int32_t split_offset = 0;
   for (int32_t divisor = 2; divisor < 2 * span; divisor *= 2) {
      int32_t probe = DIV_ROUND_UP(span, divisor);
      int32_t candidate = self + (split_offset + probe) * step;
      if (longest_common_prefix(self, self_key, candidate) > lcp_range)
         split_offset += probe;
   }
   int32_t split = self + split_offset * step;

   /* When growing downwards (step = -1) the split index is the right child. */
   int32_t left = split + min(step, 0);
   int32_t right = left + 1;

   /* A child that covers exactly one entry is that leaf itself. */
   bool left_is_leaf = min(self, far_end) == left;
   bool right_is_leaf = max(self, far_end) == right;

   /* Internal children get a link back to this node. */
   if (!left_is_leaf)
      DEREF(INDEX(lbvh_node_info, args.node_info, left)).parent = self;
   if (!right_is_leaf)
      DEREF(INDEX(lbvh_node_info, args.node_info, right)).parent = LBVH_RIGHT_CHILD_BIT | self;

   /* Leaf children count as already processed. */
   int32_t leaf_children = 0;
   if (left_is_leaf)
      leaf_children += 1;
   if (right_is_leaf)
      leaf_children += 1;

   REF(lbvh_node_info) record = INDEX(lbvh_node_info, args.node_info, self);
   DEREF(record).path_count = leaf_children;
   DEREF(record).children[0] = DEREF(INDEX(key_id_pair, args.src_ids, left)).id;
   DEREF(record).children[1] = DEREF(INDEX(key_id_pair, args.src_ids, right)).id;

   /* Node 0 is the one that receives no parent link. */
   if (self == 0)
      DEREF(record).parent = RADV_BVH_INVALID_NODE;
}
|
||||
@@ -21,6 +21,8 @@
|
||||
bvh_shaders = [
|
||||
'copy.comp',
|
||||
'lbvh_internal.comp',
|
||||
'lbvh_generate_ir.comp',
|
||||
'lbvh_main.comp',
|
||||
'leaf.comp',
|
||||
'morton.comp',
|
||||
'ploc_internal.comp',
|
||||
|
||||
@@ -43,6 +43,14 @@ static const uint32_t lbvh_internal_spv[] = {
|
||||
#include "bvh/lbvh_internal.comp.spv.h"
|
||||
};
|
||||
|
||||
/* Contents of bvh/lbvh_main.comp.spv.h, passed to create_build_pipeline_spv()
 * to build the lbvh_main pipeline.
 */
static const uint32_t lbvh_main_spv[] = {
#include "bvh/lbvh_main.comp.spv.h"
};
|
||||
|
||||
/* Contents of bvh/lbvh_generate_ir.comp.spv.h, passed to
 * create_build_pipeline_spv() to build the lbvh_generate_ir pipeline.
 */
static const uint32_t lbvh_generate_ir_spv[] = {
#include "bvh/lbvh_generate_ir.comp.spv.h"
};
|
||||
|
||||
/* Contents of bvh/ploc_internal.comp.spv.h, passed to
 * create_build_pipeline_spv() to build the ploc pipeline.
 */
static const uint32_t ploc_spv[] = {
#include "bvh/ploc_internal.comp.spv.h"
};
|
||||
@@ -316,6 +324,10 @@ radv_device_finish_accel_struct_build_state(struct radv_device *device)
|
||||
&state->alloc);
|
||||
radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.ploc_pipeline,
|
||||
&state->alloc);
|
||||
radv_DestroyPipeline(radv_device_to_handle(device),
|
||||
state->accel_struct_build.lbvh_generate_ir_pipeline, &state->alloc);
|
||||
radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.lbvh_main_pipeline,
|
||||
&state->alloc);
|
||||
radv_DestroyPipeline(radv_device_to_handle(device),
|
||||
state->accel_struct_build.lbvh_internal_pipeline, &state->alloc);
|
||||
radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.leaf_pipeline,
|
||||
@@ -330,6 +342,10 @@ radv_device_finish_accel_struct_build_state(struct radv_device *device)
|
||||
state->accel_struct_build.copy_p_layout, &state->alloc);
|
||||
radv_DestroyPipelineLayout(radv_device_to_handle(device),
|
||||
state->accel_struct_build.ploc_p_layout, &state->alloc);
|
||||
radv_DestroyPipelineLayout(radv_device_to_handle(device),
|
||||
state->accel_struct_build.lbvh_generate_ir_p_layout, &state->alloc);
|
||||
radv_DestroyPipelineLayout(radv_device_to_handle(device),
|
||||
state->accel_struct_build.lbvh_main_p_layout, &state->alloc);
|
||||
radv_DestroyPipelineLayout(radv_device_to_handle(device),
|
||||
state->accel_struct_build.lbvh_internal_p_layout, &state->alloc);
|
||||
radv_DestroyPipelineLayout(radv_device_to_handle(device),
|
||||
@@ -434,6 +450,21 @@ radv_device_init_accel_struct_build_state(struct radv_device *device)
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
result = create_build_pipeline_spv(device, lbvh_main_spv, sizeof(lbvh_main_spv),
|
||||
sizeof(struct lbvh_main_args),
|
||||
&device->meta_state.accel_struct_build.lbvh_main_pipeline,
|
||||
&device->meta_state.accel_struct_build.lbvh_main_p_layout);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
result =
|
||||
create_build_pipeline_spv(device, lbvh_generate_ir_spv, sizeof(lbvh_generate_ir_spv),
|
||||
sizeof(struct lbvh_generate_ir_args),
|
||||
&device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline,
|
||||
&device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
result = create_build_pipeline_spv(device, ploc_spv, sizeof(ploc_spv), sizeof(struct ploc_args),
|
||||
&device->meta_state.accel_struct_build.ploc_pipeline,
|
||||
&device->meta_state.accel_struct_build.ploc_p_layout);
|
||||
|
||||
@@ -679,6 +679,10 @@ struct radv_meta_state {
|
||||
VkPipeline morton_pipeline;
|
||||
VkPipelineLayout lbvh_internal_p_layout;
|
||||
VkPipeline lbvh_internal_pipeline;
|
||||
VkPipelineLayout lbvh_main_p_layout;
|
||||
VkPipeline lbvh_main_pipeline;
|
||||
VkPipelineLayout lbvh_generate_ir_p_layout;
|
||||
VkPipeline lbvh_generate_ir_pipeline;
|
||||
VkPipelineLayout ploc_p_layout;
|
||||
VkPipeline ploc_pipeline;
|
||||
VkPipelineLayout convert_leaf_p_layout;
|
||||
|
||||
Reference in New Issue
Block a user