From ac4593534508dabcb6b3e160e79ff509efb97e94 Mon Sep 17 00:00:00 2001 From: Konstantin Seurer Date: Sat, 24 Sep 2022 14:28:19 +0200 Subject: [PATCH] radv: Add a common traversal build helper Adds a helper for building the ray traversal loop to radv_rt_common. Reviewed-by: Bas Nieuwenhuizen Reviewed-by: Konstantin Seurer Part-of: --- src/amd/vulkan/radv_rt_common.c | 267 ++++++++++++++++++++++++++++++++ src/amd/vulkan/radv_rt_common.h | 93 +++++++++++ 2 files changed, 360 insertions(+) diff --git a/src/amd/vulkan/radv_rt_common.c b/src/amd/vulkan/radv_rt_common.c index 3ea151ab001..60a51a82255 100644 --- a/src/amd/vulkan/radv_rt_common.c +++ b/src/amd/vulkan/radv_rt_common.c @@ -421,3 +421,270 @@ create_bvh_descriptor(nir_builder *b) b, 0, 1u << 31 /* Enable box sorting */, (bvh_size - 1) & 0xFFFFFFFFu, ((bvh_size - 1) >> 32) | (1u << 24 /* Return IJ for triangles */) | (1u << 31)); } + +static void +insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, + const struct radv_ray_traversal_args *args, nir_ssa_def *result, + nir_ssa_def *bvh_node) +{ + struct radv_triangle_intersection intersection; + intersection.t = nir_channel(b, result, 0); + nir_ssa_def *div = nir_channel(b, result, 1); + intersection.t = nir_fdiv(b, intersection.t, div); + intersection.frontface = nir_flt(b, nir_imm_float(b, 0), div); + nir_ssa_def *switch_ccw = nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), + VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR << 24); + intersection.frontface = nir_ixor(b, intersection.frontface, switch_ccw); + + nir_ssa_def *not_cull = + nir_inot(b, nir_test_mask(b, args->flags, SpvRayFlagsSkipTrianglesKHRMask)); + nir_ssa_def *not_facing_cull = + nir_ieq_imm(b, + nir_iand(b, args->flags, + nir_bcsel(b, intersection.frontface, + nir_imm_int(b, SpvRayFlagsCullFrontFacingTrianglesKHRMask), + nir_imm_int(b, SpvRayFlagsCullBackFacingTrianglesKHRMask))), + 0); + + not_cull = nir_iand( + b, not_cull, + nir_ior(b, 
not_facing_cull, + nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), + VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR << 24))); + + nir_push_if(b, + nir_iand(b, + nir_iand(b, nir_flt(b, intersection.t, nir_load_deref(b, args->vars.tmax)), + nir_flt(b, args->tmin, intersection.t)), + not_cull)); + { + intersection.base.node_addr = build_node_to_addr(device, b, bvh_node); + nir_ssa_def *triangle_info = + nir_build_load_global(b, 2, 32, + nir_iadd_imm(b, intersection.base.node_addr, + offsetof(struct radv_bvh_triangle_node, triangle_id))); + intersection.base.primitive_id = nir_channel(b, triangle_info, 0); + intersection.base.geometry_id_and_flags = nir_channel(b, triangle_info, 1); + intersection.base.opaque = + hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), args->flags, + intersection.base.geometry_id_and_flags); + + not_cull = nir_ieq_imm(b, + nir_iand(b, args->flags, + nir_bcsel(b, intersection.base.opaque, + nir_imm_int(b, SpvRayFlagsCullOpaqueKHRMask), + nir_imm_int(b, SpvRayFlagsCullNoOpaqueKHRMask))), + 0); + nir_push_if(b, not_cull); + { + nir_ssa_def *divs[2] = {div, div}; + intersection.barycentrics = nir_fdiv(b, nir_channels(b, result, 0xc), nir_vec(b, divs, 2)); + + args->triangle_cb(b, &intersection, args); + } + nir_pop_if(b, NULL); + } + nir_pop_if(b, NULL); +} + +static void +insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, + const struct radv_ray_traversal_args *args, nir_ssa_def *bvh_node) +{ + struct radv_leaf_intersection intersection; + intersection.node_addr = build_node_to_addr(device, b, bvh_node); + nir_ssa_def *triangle_info = + nir_build_load_global(b, 2, 32, nir_iadd_imm(b, intersection.node_addr, 24)); + intersection.primitive_id = nir_channel(b, triangle_info, 0); + intersection.geometry_id_and_flags = nir_channel(b, triangle_info, 1); + intersection.opaque = hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), + args->flags, 
intersection.geometry_id_and_flags); + + nir_ssa_def *not_skip_aabb = + nir_inot(b, nir_test_mask(b, args->flags, SpvRayFlagsSkipAABBsKHRMask)); + nir_ssa_def *not_cull = nir_iand( + b, not_skip_aabb, + nir_ieq_imm( + b, + nir_iand(b, args->flags, + nir_bcsel(b, intersection.opaque, nir_imm_int(b, SpvRayFlagsCullOpaqueKHRMask), + nir_imm_int(b, SpvRayFlagsCullNoOpaqueKHRMask))), + 0)); + nir_push_if(b, not_cull); + { + args->aabb_cb(b, &intersection, args); + } + nir_pop_if(b, NULL); +} + +nir_ssa_def * +radv_build_ray_traversal(struct radv_device *device, nir_builder *b, + const struct radv_ray_traversal_args *args) +{ + nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete"); + nir_store_var(b, incomplete, nir_ine_imm(b, args->accel_struct, 0), 0x1); + + nir_push_if(b, nir_load_var(b, incomplete)); + { + nir_ssa_def *desc = create_bvh_descriptor(b); + nir_ssa_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0); + + nir_push_loop(b); + { + nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node), -1)); + { + nir_push_if(b, nir_ilt(b, nir_load_deref(b, args->vars.stack), nir_imm_int(b, args->stack_stride))); + { + nir_store_var(b, incomplete, nir_imm_bool(b, false), 0x1); + nir_jump(b, nir_jump_break); + } + nir_pop_if(b, NULL); + + nir_if *instance_exit = + nir_push_if(b, nir_uge(b, nir_load_deref(b, args->vars.top_stack), + nir_load_deref(b, args->vars.stack))); + instance_exit->control = nir_selection_control_dont_flatten; + { + nir_store_deref(b, args->vars.top_stack, nir_imm_int(b, 0), 1); + nir_store_deref(b, args->vars.bvh_base, build_addr_to_node(b, args->accel_struct), + 1); + nir_store_deref(b, args->vars.origin, args->origin, 7); + nir_store_deref(b, args->vars.dir, args->dir, 7); + nir_store_deref(b, args->vars.inv_dir, nir_fdiv(b, vec3ones, args->dir), 7); + } + nir_pop_if(b, NULL); + + nir_store_deref(b, args->vars.stack, + nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_stride), 1); + 
+ nir_ssa_def *bvh_node = + args->stack_load_cb(b, nir_load_deref(b, args->vars.stack), args); + nir_store_deref(b, args->vars.current_node, bvh_node, 0x1); + } + nir_pop_if(b, NULL); + + if (args->check_stack_overflow_cb) + args->check_stack_overflow_cb(b, args); + + nir_ssa_def *bvh_node = nir_load_deref(b, args->vars.current_node); + nir_ssa_def *bvh_node_type = bvh_node; + + nir_store_deref(b, args->vars.current_node, nir_imm_int(b, -1), 0x1); + + bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node)); + + nir_ssa_def *intrinsic_result = NULL; + if (!radv_emulate_rt(device->physical_device)) { + intrinsic_result = nir_bvh64_intersect_ray_amd( + b, 32, desc, nir_unpack_64_2x32(b, bvh_node), nir_load_deref(b, args->vars.tmax), + nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir), + nir_load_deref(b, args->vars.inv_dir)); + } + + nir_push_if(b, nir_ine_imm(b, nir_iand_imm(b, bvh_node_type, 4), 0)); + { + nir_push_if(b, nir_ine_imm(b, nir_iand_imm(b, bvh_node_type, 2), 0)); + { + nir_push_if(b, nir_ine_imm(b, nir_iand_imm(b, bvh_node_type, 1), 0)); + { + insert_traversal_aabb_case(device, b, args, bvh_node); + } + nir_push_else(b, NULL); + { + /* instance */ + nir_ssa_def *instance_node_addr = build_node_to_addr(device, b, bvh_node); + nir_ssa_def *instance_data = nir_build_load_global( + b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0); + nir_ssa_def *instance_and_mask = nir_channel(b, instance_data, 2); + nir_ssa_def *instance_mask = nir_ushr_imm(b, instance_and_mask, 24); + + nir_push_if(b, nir_ieq_imm(b, nir_iand(b, instance_mask, args->cull_mask), 0)); + { + nir_jump(b, nir_jump_continue); + } + nir_pop_if(b, NULL); + + nir_ssa_def *wto_matrix[3]; + nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix); + nir_ssa_def *instance_id = nir_build_load_global( + b, 1, 32, + nir_iadd_imm(b, instance_node_addr, + offsetof(struct radv_bvh_instance_node, instance_id))); + + + nir_store_deref(b, 
args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1); + nir_store_deref(b, args->vars.bvh_base, + build_addr_to_node( + b, nir_pack_64_2x32(b, nir_channels(b, instance_data, 0x3))), + 1); + + /* Continue traversal at the instance's root node */ + nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), + 0x1); + + /* Transform the ray into object space */ + nir_store_deref(b, args->vars.origin, + nir_build_vec3_mat_mult(b, args->origin, wto_matrix, true), 7); + nir_store_deref(b, args->vars.dir, + nir_build_vec3_mat_mult(b, args->dir, wto_matrix, false), 7); + nir_store_deref(b, args->vars.inv_dir, + nir_fdiv(b, vec3ones, nir_load_deref(b, args->vars.dir)), 7); + + nir_store_deref(b, args->vars.sbt_offset_and_flags, + nir_channel(b, instance_data, 3), 1); + nir_store_deref(b, args->vars.custom_instance_and_mask, instance_and_mask, 1); + nir_store_deref(b, args->vars.instance_id, instance_id, 1); + nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1); + } + nir_pop_if(b, NULL); + } + nir_push_else(b, NULL); + { + nir_ssa_def *result = intrinsic_result; + if (!result) { + /* If we didn't run the intrinsic because the hardware didn't support it, + * emulate ray/box intersection here */ + result = intersect_ray_amd_software_box( + device, b, bvh_node, nir_load_deref(b, args->vars.tmax), + nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir), + nir_load_deref(b, args->vars.inv_dir)); + } + + nir_ssa_def *new_nodes[4]; + for (unsigned i = 0; i < 4; ++i) + new_nodes[i] = nir_channel(b, result, i); + + for (unsigned i = 1; i < 4; ++i) + nir_push_if(b, nir_ine_imm(b, new_nodes[i], -1)); + + for (unsigned i = 4; i-- > 1;) { + nir_ssa_def *stack = nir_load_deref(b, args->vars.stack); + args->stack_store_cb(b, stack, new_nodes[i], args); + nir_store_deref(b, args->vars.stack, nir_iadd_imm(b, stack, args->stack_stride), 1); + nir_pop_if(b, NULL); + } + nir_store_deref(b, args->vars.current_node, new_nodes[0], 0x1); + } 
+ nir_pop_if(b, NULL); + } + nir_push_else(b, NULL); + { + nir_ssa_def *result = intrinsic_result; + if (!result) { + /* If we didn't run the intrinsic because the hardware didn't support it, + * emulate ray/tri intersection here */ + result = intersect_ray_amd_software_tri( + device, b, bvh_node, nir_load_deref(b, args->vars.tmax), + nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir), + nir_load_deref(b, args->vars.inv_dir)); + } + insert_traversal_triangle_case(device, b, args, result, bvh_node); + } + nir_pop_if(b, NULL); + } + nir_pop_loop(b, NULL); + } + nir_pop_if(b, NULL); + + return nir_load_var(b, incomplete); +} diff --git a/src/amd/vulkan/radv_rt_common.h b/src/amd/vulkan/radv_rt_common.h index ffab835532a..1e05a07a709 100644 --- a/src/amd/vulkan/radv_rt_common.h +++ b/src/amd/vulkan/radv_rt_common.h @@ -70,4 +70,97 @@ nir_ssa_def *create_bvh_descriptor(nir_builder *b); #define MAX_STACK_LDS_ENTRY_COUNT 16 #define MAX_STACK_SCRATCH_ENTRY_COUNT (MAX_STACK_ENTRY_COUNT - MAX_STACK_LDS_ENTRY_COUNT) +struct radv_ray_traversal_args; + +struct radv_leaf_intersection { + nir_ssa_def *node_addr; + nir_ssa_def *primitive_id; + nir_ssa_def *geometry_id_and_flags; + nir_ssa_def *opaque; +}; + +typedef void (*radv_aabb_intersection_cb)(nir_builder *b, + struct radv_leaf_intersection *intersection, + const struct radv_ray_traversal_args *args); + +struct radv_triangle_intersection { + struct radv_leaf_intersection base; + + nir_ssa_def *t; + nir_ssa_def *frontface; + nir_ssa_def *barycentrics; +}; + +typedef void (*radv_triangle_intersection_cb)(nir_builder *b, + struct radv_triangle_intersection *intersection, + const struct radv_ray_traversal_args *args); + +typedef void (*radv_rt_stack_store_cb)(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value, + const struct radv_ray_traversal_args *args); + +typedef nir_ssa_def *(*radv_rt_stack_load_cb)(nir_builder *b, nir_ssa_def *index, + const struct radv_ray_traversal_args *args); + +typedef void 
(*radv_rt_check_stack_overflow_cb)(nir_builder *b, + const struct radv_ray_traversal_args *args); + +struct radv_ray_traversal_vars { + /* For each accepted hit, tmax will be set to the t value. This allows for automatic intersection + * culling. + */ + nir_deref_instr *tmax; + + /* These variables change when entering and exiting BLASes. */ + nir_deref_instr *origin; + nir_deref_instr *dir; + nir_deref_instr *inv_dir; + + /* The base address of the current TLAS/BLAS. */ + nir_deref_instr *bvh_base; + + /* stack is the current stack pointer/index. top_stack is the pointer/index that marks the end of + * traversal for the current BLAS/TLAS. + */ + nir_deref_instr *stack; + nir_deref_instr *top_stack; + + nir_deref_instr *current_node; + + /* Information about the current instance used for culling. */ + nir_deref_instr *instance_id; + nir_deref_instr *instance_addr; + nir_deref_instr *custom_instance_and_mask; + nir_deref_instr *sbt_offset_and_flags; +}; + +struct radv_ray_traversal_args { + nir_ssa_def *accel_struct; + nir_ssa_def *flags; + nir_ssa_def *cull_mask; + nir_ssa_def *origin; + nir_ssa_def *tmin; + nir_ssa_def *dir; + + struct radv_ray_traversal_vars vars; + + /* The increment/decrement used for radv_ray_traversal_vars::stack */ + uint32_t stack_stride; + + radv_rt_stack_store_cb stack_store_cb; + radv_rt_stack_load_cb stack_load_cb; + radv_rt_check_stack_overflow_cb check_stack_overflow_cb; + + radv_aabb_intersection_cb aabb_cb; + radv_triangle_intersection_cb triangle_cb; + + void *data; +}; + +/* Builds the ray traversal loop and returns whether traversal is incomplete, similar to + * rayQueryProceedEXT. Traversal will only be considered incomplete if one of the specified + * callbacks breaks out of the traversal loop. + */ +nir_ssa_def *radv_build_ray_traversal(struct radv_device *device, nir_builder *b, + const struct radv_ray_traversal_args *args); + #endif