From 0966fb2c10025398fc048361a9898ba2d56b4f9b Mon Sep 17 00:00:00 2001 From: Konstantin Seurer Date: Sat, 12 Nov 2022 16:06:26 +0100 Subject: [PATCH] radv/ray_queries: Fix AABB handling AABB intersections always have to be committed manually. -> We don't have to copy opaque ones to closest. It's also invalid to query t for candidate AABBs. Totals from 7 (14.29% of 49) affected shaders: CodeSize: 171008 -> 169672 (-0.78%) Instrs: 32499 -> 32250 (-0.77%); split: -0.78%, +0.01% Latency: 418859 -> 414759 (-0.98%); split: -0.98%, +0.00% InvThroughput: 89182 -> 88251 (-1.04%); split: -1.05%, +0.00% VClause: 602 -> 599 (-0.50%) SClause: 837 -> 835 (-0.24%) Copies: 4804 -> 4802 (-0.04%); split: -0.35%, +0.31% Branches: 1593 -> 1585 (-0.50%) PreSGPRs: 567 -> 566 (-0.18%) Fixes: 3f72061 ("radv/rq: Use the common traversal helper") Part-of: --- src/amd/vulkan/radv_nir_lower_ray_queries.c | 51 +++------------------ 1 file changed, 7 insertions(+), 44 deletions(-) diff --git a/src/amd/vulkan/radv_nir_lower_ray_queries.c b/src/amd/vulkan/radv_nir_lower_ray_queries.c index 53ad47e7ab3..7063058517b 100644 --- a/src/amd/vulkan/radv_nir_lower_ray_queries.c +++ b/src/amd/vulkan/radv_nir_lower_ray_queries.c @@ -559,51 +559,14 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio struct ray_query_vars *vars = data->vars; nir_ssa_def *index = data->index; - nir_ssa_def *vec3_zero = nir_channels(b, nir_imm_vec4(b, 0, 0, 0, 0), 0x7); - nir_ssa_def *vec3_inf = nir_channels(b, nir_imm_vec4(b, INFINITY, INFINITY, INFINITY, 0), 0x7); + rq_store_var(b, index, vars->candidate.primitive_id, intersection->primitive_id, 1); + rq_store_var(b, index, vars->candidate.geometry_id_and_flags, + intersection->geometry_id_and_flags, 1); + rq_store_var(b, index, vars->candidate.opaque, intersection->opaque, 0x1); + rq_store_var(b, index, vars->candidate.intersection_type, nir_imm_int(b, intersection_type_aabb), + 0x1); - nir_ssa_def *bvh_lo = - nir_build_load_global(b, 3, 32, nir_iadd_imm(b, intersection->node_addr, 0)); - nir_ssa_def *bvh_hi = - nir_build_load_global(b, 3, 32, nir_iadd_imm(b, intersection->node_addr, 12)); - - bvh_lo = nir_fsub(b, bvh_lo, nir_load_deref(b, args->vars.origin)); - bvh_hi = nir_fsub(b, bvh_hi, nir_load_deref(b, args->vars.origin)); - nir_ssa_def *t_vec = nir_fmin(b, nir_fmul(b, bvh_lo, nir_load_deref(b, args->vars.inv_dir)), - nir_fmul(b, bvh_hi, nir_load_deref(b, args->vars.inv_dir))); - nir_ssa_def *t2_vec = nir_fmax(b, nir_fmul(b, bvh_lo, nir_load_deref(b, args->vars.inv_dir)), - nir_fmul(b, bvh_hi, nir_load_deref(b, args->vars.inv_dir))); - /* If we run parallel to one of the edges the range should be [0, inf) not [0,0] */ - t2_vec = - nir_bcsel(b, nir_feq(b, nir_load_deref(b, args->vars.dir), vec3_zero), vec3_inf, t2_vec); - - nir_ssa_def *t_min = nir_fmax(b, nir_channel(b, t_vec, 0), nir_channel(b, t_vec, 1)); - t_min = nir_fmax(b, t_min, nir_channel(b, t_vec, 2)); - - nir_ssa_def *t_max = nir_fmin(b, nir_channel(b, t2_vec, 0), nir_channel(b, t2_vec, 1)); - t_max = nir_fmin(b, t_max, nir_channel(b, t2_vec, 2)); - - nir_push_if(b, nir_iand(b, nir_fge(b, rq_load_var(b, index, vars->closest.t), t_min), - nir_fge(b, t_max, rq_load_var(b, index, vars->tmin)))); - { - rq_store_var(b, index, vars->candidate.t, - nir_fmax(b, t_min, rq_load_var(b, index, vars->tmin)), 0x1); - rq_store_var(b, index, vars->candidate.primitive_id, intersection->primitive_id, 1); - rq_store_var(b, index, vars->candidate.geometry_id_and_flags, - intersection->geometry_id_and_flags, 1); - rq_store_var(b, index, vars->candidate.opaque, intersection->opaque, 0x1); - rq_store_var(b, index, vars->candidate.intersection_type, - nir_imm_int(b, intersection_type_aabb), 0x1); - - nir_push_if(b, intersection->opaque); - { - copy_candidate_to_closest(b, index, vars); - } - nir_pop_if(b, NULL); - - nir_jump(b, nir_jump_break); - } - nir_pop_if(b, NULL); + nir_jump(b, nir_jump_break); } static void