diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index c2da94ac5cf..7eb8d537084 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -124,6 +124,8 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr) case nir_intrinsic_load_viewport_y_scale: case nir_intrinsic_load_viewport_z_scale: case nir_intrinsic_load_viewport_offset: + case nir_intrinsic_load_viewport_x_offset: + case nir_intrinsic_load_viewport_y_offset: case nir_intrinsic_load_viewport_z_offset: case nir_intrinsic_load_blend_const_color_a_float: case nir_intrinsic_load_blend_const_color_b_float: @@ -146,6 +148,12 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr) case nir_intrinsic_load_workgroup_num_input_vertices_amd: case nir_intrinsic_load_workgroup_num_input_primitives_amd: case nir_intrinsic_load_shader_query_enabled_amd: + case nir_intrinsic_load_cull_front_face_enabled_amd: + case nir_intrinsic_load_cull_back_face_enabled_amd: + case nir_intrinsic_load_cull_ccw_amd: + case nir_intrinsic_load_cull_small_primitives_enabled_amd: + case nir_intrinsic_load_cull_any_enabled_amd: + case nir_intrinsic_load_cull_small_prim_precision_amd: is_divergent = false; break; diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index ab5a5fac57b..74839171589 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -781,9 +781,13 @@ system_value("shader_record_ptr", 1, bit_sizes=[64]) # # Panfrost needs to implement all coordinate transformation in the # vertex shader; system values allow us to share this routine in NIR. +# +# RADV uses these for NGG primitive culling. 
system_value("viewport_x_scale", 1) system_value("viewport_y_scale", 1) system_value("viewport_z_scale", 1) +system_value("viewport_x_offset", 1) +system_value("viewport_y_offset", 1) system_value("viewport_z_offset", 1) system_value("viewport_scale", 3) system_value("viewport_offset", 3) @@ -1179,7 +1183,19 @@ system_value("workgroup_num_input_primitives_amd", 1) # For NGG passthrough mode only. Pre-packed argument for export_primitive_amd. system_value("packed_passthrough_primitive_amd", 1) # Whether NGG GS should execute shader query. -system_value("shader_query_enabled_amd", 1, bit_sizes=[1]) +system_value("shader_query_enabled_amd", dest_comp=1, bit_sizes=[1]) +# Whether the shader should cull front facing triangles. +intrinsic("load_cull_front_face_enabled_amd", dest_comp=1, bit_sizes=[1], flags=[CAN_ELIMINATE]) +# Whether the shader should cull back facing triangles. +intrinsic("load_cull_back_face_enabled_amd", dest_comp=1, bit_sizes=[1], flags=[CAN_ELIMINATE]) +# True if face culling should use CCW (false if CW). +intrinsic("load_cull_ccw_amd", dest_comp=1, bit_sizes=[1], flags=[CAN_ELIMINATE]) +# Whether the shader should cull small primitives that are not visible in a pixel. +intrinsic("load_cull_small_primitives_enabled_amd", dest_comp=1, bit_sizes=[1], flags=[CAN_ELIMINATE]) +# Whether any culling setting is enabled in the shader. +intrinsic("load_cull_any_enabled_amd", dest_comp=1, bit_sizes=[1], flags=[CAN_ELIMINATE]) +# Small primitive culling precision. +intrinsic("load_cull_small_prim_precision_amd", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE, CAN_REORDER]) # Initial edge flag in a Vertex Shader. src = {vertex index}. intrinsic("load_initial_edgeflag_amd", src_comp=[1], dest_comp=1, indices=[]) # Exports the current invocation's vertex. This is a placeholder where all vertex attribute export instructions should be emitted. 
@@ -1188,6 +1204,12 @@ intrinsic("export_vertex_amd", src_comp=[], indices=[]) intrinsic("export_primitive_amd", src_comp=[1], indices=[]) # Allocates export space for vertices and primitives. src[] = {num_vertices, num_primitives}. intrinsic("alloc_vertices_and_primitives_amd", src_comp=[1, 1], indices=[]) +# Overwrites VS input registers, for use with vertex compaction after culling. src = {vertex_id, instance_id}. +intrinsic("overwrite_vs_arguments_amd", src_comp=[1, 1], indices=[]) +# Overwrites TES input registers, for use with vertex compaction after culling. src = {tes_u, tes_v, rel_patch_id, patch_id}. +intrinsic("overwrite_tes_arguments_amd", src_comp=[1, 1, 1, 1], indices=[]) +# Overwrites the input vertex and primitive count in the current subgroup after culling. src = {num_vertices, num_primitives}. +intrinsic("overwrite_subgroup_num_vertices_and_primitives_amd", src_comp=[1, 1], indices=[]) # src = [index] BINDING = which table BASE = offset within handle intrinsic("load_sbt_amd", src_comp=[-1], dest_comp=0, indices=[BINDING, BASE],