radv: rename NGG query state to be more generic

This makes it possible to use the emulated GS counters for legacy GS as well.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24231>
This commit is contained in:
Samuel Pitoiset
2023-07-19 08:44:59 +02:00
parent 86a5e942dd
commit e1f8cfc2b2
11 changed files with 77 additions and 75 deletions
+6 -6
View File
@@ -46,7 +46,7 @@ radv_suspend_queries(struct radv_meta_saved_state *state, struct radv_cmd_buffer
state->active_pipeline_gds_queries = cmd_buffer->state.active_pipeline_gds_queries;
cmd_buffer->state.active_pipeline_gds_queries = 0;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
}
/* Occlusion queries. */
@@ -66,14 +66,14 @@ radv_suspend_queries(struct radv_meta_saved_state *state, struct radv_cmd_buffer
if (cmd_buffer->state.active_prims_gen_gds_queries) {
state->active_prims_gen_gds_queries = cmd_buffer->state.active_prims_gen_gds_queries;
cmd_buffer->state.active_prims_gen_gds_queries = 0;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
}
/* Transform feedback queries (NGG). */
if (cmd_buffer->state.active_prims_xfb_gds_queries) {
state->active_prims_xfb_gds_queries = cmd_buffer->state.active_prims_xfb_gds_queries;
cmd_buffer->state.active_prims_xfb_gds_queries = 0;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
}
}
@@ -86,7 +86,7 @@ radv_resume_queries(const struct radv_meta_saved_state *state, struct radv_cmd_b
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_START_PIPELINE_STATS;
cmd_buffer->state.active_pipeline_gds_queries = state->active_pipeline_gds_queries;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
}
/* Occlusion queries. */
@@ -104,13 +104,13 @@ radv_resume_queries(const struct radv_meta_saved_state *state, struct radv_cmd_b
/* Primitives generated queries (NGG). */
if (state->active_prims_gen_gds_queries) {
cmd_buffer->state.active_prims_gen_gds_queries = state->active_prims_gen_gds_queries;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
}
/* Transform feedback queries (NGG). */
if (state->active_prims_xfb_gds_queries) {
cmd_buffer->state.active_prims_xfb_gds_queries = state->active_prims_xfb_gds_queries;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
}
}
+8 -8
View File
@@ -61,9 +61,9 @@ nggc_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
}
static nir_ssa_def *
ngg_query_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
shader_query_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
{
nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_query_state);
nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->shader_query_state);
return nir_test_mask(b, settings, mask);
}
@@ -183,13 +183,13 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[0]);
break;
case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
replacement = ngg_query_bool_setting(b, radv_ngg_query_pipeline_stat, s);
replacement = shader_query_bool_setting(b, radv_shader_query_pipeline_stat, s);
break;
case nir_intrinsic_load_prim_gen_query_enabled_amd:
replacement = ngg_query_bool_setting(b, radv_ngg_query_prim_gen, s);
replacement = shader_query_bool_setting(b, radv_shader_query_prim_gen, s);
break;
case nir_intrinsic_load_prim_xfb_query_enabled_amd:
replacement = ngg_query_bool_setting(b, radv_ngg_query_prim_xfb, s);
replacement = shader_query_bool_setting(b, radv_shader_query_prim_xfb, s);
break;
case nir_intrinsic_load_merged_wave_info_amd:
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.merged_wave_info);
@@ -347,17 +347,17 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
break;
}
case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, nir_imm_int(b, RADV_NGG_QUERY_PIPELINE_STAT_OFFSET),
nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, nir_imm_int(b, RADV_SHADER_QUERY_PIPELINE_STAT_OFFSET),
nir_imm_int(b, 0x100));
break;
case nir_intrinsic_atomic_add_gen_prim_count_amd:
nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa,
nir_imm_int(b, RADV_NGG_QUERY_PRIM_GEN_OFFSET(nir_intrinsic_stream_id(intrin))),
nir_imm_int(b, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(nir_intrinsic_stream_id(intrin))),
nir_imm_int(b, 0x100));
break;
case nir_intrinsic_atomic_add_xfb_prim_count_amd:
nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa,
nir_imm_int(b, RADV_NGG_QUERY_PRIM_XFB_OFFSET(nir_intrinsic_stream_id(intrin))),
nir_imm_int(b, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(nir_intrinsic_stream_id(intrin))),
nir_imm_int(b, 0x100));
break;
case nir_intrinsic_atomic_add_gs_invocation_count_amd:
+17 -17
View File
@@ -5035,39 +5035,39 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
}
static void
radv_flush_ngg_query_state(struct radv_cmd_buffer *cmd_buffer)
radv_flush_shader_query_state(struct radv_cmd_buffer *cmd_buffer)
{
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
const struct radv_userdata_info *loc = radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_QUERY_STATE);
enum radv_ngg_query_state ngg_query_state = radv_ngg_query_none;
const struct radv_userdata_info *loc = radv_get_user_sgpr(last_vgt_shader, AC_UD_SHADER_QUERY_STATE);
enum radv_shader_query_state shader_query_state = radv_shader_query_none;
uint32_t base_reg;
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_SHADER_QUERY;
if (loc->sgpr_idx == -1)
return;
assert(last_vgt_shader->info.is_ngg);
/* By default NGG queries are disabled but they are enabled if the command buffer has active GDS
/* By default shader queries are disabled but they are enabled if the command buffer has active GDS
* queries or if it's a secondary command buffer that inherits the number of generated
* primitives.
*/
if (cmd_buffer->state.active_pipeline_gds_queries ||
(cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT))
ngg_query_state |= radv_ngg_query_pipeline_stat;
shader_query_state |= radv_shader_query_pipeline_stat;
if (cmd_buffer->state.active_prims_gen_gds_queries)
ngg_query_state |= radv_ngg_query_prim_gen;
shader_query_state |= radv_shader_query_prim_gen;
if (cmd_buffer->state.active_prims_xfb_gds_queries && radv_is_streamout_enabled(cmd_buffer)) {
ngg_query_state |= radv_ngg_query_prim_xfb | radv_ngg_query_prim_gen;
shader_query_state |= radv_shader_query_prim_xfb | radv_shader_query_prim_gen;
}
base_reg = last_vgt_shader->info.user_data_0;
assert(loc->sgpr_idx != -1);
radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, ngg_query_state);
radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, shader_query_state);
}
static void
@@ -5753,7 +5753,7 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi
cmd_buffer->state.inherited_pipeline_statistics = pBeginInfo->pInheritanceInfo->pipelineStatistics;
if (cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
cmd_buffer->state.inherited_occlusion_queries = pBeginInfo->pInheritanceInfo->occlusionQueryEnable;
cmd_buffer->state.inherited_query_control_flags = pBeginInfo->pInheritanceInfo->queryFlags;
@@ -6248,9 +6248,9 @@ radv_bind_pre_rast_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
}
if (radv_get_user_sgpr(shader, AC_UD_NGG_QUERY_STATE)->sgpr_idx != -1) {
/* Re-emit NGG query state when SGPR exists but location potentially changed. */
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
if (radv_get_user_sgpr(shader, AC_UD_SHADER_QUERY_STATE)->sgpr_idx != -1) {
/* Re-emit shader query state when SGPR exists but location potentially changed. */
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
}
loc = radv_get_user_sgpr(shader, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
@@ -7470,7 +7470,7 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
* some states.
*/
primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_GUARDBAND |
RADV_CMD_DIRTY_DYNAMIC_ALL | RADV_CMD_DIRTY_NGG_QUERY | RADV_CMD_DIRTY_OCCLUSION_QUERY |
RADV_CMD_DIRTY_DYNAMIC_ALL | RADV_CMD_DIRTY_SHADER_QUERY | RADV_CMD_DIRTY_OCCLUSION_QUERY |
RADV_CMD_DIRTY_DB_SHADER_CONTROL;
radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_GRAPHICS);
radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_COMPUTE);
@@ -8806,8 +8806,8 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_RBPLUS)
radv_emit_rbplus_state(cmd_buffer);
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_NGG_QUERY)
radv_flush_ngg_query_state(cmd_buffer);
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_SHADER_QUERY)
radv_flush_shader_query_state(cmd_buffer);
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_OCCLUSION_QUERY)
radv_flush_occlusion_query_state(cmd_buffer);
@@ -10617,7 +10617,7 @@ radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable)
radv_emit_streamout_enable(cmd_buffer);
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
/* Re-emit streamout buffers to unbind them. */
if (!enable)
+4 -4
View File
@@ -141,15 +141,15 @@
*/
#define RADV_SHADER_UPLOAD_CS_COUNT 32
/* NGG GDS counters:
/* Shader GDS counters:
* offset 0| 4| 8|12 - reserved for NGG streamout counters
* offset 16 - pipeline statistics counter for all streams
* offset 20|24|28|32 - generated primitive counter for stream 0|1|2|3
* offset 36|40|44|48 - written primitive counter for stream 0|1|2|3
*/
#define RADV_NGG_QUERY_PIPELINE_STAT_OFFSET 16
#define RADV_NGG_QUERY_PRIM_GEN_OFFSET(stream) (20 + stream * 4)
#define RADV_NGG_QUERY_PRIM_XFB_OFFSET(stream) (36 + stream * 4)
/* GDS offsets of the emulated shader query counters. The stream argument is
 * parenthesized so that expression arguments (e.g. "base + i") expand with the
 * intended precedence.
 */
#define RADV_SHADER_QUERY_PIPELINE_STAT_OFFSET 16
#define RADV_SHADER_QUERY_PRIM_GEN_OFFSET(stream) (20 + (stream) * 4)
#define RADV_SHADER_QUERY_PRIM_XFB_OFFSET(stream) (36 + (stream) * 4)
/* Number of samples for line smooth lowering (hw requirement). */
#define RADV_NUM_SMOOTH_AA_SAMPLES 4
+6 -6
View File
@@ -1354,7 +1354,7 @@ enum radv_cmd_dirty_bits {
RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 54,
RADV_CMD_DIRTY_GUARDBAND = 1ull << 55,
RADV_CMD_DIRTY_RBPLUS = 1ull << 56,
RADV_CMD_DIRTY_NGG_QUERY = 1ull << 57,
RADV_CMD_DIRTY_SHADER_QUERY = 1ull << 57,
RADV_CMD_DIRTY_OCCLUSION_QUERY = 1ull << 58,
RADV_CMD_DIRTY_DB_SHADER_CONTROL = 1ull << 59,
};
@@ -1406,11 +1406,11 @@ enum radv_nggc_settings {
radv_nggc_small_primitives = 1 << 3,
};
enum radv_ngg_query_state {
radv_ngg_query_none = 0,
radv_ngg_query_pipeline_stat = 1 << 0,
radv_ngg_query_prim_gen = 1 << 1,
radv_ngg_query_prim_xfb = 1 << 2,
/* Bit flags describing which emulated shader queries are enabled. The flags
 * are OR'ed together into a single value that is passed to the shader, where
 * individual bits are tested with a mask.
 */
enum radv_shader_query_state {
/* No emulated shader query is enabled. */
radv_shader_query_none = 0,
/* Pipeline statistics query. */
radv_shader_query_pipeline_stat = 1 << 0,
/* Primitives generated query. */
radv_shader_query_prim_gen = 1 << 1,
/* Transform feedback (written primitives) query. */
radv_shader_query_prim_xfb = 1 << 2,
};
struct radv_vertex_binding {
+18 -18
View File
@@ -1781,13 +1781,13 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
va += pipelinestat_block_size * 2;
/* pipeline statistics counter for all streams */
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PIPELINE_STAT_OFFSET, va);
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PIPELINE_STAT_OFFSET, va);
/* Record that the command buffer needs GDS. */
cmd_buffer->gds_needed = true;
if (!cmd_buffer->state.active_pipeline_gds_queries)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
cmd_buffer->state.active_pipeline_gds_queries++;
}
@@ -1796,15 +1796,15 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
/* generated prim counter */
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_GEN_OFFSET(index), va);
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
radv_emit_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
/* written prim counter */
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_XFB_OFFSET(index), va + 8);
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 8);
radv_emit_write_data_imm(cs, V_370_ME, va + 12, 0x80000000);
if (!cmd_buffer->state.active_prims_xfb_gds_queries)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
cmd_buffer->state.active_prims_xfb_gds_queries++;
} else {
@@ -1814,14 +1814,14 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
/* On GFX11+, primitives generated query always use GDS. */
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_GEN_OFFSET(index), va);
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
radv_emit_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
/* Record that the command buffer needs GDS. */
cmd_buffer->gds_needed = true;
if (!cmd_buffer->state.active_prims_gen_gds_queries)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
cmd_buffer->state.active_prims_gen_gds_queries++;
} else {
@@ -1839,13 +1839,13 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
if (pool->uses_gds) {
/* generated prim counter */
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_GEN_OFFSET(index), va + 32);
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 32);
/* Record that the command buffer needs GDS. */
cmd_buffer->gds_needed = true;
if (!cmd_buffer->state.active_prims_gen_gds_queries)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
cmd_buffer->state.active_prims_gen_gds_queries++;
}
@@ -1922,29 +1922,29 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
va += pipelinestat_block_size + 8;
/* pipeline statistics counter for all streams */
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PIPELINE_STAT_OFFSET, va);
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PIPELINE_STAT_OFFSET, va);
cmd_buffer->state.active_pipeline_gds_queries--;
if (!cmd_buffer->state.active_pipeline_gds_queries)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
}
break;
}
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
/* generated prim counter */
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_GEN_OFFSET(index), va + 16);
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
radv_emit_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
/* written prim counter */
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_XFB_OFFSET(index), va + 24);
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 24);
radv_emit_write_data_imm(cs, V_370_ME, va + 28, 0x80000000);
cmd_buffer->state.active_prims_xfb_gds_queries--;
if (!cmd_buffer->state.active_prims_xfb_gds_queries)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
} else {
emit_sample_streamout(cmd_buffer, va + 16, index);
}
@@ -1952,13 +1952,13 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
/* On GFX11+, primitives generated query always use GDS. */
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_GEN_OFFSET(index), va + 16);
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
radv_emit_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
cmd_buffer->state.active_prims_gen_gds_queries--;
if (!cmd_buffer->state.active_prims_gen_gds_queries)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
} else {
if (cmd_buffer->state.active_prims_gen_queries == 1) {
bool old_streamout_enabled = radv_is_streamout_enabled(cmd_buffer);
@@ -1974,12 +1974,12 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
if (pool->uses_gds) {
/* generated prim counter */
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_GEN_OFFSET(index), va + 36);
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 36);
cmd_buffer->state.active_prims_gen_gds_queries--;
if (!cmd_buffer->state.active_prims_gen_gds_queries)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
}
emit_sample_streamout(cmd_buffer, va + 16, index);
+2 -2
View File
@@ -866,8 +866,8 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
options.has_param_exports = info->outinfo.param_exports || info->outinfo.prim_param_exports;
options.can_cull = nir->info.stage != MESA_SHADER_GEOMETRY && info->has_ngg_culling;
options.disable_streamout = !device->physical_device->use_ngg_streamout;
options.has_gen_prim_query = info->has_ngg_prim_query;
options.has_xfb_prim_query = info->has_ngg_xfb_query;
options.has_gen_prim_query = info->has_prim_query;
options.has_xfb_prim_query = info->has_xfb_query;
options.force_vrs = info->force_vrs_per_vertex;
if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) {
+4 -4
View File
@@ -162,7 +162,7 @@ enum radv_ud_index {
AC_UD_INDIRECT_DESCRIPTOR_SETS = 3,
AC_UD_VIEW_INDEX = 4,
AC_UD_STREAMOUT_BUFFERS = 5,
AC_UD_NGG_QUERY_STATE = 6,
AC_UD_SHADER_QUERY_STATE = 6,
AC_UD_NGG_PROVOKING_VTX = 7,
AC_UD_NGG_CULLING_SETTINGS = 8,
AC_UD_NGG_VIEWPORT = 9,
@@ -288,8 +288,8 @@ struct radv_shader_info {
bool is_ngg_passthrough;
bool has_ngg_culling;
bool has_ngg_early_prim_export;
bool has_ngg_prim_query;
bool has_ngg_xfb_query;
bool has_prim_query;
bool has_xfb_query;
uint32_t num_tess_patches;
uint32_t esgs_itemsize; /* Only for VS or TES as ES */
struct radv_vs_output_info outinfo;
@@ -332,7 +332,7 @@ struct radv_shader_info {
unsigned invocations;
unsigned es_type; /* GFX9: VS or TES */
uint8_t num_linked_inputs;
bool has_ngg_pipeline_stat_query;
bool has_pipeline_stat_query;
} gs;
struct {
uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
+6 -6
View File
@@ -286,11 +286,11 @@ declare_ps_input_vgprs(const struct radv_shader_info *info, struct radv_shader_a
}
static void
declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, bool has_ngg_query,
declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, bool has_shader_query,
bool has_ngg_provoking_vtx)
{
if (has_ngg_query)
add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_query_state, AC_UD_NGG_QUERY_STATE);
if (has_shader_query)
add_ud_arg(args, 1, AC_ARG_INT, &args->shader_query_state, AC_UD_SHADER_QUERY_STATE);
if (has_ngg_provoking_vtx)
add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_provoking_vtx, AC_UD_NGG_PROVOKING_VTX);
@@ -382,8 +382,8 @@ declare_shader_args(const struct radv_device *device, const struct radv_pipeline
{
const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
bool needs_view_index = info->uses_view_index;
bool has_ngg_query = info->has_ngg_prim_query || info->has_ngg_xfb_query ||
(stage == MESA_SHADER_GEOMETRY && info->gs.has_ngg_pipeline_stat_query);
bool has_shader_query = info->has_prim_query || info->has_xfb_query ||
(stage == MESA_SHADER_GEOMETRY && info->gs.has_pipeline_stat_query);
bool has_ngg_provoking_vtx =
(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_GEOMETRY) && key->dynamic_provoking_vtx_mode;
@@ -614,7 +614,7 @@ declare_shader_args(const struct radv_device *device, const struct radv_pipeline
}
if (info->is_ngg) {
declare_ngg_sgprs(info, args, has_ngg_query, has_ngg_provoking_vtx);
declare_ngg_sgprs(info, args, has_shader_query, has_ngg_provoking_vtx);
}
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
+3 -1
View File
@@ -42,8 +42,10 @@ struct radv_shader_args {
/* Streamout */
struct ac_arg streamout_buffers;
/* Emulated query */
struct ac_arg shader_query_state;
/* NGG */
struct ac_arg ngg_query_state;
struct ac_arg ngg_provoking_vtx;
/* NGG GS */
+3 -3
View File
@@ -1447,9 +1447,9 @@ gfx10_get_ngg_query_info(const struct radv_device *device, struct radv_shader_st
{
struct radv_shader_info *info = gs_stage ? &gs_stage->info : &es_stage->info;
info->gs.has_ngg_pipeline_stat_query = device->physical_device->emulate_ngg_gs_query_pipeline_stat && !!gs_stage;
info->has_ngg_xfb_query = gs_stage ? !!gs_stage->nir->xfb_info : !!es_stage->nir->xfb_info;
info->has_ngg_prim_query = pipeline_key->primitives_generated_query || info->has_ngg_xfb_query;
info->gs.has_pipeline_stat_query = device->physical_device->emulate_ngg_gs_query_pipeline_stat && !!gs_stage;
info->has_xfb_query = gs_stage ? !!gs_stage->nir->xfb_info : !!es_stage->nir->xfb_info;
info->has_prim_query = pipeline_key->primitives_generated_query || info->has_xfb_query;
}
static void