radv: rename NGG query state to be more generic
This allows the emulated GS counters to be used for legacy GS as well. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24231>
This commit is contained in:
@@ -46,7 +46,7 @@ radv_suspend_queries(struct radv_meta_saved_state *state, struct radv_cmd_buffer
|
||||
|
||||
state->active_pipeline_gds_queries = cmd_buffer->state.active_pipeline_gds_queries;
|
||||
cmd_buffer->state.active_pipeline_gds_queries = 0;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
}
|
||||
|
||||
/* Occlusion queries. */
|
||||
@@ -66,14 +66,14 @@ radv_suspend_queries(struct radv_meta_saved_state *state, struct radv_cmd_buffer
|
||||
if (cmd_buffer->state.active_prims_gen_gds_queries) {
|
||||
state->active_prims_gen_gds_queries = cmd_buffer->state.active_prims_gen_gds_queries;
|
||||
cmd_buffer->state.active_prims_gen_gds_queries = 0;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
}
|
||||
|
||||
/* Transform feedback queries (NGG). */
|
||||
if (cmd_buffer->state.active_prims_xfb_gds_queries) {
|
||||
state->active_prims_xfb_gds_queries = cmd_buffer->state.active_prims_xfb_gds_queries;
|
||||
cmd_buffer->state.active_prims_xfb_gds_queries = 0;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -86,7 +86,7 @@ radv_resume_queries(const struct radv_meta_saved_state *state, struct radv_cmd_b
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_START_PIPELINE_STATS;
|
||||
|
||||
cmd_buffer->state.active_pipeline_gds_queries = state->active_pipeline_gds_queries;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
}
|
||||
|
||||
/* Occlusion queries. */
|
||||
@@ -104,13 +104,13 @@ radv_resume_queries(const struct radv_meta_saved_state *state, struct radv_cmd_b
|
||||
/* Primitives generated queries (NGG). */
|
||||
if (state->active_prims_gen_gds_queries) {
|
||||
cmd_buffer->state.active_prims_gen_gds_queries = state->active_prims_gen_gds_queries;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
}
|
||||
|
||||
/* Transform feedback queries (NGG). */
|
||||
if (state->active_prims_xfb_gds_queries) {
|
||||
cmd_buffer->state.active_prims_xfb_gds_queries = state->active_prims_xfb_gds_queries;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -61,9 +61,9 @@ nggc_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
ngg_query_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
|
||||
shader_query_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
|
||||
{
|
||||
nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_query_state);
|
||||
nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->shader_query_state);
|
||||
return nir_test_mask(b, settings, mask);
|
||||
}
|
||||
|
||||
@@ -183,13 +183,13 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
|
||||
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[0]);
|
||||
break;
|
||||
case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
|
||||
replacement = ngg_query_bool_setting(b, radv_ngg_query_pipeline_stat, s);
|
||||
replacement = shader_query_bool_setting(b, radv_shader_query_pipeline_stat, s);
|
||||
break;
|
||||
case nir_intrinsic_load_prim_gen_query_enabled_amd:
|
||||
replacement = ngg_query_bool_setting(b, radv_ngg_query_prim_gen, s);
|
||||
replacement = shader_query_bool_setting(b, radv_shader_query_prim_gen, s);
|
||||
break;
|
||||
case nir_intrinsic_load_prim_xfb_query_enabled_amd:
|
||||
replacement = ngg_query_bool_setting(b, radv_ngg_query_prim_xfb, s);
|
||||
replacement = shader_query_bool_setting(b, radv_shader_query_prim_xfb, s);
|
||||
break;
|
||||
case nir_intrinsic_load_merged_wave_info_amd:
|
||||
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.merged_wave_info);
|
||||
@@ -347,17 +347,17 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
|
||||
nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, nir_imm_int(b, RADV_NGG_QUERY_PIPELINE_STAT_OFFSET),
|
||||
nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, nir_imm_int(b, RADV_SHADER_QUERY_PIPELINE_STAT_OFFSET),
|
||||
nir_imm_int(b, 0x100));
|
||||
break;
|
||||
case nir_intrinsic_atomic_add_gen_prim_count_amd:
|
||||
nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa,
|
||||
nir_imm_int(b, RADV_NGG_QUERY_PRIM_GEN_OFFSET(nir_intrinsic_stream_id(intrin))),
|
||||
nir_imm_int(b, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(nir_intrinsic_stream_id(intrin))),
|
||||
nir_imm_int(b, 0x100));
|
||||
break;
|
||||
case nir_intrinsic_atomic_add_xfb_prim_count_amd:
|
||||
nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa,
|
||||
nir_imm_int(b, RADV_NGG_QUERY_PRIM_XFB_OFFSET(nir_intrinsic_stream_id(intrin))),
|
||||
nir_imm_int(b, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(nir_intrinsic_stream_id(intrin))),
|
||||
nir_imm_int(b, 0x100));
|
||||
break;
|
||||
case nir_intrinsic_atomic_add_gs_invocation_count_amd:
|
||||
|
||||
@@ -5035,39 +5035,39 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
|
||||
}
|
||||
|
||||
static void
|
||||
radv_flush_ngg_query_state(struct radv_cmd_buffer *cmd_buffer)
|
||||
radv_flush_shader_query_state(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader;
|
||||
const struct radv_userdata_info *loc = radv_get_user_sgpr(last_vgt_shader, AC_UD_NGG_QUERY_STATE);
|
||||
enum radv_ngg_query_state ngg_query_state = radv_ngg_query_none;
|
||||
const struct radv_userdata_info *loc = radv_get_user_sgpr(last_vgt_shader, AC_UD_SHADER_QUERY_STATE);
|
||||
enum radv_shader_query_state shader_query_state = radv_shader_query_none;
|
||||
uint32_t base_reg;
|
||||
|
||||
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
|
||||
if (loc->sgpr_idx == -1)
|
||||
return;
|
||||
|
||||
assert(last_vgt_shader->info.is_ngg);
|
||||
|
||||
/* By default NGG queries are disabled but they are enabled if the command buffer has active GDS
|
||||
/* By default shader queries are disabled but they are enabled if the command buffer has active GDS
|
||||
* queries or if it's a secondary command buffer that inherits the number of generated
|
||||
* primitives.
|
||||
*/
|
||||
if (cmd_buffer->state.active_pipeline_gds_queries ||
|
||||
(cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT))
|
||||
ngg_query_state |= radv_ngg_query_pipeline_stat;
|
||||
shader_query_state |= radv_shader_query_pipeline_stat;
|
||||
|
||||
if (cmd_buffer->state.active_prims_gen_gds_queries)
|
||||
ngg_query_state |= radv_ngg_query_prim_gen;
|
||||
shader_query_state |= radv_shader_query_prim_gen;
|
||||
|
||||
if (cmd_buffer->state.active_prims_xfb_gds_queries && radv_is_streamout_enabled(cmd_buffer)) {
|
||||
ngg_query_state |= radv_ngg_query_prim_xfb | radv_ngg_query_prim_gen;
|
||||
shader_query_state |= radv_shader_query_prim_xfb | radv_shader_query_prim_gen;
|
||||
}
|
||||
|
||||
base_reg = last_vgt_shader->info.user_data_0;
|
||||
assert(loc->sgpr_idx != -1);
|
||||
|
||||
radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, ngg_query_state);
|
||||
radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, shader_query_state);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -5753,7 +5753,7 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi
|
||||
cmd_buffer->state.inherited_pipeline_statistics = pBeginInfo->pInheritanceInfo->pipelineStatistics;
|
||||
|
||||
if (cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
|
||||
cmd_buffer->state.inherited_occlusion_queries = pBeginInfo->pInheritanceInfo->occlusionQueryEnable;
|
||||
cmd_buffer->state.inherited_query_control_flags = pBeginInfo->pInheritanceInfo->queryFlags;
|
||||
@@ -6248,9 +6248,9 @@ radv_bind_pre_rast_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
|
||||
}
|
||||
|
||||
if (radv_get_user_sgpr(shader, AC_UD_NGG_QUERY_STATE)->sgpr_idx != -1) {
|
||||
/* Re-emit NGG query state when SGPR exists but location potentially changed. */
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
if (radv_get_user_sgpr(shader, AC_UD_SHADER_QUERY_STATE)->sgpr_idx != -1) {
|
||||
/* Re-emit shader query state when SGPR exists but location potentially changed. */
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
}
|
||||
|
||||
loc = radv_get_user_sgpr(shader, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
|
||||
@@ -7470,7 +7470,7 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
|
||||
* some states.
|
||||
*/
|
||||
primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_GUARDBAND |
|
||||
RADV_CMD_DIRTY_DYNAMIC_ALL | RADV_CMD_DIRTY_NGG_QUERY | RADV_CMD_DIRTY_OCCLUSION_QUERY |
|
||||
RADV_CMD_DIRTY_DYNAMIC_ALL | RADV_CMD_DIRTY_SHADER_QUERY | RADV_CMD_DIRTY_OCCLUSION_QUERY |
|
||||
RADV_CMD_DIRTY_DB_SHADER_CONTROL;
|
||||
radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_GRAPHICS);
|
||||
radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_COMPUTE);
|
||||
@@ -8806,8 +8806,8 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r
|
||||
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_RBPLUS)
|
||||
radv_emit_rbplus_state(cmd_buffer);
|
||||
|
||||
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_NGG_QUERY)
|
||||
radv_flush_ngg_query_state(cmd_buffer);
|
||||
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_SHADER_QUERY)
|
||||
radv_flush_shader_query_state(cmd_buffer);
|
||||
|
||||
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_OCCLUSION_QUERY)
|
||||
radv_flush_occlusion_query_state(cmd_buffer);
|
||||
@@ -10617,7 +10617,7 @@ radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable)
|
||||
radv_emit_streamout_enable(cmd_buffer);
|
||||
|
||||
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
|
||||
/* Re-emit streamout buffers to unbind them. */
|
||||
if (!enable)
|
||||
|
||||
@@ -141,15 +141,15 @@
|
||||
*/
|
||||
#define RADV_SHADER_UPLOAD_CS_COUNT 32
|
||||
|
||||
/* NGG GDS counters:
|
||||
/* Shader GDS counters:
|
||||
* offset 0| 4| 8|12 - reserved for NGG streamout counters
|
||||
* offset 16 - pipeline statistics counter for all streams
|
||||
* offset 20|24|28|32 - generated primitive counter for stream 0|1|2|3
|
||||
* offset 36|40|44|48 - written primitive counter for stream 0|1|2|3
|
||||
*/
|
||||
#define RADV_NGG_QUERY_PIPELINE_STAT_OFFSET 16
|
||||
#define RADV_NGG_QUERY_PRIM_GEN_OFFSET(stream) (20 + stream * 4)
|
||||
#define RADV_NGG_QUERY_PRIM_XFB_OFFSET(stream) (36 + stream * 4)
|
||||
#define RADV_SHADER_QUERY_PIPELINE_STAT_OFFSET 16
|
||||
/* GDS offset of the generated primitive counter for stream 0|1|2|3 (20|24|28|32).
 * The argument is parenthesized so that expression arguments expand correctly.
 */
#define RADV_SHADER_QUERY_PRIM_GEN_OFFSET(stream) (20 + (stream) * 4)
|
||||
/* GDS offset of the written primitive counter for stream 0|1|2|3 (36|40|44|48).
 * The argument is parenthesized so that expression arguments expand correctly.
 */
#define RADV_SHADER_QUERY_PRIM_XFB_OFFSET(stream) (36 + (stream) * 4)
|
||||
|
||||
/* Number of samples for line smooth lowering (hw requirement). */
|
||||
#define RADV_NUM_SMOOTH_AA_SAMPLES 4
|
||||
|
||||
@@ -1354,7 +1354,7 @@ enum radv_cmd_dirty_bits {
|
||||
RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 54,
|
||||
RADV_CMD_DIRTY_GUARDBAND = 1ull << 55,
|
||||
RADV_CMD_DIRTY_RBPLUS = 1ull << 56,
|
||||
RADV_CMD_DIRTY_NGG_QUERY = 1ull << 57,
|
||||
RADV_CMD_DIRTY_SHADER_QUERY = 1ull << 57,
|
||||
RADV_CMD_DIRTY_OCCLUSION_QUERY = 1ull << 58,
|
||||
RADV_CMD_DIRTY_DB_SHADER_CONTROL = 1ull << 59,
|
||||
};
|
||||
@@ -1406,11 +1406,11 @@ enum radv_nggc_settings {
|
||||
radv_nggc_small_primitives = 1 << 3,
|
||||
};
|
||||
|
||||
enum radv_ngg_query_state {
|
||||
radv_ngg_query_none = 0,
|
||||
radv_ngg_query_pipeline_stat = 1 << 0,
|
||||
radv_ngg_query_prim_gen = 1 << 1,
|
||||
radv_ngg_query_prim_xfb = 1 << 2,
|
||||
enum radv_shader_query_state {
|
||||
radv_shader_query_none = 0,
|
||||
radv_shader_query_pipeline_stat = 1 << 0,
|
||||
radv_shader_query_prim_gen = 1 << 1,
|
||||
radv_shader_query_prim_xfb = 1 << 2,
|
||||
};
|
||||
|
||||
struct radv_vertex_binding {
|
||||
|
||||
+18
-18
@@ -1781,13 +1781,13 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
|
||||
va += pipelinestat_block_size * 2;
|
||||
|
||||
/* pipeline statistics counter for all streams */
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PIPELINE_STAT_OFFSET, va);
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PIPELINE_STAT_OFFSET, va);
|
||||
|
||||
/* Record that the command buffer needs GDS. */
|
||||
cmd_buffer->gds_needed = true;
|
||||
|
||||
if (!cmd_buffer->state.active_pipeline_gds_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
|
||||
cmd_buffer->state.active_pipeline_gds_queries++;
|
||||
}
|
||||
@@ -1796,15 +1796,15 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
|
||||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
|
||||
/* generated prim counter */
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_GEN_OFFSET(index), va);
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
|
||||
radv_emit_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
|
||||
|
||||
/* written prim counter */
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_XFB_OFFSET(index), va + 8);
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 8);
|
||||
radv_emit_write_data_imm(cs, V_370_ME, va + 12, 0x80000000);
|
||||
|
||||
if (!cmd_buffer->state.active_prims_xfb_gds_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
|
||||
cmd_buffer->state.active_prims_xfb_gds_queries++;
|
||||
} else {
|
||||
@@ -1814,14 +1814,14 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
|
||||
/* On GFX11+, primitives generated query always use GDS. */
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_GEN_OFFSET(index), va);
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
|
||||
radv_emit_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
|
||||
|
||||
/* Record that the command buffer needs GDS. */
|
||||
cmd_buffer->gds_needed = true;
|
||||
|
||||
if (!cmd_buffer->state.active_prims_gen_gds_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
|
||||
cmd_buffer->state.active_prims_gen_gds_queries++;
|
||||
} else {
|
||||
@@ -1839,13 +1839,13 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
|
||||
|
||||
if (pool->uses_gds) {
|
||||
/* generated prim counter */
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_GEN_OFFSET(index), va + 32);
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 32);
|
||||
|
||||
/* Record that the command buffer needs GDS. */
|
||||
cmd_buffer->gds_needed = true;
|
||||
|
||||
if (!cmd_buffer->state.active_prims_gen_gds_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
|
||||
cmd_buffer->state.active_prims_gen_gds_queries++;
|
||||
}
|
||||
@@ -1922,29 +1922,29 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
|
||||
va += pipelinestat_block_size + 8;
|
||||
|
||||
/* pipeline statistics counter for all streams */
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PIPELINE_STAT_OFFSET, va);
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PIPELINE_STAT_OFFSET, va);
|
||||
|
||||
cmd_buffer->state.active_pipeline_gds_queries--;
|
||||
|
||||
if (!cmd_buffer->state.active_pipeline_gds_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
|
||||
/* generated prim counter */
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_GEN_OFFSET(index), va + 16);
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
|
||||
radv_emit_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
|
||||
|
||||
/* written prim counter */
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_XFB_OFFSET(index), va + 24);
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 24);
|
||||
radv_emit_write_data_imm(cs, V_370_ME, va + 28, 0x80000000);
|
||||
|
||||
cmd_buffer->state.active_prims_xfb_gds_queries--;
|
||||
|
||||
if (!cmd_buffer->state.active_prims_xfb_gds_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
} else {
|
||||
emit_sample_streamout(cmd_buffer, va + 16, index);
|
||||
}
|
||||
@@ -1952,13 +1952,13 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
|
||||
/* On GFX11+, primitives generated query always use GDS. */
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_GEN_OFFSET(index), va + 16);
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
|
||||
radv_emit_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
|
||||
|
||||
cmd_buffer->state.active_prims_gen_gds_queries--;
|
||||
|
||||
if (!cmd_buffer->state.active_prims_gen_gds_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
} else {
|
||||
if (cmd_buffer->state.active_prims_gen_queries == 1) {
|
||||
bool old_streamout_enabled = radv_is_streamout_enabled(cmd_buffer);
|
||||
@@ -1974,12 +1974,12 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
|
||||
|
||||
if (pool->uses_gds) {
|
||||
/* generated prim counter */
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_NGG_QUERY_PRIM_GEN_OFFSET(index), va + 36);
|
||||
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 36);
|
||||
|
||||
cmd_buffer->state.active_prims_gen_gds_queries--;
|
||||
|
||||
if (!cmd_buffer->state.active_prims_gen_gds_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
}
|
||||
|
||||
emit_sample_streamout(cmd_buffer, va + 16, index);
|
||||
|
||||
@@ -866,8 +866,8 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
|
||||
options.has_param_exports = info->outinfo.param_exports || info->outinfo.prim_param_exports;
|
||||
options.can_cull = nir->info.stage != MESA_SHADER_GEOMETRY && info->has_ngg_culling;
|
||||
options.disable_streamout = !device->physical_device->use_ngg_streamout;
|
||||
options.has_gen_prim_query = info->has_ngg_prim_query;
|
||||
options.has_xfb_prim_query = info->has_ngg_xfb_query;
|
||||
options.has_gen_prim_query = info->has_prim_query;
|
||||
options.has_xfb_prim_query = info->has_xfb_query;
|
||||
options.force_vrs = info->force_vrs_per_vertex;
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
|
||||
@@ -162,7 +162,7 @@ enum radv_ud_index {
|
||||
AC_UD_INDIRECT_DESCRIPTOR_SETS = 3,
|
||||
AC_UD_VIEW_INDEX = 4,
|
||||
AC_UD_STREAMOUT_BUFFERS = 5,
|
||||
AC_UD_NGG_QUERY_STATE = 6,
|
||||
AC_UD_SHADER_QUERY_STATE = 6,
|
||||
AC_UD_NGG_PROVOKING_VTX = 7,
|
||||
AC_UD_NGG_CULLING_SETTINGS = 8,
|
||||
AC_UD_NGG_VIEWPORT = 9,
|
||||
@@ -288,8 +288,8 @@ struct radv_shader_info {
|
||||
bool is_ngg_passthrough;
|
||||
bool has_ngg_culling;
|
||||
bool has_ngg_early_prim_export;
|
||||
bool has_ngg_prim_query;
|
||||
bool has_ngg_xfb_query;
|
||||
bool has_prim_query;
|
||||
bool has_xfb_query;
|
||||
uint32_t num_tess_patches;
|
||||
uint32_t esgs_itemsize; /* Only for VS or TES as ES */
|
||||
struct radv_vs_output_info outinfo;
|
||||
@@ -332,7 +332,7 @@ struct radv_shader_info {
|
||||
unsigned invocations;
|
||||
unsigned es_type; /* GFX9: VS or TES */
|
||||
uint8_t num_linked_inputs;
|
||||
bool has_ngg_pipeline_stat_query;
|
||||
bool has_pipeline_stat_query;
|
||||
} gs;
|
||||
struct {
|
||||
uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
|
||||
|
||||
@@ -286,11 +286,11 @@ declare_ps_input_vgprs(const struct radv_shader_info *info, struct radv_shader_a
|
||||
}
|
||||
|
||||
static void
|
||||
declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, bool has_ngg_query,
|
||||
declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, bool has_shader_query,
|
||||
bool has_ngg_provoking_vtx)
|
||||
{
|
||||
if (has_ngg_query)
|
||||
add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_query_state, AC_UD_NGG_QUERY_STATE);
|
||||
if (has_shader_query)
|
||||
add_ud_arg(args, 1, AC_ARG_INT, &args->shader_query_state, AC_UD_SHADER_QUERY_STATE);
|
||||
|
||||
if (has_ngg_provoking_vtx)
|
||||
add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_provoking_vtx, AC_UD_NGG_PROVOKING_VTX);
|
||||
@@ -382,8 +382,8 @@ declare_shader_args(const struct radv_device *device, const struct radv_pipeline
|
||||
{
|
||||
const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
|
||||
bool needs_view_index = info->uses_view_index;
|
||||
bool has_ngg_query = info->has_ngg_prim_query || info->has_ngg_xfb_query ||
|
||||
(stage == MESA_SHADER_GEOMETRY && info->gs.has_ngg_pipeline_stat_query);
|
||||
bool has_shader_query = info->has_prim_query || info->has_xfb_query ||
|
||||
(stage == MESA_SHADER_GEOMETRY && info->gs.has_pipeline_stat_query);
|
||||
bool has_ngg_provoking_vtx =
|
||||
(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_GEOMETRY) && key->dynamic_provoking_vtx_mode;
|
||||
|
||||
@@ -614,7 +614,7 @@ declare_shader_args(const struct radv_device *device, const struct radv_pipeline
|
||||
}
|
||||
|
||||
if (info->is_ngg) {
|
||||
declare_ngg_sgprs(info, args, has_ngg_query, has_ngg_provoking_vtx);
|
||||
declare_ngg_sgprs(info, args, has_shader_query, has_ngg_provoking_vtx);
|
||||
}
|
||||
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
|
||||
|
||||
@@ -42,8 +42,10 @@ struct radv_shader_args {
|
||||
/* Streamout */
|
||||
struct ac_arg streamout_buffers;
|
||||
|
||||
/* Emulated query */
|
||||
struct ac_arg shader_query_state;
|
||||
|
||||
/* NGG */
|
||||
struct ac_arg ngg_query_state;
|
||||
struct ac_arg ngg_provoking_vtx;
|
||||
|
||||
/* NGG GS */
|
||||
|
||||
@@ -1447,9 +1447,9 @@ gfx10_get_ngg_query_info(const struct radv_device *device, struct radv_shader_st
|
||||
{
|
||||
struct radv_shader_info *info = gs_stage ? &gs_stage->info : &es_stage->info;
|
||||
|
||||
info->gs.has_ngg_pipeline_stat_query = device->physical_device->emulate_ngg_gs_query_pipeline_stat && !!gs_stage;
|
||||
info->has_ngg_xfb_query = gs_stage ? !!gs_stage->nir->xfb_info : !!es_stage->nir->xfb_info;
|
||||
info->has_ngg_prim_query = pipeline_key->primitives_generated_query || info->has_ngg_xfb_query;
|
||||
info->gs.has_pipeline_stat_query = device->physical_device->emulate_ngg_gs_query_pipeline_stat && !!gs_stage;
|
||||
info->has_xfb_query = gs_stage ? !!gs_stage->nir->xfb_info : !!es_stage->nir->xfb_info;
|
||||
info->has_prim_query = pipeline_key->primitives_generated_query || info->has_xfb_query;
|
||||
}
|
||||
|
||||
static void
|
||||
|
||||
Reference in New Issue
Block a user