diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index a6acf97e029..1e7e0bd05d2 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -1864,8 +1864,7 @@ void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx)
          ctx->ac.i32_1,
          ngg_get_emulated_counters_buf(ctx),
          LLVMConstInt(ctx->ac.i32,
-                      (si_hw_query_dw_offset(PIPE_STAT_QUERY_GS_INVOCATIONS) +
-                       SI_QUERY_STATS_END_OFFSET_DW) * 4,
+                      si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_INVOCATIONS) * 4,
                       false),
          ctx->ac.i32_0, /* soffset */
          ctx->ac.i32_0, /* cachepolicy */
@@ -2195,8 +2194,7 @@ void gfx10_ngg_gs_build_end(struct si_shader_context *ctx)
          ctx->ac.i32_1,
          ngg_get_emulated_counters_buf(ctx),
          LLVMConstInt(ctx->ac.i32,
-                      (si_hw_query_dw_offset(PIPE_STAT_QUERY_GS_PRIMITIVES) +
-                       SI_QUERY_STATS_END_OFFSET_DW) * 4,
+                      si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4,
                       false),
          ctx->ac.i32_0, /* soffset */
          ctx->ac.i32_0, /* cachepolicy */
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
index 7dd488d9980..a60e325d58f 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -637,27 +637,35 @@ static bool si_query_hw_prepare_buffer(struct si_context *sctx, struct si_query_
    return true;
 }
 
-int si_hw_query_dw_offset(int index)
+static unsigned si_query_pipestats_num_results(struct si_screen *sscreen)
+{
+   return 11;
+}
+
+static unsigned si_query_pipestat_dw_offset(enum pipe_statistics_query_index index)
 {
-   /* Offset in dwords in the query buffer of the start value
-    * for the given counter.
-    */
    switch (index) {
-   case PIPE_STAT_QUERY_IA_VERTICES: return 14;
-   case PIPE_STAT_QUERY_IA_PRIMITIVES: return 12;
-   case PIPE_STAT_QUERY_VS_INVOCATIONS: return 6;
-   case PIPE_STAT_QUERY_GS_INVOCATIONS: return 8;
-   case PIPE_STAT_QUERY_GS_PRIMITIVES: return 10;
-   case PIPE_STAT_QUERY_C_INVOCATIONS: return 4;
-   case PIPE_STAT_QUERY_C_PRIMITIVES: return 2;
-   case PIPE_STAT_QUERY_PS_INVOCATIONS: return 0;
-   case PIPE_STAT_QUERY_HS_INVOCATIONS: return 16;
-   case PIPE_STAT_QUERY_DS_INVOCATIONS: return 18;
-   case PIPE_STAT_QUERY_CS_INVOCATIONS: return 20;
-   default:
-      assert(false);
-   }
-   return -1;
+   case PIPE_STAT_QUERY_PS_INVOCATIONS: return 0;
+   case PIPE_STAT_QUERY_C_PRIMITIVES: return 2;
+   case PIPE_STAT_QUERY_C_INVOCATIONS: return 4;
+   case PIPE_STAT_QUERY_VS_INVOCATIONS: return 6;
+   case PIPE_STAT_QUERY_GS_INVOCATIONS: return 8;
+   case PIPE_STAT_QUERY_GS_PRIMITIVES: return 10;
+   case PIPE_STAT_QUERY_IA_PRIMITIVES: return 12;
+   case PIPE_STAT_QUERY_IA_VERTICES: return 14;
+   case PIPE_STAT_QUERY_HS_INVOCATIONS: return 16;
+   case PIPE_STAT_QUERY_DS_INVOCATIONS: return 18;
+   case PIPE_STAT_QUERY_CS_INVOCATIONS: return 20;
+   default:
+      assert(false);
+   }
+   return ~0;
+}
+
+unsigned si_query_pipestat_end_dw_offset(struct si_screen *sscreen,
+                                         enum pipe_statistics_query_index index)
+{
+   return si_query_pipestats_num_results(sscreen) * 2 + si_query_pipestat_dw_offset(index);
 }
 
 static void si_query_hw_get_result_resource(struct si_context *sctx, struct si_query *squery,
@@ -725,8 +733,7 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen, unsigned
       query->b.num_cs_dw_suspend = 6 * SI_MAX_STREAMS;
       break;
    case PIPE_QUERY_PIPELINE_STATISTICS:
-      /* 11 values on GCN. */
-      query->result_size = 11 * 16;
+      query->result_size = si_query_pipestats_num_results(sscreen) * 16;
       query->result_size += 8; /* for the fence + alignment */
       query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen);
       query->index = index;
@@ -860,7 +867,7 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_h
          const uint32_t zero = 0;
          radeon_begin(cs);
          /* Clear the emulated counter end value. We don't clear start because it's unused. */
-         va += (si_hw_query_dw_offset(query->index) + SI_QUERY_STATS_END_OFFSET_DW) * 4;
+         va += si_query_pipestat_end_dw_offset(sctx->screen, query->index) * 4;
          radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + 1, 0));
          radeon_emit(S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
          radeon_emit(va);
@@ -1322,9 +1329,9 @@ static void si_get_hw_query_params(struct si_context *sctx, struct si_query_hw *
       params->fence_offset = squery->result_size - 4;
       break;
    case PIPE_QUERY_PIPELINE_STATISTICS: {
-      params->start_offset = si_hw_query_dw_offset(index) * 4;
-      params->end_offset = SI_QUERY_STATS_END_OFFSET_DW * 4 + params->start_offset;
-      params->fence_offset = 2 * 88;
+      params->start_offset = si_query_pipestat_dw_offset(index) * 4;
+      params->end_offset = si_query_pipestat_end_dw_offset(sctx->screen, index) * 4;
+      params->fence_offset = si_query_pipestats_num_results(sctx->screen) * 16;
       break;
    }
    default:
@@ -1404,10 +1411,9 @@ static void si_query_hw_add_result(struct si_screen *sscreen, struct si_query_hw
       break;
    case PIPE_QUERY_PIPELINE_STATISTICS:
       for (int i = 0; i < 11; i++) {
-         int start_offset = si_hw_query_dw_offset(i);
          result->pipeline_statistics.counters[i] +=
-            si_query_read_result(buffer, start_offset,
-                                 start_offset + SI_QUERY_STATS_END_OFFSET_DW, false);
+            si_query_read_result(buffer, si_query_pipestat_dw_offset(i),
+                                 si_query_pipestat_end_dw_offset(sscreen, i), false);
       }
 #if 0 /* for testing */
       printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
diff --git a/src/gallium/drivers/radeonsi/si_query.h b/src/gallium/drivers/radeonsi/si_query.h
index a7be5dae0d0..382075a7717 100644
--- a/src/gallium/drivers/radeonsi/si_query.h
+++ b/src/gallium/drivers/radeonsi/si_query.h
@@ -127,14 +127,6 @@ enum
    SI_NUM_SW_QUERY_GROUPS
 };
 
-/* The counters are stored in a buffer, each with a start and end value,
- * with this layout:
- * [start1][start2][...][startN][end1][end2][...][endN]
- * N is 11 and each value is a 64-bit int so we get:
- */
-#define SI_QUERY_STATS_END_OFFSET_DW (11 * 2)
-int si_hw_query_dw_offset(int index);
-
 struct si_query_ops {
    void (*destroy)(struct si_context *, struct si_query *);
    bool (*begin)(struct si_context *, struct si_query *);
@@ -227,6 +219,8 @@ struct si_query_hw {
    unsigned workaround_offset;
 };
 
+unsigned si_query_pipestat_end_dw_offset(struct si_screen *sscreen,
+                                         enum pipe_statistics_query_index index);
 void si_query_hw_destroy(struct si_context *sctx, struct si_query *squery);
 bool si_query_hw_begin(struct si_context *sctx, struct si_query *squery);
 bool si_query_hw_end(struct si_context *sctx, struct si_query *squery);
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
index be2e57480ea..4c1a7887dd7 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
@@ -245,8 +245,7 @@ void si_llvm_gs_build_end(struct si_shader_context *ctx)
             prim,
             ngg_get_emulated_counters_buf(ctx),
             LLVMConstInt(ctx->ac.i32,
-                         (si_hw_query_dw_offset(PIPE_STAT_QUERY_GS_PRIMITIVES) +
-                          SI_QUERY_STATS_END_OFFSET_DW) * 4,
+                         si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4,
                          false),
             ctx->ac.i32_0, /* soffset */
             ctx->ac.i32_0, /* cachepolicy */
@@ -255,9 +254,8 @@ void si_llvm_gs_build_end(struct si_shader_context *ctx)
 
          args[0] = ctx->ac.i32_1;
          args[2] = LLVMConstInt(ctx->ac.i32,
-                                (si_hw_query_dw_offset(PIPE_STAT_QUERY_GS_INVOCATIONS) +
-                                 SI_QUERY_STATS_END_OFFSET_DW) * 4,
-                                false);
+                                si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_INVOCATIONS) * 4,
+                                false);
          ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32", ctx->ac.i32, args, 5, 0);
       }
       ac_build_endif(&ctx->ac, 5229);