swr: [rasterizer core] separate frontend/backend stats enables

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
Tim Rowley
2016-10-28 16:10:12 -05:00
parent 937b7d8e5a
commit dc8408920c
6 changed files with 51 additions and 26 deletions
@@ -1591,14 +1591,28 @@ VOID* SwrAllocDrawContextMemory(
/// @brief Enables stats counting
/// @param hContext - Handle passed back from SwrCreateContext
/// @param enable - If true then counts are incremented.
void SwrEnableStats(
void SwrEnableStatsFE(
HANDLE hContext,
bool enable)
{
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
pDC->pState->state.enableStats = enable;
pDC->pState->state.enableStatsFE = enable;
}
//////////////////////////////////////////////////////////////////////////
/// @brief Enables backend stats counting
/// @param hContext - Handle passed back from SwrCreateContext
/// @param enable - If true then backend counts are incremented.
void SwrEnableStatsBE(
HANDLE hContext,
bool enable)
{
SWR_CONTEXT *pContext = GetContext(hContext);
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
// Stored in the draw context's API state; the backend stat macro
// (UPDATE_STAT_BE) tests this flag before incrementing a counter.
pDC->pState->state.enableStatsBE = enable;
}
//////////////////////////////////////////////////////////////////////////
@@ -630,7 +630,15 @@ VOID* SWR_API SwrAllocDrawContextMemory(
/// @brief Enables stats counting
/// @param hContext - Handle passed back from SwrCreateContext
/// @param enable - If true then counts are incremented.
void SWR_API SwrEnableStats(
void SWR_API SwrEnableStatsFE(
HANDLE hContext,
bool enable);
//////////////////////////////////////////////////////////////////////////
/// @brief Enables backend stats counting
/// @param hContext - Handle passed back from SwrCreateContext
/// @param enable - If true then backend counts are incremented.
void SWR_API SwrEnableStatsBE(
HANDLE hContext,
bool enable);
@@ -73,7 +73,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
state.pfnCsFunc(GetPrivateState(pDC), &csContext);
UPDATE_STAT(CsInvocations, state.totalThreadsInGroup);
UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup);
AR_END(BEDispatch, 1);
}
@@ -553,7 +553,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
// execute pixel shader
AR_BEGIN(BEPixelShader, pDC->drawId);
UPDATE_STAT(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
AR_END(BEPixelShader, 0);
@@ -578,7 +578,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
uint32_t statMask = _simd_movemask_ps(depthPassMask);
uint32_t statCount = _mm_popcnt_u32(statMask);
UPDATE_STAT(DepthPassCount, statCount);
UPDATE_STAT_BE(DepthPassCount, statCount);
// output merger
AR_BEGIN(BEOutputMerger, pDC->drawId);
@@ -763,7 +763,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
// execute pixel shader
AR_BEGIN(BEPixelShader, pDC->drawId);
UPDATE_STAT(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
AR_END(BEPixelShader, 0);
@@ -790,7 +790,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
uint32_t statMask = _simd_movemask_ps(depthPassMask);
uint32_t statCount = _mm_popcnt_u32(statMask);
UPDATE_STAT(DepthPassCount, statCount);
UPDATE_STAT_BE(DepthPassCount, statCount);
// output merger
AR_BEGIN(BEOutputMerger, pDC->drawId);
@@ -922,7 +922,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
if(T::bCanEarlyZ && !T::bForcedSampleCount)
{
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BEEarlyDepthTest);
UPDATE_STAT(DepthPassCount, depthPassCount);
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
}
// if we have no covered samples that passed depth at this point, go to next tile
@@ -944,7 +944,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
// execute pixel shader
AR_BEGIN(BEPixelShader, pDC->drawId);
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
UPDATE_STAT(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
AR_END(BEPixelShader, 0);
// update active lanes to remove any discarded or oMask'd pixels
@@ -955,7 +955,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
if(!T::bCanEarlyZ && !T::bForcedSampleCount)
{
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BELateDepthTest);
UPDATE_STAT(DepthPassCount, depthPassCount);
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
}
// if we have no covered samples that passed depth at this point, skip OM and go to next tile
@@ -1140,7 +1140,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
uint32_t statMask = _simd_movemask_ps(depthPassMask);
uint32_t statCount = _mm_popcnt_u32(statMask);
UPDATE_STAT(DepthPassCount, statCount);
UPDATE_STAT_BE(DepthPassCount, statCount);
}
Endtile:
@@ -297,14 +297,13 @@ OSALIGNLINE(struct) API_STATE
SWR_BLEND_STATE blendState;
PFN_BLEND_JIT_FUNC pfnBlendFunc[SWR_NUM_RENDERTARGETS];
// Stats are incremented when this is true.
bool enableStats;
struct
{
uint32_t colorHottileEnable : 8;
uint32_t depthHottileEnable: 1;
uint32_t stencilHottileEnable : 1;
uint32_t enableStatsFE : 1; // Enable frontend pipeline stats
uint32_t enableStatsBE : 1; // Enable backend pipeline stats
uint32_t colorHottileEnable : 8; // Bitmask of enabled color hottiles
uint32_t depthHottileEnable: 1; // Enable depth buffer hottile
uint32_t stencilHottileEnable : 1; // Enable stencil buffer hottile
};
PFN_QUANTIZE_DEPTH pfnQuantizeDepth;
@@ -516,8 +515,8 @@ struct SWR_CONTEXT
HANDLE* pArContext;
};
#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.pStats[workerId].name += count; }
#define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.statsFE.name += count; }
#define UPDATE_STAT_BE(name, count) if (GetApiState(pDC).enableStatsBE) { pDC->dynState.pStats[workerId].name += count; }
#define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStatsFE) { pDC->dynState.statsFE.name += count; }
// ArchRast instrumentation framework
#define AR_WORKER_CTX pContext->pArContext[workerId]
@@ -322,7 +322,7 @@ bool CheckDependencyFE(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t lastRe
/// @brief Update client stats.
INLINE void UpdateClientStats(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC)
{
if ((pContext->pfnUpdateStats == nullptr) || (GetApiState(pDC).enableStats == false))
if ((pContext->pfnUpdateStats == nullptr) || (GetApiState(pDC).enableStatsBE == false))
{
return;
}
@@ -571,7 +571,7 @@ bool WorkOnFifoBE(
/// @brief Called when FE work is complete for this DC.
INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC)
{
if (pContext->pfnUpdateStatsFE && GetApiState(pDC).enableStats)
if (pContext->pfnUpdateStatsFE && GetApiState(pDC).enableStatsFE)
{
SWR_STATS_FE& stats = pDC->dynState.statsFE;
+8 -4
View File
@@ -177,8 +177,10 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
swr_update_draw_context(ctx, &pq->result);
/* Only change stat collection if there are no active queries */
if (ctx->active_queries == 0)
SwrEnableStats(ctx->swrContext, TRUE);
if (ctx->active_queries == 0) {
SwrEnableStatsFE(ctx->swrContext, TRUE);
SwrEnableStatsBE(ctx->swrContext, TRUE);
}
break;
}
@@ -212,8 +214,10 @@ swr_end_query(struct pipe_context *pipe, struct pipe_query *q)
swr_fence_submit(ctx, pq->fence);
/* Only change stat collection if there are no active queries */
if (ctx->active_queries == 0)
SwrEnableStats(ctx->swrContext, FALSE);
if (ctx->active_queries == 0) {
SwrEnableStatsFE(ctx->swrContext, FALSE);
SwrEnableStatsBE(ctx->swrContext, FALSE);
}
break;
}