swr: [rasterizer core] Add macros for mapping ArchRast to buckets
Switch all RDTSC_START/STOP macros to use AR_BEGIN/END macros.

Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
This commit is contained in:
@@ -46,8 +46,6 @@
|
||||
#include "common/simdintrin.h"
|
||||
#include "common/os.h"
|
||||
|
||||
#include "archrast/archrast.h"
|
||||
|
||||
static const SWR_RECT g_MaxScissorRect = { 0, 0, KNOB_MAX_SCISSOR_X, KNOB_MAX_SCISSOR_Y };
|
||||
|
||||
void SetupDefaultState(SWR_CONTEXT *pContext);
|
||||
@@ -264,9 +262,9 @@ void QueueWork(SWR_CONTEXT *pContext)
|
||||
}
|
||||
else
|
||||
{
|
||||
RDTSC_START(APIDrawWakeAllThreads);
|
||||
AR_API_BEGIN(APIDrawWakeAllThreads, pDC->drawId);
|
||||
WakeAllThreads(pContext);
|
||||
RDTSC_STOP(APIDrawWakeAllThreads, 1, 0);
|
||||
AR_API_END(APIDrawWakeAllThreads, 1);
|
||||
}
|
||||
|
||||
// Set current draw context to NULL so that next state call forces a new draw context to be created and populated.
|
||||
@@ -286,7 +284,7 @@ INLINE void QueueDispatch(SWR_CONTEXT* pContext)
|
||||
|
||||
DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
|
||||
{
|
||||
RDTSC_START(APIGetDrawContext);
|
||||
AR_API_BEGIN(APIGetDrawContext, 0);
|
||||
// If current draw context is null then need to obtain a new draw context to use from ring.
|
||||
if (pContext->pCurDrawContext == nullptr)
|
||||
{
|
||||
@@ -372,7 +370,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
|
||||
SWR_ASSERT(isSplitDraw == false, "Split draw should only be used when obtaining a new DC");
|
||||
}
|
||||
|
||||
RDTSC_STOP(APIGetDrawContext, 0, 0);
|
||||
AR_API_END(APIGetDrawContext, 0);
|
||||
return pContext->pCurDrawContext;
|
||||
}
|
||||
|
||||
@@ -418,13 +416,13 @@ void SetupDefaultState(SWR_CONTEXT *pContext)
|
||||
|
||||
void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC pfnFunc, uint64_t userData, uint64_t userData2, uint64_t userData3)
|
||||
{
|
||||
RDTSC_START(APISync);
|
||||
|
||||
SWR_ASSERT(pfnFunc != nullptr);
|
||||
|
||||
SWR_CONTEXT *pContext = GetContext(hContext);
|
||||
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
|
||||
|
||||
AR_API_BEGIN(APISync, 0);
|
||||
|
||||
pDC->FeWork.type = SYNC;
|
||||
pDC->FeWork.pfnWork = ProcessSync;
|
||||
|
||||
@@ -437,35 +435,35 @@ void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC pfnFunc, uint64_t userData, uint
|
||||
//enqueue
|
||||
QueueDraw(pContext);
|
||||
|
||||
RDTSC_STOP(APISync, 1, 0);
|
||||
AR_API_END(APISync, 1);
|
||||
}
|
||||
|
||||
void SwrWaitForIdle(HANDLE hContext)
|
||||
{
|
||||
SWR_CONTEXT *pContext = GetContext(hContext);
|
||||
|
||||
RDTSC_START(APIWaitForIdle);
|
||||
AR_API_BEGIN(APIWaitForIdle, 0);
|
||||
|
||||
while (!pContext->dcRing.IsEmpty())
|
||||
{
|
||||
_mm_pause();
|
||||
}
|
||||
|
||||
RDTSC_STOP(APIWaitForIdle, 1, 0);
|
||||
AR_API_END(APIWaitForIdle, 1);
|
||||
}
|
||||
|
||||
void SwrWaitForIdleFE(HANDLE hContext)
|
||||
{
|
||||
SWR_CONTEXT *pContext = GetContext(hContext);
|
||||
|
||||
RDTSC_START(APIWaitForIdle);
|
||||
AR_API_BEGIN(APIWaitForIdle, 0);
|
||||
|
||||
while (pContext->drawsOutstandingFE > 0)
|
||||
{
|
||||
_mm_pause();
|
||||
}
|
||||
|
||||
RDTSC_STOP(APIWaitForIdle, 1, 0);
|
||||
AR_API_END(APIWaitForIdle, 1);
|
||||
}
|
||||
|
||||
void SwrSetVertexBuffers(
|
||||
@@ -1080,11 +1078,11 @@ void DrawInstanced(
|
||||
return;
|
||||
}
|
||||
|
||||
RDTSC_START(APIDraw);
|
||||
|
||||
SWR_CONTEXT *pContext = GetContext(hContext);
|
||||
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
|
||||
|
||||
AR_API_BEGIN(APIDraw, pDC->drawId);
|
||||
|
||||
uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology);
|
||||
uint32_t primsPerDraw = GetNumPrims(topology, maxVertsPerDraw);
|
||||
uint32_t remainingVerts = numVertices;
|
||||
@@ -1139,7 +1137,7 @@ void DrawInstanced(
|
||||
pDC = GetDrawContext(pContext);
|
||||
pDC->pState->state.rastState.cullMode = oldCullMode;
|
||||
|
||||
RDTSC_STOP(APIDraw, numVertices * numInstances, 0);
|
||||
AR_API_END(APIDraw, numVertices * numInstances);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@@ -1200,14 +1198,12 @@ void DrawIndexedInstance(
|
||||
return;
|
||||
}
|
||||
|
||||
RDTSC_START(APIDrawIndexed);
|
||||
|
||||
SWR_CONTEXT *pContext = GetContext(hContext);
|
||||
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
|
||||
API_STATE* pState = &pDC->pState->state;
|
||||
|
||||
AR_BEGIN(AR_API_CTX, APIDrawIndexed, pDC->drawId);
|
||||
AR_EVENT(AR_API_CTX, DrawIndexedInstance(topology, numIndices, indexOffset, baseVertex, numInstances, startInstance));
|
||||
AR_API_BEGIN(APIDrawIndexed, pDC->drawId);
|
||||
AR_API_EVENT(DrawIndexedInstance(topology, numIndices, indexOffset, baseVertex, numInstances, startInstance));
|
||||
|
||||
uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology);
|
||||
uint32_t primsPerDraw = GetNumPrims(topology, maxIndicesPerDraw);
|
||||
@@ -1280,8 +1276,7 @@ void DrawIndexedInstance(
|
||||
pDC = GetDrawContext(pContext);
|
||||
pDC->pState->state.rastState.cullMode = oldCullMode;
|
||||
|
||||
AR_END(AR_API_CTX, APIDrawIndexed, numIndices * numInstances);
|
||||
RDTSC_STOP(APIDrawIndexed, numIndices * numInstances, 0);
|
||||
AR_API_END(APIDrawIndexed, numIndices * numInstances);
|
||||
}
|
||||
|
||||
|
||||
@@ -1406,10 +1401,11 @@ void SwrDispatch(
|
||||
return;
|
||||
}
|
||||
|
||||
RDTSC_START(APIDispatch);
|
||||
SWR_CONTEXT *pContext = GetContext(hContext);
|
||||
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
|
||||
|
||||
AR_API_BEGIN(APIDispatch, pDC->drawId);
|
||||
|
||||
pDC->isCompute = true; // This is a compute context.
|
||||
|
||||
COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pArena->AllocAligned(sizeof(COMPUTE_DESC), 64);
|
||||
@@ -1424,7 +1420,7 @@ void SwrDispatch(
|
||||
pDC->pDispatch->initialize(totalThreadGroups, pTaskData);
|
||||
|
||||
QueueDispatch(pContext);
|
||||
RDTSC_STOP(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ, 0);
|
||||
AR_API_END(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ);
|
||||
}
|
||||
|
||||
// Deswizzles, converts and stores current contents of the hot tiles to surface
|
||||
@@ -1440,11 +1436,11 @@ void SWR_API SwrStoreTiles(
|
||||
return;
|
||||
}
|
||||
|
||||
RDTSC_START(APIStoreTiles);
|
||||
|
||||
SWR_CONTEXT *pContext = GetContext(hContext);
|
||||
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
|
||||
|
||||
AR_API_BEGIN(APIStoreTiles, pDC->drawId);
|
||||
|
||||
pDC->FeWork.type = STORETILES;
|
||||
pDC->FeWork.pfnWork = ProcessStoreTiles;
|
||||
pDC->FeWork.desc.storeTiles.attachment = attachment;
|
||||
@@ -1455,7 +1451,7 @@ void SWR_API SwrStoreTiles(
|
||||
//enqueue
|
||||
QueueDraw(pContext);
|
||||
|
||||
RDTSC_STOP(APIStoreTiles, 0, 0);
|
||||
AR_API_END(APIStoreTiles, 1);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@@ -1479,11 +1475,11 @@ void SWR_API SwrClearRenderTarget(
|
||||
return;
|
||||
}
|
||||
|
||||
RDTSC_START(APIClearRenderTarget);
|
||||
|
||||
SWR_CONTEXT *pContext = GetContext(hContext);
|
||||
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
|
||||
|
||||
AR_API_BEGIN(APIClearRenderTarget, pDC->drawId);
|
||||
|
||||
CLEAR_FLAGS flags;
|
||||
flags.bits = 0;
|
||||
flags.mask = clearMask;
|
||||
@@ -1503,7 +1499,7 @@ void SWR_API SwrClearRenderTarget(
|
||||
// enqueue draw
|
||||
QueueDraw(pContext);
|
||||
|
||||
RDTSC_STOP(APIClearRenderTarget, 0, pDC->drawId);
|
||||
AR_API_END(APIClearRenderTarget, 1);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -47,10 +47,10 @@ static PFN_CLEAR_TILES sClearTilesTable[NUM_SWR_FORMATS];
|
||||
/// @param threadGroupId - the linear index for the thread group within the dispatch.
|
||||
void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer)
|
||||
{
|
||||
RDTSC_START(BEDispatch);
|
||||
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(BEDispatch, pDC->drawId);
|
||||
|
||||
const COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pDispatch->GetTasksData();
|
||||
SWR_ASSERT(pTaskData != nullptr);
|
||||
|
||||
@@ -75,7 +75,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
|
||||
|
||||
UPDATE_STAT(CsInvocations, state.totalThreadsInGroup);
|
||||
|
||||
RDTSC_STOP(BEDispatch, 1, 0);
|
||||
AR_END(BEDispatch, 1);
|
||||
}
|
||||
|
||||
void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
|
||||
@@ -180,16 +180,17 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui
|
||||
|
||||
void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
if (KNOB_FAST_CLEAR)
|
||||
{
|
||||
CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
SWR_MULTISAMPLE_COUNT sampleCount = pDC->pState->state.rastState.sampleCount;
|
||||
uint32_t numSamples = GetNumSamples(sampleCount);
|
||||
|
||||
SWR_ASSERT(pClear->flags.bits != 0); // shouldn't be here without a reason.
|
||||
|
||||
RDTSC_START(BEClear);
|
||||
AR_BEGIN(BEClear, pDC->drawId);
|
||||
|
||||
if (pClear->flags.mask & SWR_CLEAR_COLOR)
|
||||
{
|
||||
@@ -217,13 +218,13 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
|
||||
pHotTile->state = HOTTILE_CLEAR;
|
||||
}
|
||||
|
||||
RDTSC_STOP(BEClear, 0, 0);
|
||||
AR_END(BEClear, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Legacy clear
|
||||
CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
|
||||
RDTSC_START(BEClear);
|
||||
AR_BEGIN(BEClear, pDC->drawId);
|
||||
|
||||
if (pClear->flags.mask & SWR_CLEAR_COLOR)
|
||||
{
|
||||
@@ -265,17 +266,18 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
|
||||
pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, clearData, pClear->rect);
|
||||
}
|
||||
|
||||
RDTSC_STOP(BEClear, 0, 0);
|
||||
AR_END(BEClear, 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
|
||||
{
|
||||
RDTSC_START(BEStoreTiles);
|
||||
STORE_TILES_DESC *pDesc = (STORE_TILES_DESC*)pData;
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(BEStoreTiles, pDC->drawId);
|
||||
|
||||
#ifdef KNOB_ENABLE_RDTSC
|
||||
uint32_t numTiles = 0;
|
||||
#endif
|
||||
@@ -326,7 +328,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
|
||||
pHotTile->state = (HOTTILE_STATE)pDesc->postStoreTileState;
|
||||
}
|
||||
}
|
||||
RDTSC_STOP(BEStoreTiles, numTiles, pDC->drawId);
|
||||
AR_END(BEStoreTiles, numTiles);
|
||||
}
|
||||
|
||||
|
||||
@@ -387,8 +389,10 @@ simdmask ComputeUserClipMask(uint8_t clipMask, float* pUserClipBuffer, simdscala
|
||||
template<typename T>
|
||||
void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
{
|
||||
RDTSC_START(BESingleSampleBackend);
|
||||
RDTSC_START(BESetup);
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(BESingleSampleBackend, pDC->drawId);
|
||||
AR_BEGIN(BESetup, pDC->drawId);
|
||||
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
const SWR_RASTSTATE& rastState = state.rastState;
|
||||
@@ -423,7 +427,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
||||
pColorBase[rt] = renderBuffers.pColor[rt];
|
||||
}
|
||||
uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil;
|
||||
RDTSC_STOP(BESetup, 0, 0);
|
||||
AR_END(BESetup, 1);
|
||||
|
||||
SWR_PS_CONTEXT psContext;
|
||||
psContext.pAttribs = work.pAttribs;
|
||||
@@ -462,7 +466,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
||||
generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, pBlendState->sampleMask);
|
||||
}
|
||||
|
||||
RDTSC_START(BEBarycentric);
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
CalcPixelBarycentrics(coeffs, psContext);
|
||||
|
||||
// for 1x case, centroid is pixel center
|
||||
@@ -475,7 +479,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
||||
// interpolate and quantize z
|
||||
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
|
||||
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
|
||||
RDTSC_STOP(BEBarycentric, 0, 0);
|
||||
AR_END(BEBarycentric, 1);
|
||||
|
||||
simdmask clipCoverageMask = coverageMask & MASK;
|
||||
// interpolate user clip distance if available
|
||||
@@ -492,10 +496,10 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
||||
// Early-Z?
|
||||
if(T::bCanEarlyZ)
|
||||
{
|
||||
RDTSC_START(BEEarlyDepthTest);
|
||||
AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
|
||||
RDTSC_STOP(BEEarlyDepthTest, 0, 0);
|
||||
AR_END(BEEarlyDepthTest, 0);
|
||||
|
||||
// early-exit if no pixels passed depth or earlyZ is forced on
|
||||
if(pPSState->forceEarlyZ || !_simd_movemask_ps(depthPassMask))
|
||||
@@ -514,20 +518,20 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
||||
psContext.activeMask = _simd_castps_si(vCoverageMask);
|
||||
|
||||
// execute pixel shader
|
||||
RDTSC_START(BEPixelShader);
|
||||
AR_BEGIN(BEPixelShader, pDC->drawId);
|
||||
UPDATE_STAT(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
|
||||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
|
||||
RDTSC_STOP(BEPixelShader, 0, 0);
|
||||
AR_END(BEPixelShader, 0);
|
||||
|
||||
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
|
||||
|
||||
// late-Z
|
||||
if(!T::bCanEarlyZ)
|
||||
{
|
||||
RDTSC_START(BELateDepthTest);
|
||||
AR_BEGIN(BELateDepthTest, pDC->drawId);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthBase, vCoverageMask, pStencilBase, &stencilPassMask);
|
||||
RDTSC_STOP(BELateDepthTest, 0, 0);
|
||||
AR_END(BELateDepthTest, 0);
|
||||
|
||||
if(!_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
@@ -543,7 +547,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
||||
UPDATE_STAT(DepthPassCount, statCount);
|
||||
|
||||
// output merger
|
||||
RDTSC_START(BEOutputMerger);
|
||||
AR_BEGIN(BEOutputMerger, pDC->drawId);
|
||||
OutputMerger(psContext, pColorBase, 0, pBlendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, pPSState->numRenderTargets);
|
||||
|
||||
// do final depth write after all pixel kills
|
||||
@@ -552,11 +556,11 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthBase, depthPassMask, vCoverageMask, pStencilBase, stencilPassMask);
|
||||
}
|
||||
RDTSC_STOP(BEOutputMerger, 0, 0);
|
||||
AR_END(BEOutputMerger, 0);
|
||||
}
|
||||
|
||||
Endtile:
|
||||
RDTSC_START(BEEndTile);
|
||||
AR_BEGIN(BEEndTile, pDC->drawId);
|
||||
coverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
|
||||
{
|
||||
@@ -569,17 +573,19 @@ Endtile:
|
||||
{
|
||||
pColorBase[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
RDTSC_STOP(BEEndTile, 0, 0);
|
||||
AR_END(BEEndTile, 0);
|
||||
}
|
||||
}
|
||||
RDTSC_STOP(BESingleSampleBackend, 0, 0);
|
||||
AR_END(BESingleSampleBackend, 0);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
{
|
||||
RDTSC_START(BESampleRateBackend);
|
||||
RDTSC_START(BESetup);
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(BESampleRateBackend, pDC->drawId);
|
||||
AR_BEGIN(BESetup, pDC->drawId);
|
||||
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
const SWR_RASTSTATE& rastState = state.rastState;
|
||||
@@ -613,7 +619,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
||||
pColorBase[rt] = renderBuffers.pColor[rt];
|
||||
}
|
||||
uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil;
|
||||
RDTSC_STOP(BESetup, 0, 0);
|
||||
AR_END(BESetup, 0);
|
||||
|
||||
SWR_PS_CONTEXT psContext;
|
||||
psContext.pAttribs = work.pAttribs;
|
||||
@@ -643,9 +649,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
||||
// pixel center
|
||||
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps((float)xx));
|
||||
|
||||
RDTSC_START(BEBarycentric);
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
CalcPixelBarycentrics(coeffs, psContext);
|
||||
RDTSC_STOP(BEBarycentric, 0, 0);
|
||||
AR_END(BEBarycentric, 0);
|
||||
|
||||
if(T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
|
||||
{
|
||||
@@ -657,7 +663,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
||||
if(T::bCentroidPos)
|
||||
{
|
||||
///@ todo: don't need to generate input coverage 2x if input coverage and centroid
|
||||
RDTSC_START(BEBarycentric);
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
if(T::bIsStandardPattern)
|
||||
{
|
||||
CalcCentroidPos<T>(psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
|
||||
@@ -668,7 +674,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
||||
psContext.vY.centroid = _simd_add_ps(psContext.vY.UL, _simd_set1_ps(0.5f));
|
||||
}
|
||||
CalcCentroidBarycentrics(coeffs, psContext, psContext.vX.UL, psContext.vY.UL);
|
||||
RDTSC_STOP(BEBarycentric, 0, 0);
|
||||
AR_END(BEBarycentric, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -681,7 +687,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
||||
simdmask coverageMask = work.coverageMask[sample] & MASK;
|
||||
if (coverageMask)
|
||||
{
|
||||
RDTSC_START(BEBarycentric);
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
// calculate per sample positions
|
||||
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
|
||||
@@ -691,7 +697,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
||||
// interpolate and quantize z
|
||||
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
|
||||
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
|
||||
RDTSC_STOP(BEBarycentric, 0, 0);
|
||||
AR_END(BEBarycentric, 0);
|
||||
|
||||
// interpolate user clip distance if available
|
||||
if (rastState.clipDistanceMask)
|
||||
@@ -711,10 +717,10 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
||||
// Early-Z?
|
||||
if (T::bCanEarlyZ)
|
||||
{
|
||||
RDTSC_START(BEEarlyDepthTest);
|
||||
AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
|
||||
RDTSC_STOP(BEEarlyDepthTest, 0, 0);
|
||||
AR_END(BEEarlyDepthTest, 0);
|
||||
|
||||
// early-exit if no samples passed depth or earlyZ is forced on.
|
||||
if (pPSState->forceEarlyZ || !_simd_movemask_ps(depthPassMask))
|
||||
@@ -734,20 +740,20 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
||||
psContext.activeMask = _simd_castps_si(vCoverageMask);
|
||||
|
||||
// execute pixel shader
|
||||
RDTSC_START(BEPixelShader);
|
||||
AR_BEGIN(BEPixelShader, pDC->drawId);
|
||||
UPDATE_STAT(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
|
||||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
|
||||
RDTSC_STOP(BEPixelShader, 0, 0);
|
||||
AR_END(BEPixelShader, 0);
|
||||
|
||||
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
|
||||
|
||||
// late-Z
|
||||
if (!T::bCanEarlyZ)
|
||||
{
|
||||
RDTSC_START(BELateDepthTest);
|
||||
AR_BEGIN(BELateDepthTest, pDC->drawId);
|
||||
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
|
||||
RDTSC_STOP(BELateDepthTest, 0, 0);
|
||||
AR_END(BELateDepthTest, 0);
|
||||
|
||||
if (!_simd_movemask_ps(depthPassMask))
|
||||
{
|
||||
@@ -765,7 +771,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
||||
UPDATE_STAT(DepthPassCount, statCount);
|
||||
|
||||
// output merger
|
||||
RDTSC_START(BEOutputMerger);
|
||||
AR_BEGIN(BEOutputMerger, pDC->drawId);
|
||||
OutputMerger(psContext, pColorBase, sample, pBlendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, pPSState->numRenderTargets);
|
||||
|
||||
// do final depth write after all pixel kills
|
||||
@@ -774,11 +780,11 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
|
||||
}
|
||||
RDTSC_STOP(BEOutputMerger, 0, 0);
|
||||
AR_END(BEOutputMerger, 0);
|
||||
}
|
||||
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
}
|
||||
RDTSC_START(BEEndTile);
|
||||
AR_BEGIN(BEEndTile, pDC->drawId);
|
||||
if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
|
||||
{
|
||||
work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
@@ -790,17 +796,19 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
||||
{
|
||||
pColorBase[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
RDTSC_STOP(BEEndTile, 0, 0);
|
||||
AR_END(BEEndTile, 0);
|
||||
}
|
||||
}
|
||||
RDTSC_STOP(BESampleRateBackend, 0, 0);
|
||||
AR_END(BESampleRateBackend, 0);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
{
|
||||
RDTSC_START(BEPixelRateBackend);
|
||||
RDTSC_START(BESetup);
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(BEPixelRateBackend, pDC->drawId);
|
||||
AR_BEGIN(BESetup, pDC->drawId);
|
||||
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
const SWR_RASTSTATE& rastState = state.rastState;
|
||||
@@ -834,7 +842,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
||||
pColorBase[rt] = renderBuffers.pColor[rt];
|
||||
}
|
||||
uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil;
|
||||
RDTSC_STOP(BESetup, 0, 0);
|
||||
AR_END(BESetup, 0);
|
||||
|
||||
SWR_PS_CONTEXT psContext;
|
||||
psContext.pAttribs = work.pAttribs;
|
||||
@@ -852,7 +860,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
||||
|
||||
psContext.sampleIndex = 0;
|
||||
|
||||
PixelRateZTestLoop<T> PixelRateZTest(pDC, work, coeffs, state, pDepthBase, pStencilBase, rastState.clipDistanceMask);
|
||||
PixelRateZTestLoop<T> PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBase, pStencilBase, rastState.clipDistanceMask);
|
||||
|
||||
for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
|
||||
{
|
||||
@@ -868,9 +876,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
||||
// set pixel center positions
|
||||
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps((float)xx));
|
||||
|
||||
RDTSC_START(BEBarycentric);
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
CalcPixelBarycentrics(coeffs, psContext);
|
||||
RDTSC_STOP(BEBarycentric, 0, 0);
|
||||
AR_END(BEBarycentric, 0);
|
||||
|
||||
if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
|
||||
{
|
||||
@@ -882,7 +890,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
||||
if(T::bCentroidPos)
|
||||
{
|
||||
///@ todo: don't need to generate input coverage 2x if input coverage and centroid
|
||||
RDTSC_START(BEBarycentric);
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
if(T::bIsStandardPattern)
|
||||
{
|
||||
CalcCentroidPos<T>(psContext, &work.coverageMask[0], pBlendState->sampleMask, psContext.vX.UL, psContext.vY.UL);
|
||||
@@ -894,7 +902,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
||||
}
|
||||
|
||||
CalcCentroidBarycentrics(coeffs, psContext, psContext.vX.UL, psContext.vY.UL);
|
||||
RDTSC_STOP(BEBarycentric, 0, 0);
|
||||
AR_END(BEBarycentric, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -921,11 +929,11 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
||||
|
||||
if(pPSState->usesSourceDepth)
|
||||
{
|
||||
RDTSC_START(BEBarycentric);
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
// interpolate and quantize z
|
||||
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
|
||||
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
|
||||
RDTSC_STOP(BEBarycentric, 0, 0);
|
||||
AR_END(BEBarycentric, 0);
|
||||
}
|
||||
|
||||
// pixels that are currently active
|
||||
@@ -933,10 +941,10 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
||||
psContext.oMask = T::MultisampleT::FullSampleMask();
|
||||
|
||||
// execute pixel shader
|
||||
RDTSC_START(BEPixelShader);
|
||||
AR_BEGIN(BEPixelShader, pDC->drawId);
|
||||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
|
||||
UPDATE_STAT(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
|
||||
RDTSC_STOP(BEPixelShader, 0, 0);
|
||||
AR_END(BEPixelShader, 0);
|
||||
|
||||
// update active lanes to remove any discarded or oMask'd pixels
|
||||
activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si())));
|
||||
@@ -956,7 +964,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
||||
// loop over all samples, broadcasting the results of the PS to all passing pixels
|
||||
for(uint32_t sample = 0; sample < GetNumOMSamples<T>(pBlendState->sampleCount); sample++)
|
||||
{
|
||||
RDTSC_START(BEOutputMerger);
|
||||
AR_BEGIN(BEOutputMerger, pDC->drawId);
|
||||
// center pattern does a single coverage/depth/stencil test, standard pattern tests all samples
|
||||
uint32_t coverageSampleNum = (T::bIsStandardPattern) ? sample : 0;
|
||||
simdscalar coverageMask, depthMask;
|
||||
@@ -971,7 +979,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
||||
if(!_simd_movemask_ps(depthMask))
|
||||
{
|
||||
// stencil should already have been written in early/lateZ tests
|
||||
RDTSC_STOP(BEOutputMerger, 0, 0);
|
||||
AR_END(BEOutputMerger, 0);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -987,10 +995,10 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum],
|
||||
pDepthSample, depthMask, coverageMask, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]);
|
||||
}
|
||||
RDTSC_STOP(BEOutputMerger, 0, 0);
|
||||
AR_END(BEOutputMerger, 0);
|
||||
}
|
||||
Endtile:
|
||||
RDTSC_START(BEEndTile);
|
||||
AR_BEGIN(BEEndTile, pDC->drawId);
|
||||
for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
|
||||
{
|
||||
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
|
||||
@@ -1008,19 +1016,21 @@ Endtile:
|
||||
{
|
||||
pColorBase[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
RDTSC_STOP(BEEndTile, 0, 0);
|
||||
AR_END(BEEndTile, 0);
|
||||
}
|
||||
}
|
||||
RDTSC_STOP(BEPixelRateBackend, 0, 0);
|
||||
AR_END(BEPixelRateBackend, 0);
|
||||
}
|
||||
// optimized backend flow with NULL PS
|
||||
template<uint32_t sampleCountT>
|
||||
void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
|
||||
{
|
||||
RDTSC_START(BENullBackend);
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(BENullBackend, pDC->drawId);
|
||||
///@todo: handle center multisample pattern
|
||||
typedef SwrBackendTraits<sampleCountT, SWR_MSAA_STANDARD_PATTERN> T;
|
||||
RDTSC_START(BESetup);
|
||||
AR_BEGIN(BESetup, pDC->drawId);
|
||||
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
const SWR_RASTSTATE& rastState = pDC->pState->state.rastState;
|
||||
@@ -1043,7 +1053,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
|
||||
|
||||
uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil;
|
||||
|
||||
RDTSC_STOP(BESetup, 0, 0);
|
||||
AR_END(BESetup, 0);
|
||||
|
||||
SWR_PS_CONTEXT psContext;
|
||||
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
|
||||
@@ -1065,7 +1075,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
|
||||
simdmask coverageMask = work.coverageMask[sample] & MASK;
|
||||
if (coverageMask)
|
||||
{
|
||||
RDTSC_START(BEBarycentric);
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
// calculate per sample positions
|
||||
psContext.vX.sample = _simd_add_ps(vXSamplePosUL, T::MultisampleT::vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(vYSamplePosUL, T::MultisampleT::vY(sample));
|
||||
@@ -1076,7 +1086,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
|
||||
psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
|
||||
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
|
||||
|
||||
RDTSC_STOP(BEBarycentric, 0, 0);
|
||||
AR_END(BEBarycentric, 0);
|
||||
|
||||
// interpolate user clip distance if available
|
||||
if (rastState.clipDistanceMask)
|
||||
@@ -1092,12 +1102,12 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
|
||||
uint8_t *pDepthSample = pDepthBase + RasterTileDepthOffset(sample);
|
||||
uint8_t *pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
|
||||
|
||||
RDTSC_START(BEEarlyDepthTest);
|
||||
AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
|
||||
simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
|
||||
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
|
||||
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
|
||||
RDTSC_STOP(BEEarlyDepthTest, 0, 0);
|
||||
AR_END(BEEarlyDepthTest, 0);
|
||||
|
||||
uint32_t statMask = _simd_movemask_ps(depthPassMask);
|
||||
uint32_t statCount = _mm_popcnt_u32(statMask);
|
||||
@@ -1109,7 +1119,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
|
||||
pStencilBase += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
|
||||
}
|
||||
}
|
||||
RDTSC_STOP(BENullBackend, 0, 0);
|
||||
AR_END(BENullBackend, 0);
|
||||
}
|
||||
|
||||
void InitClearTilesTable()
|
||||
|
||||
@@ -432,15 +432,17 @@ INLINE uint32_t GetNumOMSamples(SWR_MULTISAMPLE_COUNT blendSampleCount)
|
||||
template<typename T>
|
||||
struct PixelRateZTestLoop
|
||||
{
|
||||
PixelRateZTestLoop(DRAW_CONTEXT *DC, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState,
|
||||
PixelRateZTestLoop(DRAW_CONTEXT *DC, uint32_t _workerId, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState,
|
||||
uint8_t*& depthBase, uint8_t*& stencilBase, const uint8_t ClipDistanceMask) :
|
||||
work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
|
||||
pDC(DC), workerId(_workerId), work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
|
||||
clipDistanceMask(ClipDistanceMask), pDepthBase(depthBase), pStencilBase(stencilBase) {};
|
||||
|
||||
INLINE
|
||||
uint32_t operator()(simdscalar& activeLanes, SWR_PS_CONTEXT& psContext,
|
||||
const CORE_BUCKETS BEDepthBucket, uint32_t currentSimdIn8x8 = 0)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
uint32_t statCount = 0;
|
||||
simdscalar anyDepthSamplePassed = _simd_setzero_ps();
|
||||
for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
|
||||
@@ -454,7 +456,7 @@ struct PixelRateZTestLoop
|
||||
continue;
|
||||
}
|
||||
|
||||
RDTSC_START(BEBarycentric);
|
||||
AR_BEGIN(BEBarycentric, pDC->drawId);
|
||||
// calculate per sample positions
|
||||
psContext.vX.sample = _simd_add_ps(psContext.vX.UL, T::MultisampleT::vX(sample));
|
||||
psContext.vY.sample = _simd_add_ps(psContext.vY.UL, T::MultisampleT::vY(sample));
|
||||
@@ -472,7 +474,7 @@ struct PixelRateZTestLoop
|
||||
vZ[sample] = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
|
||||
vZ[sample] = state.pfnQuantizeDepth(vZ[sample]);
|
||||
}
|
||||
RDTSC_STOP(BEBarycentric, 0, 0);
|
||||
AR_END(BEBarycentric, 0);
|
||||
|
||||
///@todo: perspective correct vs non-perspective correct clipping?
|
||||
// if clip distances are enabled, we need to interpolate for each sample
|
||||
@@ -488,13 +490,14 @@ struct PixelRateZTestLoop
|
||||
uint8_t * pStencilSample = pStencilBase + RasterTileStencilOffset(sample);
|
||||
|
||||
// ZTest for this sample
|
||||
RDTSC_START(BEDepthBucket);
|
||||
///@todo Need to uncomment out this bucket.
|
||||
//AR_BEGIN(BEDepthBucket, pDC->drawId);
|
||||
depthPassMask[sample] = vCoverageMask[sample];
|
||||
stencilPassMask[sample] = vCoverageMask[sample];
|
||||
depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
|
||||
vZ[sample], pDepthSample, vCoverageMask[sample],
|
||||
pStencilSample, &stencilPassMask[sample]);
|
||||
RDTSC_STOP(BEDepthBucket, 0, 0);
|
||||
//AR_END(BEDepthBucket, 0);
|
||||
|
||||
// early-exit if no pixels passed depth or earlyZ is forced on
|
||||
if(psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample]))
|
||||
@@ -525,6 +528,9 @@ struct PixelRateZTestLoop
|
||||
|
||||
private:
|
||||
// functor inputs
|
||||
DRAW_CONTEXT* pDC;
|
||||
uint32_t workerId;
|
||||
|
||||
const SWR_TRIANGLE_DESC& work;
|
||||
const BarycentricCoeffs& coeffs;
|
||||
const API_STATE& state;
|
||||
|
||||
@@ -181,24 +181,27 @@ void Clip(const float *pTriangle, const float *pAttribs, int numAttribs, float *
|
||||
|
||||
void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
|
||||
{
|
||||
RDTSC_START(FEClipTriangles);
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
AR_BEGIN(FEClipTriangles, pDC->drawId);
|
||||
Clipper<3> clipper(workerId, pDC);
|
||||
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
|
||||
RDTSC_STOP(FEClipTriangles, 1, 0);
|
||||
AR_END(FEClipTriangles, 1);
|
||||
}
|
||||
|
||||
void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
|
||||
{
|
||||
RDTSC_START(FEClipLines);
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
AR_BEGIN(FEClipLines, pDC->drawId);
|
||||
Clipper<2> clipper(workerId, pDC);
|
||||
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
|
||||
RDTSC_STOP(FEClipLines, 1, 0);
|
||||
AR_END(FEClipLines, 1);
|
||||
}
|
||||
void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
|
||||
{
|
||||
RDTSC_START(FEClipPoints);
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
AR_BEGIN(FEClipPoints, pDC->drawId);
|
||||
Clipper<1> clipper(workerId, pDC);
|
||||
clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx);
|
||||
RDTSC_STOP(FEClipPoints, 1, 0);
|
||||
AR_END(FEClipPoints, 1);
|
||||
}
|
||||
|
||||
|
||||
@@ -501,6 +501,10 @@ public:
|
||||
// execute the clipper stage
|
||||
void ExecuteStage(PA_STATE& pa, simdvector prim[], uint32_t primMask, simdscalari primId, simdscalari viewportIdx)
|
||||
{
|
||||
SWR_ASSERT(pa.pDC != nullptr);
|
||||
|
||||
SWR_CONTEXT *pContext = pa.pDC->pContext;
|
||||
|
||||
// set up binner based on PA state
|
||||
PFN_PROCESS_PRIMS pfnBinner;
|
||||
switch (pa.binTopology)
|
||||
@@ -548,11 +552,11 @@ public:
|
||||
|
||||
if (clipMask)
|
||||
{
|
||||
RDTSC_START(FEGuardbandClip);
|
||||
AR_BEGIN(FEGuardbandClip, pa.pDC->drawId);
|
||||
// we have to clip tris, execute the clipper, which will also
|
||||
// call the binner
|
||||
ClipSimd(vMask(primMask), vMask(clipMask), pa, primId, viewportIdx);
|
||||
RDTSC_STOP(FEGuardbandClip, 1, 0);
|
||||
AR_END(FEGuardbandClip, 1);
|
||||
}
|
||||
else if (validMask)
|
||||
{
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
#include "common/simdintrin.h"
|
||||
#include "core/threads.h"
|
||||
#include "ringbuffer.h"
|
||||
#include "archrast/archrast.h"
|
||||
|
||||
// x.8 fixed point precision values
|
||||
#define FIXED_POINT_SHIFT 8
|
||||
@@ -515,15 +516,30 @@ struct SWR_CONTEXT
|
||||
#define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.statsFE.name += count; }
|
||||
|
||||
// ArchRast instrumentation framework
|
||||
#ifdef KNOB_ENABLE_AR
|
||||
#define AR_WORKER_CTX pDC->pContext->pArContext[workerId]
|
||||
#define AR_API_CTX pDC->pContext->pArContext[pContext->NumWorkerThreads]
|
||||
#define AR_WORKER_CTX pContext->pArContext[workerId]
|
||||
#define AR_API_CTX pContext->pArContext[pContext->NumWorkerThreads]
|
||||
|
||||
#define AR_BEGIN(ctx, type, id) ArchRast::dispatch(ctx, ArchRast::Start(ArchRast::type, id))
|
||||
#define AR_END(ctx, type, count) ArchRast::dispatch(ctx, ArchRast::End(ArchRast::type, count))
|
||||
#define AR_EVENT(ctx, event) ArchRast::dispatch(ctx, ArchRast::event)
|
||||
#ifdef KNOB_ENABLE_AR
|
||||
#define _AR_BEGIN(ctx, type, id) ArchRast::dispatch(ctx, ArchRast::Start(ArchRast::type, id))
|
||||
#define _AR_END(ctx, type, count) ArchRast::dispatch(ctx, ArchRast::End(ArchRast::type, count))
|
||||
#define _AR_EVENT(ctx, event) ArchRast::dispatch(ctx, ArchRast::event)
|
||||
#else
|
||||
#define AR_BEGIN(ctx, type, id)
|
||||
#define AR_END(ctx, type, id)
|
||||
#define AR_EVENT(ctx, event)
|
||||
#endif
|
||||
#ifdef KNOB_ENABLE_RDTSC
|
||||
#define _AR_BEGIN(ctx, type, id) (void)ctx; RDTSC_START(type)
|
||||
#define _AR_END(ctx, type, id) RDTSC_STOP(type, id, 0)
|
||||
#else
|
||||
#define _AR_BEGIN(ctx, type, id) (void)ctx
|
||||
#define _AR_END(ctx, type, id)
|
||||
#endif
|
||||
#define _AR_EVENT(ctx, event)
|
||||
#endif
|
||||
|
||||
// Use these macros for api thread.
|
||||
#define AR_API_BEGIN(type, id) _AR_BEGIN(AR_API_CTX, type, id)
|
||||
#define AR_API_END(type, count) _AR_END(AR_API_CTX, type, count)
|
||||
#define AR_API_EVENT(event) _AR_EVENT(AR_API_CTX, event)
|
||||
|
||||
// Use these macros for worker threads.
|
||||
#define AR_BEGIN(type, id) _AR_BEGIN(AR_WORKER_CTX, type, id)
|
||||
#define AR_END(type, count) _AR_END(AR_WORKER_CTX, type, count)
|
||||
#define AR_EVENT(event) _AR_EVENT(AR_WORKER_CTX, event)
|
||||
|
||||
@@ -130,7 +130,7 @@ void ProcessStoreTiles(
|
||||
uint32_t workerId,
|
||||
void *pUserData)
|
||||
{
|
||||
RDTSC_START(FEProcessStoreTiles);
|
||||
AR_BEGIN(FEProcessStoreTiles, pDC->drawId);
|
||||
MacroTileMgr *pTileMgr = pDC->pTileMgr;
|
||||
STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData;
|
||||
|
||||
@@ -155,7 +155,7 @@ void ProcessStoreTiles(
|
||||
}
|
||||
}
|
||||
|
||||
RDTSC_STOP(FEProcessStoreTiles, 0, pDC->drawId);
|
||||
AR_END(FEProcessStoreTiles, 0);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@@ -171,7 +171,7 @@ void ProcessDiscardInvalidateTiles(
|
||||
uint32_t workerId,
|
||||
void *pUserData)
|
||||
{
|
||||
RDTSC_START(FEProcessInvalidateTiles);
|
||||
AR_BEGIN(FEProcessInvalidateTiles, pDC->drawId);
|
||||
DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
|
||||
MacroTileMgr *pTileMgr = pDC->pTileMgr;
|
||||
|
||||
@@ -210,7 +210,7 @@ void ProcessDiscardInvalidateTiles(
|
||||
}
|
||||
}
|
||||
|
||||
RDTSC_STOP(FEProcessInvalidateTiles, 0, pDC->drawId);
|
||||
AR_END(FEProcessInvalidateTiles, 0);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@@ -542,7 +542,9 @@ static void StreamOut(
|
||||
uint32_t* pPrimData,
|
||||
uint32_t streamIndex)
|
||||
{
|
||||
RDTSC_START(FEStreamout);
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(FEStreamout, pDC->drawId);
|
||||
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
const SWR_STREAMOUT_STATE &soState = state.soState;
|
||||
@@ -615,7 +617,7 @@ static void StreamOut(
|
||||
UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded);
|
||||
UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten);
|
||||
|
||||
RDTSC_STOP(FEStreamout, 1, 0);
|
||||
AR_END(FEStreamout, 1);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@@ -698,7 +700,9 @@ static void GeometryShaderStage(
|
||||
uint32_t* pSoPrimData,
|
||||
simdscalari primID)
|
||||
{
|
||||
RDTSC_START(FEGeometryShader);
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(FEGeometryShader, pDC->drawId);
|
||||
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
const SWR_GS_STATE* pState = &state.gsState;
|
||||
@@ -895,7 +899,7 @@ static void GeometryShaderStage(
|
||||
UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount);
|
||||
UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated);
|
||||
|
||||
RDTSC_STOP(FEGeometryShader, 1, 0);
|
||||
AR_END(FEGeometryShader, 1);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@@ -990,6 +994,7 @@ static void TessellationStages(
|
||||
uint32_t* pSoPrimData,
|
||||
simdscalari primID)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
const SWR_TS_STATE& tsState = state.tsState;
|
||||
|
||||
@@ -1053,9 +1058,9 @@ static void TessellationStages(
|
||||
hsContext.mask = GenerateMask(numPrims);
|
||||
|
||||
// Run the HS
|
||||
RDTSC_START(FEHullShader);
|
||||
AR_BEGIN(FEHullShader, pDC->drawId);
|
||||
state.pfnHsFunc(GetPrivateState(pDC), &hsContext);
|
||||
RDTSC_STOP(FEHullShader, 0, 0);
|
||||
AR_END(FEHullShader, 0);
|
||||
|
||||
UPDATE_STAT_FE(HsInvocations, numPrims);
|
||||
|
||||
@@ -1065,9 +1070,9 @@ static void TessellationStages(
|
||||
{
|
||||
// Run Tessellator
|
||||
SWR_TS_TESSELLATED_DATA tsData = { 0 };
|
||||
RDTSC_START(FETessellation);
|
||||
AR_BEGIN(FETessellation, pDC->drawId);
|
||||
TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData);
|
||||
RDTSC_STOP(FETessellation, 0, 0);
|
||||
AR_END(FETessellation, 0);
|
||||
|
||||
if (tsData.NumPrimitives == 0)
|
||||
{
|
||||
@@ -1107,9 +1112,9 @@ static void TessellationStages(
|
||||
{
|
||||
dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations);
|
||||
|
||||
RDTSC_START(FEDomainShader);
|
||||
AR_BEGIN(FEDomainShader, pDC->drawId);
|
||||
state.pfnDsFunc(GetPrivateState(pDC), &dsContext);
|
||||
RDTSC_STOP(FEDomainShader, 0, 0);
|
||||
AR_END(FEDomainShader, 0);
|
||||
|
||||
dsInvocations += KNOB_SIMD_WIDTH;
|
||||
}
|
||||
@@ -1142,12 +1147,12 @@ static void TessellationStages(
|
||||
if (HasRastT::value)
|
||||
{
|
||||
simdvector prim[3]; // Only deal with triangles, lines, or points
|
||||
RDTSC_START(FEPAAssemble);
|
||||
AR_BEGIN(FEPAAssemble, pDC->drawId);
|
||||
#if SWR_ENABLE_ASSERTS
|
||||
bool assemble =
|
||||
#endif
|
||||
tessPa.Assemble(VERTEX_POSITION_SLOT, prim);
|
||||
RDTSC_STOP(FEPAAssemble, 1, 0);
|
||||
AR_END(FEPAAssemble, 1);
|
||||
SWR_ASSERT(assemble);
|
||||
|
||||
SWR_ASSERT(pfnClipFunc);
|
||||
@@ -1196,7 +1201,7 @@ void ProcessDraw(
|
||||
}
|
||||
#endif
|
||||
|
||||
RDTSC_START(FEProcessDraw);
|
||||
AR_BEGIN(FEProcessDraw, pDC->drawId);
|
||||
|
||||
DRAW_WORK& work = *(DRAW_WORK*)pUserData;
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
@@ -1334,9 +1339,9 @@ void ProcessDraw(
|
||||
{
|
||||
|
||||
// 1. Execute FS/VS for a single SIMD.
|
||||
RDTSC_START(FEFetchShader);
|
||||
AR_BEGIN(FEFetchShader, pDC->drawId);
|
||||
state.pfnFetchFunc(fetchInfo, vin);
|
||||
RDTSC_STOP(FEFetchShader, 0, 0);
|
||||
AR_END(FEFetchShader, 0);
|
||||
|
||||
// forward fetch generated vertex IDs to the vertex shader
|
||||
vsContext.VertexID = fetchInfo.VertexID;
|
||||
@@ -1356,9 +1361,9 @@ void ProcessDraw(
|
||||
if (!KNOB_TOSS_FETCH)
|
||||
#endif
|
||||
{
|
||||
RDTSC_START(FEVertexShader);
|
||||
AR_BEGIN(FEVertexShader, pDC->drawId);
|
||||
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext);
|
||||
RDTSC_STOP(FEVertexShader, 0, 0);
|
||||
AR_END(FEVertexShader, 0);
|
||||
|
||||
UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
|
||||
}
|
||||
@@ -1369,9 +1374,9 @@ void ProcessDraw(
|
||||
{
|
||||
simdvector prim[MAX_NUM_VERTS_PER_PRIM];
|
||||
// PaAssemble returns false if there is not enough verts to assemble.
|
||||
RDTSC_START(FEPAAssemble);
|
||||
AR_BEGIN(FEPAAssemble, pDC->drawId);
|
||||
bool assemble = pa.Assemble(VERTEX_POSITION_SLOT, prim);
|
||||
RDTSC_STOP(FEPAAssemble, 1, 0);
|
||||
AR_END(FEPAAssemble, 1);
|
||||
|
||||
#if KNOB_ENABLE_TOSS_POINTS
|
||||
if (!KNOB_TOSS_FETCH)
|
||||
@@ -1428,7 +1433,7 @@ void ProcessDraw(
|
||||
pa.Reset();
|
||||
}
|
||||
|
||||
RDTSC_STOP(FEProcessDraw, numPrims * work.numInstances, pDC->drawId);
|
||||
AR_END(FEProcessDraw, numPrims * work.numInstances);
|
||||
}
|
||||
|
||||
struct FEDrawChooser
|
||||
@@ -1787,7 +1792,9 @@ void BinTriangles(
|
||||
simdscalari primID,
|
||||
simdscalari viewportIdx)
|
||||
{
|
||||
RDTSC_START(FEBinTriangles);
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(FEBinTriangles, pDC->drawId);
|
||||
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
const SWR_RASTSTATE& rastState = state.rastState;
|
||||
@@ -2168,7 +2175,7 @@ void BinTriangles(
|
||||
}
|
||||
|
||||
endBinTriangles:
|
||||
RDTSC_STOP(FEBinTriangles, 1, 0);
|
||||
AR_END(FEBinTriangles, 1);
|
||||
}
|
||||
|
||||
struct FEBinTrianglesChooser
|
||||
@@ -2204,7 +2211,9 @@ void BinPoints(
|
||||
simdscalari primID,
|
||||
simdscalari viewportIdx)
|
||||
{
|
||||
RDTSC_START(FEBinPoints);
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(FEBinPoints, pDC->drawId);
|
||||
|
||||
simdvector& primVerts = prim[0];
|
||||
|
||||
@@ -2519,10 +2528,7 @@ void BinPoints(
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
RDTSC_STOP(FEBinPoints, 1, 0);
|
||||
AR_END(FEBinPoints, 1);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@@ -2542,7 +2548,9 @@ void BinLines(
|
||||
simdscalari primID,
|
||||
simdscalari viewportIdx)
|
||||
{
|
||||
RDTSC_START(FEBinLines);
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
AR_BEGIN(FEBinLines, pDC->drawId);
|
||||
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
const SWR_RASTSTATE& rastState = state.rastState;
|
||||
@@ -2765,5 +2773,5 @@ void BinLines(
|
||||
|
||||
endBinLines:
|
||||
|
||||
RDTSC_STOP(FEBinLines, 1, 0);
|
||||
AR_END(FEBinLines, 1);
|
||||
}
|
||||
|
||||
@@ -758,7 +758,7 @@ INLINE bool TrivialAcceptTest<std::false_type>(const int mask0, const int mask1,
|
||||
template <typename RT, typename ValidEdgeMaskT, typename InputCoverageT>
|
||||
struct GenerateSVInnerCoverage
|
||||
{
|
||||
INLINE GenerateSVInnerCoverage(DRAW_CONTEXT*, EDGE*, double*, uint64_t &){};
|
||||
INLINE GenerateSVInnerCoverage(DRAW_CONTEXT*, uint32_t, EDGE*, double*, uint64_t &){};
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@@ -768,8 +768,10 @@ struct GenerateSVInnerCoverage
|
||||
template <typename RT>
|
||||
struct GenerateSVInnerCoverage<RT, AllEdgesValidT, InnerConservativeCoverageT>
|
||||
{
|
||||
INLINE GenerateSVInnerCoverage(DRAW_CONTEXT* pDC, EDGE* pRastEdges, double* pStartQuadEdges, uint64_t &innerCoverageMask)
|
||||
INLINE GenerateSVInnerCoverage(DRAW_CONTEXT* pDC, uint32_t workerId, EDGE* pRastEdges, double* pStartQuadEdges, uint64_t &innerCoverageMask)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
double startQuadEdgesAdj[RT::NumEdgesT::value];
|
||||
for(uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
|
||||
{
|
||||
@@ -777,9 +779,9 @@ struct GenerateSVInnerCoverage<RT, AllEdgesValidT, InnerConservativeCoverageT>
|
||||
}
|
||||
|
||||
// not trivial accept or reject, must rasterize full tile
|
||||
RDTSC_START(BERasterizePartial);
|
||||
AR_BEGIN(BERasterizePartial, pDC->drawId);
|
||||
innerCoverageMask = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdgesAdj, pRastEdges);
|
||||
RDTSC_STOP(BERasterizePartial, 0, 0);
|
||||
AR_END(BERasterizePartial, 0);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -835,6 +837,7 @@ struct UpdateEdgeMasksInnerConservative<RT, ValidEdgeMaskT, InnerConservativeCov
|
||||
template <typename RT>
|
||||
void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
const TRIANGLE_WORK_DESC &workDesc = *((TRIANGLE_WORK_DESC*)pDesc);
|
||||
#if KNOB_ENABLE_TOSS_POINTS
|
||||
if (KNOB_TOSS_BIN_TRIS)
|
||||
@@ -842,9 +845,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
RDTSC_START(BERasterizeTriangle);
|
||||
AR_BEGIN(BERasterizeTriangle, pDC->drawId);
|
||||
AR_BEGIN(BETriangleSetup, pDC->drawId);
|
||||
|
||||
RDTSC_START(BETriangleSetup);
|
||||
const API_STATE &state = GetApiState(pDC);
|
||||
const SWR_RASTSTATE &rastState = state.rastState;
|
||||
const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
|
||||
@@ -1009,7 +1012,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
|
||||
SWR_ASSERT(intersect.xmin <= intersect.xmax && intersect.ymin <= intersect.ymax && intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 && intersect.ymax >= 0);
|
||||
|
||||
RDTSC_STOP(BETriangleSetup, 0, pDC->drawId);
|
||||
AR_END(BETriangleSetup, 0);
|
||||
|
||||
// update triangle desc
|
||||
uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
|
||||
@@ -1022,11 +1025,11 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
if (numTilesX == 0 || numTilesY == 0)
|
||||
{
|
||||
RDTSC_EVENT(BEEmptyTriangle, 1, 0);
|
||||
RDTSC_STOP(BERasterizeTriangle, 1, 0);
|
||||
AR_END(BERasterizeTriangle, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
RDTSC_START(BEStepSetup);
|
||||
AR_BEGIN(BEStepSetup, pDC->drawId);
|
||||
|
||||
// Step to pixel center of top-left pixel of the triangle bbox
|
||||
// Align intersect bbox (top/left) to raster tile's (top/left).
|
||||
@@ -1134,7 +1137,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
}
|
||||
}
|
||||
|
||||
RDTSC_STOP(BEStepSetup, 0, pDC->drawId);
|
||||
AR_END(BEStepSetup, 0);
|
||||
|
||||
uint32_t tY = minTileY;
|
||||
uint32_t tX = minTileX;
|
||||
@@ -1226,14 +1229,14 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
}
|
||||
|
||||
// not trivial accept or reject, must rasterize full tile
|
||||
RDTSC_START(BERasterizePartial);
|
||||
AR_BEGIN(BERasterizePartial, pDC->drawId);
|
||||
triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdges, rastEdges);
|
||||
RDTSC_STOP(BERasterizePartial, 0, 0);
|
||||
AR_END(BERasterizePartial, 0);
|
||||
|
||||
triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum];
|
||||
|
||||
// Output SV InnerCoverage, if needed
|
||||
GenerateSVInnerCoverage<RT, typename RT::ValidEdgeMaskT, typename RT::InputCoverageT>(pDC, rastEdges, startQuadEdges, triDesc.innerCoverageMask);
|
||||
GenerateSVInnerCoverage<RT, typename RT::ValidEdgeMaskT, typename RT::InputCoverageT>(pDC, workerId, rastEdges, startQuadEdges, triDesc.innerCoverageMask);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -1264,9 +1267,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
UnrollerL<1, RT::MT::numSamples, 1>::step(copyCoverage);
|
||||
}
|
||||
|
||||
RDTSC_START(BEPixelBackend);
|
||||
AR_BEGIN(BEPixelBackend, pDC->drawId);
|
||||
backendFuncs.pfnBackend(pDC, workerId, tileX << KNOB_TILE_X_DIM_SHIFT, tileY << KNOB_TILE_Y_DIM_SHIFT, triDesc, renderBuffers);
|
||||
RDTSC_STOP(BEPixelBackend, 0, 0);
|
||||
AR_END(BEPixelBackend, 0);
|
||||
}
|
||||
|
||||
// step to the next tile in X
|
||||
@@ -1285,7 +1288,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
StepRasterTileY<RT>(state.psState.numRenderTargets, renderBuffers, currentRenderBufferRow);
|
||||
}
|
||||
|
||||
RDTSC_STOP(BERasterizeTriangle, 1, 0);
|
||||
AR_END(BERasterizeTriangle, 1);
|
||||
}
|
||||
|
||||
void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData)
|
||||
@@ -1420,6 +1423,8 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile,
|
||||
|
||||
void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
|
||||
#if KNOB_ENABLE_TOSS_POINTS
|
||||
if (KNOB_TOSS_BIN_TRIS)
|
||||
{
|
||||
@@ -1475,9 +1480,9 @@ void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTi
|
||||
GetRenderHotTiles(pDC, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
|
||||
renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
|
||||
|
||||
RDTSC_START(BEPixelBackend);
|
||||
AR_BEGIN(BEPixelBackend, pDC->drawId);
|
||||
backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers);
|
||||
RDTSC_STOP(BEPixelBackend, 0, 0);
|
||||
AR_END(BEPixelBackend, 0);
|
||||
}
|
||||
|
||||
// Get pointers to hot tile memory for color RT, depth, stencil
|
||||
@@ -1561,6 +1566,7 @@ INLINE void StepRasterTileY(uint32_t NumRT, RenderOutputBuffers &buffers, Render
|
||||
|
||||
void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
const TRIANGLE_WORK_DESC &workDesc = *((TRIANGLE_WORK_DESC*)pData);
|
||||
#if KNOB_ENABLE_TOSS_POINTS
|
||||
if (KNOB_TOSS_BIN_TRIS)
|
||||
@@ -1570,7 +1576,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
|
||||
#endif
|
||||
|
||||
// bloat line to two tris and call the triangle rasterizer twice
|
||||
RDTSC_START(BERasterizeLine);
|
||||
AR_BEGIN(BERasterizeLine, pDC->drawId);
|
||||
|
||||
const API_STATE &state = GetApiState(pDC);
|
||||
const SWR_RASTSTATE &rastState = state.rastState;
|
||||
@@ -1763,7 +1769,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
|
||||
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
|
||||
}
|
||||
|
||||
RDTSC_STOP(BERasterizeLine, 1, 0);
|
||||
AR_END(BERasterizeLine, 1);
|
||||
}
|
||||
|
||||
struct RasterizerChooser
|
||||
|
||||
@@ -501,7 +501,7 @@ void WorkOnFifoBE(
|
||||
{
|
||||
BE_WORK *pWork;
|
||||
|
||||
RDTSC_START(WorkerFoundWork);
|
||||
AR_BEGIN(WorkerFoundWork, pDC->drawId);
|
||||
|
||||
uint32_t numWorkItems = tile->getNumQueued();
|
||||
SWR_ASSERT(numWorkItems);
|
||||
@@ -510,7 +510,7 @@ void WorkOnFifoBE(
|
||||
SWR_ASSERT(pWork);
|
||||
if (pWork->type == DRAW)
|
||||
{
|
||||
pContext->pHotTileMgr->InitializeHotTiles(pContext, pDC, tileID);
|
||||
pContext->pHotTileMgr->InitializeHotTiles(pContext, pDC, workerId, tileID);
|
||||
}
|
||||
|
||||
while ((pWork = tile->peek()) != nullptr)
|
||||
@@ -518,7 +518,7 @@ void WorkOnFifoBE(
|
||||
pWork->pfnWork(pDC, workerId, tileID, &pWork->desc);
|
||||
tile->dequeue();
|
||||
}
|
||||
RDTSC_STOP(WorkerFoundWork, numWorkItems, pDC->drawId);
|
||||
AR_END(WorkerFoundWork, numWorkItems);
|
||||
|
||||
_ReadWriteBarrier();
|
||||
|
||||
@@ -735,12 +735,12 @@ DWORD workerThreadMain(LPVOID pData)
|
||||
break;
|
||||
}
|
||||
|
||||
RDTSC_START(WorkerWaitForThreadEvent);
|
||||
AR_BEGIN(WorkerWaitForThreadEvent, 0);
|
||||
|
||||
pContext->FifosNotEmpty.wait(lock);
|
||||
lock.unlock();
|
||||
|
||||
RDTSC_STOP(WorkerWaitForThreadEvent, 0, 0);
|
||||
AR_END(WorkerWaitForThreadEvent, 0);
|
||||
|
||||
if (pContext->threadPool.inThreadShutdown)
|
||||
{
|
||||
@@ -750,9 +750,9 @@ DWORD workerThreadMain(LPVOID pData)
|
||||
|
||||
if (IsBEThread)
|
||||
{
|
||||
RDTSC_START(WorkerWorkOnFifoBE);
|
||||
AR_BEGIN(WorkerWorkOnFifoBE, 0);
|
||||
WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
|
||||
RDTSC_STOP(WorkerWorkOnFifoBE, 0, 0);
|
||||
AR_END(WorkerWorkOnFifoBE, 0);
|
||||
|
||||
WorkOnCompute(pContext, workerId, curDrawBE);
|
||||
}
|
||||
|
||||
@@ -281,7 +281,7 @@ void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
|
||||
/// to avoid unnecessary setup every triangle
|
||||
/// @todo support deferred clear
|
||||
/// @param pCreateInfo - pointer to creation info.
|
||||
void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID)
|
||||
void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID)
|
||||
{
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
|
||||
@@ -301,19 +301,19 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
|
||||
|
||||
if (pHotTile->state == HOTTILE_INVALID)
|
||||
{
|
||||
RDTSC_START(BELoadTiles);
|
||||
AR_BEGIN(BELoadTiles, pDC->drawId);
|
||||
// invalid hottile before draw requires a load from surface before we can draw to it
|
||||
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
RDTSC_STOP(BELoadTiles, 0, 0);
|
||||
AR_END(BELoadTiles, 0);
|
||||
}
|
||||
else if (pHotTile->state == HOTTILE_CLEAR)
|
||||
{
|
||||
RDTSC_START(BELoadTiles);
|
||||
AR_BEGIN(BELoadTiles, pDC->drawId);
|
||||
// Clear the tile.
|
||||
ClearColorHotTile(pHotTile);
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
RDTSC_STOP(BELoadTiles, 0, 0);
|
||||
AR_END(BELoadTiles, 0);
|
||||
}
|
||||
colorHottileEnableMask &= ~(1 << rtSlot);
|
||||
}
|
||||
@@ -324,19 +324,19 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
|
||||
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
|
||||
if (pHotTile->state == HOTTILE_INVALID)
|
||||
{
|
||||
RDTSC_START(BELoadTiles);
|
||||
AR_BEGIN(BELoadTiles, pDC->drawId);
|
||||
// invalid hottile before draw requires a load from surface before we can draw to it
|
||||
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
RDTSC_STOP(BELoadTiles, 0, 0);
|
||||
AR_END(BELoadTiles, 0);
|
||||
}
|
||||
else if (pHotTile->state == HOTTILE_CLEAR)
|
||||
{
|
||||
RDTSC_START(BELoadTiles);
|
||||
AR_BEGIN(BELoadTiles, pDC->drawId);
|
||||
// Clear the tile.
|
||||
ClearDepthHotTile(pHotTile);
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
RDTSC_STOP(BELoadTiles, 0, 0);
|
||||
AR_END(BELoadTiles, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -346,19 +346,19 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
|
||||
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
|
||||
if (pHotTile->state == HOTTILE_INVALID)
|
||||
{
|
||||
RDTSC_START(BELoadTiles);
|
||||
AR_BEGIN(BELoadTiles, pDC->drawId);
|
||||
// invalid hottile before draw requires a load from surface before we can draw to it
|
||||
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
RDTSC_STOP(BELoadTiles, 0, 0);
|
||||
AR_END(BELoadTiles, 0);
|
||||
}
|
||||
else if (pHotTile->state == HOTTILE_CLEAR)
|
||||
{
|
||||
RDTSC_START(BELoadTiles);
|
||||
AR_BEGIN(BELoadTiles, pDC->drawId);
|
||||
// Clear the tile.
|
||||
ClearStencilHotTile(pHotTile);
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
RDTSC_STOP(BELoadTiles, 0, 0);
|
||||
AR_END(BELoadTiles, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -291,7 +291,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID);
|
||||
void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID);
|
||||
|
||||
HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
|
||||
uint32_t renderTargetArrayIndex = 0);
|
||||
|
||||
Reference in New Issue
Block a user