swr: [rasterizer core] Fix Compute workitem retirement

This commit is contained in:
Tim Rowley
2016-03-22 17:28:06 -06:00
parent 813e89c0cc
commit 0767e820fd
4 changed files with 22 additions and 31 deletions
+14 -23
View File
@@ -160,20 +160,12 @@ void WakeAllThreads(SWR_CONTEXT *pContext)
template<bool IsDraw>
void QueueWork(SWR_CONTEXT *pContext)
{
if (IsDraw)
{
// Each worker thread looks at a DC for both FE and BE work at different times, and so we
// multiply threadsDone by 2. When the threadsDone counter has reached 0, all workers
// have moved past this DC (i.e., each worker has checked this DC for both FE and BE work and
// then moved on if all work is done).
pContext->pCurDrawContext->threadsDone =
pContext->NumWorkerThreads ? pContext->NumWorkerThreads * 2 : 2;
}
else
{
pContext->pCurDrawContext->threadsDone =
pContext->NumWorkerThreads ? pContext->NumWorkerThreads : 1;
}
// Each worker thread looks at a DC for both FE and BE work at different times, and so we
// multiply threadsDone by 2. When the threadsDone counter has reached 0, all workers
// have moved past this DC (i.e., each worker has checked this DC for both FE and BE work and
// then moved on if all work is done).
pContext->pCurDrawContext->threadsDone =
pContext->NumWorkerThreads ? pContext->NumWorkerThreads * 2 : 2;
_ReadWriteBarrier();
{
@@ -201,10 +193,7 @@ void QueueWork(SWR_CONTEXT *pContext)
}
// Dequeue the work here, if not already done, since we're single threaded (i.e. no workers).
if (!pContext->dcRing.IsEmpty())
{
pContext->dcRing.Dequeue();
}
while (CompleteDrawContext(pContext, pContext->pCurDrawContext) > 0) {}
// restore csr
_mm_setcsr(mxcsr);
@@ -252,8 +241,6 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
uint32_t dsIndex = pContext->curStateId % KNOB_MAX_DRAWS_IN_FLIGHT;
pCurDrawContext->pState = &pContext->dsRing[dsIndex];
auto& stateArena = *(pCurDrawContext->pState->pArena);
// Copy previous state to current state.
if (pContext->pPrevDrawContext)
{
@@ -266,7 +253,9 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
{
CopyState(*pCurDrawContext->pState, *pPrevDrawContext->pState);
stateArena.Reset(true); // Reset memory.
// Should have been cleaned up previously
SWR_ASSERT(pCurDrawContext->pState->pArena->IsEmpty() == true);
pCurDrawContext->pState->pPrivateState = nullptr;
pContext->curStateId++; // Progress state ring index forward.
@@ -276,16 +265,18 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
// If it's a split draw then just copy the state pointer over
// since it's the same draw.
pCurDrawContext->pState = pPrevDrawContext->pState;
SWR_ASSERT(pPrevDrawContext->cleanupState == false);
}
}
else
{
stateArena.Reset(); // Reset memory.
SWR_ASSERT(pCurDrawContext->pState->pArena->IsEmpty() == true);
pContext->curStateId++; // Progress state ring index forward.
}
SWR_ASSERT(pCurDrawContext->pArena->IsEmpty() == true);
pCurDrawContext->dependency = 0;
pCurDrawContext->pArena->Reset();
pCurDrawContext->pContext = pContext;
pCurDrawContext->isCompute = false; // Dispatch has to set this to true.
@@ -51,10 +51,7 @@ public:
}
};
static const size_t ARENA_BLOCK_SHIFT = 5;
static const size_t ARENA_BLOCK_ALIGN = KNOB_SIMD_WIDTH * 4;
static_assert((1U << ARENA_BLOCK_SHIFT) == ARENA_BLOCK_ALIGN,
"Invalid value for ARENA_BLOCK_ALIGN/SHIFT");
static const size_t ARENA_BLOCK_ALIGN = 64;
struct ArenaBlock
{
@@ -65,7 +62,7 @@ static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
"Increase BLOCK_ALIGN size");
// Caching Allocator for Arena
template<uint32_t NumBucketsT = 1, uint32_t StartBucketBitT = 20>
template<uint32_t NumBucketsT = 4, uint32_t StartBucketBitT = 16>
struct CachingAllocatorT : DefaultAllocator
{
static uint32_t GetBucketId(size_t blockSize)
@@ -279,11 +279,10 @@ bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint64_t lastReti
return (pDC->dependency > lastRetiredDraw);
}
INLINE void CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
INLINE int64_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
{
int64_t result = InterlockedDecrement64(&pDC->threadsDone);
SWR_ASSERT(result >= 0);
if (result == 0)
{
@@ -299,6 +298,8 @@ INLINE void CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
pContext->dcRing.Dequeue(); // Remove from tail
}
return result;
}
INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint64_t& curDrawBE)
@@ -34,6 +34,7 @@
typedef std::thread* THREAD_PTR;
struct SWR_CONTEXT;
struct DRAW_CONTEXT;
struct THREAD_DATA
{
@@ -63,3 +64,4 @@ void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE, int numaNode);
void WorkOnFifoBE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE, TileSet &usedTiles);
void WorkOnCompute(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE);
int64_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC);