swr: [rasterizer core] Fix Compute workitem retirement
This commit is contained in:
@@ -160,20 +160,12 @@ void WakeAllThreads(SWR_CONTEXT *pContext)
|
||||
template<bool IsDraw>
|
||||
void QueueWork(SWR_CONTEXT *pContext)
|
||||
{
|
||||
if (IsDraw)
|
||||
{
|
||||
// Each worker thread looks at a DC for both FE and BE work at different times and so we
|
||||
// multiply threadsDone by 2. When the threadsDone counter has reached 0 then all workers
|
||||
// have moved past this DC. (i.e. Each worker has checked this DC for both FE and BE work and
|
||||
// then moved on if all work is done.)
|
||||
pContext->pCurDrawContext->threadsDone =
|
||||
pContext->NumWorkerThreads ? pContext->NumWorkerThreads * 2 : 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
pContext->pCurDrawContext->threadsDone =
|
||||
pContext->NumWorkerThreads ? pContext->NumWorkerThreads : 1;
|
||||
}
|
||||
// Each worker thread looks at a DC for both FE and BE work at different times and so we
|
||||
// multiply threadsDone by 2. When the threadsDone counter has reached 0 then all workers
|
||||
// have moved past this DC. (i.e. Each worker has checked this DC for both FE and BE work and
|
||||
// then moved on if all work is done.)
|
||||
pContext->pCurDrawContext->threadsDone =
|
||||
pContext->NumWorkerThreads ? pContext->NumWorkerThreads * 2 : 2;
|
||||
|
||||
_ReadWriteBarrier();
|
||||
{
|
||||
@@ -201,10 +193,7 @@ void QueueWork(SWR_CONTEXT *pContext)
|
||||
}
|
||||
|
||||
// Dequeue the work here, if not already done, since we're single threaded (i.e. no workers).
|
||||
if (!pContext->dcRing.IsEmpty())
|
||||
{
|
||||
pContext->dcRing.Dequeue();
|
||||
}
|
||||
while (CompleteDrawContext(pContext, pContext->pCurDrawContext) > 0) {}
|
||||
|
||||
// restore csr
|
||||
_mm_setcsr(mxcsr);
|
||||
@@ -252,8 +241,6 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
|
||||
uint32_t dsIndex = pContext->curStateId % KNOB_MAX_DRAWS_IN_FLIGHT;
|
||||
pCurDrawContext->pState = &pContext->dsRing[dsIndex];
|
||||
|
||||
auto& stateArena = *(pCurDrawContext->pState->pArena);
|
||||
|
||||
// Copy previous state to current state.
|
||||
if (pContext->pPrevDrawContext)
|
||||
{
|
||||
@@ -266,7 +253,9 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
|
||||
{
|
||||
CopyState(*pCurDrawContext->pState, *pPrevDrawContext->pState);
|
||||
|
||||
stateArena.Reset(true); // Reset memory.
|
||||
// Should have been cleaned up previously
|
||||
SWR_ASSERT(pCurDrawContext->pState->pArena->IsEmpty() == true);
|
||||
|
||||
pCurDrawContext->pState->pPrivateState = nullptr;
|
||||
|
||||
pContext->curStateId++; // Progress state ring index forward.
|
||||
@@ -276,16 +265,18 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
|
||||
// If it's a split draw then just copy the state pointer over
|
||||
// since it's the same draw.
|
||||
pCurDrawContext->pState = pPrevDrawContext->pState;
|
||||
SWR_ASSERT(pPrevDrawContext->cleanupState == false);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
stateArena.Reset(); // Reset memory.
|
||||
SWR_ASSERT(pCurDrawContext->pState->pArena->IsEmpty() == true);
|
||||
pContext->curStateId++; // Progress state ring index forward.
|
||||
}
|
||||
|
||||
SWR_ASSERT(pCurDrawContext->pArena->IsEmpty() == true);
|
||||
|
||||
pCurDrawContext->dependency = 0;
|
||||
pCurDrawContext->pArena->Reset();
|
||||
pCurDrawContext->pContext = pContext;
|
||||
pCurDrawContext->isCompute = false; // Dispatch has to set this to true.
|
||||
|
||||
|
||||
@@ -51,10 +51,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
static const size_t ARENA_BLOCK_SHIFT = 5;
|
||||
static const size_t ARENA_BLOCK_ALIGN = KNOB_SIMD_WIDTH * 4;
|
||||
static_assert((1U << ARENA_BLOCK_SHIFT) == ARENA_BLOCK_ALIGN,
|
||||
"Invalid value for ARENA_BLOCK_ALIGN/SHIFT");
|
||||
static const size_t ARENA_BLOCK_ALIGN = 64;
|
||||
|
||||
struct ArenaBlock
|
||||
{
|
||||
@@ -65,7 +62,7 @@ static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
|
||||
"Increase BLOCK_ALIGN size");
|
||||
|
||||
// Caching Allocator for Arena
|
||||
template<uint32_t NumBucketsT = 1, uint32_t StartBucketBitT = 20>
|
||||
template<uint32_t NumBucketsT = 4, uint32_t StartBucketBitT = 16>
|
||||
struct CachingAllocatorT : DefaultAllocator
|
||||
{
|
||||
static uint32_t GetBucketId(size_t blockSize)
|
||||
|
||||
@@ -279,11 +279,10 @@ bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint64_t lastReti
|
||||
return (pDC->dependency > lastRetiredDraw);
|
||||
}
|
||||
|
||||
|
||||
|
||||
INLINE void CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
|
||||
INLINE int64_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
|
||||
{
|
||||
int64_t result = InterlockedDecrement64(&pDC->threadsDone);
|
||||
SWR_ASSERT(result >= 0);
|
||||
|
||||
if (result == 0)
|
||||
{
|
||||
@@ -299,6 +298,8 @@ INLINE void CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
|
||||
|
||||
pContext->dcRing.Dequeue(); // Remove from tail
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint64_t& curDrawBE)
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
typedef std::thread* THREAD_PTR;
|
||||
|
||||
struct SWR_CONTEXT;
|
||||
struct DRAW_CONTEXT;
|
||||
|
||||
struct THREAD_DATA
|
||||
{
|
||||
@@ -63,3 +64,4 @@ void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
|
||||
void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawFE, int numaNode);
|
||||
void WorkOnFifoBE(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE, TileSet &usedTiles);
|
||||
void WorkOnCompute(SWR_CONTEXT *pContext, uint32_t workerId, uint64_t &curDrawBE);
|
||||
int64_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC);
|
||||
Reference in New Issue
Block a user