swr/rast: Optimize late/bindless JIT of samplers

Add per-worker thread private data to all shader calls
Add per-worker sampler cache and jit context
Add late LoadTexel JIT support
Add per-worker-thread Sampler / LoadTexel JIT

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
George Kyriazis
2018-04-10 01:05:19 -05:00
parent ec7154abc0
commit 0899122c03
25 changed files with 213 additions and 105 deletions
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -122,6 +122,11 @@ HANDLE SwrCreateContext(
pContext->apiThreadInfo.numAPIThreadsPerCore = 1;
}
if (pCreateInfo->pWorkerPrivateState)
{
pContext->workerPrivateState = *pCreateInfo->pWorkerPrivateState;
}
memset(&pContext->WaitLock, 0, sizeof(pContext->WaitLock));
memset(&pContext->FifosNotEmpty, 0, sizeof(pContext->FifosNotEmpty));
new (&pContext->WaitLock) std::mutex();
+35 -12
View File
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -115,7 +115,8 @@ struct SWR_RECT
/// @param x - destination x coordinate
/// @param y - destination y coordinate
/// @param pDstHotTile - pointer to the hot tile surface
typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, SWR_FORMAT dstFormat,
typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData,
SWR_FORMAT dstFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t *pDstHotTile);
@@ -127,7 +128,8 @@ typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, SWR_FORMAT dstForma
/// @param x - destination x coordinate
/// @param y - destination y coordinate
/// @param pSrcHotTile - pointer to the hot tile surface
typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, SWR_FORMAT srcFormat,
typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData,
SWR_FORMAT srcFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t *pSrcHotTile);
@@ -139,7 +141,7 @@ typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, SWR_FORMAT srcForm
/// @param y - destination y coordinate
/// @param renderTargetArrayIndex - render target array offset from arrayIndex
/// @param pClearColor - pointer to the hot tile's clear value
typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext,
typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData,
SWR_RENDERTARGET_ATTACHMENT rtIndex,
uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, const float* pClearColor);
@@ -208,6 +210,21 @@ struct SWR_API_THREADING_INFO
// Independent of KNOB_MAX_THREADS_PER_CORE.
};
//////////////////////////////////////////////////////////////////////////
/// SWR_WORKER_PRIVATE_STATE
/// Data used to allocate per-worker thread private data. A pointer
/// to this data will be passed in to each shader function.
/////////////////////////////////////////////////////////////////////////
struct SWR_WORKER_PRIVATE_STATE
{
typedef void (SWR_API *PFN_WORKER_DATA)(HANDLE hWorkerPrivateData, uint32_t iWorkerNum);
size_t perWorkerPrivateStateSize; ///< Amount of data to allocate per-worker
PFN_WORKER_DATA pfnInitWorkerData; ///< Init function for worker data. If null
///< worker data will be initialized to 0.
PFN_WORKER_DATA pfnFinishWorkerData; ///< Finish / destroy function for worker data.
///< Can be null.
};
//////////////////////////////////////////////////////////////////////////
/// SWR_CREATECONTEXT_INFO
@@ -216,7 +233,10 @@ struct SWR_CREATECONTEXT_INFO
{
// External functions (e.g. sampler) need per draw context state.
// Use SwrGetPrivateContextState() to access private state.
uint32_t privateStateSize;
size_t privateStateSize;
// Optional per-worker state, can be NULL for no worker-private data
SWR_WORKER_PRIVATE_STATE* pWorkerPrivateState;
// Callback functions
PFN_LOAD_TILE pfnLoadTile;
@@ -229,23 +249,23 @@ struct SWR_CREATECONTEXT_INFO
// Pointer to rdtsc buckets mgr returned to the caller.
// Only populated when KNOB_ENABLE_RDTSC is set
BucketManager* pBucketMgr;
BucketManager* pBucketMgr;
// Output: size required memory passed to for SwrSaveState / SwrRestoreState
size_t contextSaveSize;
size_t contextSaveSize;
// ArchRast event manager.
HANDLE hArEventManager;
HANDLE hArEventManager;
// Input (optional): Threading info that overrides any set KNOB values.
SWR_THREADING_INFO* pThreadInfo;
SWR_THREADING_INFO* pThreadInfo;
// Input (optional}: Info for reserving API threads
SWR_API_THREADING_INFO* pApiThreadInfo;
// Input (optional): Info for reserving API threads
SWR_API_THREADING_INFO* pApiThreadInfo;
// Input: if set to non-zero value, overrides KNOB value for maximum
// number of draws in flight
uint32_t MAX_DRAWS_IN_FLIGHT;
uint32_t MAX_DRAWS_IN_FLIGHT;
};
//////////////////////////////////////////////////////////////////////////
@@ -714,6 +734,7 @@ SWR_FUNC(void, SwrInit);
/// @param x, y - Coordinates to raster tile.
/// @param pDstHotTile - Pointer to Hot Tile
SWR_FUNC(void, SwrLoadHotTile,
HANDLE hWorkerPrivateData,
const SWR_SURFACE_STATE *pSrcSurface,
SWR_FORMAT dstFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
@@ -728,6 +749,7 @@ SWR_FUNC(void, SwrLoadHotTile,
/// @param x, y - Coordinates to raster tile.
/// @param pSrcHotTile - Pointer to Hot Tile
SWR_FUNC(void, SwrStoreHotTileToSurface,
HANDLE hWorkerPrivateData,
SWR_SURFACE_STATE *pDstSurface,
SWR_FORMAT srcFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
@@ -741,6 +763,7 @@ SWR_FUNC(void, SwrStoreHotTileToSurface,
/// @param x, y - Coordinates to raster tile.
/// @param pClearColor - Pointer to clear color
SWR_FUNC(void, SwrStoreHotTileClear,
HANDLE hWorkerPrivateData,
SWR_SURFACE_STATE *pDstSurface,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
uint32_t x,
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -78,7 +78,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
csContext.pScratchSpace = (uint8_t*)pScratchSpace;
csContext.scratchSpacePerSimd = pDC->pState->state.scratchSpaceSize;
state.pfnCsFunc(GetPrivateState(pDC), &csContext);
state.pfnCsFunc(GetPrivateState(pDC), pContext->threadPool.pThreadData[workerId].pWorkerPrivateData, &csContext);
UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup);
AR_EVENT(CSStats(csContext.stats.numInstExecuted));
@@ -107,6 +107,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
SWR_RENDERTARGET_ATTACHMENT attachment)
{
SWR_CONTEXT *pContext = pDC->pContext;
HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
RDTSC_BEGIN(BEStoreTiles, pDC->drawId);
@@ -139,7 +140,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[srcFormat];
SWR_ASSERT(pfnClearTiles != nullptr);
pfnClearTiles(pDC, attachment, macroTile, pHotTile->renderTargetArrayIndex, pHotTile->clearData, pDesc->rect);
pfnClearTiles(pDC, hWorkerPrivateData, attachment, macroTile, pHotTile->renderTargetArrayIndex, pHotTile->clearData, pDesc->rect);
}
if (pHotTile->state == HOTTILE_DIRTY || pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY)
@@ -147,7 +148,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
int32_t destX = KNOB_MACROTILE_X_DIM * x;
int32_t destY = KNOB_MACROTILE_Y_DIM * y;
pContext->pfnStoreTile(GetPrivateState(pDC), srcFormat,
pContext->pfnStoreTile(GetPrivateState(pDC), hWorkerPrivateData, srcFormat,
attachment, destX, destY, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
}
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -41,7 +41,7 @@ void ProcessStoreTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTil
void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
void ProcessShutdownBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, uint32_t, DWORD[4], const SWR_RECT& rect);
typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, HANDLE hWorkerData, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, uint32_t, DWORD[4], const SWR_RECT& rect);
extern PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS];
extern PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_COUNT];
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -76,7 +76,7 @@ void ClearRasterTile(uint8_t *pTileBuffer, simd16vector &value)
#endif
template<SWR_FORMAT format>
INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, uint32_t renderTargetArrayIndex, DWORD clear[4], const SWR_RECT& rect)
INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, HANDLE hWorkerPrivateData, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, uint32_t renderTargetArrayIndex, DWORD clear[4], const SWR_RECT& rect)
{
// convert clear color to hottile format
// clear color is in RGBA float/uint32
@@ -146,7 +146,7 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui
const uint32_t macroTileRowStep = (KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * rasterTileStep;
const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, macroTile, rt, true, numSamples, renderTargetArrayIndex);
HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, hWorkerPrivateData, macroTile, rt, true, numSamples, renderTargetArrayIndex);
uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, clearTile.xmin, clearTile.ymin)) * numSamples;
uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
@@ -172,6 +172,7 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui
void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
{
SWR_CONTEXT *pContext = pDC->pContext;
HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
if (KNOB_FAST_CLEAR)
{
@@ -191,7 +192,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
{
mask &= ~(1 << rt);
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)rt, true, numSamples, pClear->renderTargetArrayIndex);
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, (SWR_RENDERTARGET_ATTACHMENT)rt, true, numSamples, pClear->renderTargetArrayIndex);
// All we want to do here is to mark the hot tile as being in a "needs clear" state.
pHotTile->clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]);
@@ -204,14 +205,14 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
{
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_DEPTH, true, numSamples, pClear->renderTargetArrayIndex);
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, SWR_ATTACHMENT_DEPTH, true, numSamples, pClear->renderTargetArrayIndex);
pHotTile->clearData[0] = *(DWORD*)&pClear->clearDepth;
pHotTile->state = HOTTILE_CLEAR;
}
if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
{
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_STENCIL, true, numSamples, pClear->renderTargetArrayIndex);
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, SWR_ATTACHMENT_STENCIL, true, numSamples, pClear->renderTargetArrayIndex);
pHotTile->clearData[0] = pClear->clearStencil;
pHotTile->state = HOTTILE_CLEAR;
@@ -242,7 +243,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
{
mask &= ~(1 << rt);
pfnClearTiles(pDC, (SWR_RENDERTARGET_ATTACHMENT)rt, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
pfnClearTiles(pDC, hWorkerPrivateData, (SWR_RENDERTARGET_ATTACHMENT)rt, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
}
}
@@ -253,7 +254,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT];
SWR_ASSERT(pfnClearTiles != nullptr);
pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
pfnClearTiles(pDC, hWorkerPrivateData, SWR_ATTACHMENT_DEPTH, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
}
if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
@@ -262,7 +263,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
clearData[0] = pClear->clearStencil;
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT];
pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
pfnClearTiles(pDC, hWorkerPrivateData, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
}
RDTSC_END(BEClear, 1);
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -884,6 +884,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
BarycentricCoeffs coeffs;
SetupBarycentricCoeffs(&coeffs, work);
SWR_CONTEXT *pContext = pDC->pContext;
void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
SWR_PS_CONTEXT psContext;
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
SetupPixelShaderContext<T>(&psContext, samplePos, work);
@@ -964,7 +967,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
// execute pixel shader
RDTSC_BEGIN(BEPixelShader, pDC->drawId);
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
RDTSC_END(BEPixelShader, 0);
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -43,6 +43,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
RDTSC_BEGIN(BESampleRateBackend, pDC->drawId);
RDTSC_BEGIN(BESetup, pDC->drawId);
void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
const API_STATE &state = GetApiState(pDC);
BarycentricCoeffs coeffs;
@@ -163,7 +164,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
// execute pixel shader
RDTSC_BEGIN(BEPixelShader, pDC->drawId);
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
RDTSC_END(BEPixelShader, 0);
// update stats
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -43,6 +43,8 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
RDTSC_BEGIN(BESingleSampleBackend, pDC->drawId);
RDTSC_BEGIN(BESetup, pDC->drawId);
void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
const API_STATE &state = GetApiState(pDC);
BarycentricCoeffs coeffs;
@@ -146,7 +148,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
// execute pixel shader
RDTSC_BEGIN(BEPixelShader, pDC->drawId);
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
RDTSC_END(BEPixelShader, 0);
// update stats
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -483,6 +483,7 @@ struct SWR_CONTEXT
THREAD_POOL threadPool; // Thread pool associated with this context
SWR_THREADING_INFO threadInfo;
SWR_API_THREADING_INFO apiThreadInfo;
SWR_WORKER_PRIVATE_STATE workerPrivateState;
uint32_t MAX_DRAWS_IN_FLIGHT;
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -799,6 +799,8 @@ static void GeometryShaderStage(
{
RDTSC_BEGIN(FEGeometryShader, pDC->drawId);
void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
const API_STATE& state = GetApiState(pDC);
const SWR_GS_STATE* pState = &state.gsState;
SWR_GS_CONTEXT gsContext;
@@ -850,7 +852,7 @@ static void GeometryShaderStage(
gsContext.mask = GenerateMask(numInputPrims);
// execute the geometry shader
state.pfnGsFunc(GetPrivateState(pDC), &gsContext);
state.pfnGsFunc(GetPrivateState(pDC), pWorkerData, &gsContext);
AR_EVENT(GSStats(gsContext.stats.numInstExecuted));
for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i)
@@ -1169,6 +1171,7 @@ static void TessellationStages(
{
const API_STATE& state = GetApiState(pDC);
const SWR_TS_STATE& tsState = state.tsState;
void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
SWR_ASSERT(gt_pTessellationThreadData);
@@ -1250,7 +1253,7 @@ static void TessellationStages(
// Run the HS
RDTSC_BEGIN(FEHullShader, pDC->drawId);
state.pfnHsFunc(GetPrivateState(pDC), &hsContext);
state.pfnHsFunc(GetPrivateState(pDC), pWorkerData, &hsContext);
RDTSC_END(FEHullShader, 0);
UPDATE_STAT_FE(HsInvocations, numPrims);
@@ -1315,7 +1318,7 @@ static void TessellationStages(
dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations);
RDTSC_BEGIN(FEDomainShader, pDC->drawId);
state.pfnDsFunc(GetPrivateState(pDC), &dsContext);
state.pfnDsFunc(GetPrivateState(pDC), pWorkerData, &dsContext);
RDTSC_END(FEDomainShader, 0);
AR_EVENT(DSStats(dsContext.stats.numInstExecuted));
@@ -1521,6 +1524,8 @@ void ProcessDraw(
RDTSC_BEGIN(FEProcessDraw, pDC->drawId);
void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
DRAW_WORK& work = *(DRAW_WORK*)pUserData;
const API_STATE& state = GetApiState(pDC);
@@ -1738,13 +1743,13 @@ void ProcessDraw(
// 1. Execute FS/VS for a single SIMD.
RDTSC_BEGIN(FEFetchShader, pDC->drawId);
#if USE_SIMD16_SHADERS
state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_lo, vin);
state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_lo, vin);
#else
state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_lo, vin_lo);
state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_lo, vin_lo);
if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
{
state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_hi, vin_hi);
state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_hi, vin_hi);
}
#endif
RDTSC_END(FEFetchShader, 0);
@@ -1793,15 +1798,15 @@ void ProcessDraw(
{
RDTSC_BEGIN(FEVertexShader, pDC->drawId);
#if USE_SIMD16_VS
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo);
state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo);
AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted));
#else
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo);
state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo);
AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted));
if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
{
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_hi);
state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_hi);
AR_EVENT(VSStats(vsContext_hi.stats.numInstExecuted));
}
#endif
@@ -1994,7 +1999,7 @@ void ProcessDraw(
// 1. Execute FS/VS for a single SIMD.
RDTSC_BEGIN(FEFetchShader, pDC->drawId);
state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo, vout);
state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo, vout);
RDTSC_END(FEFetchShader, 0);
// forward fetch generated vertex IDs to the vertex shader
@@ -2016,7 +2021,7 @@ void ProcessDraw(
#endif
{
RDTSC_BEGIN(FEVertexShader, pDC->drawId);
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext);
state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext);
RDTSC_END(FEVertexShader, 0);
UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -302,7 +302,7 @@ void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTi
triDesc.Z[0] = triDesc.Z[1] = triDesc.Z[2] = z;
RenderOutputBuffers renderBuffers;
GetRenderHotTiles(pDC, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
GetRenderHotTiles(pDC, workerId, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
RDTSC_BEGIN(BEPixelBackend, pDC->drawId);
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -40,7 +40,7 @@
extern PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT][STATE_VALID_TRI_EDGE_COUNT][2];
template <uint32_t numSamples = 1>
void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t x, uint32_t y, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex);
void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroID, uint32_t x, uint32_t y, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex);
template <typename RT>
void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers &buffers);
template <typename RT>
@@ -1145,7 +1145,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
uint32_t maxX = maxTileX;
RenderOutputBuffers renderBuffers, currentRenderBufferRow;
GetRenderHotTiles<RT::MT::numSamples>(pDC, macroTile, minTileX, minTileY, renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
GetRenderHotTiles<RT::MT::numSamples>(pDC, workerId, macroTile, minTileX, minTileY, renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
currentRenderBufferRow = renderBuffers;
// rasterize and generate coverage masks per sample
@@ -1297,10 +1297,11 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
// Get pointers to hot tile memory for color RT, depth, stencil
template <uint32_t numSamples>
void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint32_t tileY, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex)
void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroID, uint32_t tileX, uint32_t tileY, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex)
{
const API_STATE& state = GetApiState(pDC);
SWR_CONTEXT *pContext = pDC->pContext;
HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
uint32_t mx, my;
MacroTileMgr::getTileIndices(macroID, mx, my);
@@ -1316,7 +1317,7 @@ void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint
uint32_t colorHottileEnableMask = state.colorHottileEnable;
while(_BitScanForward(&rtSlot, colorHottileEnableMask))
{
HOTTILE *pColor = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true,
HOTTILE *pColor = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true,
numSamples, renderTargetArrayIndex);
pColor->state = HOTTILE_DIRTY;
renderBuffers.pColor[rtSlot] = pColor->pBuffer + offset;
@@ -1328,7 +1329,7 @@ void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint
const uint32_t pitch = KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
uint32_t offset = ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp> >(pitch, tileX, tileY);
offset*=numSamples;
HOTTILE *pDepth = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true,
HOTTILE *pDepth = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true,
numSamples, renderTargetArrayIndex);
pDepth->state = HOTTILE_DIRTY;
SWR_ASSERT(pDepth->pBuffer != nullptr);
@@ -1339,7 +1340,7 @@ void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint
const uint32_t pitch = KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
uint32_t offset = ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp> >(pitch, tileX, tileY);
offset*=numSamples;
HOTTILE* pStencil = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true,
HOTTILE* pStencil = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true,
numSamples, renderTargetArrayIndex);
pStencil->state = HOTTILE_DIRTY;
SWR_ASSERT(pStencil->pBuffer != nullptr);
@@ -911,18 +911,18 @@ struct SWR_BLEND_CONTEXT
/// FUNCTION POINTERS FOR SHADERS
#if USE_SIMD16_SHADERS
typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
#else
typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
#endif
typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);
typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext);
typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext);
typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_VS_CONTEXT* pVsContext);
typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_HS_CONTEXT* pHsContext);
typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_DS_CONTEXT* pDsContext);
typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_GS_CONTEXT* pGsContext);
typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_CS_CONTEXT* pCsContext);
typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT *pContext);
typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT *pContext);
typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(SWR_BLEND_CONTEXT*);
typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -42,6 +42,7 @@
#endif
#include "common/os.h"
#include "core/api.h"
#include "context.h"
#include "frontend.h"
#include "backend.h"
@@ -1128,7 +1129,8 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
if (pContext->threadInfo.SINGLE_THREADED)
{
return;
numAPIReservedThreads = 0;
numThreads = 1;
}
if (numAPIReservedThreads)
@@ -1139,6 +1141,10 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
{
numAPIReservedThreads = 0;
}
else
{
memset(pPool->pApiThreadData, 0, sizeof(THREAD_DATA) * numAPIReservedThreads);
}
}
pPool->numReservedThreads = numAPIReservedThreads;
@@ -1147,8 +1153,37 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
pPool->pThreadData = new (std::nothrow) THREAD_DATA[pPool->numThreads];
SWR_ASSERT(pPool->pThreadData);
memset(pPool->pThreadData, 0, sizeof(THREAD_DATA) * pPool->numThreads);
pPool->numaMask = 0;
// Allocate worker private data
pPool->pWorkerPrivateDataArray = nullptr;
if (pContext->workerPrivateState.perWorkerPrivateStateSize)
{
size_t perWorkerSize = AlignUpPow2(pContext->workerPrivateState.perWorkerPrivateStateSize, 64);
size_t totalSize = perWorkerSize * pPool->numThreads;
if (totalSize)
{
pPool->pWorkerPrivateDataArray = AlignedMalloc(totalSize, 64);
SWR_ASSERT(pPool->pWorkerPrivateDataArray);
void* pWorkerData = pPool->pWorkerPrivateDataArray;
for (uint32_t i = 0; i < pPool->numThreads; ++i)
{
pPool->pThreadData[i].pWorkerPrivateData = pWorkerData;
if (pContext->workerPrivateState.pfnInitWorkerData)
{
pContext->workerPrivateState.pfnInitWorkerData(pWorkerData, i);
}
pWorkerData = PtrAdd(pWorkerData, perWorkerSize);
}
}
}
if (pContext->threadInfo.SINGLE_THREADED)
{
return;
}
pPool->pThreads = new (std::nothrow) THREAD_PTR[pPool->numThreads];
SWR_ASSERT(pPool->pThreads);
@@ -1293,13 +1328,13 @@ void StartThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
/// @param pPool - pointer to thread pool object.
void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
{
if (!pContext->threadInfo.SINGLE_THREADED)
{
// Wait for all threads to finish
SwrWaitForIdle(pContext);
// Wait for all threads to finish
SwrWaitForIdle(pContext);
// Wait for threads to finish and destroy them
for (uint32_t t = 0; t < pPool->numThreads; ++t)
// Wait for threads to finish and destroy them
for (uint32_t t = 0; t < pPool->numThreads; ++t)
{
if (!pContext->threadInfo.SINGLE_THREADED)
{
// Detach from thread. Cannot join() due to possibility (in Windows) of code
// in some DLLMain(THREAD_DETATCH case) blocking the thread until after this returns.
@@ -1307,10 +1342,17 @@ void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
delete(pPool->pThreads[t]);
}
delete[] pPool->pThreads;
// Clean up data used by threads
delete[] pPool->pThreadData;
delete[] pPool->pApiThreadData;
if (pContext->workerPrivateState.pfnFinishWorkerData)
{
pContext->workerPrivateState.pfnFinishWorkerData(pPool->pThreadData[t].pWorkerPrivateData, t);
}
}
delete[] pPool->pThreads;
// Clean up data used by threads
delete[] pPool->pThreadData;
delete[] pPool->pApiThreadData;
AlignedFree(pPool->pWorkerPrivateDataArray);
}
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -35,9 +35,11 @@ typedef std::thread* THREAD_PTR;
struct SWR_CONTEXT;
struct DRAW_CONTEXT;
struct SWR_WORKER_PRIVATE_STATE;
struct THREAD_DATA
{
void* pWorkerPrivateData;// Pointer to per-worker private data
uint32_t procGroupId; // Will always be 0 for non-Windows OS
uint32_t threadId; // within the procGroup for Windows
uint32_t numaId; // NUMA node id
@@ -55,6 +57,7 @@ struct THREAD_POOL
uint32_t numThreads;
uint32_t numaMask;
THREAD_DATA *pThreadData;
void* pWorkerPrivateDataArray; // All memory for worker private data
uint32_t numReservedThreads; // Number of threads reserved for API use
THREAD_DATA *pApiThreadData;
};
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -83,7 +83,7 @@ void MacroTileMgr::markTileComplete(uint32_t id)
tile.mWorkItemsBE = 0;
}
HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples,
HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HANDLE hWorkerPrivateData, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples,
uint32_t renderTargetArrayIndex)
{
uint32_t x, y;
@@ -163,11 +163,11 @@ HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32
if (hotTile.state == HOTTILE_DIRTY)
{
pContext->pfnStoreTile(GetPrivateState(pDC), format, attachment,
pContext->pfnStoreTile(GetPrivateState(pDC), hWorkerPrivateData, format, attachment,
x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, hotTile.renderTargetArrayIndex, hotTile.pBuffer);
}
pContext->pfnLoadTile(GetPrivateState(pDC), format, attachment,
pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, format, attachment,
x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, renderTargetArrayIndex, hotTile.pBuffer);
hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
@@ -379,6 +379,7 @@ void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID)
{
const API_STATE& state = GetApiState(pDC);
HANDLE hWorkerPrivateData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
uint32_t x, y;
MacroTileMgr::getTileIndices(macroID, x, y);
@@ -392,13 +393,13 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
uint32_t colorHottileEnableMask = state.colorHottileEnable;
while (_BitScanForward(&rtSlot, colorHottileEnableMask))
{
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, numSamples);
HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
RDTSC_END(BELoadTiles, 0);
}
@@ -416,12 +417,12 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
// check depth if enabled
if (state.depthHottileEnable)
{
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
RDTSC_END(BELoadTiles, 0);
}
@@ -438,12 +439,12 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
// check stencil if enabled
if (state.stencilHottileEnable)
{
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
RDTSC_END(BELoadTiles, 0);
}
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -305,7 +305,7 @@ public:
void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID);
HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HANDLE hWorkerData, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
uint32_t renderTargetArrayIndex = 0);
HOTTILE *GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1);
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -19,13 +19,13 @@
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*
* @file JitManager.cpp
*
*
* @brief Implementation if the Jit Manager.
*
*
* Notes:
*
*
******************************************************************************/
#include "jit_pch.hpp"
@@ -66,7 +66,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
InitializeNativeTargetAsmPrinter();
InitializeNativeTargetDisassembler();
TargetOptions tOpts;
tOpts.AllowFPOpFusion = FPOpFusion::Fast;
tOpts.NoInfsFPMath = false;
@@ -125,6 +125,8 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
// llvm5 is picky and does not take a void * type
fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
fsArgs.push_back(Type::getInt8PtrTy(mContext));
fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
#if USE_SIMD16_SHADERS
fsArgs.push_back(PointerType::get(Gen_simd16vertex(this), 0));
@@ -158,7 +160,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
void JitManager::SetupNewModule()
{
SWR_ASSERT(mIsModuleFinalized == true && "Current module is not finalized!");
std::unique_ptr<Module> newModule(new Module("", mContext));
mpCurrentModule = newModule.get();
#if defined(_WIN32)
@@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -91,6 +91,7 @@ struct FetchJit : public BuilderGfxMem
void CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4]);
void ConvertFormat(SWR_FORMAT format, Value *texels[4]);
Value* mpWorkerData;
Value* mpFetchInfo;
};
@@ -113,6 +114,8 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
privateContext->setName("privateContext");
SetPrivateContext(privateContext);
mpWorkerData = &*argitr; ++argitr;
mpWorkerData->setName("pWorkerData");
mpFetchInfo = &*argitr; ++argitr;
mpFetchInfo->setName("fetchInfo");
Value* pVtxOut = &*argitr;
@@ -1097,8 +1100,7 @@ Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex)
Value* vIndexMask = ICMP_SGT(vMaxIndex, vIndexOffsets);
// Load the indices; OOB loads 0
pIndices = BITCAST(pIndices, PointerType::get(mSimdInt32Ty, 0));
return MASKED_LOAD(pIndices, 4, vIndexMask, VIMMED1(0));
return MASKED_LOAD(pIndices, 4, vIndexMask, VIMMED1(0), "vIndices", PointerType::get(mSimdInt32Ty, 0), GFX_MEM_CLIENT_FETCH);
}
//////////////////////////////////////////////////////////////////////////
@@ -153,6 +153,7 @@ struct StoreMacroTileClear
/// @param x, y - Coordinates to raster tile.
/// @param pClearColor - Pointer to clear color
void SwrStoreHotTileClear(
HANDLE hWorkerPrivateData,
SWR_SURFACE_STATE *pDstSurface,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
UINT x,
@@ -54,6 +54,7 @@ static std::mutex sBucketMutex;
/// @param x, y - Coordinates to raster tile.
/// @param pDstHotTile - Pointer to Hot Tile
void SwrLoadHotTile(
HANDLE hWorkerPrivateData,
const SWR_SURFACE_STATE *pSrcSurface,
SWR_FORMAT dstFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
@@ -59,6 +59,7 @@ static std::vector<int32_t> sBuckets(NUM_SWR_FORMATS, -1);
/// @param x, y - Coordinates to raster tile.
/// @param pSrcHotTile - Pointer to Hot Tile
void SwrStoreHotTileToSurface(
HANDLE hWorkerPrivateData,
SWR_SURFACE_STATE *pDstSurface,
SWR_FORMAT srcFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+6 -3
View File
@@ -25,6 +25,7 @@
INLINE void
swr_LoadHotTile(HANDLE hPrivateContext,
HANDLE hWorkerPrivateData,
SWR_FORMAT dstFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
UINT x, UINT y,
@@ -34,11 +35,12 @@ swr_LoadHotTile(HANDLE hPrivateContext,
swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
SWR_SURFACE_STATE *pSrcSurface = &pDC->renderTargets[renderTargetIndex];
pDC->pAPI->pfnSwrLoadHotTile(pSrcSurface, dstFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pDstHotTile);
pDC->pAPI->pfnSwrLoadHotTile(hWorkerPrivateData, pSrcSurface, dstFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pDstHotTile);
}
INLINE void
swr_StoreHotTile(HANDLE hPrivateContext,
HANDLE hWorkerPrivateData,
SWR_FORMAT srcFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
UINT x, UINT y,
@@ -48,11 +50,12 @@ swr_StoreHotTile(HANDLE hPrivateContext,
swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
SWR_SURFACE_STATE *pDstSurface = &pDC->renderTargets[renderTargetIndex];
pDC->pAPI->pfnSwrStoreHotTileToSurface(pDstSurface, srcFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pSrcHotTile);
pDC->pAPI->pfnSwrStoreHotTileToSurface(hWorkerPrivateData, pDstSurface, srcFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pSrcHotTile);
}
INLINE void
swr_StoreHotTileClear(HANDLE hPrivateContext,
HANDLE hWorkerPrivateData,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
UINT x,
UINT y,
@@ -63,5 +66,5 @@ swr_StoreHotTileClear(HANDLE hPrivateContext,
swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
SWR_SURFACE_STATE *pDstSurface = &pDC->renderTargets[renderTargetIndex];
pDC->pAPI->pfnSwrStoreHotTileClear(pDstSurface, renderTargetIndex, x, y, renderTargetArrayIndex, pClearColor);
pDC->pAPI->pfnSwrStoreHotTileClear(hWorkerPrivateData, pDstSurface, renderTargetIndex, x, y, renderTargetArrayIndex, pClearColor);
}
+9
View File
@@ -586,6 +586,7 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
PointerType::get(mInt8Ty, 0),
PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
FunctionType *vsFuncType =
FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false);
@@ -610,6 +611,8 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
auto argitr = pFunction->arg_begin();
Value *hPrivateData = &*argitr++;
hPrivateData->setName("hPrivateData");
Value *pWorkerData = &*argitr++;
pWorkerData->setName("pWorkerData");
Value *pGsCtx = &*argitr++;
pGsCtx->setName("gsCtx");
@@ -754,6 +757,7 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
PointerType::get(mInt8Ty, 0),
PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
FunctionType *vsFuncType =
FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false);
@@ -778,6 +782,8 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
auto argitr = pFunction->arg_begin();
Value *hPrivateData = &*argitr++;
hPrivateData->setName("hPrivateData");
Value *pWorkerData = &*argitr++;
pWorkerData->setName("pWorkerData");
Value *pVsCtx = &*argitr++;
pVsCtx->setName("vsCtx");
@@ -1037,6 +1043,7 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
PointerType::get(mInt8Ty, 0),
PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
FunctionType *funcType =
FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false);
@@ -1060,6 +1067,8 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
auto args = pFunction->arg_begin();
Value *hPrivateData = &*args++;
hPrivateData->setName("hPrivateData");
Value *pWorkerData = &*args++;
pWorkerData->setName("pWorkerData");
Value *pPS = &*args++;
pPS->setName("psCtx");