swr/rast: Optimize late/bindless JIT of samplers
- Add per-worker thread private data to all shader calls
- Add per-worker sampler cache and JIT context
- Add late LoadTexel JIT support
- Add per-worker-thread Sampler / LoadTexel JIT

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -122,6 +122,11 @@ HANDLE SwrCreateContext(
|
||||
pContext->apiThreadInfo.numAPIThreadsPerCore = 1;
|
||||
}
|
||||
|
||||
if (pCreateInfo->pWorkerPrivateState)
|
||||
{
|
||||
pContext->workerPrivateState = *pCreateInfo->pWorkerPrivateState;
|
||||
}
|
||||
|
||||
memset(&pContext->WaitLock, 0, sizeof(pContext->WaitLock));
|
||||
memset(&pContext->FifosNotEmpty, 0, sizeof(pContext->FifosNotEmpty));
|
||||
new (&pContext->WaitLock) std::mutex();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -115,7 +115,8 @@ struct SWR_RECT
|
||||
/// @param x - destination x coordinate
|
||||
/// @param y - destination y coordinate
|
||||
/// @param pDstHotTile - pointer to the hot tile surface
|
||||
typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, SWR_FORMAT dstFormat,
|
||||
typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData,
|
||||
SWR_FORMAT dstFormat,
|
||||
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
|
||||
uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t *pDstHotTile);
|
||||
|
||||
@@ -127,7 +128,8 @@ typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, SWR_FORMAT dstForma
|
||||
/// @param x - destination x coordinate
|
||||
/// @param y - destination y coordinate
|
||||
/// @param pSrcHotTile - pointer to the hot tile surface
|
||||
typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, SWR_FORMAT srcFormat,
|
||||
typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData,
|
||||
SWR_FORMAT srcFormat,
|
||||
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
|
||||
uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t *pSrcHotTile);
|
||||
|
||||
@@ -139,7 +141,7 @@ typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, SWR_FORMAT srcForm
|
||||
/// @param y - destination y coordinate
|
||||
/// @param renderTargetArrayIndex - render target array offset from arrayIndex
|
||||
/// @param pClearColor - pointer to the hot tile's clear value
|
||||
typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext,
|
||||
typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData,
|
||||
SWR_RENDERTARGET_ATTACHMENT rtIndex,
|
||||
uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, const float* pClearColor);
|
||||
|
||||
@@ -208,6 +210,21 @@ struct SWR_API_THREADING_INFO
|
||||
// Independent of KNOB_MAX_THREADS_PER_CORE.
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// SWR_WORKER_PRIVATE_STATE
|
||||
/// Data used to allocate per-worker thread private data. A pointer
|
||||
/// to this data will be passed in to each shader function.
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
struct SWR_WORKER_PRIVATE_STATE
|
||||
{
|
||||
/// Callback type shared by the init and finish hooks declared below.
/// Going by the parameter names, it receives the worker's private data
/// handle and the worker's index -- TODO confirm against the caller.
typedef void (SWR_API *PFN_WORKER_DATA)(HANDLE hWorkerPrivateData, uint32_t iWorkerNum);
|
||||
|
||||
size_t perWorkerPrivateStateSize; ///< Amount of data to allocate per-worker
|
||||
PFN_WORKER_DATA pfnInitWorkerData; ///< Init function for worker data. If null
|
||||
///< worker data will be initialized to 0.
|
||||
PFN_WORKER_DATA pfnFinishWorkerData; ///< Finish / destroy function for worker data.
|
||||
///< Can be null.
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// SWR_CREATECONTEXT_INFO
|
||||
@@ -216,7 +233,10 @@ struct SWR_CREATECONTEXT_INFO
|
||||
{
|
||||
// External functions (e.g. sampler) need per draw context state.
|
||||
// Use SwrGetPrivateContextState() to access private state.
|
||||
uint32_t privateStateSize;
|
||||
size_t privateStateSize;
|
||||
|
||||
// Optional per-worker state, can be NULL for no worker-private data
|
||||
SWR_WORKER_PRIVATE_STATE* pWorkerPrivateState;
|
||||
|
||||
// Callback functions
|
||||
PFN_LOAD_TILE pfnLoadTile;
|
||||
@@ -229,23 +249,23 @@ struct SWR_CREATECONTEXT_INFO
|
||||
|
||||
// Pointer to rdtsc buckets mgr returned to the caller.
|
||||
// Only populated when KNOB_ENABLE_RDTSC is set
|
||||
BucketManager* pBucketMgr;
|
||||
BucketManager* pBucketMgr;
|
||||
|
||||
// Output: size of the memory that must be passed to SwrSaveState / SwrRestoreState
|
||||
size_t contextSaveSize;
|
||||
size_t contextSaveSize;
|
||||
|
||||
// ArchRast event manager.
|
||||
HANDLE hArEventManager;
|
||||
HANDLE hArEventManager;
|
||||
|
||||
// Input (optional): Threading info that overrides any set KNOB values.
|
||||
SWR_THREADING_INFO* pThreadInfo;
|
||||
SWR_THREADING_INFO* pThreadInfo;
|
||||
|
||||
// Input (optional}: Info for reserving API threads
|
||||
SWR_API_THREADING_INFO* pApiThreadInfo;
|
||||
// Input (optional): Info for reserving API threads
|
||||
SWR_API_THREADING_INFO* pApiThreadInfo;
|
||||
|
||||
// Input: if set to non-zero value, overrides KNOB value for maximum
|
||||
// number of draws in flight
|
||||
uint32_t MAX_DRAWS_IN_FLIGHT;
|
||||
uint32_t MAX_DRAWS_IN_FLIGHT;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@@ -714,6 +734,7 @@ SWR_FUNC(void, SwrInit);
|
||||
/// @param x, y - Coordinates to raster tile.
|
||||
/// @param pDstHotTile - Pointer to Hot Tile
|
||||
SWR_FUNC(void, SwrLoadHotTile,
|
||||
HANDLE hWorkerPrivateData,
|
||||
const SWR_SURFACE_STATE *pSrcSurface,
|
||||
SWR_FORMAT dstFormat,
|
||||
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
|
||||
@@ -728,6 +749,7 @@ SWR_FUNC(void, SwrLoadHotTile,
|
||||
/// @param x, y - Coordinates to raster tile.
|
||||
/// @param pSrcHotTile - Pointer to Hot Tile
|
||||
SWR_FUNC(void, SwrStoreHotTileToSurface,
|
||||
HANDLE hWorkerPrivateData,
|
||||
SWR_SURFACE_STATE *pDstSurface,
|
||||
SWR_FORMAT srcFormat,
|
||||
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
|
||||
@@ -741,6 +763,7 @@ SWR_FUNC(void, SwrStoreHotTileToSurface,
|
||||
/// @param x, y - Coordinates to raster tile.
|
||||
/// @param pClearColor - Pointer to clear color
|
||||
SWR_FUNC(void, SwrStoreHotTileClear,
|
||||
HANDLE hWorkerPrivateData,
|
||||
SWR_SURFACE_STATE *pDstSurface,
|
||||
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
|
||||
uint32_t x,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -78,7 +78,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
|
||||
csContext.pScratchSpace = (uint8_t*)pScratchSpace;
|
||||
csContext.scratchSpacePerSimd = pDC->pState->state.scratchSpaceSize;
|
||||
|
||||
state.pfnCsFunc(GetPrivateState(pDC), &csContext);
|
||||
state.pfnCsFunc(GetPrivateState(pDC), pContext->threadPool.pThreadData[workerId].pWorkerPrivateData, &csContext);
|
||||
|
||||
UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup);
|
||||
AR_EVENT(CSStats(csContext.stats.numInstExecuted));
|
||||
@@ -107,6 +107,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
|
||||
SWR_RENDERTARGET_ATTACHMENT attachment)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
|
||||
|
||||
RDTSC_BEGIN(BEStoreTiles, pDC->drawId);
|
||||
|
||||
@@ -139,7 +140,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
|
||||
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[srcFormat];
|
||||
SWR_ASSERT(pfnClearTiles != nullptr);
|
||||
|
||||
pfnClearTiles(pDC, attachment, macroTile, pHotTile->renderTargetArrayIndex, pHotTile->clearData, pDesc->rect);
|
||||
pfnClearTiles(pDC, hWorkerPrivateData, attachment, macroTile, pHotTile->renderTargetArrayIndex, pHotTile->clearData, pDesc->rect);
|
||||
}
|
||||
|
||||
if (pHotTile->state == HOTTILE_DIRTY || pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY)
|
||||
@@ -147,7 +148,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
|
||||
int32_t destX = KNOB_MACROTILE_X_DIM * x;
|
||||
int32_t destY = KNOB_MACROTILE_Y_DIM * y;
|
||||
|
||||
pContext->pfnStoreTile(GetPrivateState(pDC), srcFormat,
|
||||
pContext->pfnStoreTile(GetPrivateState(pDC), hWorkerPrivateData, srcFormat,
|
||||
attachment, destX, destY, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -41,7 +41,7 @@ void ProcessStoreTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTil
|
||||
void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
|
||||
void ProcessShutdownBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
|
||||
|
||||
typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, uint32_t, DWORD[4], const SWR_RECT& rect);
|
||||
typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, HANDLE hWorkerData, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, uint32_t, DWORD[4], const SWR_RECT& rect);
|
||||
|
||||
extern PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS];
|
||||
extern PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_COUNT];
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -76,7 +76,7 @@ void ClearRasterTile(uint8_t *pTileBuffer, simd16vector &value)
|
||||
|
||||
#endif
|
||||
template<SWR_FORMAT format>
|
||||
INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, uint32_t renderTargetArrayIndex, DWORD clear[4], const SWR_RECT& rect)
|
||||
INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, HANDLE hWorkerPrivateData, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, uint32_t renderTargetArrayIndex, DWORD clear[4], const SWR_RECT& rect)
|
||||
{
|
||||
// convert clear color to hottile format
|
||||
// clear color is in RGBA float/uint32
|
||||
@@ -146,7 +146,7 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui
|
||||
const uint32_t macroTileRowStep = (KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * rasterTileStep;
|
||||
const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
|
||||
|
||||
HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, macroTile, rt, true, numSamples, renderTargetArrayIndex);
|
||||
HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, hWorkerPrivateData, macroTile, rt, true, numSamples, renderTargetArrayIndex);
|
||||
uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, clearTile.xmin, clearTile.ymin)) * numSamples;
|
||||
uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
|
||||
|
||||
@@ -172,6 +172,7 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui
|
||||
void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
|
||||
{
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
|
||||
|
||||
if (KNOB_FAST_CLEAR)
|
||||
{
|
||||
@@ -191,7 +192,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
|
||||
{
|
||||
mask &= ~(1 << rt);
|
||||
|
||||
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)rt, true, numSamples, pClear->renderTargetArrayIndex);
|
||||
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, (SWR_RENDERTARGET_ATTACHMENT)rt, true, numSamples, pClear->renderTargetArrayIndex);
|
||||
|
||||
// All we want to do here is to mark the hot tile as being in a "needs clear" state.
|
||||
pHotTile->clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]);
|
||||
@@ -204,14 +205,14 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
|
||||
|
||||
if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
|
||||
{
|
||||
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_DEPTH, true, numSamples, pClear->renderTargetArrayIndex);
|
||||
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, SWR_ATTACHMENT_DEPTH, true, numSamples, pClear->renderTargetArrayIndex);
|
||||
pHotTile->clearData[0] = *(DWORD*)&pClear->clearDepth;
|
||||
pHotTile->state = HOTTILE_CLEAR;
|
||||
}
|
||||
|
||||
if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
|
||||
{
|
||||
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_STENCIL, true, numSamples, pClear->renderTargetArrayIndex);
|
||||
HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, SWR_ATTACHMENT_STENCIL, true, numSamples, pClear->renderTargetArrayIndex);
|
||||
|
||||
pHotTile->clearData[0] = pClear->clearStencil;
|
||||
pHotTile->state = HOTTILE_CLEAR;
|
||||
@@ -242,7 +243,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
|
||||
{
|
||||
mask &= ~(1 << rt);
|
||||
|
||||
pfnClearTiles(pDC, (SWR_RENDERTARGET_ATTACHMENT)rt, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
|
||||
pfnClearTiles(pDC, hWorkerPrivateData, (SWR_RENDERTARGET_ATTACHMENT)rt, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -253,7 +254,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
|
||||
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT];
|
||||
SWR_ASSERT(pfnClearTiles != nullptr);
|
||||
|
||||
pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
|
||||
pfnClearTiles(pDC, hWorkerPrivateData, SWR_ATTACHMENT_DEPTH, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
|
||||
}
|
||||
|
||||
if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
|
||||
@@ -262,7 +263,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
|
||||
clearData[0] = pClear->clearStencil;
|
||||
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT];
|
||||
|
||||
pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
|
||||
pfnClearTiles(pDC, hWorkerPrivateData, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
|
||||
}
|
||||
|
||||
RDTSC_END(BEClear, 1);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -884,6 +884,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
||||
BarycentricCoeffs coeffs;
|
||||
SetupBarycentricCoeffs(&coeffs, work);
|
||||
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
|
||||
|
||||
SWR_PS_CONTEXT psContext;
|
||||
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
|
||||
SetupPixelShaderContext<T>(&psContext, samplePos, work);
|
||||
@@ -964,7 +967,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
|
||||
|
||||
// execute pixel shader
|
||||
RDTSC_BEGIN(BEPixelShader, pDC->drawId);
|
||||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
|
||||
state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
|
||||
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
|
||||
RDTSC_END(BEPixelShader, 0);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -43,6 +43,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
||||
RDTSC_BEGIN(BESampleRateBackend, pDC->drawId);
|
||||
RDTSC_BEGIN(BESetup, pDC->drawId);
|
||||
|
||||
void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
|
||||
const API_STATE &state = GetApiState(pDC);
|
||||
|
||||
BarycentricCoeffs coeffs;
|
||||
@@ -163,7 +164,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
|
||||
|
||||
// execute pixel shader
|
||||
RDTSC_BEGIN(BEPixelShader, pDC->drawId);
|
||||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
|
||||
state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
|
||||
RDTSC_END(BEPixelShader, 0);
|
||||
|
||||
// update stats
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -43,6 +43,8 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
||||
RDTSC_BEGIN(BESingleSampleBackend, pDC->drawId);
|
||||
RDTSC_BEGIN(BESetup, pDC->drawId);
|
||||
|
||||
void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
|
||||
|
||||
const API_STATE &state = GetApiState(pDC);
|
||||
|
||||
BarycentricCoeffs coeffs;
|
||||
@@ -146,7 +148,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
|
||||
|
||||
// execute pixel shader
|
||||
RDTSC_BEGIN(BEPixelShader, pDC->drawId);
|
||||
state.psState.pfnPixelShader(GetPrivateState(pDC), &psContext);
|
||||
state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
|
||||
RDTSC_END(BEPixelShader, 0);
|
||||
|
||||
// update stats
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -483,6 +483,7 @@ struct SWR_CONTEXT
|
||||
THREAD_POOL threadPool; // Thread pool associated with this context
|
||||
SWR_THREADING_INFO threadInfo;
|
||||
SWR_API_THREADING_INFO apiThreadInfo;
|
||||
SWR_WORKER_PRIVATE_STATE workerPrivateState;
|
||||
|
||||
uint32_t MAX_DRAWS_IN_FLIGHT;
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -799,6 +799,8 @@ static void GeometryShaderStage(
|
||||
{
|
||||
RDTSC_BEGIN(FEGeometryShader, pDC->drawId);
|
||||
|
||||
void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
|
||||
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
const SWR_GS_STATE* pState = &state.gsState;
|
||||
SWR_GS_CONTEXT gsContext;
|
||||
@@ -850,7 +852,7 @@ static void GeometryShaderStage(
|
||||
gsContext.mask = GenerateMask(numInputPrims);
|
||||
|
||||
// execute the geometry shader
|
||||
state.pfnGsFunc(GetPrivateState(pDC), &gsContext);
|
||||
state.pfnGsFunc(GetPrivateState(pDC), pWorkerData, &gsContext);
|
||||
AR_EVENT(GSStats(gsContext.stats.numInstExecuted));
|
||||
|
||||
for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i)
|
||||
@@ -1169,6 +1171,7 @@ static void TessellationStages(
|
||||
{
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
const SWR_TS_STATE& tsState = state.tsState;
|
||||
void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
|
||||
|
||||
SWR_ASSERT(gt_pTessellationThreadData);
|
||||
|
||||
@@ -1250,7 +1253,7 @@ static void TessellationStages(
|
||||
|
||||
// Run the HS
|
||||
RDTSC_BEGIN(FEHullShader, pDC->drawId);
|
||||
state.pfnHsFunc(GetPrivateState(pDC), &hsContext);
|
||||
state.pfnHsFunc(GetPrivateState(pDC), pWorkerData, &hsContext);
|
||||
RDTSC_END(FEHullShader, 0);
|
||||
|
||||
UPDATE_STAT_FE(HsInvocations, numPrims);
|
||||
@@ -1315,7 +1318,7 @@ static void TessellationStages(
|
||||
dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations);
|
||||
|
||||
RDTSC_BEGIN(FEDomainShader, pDC->drawId);
|
||||
state.pfnDsFunc(GetPrivateState(pDC), &dsContext);
|
||||
state.pfnDsFunc(GetPrivateState(pDC), pWorkerData, &dsContext);
|
||||
RDTSC_END(FEDomainShader, 0);
|
||||
|
||||
AR_EVENT(DSStats(dsContext.stats.numInstExecuted));
|
||||
@@ -1521,6 +1524,8 @@ void ProcessDraw(
|
||||
|
||||
RDTSC_BEGIN(FEProcessDraw, pDC->drawId);
|
||||
|
||||
void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
|
||||
|
||||
DRAW_WORK& work = *(DRAW_WORK*)pUserData;
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
|
||||
@@ -1738,13 +1743,13 @@ void ProcessDraw(
|
||||
// 1. Execute FS/VS for a single SIMD.
|
||||
RDTSC_BEGIN(FEFetchShader, pDC->drawId);
|
||||
#if USE_SIMD16_SHADERS
|
||||
state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_lo, vin);
|
||||
state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_lo, vin);
|
||||
#else
|
||||
state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_lo, vin_lo);
|
||||
state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_lo, vin_lo);
|
||||
|
||||
if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
|
||||
{
|
||||
state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo_hi, vin_hi);
|
||||
state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_hi, vin_hi);
|
||||
}
|
||||
#endif
|
||||
RDTSC_END(FEFetchShader, 0);
|
||||
@@ -1793,15 +1798,15 @@ void ProcessDraw(
|
||||
{
|
||||
RDTSC_BEGIN(FEVertexShader, pDC->drawId);
|
||||
#if USE_SIMD16_VS
|
||||
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo);
|
||||
state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo);
|
||||
AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted));
|
||||
#else
|
||||
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_lo);
|
||||
state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo);
|
||||
AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted));
|
||||
|
||||
if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
|
||||
{
|
||||
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext_hi);
|
||||
state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_hi);
|
||||
AR_EVENT(VSStats(vsContext_hi.stats.numInstExecuted));
|
||||
}
|
||||
#endif
|
||||
@@ -1994,7 +1999,7 @@ void ProcessDraw(
|
||||
|
||||
// 1. Execute FS/VS for a single SIMD.
|
||||
RDTSC_BEGIN(FEFetchShader, pDC->drawId);
|
||||
state.pfnFetchFunc(GetPrivateState(pDC), fetchInfo, vout);
|
||||
state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo, vout);
|
||||
RDTSC_END(FEFetchShader, 0);
|
||||
|
||||
// forward fetch generated vertex IDs to the vertex shader
|
||||
@@ -2016,7 +2021,7 @@ void ProcessDraw(
|
||||
#endif
|
||||
{
|
||||
RDTSC_BEGIN(FEVertexShader, pDC->drawId);
|
||||
state.pfnVertexFunc(GetPrivateState(pDC), &vsContext);
|
||||
state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext);
|
||||
RDTSC_END(FEVertexShader, 0);
|
||||
|
||||
UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -302,7 +302,7 @@ void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTi
|
||||
triDesc.Z[0] = triDesc.Z[1] = triDesc.Z[2] = z;
|
||||
|
||||
RenderOutputBuffers renderBuffers;
|
||||
GetRenderHotTiles(pDC, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
|
||||
GetRenderHotTiles(pDC, workerId, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
|
||||
renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
|
||||
|
||||
RDTSC_BEGIN(BEPixelBackend, pDC->drawId);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -40,7 +40,7 @@
|
||||
extern PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT][STATE_VALID_TRI_EDGE_COUNT][2];
|
||||
|
||||
template <uint32_t numSamples = 1>
|
||||
void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t x, uint32_t y, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex);
|
||||
void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroID, uint32_t x, uint32_t y, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex);
|
||||
template <typename RT>
|
||||
void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers &buffers);
|
||||
template <typename RT>
|
||||
@@ -1145,7 +1145,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
uint32_t maxX = maxTileX;
|
||||
|
||||
RenderOutputBuffers renderBuffers, currentRenderBufferRow;
|
||||
GetRenderHotTiles<RT::MT::numSamples>(pDC, macroTile, minTileX, minTileY, renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
|
||||
GetRenderHotTiles<RT::MT::numSamples>(pDC, workerId, macroTile, minTileX, minTileY, renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
|
||||
currentRenderBufferRow = renderBuffers;
|
||||
|
||||
// rasterize and generate coverage masks per sample
|
||||
@@ -1297,10 +1297,11 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
|
||||
// Get pointers to hot tile memory for color RT, depth, stencil
|
||||
template <uint32_t numSamples>
|
||||
void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint32_t tileY, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex)
|
||||
void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroID, uint32_t tileX, uint32_t tileY, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex)
|
||||
{
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
|
||||
|
||||
uint32_t mx, my;
|
||||
MacroTileMgr::getTileIndices(macroID, mx, my);
|
||||
@@ -1316,7 +1317,7 @@ void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint
|
||||
uint32_t colorHottileEnableMask = state.colorHottileEnable;
|
||||
while(_BitScanForward(&rtSlot, colorHottileEnableMask))
|
||||
{
|
||||
HOTTILE *pColor = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true,
|
||||
HOTTILE *pColor = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true,
|
||||
numSamples, renderTargetArrayIndex);
|
||||
pColor->state = HOTTILE_DIRTY;
|
||||
renderBuffers.pColor[rtSlot] = pColor->pBuffer + offset;
|
||||
@@ -1328,7 +1329,7 @@ void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint
|
||||
const uint32_t pitch = KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
|
||||
uint32_t offset = ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp> >(pitch, tileX, tileY);
|
||||
offset*=numSamples;
|
||||
HOTTILE *pDepth = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true,
|
||||
HOTTILE *pDepth = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true,
|
||||
numSamples, renderTargetArrayIndex);
|
||||
pDepth->state = HOTTILE_DIRTY;
|
||||
SWR_ASSERT(pDepth->pBuffer != nullptr);
|
||||
@@ -1339,7 +1340,7 @@ void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint
|
||||
const uint32_t pitch = KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
|
||||
uint32_t offset = ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp> >(pitch, tileX, tileY);
|
||||
offset*=numSamples;
|
||||
HOTTILE* pStencil = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true,
|
||||
HOTTILE* pStencil = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true,
|
||||
numSamples, renderTargetArrayIndex);
|
||||
pStencil->state = HOTTILE_DIRTY;
|
||||
SWR_ASSERT(pStencil->pBuffer != nullptr);
|
||||
|
||||
@@ -911,18 +911,18 @@ struct SWR_BLEND_CONTEXT
|
||||
/// FUNCTION POINTERS FOR SHADERS
|
||||
|
||||
#if USE_SIMD16_SHADERS
|
||||
typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
|
||||
typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
|
||||
#else
|
||||
typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
|
||||
typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
|
||||
#endif
|
||||
typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
|
||||
typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
|
||||
typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);
|
||||
typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext);
|
||||
typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext);
|
||||
typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_VS_CONTEXT* pVsContext);
|
||||
typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_HS_CONTEXT* pHsContext);
|
||||
typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_DS_CONTEXT* pDsContext);
|
||||
typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_GS_CONTEXT* pGsContext);
|
||||
typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_CS_CONTEXT* pCsContext);
|
||||
typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
|
||||
typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
|
||||
typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
|
||||
typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT *pContext);
|
||||
typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT *pContext);
|
||||
typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(SWR_BLEND_CONTEXT*);
|
||||
typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -42,6 +42,7 @@
|
||||
#endif
|
||||
|
||||
#include "common/os.h"
|
||||
#include "core/api.h"
|
||||
#include "context.h"
|
||||
#include "frontend.h"
|
||||
#include "backend.h"
|
||||
@@ -1128,7 +1129,8 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
|
||||
|
||||
if (pContext->threadInfo.SINGLE_THREADED)
|
||||
{
|
||||
return;
|
||||
numAPIReservedThreads = 0;
|
||||
numThreads = 1;
|
||||
}
|
||||
|
||||
if (numAPIReservedThreads)
|
||||
@@ -1139,6 +1141,10 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
|
||||
{
|
||||
numAPIReservedThreads = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
memset(pPool->pApiThreadData, 0, sizeof(THREAD_DATA) * numAPIReservedThreads);
|
||||
}
|
||||
}
|
||||
pPool->numReservedThreads = numAPIReservedThreads;
|
||||
|
||||
@@ -1147,8 +1153,37 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
|
||||
|
||||
pPool->pThreadData = new (std::nothrow) THREAD_DATA[pPool->numThreads];
|
||||
SWR_ASSERT(pPool->pThreadData);
|
||||
memset(pPool->pThreadData, 0, sizeof(THREAD_DATA) * pPool->numThreads);
|
||||
pPool->numaMask = 0;
|
||||
|
||||
// Allocate worker private data
|
||||
pPool->pWorkerPrivateDataArray = nullptr;
|
||||
if (pContext->workerPrivateState.perWorkerPrivateStateSize)
|
||||
{
|
||||
size_t perWorkerSize = AlignUpPow2(pContext->workerPrivateState.perWorkerPrivateStateSize, 64);
|
||||
size_t totalSize = perWorkerSize * pPool->numThreads;
|
||||
if (totalSize)
|
||||
{
|
||||
pPool->pWorkerPrivateDataArray = AlignedMalloc(totalSize, 64);
|
||||
SWR_ASSERT(pPool->pWorkerPrivateDataArray);
|
||||
|
||||
void* pWorkerData = pPool->pWorkerPrivateDataArray;
|
||||
for (uint32_t i = 0; i < pPool->numThreads; ++i)
|
||||
{
|
||||
pPool->pThreadData[i].pWorkerPrivateData = pWorkerData;
|
||||
if (pContext->workerPrivateState.pfnInitWorkerData)
|
||||
{
|
||||
pContext->workerPrivateState.pfnInitWorkerData(pWorkerData, i);
|
||||
}
|
||||
pWorkerData = PtrAdd(pWorkerData, perWorkerSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (pContext->threadInfo.SINGLE_THREADED)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
pPool->pThreads = new (std::nothrow) THREAD_PTR[pPool->numThreads];
|
||||
SWR_ASSERT(pPool->pThreads);
|
||||
@@ -1293,13 +1328,13 @@ void StartThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
|
||||
/// @param pPool - pointer to thread pool object.
|
||||
void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
|
||||
{
|
||||
if (!pContext->threadInfo.SINGLE_THREADED)
|
||||
{
|
||||
// Wait for all threads to finish
|
||||
SwrWaitForIdle(pContext);
|
||||
// Wait for all threads to finish
|
||||
SwrWaitForIdle(pContext);
|
||||
|
||||
// Wait for threads to finish and destroy them
|
||||
for (uint32_t t = 0; t < pPool->numThreads; ++t)
|
||||
// Wait for threads to finish and destroy them
|
||||
for (uint32_t t = 0; t < pPool->numThreads; ++t)
|
||||
{
|
||||
if (!pContext->threadInfo.SINGLE_THREADED)
|
||||
{
|
||||
// Detach from thread. Cannot join() due to possibility (in Windows) of code
|
||||
// in some DllMain(DLL_THREAD_DETACH case) blocking the thread until after this returns.
|
||||
@@ -1307,10 +1342,17 @@ void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
|
||||
delete(pPool->pThreads[t]);
|
||||
}
|
||||
|
||||
delete[] pPool->pThreads;
|
||||
|
||||
// Clean up data used by threads
|
||||
delete[] pPool->pThreadData;
|
||||
delete[] pPool->pApiThreadData;
|
||||
if (pContext->workerPrivateState.pfnFinishWorkerData)
|
||||
{
|
||||
pContext->workerPrivateState.pfnFinishWorkerData(pPool->pThreadData[t].pWorkerPrivateData, t);
|
||||
}
|
||||
}
|
||||
|
||||
delete[] pPool->pThreads;
|
||||
|
||||
// Clean up data used by threads
|
||||
delete[] pPool->pThreadData;
|
||||
delete[] pPool->pApiThreadData;
|
||||
|
||||
AlignedFree(pPool->pWorkerPrivateDataArray);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -35,9 +35,11 @@ typedef std::thread* THREAD_PTR;
|
||||
|
||||
struct SWR_CONTEXT;
|
||||
struct DRAW_CONTEXT;
|
||||
struct SWR_WORKER_PRIVATE_STATE;
|
||||
|
||||
struct THREAD_DATA
|
||||
{
|
||||
void* pWorkerPrivateData;// Pointer to per-worker private data
|
||||
uint32_t procGroupId; // Will always be 0 for non-Windows OS
|
||||
uint32_t threadId; // within the procGroup for Windows
|
||||
uint32_t numaId; // NUMA node id
|
||||
@@ -55,6 +57,7 @@ struct THREAD_POOL
|
||||
uint32_t numThreads;
|
||||
uint32_t numaMask;
|
||||
THREAD_DATA *pThreadData;
|
||||
void* pWorkerPrivateDataArray; // All memory for worker private data
|
||||
uint32_t numReservedThreads; // Number of threads reserved for API use
|
||||
THREAD_DATA *pApiThreadData;
|
||||
};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -83,7 +83,7 @@ void MacroTileMgr::markTileComplete(uint32_t id)
|
||||
tile.mWorkItemsBE = 0;
|
||||
}
|
||||
|
||||
HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples,
|
||||
HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HANDLE hWorkerPrivateData, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples,
|
||||
uint32_t renderTargetArrayIndex)
|
||||
{
|
||||
uint32_t x, y;
|
||||
@@ -163,11 +163,11 @@ HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32
|
||||
|
||||
if (hotTile.state == HOTTILE_DIRTY)
|
||||
{
|
||||
pContext->pfnStoreTile(GetPrivateState(pDC), format, attachment,
|
||||
pContext->pfnStoreTile(GetPrivateState(pDC), hWorkerPrivateData, format, attachment,
|
||||
x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, hotTile.renderTargetArrayIndex, hotTile.pBuffer);
|
||||
}
|
||||
|
||||
pContext->pfnLoadTile(GetPrivateState(pDC), format, attachment,
|
||||
pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, format, attachment,
|
||||
x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, renderTargetArrayIndex, hotTile.pBuffer);
|
||||
|
||||
hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
|
||||
@@ -379,6 +379,7 @@ void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
|
||||
void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID)
|
||||
{
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
HANDLE hWorkerPrivateData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
|
||||
|
||||
uint32_t x, y;
|
||||
MacroTileMgr::getTileIndices(macroID, x, y);
|
||||
@@ -392,13 +393,13 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
|
||||
uint32_t colorHottileEnableMask = state.colorHottileEnable;
|
||||
while (_BitScanForward(&rtSlot, colorHottileEnableMask))
|
||||
{
|
||||
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, numSamples);
|
||||
HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, numSamples);
|
||||
|
||||
if (pHotTile->state == HOTTILE_INVALID)
|
||||
{
|
||||
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
|
||||
// invalid hottile before draw requires a load from surface before we can draw to it
|
||||
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
|
||||
pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
RDTSC_END(BELoadTiles, 0);
|
||||
}
|
||||
@@ -416,12 +417,12 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
|
||||
// check depth if enabled
|
||||
if (state.depthHottileEnable)
|
||||
{
|
||||
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
|
||||
HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
|
||||
if (pHotTile->state == HOTTILE_INVALID)
|
||||
{
|
||||
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
|
||||
// invalid hottile before draw requires a load from surface before we can draw to it
|
||||
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
|
||||
pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
RDTSC_END(BELoadTiles, 0);
|
||||
}
|
||||
@@ -438,12 +439,12 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
|
||||
// check stencil if enabled
|
||||
if (state.stencilHottileEnable)
|
||||
{
|
||||
HOTTILE* pHotTile = GetHotTile(pContext, pDC, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
|
||||
HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
|
||||
if (pHotTile->state == HOTTILE_INVALID)
|
||||
{
|
||||
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
|
||||
// invalid hottile before draw requires a load from surface before we can draw to it
|
||||
pContext->pfnLoadTile(GetPrivateState(pDC), KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
|
||||
pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
|
||||
pHotTile->state = HOTTILE_DIRTY;
|
||||
RDTSC_END(BELoadTiles, 0);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -305,7 +305,7 @@ public:
|
||||
|
||||
void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID);
|
||||
|
||||
HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
|
||||
HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HANDLE hWorkerData, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
|
||||
uint32_t renderTargetArrayIndex = 0);
|
||||
|
||||
HOTTILE *GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -19,13 +19,13 @@
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
*
|
||||
* @file JitManager.cpp
|
||||
*
|
||||
*
|
||||
* @brief Implementation of the Jit Manager.
|
||||
*
|
||||
*
|
||||
* Notes:
|
||||
*
|
||||
*
|
||||
******************************************************************************/
|
||||
#include "jit_pch.hpp"
|
||||
|
||||
@@ -66,7 +66,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
|
||||
InitializeNativeTargetAsmPrinter();
|
||||
InitializeNativeTargetDisassembler();
|
||||
|
||||
|
||||
|
||||
TargetOptions tOpts;
|
||||
tOpts.AllowFPOpFusion = FPOpFusion::Fast;
|
||||
tOpts.NoInfsFPMath = false;
|
||||
@@ -125,6 +125,8 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
|
||||
// llvm5 is picky and does not take a void * type
|
||||
fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
|
||||
|
||||
fsArgs.push_back(Type::getInt8PtrTy(mContext));
|
||||
|
||||
fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
|
||||
#if USE_SIMD16_SHADERS
|
||||
fsArgs.push_back(PointerType::get(Gen_simd16vertex(this), 0));
|
||||
@@ -158,7 +160,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
|
||||
void JitManager::SetupNewModule()
|
||||
{
|
||||
SWR_ASSERT(mIsModuleFinalized == true && "Current module is not finalized!");
|
||||
|
||||
|
||||
std::unique_ptr<Module> newModule(new Module("", mContext));
|
||||
mpCurrentModule = newModule.get();
|
||||
#if defined(_WIN32)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/****************************************************************************
|
||||
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
|
||||
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -91,6 +91,7 @@ struct FetchJit : public BuilderGfxMem
|
||||
void CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4]);
|
||||
void ConvertFormat(SWR_FORMAT format, Value *texels[4]);
|
||||
|
||||
Value* mpWorkerData;
|
||||
Value* mpFetchInfo;
|
||||
};
|
||||
|
||||
@@ -113,6 +114,8 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
|
||||
privateContext->setName("privateContext");
|
||||
SetPrivateContext(privateContext);
|
||||
|
||||
mpWorkerData = &*argitr; ++argitr;
|
||||
mpWorkerData->setName("pWorkerData");
|
||||
mpFetchInfo = &*argitr; ++argitr;
|
||||
mpFetchInfo->setName("fetchInfo");
|
||||
Value* pVtxOut = &*argitr;
|
||||
@@ -1097,8 +1100,7 @@ Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex)
|
||||
Value* vIndexMask = ICMP_SGT(vMaxIndex, vIndexOffsets);
|
||||
|
||||
// Load the indices; OOB loads 0
|
||||
pIndices = BITCAST(pIndices, PointerType::get(mSimdInt32Ty, 0));
|
||||
return MASKED_LOAD(pIndices, 4, vIndexMask, VIMMED1(0));
|
||||
return MASKED_LOAD(pIndices, 4, vIndexMask, VIMMED1(0), "vIndices", PointerType::get(mSimdInt32Ty, 0), GFX_MEM_CLIENT_FETCH);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -153,6 +153,7 @@ struct StoreMacroTileClear
|
||||
/// @param x, y - Coordinates to raster tile.
|
||||
/// @param pClearColor - Pointer to clear color
|
||||
void SwrStoreHotTileClear(
|
||||
HANDLE hWorkerPrivateData,
|
||||
SWR_SURFACE_STATE *pDstSurface,
|
||||
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
|
||||
UINT x,
|
||||
|
||||
@@ -54,6 +54,7 @@ static std::mutex sBucketMutex;
|
||||
/// @param x, y - Coordinates to raster tile.
|
||||
/// @param pDstHotTile - Pointer to Hot Tile
|
||||
void SwrLoadHotTile(
|
||||
HANDLE hWorkerPrivateData,
|
||||
const SWR_SURFACE_STATE *pSrcSurface,
|
||||
SWR_FORMAT dstFormat,
|
||||
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
|
||||
|
||||
@@ -59,6 +59,7 @@ static std::vector<int32_t> sBuckets(NUM_SWR_FORMATS, -1);
|
||||
/// @param x, y - Coordinates to raster tile.
|
||||
/// @param pSrcHotTile - Pointer to Hot Tile
|
||||
void SwrStoreHotTileToSurface(
|
||||
HANDLE hWorkerPrivateData,
|
||||
SWR_SURFACE_STATE *pDstSurface,
|
||||
SWR_FORMAT srcFormat,
|
||||
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
|
||||
INLINE void
|
||||
swr_LoadHotTile(HANDLE hPrivateContext,
|
||||
HANDLE hWorkerPrivateData,
|
||||
SWR_FORMAT dstFormat,
|
||||
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
|
||||
UINT x, UINT y,
|
||||
@@ -34,11 +35,12 @@ swr_LoadHotTile(HANDLE hPrivateContext,
|
||||
swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
|
||||
SWR_SURFACE_STATE *pSrcSurface = &pDC->renderTargets[renderTargetIndex];
|
||||
|
||||
pDC->pAPI->pfnSwrLoadHotTile(pSrcSurface, dstFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pDstHotTile);
|
||||
pDC->pAPI->pfnSwrLoadHotTile(hWorkerPrivateData, pSrcSurface, dstFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pDstHotTile);
|
||||
}
|
||||
|
||||
INLINE void
|
||||
swr_StoreHotTile(HANDLE hPrivateContext,
|
||||
HANDLE hWorkerPrivateData,
|
||||
SWR_FORMAT srcFormat,
|
||||
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
|
||||
UINT x, UINT y,
|
||||
@@ -48,11 +50,12 @@ swr_StoreHotTile(HANDLE hPrivateContext,
|
||||
swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
|
||||
SWR_SURFACE_STATE *pDstSurface = &pDC->renderTargets[renderTargetIndex];
|
||||
|
||||
pDC->pAPI->pfnSwrStoreHotTileToSurface(pDstSurface, srcFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pSrcHotTile);
|
||||
pDC->pAPI->pfnSwrStoreHotTileToSurface(hWorkerPrivateData, pDstSurface, srcFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pSrcHotTile);
|
||||
}
|
||||
|
||||
INLINE void
|
||||
swr_StoreHotTileClear(HANDLE hPrivateContext,
|
||||
HANDLE hWorkerPrivateData,
|
||||
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
|
||||
UINT x,
|
||||
UINT y,
|
||||
@@ -63,5 +66,5 @@ swr_StoreHotTileClear(HANDLE hPrivateContext,
|
||||
swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
|
||||
SWR_SURFACE_STATE *pDstSurface = &pDC->renderTargets[renderTargetIndex];
|
||||
|
||||
pDC->pAPI->pfnSwrStoreHotTileClear(pDstSurface, renderTargetIndex, x, y, renderTargetArrayIndex, pClearColor);
|
||||
pDC->pAPI->pfnSwrStoreHotTileClear(hWorkerPrivateData, pDstSurface, renderTargetIndex, x, y, renderTargetArrayIndex, pClearColor);
|
||||
}
|
||||
|
||||
@@ -586,6 +586,7 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
|
||||
attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
|
||||
|
||||
std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
|
||||
PointerType::get(mInt8Ty, 0),
|
||||
PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
|
||||
FunctionType *vsFuncType =
|
||||
FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false);
|
||||
@@ -610,6 +611,8 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
|
||||
auto argitr = pFunction->arg_begin();
|
||||
Value *hPrivateData = &*argitr++;
|
||||
hPrivateData->setName("hPrivateData");
|
||||
Value *pWorkerData = &*argitr++;
|
||||
pWorkerData->setName("pWorkerData");
|
||||
Value *pGsCtx = &*argitr++;
|
||||
pGsCtx->setName("gsCtx");
|
||||
|
||||
@@ -754,6 +757,7 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
|
||||
attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
|
||||
|
||||
std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
|
||||
PointerType::get(mInt8Ty, 0),
|
||||
PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
|
||||
FunctionType *vsFuncType =
|
||||
FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false);
|
||||
@@ -778,6 +782,8 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
|
||||
auto argitr = pFunction->arg_begin();
|
||||
Value *hPrivateData = &*argitr++;
|
||||
hPrivateData->setName("hPrivateData");
|
||||
Value *pWorkerData = &*argitr++;
|
||||
pWorkerData->setName("pWorkerData");
|
||||
Value *pVsCtx = &*argitr++;
|
||||
pVsCtx->setName("vsCtx");
|
||||
|
||||
@@ -1037,6 +1043,7 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
|
||||
attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
|
||||
|
||||
std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
|
||||
PointerType::get(mInt8Ty, 0),
|
||||
PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
|
||||
FunctionType *funcType =
|
||||
FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false);
|
||||
@@ -1060,6 +1067,8 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
|
||||
auto args = pFunction->arg_begin();
|
||||
Value *hPrivateData = &*args++;
|
||||
hPrivateData->setName("hPrivateData");
|
||||
Value *pWorkerData = &*args++;
|
||||
pWorkerData->setName("pWorkerData");
|
||||
Value *pPS = &*args++;
|
||||
pPS->setName("psCtx");
|
||||
|
||||
|
||||
Reference in New Issue
Block a user