swr: [rasterizer core] conservative rast backend changes
Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
This commit is contained in:
@@ -760,8 +760,8 @@ void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
|
||||
// templated backend function tables
|
||||
extern PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_MAX];
|
||||
extern PFN_BACKEND_FUNC gBackendSingleSample[2][2][2];
|
||||
extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_MSAA_SAMPLE_PATTERN_MAX][SWR_INPUT_COVERAGE_MAX][2][2][2];
|
||||
extern PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_INPUT_COVERAGE_MAX][2][2];
|
||||
extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_MSAA_SAMPLE_PATTERN_MAX][2][2][2][2];
|
||||
extern PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX][2][2][2];
|
||||
void SetupPipeline(DRAW_CONTEXT *pDC)
|
||||
{
|
||||
DRAW_STATE* pState = pDC->pState;
|
||||
@@ -780,7 +780,7 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
|
||||
const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || rastState.forcedSampleCount) ? 1 : 0;
|
||||
const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
|
||||
const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesSourceDepth && !psState.usesUAV)) ? 1 : 0;
|
||||
const uint32_t inputCoverage = (psState.inputCoverage != SWR_INPUT_COVERAGE_NONE);
|
||||
const uint32_t inputCoverage = (psState.inputCoverage != SWR_INPUT_COVERAGE_NONE) ? 1 : 0;
|
||||
|
||||
SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask;
|
||||
|
||||
|
||||
@@ -1154,12 +1154,13 @@ PFN_BACKEND_FUNC gBackendSingleSample[2] // input coverage
|
||||
= {};
|
||||
PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_MAX]
|
||||
[SWR_MSAA_SAMPLE_PATTERN_MAX]
|
||||
[SWR_INPUT_COVERAGE_MAX]
|
||||
[2] // input coverage
|
||||
[2] // centroid
|
||||
[2] // forcedSampleCount
|
||||
[2] // canEarlyZ
|
||||
= {};
|
||||
PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX][SWR_INPUT_COVERAGE_MAX]
|
||||
PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_MAX]
|
||||
[2] // input coverage
|
||||
[2] // centroid
|
||||
[2] // canEarlyZ
|
||||
= {};
|
||||
@@ -1232,28 +1233,27 @@ struct BEChooser
|
||||
|
||||
void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[2][2][2])
|
||||
{
|
||||
for(uint32_t inputCoverage = SWR_INPUT_COVERAGE_NONE; inputCoverage < SWR_INPUT_COVERAGE_MAX; inputCoverage++)
|
||||
for(uint32_t inputCoverage = 0; inputCoverage < 2; inputCoverage++)
|
||||
{
|
||||
for(uint32_t isCentroid = 0; isCentroid < 2; isCentroid++)
|
||||
{
|
||||
for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
|
||||
{
|
||||
table[inputCoverage][isCentroid][canEarlyZ] =
|
||||
BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL),
|
||||
BEChooser<>::GetFunc(SWR_MULTISAMPLE_1X, SWR_MSAA_STANDARD_PATTERN, (inputCoverage > 0),
|
||||
(isCentroid > 0), false, (canEarlyZ > 0), SWR_BACKEND_SINGLE_SAMPLE);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InitBackendPixelFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_MAX][SWR_MSAA_SAMPLE_PATTERN_MAX][SWR_INPUT_COVERAGE_MAX]
|
||||
[2][2][2])
|
||||
void InitBackendPixelFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_MAX][SWR_MSAA_SAMPLE_PATTERN_MAX][2][2][2][2])
|
||||
{
|
||||
for(uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < SWR_MULTISAMPLE_TYPE_MAX; sampleCount++)
|
||||
{
|
||||
for(uint32_t samplePattern = SWR_MSAA_CENTER_PATTERN; samplePattern < SWR_MSAA_SAMPLE_PATTERN_MAX; samplePattern++)
|
||||
{
|
||||
for(uint32_t inputCoverage = SWR_INPUT_COVERAGE_NONE; inputCoverage < SWR_INPUT_COVERAGE_MAX; inputCoverage++)
|
||||
for(uint32_t inputCoverage = 0; inputCoverage < 2; inputCoverage++)
|
||||
{
|
||||
for(uint32_t isCentroid = 0; isCentroid < 2; isCentroid++)
|
||||
{
|
||||
@@ -1262,7 +1262,7 @@ void InitBackendPixelFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_MA
|
||||
for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
|
||||
{
|
||||
table[sampleCount][samplePattern][inputCoverage][isCentroid][forcedSampleCount][canEarlyZ] =
|
||||
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, (SWR_MSAA_SAMPLE_PATTERN)samplePattern, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL),
|
||||
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, (SWR_MSAA_SAMPLE_PATTERN)samplePattern, (inputCoverage > 0),
|
||||
(isCentroid > 0), (forcedSampleCount > 0), (canEarlyZ > 0), SWR_BACKEND_MSAA_PIXEL_RATE);
|
||||
}
|
||||
}
|
||||
@@ -1272,18 +1272,18 @@ void InitBackendPixelFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_MA
|
||||
}
|
||||
}
|
||||
|
||||
void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_MAX][SWR_INPUT_COVERAGE_MAX][2][2])
|
||||
void InitBackendSampleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_MAX][2][2][2])
|
||||
{
|
||||
for(uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < SWR_MULTISAMPLE_TYPE_MAX; sampleCount++)
|
||||
{
|
||||
for(uint32_t inputCoverage = SWR_INPUT_COVERAGE_NONE; inputCoverage < SWR_INPUT_COVERAGE_MAX; inputCoverage++)
|
||||
for(uint32_t inputCoverage = 0; inputCoverage < 2; inputCoverage++)
|
||||
{
|
||||
for(uint32_t centroid = 0; centroid < 2; centroid++)
|
||||
{
|
||||
for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
|
||||
{
|
||||
table[sampleCount][inputCoverage][centroid][canEarlyZ] =
|
||||
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, (inputCoverage == SWR_INPUT_COVERAGE_NORMAL),
|
||||
BEChooser<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, SWR_MSAA_STANDARD_PATTERN, (inputCoverage > 0),
|
||||
(centroid > 0), false, (canEarlyZ > 0), (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,7 +31,8 @@ enum FixedPointFmt
|
||||
{
|
||||
FP_UNINIT,
|
||||
_16_8,
|
||||
_16_9
|
||||
_16_9,
|
||||
_X_16,
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@@ -39,6 +40,7 @@ enum FixedPointFmt
|
||||
typedef std::integral_constant<uint32_t, FP_UNINIT> Fixed_Uninit;
|
||||
typedef std::integral_constant<uint32_t, _16_8> Fixed_16_8;
|
||||
typedef std::integral_constant<uint32_t, _16_9> Fixed_16_9;
|
||||
typedef std::integral_constant<uint32_t, _X_16> Fixed_X_16;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @struct FixedPointTraits
|
||||
@@ -53,9 +55,9 @@ template<>
|
||||
struct FixedPointTraits<Fixed_16_8>
|
||||
{
|
||||
/// multiplier to go from FP32 to Fixed Point 16.8
|
||||
typedef std::integral_constant<uint32_t, 256> FixedPointScaleT;
|
||||
typedef std::integral_constant<uint32_t, 256> ScaleT;
|
||||
/// number of bits to shift to go from 16.8 fixed => int32
|
||||
typedef std::integral_constant<uint32_t, 8> FixedPointShiftT;
|
||||
typedef std::integral_constant<uint32_t, 8> BitsT;
|
||||
typedef Fixed_16_8 TypeT;
|
||||
};
|
||||
|
||||
@@ -65,12 +67,24 @@ template<>
|
||||
struct FixedPointTraits<Fixed_16_9>
|
||||
{
|
||||
/// multiplier to go from FP32 to Fixed Point 16.9
|
||||
typedef std::integral_constant<uint32_t, 512> FixedPointScaleT;
|
||||
typedef std::integral_constant<uint32_t, 512> ScaleT;
|
||||
/// number of bits to shift to go from 16.9 fixed => int32
|
||||
typedef std::integral_constant<uint32_t, 9> FixedPointShiftT;
|
||||
typedef std::integral_constant<uint32_t, 9> BitsT;
|
||||
typedef Fixed_16_9 TypeT;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Fixed_16_9 specialization of FixedPointTraits
|
||||
template<>
|
||||
struct FixedPointTraits<Fixed_X_16>
|
||||
{
|
||||
/// multiplier to go from FP32 to Fixed Point X.16
|
||||
typedef std::integral_constant<uint32_t, 65536> ScaleT;
|
||||
/// number of bits to shift to go from X.16 fixed => int32
|
||||
typedef std::integral_constant<uint32_t, 16> BitsT;
|
||||
typedef Fixed_X_16 TypeT;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief convenience typedefs for conservative rasterization modes
|
||||
typedef std::false_type StandardRastT;
|
||||
@@ -118,3 +132,90 @@ struct ConservativeRastFETraits<ConservativeRastT>
|
||||
/// @brief convenience typedefs for ConservativeRastFETraits
|
||||
typedef ConservativeRastFETraits<StandardRastT> FEStandardRastT;
|
||||
typedef ConservativeRastFETraits<ConservativeRastT> FEConservativeRastT;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @struct ConservativeRastBETraits
|
||||
/// @brief primary ConservativeRastBETraits template. Shouldn't be instantiated;
|
||||
/// default to standard rasterization behavior
|
||||
/// @tparam ConservativeT: type of conservative rasterization
|
||||
/// @tparam InputCoverageT: type of input coverage requested, if any
|
||||
template <typename ConservativeT, typename InputCoverageT>
|
||||
struct ConservativeRastBETraits {
|
||||
typedef std::false_type IsConservativeT;
|
||||
typedef FixedPointTraits<Fixed_16_8> ConservativePrecisionT;
|
||||
typedef std::integral_constant<int32_t, 0> ConservativeEdgeOffsetT;
|
||||
typedef std::integral_constant<int32_t, 0> InnerConservativeEdgeOffsetT;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief StandardRastT specialization of ConservativeRastBETraits
|
||||
template <typename InputCoverageT>
|
||||
struct ConservativeRastBETraits<StandardRastT, InputCoverageT>
|
||||
{
|
||||
typedef std::false_type IsConservativeT;
|
||||
typedef FixedPointTraits<Fixed_16_8> ConservativePrecisionT;
|
||||
typedef std::integral_constant<int32_t, 0> ConservativeEdgeOffsetT;
|
||||
typedef std::integral_constant<int32_t, 0> InnerConservativeEdgeOffsetT;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief ConservativeRastT specialization of ConservativeRastBETraits
|
||||
/// with no input coverage
|
||||
template <>
|
||||
struct ConservativeRastBETraits<ConservativeRastT, NoInputCoverageT>
|
||||
{
|
||||
typedef std::true_type IsConservativeT;
|
||||
typedef NoInputCoverageT InputCoverageT;
|
||||
|
||||
typedef FixedPointTraits<Fixed_16_9> ConservativePrecisionT;
|
||||
|
||||
/// offset edge away from pixel center by 1/2 pixel + 1/512, in Fixed 16.9 precision
|
||||
/// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead of
|
||||
/// of having to compare individual edges to pixel corners to check if any part of the triangle
|
||||
/// intersects a pixel
|
||||
typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value/2) + 1> ConservativeEdgeOffsetT;
|
||||
typedef std::integral_constant<int32_t, 0> InnerConservativeEdgeOffsetT;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief ConservativeRastT specialization of ConservativeRastBETraits
|
||||
/// with OuterConservativeCoverage
|
||||
template <>
|
||||
struct ConservativeRastBETraits<ConservativeRastT, OuterConservativeCoverageT>
|
||||
{
|
||||
typedef std::true_type IsConservativeT;
|
||||
typedef OuterConservativeCoverageT InputCoverageT;
|
||||
|
||||
typedef FixedPointTraits<Fixed_16_9> ConservativePrecisionT;
|
||||
|
||||
/// offset edge away from pixel center by 1/2 pixel + 1/512, in Fixed 16.9 precision
|
||||
/// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead of
|
||||
/// of having to compare individual edges to pixel corners to check if any part of the triangle
|
||||
/// intersects a pixel
|
||||
typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value/2) + 1> ConservativeEdgeOffsetT;
|
||||
typedef std::integral_constant<int32_t, 0> InnerConservativeEdgeOffsetT;
|
||||
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief ConservativeRastT specialization of ConservativeRastBETraits
|
||||
/// with InnerConservativeCoverage
|
||||
template <>
|
||||
struct ConservativeRastBETraits<ConservativeRastT, InnerConservativeCoverageT>
|
||||
{
|
||||
typedef std::true_type IsConservativeT;
|
||||
typedef InnerConservativeCoverageT InputCoverageT;
|
||||
|
||||
typedef FixedPointTraits<Fixed_16_9> ConservativePrecisionT;
|
||||
|
||||
/// offset edge away from pixel center by 1/2 pixel + 1/512, in Fixed 16.9 precision
|
||||
/// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead of
|
||||
/// of having to compare individual edges to pixel corners to check if any part of the triangle
|
||||
/// intersects a pixel
|
||||
typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value/2) + 1> ConservativeEdgeOffsetT;
|
||||
|
||||
/// offset edge towards from pixel center by 1/2 pixel + 1/512, in Fixed 16.9 precision
|
||||
/// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead of
|
||||
/// of having to compare individual edges to pixel corners to check if a pixel is fully covered by a triangle
|
||||
typedef std::integral_constant<int32_t, static_cast<int32_t>(-((ConservativePrecisionT::ScaleT::value/2) + 1))> InnerConservativeEdgeOffsetT;
|
||||
};
|
||||
@@ -1596,7 +1596,7 @@ void ProcessUserClipDist(PA_STATE& pa, uint32_t primIndex, uint8_t clipDistMask,
|
||||
template <typename PT = FixedPointTraits<Fixed_16_8>>
|
||||
INLINE simdscalari fpToFixedPointVertical(const simdscalar vIn)
|
||||
{
|
||||
simdscalar vFixed = _simd_mul_ps(vIn, _simd_set1_ps(PT::FixedPointScaleT::value));
|
||||
simdscalar vFixed = _simd_mul_ps(vIn, _simd_set1_ps(PT::ScaleT::value));
|
||||
return _simd_cvtps_epi32(vFixed);
|
||||
}
|
||||
|
||||
@@ -1842,10 +1842,13 @@ void BinTriangles(
|
||||
/// Note: these variable initializations must stay above any 'goto endBenTriangles'
|
||||
// compute per tri backface
|
||||
uint32_t frontFaceMask = frontWindingTris;
|
||||
|
||||
uint32_t *pPrimID = (uint32_t *)&primID;
|
||||
DWORD triIndex = 0;
|
||||
|
||||
// for center sample pattern, all samples are at pixel center; calculate coverage
|
||||
// once at center and broadcast the results in the backend
|
||||
uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X;
|
||||
PFN_WORK_FUNC pfnWork = GetRasterizerFunc(sampleCount, (rastState.conservativeRast > 0),
|
||||
pDC->pState->state.psState.inputCoverage, (rastState.scissorEnable > 0));
|
||||
if (!triMask)
|
||||
{
|
||||
goto endBinTriangles;
|
||||
@@ -1945,34 +1948,23 @@ void BinTriangles(
|
||||
_simd_store_si((simdscalari*)aRTAI, _simd_setzero_si());
|
||||
}
|
||||
|
||||
|
||||
// scan remaining valid triangles and bin each separately
|
||||
while (_BitScanForward(&triIndex, triMask))
|
||||
{
|
||||
uint32_t linkageCount = state.linkageCount;
|
||||
uint32_t linkageMask = state.linkageMask;
|
||||
uint32_t numScalarAttribs = linkageCount * 4;
|
||||
|
||||
|
||||
BE_WORK work;
|
||||
work.type = DRAW;
|
||||
work.pfnWork = pfnWork;
|
||||
|
||||
TRIANGLE_WORK_DESC &desc = work.desc.tri;
|
||||
|
||||
desc.triFlags.frontFacing = state.forceFront ? 1 : ((frontFaceMask >> triIndex) & 1);
|
||||
desc.triFlags.primID = pPrimID[triIndex];
|
||||
desc.triFlags.renderTargetArrayIndex = aRTAI[triIndex];
|
||||
|
||||
if(rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN)
|
||||
{
|
||||
work.pfnWork = gRasterizerTable[rastState.scissorEnable][rastState.sampleCount];
|
||||
}
|
||||
else
|
||||
{
|
||||
// for center sample pattern, all samples are at pixel center; calculate coverage
|
||||
// once at center and broadcast the results in the backend
|
||||
work.pfnWork = gRasterizerTable[rastState.scissorEnable][SWR_MULTISAMPLE_1X];
|
||||
}
|
||||
|
||||
|
||||
auto pArena = pDC->pArena;
|
||||
SWR_ASSERT(pArena != nullptr);
|
||||
|
||||
@@ -2028,7 +2020,7 @@ struct FEBinTrianglesChooser
|
||||
}
|
||||
};
|
||||
|
||||
// Selector for correct templated Draw front-end function
|
||||
// Selector for correct templated BinTrinagles function
|
||||
PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative)
|
||||
{
|
||||
return TemplateArgUnroller<FEBinTrianglesChooser>::GetFunc(IsConservative);
|
||||
|
||||
@@ -246,14 +246,6 @@ void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, BBOX &bbox)
|
||||
bbox.right = _mm_extract_epi32(vMaxX, 0);
|
||||
bbox.top = _mm_extract_epi32(vMinY, 0);
|
||||
bbox.bottom = _mm_extract_epi32(vMaxY, 0);
|
||||
|
||||
#if 0
|
||||
Jacob: A = _mm_shuffle_ps(X, Y, 0 0 0 0)
|
||||
B = _mm_shuffle_ps(Z, W, 0 0 0 0)
|
||||
A = _mm_shuffle_epi32(A, 3 0 3 0)
|
||||
A = _mm_shuffle_ps(A, B, 1 0 1 0)
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
INLINE
|
||||
|
||||
@@ -30,7 +30,6 @@
|
||||
#include <algorithm>
|
||||
|
||||
#include "rasterizer.h"
|
||||
#include "multisample.h"
|
||||
#include "rdtsc_core.h"
|
||||
#include "backend.h"
|
||||
#include "utils.h"
|
||||
@@ -38,11 +37,12 @@
|
||||
#include "tilemgr.h"
|
||||
#include "memory/tilingtraits.h"
|
||||
|
||||
void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t x, uint32_t y, RenderOutputBuffers &renderBuffers,
|
||||
uint32_t numSamples, uint32_t renderTargetArrayIndex);
|
||||
void StepRasterTileX(uint32_t MaxRT, RenderOutputBuffers &buffers, uint32_t colorTileStep, uint32_t depthTileStep, uint32_t stencilTileStep);
|
||||
void StepRasterTileY(uint32_t MaxRT, RenderOutputBuffers &buffers, RenderOutputBuffers &startBufferRow,
|
||||
uint32_t colorRowStep, uint32_t depthRowStep, uint32_t stencilRowStep);
|
||||
template <uint32_t numSamples = 1>
|
||||
void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t x, uint32_t y, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex);
|
||||
template <typename RT>
|
||||
void StepRasterTileX(uint32_t MaxRT, RenderOutputBuffers &buffers);
|
||||
template <typename RT>
|
||||
void StepRasterTileY(uint32_t MaxRT, RenderOutputBuffers &buffers, RenderOutputBuffers &startBufferRow);
|
||||
|
||||
#define MASKTOVEC(i3,i2,i1,i0) {-i0,-i1,-i2,-i3}
|
||||
const __m256d gMaskToVecpd[] =
|
||||
@@ -254,7 +254,7 @@ INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdg
|
||||
// Top left: a sample is in if it is a top or left edge.
|
||||
// Out: !(horizontal && above) = !horizontal && below
|
||||
// Out: !horizontal && left = !(!horizontal && left) = horizontal and right
|
||||
INLINE __m256d adjustTopLeftRuleIntFix16(const __m128i vA, const __m128i vB, const __m256d vEdge)
|
||||
INLINE void adjustTopLeftRuleIntFix16(const __m128i vA, const __m128i vB, __m256d &vEdge)
|
||||
{
|
||||
// if vA < 0, vC--
|
||||
// if vA == 0 && vB < 0, vC--
|
||||
@@ -271,8 +271,110 @@ INLINE __m256d adjustTopLeftRuleIntFix16(const __m128i vA, const __m128i vB, con
|
||||
msk2 &= _mm_movemask_ps(_mm_castsi128_ps(vB));
|
||||
|
||||
// if either of these are true and we're on the line (edge == 0), bump it outside the line
|
||||
vEdgeOut = _mm256_blendv_pd(vEdgeOut, vEdgeAdjust, gMaskToVecpd[msk | msk2]);
|
||||
return vEdgeOut;
|
||||
vEdge = _mm256_blendv_pd(vEdgeOut, vEdgeAdjust, gMaskToVecpd[msk | msk2]);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @struct adjustEdgeConservative
|
||||
/// @brief Primary template definition used for partially specializing
|
||||
/// the adjustEdgeConservative function. This struct should never
|
||||
/// be instantiated.
|
||||
/// @tparam RT: rasterizer traits
|
||||
/// @tparam IsConservativeT: is conservative rast enabled?
|
||||
template <typename RT, typename IsConservativeT>
|
||||
struct adjustEdgeConservative
|
||||
{
|
||||
INLINE adjustEdgeConservative(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge) = delete;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief adjustEdgeConservative<RT, std::true_type> specialization
|
||||
/// of adjustEdgeConservative. Used for conservative rasterization specific
|
||||
/// edge adjustments
|
||||
template <typename RT>
|
||||
struct adjustEdgeConservative<RT, std::true_type>
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs calculations to adjust each edge of a triangle away
|
||||
/// from the pixel center by 1/2 pixel + uncertainty region in both the x and y
|
||||
/// direction.
|
||||
///
|
||||
/// Uncertainty regions arise from fixed point rounding, which
|
||||
/// can snap a vertex +/- by min fixed point value.
|
||||
/// Adding 1/2 pixel in x/y bumps the edge equation tests out towards the pixel corners.
|
||||
/// This allows the rasterizer to test for coverage only at the pixel center,
|
||||
/// instead of having to test individual pixel corners for conservative coverage
|
||||
INLINE adjustEdgeConservative(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge)
|
||||
{
|
||||
/// Assumes CCW winding order. Subtracting from the evaluated edge equation moves the edge away
|
||||
/// from the pixel center (in the direction of the edge normal A/B)
|
||||
|
||||
/// edge = Ax + Bx + C - (manh/e)
|
||||
/// manh = manhattan distance = abs(A) + abs(B)
|
||||
/// e = absolute rounding error from snapping from float to fixed point precision
|
||||
|
||||
/// 'fixed point' multiply (in double to be avx1 friendly)
|
||||
/// need doubles to hold result of a fixed multiply: 16.8 * 16.9 = 32.17, for example
|
||||
__m256d vAai = _mm256_cvtepi32_pd(_mm_abs_epi32(vAi)), vBai = _mm256_cvtepi32_pd(_mm_abs_epi32(vBi));
|
||||
__m256d manh = _mm256_add_pd(_mm256_mul_pd(vAai, _mm256_set1_pd(RT::ConservativeEdgeOffsetT::value)),
|
||||
_mm256_mul_pd(vBai, _mm256_set1_pd(RT::ConservativeEdgeOffsetT::value)));
|
||||
|
||||
static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value,
|
||||
"Inadequate precision of result of manh calculation ");
|
||||
|
||||
/// rasterizer incoming edge precision is x.16, so we need to get our edge offset into the same precision
|
||||
/// since we're doing fixed math in double format, multiply by multiples of 1/2 instead of a bit shift right
|
||||
manh = _mm256_mul_pd(manh, _mm256_set1_pd(((RT::PrecisionT::BitsT::value +
|
||||
RT::ConservativePrecisionT::BitsT::value) -
|
||||
RT::EdgePrecisionT::BitsT::value) * 0.5));
|
||||
|
||||
/// move the edge away from the pixel center by the required conservative precision + 1/2 pixel
|
||||
/// this allows the rasterizer to do a single conservative coverage test to see if the primitive
|
||||
/// intersects the pixel at all
|
||||
vEdge = _mm256_sub_pd(vEdge, manh);
|
||||
};
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief adjustEdgeConservative<RT, std::false_type> specialization
|
||||
/// of adjustEdgeConservative. Allows code to be generically called; when
|
||||
/// IsConservativeT trait is disabled this inlines an empty function, which
|
||||
/// should get optimized out.
|
||||
template <typename RT>
|
||||
struct adjustEdgeConservative<RT, std::false_type>
|
||||
{
|
||||
INLINE adjustEdgeConservative(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge){};
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Performs calculations to adjust each a scalar edge out
|
||||
/// from the pixel center by 1/2 pixel + uncertainty region in both the x and y
|
||||
/// direction.
|
||||
template <typename RT>
|
||||
INLINE void adjustScissorEdge(const double a, const double b, __m256d &vEdge)
|
||||
{
|
||||
int32_t aabs = std::abs(static_cast<int32_t>(a)), babs = std::abs(static_cast<int32_t>(b));
|
||||
|
||||
int64_t manh = ((aabs * RT::ConservativeEdgeOffsetT::value) + (babs * RT::ConservativeEdgeOffsetT::value)) >>
|
||||
((RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value) - RT::EdgePrecisionT::BitsT::value);
|
||||
|
||||
static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value,
|
||||
"Inadequate precision of result of manh calculation ");
|
||||
|
||||
vEdge = _mm256_sub_pd(vEdge, _mm256_set1_pd(manh));
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Perform any needed adjustments to evaluated triangle edges
|
||||
template <typename RT>
|
||||
INLINE void adjustEdgesFix16(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge)
|
||||
{
|
||||
static_assert(std::is_same<typename RT::EdgePrecisionT, FixedPointTraits<Fixed_X_16>>::value,
|
||||
"Edge equation expected to be in x.16 fixed point");
|
||||
/// need to offset the edge before applying the top-left rule
|
||||
adjustEdgeConservative<RT, typename RT::IsConservativeT>(vAi, vBi, vEdge);
|
||||
|
||||
adjustTopLeftRuleIntFix16(vAi, vBi, vEdge);
|
||||
}
|
||||
|
||||
// max(abs(dz/dx), abs(dz,dy)
|
||||
@@ -409,7 +511,128 @@ void ComputeEdgeData(const POS& p0, const POS& p1, EDGE& edge)
|
||||
ComputeEdgeData(p0.y - p1.y, p1.x - p0.x, edge);
|
||||
}
|
||||
|
||||
template<bool RasterizeScissorEdges, SWR_MULTISAMPLE_COUNT sampleCount>
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Primary template definition used for partially specializing
|
||||
/// the UpdateEdgeMasks function. Offset evaluated edges from UL pixel
|
||||
/// corner to sample position, and test for coverage
|
||||
/// @tparam sampleCount: multisample count
|
||||
template <uint32_t numEdges>
|
||||
INLINE void UpdateEdgeMasks(const __m256d (&vEdgeTileBbox)[3], const __m256d (&vEdgeFix16)[7],
|
||||
int32_t &mask0, int32_t &mask1, int32_t &mask2)
|
||||
{
|
||||
__m256d vSampleBboxTest0, vSampleBboxTest1, vSampleBboxTest2;
|
||||
// evaluate edge equations at the tile multisample bounding box
|
||||
vSampleBboxTest0 = _mm256_add_pd(vEdgeTileBbox[0], vEdgeFix16[0]);
|
||||
vSampleBboxTest1 = _mm256_add_pd(vEdgeTileBbox[1], vEdgeFix16[1]);
|
||||
vSampleBboxTest2 = _mm256_add_pd(vEdgeTileBbox[2], vEdgeFix16[2]);
|
||||
mask0 = _mm256_movemask_pd(vSampleBboxTest0);
|
||||
mask1 = _mm256_movemask_pd(vSampleBboxTest1);
|
||||
mask2 = _mm256_movemask_pd(vSampleBboxTest2);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief UpdateEdgeMasks<SWR_MULTISAMPLE_1X, numEdges> partial specialization,
|
||||
/// instantiated when MSAA is disabled.
|
||||
template <>
|
||||
INLINE void UpdateEdgeMasks<SWR_MULTISAMPLE_1X>(const __m256d(&)[3], const __m256d (&vEdgeFix16)[7],
|
||||
int32_t &mask0, int32_t &mask1, int32_t &mask2)
|
||||
{
|
||||
mask0 = _mm256_movemask_pd(vEdgeFix16[0]);
|
||||
mask1 = _mm256_movemask_pd(vEdgeFix16[1]);
|
||||
mask2 = _mm256_movemask_pd(vEdgeFix16[2]);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @struct ComputeScissorEdges
|
||||
/// @brief Primary template definition. Allows the function to be generically
|
||||
/// called. When paired with below specializations, will result in an empty
|
||||
/// inlined function if scissor is not enabled
|
||||
/// @tparam RasterScissorEdgesT: is scissor enabled?
|
||||
/// @tparam IsConservativeT: is conservative rast enabled?
|
||||
/// @tparam RT: rasterizer traits
|
||||
template <typename RasterScissorEdgesT, typename IsConservativeT, typename RT>
|
||||
struct ComputeScissorEdges
|
||||
{
|
||||
INLINE ComputeScissorEdges(const BBOX &triBBox, const BBOX &scissorBBox, const int32_t x, const int32_t y,
|
||||
EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7]){};
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief ComputeScissorEdges<std::true_type, std::true_type, RT> partial
|
||||
/// specialization. Instantiated when conservative rast and scissor are enabled
|
||||
template <typename RT>
|
||||
struct ComputeScissorEdges<std::true_type, std::true_type, RT>
|
||||
{
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Intersect tri bbox with scissor, compute scissor edge vectors,
|
||||
/// evaluate edge equations and offset them away from pixel center.
|
||||
INLINE ComputeScissorEdges(const BBOX &triBBox, const BBOX &scissorBBox, const int32_t x, const int32_t y,
|
||||
EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7])
|
||||
{
|
||||
/// if conservative rasterizing, triangle bbox intersected with scissor bbox is used
|
||||
BBOX scissor;
|
||||
scissor.left = std::max(triBBox.left, scissorBBox.left);
|
||||
scissor.right = std::min(triBBox.right, scissorBBox.right);
|
||||
scissor.top = std::max(triBBox.top, scissorBBox.top);
|
||||
scissor.bottom = std::min(triBBox.bottom, scissorBBox.bottom);
|
||||
|
||||
POS topLeft{scissor.left, scissor.top};
|
||||
POS bottomLeft{scissor.left, scissor.bottom};
|
||||
POS topRight{scissor.right, scissor.top};
|
||||
POS bottomRight{scissor.right, scissor.bottom};
|
||||
|
||||
// construct 4 scissor edges in ccw direction
|
||||
ComputeEdgeData(topLeft, bottomLeft, rastEdges[3]);
|
||||
ComputeEdgeData(bottomLeft, bottomRight, rastEdges[4]);
|
||||
ComputeEdgeData(bottomRight, topRight, rastEdges[5]);
|
||||
ComputeEdgeData(topRight, topLeft, rastEdges[6]);
|
||||
|
||||
vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.left)) + (rastEdges[3].b * (y - scissor.top)));
|
||||
vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.left)) + (rastEdges[4].b * (y - scissor.bottom)));
|
||||
vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.right)) + (rastEdges[5].b * (y - scissor.bottom)));
|
||||
vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.right)) + (rastEdges[6].b * (y - scissor.top)));
|
||||
|
||||
/// if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing
|
||||
adjustScissorEdge<RT>(rastEdges[3].a, rastEdges[3].b, vEdgeFix16[3]);
|
||||
adjustScissorEdge<RT>(rastEdges[4].a, rastEdges[4].b, vEdgeFix16[4]);
|
||||
adjustScissorEdge<RT>(rastEdges[5].a, rastEdges[5].b, vEdgeFix16[5]);
|
||||
adjustScissorEdge<RT>(rastEdges[6].a, rastEdges[6].b, vEdgeFix16[6]);
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief ComputeScissorEdges<std::true_type, std::false_type, RT> partial
|
||||
/// specialization. Instantiated when scissor is enabled and conservative rast
|
||||
/// is disabled.
|
||||
template <typename RT>
|
||||
struct ComputeScissorEdges<std::true_type, std::false_type, RT>
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Compute scissor edge vectors and evaluate edge equations
|
||||
INLINE ComputeScissorEdges(const BBOX &, const BBOX &scissorBBox, const int32_t x, const int32_t y,
|
||||
EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7])
|
||||
{
|
||||
const BBOX &scissor = scissorBBox;
|
||||
POS topLeft{scissor.left, scissor.top};
|
||||
POS bottomLeft{scissor.left, scissor.bottom};
|
||||
POS topRight{scissor.right, scissor.top};
|
||||
POS bottomRight{scissor.right, scissor.bottom};
|
||||
|
||||
// construct 4 scissor edges in ccw direction
|
||||
ComputeEdgeData(topLeft, bottomLeft, rastEdges[3]);
|
||||
ComputeEdgeData(bottomLeft, bottomRight, rastEdges[4]);
|
||||
ComputeEdgeData(bottomRight, topRight, rastEdges[5]);
|
||||
ComputeEdgeData(topRight, topLeft, rastEdges[6]);
|
||||
|
||||
vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.left)) + (rastEdges[3].b * (y - scissor.top)));
|
||||
vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.left)) + (rastEdges[4].b * (y - scissor.bottom)));
|
||||
vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.right)) + (rastEdges[5].b * (y - scissor.bottom)));
|
||||
vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.right)) + (rastEdges[6].b * (y - scissor.top)));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename RT>
|
||||
void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc)
|
||||
{
|
||||
const TRIANGLE_WORK_DESC &workDesc = *((TRIANGLE_WORK_DESC*)pDesc);
|
||||
@@ -439,6 +662,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
vRecipW = _mm_load_ps(workDesc.pTriBuffer + 12);
|
||||
|
||||
// convert to fixed point
|
||||
static_assert(std::is_same<typename RT::PrecisionT, FixedPointTraits<Fixed_16_8>>::value, "Rasterizer expects 16.8 fixed point precision");
|
||||
__m128i vXi = fpToFixedPoint(vX);
|
||||
__m128i vYi = fpToFixedPoint(vY);
|
||||
|
||||
@@ -457,7 +681,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
// determinant
|
||||
float det = calcDeterminantInt(vAi, vBi);
|
||||
|
||||
/// @todo: This test is flipped...we have a stray '-' sign somewhere
|
||||
/// Verts in Pixel Coordinate Space at this point
|
||||
/// Det > 0 = CW winding order
|
||||
// Convert CW triangles to CCW
|
||||
if (det > 0.0)
|
||||
{
|
||||
@@ -468,6 +693,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
det = -det;
|
||||
}
|
||||
|
||||
/// @todo: handle degenerates for ConservativeRast
|
||||
|
||||
__m128 vC;
|
||||
// Finish triangle setup - C edge coef
|
||||
triangleSetupC(vX, vY, vA, vB, vC);
|
||||
@@ -533,43 +760,16 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
// add depth bias
|
||||
triDesc.Z[2] += ComputeDepthBias(&rastState, &triDesc, workDesc.pTriBuffer + 8);
|
||||
|
||||
// Compute edge data
|
||||
OSALIGNSIMD(int32_t) aAi[4], aBi[4];
|
||||
_mm_store_si128((__m128i*)aAi, vAi);
|
||||
_mm_store_si128((__m128i*)aBi, vBi);
|
||||
|
||||
const uint32_t numEdges = 3 + (RasterizeScissorEdges ? 4 : 0);
|
||||
EDGE rastEdges[7];
|
||||
|
||||
// compute triangle edges
|
||||
ComputeEdgeData(aAi[0], aBi[0], rastEdges[0]);
|
||||
ComputeEdgeData(aAi[1], aBi[1], rastEdges[1]);
|
||||
ComputeEdgeData(aAi[2], aBi[2], rastEdges[2]);
|
||||
|
||||
// compute scissor edges if enabled
|
||||
if (RasterizeScissorEdges)
|
||||
{
|
||||
POS topLeft{state.scissorInFixedPoint.left, state.scissorInFixedPoint.top};
|
||||
POS bottomLeft{state.scissorInFixedPoint.left, state.scissorInFixedPoint.bottom};
|
||||
POS topRight{state.scissorInFixedPoint.right, state.scissorInFixedPoint.top};
|
||||
POS bottomRight{state.scissorInFixedPoint.right, state.scissorInFixedPoint.bottom};
|
||||
|
||||
// construct 4 scissor edges in ccw direction
|
||||
ComputeEdgeData(topLeft, bottomLeft, rastEdges[3]);
|
||||
ComputeEdgeData(bottomLeft, bottomRight, rastEdges[4]);
|
||||
ComputeEdgeData(bottomRight, topRight, rastEdges[5]);
|
||||
ComputeEdgeData(topRight, topLeft, rastEdges[6]);
|
||||
}
|
||||
|
||||
// Calc bounding box of triangle
|
||||
OSALIGNSIMD(BBOX) bbox;
|
||||
calcBoundingBoxInt(vXi, vYi, bbox);
|
||||
|
||||
// Intersect with scissor/viewport
|
||||
bbox.left = std::max(bbox.left, state.scissorInFixedPoint.left);
|
||||
bbox.right = std::min(bbox.right - 1, state.scissorInFixedPoint.right);
|
||||
bbox.top = std::max(bbox.top, state.scissorInFixedPoint.top);
|
||||
bbox.bottom = std::min(bbox.bottom - 1, state.scissorInFixedPoint.bottom);
|
||||
OSALIGNSIMD(BBOX) intersect;
|
||||
intersect.left = std::max(bbox.left, state.scissorInFixedPoint.left);
|
||||
intersect.right = std::min(bbox.right - 1, state.scissorInFixedPoint.right);
|
||||
intersect.top = std::max(bbox.top, state.scissorInFixedPoint.top);
|
||||
intersect.bottom = std::min(bbox.bottom - 1, state.scissorInFixedPoint.bottom);
|
||||
|
||||
triDesc.triFlags = workDesc.triFlags;
|
||||
|
||||
@@ -581,11 +781,10 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
|
||||
int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1;
|
||||
|
||||
OSALIGNSIMD(BBOX) intersect;
|
||||
intersect.left = std::max(bbox.left, macroBoxLeft);
|
||||
intersect.top = std::max(bbox.top, macroBoxTop);
|
||||
intersect.right = std::min(bbox.right, macroBoxRight);
|
||||
intersect.bottom = std::min(bbox.bottom, macroBoxBottom);
|
||||
intersect.left = std::max(intersect.left, macroBoxLeft);
|
||||
intersect.top = std::max(intersect.top, macroBoxTop);
|
||||
intersect.right = std::min(intersect.right, macroBoxRight);
|
||||
intersect.bottom = std::min(intersect.bottom, macroBoxBottom);
|
||||
|
||||
SWR_ASSERT(intersect.left <= intersect.right && intersect.top <= intersect.bottom && intersect.left >= 0 && intersect.right >= 0 && intersect.top >= 0 && intersect.bottom >= 0);
|
||||
|
||||
@@ -613,7 +812,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
int32_t x = AlignDown(intersect.left, (FIXED_POINT_SCALE * KNOB_TILE_X_DIM));
|
||||
int32_t y = AlignDown(intersect.top, (FIXED_POINT_SCALE * KNOB_TILE_Y_DIM));
|
||||
|
||||
if(sampleCount == SWR_MULTISAMPLE_1X)
|
||||
if(RT::MT::sampleCount == SWR_MULTISAMPLE_1X)
|
||||
{
|
||||
// Add 0.5, in fixed point, to offset to pixel center
|
||||
x += (FIXED_POINT_SCALE / 2);
|
||||
@@ -624,9 +823,6 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
__m128i vTopLeftY = _mm_set1_epi32(y);
|
||||
|
||||
// evaluate edge equations at top-left pixel using 64bit math
|
||||
// all other evaluations will be 32bit steps from it
|
||||
// small triangles could skip this and do all 32bit math
|
||||
// edge 0
|
||||
//
|
||||
// line = Ax + By + C
|
||||
// solving for C:
|
||||
@@ -634,18 +830,15 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
// we know x0 and y0 are on the line; plug them in:
|
||||
// C = -Ax0 - By0
|
||||
// plug C back into line equation:
|
||||
// line = Ax - Bx - Ax0 - Bx1
|
||||
// line = Ax - By - Ax0 - By0
|
||||
// line = A(x - x0) + B(y - y0)
|
||||
// line = A(x0+dX) + B(y0+dY) + C = Ax0 + AdX + By0 + BdY + c = AdX + BdY
|
||||
// dX = (x-x0), dY = (y-y0)
|
||||
// so all this simplifies to
|
||||
// edge = A(dX) + B(dY), our first test at the top left of the bbox we're rasterizing within
|
||||
|
||||
// edge 0 and 1
|
||||
// edge0 = A0(x - x0) + B0(y - y0)
|
||||
// edge1 = A1(x - x1) + B1(y - y1)
|
||||
__m128i vDeltaX = _mm_sub_epi32(vTopLeftX, vXi);
|
||||
__m128i vDeltaY = _mm_sub_epi32(vTopLeftY, vYi);
|
||||
|
||||
__m256d vEdgeFix16[7];
|
||||
|
||||
// evaluate A(dx) and B(dY) for all points
|
||||
__m256d vAipd = _mm256_cvtepi32_pd(vAi);
|
||||
__m256d vBipd = _mm256_cvtepi32_pd(vBi);
|
||||
@@ -656,28 +849,33 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
__m256d vBiDeltaYFix16 = _mm256_mul_pd(vBipd, vDeltaYpd);
|
||||
__m256d vEdge = _mm256_add_pd(vAiDeltaXFix16, vBiDeltaYFix16);
|
||||
|
||||
// adjust for top-left rule
|
||||
vEdge = adjustTopLeftRuleIntFix16(vAi, vBi, vEdge);
|
||||
// apply and edge adjustments(top-left, crast, etc)
|
||||
adjustEdgesFix16<RT>(vAi, vBi, vEdge);
|
||||
|
||||
// broadcast respective edge results to all lanes
|
||||
double* pEdge = (double*)&vEdge;
|
||||
__m256d vEdgeFix16[7];
|
||||
vEdgeFix16[0] = _mm256_set1_pd(pEdge[0]);
|
||||
vEdgeFix16[1] = _mm256_set1_pd(pEdge[1]);
|
||||
vEdgeFix16[2] = _mm256_set1_pd(pEdge[2]);
|
||||
|
||||
// evaluate edge equations for scissor edges
|
||||
if (RasterizeScissorEdges)
|
||||
{
|
||||
const BBOX &scissor = state.scissorInFixedPoint;
|
||||
vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.left)) + (rastEdges[3].b * (y - scissor.top)));
|
||||
vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.left)) + (rastEdges[4].b * (y - scissor.bottom)));
|
||||
vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.right)) + (rastEdges[5].b * (y - scissor.bottom)));
|
||||
vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.right)) + (rastEdges[6].b * (y - scissor.top)));
|
||||
}
|
||||
OSALIGNSIMD(int32_t) aAi[4], aBi[4];
|
||||
_mm_store_si128((__m128i*)aAi, vAi);
|
||||
_mm_store_si128((__m128i*)aBi, vBi);
|
||||
EDGE rastEdges[RT::NumEdgesT::value];
|
||||
|
||||
// Compute and store triangle edge data
|
||||
ComputeEdgeData(aAi[0], aBi[0], rastEdges[0]);
|
||||
ComputeEdgeData(aAi[1], aBi[1], rastEdges[1]);
|
||||
ComputeEdgeData(aAi[2], aBi[2], rastEdges[2]);
|
||||
|
||||
// Compute and store triangle edge data if scissor needs to rasterized
|
||||
ComputeScissorEdges<typename RT::RasterizeScissorEdgesT, typename RT::IsConservativeT, RT>
|
||||
(bbox, state.scissorInFixedPoint, x, y, rastEdges, vEdgeFix16);
|
||||
|
||||
// Evaluate edge equations at sample positions of each of the 4 corners of a raster tile
|
||||
// used to for testing if entire raster tile is inside a triangle
|
||||
for (uint32_t e = 0; e < numEdges; ++e)
|
||||
for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
|
||||
{
|
||||
vEdgeFix16[e] = _mm256_add_pd(vEdgeFix16[e], rastEdges[e].vRasterTileOffsets);
|
||||
}
|
||||
@@ -689,10 +887,10 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
// | |
|
||||
// min(xSamples),max(ySamples) ------ max(xSamples),max(ySamples)
|
||||
__m256d vEdgeTileBbox[3];
|
||||
if (sampleCount > SWR_MULTISAMPLE_1X)
|
||||
if (RT::MT::sampleCount > SWR_MULTISAMPLE_1X)
|
||||
{
|
||||
__m128i vTileSampleBBoxXh = MultisampleTraits<sampleCount>::TileSampleOffsetsX();
|
||||
__m128i vTileSampleBBoxYh = MultisampleTraits<sampleCount>::TileSampleOffsetsY();
|
||||
__m128i vTileSampleBBoxXh = RT::MT::TileSampleOffsetsX();
|
||||
__m128i vTileSampleBBoxYh = RT::MT::TileSampleOffsetsY();
|
||||
|
||||
__m256d vTileSampleBBoxXFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxXh);
|
||||
__m256d vTileSampleBBoxYFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxYh);
|
||||
@@ -714,25 +912,15 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
uint32_t maxY = maxTileY;
|
||||
uint32_t maxX = maxTileX;
|
||||
|
||||
// compute steps between raster tiles for render output buffers
|
||||
static const uint32_t colorRasterTileStep{(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8)) * MultisampleTraits<sampleCount>::numSamples};
|
||||
static const uint32_t colorRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * colorRasterTileStep};
|
||||
static const uint32_t depthRasterTileStep{(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8)) * MultisampleTraits<sampleCount>::numSamples};
|
||||
static const uint32_t depthRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM)* depthRasterTileStep};
|
||||
static const uint32_t stencilRasterTileStep{(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8)) * MultisampleTraits<sampleCount>::numSamples};
|
||||
static const uint32_t stencilRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * stencilRasterTileStep};
|
||||
RenderOutputBuffers renderBuffers, currentRenderBufferRow;
|
||||
|
||||
GetRenderHotTiles(pDC, macroTile, tileX, tileY, renderBuffers, MultisampleTraits<sampleCount>::numSamples,
|
||||
triDesc.triFlags.renderTargetArrayIndex);
|
||||
GetRenderHotTiles<RT::MT::numSamples>(pDC, macroTile, tileX, tileY, renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
|
||||
currentRenderBufferRow = renderBuffers;
|
||||
|
||||
// rasterize and generate coverage masks per sample
|
||||
uint32_t maxSamples = MultisampleTraits<sampleCount>::numSamples;
|
||||
for (uint32_t tileY = tY; tileY <= maxY; ++tileY)
|
||||
{
|
||||
__m256d vStartOfRowEdge[numEdges];
|
||||
for (uint32_t e = 0; e < numEdges; ++e)
|
||||
__m256d vStartOfRowEdge[RT::NumEdgesT::value];
|
||||
for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
|
||||
{
|
||||
vStartOfRowEdge[e] = vEdgeFix16[e];
|
||||
}
|
||||
@@ -743,25 +931,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
|
||||
// is the corner of the edge outside of the raster tile? (vEdge < 0)
|
||||
int mask0, mask1, mask2;
|
||||
if (sampleCount == SWR_MULTISAMPLE_1X)
|
||||
{
|
||||
mask0 = _mm256_movemask_pd(vEdgeFix16[0]);
|
||||
mask1 = _mm256_movemask_pd(vEdgeFix16[1]);
|
||||
mask2 = _mm256_movemask_pd(vEdgeFix16[2]);
|
||||
}
|
||||
else
|
||||
{
|
||||
__m256d vSampleBboxTest0, vSampleBboxTest1, vSampleBboxTest2;
|
||||
// evaluate edge equations at the tile multisample bounding box
|
||||
vSampleBboxTest0 = _mm256_add_pd(vEdgeTileBbox[0], vEdgeFix16[0]);
|
||||
vSampleBboxTest1 = _mm256_add_pd(vEdgeTileBbox[1], vEdgeFix16[1]);
|
||||
vSampleBboxTest2 = _mm256_add_pd(vEdgeTileBbox[2], vEdgeFix16[2]);
|
||||
mask0 = _mm256_movemask_pd(vSampleBboxTest0);
|
||||
mask1 = _mm256_movemask_pd(vSampleBboxTest1);
|
||||
mask2 = _mm256_movemask_pd(vSampleBboxTest2);
|
||||
}
|
||||
UpdateEdgeMasks<RT::MT::sampleCount>(vEdgeTileBbox, vEdgeFix16, mask0, mask1, mask2);
|
||||
|
||||
for (uint32_t sampleNum = 0; sampleNum < maxSamples; sampleNum++)
|
||||
for (uint32_t sampleNum = 0; sampleNum < RT::MT::numSamples; sampleNum++)
|
||||
{
|
||||
// trivial reject, at least one edge has all 4 corners of raster tile outside
|
||||
bool trivialReject = (!(mask0 && mask1 && mask2)) ? true : false;
|
||||
@@ -779,27 +951,24 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
}
|
||||
else
|
||||
{
|
||||
__m256d vEdgeAtSample[numEdges];
|
||||
if(sampleCount == SWR_MULTISAMPLE_1X)
|
||||
__m256d vEdgeAtSample[RT::NumEdgesT::value];
|
||||
if(RT::MT::sampleCount == SWR_MULTISAMPLE_1X)
|
||||
{
|
||||
// should get optimized out for single sample case (global value numbering or copy propagation)
|
||||
for (uint32_t e = 0; e < numEdges; ++e)
|
||||
for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
|
||||
{
|
||||
vEdgeAtSample[e] = vEdgeFix16[e];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
__m128i vSampleOffsetXh = MultisampleTraits<sampleCount>::vXi(sampleNum);
|
||||
__m128i vSampleOffsetYh = MultisampleTraits<sampleCount>::vYi(sampleNum);
|
||||
__m128i vSampleOffsetXh = RT::MT::vXi(sampleNum);
|
||||
__m128i vSampleOffsetYh = RT::MT::vYi(sampleNum);
|
||||
__m256d vSampleOffsetX = _mm256_cvtepi32_pd(vSampleOffsetXh);
|
||||
__m256d vSampleOffsetY = _mm256_cvtepi32_pd(vSampleOffsetYh);
|
||||
|
||||
// *note*: none of this needs to be vectorized as rasterizePartialTile just takes vEdge[0]
|
||||
// for each edge and broadcasts it before offsetting to individual pixel quads
|
||||
|
||||
// step edge equation tests from UL tile corner to pixel sample position
|
||||
for (uint32_t e = 0; e < numEdges; ++e)
|
||||
for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
|
||||
{
|
||||
__m256d vResultAxFix16 = _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].a), vSampleOffsetX);
|
||||
__m256d vResultByFix16 = _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].b), vSampleOffsetY);
|
||||
@@ -808,23 +977,16 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
}
|
||||
}
|
||||
|
||||
double startQuadEdges[numEdges];
|
||||
double startQuadEdges[RT::NumEdgesT::value];
|
||||
const __m256i vLane0Mask = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
|
||||
for (uint32_t e = 0; e < numEdges; ++e)
|
||||
for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
|
||||
{
|
||||
_mm256_maskstore_pd(&startQuadEdges[e], vLane0Mask, vEdgeAtSample[e]);
|
||||
}
|
||||
|
||||
// not trivial accept or reject, must rasterize full tile
|
||||
RDTSC_START(BERasterizePartial);
|
||||
if (RasterizeScissorEdges)
|
||||
{
|
||||
triDesc.coverageMask[sampleNum] = rasterizePartialTile<7>(pDC, startQuadEdges, rastEdges);
|
||||
}
|
||||
else
|
||||
{
|
||||
triDesc.coverageMask[sampleNum] = rasterizePartialTile<3>(pDC, startQuadEdges, rastEdges);
|
||||
}
|
||||
triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value>(pDC, startQuadEdges, rastEdges);
|
||||
RDTSC_STOP(BERasterizePartial, 0, 0);
|
||||
|
||||
triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum];
|
||||
@@ -833,7 +995,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
else
|
||||
{
|
||||
// if we're calculating coverage per sample, need to store it off. otherwise no covered samples, don't need to do anything
|
||||
if(sampleCount > SWR_MULTISAMPLE_1X)
|
||||
if(RT::MT::sampleCount > SWR_MULTISAMPLE_1X)
|
||||
{
|
||||
triDesc.coverageMask[sampleNum] = 0;
|
||||
}
|
||||
@@ -856,19 +1018,19 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
|
||||
}
|
||||
|
||||
// step to the next tile in X
|
||||
for (uint32_t e = 0; e < numEdges; ++e)
|
||||
for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
|
||||
{
|
||||
vEdgeFix16[e] = _mm256_add_pd(vEdgeFix16[e], _mm256_set1_pd(rastEdges[e].stepRasterTileX));
|
||||
}
|
||||
StepRasterTileX(state.psState.numRenderTargets, renderBuffers, colorRasterTileStep, depthRasterTileStep, stencilRasterTileStep);
|
||||
StepRasterTileX<RT>(state.psState.numRenderTargets, renderBuffers);
|
||||
}
|
||||
|
||||
// step to the next tile in Y
|
||||
for (uint32_t e = 0; e < numEdges; ++e)
|
||||
for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
|
||||
{
|
||||
vEdgeFix16[e] = _mm256_add_pd(vStartOfRowEdge[e], _mm256_set1_pd(rastEdges[e].stepRasterTileY));
|
||||
}
|
||||
StepRasterTileY(state.psState.numRenderTargets, renderBuffers, currentRenderBufferRow, colorRasterTileRowStep, depthRasterTileRowStep, stencilRasterTileRowStep);
|
||||
StepRasterTileY<RT>(state.psState.numRenderTargets, renderBuffers, currentRenderBufferRow);
|
||||
}
|
||||
|
||||
RDTSC_STOP(BERasterizeTriangle, 1, 0);
|
||||
@@ -922,16 +1084,11 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile,
|
||||
|
||||
// setup triangle rasterizer function
|
||||
PFN_WORK_FUNC pfnTriRast;
|
||||
if (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN)
|
||||
{
|
||||
pfnTriRast = gRasterizerTable[rastState.scissorEnable][rastState.sampleCount];
|
||||
}
|
||||
else
|
||||
{
|
||||
// for center sample pattern, all samples are at pixel center; calculate coverage
|
||||
// once at center and broadcast the results in the backend
|
||||
pfnTriRast = gRasterizerTable[rastState.scissorEnable][SWR_MULTISAMPLE_1X];
|
||||
}
|
||||
// for center sample pattern, all samples are at pixel center; calculate coverage
|
||||
// once at center and broadcast the results in the backend
|
||||
uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X;
|
||||
// conservative rast not supported for points/lines
|
||||
pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, (rastState.scissorEnable > 0));
|
||||
|
||||
// overwrite texcoords for point sprites
|
||||
if (isPointSpriteTexCoordEnabled)
|
||||
@@ -1064,7 +1221,7 @@ void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTi
|
||||
|
||||
RenderOutputBuffers renderBuffers;
|
||||
GetRenderHotTiles(pDC, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
|
||||
renderBuffers, 1, triDesc.triFlags.renderTargetArrayIndex);
|
||||
renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
|
||||
|
||||
RDTSC_START(BEPixelBackend);
|
||||
backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers);
|
||||
@@ -1072,8 +1229,8 @@ void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTi
|
||||
}
|
||||
|
||||
// Get pointers to hot tile memory for color RT, depth, stencil
|
||||
void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint32_t tileY, RenderOutputBuffers &renderBuffers,
|
||||
uint32_t numSamples, uint32_t renderTargetArrayIndex)
|
||||
template <uint32_t numSamples>
|
||||
void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint32_t tileY, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex)
|
||||
{
|
||||
const API_STATE& state = GetApiState(pDC);
|
||||
SWR_CONTEXT *pContext = pDC->pContext;
|
||||
@@ -1123,52 +1280,33 @@ void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t macroID, uint32_t tileX, uint
|
||||
}
|
||||
}
|
||||
|
||||
INLINE
|
||||
void StepRasterTileX(uint32_t NumRT, RenderOutputBuffers &buffers, uint32_t colorTileStep, uint32_t depthTileStep, uint32_t stencilTileStep)
|
||||
template <typename RT>
|
||||
INLINE void StepRasterTileX(uint32_t NumRT, RenderOutputBuffers &buffers)
|
||||
{
|
||||
for(uint32_t rt = 0; rt < NumRT; ++rt)
|
||||
{
|
||||
buffers.pColor[rt] += colorTileStep;
|
||||
buffers.pColor[rt] += RT::colorRasterTileStep;
|
||||
}
|
||||
|
||||
buffers.pDepth += depthTileStep;
|
||||
buffers.pStencil += stencilTileStep;
|
||||
buffers.pDepth += RT::depthRasterTileStep;
|
||||
buffers.pStencil += RT::stencilRasterTileStep;
|
||||
}
|
||||
|
||||
INLINE
|
||||
void StepRasterTileY(uint32_t NumRT, RenderOutputBuffers &buffers, RenderOutputBuffers &startBufferRow, uint32_t colorRowStep, uint32_t depthRowStep, uint32_t stencilRowStep)
|
||||
template <typename RT>
|
||||
INLINE void StepRasterTileY(uint32_t NumRT, RenderOutputBuffers &buffers, RenderOutputBuffers &startBufferRow)
|
||||
{
|
||||
for(uint32_t rt = 0; rt < NumRT; ++rt)
|
||||
{
|
||||
startBufferRow.pColor[rt] += colorRowStep;
|
||||
startBufferRow.pColor[rt] += RT::colorRasterTileRowStep;
|
||||
buffers.pColor[rt] = startBufferRow.pColor[rt];
|
||||
}
|
||||
startBufferRow.pDepth += depthRowStep;
|
||||
startBufferRow.pDepth += RT::depthRasterTileRowStep;
|
||||
buffers.pDepth = startBufferRow.pDepth;
|
||||
|
||||
startBufferRow.pStencil += stencilRowStep;
|
||||
startBufferRow.pStencil += RT::stencilRasterTileRowStep;
|
||||
buffers.pStencil = startBufferRow.pStencil;
|
||||
}
|
||||
|
||||
// initialize rasterizer function table
|
||||
PFN_WORK_FUNC gRasterizerTable[2][SWR_MULTISAMPLE_TYPE_MAX] =
|
||||
{
|
||||
{
|
||||
RasterizeTriangle<false, SWR_MULTISAMPLE_1X>,
|
||||
RasterizeTriangle<false, SWR_MULTISAMPLE_2X>,
|
||||
RasterizeTriangle<false, SWR_MULTISAMPLE_4X>,
|
||||
RasterizeTriangle<false, SWR_MULTISAMPLE_8X>,
|
||||
RasterizeTriangle<false, SWR_MULTISAMPLE_16X>
|
||||
},
|
||||
{
|
||||
RasterizeTriangle<true, SWR_MULTISAMPLE_1X>,
|
||||
RasterizeTriangle<true, SWR_MULTISAMPLE_2X>,
|
||||
RasterizeTriangle<true, SWR_MULTISAMPLE_4X>,
|
||||
RasterizeTriangle<true, SWR_MULTISAMPLE_8X>,
|
||||
RasterizeTriangle<true, SWR_MULTISAMPLE_16X>
|
||||
}
|
||||
};
|
||||
|
||||
void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
|
||||
{
|
||||
const TRIANGLE_WORK_DESC &workDesc = *((TRIANGLE_WORK_DESC*)pData);
|
||||
@@ -1274,6 +1412,12 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
|
||||
}
|
||||
}
|
||||
|
||||
// setup triangle rasterizer function
|
||||
PFN_WORK_FUNC pfnTriRast;
|
||||
uint32_t sampleCount = (rastState.samplePattern == SWR_MSAA_STANDARD_PATTERN) ? rastState.sampleCount : SWR_MULTISAMPLE_1X;
|
||||
// conservative rast not supported for points/lines
|
||||
pfnTriRast = GetRasterizerFunc(sampleCount, false, SWR_INPUT_COVERAGE_NONE, (rastState.scissorEnable > 0));
|
||||
|
||||
// make sure this macrotile intersects the triangle
|
||||
__m128i vXai = fpToFixedPoint(vXa);
|
||||
__m128i vYai = fpToFixedPoint(vYa);
|
||||
@@ -1289,7 +1433,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
|
||||
bboxA.bottom - 1 < macroBoxTop ||
|
||||
bboxA.bottom - 1 < state.scissorInFixedPoint.top)) {
|
||||
// rasterize triangle
|
||||
gRasterizerTable[rastState.scissorEnable][rastState.sampleCount](pDC, workerId, macroTile, (void*)&newWorkDesc);
|
||||
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
|
||||
}
|
||||
|
||||
// triangle 1
|
||||
@@ -1362,9 +1506,30 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
|
||||
bboxA.bottom - 1 < macroBoxTop ||
|
||||
bboxA.bottom - 1 < state.scissorInFixedPoint.top)) {
|
||||
// rasterize triangle
|
||||
gRasterizerTable[rastState.scissorEnable][rastState.sampleCount](pDC, workerId, macroTile, (void*)&newWorkDesc);
|
||||
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
|
||||
}
|
||||
|
||||
RDTSC_STOP(BERasterizeLine, 1, 0);
|
||||
}
|
||||
|
||||
struct RasterizerChooser
|
||||
{
|
||||
typedef PFN_WORK_FUNC FuncType;
|
||||
|
||||
template <typename... ArgsB>
|
||||
static FuncType GetFunc()
|
||||
{
|
||||
return RasterizeTriangle<RasterizerTraits<ArgsB...>>;
|
||||
}
|
||||
};
|
||||
|
||||
// Selector for correct templated RasterizeTriangle function
|
||||
PFN_WORK_FUNC GetRasterizerFunc(
|
||||
uint32_t numSamples,
|
||||
bool IsConservative,
|
||||
uint32_t InputCoverage,
|
||||
bool RasterizeScissorEdges
|
||||
)
|
||||
{
|
||||
return TemplateArgUnroller<RasterizerChooser>::GetFunc(numSamples, IsConservative, InputCoverage, RasterizeScissorEdges);
|
||||
}
|
||||
|
||||
@@ -29,8 +29,9 @@
|
||||
|
||||
#include "context.h"
|
||||
#include <type_traits>
|
||||
#include "conservativeRast.h"
|
||||
#include "multisample.h"
|
||||
|
||||
extern PFN_WORK_FUNC gRasterizerTable[2][SWR_MULTISAMPLE_TYPE_MAX];
|
||||
void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
|
||||
void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
|
||||
void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
|
||||
@@ -40,4 +41,70 @@ __m128i fpToFixedPoint(const __m128 vIn)
|
||||
{
|
||||
__m128 vFixed = _mm_mul_ps(vIn, _mm_set1_ps(FIXED_POINT_SCALE));
|
||||
return _mm_cvtps_epi32(vFixed);
|
||||
}
|
||||
}
|
||||
|
||||
// Selector for correct templated RasterizeTriangle function
|
||||
PFN_WORK_FUNC GetRasterizerFunc(
|
||||
uint32_t numSamples,
|
||||
bool IsConservative,
|
||||
uint32_t InputCoverage,
|
||||
bool RasterizeScissorEdges);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @struct RasterScissorEdgesT
|
||||
/// @brief Primary RasterScissorEdgesT templated struct that holds compile
|
||||
/// time information about the number of edges needed to be rasterized,
|
||||
/// If either the scissor rect or conservative rast is enabled,
|
||||
/// the scissor test is enabled and the rasterizer will test
|
||||
/// 3 triangle edges + 4 scissor edges for coverage.
|
||||
/// @tparam RasterScissorEdgesT: number of multisamples
|
||||
/// @tparam ConservativeT: is this a conservative rasterization
|
||||
template <typename RasterScissorEdgesT, typename ConservativeT>
|
||||
struct RasterEdgeTraits
|
||||
{
|
||||
typedef std::true_type RasterizeScissorEdgesT;
|
||||
typedef std::integral_constant<uint32_t, 7> NumEdgesT;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief specialization of RasterEdgeTraits. If neither scissor rect
|
||||
/// nor conservative rast is enabled, only test 3 triangle edges
|
||||
/// for coverage
|
||||
template <>
|
||||
struct RasterEdgeTraits<std::false_type, std::false_type>
|
||||
{
|
||||
typedef std::false_type RasterizeScissorEdgesT;
|
||||
typedef std::integral_constant<uint32_t, 3> NumEdgesT;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @struct RasterizerTraits
|
||||
/// @brief templated struct that holds compile time information used
|
||||
/// during rasterization. Inherits EdgeTraits and ConservativeRastBETraits.
|
||||
/// @tparam NumSamplesT: number of multisamples
|
||||
/// @tparam ConservativeT: is this a conservative rasterization
|
||||
/// @tparam InputCoverageT: what type of input coverage is the PS expecting?
|
||||
/// (only used with conservative rasterization)
|
||||
/// @tparam RasterScissorEdgesT: do we need to rasterize with a scissor?
|
||||
template <typename NumSamplesT, typename ConservativeT, typename InputCoverageT, typename RasterScissorEdgesT>
|
||||
struct RasterizerTraits final : public ConservativeRastBETraits<ConservativeT, InputCoverageT>,
|
||||
public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT>
|
||||
{
|
||||
typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value)> MT;
|
||||
|
||||
/// Fixed point precision the rasterizer is using
|
||||
typedef FixedPointTraits<Fixed_16_8> PrecisionT;
|
||||
/// Fixed point precision of the edge tests used during rasterization
|
||||
typedef FixedPointTraits<Fixed_X_16> EdgePrecisionT;
|
||||
|
||||
static_assert(EdgePrecisionT::BitsT::value >= ConservativeRastBETraits<ConservativeT, InputCoverageT>::ConservativePrecisionT::BitsT::value,
|
||||
"Rasterizer edge fixed point precision < required conservative rast precision");
|
||||
|
||||
/// constants used to offset between different types of raster tiles
|
||||
static const int colorRasterTileStep{(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8)) * MT::numSamples};
|
||||
static const int depthRasterTileStep{(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8)) * MT::numSamples};
|
||||
static const int stencilRasterTileStep{(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8)) * MT::numSamples};
|
||||
static const int colorRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * colorRasterTileStep};
|
||||
static const int depthRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM)* depthRasterTileStep};
|
||||
static const int stencilRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * stencilRasterTileStep};
|
||||
};
|
||||
|
||||
@@ -1022,7 +1022,7 @@ struct SWR_PS_STATE
|
||||
|
||||
// dword 2
|
||||
uint32_t killsPixel : 1; // pixel shader can kill pixels
|
||||
uint32_t inputCoverage : 1; // ps uses input coverage
|
||||
uint32_t inputCoverage : 2; // ps uses input coverage
|
||||
uint32_t writesODepth : 1; // pixel shader writes to depth
|
||||
uint32_t usesSourceDepth : 1; // pixel shader reads depth
|
||||
uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
|
||||
|
||||
Reference in New Issue
Block a user