llvmpipe: combine linear mask calculation

This commit is contained in:
Keith Whitwell
2010-08-22 22:56:54 +01:00
parent d8c92a1eea
commit 0aa3a09ced
2 changed files with 81 additions and 12 deletions
+67 -4
View File
@@ -128,11 +128,71 @@ build_mask_linear(int c, int dcdx, int dcdy)
return mask;
}
/**
 * Scalar fallback: accumulate two linear masks in a single call.
 *
 * \param c         base value handed to build_mask_linear() for the "out" mask
 * \param cdiff     offset added to \p c to obtain the base of the "part" mask
 * \param dcdx      x step forwarded unchanged to build_mask_linear()
 * \param dcdy      y step forwarded unchanged to build_mask_linear()
 * \param outmask   in/out: OR-ed with build_mask_linear(c, dcdx, dcdy)
 * \param partmask  in/out: OR-ed with build_mask_linear(c + cdiff, dcdx, dcdy)
 *
 * Both destinations are only ever OR-ed into, so bits already set by the
 * caller are preserved.
 */
static INLINE void
build_masks(int c,
            int cdiff,
            int dcdx,
            int dcdy,
            unsigned *outmask,
            unsigned *partmask)
{
   /* The second mask walks the same grid, shifted by cdiff. */
   const int c_part = c + cdiff;

   *outmask = *outmask | build_mask_linear(c, dcdx, dcdy);
   *partmask = *partmask | build_mask_linear(c_part, dcdx, dcdy);
}
#else
#include <emmintrin.h>
#include "util/u_sse.h"
/**
 * SSE2 variant: compute the sign-bit masks of two 4x4 grids at once.
 *
 * The first grid holds the values c + col * dcdx + row * dcdy for
 * col, row in 0..3; the second grid is the same set of values with
 * cdiff added to each element.  For each grid a 16-bit mask is formed
 * in which bit (4 * row + col) is set when the corresponding value is
 * negative.
 *
 * \param c         value at grid position (0, 0)
 * \param cdiff     offset between the first and the second grid
 * \param dcdx      increment per column
 * \param dcdy      increment per row
 * \param outmask   in/out: OR-ed with the sign mask of the first grid
 * \param partmask  in/out: OR-ed with the sign mask of the second grid
 */
static INLINE void
build_masks(int c,
            int cdiff,
            int dcdx,
            int dcdy,
            unsigned *outmask,
            unsigned *partmask)
{
   const __m128i row_step = _mm_set1_epi32(dcdy);
   const __m128i grid_offset = _mm_set1_epi32(cdiff);

   /* One vector per row; the four lanes are the four columns. */
   const __m128i row0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
   const __m128i row1 = _mm_add_epi32(row0, row_step);
   const __m128i row2 = _mm_add_epi32(row1, row_step);
   const __m128i row3 = _mm_add_epi32(row2, row_step);

   /* The same rows, shifted by cdiff, for the second mask. */
   const __m128i shifted0 = _mm_add_epi32(row0, grid_offset);
   const __m128i shifted1 = _mm_add_epi32(row1, grid_offset);
   const __m128i shifted2 = _mm_add_epi32(row2, grid_offset);
   const __m128i shifted3 = _mm_add_epi32(row3, grid_offset);

   /* Narrow 32 -> 16 -> 8 bits with signed saturation.  Saturation keeps
    * the sign of every element, so the top bit of each resulting byte is
    * the sign of the original 32-bit value, and movemask gathers those 16
    * top bits into one integer.
    */
   const __m128i out_bytes =
      _mm_packs_epi16(_mm_packs_epi32(row0, row1),
                      _mm_packs_epi32(row2, row3));
   const __m128i part_bytes =
      _mm_packs_epi16(_mm_packs_epi32(shifted0, shifted1),
                      _mm_packs_epi32(shifted2, shifted3));

   *outmask |= _mm_movemask_epi8(out_bytes);
   *partmask |= _mm_movemask_epi8(part_bytes);
}
static INLINE unsigned
build_mask_linear(int c, int dcdx, int dcdy)
{
@@ -263,11 +323,14 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
{
const int dcdx = -plane[j].dcdx * 4;
const int dcdy = plane[j].dcdy * 4;
const int cox = c[j] + plane[j].eo * 4;
const int cio = c[j] + plane[j].ei * 4 - 1;
const int cox = plane[j].eo * 4;
const int cio = plane[j].ei * 4 - 1;
outmask |= build_mask_linear(cox, dcdx, dcdy);
partmask |= build_mask_linear(cio, dcdx, dcdy);
build_masks(c[j] + cox,
cio - cox,
dcdx, dcdy,
&outmask, /* sign bits from c[i][0..15] + cox */
&partmask); /* sign bits from c[i][0..15] + cio */
}
}
+14 -8
View File
@@ -81,11 +81,14 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
for (j = 0; j < NR_PLANES; j++) {
const int dcdx = -plane[j].dcdx * 4;
const int dcdy = plane[j].dcdy * 4;
const int cox = c[j] + plane[j].eo * 4;
const int cio = c[j] + plane[j].ei * 4 - 1;
const int cox = plane[j].eo * 4;
const int cio = plane[j].ei * 4 - 1;
outmask |= build_mask_linear(cox, dcdx, dcdy);
partmask |= build_mask_linear(cio, dcdx, dcdy);
build_masks(c[j] + cox,
cio - cox,
dcdx, dcdy,
&outmask, /* sign bits from c[i][0..15] + cox */
&partmask); /* sign bits from c[i][0..15] + cio */
}
if (outmask == 0xffff)
@@ -171,11 +174,14 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
{
const int dcdx = -plane[j].dcdx * 16;
const int dcdy = plane[j].dcdy * 16;
const int cox = c[j] + plane[j].eo * 16;
const int cio = c[j] + plane[j].ei * 16 - 1;
const int cox = plane[j].eo * 16;
const int cio = plane[j].ei * 16 - 1;
outmask |= build_mask_linear(cox, dcdx, dcdy);
partmask |= build_mask_linear(cio, dcdx, dcdy);
build_masks(c[j] + cox,
cio - cox,
dcdx, dcdy,
&outmask, /* sign bits from c[i][0..15] + cox */
&partmask); /* sign bits from c[i][0..15] + cio */
}
j++;