llvmpipe: Unswizzled rendering.
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
This commit is contained in:
committed by
José Fonseca
parent
1d3789bccb
commit
fa1b481c09
@@ -414,6 +414,81 @@ lp_build_unsigned_norm_to_float(struct gallivm_state *gallivm,
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Pick a suitable num_dsts for lp_build_conv to ensure optimal cases are used.
|
||||
*
|
||||
* Returns the number of dsts created from src
|
||||
*/
|
||||
int lp_build_conv_auto(struct gallivm_state *gallivm,
|
||||
struct lp_type src_type,
|
||||
struct lp_type* dst_type,
|
||||
const LLVMValueRef *src,
|
||||
unsigned num_srcs,
|
||||
LLVMValueRef *dst)
|
||||
{
|
||||
int i;
|
||||
int num_dsts = num_srcs;
|
||||
|
||||
if (src_type.floating == dst_type->floating &&
|
||||
src_type.width == dst_type->width &&
|
||||
src_type.length == dst_type->length &&
|
||||
src_type.fixed == dst_type->fixed &&
|
||||
src_type.norm == dst_type->norm &&
|
||||
src_type.sign == dst_type->sign)
|
||||
return num_dsts;
|
||||
|
||||
/* Special case 4x4f -> 1x16ub or 2x8f -> 1x16ub
|
||||
*/
|
||||
if (src_type.floating == 1 &&
|
||||
src_type.fixed == 0 &&
|
||||
src_type.sign == 1 &&
|
||||
src_type.norm == 0 &&
|
||||
src_type.width == 32 &&
|
||||
|
||||
dst_type->floating == 0 &&
|
||||
dst_type->fixed == 0 &&
|
||||
dst_type->sign == 0 &&
|
||||
dst_type->norm == 1 &&
|
||||
dst_type->width == 8)
|
||||
{
|
||||
/* Special case 4x4f --> 1x16ub */
|
||||
if (src_type.length == 4 && util_cpu_caps.has_sse2)
|
||||
{
|
||||
assert((num_srcs % 4) == 0);
|
||||
|
||||
num_dsts = num_srcs / 4;
|
||||
dst_type->length = 16;
|
||||
|
||||
lp_build_conv(gallivm, src_type, *dst_type, src, num_srcs, dst, num_dsts);
|
||||
return num_dsts;
|
||||
}
|
||||
|
||||
/* Special case 2x8f --> 1x16ub */
|
||||
if (src_type.length == 8 && util_cpu_caps.has_avx)
|
||||
{
|
||||
assert((num_srcs % 2) == 0);
|
||||
|
||||
num_dsts = num_srcs / 2;
|
||||
dst_type->length = 16;
|
||||
|
||||
lp_build_conv(gallivm, src_type, *dst_type, src, num_srcs, dst, num_dsts);
|
||||
return num_dsts;
|
||||
}
|
||||
}
|
||||
|
||||
/* lp_build_resize does not support M:N */
|
||||
if (src_type.width == dst_type->width) {
|
||||
lp_build_conv(gallivm, src_type, *dst_type, src, num_srcs, dst, num_dsts);
|
||||
} else {
|
||||
for (i = 0; i < num_srcs; ++i) {
|
||||
lp_build_conv(gallivm, src_type, *dst_type, &src[i], 1, &dst[i], 1);
|
||||
}
|
||||
}
|
||||
|
||||
return num_dsts;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Generic type conversion.
|
||||
*
|
||||
|
||||
@@ -70,6 +70,16 @@ lp_build_conv(struct gallivm_state *gallivm,
|
||||
const LLVMValueRef *srcs, unsigned num_srcs,
|
||||
LLVMValueRef *dsts, unsigned num_dsts);
|
||||
|
||||
|
||||
int
|
||||
lp_build_conv_auto(struct gallivm_state *gallivm,
|
||||
struct lp_type src_type,
|
||||
struct lp_type* dst_type,
|
||||
const LLVMValueRef *src,
|
||||
unsigned num_srcs,
|
||||
LLVMValueRef *dst);
|
||||
|
||||
|
||||
void
|
||||
lp_build_conv_mask(struct gallivm_state *gallivm,
|
||||
struct lp_type src_type,
|
||||
|
||||
@@ -560,7 +560,8 @@ LLVMValueRef
|
||||
lp_build_select_aos(struct lp_build_context *bld,
|
||||
unsigned mask,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b)
|
||||
LLVMValueRef b,
|
||||
unsigned num_channels)
|
||||
{
|
||||
LLVMBuilderRef builder = bld->gallivm->builder;
|
||||
const struct lp_type type = bld->type;
|
||||
@@ -594,8 +595,8 @@ lp_build_select_aos(struct lp_build_context *bld,
|
||||
LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
|
||||
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
|
||||
|
||||
for(j = 0; j < n; j += 4)
|
||||
for(i = 0; i < 4; ++i)
|
||||
for(j = 0; j < n; j += num_channels)
|
||||
for(i = 0; i < num_channels; ++i)
|
||||
shuffles[j + i] = LLVMConstInt(elem_type,
|
||||
(mask & (1 << i) ? 0 : n) + j + i,
|
||||
0);
|
||||
@@ -603,7 +604,7 @@ lp_build_select_aos(struct lp_build_context *bld,
|
||||
return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
|
||||
}
|
||||
else {
|
||||
LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, 4);
|
||||
LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels);
|
||||
return lp_build_select(bld, mask_vec, a, b);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,7 +79,8 @@ LLVMValueRef
|
||||
lp_build_select_aos(struct lp_build_context *bld,
|
||||
unsigned mask,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b);
|
||||
LLVMValueRef b,
|
||||
unsigned num_channels);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
|
||||
@@ -211,6 +211,42 @@ lp_build_concat(struct gallivm_state *gallivm,
|
||||
return tmp[0];
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Combines vectors to reduce from num_srcs to num_dsts.
|
||||
* Returns the number of src vectors concatenated in a single dst.
|
||||
*
|
||||
* num_srcs must be exactly divisible by num_dsts.
|
||||
*
|
||||
* e.g. For num_srcs = 4 and src = [x, y, z, w]
|
||||
* num_dsts = 1 dst = [xyzw] return = 4
|
||||
* num_dsts = 2 dst = [xy, zw] return = 2
|
||||
*/
|
||||
int
|
||||
lp_build_concat_n(struct gallivm_state *gallivm,
|
||||
struct lp_type src_type,
|
||||
LLVMValueRef *src,
|
||||
unsigned num_srcs,
|
||||
LLVMValueRef *dst,
|
||||
unsigned num_dsts)
|
||||
{
|
||||
int size = num_srcs / num_dsts;
|
||||
int i;
|
||||
|
||||
assert(num_srcs >= num_dsts);
|
||||
assert((num_srcs % size) == 0);
|
||||
|
||||
if (num_srcs == num_dsts)
|
||||
return 1;
|
||||
|
||||
for (i = 0; i < num_dsts; ++i) {
|
||||
dst[i] = lp_build_concat(gallivm, &src[i * size], src_type, size);
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Interleave vector elements.
|
||||
*
|
||||
|
||||
@@ -87,6 +87,15 @@ lp_build_concat(struct gallivm_state *gallivm,
|
||||
struct lp_type src_type,
|
||||
unsigned num_vectors);
|
||||
|
||||
int
|
||||
lp_build_concat_n(struct gallivm_state *gallivm,
|
||||
struct lp_type src_type,
|
||||
LLVMValueRef *src,
|
||||
unsigned num_srcs,
|
||||
LLVMValueRef *dst,
|
||||
unsigned num_dsts);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_packs2(struct gallivm_state *gallivm,
|
||||
struct lp_type src_type,
|
||||
|
||||
@@ -31,6 +31,7 @@
|
||||
#include "lp_bld_const.h"
|
||||
#include "lp_bld_swizzle.h"
|
||||
#include "lp_bld_quad.h"
|
||||
#include "lp_bld_pack.h"
|
||||
|
||||
|
||||
static const unsigned char
|
||||
@@ -156,3 +157,52 @@ lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
|
||||
return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Twiddle from quad format to row format
|
||||
*
|
||||
* src0 src1
|
||||
* ######### ######### #################
|
||||
* # 0 | 1 # # 4 | 5 # # 0 | 1 | 4 | 5 # src0
|
||||
* #---+---# #---+---# -> #################
|
||||
* # 2 | 3 # # 6 | 7 # # 2 | 3 | 6 | 7 # src1
|
||||
* ######### ######### #################
|
||||
*
|
||||
*/
|
||||
void
|
||||
lp_bld_quad_twiddle(struct gallivm_state *gallivm,
|
||||
struct lp_type lp_dst_type,
|
||||
const LLVMValueRef* src,
|
||||
unsigned src_count,
|
||||
LLVMValueRef* dst)
|
||||
{
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
LLVMTypeRef dst_type_ref;
|
||||
LLVMTypeRef type2_ref;
|
||||
struct lp_type type2;
|
||||
unsigned i;
|
||||
|
||||
assert((src_count % 2) == 0);
|
||||
|
||||
/* Create a type with only 2 elements */
|
||||
type2 = lp_dst_type;
|
||||
type2.width = (lp_dst_type.width * lp_dst_type.length) / 2;
|
||||
type2.length = 2;
|
||||
type2.floating = 0;
|
||||
|
||||
type2_ref = lp_build_vec_type(gallivm, type2);
|
||||
dst_type_ref = lp_build_vec_type(gallivm, lp_dst_type);
|
||||
|
||||
for (i = 0; i < src_count; i += 2) {
|
||||
LLVMValueRef src0, src1;
|
||||
|
||||
src0 = LLVMBuildBitCast(builder, src[i + 0], type2_ref, "");
|
||||
src1 = LLVMBuildBitCast(builder, src[i + 1], type2_ref, "");
|
||||
|
||||
dst[i + 0] = lp_build_interleave2(gallivm, type2, src0, src1, 0);
|
||||
dst[i + 1] = lp_build_interleave2(gallivm, type2, src0, src1, 1);
|
||||
|
||||
dst[i + 0] = LLVMBuildBitCast(builder, dst[i + 0], dst_type_ref, "");
|
||||
dst[i + 1] = LLVMBuildBitCast(builder, dst[i + 1], dst_type_ref, "");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,5 +88,14 @@ LLVMValueRef
|
||||
lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
|
||||
LLVMValueRef a);
|
||||
|
||||
/*
|
||||
* Twiddle from quad format to row format
|
||||
*/
|
||||
void
|
||||
lp_bld_quad_twiddle(struct gallivm_state *gallivm,
|
||||
struct lp_type lp_dst_type,
|
||||
const LLVMValueRef* src,
|
||||
unsigned src_count,
|
||||
LLVMValueRef* dst);
|
||||
|
||||
#endif /* LP_BLD_QUAD_H_ */
|
||||
|
||||
@@ -772,7 +772,7 @@ lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
|
||||
offset1 = LLVMBuildLoad(builder, offset1, "");
|
||||
offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexo, "");
|
||||
}
|
||||
offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0);
|
||||
offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0, 4);
|
||||
}
|
||||
else {
|
||||
unsigned i;
|
||||
@@ -849,7 +849,7 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
|
||||
stride1 = LLVMBuildLoad(builder, stride1, "");
|
||||
stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
|
||||
}
|
||||
stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0);
|
||||
stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0, 4);
|
||||
}
|
||||
else {
|
||||
LLVMValueRef stride1;
|
||||
@@ -1045,11 +1045,11 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
|
||||
*out_width = size;
|
||||
}
|
||||
else if (bld->num_lods == num_quads) {
|
||||
*out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0);
|
||||
*out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
|
||||
if (dims >= 2) {
|
||||
*out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1);
|
||||
*out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
|
||||
if (dims == 3) {
|
||||
*out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2);
|
||||
*out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1246,9 +1246,9 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
|
||||
signrxyz = LLVMBuildBitCast(builder, rxyz, lp_build_vec_type(gallivm, intctype), "");
|
||||
signrxyz = LLVMBuildAnd(builder, signrxyz, signmask, "");
|
||||
|
||||
arxs = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 0);
|
||||
arys = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 1);
|
||||
arzs = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 2);
|
||||
arxs = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 0, 4);
|
||||
arys = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 1, 4);
|
||||
arzs = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 2, 4);
|
||||
|
||||
/*
|
||||
* select x if x >= y else select y
|
||||
@@ -1267,15 +1267,15 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
|
||||
* snewz = signrz * rx;
|
||||
* tnewz = -ry;
|
||||
*/
|
||||
signrxs = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 0);
|
||||
signrxs = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 0, 4);
|
||||
snewx = LLVMBuildXor(builder, signrxs, rzneg, "");
|
||||
tnewx = ryneg;
|
||||
|
||||
signrys = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 1);
|
||||
signrys = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 1, 4);
|
||||
snewy = rx;
|
||||
tnewy = LLVMBuildXor(builder, signrys, rz, "");
|
||||
|
||||
signrzs = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 2);
|
||||
signrzs = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 2, 4);
|
||||
snewz = LLVMBuildXor(builder, signrzs, rx, "");
|
||||
tnewz = ryneg;
|
||||
|
||||
|
||||
@@ -159,21 +159,24 @@ lp_build_extract_broadcast(struct gallivm_state *gallivm,
|
||||
|
||||
|
||||
/**
|
||||
* Swizzle one channel into all other three channels.
|
||||
* Swizzle one channel into other channels.
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
unsigned channel)
|
||||
unsigned channel,
|
||||
unsigned num_channels)
|
||||
{
|
||||
LLVMBuilderRef builder = bld->gallivm->builder;
|
||||
const struct lp_type type = bld->type;
|
||||
const unsigned n = type.length;
|
||||
unsigned i, j;
|
||||
|
||||
if(a == bld->undef || a == bld->zero || a == bld->one)
|
||||
if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
|
||||
return a;
|
||||
|
||||
assert(num_channels == 2 || num_channels == 4);
|
||||
|
||||
/* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
|
||||
* using shuffles here actually causes worse results. More investigation is
|
||||
* needed. */
|
||||
@@ -184,12 +187,55 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
|
||||
LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
|
||||
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
|
||||
|
||||
for(j = 0; j < n; j += 4)
|
||||
for(i = 0; i < 4; ++i)
|
||||
for(j = 0; j < n; j += num_channels)
|
||||
for(i = 0; i < num_channels; ++i)
|
||||
shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
|
||||
|
||||
return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
|
||||
}
|
||||
else if (num_channels == 2) {
|
||||
/*
|
||||
* Bit mask and shifts
|
||||
*
|
||||
* XY XY .... XY <= input
|
||||
* 0Y 0Y .... 0Y
|
||||
* YY YY .... YY
|
||||
* YY YY .... YY <= output
|
||||
*/
|
||||
struct lp_type type2;
|
||||
LLVMValueRef tmp = NULL;
|
||||
int shift;
|
||||
|
||||
a = LLVMBuildAnd(builder, a,
|
||||
lp_build_const_mask_aos(bld->gallivm,
|
||||
type, 1 << channel, num_channels), "");
|
||||
|
||||
type2 = type;
|
||||
type2.floating = FALSE;
|
||||
type2.width *= 2;
|
||||
type2.length /= 2;
|
||||
|
||||
a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");
|
||||
|
||||
#ifdef PIPE_ARCH_LITTLE_ENDIAN
|
||||
shift = channel == 0 ? 1 : -1;
|
||||
#else
|
||||
shift = channel == 0 ? -1 : 1;
|
||||
#endif
|
||||
|
||||
if (shift > 0) {
|
||||
tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
|
||||
} else if (shift < 0) {
|
||||
tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
|
||||
}
|
||||
|
||||
assert(tmp);
|
||||
if (tmp) {
|
||||
a = LLVMBuildOr(builder, a, tmp, "");
|
||||
}
|
||||
|
||||
return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
|
||||
}
|
||||
else {
|
||||
/*
|
||||
* Bit mask and recursive shifts
|
||||
@@ -247,6 +293,45 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Swizzle a vector consisting of an array of XYZW structs.
|
||||
*
|
||||
* This fills a vector of dst_len length with the swizzled channels from src.
|
||||
*
|
||||
* e.g. with swizzles = { 2, 1, 0 } and swizzle_count = 6 results in
|
||||
* RGBA RGBA = BGR BGR BG
|
||||
*
|
||||
* @param swizzles the swizzle array
|
||||
* @param num_swizzles the number of elements in swizzles
|
||||
* @param dst_len the length of the result
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
|
||||
LLVMValueRef src,
|
||||
const unsigned char* swizzles,
|
||||
unsigned num_swizzles,
|
||||
unsigned dst_len)
|
||||
{
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
|
||||
unsigned i;
|
||||
|
||||
assert(dst_len < LP_MAX_VECTOR_WIDTH);
|
||||
|
||||
for (i = 0; i < dst_len; ++i) {
|
||||
int swizzle = swizzles[i % num_swizzles];
|
||||
|
||||
if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
|
||||
shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
|
||||
} else {
|
||||
shuffles[i] = lp_build_const_int32(gallivm, swizzle);
|
||||
}
|
||||
}
|
||||
|
||||
return LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), LLVMConstVector(shuffles, dst_len), "");
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_swizzle_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
@@ -272,7 +357,7 @@ lp_build_swizzle_aos(struct lp_build_context *bld,
|
||||
case PIPE_SWIZZLE_GREEN:
|
||||
case PIPE_SWIZZLE_BLUE:
|
||||
case PIPE_SWIZZLE_ALPHA:
|
||||
return lp_build_swizzle_scalar_aos(bld, a, swizzles[0]);
|
||||
return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
|
||||
case PIPE_SWIZZLE_ZERO:
|
||||
return bld->zero;
|
||||
case PIPE_SWIZZLE_ONE:
|
||||
@@ -367,7 +452,7 @@ lp_build_swizzle_aos(struct lp_build_context *bld,
|
||||
cond |= 1 << chan;
|
||||
}
|
||||
}
|
||||
res = lp_build_select_aos(bld, cond, bld->one, bld->zero);
|
||||
res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);
|
||||
|
||||
/*
|
||||
* Build a type where each element is an integer that cover the four
|
||||
@@ -553,6 +638,44 @@ lp_build_transpose_aos(struct gallivm_state *gallivm,
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Transpose from AOS <-> SOA for num_srcs
|
||||
*/
|
||||
void
|
||||
lp_build_transpose_aos_n(struct gallivm_state *gallivm,
|
||||
struct lp_type type,
|
||||
const LLVMValueRef* src,
|
||||
unsigned num_srcs,
|
||||
LLVMValueRef* dst)
|
||||
{
|
||||
switch (num_srcs) {
|
||||
case 1:
|
||||
dst[0] = src[0];
|
||||
break;
|
||||
|
||||
case 2:
|
||||
{
|
||||
/* Note: we must use a temporary incase src == dst */
|
||||
LLVMValueRef lo, hi;
|
||||
|
||||
lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0);
|
||||
hi = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1);
|
||||
|
||||
dst[0] = lo;
|
||||
dst[1] = hi;
|
||||
break;
|
||||
}
|
||||
|
||||
case 4:
|
||||
lp_build_transpose_aos(gallivm, type, src, dst);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Pack n-th element of aos values,
|
||||
* pad out to destination size.
|
||||
|
||||
@@ -67,13 +67,14 @@ lp_build_extract_broadcast(struct gallivm_state *gallivm,
|
||||
|
||||
|
||||
/**
|
||||
* Broadcast one channel of a vector composed of arrays of XYZW structures into
|
||||
* all four channel.
|
||||
* Broadcast one channel of a vector composed of arrays of XYZ.. structures into
|
||||
* all channels XXX...
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
unsigned channel);
|
||||
LLVMValueRef a,
|
||||
unsigned channel,
|
||||
unsigned num_channels);
|
||||
|
||||
|
||||
/**
|
||||
@@ -87,6 +88,14 @@ lp_build_swizzle_aos(struct lp_build_context *bld,
|
||||
const unsigned char swizzles[4]);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
|
||||
LLVMValueRef src,
|
||||
const unsigned char* swizzles,
|
||||
unsigned num_swizzles,
|
||||
unsigned dst_len);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_swizzle_soa_channel(struct lp_build_context *bld,
|
||||
const LLVMValueRef *unswizzled,
|
||||
@@ -113,6 +122,14 @@ lp_build_transpose_aos(struct gallivm_state *gallivm,
|
||||
LLVMValueRef dst[4]);
|
||||
|
||||
|
||||
void
|
||||
lp_build_transpose_aos_n(struct gallivm_state *gallivm,
|
||||
struct lp_type type,
|
||||
const LLVMValueRef* src,
|
||||
unsigned num_srcs,
|
||||
LLVMValueRef* dst);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
|
||||
struct lp_type src_type,
|
||||
|
||||
@@ -94,7 +94,7 @@ swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
|
||||
unsigned chan)
|
||||
{
|
||||
chan = bld->swizzles[chan];
|
||||
return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan);
|
||||
return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4);
|
||||
}
|
||||
|
||||
|
||||
@@ -623,7 +623,7 @@ lp_emit_instruction_aos(
|
||||
|
||||
case TGSI_OPCODE_EX2:
|
||||
src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
|
||||
tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X);
|
||||
tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
|
||||
dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
|
||||
break;
|
||||
|
||||
|
||||
@@ -60,10 +60,13 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
|
||||
struct lp_type type,
|
||||
unsigned rt,
|
||||
LLVMValueRef src,
|
||||
LLVMValueRef src_alpha,
|
||||
LLVMValueRef dst,
|
||||
LLVMValueRef mask,
|
||||
LLVMValueRef const_,
|
||||
const unsigned char swizzle[4]);
|
||||
LLVMValueRef const_alpha,
|
||||
const unsigned char swizzle[4],
|
||||
int nr_channels);
|
||||
|
||||
|
||||
void
|
||||
|
||||
@@ -66,14 +66,18 @@
|
||||
struct lp_build_blend_aos_context
|
||||
{
|
||||
struct lp_build_context base;
|
||||
|
||||
|
||||
LLVMValueRef src;
|
||||
LLVMValueRef src_alpha;
|
||||
LLVMValueRef dst;
|
||||
LLVMValueRef const_;
|
||||
LLVMValueRef const_alpha;
|
||||
|
||||
LLVMValueRef inv_src;
|
||||
LLVMValueRef inv_src_alpha;
|
||||
LLVMValueRef inv_dst;
|
||||
LLVMValueRef inv_const;
|
||||
LLVMValueRef inv_const_alpha;
|
||||
LLVMValueRef saturate;
|
||||
|
||||
LLVMValueRef rgb_src_factor;
|
||||
@@ -88,14 +92,18 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
|
||||
unsigned factor,
|
||||
boolean alpha)
|
||||
{
|
||||
LLVMValueRef src_alpha = bld->src_alpha ? bld->src_alpha : bld->src;
|
||||
LLVMValueRef const_alpha = bld->const_alpha ? bld->const_alpha : bld->const_;
|
||||
|
||||
switch (factor) {
|
||||
case PIPE_BLENDFACTOR_ZERO:
|
||||
return bld->base.zero;
|
||||
case PIPE_BLENDFACTOR_ONE:
|
||||
return bld->base.one;
|
||||
case PIPE_BLENDFACTOR_SRC_COLOR:
|
||||
case PIPE_BLENDFACTOR_SRC_ALPHA:
|
||||
return bld->src;
|
||||
case PIPE_BLENDFACTOR_SRC_ALPHA:
|
||||
return src_alpha;
|
||||
case PIPE_BLENDFACTOR_DST_COLOR:
|
||||
case PIPE_BLENDFACTOR_DST_ALPHA:
|
||||
return bld->dst;
|
||||
@@ -106,32 +114,39 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
|
||||
if(!bld->inv_dst)
|
||||
bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
|
||||
if(!bld->saturate)
|
||||
bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst);
|
||||
bld->saturate = lp_build_min(&bld->base, src_alpha, bld->inv_dst);
|
||||
return bld->saturate;
|
||||
}
|
||||
case PIPE_BLENDFACTOR_CONST_COLOR:
|
||||
case PIPE_BLENDFACTOR_CONST_ALPHA:
|
||||
return bld->const_;
|
||||
case PIPE_BLENDFACTOR_CONST_ALPHA:
|
||||
return const_alpha;
|
||||
case PIPE_BLENDFACTOR_SRC1_COLOR:
|
||||
case PIPE_BLENDFACTOR_SRC1_ALPHA:
|
||||
/* TODO */
|
||||
assert(0);
|
||||
return bld->base.zero;
|
||||
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
|
||||
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
|
||||
if(!bld->inv_src)
|
||||
bld->inv_src = lp_build_comp(&bld->base, bld->src);
|
||||
return bld->inv_src;
|
||||
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
|
||||
if(!bld->inv_src_alpha)
|
||||
bld->inv_src_alpha = lp_build_comp(&bld->base, src_alpha);
|
||||
return bld->inv_src_alpha;
|
||||
case PIPE_BLENDFACTOR_INV_DST_COLOR:
|
||||
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
|
||||
if(!bld->inv_dst)
|
||||
bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
|
||||
return bld->inv_dst;
|
||||
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
|
||||
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
|
||||
if(!bld->inv_const)
|
||||
bld->inv_const = lp_build_comp(&bld->base, bld->const_);
|
||||
return bld->inv_const;
|
||||
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
|
||||
if(!bld->inv_const_alpha)
|
||||
bld->inv_const_alpha = lp_build_comp(&bld->base, const_alpha);
|
||||
return bld->inv_const_alpha;
|
||||
case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
|
||||
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
|
||||
/* TODO */
|
||||
@@ -190,7 +205,8 @@ lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
|
||||
LLVMValueRef rgb,
|
||||
LLVMValueRef alpha,
|
||||
enum lp_build_blend_swizzle rgb_swizzle,
|
||||
unsigned alpha_swizzle)
|
||||
unsigned alpha_swizzle,
|
||||
unsigned num_channels)
|
||||
{
|
||||
LLVMValueRef swizzled_rgb;
|
||||
|
||||
@@ -199,7 +215,7 @@ lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
|
||||
swizzled_rgb = rgb;
|
||||
break;
|
||||
case LP_BUILD_BLEND_SWIZZLE_AAAA:
|
||||
swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle);
|
||||
swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle, num_channels);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
@@ -208,13 +224,13 @@ lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
|
||||
|
||||
if (rgb != alpha) {
|
||||
swizzled_rgb = lp_build_select_aos(&bld->base, 1 << alpha_swizzle,
|
||||
alpha, swizzled_rgb);
|
||||
alpha, swizzled_rgb,
|
||||
num_channels);
|
||||
}
|
||||
|
||||
return swizzled_rgb;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
|
||||
*/
|
||||
@@ -222,17 +238,22 @@ static LLVMValueRef
|
||||
lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
|
||||
unsigned rgb_factor,
|
||||
unsigned alpha_factor,
|
||||
unsigned alpha_swizzle)
|
||||
unsigned alpha_swizzle,
|
||||
unsigned num_channels)
|
||||
{
|
||||
LLVMValueRef rgb_factor_, alpha_factor_;
|
||||
enum lp_build_blend_swizzle rgb_swizzle;
|
||||
|
||||
if (alpha_swizzle == 0) {
|
||||
return lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
|
||||
}
|
||||
|
||||
rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
|
||||
|
||||
if (alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
|
||||
rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
|
||||
alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
|
||||
return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle);
|
||||
return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, num_channels);
|
||||
} else {
|
||||
return rgb_factor_;
|
||||
}
|
||||
@@ -261,18 +282,21 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
|
||||
struct lp_type type,
|
||||
unsigned rt,
|
||||
LLVMValueRef src,
|
||||
LLVMValueRef src_alpha,
|
||||
LLVMValueRef dst,
|
||||
LLVMValueRef mask,
|
||||
LLVMValueRef const_,
|
||||
const unsigned char swizzle[4])
|
||||
LLVMValueRef const_alpha,
|
||||
const unsigned char swizzle[4],
|
||||
int nr_channels)
|
||||
{
|
||||
const struct pipe_rt_blend_state * state = &blend->rt[rt];
|
||||
const struct util_format_description * desc;
|
||||
struct lp_build_blend_aos_context bld;
|
||||
LLVMValueRef src_factor, dst_factor;
|
||||
LLVMValueRef result;
|
||||
unsigned alpha_swizzle = swizzle[3];
|
||||
boolean fullcolormask;
|
||||
unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
|
||||
unsigned i;
|
||||
|
||||
desc = util_format_description(cbuf_format[rt]);
|
||||
|
||||
@@ -282,20 +306,32 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
|
||||
bld.src = src;
|
||||
bld.dst = dst;
|
||||
bld.const_ = const_;
|
||||
bld.src_alpha = src_alpha;
|
||||
bld.const_alpha = const_alpha;
|
||||
|
||||
if (swizzle[3] > UTIL_FORMAT_SWIZZLE_W || swizzle[3] == swizzle[0])
|
||||
alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
|
||||
/* Find the alpha channel if not provided separately */
|
||||
if (!src_alpha) {
|
||||
for (i = 0; i < 4; ++i) {
|
||||
if (swizzle[i] == 3) {
|
||||
alpha_swizzle = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!state->blend_enable) {
|
||||
result = src;
|
||||
} else {
|
||||
boolean rgb_alpha_same = state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor;
|
||||
assert(rgb_alpha_same || alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
|
||||
boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor) || nr_channels == 1;
|
||||
|
||||
src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor,
|
||||
state->alpha_src_factor, alpha_swizzle);
|
||||
state->alpha_src_factor,
|
||||
alpha_swizzle,
|
||||
nr_channels);
|
||||
|
||||
dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor,
|
||||
state->alpha_dst_factor, alpha_swizzle);
|
||||
state->alpha_dst_factor,
|
||||
alpha_swizzle,
|
||||
nr_channels);
|
||||
|
||||
result = lp_build_blend(&bld.base,
|
||||
state->rgb_func,
|
||||
@@ -308,7 +344,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
|
||||
rgb_alpha_same,
|
||||
false);
|
||||
|
||||
if(state->rgb_func != state->alpha_func && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
|
||||
if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
|
||||
LLVMValueRef alpha;
|
||||
|
||||
alpha = lp_build_blend(&bld.base,
|
||||
@@ -326,22 +362,27 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
|
||||
result,
|
||||
alpha,
|
||||
LP_BUILD_BLEND_SWIZZLE_RGBA,
|
||||
alpha_swizzle);
|
||||
alpha_swizzle,
|
||||
nr_channels);
|
||||
}
|
||||
}
|
||||
|
||||
/* Check if color mask is necessary */
|
||||
fullcolormask = util_format_colormask_full(util_format_description(cbuf_format[rt]), state->colormask);
|
||||
|
||||
if (!fullcolormask) {
|
||||
if (!util_format_colormask_full(desc, state->colormask)) {
|
||||
LLVMValueRef color_mask;
|
||||
|
||||
color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state.colormask, desc->nr_channels, swizzle);
|
||||
color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state->colormask, nr_channels, swizzle);
|
||||
lp_build_name(color_mask, "color_mask");
|
||||
|
||||
/* Combine with input mask if necessary */
|
||||
if (mask) {
|
||||
/* We can be blending floating values but masks are always integer... */
|
||||
unsigned floating = bld.base.type.floating;
|
||||
bld.base.type.floating = 0;
|
||||
|
||||
mask = lp_build_and(&bld.base, color_mask, mask);
|
||||
|
||||
bld.base.type.floating = floating;
|
||||
} else {
|
||||
mask = color_mask;
|
||||
}
|
||||
|
||||
@@ -128,7 +128,8 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
|
||||
elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatTypeInContext(lc);
|
||||
elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] =
|
||||
elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32TypeInContext(lc);
|
||||
elem_types[LP_JIT_CTX_BLEND_COLOR] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
|
||||
elem_types[LP_JIT_CTX_U8_BLEND_COLOR] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
|
||||
elem_types[LP_JIT_CTX_F_BLEND_COLOR] = LLVMPointerType(LLVMFloatTypeInContext(lc), 0);
|
||||
elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type,
|
||||
PIPE_MAX_SAMPLERS);
|
||||
|
||||
@@ -153,9 +154,12 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
|
||||
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_back,
|
||||
gallivm->target, context_type,
|
||||
LP_JIT_CTX_STENCIL_REF_BACK);
|
||||
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
|
||||
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, u8_blend_color,
|
||||
gallivm->target, context_type,
|
||||
LP_JIT_CTX_BLEND_COLOR);
|
||||
LP_JIT_CTX_U8_BLEND_COLOR);
|
||||
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, f_blend_color,
|
||||
gallivm->target, context_type,
|
||||
LP_JIT_CTX_F_BLEND_COLOR);
|
||||
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures,
|
||||
gallivm->target, context_type,
|
||||
LP_JIT_CTX_TEXTURES);
|
||||
|
||||
@@ -103,8 +103,8 @@ struct lp_jit_context
|
||||
|
||||
uint32_t stencil_ref_front, stencil_ref_back;
|
||||
|
||||
/* FIXME: store (also?) in floats */
|
||||
uint8_t *blend_color;
|
||||
uint8_t *u8_blend_color;
|
||||
float *f_blend_color;
|
||||
|
||||
struct lp_jit_texture textures[PIPE_MAX_SAMPLERS];
|
||||
};
|
||||
@@ -119,7 +119,8 @@ enum {
|
||||
LP_JIT_CTX_ALPHA_REF,
|
||||
LP_JIT_CTX_STENCIL_REF_FRONT,
|
||||
LP_JIT_CTX_STENCIL_REF_BACK,
|
||||
LP_JIT_CTX_BLEND_COLOR,
|
||||
LP_JIT_CTX_U8_BLEND_COLOR,
|
||||
LP_JIT_CTX_F_BLEND_COLOR,
|
||||
LP_JIT_CTX_TEXTURES,
|
||||
LP_JIT_CTX_COUNT
|
||||
};
|
||||
@@ -137,14 +138,33 @@ enum {
|
||||
#define lp_jit_context_stencil_ref_back_value(_gallivm, _ptr) \
|
||||
lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_STENCIL_REF_BACK, "stencil_ref_back")
|
||||
|
||||
#define lp_jit_context_blend_color(_gallivm, _ptr) \
|
||||
lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_BLEND_COLOR, "blend_color")
|
||||
#define lp_jit_context_u8_blend_color(_gallivm, _ptr) \
|
||||
lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_U8_BLEND_COLOR, "u8_blend_color")
|
||||
|
||||
#define lp_jit_context_f_blend_color(_gallivm, _ptr) \
|
||||
lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_F_BLEND_COLOR, "f_blend_color")
|
||||
|
||||
#define lp_jit_context_textures(_gallivm, _ptr) \
|
||||
lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_CTX_TEXTURES, "textures")
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* typedef for fragment shader function
|
||||
*
|
||||
* @param context jit context
|
||||
* @param x block start x
|
||||
* @param y block start y
|
||||
* @param facing is front facing
|
||||
* @param a0 shader input a0
|
||||
* @param dadx shader input dadx
|
||||
* @param dady shader input dady
|
||||
* @param color color buffer
|
||||
* @param depth depth buffer
|
||||
* @param mask mask of visible pixels in block
|
||||
* @param thread_data task thread data
|
||||
* @param stride color buffer row stride in bytes
|
||||
*/
|
||||
typedef void
|
||||
(*lp_jit_frag_func)(const struct lp_jit_context *context,
|
||||
uint32_t x,
|
||||
@@ -156,7 +176,8 @@ typedef void
|
||||
uint8_t **color,
|
||||
void *depth,
|
||||
uint32_t mask,
|
||||
uint32_t *counter);
|
||||
uint32_t *counter,
|
||||
unsigned *stride);
|
||||
|
||||
|
||||
void
|
||||
|
||||
@@ -151,47 +151,70 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
|
||||
const union lp_rast_cmd_arg arg)
|
||||
{
|
||||
const struct lp_scene *scene = task->scene;
|
||||
const uint8_t *clear_color = arg.clear_color;
|
||||
uint8_t clear_color[4];
|
||||
|
||||
unsigned i;
|
||||
boolean gray;
|
||||
|
||||
LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
|
||||
for (i = 0; i < 4; ++i) {
|
||||
clear_color[i] = float_to_ubyte(arg.clear_color[i]);
|
||||
}
|
||||
|
||||
LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
|
||||
clear_color[0],
|
||||
clear_color[1],
|
||||
clear_color[2],
|
||||
clear_color[3]);
|
||||
|
||||
if (clear_color[0] == clear_color[1] &&
|
||||
clear_color[1] == clear_color[2] &&
|
||||
clear_color[2] == clear_color[3]) {
|
||||
/* clear to grayscale value {x, x, x, x} */
|
||||
for (i = 0; i < scene->fb.nr_cbufs; i++) {
|
||||
uint8_t *ptr =
|
||||
lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
|
||||
memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Non-gray color.
|
||||
* Note: if the swizzled tile layout changes (see TILE_PIXEL) this code
|
||||
* will need to change. It'll be pretty obvious when clearing no longer
|
||||
* works.
|
||||
*/
|
||||
const unsigned chunk = TILE_SIZE / 4;
|
||||
for (i = 0; i < scene->fb.nr_cbufs; i++) {
|
||||
uint8_t *c =
|
||||
lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
|
||||
gray =
|
||||
clear_color[0] == clear_color[1] &&
|
||||
clear_color[1] == clear_color[2] &&
|
||||
clear_color[2] == clear_color[3];
|
||||
|
||||
for (i = 0; i < scene->fb.nr_cbufs; i++) {
|
||||
if (scene->cbufs[i].unswizzled) {
|
||||
const struct lp_scene *scene = task->scene;
|
||||
union util_color uc;
|
||||
|
||||
util_pack_color(arg.clear_color,
|
||||
scene->fb.cbufs[i]->format, &uc);
|
||||
|
||||
util_fill_rect(scene->cbufs[i].map,
|
||||
scene->fb.cbufs[i]->format,
|
||||
scene->cbufs[i].stride,
|
||||
task->x,
|
||||
task->y,
|
||||
TILE_SIZE,
|
||||
TILE_SIZE,
|
||||
&uc);
|
||||
} else {
|
||||
const unsigned chunk = TILE_SIZE / 4;
|
||||
uint8_t *ptr;
|
||||
unsigned j;
|
||||
|
||||
for (j = 0; j < 4 * TILE_SIZE; j++) {
|
||||
memset(c, clear_color[0], chunk);
|
||||
c += chunk;
|
||||
memset(c, clear_color[1], chunk);
|
||||
c += chunk;
|
||||
memset(c, clear_color[2], chunk);
|
||||
c += chunk;
|
||||
memset(c, clear_color[3], chunk);
|
||||
c += chunk;
|
||||
ptr = lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
|
||||
|
||||
if (gray) {
|
||||
/* clear to grayscale value {x, x, x, x} */
|
||||
|
||||
memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
|
||||
} else {
|
||||
/* Non-gray color.
|
||||
* Note: if the swizzled tile layout changes (see TILE_PIXEL) this code
|
||||
* will need to change. It'll be pretty obvious when clearing no longer
|
||||
* works.
|
||||
*/
|
||||
|
||||
for (j = 0; j < 4 * TILE_SIZE; j++) {
|
||||
memset(ptr, clear_color[0], chunk);
|
||||
ptr += chunk;
|
||||
memset(ptr, clear_color[1], chunk);
|
||||
ptr += chunk;
|
||||
memset(ptr, clear_color[2], chunk);
|
||||
ptr += chunk;
|
||||
memset(ptr, clear_color[3], chunk);
|
||||
ptr += chunk;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -311,7 +334,7 @@ lp_rast_store_linear_color( struct lp_rasterizer_task *task )
|
||||
const unsigned level = cbuf->u.tex.level;
|
||||
struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture);
|
||||
|
||||
if (!task->color_tiles[buf])
|
||||
if (scene->cbufs[buf].unswizzled || !task->color_tiles[buf])
|
||||
continue;
|
||||
|
||||
llvmpipe_unswizzle_cbuf_tile(lpt,
|
||||
@@ -358,13 +381,20 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
|
||||
for (y = 0; y < TILE_SIZE; y += 4){
|
||||
for (x = 0; x < TILE_SIZE; x += 4) {
|
||||
uint8_t *color[PIPE_MAX_COLOR_BUFS];
|
||||
unsigned stride[PIPE_MAX_COLOR_BUFS];
|
||||
uint32_t *depth;
|
||||
unsigned i;
|
||||
|
||||
/* color buffer */
|
||||
for (i = 0; i < scene->fb.nr_cbufs; i++)
|
||||
color[i] = lp_rast_get_color_block_pointer(task, i,
|
||||
tile_x + x, tile_y + y);
|
||||
for (i = 0; i < scene->fb.nr_cbufs; i++){
|
||||
stride[i] = scene->cbufs[i].stride;
|
||||
|
||||
if (scene->cbufs[i].unswizzled) {
|
||||
color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, tile_x + x, tile_y + y);
|
||||
} else {
|
||||
color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x, tile_y + y);
|
||||
}
|
||||
}
|
||||
|
||||
/* depth buffer */
|
||||
depth = lp_rast_get_depth_block_pointer(task, tile_x + x, tile_y + y);
|
||||
@@ -380,7 +410,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
|
||||
color,
|
||||
depth,
|
||||
0xffff,
|
||||
&task->vis_counter);
|
||||
&task->vis_counter,
|
||||
stride);
|
||||
END_JIT_CALL();
|
||||
}
|
||||
}
|
||||
@@ -408,7 +439,9 @@ lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
|
||||
|
||||
/* this will prevent converting the layout from tiled to linear */
|
||||
for (i = 0; i < scene->fb.nr_cbufs; i++) {
|
||||
(void)lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
|
||||
if (!scene->cbufs[i].unswizzled) {
|
||||
(void)lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
|
||||
}
|
||||
}
|
||||
|
||||
lp_rast_shade_tile(task, arg);
|
||||
@@ -431,6 +464,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
|
||||
struct lp_fragment_shader_variant *variant = state->variant;
|
||||
const struct lp_scene *scene = task->scene;
|
||||
uint8_t *color[PIPE_MAX_COLOR_BUFS];
|
||||
unsigned stride[PIPE_MAX_COLOR_BUFS];
|
||||
void *depth;
|
||||
unsigned i;
|
||||
|
||||
@@ -447,15 +481,20 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
|
||||
|
||||
/* color buffer */
|
||||
for (i = 0; i < scene->fb.nr_cbufs; i++) {
|
||||
color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
|
||||
assert(lp_check_alignment(color[i], 16));
|
||||
stride[i] = scene->cbufs[i].stride;
|
||||
|
||||
if (scene->cbufs[i].unswizzled) {
|
||||
color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y);
|
||||
} else {
|
||||
color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
|
||||
}
|
||||
}
|
||||
|
||||
/* depth buffer */
|
||||
depth = lp_rast_get_depth_block_pointer(task, x, y);
|
||||
|
||||
|
||||
assert(lp_check_alignment(state->jit_context.blend_color, 16));
|
||||
assert(lp_check_alignment(state->jit_context.u8_blend_color, 16));
|
||||
|
||||
/* run shader on 4x4 block */
|
||||
BEGIN_JIT_CALL(state, task);
|
||||
@@ -468,7 +507,8 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
|
||||
color,
|
||||
depth,
|
||||
mask,
|
||||
&task->vis_counter);
|
||||
&task->vis_counter,
|
||||
stride);
|
||||
END_JIT_CALL();
|
||||
}
|
||||
|
||||
|
||||
@@ -152,7 +152,7 @@ union lp_rast_cmd_arg {
|
||||
unsigned plane_mask;
|
||||
} triangle;
|
||||
const struct lp_rast_state *set_state;
|
||||
uint8_t clear_color[4];
|
||||
float clear_color[4];
|
||||
struct {
|
||||
uint32_t value;
|
||||
uint32_t mask;
|
||||
|
||||
@@ -189,6 +189,7 @@ lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
|
||||
assert(task->x % TILE_SIZE == 0);
|
||||
assert(task->y % TILE_SIZE == 0);
|
||||
assert(buf < scene->fb.nr_cbufs);
|
||||
assert(scene->cbufs[buf].unswizzled == 0);
|
||||
|
||||
if (!task->color_tiles[buf]) {
|
||||
struct pipe_surface *cbuf = scene->fb.cbufs[buf];
|
||||
@@ -210,6 +211,35 @@ lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get pointer to the unswizzled color tile
|
||||
*/
|
||||
static INLINE uint8_t *
|
||||
lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task,
|
||||
unsigned buf, enum lp_texture_usage usage)
|
||||
{
|
||||
const struct lp_scene *scene = task->scene;
|
||||
unsigned format_bytes;
|
||||
|
||||
assert(task->x < scene->tiles_x * TILE_SIZE);
|
||||
assert(task->y < scene->tiles_y * TILE_SIZE);
|
||||
assert(task->x % TILE_SIZE == 0);
|
||||
assert(task->y % TILE_SIZE == 0);
|
||||
assert(buf < scene->fb.nr_cbufs);
|
||||
assert(scene->cbufs[buf].unswizzled);
|
||||
|
||||
if (!task->color_tiles[buf]) {
|
||||
struct pipe_surface *cbuf = scene->fb.cbufs[buf];
|
||||
assert(cbuf);
|
||||
|
||||
format_bytes = util_format_description(cbuf->format)->block.bits / 8;
|
||||
task->color_tiles[buf] = scene->cbufs[buf].map + scene->cbufs[buf].stride * task->y + format_bytes * task->x;
|
||||
}
|
||||
|
||||
return task->color_tiles[buf];
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the pointer to a 4x4 color block (within a 64x64 tile).
|
||||
* We'll map the color buffer on demand here.
|
||||
@@ -228,6 +258,8 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
|
||||
assert(y < task->scene->tiles_y * TILE_SIZE);
|
||||
assert((x % TILE_VECTOR_WIDTH) == 0);
|
||||
assert((y % TILE_VECTOR_HEIGHT) == 0);
|
||||
assert(buf < task->scene->fb.nr_cbufs);
|
||||
assert(task->scene->cbufs[buf].unswizzled == 0);
|
||||
|
||||
color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE);
|
||||
assert(color);
|
||||
@@ -243,6 +275,40 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the pointer to an unswizzled 4x4 color block (within an unswizzled 64x64 tile).
|
||||
* \param x, y location of 4x4 block in window coords
|
||||
*/
|
||||
static INLINE uint8_t *
|
||||
lp_rast_get_unswizzled_color_block_pointer(struct lp_rasterizer_task *task,
|
||||
unsigned buf, unsigned x, unsigned y)
|
||||
{
|
||||
unsigned px, py, pixel_offset, format_bytes;
|
||||
uint8_t *color;
|
||||
|
||||
assert(x < task->scene->tiles_x * TILE_SIZE);
|
||||
assert(y < task->scene->tiles_y * TILE_SIZE);
|
||||
assert((x % TILE_VECTOR_WIDTH) == 0);
|
||||
assert((y % TILE_VECTOR_HEIGHT) == 0);
|
||||
assert(buf < task->scene->fb.nr_cbufs);
|
||||
assert(task->scene->cbufs[buf].unswizzled);
|
||||
|
||||
format_bytes = util_format_description(task->scene->fb.cbufs[buf]->format)->block.bits / 8;
|
||||
|
||||
color = lp_rast_get_unswizzled_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE);
|
||||
assert(color);
|
||||
|
||||
px = x % TILE_SIZE;
|
||||
py = y % TILE_SIZE;
|
||||
pixel_offset = px * format_bytes + py * task->scene->cbufs[buf].stride;
|
||||
|
||||
color = color + pixel_offset;
|
||||
|
||||
assert(lp_check_alignment(color, llvmpipe_get_format_alignment(task->scene->fb.cbufs[buf]->format)));
|
||||
return color;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Shade all pixels in a 4x4 block. The fragment code omits the
|
||||
@@ -258,12 +324,20 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
|
||||
const struct lp_rast_state *state = task->state;
|
||||
struct lp_fragment_shader_variant *variant = state->variant;
|
||||
uint8_t *color[PIPE_MAX_COLOR_BUFS];
|
||||
unsigned stride[PIPE_MAX_COLOR_BUFS];
|
||||
void *depth;
|
||||
unsigned i;
|
||||
|
||||
/* color buffer */
|
||||
for (i = 0; i < scene->fb.nr_cbufs; i++)
|
||||
color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
|
||||
for (i = 0; i < scene->fb.nr_cbufs; i++) {
|
||||
stride[i] = scene->cbufs[i].stride;
|
||||
|
||||
if (scene->cbufs[i].unswizzled) {
|
||||
color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y);
|
||||
} else {
|
||||
color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
|
||||
}
|
||||
}
|
||||
|
||||
depth = lp_rast_get_depth_block_pointer(task, x, y);
|
||||
|
||||
@@ -278,7 +352,8 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
|
||||
color,
|
||||
depth,
|
||||
0xffff,
|
||||
&task->vis_counter );
|
||||
&task->vis_counter,
|
||||
stride );
|
||||
END_JIT_CALL();
|
||||
}
|
||||
|
||||
|
||||
@@ -150,6 +150,8 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
|
||||
cbuf->u.tex.first_layer,
|
||||
LP_TEX_USAGE_READ_WRITE,
|
||||
LP_TEX_LAYOUT_LINEAR);
|
||||
|
||||
scene->cbufs[i].unswizzled = llvmpipe_is_format_unswizzled(cbuf->format);
|
||||
}
|
||||
|
||||
if (fb->zsbuf) {
|
||||
|
||||
@@ -137,6 +137,7 @@ struct lp_scene {
|
||||
uint8_t *map;
|
||||
unsigned stride;
|
||||
unsigned blocksize;
|
||||
unsigned unswizzled;
|
||||
} zsbuf, cbufs[PIPE_MAX_COLOR_BUFS];
|
||||
|
||||
/** the framebuffer to render the scene into */
|
||||
|
||||
@@ -390,7 +390,7 @@ lp_setup_try_clear( struct lp_setup_context *setup,
|
||||
|
||||
if (flags & PIPE_CLEAR_COLOR) {
|
||||
for (i = 0; i < 4; i++)
|
||||
color_arg.clear_color[i] = float_to_ubyte(color[i]);
|
||||
color_arg.clear_color[i] = color[i];
|
||||
}
|
||||
|
||||
if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
|
||||
@@ -805,14 +805,26 @@ try_update_scene_state( struct lp_setup_context *setup )
|
||||
|
||||
if(setup->dirty & LP_SETUP_NEW_BLEND_COLOR) {
|
||||
uint8_t *stored;
|
||||
float* fstored;
|
||||
unsigned i, j;
|
||||
unsigned size;
|
||||
|
||||
/* Alloc u8_blend_color (4 x 16 x i8) and f_blend_color (4 or 8 x f32) */
|
||||
size = 4 * 16 * sizeof(uint8_t);
|
||||
size += (LP_MAX_VECTOR_LENGTH / 4) * sizeof(float);
|
||||
stored = lp_scene_alloc_aligned(scene, size, LP_MAX_VECTOR_LENGTH);
|
||||
|
||||
stored = lp_scene_alloc_aligned(scene, 4 * 16, 16);
|
||||
if (!stored) {
|
||||
assert(!new_scene);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Store floating point colour */
|
||||
fstored = (float*)(stored + 4*16);
|
||||
for (i = 0; i < (LP_MAX_VECTOR_LENGTH / 4); ++i) {
|
||||
fstored[i] = setup->blend_color.current.color[i % 4];
|
||||
}
|
||||
|
||||
/* smear each blend color component across 16 ubyte elements */
|
||||
for (i = 0; i < 4; ++i) {
|
||||
uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]);
|
||||
@@ -821,7 +833,8 @@ try_update_scene_state( struct lp_setup_context *setup )
|
||||
}
|
||||
|
||||
setup->blend_color.stored = stored;
|
||||
setup->fs.current.jit_context.blend_color = setup->blend_color.stored;
|
||||
setup->fs.current.jit_context.u8_blend_color = stored;
|
||||
setup->fs.current.jit_context.f_blend_color = fstored;
|
||||
setup->dirty |= LP_SETUP_NEW_FS;
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -94,6 +94,9 @@ struct lp_fragment_shader_variant
|
||||
|
||||
lp_jit_frag_func jit_function[2];
|
||||
|
||||
/* Bitmask to say what cbufs are unswizzled */
|
||||
unsigned unswizzled_cbufs;
|
||||
|
||||
/* Total number of LLVM instructions generated */
|
||||
unsigned nr_instrs;
|
||||
|
||||
|
||||
@@ -195,7 +195,7 @@ add_blend_test(struct gallivm_state *gallivm,
|
||||
dst = LLVMBuildLoad(builder, dst_ptr, "dst");
|
||||
con = LLVMBuildLoad(builder, const_ptr, "const");
|
||||
|
||||
res = lp_build_blend_aos(gallivm, blend, &format, type, rt, src, dst, NULL, con, swizzle);
|
||||
res = lp_build_blend_aos(gallivm, blend, &format, type, rt, src, NULL, dst, NULL, con, NULL, swizzle, 4);
|
||||
|
||||
lp_build_name(res, "res");
|
||||
|
||||
|
||||
@@ -756,6 +756,73 @@ llvmpipe_is_resource_referenced( struct pipe_context *pipe,
|
||||
return lp_setup_is_resource_referenced(llvmpipe->setup, presource);
|
||||
}
|
||||
|
||||
boolean
|
||||
llvmpipe_is_format_unswizzled( enum pipe_format format )
|
||||
{
|
||||
const struct util_format_description *desc = util_format_description(format);
|
||||
unsigned chan;
|
||||
|
||||
if (format == PIPE_FORMAT_B8G8R8X8_UNORM || format == PIPE_FORMAT_B8G8R8A8_UNORM) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
|
||||
desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
|
||||
desc->block.width != 1 ||
|
||||
desc->block.height != 1) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
for (chan = 0; chan < desc->nr_channels; ++chan) {
|
||||
if (desc->channel[chan].type == UTIL_FORMAT_TYPE_VOID && (chan + 1) == desc->nr_channels)
|
||||
continue;
|
||||
|
||||
if (desc->channel[chan].type != desc->channel[0].type)
|
||||
return FALSE;
|
||||
|
||||
if (desc->channel[chan].normalized != desc->channel[0].normalized)
|
||||
return FALSE;
|
||||
|
||||
if (desc->channel[chan].pure_integer != desc->channel[0].pure_integer)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* All code assumes alpha is the last channel */
|
||||
if (desc->nr_channels == 4 && desc->swizzle[3] < 3) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the largest possible alignment for a format in llvmpipe
|
||||
*/
|
||||
unsigned
|
||||
llvmpipe_get_format_alignment( enum pipe_format format )
|
||||
{
|
||||
const struct util_format_description *desc = util_format_description(format);
|
||||
unsigned size = 0;
|
||||
unsigned bytes;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < desc->nr_channels; ++i) {
|
||||
size += desc->channel[i].size;
|
||||
}
|
||||
|
||||
bytes = size / 8;
|
||||
|
||||
if (!util_is_power_of_two(bytes)) {
|
||||
bytes /= desc->nr_channels;
|
||||
}
|
||||
|
||||
if (bytes % 2 || bytes < 1) {
|
||||
return 1;
|
||||
} else {
|
||||
return bytes;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
|
||||
@@ -256,4 +256,10 @@ llvmpipe_is_resource_referenced( struct pipe_context *pipe,
|
||||
struct pipe_resource *presource,
|
||||
unsigned level, int layer);
|
||||
|
||||
boolean
|
||||
llvmpipe_is_format_unswizzled(enum pipe_format format);
|
||||
|
||||
unsigned
|
||||
llvmpipe_get_format_alignment(enum pipe_format format);
|
||||
|
||||
#endif /* LP_TEXTURE_H */
|
||||
|
||||
Reference in New Issue
Block a user