llvmpipe: Unswizzled rendering.

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
This commit is contained in:
James Benton
2012-09-13 16:04:42 +01:00
committed by José Fonseca
parent 1d3789bccb
commit fa1b481c09
27 changed files with 1781 additions and 156 deletions
@@ -414,6 +414,81 @@ lp_build_unsigned_norm_to_float(struct gallivm_state *gallivm,
}
/**
 * Pick a suitable num_dsts for lp_build_conv to ensure optimal cases are used.
 *
 * When src_type and *dst_type already describe the same type no conversion
 * is emitted; the source values are simply passed through to dst.
 *
 * @param gallivm   gallivm state used to emit the conversion
 * @param src_type  type of every vector in src
 * @param dst_type  requested destination type (in/out): its length is
 *                  overwritten with 16 when one of the float -> unorm8
 *                  packing special cases below is taken
 * @param src       source vectors
 * @param num_srcs  number of vectors in src
 * @param dst       receives the resulting vectors
 *
 * Returns the number of dsts created from src
 */
int lp_build_conv_auto(struct gallivm_state *gallivm,
                       struct lp_type src_type,
                       struct lp_type* dst_type,
                       const LLVMValueRef *src,
                       unsigned num_srcs,
                       LLVMValueRef *dst)
{
   unsigned i;
   int num_dsts = num_srcs;

   if (src_type.floating == dst_type->floating &&
       src_type.width == dst_type->width &&
       src_type.length == dst_type->length &&
       src_type.fixed == dst_type->fixed &&
       src_type.norm == dst_type->norm &&
       src_type.sign == dst_type->sign) {
      /* Nothing to convert, but the caller is told num_srcs dsts exist,
       * so make sure dst really holds them when it is a separate array.
       */
      if (dst != src) {
         for (i = 0; i < num_srcs; ++i) {
            dst[i] = src[i];
         }
      }
      return num_dsts;
   }

   /* Special case 4x4f -> 1x16ub or 2x8f -> 1x16ub
    */
   if (src_type.floating == 1 &&
       src_type.fixed    == 0 &&
       src_type.sign     == 1 &&
       src_type.norm     == 0 &&
       src_type.width    == 32 &&

       dst_type->floating == 0 &&
       dst_type->fixed    == 0 &&
       dst_type->sign     == 0 &&
       dst_type->norm     == 1 &&
       dst_type->width    == 8)
   {
      /* Special case 4x4f --> 1x16ub */
      if (src_type.length == 4 && util_cpu_caps.has_sse2)
      {
         assert((num_srcs % 4) == 0);

         num_dsts = num_srcs / 4;
         dst_type->length = 16;

         lp_build_conv(gallivm, src_type, *dst_type, src, num_srcs, dst, num_dsts);
         return num_dsts;
      }

      /* Special case 2x8f --> 1x16ub */
      if (src_type.length == 8 && util_cpu_caps.has_avx)
      {
         assert((num_srcs % 2) == 0);

         num_dsts = num_srcs / 2;
         dst_type->length = 16;

         lp_build_conv(gallivm, src_type, *dst_type, src, num_srcs, dst, num_dsts);
         return num_dsts;
      }
   }

   /* lp_build_resize does not support M:N */
   if (src_type.width == dst_type->width) {
      /* Same element width: all vectors can go through a single call */
      lp_build_conv(gallivm, src_type, *dst_type, src, num_srcs, dst, num_dsts);
   } else {
      /* Element width changes: convert one vector at a time (1:1) */
      for (i = 0; i < num_srcs; ++i) {
         lp_build_conv(gallivm, src_type, *dst_type, &src[i], 1, &dst[i], 1);
      }
   }

   return num_dsts;
}
/**
* Generic type conversion.
*
@@ -70,6 +70,16 @@ lp_build_conv(struct gallivm_state *gallivm,
const LLVMValueRef *srcs, unsigned num_srcs,
LLVMValueRef *dsts, unsigned num_dsts);
/**
 * Convert src vectors to dst_type, letting the implementation choose how
 * many destination vectors to produce.
 *
 * dst_type is passed by pointer because the implementation may adjust its
 * length. Returns the number of destination vectors.
 */
int
lp_build_conv_auto(struct gallivm_state *gallivm,
struct lp_type src_type,
struct lp_type* dst_type,
const LLVMValueRef *src,
unsigned num_srcs,
LLVMValueRef *dst);
void
lp_build_conv_mask(struct gallivm_state *gallivm,
struct lp_type src_type,
+5 -4
View File
@@ -560,7 +560,8 @@ LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
unsigned mask,
LLVMValueRef a,
LLVMValueRef b)
LLVMValueRef b,
unsigned num_channels)
{
LLVMBuilderRef builder = bld->gallivm->builder;
const struct lp_type type = bld->type;
@@ -594,8 +595,8 @@ lp_build_select_aos(struct lp_build_context *bld,
LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
for(j = 0; j < n; j += 4)
for(i = 0; i < 4; ++i)
for(j = 0; j < n; j += num_channels)
for(i = 0; i < num_channels; ++i)
shuffles[j + i] = LLVMConstInt(elem_type,
(mask & (1 << i) ? 0 : n) + j + i,
0);
@@ -603,7 +604,7 @@ lp_build_select_aos(struct lp_build_context *bld,
return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
}
else {
LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, 4);
LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels);
return lp_build_select(bld, mask_vec, a, b);
}
}
+2 -1
View File
@@ -79,7 +79,8 @@ LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
unsigned mask,
LLVMValueRef a,
LLVMValueRef b);
LLVMValueRef b,
unsigned num_channels);
LLVMValueRef
@@ -211,6 +211,42 @@ lp_build_concat(struct gallivm_state *gallivm,
return tmp[0];
}
/**
 * Combines vectors to reduce from num_srcs to num_dsts.
 * Returns the number of src vectors concatenated in a single dst.
 *
 * num_srcs must be exactly divisible by num_dsts, and num_dsts must be
 * non-zero.
 *
 * e.g. For num_srcs = 4 and src = [x, y, z, w]
 *          num_dsts = 1  dst = [xyzw]    return = 4
 *          num_dsts = 2  dst = [xy, zw]  return = 2
 *          num_dsts = 4  dst = [x, y, z, w]  return = 1
 *
 * @param gallivm   gallivm state used to emit the concatenation
 * @param src_type  type of every vector in src
 * @param src       source vectors
 * @param num_srcs  number of vectors in src
 * @param dst       receives num_dsts vectors
 * @param num_dsts  number of vectors to produce
 */
int
lp_build_concat_n(struct gallivm_state *gallivm,
                  struct lp_type src_type,
                  LLVMValueRef *src,
                  unsigned num_srcs,
                  LLVMValueRef *dst,
                  unsigned num_dsts)
{
   unsigned size;
   unsigned i;

   /* Validate before dividing so a zero num_dsts trips the assert
    * instead of a division by zero.
    */
   assert(num_dsts > 0);
   assert(num_srcs >= num_dsts);
   assert((num_srcs % num_dsts) == 0);

   size = num_srcs / num_dsts;

   if (num_srcs == num_dsts) {
      /* Nothing to concatenate, but dst must still be populated */
      for (i = 0; i < num_dsts; ++i) {
         dst[i] = src[i];
      }
      return 1;
   }

   for (i = 0; i < num_dsts; ++i) {
      dst[i] = lp_build_concat(gallivm, &src[i * size], src_type, size);
   }

   return (int)size;
}
/**
* Interleave vector elements.
*
@@ -87,6 +87,15 @@ lp_build_concat(struct gallivm_state *gallivm,
struct lp_type src_type,
unsigned num_vectors);
/**
 * Combine num_srcs vectors into num_dsts vectors by concatenation.
 *
 * Returns the number of source vectors concatenated into each destination
 * vector.
 */
int
lp_build_concat_n(struct gallivm_state *gallivm,
struct lp_type src_type,
LLVMValueRef *src,
unsigned num_srcs,
LLVMValueRef *dst,
unsigned num_dsts);
LLVMValueRef
lp_build_packs2(struct gallivm_state *gallivm,
struct lp_type src_type,
@@ -31,6 +31,7 @@
#include "lp_bld_const.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_quad.h"
#include "lp_bld_pack.h"
static const unsigned char
@@ -156,3 +157,52 @@ lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy");
}
/**
 * Twiddle from quad format to row format
 *
 *   src0      src1
 * ######### #########     #################
 * # 0 | 1 # # 4 | 5 #     # 0 | 1 | 4 | 5 # dst0
 * #---+---# #---+---#  -> #################
 * # 2 | 3 # # 6 | 7 #     # 2 | 3 | 6 | 7 # dst1
 * ######### #########     #################
 *
 * Sources are consumed in pairs (src_count must be even). Each vector is
 * reinterpreted as two wide integer halves, and the halves of the two
 * vectors of a pair are interleaved: dst[i] takes the lo_hi = 0 interleave
 * and dst[i + 1] the lo_hi = 1 interleave, both cast back to lp_dst_type.
 */
void
lp_bld_quad_twiddle(struct gallivm_state *gallivm,
                    struct lp_type lp_dst_type,
                    const LLVMValueRef* src,
                    unsigned src_count,
                    LLVMValueRef* dst)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type half_type = lp_dst_type;
   LLVMTypeRef half_vec_type;
   LLVMTypeRef out_vec_type;
   unsigned pair;

   assert((src_count % 2) == 0);

   /* Two integer elements, each spanning half of the original vector */
   half_type.width = (lp_dst_type.width * lp_dst_type.length) / 2;
   half_type.length = 2;
   half_type.floating = 0;

   half_vec_type = lp_build_vec_type(gallivm, half_type);
   out_vec_type = lp_build_vec_type(gallivm, lp_dst_type);

   for (pair = 0; pair < src_count; pair += 2) {
      LLVMValueRef first = LLVMBuildBitCast(builder, src[pair], half_vec_type, "");
      LLVMValueRef second = LLVMBuildBitCast(builder, src[pair + 1], half_vec_type, "");
      LLVMValueRef lo = lp_build_interleave2(gallivm, half_type, first, second, 0);
      LLVMValueRef hi = lp_build_interleave2(gallivm, half_type, first, second, 1);

      dst[pair] = LLVMBuildBitCast(builder, lo, out_vec_type, "");
      dst[pair + 1] = LLVMBuildBitCast(builder, hi, out_vec_type, "");
   }
}
@@ -88,5 +88,14 @@ LLVMValueRef
lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
LLVMValueRef a);
/*
 * Twiddle from quad format to row format.
 *
 * src holds src_count vectors and dst receives the twiddled vectors,
 * which are of type lp_dst_type.
 */
void
lp_bld_quad_twiddle(struct gallivm_state *gallivm,
struct lp_type lp_dst_type,
const LLVMValueRef* src,
unsigned src_count,
LLVMValueRef* dst);
#endif /* LP_BLD_QUAD_H_ */
+11 -11
View File
@@ -772,7 +772,7 @@ lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
offset1 = LLVMBuildLoad(builder, offset1, "");
offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexo, "");
}
offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0);
offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0, 4);
}
else {
unsigned i;
@@ -849,7 +849,7 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
stride1 = LLVMBuildLoad(builder, stride1, "");
stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
}
stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0);
stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0, 4);
}
else {
LLVMValueRef stride1;
@@ -1045,11 +1045,11 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
*out_width = size;
}
else if (bld->num_lods == num_quads) {
*out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0);
*out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
if (dims >= 2) {
*out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1);
*out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
if (dims == 3) {
*out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2);
*out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2, 4);
}
}
}
@@ -1246,9 +1246,9 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
signrxyz = LLVMBuildBitCast(builder, rxyz, lp_build_vec_type(gallivm, intctype), "");
signrxyz = LLVMBuildAnd(builder, signrxyz, signmask, "");
arxs = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 0);
arys = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 1);
arzs = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 2);
arxs = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 0, 4);
arys = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 1, 4);
arzs = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 2, 4);
/*
* select x if x >= y else select y
@@ -1267,15 +1267,15 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
* snewz = signrz * rx;
* tnewz = -ry;
*/
signrxs = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 0);
signrxs = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 0, 4);
snewx = LLVMBuildXor(builder, signrxs, rzneg, "");
tnewx = ryneg;
signrys = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 1);
signrys = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 1, 4);
snewy = rx;
tnewy = LLVMBuildXor(builder, signrys, rz, "");
signrzs = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 2);
signrzs = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 2, 4);
snewz = LLVMBuildXor(builder, signrzs, rx, "");
tnewz = ryneg;
+130 -7
View File
@@ -159,21 +159,24 @@ lp_build_extract_broadcast(struct gallivm_state *gallivm,
/**
* Swizzle one channel into all other three channels.
* Swizzle one channel into other channels.
*/
LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
LLVMValueRef a,
unsigned channel)
unsigned channel,
unsigned num_channels)
{
LLVMBuilderRef builder = bld->gallivm->builder;
const struct lp_type type = bld->type;
const unsigned n = type.length;
unsigned i, j;
if(a == bld->undef || a == bld->zero || a == bld->one)
if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
return a;
assert(num_channels == 2 || num_channels == 4);
/* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
* using shuffles here actually causes worst results. More investigation is
* needed. */
@@ -184,12 +187,55 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
for(j = 0; j < n; j += 4)
for(i = 0; i < 4; ++i)
for(j = 0; j < n; j += num_channels)
for(i = 0; i < num_channels; ++i)
shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
}
else if (num_channels == 2) {
/*
* Bit mask and shifts
*
* XY XY .... XY <= input
* 0Y 0Y .... 0Y
* YY YY .... YY
* YY YY .... YY <= output
*/
struct lp_type type2;
LLVMValueRef tmp = NULL;
int shift;
a = LLVMBuildAnd(builder, a,
lp_build_const_mask_aos(bld->gallivm,
type, 1 << channel, num_channels), "");
type2 = type;
type2.floating = FALSE;
type2.width *= 2;
type2.length /= 2;
a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");
#ifdef PIPE_ARCH_LITTLE_ENDIAN
shift = channel == 0 ? 1 : -1;
#else
shift = channel == 0 ? -1 : 1;
#endif
if (shift > 0) {
tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
} else if (shift < 0) {
tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
}
assert(tmp);
if (tmp) {
a = LLVMBuildOr(builder, a, tmp, "");
}
return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
}
else {
/*
* Bit mask and recursive shifts
@@ -247,6 +293,45 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
}
/**
 * Swizzle a vector consisting of an array of XYZW structs.
 *
 * This fills a vector of dst_len length with the swizzled channels from src.
 * Result element i is selected with shuffle index swizzles[i % num_swizzles],
 * i.e. the swizzle pattern is repeated until dst_len elements are produced.
 * Entries equal to LP_BLD_SWIZZLE_DONTCARE yield an undefined shuffle index.
 *
 * e.g. with swizzles = { 2, 1, 0 } and dst_len = 6 the shuffle indices are
 *      2 1 0 2 1 0
 *
 * @param gallivm       gallivm state used to emit the shuffle
 * @param src           the vector to swizzle
 * @param swizzles      the swizzle array
 * @param num_swizzles  the number of elements in swizzles (must be non-zero)
 * @param dst_len       the length of the result
 */
LLVMValueRef
lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
                       LLVMValueRef src,
                       const unsigned char* swizzles,
                       unsigned num_swizzles,
                       unsigned dst_len)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
   unsigned i;

   /* The index below is taken modulo num_swizzles */
   assert(num_swizzles > 0);
   assert(dst_len < LP_MAX_VECTOR_WIDTH);

   for (i = 0; i < dst_len; ++i) {
      int swizzle = swizzles[i % num_swizzles];

      if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
         shuffles[i] = LLVMGetUndef(i32_type);
      } else {
         shuffles[i] = lp_build_const_int32(gallivm, swizzle);
      }
   }

   return LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), LLVMConstVector(shuffles, dst_len), "");
}
LLVMValueRef
lp_build_swizzle_aos(struct lp_build_context *bld,
LLVMValueRef a,
@@ -272,7 +357,7 @@ lp_build_swizzle_aos(struct lp_build_context *bld,
case PIPE_SWIZZLE_GREEN:
case PIPE_SWIZZLE_BLUE:
case PIPE_SWIZZLE_ALPHA:
return lp_build_swizzle_scalar_aos(bld, a, swizzles[0]);
return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
case PIPE_SWIZZLE_ZERO:
return bld->zero;
case PIPE_SWIZZLE_ONE:
@@ -367,7 +452,7 @@ lp_build_swizzle_aos(struct lp_build_context *bld,
cond |= 1 << chan;
}
}
res = lp_build_select_aos(bld, cond, bld->one, bld->zero);
res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);
/*
* Build a type where each element is an integer that cover the four
@@ -553,6 +638,44 @@ lp_build_transpose_aos(struct gallivm_state *gallivm,
}
/**
 * Transpose from AOS <-> SOA for num_srcs
 *
 * Supported counts are 1 (plain copy), 2 (pair of half interleaves) and
 * 4 (delegated to lp_build_transpose_aos); any other count asserts.
 */
void
lp_build_transpose_aos_n(struct gallivm_state *gallivm,
                         struct lp_type type,
                         const LLVMValueRef* src,
                         unsigned num_srcs,
                         LLVMValueRef* dst)
{
   if (num_srcs == 1) {
      dst[0] = src[0];
   }
   else if (num_srcs == 2) {
      /* Both results are built before storing, in case src == dst */
      LLVMValueRef first = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0);
      LLVMValueRef second = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1);

      dst[0] = first;
      dst[1] = second;
   }
   else if (num_srcs == 4) {
      lp_build_transpose_aos(gallivm, type, src, dst);
   }
   else {
      assert(0);
   }
}
/**
* Pack n-th element of aos values,
* pad out to destination size.
+21 -4
View File
@@ -67,13 +67,14 @@ lp_build_extract_broadcast(struct gallivm_state *gallivm,
/**
* Broadcast one channel of a vector composed of arrays of XYZW structures into
* all four channel.
* Broadcast one channel of a vector composed of arrays of XYZ.. structures into
* all channels XXX...
*/
LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
LLVMValueRef a,
unsigned channel);
LLVMValueRef a,
unsigned channel,
unsigned num_channels);
/**
@@ -87,6 +88,14 @@ lp_build_swizzle_aos(struct lp_build_context *bld,
const unsigned char swizzles[4]);
/**
 * Build a vector of dst_len elements from src by repeating the
 * num_swizzles entries of swizzles as shuffle indices.
 */
LLVMValueRef
lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
LLVMValueRef src,
const unsigned char* swizzles,
unsigned num_swizzles,
unsigned dst_len);
LLVMValueRef
lp_build_swizzle_soa_channel(struct lp_build_context *bld,
const LLVMValueRef *unswizzled,
@@ -113,6 +122,14 @@ lp_build_transpose_aos(struct gallivm_state *gallivm,
LLVMValueRef dst[4]);
/**
 * Transpose between AOS and SOA layouts for num_srcs source vectors,
 * writing the results to dst.
 */
void
lp_build_transpose_aos_n(struct gallivm_state *gallivm,
struct lp_type type,
const LLVMValueRef* src,
unsigned num_srcs,
LLVMValueRef* dst);
LLVMValueRef
lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
struct lp_type src_type,
@@ -94,7 +94,7 @@ swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
unsigned chan)
{
chan = bld->swizzles[chan];
return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan);
return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4);
}
@@ -623,7 +623,7 @@ lp_emit_instruction_aos(
case TGSI_OPCODE_EX2:
src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X);
tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
break;
+4 -1
View File
@@ -60,10 +60,13 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
struct lp_type type,
unsigned rt,
LLVMValueRef src,
LLVMValueRef src_alpha,
LLVMValueRef dst,
LLVMValueRef mask,
LLVMValueRef const_,
const unsigned char swizzle[4]);
LLVMValueRef const_alpha,
const unsigned char swizzle[4],
int nr_channels);
void
+68 -27
View File
@@ -66,14 +66,18 @@
struct lp_build_blend_aos_context
{
struct lp_build_context base;
LLVMValueRef src;
LLVMValueRef src_alpha;
LLVMValueRef dst;
LLVMValueRef const_;
LLVMValueRef const_alpha;
LLVMValueRef inv_src;
LLVMValueRef inv_src_alpha;
LLVMValueRef inv_dst;
LLVMValueRef inv_const;
LLVMValueRef inv_const_alpha;
LLVMValueRef saturate;
LLVMValueRef rgb_src_factor;
@@ -88,14 +92,18 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
unsigned factor,
boolean alpha)
{
LLVMValueRef src_alpha = bld->src_alpha ? bld->src_alpha : bld->src;
LLVMValueRef const_alpha = bld->const_alpha ? bld->const_alpha : bld->const_;
switch (factor) {
case PIPE_BLENDFACTOR_ZERO:
return bld->base.zero;
case PIPE_BLENDFACTOR_ONE:
return bld->base.one;
case PIPE_BLENDFACTOR_SRC_COLOR:
case PIPE_BLENDFACTOR_SRC_ALPHA:
return bld->src;
case PIPE_BLENDFACTOR_SRC_ALPHA:
return src_alpha;
case PIPE_BLENDFACTOR_DST_COLOR:
case PIPE_BLENDFACTOR_DST_ALPHA:
return bld->dst;
@@ -106,32 +114,39 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
if(!bld->inv_dst)
bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
if(!bld->saturate)
bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst);
bld->saturate = lp_build_min(&bld->base, src_alpha, bld->inv_dst);
return bld->saturate;
}
case PIPE_BLENDFACTOR_CONST_COLOR:
case PIPE_BLENDFACTOR_CONST_ALPHA:
return bld->const_;
case PIPE_BLENDFACTOR_CONST_ALPHA:
return const_alpha;
case PIPE_BLENDFACTOR_SRC1_COLOR:
case PIPE_BLENDFACTOR_SRC1_ALPHA:
/* TODO */
assert(0);
return bld->base.zero;
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
if(!bld->inv_src)
bld->inv_src = lp_build_comp(&bld->base, bld->src);
return bld->inv_src;
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
if(!bld->inv_src_alpha)
bld->inv_src_alpha = lp_build_comp(&bld->base, src_alpha);
return bld->inv_src_alpha;
case PIPE_BLENDFACTOR_INV_DST_COLOR:
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
if(!bld->inv_dst)
bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
return bld->inv_dst;
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
if(!bld->inv_const)
bld->inv_const = lp_build_comp(&bld->base, bld->const_);
return bld->inv_const;
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
if(!bld->inv_const_alpha)
bld->inv_const_alpha = lp_build_comp(&bld->base, const_alpha);
return bld->inv_const_alpha;
case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
/* TODO */
@@ -190,7 +205,8 @@ lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
LLVMValueRef rgb,
LLVMValueRef alpha,
enum lp_build_blend_swizzle rgb_swizzle,
unsigned alpha_swizzle)
unsigned alpha_swizzle,
unsigned num_channels)
{
LLVMValueRef swizzled_rgb;
@@ -199,7 +215,7 @@ lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
swizzled_rgb = rgb;
break;
case LP_BUILD_BLEND_SWIZZLE_AAAA:
swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle);
swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle, num_channels);
break;
default:
assert(0);
@@ -208,13 +224,13 @@ lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
if (rgb != alpha) {
swizzled_rgb = lp_build_select_aos(&bld->base, 1 << alpha_swizzle,
alpha, swizzled_rgb);
alpha, swizzled_rgb,
num_channels);
}
return swizzled_rgb;
}
/**
* @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
*/
@@ -222,17 +238,22 @@ static LLVMValueRef
lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
unsigned rgb_factor,
unsigned alpha_factor,
unsigned alpha_swizzle)
unsigned alpha_swizzle,
unsigned num_channels)
{
LLVMValueRef rgb_factor_, alpha_factor_;
enum lp_build_blend_swizzle rgb_swizzle;
if (alpha_swizzle == 0) {
return lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
}
rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
if (alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle);
return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, num_channels);
} else {
return rgb_factor_;
}
@@ -261,18 +282,21 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
struct lp_type type,
unsigned rt,
LLVMValueRef src,
LLVMValueRef src_alpha,
LLVMValueRef dst,
LLVMValueRef mask,
LLVMValueRef const_,
const unsigned char swizzle[4])
LLVMValueRef const_alpha,
const unsigned char swizzle[4],
int nr_channels)
{
const struct pipe_rt_blend_state * state = &blend->rt[rt];
const struct util_format_description * desc;
struct lp_build_blend_aos_context bld;
LLVMValueRef src_factor, dst_factor;
LLVMValueRef result;
unsigned alpha_swizzle = swizzle[3];
boolean fullcolormask;
unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
unsigned i;
desc = util_format_description(cbuf_format[rt]);
@@ -282,20 +306,32 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
bld.src = src;
bld.dst = dst;
bld.const_ = const_;
bld.src_alpha = src_alpha;
bld.const_alpha = const_alpha;
if (swizzle[3] > UTIL_FORMAT_SWIZZLE_W || swizzle[3] == swizzle[0])
alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
/* Find the alpha channel if not provided separately */
if (!src_alpha) {
for (i = 0; i < 4; ++i) {
if (swizzle[i] == 3) {
alpha_swizzle = i;
}
}
}
if (!state->blend_enable) {
result = src;
} else {
boolean rgb_alpha_same = state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor;
assert(rgb_alpha_same || alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor) || nr_channels == 1;
src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor,
state->alpha_src_factor, alpha_swizzle);
state->alpha_src_factor,
alpha_swizzle,
nr_channels);
dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor,
state->alpha_dst_factor, alpha_swizzle);
state->alpha_dst_factor,
alpha_swizzle,
nr_channels);
result = lp_build_blend(&bld.base,
state->rgb_func,
@@ -308,7 +344,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
rgb_alpha_same,
false);
if(state->rgb_func != state->alpha_func && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
LLVMValueRef alpha;
alpha = lp_build_blend(&bld.base,
@@ -326,22 +362,27 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
result,
alpha,
LP_BUILD_BLEND_SWIZZLE_RGBA,
alpha_swizzle);
alpha_swizzle,
nr_channels);
}
}
/* Check if color mask is necessary */
fullcolormask = util_format_colormask_full(util_format_description(cbuf_format[rt]), state->colormask);
if (!fullcolormask) {
if (!util_format_colormask_full(desc, state->colormask)) {
LLVMValueRef color_mask;
color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state.colormask, desc->nr_channels, swizzle);
color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state->colormask, nr_channels, swizzle);
lp_build_name(color_mask, "color_mask");
/* Combine with input mask if necessary */
if (mask) {
/* We can be blending floating values but masks are always integer... */
unsigned floating = bld.base.type.floating;
bld.base.type.floating = 0;
mask = lp_build_and(&bld.base, color_mask, mask);
bld.base.type.floating = floating;
} else {
mask = color_mask;
}
+7 -3
View File
@@ -128,7 +128,8 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatTypeInContext(lc);
elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] =
elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32TypeInContext(lc);
elem_types[LP_JIT_CTX_BLEND_COLOR] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
elem_types[LP_JIT_CTX_U8_BLEND_COLOR] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
elem_types[LP_JIT_CTX_F_BLEND_COLOR] = LLVMPointerType(LLVMFloatTypeInContext(lc), 0);
elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type,
PIPE_MAX_SAMPLERS);
@@ -153,9 +154,12 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_back,
gallivm->target, context_type,
LP_JIT_CTX_STENCIL_REF_BACK);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, u8_blend_color,
gallivm->target, context_type,
LP_JIT_CTX_BLEND_COLOR);
LP_JIT_CTX_U8_BLEND_COLOR);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, f_blend_color,
gallivm->target, context_type,
LP_JIT_CTX_F_BLEND_COLOR);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures,
gallivm->target, context_type,
LP_JIT_CTX_TEXTURES);
+27 -6
View File
@@ -103,8 +103,8 @@ struct lp_jit_context
uint32_t stencil_ref_front, stencil_ref_back;
/* FIXME: store (also?) in floats */
uint8_t *blend_color;
uint8_t *u8_blend_color;
float *f_blend_color;
struct lp_jit_texture textures[PIPE_MAX_SAMPLERS];
};
@@ -119,7 +119,8 @@ enum {
LP_JIT_CTX_ALPHA_REF,
LP_JIT_CTX_STENCIL_REF_FRONT,
LP_JIT_CTX_STENCIL_REF_BACK,
LP_JIT_CTX_BLEND_COLOR,
LP_JIT_CTX_U8_BLEND_COLOR,
LP_JIT_CTX_F_BLEND_COLOR,
LP_JIT_CTX_TEXTURES,
LP_JIT_CTX_COUNT
};
@@ -137,14 +138,33 @@ enum {
#define lp_jit_context_stencil_ref_back_value(_gallivm, _ptr) \
lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_STENCIL_REF_BACK, "stencil_ref_back")
#define lp_jit_context_blend_color(_gallivm, _ptr) \
lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_BLEND_COLOR, "blend_color")
#define lp_jit_context_u8_blend_color(_gallivm, _ptr) \
lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_U8_BLEND_COLOR, "u8_blend_color")
#define lp_jit_context_f_blend_color(_gallivm, _ptr) \
lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_F_BLEND_COLOR, "f_blend_color")
#define lp_jit_context_textures(_gallivm, _ptr) \
lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_CTX_TEXTURES, "textures")
/**
* typedef for fragment shader function
*
* @param context jit context
* @param x block start x
* @param y block start y
* @param facing is front facing
* @param a0 shader input a0
* @param dadx shader input dadx
* @param dady shader input dady
* @param color color buffer
* @param depth depth buffer
* @param mask mask of visible pixels in block
* @param counter pointer to a counter (the rasterizer passes &task->vis_counter)
* @param stride array of color buffer row strides in bytes, one entry per color buffer
*/
typedef void
(*lp_jit_frag_func)(const struct lp_jit_context *context,
uint32_t x,
@@ -156,7 +176,8 @@ typedef void
uint8_t **color,
void *depth,
uint32_t mask,
uint32_t *counter);
uint32_t *counter,
unsigned *stride);
void
+81 -41
View File
@@ -151,47 +151,70 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
const struct lp_scene *scene = task->scene;
const uint8_t *clear_color = arg.clear_color;
uint8_t clear_color[4];
unsigned i;
boolean gray;
LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
for (i = 0; i < 4; ++i) {
clear_color[i] = float_to_ubyte(arg.clear_color[i]);
}
LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
clear_color[0],
clear_color[1],
clear_color[2],
clear_color[3]);
if (clear_color[0] == clear_color[1] &&
clear_color[1] == clear_color[2] &&
clear_color[2] == clear_color[3]) {
/* clear to grayscale value {x, x, x, x} */
for (i = 0; i < scene->fb.nr_cbufs; i++) {
uint8_t *ptr =
lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
}
}
else {
/* Non-gray color.
* Note: if the swizzled tile layout changes (see TILE_PIXEL) this code
* will need to change. It'll be pretty obvious when clearing no longer
* works.
*/
const unsigned chunk = TILE_SIZE / 4;
for (i = 0; i < scene->fb.nr_cbufs; i++) {
uint8_t *c =
lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
gray =
clear_color[0] == clear_color[1] &&
clear_color[1] == clear_color[2] &&
clear_color[2] == clear_color[3];
for (i = 0; i < scene->fb.nr_cbufs; i++) {
if (scene->cbufs[i].unswizzled) {
const struct lp_scene *scene = task->scene;
union util_color uc;
util_pack_color(arg.clear_color,
scene->fb.cbufs[i]->format, &uc);
util_fill_rect(scene->cbufs[i].map,
scene->fb.cbufs[i]->format,
scene->cbufs[i].stride,
task->x,
task->y,
TILE_SIZE,
TILE_SIZE,
&uc);
} else {
const unsigned chunk = TILE_SIZE / 4;
uint8_t *ptr;
unsigned j;
for (j = 0; j < 4 * TILE_SIZE; j++) {
memset(c, clear_color[0], chunk);
c += chunk;
memset(c, clear_color[1], chunk);
c += chunk;
memset(c, clear_color[2], chunk);
c += chunk;
memset(c, clear_color[3], chunk);
c += chunk;
ptr = lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
if (gray) {
/* clear to grayscale value {x, x, x, x} */
memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
} else {
/* Non-gray color.
* Note: if the swizzled tile layout changes (see TILE_PIXEL) this code
* will need to change. It'll be pretty obvious when clearing no longer
* works.
*/
for (j = 0; j < 4 * TILE_SIZE; j++) {
memset(ptr, clear_color[0], chunk);
ptr += chunk;
memset(ptr, clear_color[1], chunk);
ptr += chunk;
memset(ptr, clear_color[2], chunk);
ptr += chunk;
memset(ptr, clear_color[3], chunk);
ptr += chunk;
}
}
}
}
@@ -311,7 +334,7 @@ lp_rast_store_linear_color( struct lp_rasterizer_task *task )
const unsigned level = cbuf->u.tex.level;
struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture);
if (!task->color_tiles[buf])
if (scene->cbufs[buf].unswizzled || !task->color_tiles[buf])
continue;
llvmpipe_unswizzle_cbuf_tile(lpt,
@@ -358,13 +381,20 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
for (y = 0; y < TILE_SIZE; y += 4){
for (x = 0; x < TILE_SIZE; x += 4) {
uint8_t *color[PIPE_MAX_COLOR_BUFS];
unsigned stride[PIPE_MAX_COLOR_BUFS];
uint32_t *depth;
unsigned i;
/* color buffer */
for (i = 0; i < scene->fb.nr_cbufs; i++)
color[i] = lp_rast_get_color_block_pointer(task, i,
tile_x + x, tile_y + y);
for (i = 0; i < scene->fb.nr_cbufs; i++){
stride[i] = scene->cbufs[i].stride;
if (scene->cbufs[i].unswizzled) {
color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, tile_x + x, tile_y + y);
} else {
color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x, tile_y + y);
}
}
/* depth buffer */
depth = lp_rast_get_depth_block_pointer(task, tile_x + x, tile_y + y);
@@ -380,7 +410,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
color,
depth,
0xffff,
&task->vis_counter);
&task->vis_counter,
stride);
END_JIT_CALL();
}
}
@@ -408,7 +439,9 @@ lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
/* this will prevent converting the layout from tiled to linear */
for (i = 0; i < scene->fb.nr_cbufs; i++) {
(void)lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
if (!scene->cbufs[i].unswizzled) {
(void)lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
}
}
lp_rast_shade_tile(task, arg);
@@ -431,6 +464,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
struct lp_fragment_shader_variant *variant = state->variant;
const struct lp_scene *scene = task->scene;
uint8_t *color[PIPE_MAX_COLOR_BUFS];
unsigned stride[PIPE_MAX_COLOR_BUFS];
void *depth;
unsigned i;
@@ -447,15 +481,20 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
/* color buffer */
for (i = 0; i < scene->fb.nr_cbufs; i++) {
color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
assert(lp_check_alignment(color[i], 16));
stride[i] = scene->cbufs[i].stride;
if (scene->cbufs[i].unswizzled) {
color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y);
} else {
color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
}
}
/* depth buffer */
depth = lp_rast_get_depth_block_pointer(task, x, y);
assert(lp_check_alignment(state->jit_context.blend_color, 16));
assert(lp_check_alignment(state->jit_context.u8_blend_color, 16));
/* run shader on 4x4 block */
BEGIN_JIT_CALL(state, task);
@@ -468,7 +507,8 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
color,
depth,
mask,
&task->vis_counter);
&task->vis_counter,
stride);
END_JIT_CALL();
}
+1 -1
View File
@@ -152,7 +152,7 @@ union lp_rast_cmd_arg {
unsigned plane_mask;
} triangle;
const struct lp_rast_state *set_state;
uint8_t clear_color[4];
float clear_color[4];
struct {
uint32_t value;
uint32_t mask;
+78 -3
View File
@@ -189,6 +189,7 @@ lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
assert(task->x % TILE_SIZE == 0);
assert(task->y % TILE_SIZE == 0);
assert(buf < scene->fb.nr_cbufs);
assert(scene->cbufs[buf].unswizzled == 0);
if (!task->color_tiles[buf]) {
struct pipe_surface *cbuf = scene->fb.cbufs[buf];
@@ -210,6 +211,35 @@ lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
}
/**
 * Return the address of the task's current tile inside an unswizzled
 * color buffer.
 *
 * The address is derived from the scene's mapping of the color buffer the
 * first time it is requested and then cached in task->color_tiles[buf];
 * subsequent calls hand back the cached pointer.
 *
 * \param task   rasterizer task; task->x / task->y are the tile origin
 * \param buf    color buffer index, must be below scene->fb.nr_cbufs and
 *               refer to a buffer flagged as unswizzled
 * \param usage  not read by this function
 */
static INLINE uint8_t *
lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task,
                                          unsigned buf, enum lp_texture_usage usage)
{
   const struct lp_scene *scene = task->scene;

   assert(task->x < scene->tiles_x * TILE_SIZE);
   assert(task->y < scene->tiles_y * TILE_SIZE);
   assert(task->x % TILE_SIZE == 0);
   assert(task->y % TILE_SIZE == 0);
   assert(buf < scene->fb.nr_cbufs);
   assert(scene->cbufs[buf].unswizzled);

   if (!task->color_tiles[buf]) {
      struct pipe_surface *surf = scene->fb.cbufs[buf];
      unsigned bytes_per_pixel;
      unsigned row_offset;
      unsigned col_offset;

      assert(surf);

      /* Byte offset of the tile origin: whole rows first, then pixels. */
      bytes_per_pixel = util_format_description(surf->format)->block.bits / 8;
      row_offset = scene->cbufs[buf].stride * task->y;
      col_offset = bytes_per_pixel * task->x;

      task->color_tiles[buf] = scene->cbufs[buf].map + row_offset + col_offset;
   }

   return task->color_tiles[buf];
}
/**
* Get the pointer to a 4x4 color block (within a 64x64 tile).
* We'll map the color buffer on demand here.
@@ -228,6 +258,8 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
assert(y < task->scene->tiles_y * TILE_SIZE);
assert((x % TILE_VECTOR_WIDTH) == 0);
assert((y % TILE_VECTOR_HEIGHT) == 0);
assert(buf < task->scene->fb.nr_cbufs);
assert(task->scene->cbufs[buf].unswizzled == 0);
color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE);
assert(color);
@@ -243,6 +275,40 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
}
/**
 * Locate an unswizzled 4x4 color block inside the task's unswizzled tile.
 *
 * The tile base comes from lp_rast_get_unswizzled_color_tile_pointer();
 * the block is then addressed by its position within the tile using the
 * color buffer's stride and the format's bytes per pixel.
 *
 * \param task  rasterizer task owning the tile
 * \param buf   color buffer index; the buffer must be flagged unswizzled
 * \param x, y  location of 4x4 block in window coords
 * \return pointer to the first pixel of the block
 */
static INLINE uint8_t *
lp_rast_get_unswizzled_color_block_pointer(struct lp_rasterizer_task *task,
                                           unsigned buf, unsigned x, unsigned y)
{
   const struct lp_scene *scene = task->scene;
   unsigned bytes_per_pixel;
   unsigned x_in_tile;
   unsigned y_in_tile;
   uint8_t *block;

   assert(x < scene->tiles_x * TILE_SIZE);
   assert(y < scene->tiles_y * TILE_SIZE);
   assert((x % TILE_VECTOR_WIDTH) == 0);
   assert((y % TILE_VECTOR_HEIGHT) == 0);
   assert(buf < scene->fb.nr_cbufs);
   assert(scene->cbufs[buf].unswizzled);

   bytes_per_pixel = util_format_description(scene->fb.cbufs[buf]->format)->block.bits / 8;

   block = lp_rast_get_unswizzled_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE);
   assert(block);

   /* Reduce window coordinates to coordinates relative to the tile origin. */
   x_in_tile = x % TILE_SIZE;
   y_in_tile = y % TILE_SIZE;

   block += x_in_tile * bytes_per_pixel + y_in_tile * scene->cbufs[buf].stride;

   assert(lp_check_alignment(block, llvmpipe_get_format_alignment(scene->fb.cbufs[buf]->format)));
   return block;
}
/**
* Shade all pixels in a 4x4 block. The fragment code omits the
@@ -258,12 +324,20 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
const struct lp_rast_state *state = task->state;
struct lp_fragment_shader_variant *variant = state->variant;
uint8_t *color[PIPE_MAX_COLOR_BUFS];
unsigned stride[PIPE_MAX_COLOR_BUFS];
void *depth;
unsigned i;
/* color buffer */
for (i = 0; i < scene->fb.nr_cbufs; i++)
color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
for (i = 0; i < scene->fb.nr_cbufs; i++) {
stride[i] = scene->cbufs[i].stride;
if (scene->cbufs[i].unswizzled) {
color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y);
} else {
color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
}
}
depth = lp_rast_get_depth_block_pointer(task, x, y);
@@ -278,7 +352,8 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
color,
depth,
0xffff,
&task->vis_counter );
&task->vis_counter,
stride );
END_JIT_CALL();
}
+2
View File
@@ -150,6 +150,8 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
cbuf->u.tex.first_layer,
LP_TEX_USAGE_READ_WRITE,
LP_TEX_LAYOUT_LINEAR);
scene->cbufs[i].unswizzled = llvmpipe_is_format_unswizzled(cbuf->format);
}
if (fb->zsbuf) {
+1
View File
@@ -137,6 +137,7 @@ struct lp_scene {
uint8_t *map;
unsigned stride;
unsigned blocksize;
unsigned unswizzled;
} zsbuf, cbufs[PIPE_MAX_COLOR_BUFS];
/** the framebuffer to render the scene into */
+16 -3
View File
@@ -390,7 +390,7 @@ lp_setup_try_clear( struct lp_setup_context *setup,
if (flags & PIPE_CLEAR_COLOR) {
for (i = 0; i < 4; i++)
color_arg.clear_color[i] = float_to_ubyte(color[i]);
color_arg.clear_color[i] = color[i];
}
if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
@@ -805,14 +805,26 @@ try_update_scene_state( struct lp_setup_context *setup )
if(setup->dirty & LP_SETUP_NEW_BLEND_COLOR) {
uint8_t *stored;
float* fstored;
unsigned i, j;
unsigned size;
/* Alloc u8_blend_color (16 x i8) and f_blend_color (4 or 8 x f32) */
size = 4 * 16 * sizeof(uint8_t);
size += (LP_MAX_VECTOR_LENGTH / 4) * sizeof(float);
stored = lp_scene_alloc_aligned(scene, size, LP_MAX_VECTOR_LENGTH);
stored = lp_scene_alloc_aligned(scene, 4 * 16, 16);
if (!stored) {
assert(!new_scene);
return FALSE;
}
/* Store floating point colour */
fstored = (float*)(stored + 4*16);
for (i = 0; i < (LP_MAX_VECTOR_LENGTH / 4); ++i) {
fstored[i] = setup->blend_color.current.color[i % 4];
}
/* smear each blend color component across 16 ubyte elements */
for (i = 0; i < 4; ++i) {
uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]);
@@ -821,7 +833,8 @@ try_update_scene_state( struct lp_setup_context *setup )
}
setup->blend_color.stored = stored;
setup->fs.current.jit_context.blend_color = setup->blend_color.stored;
setup->fs.current.jit_context.u8_blend_color = stored;
setup->fs.current.jit_context.f_blend_color = fstored;
setup->dirty |= LP_SETUP_NEW_FS;
}
File diff suppressed because it is too large Load Diff
@@ -94,6 +94,9 @@ struct lp_fragment_shader_variant
lp_jit_frag_func jit_function[2];
/* Bitmask to say what cbufs are unswizzled */
unsigned unswizzled_cbufs;
/* Total number of LLVM instructions generated */
unsigned nr_instrs;
+1 -1
View File
@@ -195,7 +195,7 @@ add_blend_test(struct gallivm_state *gallivm,
dst = LLVMBuildLoad(builder, dst_ptr, "dst");
con = LLVMBuildLoad(builder, const_ptr, "const");
res = lp_build_blend_aos(gallivm, blend, &format, type, rt, src, dst, NULL, con, swizzle);
res = lp_build_blend_aos(gallivm, blend, &format, type, rt, src, NULL, dst, NULL, con, NULL, swizzle, 4);
lp_build_name(res, "res");
+67
View File
@@ -756,6 +756,73 @@ llvmpipe_is_resource_referenced( struct pipe_context *pipe,
return lp_setup_is_resource_referenced(llvmpipe->setup, presource);
}
/**
 * Report whether a color format qualifies as "unswizzled".
 *
 * Returns FALSE when any of the following holds:
 *  - the format is B8G8R8X8_UNORM or B8G8R8A8_UNORM;
 *  - the layout is not plain, the colorspace is not RGB, or the block is
 *    not 1x1;
 *  - some channel differs from channel 0 in type, normalization or
 *    pure-integer flag (a VOID channel in the last position is exempt);
 *  - the format has 4 channels and swizzle[3] is less than 3.
 * Returns TRUE otherwise.
 */
boolean
llvmpipe_is_format_unswizzled( enum pipe_format format )
{
   const struct util_format_description *desc = util_format_description(format);
   unsigned i;

   switch (format) {
   case PIPE_FORMAT_B8G8R8X8_UNORM:
   case PIPE_FORMAT_B8G8R8A8_UNORM:
      return FALSE;
   default:
      break;
   }

   if (!(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
         desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
         desc->block.width == 1 &&
         desc->block.height == 1)) {
      return FALSE;
   }

   for (i = 0; i < desc->nr_channels; ++i) {
      /* A trailing VOID (padding) channel need not match the others. */
      if ((i + 1) == desc->nr_channels &&
          desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
         continue;
      }

      if (desc->channel[i].type != desc->channel[0].type ||
          desc->channel[i].normalized != desc->channel[0].normalized ||
          desc->channel[i].pure_integer != desc->channel[0].pure_integer) {
         return FALSE;
      }
   }

   /* All code assumes alpha is the last channel */
   return (desc->nr_channels == 4 && desc->swizzle[3] < 3) ? FALSE : TRUE;
}
/**
 * Compute the alignment, in bytes, that llvmpipe uses for a format.
 *
 * The channel sizes are summed and converted to bytes.  When that byte
 * count is not a power of two it is divided by the number of channels.
 * An odd or zero result is reported as an alignment of 1.
 */
unsigned
llvmpipe_get_format_alignment( enum pipe_format format )
{
   const struct util_format_description *desc = util_format_description(format);
   unsigned total_bits = 0;
   unsigned nbytes;
   unsigned chan;

   for (chan = 0; chan < desc->nr_channels; ++chan)
      total_bits += desc->channel[chan].size;

   nbytes = total_bits / 8;

   /* Fall back to the per-channel size for non-power-of-two pixels. */
   if (!util_is_power_of_two(nbytes))
      nbytes /= desc->nr_channels;

   return (nbytes >= 1 && nbytes % 2 == 0) ? nbytes : 1;
}
/**
@@ -256,4 +256,10 @@ llvmpipe_is_resource_referenced( struct pipe_context *pipe,
struct pipe_resource *presource,
unsigned level, int layer);
boolean
llvmpipe_is_format_unswizzled(enum pipe_format format);
unsigned
llvmpipe_get_format_alignment(enum pipe_format format);
#endif /* LP_TEXTURE_H */