Merge branch 'shaders_cleanup'

This commit is contained in:
Maciej Cencora
2009-07-13 20:29:11 +02:00
17 changed files with 912 additions and 599 deletions
-2
View File
@@ -418,8 +418,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
_mesa_set_mvp_with_dp4( ctx, GL_TRUE );
_mesa_set_mvp_with_dp4( ctx, GL_TRUE );
/* Initialize the software rasterizer and helper modules.
*/
_swrast_CreateContext(ctx);
+29 -10
View File
@@ -405,12 +405,13 @@ struct r300_hw_state {
#undef TAG
struct r300_vertex_program {
struct gl_vertex_program *Base;
struct r300_vertex_program *next;
struct r300_vertex_program_key {
GLuint InputsRead;
GLuint OutputsWritten;
GLuint OutputsAdded;
GLuint FpReads;
GLuint FogAttr;
GLuint WPosAttr;
} key;
struct r300_vertex_shader_hw_code {
@@ -426,13 +427,17 @@ struct r300_vertex_program {
int pos_end;
int num_temporaries; /* Number of temp vars used by program */
int wpos_idx;
int inputs[VERT_ATTRIB_MAX];
int outputs[VERT_RESULT_MAX];
};
struct r300_vertex_program_cont {
struct gl_vertex_program mesa_program; /* Must be first */
/* This is the unmodified vertex program mesa provided us with.
* We need to keep it unchanged because we may need to create another
* hw specific vertex program based on this.
*/
struct gl_vertex_program mesa_program;
/* This is the list of hw specific vertex programs derived from mesa_program */
struct r300_vertex_program *progs;
};
@@ -546,7 +551,7 @@ struct r500_fragment_program_code {
* to render with that program.
*/
struct r300_fragment_program {
struct gl_fragment_program Base;
struct gl_program *Base;
GLboolean translated;
GLboolean error;
@@ -559,6 +564,23 @@ struct r300_fragment_program {
GLboolean writes_depth;
GLuint optimization;
struct r300_fragment_program *next;
/* attribute that we are sending the WPOS in */
gl_frag_attrib wpos_attr;
/* attribute that we are sending the fog coordinate in */
gl_frag_attrib fog_attr;
};
/**
 * Container for one Mesa fragment program plus the hardware-specific
 * variants that have been derived from it.
 *
 * Pointers to the embedded Mesa program are cast directly to this container
 * type, so Base has to remain the first member.
 */
struct r300_fragment_program_cont {
/* This is the unmodified fragment program mesa provided us with.
* We need to keep it unchanged because we may need to create another
* hw specific fragment program based on this.
* Must stay the first member: program pointers are cast to this struct.
*/
struct gl_fragment_program Base;
/* Head of the singly linked list (chained through ->next) of hw specific
* fragment programs derived from Base; NULL when none has been created yet.
*/
struct r300_fragment_program *progs;
};
struct r300_fragment_program_compiler {
@@ -633,6 +655,7 @@ struct r300_context {
struct r300_hw_state hw;
struct r300_vertex_program *selected_vp;
struct r300_fragment_program *selected_fp;
/* Vertex buffers
*/
@@ -664,11 +687,7 @@ extern GLboolean r300CreateContext(const __GLcontextModes * glVisual,
__DRIcontextPrivate * driContextPriv,
void *sharedContextPrivate);
extern void r300SelectVertexShader(r300ContextPtr r300);
extern void r300InitShaderFuncs(struct dd_function_table *functions);
extern int r300VertexProgUpdateParams(GLcontext * ctx,
struct r300_vertex_program_cont *vp,
float *dst);
extern void r300InitShaderFunctions(r300ContextPtr r300);
+12 -3
View File
@@ -195,6 +195,11 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st
}
GLfloat *dst_ptr, *tmp;
/* Convert value for first element only */
if (input->StrideB == 0)
count = 1;
tmp = dst_ptr = _mesa_malloc(sizeof(GLfloat) * input->Size * count);
switch (input->Type) {
@@ -228,7 +233,11 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st
type = GL_FLOAT;
r300_attr.free_needed = GL_TRUE;
r300_attr.data = tmp;
r300_attr.stride = sizeof(GLfloat) * input->Size;
if (input->StrideB == 0) {
r300_attr.stride = 0;
} else {
r300_attr.stride = sizeof(GLfloat) * input->Size;
}
r300_attr.dwords = input->Size;
} else {
type = input->Type;
@@ -332,7 +341,7 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar
{
int i, tmp;
tmp = r300->selected_vp->key.InputsRead;
tmp = r300->selected_vp->Base->Base.InputsRead;
i = 0;
vbuf->num_attribs = 0;
while (tmp) {
@@ -428,7 +437,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx,
if (r300->fallback)
return GL_FALSE;
r300SetupVAP(ctx, r300->selected_vp->key.InputsRead, r300->selected_vp->key.OutputsWritten);
r300SetupVAP(ctx, r300->selected_vp->Base->Base.InputsRead, r300->selected_vp->Base->Base.OutputsWritten);
r300UpdateShaderStates(r300);
+5 -14
View File
@@ -81,17 +81,17 @@ GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
return vic_1;
}
GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads)
GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes)
{
GLuint ret = 0;
if (vp_writes & (1 << VERT_RESULT_HPOS))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
if (vp_writes & (1 << VERT_RESULT_COL0) && fp_reads & FRAG_BIT_COL0)
if (vp_writes & (1 << VERT_RESULT_COL0))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT;
if (vp_writes & (1 << VERT_RESULT_COL1) && fp_reads & FRAG_BIT_COL1)
if (vp_writes & (1 << VERT_RESULT_COL1))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
/* Two sided lighting works only if all 4 colors are written */
@@ -105,26 +105,17 @@ GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads)
return ret;
}
GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads)
GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes)
{
GLuint i, ret = 0, first_free_texcoord = 0;
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
if (vp_writes & (1 << (VERT_RESULT_TEX0 + i)) && fp_reads & FRAG_BIT_TEX(i)) {
if (vp_writes & (1 << (VERT_RESULT_TEX0 + i))) {
ret |= (4 << (3 * first_free_texcoord));
++first_free_texcoord;
}
}
if (fp_reads & FRAG_BIT_WPOS) {
ret |= (4 << (3 * first_free_texcoord));
++first_free_texcoord;
}
if (vp_writes & (1 << VERT_RESULT_FOGC) && fp_reads & FRAG_BIT_FOGC) {
ret |= 4 << (3 * first_free_texcoord);
}
if (first_free_texcoord > 8) {
fprintf(stderr, "\tout of free texcoords\n");
_mesa_exit(-1);
+2 -2
View File
@@ -225,7 +225,7 @@ extern void r300EmitCacheFlush(r300ContextPtr rmesa);
extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead);
extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead);
extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads);
extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads);
extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes);
extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes);
#endif
+208 -100
View File
@@ -50,13 +50,6 @@
#include "radeon_program.h"
#include "radeon_program_alu.h"
static void update_params(GLcontext *ctx, struct gl_fragment_program *fp)
{
/* Ask Mesa nicely to fill in ParameterValues for us */
if (fp->Base.Parameters)
_mesa_load_state_parameters(ctx, fp->Base.Parameters);
}
static void nqssadce_init(struct nqssadce_state* s)
{
s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW;
@@ -74,10 +67,12 @@ static void nqssadce_init(struct nqssadce_state* s)
*/
static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
{
GLuint InputsRead = compiler->fp->Base.Base.InputsRead;
GLuint InputsRead = compiler->fp->Base->InputsRead;
if (!(InputsRead & FRAG_BIT_WPOS))
if (!(InputsRead & FRAG_BIT_WPOS)) {
compiler->fp->wpos_attr = FRAG_ATTRIB_MAX;
return;
}
static gl_state_index tokens[STATE_LENGTH] = {
STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
@@ -85,10 +80,23 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
struct prog_instruction *fpi;
GLuint window_index;
int i = 0;
for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
{
if (!(InputsRead & (1 << i))) {
InputsRead &= ~(1 << FRAG_ATTRIB_WPOS);
InputsRead |= 1 << i;
compiler->fp->Base->InputsRead = InputsRead;
compiler->fp->wpos_attr = i;
break;
}
}
GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
_mesa_insert_instructions(compiler->program, 0, 3);
fpi = compiler->program->Instructions;
i = 0;
/* perspective divide */
fpi[i].Opcode = OPCODE_RCP;
@@ -99,7 +107,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
fpi[i].DstReg.CondMask = COND_TR;
fpi[i].SrcReg[0].File = PROGRAM_INPUT;
fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr;
fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
i++;
@@ -111,7 +119,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
fpi[i].DstReg.CondMask = COND_TR;
fpi[i].SrcReg[0].File = PROGRAM_INPUT;
fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr;
fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
@@ -154,6 +162,57 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
}
}
/**
* Rewrite fragment.fogcoord to use a texture coordinate slot.
* Note that fogcoord is forced into an X001 pattern, and this enforcement
* is done here.
*
* See also the counterpart rewriting for vertex programs.
*/
static void rewriteFog(struct r300_fragment_program_compiler *compiler)
{
struct r300_fragment_program *fp = compiler->fp;
GLuint InputsRead;
int i;
InputsRead = fp->Base->InputsRead;
if (!(InputsRead & FRAG_BIT_FOGC)) {
fp->fog_attr = FRAG_ATTRIB_MAX;
return;
}
for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
{
if (!(InputsRead & (1 << i))) {
InputsRead &= ~(1 << FRAG_ATTRIB_FOGC);
InputsRead |= 1 << i;
fp->Base->InputsRead = InputsRead;
fp->fog_attr = i;
break;
}
}
{
struct prog_instruction *inst;
inst = compiler->program->Instructions;
while (inst->Opcode != OPCODE_END) {
const int src_regs = _mesa_num_inst_src_regs(inst->Opcode);
for (i = 0; i < src_regs; ++i) {
if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) {
inst->SrcReg[i].Index = fp->fog_attr;
inst->SrcReg[i].Swizzle = combine_swizzles(
MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE),
inst->SrcReg[i].Swizzle);
}
}
++inst;
}
}
}
static GLuint build_dtm(GLuint depthmode)
{
switch(depthmode) {
@@ -175,7 +234,7 @@ static GLuint build_func(GLuint comparefunc)
*/
static void build_state(
r300ContextPtr r300,
struct r300_fragment_program *fp,
struct gl_fragment_program *fp,
struct r300_fragment_program_external_state *state)
{
int unit;
@@ -183,7 +242,7 @@ static void build_state(
_mesa_bzero(state, sizeof(*state));
for(unit = 0; unit < 16; ++unit) {
if (fp->Base.Base.ShadowSamplers & (1 << unit)) {
if (fp->Base.ShadowSamplers & (1 << unit)) {
struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
@@ -192,100 +251,149 @@ static void build_state(
}
}
void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp)
/**
 * Move writes to the depth result from the Z channel to the W channel.
 *
 * For every instruction whose destination is FRAG_RESULT_DEPTH:
 *  - if it does not write Z, its write mask is cleared;
 *  - otherwise its write mask becomes W, and for the component-wise opcodes
 *    listed below every source operand is swizzled with ZZZZ so the value
 *    that used to land in Z is the one that lands in W.
 */
static void rewrite_depth_out(struct gl_program *prog)
{
	struct prog_instruction *cur;

	for (cur = prog->Instructions; cur->Opcode != OPCODE_END; ++cur) {
		int vector_srcs;
		int s;

		if (cur->DstReg.File != PROGRAM_OUTPUT)
			continue;
		if (cur->DstReg.Index != FRAG_RESULT_DEPTH)
			continue;

		if (!(cur->DstReg.WriteMask & WRITEMASK_Z)) {
			/* Depth is not touched by this instruction at all */
			cur->DstReg.WriteMask = 0;
			continue;
		}
		cur->DstReg.WriteMask = WRITEMASK_W;

		/* How many leading source operands need the ZZZZ reswizzle */
		switch (cur->Opcode) {
		case OPCODE_FRC:
		case OPCODE_MOV:
			vector_srcs = 1;
			break;
		case OPCODE_ADD:
		case OPCODE_MAX:
		case OPCODE_MIN:
		case OPCODE_MUL:
			vector_srcs = 2;
			break;
		case OPCODE_CMP:
		case OPCODE_MAD:
			vector_srcs = 3;
			break;
		default:
			/* Scalar instructions needn't be reswizzled */
			vector_srcs = 0;
			break;
		}

		for (s = 0; s < vector_srcs; ++s)
			cur->SrcReg[s] = lmul_swizzle(SWIZZLE_ZZZZ, cur->SrcReg[s]);
	}
}
/**
 * Translate one hardware-specific fragment program into native code.
 *
 * The program in fp->Base is rewritten in place (WPOS and fog routing,
 * depth output rewrite, chip-specific local transformations, NqSSA-DCE)
 * and then handed to the chip's BuildFragmentProgramHwCode hook, which
 * emits into fp->code.
 *
 * \param ctx  GL context the program belongs to.
 * \param fp   Program variant to translate. On return fp->translated is
 *             GL_TRUE; fp->error is set to GL_TRUE if hw code generation
 *             failed (it is never cleared here).
 */
void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);
	/* R5xx-class chips use a different set of transformations and swizzles */
	const GLboolean is_r500 =
		r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515;
	struct r300_fragment_program_compiler compiler;

	compiler.r300 = r300;
	compiler.fp = fp;
	compiler.code = &fp->code;
	compiler.program = fp->Base;

	if (RADEON_DEBUG & DEBUG_PIXEL) {
		fflush(stdout);
		_mesa_printf("Fragment Program: Initial program:\n");
		_mesa_print_program(compiler.program);
		fflush(stdout);
	}

	/* Input/output rewrites that have to happen before the generic passes */
	insert_WPOS_trailer(&compiler);

	rewriteFog(&compiler);

	rewrite_depth_out(compiler.program);

	if (is_r500) {
		struct radeon_program_transformation transformations[] = {
			{ &r500_transform_TEX, &compiler },
			{ &radeonTransformALU, 0 },
			{ &radeonTransformDeriv, 0 },
			{ &radeonTransformTrigScale, 0 }
		};
		radeonLocalTransform(ctx, compiler.program, 4, transformations);
	} else {
		struct radeon_program_transformation transformations[] = {
			{ &r300_transform_TEX, &compiler },
			{ &radeonTransformALU, 0 },
			{ &radeonTransformTrigSimple, 0 }
		};
		radeonLocalTransform(ctx, compiler.program, 3, transformations);
	}

	if (RADEON_DEBUG & DEBUG_PIXEL) {
		_mesa_printf("Fragment Program: After native rewrite:\n");
		_mesa_print_program(compiler.program);
		fflush(stdout);
	}

	if (is_r500) {
		struct radeon_nqssadce_descr nqssadce = {
			.Init = &nqssadce_init,
			.IsNativeSwizzle = &r500FPIsNativeSwizzle,
			.BuildSwizzle = &r500FPBuildSwizzle
		};
		radeonNqssaDce(ctx, compiler.program, &nqssadce);
	} else {
		struct radeon_nqssadce_descr nqssadce = {
			.Init = &nqssadce_init,
			.IsNativeSwizzle = &r300FPIsNativeSwizzle,
			.BuildSwizzle = &r300FPBuildSwizzle
		};
		radeonNqssaDce(ctx, compiler.program, &nqssadce);
	}

	if (RADEON_DEBUG & DEBUG_PIXEL) {
		_mesa_printf("Compiler: after NqSSA-DCE:\n");
		_mesa_print_program(compiler.program);
		fflush(stdout);
	}

	if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler))
		fp->error = GL_TRUE;

	/* Marked translated even on failure so the program is not retried */
	fp->translated = GL_TRUE;

	if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
		r300->vtbl.FragmentProgramDump(&fp->code);
}
struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_fragment_program_cont *fp_list;
struct r300_fragment_program *fp;
struct r300_fragment_program_external_state state;
build_state(r300, r300_fp, &state);
if (_mesa_memcmp(&r300_fp->state, &state, sizeof(state))) {
/* TODO: cache compiled programs */
r300_fp->translated = GL_FALSE;
_mesa_memcpy(&r300_fp->state, &state, sizeof(state));
fp_list = (struct r300_fragment_program_cont *)ctx->FragmentProgram._Current;
build_state(r300, ctx->FragmentProgram._Current, &state);
fp = fp_list->progs;
while (fp) {
if (_mesa_memcmp(&fp->state, &state, sizeof(state)) == 0) {
return r300->selected_fp = fp;
}
fp = fp->next;
}
if (!r300_fp->translated) {
struct r300_fragment_program_compiler compiler;
fp = _mesa_calloc(sizeof(struct r300_fragment_program));
compiler.r300 = r300;
compiler.fp = r300_fp;
compiler.code = &r300_fp->code;
compiler.program = _mesa_clone_program(ctx, &fp->Base);
fp->state = state;
fp->translated = GL_FALSE;
fp->Base = _mesa_clone_program(ctx, &ctx->FragmentProgram._Current->Base);
if (RADEON_DEBUG & DEBUG_PIXEL) {
fflush(stdout);
_mesa_printf("Fragment Program: Initial program:\n");
_mesa_print_program(compiler.program);
fflush(stdout);
}
fp->next = fp_list->progs;
fp_list->progs = fp;
insert_WPOS_trailer(&compiler);
if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
struct radeon_program_transformation transformations[] = {
{ &r500_transform_TEX, &compiler },
{ &radeonTransformALU, 0 },
{ &radeonTransformDeriv, 0 },
{ &radeonTransformTrigScale, 0 }
};
radeonLocalTransform(ctx, compiler.program, 4, transformations);
} else {
struct radeon_program_transformation transformations[] = {
{ &r300_transform_TEX, &compiler },
{ &radeonTransformALU, 0 },
{ &radeonTransformTrigSimple, 0 }
};
radeonLocalTransform(ctx, compiler.program, 3, transformations);
}
if (RADEON_DEBUG & DEBUG_PIXEL) {
_mesa_printf("Fragment Program: After native rewrite:\n");
_mesa_print_program(compiler.program);
fflush(stdout);
}
if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadce_init,
.IsNativeSwizzle = &r500FPIsNativeSwizzle,
.BuildSwizzle = &r500FPBuildSwizzle,
.RewriteDepthOut = GL_TRUE
};
radeonNqssaDce(ctx, compiler.program, &nqssadce);
} else {
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadce_init,
.IsNativeSwizzle = &r300FPIsNativeSwizzle,
.BuildSwizzle = &r300FPBuildSwizzle,
.RewriteDepthOut = GL_TRUE
};
radeonNqssaDce(ctx, compiler.program, &nqssadce);
}
if (RADEON_DEBUG & DEBUG_PIXEL) {
_mesa_printf("Compiler: after NqSSA-DCE:\n");
_mesa_print_program(compiler.program);
fflush(stdout);
}
if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler))
r300_fp->error = GL_TRUE;
/* Subtle: Rescue any parameters that have been added during transformations */
_mesa_free_parameter_list(fp->Base.Parameters);
fp->Base.Parameters = compiler.program->Parameters;
compiler.program->Parameters = 0;
_mesa_reference_program(ctx, &compiler.program, NULL);
r300_fp->translated = GL_TRUE;
r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
r300->vtbl.FragmentProgramDump(&r300_fp->code);
}
update_params(ctx, fp);
return r300->selected_fp = fp;
}
@@ -30,6 +30,10 @@
#include "main/mtypes.h"
extern void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp);
#include "r300_context.h"
extern void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp);
struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx);
#endif
+53 -11
View File
@@ -32,22 +32,45 @@
#include "r300_context.h"
#include "r300_fragprog_common.h"
/**
 * Destroy every hardware-specific fragment program cached in \p cache.
 *
 * Each list entry has its reference on the Base program released and is
 * then freed. The list head is cleared as well, so the container never
 * keeps a pointer to freed entries and the call is safe to repeat.
 */
static void freeFragProgCache(GLcontext *ctx, struct r300_fragment_program_cont *cache)
{
	struct r300_fragment_program *fp = cache->progs;

	/* Detach the list before tearing it down */
	cache->progs = NULL;

	while (fp) {
		/* Grab the link first: fp is gone after _mesa_free() */
		struct r300_fragment_program *next = fp->next;

		_mesa_reference_program(ctx, &fp->Base, NULL);
		_mesa_free(fp);
		fp = next;
	}
}
/**
 * Destroy every hardware-specific vertex program cached in \p cache.
 *
 * Each list entry has its reference on the Base vertex program released
 * and is then freed. The list head is cleared as well, so the container
 * never keeps a pointer to freed entries and the call is safe to repeat.
 */
static void freeVertProgCache(GLcontext *ctx, struct r300_vertex_program_cont *cache)
{
	struct r300_vertex_program *vp = cache->progs;

	/* Detach the list before tearing it down */
	cache->progs = NULL;

	while (vp) {
		/* Grab the link first: vp is gone after _mesa_free() */
		struct r300_vertex_program *next = vp->next;

		_mesa_reference_vertprog(ctx, &vp->Base, NULL);
		_mesa_free(vp);
		vp = next;
	}
}
static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target,
GLuint id)
{
struct r300_vertex_program_cont *vp;
struct r300_fragment_program *fp;
struct r300_fragment_program_cont *fp;
switch (target) {
case GL_VERTEX_STATE_PROGRAM_NV:
case GL_VERTEX_PROGRAM_ARB:
vp = CALLOC_STRUCT(r300_vertex_program_cont);
return _mesa_init_vertex_program(ctx, &vp->mesa_program,
target, id);
return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
case GL_FRAGMENT_PROGRAM_NV:
case GL_FRAGMENT_PROGRAM_ARB:
fp = CALLOC_STRUCT(r300_fragment_program);
fp = CALLOC_STRUCT(r300_fragment_program_cont);
return _mesa_init_fragment_program(ctx, &fp->Base, target, id);
default:
@@ -59,21 +82,35 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target,
/**
 * Destroy a program object created by r300NewProgram.
 *
 * The cache of hardware-specific variants attached to the program is freed
 * first, then the Mesa program itself is deleted. The same targets that
 * r300NewProgram wraps in a container are handled here, so variants of
 * NV-target programs are released too.
 */
static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog)
{
	switch (prog->Target) {
	case GL_VERTEX_STATE_PROGRAM_NV:
	case GL_VERTEX_PROGRAM_ARB:
		freeVertProgCache(ctx, (struct r300_vertex_program_cont *)prog);
		break;
	case GL_FRAGMENT_PROGRAM_NV:
	case GL_FRAGMENT_PROGRAM_ARB:
		freeFragProgCache(ctx, (struct r300_fragment_program_cont *)prog);
		break;
	default:
		/* Not a program type this driver attaches a cache to */
		break;
	}

	_mesa_delete_program(ctx, prog);
}
static void
r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
{
struct r300_vertex_program_cont *vp = (void *)prog;
struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog;
struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog;
struct r300_fragment_program_cont *fp = (struct r300_fragment_program_cont *)prog;
switch (target) {
case GL_VERTEX_PROGRAM_ARB:
freeVertProgCache(ctx, vp);
vp->progs = NULL;
break;
case GL_FRAGMENT_PROGRAM_ARB:
r300_fp->translated = GL_FALSE;
freeFragProgCache(ctx, fp);
fp->progs = NULL;
break;
}
@@ -85,13 +122,18 @@ static GLboolean
r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
{
if (target == GL_FRAGMENT_PROGRAM_ARB) {
struct r300_fragment_program *fp = (struct r300_fragment_program *)prog;
struct r300_fragment_program *fp = r300SelectFragmentShader(ctx);
if (!fp->translated)
r300TranslateFragmentShader(ctx, &fp->Base);
r300TranslateFragmentShader(ctx, fp);
return !fp->error;
} else
return GL_TRUE;
} else {
struct r300_vertex_program *vp = r300SelectVertexShader(ctx);
if (!vp->translated)
r300TranslateVertexShader(vp);
return !vp->error;
}
}
void r300InitShaderFuncs(struct dd_function_table *functions)
+56 -110
View File
@@ -452,9 +452,9 @@ static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state)
static GLboolean current_fragment_program_writes_depth(GLcontext* ctx)
{
struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
r300ContextPtr r300 = R300_CONTEXT(ctx);
return (fp && fp->writes_depth);
return ctx->FragmentProgram._Current && r300->selected_fp->writes_depth;
}
static void r300SetEarlyZState(GLcontext * ctx)
@@ -1093,24 +1093,25 @@ r300FetchStateParameter(GLcontext * ctx,
* Update R300's own internal state parameters.
* For now just STATE_R300_WINDOW_DIMENSION
*/
void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
{
struct r300_fragment_program *fp;
r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct gl_program_parameter_list *paramList;
GLuint i;
if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)))
return;
fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current;
if (!fp)
if (!ctx->FragmentProgram._Current || !rmesa->selected_fp)
return;
paramList = fp->Base.Base.Parameters;
paramList = rmesa->selected_fp->Base->Parameters;
if (!paramList)
return;
_mesa_load_state_parameters(ctx, paramList);
for (i = 0; i < paramList->NumParameters; i++) {
if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
r300FetchStateParameter(ctx,
@@ -1225,8 +1226,7 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
int i;
struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
struct r300_fragment_program_code *code = &fp->code.r300;
struct r300_fragment_program_code *code = &r300->selected_fp->code.r300;
R300_STATECHANGE(r300, fpt);
@@ -1266,9 +1266,9 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
int i;
struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
struct r500_fragment_program_code *code = &fp->code.r500;
struct r500_fragment_program_code *code = &r300->selected_fp->code.r500;
/* find all the texture instructions and relocate the texture units */
for (i = 0; i < code->inst_end + 1; i++) {
@@ -1316,8 +1316,6 @@ static void r300SetupTextures(GLcontext * ctx)
int hw_tmu = 0;
int last_hw_tmu = -1; /* -1 translates into no setup costs for fields */
int tmu_mappings[R300_MAX_TEXTURE_UNITS] = { -1, };
struct r300_fragment_program *fp = (struct r300_fragment_program *)
(char *)ctx->FragmentProgram._Current;
R300_STATECHANGE(r300, txe);
R300_STATECHANGE(r300, tex.filter);
@@ -1420,7 +1418,7 @@ static void r300SetupTextures(GLcontext * ctx)
cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
if (fp->Base.UsesKill && last_hw_tmu < 0) {
if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) {
// The KILL operation requires the first texture unit
// to be enabled.
r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
@@ -1458,11 +1456,11 @@ static void r300SetupRSUnit(GLcontext * ctx)
hw_tcl_on = r300->options.hw_tcl_enabled;
if (hw_tcl_on)
OutputsWritten.vp_outputs = r300->selected_vp->key.OutputsWritten;
OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten;
else
RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
InputsRead = r300->selected_fp->Base->InputsRead;
R300_STATECHANGE(r300, ri);
R300_STATECHANGE(r300, rc);
@@ -1521,29 +1519,6 @@ static void r300SetupRSUnit(GLcontext * ctx)
++fp_reg;
}
if (InputsRead & FRAG_BIT_WPOS) {
r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count);
r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
InputsRead &= ~FRAG_BIT_WPOS;
rs_tex_count += 4;
++tex_ip;
++fp_reg;
}
if (InputsRead & FRAG_BIT_FOGC) {
if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) {
r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(R300_RS_SEL_K0) | R300_RS_SEL_R(R300_RS_SEL_K0);
r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_Q(R300_RS_SEL_K1) | R300_RS_TEX_PTR(rs_tex_count);
r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
InputsRead &= ~FRAG_BIT_FOGC;
rs_tex_count += 4;
++tex_ip;
++fp_reg;
} else {
WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n");
}
}
/* Setup default color if no color or tex was set */
if (rs_tex_count == 0 && col_ip == 0) {
r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_ADDR(0);
@@ -1575,11 +1550,11 @@ static void r500SetupRSUnit(GLcontext * ctx)
hw_tcl_on = r300->options.hw_tcl_enabled;
if (hw_tcl_on)
OutputsWritten.vp_outputs = r300->selected_vp->key.OutputsWritten;
OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten;
else
RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
InputsRead = r300->selected_fp->Base->InputsRead;
R300_STATECHANGE(r300, ri);
R300_STATECHANGE(r300, rc);
@@ -1642,36 +1617,6 @@ static void r500SetupRSUnit(GLcontext * ctx)
++fp_reg;
}
if (InputsRead & FRAG_BIT_WPOS) {
r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
InputsRead &= ~FRAG_BIT_WPOS;
rs_tex_count += 4;
++tex_ip;
++fp_reg;
}
if (InputsRead & FRAG_BIT_FOGC) {
if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) {
r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= (rs_tex_count << R500_RS_IP_TEX_PTR_S_SHIFT) |
(R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
(R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
(R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
InputsRead &= ~FRAG_BIT_FOGC;
rs_tex_count += 4;
++tex_ip;
++fp_reg;
} else {
WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n");
}
}
/* Setup default color if no color or tex was set */
if (rs_tex_count == 0 && col_ip == 0) {
r300->hw.rr.cmd[R300_RR_INST_0] = R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_ADDR(0);
@@ -2036,43 +1981,51 @@ static void r300ResetHwState(r300ContextPtr r300)
void r300UpdateShaders(r300ContextPtr rmesa)
{
GLcontext *ctx;
struct r300_fragment_program *fp;
int i;
ctx = rmesa->radeon.glCtx;
fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
GLcontext *ctx = rmesa->radeon.glCtx;
/* should only happen once, just after context is created */
/* TODO: shouldn't we fallback to sw here? */
if (!fp) {
if (!ctx->FragmentProgram._Current) {
_mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
return;
}
if (rmesa->radeon.NewGLState && rmesa->options.hw_tcl_enabled) {
for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
rmesa->temp_attrib[i] =
TNL_CONTEXT(ctx)->vb.AttribPtr[i];
TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
&rmesa->dummy_attrib[i];
}
{
struct r300_fragment_program *fp;
_tnl_UpdateFixedFunctionProgram(ctx);
fp = r300SelectFragmentShader(ctx);
if (!fp->translated)
r300TranslateFragmentShader(ctx, fp);
for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
rmesa->temp_attrib[i];
}
r300SelectVertexShader(rmesa);
r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, rmesa->selected_vp->error);
r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error);
}
if (!fp->translated || rmesa->radeon.NewGLState)
r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current);
if (rmesa->options.hw_tcl_enabled) {
struct r300_vertex_program *vp;
r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error);
if (rmesa->radeon.NewGLState) {
int i;
for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
rmesa->temp_attrib[i] =
TNL_CONTEXT(ctx)->vb.AttribPtr[i];
TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
&rmesa->dummy_attrib[i];
}
_tnl_UpdateFixedFunctionProgram(ctx);
for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
rmesa->temp_attrib[i];
}
}
vp = r300SelectVertexShader(ctx);
if (!vp->translated)
r300TranslateVertexShader(vp);
r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error);
}
r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
rmesa->radeon.NewGLState = 0;
@@ -2102,7 +2055,7 @@ static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx,
static void r300SetupPixelShader(GLcontext *ctx)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
struct r300_fragment_program *fp = rmesa->selected_fp;
struct r300_fragment_program_code *code;
int i, k;
@@ -2148,8 +2101,7 @@ static void r300SetupPixelShader(GLcontext *ctx)
R300_STATECHANGE(rmesa, fpp);
rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4);
for (i = 0; i < code->const_nr; i++) {
const GLfloat *constant = get_fragmentprogram_constant(ctx,
&fp->Base.Base, code->constant[i]);
const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]);
@@ -2174,7 +2126,7 @@ static void r300SetupPixelShader(GLcontext *ctx)
static void r500SetupPixelShader(GLcontext *ctx)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
struct r300_fragment_program *fp = rmesa->selected_fp;
int i;
struct r500_fragment_program_code *code;
@@ -2210,8 +2162,7 @@ static void r500SetupPixelShader(GLcontext *ctx)
R300_STATECHANGE(rmesa, r500fp_const);
for (i = 0; i < code->const_nr; i++) {
const GLfloat *constant = get_fragmentprogram_constant(ctx,
&fp->Base.Base, code->constant[i]);
const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]);
@@ -2274,20 +2225,17 @@ void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten)
rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten, ctx->FragmentProgram._Current->Base.InputsRead);
rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten, ctx->FragmentProgram._Current->Base.InputsRead);
rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten);
}
void r300UpdateShaderStates(r300ContextPtr rmesa)
{
GLcontext *ctx;
ctx = rmesa->radeon.glCtx;
struct r300_fragment_program *r300_fp;
r300_fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
/* should only happen once, just after context is created */
if (!r300_fp)
if (!ctx->FragmentProgram._Current)
return;
r300SetEarlyZState(ctx);
@@ -2323,8 +2271,6 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
R300_STATECHANGE(r300, cb);
}
r300UpdateStateParameters(ctx, new_state);
r300->radeon.NewGLState |= new_state;
}
-1
View File
@@ -52,7 +52,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
void r300UpdateViewportOffset (GLcontext * ctx);
void r300UpdateDrawBuffer (GLcontext * ctx);
void r300UpdateStateParameters (GLcontext * ctx, GLuint new_state);
void r300UpdateShaders (r300ContextPtr rmesa);
void r300UpdateShaderStates (r300ContextPtr rmesa);
void r300InitState (r300ContextPtr r300);
+23 -27
View File
@@ -76,7 +76,7 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_
GLuint InputsRead = 0;
GLuint OutputsWritten = 0;
int num_attrs = 0;
GLuint fp_reads = ctx->FragmentProgram._Current->Base.InputsRead;
GLuint fp_reads = rmesa->selected_fp->Base->InputsRead;
struct vertex_attribute *attrs = rmesa->vbuf.attribs;
rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0;
@@ -150,6 +150,22 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_
ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0);
}
if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) {
int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0;
VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
}
if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) {
int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0;
VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
}
/**
* Sending only one texcoord component may lead to lock up,
* so for all textures always output 4 texcoord components to RS.
@@ -192,31 +208,9 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_
}
}
/* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */
if (fp_reads & FRAG_BIT_WPOS) {
if (first_free_tex >= ctx->Const.MaxTextureUnits) {
fprintf(stderr, "\tout of free texcoords to write w pos\n");
_mesa_exit(-1);
}
InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex);
OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex);
EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F );
ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_TEX(first_free_tex), SWIZZLE_XYZW, MASK_XYZW, 0);
++first_free_tex;
}
if (fp_reads & FRAG_BIT_FOGC) {
if (first_free_tex >= ctx->Const.MaxTextureUnits) {
fprintf(stderr, "\tout of free texcoords to write fog coordinate\n");
_mesa_exit(-1);
}
InputsRead |= 1 << VERT_ATTRIB_FOG;
OutputsWritten |= 1 << VERT_RESULT_FOGC;
GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO);
EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F );
ADD_ATTR(VERT_ATTRIB_FOG, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0);
if (first_free_tex >= ctx->Const.MaxTextureUnits) {
fprintf(stderr, "\tout of free texcoords to write fog coordinate\n");
_mesa_exit(-1);
}
R300_NEWPRIM(rmesa);
@@ -497,11 +491,13 @@ void r300RenderStart(GLcontext *ctx)
r300ContextPtr rmesa = R300_CONTEXT( ctx );
r300ChooseRenderState(ctx);
r300UpdateShaders(rmesa);
r300PrepareVertices(ctx);
r300ValidateBuffers(ctx);
r300UpdateShaders(rmesa);
r300UpdateShaderStates(rmesa);
r300EmitCacheFlush(rmesa);
+425 -271
View File
@@ -32,12 +32,15 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/macros.h"
#include "main/enums.h"
#include "shader/program.h"
#include "shader/programopt.h"
#include "shader/prog_instruction.h"
#include "shader/prog_optimize.h"
#include "shader/prog_parameter.h"
#include "shader/prog_print.h"
#include "shader/prog_statevars.h"
#include "tnl/tnl.h"
#include "radeon_nqssadce.h"
#include "r300_context.h"
#include "r300_state.h"
@@ -71,15 +74,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
} while (0)
int r300VertexProgUpdateParams(GLcontext * ctx,
struct r300_vertex_program_cont *vp, float *dst)
static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst)
{
int pi;
struct gl_vertex_program *mesa_vp = &vp->mesa_program;
float *dst_o = dst;
struct gl_program_parameter_list *paramList;
if (mesa_vp->IsNVProgram) {
if (vp->IsNVProgram) {
_mesa_load_tracked_matrices(ctx);
for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
@@ -91,16 +92,18 @@ int r300VertexProgUpdateParams(GLcontext * ctx,
return dst - dst_o;
}
assert(mesa_vp->Base.Parameters);
_mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
if (!vp->Base.Parameters)
return 0;
if (mesa_vp->Base.Parameters->NumParameters * 4 >
_mesa_load_state_parameters(ctx, vp->Base.Parameters);
if (vp->Base.Parameters->NumParameters * 4 >
VSF_MAX_FRAGMENT_LENGTH) {
fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
_mesa_exit(-1);
}
paramList = mesa_vp->Base.Parameters;
paramList = vp->Base.Parameters;
for (pi = 0; pi < paramList->NumParameters; pi++) {
switch (paramList->Parameters[pi].Type) {
case PROGRAM_STATE_VAR:
@@ -933,10 +936,14 @@ static void t_inputs_outputs(struct r300_vertex_program *vp)
{
int i;
int cur_reg;
GLuint OutputsWritten, InputsRead;
OutputsWritten = vp->Base->Base.OutputsWritten;
InputsRead = vp->Base->Base.InputsRead;
cur_reg = -1;
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
if (vp->key.InputsRead & (1 << i))
if (InputsRead & (1 << i))
vp->inputs[i] = ++cur_reg;
else
vp->inputs[i] = -1;
@@ -946,13 +953,13 @@ static void t_inputs_outputs(struct r300_vertex_program *vp)
for (i = 0; i < VERT_RESULT_MAX; i++)
vp->outputs[i] = -1;
assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) {
if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
}
if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
}
@@ -962,46 +969,46 @@ static void t_inputs_outputs(struct r300_vertex_program *vp)
* pretend it does by skipping output index reg so the colors
* get written into appropriate output vectors.
*/
if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) {
if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
vp->outputs[VERT_RESULT_COL0] = cur_reg++;
} else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) ||
vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
} else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
OutputsWritten & (1 << VERT_RESULT_BFC1)) {
cur_reg++;
}
if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) {
if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
vp->outputs[VERT_RESULT_COL1] = cur_reg++;
} else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) ||
vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
} else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
OutputsWritten & (1 << VERT_RESULT_BFC1)) {
cur_reg++;
}
if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
} else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
} else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
cur_reg++;
}
if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
} else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
} else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
cur_reg++;
}
for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
if (vp->key.OutputsWritten & (1 << i)) {
if (OutputsWritten & (1 << i)) {
vp->outputs[i] = cur_reg++;
}
}
if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
}
}
static void r300TranslateVertexShader(struct r300_vertex_program *vp,
struct prog_instruction *vpi)
void r300TranslateVertexShader(struct r300_vertex_program *vp)
{
struct prog_instruction *vpi = vp->Base->Base.Instructions;
int i;
GLuint *inst;
unsigned long num_operands;
@@ -1191,313 +1198,463 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
}
}
/* DP4 version seems to trigger some hw peculiarity */
//#define PREFER_DP4
static void position_invariant(struct gl_program *prog)
static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id)
{
struct prog_instruction *vpi;
struct gl_program_parameter_list *paramList;
int i;
gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
_mesa_insert_instructions(prog, prog->NumInstructions - 1, 2);
/* tokens[4] = matrix modifier */
#ifdef PREFER_DP4
tokens[4] = 0; /* not transposed or inverted */
#else
tokens[4] = STATE_MATRIX_TRANSPOSE;
#endif
paramList = prog->Parameters;
vpi = &prog->Instructions[prog->NumInstructions - 3];
vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
_mesa_init_instructions(vpi, prog->NumInstructions + 4);
vpi->Opcode = OPCODE_MOV;
for (i = 0; i < 4; i++) {
GLint idx;
tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */
idx = _mesa_add_state_reference(paramList, tokens);
#ifdef PREFER_DP4
vpi[i].Opcode = OPCODE_DP4;
vpi[i].StringPos = 0;
vpi[i].Data = 0;
vpi->DstReg.File = PROGRAM_OUTPUT;
vpi->DstReg.Index = VERT_RESULT_HPOS;
vpi->DstReg.WriteMask = WRITEMASK_XYZW;
vpi->DstReg.CondMask = COND_TR;
vpi[i].DstReg.File = PROGRAM_OUTPUT;
vpi[i].DstReg.Index = VERT_RESULT_HPOS;
vpi[i].DstReg.WriteMask = 1 << i;
vpi[i].DstReg.CondMask = COND_TR;
vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
vpi->SrcReg[0].Index = temp_index;
vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
vpi[i].SrcReg[0].Index = idx;
vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
++vpi;
vpi[i].SrcReg[1].File = PROGRAM_INPUT;
vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
#else
if (i == 0)
vpi[i].Opcode = OPCODE_MUL;
else
vpi[i].Opcode = OPCODE_MAD;
vpi->Opcode = OPCODE_MOV;
vpi[i].Data = 0;
vpi->DstReg.File = PROGRAM_OUTPUT;
vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
vpi->DstReg.WriteMask = WRITEMASK_XYZW;
vpi->DstReg.CondMask = COND_TR;
if (i == 3)
vpi[i].DstReg.File = PROGRAM_OUTPUT;
else
vpi[i].DstReg.File = PROGRAM_TEMPORARY;
vpi[i].DstReg.Index = 0;
vpi[i].DstReg.WriteMask = 0xf;
vpi[i].DstReg.CondMask = COND_TR;
vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
vpi->SrcReg[0].Index = temp_index;
vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
vpi[i].SrcReg[0].Index = idx;
vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
++vpi;
vpi[i].SrcReg[1].File = PROGRAM_INPUT;
vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
if (i > 0) {
vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
vpi[i].SrcReg[2].Index = 0;
vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
}
#endif
}
_mesa_copy_instructions(&vpi[i], prog->Instructions,
prog->NumInstructions);
free(prog->Instructions);
prog->Instructions = vpi;
prog->NumInstructions += 4;
vpi = &prog->Instructions[prog->NumInstructions - 1];
assert(vpi->Opcode == OPCODE_END);
vpi->Opcode = OPCODE_END;
}
/**
 * Grow the program by two MOV instructions placed directly in front of
 * the terminating END.  Both read the temporary \p temp_index; the first
 * writes VERT_RESULT_HPOS, the second writes the texcoord output selected
 * by vp->wpos_idx.
 *
 * \param vp          supplies wpos_idx, the texcoord slot to write
 * \param prog        program whose instruction array is replaced
 * \param temp_index  temporary register used as source of both MOVs
 */
static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog,
			GLuint temp_index)
{
	struct prog_instruction *grown;
	struct prog_instruction *mov;
	int added = 0;

	grown = _mesa_alloc_instructions(prog->NumInstructions + 2);
	_mesa_init_instructions(grown, prog->NumInstructions + 2);

	/* everything up to (not including) END keeps its position */
	_mesa_copy_instructions(grown, prog->Instructions,
				prog->NumInstructions - 1);
	/* END moves two slots further back */
	_mesa_copy_instructions(&grown[prog->NumInstructions + 1],
				&prog->Instructions[prog->NumInstructions - 1],
				1);

	/* first free slot: where END used to be */
	mov = &grown[prog->NumInstructions - 1];

	/* MOV result.position, temp */
	mov->Opcode = OPCODE_MOV;
	mov->DstReg.File = PROGRAM_OUTPUT;
	mov->DstReg.Index = VERT_RESULT_HPOS;
	mov->DstReg.WriteMask = WRITEMASK_XYZW;
	mov->DstReg.CondMask = COND_TR;
	mov->SrcReg[0].File = PROGRAM_TEMPORARY;
	mov->SrcReg[0].Index = temp_index;
	mov->SrcReg[0].Swizzle = SWIZZLE_XYZW;
	++mov;
	++added;

	/* MOV result.texcoord[wpos_idx], temp */
	mov->Opcode = OPCODE_MOV;
	mov->DstReg.File = PROGRAM_OUTPUT;
	mov->DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
	mov->DstReg.WriteMask = WRITEMASK_XYZW;
	mov->DstReg.CondMask = COND_TR;
	mov->SrcReg[0].File = PROGRAM_TEMPORARY;
	mov->SrcReg[0].Index = temp_index;
	mov->SrcReg[0].Swizzle = SWIZZLE_XYZW;
	++added;

	free(prog->Instructions);
	prog->Instructions = grown;
	prog->NumInstructions += added;

	/* the relocated END must still be the last instruction */
	assert(prog->Instructions[prog->NumInstructions - 1].Opcode == OPCODE_END);
}
static void pos_as_texcoord(struct r300_vertex_program *vp,
struct gl_program *prog)
static void pos_as_texcoord(struct gl_program *prog, int tex_id)
{
struct prog_instruction *vpi;
GLuint tempregi = prog->NumTemporaries;
/* should do something else if no temps left... */
prog->NumTemporaries++;
for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
if (vpi->DstReg.File == PROGRAM_OUTPUT
&& vpi->DstReg.Index == VERT_RESULT_HPOS) {
if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) {
vpi->DstReg.File = PROGRAM_TEMPORARY;
vpi->DstReg.Index = tempregi;
}
}
insert_wpos(vp, prog, tempregi);
insert_wpos(prog, tempregi, tex_id);
prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
}
static struct r300_vertex_program *build_program(struct r300_vertex_program_key
*wanted_key, struct gl_vertex_program
*mesa_vp, GLint wpos_idx)
/**
* The fogcoord attribute is special in that only the first component
* is relevant, and the remaining components are always fixed (when read
* from by the fragment program) to yield an X001 pattern.
*
* We need to enforce this either in the vertex program or in the fragment
* program, and this code chooses not to enforce it in the vertex program.
* This is slightly cheaper, as long as the fragment program does not use
* weird swizzles.
*
* And it seems that usually, weird swizzles are not used, so...
*
* See also the counterpart rewriting for fragment programs.
*/
static void fog_as_texcoord(struct gl_program *prog, int tex_id)
{
struct r300_vertex_program *vp;
struct prog_instruction *vpi;
vp = _mesa_calloc(sizeof(*vp));
_mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
vp->wpos_idx = wpos_idx;
vpi = prog->Instructions;
while (vpi->Opcode != OPCODE_END) {
if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) {
vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
vpi->DstReg.WriteMask = WRITEMASK_X;
}
if (mesa_vp->IsPositionInvariant) {
position_invariant(&mesa_vp->Base);
++vpi;
}
if (wpos_idx > -1) {
pos_as_texcoord(vp, &mesa_vp->Base);
}
prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC);
prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
}
if (RADEON_DEBUG & DEBUG_VERTS) {
fprintf(stderr, "Vertex program after native rewrite:\n");
_mesa_print_program(&mesa_vp->Base);
fflush(stdout);
/**
 * Rewrite the ABS instruction at \p pos in place as MAX(src, -src).
 *
 * \return number of instructions inserted after \p pos (always 0)
 */
static int translateABS(struct gl_program *prog, int pos)
{
	struct prog_instruction *abs_inst = &prog->Instructions[pos];

	/* second operand: same register with all negate bits flipped */
	abs_inst->SrcReg[1] = abs_inst->SrcReg[0];
	abs_inst->SrcReg[1].Negate ^= NEGATE_XYZW;
	abs_inst->Opcode = OPCODE_MAX;

	return 0;
}
/**
 * Rewrite the DP3 instruction at \p pos in place as a DP4 whose first
 * operand has its fourth component swizzled to ZERO.
 *
 * \return number of instructions inserted after \p pos (always 0)
 */
static int translateDP3(struct gl_program *prog, int pos)
{
	struct prog_instruction *dot = &prog->Instructions[pos];

	dot->SrcReg[0].Swizzle = combine_swizzles4(dot->SrcReg[0].Swizzle,
						   SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
	dot->Opcode = OPCODE_DP4;

	return 0;
}
/**
 * Rewrite the DPH instruction at \p pos in place as a DP4 whose first
 * operand has its fourth component swizzled to ONE.
 *
 * \return number of instructions inserted after \p pos (always 0)
 */
static int translateDPH(struct gl_program *prog, int pos)
{
	struct prog_instruction *dot = &prog->Instructions[pos];

	dot->SrcReg[0].Swizzle = combine_swizzles4(dot->SrcReg[0].Swizzle,
						   SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
	dot->Opcode = OPCODE_DP4;

	return 0;
}
static int translateFLR(struct gl_program *prog, int pos)
{
struct prog_instruction *inst;
struct prog_dst_register dst;
int tmp_idx;
tmp_idx = prog->NumTemporaries++;
_mesa_insert_instructions(prog, pos + 1, 1);
inst = &prog->Instructions[pos];
dst = inst->DstReg;
inst->Opcode = OPCODE_FRC;
inst->DstReg.File = PROGRAM_TEMPORARY;
inst->DstReg.Index = tmp_idx;
++inst;
inst->Opcode = OPCODE_ADD;
inst->DstReg = dst;
inst->SrcReg[0] = (inst-1)->SrcReg[0];
inst->SrcReg[1].File = PROGRAM_TEMPORARY;
inst->SrcReg[1].Index = tmp_idx;
inst->SrcReg[1].Negate = NEGATE_XYZW;
return 1;
}
/**
 * Rewrite the SUB instruction at \p pos in place as an ADD with the
 * negate bits of the second operand flipped.
 *
 * \return number of instructions inserted after \p pos (always 0)
 */
static int translateSUB(struct gl_program *prog, int pos)
{
	struct prog_instruction *sub = &prog->Instructions[pos];

	sub->SrcReg[1].Negate ^= NEGATE_XYZW;
	sub->Opcode = OPCODE_ADD;

	return 0;
}
static int translateSWZ(struct gl_program *prog, int pos)
{
prog->Instructions[pos].Opcode = OPCODE_MOV;
return 0;
}
static int translateXPD(struct gl_program *prog, int pos)
{
struct prog_instruction *inst;
int tmp_idx;
tmp_idx = prog->NumTemporaries++;
_mesa_insert_instructions(prog, pos + 1, 1);
inst = &prog->Instructions[pos];
*(inst+1) = *inst;
inst->Opcode = OPCODE_MUL;
inst->DstReg.File = PROGRAM_TEMPORARY;
inst->DstReg.Index = tmp_idx;
inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
++inst;
inst->Opcode = OPCODE_MAD;
inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
inst->SrcReg[1].Negate ^= NEGATE_XYZW;
inst->SrcReg[2].File = PROGRAM_TEMPORARY;
inst->SrcReg[2].Index = tmp_idx;
return 1;
}
/**
 * Walk the whole program and hand every ABS, DP3, DPH, FLR, SUB, SWZ and
 * XPD instruction to its translate* helper.  Each helper reports how many
 * instructions it inserted behind the current one; those are skipped so
 * they are not examined again.
 */
static void translateInsts(struct gl_program *prog)
{
	int pos = 0;

	while (pos < prog->NumInstructions) {
		int inserted;

		/* the opcode is read before the helper may replace the array */
		switch (prog->Instructions[pos].Opcode) {
		case OPCODE_ABS:
			inserted = translateABS(prog, pos);
			break;
		case OPCODE_DP3:
			inserted = translateDP3(prog, pos);
			break;
		case OPCODE_DPH:
			inserted = translateDPH(prog, pos);
			break;
		case OPCODE_FLR:
			inserted = translateFLR(prog, pos);
			break;
		case OPCODE_SUB:
			inserted = translateSUB(prog, pos);
			break;
		case OPCODE_SWZ:
			inserted = translateSWZ(prog, pos);
			break;
		case OPCODE_XPD:
			inserted = translateXPD(prog, pos);
			break;
		default:
			inserted = 0;
			break;
		}

		pos += inserted + 1;
	}
}
/*
 * If the fragment program reads fp_attr (bit set in FpReads) but the vertex
 * program does not write vp_result (bit clear in prog->OutputsWritten),
 * record vp_result in OutputsAdded and bump count.
 *
 * The macro is not self-contained: it expects FpReads, prog, OutputsAdded
 * and count to be names visible at the point of expansion.
 */
#define ADD_OUTPUT(fp_attr, vp_result) \
do { \
if ((FpReads & (1 << (fp_attr))) && !(prog->OutputsWritten & (1 << (vp_result)))) { \
OutputsAdded |= 1 << (vp_result); \
count++; \
} \
} while (0)
static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
GLuint OutputsAdded, FpReads;
int i, count;
OutputsAdded = 0;
count = 0;
FpReads = r300->selected_fp->Base->InputsRead;
ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
for (i = 0; i < 7; ++i) {
ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
}
/* Some outputs may be artificially added, to match the inputs of the fragment program.
* Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
* vertex program are undefined, so just use MOV [vertex_result], CONST[0]
*/
{
int i, count = 0;
if (count > 0) {
struct prog_instruction *inst;
_mesa_insert_instructions(prog, prog->NumInstructions - 1, count);
inst = &prog->Instructions[prog->NumInstructions - 1 - count];
for (i = 0; i < VERT_RESULT_MAX; ++i) {
if (vp->key.OutputsAdded & (1 << i)) {
++count;
if (OutputsAdded & (1 << i)) {
inst->Opcode = OPCODE_MOV;
inst->DstReg.File = PROGRAM_OUTPUT;
inst->DstReg.Index = i;
inst->DstReg.WriteMask = WRITEMASK_XYZW;
inst->DstReg.CondMask = COND_TR;
inst->SrcReg[0].File = PROGRAM_CONSTANT;
inst->SrcReg[0].Index = 0;
inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
++inst;
}
}
if (count > 0) {
struct prog_instruction *inst;
prog->OutputsWritten |= OutputsAdded;
}
}
_mesa_insert_instructions(&mesa_vp->Base, mesa_vp->Base.NumInstructions - 1, count);
inst = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions - 1 - count];
#undef ADD_OUTPUT
for (i = 0; i < VERT_RESULT_MAX; ++i) {
if (vp->key.OutputsAdded & (1 << i)) {
inst->Opcode = OPCODE_MOV;
static void nqssadceInit(struct nqssadce_state* s)
{
r300ContextPtr r300 = R300_CONTEXT(s->Ctx);
GLuint fp_reads;
inst->DstReg.File = PROGRAM_OUTPUT;
inst->DstReg.Index = i;
inst->DstReg.WriteMask = WRITEMASK_XYZW;
inst->DstReg.CondMask = COND_TR;
fp_reads = r300->selected_fp->Base->InputsRead;
{
if (fp_reads & FRAG_BIT_COL0) {
s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW;
s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW;
}
inst->SrcReg[0].File = PROGRAM_CONSTANT;
inst->SrcReg[0].Index = 0;
inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
if (fp_reads & FRAG_BIT_COL1) {
s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW;
s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW;
}
}
++inst;
}
{
int i;
for (i = 0; i < 8; ++i) {
if (fp_reads & FRAG_BIT_TEX(i)) {
s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW;
}
}
}
assert(mesa_vp->Base.NumInstructions);
vp->num_temporaries = mesa_vp->Base.NumTemporaries;
r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW;
if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ))
s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X;
}
/**
 * IsNativeSwizzle callback: reports every swizzle as native, whatever the
 * opcode or source register.
 */
static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
{
	/* both arguments are intentionally ignored */
	(void) reg;
	(void) opcode;

	return GL_TRUE;
}
/**
 * Create a hardware specific vertex program from \p mesa_vp.
 *
 * The Mesa program is cloned, so \p mesa_vp itself is never modified.
 * The clone is then rewritten step by step: optional MVP code insertion,
 * WPOS/fog routing through texcoords as requested by the selected fragment
 * program, artificial outputs, opcode translation, the radeonNqssaDce pass
 * and Mesa's program optimizer.
 *
 * \param ctx         GL context
 * \param wanted_key  key copied into the new program
 * \param mesa_vp     unmodified vertex program to derive from
 * \return the newly allocated program; num_temporaries is set to the
 *         highest temporary index referenced, plus one
 */
static struct r300_vertex_program *build_program(GLcontext *ctx,
						 struct r300_vertex_program_key *wanted_key,
						 const struct gl_vertex_program *mesa_vp)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);
	struct radeon_nqssadce_descr dce_descr = {
		.Init = &nqssadceInit,
		.IsNativeSwizzle = &swizzleIsNative,
		.BuildSwizzle = NULL
	};
	struct r300_vertex_program *vp;
	struct gl_program *prog;
	struct prog_instruction *scan;
	int highest_temp;
	int num_srcs;
	int s;

	vp = _mesa_calloc(sizeof(*vp));
	vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base);
	_mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));

	prog = &vp->Base->Base;

	if (RADEON_DEBUG & DEBUG_VERTS) {
		fprintf(stderr, "Initial vertex program:\n");
		_mesa_print_program(prog);
		fflush(stdout);
	}

	if (vp->Base->IsPositionInvariant)
		_mesa_insert_mvp_code(ctx, vp->Base);

	/* FRAG_ATTRIB_MAX marks "not routed through a texcoord" */
	if (r300->selected_fp->wpos_attr != FRAG_ATTRIB_MAX)
		pos_as_texcoord(prog, r300->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0);

	if (r300->selected_fp->fog_attr != FRAG_ATTRIB_MAX)
		fog_as_texcoord(prog, r300->selected_fp->fog_attr - FRAG_ATTRIB_TEX0);

	addArtificialOutputs(ctx, prog);

	translateInsts(prog);

	if (RADEON_DEBUG & DEBUG_VERTS) {
		fprintf(stderr, "Vertex program after native rewrite:\n");
		_mesa_print_program(prog);
		fflush(stdout);
	}

	radeonNqssaDce(ctx, prog, &dce_descr);

	/* We need this step for reusing temporary registers */
	_mesa_optimize_program(ctx, prog);

	if (RADEON_DEBUG & DEBUG_VERTS) {
		fprintf(stderr, "Vertex program after NQSSADCE:\n");
		_mesa_print_program(prog);
		fflush(stdout);
	}

	assert(prog->NumInstructions);

	/* We actually want highest index of used temporary register,
	 * not the number of temporaries used.
	 * These values aren't always the same.
	 */
	highest_temp = -1;
	for (scan = prog->Instructions; scan->Opcode != OPCODE_END; ++scan) {
		num_srcs = _mesa_num_inst_src_regs(scan->Opcode);
		for (s = 0; s < num_srcs; ++s) {
			if (scan->SrcReg[s].File == PROGRAM_TEMPORARY &&
			    (int) scan->SrcReg[s].Index > highest_temp)
				highest_temp = scan->SrcReg[s].Index;
		}

		if (_mesa_num_inst_dst_regs(scan->Opcode) &&
		    scan->DstReg.File == PROGRAM_TEMPORARY &&
		    (int) scan->DstReg.Index > highest_temp)
			highest_temp = scan->DstReg.Index;
	}
	vp->num_temporaries = highest_temp + 1;

	return vp;
}
static void add_outputs(struct r300_vertex_program_key *key, GLint vert)
struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx)
{
if (key->OutputsWritten & (1 << vert))
return;
key->OutputsWritten |= 1 << vert;
key->OutputsAdded |= 1 << vert;
}
void r300SelectVertexShader(r300ContextPtr r300)
{
GLcontext *ctx = ctx = r300->radeon.glCtx;
GLuint InputsRead;
r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_vertex_program_key wanted_key = { 0 };
GLint i;
struct r300_vertex_program_cont *vpc;
struct r300_vertex_program *vp;
GLint wpos_idx;
vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
wanted_key.FpReads = r300->selected_fp->Base->InputsRead;
wanted_key.FogAttr = r300->selected_fp->fog_attr;
wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
wpos_idx = -1;
if (InputsRead & FRAG_BIT_WPOS) {
for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
break;
if (i == ctx->Const.MaxTextureUnits) {
fprintf(stderr, "\tno free texcoord found\n");
_mesa_exit(-1);
}
wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
wpos_idx = i;
}
if (vpc->mesa_program.IsPositionInvariant) {
wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
wanted_key.OutputsWritten |= (1 << VERT_RESULT_HPOS);
} else {
add_outputs(&wanted_key, VERT_RESULT_HPOS);
}
if (InputsRead & FRAG_BIT_COL0) {
add_outputs(&wanted_key, VERT_RESULT_COL0);
}
if (InputsRead & FRAG_BIT_COL1) {
add_outputs(&wanted_key, VERT_RESULT_COL1);
}
if (InputsRead & FRAG_BIT_FOGC) {
add_outputs(&wanted_key, VERT_RESULT_FOGC);
}
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
if (InputsRead & (FRAG_BIT_TEX0 << i)) {
add_outputs(&wanted_key, VERT_RESULT_TEX0 + i);
}
}
for (vp = vpc->progs; vp; vp = vp->next)
for (vp = vpc->progs; vp; vp = vp->next) {
if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
== 0) {
r300->selected_vp = vp;
return;
return r300->selected_vp = vp;
}
if (RADEON_DEBUG & DEBUG_VERTS) {
fprintf(stderr, "Initial vertex program:\n");
_mesa_print_program(&vpc->mesa_program.Base);
fflush(stdout);
}
vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
vp = build_program(ctx, &wanted_key, &vpc->mesa_program);
vp->next = vpc->progs;
vpc->progs = vp;
r300->selected_vp = vp;
return r300->selected_vp = vp;
}
#define bump_vpu_count(ptr, new_count) do { \
@@ -1544,25 +1701,22 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
struct r300_vertex_program *prog = rmesa->selected_vp;
int inst_count = 0;
int param_count = 0;
/* Reset state, in case we don't use something */
((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
R300_STATECHANGE(rmesa, vpp);
param_count = r300VertexProgUpdateParams(ctx,
(struct r300_vertex_program_cont *)
ctx->VertexProgram._Current,
(float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
param_count = r300VertexProgUpdateParams(ctx, prog->Base, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
param_count /= 4;
r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code));
inst_count = (prog->hw_code.length / 4) - 1;
r300VapCntl(rmesa, _mesa_bitcount(prog->key.InputsRead),
_mesa_bitcount(prog->key.OutputsWritten), prog->num_temporaries);
r300VapCntl(rmesa, _mesa_bitcount(prog->Base->Base.InputsRead),
_mesa_bitcount(prog->Base->Base.OutputsWritten), prog->num_temporaries);
R300_STATECHANGE(rmesa, pvs);
rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
@@ -34,4 +34,8 @@
void r300SetupVertexProgram(r300ContextPtr rmesa);
struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx);
void r300TranslateVertexShader(struct r300_vertex_program *vp);
#endif
+53 -41
View File
@@ -46,6 +46,7 @@ static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint fil
switch(file) {
case PROGRAM_TEMPORARY: return &s->Temps[index];
case PROGRAM_OUTPUT: return &s->Outputs[index];
case PROGRAM_ADDRESS: return &s->Address;
default: return 0;
}
}
@@ -56,7 +57,7 @@ static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint fil
*
* @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
*/
static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
{
struct prog_src_register tmp = srcreg;
int i;
@@ -114,47 +115,19 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
deswz_source = sourced;
}
struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
struct register_state *regstate;
if (inst->SrcReg[src].RelAddr)
regstate = get_reg_state(s, PROGRAM_ADDRESS, 0);
else
regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
if (regstate)
regstate->Sourced |= deswz_source & 0xf;
return inst;
}
/**
 * Redirect a write to the Z component so that it lands in W instead.
 *
 * If the instruction does not write Z at all its write mask is cleared
 * and nothing else happens.  Otherwise the mask becomes W only and, for
 * the component-wise opcodes listed below, every source operand is
 * reswizzled through ZZZZ.
 */
static void rewrite_depth_out(struct prog_instruction *inst)
{
	int num_reswizzled;
	int s;

	if (!(inst->DstReg.WriteMask & WRITEMASK_Z)) {
		inst->DstReg.WriteMask = 0;
		return;
	}
	inst->DstReg.WriteMask = WRITEMASK_W;

	/* how many leading source operands need the ZZZZ swizzle */
	switch (inst->Opcode) {
	case OPCODE_FRC:
	case OPCODE_MOV:
		num_reswizzled = 1;
		break;
	case OPCODE_ADD:
	case OPCODE_MAX:
	case OPCODE_MIN:
	case OPCODE_MUL:
		num_reswizzled = 2;
		break;
	case OPCODE_CMP:
	case OPCODE_MAD:
		num_reswizzled = 3;
		break;
	default:
		/* Scalar instructions needn't be reswizzled */
		num_reswizzled = 0;
		break;
	}

	for (s = 0; s < num_reswizzled; ++s)
		inst->SrcReg[s] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[s]);
}
static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
{
int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
@@ -189,11 +162,6 @@ static void process_instruction(struct nqssadce_state* s)
return;
if (inst->Opcode != OPCODE_KIL) {
if (s->Descr->RewriteDepthOut) {
if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPTH)
rewrite_depth_out(inst);
}
struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
if (!regstate) {
_mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
@@ -217,6 +185,7 @@ static void process_instruction(struct nqssadce_state* s)
* might change the instruction stream under us, so we have
* to be careful with the inst pointer. */
switch (inst->Opcode) {
case OPCODE_ARL:
case OPCODE_DDX:
case OPCODE_DDY:
case OPCODE_FRC:
@@ -227,6 +196,8 @@ static void process_instruction(struct nqssadce_state* s)
case OPCODE_MAX:
case OPCODE_MIN:
case OPCODE_MUL:
case OPCODE_SGE:
case OPCODE_SLT:
inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
break;
@@ -258,12 +229,51 @@ static void process_instruction(struct nqssadce_state* s)
case OPCODE_TXP:
inst = track_used_srcreg(s, inst, 0, 0xf);
break;
case OPCODE_DST:
inst = track_used_srcreg(s, inst, 0, 0x6);
inst = track_used_srcreg(s, inst, 1, 0xa);
break;
case OPCODE_EXP:
case OPCODE_LOG:
case OPCODE_POW:
inst = track_used_srcreg(s, inst, 0, 0x3);
break;
case OPCODE_LIT:
inst = track_used_srcreg(s, inst, 0, 0xb);
break;
default:
_mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
return;
}
}
static void calculateInputsOutputs(struct gl_program *p)
{
struct prog_instruction *inst;
GLuint InputsRead, OutputsWritten;
inst = p->Instructions;
InputsRead = 0;
OutputsWritten = 0;
while (inst->Opcode != OPCODE_END)
{
int i, num_src_regs;
num_src_regs = _mesa_num_inst_src_regs(inst->Opcode);
for (i = 0; i < num_src_regs; ++i) {
if (inst->SrcReg[i].File == PROGRAM_INPUT)
InputsRead |= 1 << inst->SrcReg[i].Index;
}
if (inst->DstReg.File == PROGRAM_OUTPUT)
OutputsWritten |= 1 << inst->DstReg.Index;
++inst;
}
p->InputsRead = InputsRead;
p->OutputsWritten = OutputsWritten;
}
void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
{
@@ -280,4 +290,6 @@ void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce
s.IP--;
process_instruction(&s);
}
calculateInputsOutputs(p);
}
+2 -5
View File
@@ -58,6 +58,7 @@ struct nqssadce_state {
*/
struct register_state Temps[MAX_PROGRAM_TEMPS];
struct register_state Outputs[VERT_RESULT_MAX];
struct register_state Address;
};
@@ -83,14 +84,10 @@ struct radeon_nqssadce_descr {
*/
void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
/**
* Rewrite instructions that write to DEPR.z to write to DEPR.w
* instead (rewriting is done *before* the WriteMask test).
*/
GLboolean RewriteDepthOut;
void *Data;
};
void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr);
struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg);
#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */
@@ -52,6 +52,38 @@ enum {
#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO)
#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE)
/**
 * Look up one component selector of a swizzle.
 *
 * When bit 2 of \p idx is set (idx & 0x4), \p idx is returned unchanged
 * instead of being used as a lookup position; otherwise the result is
 * GET_SWZ(swz, idx).
 *
 * \param swz  packed swizzle to read from
 * \param idx  component selector
 */
static inline GLuint get_swz(GLuint swz, GLuint idx)
{
	return (idx & 0x4) ? idx : GET_SWZ(swz, idx);
}
/**
 * Build a packed swizzle by resolving four selectors against \p src.
 *
 * Each selector is resolved with get_swz(src, selector) and the four
 * results are packed three bits apart: swz_x at bit 0, swz_y at bit 3,
 * swz_z at bit 6 and swz_w at bit 9.
 *
 * \param src    packed swizzle the selectors are resolved against
 * \param swz_x  selector for the first packed field
 * \param swz_y  selector for the second packed field
 * \param swz_z  selector for the third packed field
 * \param swz_w  selector for the fourth packed field
 * \return the packed combination
 */
static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w)
{
	const GLuint x_field = get_swz(src, swz_x);
	const GLuint y_field = get_swz(src, swz_y) << 3;
	const GLuint z_field = get_swz(src, swz_z) << 6;
	const GLuint w_field = get_swz(src, swz_w) << 9;

	return x_field | y_field | z_field | w_field;
}
/**
 * Apply the packed swizzle \p swz on top of the packed swizzle \p src.
 *
 * The X, Y, Z and W fields of \p swz are extracted with GET_SWZ and then
 * resolved against \p src, with the results packed three bits apart.
 *
 * \param src  packed swizzle the selectors are resolved against
 * \param swz  packed swizzle supplying the four selectors
 * \return the packed combination
 */
static inline GLuint combine_swizzles(GLuint src, GLuint swz)
{
	return combine_swizzles4(src,
				 GET_SWZ(swz, SWIZZLE_X),
				 GET_SWZ(swz, SWIZZLE_Y),
				 GET_SWZ(swz, SWIZZLE_Z),
				 GET_SWZ(swz, SWIZZLE_W));
}
/**
* Transformation context that is passed to local transformations.
*
@@ -870,7 +870,7 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
_mesa_bzero(&s, sizeof(s));
s.Ctx = ctx;
s.Program = program;
s.Program = _mesa_clone_program(ctx, program);
s.Handler = handler;
s.UserData = userdata;
s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
@@ -904,6 +904,8 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
_mesa_free(s.ValuePool);
_mesa_free(s.ReaderPool);
_mesa_reference_program(ctx, &s.Program, NULL);
return !s.Error;
}