diff --git a/src/gallium/drivers/r300/compiler/radeon_code.c b/src/gallium/drivers/r300/compiler/radeon_code.c
index 1c7e4800d82..8f45845634a 100644
--- a/src/gallium/drivers/r300/compiler/radeon_code.c
+++ b/src/gallium/drivers/r300/compiler/radeon_code.c
@@ -155,15 +155,48 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da
 	return rc_constants_add(c, &constant);
 }
 
-void rc_constants_print(struct rc_constant_list * c)
+/* Letter used when printing swizzle channel `swz`; 'u' for anything that is
+ * not one of RC_SWIZZLE_X..W (e.g. an unused channel). */
+static char swizzle_char(unsigned swz)
 {
-	unsigned int i;
-	for(i = 0; i < c->Count; i++) {
+	switch (swz) {
+	case RC_SWIZZLE_X:
+		return 'x';
+	case RC_SWIZZLE_Y:
+		return 'y';
+	case RC_SWIZZLE_Z:
+		return 'z';
+	case RC_SWIZZLE_W:
+		return 'w';
+	default:
+		return 'u';
+	}
+}
+
+/* Dump the constant list to stderr. Immediates are printed per channel
+ * according to UseMask; when a remap table `r` is given (may be NULL),
+ * externals are printed as the CONST[index].channel feeding each channel. */
+void rc_constants_print(struct rc_constant_list *c, struct const_remap *r)
+{
+	for (unsigned i = 0; i < c->Count; i++) {
 		if (c->Constants[i].Type == RC_CONSTANT_IMMEDIATE) {
-			float * values = c->Constants[i].u.Immediate;
-			fprintf(stderr, "CONST[%u] = "
-				"{ %10.4f %10.4f %10.4f %10.4f }\n",
-				i, values[0],values[1], values[2], values[3]);
+			float *values = c->Constants[i].u.Immediate;
+			fprintf(stderr, "CONST[%u] = {", i);
+			for (unsigned chan = 0; chan < 4; chan++) {
+				if (c->Constants[i].UseMask & (1 << chan))
+					fprintf(stderr, "%11.6f ", values[chan]);
+				else
+					fprintf(stderr, " unused ");
+			}
+			fprintf(stderr, "}\n");
+		}
+		if (r && c->Constants[i].Type == RC_CONSTANT_EXTERNAL) {
+			fprintf(stderr, "CONST[%u] = {", i);
+			for (unsigned chan = 0; chan < 4; chan++) {
+				fprintf(stderr, "CONST[%i].%c ", r[i].index[chan],
+					swizzle_char(r[i].swizzle[chan]));
+			}
+			fprintf(stderr, " }\n");
 		}
 	}
 }
diff --git a/src/gallium/drivers/r300/compiler/radeon_code.h b/src/gallium/drivers/r300/compiler/radeon_code.h
index 0a56bda7a72..e7bdd0242a7 100644
--- a/src/gallium/drivers/r300/compiler/radeon_code.h
+++ b/src/gallium/drivers/r300/compiler/radeon_code.h
@@ -74,6 +74,14 @@ struct rc_constant_list {
 	unsigned _Reserved;
 };
 
+/* Per-channel constant mapping: for each of the four channels, the index of
+ * another constant and the channel (RC_SWIZZLE_*) within it. In the remap
+ * table handed to the driver, index -1 / RC_SWIZZLE_UNUSED means "no source". */
+struct const_remap {
+	int index[4];
+	uint8_t swizzle[4];
+};
+
 void rc_constants_init(struct rc_constant_list * c);
 void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
 void rc_constants_destroy(struct rc_constant_list * c);
@@ -81,7 +89,7 @@ unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * cons
 unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2);
 unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
 unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
-void rc_constants_print(struct rc_constant_list * c);
+void rc_constants_print(struct rc_constant_list *c, struct const_remap *r);
 
 /**
  * Compare functions.
@@ -226,7 +234,7 @@ struct rX00_fragment_program_code {
 	unsigned writes_depth:1;
 
 	struct rc_constant_list constants;
-	unsigned *constants_remap_table;
+	struct const_remap *constants_remap_table;
 };
 
 
@@ -257,7 +265,7 @@ struct r300_vertex_program_code {
 	unsigned last_pos_write;
 
 	struct rc_constant_list constants;
-	unsigned *constants_remap_table;
+	struct const_remap *constants_remap_table;
 
 	uint32_t InputsRead;
 	uint32_t OutputsWritten;
diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c
index 76cbbf43306..c35813ba6ac 100644
--- a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c
+++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c
@@ -566,5 +566,5 @@ void rc_dataflow_swizzles(struct radeon_compiler * c, void *user)
 		}
 	}
 	if (c->Debug & RC_DBG_LOG)
-		rc_constants_print(&c->Program.Constants);
+		rc_constants_print(&c->Program.Constants, NULL);
 }
diff --git a/src/gallium/drivers/r300/compiler/radeon_remove_constants.c b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c
index b9f418d0d15..b735afe6877 100644
--- a/src/gallium/drivers/r300/compiler/radeon_remove_constants.c
+++ b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c
@@ -4,126 +4,185 @@
  */
 
 #include <stdlib.h>
+#include <stdbool.h>
 #include "radeon_remove_constants.h"
 #include "radeon_dataflow.h"
+#include "util/bitscan.h"
 
-struct mark_used_data {
-	unsigned char * const_used;
-	unsigned * has_rel_addr;
+/* Working state shared by the passes of rc_remove_unused_constants(). */
+struct const_remap_state {
+	/* Used when emitting shader constants. */
+	struct const_remap *remap_table;
+	/* Used when rewriting registers. */
+	struct const_remap *inv_remap_table;
+	/* Old constant layout. */
+	struct rc_constant *constants;
+	/* New constant layout. */
+	struct rc_constant_list new_constants;
+	bool has_rel_addr;
+	bool are_externals_remapped;
+	bool is_identity;
 };
 
-static void remap_regs(void * userdata, struct rc_instruction * inst,
-			rc_register_file * pfile, unsigned int * pindex)
+/* Rewrite every constant source of `inst` through inv_remap_table: each
+ * channel that reads x/y/z/w gets the new index and swizzle of the channel
+ * it used to read. */
+static void remap_regs(struct rc_instruction *inst,
+		       struct const_remap *inv_remap_table)
 {
-	unsigned *inv_remap_table = userdata;
-
-	if (*pfile == RC_FILE_CONSTANT) {
-		*pindex = inv_remap_table[*pindex];
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	for(unsigned src = 0; src < opcode->NumSrcRegs; ++src) {
+		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT)
+			continue;
+		unsigned old_index = inst->U.I.SrcReg[src].Index;
+		for (unsigned chan = 0; chan < 4; chan++) {
+			unsigned old_swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
+			if (old_swz <= RC_SWIZZLE_W) {
+				inst->U.I.SrcReg[src].Index = inv_remap_table[old_index].index[old_swz];
+				SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
+					inv_remap_table[old_index].swizzle[old_swz]);
+			}
+		}
 	}
 }
 
+/* rc_for_all_reads_src() callback: record which channels of each constant
+ * are read, or note that relative addressing is used. */
 static void mark_used(void * userdata, struct rc_instruction * inst,
 						struct rc_src_register * src)
 {
-	struct mark_used_data * d = userdata;
+	struct const_remap_state* d = userdata;
 
 	if (src->File == RC_FILE_CONSTANT) {
 		if (src->RelAddr) {
-			*d->has_rel_addr = 1;
+			d->has_rel_addr = true;
 		} else {
-			d->const_used[src->Index] = 1;
+			for (unsigned chan = 0; chan < 4; chan++) {
+				char swz = GET_SWZ(src->Swizzle, chan);
+				if (swz > RC_SWIZZLE_W)
+					continue;
+				d->constants[src->Index].UseMask |= 1 << swz;
+			}
+		}
+	}
+}
+
+/* Append old constant `i` to the new layout and record the mapping in both
+ * remap tables; remap_table only gets entries for channels in UseMask. */
+static void place_constant_in_free_slot(struct const_remap_state *s, unsigned i)
+{
+	unsigned count = s->new_constants.Count;
+	for (unsigned chan = 0; chan < 4; chan++) {
+		s->inv_remap_table[i].index[chan] = count;
+		s->inv_remap_table[i].swizzle[chan] = chan;
+		if (s->constants[i].UseMask & (1 << chan)) {
+			s->remap_table[count].index[chan] = i;
+			s->remap_table[count].swizzle[chan] = chan;
+		}
+	}
+	s->new_constants.Constants[count] = s->constants[i];
+
+	if (count != i) {
+		if (s->constants[i].Type == RC_CONSTANT_EXTERNAL)
+			s->are_externals_remapped = true;
+		s->is_identity = false;
+	}
+	s->new_constants.Count++;
+}
+
+/* Allocate the new constant array and both remap tables, clear every
+ * UseMask and mark all remap_table channels as unused. */
+static void init_constant_remap_state(struct radeon_compiler *c, struct const_remap_state *s)
+{
+	s->is_identity = true;
+	s->new_constants.Constants =
+		malloc(sizeof(struct rc_constant) * c->Program.Constants.Count);
+	s->new_constants._Reserved = c->Program.Constants.Count;
+	s->constants = c->Program.Constants.Constants;
+
+	/* Initialize the remap tables. */
+	s->remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap));
+	s->inv_remap_table =
+		malloc(c->Program.Constants.Count * sizeof(struct const_remap));
+	for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
+		/* Clear the UseMask, we will update it later. */
+		s->constants[i].UseMask = 0;
+		for (unsigned swz = 0; swz < 4; swz++) {
+			s->remap_table[i].index[swz] = -1;
+			s->remap_table[i].swizzle[swz] = RC_SWIZZLE_UNUSED;
 		}
 	}
 }
 
+/* Drop constants that no instruction reads and rewrite constant reads to the
+ * compacted layout. `user` points to a struct const_remap *, which receives
+ * the remap table if external constants moved and NULL otherwise. */
 void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
 {
-	unsigned **out_remap_table = (unsigned**)user;
-	unsigned char *const_used;
-	unsigned *remap_table;
-	unsigned *inv_remap_table;
-	unsigned has_rel_addr = 0;
-	unsigned is_identity = 1;
-	unsigned are_externals_remapped = 0;
+	struct const_remap **out_remap_table = (struct const_remap **)user;
 	struct rc_constant *constants = c->Program.Constants.Constants;
-	struct mark_used_data d;
-	unsigned new_count;
+	struct const_remap_state remap_state = {0};
+	struct const_remap_state *s = &remap_state;
 
 	if (!c->Program.Constants.Count) {
 		*out_remap_table = NULL;
 		return;
 	}
 
-	const_used = malloc(c->Program.Constants.Count);
-	memset(const_used, 0, c->Program.Constants.Count);
-
-	d.const_used = const_used;
-	d.has_rel_addr = &has_rel_addr;
+	init_constant_remap_state(c, s);
 
 	/* Pass 1: Mark used constants. */
 	for (struct rc_instruction *inst = c->Program.Instructions.Next;
 	     inst != &c->Program.Instructions; inst = inst->Next) {
-		rc_for_all_reads_src(inst, mark_used, &d);
+		rc_for_all_reads_src(inst, mark_used, s);
 	}
 
 	/* Pass 2: If there is relative addressing or dead constant elimination
 	 * is disabled, mark all externals as used. */
-	if (has_rel_addr || !c->remove_unused_constants) {
+	if (s->has_rel_addr || !c->remove_unused_constants) {
 		for (unsigned i = 0; i < c->Program.Constants.Count; i++)
 			if (constants[i].Type == RC_CONSTANT_EXTERNAL)
-				const_used[i] = 1;
+				s->constants[i].UseMask = RC_MASK_XYZW;
 	}
 
 	/* Pass 3: Make the remapping table and remap constants.
 	 * This pass removes unused constants simply by overwriting them by other constants. */
-	remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
-	inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
-	new_count = 0;
 
 	for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
-		if (const_used[i]) {
-			remap_table[new_count] = i;
-			inv_remap_table[i] = new_count;
-
-			if (i != new_count) {
-				if (constants[i].Type == RC_CONSTANT_EXTERNAL)
-					are_externals_remapped = 1;
-
-				constants[new_count] = constants[i];
-				is_identity = 0;
-			}
-			new_count++;
+		if (s->constants[i].UseMask) {
+			place_constant_in_free_slot(s, i);
 		}
 	}
 
 	/* is_identity ==> new_count == old_count
 	 * !is_identity ==> new_count < old_count */
-	assert( is_identity || new_count < c->Program.Constants.Count);
-	assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped));
+	assert(s->is_identity || s->new_constants.Count < c->Program.Constants.Count);
+	assert(!((s->has_rel_addr || !c->remove_unused_constants) && s->are_externals_remapped));
 
 	/* Pass 4: Redirect reads of all constants to their new locations. */
-	if (!is_identity) {
+	if (!s->is_identity) {
 		for (struct rc_instruction *inst = c->Program.Instructions.Next;
 		     inst != &c->Program.Instructions; inst = inst->Next) {
-			rc_remap_registers(inst, remap_regs, inv_remap_table);
+			remap_regs(inst, s->inv_remap_table);
 		}
 	}
 
 	/* Set the new constant count. Note that new_count may be less than
 	 * Count even though the remapping function is identity. In that case,
 	 * the constants have been removed at the end of the array. */
-	c->Program.Constants.Count = new_count;
+	rc_constants_destroy(&c->Program.Constants);
+	c->Program.Constants = s->new_constants;
 
-	if (are_externals_remapped) {
-		*out_remap_table = remap_table;
+	if (s->are_externals_remapped) {
+		*out_remap_table = s->remap_table;
 	} else {
 		*out_remap_table = NULL;
-		free(remap_table);
+		free(s->remap_table);
 	}
 
-	free(const_used);
-	free(inv_remap_table);
+	free(s->inv_remap_table);
 
+	/* Print via *out_remap_table: s->remap_table is freed above when no external moved. */
 	if (c->Debug & RC_DBG_LOG)
-		rc_constants_print(&c->Program.Constants);
+		rc_constants_print(&c->Program.Constants, *out_remap_table);
 }
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index bd1be07ef1d..c358663a342 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -19,6 +19,7 @@
 #include "r300_defines.h"
 #include "r300_screen.h"
 #include "compiler/radeon_regalloc.h"
+#include "compiler/radeon_code.h"
 
 struct u_upload_mgr;
 struct r300_context;
@@ -253,7 +254,7 @@ struct r300_constant_buffer {
     /* Buffer of constants */
     uint32_t *ptr;
     /* Remapping table. */
-    unsigned *remap_table;
+    struct const_remap *remap_table;
     /* const buffer base */
     uint32_t buffer_base;
 };
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 74f967dbb75..f2a40b0108e 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -212,9 +212,15 @@ void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat
     OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count * 4);
     if (buf->remap_table){
         for (i = 0; i < count; i++) {
-            float *data = (float*)&buf->ptr[buf->remap_table[i]*4];
-            for (j = 0; j < 4; j++)
-                OUT_CS(pack_float24(data[j]));
+            for (j = 0; j < 4; j++) {
+                unsigned swz = buf->remap_table[i].swizzle[j];
+                int index = buf->remap_table[i].index[j];
+                if (index == -1)
+                    OUT_CS(pack_float24(0.0f));
+                else {
+                    OUT_CS(pack_float24(*(float*)&buf->ptr[index * 4 + swz]));
+                }
+            }
         }
     } else {
         for (i = 0; i < count; i++)
@@ -277,7 +283,11 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat
     OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count * 4);
     if (buf->remap_table){
         for (unsigned i = 0; i < count; i++) {
-            uint32_t *data = &buf->ptr[buf->remap_table[i]*4];
+            uint32_t data[4] = {0};
+            for (unsigned chan = 0; chan < 4; chan++){
+                if (buf->remap_table[i].swizzle[chan] != RC_SWIZZLE_UNUSED)
+                    data[chan] = buf->ptr[buf->remap_table[i].index[chan] * 4 + buf->remap_table[i].swizzle[chan]];
+            }
             OUT_CS_TABLE(data, 4);
         }
     } else {
@@ -1162,9 +1172,16 @@ void r300_emit_vs_constants(struct r300_context* r300,
                    R500_PVS_CONST_START : R300_PVS_CONST_START) + buf->buffer_base);
         OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4);
         if (buf->remap_table){
+            uint32_t *data = buf->ptr;
             for (i = 0; i < count; i++) {
-                uint32_t *data = &buf->ptr[buf->remap_table[i]*4];
-                OUT_CS_TABLE(data, 4);
+                /* Channels without a source constant are uploaded as zero. */
+                uint32_t constant[4] = {0};
+                for (unsigned chan = 0; chan < 4; chan++) {
+                    if (buf->remap_table[i].swizzle[chan] != RC_SWIZZLE_UNUSED)
+                        constant[chan] = data[buf->remap_table[i].index[chan] * 4 +
+                                              buf->remap_table[i].swizzle[chan]];
+                }
+                OUT_CS_TABLE(constant, 4);
             }
         } else {
             OUT_CS_TABLE(buf->ptr, count * 4);