r300: switch to a new constant remap table format

Instead of just moving around constants as full vec4, we will now have
the flexibility to shuffle scalars around. However, this commit just
prepares the infrastructure and converts to it, while the constant
elimination logic remains the same, i.e., we only remove a constant if it
is fully unused and there is no constant compaction whatsoever.

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Filip Gawin <filip.gawin@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28630>
This commit is contained in:
Pavel Ondračka
2024-03-28 21:26:49 +01:00
parent 71761e2117
commit 5d3483bfe4
6 changed files with 168 additions and 74 deletions
@@ -155,15 +155,43 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da
return rc_constants_add(c, &constant);
}
void rc_constants_print(struct rc_constant_list * c)
static char swizzle_char(unsigned swz)
{
unsigned int i;
for(i = 0; i < c->Count; i++) {
switch (swz) {
case RC_SWIZZLE_X:
return 'x';
case RC_SWIZZLE_Y:
return 'y';
case RC_SWIZZLE_Z:
return 'z';
case RC_SWIZZLE_W:
return 'w';
default:
return 'u';
}
}
void rc_constants_print(struct rc_constant_list *c, struct const_remap *r)
{
for (unsigned i = 0; i < c->Count; i++) {
if (c->Constants[i].Type == RC_CONSTANT_IMMEDIATE) {
float * values = c->Constants[i].u.Immediate;
fprintf(stderr, "CONST[%u] = "
"{ %10.4f %10.4f %10.4f %10.4f }\n",
i, values[0],values[1], values[2], values[3]);
float *values = c->Constants[i].u.Immediate;
fprintf(stderr, "CONST[%u] = {", i);
for (unsigned chan = 0; chan < 4; chan++) {
if (c->Constants[i].UseMask & 1 << chan)
fprintf(stderr, "%11.6f ", values[chan]);
else
fprintf(stderr, " unused ");
}
fprintf(stderr, "}\n");
}
if (r && c->Constants[i].Type == RC_CONSTANT_EXTERNAL) {
fprintf(stderr, "CONST[%u] = {", i);
for (unsigned chan = 0; chan < 4; chan++) {
fprintf(stderr, "CONST[%i].%c ", r[i].index[chan],
swizzle_char(r[i].swizzle[chan]));
}
fprintf(stderr, " }\n");
}
}
}
@@ -74,6 +74,11 @@ struct rc_constant_list {
unsigned _Reserved;
};
/**
 * Per-channel constant remapping entry.
 *
 * Each of the four channels of a constant carries its own (index, swizzle)
 * pair, so a channel can refer to any channel of any other constant rather
 * than the whole vec4 being moved as a unit.
 */
struct const_remap {
/* Constant index referenced by each channel; the remap table is
 * initialized with -1 for channels that have no source. */
int index[4];
/* Channel (RC_SWIZZLE_X..W) inside the referenced constant; the remap
 * table is initialized with RC_SWIZZLE_UNUSED for channels without a source. */
uint8_t swizzle[4];
};
void rc_constants_init(struct rc_constant_list * c);
void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
void rc_constants_destroy(struct rc_constant_list * c);
@@ -81,7 +86,7 @@ unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * cons
unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2);
unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
void rc_constants_print(struct rc_constant_list * c);
void rc_constants_print(struct rc_constant_list *c, struct const_remap *r);
/**
* Compare functions.
@@ -226,7 +231,7 @@ struct rX00_fragment_program_code {
unsigned writes_depth:1;
struct rc_constant_list constants;
unsigned *constants_remap_table;
struct const_remap *constants_remap_table;
};
@@ -257,7 +262,7 @@ struct r300_vertex_program_code {
unsigned last_pos_write;
struct rc_constant_list constants;
unsigned *constants_remap_table;
struct const_remap *constants_remap_table;
uint32_t InputsRead;
uint32_t OutputsWritten;
@@ -566,5 +566,5 @@ void rc_dataflow_swizzles(struct radeon_compiler * c, void *user)
}
}
if (c->Debug & RC_DBG_LOG)
rc_constants_print(&c->Program.Constants);
rc_constants_print(&c->Program.Constants, NULL);
}
@@ -4,126 +4,171 @@
*/
#include <stdlib.h>
#include <stdbool.h>
#include "radeon_remove_constants.h"
#include "radeon_dataflow.h"
#include "util/bitscan.h"
struct mark_used_data {
unsigned char * const_used;
unsigned * has_rel_addr;
/**
 * Working state of the unused-constant removal pass.
 */
struct const_remap_state {
/* Used when emitting shader constants: indexed by new constant slot,
 * each used channel names the old constant index and channel it comes from. */
struct const_remap *remap_table;
/* Used when rewriting registers: indexed by old constant index,
 * each channel names the new constant slot and channel it was placed in. */
struct const_remap *inv_remap_table;
/* Old constant layout. */
struct rc_constant *constants;
/* New constant layout. */
struct rc_constant_list new_constants;
/* Set when some constant source register uses relative addressing. */
bool has_rel_addr;
/* Set when an RC_CONSTANT_EXTERNAL constant ends up at a different index. */
bool are_externals_remapped;
/* Starts out true; cleared as soon as any constant changes its index. */
bool is_identity;
};
static void remap_regs(void * userdata, struct rc_instruction * inst,
rc_register_file * pfile, unsigned int * pindex)
static void remap_regs(struct rc_instruction *inst,
struct const_remap *inv_remap_table)
{
unsigned *inv_remap_table = userdata;
if (*pfile == RC_FILE_CONSTANT) {
*pindex = inv_remap_table[*pindex];
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
for(unsigned src = 0; src < opcode->NumSrcRegs; ++src) {
if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT)
continue;
unsigned old_index = inst->U.I.SrcReg[src].Index;
for (unsigned chan = 0; chan < 4; chan++) {
unsigned old_swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
if (old_swz <= RC_SWIZZLE_W) {
inst->U.I.SrcReg[src].Index = inv_remap_table[old_index].index[old_swz];
SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
inv_remap_table[old_index].swizzle[old_swz]);
}
}
}
}
static void mark_used(void * userdata, struct rc_instruction * inst,
struct rc_src_register * src)
{
struct mark_used_data * d = userdata;
struct const_remap_state* d = userdata;
if (src->File == RC_FILE_CONSTANT) {
if (src->RelAddr) {
*d->has_rel_addr = 1;
d->has_rel_addr = true;
} else {
d->const_used[src->Index] = 1;
for (unsigned chan = 0; chan < 4; chan++) {
char swz = GET_SWZ(src->Swizzle, chan);
if (swz > RC_SWIZZLE_W)
continue;
d->constants[src->Index].UseMask |= 1 << swz;
}
}
}
}
static void place_constant_in_free_slot(struct const_remap_state *s, unsigned i)
{
unsigned count = s->new_constants.Count;
for (unsigned chan = 0; chan < 4; chan++) {
s->inv_remap_table[i].index[chan] = count;
s->inv_remap_table[i].swizzle[chan] = chan;
if (s->constants[i].UseMask & (1 << chan)) {
s->remap_table[count].index[chan] = i;
s->remap_table[count].swizzle[chan] = chan;
}
}
s->new_constants.Constants[count] = s->constants[i];
if (count != i) {
if (s->constants[i].Type == RC_CONSTANT_EXTERNAL)
s->are_externals_remapped = true;
s->is_identity = false;
}
s->new_constants.Count++;
}
static void init_constant_remap_state(struct radeon_compiler *c, struct const_remap_state *s)
{
s->is_identity = true;
s->new_constants.Constants =
malloc(sizeof(struct rc_constant) * c->Program.Constants.Count);
s->new_constants._Reserved = c->Program.Constants.Count;
s->constants = c->Program.Constants.Constants;
/* Initialize the remap tables. */
s->remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap));
s->inv_remap_table =
malloc(c->Program.Constants.Count * sizeof(struct const_remap));
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
/* Clear the UseMask, we will update it later. */
s->constants[i].UseMask = 0;
for (unsigned swz = 0; swz < 4; swz++) {
s->remap_table[i].index[swz] = -1;
s->remap_table[i].swizzle[swz] = RC_SWIZZLE_UNUSED;
}
}
}
void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
{
unsigned **out_remap_table = (unsigned**)user;
unsigned char *const_used;
unsigned *remap_table;
unsigned *inv_remap_table;
unsigned has_rel_addr = 0;
unsigned is_identity = 1;
unsigned are_externals_remapped = 0;
struct const_remap **out_remap_table = (struct const_remap **)user;
struct rc_constant *constants = c->Program.Constants.Constants;
struct mark_used_data d;
unsigned new_count;
struct const_remap_state remap_state = {};
struct const_remap_state *s = &remap_state;
if (!c->Program.Constants.Count) {
*out_remap_table = NULL;
return;
}
const_used = malloc(c->Program.Constants.Count);
memset(const_used, 0, c->Program.Constants.Count);
d.const_used = const_used;
d.has_rel_addr = &has_rel_addr;
init_constant_remap_state(c, s);
/* Pass 1: Mark used constants. */
for (struct rc_instruction *inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions; inst = inst->Next) {
rc_for_all_reads_src(inst, mark_used, &d);
rc_for_all_reads_src(inst, mark_used, s);
}
/* Pass 2: If there is relative addressing or dead constant elimination
* is disabled, mark all externals as used. */
if (has_rel_addr || !c->remove_unused_constants) {
if (s->has_rel_addr || !c->remove_unused_constants) {
for (unsigned i = 0; i < c->Program.Constants.Count; i++)
if (constants[i].Type == RC_CONSTANT_EXTERNAL)
const_used[i] = 1;
s->constants[i].UseMask = RC_MASK_XYZW;
}
/* Pass 3: Make the remapping table and remap constants.
* This pass removes unused constants simply by overwriting them by other constants. */
remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
new_count = 0;
for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
if (const_used[i]) {
remap_table[new_count] = i;
inv_remap_table[i] = new_count;
if (i != new_count) {
if (constants[i].Type == RC_CONSTANT_EXTERNAL)
are_externals_remapped = 1;
constants[new_count] = constants[i];
is_identity = 0;
}
new_count++;
if (s->constants[i].UseMask) {
place_constant_in_free_slot(s, i);
}
}
/* is_identity ==> new_count == old_count
* !is_identity ==> new_count < old_count */
assert( is_identity || new_count < c->Program.Constants.Count);
assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped));
assert(s->is_identity || s->new_constants.Count < c->Program.Constants.Count);
assert(!((s->has_rel_addr || !c->remove_unused_constants) && s->are_externals_remapped));
/* Pass 4: Redirect reads of all constants to their new locations. */
if (!is_identity) {
if (!s->is_identity) {
for (struct rc_instruction *inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions; inst = inst->Next) {
rc_remap_registers(inst, remap_regs, inv_remap_table);
remap_regs(inst, s->inv_remap_table);
}
}
/* Set the new constant count. Note that new_count may be less than
* Count even though the remapping function is identity. In that case,
* the constants have been removed at the end of the array. */
c->Program.Constants.Count = new_count;
rc_constants_destroy(&c->Program.Constants);
c->Program.Constants = s->new_constants;
if (are_externals_remapped) {
*out_remap_table = remap_table;
if (s->are_externals_remapped) {
*out_remap_table = s->remap_table;
} else {
*out_remap_table = NULL;
free(remap_table);
free(s->remap_table);
}
free(const_used);
free(inv_remap_table);
free(s->inv_remap_table);
if (c->Debug & RC_DBG_LOG)
rc_constants_print(&c->Program.Constants);
rc_constants_print(&c->Program.Constants, s->remap_table);
}
+2 -1
View File
@@ -19,6 +19,7 @@
#include "r300_defines.h"
#include "r300_screen.h"
#include "compiler/radeon_regalloc.h"
#include "compiler/radeon_code.h"
struct u_upload_mgr;
struct r300_context;
@@ -253,7 +254,7 @@ struct r300_constant_buffer {
/* Buffer of constants */
uint32_t *ptr;
/* Remapping table. */
unsigned *remap_table;
struct const_remap *remap_table;
/* const buffer base */
uint32_t buffer_base;
};
+21 -6
View File
@@ -212,9 +212,15 @@ void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat
OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count * 4);
if (buf->remap_table){
for (i = 0; i < count; i++) {
float *data = (float*)&buf->ptr[buf->remap_table[i]*4];
for (j = 0; j < 4; j++)
OUT_CS(pack_float24(data[j]));
for (j = 0; j < 4; j++) {
unsigned swz = buf->remap_table[i].swizzle[j];
unsigned index = buf->remap_table[i].index[j];
if (index == -1)
OUT_CS(pack_float24(0.0f));
else {
OUT_CS(pack_float24(*(float*)&buf->ptr[index * 4 + swz]));
}
}
}
} else {
for (i = 0; i < count; i++)
@@ -277,7 +283,11 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat
OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count * 4);
if (buf->remap_table){
for (unsigned i = 0; i < count; i++) {
uint32_t *data = &buf->ptr[buf->remap_table[i]*4];
uint32_t data[4] = {};
for (unsigned chan = 0; chan < 4; chan++){
if (buf->remap_table[i].swizzle[chan] != RC_SWIZZLE_UNUSED)
data[chan] = buf->ptr[buf->remap_table[i].index[chan] * 4 + buf->remap_table[i].swizzle[chan]];
}
OUT_CS_TABLE(data, 4);
}
} else {
@@ -1162,9 +1172,14 @@ void r300_emit_vs_constants(struct r300_context* r300,
R500_PVS_CONST_START : R300_PVS_CONST_START) + buf->buffer_base);
OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4);
if (buf->remap_table){
uint32_t *data = buf->ptr;
for (i = 0; i < count; i++) {
uint32_t *data = &buf->ptr[buf->remap_table[i]*4];
OUT_CS_TABLE(data, 4);
uint32_t constant[4];
for (unsigned chan = 0; chan < 4; chan++) {
constant[chan] = data[buf->remap_table[i].index[chan] * 4 +
buf->remap_table[i].swizzle[chan]];
}
OUT_CS_TABLE(constant, 4);
}
} else {
OUT_CS_TABLE(buf->ptr, count * 4);