nv50: record last access to temp and attr regs
We now inspect the TGSI instructions in tx_prep to determine where temps and FP attrs are last accessed. This will enable us to reclaim some temporaries early and we also use it to omit pre-loading FP attributes that aren't used.
This commit is contained in:
committed by
Ben Skeggs
parent
7e7d3a87ec
commit
bcecb8ff66
@@ -85,6 +85,8 @@ struct nv50_reg {
|
||||
|
||||
int hw;
|
||||
int neg;
|
||||
|
||||
int acc; /* instruction where this reg is last read (first insn == 1) */
|
||||
};
|
||||
|
||||
struct nv50_pc {
|
||||
@@ -108,6 +110,10 @@ struct nv50_pc {
|
||||
|
||||
struct nv50_reg *temp_temp[16];
|
||||
unsigned temp_temp_nr;
|
||||
|
||||
/* current instruction and total number of insns */
|
||||
unsigned insn_cur;
|
||||
unsigned insn_nr;
|
||||
};
|
||||
|
||||
static void
|
||||
@@ -1323,6 +1329,112 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Adjust a bitmask that indicates what components of a source are used,
|
||||
* we use this in tx_prep so we only load interpolants that are needed.
|
||||
*/
|
||||
static void
|
||||
insn_adjust_mask(const struct tgsi_full_instruction *insn, unsigned *mask)
|
||||
{
|
||||
const struct tgsi_instruction_ext_texture *tex;
|
||||
|
||||
switch (insn->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_DP3:
|
||||
*mask = 0x7;
|
||||
break;
|
||||
case TGSI_OPCODE_DP4:
|
||||
case TGSI_OPCODE_DPH:
|
||||
*mask = 0xF;
|
||||
break;
|
||||
case TGSI_OPCODE_LIT:
|
||||
*mask = 0xB;
|
||||
break;
|
||||
case TGSI_OPCODE_RCP:
|
||||
case TGSI_OPCODE_RSQ:
|
||||
*mask = 0x1;
|
||||
break;
|
||||
case TGSI_OPCODE_TEX:
|
||||
case TGSI_OPCODE_TXP:
|
||||
assert(insn->Instruction.Extended);
|
||||
tex = &insn->InstructionExtTexture;
|
||||
|
||||
*mask = 0x7;
|
||||
if (tex->Texture == TGSI_TEXTURE_1D)
|
||||
*mask = 0x1;
|
||||
else
|
||||
if (tex->Texture == TGSI_TEXTURE_2D)
|
||||
*mask = 0x3;
|
||||
|
||||
if (insn->Instruction.Opcode == TGSI_OPCODE_TXP)
|
||||
*mask |= 0x8;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok,
|
||||
unsigned *r_usage[2])
|
||||
{
|
||||
const struct tgsi_full_instruction *insn;
|
||||
const struct tgsi_full_src_register *src;
|
||||
const struct tgsi_dst_register *dst;
|
||||
|
||||
unsigned i, c, k, n, mask, *acc_p;
|
||||
|
||||
insn = &tok->FullInstruction;
|
||||
dst = &insn->FullDstRegisters[0].DstRegister;
|
||||
mask = dst->WriteMask;
|
||||
|
||||
if (!r_usage[0])
|
||||
r_usage[0] = CALLOC(pc->temp_nr * 4, sizeof(unsigned));
|
||||
if (!r_usage[1])
|
||||
r_usage[1] = CALLOC(pc->attr_nr * 4, sizeof(unsigned));
|
||||
|
||||
if (dst->File == TGSI_FILE_TEMPORARY) {
|
||||
for (c = 0; c < 4; c++) {
|
||||
if (!(mask & (1 << c)))
|
||||
continue;
|
||||
r_usage[0][dst->Index * 4 + c] = pc->insn_nr;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < insn->Instruction.NumSrcRegs; i++) {
|
||||
src = &insn->FullSrcRegisters[i];
|
||||
|
||||
switch (src->SrcRegister.File) {
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
acc_p = r_usage[0];
|
||||
break;
|
||||
case TGSI_FILE_INPUT:
|
||||
acc_p = r_usage[1];
|
||||
break;
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
||||
insn_adjust_mask(insn, &mask);
|
||||
|
||||
for (c = 0; c < 4; c++) {
|
||||
if (!(mask & (1 << c)))
|
||||
continue;
|
||||
|
||||
k = tgsi_util_get_full_src_register_extswizzle(src, c);
|
||||
switch (k) {
|
||||
case TGSI_EXTSWIZZLE_X:
|
||||
case TGSI_EXTSWIZZLE_Y:
|
||||
case TGSI_EXTSWIZZLE_Z:
|
||||
case TGSI_EXTSWIZZLE_W:
|
||||
n = src->SrcRegister.Index * 4 + k;
|
||||
acc_p[n] = pc->insn_nr;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static boolean
|
||||
nv50_program_tx_prep(struct nv50_pc *pc)
|
||||
{
|
||||
@@ -1330,6 +1442,11 @@ nv50_program_tx_prep(struct nv50_pc *pc)
|
||||
boolean ret = FALSE;
|
||||
unsigned i, c;
|
||||
|
||||
/* track register access for temps and attrs */
|
||||
unsigned *r_usage[2];
|
||||
r_usage[0] = NULL;
|
||||
r_usage[1] = NULL;
|
||||
|
||||
tgsi_parse_init(&p, pc->p->pipe.tokens);
|
||||
while (!tgsi_parse_end_of_tokens(&p)) {
|
||||
const union tgsi_full_token *tok = &p.FullToken;
|
||||
@@ -1382,6 +1499,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
|
||||
}
|
||||
break;
|
||||
case TGSI_TOKEN_TYPE_INSTRUCTION:
|
||||
pc->insn_nr++;
|
||||
prep_inspect_insn(pc, tok, r_usage);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -1398,6 +1517,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
|
||||
pc->temp[i*4+c].type = P_TEMP;
|
||||
pc->temp[i*4+c].hw = -1;
|
||||
pc->temp[i*4+c].index = i;
|
||||
pc->temp[i*4+c].acc = r_usage[0][i*4+c];
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1427,6 +1547,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
|
||||
pc->attr[i*4+c].type = at->type;
|
||||
pc->attr[i*4+c].hw = at->hw;
|
||||
pc->attr[i*4+c].index = at->index;
|
||||
pc->attr[i*4+c].acc = r_usage[1][i*4+c];
|
||||
} else {
|
||||
pc->p->cfg.vp.attr[aid/32] |=
|
||||
(1 << (aid % 32));
|
||||
@@ -1504,6 +1625,11 @@ nv50_program_tx_prep(struct nv50_pc *pc)
|
||||
|
||||
ret = TRUE;
|
||||
out_err:
|
||||
if (r_usage[0])
|
||||
FREE(r_usage[0]);
|
||||
if (r_usage[1])
|
||||
FREE(r_usage[1]);
|
||||
|
||||
tgsi_parse_free(&p);
|
||||
return ret;
|
||||
}
|
||||
@@ -1558,6 +1684,7 @@ nv50_program_tx(struct nv50_program *p)
|
||||
|
||||
switch (tok->Token.Type) {
|
||||
case TGSI_TOKEN_TYPE_INSTRUCTION:
|
||||
++pc->insn_cur;
|
||||
ret = nv50_program_tx_insn(pc, tok);
|
||||
if (ret == FALSE)
|
||||
goto out_err;
|
||||
|
||||
Reference in New Issue
Block a user