nv50: prepare for having multiple functions

At some point we'll want to support real subroutines instead of
just inlining them into the main shader.

Since recursive calls are forbidden, we can just save all used
registers to a fixed local memory region and restore them on
return; no stack pointer is needed.
This commit is contained in:
Christoph Bumiller
2010-09-07 15:40:34 +02:00
parent 217542a061
commit d91b8865ec
8 changed files with 174 additions and 55 deletions
+38 -16
View File
@@ -304,7 +304,7 @@ nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)
}
static void
nv_do_print_program(void *priv, struct nv_basic_block *b)
nv_do_print_function(void *priv, struct nv_basic_block *b)
{
struct nv_instruction *i = b->phi;
@@ -323,11 +323,23 @@ nv_do_print_program(void *priv, struct nv_basic_block *b)
}
void
nv_print_program(struct nv_basic_block *root)
nv_print_function(struct nv_basic_block *root)
{
nv_pc_pass_in_order(root, nv_do_print_program, root);
if (root->subroutine)
debug_printf("SUBROUTINE %i\n", root->subroutine);
else
debug_printf("MAIN\n");
debug_printf("END\n\n");
nv_pc_pass_in_order(root, nv_do_print_function, root);
}
/**
 * Print every function of a program.
 *
 * Walks the pc->root[] entries 0 .. pc->num_subroutines (inclusive) and
 * hands each non-NULL entry block to nv_print_function(). Empty slots are
 * skipped.
 */
void
nv_print_program(struct nv_pc *pc)
{
   int fn;

   for (fn = 0; fn <= pc->num_subroutines; ++fn) {
      struct nv_basic_block *entry = pc->root[fn];

      if (!entry)
         continue;
      nv_print_function(entry);
   }
}
static INLINE void
@@ -388,11 +400,18 @@ nv50_generate_code(struct nv50_translation_info *ti)
if (!pc)
return 1;
pc->root = CALLOC(ti->subr_nr + 1, sizeof(pc->root[0]));
if (!pc->root) {
FREE(pc);
return 1;
}
pc->num_subroutines = ti->subr_nr;
ret = nv50_tgsi_to_nc(pc, ti);
if (ret)
goto out;
#ifdef NV50PC_DEBUG
nv_print_program(pc->root);
nv_print_program(pc);
#endif
/* optimization */
@@ -400,7 +419,7 @@ nv50_generate_code(struct nv50_translation_info *ti)
if (ret)
goto out;
#ifdef NV50PC_DEBUG
nv_print_program(pc->root);
nv_print_program(pc);
#endif
/* register allocation */
@@ -408,7 +427,7 @@ nv50_generate_code(struct nv50_translation_info *ti)
if (ret)
goto out;
#ifdef NV50PC_DEBUG
nv_print_program(pc->root);
nv_print_program(pc);
#endif
/* prepare for emission */
@@ -441,16 +460,19 @@ nv50_generate_code(struct nv50_translation_info *ti)
out:
nv_pc_free_refs(pc);
if (ret) {
if (pc->emit)
free(pc->emit);
if (pc->immd_buf)
free(pc->immd_buf);
if (pc->fixups)
free(pc->fixups);
}
free(pc);
if (pc->bb_list)
FREE(pc->bb_list);
if (ret) { /* on success, these will be referenced by nv50_program */
if (pc->emit)
FREE(pc->emit);
if (pc->immd_buf)
FREE(pc->immd_buf);
if (pc->fixups)
FREE(pc->fixups);
}
FREE(pc);
return ret;
}
+7 -5
View File
@@ -282,7 +282,7 @@ struct nv_basic_block {
ubyte in_kind[8];
int id;
struct nv_basic_block *last_visitor;
int subroutine;
uint priv;
uint pass_seq;
@@ -314,10 +314,10 @@ nv_fixup_apply(uint32_t *bin, struct nv_fixup *fixup, uint32_t data)
bin[fixup->offset / 4] = val;
}
struct nv_pc {
struct nv50_translation_info *ti;
struct nv50_translation_info;
struct nv_basic_block *root;
struct nv_pc {
struct nv_basic_block **root;
struct nv_basic_block *current_block;
struct nv_basic_block *parent_block;
@@ -332,6 +332,7 @@ struct nv_pc {
int num_instructions;
int num_refs;
int num_blocks;
int num_subroutines;
int max_reg[4];
@@ -463,7 +464,8 @@ void nv_print_instruction(struct nv_instruction *);
/* nv50_pc.c */
void nv_print_program(struct nv_basic_block *b);
void nv_print_function(struct nv_basic_block *root);
void nv_print_program(struct nv_pc *);
boolean nv_op_commutative(uint opcode);
int nv50_indirect_opnd(struct nv_instruction *);
+40 -16
View File
@@ -213,23 +213,36 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
pc->bin_size += b->bin_size *= 4;
}
int
nv_pc_exec_pass2(struct nv_pc *pc)
static int
nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
{
struct nv_pass pass;
pass.pc = pc;
pc->pass_seq++;
nv_pass_flatten(&pass, pc->root);
nv_pass_flatten(&pass, root);
nv_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);
return 0;
}
int
nv_pc_exec_pass2(struct nv_pc *pc)
{
int i, ret;
NV50_DBGMSG("preparing %u blocks for emission\n", pc->num_blocks);
pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));
pc->bb_list = CALLOC(pc->num_blocks, sizeof(pc->bb_list[0]));
pc->num_blocks = 0;
nv_pc_pass_in_order(pc->root, nv_pc_pass_pre_emission, pc);
for (i = 0; i < pc->num_subroutines + 1; ++i)
if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
return ret;
return 0;
}
@@ -1032,8 +1045,8 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
return 0;
}
int
nv_pc_exec_pass0(struct nv_pc *pc)
static int
nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
{
struct nv_pass_reld_elim *reldelim;
struct nv_pass pass;
@@ -1047,35 +1060,35 @@ nv_pc_exec_pass0(struct nv_pc *pc)
* to whether sources are supported memory loads.
*/
pc->pass_seq++;
ret = nv_pass_lower_arith(&pass, pc->root);
ret = nv_pass_lower_arith(&pass, root);
if (ret)
return ret;
pc->pass_seq++;
ret = nv_pass_fold_loads(&pass, pc->root);
ret = nv_pass_fold_loads(&pass, root);
if (ret)
return ret;
pc->pass_seq++;
ret = nv_pass_fold_stores(&pass, pc->root);
ret = nv_pass_fold_stores(&pass, root);
if (ret)
return ret;
reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
reldelim->pc = pc;
pc->pass_seq++;
ret = nv_pass_reload_elim(reldelim, pc->root);
ret = nv_pass_reload_elim(reldelim, root);
FREE(reldelim);
if (ret)
return ret;
pc->pass_seq++;
ret = nv_pass_cse(&pass, pc->root);
ret = nv_pass_cse(&pass, root);
if (ret)
return ret;
pc->pass_seq++;
ret = nv_pass_lower_mods(&pass, pc->root);
ret = nv_pass_lower_mods(&pass, root);
if (ret)
return ret;
@@ -1083,14 +1096,25 @@ nv_pc_exec_pass0(struct nv_pc *pc)
do {
dce.removed = 0;
pc->pass_seq++;
ret = nv_pass_dce(&dce, pc->root);
ret = nv_pass_dce(&dce, root);
if (ret)
return ret;
} while (dce.removed);
ret = nv_pass_tex_mask(&pass, pc->root);
ret = nv_pass_tex_mask(&pass, root);
if (ret)
return ret;
return ret;
}
/**
 * Run pass 0 on each function of the program separately.
 *
 * Calls nv_pc_pass0() once per non-NULL entry of pc->root[], for indices
 * 0 .. pc->num_subroutines (inclusive).
 *
 * @return 0 if every function was processed successfully, otherwise the
 *         first non-zero value returned by nv_pc_pass0(); remaining
 *         functions are not processed in that case.
 */
int
nv_pc_exec_pass0(struct nv_pc *pc)
{
   int fn;

   for (fn = 0; fn <= pc->num_subroutines; ++fn) {
      int err;

      if (!pc->root[fn])
         continue;

      err = nv_pc_pass0(pc, pc->root[fn]);
      if (err)
         return err;
   }

   return 0;
}
+17 -6
View File
@@ -874,8 +874,8 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter)
return 0;
}
int
nv_pc_exec_pass1(struct nv_pc *pc)
static int
nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
{
struct nv_pc_pass *ctx;
int i, ret;
@@ -890,12 +890,12 @@ nv_pc_exec_pass1(struct nv_pc *pc)
ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
pc->pass_seq++;
ret = pass_generate_phi_movs(ctx, pc->root);
ret = pass_generate_phi_movs(ctx, root);
assert(!ret);
for (i = 0; i < pc->loop_nesting_bound; ++i) {
pc->pass_seq++;
ret = pass_build_live_sets(ctx, pc->root);
ret = pass_build_live_sets(ctx, root);
assert(!ret && "live sets");
if (ret) {
NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
@@ -904,10 +904,10 @@ nv_pc_exec_pass1(struct nv_pc *pc)
}
pc->pass_seq++;
nv_pc_pass_in_order(pc->root, pass_order_instructions, ctx);
nv_pc_pass_in_order(root, pass_order_instructions, ctx);
pc->pass_seq++;
ret = pass_build_intervals(ctx, pc->root);
ret = pass_build_intervals(ctx, root);
assert(!ret && "build intervals");
if (ret) {
NOUVEAU_ERR("failed to build live intervals\n");
@@ -944,3 +944,14 @@ out:
FREE(ctx);
return ret;
}
/**
 * Run pass 1 on each function of the program separately.
 *
 * Calls nv_pc_pass1() once per non-NULL entry of pc->root[], for indices
 * 0 .. pc->num_subroutines (inclusive).
 *
 * @return 0 if every function was processed successfully, otherwise the
 *         first non-zero value returned by nv_pc_pass1(); remaining
 *         functions are not processed in that case.
 */
int
nv_pc_exec_pass1(struct nv_pc *pc)
{
   int fn;

   for (fn = 0; fn <= pc->num_subroutines; ++fn) {
      int err;

      if (!pc->root[fn])
         continue;

      err = nv_pc_pass1(pc, pc->root[fn]);
      if (err)
         return err;
   }

   return 0;
}
+60 -3
View File
@@ -147,10 +147,17 @@ prog_inst(struct nv50_translation_info *ti,
int s, c, k;
unsigned mask;
if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
ti->subr[ti->subr_nr].pos = id - 1;
ti->subr[ti->subr_nr].id = ti->subr_nr + 1; /* id 0 is main program */
++ti->subr_nr;
}
if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
dst = &inst->Dst[0].Register;
for (c = 0; c < 4; ++c) {
dst = &inst->Dst[0].Register;
if (inst->Dst[0].Register.Indirect)
if (dst->Indirect)
nv50_indirect_outputs(ti, id);
if (!(dst->WriteMask & (1 << c)))
continue;
@@ -182,6 +189,44 @@ prog_inst(struct nv50_translation_info *ti,
}
}
/* Probably should introduce something like struct tgsi_function_declaration
 * instead of trying to guess inputs/outputs.
 */

/**
 * Record which TEMPORARY register components one instruction reads and
 * writes, accumulating the result into a subroutine's argv/retv bitmasks.
 *
 * Bit (Index % 32) of word [Index / 32][component] stands for one component
 * of one TEMP register.
 *
 *  - A TEMP component that is read while its retv bit is still clear is
 *    marked in subr->argv.
 *  - Every TEMP component written through Dst[0] is marked in subr->retv.
 *
 * The argv test consults the retv bits accumulated by earlier calls, so the
 * result depends on the order in which instructions are passed in.
 *
 * Only Dst[0] is examined; sources and destinations in other register files
 * are ignored. Indirectly addressed TEMP sources are not supported (assert).
 *
 * @param subr  subroutine record whose argv/retv masks are updated
 * @param inst  instruction to inspect
 */
static void
prog_subroutine_inst(struct nv50_subroutine *subr,
                     const struct tgsi_full_instruction *inst)
{
   int s, c;

   for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
      const struct tgsi_src_register *src = &inst->Src[s].Register;
      unsigned mask;
      unsigned word;
      uint32_t bit;

      if (src->File != TGSI_FILE_TEMPORARY)
         continue;

      mask = nv50_tgsi_src_mask(inst, s);

      assert(!src->Indirect);

      word = src->Index / 32;
      /* Unsigned constant: shifting a signed 1 into bit 31 is undefined. */
      bit = (uint32_t)1 << (src->Index % 32);

      for (c = 0; c < 4; ++c) {
         int k;

         if (!(mask & (1 << c)))
            continue;

         k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);

         /* The W component is a valid slot of the [4]-wide masks and must be
          * tracked like X, Y and Z; the bound only guards the array index.
          */
         if (k > TGSI_SWIZZLE_W)
            continue;

         if (!(subr->retv[word][k] & bit))
            subr->argv[word][k] |= bit;
      }
   }

   if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
      const struct tgsi_dst_register *dst = &inst->Dst[0].Register;
      const unsigned word = dst->Index / 32;
      const uint32_t bit = (uint32_t)1 << (dst->Index % 32);

      for (c = 0; c < 4; ++c) {
         if (dst->WriteMask & (1 << c))
            subr->retv[word][c] |= bit;
      }
   }
}
static void
prog_immediate(struct nv50_translation_info *ti,
const struct tgsi_full_immediate *imm)
@@ -482,7 +527,7 @@ nv50_prog_scan(struct nv50_translation_info *ti)
{
struct nv50_program *p = ti->p;
struct tgsi_parse_context parse;
int ret;
int ret, i;
p->vp.edgeflag = 0x40;
p->vp.psiz = 0x40;
@@ -496,6 +541,9 @@ nv50_prog_scan(struct nv50_translation_info *ti)
tgsi_dump(p->pipe.tokens, 0);
#endif
ti->subr =
CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
@@ -519,6 +567,13 @@ nv50_prog_scan(struct nv50_translation_info *ti)
}
}
/* Scan to determine which registers are inputs/outputs of a subroutine. */
for (i = 0; i < ti->subr_nr; ++i) {
int pc = ti->subr[i].id;
while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
}
p->in_nr = ti->scan.file_max[TGSI_FILE_INPUT] + 1;
p->out_nr = ti->scan.file_max[TGSI_FILE_OUTPUT] + 1;
@@ -572,6 +627,8 @@ out:
FREE(ti->immd32_ty);
if (ti->insns)
FREE(ti->insns);
if (ti->subr)
FREE(ti->subr);
FREE(ti);
return ret ? FALSE : TRUE;
}
+9 -7
View File
@@ -27,6 +27,8 @@
#include "tgsi/tgsi_scan.h"
#include "nouveau/nouveau_class.h"
#define NV50_CAP_MAX_PROGRAM_TEMPS (128 / 4)
struct nv50_varying {
uint8_t id; /* tgsi index */
uint8_t hw; /* hw index, nv50 wants flat FP inputs last */
@@ -92,13 +94,13 @@ struct nv50_program {
#define NV50_INTERP_FLAT (1 << 1)
#define NV50_INTERP_CENTROID (1 << 2)
#define NV50_PROG_MAX_SUBROUTINES 8
/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */
struct nv50_subroutine {
int id;
uint32_t argv[4][1]; /* 4 bitmasks, for each of xyzw, only allow 32 TEMPs */
uint32_t retv[4][1];
unsigned id;
unsigned pos;
/* function inputs and outputs */
uint32_t argv[NV50_CAP_MAX_PROGRAM_TEMPS][4];
uint32_t retv[NV50_CAP_MAX_PROGRAM_TEMPS][4];
};
struct nv50_translation_info {
@@ -119,8 +121,8 @@ struct nv50_translation_info {
unsigned immd32_nr;
ubyte *immd32_ty;
ubyte edgeflag_out;
struct nv50_subroutine subr[NV50_PROG_MAX_SUBROUTINES];
int subr_nr;
struct nv50_subroutine *subr;
unsigned subr_nr;
};
int nv50_generate_code(struct nv50_translation_info *ti);
+2 -1
View File
@@ -26,6 +26,7 @@
#include "nv50_context.h"
#include "nv50_screen.h"
#include "nv50_resource.h"
#include "nv50_program.h"
#include "nouveau/nouveau_stateobj.h"
@@ -152,7 +153,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 0;
case PIPE_CAP_MAX_VS_TEMPS:
case PIPE_CAP_MAX_FS_TEMPS: /* no spilling atm */
return 128 / 4;
return NV50_CAP_MAX_PROGRAM_TEMPS;
case PIPE_CAP_DEPTH_CLAMP:
return 1;
default:
+1 -1
View File
@@ -1850,7 +1850,7 @@ nv50_tgsi_to_nc(struct nv_pc *pc, struct nv50_translation_info *ti)
struct bld_context *bld = CALLOC_STRUCT(bld_context);
int c;
pc->root = pc->current_block = new_basic_block(pc);
pc->root[0] = pc->current_block = new_basic_block(pc);
bld->pc = pc;
bld->ti = ti;