nv50: hook up to new shader code generator

This commit is contained in:
Christoph Bumiller
2012-04-14 21:42:52 +02:00
parent 322bc7ed68
commit 0bbf1659df
10 changed files with 271 additions and 673 deletions
@@ -13,12 +13,6 @@ C_SOURCES := \
nv50_vbo.c \
nv50_program.c \
nv50_shader_state.c \
nv50_pc.c \
nv50_pc_print.c \
nv50_pc_emit.c \
nv50_tgsi_to_nc.c \
nv50_pc_optimize.c \
nv50_pc_regalloc.c \
nv50_push.c \
nv50_query.c
@@ -1051,6 +1051,7 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
}
info->io.clipDistance = 0xff;
info->io.pointSize = 0xff;
info->io.instanceId = 0xff;
info->io.vertexId = 0xff;
info->io.edgeFlagIn = 0xff;
info->io.edgeFlagOut = 0xff;
@@ -164,6 +164,7 @@ struct nv50_ir_prog_info
uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */
int8_t genUserClip; /* request user clip planes for ClipVertex */
uint8_t pointSize; /* output index for PointSize */
uint8_t instanceId; /* system value index of InstanceID */
uint8_t vertexId; /* system value index of VertexID */
uint8_t edgeFlagIn;
uint8_t edgeFlagOut;
@@ -859,6 +859,9 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
break;
case TGSI_FILE_SYSTEM_VALUE:
switch (sn) {
case TGSI_SEMANTIC_INSTANCEID:
info->io.instanceId = first;
break;
case TGSI_SEMANTIC_VERTEXID:
info->io.vertexId = first;
break;
-5
View File
@@ -162,11 +162,6 @@ void nv50_default_kick_notify(struct nouveau_pushbuf *);
/* nv50_draw.c */
extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
/* nv50_program.c */
boolean nv50_program_translate(struct nv50_program *);
boolean nv50_program_translate_new(struct nv50_program *);
void nv50_program_destroy(struct nv50_context *, struct nv50_program *);
/* nv50_query.c */
void nv50_init_query_functions(struct nv50_context *);
+233 -570
View File
@@ -21,658 +21,321 @@
*/
#include "nv50_program.h"
#include "nv50_pc.h"
#include "nv50_context.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_dump.h"
#include "codegen/nv50_ir_driver.h"
/* Return the number of set bits in the low 4 bits of val.
 * Only val & 0xf is examined; the result comes from a 16-entry lookup
 * table indexed by that nibble.
 */
static INLINE unsigned
bitcount4(const uint32_t val)
{
static const unsigned cnt[16]
static const uint8_t cnt[16]
= { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
return cnt[val & 0xf];
}
static unsigned
nv50_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c)
static int
nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
{
unsigned mask = inst->Dst[0].Register.WriteMask;
struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
unsigned i, n, c;
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_COS:
case TGSI_OPCODE_SIN:
return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
case TGSI_OPCODE_DP3:
return 0x7;
case TGSI_OPCODE_DP4:
case TGSI_OPCODE_DPH:
case TGSI_OPCODE_KIL: /* WriteMask ignored */
return 0xf;
case TGSI_OPCODE_DST:
return mask & (c ? 0xa : 0x6);
case TGSI_OPCODE_EX2:
case TGSI_OPCODE_EXP:
case TGSI_OPCODE_LG2:
case TGSI_OPCODE_LOG:
case TGSI_OPCODE_POW:
case TGSI_OPCODE_RCP:
case TGSI_OPCODE_RSQ:
case TGSI_OPCODE_SCS:
return 0x1;
case TGSI_OPCODE_IF:
return 0x1;
case TGSI_OPCODE_LIT:
return 0xb;
case TGSI_OPCODE_TEX:
case TGSI_OPCODE_TXB:
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
{
const struct tgsi_instruction_texture *tex;
n = 0;
for (i = 0; i < info->numInputs; ++i) {
prog->in[i].id = i;
prog->in[i].sn = info->in[i].sn;
prog->in[i].si = info->in[i].si;
prog->in[i].hw = n;
prog->in[i].mask = info->in[i].mask;
assert(inst->Instruction.Texture);
tex = &inst->Texture;
prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32);
mask = 0x7;
if (inst->Instruction.Opcode != TGSI_OPCODE_TEX &&
inst->Instruction.Opcode != TGSI_OPCODE_TXD)
mask |= 0x8; /* bias, lod or proj */
for (c = 0; c < 4; ++c)
if (info->in[i].mask & (1 << c))
info->in[i].slot[c] = n++;
}
prog->in_nr = info->numInputs;
switch (tex->Texture) {
case TGSI_TEXTURE_1D:
mask &= 0x9;
break;
case TGSI_TEXTURE_SHADOW1D:
mask &= 0x5;
break;
case TGSI_TEXTURE_2D:
mask &= 0xb;
break;
for (i = 0; i < info->numSysVals; ++i) {
switch (info->sv[i].sn) {
case TGSI_SEMANTIC_INSTANCEID:
prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
continue;
case TGSI_SEMANTIC_VERTEXID:
prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12;
continue;
default:
break;
}
}
return mask;
case TGSI_OPCODE_XPD:
{
unsigned x = 0;
if (mask & 1) x |= 0x6;
if (mask & 2) x |= 0x5;
if (mask & 4) x |= 0x3;
return x;
}
default:
break;
}
/* VertexID before InstanceID */
if (info->io.vertexId < info->numSysVals)
info->sv[info->io.vertexId].slot[0] = n++;
if (info->io.instanceId < info->numSysVals)
info->sv[info->io.instanceId].slot[0] = n++;
return mask;
}
static void
nv50_indirect_inputs(struct nv50_translation_info *ti, int id)
{
int i, c;
for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
for (c = 0; c < 4; ++c)
ti->input_access[i][c] = id;
ti->indirect_inputs = TRUE;
}
static void
nv50_indirect_outputs(struct nv50_translation_info *ti, int id)
{
int i, c;
for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
for (c = 0; c < 4; ++c)
ti->output_access[i][c] = id;
ti->indirect_outputs = TRUE;
}
static void
prog_inst(struct nv50_translation_info *ti,
const struct tgsi_full_instruction *inst, int id)
{
const struct tgsi_dst_register *dst;
const struct tgsi_src_register *src;
int s, c, k;
unsigned mask;
if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
ti->subr[ti->subr_nr].pos = id - 1;
ti->subr[ti->subr_nr].id = ti->subr_nr + 1; /* id 0 is main program */
++ti->subr_nr;
}
if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
dst = &inst->Dst[0].Register;
for (c = 0; c < 4; ++c) {
if (dst->Indirect)
nv50_indirect_outputs(ti, id);
if (!(dst->WriteMask & (1 << c)))
continue;
ti->output_access[dst->Index][c] = id;
}
if (inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
inst->Src[0].Register.File == TGSI_FILE_INPUT &&
dst->Index == ti->edgeflag_out)
ti->p->vp.edgeflag = inst->Src[0].Register.Index;
} else
if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
if (inst->Dst[0].Register.Indirect)
ti->store_to_memory = TRUE;
}
for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
src = &inst->Src[s].Register;
if (src->File == TGSI_FILE_TEMPORARY)
if (inst->Src[s].Register.Indirect)
ti->store_to_memory = TRUE;
if (src->File != TGSI_FILE_INPUT)
continue;
mask = nv50_tgsi_src_mask(inst, s);
if (inst->Src[s].Register.Indirect)
nv50_indirect_inputs(ti, id);
for (c = 0; c < 4; ++c) {
if (!(mask & (1 << c)))
continue;
k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
if (k <= TGSI_SWIZZLE_W)
ti->input_access[src->Index][k] = id;
}
}
}
/* Probably should introduce something like struct tgsi_function_declaration
* instead of trying to guess inputs/outputs.
*/
static void
prog_subroutine_inst(struct nv50_subroutine *subr,
const struct tgsi_full_instruction *inst)
{
const struct tgsi_dst_register *dst;
const struct tgsi_src_register *src;
int s, c, k;
unsigned mask;
for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
src = &inst->Src[s].Register;
if (src->File != TGSI_FILE_TEMPORARY)
continue;
mask = nv50_tgsi_src_mask(inst, s);
assert(!inst->Src[s].Register.Indirect);
for (c = 0; c < 4; ++c) {
k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W)
if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32))))
subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32);
}
}
if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
dst = &inst->Dst[0].Register;
for (c = 0; c < 4; ++c)
if (dst->WriteMask & (1 << c))
subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32);
}
}
static void
prog_immediate(struct nv50_translation_info *ti,
const struct tgsi_full_immediate *imm)
{
int c;
unsigned n = ti->immd32_nr++;
assert(ti->immd32_nr <= ti->scan.immediate_count);
for (c = 0; c < 4; ++c)
ti->immd32[n * 4 + c] = imm->u[c].Uint;
ti->immd32_ty[n] = imm->Immediate.DataType;
}
static INLINE unsigned
translate_interpolate(const struct tgsi_full_declaration *decl)
{
unsigned mode;
if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT)
mode = NV50_INTERP_FLAT;
else
if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
mode = 0;
else
mode = NV50_INTERP_LINEAR;
if (decl->Declaration.Centroid)
mode |= NV50_INTERP_CENTROID;
return mode;
}
static void
prog_decl(struct nv50_translation_info *ti,
const struct tgsi_full_declaration *decl)
{
unsigned i, first, last, sn = 0, si = 0;
first = decl->Range.First;
last = decl->Range.Last;
if (decl->Declaration.Semantic) {
sn = decl->Semantic.Name;
si = decl->Semantic.Index;
}
switch (decl->Declaration.File) {
case TGSI_FILE_INPUT:
for (i = first; i <= last; ++i)
ti->interp_mode[i] = translate_interpolate(decl);
if (!decl->Declaration.Semantic)
break;
for (i = first; i <= last; ++i) {
ti->p->in[i].sn = sn;
ti->p->in[i].si = si;
}
switch (sn) {
case TGSI_SEMANTIC_FACE:
break;
case TGSI_SEMANTIC_COLOR:
if (ti->p->type == PIPE_SHADER_FRAGMENT)
ti->p->vp.bfc[si] = first;
break;
}
break;
case TGSI_FILE_OUTPUT:
if (!decl->Declaration.Semantic)
break;
for (i = first; i <= last; ++i) {
ti->p->out[i].sn = sn;
ti->p->out[i].si = si;
}
switch (sn) {
case TGSI_SEMANTIC_BCOLOR:
ti->p->vp.bfc[si] = first;
break;
n = 0;
for (i = 0; i < info->numOutputs; ++i) {
switch (info->out[i].sn) {
case TGSI_SEMANTIC_PSIZE:
ti->p->vp.psiz = first;
prog->vp.psiz = i;
break;
case TGSI_SEMANTIC_CLIPDIST:
prog->vp.clpd[info->out[i].si] = n;
break;
case TGSI_SEMANTIC_EDGEFLAG:
ti->edgeflag_out = first;
prog->vp.edgeflag = i;
break;
case TGSI_SEMANTIC_BCOLOR:
prog->vp.bfc[info->out[i].si] = i;
break;
default:
break;
}
break;
case TGSI_FILE_SYSTEM_VALUE:
/* For VP/GP inputs, they are put in s[] after the last normal input.
* Let sysval_map reflect the order of the sysvals in s[] and fixup later.
*/
switch (decl->Semantic.Name) {
case TGSI_SEMANTIC_FACE:
break;
case TGSI_SEMANTIC_INSTANCEID:
ti->p->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
ti->sysval_map[first] = 2;
break;
case TGSI_SEMANTIC_PRIMID:
break;
/*
case TGSI_SEMANTIC_PRIMIDIN:
break;
case TGSI_SEMANTIC_VERTEXID:
break;
*/
default:
break;
}
break;
case TGSI_FILE_CONSTANT:
ti->p->parm_size = MAX2(ti->p->parm_size, (last + 1) * 16);
break;
case TGSI_FILE_ADDRESS:
case TGSI_FILE_SAMPLER:
case TGSI_FILE_TEMPORARY:
break;
default:
assert(0);
break;
prog->out[i].id = i;
prog->out[i].sn = info->out[i].sn;
prog->out[i].si = info->out[i].si;
prog->out[i].hw = n;
prog->out[i].mask = info->out[i].mask;
for (c = 0; c < 4; ++c)
if (info->out[i].mask & (1 << c))
info->out[i].slot[c] = n++;
}
}
prog->out_nr = info->numOutputs;
prog->max_out = n;
static int
nv50_vertprog_prepare(struct nv50_translation_info *ti)
{
struct nv50_program *p = ti->p;
int i, c;
unsigned num_inputs = 0;
ti->input_file = NV_FILE_MEM_S;
ti->output_file = NV_FILE_OUT;
for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
p->in[i].id = i;
p->in[i].hw = num_inputs;
for (c = 0; c < 4; ++c) {
if (!ti->input_access[i][c])
continue;
ti->input_map[i][c] = num_inputs++;
p->vp.attrs[(4 * i + c) / 32] |= 1 << ((i * 4 + c) % 32);
}
}
for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
p->out[i].id = i;
p->out[i].hw = p->max_out;
for (c = 0; c < 4; ++c) {
if (!ti->output_access[i][c])
continue;
ti->output_map[i][c] = p->max_out++;
p->out[i].mask |= 1 << c;
}
}
p->vp.clpd = p->max_out;
p->max_out += p->vp.clpd_nr;
for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
switch (ti->sysval_map[i]) {
case 2:
if (!(ti->p->vp.attrs[2] & NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID))
ti->sysval_map[i] = 1;
ti->sysval_map[i] = (ti->sysval_map[i] - 1) + num_inputs;
break;
default:
break;
}
}
if (p->vp.psiz < 0x40)
p->vp.psiz = p->out[p->vp.psiz].hw;
if (prog->vp.psiz < info->numOutputs)
prog->vp.psiz = prog->out[prog->vp.psiz].hw;
return 0;
}
static int
nv50_fragprog_prepare(struct nv50_translation_info *ti)
nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info)
{
struct nv50_program *p = ti->p;
int i, j, c;
unsigned nvary, nintp, depr;
unsigned n = 0, m = 0, skip = 0;
ubyte sn[16], si[16];
struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
unsigned i, n, m, c;
unsigned nvary;
unsigned nflat;
unsigned nintp = 0;
/* FP flags */
if (ti->scan.writes_z) {
p->fp.flags[1] = 0x11;
p->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
/* count recorded non-flat inputs */
for (m = 0, i = 0; i < info->numInputs; ++i) {
switch (info->in[i].sn) {
case TGSI_SEMANTIC_POSITION:
case TGSI_SEMANTIC_FACE:
continue;
default:
m += info->in[i].flat ? 0 : 1;
break;
}
}
/* careful: id may be != i in info->in[prog->in[i].id] */
if (ti->scan.uses_kill)
p->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
/* FP inputs */
ti->input_file = NV_FILE_MEM_V;
ti->output_file = NV_FILE_GPR;
/* count non-flat inputs, save semantic info */
for (i = 0; i < p->in_nr; ++i) {
m += (ti->interp_mode[i] & NV50_INTERP_FLAT) ? 0 : 1;
sn[i] = p->in[i].sn;
si[i] = p->in[i].si;
}
/* reorder p->in[] so that non-flat inputs are first and
* kick out special inputs that don't use VP/GP_RESULT_MAP
/* Fill prog->in[] so that non-flat inputs are first and
* kick out special inputs that don't use the RESULT_MAP.
*/
nintp = 0;
for (i = 0; i < p->in_nr; ++i) {
if (sn[i] == TGSI_SEMANTIC_POSITION) {
for (c = 0; c < 4; ++c) {
ti->input_map[i][c] = nintp;
if (ti->input_access[i][c]) {
p->fp.interp |= 1 << (24 + c);
++nintp;
}
}
skip++;
continue;
for (n = 0, i = 0; i < info->numInputs; ++i) {
if (info->in[i].sn == TGSI_SEMANTIC_POSITION) {
prog->fp.interp |= info->in[i].mask << 24;
for (c = 0; c < 4; ++c)
if (info->in[i].mask & (1 << c))
info->in[i].slot[c] = nintp++;
} else
if (sn[i] == TGSI_SEMANTIC_FACE) {
ti->input_map[i][0] = 255;
skip++;
continue;
if (info->in[i].sn == TGSI_SEMANTIC_FACE) {
info->in[i].slot[0] = 255;
} else {
unsigned j = info->in[i].flat ? m++ : n++;
if (info->in[i].sn == TGSI_SEMANTIC_COLOR)
prog->vp.bfc[info->in[i].si] = j;
prog->in[j].id = i;
prog->in[j].mask = info->in[i].mask;
prog->in[j].sn = info->in[i].sn;
prog->in[j].si = info->in[i].si;
prog->in[j].linear = info->in[i].linear;
prog->in_nr++;
}
j = (ti->interp_mode[i] & NV50_INTERP_FLAT) ? m++ : n++;
if (sn[i] == TGSI_SEMANTIC_COLOR)
p->vp.bfc[si[i]] = j;
p->in[j].linear = (ti->interp_mode[i] & NV50_INTERP_LINEAR) ? 1 : 0;
p->in[j].id = i;
p->in[j].sn = sn[i];
p->in[j].si = si[i];
}
assert(n <= m);
p->in_nr -= skip;
if (!(p->fp.interp & (8 << 24))) {
p->fp.interp |= (8 << 24);
if (!(prog->fp.interp & (8 << 24))) {
++nintp;
prog->fp.interp |= 8 << 24;
}
/* after HPOS */
p->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT;
for (i = 0; i < prog->in_nr; ++i) {
int j = prog->in[i].id;
for (i = 0; i < p->in_nr; ++i) {
int j = p->in[i].id;
p->in[i].hw = nintp;
for (c = 0; c < 4; ++c) {
if (!ti->input_access[j][c])
continue;
p->in[i].mask |= 1 << c;
ti->input_map[j][c] = nintp++;
}
/* count color inputs */
if (i == p->vp.bfc[0] || i == p->vp.bfc[1])
p->fp.colors += bitcount4(p->in[i].mask) << 16;
prog->in[i].hw = nintp;
for (c = 0; c < 4; ++c)
if (info->in[i].mask & (1 << c))
info->in[j].slot[c] = nintp++;
}
nintp -= bitcount4(p->fp.interp >> 24); /* subtract position inputs */
nvary = nintp;
if (n < m)
nvary -= p->in[n].hw;
/* (n == m) if m never increased, i.e. no flat inputs */
nflat = (n < m) ? (nintp - prog->in[n].hw) : 0;
nintp -= bitcount4(prog->fp.interp >> 24); /* subtract position inputs */
nvary = nintp - nflat;
p->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT;
p->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT;
prog->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT;
prog->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT;
/* put front/back colors right after HPOS */
prog->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT;
for (i = 0; i < 2; ++i)
if (prog->vp.bfc[i] < 0x80)
prog->fp.colors += bitcount4(prog->in[prog->vp.bfc[i]].mask) << 16;
/* FP outputs */
if (p->out_nr > (1 + (ti->scan.writes_z ? 1 : 0)))
p->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS;
if (info->prop.fp.numColourResults > 1)
prog->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS;
depr = p->out_nr;
for (i = 0; i < p->out_nr; ++i) {
p->out[i].id = i;
if (p->out[i].sn == TGSI_SEMANTIC_POSITION) {
depr = i;
for (i = 0; i < info->numOutputs; ++i) {
prog->out[i].id = i;
prog->out[i].sn = info->out[i].sn;
prog->out[i].si = info->out[i].si;
prog->out[i].mask = info->out[i].mask;
if (i == info->io.fragDepth || i == info->io.sampleMask)
continue;
}
p->out[i].hw = p->max_out;
p->out[i].mask = 0xf;
prog->out[i].hw = info->out[i].si * 4;
for (c = 0; c < 4; ++c)
ti->output_map[i][c] = p->max_out++;
}
if (depr < p->out_nr) {
p->out[depr].mask = 0x4;
p->out[depr].hw = ti->output_map[depr][2] = p->max_out++;
} else {
/* allowed values are 1, 4, 5, 8, 9, ... */
p->max_out = MAX2(4, p->max_out);
info->out[i].slot[c] = prog->out[i].hw + c;
prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4);
}
if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
info->out[info->io.sampleMask].slot[0] = prog->max_out++;
if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
info->out[info->io.fragDepth].slot[2] = prog->max_out++;
if (!prog->max_out)
prog->max_out = 4;
return 0;
}
static int
nv50_geomprog_prepare(struct nv50_translation_info *ti)
nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
{
ti->input_file = NV_FILE_MEM_S;
ti->output_file = NV_FILE_OUT;
assert(0);
return 1;
}
static int
nv50_prog_scan(struct nv50_translation_info *ti)
{
struct nv50_program *p = ti->p;
struct tgsi_parse_context parse;
int ret, i;
p->vp.edgeflag = 0x40;
p->vp.psiz = 0x40;
p->vp.bfc[0] = 0x40;
p->vp.bfc[1] = 0x40;
p->gp.primid = 0x80;
tgsi_scan_shader(p->pipe.tokens, &ti->scan);
#if NV50_DEBUG & NV50_DEBUG_SHADER
tgsi_dump(p->pipe.tokens, 0);
#endif
ti->subr =
CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0]));
tgsi_parse_init(&parse, p->pipe.tokens);
while (!tgsi_parse_end_of_tokens(&parse)) {
tgsi_parse_token(&parse);
switch (parse.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_IMMEDIATE:
prog_immediate(ti, &parse.FullToken.FullImmediate);
break;
case TGSI_TOKEN_TYPE_DECLARATION:
prog_decl(ti, &parse.FullToken.FullDeclaration);
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
ti->insns[ti->inst_nr] = parse.FullToken.FullInstruction;
prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->inst_nr);
break;
}
}
/* Scan to determine which registers are inputs/outputs of a subroutine. */
for (i = 0; i < ti->subr_nr; ++i) {
int pc = ti->subr[i].id;
while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
}
p->in_nr = ti->scan.file_max[TGSI_FILE_INPUT] + 1;
p->out_nr = ti->scan.file_max[TGSI_FILE_OUTPUT] + 1;
switch (p->type) {
switch (info->type) {
case PIPE_SHADER_VERTEX:
ret = nv50_vertprog_prepare(ti);
break;
case PIPE_SHADER_FRAGMENT:
ret = nv50_fragprog_prepare(ti);
break;
return nv50_vertprog_assign_slots(info);
case PIPE_SHADER_GEOMETRY:
ret = nv50_geomprog_prepare(ti);
break;
return nv50_vertprog_assign_slots(info);
case PIPE_SHADER_FRAGMENT:
return nv50_fragprog_assign_slots(info);
default:
assert(!"unsupported program type");
ret = -1;
break;
return -1;
}
assert(!ret);
return ret;
}
/* Temporary, need a reference to nv50_ir_generate_code in libnv50 or
 * it "gets disappeared" and cannot be used in libnvc0 ...
 *
 * NOTE(review): `info` is handed to nv50_ir_generate_code without being
 * initialized and the parameter `p` is never used, so this looks like a
 * link-time placeholder only - confirm that nothing actually calls it.
 */
boolean
nv50_program_translate_new(struct nv50_program *p)
{
struct nv50_ir_prog_info info;
return nv50_ir_generate_code(&info);
}
boolean
nv50_program_translate(struct nv50_program *p)
nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
{
struct nv50_translation_info *ti;
struct nv50_ir_prog_info *info;
int ret;
ti = CALLOC_STRUCT(nv50_translation_info);
ti->p = p;
info = CALLOC_STRUCT(nv50_ir_prog_info);
if (!info)
return FALSE;
ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS;
info->type = prog->type;
info->target = chipset;
info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
info->bin.source = (void *)prog->pipe.tokens;
ret = nv50_prog_scan(ti);
info->io.genUserClip = prog->vp.clpd_nr;
info->assignSlots = nv50_program_assign_varying_slots;
prog->vp.bfc[0] = 0x80;
prog->vp.bfc[1] = 0x80;
prog->vp.clpd[0] = 0x80;
prog->vp.clpd[1] = 0x80;
prog->vp.psiz = 0x80;
prog->vp.edgeflag = 0x80;
prog->gp.primid = 0x80;
info->driverPriv = prog;
#ifdef DEBUG
info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
#else
info->optLevel = 3;
#endif
ret = nv50_ir_generate_code(info);
if (ret) {
NOUVEAU_ERR("unsupported shader program\n");
NOUVEAU_ERR("shader translation failed: %i\n", ret);
goto out;
}
prog->code = info->bin.code;
prog->code_size = info->bin.codeSize;
prog->fixups = info->bin.relocData;
prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
ret = nv50_generate_code(ti);
if (ret) {
NOUVEAU_ERR("error during shader translation\n");
goto out;
if (prog->type == PIPE_SHADER_FRAGMENT) {
if (info->prop.fp.writesDepth) {
prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
prog->fp.flags[1] = 0x11;
}
if (info->prop.fp.usesDiscard)
prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
}
out:
if (ti->immd32)
FREE(ti->immd32);
if (ti->immd32_ty)
FREE(ti->immd32_ty);
if (ti->insns)
FREE(ti->insns);
if (ti->subr)
FREE(ti->subr);
FREE(ti);
return ret ? FALSE : TRUE;
FREE(info);
return !ret;
}
/**
 * Reserve room for a translated program in the code heap of its shader
 * stage, relocate it, and upload it to the code buffer.
 *
 * @param nv50  context whose screen owns the per-stage code heaps and the
 *              code buffer object
 * @param prog  program to upload; reads type, code, code_size and fixups,
 *              writes mem and code_base
 * @return TRUE on success, FALSE if the program type is not vertex,
 *         geometry or fragment, or if no heap space could be obtained even
 *         after evicting the other programs of that heap.
 */
boolean
nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
{
   struct nouveau_heap *heap;
   int ret;
   uint32_t size = align(prog->code_size, 0x40);

   /* Each shader stage allocates from its own heap. */
   switch (prog->type) {
   case PIPE_SHADER_VERTEX:   heap = nv50->screen->vp_code_heap; break;
   case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break;
   case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break;
   default:
      assert(!"invalid program type");
      return FALSE;
   }

   ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
   if (ret) {
      /* Out of space: evict everything to compactify the code segment, hoping
       * the working set is much smaller and drifts slowly. Improve me !
       *
       * NOTE(review): this loop only makes progress when heap->next->priv is
       * non-NULL - confirm every allocated node carries its owning program.
       */
      while (heap->next) {
         struct nv50_program *evict = heap->next->priv;
         if (evict)
            nouveau_heap_free(&evict->mem);
      }
      debug_printf("WARNING: out of code space, evicting all shaders.\n");

      /* Retry now that the heap has been emptied; without this, prog->mem
       * would be dereferenced below although the allocation failed.
       */
      ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
      if (ret) {
         NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n",
                     (unsigned)size);
         return FALSE;
      }
   }
   prog->code_base = prog->mem->start;

   /* Relocation records are optional; patch the code only when present. */
   if (prog->fixups)
      nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);

   /* The destination offset is the stage's region of the code buffer
    * (type << NV50_CODE_BO_SIZE_LOG2) plus the offset inside the heap.
    */
   nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
                       (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
                       NOUVEAU_BO_VRAM, prog->code_size, prog->code);

   BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
   PUSH_DATA (nv50->base.pushbuf, 0);

   return TRUE;
}
void
+10 -50
View File
@@ -23,8 +23,10 @@
#ifndef __NV50_PROG_H__
#define __NV50_PROG_H__
struct nv50_context;
#include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h"
#include "pipe/p_shader_tokens.h"
#define NV50_CAP_MAX_PROGRAM_TEMPS 64
@@ -64,10 +66,10 @@ struct nv50_program {
struct {
uint32_t attrs[3]; /* VP_ATTR_EN_0,1 and VP_GP_BUILTIN_ATTR_EN */
ubyte psiz;
ubyte bfc[2];
ubyte psiz; /* output slot of point size */
ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */
ubyte edgeflag;
ubyte clpd;
ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */
ubyte clpd_nr;
} vp;
@@ -83,55 +85,13 @@ struct nv50_program {
uint8_t prim_type; /* point, line strip or tri strip */
} gp;
/* relocation records */
void *fixups;
unsigned num_fixups;
void *fixups; /* relocation records */
struct nouveau_heap *mem;
};
#define NV50_INTERP_LINEAR (1 << 0)
#define NV50_INTERP_FLAT (1 << 1)
#define NV50_INTERP_CENTROID (1 << 2)
/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */
struct nv50_subroutine {
unsigned id;
unsigned pos;
/* function inputs and outputs */
uint32_t argv[NV50_CAP_MAX_PROGRAM_TEMPS][4];
uint32_t retv[NV50_CAP_MAX_PROGRAM_TEMPS][4];
};
struct nv50_translation_info {
struct nv50_program *p;
unsigned inst_nr;
struct tgsi_full_instruction *insns;
ubyte input_file;
ubyte output_file;
ubyte input_map[PIPE_MAX_SHADER_INPUTS][4];
ubyte output_map[PIPE_MAX_SHADER_OUTPUTS][4];
ubyte sysval_map[TGSI_SEMANTIC_COUNT];
ubyte interp_mode[PIPE_MAX_SHADER_INPUTS];
int input_access[PIPE_MAX_SHADER_INPUTS][4];
int output_access[PIPE_MAX_SHADER_OUTPUTS][4];
boolean indirect_inputs;
boolean indirect_outputs;
boolean store_to_memory;
struct tgsi_shader_info scan;
uint32_t *immd32;
unsigned immd32_nr;
ubyte *immd32_ty;
ubyte edgeflag_out;
struct nv50_subroutine *subr;
unsigned subr_nr;
};
int nv50_generate_code(struct nv50_translation_info *ti);
void nv50_relocate_program(struct nv50_program *p,
uint32_t code_base, uint32_t data_base);
boolean nv50_program_tx(struct nv50_program *p);
boolean nv50_program_translate(struct nv50_program *, uint16_t chipset);
boolean nv50_program_upload_code(struct nv50_context *, struct nv50_program *);
void nv50_program_destroy(struct nv50_context *, struct nv50_program *);
#endif /* __NV50_PROG_H__ */
+4 -4
View File
@@ -85,9 +85,9 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: /* shader support missing */
return 0;
case PIPE_CAP_MIN_TEXEL_OFFSET:
return 0 /* -8, TODO */;
return -8;
case PIPE_CAP_MAX_TEXEL_OFFSET:
return 0 /* +7, TODO */;
return 7;
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_TEXTURE_SWIZZLE:
case PIPE_CAP_TEXTURE_SHADOW_MAP:
@@ -108,7 +108,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SM3:
return 1;
case PIPE_CAP_GLSL_FEATURE_LEVEL:
return 120;
return 130;
case PIPE_CAP_MAX_RENDER_TARGETS:
return 8;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
@@ -202,7 +202,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_SUBROUTINES:
return 0; /* please inline, or provide function declarations */
case PIPE_SHADER_CAP_INTEGERS:
return 0;
return 1;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
return 32;
default:
+14 -36
View File
@@ -118,43 +118,16 @@ nv50_constbufs_validate(struct nv50_context *nv50)
static boolean
nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
{
struct nouveau_heap *heap;
int ret;
unsigned size;
if (!prog->translated) {
prog->translated = nv50_program_translate(prog);
prog->translated = nv50_program_translate(
prog, nv50->screen->base.device->chipset);
if (!prog->translated)
return FALSE;
} else
if (prog->mem)
return TRUE;
if (prog->type == PIPE_SHADER_FRAGMENT) heap = nv50->screen->fp_code_heap;
else
if (prog->type == PIPE_SHADER_GEOMETRY) heap = nv50->screen->gp_code_heap;
else
heap = nv50->screen->vp_code_heap;
size = align(prog->code_size, 0x100);
ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
if (ret) {
NOUVEAU_ERR("out of code space for shader type %i\n", prog->type);
return FALSE;
}
prog->code_base = prog->mem->start;
nv50_relocate_program(prog, prog->code_base, 0);
nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
(prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
NOUVEAU_BO_VRAM, prog->code_size, prog->code);
BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
PUSH_DATA (nv50->base.pushbuf, 0);
return TRUE;
return nv50_program_upload_code(nv50, prog);
}
static INLINE void
@@ -383,20 +356,25 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);
for (c = 0; c < vp->vp.clpd_nr; ++c)
map[m++] = vp->vp.clpd + c;
map[m++] = vp->vp.clpd[c / 4] + (c % 4);
colors |= m << 8; /* adjust BFC0 id */
dummy.mask = 0x0;
/* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */
if (nv50->rast->pipe.light_twoside) {
for (i = 0; i < 2; ++i)
m = nv50_vec4_map(map, m, lin,
&fp->in[fp->vp.bfc[i]], &vp->out[vp->vp.bfc[i]]);
for (i = 0; i < 2; ++i) {
n = vp->vp.bfc[i];
if (fp->vp.bfc[i] >= fp->in_nr)
continue;
m = nv50_vec4_map(map, m, lin, &fp->in[fp->vp.bfc[i]],
(n < vp->out_nr) ? &vp->out[n] : &dummy);
}
}
colors += m - 4; /* adjust FFC0 id */
interp |= m << 8; /* set map id where 'normal' FP inputs start */
dummy.mask = 0x0;
for (i = 0; i < fp->in_nr; ++i) {
for (n = 0; n < vp->out_nr; ++n)
if (vp->out[n].sn == fp->in[i].sn &&
@@ -409,7 +387,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
/* PrimitiveID either is replaced by the system value, or
* written by the geometry shader into an output register
*/
if (fp->gp.primid < 0x40) {
if (fp->gp.primid < 0x80) {
primid = m;
map[m++] = vp->gp.primid;
}
@@ -211,10 +211,13 @@ nv50_check_program_ucps(struct nv50_context *nv50,
nv50_program_destroy(nv50, vp);
vp->vp.clpd_nr = n;
if (likely(vp == nv50->vertprog))
if (likely(vp == nv50->vertprog)) {
nv50->dirty |= NV50_NEW_VERTPROG;
nv50_vertprog_validate(nv50);
else
} else {
nv50->dirty |= NV50_NEW_GMTYPROG;
nv50_gmtyprog_validate(nv50);
}
nv50_fp_linkage_validate(nv50);
}