nvc0: fix max varying count, move CLIPVERTEX,FOG out of the way

The card spews an error if I use all 128 generic slots.
Apparently the real limit isn't just dictated by the address space
layout.
This commit is contained in:
Christoph Bumiller
2013-03-15 23:39:01 +01:00
parent 8acaf862df
commit 529dbbfcf7
3 changed files with 36 additions and 12 deletions
@@ -1009,7 +1009,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
else
info->out[dst.getIndex(0)].mask |= dst.getMask();
if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE)
if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE ||
info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID ||
info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG)
info->out[dst.getIndex(0)].mask &= 1;
if (isEdgeFlagPassthrough(insn))
@@ -1040,14 +1042,25 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
for (unsigned i = 0; i < info->numInputs; ++i)
info->in[i].mask = 0xf;
} else {
const int i = src.getIndex(0);
for (unsigned c = 0; c < 4; ++c) {
if (!(mask & (1 << c)))
continue;
int k = src.getSwizzle(c);
int i = src.getIndex(0);
if (info->in[i].sn != TGSI_SEMANTIC_FOG || k == TGSI_SWIZZLE_X)
if (k <= TGSI_SWIZZLE_W)
info->in[i].mask |= 1 << k;
if (k <= TGSI_SWIZZLE_W)
info->in[i].mask |= 1 << k;
}
switch (info->in[i].sn) {
case TGSI_SEMANTIC_PSIZE:
case TGSI_SEMANTIC_PRIMID:
case TGSI_SEMANTIC_FOG:
info->in[i].mask &= 0x1;
break;
case TGSI_SEMANTIC_PCOORD:
info->in[i].mask &= 0x3;
break;
default:
break;
}
}
}
+8 -5
View File
@@ -27,6 +27,9 @@
#include "nv50/codegen/nv50_ir_driver.h"
#include "nve4_compute.h"
/* NOTE: Using a[0x270] in FP may cause an error even if we're using less than
* 124 scalar varying values.
*/
static uint32_t
nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase)
{
@@ -36,12 +39,12 @@ nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase)
case TGSI_SEMANTIC_PSIZE: return 0x06c;
case TGSI_SEMANTIC_POSITION: return 0x070;
case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
case TGSI_SEMANTIC_FOG: return 0x270;
case TGSI_SEMANTIC_FOG: return 0x2e8;
case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10;
case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4;
case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10;
case TGSI_SEMANTIC_CLIPVERTEX: return 0x260;
case TGSI_SEMANTIC_CLIPVERTEX: return 0x270;
case TGSI_SEMANTIC_PCOORD: return 0x2e0;
case NV50_SEMANTIC_TESSCOORD: return 0x2f0;
case TGSI_SEMANTIC_INSTANCEID: return 0x2f8;
@@ -66,12 +69,12 @@ nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase)
case TGSI_SEMANTIC_PSIZE: return 0x06c;
case TGSI_SEMANTIC_POSITION: return 0x070;
case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
case TGSI_SEMANTIC_FOG: return 0x270;
case TGSI_SEMANTIC_FOG: return 0x2e8;
case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10;
case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4;
case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10;
case TGSI_SEMANTIC_CLIPVERTEX: return 0x260;
case TGSI_SEMANTIC_CLIPVERTEX: return 0x270;
case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10;
case TGSI_SEMANTIC_EDGEFLAG: return ~0;
default:
@@ -440,7 +443,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
} else
if (info->in[i].slot[0] >= (0x2c0 / 4) &&
info->in[i].slot[0] <= (0x2fc / 4)) {
fp->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x03ff0000;
fp->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000;
} else {
if (info->in[i].slot[c] < (0x040 / 4) ||
info->in[i].slot[c] > (0x380 / 4))
+10 -2
View File
@@ -221,9 +221,17 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_MAX_INPUTS:
if (shader == PIPE_SHADER_VERTEX)
return 32;
/* NOTE: These only count our slots for GENERIC varyings.
* The address space may be larger, but the actual hard limit seems to be
* less than what the address space layout permits, so don't add TEXCOORD,
* COLOR, etc. here.
*/
if (shader == PIPE_SHADER_FRAGMENT)
return (0x200 + 0x20 + 0x80) / 16; /* generic + colors + TexCoords */
return (0x200 + 0x40 + 0x80) / 16; /* without 0x60 for per-patch inputs */
return 0x1f0 / 16;
/* Actually this counts CLIPVERTEX, which occupies the last generic slot,
* and excludes 0x60 per-patch inputs.
*/
return 0x200 / 16;
case PIPE_SHADER_CAP_MAX_CONSTS:
return 65536 / 16;
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: