nvc0: fix max varying count, move CLIPVERTEX, FOG out of the way
The card spews an error if I use all 128 generic slots. Apparently the real limit isn't just dictated by the address space layout.
This commit is contained in:
@@ -1009,7 +1009,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
|
||||
else
|
||||
info->out[dst.getIndex(0)].mask |= dst.getMask();
|
||||
|
||||
if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE)
|
||||
if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE ||
|
||||
info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID ||
|
||||
info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG)
|
||||
info->out[dst.getIndex(0)].mask &= 1;
|
||||
|
||||
if (isEdgeFlagPassthrough(insn))
|
||||
@@ -1040,14 +1042,25 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
|
||||
for (unsigned i = 0; i < info->numInputs; ++i)
|
||||
info->in[i].mask = 0xf;
|
||||
} else {
|
||||
const int i = src.getIndex(0);
|
||||
for (unsigned c = 0; c < 4; ++c) {
|
||||
if (!(mask & (1 << c)))
|
||||
continue;
|
||||
int k = src.getSwizzle(c);
|
||||
int i = src.getIndex(0);
|
||||
if (info->in[i].sn != TGSI_SEMANTIC_FOG || k == TGSI_SWIZZLE_X)
|
||||
if (k <= TGSI_SWIZZLE_W)
|
||||
info->in[i].mask |= 1 << k;
|
||||
if (k <= TGSI_SWIZZLE_W)
|
||||
info->in[i].mask |= 1 << k;
|
||||
}
|
||||
switch (info->in[i].sn) {
|
||||
case TGSI_SEMANTIC_PSIZE:
|
||||
case TGSI_SEMANTIC_PRIMID:
|
||||
case TGSI_SEMANTIC_FOG:
|
||||
info->in[i].mask &= 0x1;
|
||||
break;
|
||||
case TGSI_SEMANTIC_PCOORD:
|
||||
info->in[i].mask &= 0x3;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,6 +27,9 @@
|
||||
#include "nv50/codegen/nv50_ir_driver.h"
|
||||
#include "nve4_compute.h"
|
||||
|
||||
/* NOTE: Using a[0x270] in FP may cause an error even if we're using fewer than
|
||||
* 124 scalar varying values.
|
||||
*/
|
||||
static uint32_t
|
||||
nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase)
|
||||
{
|
||||
@@ -36,12 +39,12 @@ nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase)
|
||||
case TGSI_SEMANTIC_PSIZE: return 0x06c;
|
||||
case TGSI_SEMANTIC_POSITION: return 0x070;
|
||||
case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
|
||||
case TGSI_SEMANTIC_FOG: return 0x270;
|
||||
case TGSI_SEMANTIC_FOG: return 0x2e8;
|
||||
case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
|
||||
case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10;
|
||||
case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4;
|
||||
case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10;
|
||||
case TGSI_SEMANTIC_CLIPVERTEX: return 0x260;
|
||||
case TGSI_SEMANTIC_CLIPVERTEX: return 0x270;
|
||||
case TGSI_SEMANTIC_PCOORD: return 0x2e0;
|
||||
case NV50_SEMANTIC_TESSCOORD: return 0x2f0;
|
||||
case TGSI_SEMANTIC_INSTANCEID: return 0x2f8;
|
||||
@@ -66,12 +69,12 @@ nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase)
|
||||
case TGSI_SEMANTIC_PSIZE: return 0x06c;
|
||||
case TGSI_SEMANTIC_POSITION: return 0x070;
|
||||
case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
|
||||
case TGSI_SEMANTIC_FOG: return 0x270;
|
||||
case TGSI_SEMANTIC_FOG: return 0x2e8;
|
||||
case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
|
||||
case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10;
|
||||
case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4;
|
||||
case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10;
|
||||
case TGSI_SEMANTIC_CLIPVERTEX: return 0x260;
|
||||
case TGSI_SEMANTIC_CLIPVERTEX: return 0x270;
|
||||
case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10;
|
||||
case TGSI_SEMANTIC_EDGEFLAG: return ~0;
|
||||
default:
|
||||
@@ -440,7 +443,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
|
||||
} else
|
||||
if (info->in[i].slot[0] >= (0x2c0 / 4) &&
|
||||
info->in[i].slot[0] <= (0x2fc / 4)) {
|
||||
fp->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x03ff0000;
|
||||
fp->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000;
|
||||
} else {
|
||||
if (info->in[i].slot[c] < (0x040 / 4) ||
|
||||
info->in[i].slot[c] > (0x380 / 4))
|
||||
|
||||
@@ -221,9 +221,17 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
||||
case PIPE_SHADER_CAP_MAX_INPUTS:
|
||||
if (shader == PIPE_SHADER_VERTEX)
|
||||
return 32;
|
||||
/* NOTE: These only count our slots for GENERIC varyings.
|
||||
* The address space may be larger, but the actual hard limit seems to be
|
||||
* less than what the address space layout permits, so don't add TEXCOORD,
|
||||
* COLOR, etc. here.
|
||||
*/
|
||||
if (shader == PIPE_SHADER_FRAGMENT)
|
||||
return (0x200 + 0x20 + 0x80) / 16; /* generic + colors + TexCoords */
|
||||
return (0x200 + 0x40 + 0x80) / 16; /* without 0x60 for per-patch inputs */
|
||||
return 0x1f0 / 16;
|
||||
/* Actually this counts CLIPVERTEX, which occupies the last generic slot,
|
||||
* and excludes 0x60 per-patch inputs.
|
||||
*/
|
||||
return 0x200 / 16;
|
||||
case PIPE_SHADER_CAP_MAX_CONSTS:
|
||||
return 65536 / 16;
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
|
||||
|
||||
Reference in New Issue
Block a user