nvc0: implement new stream output interface

This commit is contained in:
Christoph Bumiller
2011-12-09 18:46:09 +01:00
parent 14193da589
commit 14bd9d7648
15 changed files with 372 additions and 189 deletions
+8 -5
View File
@@ -185,15 +185,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV01_SUBCHAN_OBJECT 0x00000000
#define NV84_SUBCHAN_QUERY_ADDRESS_HIGH 0x00000010
#define NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH 0x00000010
#define NV84_SUBCHAN_QUERY_ADDRESS_LOW 0x00000014
#define NV84_SUBCHAN_SEMAPHORE_ADDRESS_LOW 0x00000014
#define NV84_SUBCHAN_QUERY_COUNTER 0x00000018
#define NV84_SUBCHAN_SEMAPHORE_SEQUENCE 0x00000018
#define NV84_SUBCHAN_QUERY_GET 0x0000001c
#define NV84_SUBCHAN_SEMAPHORE_TRIGGER 0x0000001c
#define NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL 0x00000001
#define NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG 0x00000002
#define NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL 0x00000004
#define NV84_SUBCHAN_QUERY_INTR 0x00000020
#define NV84_SUBCHAN_NOTIFY_INTR 0x00000020
#define NV84_SUBCHAN_WRCACHE_FLUSH 0x00000024
+4 -4
View File
@@ -130,11 +130,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_TFB_BUFFER_SIZE(i0) (0x0000038c + 0x20*(i0))
#define NVC0_3D_TFB_PRIMITIVE_ID(i0) (0x00000390 + 0x20*(i0))
#define NVC0_3D_TFB_BUFFER_OFFSET(i0) (0x00000390 + 0x20*(i0))
#define NVC0_3D_TFB_UNK07X0(i0) (0x00000700 + 0x10*(i0))
#define NVC0_3D_TFB_UNK07X0__ESIZE 0x00000010
#define NVC0_3D_TFB_UNK07X0__LEN 0x00000004
#define NVC0_3D_TFB_STREAM(i0) (0x00000700 + 0x10*(i0))
#define NVC0_3D_TFB_STREAM__ESIZE 0x00000010
#define NVC0_3D_TFB_STREAM__LEN 0x00000004
#define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0))
#define NVC0_3D_TFB_VARYING_COUNT__ESIZE 0x00000010
+1 -1
View File
@@ -77,7 +77,7 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0)
}
for (i = 0; i < nvc0->num_tfbbufs; ++i)
pipe_resource_reference(&nvc0->tfbbuf[i], NULL);
pipe_so_target_reference(&nvc0->tfbbuf[i], NULL);
}
static void
+14 -9
View File
@@ -49,14 +49,14 @@
#define NVC0_NEW_CONSTBUF (1 << 18)
#define NVC0_NEW_TEXTURES (1 << 19)
#define NVC0_NEW_SAMPLERS (1 << 20)
#define NVC0_NEW_TFB (1 << 21)
#define NVC0_NEW_TFB_BUFFERS (1 << 22)
#define NVC0_NEW_TFB_TARGETS (1 << 21)
#define NVC0_BUFCTX_CONSTANT 0
#define NVC0_BUFCTX_FRAME 1
#define NVC0_BUFCTX_VERTEX 2
#define NVC0_BUFCTX_TEXTURES 3
#define NVC0_BUFCTX_COUNT 4
#define NVC0_BUFCTX_TFB 4
#define NVC0_BUFCTX_COUNT 5
struct nvc0_context {
struct nouveau_context base;
@@ -75,6 +75,7 @@ struct nvc0_context {
boolean prim_restart;
boolean early_z;
uint16_t scissor;
boolean rasterizer_discard;
uint8_t num_vtxbufs;
uint8_t num_vtxelts;
uint8_t num_textures[5];
@@ -84,6 +85,7 @@ struct nvc0_context {
uint8_t clip_enable;
uint32_t clip_mode;
uint32_t uniform_buffer_bound[5];
struct nvc0_transform_feedback_state *tfb;
} state;
struct nvc0_blend_stateobj *blend;
@@ -125,10 +127,9 @@ struct nvc0_context {
boolean vbo_push_hint;
struct nvc0_transform_feedback_state *tfb;
struct pipe_resource *tfbbuf[4];
uint8_t tfbbuf_dirty;
struct pipe_stream_output_target *tfbbuf[4];
unsigned num_tfbbufs;
unsigned tfb_offset[4];
struct draw_context *draw;
};
@@ -170,10 +171,14 @@ void nvc0_program_library_upload(struct nvc0_context *);
/* nvc0_query.c */
void nvc0_init_query_functions(struct nvc0_context *);
void nvc0_query_pushbuf_submit(struct nvc0_context *nvc0,
struct pipe_query *pq, unsigned result_offset);
void nvc0_query_pushbuf_submit(struct nouveau_channel *,
struct pipe_query *, unsigned result_offset);
void nvc0_query_fifo_wait(struct nouveau_channel *, struct pipe_query *);
void nvc0_so_target_save_offset(struct pipe_context *,
struct pipe_stream_output_target *, unsigned i,
boolean *serialize);
#define NVC0_QUERY_TFB_BUFFER_OFFSETS (PIPE_QUERY_TYPES + 0)
#define NVC0_QUERY_TFB_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
/* nvc0_shader_state.c */
void nvc0_vertprog_validate(struct nvc0_context *);
+43
View File
@@ -480,6 +480,40 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
return 0;
}
/**
 * Derive a program's transform feedback layout from its stream output info.
 *
 * The captured components are visited buffer by buffer (0..3); within one
 * buffer they keep the order of pso->output[] and, per output, the order of
 * the bits set in register_mask. For every captured component the matching
 * slot from info->out[] is appended to varying_index[].
 *
 * @param info  translated program info providing the output slot numbers
 * @param pso   stream output description attached to the shader
 * @return newly allocated state (released with FREE), or NULL if the
 *         allocation failed
 */
static struct nvc0_transform_feedback_state *
nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info,
                              const struct pipe_stream_output_info *pso)
{
   struct nvc0_transform_feedback_state *state;
   int buf, out, comp;
   int next = 0; /* write cursor into state->varying_index[] */

   /* room for the worst case of 4 components per declared output */
   state = MALLOC(sizeof(*state) + pso->num_outputs * 4 * sizeof(uint8_t));
   if (state == NULL)
      return NULL;

   for (buf = 0; buf < 4; ++buf) {
      state->varying_count[buf] = 0;

      for (out = 0; out < pso->num_outputs; ++out) {
         if (pso->output[out].output_buffer == buf) {
            for (comp = 0; comp < 4; ++comp) {
               if (pso->output[out].register_mask & (1 << comp)) {
                  state->varying_index[next] =
                     info->out[pso->output[out].register_index].slot[comp];
                  ++next;
                  state->varying_count[buf]++;
               }
            }
         }
      }

      /* default stride: 4 per captured component */
      state->stride[buf] = state->varying_count[buf] * 4;
   }

   /* an explicit stride only ever replaces the one of buffer 0 */
   if (pso->stride != 0)
      state->stride[0] = pso->stride;

   return state;
}
#ifdef DEBUG
static void
nvc0_program_dump(struct nvc0_program *prog)
@@ -577,6 +611,10 @@ nvc0_program_translate(struct nvc0_program *prog)
if (info->io.globalAccess)
prog->hdr[0] |= 1 << 16;
if (prog->pipe.stream_output.num_outputs)
prog->tfb = nvc0_program_create_tfb_state(info,
&prog->pipe.stream_output);
out:
FREE(info);
return !ret;
@@ -675,6 +713,11 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
FREE(prog->immd_data);
if (prog->relocs)
FREE(prog->relocs);
if (prog->tfb) {
if (nvc0->state.tfb == prog->tfb)
nvc0->state.tfb = NULL;
FREE(prog->tfb);
}
memset(prog->hdr, 0, sizeof(prog->hdr));
+10 -1
View File
@@ -6,6 +6,14 @@
#define NVC0_CAP_MAX_PROGRAM_TEMPS 128
/*
 * Transform feedback (stream output) layout of a program.
 * The 4-element arrays are indexed by output buffer.
 */
struct nvc0_transform_feedback_state {
/* per-buffer stride; filled in as varying_count[b] * 4 unless an explicit
 * stride is given for buffer 0 */
uint32_t stride[4];
/* number of captured components per buffer */
uint8_t varying_count[4];
/* trailing array allocated together with the struct: one output slot per
 * captured component, stored buffer after buffer */
uint8_t varying_index[0];
};
#define NVC0_SHADER_HEADER_SIZE (20 * 4)
struct nvc0_program {
@@ -31,7 +39,6 @@ struct nvc0_program {
uint8_t clip_enable; /* only applies if num_ucps == 0 */
uint8_t edgeflag;
uint8_t num_ucps;
uint8_t out_pos[PIPE_MAX_SHADER_OUTPUTS];
} vp;
struct {
uint8_t early_z;
@@ -44,6 +51,8 @@ struct nvc0_program {
void *relocs;
struct nvc0_transform_feedback_state *tfb;
struct nouveau_resource *res;
};
+15 -6
View File
@@ -273,7 +273,8 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
{
struct push_context ctx;
unsigned i, index_size;
unsigned inst = info->instance_count;
unsigned inst_count = info->instance_count;
unsigned vert_count = info->count;
boolean apply_bias = info->indexed && info->index_bias;
init_push_context(nvc0, &ctx);
@@ -312,26 +313,34 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
index_size = 0;
ctx.primitive_restart = FALSE;
ctx.restart_index = 0;
if (info->count_from_stream_output) {
struct pipe_context *pipe = &nvc0->base.pipe;
struct nvc0_so_target *targ;
targ = nvc0_so_target(info->count_from_stream_output);
pipe->get_query_result(pipe, targ->pq, TRUE, &vert_count);
vert_count /= targ->stride;
}
}
ctx.instance_id = info->start_instance;
ctx.prim = nvc0_prim_gl(info->mode);
while (inst--) {
while (inst_count--) {
BEGIN_RING(ctx.chan, RING_3D(VERTEX_BEGIN_GL), 1);
OUT_RING (ctx.chan, ctx.prim);
switch (index_size) {
case 0:
emit_vertices_seq(&ctx, info->start, info->count);
emit_vertices_seq(&ctx, info->start, vert_count);
break;
case 1:
emit_vertices_i08(&ctx, info->start, info->count);
emit_vertices_i08(&ctx, info->start, vert_count);
break;
case 2:
emit_vertices_i16(&ctx, info->start, info->count);
emit_vertices_i16(&ctx, info->start, vert_count);
break;
case 4:
emit_vertices_i32(&ctx, info->start, info->count);
emit_vertices_i32(&ctx, info->start, vert_count);
break;
default:
assert(0);
+83 -52
View File
@@ -27,7 +27,8 @@
struct nvc0_query {
uint32_t *data;
uint32_t type;
uint16_t type;
uint16_t index;
uint32_t sequence;
struct nouveau_bo *bo;
uint32_t base;
@@ -103,7 +104,6 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type)
switch (type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
q->rotate = 32;
space = NVC0_QUERY_ALLOC_SPACE;
break;
@@ -112,6 +112,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type)
space = 512;
break;
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
q->is64bit = TRUE;
space = 64;
break;
@@ -123,7 +124,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type)
case PIPE_QUERY_PRIMITIVES_EMITTED:
space = 32;
break;
case NVC0_QUERY_TFB_BUFFER_OFFSETS:
case NVC0_QUERY_TFB_BUFFER_OFFSET:
space = 16;
break;
default:
@@ -141,7 +142,9 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type)
/* we advance before query_begin ! */
q->offset -= q->rotate;
q->data -= q->rotate / sizeof(*q->data);
}
} else
if (!q->is64bit)
q->data[0] = 0; /* initialize sequence */
return (struct pipe_query *)q;
}
@@ -176,8 +179,6 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
struct nouveau_channel *chan = nvc0->screen->base.channel;
struct nvc0_query *q = nvc0_query(pq);
const int index = 0; /* vertex stream */
/* For occlusion queries we have to change the storage, because a previous
* query might set the initial render condition to FALSE even *after* we re-
* initialized it to TRUE.
@@ -188,12 +189,12 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
/* XXX: can we do this with the GPU, and sync with respect to a previous
* query ?
*/
q->data[0] = q->sequence; /* initialize sequence */
q->data[1] = 1; /* initial render condition = TRUE */
q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
q->data[5] = 0;
}
if (!q->is64bit)
q->data[0] = q->sequence++; /* the previously used one */
q->sequence++;
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -208,14 +209,17 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
}
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
nvc0_query_get(chan, q, 0x10, 0x06805002 | (q->index << 5));
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
nvc0_query_get(chan, q, 0x10, 0x05805002 | (index << 5));
nvc0_query_get(chan, q, 0x10, 0x05805002 | (q->index << 5));
break;
case PIPE_QUERY_SO_STATISTICS:
nvc0_query_get(chan, q, 0x20, 0x05805002 | (index << 5));
nvc0_query_get(chan, q, 0x30, 0x06805002 | (index << 5));
nvc0_query_get(chan, q, 0x20, 0x05805002 | (q->index << 5));
nvc0_query_get(chan, q, 0x30, 0x06805002 | (q->index << 5));
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
nvc0_query_get(chan, q, 0x10, 0x03005002 | (q->index << 5));
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_TIME_ELAPSED:
@@ -247,15 +251,11 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
struct nouveau_channel *chan = nvc0->screen->base.channel;
struct nvc0_query *q = nvc0_query(pq);
const int index = 0; /* for multiple vertex streams */
if (!q->active) {
/* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
if (q->rotate)
nvc0_query_rotate(nvc0, q);
else
if (!q->is64bit)
q->data[0] = q->sequence++;
q->sequence++;
}
q->ready = FALSE;
q->active = FALSE;
@@ -268,17 +268,20 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 0);
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
nvc0_query_get(chan, q, 0, 0x06805002 | (index << 5));
nvc0_query_get(chan, q, 0, 0x06805002 | (q->index << 5));
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5));
nvc0_query_get(chan, q, 0, 0x05805002 | (q->index << 5));
break;
case PIPE_QUERY_SO_STATISTICS:
nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5));
nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
nvc0_query_get(chan, q, 0x00, 0x05805002 | (q->index << 5));
nvc0_query_get(chan, q, 0x10, 0x06805002 | (q->index << 5));
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
nvc0_query_get(chan, q, 0x00, 0x02005002 | (index << 5));
/* TODO: How do we sum over all streams for render condition ? */
/* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */
nvc0_query_get(chan, q, 0x00, 0x03005002 | (q->index << 5));
nvc0_query_get(chan, q, 0x20, 0x00005002);
break;
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIMESTAMP_DISJOINT:
@@ -300,11 +303,9 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
nvc0_query_get(chan, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
nvc0_query_get(chan, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
break;
case NVC0_QUERY_TFB_BUFFER_OFFSETS:
nvc0_query_get(chan, q, 0x00, 0x1d005002); /* TFB, BUFFER_OFFSET */
nvc0_query_get(chan, q, 0x04, 0x1d005022);
nvc0_query_get(chan, q, 0x08, 0x1d005042);
nvc0_query_get(chan, q, 0x0c, 0x1d005062);
case NVC0_QUERY_TFB_BUFFER_OFFSET:
/* indexed by TFB buffer instead of by vertex stream */
nvc0_query_get(chan, q, 0x00, 0x0d005002 | (q->index << 5));
break;
default:
assert(0);
@@ -315,7 +316,14 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
static INLINE boolean
nvc0_query_ready(struct nvc0_query *q)
{
return q->ready || (!q->is64bit && (q->data[0] == q->sequence));
if (q->is64bit) {
if (nouveau_bo_map(q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_NOWAIT))
return FALSE;
nouveau_bo_unmap(q->bo);
return TRUE;
} else {
return q->data[0] == q->sequence;
}
}
static INLINE boolean
@@ -355,14 +363,12 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
switch (q->type) {
case PIPE_QUERY_GPU_FINISHED:
res32[0] = 0;
res8[0] = TRUE;
break;
case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
res64[0] = q->data[1] - q->data[5];
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
res32[0] = 0;
res8[0] = q->data[1] != q->data[5];
break;
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
@@ -374,15 +380,13 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
res64[1] = data64[2] - data64[6];
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
res32[0] = 0;
res8[0] = !q->data[1];
res8[0] = data64[0] != data64[2];
break;
case PIPE_QUERY_TIMESTAMP:
res64[0] = data64[1];
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
res64[0] = 1000000000;
res32[2] = 0;
res8[8] = (data64[1] == data64[3]) ? FALSE : TRUE;
break;
case PIPE_QUERY_TIME_ELAPSED:
@@ -392,11 +396,8 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
for (i = 0; i < 10; ++i)
res64[i] = data64[i * 2] - data64[24 + i * 2];
break;
case NVC0_QUERY_TFB_BUFFER_OFFSETS:
res32[0] = q->data[0];
res32[1] = q->data[1];
res32[2] = q->data[2];
res32[3] = q->data[3];
case NVC0_QUERY_TFB_BUFFER_OFFSET:
res32[0] = q->data[1];
break;
default:
return FALSE;
@@ -405,6 +406,23 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
return TRUE;
}
/*
 * Emit a semaphore ACQUIRE_EQUAL on the channel that compares the word at
 * the query's buffer location against q->sequence.
 * Presumably this stalls command processing until the query result has
 * been written - TODO confirm against hardware documentation.
 *
 * chan: channel to emit the methods on
 * pq:   query to wait for
 */
void
nvc0_query_fifo_wait(struct nouveau_channel *chan, struct pipe_query *pq)
{
struct nvc0_query *q = nvc0_query(pq);
unsigned offset = q->offset;
/* For overflow predicate queries the word waited on lives at +0x20, where
 * nvc0_query_end requests an additional query result to sync on. */
if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20;
/* 1 method header + 4 data words, 2 of them relocations */
MARK_RING (chan, 5, 2);
BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
OUT_RELOCh(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
OUT_RELOCl(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
OUT_RING (chan, q->sequence);
/* NOTE(review): meaning of bit 12 is not visible here - confirm */
OUT_RING (chan, (1 << 12) |
NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
}
static void
nvc0_render_condition(struct pipe_context *pipe,
struct pipe_query *pq, uint mode)
@@ -427,9 +445,8 @@ nvc0_render_condition(struct pipe_context *pipe,
/* NOTE: comparison of 2 queries only works if both have completed */
switch (q->type) {
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
/* query writes 1 if there was no overflow */
cond = negated ? NVC0_3D_COND_MODE_RES_NON_ZERO :
NVC0_3D_COND_MODE_EQUAL;
cond = negated ? NVC0_3D_COND_MODE_EQUAL :
NVC0_3D_COND_MODE_NOT_EQUAL;
wait = TRUE;
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -450,14 +467,8 @@ nvc0_render_condition(struct pipe_context *pipe,
break;
}
if (wait) {
MARK_RING (chan, 5, 2);
BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4);
OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
OUT_RING (chan, q->sequence);
OUT_RING (chan, 0x00001001);
}
if (wait)
nvc0_query_fifo_wait(chan, pq);
MARK_RING (chan, 4, 2);
BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3);
@@ -467,13 +478,33 @@ nvc0_render_condition(struct pipe_context *pipe,
}
void
nvc0_query_pushbuf_submit(struct nvc0_context *nvc0,
nvc0_query_pushbuf_submit(struct nouveau_channel *chan,
struct pipe_query *pq, unsigned result_offset)
{
struct nvc0_query *q = nvc0_query(pq);
nouveau_pushbuf_submit(nvc0->screen->base.channel,
q->bo, q->offset + result_offset, 4);
#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
nouveau_pushbuf_submit(chan, q->bo, q->offset + result_offset, 4 |
NVC0_IB_ENTRY_1_NO_PREFETCH);
}
/*
 * Record the current write offset of a stream output target by ending the
 * target's NVC0_QUERY_TFB_BUFFER_OFFSET query for TFB buffer slot 'index'.
 *
 * pipe:      context the target is bound to
 * ptarg:     stream output target whose offset is saved
 * index:     TFB buffer slot the target occupies; stored as the query index
 * serialize: in/out flag; when TRUE a SERIALIZE method is emitted first and
 *            the flag is cleared, so a caller saving several targets in a
 *            row only serializes once
 */
void
nvc0_so_target_save_offset(struct pipe_context *pipe,
struct pipe_stream_output_target *ptarg,
unsigned index, boolean *serialize)
{
struct nvc0_so_target *targ = nvc0_so_target(ptarg);
if (*serialize) {
struct nouveau_channel *chan = nvc0_context(pipe)->screen->base.channel;
*serialize = FALSE;
IMMED_RING(chan, RING_3D(SERIALIZE), 0);
}
/* the offset query is indexed by TFB buffer, not by vertex stream */
nvc0_query(targ->pq)->index = index;
nvc0_query_end(pipe, targ->pq);
}
void
+7 -1
View File
@@ -104,9 +104,15 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 1;
case PIPE_CAP_TIMER_QUERY:
case PIPE_CAP_OCCLUSION_QUERY:
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
return 1;
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
return 0;
return 4;
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_ATTRIBS:
return 4;
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
return 128;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_INDEP_BLEND_FUNC:
+63 -41
View File
@@ -90,7 +90,9 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
return FALSE;
}
return nvc0_program_upload_code(nvc0, prog);
if (likely(prog->code_size))
return nvc0_program_upload_code(nvc0, prog);
return TRUE; /* stream output info only */
}
void
@@ -212,14 +214,15 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
struct nouveau_channel *chan = nvc0->screen->base.channel;
struct nvc0_program *gp = nvc0->gmtyprog;
if (!gp) {
if (gp)
nvc0_program_validate(nvc0, gp);
/* we allow GPs with no code for specifying stream output state only */
if (!gp || !gp->code_size) {
BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
OUT_RING (chan, 0x40);
IMMED_RING(chan, RING_3D(LAYER), 0);
return;
}
if (!nvc0_program_validate(nvc0, gp))
return;
nvc0_program_update_context_state(nvc0, gp, 3);
BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
@@ -234,57 +237,76 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
nvc0_program_validate_clip(nvc0, gp);
}
/* It *is* kind of shader related. We need to inspect the program
* to get the output locations right.
*/
void
nvc0_tfb_validate(struct nvc0_context *nvc0)
{
struct nouveau_channel *chan = nvc0->screen->base.channel;
struct nvc0_program *vp;
struct nvc0_transform_feedback_state *tfb = nvc0->tfb;
int b;
struct nvc0_transform_feedback_state *tfb;
unsigned b, n, i;
BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
if (!tfb) {
OUT_RING(chan, 0);
return;
if (nvc0->gmtyprog) tfb = nvc0->gmtyprog->tfb;
else
if (nvc0->tevlprog) tfb = nvc0->tevlprog->tfb;
else
tfb = nvc0->vertprog->tfb;
IMMED_RING(chan, RING_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 1 : 0);
if (tfb && tfb != nvc0->state.tfb) {
uint8_t var[128];
for (n = 0, b = 0; b < 4; n += tfb->varying_count[b++]) {
if (tfb->varying_count[b]) {
BEGIN_RING(chan, RING_3D(TFB_STREAM(b)), 3);
OUT_RING (chan, 0);
OUT_RING (chan, tfb->varying_count[b]);
OUT_RING (chan, tfb->stride[b]);
for (i = 0; i < tfb->varying_count[b]; ++i)
var[i] = tfb->varying_index[n + i];
for (; i & 3; ++i)
var[i] = 0; /* zero rest of method word bits */
BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(b, 0)), i / 4);
OUT_RINGp (chan, var, i / 4);
if (nvc0->tfbbuf[b])
nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b];
} else {
IMMED_RING(chan, RING_3D(TFB_VARYING_COUNT(b)), 0);
}
}
}
OUT_RING(chan, 1);
nvc0->state.tfb = tfb;
vp = nvc0->vertprog ? nvc0->vertprog : nvc0->gmtyprog;
if (!(nvc0->dirty & NVC0_NEW_TFB_TARGETS))
return;
nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TFB);
for (b = 0; b < nvc0->num_tfbbufs; ++b) {
uint8_t idx, var[128];
int i, n;
struct nv04_resource *buf = nv04_resource(nvc0->tfbbuf[b]);
struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]);
struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);
BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 5);
OUT_RING (chan, 1);
OUT_RESRCh(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR);
OUT_RESRCl(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR);
OUT_RING (chan, buf->base.width0 - nvc0->tfb_offset[b]);
OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID <- offset ? */
if (tfb)
targ->stride = tfb->stride[b];
if (!(nvc0->dirty & NVC0_NEW_TFB))
if (!(nvc0->tfbbuf_dirty & (1 << b)))
continue;
BEGIN_RING(chan, RING_3D(TFB_UNK07X0(b)), 3);
OUT_RING (chan, 0);
OUT_RING (chan, tfb->varying_count[b]);
OUT_RING (chan, tfb->stride[b]);
n = b ? tfb->varying_count[b - 1] : 0;
i = 0;
for (; i < tfb->varying_count[b]; ++i) {
idx = tfb->varying_index[n + i];
var[i] = vp->vp.out_pos[idx >> 2] + (idx & 3);
if (!targ->clean)
nvc0_query_fifo_wait(chan, targ->pq);
BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 5);
OUT_RING (chan, 1);
OUT_RESRCh(chan, buf, targ->pipe.buffer_offset, NOUVEAU_BO_WR);
OUT_RESRCl(chan, buf, targ->pipe.buffer_offset, NOUVEAU_BO_WR);
OUT_RING (chan, targ->pipe.buffer_size);
if (!targ->clean) {
nvc0_query_pushbuf_submit(chan, targ->pq, 0x4);
} else {
OUT_RING(chan, 0); /* TFB_BUFFER_OFFSET */
targ->clean = FALSE;
}
for (; i & 3; ++i)
var[i] = 0;
BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(b, 0)), i / 4);
OUT_RINGp (chan, var, i / 4);
nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TFB, buf, NOUVEAU_BO_WR);
}
for (; b < 4; ++b)
IMMED_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 0);
+60 -60
View File
@@ -520,7 +520,12 @@ nvc0_sp_state_create(struct pipe_context *pipe,
return NULL;
prog->type = type;
prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
if (cso->tokens)
prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
if (cso->stream_output.num_outputs)
prog->pipe.stream_output = cso->stream_output;
return (void *)prog;
}
@@ -747,72 +752,75 @@ nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
nvc0->dirty |= NVC0_NEW_VERTEX;
}
static void *
nvc0_tfb_state_create(struct pipe_context *pipe,
const struct pipe_stream_output_info *pso)
static struct pipe_stream_output_target *
nvc0_so_target_create(struct pipe_context *pipe,
struct pipe_resource *res,
unsigned offset, unsigned size)
{
struct nvc0_transform_feedback_state *so;
int n = 0;
int i, c, b;
so = MALLOC(sizeof(*so) + pso->num_outputs * 4 * sizeof(uint8_t));
if (!so)
struct nvc0_so_target *targ = MALLOC_STRUCT(nvc0_so_target);
if (!targ)
return NULL;
for (b = 0; b < 4; ++b) {
for (i = 0; i < pso->num_outputs; ++i) {
if (pso->output[i].output_buffer != b)
continue;
for (c = 0; c < 4; ++c) {
if (!(pso->output[i].register_mask & (1 << c)))
continue;
so->varying_count[b]++;
so->varying_index[n++] = (pso->output[i].register_index << 2) | c;
}
}
so->stride[b] = so->varying_count[b] * 4;
targ->pq = pipe->create_query(pipe, NVC0_QUERY_TFB_BUFFER_OFFSET);
if (!targ->pq) {
FREE(targ);
return NULL;
}
if (pso->stride)
so->stride[0] = pso->stride;
targ->clean = TRUE;
return so;
targ->pipe.buffer_size = size;
targ->pipe.buffer_offset = offset;
targ->pipe.context = pipe;
targ->pipe.buffer = NULL;
pipe_resource_reference(&targ->pipe.buffer, res);
pipe_reference_init(&targ->pipe.reference, 1);
return &targ->pipe;
}
static void
nvc0_tfb_state_delete(struct pipe_context *pipe, void *hwcso)
nvc0_so_target_destroy(struct pipe_context *pipe,
struct pipe_stream_output_target *ptarg)
{
FREE(hwcso);
struct nvc0_so_target *targ = nvc0_so_target(ptarg);
pipe->destroy_query(pipe, targ->pq);
FREE(targ);
}
static void
nvc0_tfb_state_bind(struct pipe_context *pipe, void *hwcso)
{
nvc0_context(pipe)->tfb = hwcso;
nvc0_context(pipe)->dirty |= NVC0_NEW_TFB;
}
static void
nvc0_set_transform_feedback_buffers(struct pipe_context *pipe,
struct pipe_resource **buffers,
int *offsets,
int num_buffers)
nvc0_set_transform_feedback_targets(struct pipe_context *pipe,
unsigned num_targets,
struct pipe_stream_output_target **targets,
unsigned append_mask)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
int i;
unsigned i;
boolean serialize = TRUE;
assert(num_buffers >= 0 && num_buffers <= 4); /* why signed ? */
assert(num_targets <= 4);
for (i = 0; i < num_buffers; ++i) {
assert(offsets[i] >= 0);
nvc0->tfb_offset[i] = offsets[i];
pipe_resource_reference(&nvc0->tfbbuf[i], buffers[i]);
for (i = 0; i < num_targets; ++i) {
if (nvc0->tfbbuf[i] == targets[i] && (append_mask & (1 << i)))
continue;
nvc0->tfbbuf_dirty |= 1 << i;
if (nvc0->tfbbuf[i] && nvc0->tfbbuf[i] != targets[i])
nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize);
if (targets[i] && !(append_mask & (1 << i)))
nvc0_so_target(targets[i])->clean = TRUE;
pipe_so_target_reference(&nvc0->tfbbuf[i], targets[i]);
}
for (; i < nvc0->num_tfbbufs; ++i)
pipe_resource_reference(&nvc0->tfbbuf[i], NULL);
for (; i < nvc0->num_tfbbufs; ++i) {
nvc0->tfbbuf_dirty |= 1 << i;
nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize);
pipe_so_target_reference(&nvc0->tfbbuf[i], NULL);
}
nvc0->num_tfbbufs = num_targets;
nvc0->num_tfbbufs = num_buffers;
nvc0->dirty |= NVC0_NEW_TFB_BUFFERS;
if (nvc0->tfbbuf_dirty)
nvc0->dirty |= NVC0_NEW_TFB_TARGETS;
}
void
@@ -871,17 +879,9 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->set_vertex_buffers = nvc0_set_vertex_buffers;
pipe->set_index_buffer = nvc0_set_index_buffer;
#if 0
pipe->create_stream_output_state = nvc0_tfb_state_create;
pipe->delete_stream_output_state = nvc0_tfb_state_delete;
pipe->bind_stream_output_state = nvc0_tfb_state_bind;
pipe->set_stream_output_buffers = nvc0_set_transform_feedback_buffers;
#else
(void)nvc0_tfb_state_create;
(void)nvc0_tfb_state_delete;
(void)nvc0_tfb_state_bind;
(void)nvc0_set_transform_feedback_buffers;
#endif
pipe->create_stream_output_target = nvc0_so_target_create;
pipe->stream_output_target_destroy = nvc0_so_target_destroy;
pipe->set_stream_output_targets = nvc0_set_transform_feedback_targets;
pipe->redefine_user_buffer = u_default_redefine_user_buffer;
}
+14 -2
View File
@@ -428,6 +428,7 @@ nvc0_validate_derived_1(struct nvc0_context *nvc0)
{
struct nouveau_channel *chan = nvc0->screen->base.channel;
boolean early_z;
boolean rasterizer_discard;
early_z = nvc0->fragprog->fp.early_z && !nvc0->zsa->pipe.alpha.enabled;
@@ -435,6 +436,16 @@ nvc0_validate_derived_1(struct nvc0_context *nvc0)
nvc0->state.early_z = early_z;
IMMED_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), early_z);
}
rasterizer_discard = (!nvc0->fragprog || !nvc0->fragprog->hdr[18]) &&
!nvc0->zsa->pipe.depth.enabled && !nvc0->zsa->pipe.stencil[0].enabled;
rasterizer_discard = rasterizer_discard ||
nvc0->rast->pipe.rasterizer_discard;
if (rasterizer_discard != nvc0->state.rasterizer_discard) {
nvc0->state.rasterizer_discard = rasterizer_discard;
IMMED_RING(chan, RING_3D(RASTERIZE_ENABLE), !rasterizer_discard);
}
}
static void
@@ -484,13 +495,14 @@ static struct state_validate {
{ nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
{ nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
{ nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
{ nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA },
{ nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
NVC0_NEW_RASTERIZER },
{ nvc0_validate_clip, NVC0_NEW_CLIP },
{ nvc0_constbufs_validate, NVC0_NEW_CONSTBUF },
{ nvc0_validate_textures, NVC0_NEW_TEXTURES },
{ nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
{ nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
{ nvc0_tfb_validate, NVC0_NEW_TFB | NVC0_NEW_TFB_BUFFERS }
{ nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG }
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
+11 -5
View File
@@ -50,11 +50,17 @@ struct nvc0_vertex_stateobj {
struct nvc0_vertex_element element[0];
};
/* will have to lookup index -> location qualifier from nvc0_program */
struct nvc0_transform_feedback_state {
uint32_t stride[4];
uint8_t varying_count[4];
uint8_t varying_index[0];
struct nvc0_so_target {
struct pipe_stream_output_target pipe;
struct pipe_query *pq;
unsigned stride;
boolean clean;
};
/*
 * Downcast a gallium stream output target to the driver's wrapper.
 * The cast relies on 'pipe' being the first member of struct nvc0_so_target.
 */
static INLINE struct nvc0_so_target *
nvc0_so_target(struct pipe_stream_output_target *ptarg)
{
return (struct nvc0_so_target *)ptarg;
}
#endif
+4 -2
View File
@@ -744,7 +744,8 @@ nvc0_blitctx_prepare_state(struct nvc0_blitctx *blit)
IMMED_RING(chan, RING_3D(STENCIL_ENABLE), 0);
IMMED_RING(chan, RING_3D(ALPHA_TEST_ENABLE), 0);
/* transform feedback ? */
/* disable transform feedback */
IMMED_RING(chan, RING_3D(TFB_ENABLE), 0);
}
static void
@@ -830,7 +831,8 @@ nvc0_blitctx_post_blit(struct nvc0_context *nvc0, struct nvc0_blitctx *blit)
NVC0_NEW_RASTERIZER | NVC0_NEW_ZSA | NVC0_NEW_BLEND |
NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS |
NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG |
NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG);
NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG |
NVC0_NEW_TFB_TARGETS);
}
static void
+35
View File
@@ -569,6 +569,38 @@ nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
}
}
/*
 * Draw with the vertex count taken from a stream output target
 * (info->count_from_stream_output) instead of from info->count.
 *
 * nvc0: context to draw with
 * info: draw parameters; mode, instance_count and
 *       count_from_stream_output are read here
 */
static void
nvc0_draw_stream_output(struct nvc0_context *nvc0,
const struct pipe_draw_info *info)
{
struct nouveau_channel *chan = nvc0->screen->base.channel;
struct nvc0_so_target *so = nvc0_so_target(info->count_from_stream_output);
struct nv04_resource *res = nv04_resource(so->pipe.buffer);
unsigned mode = nvc0_prim_gl(info->mode);
unsigned num_instances = info->instance_count;
/* If the buffer is still flagged as being written by the GPU, clear the
 * flag, serialize, wait for the target's offset query and flush the
 * vertex array before reading from it. */
if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
IMMED_RING(chan, RING_3D(SERIALIZE), 0);
nvc0_query_fifo_wait(chan, so->pq);
IMMED_RING(chan, RING_3D(VERTEX_ARRAY_FLUSH), 0);
}
/* one BEGIN/END pair per instance */
while (num_instances--) {
BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
OUT_RING (chan, mode);
BEGIN_RING(chan, RING_3D(DRAW_TFB_BASE), 1);
OUT_RING (chan, 0);
BEGIN_RING(chan, RING_3D(DRAW_TFB_STRIDE), 1);
OUT_RING (chan, so->stride);
/* Only the method header is emitted here; the data word is submitted
 * from the query buffer at result offset 0x4 (presumably the saved
 * buffer offset in bytes - TODO confirm). */
BEGIN_RING(chan, RING_3D(DRAW_TFB_BYTES), 1);
nvc0_query_pushbuf_submit(chan, so->pq, 0x4);
IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0);
/* every instance after the first advances the instance id */
mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
}
}
void
nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
@@ -615,6 +647,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nvc0->base.vbo_dirty = FALSE;
}
if (unlikely(info->count_from_stream_output)) {
nvc0_draw_stream_output(nvc0, info);
} else
if (!info->indexed) {
nvc0_draw_arrays(nvc0,
info->mode, info->start, info->count,