nv40: simple swtnl path (half broken, but getting there)

This commit is contained in:
Ben Skeggs
2008-03-13 18:29:56 +11:00
parent 7d2c63e909
commit e1cf3f00e5
12 changed files with 537 additions and 97 deletions
+5 -1
View File
@@ -74,8 +74,12 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv40_init_state_functions(nv40);
nv40_init_miptree_functions(nv40);
/* Create, configure, and install fallback swtnl path */
nv40->draw = draw_create();
assert(nv40->draw);
draw_wide_point_threshold(nv40->draw, 9999999.0);
draw_wide_line_threshold(nv40->draw, 9999999.0);
draw_enable_line_stipple(nv40->draw, FALSE);
draw_enable_point_sprites(nv40->draw, FALSE);
draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40));
return &nv40->pipe;
+22 -3
View File
@@ -116,7 +116,20 @@ struct nv40_context {
/* HW state derived from pipe states */
struct nv40_state state;
unsigned fallback;
struct {
struct nv40_vertex_program *vertprog;
unsigned nr_attribs;
unsigned hw[PIPE_MAX_SHADER_INPUTS];
unsigned draw[PIPE_MAX_SHADER_INPUTS];
unsigned emit[PIPE_MAX_SHADER_INPUTS];
} swtnl;
enum {
HW, SWTNL, SWRAST
} render_mode;
unsigned fallback_swtnl;
unsigned fallback_swrast;
/* Context state */
unsigned dirty;
@@ -166,6 +179,10 @@ extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen);
/* nv40_draw.c */
extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40);
extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe,
struct pipe_buffer *idxbuf,
unsigned ib_size, unsigned mode,
unsigned start, unsigned count);
/* nv40_vertprog.c */
extern void nv40_vertprog_destroy(struct nv40_context *,
@@ -179,8 +196,9 @@ extern void nv40_fragprog_destroy(struct nv40_context *,
extern void nv40_fragtex_bind(struct nv40_context *);
/* nv40_state.c and friends */
extern void nv40_emit_hw_state(struct nv40_context *nv40);
extern void nv40_state_tex_update(struct nv40_context *nv40);
extern boolean nv40_state_validate(struct nv40_context *nv40);
extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40);
extern void nv40_state_emit(struct nv40_context *nv40);
extern struct nv40_state_entry nv40_state_clip;
extern struct nv40_state_entry nv40_state_rasterizer;
extern struct nv40_state_entry nv40_state_scissor;
@@ -194,6 +212,7 @@ extern struct nv40_state_entry nv40_state_viewport;
extern struct nv40_state_entry nv40_state_framebuffer;
extern struct nv40_state_entry nv40_state_fragtex;
extern struct nv40_state_entry nv40_state_vbo;
extern struct nv40_state_entry nv40_state_vtxfmt;
/* nv40_vbo.c */
extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode,
+316 -28
View File
@@ -1,62 +1,350 @@
#include "draw/draw_private.h"
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "draw/draw_context.h"
#include "draw/draw_vertex.h"
#include "draw/draw_private.h"
#include "nv40_context.h"
#define NV40_SHADER_NO_FUCKEDNESS
#include "nv40_shader.h"
struct nv40_draw_stage {
struct draw_stage draw;
/* Simple, but crappy, swtnl path; hopefully we won't need to hit this very
 * often at all. Uses "quadro style" vertex submission + a fixed vertex
 * layout to avoid the need to generate a vertex program or vtxfmt.
 */
/* Final draw-module pipeline stage: receives points/lines/tris from the
 * draw module and submits their vertices to the hardware.
 */
struct nv40_render_stage {
/* Base stage; kept as the first member because nv40_render_stage()
 * casts a struct draw_stage pointer straight to this type.
 */
struct draw_stage stage;
/* Context whose channel the vertices are emitted on. */
struct nv40_context *nv40;
/* BEGIN_END mode currently open on the hardware, or
 * NV40TCL_BEGIN_END_STOP when no primitive is open.
 */
unsigned prim;
};
static void
nv40_draw_point(struct draw_stage *draw, struct prim_header *prim)
static INLINE struct nv40_render_stage *
nv40_render_stage(struct draw_stage *stage)
{
NOUVEAU_ERR("\n");
return (struct nv40_render_stage *)stage;
}
/* Emit one software-transformed vertex as immediate-mode attribute methods.
 *
 * Walks the attribute table held in nv40->swtnl: entry 'a' takes the draw
 * module's vertex slot swtnl.draw[a] and writes it to hardware attribute
 * swtnl.hw[a] using the layout named by swtnl.emit[a].
 */
static INLINE void
nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v)
{
	unsigned a;

	for (a = 0; a < nv40->swtnl.nr_attribs; a++) {
		const unsigned reg = nv40->swtnl.hw[a];
		const float *val;

		/* Omitted attributes produce no commands at all. */
		if (nv40->swtnl.emit[a] == EMIT_OMIT)
			continue;

		val = v->data[nv40->swtnl.draw[a]];

		switch (nv40->swtnl.emit[a]) {
		case EMIT_1F:
			/* Raw method offset, one dword per attribute. */
			BEGIN_RING(curie, 0x1e40 + (reg * 4), 1);
			OUT_RING  (fui(val[0]));
			break;
		case EMIT_2F:
			BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(reg), 2);
			OUT_RING  (fui(val[0]));
			OUT_RING  (fui(val[1]));
			break;
		case EMIT_3F:
			BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(reg), 3);
			OUT_RING  (fui(val[0]));
			OUT_RING  (fui(val[1]));
			OUT_RING  (fui(val[2]));
			break;
		case EMIT_4F:
			BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(reg), 4);
			OUT_RING  (fui(val[0]));
			OUT_RING  (fui(val[1]));
			OUT_RING  (fui(val[2]));
			OUT_RING  (fui(val[3]));
			break;
		case EMIT_4UB:
			/* Raw method offset; four floats packed into one
			 * dword of unsigned bytes.
			 */
			BEGIN_RING(curie, 0x1940 + (reg * 4), 1);
			OUT_RING  (pack_ub4(float_to_ubyte(val[0]),
					    float_to_ubyte(val[1]),
					    float_to_ubyte(val[2]),
					    float_to_ubyte(val[3])));
			break;
		default:
			/* Unknown emit format in the swtnl table. */
			assert(0);
			break;
		}
	}
}
/* Emit the first 'count' vertices of 'prim' inside a BEGIN_END of type 'mode'.
 *
 * The BEGIN_END is left open between calls: rs->prim remembers the mode that
 * is currently open so consecutive primitives of the same type share one
 * BEGIN/END pair. It is closed here only when the mode changes or when the
 * push buffer looks close to full; nv40_render_flush() closes it otherwise.
 */
static INLINE void
nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
unsigned mode, unsigned count)
{
struct nv40_render_stage *rs = nv40_render_stage(stage);
struct nv40_context *nv40 = rs->nv40;
struct nouveau_pushbuf *pb = nv40->nvws->channel->pushbuf;
unsigned i;
/* Ensure there's room for 4xfloat32 + potentially 3 begin/end */
/* NOTE(review): the estimate allows 20 units per vertex, but
 * nv40_state_vtxfmt_validate() can enable more attributes than that
 * covers (two colours, eight texcoords, fog, position) - confirm the
 * bound and the unit of pb->remaining.
 */
if (pb->remaining < ((count * 20) + 6)) {
/* Running out of space with a primitive still open means the
 * end-of-function check below failed to close it in time.
 */
if (rs->prim != NV40TCL_BEGIN_END_STOP) {
NOUVEAU_ERR("AIII, missed flush\n");
assert(0);
}
/* Submit what has been queued, then re-emit state before continuing. */
FIRE_RING();
nv40_state_emit(nv40);
}
/* Switch primitive modes if necessary */
if (rs->prim != mode) {
/* Close whatever primitive is currently open first. */
if (rs->prim != NV40TCL_BEGIN_END_STOP) {
BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
OUT_RING (NV40TCL_BEGIN_END_STOP);
}
BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
OUT_RING (mode);
rs->prim = mode;
}
/* Emit vertex data */
for (i = 0; i < count; i++)
nv40_render_vertex(nv40, prim->v[i]);
/* If it's likely we'll need to empty the push buffer soon, finish
* off the primitive now.
*/
if (pb->remaining < ((count * 20) + 6)) {
BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
OUT_RING (NV40TCL_BEGIN_END_STOP);
rs->prim = NV40TCL_BEGIN_END_STOP;
}
}
static void
nv40_draw_line(struct draw_stage *draw, struct prim_header *prim)
nv40_render_point(struct draw_stage *draw, struct prim_header *prim)
{
NOUVEAU_ERR("\n");
nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, 1);
}
static void
nv40_draw_tri(struct draw_stage *draw, struct prim_header *prim)
nv40_render_line(struct draw_stage *draw, struct prim_header *prim)
{
NOUVEAU_ERR("\n");
nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, 2);
}
static void
nv40_draw_flush(struct draw_stage *draw, unsigned flags)
nv40_render_tri(struct draw_stage *draw, struct prim_header *prim)
{
nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, 3);
}
/* Flush hook of the render stage: if a BEGIN_END is still open, close it
 * and record that no primitive is active. 'flags' is not used.
 */
static void
nv40_render_flush(struct draw_stage *draw, unsigned flags)
{
	struct nv40_render_stage *rs = nv40_render_stage(draw);
	struct nv40_context *nv40 = rs->nv40;

	/* Nothing is open, so there is nothing to terminate. */
	if (rs->prim == NV40TCL_BEGIN_END_STOP)
		return;

	BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
	OUT_RING  (NV40TCL_BEGIN_END_STOP);
	rs->prim = NV40TCL_BEGIN_END_STOP;
}
/* Stipple-counter reset hook installed in the render stage; it does nothing. */
static void
nv40_render_reset_stipple_counter(struct draw_stage *draw)
{
}
static void
nv40_draw_reset_stipple_counter(struct draw_stage *draw)
{
NOUVEAU_ERR("\n");
}
static void
nv40_draw_destroy(struct draw_stage *draw)
nv40_render_destroy(struct draw_stage *draw)
{
free(draw);
}
/* Append one hand-encoded move instruction to a vertex program.
 *
 * vp   - program to extend; vp->insns grows by one entry.
 * dst  - destination field, placed at bit 2 of instruction word 3.
 * src  - source input index, placed at bit 8 of word 1 and recorded as a
 *        set bit in vp->ir.
 * vor  - bit index to set in vp->or, or ~0 to leave vp->or untouched.
 * mask - write-mask field, placed at bit 13 of word 3.
 *
 * If the instruction array cannot be grown the program is left exactly as
 * it was (the old array stays valid and nr_insns is not bumped) and the
 * failure is reported through assert().
 */
static INLINE void
emit_mov(struct nv40_vertex_program *vp,
	 unsigned dst, unsigned src, unsigned vor, unsigned mask)
{
	struct nv40_vertex_program_exec *insns, *inst;
	unsigned nr = vp->nr_insns + 1;

	/* Grow through a temporary: assigning realloc()'s result straight
	 * to vp->insns would lose the old array on failure.
	 */
	insns = realloc(vp->insns, sizeof(*insns) * nr);
	if (!insns) {
		assert(0);
		return;
	}
	vp->insns = insns;
	vp->nr_insns = nr;

	inst = &vp->insns[nr - 1];
	inst->data[0] = 0x401f9c6c;
	inst->data[1] = 0x0040000d | (src << 8);
	inst->data[2] = 0x8106c083;
	inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13);
	inst->const_index = -1;
	inst->has_branch_offset = FALSE;

	vp->ir |= (1 << src);
	if (vor != ~0u)
		vp->or |= (1 << vor);
}
static struct nv40_vertex_program *
create_drawvp(struct nv40_context *nv40)
{
struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program);
unsigned i;
emit_mov(vp, NV40_VP_INST_DEST_POS, 0, ~0, 0xf);
emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf);
emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf);
emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf);
emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf);
emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8);
for (i = 0; i < 8; i++)
emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf);
vp->insns[vp->nr_insns - 1].data[3] |= 1;
vp->translated = TRUE;
return vp;
}
struct draw_stage *
nv40_draw_render_stage(struct nv40_context *nv40)
{
struct nv40_draw_stage *nv40draw = CALLOC_STRUCT(nv40_draw_stage);
struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage);
nv40draw->nv40 = nv40;
nv40draw->draw.draw = nv40->draw;
nv40draw->draw.point = nv40_draw_point;
nv40draw->draw.line = nv40_draw_line;
nv40draw->draw.tri = nv40_draw_tri;
nv40draw->draw.flush = nv40_draw_flush;
nv40draw->draw.reset_stipple_counter = nv40_draw_reset_stipple_counter;
nv40draw->draw.destroy = nv40_draw_destroy;
if (!nv40->swtnl.vertprog)
nv40->swtnl.vertprog = create_drawvp(nv40);
return &nv40draw->draw;
render->nv40 = nv40;
render->stage.draw = nv40->draw;
render->stage.point = nv40_render_point;
render->stage.line = nv40_render_line;
render->stage.tri = nv40_render_tri;
render->stage.flush = nv40_render_flush;
render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter;
render->stage.destroy = nv40_render_destroy;
return &render->stage;
}
/* Draw through the software TNL fallback.
 *
 * Validates and emits swtnl state, maps every bound vertex buffer, the
 * optional index buffer and the vertex-shader constant buffer for CPU reads,
 * hands the mappings to the draw module and runs the draw. The draw module
 * is flushed before the buffers are unmapped, then the pipe is flushed.
 *
 * idxbuf/idxbuf_size - index buffer and the size value passed on to the draw
 *                      module, or NULL/0 for a non-indexed draw.
 * mode, start, count - forwarded unchanged to draw_arrays().
 *
 * Returns FALSE if swtnl state validation fails, TRUE otherwise.
 */
boolean
nv40_draw_elements_swtnl(struct pipe_context *pipe,
			 struct pipe_buffer *idxbuf, unsigned idxbuf_size,
			 unsigned mode, unsigned start, unsigned count)
{
	struct nv40_context *nv40 = nv40_context(pipe);
	struct pipe_winsys *ws = pipe->winsys;
	unsigned i;
	void *map;

	if (!nv40_state_validate_swtnl(nv40))
		return FALSE;
	nv40_state_emit(nv40);

	for (i = 0; i < PIPE_ATTRIB_MAX; i++) {
		if (!nv40->vtxbuf[i].buffer)
			continue;
		map = ws->buffer_map(ws, nv40->vtxbuf[i].buffer,
				     PIPE_BUFFER_USAGE_CPU_READ);
		draw_set_mapped_vertex_buffer(nv40->draw, i, map);
	}

	if (idxbuf) {
		map = ws->buffer_map(ws, idxbuf, PIPE_BUFFER_USAGE_CPU_READ);
		draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map);
	} else {
		draw_set_mapped_element_buffer(nv40->draw, 0, NULL);
	}

	if (nv40->constbuf[PIPE_SHADER_VERTEX]) {
		map = ws->buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX],
				     PIPE_BUFFER_USAGE_CPU_READ);
		draw_set_mapped_constant_buffer(nv40->draw, map);
	}

	draw_arrays(nv40->draw, mode, start, count);

	/* Flush the draw module while the pointers it was given above are
	 * still mapped.
	 * NOTE(review): assumes draw_arrays() may leave work queued that
	 * still reads these buffers - confirm against the draw module.
	 */
	draw_flush(nv40->draw);

	for (i = 0; i < PIPE_ATTRIB_MAX; i++) {
		if (!nv40->vtxbuf[i].buffer)
			continue;
		ws->buffer_unmap(ws, nv40->vtxbuf[i].buffer);
	}

	if (idxbuf)
		ws->buffer_unmap(ws, idxbuf);

	if (nv40->constbuf[PIPE_SHADER_VERTEX])
		ws->buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]);

	pipe->flush(pipe, 0);
	return TRUE;
}
/* Append one entry to the swtnl attribute table.
 *
 * hw              - hardware attribute number the value is sent to.
 * emit            - EMIT_* layout used when writing the value.
 * semantic, index - looked up with draw_find_vs_output() to find which
 *                   draw-module vertex slot supplies the value.
 */
static INLINE void
emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit,
	    unsigned semantic, unsigned index)
{
	const unsigned slot = nv40->swtnl.nr_attribs;

	nv40->swtnl.draw[slot] = draw_find_vs_output(nv40->draw,
						     semantic, index);
	nv40->swtnl.emit[slot] = emit;
	nv40->swtnl.hw[slot] = hw;
	nv40->swtnl.nr_attribs = slot + 1;
}
/* Rebuild the swtnl attribute table from the bound fragment program.
 *
 * First collects which colour, generic and fog inputs the fragment program
 * declares, then adds one table entry per needed input: colours 0-1 on
 * hardware attributes 3-4 as packed bytes, generics 0-7 on attributes 8-15
 * as four floats, fog on attribute 5 as one float. Position is always added
 * last, on attribute 0.
 *
 * Always returns FALSE.
 */
static boolean
nv40_state_vtxfmt_validate(struct nv40_context *nv40)
{
	struct nv40_fragment_program *fp = nv40->fragprog;
	unsigned need_colour = 0, need_generic = 0, need_fog = 0;
	unsigned n;

	/* Determine needed fragprog inputs */
	for (n = 0; n < fp->info.num_inputs; n++) {
		switch (fp->info.input_semantic_name[n]) {
		case TGSI_SEMANTIC_POSITION:
			break;
		case TGSI_SEMANTIC_COLOR:
			need_colour |= (1 << fp->info.input_semantic_index[n]);
			break;
		case TGSI_SEMANTIC_GENERIC:
			need_generic |= (1 << fp->info.input_semantic_index[n]);
			break;
		case TGSI_SEMANTIC_FOG:
			need_fog = 1;
			break;
		default:
			assert(0);
			break;
		}
	}

	/* Start from an empty table. */
	nv40->swtnl.nr_attribs = 0;

	/* Map draw vtxprog output to hw attribute IDs */
	for (n = 0; n < 2; n++) {
		if (need_colour & (1 << n))
			emit_attrib(nv40, 3 + n, EMIT_4UB,
				    TGSI_SEMANTIC_COLOR, n);
	}

	for (n = 0; n < 8; n++) {
		if (need_generic & (1 << n))
			emit_attrib(nv40, 8 + n, EMIT_4F,
				    TGSI_SEMANTIC_GENERIC, n);
	}

	if (need_fog)
		emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0);

	/* Position goes last and is emitted unconditionally. */
	emit_attrib(nv40, 0, EMIT_4F, TGSI_SEMANTIC_POSITION, 0);

	return FALSE;
}
/* State entry for the swtnl vertex format: validated by
 * nv40_state_vtxfmt_validate() and tied to the vertex-array and
 * fragment-program dirty bits.
 */
struct nv40_state_entry nv40_state_vtxfmt = {
.validate = nv40_state_vtxfmt_validate,
.dirty = {
.pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG,
/* No hardware state object is marked dirty by this entry. */
.hw = 0
}
};
+2 -1
View File
@@ -797,9 +797,10 @@ nv40_fragprog_validate(struct nv40_context *nv40)
if (fp->translated)
goto update_constants;
nv40->fallback_swrast &= ~NV40_NEW_FRAGPROG;
nv40_fragprog_translate(nv40, fp);
if (!fp->translated) {
nv40->fallback |= NV40_FALLBACK_RAST;
nv40->fallback_swrast |= NV40_NEW_FRAGPROG;
return FALSE;
}
+2
View File
@@ -476,6 +476,7 @@
# define NV40_FP_SWIZZLE_W 3
#define NV40_FP_REG_NEGATE (1 << 17)
#ifndef NV40_SHADER_NO_FUCKEDNESS
#define NV40SR_NONE 0
#define NV40SR_OUTPUT 1
#define NV40SR_INPUT 2
@@ -550,5 +551,6 @@ nv40_sr_scale(struct nv40_sreg src, int scale)
src.dst_scale = scale;
return src;
}
#endif
#endif
+24 -3
View File
@@ -3,6 +3,8 @@
#include "pipe/p_util.h"
#include "pipe/p_inlines.h"
#include "draw/draw_context.h"
#include "nv40_context.h"
#include "nv40_state.h"
@@ -345,7 +347,7 @@ nv40_rasterizer_state_create(struct pipe_context *pipe,
so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK);
break;
default:
so_data(so, 0);
so_data(so, NV40TCL_CULL_FACE_BACK);
break;
}
so_data(so, NV40TCL_FRONT_FACE_CCW);
@@ -363,13 +365,13 @@ nv40_rasterizer_state_create(struct pipe_context *pipe,
so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK);
break;
default:
so_data(so, 0);
so_data(so, NV40TCL_CULL_FACE_BACK);
break;
}
so_data(so, NV40TCL_FRONT_FACE_CW);
}
so_data(so, cso->poly_smooth ? 1 : 0);
so_data(so, cso->cull_mode != PIPE_WINDING_NONE ? 1 : 0);
so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0);
so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
so_data (so, cso->poly_stipple_enable ? 1 : 0);
@@ -419,6 +421,9 @@ static void
nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_rasterizer_state *rsso = hwcso;
draw_set_rasterizer_state(nv40->draw, &rsso->pipe);
nv40->rasterizer = hwcso;
nv40->dirty |= NV40_NEW_RAST;
@@ -508,10 +513,12 @@ static void *
nv40_vp_state_create(struct pipe_context *pipe,
const struct pipe_shader_state *cso)
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_vertex_program *vp;
vp = CALLOC(1, sizeof(struct nv40_vertex_program));
vp->pipe = *cso;
vp->draw = draw_create_vertex_shader(nv40->draw, &vp->pipe);
return (void *)vp;
}
@@ -520,6 +527,9 @@ static void
nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso)
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_vertex_program *vp = hwcso;
draw_bind_vertex_shader(nv40->draw, vp ? vp->draw : NULL);
nv40->vertprog = hwcso;
nv40->dirty |= NV40_NEW_VERTPROG;
@@ -531,6 +541,7 @@ nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso)
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_vertex_program *vp = hwcso;
draw_delete_vertex_shader(nv40->draw, vp->draw);
nv40_vertprog_destroy(nv40, vp);
FREE(vp);
}
@@ -544,6 +555,8 @@ nv40_fp_state_create(struct pipe_context *pipe,
fp = CALLOC(1, sizeof(struct nv40_fragment_program));
fp->pipe = *cso;
tgsi_scan_shader(fp->pipe.tokens, &fp->info);
return (void *)fp;
}
@@ -582,6 +595,8 @@ nv40_set_clip_state(struct pipe_context *pipe,
{
struct nv40_context *nv40 = nv40_context(pipe);
draw_set_clip_state(nv40->draw, clip);
nv40->clip = *clip;
nv40->dirty |= NV40_NEW_UCP;
}
@@ -638,6 +653,8 @@ nv40_set_viewport_state(struct pipe_context *pipe,
{
struct nv40_context *nv40 = nv40_context(pipe);
draw_set_viewport_state(nv40->draw, vpt);
nv40->viewport = *vpt;
nv40->dirty |= NV40_NEW_VIEWPORT;
}
@@ -648,6 +665,8 @@ nv40_set_vertex_buffer(struct pipe_context *pipe, unsigned index,
{
struct nv40_context *nv40 = nv40_context(pipe);
draw_set_vertex_buffer(nv40->draw, index, vb);
nv40->vtxbuf[index] = *vb;
nv40->dirty |= NV40_NEW_ARRAYS;
}
@@ -658,6 +677,8 @@ nv40_set_vertex_element(struct pipe_context *pipe, unsigned index,
{
struct nv40_context *nv40 = nv40_context(pipe);
draw_set_vertex_element(nv40->draw, index, ve);
nv40->vtxelt[index] = *ve;
nv40->dirty |= NV40_NEW_ARRAYS;
}
+4
View File
@@ -2,6 +2,7 @@
#define __NV40_STATE_H__
#include "pipe/p_state.h"
#include "tgsi/util/tgsi_scan.h"
struct nv40_sampler_state {
uint32_t fmt;
@@ -25,6 +26,8 @@ struct nv40_vertex_program_data {
struct nv40_vertex_program {
struct pipe_shader_state pipe;
struct draw_vertex_shader *draw;
boolean translated;
struct nv40_vertex_program_exec *insns;
unsigned nr_insns;
@@ -49,6 +52,7 @@ struct nv40_fragment_program_data {
struct nv40_fragment_program {
struct pipe_shader_state pipe;
struct tgsi_shader_info info;
boolean translated;
unsigned samplers;
+6 -2
View File
@@ -3,8 +3,12 @@
static boolean
nv40_state_clip_validate(struct nv40_context *nv40)
{
if (nv40->clip.nr)
nv40->fallback |= NV40_FALLBACK_TNL;
if (nv40->render_mode == HW) {
nv40->fallback_swtnl &= ~NV40_NEW_UCP;
if (nv40->clip.nr)
nv40->fallback_swtnl |= NV40_NEW_UCP;
}
return FALSE;
}
+91 -41
View File
@@ -1,5 +1,6 @@
#include "nv40_context.h"
#include "nv40_state.h"
#include "draw/draw_context.h"
static struct nv40_state_entry *render_states[] = {
&nv40_state_framebuffer,
@@ -18,15 +19,27 @@ static struct nv40_state_entry *render_states[] = {
NULL
};
static struct nv40_state_entry *swtnl_states[] = {
&nv40_state_framebuffer,
&nv40_state_rasterizer,
&nv40_state_clip,
&nv40_state_scissor,
&nv40_state_stipple,
&nv40_state_fragprog,
&nv40_state_fragtex,
&nv40_state_vertprog,
&nv40_state_blend,
&nv40_state_blend_colour,
&nv40_state_zsa,
&nv40_state_viewport,
&nv40_state_vtxfmt,
NULL
};
static void
nv40_state_validate(struct nv40_context *nv40)
nv40_state_do_validate(struct nv40_context *nv40,
struct nv40_state_entry **states)
{
struct nv40_state_entry **states = render_states;
unsigned last_fallback;
last_fallback = nv40->fallback;
nv40->fallback = 0;
while (*states) {
struct nv40_state_entry *e = *states;
@@ -38,32 +51,15 @@ nv40_state_validate(struct nv40_context *nv40)
states++;
}
nv40->dirty = 0;
if (nv40->fallback & NV40_FALLBACK_TNL &&
!(last_fallback & NV40_FALLBACK_TNL)) {
NOUVEAU_ERR("XXX: hwtnl->swtnl\n");
} else
if (last_fallback & NV40_FALLBACK_TNL &&
!(nv40->fallback & NV40_FALLBACK_TNL)) {
NOUVEAU_ERR("XXX: swtnl->hwtnl\n");
}
if (nv40->fallback & NV40_FALLBACK_RAST &&
!(last_fallback & NV40_FALLBACK_RAST)) {
NOUVEAU_ERR("XXX: hwrast->swrast\n");
} else
if (last_fallback & NV40_FALLBACK_RAST &&
!(nv40->fallback & NV40_FALLBACK_RAST)) {
NOUVEAU_ERR("XXX: swrast->hwrast\n");
}
}
static void
void
nv40_state_emit(struct nv40_context *nv40)
{
struct nv40_state *state = &nv40->state;
struct nv40_screen *screen = nv40->screen;
unsigned i, samplers;
uint64 states;
if (nv40->pctx_id != screen->cur_pctx) {
for (i = 0; i < NV40_STATE_MAX; i++) {
@@ -74,14 +70,24 @@ nv40_state_emit(struct nv40_context *nv40)
screen->cur_pctx = nv40->pctx_id;
}
while (state->dirty) {
unsigned idx = ffsll(state->dirty) - 1;
so_ref (state->hw[idx], &nv40->screen->state[idx]);
so_emit(nv40->nvws, nv40->screen->state[idx]);
state->dirty &= ~(1ULL << idx);
for (i = 0, states = state->dirty; states; i++) {
if (!(states & (1ULL << i)))
continue;
so_ref (state->hw[i], &nv40->screen->state[i]);
so_emit(nv40->nvws, nv40->screen->state[i]);
states &= ~(1ULL << i);
}
if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) |
(1ULL << NV40_STATE_FRAGTEX0))) {
BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
OUT_RING (2);
BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
OUT_RING (1);
}
state->dirty = 0;
so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FB]);
for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
if (!(samplers & (1 << i)))
@@ -91,18 +97,62 @@ nv40_state_emit(struct nv40_context *nv40)
samplers &= ~(1ULL << i);
}
so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FRAGPROG]);
so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_VTXBUF]);
if (state->hw[NV40_STATE_VTXBUF] && nv40->render_mode == HW)
so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_VTXBUF]);
}
void
nv40_emit_hw_state(struct nv40_context *nv40)
boolean
nv40_state_validate(struct nv40_context *nv40)
{
nv40_state_validate(nv40);
nv40_state_emit(nv40);
boolean was_sw = nv40->fallback_swtnl ? TRUE : FALSE;
BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
OUT_RING (2);
BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
OUT_RING (1);
if (nv40->render_mode != HW) {
/* Don't even bother trying to go back to hw if none
* of the states that caused swtnl previously have changed.
*/
if ((nv40->fallback_swtnl & nv40->dirty)
!= nv40->fallback_swtnl)
return FALSE;
/* Attempt to go to hwtnl again */
nv40->pipe.flush(&nv40->pipe, 0);
nv40->dirty |= (NV40_NEW_VIEWPORT |
NV40_NEW_VERTPROG |
NV40_NEW_ARRAYS |
NV40_NEW_UCP);
nv40->render_mode = HW;
}
nv40_state_do_validate(nv40, render_states);
if (nv40->fallback_swtnl || nv40->fallback_swrast)
return FALSE;
if (was_sw)
NOUVEAU_ERR("swtnl->hw\n");
return TRUE;
}
/* Validate state for the software TNL path.
 *
 * When coming from hardware rendering, the pipe is flushed, the viewport,
 * vertex program, array and clip-plane state are marked dirty and the
 * render mode switches to SWTNL. The swtnl state list is then validated.
 *
 * Returns TRUE on success, FALSE if a software-rasterisation fallback is
 * flagged.
 */
boolean
nv40_state_validate_swtnl(struct nv40_context *nv40)
{
	/* State that has to be revalidated whenever the render mode flips. */
	const unsigned mode_dependent = NV40_NEW_VIEWPORT |
					NV40_NEW_VERTPROG |
					NV40_NEW_ARRAYS |
					NV40_NEW_UCP;

	/* Setup for swtnl */
	if (nv40->render_mode == HW) {
		NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl);
		nv40->pipe.flush(&nv40->pipe, 0);
		nv40->dirty |= mode_dependent;
		nv40->render_mode = SWTNL;
	}

	nv40_state_do_validate(nv40, swtnl_states);

	if (!nv40->fallback_swrast)
		return TRUE;

	NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast);
	return FALSE;
}
+35 -10
View File
@@ -3,18 +3,43 @@
static boolean
nv40_state_viewport_validate(struct nv40_context *nv40)
{
struct nouveau_stateobj *so = so_new(9, 0);
struct nouveau_stateobj *so = so_new(11, 0);
struct pipe_viewport_state *vpt = &nv40->viewport;
so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8);
so_data (so, fui(vpt->translate[0]));
so_data (so, fui(vpt->translate[1]));
so_data (so, fui(vpt->translate[2]));
so_data (so, fui(vpt->translate[3]));
so_data (so, fui(vpt->scale[0]));
so_data (so, fui(vpt->scale[1]));
so_data (so, fui(vpt->scale[2]));
so_data (so, fui(vpt->scale[3]));
if (nv40->render_mode == HW) {
so_method(so, nv40->screen->curie,
NV40TCL_VIEWPORT_TRANSLATE_X, 8);
so_data (so, fui(vpt->translate[0]));
so_data (so, fui(vpt->translate[1]));
so_data (so, fui(vpt->translate[2]));
so_data (so, fui(vpt->translate[3]));
so_data (so, fui(vpt->scale[0]));
so_data (so, fui(vpt->scale[1]));
so_data (so, fui(vpt->scale[2]));
so_data (so, fui(vpt->scale[3]));
so_method(so, nv40->screen->curie, 0x1d78, 1);
so_data (so, 1);
} else {
so_method(so, nv40->screen->curie,
NV40TCL_VIEWPORT_TRANSLATE_X, 8);
so_data (so, fui(0.0));
so_data (so, fui(0.0));
so_data (so, fui(0.0));
so_data (so, fui(0.0));
so_data (so, fui(1.0));
so_data (so, fui(1.0));
so_data (so, fui(1.0));
so_data (so, fui(0.0));
/* Not entirely certain what this is yet. The DDX uses this
 * value also as it fixes rendering when you pass
 * pre-transformed vertices to the GPU. My best guess is that
 * this bypasses some culling/clipping stage. Might be worth
 * noting that points/lines are unaffected by whatever this
 * value fixes, only filled polygons are affected.
 */
so_method(so, nv40->screen->curie, 0x1d78, 1);
so_data (so, 0x110);
}
so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]);
return TRUE;
+18 -4
View File
@@ -8,6 +8,8 @@
#include "nouveau/nouveau_channel.h"
#include "nouveau/nouveau_pushbuf.h"
#define FORCE_SWTNL 0
static INLINE int
nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
{
@@ -165,7 +167,11 @@ nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
unsigned nr;
nv40_vbo_set_idxbuf(nv40, NULL, 0);
nv40_emit_hw_state(nv40);
if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
return nv40_draw_elements_swtnl(pipe, NULL, 0,
mode, start, count);
}
nv40_state_emit(nv40);
BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
OUT_RING (nvgl_primitive(mode));
@@ -274,7 +280,7 @@ nv40_draw_elements_inline(struct pipe_context *pipe,
struct pipe_winsys *ws = pipe->winsys;
void *map;
nv40_emit_hw_state(nv40);
nv40_state_emit(nv40);
map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ);
if (!ib) {
@@ -315,7 +321,7 @@ nv40_draw_elements_vbo(struct pipe_context *pipe,
struct nv40_context *nv40 = nv40_context(pipe);
unsigned nr;
nv40_emit_hw_state(nv40);
nv40_state_emit(nv40);
BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
OUT_RING (nvgl_primitive(mode));
@@ -352,8 +358,16 @@ nv40_draw_elements(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
struct nv40_context *nv40 = nv40_context(pipe);
boolean idxbuf;
if (nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize)) {
idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize);
if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
return nv40_draw_elements_swtnl(pipe, NULL, 0,
mode, start, count);
}
nv40_state_emit(nv40);
if (idxbuf) {
nv40_draw_elements_vbo(pipe, mode, start, count);
} else {
nv40_draw_elements_inline(pipe, indexBuffer, indexSize,
+12 -4
View File
@@ -634,21 +634,29 @@ out_err:
static boolean
nv40_vertprog_validate(struct nv40_context *nv40)
{
struct nv40_vertex_program *vp = nv40->vertprog;
struct pipe_buffer *constbuf =
nv40->constbuf[PIPE_SHADER_VERTEX];
struct nouveau_winsys *nvws = nv40->nvws;
struct pipe_winsys *ws = nv40->pipe.winsys;
struct nv40_vertex_program *vp;
struct pipe_buffer *constbuf;
boolean upload_code = FALSE, upload_data = FALSE;
int i;
if (nv40->render_mode == HW) {
vp = nv40->vertprog;
constbuf = nv40->constbuf[PIPE_SHADER_VERTEX];
} else {
vp = nv40->swtnl.vertprog;
constbuf = NULL;
}
/* Translate TGSI shader into hw bytecode */
if (vp->translated)
goto check_gpu_resources;
nv40->fallback_swtnl &= ~NV40_NEW_VERTPROG;
nv40_vertprog_translate(nv40, vp);
if (!vp->translated) {
nv40->fallback |= NV40_FALLBACK_TNL;
nv40->fallback_swtnl |= NV40_NEW_VERTPROG;
return FALSE;
}