nouveau: avoid relocations where possible.
Potential relocations are emitted as NOPs where they're needed. In the event that a buffer moves, the pushbuf code will emit the relevant state changes into those NOPs. This is just a start; more work is needed to get this looking the way I want it to.
This commit is contained in:
@@ -61,9 +61,8 @@
|
||||
} while(0)
|
||||
|
||||
#define OUT_RELOC(buf,data,flags,vor,tor) do { \
|
||||
nouveau_pipe_emit_reloc(nv->channel, nv->channel->pushbuf->cur, \
|
||||
nouveau_pipe_emit_reloc(nv->channel, nv->channel->pushbuf->cur++, \
|
||||
buf, (data), (flags), (vor), (tor)); \
|
||||
OUT_RING(0); \
|
||||
} while(0)
|
||||
|
||||
/* Raw data + flags depending on FB/TT buffer */
|
||||
|
||||
@@ -96,6 +96,31 @@ nouveau_pushbuf_init(struct nouveau_channel *chan)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
nouveau_pushbuf_calc_reloc(struct nouveau_bo *bo,
|
||||
struct nouveau_pushbuf_reloc *r)
|
||||
{
|
||||
uint32_t push;
|
||||
|
||||
if (r->flags & NOUVEAU_BO_LOW) {
|
||||
push = bo->offset + r->data;
|
||||
} else
|
||||
if (r->flags & NOUVEAU_BO_HIGH) {
|
||||
push = (bo->offset + r->data) >> 32;
|
||||
} else {
|
||||
push = r->data;
|
||||
}
|
||||
|
||||
if (r->flags & NOUVEAU_BO_OR) {
|
||||
if (bo->flags & NOUVEAU_BO_VRAM)
|
||||
push |= r->vor;
|
||||
else
|
||||
push |= r->tor;
|
||||
}
|
||||
|
||||
return push;
|
||||
}
|
||||
|
||||
/* This would be our TTM "superioctl" */
|
||||
int
|
||||
nouveau_pushbuf_flush(struct nouveau_channel *chan, unsigned min)
|
||||
@@ -133,34 +158,20 @@ nouveau_pushbuf_flush(struct nouveau_channel *chan, unsigned min)
|
||||
|
||||
if (bo->offset == nouveau_bo(bo)->offset &&
|
||||
bo->flags == nouveau_bo(bo)->flags) {
|
||||
/*XXX: could avoid reloc in this case, except with the
|
||||
* current design we'd confuse the GPU quite a bit
|
||||
* if we did this. Will fix soon.
|
||||
*/
|
||||
while ((r = ptr_to_pbrel(pbbo->relocs))) {
|
||||
pbbo->relocs = r->next;
|
||||
free(r);
|
||||
}
|
||||
|
||||
nvpb->buffers = pbbo->next;
|
||||
free(pbbo);
|
||||
continue;
|
||||
}
|
||||
bo->offset = nouveau_bo(bo)->offset;
|
||||
bo->flags = nouveau_bo(bo)->flags;
|
||||
|
||||
while ((r = ptr_to_pbrel(pbbo->relocs))) {
|
||||
uint32_t push;
|
||||
|
||||
if (r->flags & NOUVEAU_BO_LOW) {
|
||||
push = bo->offset + r->data;
|
||||
} else
|
||||
if (r->flags & NOUVEAU_BO_HIGH) {
|
||||
push = (bo->offset + r->data) >> 32;
|
||||
} else {
|
||||
push = r->data;
|
||||
}
|
||||
|
||||
if (r->flags & NOUVEAU_BO_OR) {
|
||||
if (bo->flags & NOUVEAU_BO_VRAM)
|
||||
push |= r->vor;
|
||||
else
|
||||
push |= r->tor;
|
||||
}
|
||||
|
||||
*r->ptr = push;
|
||||
*r->ptr = nouveau_pushbuf_calc_reloc(bo, r);
|
||||
pbbo->relocs = r->next;
|
||||
free(r);
|
||||
}
|
||||
@@ -241,6 +252,10 @@ nouveau_pushbuf_emit_reloc(struct nouveau_channel *chan, void *ptr,
|
||||
r->vor = vor;
|
||||
r->tor = tor;
|
||||
|
||||
if (flags & NOUVEAU_BO_DUMMY)
|
||||
*(uint32_t *)ptr = 0;
|
||||
else
|
||||
*(uint32_t *)ptr = nouveau_pushbuf_calc_reloc(bo, r);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@
|
||||
#define NOUVEAU_BO_HIGH (1 << 7)
|
||||
#define NOUVEAU_BO_OR (1 << 8)
|
||||
#define NOUVEAU_BO_LOCAL (1 << 9)
|
||||
#define NOUVEAU_BO_DUMMY (1 << 31)
|
||||
|
||||
struct nouveau_bo {
|
||||
struct nouveau_device *device;
|
||||
|
||||
@@ -44,9 +44,8 @@
|
||||
#define OUT_RELOC(bo,data,flags,vor,tor) do { \
|
||||
NOUVEAU_PUSH_CONTEXT(pc); \
|
||||
pc->nvws->push_reloc(pc->nvws->channel, \
|
||||
pc->nvws->channel->pushbuf->cur, \
|
||||
pc->nvws->channel->pushbuf->cur++, \
|
||||
(bo), (data), (flags), (vor), (tor)); \
|
||||
OUT_RING(0); \
|
||||
} while(0)
|
||||
|
||||
/* Raw data + flags depending on FB/TT buffer */
|
||||
@@ -71,4 +70,14 @@
|
||||
OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \
|
||||
} while(0)
|
||||
|
||||
/* Emit a reloc that will recombine into a NV_DMA_METHOD packet header.
 *
 * Ensures (size) + 1 words are available in the push buffer (flushing the
 * channel if not), emits the header word for method `mthd` on object `obj`
 * through OUT_RELOCd against `bo`, then charges the whole packet -- header
 * plus `size` data words -- against the push buffer's remaining count.
 * `size` is expanded several times, so it must be free of side effects.
 */
#define OUT_RELOCm(bo, flags, obj, mthd, size) do {                            \
	NOUVEAU_PUSH_CONTEXT(pc);                                              \
	if (pc->nvws->channel->pushbuf->remaining < ((size) + 1))              \
		pc->nvws->push_flush(pc->nvws->channel, ((size) + 1));         \
	OUT_RELOCd((bo),                                                       \
		   (mthd) | ((size) << 18) | (pc->obj->subc << 13),            \
		   (flags), 0, 0);                                             \
	pc->nvws->channel->pushbuf->remaining -= ((size) + 1);                 \
} while(0)
|
||||
|
||||
#endif
|
||||
|
||||
@@ -815,6 +815,11 @@ nv40_fragprog_bind(struct nv40_context *nv40, struct nv40_fragment_program *fp)
|
||||
fp->on_hw = TRUE;
|
||||
}
|
||||
|
||||
BEGIN_RING(curie, NV40TCL_FP_ADDRESS, 1);
|
||||
OUT_RELOC (fp->buffer, 0, NOUVEAU_BO_VRAM |
|
||||
NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
|
||||
NOUVEAU_BO_OR, NV40TCL_FP_ADDRESS_DMA0,
|
||||
NV40TCL_FP_ADDRESS_DMA1);
|
||||
BEGIN_RING(curie, NV40TCL_FP_CONTROL, 1);
|
||||
OUT_RING (fp->fp_control);
|
||||
|
||||
|
||||
@@ -104,7 +104,13 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit)
|
||||
nv40->tex[unit].buffer = nv40mt->buffer;
|
||||
nv40->tex[unit].format = txf;
|
||||
|
||||
BEGIN_RING(curie, NV40TCL_TEX_WRAP(unit), 6);
|
||||
BEGIN_RING(curie, NV40TCL_TEX_OFFSET(unit), 8);
|
||||
OUT_RELOCl(nv40->tex[unit].buffer, 0, NOUVEAU_BO_VRAM |
|
||||
NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
OUT_RELOCd(nv40->tex[unit].buffer, nv40->tex[unit].format,
|
||||
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
|
||||
NOUVEAU_BO_OR, NV40TCL_TEX_FORMAT_DMA0,
|
||||
NV40TCL_TEX_FORMAT_DMA1);
|
||||
OUT_RING (ps->wrap);
|
||||
OUT_RING (NV40TCL_TEX_ENABLE_ENABLE | ps->en |
|
||||
(0x00078000) /* mipmap related? */);
|
||||
|
||||
@@ -603,33 +603,51 @@ nv40_set_framebuffer_state(struct pipe_context *pipe,
|
||||
}
|
||||
|
||||
if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) {
|
||||
BEGIN_RING(curie, NV40TCL_COLOR0_PITCH, 1);
|
||||
OUT_RING (rt[0]->pitch * rt[0]->cpp);
|
||||
nv40->rt[0] = rt[0]->buffer;
|
||||
BEGIN_RING(curie, NV40TCL_DMA_COLOR0, 1);
|
||||
OUT_RELOCo(nv40->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
BEGIN_RING(curie, NV40TCL_COLOR0_PITCH, 2);
|
||||
OUT_RING (rt[0]->pitch * rt[0]->cpp);
|
||||
OUT_RELOCl(nv40->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
}
|
||||
|
||||
if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) {
|
||||
BEGIN_RING(curie, NV40TCL_COLOR1_PITCH, 2);
|
||||
OUT_RING (rt[1]->pitch * rt[1]->cpp);
|
||||
nv40->rt[1] = rt[1]->buffer;
|
||||
BEGIN_RING(curie, NV40TCL_DMA_COLOR1, 1);
|
||||
OUT_RELOCo(nv40->rt[1], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
BEGIN_RING(curie, NV40TCL_COLOR1_OFFSET, 2);
|
||||
OUT_RELOCl(nv40->rt[1], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
OUT_RING (rt[1]->pitch * rt[1]->cpp);
|
||||
}
|
||||
|
||||
if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
|
||||
nv40->rt[2] = rt[2]->buffer;
|
||||
BEGIN_RING(curie, NV40TCL_DMA_COLOR2, 1);
|
||||
OUT_RELOCo(nv40->rt[2], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
BEGIN_RING(curie, NV40TCL_COLOR2_OFFSET, 1);
|
||||
OUT_RELOCl(nv40->rt[2], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
BEGIN_RING(curie, NV40TCL_COLOR2_PITCH, 1);
|
||||
OUT_RING (rt[2]->pitch * rt[2]->cpp);
|
||||
nv40->rt[2] = rt[2]->buffer;
|
||||
}
|
||||
|
||||
if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
|
||||
nv40->rt[3] = rt[3]->buffer;
|
||||
BEGIN_RING(curie, NV40TCL_DMA_COLOR3, 1);
|
||||
OUT_RELOCo(nv40->rt[3], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
BEGIN_RING(curie, NV40TCL_COLOR3_OFFSET, 1);
|
||||
OUT_RELOCl(nv40->rt[3], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
BEGIN_RING(curie, NV40TCL_COLOR3_PITCH, 1);
|
||||
OUT_RING (rt[3]->pitch * rt[3]->cpp);
|
||||
nv40->rt[3] = rt[3]->buffer;
|
||||
}
|
||||
|
||||
if (zeta_format) {
|
||||
nv40->zeta = zeta->buffer;
|
||||
BEGIN_RING(curie, NV40TCL_DMA_ZETA, 1);
|
||||
OUT_RELOCo(nv40->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
BEGIN_RING(curie, NV40TCL_ZETA_OFFSET, 1);
|
||||
OUT_RELOCl(nv40->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
BEGIN_RING(curie, NV40TCL_ZETA_PITCH, 1);
|
||||
OUT_RING (zeta->pitch * zeta->cpp);
|
||||
nv40->zeta = zeta->buffer;
|
||||
}
|
||||
|
||||
nv40->rt_enable = rt_enable;
|
||||
|
||||
@@ -1,11 +1,92 @@
|
||||
#include "nv40_context.h"
|
||||
#include "nv40_state.h"
|
||||
|
||||
/* Emit relocs for every referenced buffer.
|
||||
*
|
||||
* This is to ensure the bufmgr has an accurate idea of how
|
||||
* the buffer is used. These relocs appear in the push buffer as
|
||||
* NOPs, and will only be turned into state changes if a buffer
|
||||
* actually moves.
|
||||
*/
|
||||
static void
|
||||
nv40_state_emit_dummy_relocs(struct nv40_context *nv40)
|
||||
{
|
||||
unsigned rt_flags, tx_flags, fp_flags;
|
||||
int i;
|
||||
|
||||
rt_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR | NOUVEAU_BO_DUMMY;
|
||||
tx_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
|
||||
NOUVEAU_BO_DUMMY;
|
||||
fp_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
|
||||
NOUVEAU_BO_DUMMY;
|
||||
|
||||
/* Render targets */
|
||||
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR0) {
|
||||
OUT_RELOCm(nv40->rt[0], rt_flags,
|
||||
curie, NV40TCL_DMA_COLOR0, 1);
|
||||
OUT_RELOCo(nv40->rt[0], rt_flags);
|
||||
OUT_RELOCm(nv40->rt[0], rt_flags,
|
||||
curie, NV40TCL_COLOR0_OFFSET, 1);
|
||||
OUT_RELOCl(nv40->rt[0], 0, rt_flags);
|
||||
}
|
||||
|
||||
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR1) {
|
||||
OUT_RELOCm(nv40->rt[1], rt_flags,
|
||||
curie, NV40TCL_DMA_COLOR1, 1);
|
||||
OUT_RELOCo(nv40->rt[1], rt_flags);
|
||||
OUT_RELOCm(nv40->rt[1], rt_flags,
|
||||
curie, NV40TCL_COLOR1_OFFSET, 1);
|
||||
OUT_RELOCl(nv40->rt[1], 0, rt_flags);
|
||||
}
|
||||
|
||||
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
|
||||
OUT_RELOCm(nv40->rt[2], rt_flags,
|
||||
curie, NV40TCL_DMA_COLOR2, 1);
|
||||
OUT_RELOCo(nv40->rt[2], rt_flags);
|
||||
OUT_RELOCm(nv40->rt[2], rt_flags,
|
||||
curie, NV40TCL_COLOR2_OFFSET, 1);
|
||||
OUT_RELOCl(nv40->rt[2], 0, rt_flags);
|
||||
}
|
||||
|
||||
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
|
||||
OUT_RELOCm(nv40->rt[3], rt_flags,
|
||||
curie, NV40TCL_DMA_COLOR3, 1);
|
||||
OUT_RELOCo(nv40->rt[3], rt_flags);
|
||||
OUT_RELOCm(nv40->rt[3], rt_flags,
|
||||
curie, NV40TCL_COLOR3_OFFSET, 1);
|
||||
OUT_RELOCl(nv40->rt[3], 0, rt_flags);
|
||||
}
|
||||
|
||||
if (nv40->zeta) {
|
||||
OUT_RELOCm(nv40->zeta, rt_flags, curie, NV40TCL_DMA_ZETA, 1);
|
||||
OUT_RELOCo(nv40->zeta, rt_flags);
|
||||
OUT_RELOCm(nv40->zeta, rt_flags, curie, NV40TCL_ZETA_OFFSET, 1);
|
||||
OUT_RELOCl(nv40->zeta, 0, rt_flags);
|
||||
}
|
||||
|
||||
/* Texture images */
|
||||
for (i = 0; i < 16; i++) {
|
||||
if (!(nv40->fp_samplers & (1 << i)))
|
||||
continue;
|
||||
OUT_RELOCm(nv40->tex[i].buffer, tx_flags,
|
||||
curie, NV40TCL_TEX_OFFSET(i), 2);
|
||||
OUT_RELOCl(nv40->tex[i].buffer, 0, tx_flags);
|
||||
OUT_RELOCd(nv40->tex[i].buffer, nv40->tex[i].format,
|
||||
tx_flags | NOUVEAU_BO_OR, NV40TCL_TEX_FORMAT_DMA0,
|
||||
NV40TCL_TEX_FORMAT_DMA1);
|
||||
}
|
||||
|
||||
/* Fragment program */
|
||||
OUT_RELOCm(nv40->fragprog.active->buffer, fp_flags,
|
||||
curie, NV40TCL_FP_ADDRESS, 1);
|
||||
OUT_RELOC (nv40->fragprog.active->buffer, 0,
|
||||
fp_flags | NOUVEAU_BO_OR | NOUVEAU_BO_LOW,
|
||||
NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1);
|
||||
}
|
||||
|
||||
void
|
||||
nv40_emit_hw_state(struct nv40_context *nv40)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (nv40->dirty & NV40_NEW_FRAGPROG) {
|
||||
nv40_fragprog_bind(nv40, nv40->fragprog.current);
|
||||
/*XXX: clear NV40_NEW_FRAGPROG if no new program uploaded */
|
||||
@@ -28,67 +109,6 @@ nv40_emit_hw_state(struct nv40_context *nv40)
|
||||
|
||||
nv40->dirty_samplers = 0;
|
||||
|
||||
/* Emit relocs for every referenced buffer.
|
||||
* This is to ensure the bufmgr has an accurate idea of how
|
||||
* the buffer is used. This isn't very efficient, but we don't
|
||||
* seem to take a significant performance hit. Will be improved
|
||||
* at some point. Vertex arrays are emitted by nv40_vbo.c
|
||||
*/
|
||||
|
||||
/* Render targets */
|
||||
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR0) {
|
||||
BEGIN_RING(curie, NV40TCL_DMA_COLOR0, 1);
|
||||
OUT_RELOCo(nv40->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
BEGIN_RING(curie, NV40TCL_COLOR0_OFFSET, 1);
|
||||
OUT_RELOCl(nv40->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
}
|
||||
|
||||
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR1) {
|
||||
BEGIN_RING(curie, NV40TCL_DMA_COLOR1, 1);
|
||||
OUT_RELOCo(nv40->rt[1], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
BEGIN_RING(curie, NV40TCL_COLOR1_OFFSET, 1);
|
||||
OUT_RELOCl(nv40->rt[1], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
}
|
||||
|
||||
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
|
||||
BEGIN_RING(curie, NV40TCL_DMA_COLOR2, 1);
|
||||
OUT_RELOCo(nv40->rt[2], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
BEGIN_RING(curie, NV40TCL_COLOR2_OFFSET, 1);
|
||||
OUT_RELOCl(nv40->rt[2], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
}
|
||||
|
||||
if (nv40->rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
|
||||
BEGIN_RING(curie, NV40TCL_DMA_COLOR3, 1);
|
||||
OUT_RELOCo(nv40->rt[3], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
BEGIN_RING(curie, NV40TCL_COLOR3_OFFSET, 1);
|
||||
OUT_RELOCl(nv40->rt[3], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
}
|
||||
|
||||
if (nv40->zeta) {
|
||||
BEGIN_RING(curie, NV40TCL_DMA_ZETA, 1);
|
||||
OUT_RELOCo(nv40->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
BEGIN_RING(curie, NV40TCL_ZETA_OFFSET, 1);
|
||||
OUT_RELOCl(nv40->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
|
||||
}
|
||||
|
||||
/* Texture images */
|
||||
for (i = 0; i < 16; i++) {
|
||||
if (!(nv40->fp_samplers & (1 << i)))
|
||||
continue;
|
||||
BEGIN_RING(curie, NV40TCL_TEX_OFFSET(i), 2);
|
||||
OUT_RELOCl(nv40->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
|
||||
NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
OUT_RELOCd(nv40->tex[i].buffer, nv40->tex[i].format,
|
||||
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
|
||||
NOUVEAU_BO_OR, NV40TCL_TEX_FORMAT_DMA0,
|
||||
NV40TCL_TEX_FORMAT_DMA1);
|
||||
}
|
||||
|
||||
/* Fragment program */
|
||||
BEGIN_RING(curie, NV40TCL_FP_ADDRESS, 1);
|
||||
OUT_RELOC (nv40->fragprog.active->buffer, 0, NOUVEAU_BO_VRAM |
|
||||
NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
|
||||
NOUVEAU_BO_OR, NV40TCL_FP_ADDRESS_DMA0,
|
||||
NV40TCL_FP_ADDRESS_DMA1);
|
||||
nv40_state_emit_dummy_relocs(nv40);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user