The previous code would emit a full set of state during the first EmitState on
a new cmdbuf, to ensure that state wasn't lost across UNLOCK/LOCK pairs (in the
case of context switching).  This was rather inefficient.  Instead, after
flushing a cmdbuf, mark the state as needing to be saved on unlock.  Then, at
the beginning of flushing a cmdbuf, if we actually have lost the context, go
back and emit a new cmdbuf with the full set of state, before continuing with
the cmdbuf flush.  Provides a 10-15% improvement in ipers performance in my
tests, along with other apps.

Tested with:	ipers, glxgears, quake3
This commit is contained in:
Eric Anholt
2004-09-25 07:00:15 +00:00
parent 4010481ba3
commit 5562fe653c
10 changed files with 175 additions and 148 deletions
+13 -15
View File
@@ -202,30 +202,28 @@ static void radeonCompatEmitPacket( radeonContextPtr rmesa,
static void radeonCompatEmitStateLocked( radeonContextPtr rmesa )
{
struct radeon_state_atom *state, *tmp;
struct radeon_state_atom *atom;
if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
fprintf(stderr, "%s\n", __FUNCTION__);
if (rmesa->lost_context) {
if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS|DEBUG_IOCTL))
fprintf(stderr, "%s - lost context\n", __FUNCTION__);
if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty)
return;
foreach_s( state, tmp, &(rmesa->hw.clean) )
move_to_tail(&(rmesa->hw.dirty), state );
rmesa->lost_context = 0;
}
foreach_s( state, tmp, &(rmesa->hw.dirty) ) {
if (!state->is_tcl)
radeonCompatEmitPacket( rmesa, state );
move_to_head( &(rmesa->hw.clean), state );
foreach(atom, &rmesa->hw.atomlist) {
if (rmesa->hw.all_dirty)
atom->dirty = GL_TRUE;
if (atom->is_tcl)
atom->dirty = GL_FALSE;
if (atom->dirty)
radeonCompatEmitPacket(rmesa, atom);
}
rmesa->hw.is_dirty = GL_FALSE;
rmesa->hw.all_dirty = GL_FALSE;
}
static void radeonCompatEmitPrimitiveLocked( radeonContextPtr rmesa,
GLuint hw_primitive,
GLuint nverts,
+2 -2
View File
@@ -62,7 +62,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_vtxfmt.h"
#include "radeon_maos.h"
#define DRIVER_DATE "20030328"
#define DRIVER_DATE "20040924"
#include "vblank.h"
#include "utils.h"
@@ -306,7 +306,7 @@ radeonCreateContext( const __GLcontextModes *glVisual,
DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
rmesa->swtcl.RenderIndex = ~0;
rmesa->lost_context = 1;
rmesa->hw.all_dirty = GL_TRUE;
/* Set the maximum texture size small enough that we can guarantee that
* all texture units can bind a maximal texture and have them both in
+5 -5
View File
@@ -185,6 +185,7 @@ struct radeon_state_atom {
GLuint is_tcl;
int *cmd; /* one or more cmd's */
int *lastcmd; /* one or more cmd's */
int *savedcmd; /* one or more cmd's */
GLboolean dirty; /* dirty-mark in emit_state_list */
GLboolean (*check)( GLcontext * ); /* is this state active? */
};
@@ -398,14 +399,11 @@ struct radeon_state_atom {
struct radeon_hw_state {
/* All state should be on one of these lists:
*/
struct radeon_state_atom dirty; /* dirty list head placeholder */
struct radeon_state_atom clean; /* clean list head placeholder */
/* Head of the linked list of state atoms. */
struct radeon_state_atom atomlist;
/* Hardware state, stored as cmdbuf commands:
* -- Need to doublebuffer for
* - reviving state after loss of context
* - eliding noop statechange loops? (except line stipple count)
*/
struct radeon_state_atom ctx;
@@ -428,6 +426,7 @@ struct radeon_hw_state {
struct radeon_state_atom txr[2]; /* for NPOT */
int max_state_size; /* Number of bytes necessary for a full state emit. */
GLboolean is_dirty, all_dirty;
};
struct radeon_state {
@@ -733,6 +732,7 @@ struct radeon_context {
drm_clip_rect_t *pClipRects;
unsigned int lastStamp;
GLboolean lost_context;
GLboolean save_on_next_unlock;
radeonScreenPtr radeonScreen; /* Screen private DRI data */
drm_radeon_sarea_t *sarea; /* Private SAREA data */
+131 -110
View File
@@ -59,6 +59,63 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
static void radeonWaitForIdle( radeonContextPtr rmesa );
static int radeonFlushCmdBufLocked( radeonContextPtr rmesa,
const char * caller );
/* Snapshot the hardware state: for every atom on rmesa->hw.atomlist, copy
 * its current command words (cmd) into its savedcmd buffer.  cmd_size is
 * counted in 4-byte units, hence the multiplication.
 */
void radeonSaveHwState( radeonContextPtr rmesa )
{
   struct radeon_state_atom *cur;

   foreach(cur, &rmesa->hw.atomlist) {
      size_t nbytes = cur->cmd_size * 4;

      memcpy(cur->savedcmd, cur->cmd, nbytes);
   }
}
/* Exchange the cmd and savedcmd pointers of every atom on
 * rmesa->hw.atomlist.  Only the pointers are swapped; no command data is
 * copied.  Calling this twice restores the original arrangement.
 */
static void radeonSwapHwState( radeonContextPtr rmesa )
{
   struct radeon_state_atom *cur;

   foreach(cur, &rmesa->hw.atomlist) {
      int *live = cur->cmd;

      cur->cmd = cur->savedcmd;
      cur->savedcmd = live;
   }
}
/* Re-emit the saved hardware state after the context has been lost.
 *
 * At this point we were in FlushCmdBufLocked but we had lost our context, so
 * we need to unwire our current cmdbuf and hook a new one in, emit that, then
 * wire the old cmdbuf back in so that FlushCmdBufLocked can continue and the
 * buffer can depend on the state not being lost across lock/unlock.
 *
 * On return the caller's rmesa->store and rmesa->dma.nr_released_bufs hold
 * the values they had on entry, lost_context is cleared, and hw.all_dirty is
 * set.
 */
static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa )
{
GLuint nr_released_bufs;
struct radeon_store store;
/* Clear the flag before anything else: the radeonFlushCmdBufLocked call
 * below tests lost_context and would otherwise call back into this function.
 */
rmesa->lost_context = GL_FALSE;
/* Stash the pending cmdbuf bookkeeping (copied by value); both are put back
 * at the end of this function.
 */
nr_released_bufs = rmesa->dma.nr_released_bufs;
store = rmesa->store;
/* Reset the store counters so the state emit starts from an empty cmdbuf. */
rmesa->store.statenr = 0;
rmesa->store.primnr = 0;
rmesa->store.cmd_used = 0;
rmesa->store.elts_start = 0;
/* Force every atom to be emitted, and make each atom's cmd pointer refer to
 * its savedcmd copy for the duration of the emit.
 */
rmesa->hw.all_dirty = GL_TRUE;
radeonSwapHwState( rmesa );
/* In this case it's okay to EmitState while locked because we won't exhaust
 * our (empty) cmdbuf.
 */
radeonEmitState(rmesa);
radeonFlushCmdBufLocked(rmesa, __FUNCTION__);
/* Swap back so that atom->cmd refers to the current state again. */
radeonSwapHwState(rmesa);
/* We've just cleared out the dirty flags, so we don't remember what
 * actually needed to be emitted for the next state emit.
 */
rmesa->hw.all_dirty = GL_TRUE;
/* Restore the caller's cmdbuf bookkeeping stashed above. */
rmesa->dma.nr_released_bufs = nr_released_bufs;
rmesa->store = store;
}
/* =============================================================
* Kernel command buffer handling
@@ -76,115 +133,93 @@ static void print_state_atom( struct radeon_state_atom *state )
}
static void radeon_emit_state_list( radeonContextPtr rmesa,
struct radeon_state_atom *list )
/* The state atoms will be emitted in the order they appear in the atom list,
* so this step is important.
*/
void radeonSetUpAtomList( radeonContextPtr rmesa )
{
struct radeon_state_atom *state, *tmp;
char *dest;
int i, size, texunits;
int i, mtu = rmesa->glCtx->Const.MaxTextureUnits;
/* It appears that some permutations of state atoms lock up the
* chip. Therefore we make sure that state atoms are emitted in a
* fixed order. First mark all dirty state atoms and then go
* through all state atoms in a well defined order and emit only
* the marked ones.
* FIXME: This requires knowledge of which state atoms exist.
* FIXME: Is the zbs hack below still needed?
*/
size = 0;
foreach_s( state, tmp, list ) {
if (state->check( rmesa->glCtx )) {
size += state->cmd_size;
state->dirty = GL_TRUE;
move_to_head( &(rmesa->hw.clean), state );
if (RADEON_DEBUG & DEBUG_STATE)
print_state_atom( state );
}
else if (RADEON_DEBUG & DEBUG_STATE)
fprintf(stderr, "skip state %s\n", state->name);
make_empty_list(&rmesa->hw.atomlist);
rmesa->hw.atomlist.name = "atom-list";
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ctx);
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.set);
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lin);
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msk);
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.vpt);
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tcl);
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msc);
for (i = 0; i < mtu; ++i) {
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tex[i]);
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.txr[i]);
}
/* short cut */
if (!size)
return;
dest = radeonAllocCmdBuf( rmesa, size * 4, __FUNCTION__);
texunits = rmesa->glCtx->Const.MaxTextureUnits;
#define EMIT_ATOM(ATOM) \
do { \
if (rmesa->hw.ATOM.dirty) { \
rmesa->hw.ATOM.dirty = GL_FALSE; \
memcpy( dest, rmesa->hw.ATOM.cmd, rmesa->hw.ATOM.cmd_size * 4); \
dest += rmesa->hw.ATOM.cmd_size * 4; \
} \
} while (0)
EMIT_ATOM (ctx);
EMIT_ATOM (set);
EMIT_ATOM (lin);
EMIT_ATOM (msk);
EMIT_ATOM (vpt);
EMIT_ATOM (tcl);
EMIT_ATOM (msc);
for (i = 0; i < texunits; ++i) {
EMIT_ATOM (tex[i]);
EMIT_ATOM (txr[i]);
}
EMIT_ATOM (zbs);
EMIT_ATOM (mtl);
for (i = 0; i < 3 + texunits; ++i)
EMIT_ATOM (mat[i]);
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.zbs);
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mtl);
for (i = 0; i < 3 + mtu; ++i)
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mat[i]);
for (i = 0; i < 8; ++i)
EMIT_ATOM (lit[i]);
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lit[i]);
for (i = 0; i < 6; ++i)
EMIT_ATOM (ucp[i]);
EMIT_ATOM (eye);
EMIT_ATOM (grd);
EMIT_ATOM (fog);
EMIT_ATOM (glt);
#undef EMIT_ATOM
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ucp[i]);
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.eye);
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.grd);
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.fog);
insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.glt);
}
void radeonEmitState( radeonContextPtr rmesa )
{
struct radeon_state_atom *state, *tmp;
struct radeon_state_atom *atom;
char *dest;
if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
fprintf(stderr, "%s\n", __FUNCTION__);
/* Somewhat overkill:
if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty)
return;
/* To avoid going across the entire set of states multiple times, just check
* for enough space for the case of emitting all state, and inline the
* radeonAllocCmdBuf code here without all the checks.
*/
if (rmesa->lost_context) {
if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS|DEBUG_IOCTL))
fprintf(stderr, "%s - lost context\n", __FUNCTION__);
radeonEnsureCmdBufSpace(rmesa, rmesa->hw.max_state_size);
dest = rmesa->store.cmd_buf + rmesa->store.cmd_used;
foreach_s( state, tmp, &(rmesa->hw.clean) )
move_to_tail(&(rmesa->hw.dirty), state );
rmesa->lost_context = 0;
}
else if (1) {
/* This is a darstardly kludge to work around a lockup that I
* haven't otherwise figured out.
*/
move_to_tail(&(rmesa->hw.dirty), &(rmesa->hw.zbs) );
if (RADEON_DEBUG & DEBUG_STATE) {
foreach(atom, &rmesa->hw.atomlist) {
if (atom->dirty || rmesa->hw.all_dirty) {
if (atom->check(rmesa->glCtx))
print_state_atom(atom);
else
fprintf(stderr, "skip state %s\n", atom->name);
}
}
}
if (!(rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL)) {
foreach_s( state, tmp, &(rmesa->hw.dirty) ) {
if (state->is_tcl) {
move_to_head( &(rmesa->hw.clean), state );
}
}
foreach(atom, &rmesa->hw.atomlist) {
if (rmesa->hw.all_dirty)
atom->dirty = GL_TRUE;
if (!(rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL) &&
atom->is_tcl)
atom->dirty = GL_FALSE;
if (atom->dirty) {
if (atom->check(rmesa->glCtx)) {
int size = atom->cmd_size * 4;
memcpy(dest, atom->cmd, size);
dest += size;
rmesa->store.cmd_used += size;
atom->dirty = GL_FALSE;
}
}
}
radeon_emit_state_list( rmesa, &rmesa->hw.dirty );
assert(rmesa->store.cmd_used <= RADEON_CMD_BUF_SZ);
rmesa->hw.is_dirty = GL_FALSE;
rmesa->hw.all_dirty = GL_FALSE;
}
/* Fire a section of the retained (indexed_verts) buffer as a regular
 * primitive.
 */
@@ -376,7 +411,7 @@ void radeonEmitAOS( radeonContextPtr rmesa,
(component[0]->aos_start + offset * component[0]->aos_stride * 4);
#else
drm_radeon_cmd_header_t *cmd;
int sz = AOS_BUFSZ;
int sz = AOS_BUFSZ(nr);
int i;
int *tmp;
@@ -491,6 +526,9 @@ static int radeonFlushCmdBufLocked( radeonContextPtr rmesa,
int ret, i;
drm_radeon_cmd_buffer_t cmd;
if (rmesa->lost_context)
radeonBackUpAndEmitLostStateLocked(rmesa);
if (RADEON_DEBUG & DEBUG_IOCTL) {
fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
@@ -544,18 +582,7 @@ static int radeonFlushCmdBufLocked( radeonContextPtr rmesa,
rmesa->store.statenr = 0;
rmesa->store.cmd_used = 0;
rmesa->dma.nr_released_bufs = 0;
/* Set lost_context so that the first state emit on the new buffer is a full
* one. This is because the context might get lost while preparing the next
* buffer, and when we lock and find out, we don't have the information to
* recreate the state. This function should always be called before the new
* buffer is begun, so it's sufficient to just set lost_context here.
*
* The alternative to this would be to copy out the state on unlock
* (approximately) and if we did lose the context, dispatch a cmdbuf to reset
* the state to that old copy before continuing with the accumulated command
* buffer.
*/
rmesa->lost_context = 1;
rmesa->save_on_next_unlock = 1;
return ret;
}
@@ -897,6 +924,7 @@ void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv )
}
rmesa->swap_ust = ust;
rmesa->hw.all_dirty = GL_TRUE;
}
void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
@@ -1028,13 +1056,6 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
cx += dPriv->x;
cy = dPriv->y + dPriv->h - cy - ch;
/* We have to emit state along with the clear, since the kernel relies on
* some of it. The EmitState that was above RADEON_FIREVERTICES was an
* attempt to do that, except that another context may come in and cause us
* to lose our context while we're unlocked.
*/
radeonEmitState( rmesa );
LOCK_HARDWARE( rmesa );
/* Throttle the number of clear ioctls we do.
@@ -1146,6 +1167,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
}
UNLOCK_HARDWARE( rmesa );
rmesa->hw.all_dirty = GL_TRUE;
}
@@ -1189,8 +1211,7 @@ void radeonFlush( GLcontext *ctx )
if (rmesa->dma.flush)
rmesa->dma.flush( rmesa );
if (!is_empty_list(&rmesa->hw.dirty))
radeonEmitState( rmesa );
radeonEmitState( rmesa );
if (rmesa->store.cmd_used)
radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+7 -3
View File
@@ -104,6 +104,9 @@ extern void radeonWaitForVBlank( radeonContextPtr rmesa );
extern void radeonInitIoctlFuncs( GLcontext *ctx );
extern void radeonGetAllParams( radeonContextPtr rmesa );
extern void radeonSaveHwState( radeonContextPtr rmesa );
extern void radeonSetUpAtomList( radeonContextPtr rmesa );
/* radeon_compat.c:
*/
extern void radeonCompatEmitPrimitive( radeonContextPtr rmesa,
@@ -111,7 +114,6 @@ extern void radeonCompatEmitPrimitive( radeonContextPtr rmesa,
GLuint hw_primitive,
GLuint nrverts );
/* ================================================================
* Helper macros:
*/
@@ -130,7 +132,8 @@ do { \
#define RADEON_STATECHANGE( rmesa, ATOM ) \
do { \
RADEON_NEWPRIM( rmesa ); \
move_to_head( &(rmesa->hw.dirty), &(rmesa->hw.ATOM)); \
rmesa->hw.ATOM.dirty = GL_TRUE; \
rmesa->hw.is_dirty = GL_TRUE; \
} while (0)
#define RADEON_DB_STATE( ATOM ) \
@@ -144,7 +147,8 @@ static __inline int RADEON_DB_STATECHANGE(
if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
int *tmp;
RADEON_NEWPRIM( rmesa );
move_to_head( &(rmesa->hw.dirty), atom );
atom->dirty = GL_TRUE;
rmesa->hw.is_dirty = GL_TRUE;
tmp = atom->cmd;
atom->cmd = atom->lastcmd;
atom->lastcmd = tmp;
@@ -124,4 +124,6 @@ void radeonGetLock( radeonContextPtr rmesa, GLuint flags )
DRI_AGE_TEXTURES( rmesa->texture_heaps[ i ] );
}
}
rmesa->lost_context = GL_TRUE;
}
@@ -105,6 +105,10 @@ extern int prevLockLine;
rmesa->dri.hwLock, \
rmesa->dri.hwContext ); \
DEBUG_RESET(); \
if ( rmesa->save_on_next_unlock ) { \
radeonSaveHwState( rmesa ); \
rmesa->save_on_next_unlock = GL_FALSE; \
} \
} while (0)
#endif
@@ -59,8 +59,9 @@ void radeonPrintDirty( radeonContextPtr rmesa, const char *msg )
fprintf(stderr, msg);
fprintf(stderr, ": ");
foreach(l, &(rmesa->hw.dirty)) {
fprintf(stderr, "%s, ", l->name);
foreach(l, &rmesa->hw.atomlist) {
if (l->dirty || rmesa->hw.all_dirty)
fprintf(stderr, "%s, ", l->name);
}
fprintf(stderr, "\n");
@@ -197,11 +198,6 @@ void radeonInitState( radeonContextPtr rmesa )
rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset;
rmesa->state.pixel.readPitch = rmesa->state.color.drawPitch;
/* Initialize lists:
*/
make_empty_list(&(rmesa->hw.dirty));
make_empty_list(&(rmesa->hw.clean));
rmesa->hw.max_state_size = 0;
#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG ) \
@@ -209,10 +205,11 @@ void radeonInitState( radeonContextPtr rmesa )
rmesa->hw.ATOM.cmd_size = SZ; \
rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int)); \
rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int)); \
rmesa->hw.ATOM.savedcmd = (int *)CALLOC(SZ * sizeof(int)); \
rmesa->hw.ATOM.name = NM; \
rmesa->hw.ATOM.is_tcl = FLAG; \
rmesa->hw.ATOM.check = check_##CHK; \
insert_at_head(&(rmesa->hw.dirty), &(rmesa->hw.ATOM)); \
rmesa->hw.ATOM.dirty = GL_TRUE; \
rmesa->hw.max_state_size += SZ * sizeof(int); \
} while (0)
@@ -256,6 +253,7 @@ void radeonInitState( radeonContextPtr rmesa )
ALLOC_STATE( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0 );
ALLOC_STATE( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0 );
radeonSetUpAtomList( rmesa );
/* Fill in the packet headers:
*/
@@ -552,4 +550,7 @@ void radeonInitState( radeonContextPtr rmesa )
rmesa->hw.eye.cmd[EYE_Y] = 0;
rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
radeonSaveHwState( rmesa );
rmesa->hw.all_dirty = GL_TRUE;
}
@@ -75,8 +75,7 @@ void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
if ( rmesa ) {
if ( t == rmesa->state.texture.unit[0].texobj ) {
rmesa->state.texture.unit[0].texobj = NULL;
remove_from_list( &rmesa->hw.tex[0] );
make_empty_list( &rmesa->hw.tex[0] );
rmesa->hw.tex[0].dirty = GL_FALSE;
}
}
+1 -3
View File
@@ -41,7 +41,6 @@ SOFTWARE.
#include "imports.h"
#include "context.h"
#include "macros.h"
#include "simple_list.h"
#include "radeon_context.h"
#include "radeon_ioctl.h"
@@ -66,8 +65,7 @@ radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) {
if ( t == rmesa->state.texture.unit[i].texobj ) {
rmesa->state.texture.unit[i].texobj = NULL;
remove_from_list( &rmesa->hw.tex[i] );
make_empty_list( &rmesa->hw.tex[i] );
rmesa->hw.tex[i].dirty = GL_FALSE;
}
}
}