Bug #1028: Add hardware-accelerated stencil support to r128. Testing with

stencilwrap reported many issues with various modes.  Some of these were
complicated by the fact that spans are broken (Bug #1615), but some appear to be
real bugs.  However, while spans remain broken, I found that visual results were
better when avoiding fallbacks altogether than when falling back just to avoid a
broken stencil implementation.  Note that this required changing the depth spans
at 24+8bpp into read-modify-write cycles.  It would be nicer as a single write
with a mask, but the kernel span blits turn off masking.

Reviewed by:	ajax
This commit is contained in:
Eric Anholt
2005-10-27 20:26:24 +00:00
parent 512c994b92
commit 215c4c3a9c
6 changed files with 367 additions and 16 deletions
+1 -1
View File
@@ -44,7 +44,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "utils.h"
#define DRIVER_DATE "20051026"
#define DRIVER_DATE "20051027"
/* Return the width and height of the current color buffer.
+9 -6
View File
@@ -408,6 +408,7 @@ static void r128Clear( GLcontext *ctx, GLbitfield mask, GLboolean all,
GLuint flags = 0;
GLint i;
GLint ret;
GLuint depthmask = 0;
if ( R128_DEBUG & DEBUG_VERBOSE_API ) {
fprintf( stderr, "%s:\n", __FUNCTION__ );
@@ -438,15 +439,17 @@ static void r128Clear( GLcontext *ctx, GLbitfield mask, GLboolean all,
if ( ( mask & BUFFER_BIT_DEPTH ) && ctx->Depth.Mask ) {
flags |= R128_DEPTH;
/* if we're at 16 bits, extra plane mask won't hurt */
depthmask |= 0x00ffffff;
mask &= ~BUFFER_BIT_DEPTH;
}
#if 0
/* FIXME: Add stencil support */
if ( mask & BUFFER_BIT_STENCIL ) {
flags |= DRM_R128_DEPTH_BUFFER;
if ( mask & BUFFER_BIT_STENCIL &&
(ctx->Visual.stencilBits > 0 && ctx->Visual.depthBits == 24) ) {
flags |= R128_DEPTH;
depthmask |= ctx->Stencil.WriteMask[0] << 24;
mask &= ~BUFFER_BIT_STENCIL;
}
#endif
if ( flags ) {
@@ -511,7 +514,7 @@ static void r128Clear( GLcontext *ctx, GLbitfield mask, GLboolean all,
clear.clear_color = rmesa->ClearColor;
clear.clear_depth = rmesa->ClearDepth;
clear.color_mask = rmesa->setup.plane_3d_mask_c;
clear.depth_mask = ~0;
clear.depth_mask = depthmask;
ret = drmCommandWrite( rmesa->driFd, DRM_R128_CLEAR,
&clear, sizeof(clear) );
+20 -4
View File
@@ -271,6 +271,11 @@ r128CreateBuffer( __DRIscreenPrivate *driScrnPriv,
return GL_FALSE; /* not implemented */
}
else {
const GLboolean swDepth = GL_FALSE;
const GLboolean swAlpha = GL_FALSE;
const GLboolean swAccum = mesaVis->accumRedBits > 0;
const GLboolean swStencil = mesaVis->stencilBits > 0 &&
mesaVis->depthBits != 24;
struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
{
@@ -316,12 +321,23 @@ r128CreateBuffer( __DRIscreenPrivate *driScrnPriv,
_mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
}
if (mesaVis->stencilBits > 0 && !swStencil) {
driRenderbuffer *stencilRb
= driNewRenderbuffer(GL_STENCIL_INDEX8_EXT,
NULL,
screen->cpp,
screen->depthOffset, screen->depthPitch,
driDrawPriv);
r128SetSpanFunctions(stencilRb, mesaVis);
_mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base);
}
_mesa_add_soft_renderbuffers(fb,
GL_FALSE, /* color */
GL_FALSE, /* depth */
mesaVis->stencilBits > 0,
mesaVis->accumRedBits > 0,
GL_FALSE, /* alpha */
swDepth,
swStencil,
swAccum,
swAlpha,
GL_FALSE /* aux */);
driDrawPriv->driverPrivate = (void *) fb;
+140 -4
View File
@@ -46,6 +46,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define HAVE_HW_DEPTH_SPANS 1
#define HAVE_HW_DEPTH_PIXELS 1
#define HAVE_HW_STENCIL_SPANS 1
#define HAVE_HW_STENCIL_PIXELS 1
#define LOCAL_VARS \
r128ContextPtr rmesa = R128_CONTEXT(ctx); \
@@ -101,6 +103,21 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
+ ((dPriv->y + (Y)) * drb->flippedPitch + (dPriv->x + (X))) * drb->cpp)
#include "spantmp2.h"
/* Idling in the depth/stencil span functions:
* For writes, the kernel reads from the given user-space buffer at dispatch
* time, and then writes to the depth buffer asynchronously.
* For reads, the kernel reads from the depth buffer and writes to the span
* temporary asynchronously.
* So, if we're going to read from the span temporary, we need to idle before
* doing so. But we don't need to idle after write, because the CPU won't
* be accessing the destination, only the accelerator (through 3d rendering or
* depth span reads)
* However, due to interactions from the pixel cache between 2d (what we do
* with depth) and 3d (all other parts of the system), we idle at the beginning
* and end of a set of span operations, which should cover the pixel cache issue.
* Except, we still have major issues, as shown by no_rast=true glxgears, or
* stencilwrap.
*/
/* ================================================================
* Depth buffer
@@ -110,10 +127,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/* Write a span of n depth values: forwards depth[] and mask to
 * r128WriteDepthSpanLocked() with the span origin (x, y) offset by the
 * drawable position (dPriv->x, dPriv->y).  Expects rmesa, dPriv, n, x, y,
 * depth and mask to be in scope where the macro is expanded.
 *
 * NOTE(review): this text comes from a rendered diff.  The "depth, mask );"
 * line without a trailing backslash appears to be the pre-change line that
 * the backslash-continued line after it replaces -- confirm against the
 * real file.
 */
#define WRITE_DEPTH_SPAN() \
do { \
r128WriteDepthSpanLocked( rmesa, n, \
x + dPriv->x, \
y + dPriv->y, \
depth, mask );
depth, mask ); \
} while (0)
#define WRITE_DEPTH_PIXELS() \
do { \
@@ -183,20 +202,41 @@ do { \
/* 24-bit depth, 8-bit stencil buffer functions
*/
/* Write a span of n depth values as a read-modify-write cycle:
 *   1. r128ReadDepthSpanLocked() reads the current span into the span
 *      temporary at sPriv->pFB + r128scrn->spanOffset,
 *   2. r128WaitForIdleLocked() is called before the CPU reads that temporary,
 *   3. each output word keeps the top 8 bits of the value read back
 *      (mask 0xff000000) and takes its low 24 bits from depth[i],
 *   4. the merged words are written back with r128WriteDepthSpanLocked().
 * Expects rmesa, sPriv, r128scrn, dPriv, n, x, y, depth and mask to be in
 * scope where the macro is expanded.
 *
 * NOTE(review): this text comes from a rendered diff.  The "depth, mask );"
 * line without a trailing backslash appears to be the pre-change line that
 * "buf, mask ); \" replaces -- confirm against the real file.
 */
#define WRITE_DEPTH_SPAN() \
do { \
GLint buf[n]; \
GLint i; \
GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
r128scrn->spanOffset); \
r128ReadDepthSpanLocked( rmesa, n, \
x + dPriv->x, \
y + dPriv->y ); \
r128WaitForIdleLocked( rmesa ); \
for ( i = 0 ; i < n ; i++ ) { \
buf[i] = (readbuf[i] & 0xff000000) | (depth[i] & 0x00ffffff); \
} \
r128WriteDepthSpanLocked( rmesa, n, \
x + dPriv->x, \
y + dPriv->y, \
depth, mask );
buf, mask ); \
} while (0)
/* Write n scattered depth values as a read-modify-write cycle:
 *   1. the pixel coordinates are translated into ox[]/oy[] (x offset by
 *      dPriv->x, y passed through Y_FLIP() and offset by dPriv->y),
 *   2. r128ReadDepthPixelsLocked() reads the current values into the span
 *      temporary at sPriv->pFB + r128scrn->spanOffset,
 *   3. r128WaitForIdleLocked() is called before the CPU reads that temporary,
 *   4. each output word keeps the top 8 bits of the value read back
 *      (mask 0xff000000) and takes its low 24 bits from depth[i],
 *   5. the merged words are written with r128WriteDepthPixelsLocked().
 * The loop index i is not declared here; it must come from the expansion
 * site, together with rmesa, sPriv, r128scrn, dPriv, n, x, y, depth and mask.
 *
 * NOTE(review): this text comes from a rendered diff.  The first
 * r128WriteDepthPixelsLocked( ..., depth, mask ) call, placed before the
 * read, appears to be the pre-change line rather than an intended extra
 * write -- confirm against the real file.
 */
#define WRITE_DEPTH_PIXELS() \
do { \
GLint buf[n]; \
GLint ox[MAX_WIDTH]; \
GLint oy[MAX_WIDTH]; \
GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
r128scrn->spanOffset); \
for ( i = 0 ; i < n ; i++ ) { \
ox[i] = x[i] + dPriv->x; \
oy[i] = Y_FLIP( y[i] ) + dPriv->y; \
} \
r128WriteDepthPixelsLocked( rmesa, n, ox, oy, depth, mask ); \
r128ReadDepthPixelsLocked( rmesa, n, ox, oy ); \
r128WaitForIdleLocked( rmesa ); \
for ( i = 0 ; i < n ; i++ ) { \
buf[i] = (readbuf[i] & 0xff000000) | (depth[i] & 0x00ffffff); \
} \
r128WriteDepthPixelsLocked( rmesa, n, ox, oy, buf, mask ); \
} while (0)
#define READ_DEPTH_SPAN() \
@@ -205,6 +245,7 @@ do { \
r128scrn->spanOffset); \
GLint i; \
\
/*if (n >= 128) fprintf(stderr, "Large number of pixels: %d\n", n);*/ \
r128ReadDepthSpanLocked( rmesa, n, \
x + dPriv->x, \
y + dPriv->y ); \
@@ -258,8 +299,99 @@ do { \
* Stencil buffer
*/
/* FIXME: Add support for hardware stencil buffers.
/* 24 bit depth, 8 bit stencil depthbuffer functions
*/
/* Write a span of n stencil values as a read-modify-write cycle on the
 * combined depth/stencil words:
 *   1. r128ReadDepthSpanLocked() reads the current span into the span
 *      temporary at sPriv->pFB + r128scrn->spanOffset,
 *   2. r128WaitForIdleLocked() is called before the CPU reads that temporary,
 *   3. each output word keeps the low 24 bits of the value read back and
 *      takes its top 8 bits from stencil[i],
 *   4. the merged words are written back with r128WriteDepthSpanLocked().
 * Expects rmesa, sPriv, r128scrn, dPriv, n, x, y, stencil and mask to be in
 * scope where the macro is expanded.
 */
#define WRITE_STENCIL_SPAN() \
do { \
   GLint buf[n]; \
   GLint i; \
   GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
                                r128scrn->spanOffset); \
   r128ReadDepthSpanLocked( rmesa, n, \
                            x + dPriv->x, \
                            y + dPriv->y ); \
   r128WaitForIdleLocked( rmesa ); \
   for ( i = 0 ; i < n ; i++ ) { \
      /* Widen to GLuint before shifting: if stencil[i] promotes to int, */ \
      /* shifting a value >= 0x80 left by 24 would overflow it.          */ \
      buf[i] = (GLint)((readbuf[i] & 0x00ffffff) | \
                       ((GLuint)stencil[i] << 24)); \
   } \
   r128WriteDepthSpanLocked( rmesa, n, \
                             x + dPriv->x, \
                             y + dPriv->y, \
                             buf, mask ); \
} while (0)
/* Write n scattered stencil values as a read-modify-write cycle on the
 * combined depth/stencil words:
 *   1. the pixel coordinates are translated into ox[]/oy[] (x offset by
 *      dPriv->x, y passed through Y_FLIP() and offset by dPriv->y),
 *   2. r128ReadDepthPixelsLocked() reads the current values into the span
 *      temporary at sPriv->pFB + r128scrn->spanOffset,
 *   3. r128WaitForIdleLocked() is called before the CPU reads that temporary,
 *   4. each output word keeps the low 24 bits of the value read back and
 *      takes its top 8 bits from stencil[i],
 *   5. the merged words are written with r128WriteDepthPixelsLocked().
 * The loop index i is not declared here; it must come from the expansion
 * site, together with rmesa, sPriv, r128scrn, dPriv, n, x, y, stencil and
 * mask.
 *
 * NOTE(review): READ_STENCIL_PIXELS processes at most 128 pixels per
 * readback, while this macro reads all n at once -- confirm that n cannot
 * exceed what the span temporary holds.
 */
#define WRITE_STENCIL_PIXELS() \
do { \
   GLint buf[n]; \
   GLint ox[MAX_WIDTH]; \
   GLint oy[MAX_WIDTH]; \
   GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
                                r128scrn->spanOffset); \
   for ( i = 0 ; i < n ; i++ ) { \
      ox[i] = x[i] + dPriv->x; \
      oy[i] = Y_FLIP( y[i] ) + dPriv->y; \
   } \
   r128ReadDepthPixelsLocked( rmesa, n, ox, oy ); \
   r128WaitForIdleLocked( rmesa ); \
   for ( i = 0 ; i < n ; i++ ) { \
      /* Widen to GLuint before shifting: if stencil[i] promotes to int, */ \
      /* shifting a value >= 0x80 left by 24 would overflow it.          */ \
      buf[i] = (GLint)((readbuf[i] & 0x00ffffff) | \
                       ((GLuint)stencil[i] << 24)); \
   } \
   r128WriteDepthPixelsLocked( rmesa, n, ox, oy, buf, mask ); \
} while (0)
/* Read a span of n stencil values: r128ReadDepthSpanLocked() fetches the
 * combined depth/stencil words into the span temporary at
 * sPriv->pFB + r128scrn->spanOffset, r128WaitForIdleLocked() is called
 * before the CPU reads that temporary, and bits 24..31 of each word are
 * stored into stencil[].  Expects rmesa, sPriv, r128scrn, dPriv, n, x, y and
 * stencil to be in scope where the macro is expanded.
 */
#define READ_STENCIL_SPAN() \
do { \
   GLuint *spanTemp = (GLuint *)((GLubyte *)sPriv->pFB + \
                                 r128scrn->spanOffset); \
   GLint idx; \
 \
   r128ReadDepthSpanLocked( rmesa, n, \
                            x + dPriv->x, \
                            y + dPriv->y ); \
   r128WaitForIdleLocked( rmesa ); \
 \
   for ( idx = 0 ; idx < n ; idx++ ) { \
      stencil[idx] = ( spanTemp[idx] >> 24 ) & 0xff; \
   } \
} while (0)
/* Read n scattered stencil values in batches of at most 128 pixels.
 * For each batch the coordinates are translated (x offset by dPriv->x, y
 * passed through Y_FLIP() and offset by dPriv->y), r128ReadDepthPixelsLocked()
 * fetches the combined depth/stencil words into the span temporary at
 * sPriv->pFB + r128scrn->spanOffset, r128WaitForIdleLocked() is called
 * before the CPU reads that temporary, and bits 24..31 of each word are
 * stored into stencil[].
 * Note that stencil, x and y are advanced past each batch, so after the
 * macro they no longer point at the start of the caller's arrays.
 * Expects rmesa, sPriv, r128scrn, dPriv, n, x, y and stencil to be in scope
 * where the macro is expanded.
 */
#define READ_STENCIL_PIXELS() \
do { \
   GLuint *spanTemp = (GLuint *)((GLubyte *)sPriv->pFB + \
                                 r128scrn->spanOffset); \
   GLint k, left = n; \
 \
   while ( left > 0 ) { \
      GLint px[128]; \
      GLint py[128]; \
      GLint batch = ( left <= 128 ) ? left : 128; \
 \
      for ( k = 0 ; k < batch ; k++ ) { \
         px[k] = x[k] + dPriv->x; \
         py[k] = Y_FLIP( y[k] ) + dPriv->y; \
      } \
 \
      r128ReadDepthPixelsLocked( rmesa, batch, px, py ); \
      r128WaitForIdleLocked( rmesa ); \
 \
      for ( k = 0 ; k < batch ; k++ ) { \
         stencil[k] = ( spanTemp[k] >> 24 ) & 0xff; \
      } \
 \
      /* Step every cursor past the batch just handled. */ \
      stencil += batch; \
      x += batch; \
      y += batch; \
      left -= batch; \
   } \
} while (0)
/* Name-mangling macro for the stencil template included below; presumably
 * stenciltmp.h uses TAG() together with the *_STENCIL_* macros above to
 * generate the stencil span functions -- confirm against that header.
 * NOTE(review): the generated names carry a "radeon" prefix although the
 * surrounding code is the r128 driver.  r128SetSpanFunctions() calls
 * radeonInitStencilPointers_z24_s8(), so the prefix can only be changed
 * together with that call site.
 */
#define TAG(x) radeon##x##_z24_s8
#include "stenciltmp.h"
static void
r128SpanRenderStart( GLcontext *ctx )
@@ -275,6 +407,7 @@ r128SpanRenderFinish( GLcontext *ctx )
{
r128ContextPtr rmesa = R128_CONTEXT(ctx);
_swrast_flush( ctx );
r128WaitForIdleLocked( rmesa );
UNLOCK_HARDWARE( rmesa );
}
@@ -306,4 +439,7 @@ r128SetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
r128InitDepthPointers_z24_s8(&drb->Base);
}
else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
radeonInitStencilPointers_z24_s8(&drb->Base);
}
}
+193 -1
View File
@@ -251,6 +251,179 @@ static void r128DDBlendFuncSeparate( GLcontext *ctx,
rmesa->new_state |= R128_NEW_ALPHA;
}
/* =============================================================
* Stencil
*/
/**
 * Driver hook for stencil function changes: mirrors the stencil comparison
 * function and the packed reference / value-mask / write-mask word into the
 * hardware state copy in rmesa->setup.
 *
 * The face, func, ref and mask arguments are not used; the index-0 values
 * already stored in ctx->Stencil are read instead.
 *
 * Sets R128_UPLOAD_MASKS when the packed reference/mask word changes and
 * R128_UPLOAD_CONTEXT when the Z/stencil control word changes.
 *
 * \param ctx  GL context whose stencil state is read.
 */
static void
r128DDStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
                           GLint ref, GLuint mask )
{
   r128ContextPtr rmesa = R128_CONTEXT(ctx);
   /* Each field is confined to 8 bits and widened to GLuint before it is
    * shifted, so an over-wide value cannot bleed into the neighbouring
    * field and the shift by 24 cannot overflow a promoted int.
    */
   const GLuint refmask =
      ((((GLuint) ctx->Stencil.Ref[0]       & 0xff) <<  0) |
       (((GLuint) ctx->Stencil.ValueMask[0] & 0xff) << 16) |
       (((GLuint) ctx->Stencil.WriteMask[0] & 0xff) << 24));
   GLuint z = rmesa->setup.z_sten_cntl_c;

   z &= ~R128_STENCIL_TEST_MASK;
   switch ( ctx->Stencil.Function[0] ) {
   case GL_NEVER:
      z |= R128_STENCIL_TEST_NEVER;
      break;
   case GL_LESS:
      z |= R128_STENCIL_TEST_LESS;
      break;
   case GL_EQUAL:
      z |= R128_STENCIL_TEST_EQUAL;
      break;
   case GL_LEQUAL:
      z |= R128_STENCIL_TEST_LESSEQUAL;
      break;
   case GL_GREATER:
      z |= R128_STENCIL_TEST_GREATER;
      break;
   case GL_NOTEQUAL:
      z |= R128_STENCIL_TEST_NEQUAL;
      break;
   case GL_GEQUAL:
      z |= R128_STENCIL_TEST_GREATEREQUAL;
      break;
   case GL_ALWAYS:
      z |= R128_STENCIL_TEST_ALWAYS;
      break;
   default:
      /* Unrecognised function: the test field stays cleared. */
      break;
   }

   if ( rmesa->setup.sten_ref_mask_c != refmask ) {
      rmesa->setup.sten_ref_mask_c = refmask;
      rmesa->dirty |= R128_UPLOAD_MASKS;
   }
   if ( rmesa->setup.z_sten_cntl_c != z ) {
      rmesa->setup.z_sten_cntl_c = z;
      rmesa->dirty |= R128_UPLOAD_CONTEXT;
   }
}
/**
 * Driver hook for stencil write-mask changes: rebuilds the packed
 * reference / value-mask / write-mask word from ctx->Stencil and stores it
 * in rmesa->setup.sten_ref_mask_c, setting R128_UPLOAD_MASKS when it changed.
 *
 * The face and mask arguments are not used; the index-0 values already
 * stored in ctx->Stencil are read instead.
 *
 * \param ctx  GL context whose stencil state is read.
 */
static void
r128DDStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
{
   r128ContextPtr rmesa = R128_CONTEXT(ctx);
   /* Each field is confined to 8 bits and widened to GLuint before it is
    * shifted, so an over-wide value cannot bleed into the neighbouring
    * field and the shift by 24 cannot overflow a promoted int.
    */
   const GLuint refmask =
      ((((GLuint) ctx->Stencil.Ref[0]       & 0xff) <<  0) |
       (((GLuint) ctx->Stencil.ValueMask[0] & 0xff) << 16) |
       (((GLuint) ctx->Stencil.WriteMask[0] & 0xff) << 24));

   if ( rmesa->setup.sten_ref_mask_c != refmask ) {
      rmesa->setup.sten_ref_mask_c = refmask;
      rmesa->dirty |= R128_UPLOAD_MASKS;
   }
}
/**
 * Driver hook for stencil operation changes: translates the index-0
 * stencil-fail, depth-fail and depth-pass operations stored in ctx->Stencil
 * into the corresponding fields of the Z/stencil control word and sets
 * R128_UPLOAD_CONTEXT when that word changed.
 *
 * Does nothing unless the visual has stencil bits and a 24-bit depth buffer.
 * The face, fail, zfail and zpass arguments are not used.
 *
 * \param ctx  GL context whose stencil state is read.
 */
static void r128DDStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
                                     GLenum zfail, GLenum zpass )
{
   r128ContextPtr rmesa = R128_CONTEXT(ctx);
   GLuint sfailBits = 0;
   GLuint zfailBits = 0;
   GLuint zpassBits = 0;
   GLuint cntl;
   /* Cleared whenever an operation was observed to misbehave in hardware. */
   GLboolean hwCorrect = 1;

   if ( ctx->Visual.stencilBits <= 0 || ctx->Visual.depthBits != 24 )
      return;

   /* Operation applied when the stencil test fails. */
   switch ( ctx->Stencil.FailFunc[0] ) {
   case GL_KEEP:
      sfailBits = R128_STENCIL_S_FAIL_KEEP;
      break;
   case GL_ZERO:
      sfailBits = R128_STENCIL_S_FAIL_ZERO;
      hwCorrect = 0; /* Hardware bug? ZERO maps to KEEP */
      break;
   case GL_REPLACE:
      sfailBits = R128_STENCIL_S_FAIL_REPLACE;
      hwCorrect = 0; /* Hardware bug? REPLACE maps to KEEP */
      break;
   case GL_INCR:
      sfailBits = R128_STENCIL_S_FAIL_INC;
      break;
   case GL_DECR:
      sfailBits = R128_STENCIL_S_FAIL_DEC;
      break;
   case GL_INVERT:
      sfailBits = R128_STENCIL_S_FAIL_INV;
      hwCorrect = 0; /* Hardware bug? INV maps to ZERO */
      break;
   }

   /* Operation applied when the stencil test passes but the depth test
    * fails.
    */
   switch ( ctx->Stencil.ZFailFunc[0] ) {
   case GL_KEEP:
      zfailBits = R128_STENCIL_ZFAIL_KEEP;
      hwCorrect = 0; /* Hardware bug? KEEP maps to ZERO */
      break;
   case GL_ZERO:
      zfailBits = R128_STENCIL_ZFAIL_ZERO;
      break;
   case GL_REPLACE:
      zfailBits = R128_STENCIL_ZFAIL_REPLACE;
      break;
   case GL_INCR:
      zfailBits = R128_STENCIL_ZFAIL_INC;
      break;
   case GL_DECR:
      zfailBits = R128_STENCIL_ZFAIL_DEC;
      break;
   case GL_INVERT:
      zfailBits = R128_STENCIL_ZFAIL_INV;
      hwCorrect = 0; /* Hardware bug? INV maps to ZERO */
      break;
   }

   /* Operation applied when both the stencil and depth tests pass. */
   switch ( ctx->Stencil.ZPassFunc[0] ) {
   case GL_KEEP:
      zpassBits = R128_STENCIL_ZPASS_KEEP;
      hwCorrect = 0; /* Hardware bug? KEEP maps to ZERO */
      break;
   case GL_ZERO:
      zpassBits = R128_STENCIL_ZPASS_ZERO;
      break;
   case GL_REPLACE:
      zpassBits = R128_STENCIL_ZPASS_REPLACE;
      break;
   case GL_INCR:
      zpassBits = R128_STENCIL_ZPASS_INC;
      break;
   case GL_DECR:
      zpassBits = R128_STENCIL_ZPASS_DEC;
      hwCorrect = 0; /* Hardware bug? DEC maps to INCR_WRAP */
      break;
   case GL_INVERT:
      zpassBits = R128_STENCIL_ZPASS_INV;
      hwCorrect = 0; /* Hardware bug? INV maps to ZERO */
      break;
   }

   /* XXX: Now that we know whether we can do the given funcs successfully
    * (according to testing done with a modified stencilwrap test), go
    * ahead and drop that knowledge on the floor.  While fallbacks remain
    * broken, they make the situation even worse (in test apps, at least) than
    * failing in just the stencil part.
    */
   /*FALLBACK( rmesa, R128_FALLBACK_STENCIL, !hwCorrect );*/
   (void) hwCorrect;

   /* Replace the three operation fields, leaving every other bit intact. */
   cntl = rmesa->setup.z_sten_cntl_c;
   cntl &= ~(R128_STENCIL_S_FAIL_MASK | R128_STENCIL_ZPASS_MASK |
             R128_STENCIL_ZFAIL_MASK);
   cntl |= sfailBits | zfailBits | zpassBits;

   if ( cntl != rmesa->setup.z_sten_cntl_c ) {
      rmesa->setup.z_sten_cntl_c = cntl;
      rmesa->dirty |= R128_UPLOAD_CONTEXT;
   }
}
/**
 * Driver hook for stencil clear-value changes: when the visual has stencil
 * bits and a 24-bit depth buffer, stores ctx->Stencil.Clear in bits 24..31
 * of rmesa->ClearDepth while leaving the low 24 bits untouched.
 *
 * The s argument is not used; the value already stored in ctx->Stencil.Clear
 * is read instead.
 *
 * \param ctx  GL context whose stencil clear value is read.
 */
static void r128DDClearStencil( GLcontext *ctx, GLint s )
{
   r128ContextPtr rmesa = R128_CONTEXT(ctx);

   if (ctx->Visual.stencilBits > 0 && ctx->Visual.depthBits == 24) {
      rmesa->ClearDepth &= 0x00ffffff;
      /* Widen to GLuint and confine to 8 bits before shifting so the shift
       * by 24 cannot overflow a promoted int.
       */
      rmesa->ClearDepth |= ((GLuint) ctx->Stencil.Clear & 0xff) << 24;
   }
}
/* =============================================================
* Depth testing
@@ -339,6 +512,7 @@ static void r128DDClearDepth( GLcontext *ctx, GLclampd d )
break;
case R128_Z_PIX_WIDTH_24:
rmesa->ClearDepth = d * 0x00ffffff;
rmesa->ClearDepth |= ctx->Stencil.Clear << 24;
break;
case R128_Z_PIX_WIDTH_32:
rmesa->ClearDepth = d * 0xffffffff;
@@ -853,7 +1027,21 @@ static void r128DDEnable( GLcontext *ctx, GLenum cap, GLboolean state )
case GL_STENCIL_TEST:
FLUSH_BATCH( rmesa );
FALLBACK( rmesa, R128_FALLBACK_STENCIL, state );
if ( ctx->Visual.stencilBits > 0 && ctx->Visual.depthBits == 24 ) {
if ( state ) {
rmesa->setup.tex_cntl_c |= R128_STENCIL_ENABLE;
/* Reset the fallback (if any) for bad stencil funcs */
r128DDStencilOpSeparate( ctx, 0, ctx->Stencil.FailFunc[0],
ctx->Stencil.ZFailFunc[0],
ctx->Stencil.ZPassFunc[0] );
} else {
rmesa->setup.tex_cntl_c &= ~R128_STENCIL_ENABLE;
FALLBACK( rmesa, R128_FALLBACK_STENCIL, GL_FALSE );
}
rmesa->dirty |= R128_UPLOAD_CONTEXT;
} else {
FALLBACK( rmesa, R128_FALLBACK_STENCIL, state );
}
break;
case GL_TEXTURE_1D:
@@ -1184,6 +1372,7 @@ void r128DDInitStateFuncs( GLcontext *ctx )
ctx->Driver.ClearIndex = NULL;
ctx->Driver.ClearColor = r128DDClearColor;
ctx->Driver.ClearStencil = r128DDClearStencil;
ctx->Driver.DrawBuffer = r128DDDrawBuffer;
ctx->Driver.ReadBuffer = r128DDReadBuffer;
@@ -1208,6 +1397,9 @@ void r128DDInitStateFuncs( GLcontext *ctx )
ctx->Driver.RenderMode = r128DDRenderMode;
ctx->Driver.Scissor = r128DDScissor;
ctx->Driver.ShadeModel = r128DDShadeModel;
ctx->Driver.StencilFuncSeparate = r128DDStencilFuncSeparate;
ctx->Driver.StencilMaskSeparate = r128DDStencilMaskSeparate;
ctx->Driver.StencilOpSeparate = r128DDStencilOpSeparate;
ctx->Driver.DepthRange = r128DepthRange;
ctx->Driver.Viewport = r128Viewport;
@@ -1068,24 +1068,28 @@
# define R128_STENCIL_TEST_GREATER (5 << 12)
# define R128_STENCIL_TEST_NEQUAL (6 << 12)
# define R128_STENCIL_TEST_ALWAYS (7 << 12)
# define R128_STENCIL_TEST_MASK (7 << 12)
# define R128_STENCIL_S_FAIL_KEEP (0 << 16)
# define R128_STENCIL_S_FAIL_ZERO (1 << 16)
# define R128_STENCIL_S_FAIL_REPLACE (2 << 16)
# define R128_STENCIL_S_FAIL_INC (3 << 16)
# define R128_STENCIL_S_FAIL_DEC (4 << 16)
# define R128_STENCIL_S_FAIL_INV (5 << 16)
# define R128_STENCIL_S_FAIL_MASK (7 << 16)
# define R128_STENCIL_ZPASS_KEEP (0 << 20)
# define R128_STENCIL_ZPASS_ZERO (1 << 20)
# define R128_STENCIL_ZPASS_REPLACE (2 << 20)
# define R128_STENCIL_ZPASS_INC (3 << 20)
# define R128_STENCIL_ZPASS_DEC (4 << 20)
# define R128_STENCIL_ZPASS_INV (5 << 20)
# define R128_STENCIL_ZPASS_MASK (7 << 20)
# define R128_STENCIL_ZFAIL_KEEP (0 << 24)
# define R128_STENCIL_ZFAIL_ZERO (1 << 24)
# define R128_STENCIL_ZFAIL_REPLACE (2 << 24)
# define R128_STENCIL_ZFAIL_INC (3 << 24)
# define R128_STENCIL_ZFAIL_DEC (4 << 24)
# define R128_STENCIL_ZFAIL_INV (5 << 24)
# define R128_STENCIL_ZFAIL_MASK (7 << 24)
#define R128_TEX_CNTL_C 0x1c9c
# define R128_Z_ENABLE (1 << 0)
# define R128_Z_WRITE_ENABLE (1 << 1)