i965/msaa: Work around problems with null render targets on Gen6.

On Gen6, multisampled null render targets don't seem to work
properly--they cause the GPU to hang.  So, as a workaround, we render
into a dummy color buffer.

Fortunately this situation (multisampled rendering without a color
buffer) is rare, and we don't have to waste too much memory, because
we can give the workaround buffer a very small pitch.

Fixes piglit test "EXT_framebuffer_multisample/no-color {2,4}
depth-computed *" on Gen6.

Reviewed-by: Chad Versace <chad.versace@linux.intel.com>
This commit is contained in:
Paul Berry
2012-07-10 11:23:25 -07:00
parent 0aeb87023e
commit bac43b8bb7
2 changed files with 49 additions and 4 deletions
+6
View File
@@ -971,6 +971,12 @@ struct brw_context
drm_intel_bo *scratch_bo;
/**
* Buffer object used in place of multisampled null render targets on
* Gen6. See brw_update_null_renderbuffer_surface().
*/
drm_intel_bo *multisampled_null_render_target_bo;
/** Offset in the program cache to the WM program */
uint32_t prog_offset;
@@ -982,6 +982,10 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
uint32_t *surf;
unsigned surface_type = BRW_SURFACE_NULL;
drm_intel_bo *bo = NULL;
unsigned pitch_minus_1 = 0;
uint32_t multisampling_state = 0;
/* _NEW_BUFFERS */
const struct gl_framebuffer *fb = ctx->DrawBuffer;
@@ -989,7 +993,34 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
6 * 4, 32, &brw->wm.surf_offset[unit]);
surf[0] = (BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
if (fb->Visual.samples > 0) {
/* On Gen6, null render targets seem to cause GPU hangs when
* multisampling. So work around this problem by rendering into dummy
* color buffer.
*
* To decrease the amount of memory needed by the workaround buffer, we
* set its pitch to 128 bytes (the width of a Y tile). This means that
* the amount of memory needed for the workaround buffer is
* (width_in_tiles + height_in_tiles - 1) tiles.
*
* Note that since the workaround buffer will be interpreted by the
* hardware as an interleaved multisampled buffer, we need to compute
* width_in_tiles and height_in_tiles by dividing the width and height
* by 16 rather than the normal Y-tile size of 32.
*/
unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
brw_get_scratch_bo(intel, &brw->wm.multisampled_null_render_target_bo,
size_needed);
bo = brw->wm.multisampled_null_render_target_bo;
surface_type = BRW_SURFACE_2D;
pitch_minus_1 = 127;
multisampling_state =
brw_get_surface_num_multisamples(fb->Visual.samples);
}
surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
if (intel->gen < 6) {
surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
@@ -997,7 +1028,7 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
}
surf[1] = 0;
surf[1] = bo ? bo->offset : 0;
surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
(fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
@@ -1006,9 +1037,17 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
*
* If Surface Type is SURFTYPE_NULL, this field must be TRUE
*/
surf[3] = BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
surf[4] = 0;
surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
surf[4] = multisampling_state;
surf[5] = 0;
if (bo) {
drm_intel_bo_emit_reloc(brw->intel.batch.bo,
brw->wm.surf_offset[unit] + 4,
bo, 0,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
}
/**