i965/copy_image: Re-implement the blitter path with emit_miptree_blit

By using emit_miptree_blit, which does chunking, this fixes the blitter path
for the case where the image is too tall to blit normally.  We also pull the
path into intel_blit as intel_miptree_copy.  This matches the naming of the
blorp blit and copy functions, brw_blorp_blit and brw_blorp_copy.

Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Cc: "13.0" <mesa-dev@lists.freedesktop.org>
This commit is contained in:
Jason Ekstrand
2016-11-30 19:08:51 -08:00
parent 6c74e7f492
commit 06d864921e
3 changed files with 80 additions and 97 deletions
+68
View File
@@ -386,6 +386,74 @@ intel_miptree_blit(struct brw_context *brw,
return true;
}
/**
 * Copy a rectangle from one miptree slice to another with the blitter.
 *
 * The x/y/width/height arguments are relative to the selected level/slice of
 * their miptree.  For a compressed format they are divided by the format's
 * block dimensions before the blit is emitted.
 *
 * \return false, without emitting a blit, if either miptree is multisampled
 *         or the source format is MESA_FORMAT_S_UINT8; otherwise the result
 *         of emit_miptree_blit().
 */
bool
intel_miptree_copy(struct brw_context *brw,
                   struct intel_mipmap_tree *src_mt,
                   int src_level, int src_slice,
                   uint32_t src_x, uint32_t src_y,
                   struct intel_mipmap_tree *dst_mt,
                   int dst_level, int dst_slice,
                   uint32_t dst_x, uint32_t dst_y,
                   uint32_t src_width, uint32_t src_height)
{
   /* Multisampled surfaces are beyond what the blitter understands. */
   const bool multisampled =
      src_mt->num_samples > 0 || dst_mt->num_samples > 0;

   if (multisampled || src_mt->format == MESA_FORMAT_S_UINT8)
      return false;

   /* HiZ and fast color clears are invisible to the blitter, so both
    * miptrees have to be resolved before it touches them.
    */
   intel_miptree_slice_resolve_depth(brw, src_mt, src_level, src_slice);
   intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_slice);
   intel_miptree_resolve_color(brw, src_mt, src_level, src_slice, 1, 0);
   intel_miptree_resolve_color(brw, dst_mt, dst_level, dst_slice, 1, 0);

   /* Source: look up where the slice lives and, for compressed formats,
    * rescale the rectangle by the block dimensions.
    */
   uint32_t src_origin_x, src_origin_y;
   intel_miptree_get_image_offset(src_mt, src_level, src_slice,
                                  &src_origin_x, &src_origin_y);

   if (_mesa_is_format_compressed(src_mt->format)) {
      GLuint bw, bh;
      _mesa_get_format_block_size(src_mt->format, &bw, &bh);

      assert(src_x % bw == 0);
      assert(src_y % bh == 0);
      assert(src_width % bw == 0);
      assert(src_height % bh == 0);

      src_x = src_x / (int)bw;
      src_y = src_y / (int)bh;
      src_width = src_width / (int)bw;
      src_height = src_height / (int)bh;
   }

   /* Destination: same treatment, but only the origin needs rescaling. */
   uint32_t dst_origin_x, dst_origin_y;
   intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
                                  &dst_origin_x, &dst_origin_y);

   if (_mesa_is_format_compressed(dst_mt->format)) {
      GLuint bw, bh;
      _mesa_get_format_block_size(dst_mt->format, &bw, &bh);

      assert(dst_x % bw == 0);
      assert(dst_y % bh == 0);

      dst_x = dst_x / (int)bw;
      dst_y = dst_y / (int)bh;
   }

   /* Shift both rectangles by the offset of their slice in the miptree. */
   const uint32_t blit_src_x = src_origin_x + src_x;
   const uint32_t blit_src_y = src_origin_y + src_y;
   const uint32_t blit_dst_x = dst_origin_x + dst_x;
   const uint32_t blit_dst_y = dst_origin_y + dst_y;

   return emit_miptree_blit(brw, src_mt, blit_src_x, blit_src_y,
                            dst_mt, blit_dst_x, blit_dst_y,
                            src_width, src_height, false, GL_COPY);
}
static bool
alignment_valid(struct brw_context *brw, unsigned offset, uint32_t tiling)
{
+9
View File
@@ -58,6 +58,15 @@ bool intel_miptree_blit(struct brw_context *brw,
uint32_t width, uint32_t height,
GLenum logicop);
/**
 * Blitter copy of a src_width x src_height rectangle from (src_x, src_y) in
 * the given level/slice of src_mt to (dst_x, dst_y) in the given level/slice
 * of dst_mt.
 *
 * Returns false without attempting the copy if either miptree is
 * multisampled or the source format is MESA_FORMAT_S_UINT8; otherwise
 * returns the result of the underlying miptree blit.
 */
bool intel_miptree_copy(struct brw_context *brw,
struct intel_mipmap_tree *src_mt,
int src_level, int src_slice,
uint32_t src_x, uint32_t src_y,
struct intel_mipmap_tree *dst_mt,
int dst_level, int dst_slice,
uint32_t dst_x, uint32_t dst_y,
uint32_t src_width, uint32_t src_height);
bool
intelEmitImmediateColorExpandBlit(struct brw_context *brw,
GLuint cpp,
+3 -97
View File
@@ -34,98 +34,6 @@
#include "main/teximage.h"
#include "drivers/common/meta.h"
/**
 * Try to copy a rectangle between two miptree images with a single blitter
 * copy.
 *
 * Coordinates and sizes are relative to the selected level / z slice.  For a
 * compressed format they are divided by the format's block dimensions.
 *
 * \return false if the blitter cannot be used (multisampled miptree,
 *         MESA_FORMAT_S_UINT8 source, or a pitch of 32k or more); otherwise
 *         the result of intelEmitCopyBlit().
 */
static bool
copy_image_with_blitter(struct brw_context *brw,
                        struct intel_mipmap_tree *src_mt, int src_level,
                        int src_x, int src_y, int src_z,
                        struct intel_mipmap_tree *dst_mt, int dst_level,
                        int dst_x, int dst_y, int dst_z,
                        int src_width, int src_height)
{
   /* Multisampled surfaces are beyond what the blitter understands. */
   if (src_mt->num_samples > 0 || dst_mt->num_samples > 0)
      return false;

   if (src_mt->format == MESA_FORMAT_S_UINT8)
      return false;

   /* Two limits combine to cap the usable pitch at just under 32k.
    *
    * First, the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics Data
    * Size Limitations) says:
    *
    *    The BLT engine is capable of transferring very large quantities of
    *    graphics data. Any graphics data read from and written to the
    *    destination is permitted to represent a number of pixels that
    *    occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
    *    at the destination. The maximum number of pixels that may be
    *    represented per scan lines worth of graphics data depends on the
    *    color depth.
    *
    * Second, intelEmitCopyBlit (called at the end of this function) stores
    * the buffer pitch in a signed 16-bit integer, which rules out pitches of
    * 32k and above.
    */
   const bool pitch_too_large =
      src_mt->pitch >= 32768 || dst_mt->pitch >= 32768;

   if (pitch_too_large) {
      perf_debug("Falling back due to >=32k pitch\n");
      return false;
   }

   /* Source: look up where the image lives and, for compressed formats,
    * rescale the rectangle by the block dimensions.
    */
   uint32_t src_origin_x, src_origin_y;
   intel_miptree_get_image_offset(src_mt, src_level, src_z,
                                  &src_origin_x, &src_origin_y);

   if (_mesa_is_format_compressed(src_mt->format)) {
      GLuint bw, bh;
      _mesa_get_format_block_size(src_mt->format, &bw, &bh);

      assert(src_x % bw == 0);
      assert(src_y % bh == 0);
      assert(src_width % bw == 0);
      assert(src_height % bh == 0);

      src_x = src_x / (int)bw;
      src_y = src_y / (int)bh;
      src_width = src_width / (int)bw;
      src_height = src_height / (int)bh;
   }

   src_x = src_x + src_origin_x;
   src_y = src_y + src_origin_y;

   /* Destination: same treatment, but only the origin needs rescaling. */
   uint32_t dst_origin_x, dst_origin_y;
   intel_miptree_get_image_offset(dst_mt, dst_level, dst_z,
                                  &dst_origin_x, &dst_origin_y);

   if (_mesa_is_format_compressed(dst_mt->format)) {
      GLuint bw, bh;
      _mesa_get_format_block_size(dst_mt->format, &bw, &bh);

      assert(dst_x % bw == 0);
      assert(dst_y % bh == 0);

      dst_x = dst_x / (int)bw;
      dst_y = dst_y / (int)bh;
   }

   dst_x = dst_x + dst_origin_x;
   dst_y = dst_y + dst_origin_y;

   return intelEmitCopyBlit(brw,
                            src_mt->cpp,
                            src_mt->pitch, src_mt->bo, src_mt->offset,
                            src_mt->tiling, src_mt->tr_mode,
                            dst_mt->pitch, dst_mt->bo, dst_mt->offset,
                            dst_mt->tiling, dst_mt->tr_mode,
                            src_x, src_y,
                            dst_x, dst_y,
                            src_width, src_height,
                            GL_COPY);
}
static void
copy_image_with_memcpy(struct brw_context *brw,
struct intel_mipmap_tree *src_mt, int src_level,
@@ -246,11 +154,9 @@ copy_miptrees(struct brw_context *brw,
if (src_height < bh)
src_height = ALIGN_NPOT(src_height, bh);
if (copy_image_with_blitter(brw, src_mt, src_level,
src_x, src_y, src_z,
dst_mt, dst_level,
dst_x, dst_y, dst_z,
src_width, src_height))
if (intel_miptree_copy(brw, src_mt, src_level, src_z, src_x, src_y,
dst_mt, dst_level, dst_z, dst_x, dst_y,
src_width, src_height))
return;
/* This is a worst-case scenario software fallback that maps the two