From 40bcb588ddf91195ec757f9a597ee0f092c8ce72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 25 Mar 2024 20:57:05 -0400 Subject: [PATCH] radeonsi: remove the old si_compute_copy_image It's replaced by the compute blit. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- .../drivers/radeonsi/si_compute_blit.c | 212 ------------------ src/gallium/drivers/radeonsi/si_pipe.c | 8 - src/gallium/drivers/radeonsi/si_pipe.h | 7 - .../drivers/radeonsi/si_shaderlib_nir.c | 65 ------ 4 files changed, 292 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index d7b40c53f14..48905d2af86 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -11,36 +11,6 @@ #include "util/hash_table.h" #include "util/u_pack_color.h" -static bool si_can_use_compute_blit(struct si_context *sctx, enum pipe_format format, - unsigned num_samples, bool is_store, bool has_dcc) -{ - /* TODO: This format fails AMD_TEST=imagecopy. */ - if (format == PIPE_FORMAT_A8R8_UNORM && is_store) - return false; - - /* MSAA image stores are broken. AMD_DEBUG=nofmask fixes them, implying that the FMASK - * expand pass doesn't work, but let's use the gfx blit, which should be faster because - * it doesn't require expanding the FMASK. - * - * TODO: Broken MSAA stores can cause app issues, though this issue might only affect - * internal blits, not sure. - * - * EQAA image stores are also unimplemented, which should be rejected here after MSAA - * image stores are fixed. - */ - if (num_samples > 1 && is_store) - return false; - - if (util_format_is_depth_or_stencil(format)) - return false; - - /* Image stores support DCC since GFX10. */ - if (has_dcc && is_store && sctx->gfx_level < GFX10) - return false; - - return true; -} - /* Determine the cache policy. */ static enum si_cache_policy get_cache_policy(struct si_context *sctx, enum si_coherency coher, uint64_t size) @@ -553,188 +523,6 @@ static void si_launch_grid_internal_images(struct si_context *sctx, pipe_resource_reference(&saved_image[i].resource, NULL); } -bool si_compute_copy_image_old(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level, - struct pipe_resource *src, unsigned src_level, unsigned dstx, - unsigned dsty, unsigned dstz, const struct pipe_box *src_box, - unsigned flags) -{ - struct si_texture *ssrc = (struct si_texture*)src; - struct si_texture *sdst = (struct si_texture*)dst; - - /* The compute copy is mandatory for compressed and subsampled formats because the gfx copy - * doesn't support them. In all other cases, call si_can_use_compute_blit. - * - * The format is identical (we only need to check the src format) except compressed formats, - * which can be paired with an equivalent integer format. - */ - if (!util_format_is_compressed(src->format) && - !util_format_is_compressed(dst->format) && - !util_format_is_subsampled_422(src->format)) { - bool src_can_use_compute_blit = - si_can_use_compute_blit(sctx, src->format, src->nr_samples, false, - vi_dcc_enabled(ssrc, src_level)); - - if (!src_can_use_compute_blit) - return false; - - bool dst_can_use_compute_blit = - si_can_use_compute_blit(sctx, dst->format, dst->nr_samples, true, - vi_dcc_enabled(sdst, dst_level)); - - if (!dst_can_use_compute_blit && !sctx->has_graphics && - si_can_use_compute_blit(sctx, dst->format, dst->nr_samples, false, - vi_dcc_enabled(sdst, dst_level))) { - /* Non-graphics context don't have a blitter, so try harder to do - * a compute blit by disabling dcc on the destination texture. - */ - dst_can_use_compute_blit = si_texture_disable_dcc(sctx, sdst); - } - - if (!dst_can_use_compute_blit) - return false; - } - - enum pipe_format src_format = util_format_linear(src->format); - enum pipe_format dst_format = util_format_linear(dst->format); - bool is_linear = ssrc->surface.is_linear || sdst->surface.is_linear; - - assert(util_format_is_subsampled_422(src_format) == util_format_is_subsampled_422(dst_format)); - - /* Interpret as integer values to avoid NaN issues */ - if (!vi_dcc_enabled(ssrc, src_level) && - !vi_dcc_enabled(sdst, dst_level) && - src_format == dst_format && - util_format_is_float(src_format) && - !util_format_is_compressed(src_format)) { - switch(util_format_get_blocksizebits(src_format)) { - case 16: - src_format = dst_format = PIPE_FORMAT_R16_UINT; - break; - case 32: - src_format = dst_format = PIPE_FORMAT_R32_UINT; - break; - case 64: - src_format = dst_format = PIPE_FORMAT_R32G32_UINT; - break; - case 128: - src_format = dst_format = PIPE_FORMAT_R32G32B32A32_UINT; - break; - default: - assert(false); - } - } - - /* Interpret compressed formats as UINT. */ - struct pipe_box new_box; - unsigned src_access = 0, dst_access = 0; - - /* Note that staging copies do compressed<->UINT, so one of the formats is already UINT. */ - if (util_format_is_compressed(src_format) || util_format_is_compressed(dst_format)) { - if (util_format_is_compressed(src_format)) - src_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT; - if (util_format_is_compressed(dst_format)) - dst_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT; - - dstx = util_format_get_nblocksx(dst_format, dstx); - dsty = util_format_get_nblocksy(dst_format, dsty); - - new_box.x = util_format_get_nblocksx(src_format, src_box->x); - new_box.y = util_format_get_nblocksy(src_format, src_box->y); - new_box.z = src_box->z; - new_box.width = util_format_get_nblocksx(src_format, src_box->width); - new_box.height = util_format_get_nblocksy(src_format, src_box->height); - new_box.depth = src_box->depth; - src_box = &new_box; - - if (ssrc->surface.bpe == 8) - src_format = dst_format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */ - else - src_format = dst_format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */ - } - - if (util_format_is_subsampled_422(src_format)) { - assert(src_format == dst_format); - - src_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT; - dst_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT; - - dstx = util_format_get_nblocksx(src_format, dstx); - - src_format = dst_format = PIPE_FORMAT_R32_UINT; - - /* Interpreting 422 subsampled format (16 bpp) as 32 bpp - * should force us to divide src_box->x, dstx and width by 2. - * But given that ac_surface allocates this format as 32 bpp - * and that surf_size is then modified to pack the values - * we must keep the original values to get the correct results. - */ - } - - /* SNORM blitting has precision issues. Use the SINT equivalent instead, which doesn't - * force DCC decompression. - */ - if (util_format_is_snorm(dst_format)) - src_format = dst_format = util_format_snorm_to_sint(dst_format); - - if (src_box->width == 0 || src_box->height == 0 || src_box->depth == 0) - return true; /* success - nothing to do */ - - struct pipe_image_view image[2] = {0}; - image[0].resource = src; - image[0].shader_access = image[0].access = PIPE_IMAGE_ACCESS_READ | src_access; - image[0].format = src_format; - image[0].u.tex.level = src_level; - image[0].u.tex.first_layer = 0; - image[0].u.tex.last_layer = util_max_layer(src, src_level); - image[1].resource = dst; - image[1].shader_access = image[1].access = PIPE_IMAGE_ACCESS_WRITE | dst_access; - image[1].format = dst_format; - image[1].u.tex.level = dst_level; - image[1].u.tex.first_layer = 0; - image[1].u.tex.last_layer = util_max_layer(dst, dst_level); - - struct pipe_grid_info info = {0}; - - bool dst_is_1d = dst->target == PIPE_TEXTURE_1D || - dst->target == PIPE_TEXTURE_1D_ARRAY; - bool src_is_1d = src->target == PIPE_TEXTURE_1D || - src->target == PIPE_TEXTURE_1D_ARRAY; - int block_x, block_y; - int block_z = 1; - - /* Choose the block dimensions based on the copy area size. */ - if (src_box->height <= 4) { - block_y = util_next_power_of_two(src_box->height); - block_x = 64 / block_y; - } else if (src_box->width <= 4) { - block_x = util_next_power_of_two(src_box->width); - block_y = 64 / block_x; - } else if (is_linear) { - block_x = 64; - block_y = 1; - } else { - block_x = 8; - block_y = 8; - } - - sctx->cs_user_data[0] = src_box->x | (dstx << 16); - sctx->cs_user_data[1] = src_box->y | (dsty << 16); - sctx->cs_user_data[2] = src_box->z | (dstz << 16); - - unsigned wg_dim = - set_work_size(&info, block_x, block_y, block_z, - src_box->width, src_box->height, src_box->depth); - - void **copy_image_cs_ptr = &sctx->cs_copy_image[wg_dim - 1][src_is_1d][dst_is_1d]; - if (!*copy_image_cs_ptr) - *copy_image_cs_ptr = si_create_copy_image_cs(sctx, wg_dim, src_is_1d, dst_is_1d); - - assert(*copy_image_cs_ptr); - - si_launch_grid_internal_images(sctx, image, 2, &info, *copy_image_cs_ptr, flags); - return true; -} - void si_retile_dcc(struct si_context *sctx, struct si_texture *tex) { assert(sctx->gfx_level < GFX12); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 7dad6182cb2..ad18e5d0f39 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -272,14 +272,6 @@ static void si_destroy_context(struct pipe_context *context) sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_buffer); if (sctx->cs_ubyte_to_ushort) sctx->b.delete_compute_state(&sctx->b, sctx->cs_ubyte_to_ushort); - for (unsigned i = 0; i < ARRAY_SIZE(sctx->cs_copy_image); i++) { - for (unsigned j = 0; j < ARRAY_SIZE(sctx->cs_copy_image[i]); j++) { - for (unsigned k = 0; k < ARRAY_SIZE(sctx->cs_copy_image[i][j]); k++) { - if (sctx->cs_copy_image[i][j][k]) - sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image[i][j][k]); - } - } - } if (sctx->cs_clear_12bytes_buffer) sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_12bytes_buffer); for (unsigned i = 0; i < ARRAY_SIZE(sctx->cs_dcc_retile); i++) { diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 53c5cb2d03d..d31d65b0161 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -990,7 +990,6 @@ struct si_context { void *cs_clear_buffer_rmw; void *cs_copy_buffer; void *cs_ubyte_to_ushort; - void *cs_copy_image[3][2][2]; /* [wg_dim-1][src_is_1d][dst_is_1d] */ void *cs_clear_12bytes_buffer; void *cs_dcc_retile[32]; void *cs_fmask_expand[3][2]; /* [log2(samples)-1][is_array] */ @@ -1502,10 +1501,6 @@ void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct p uint64_t dst_offset, uint64_t src_offset, unsigned size, unsigned flags); void si_compute_shorten_ubyte_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, unsigned size, unsigned flags); -bool si_compute_copy_image_old(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level, - struct pipe_resource *src, unsigned src_level, unsigned dstx, - unsigned dsty, unsigned dstz, const struct pipe_box *src_box, - unsigned flags); void si_compute_clear_image_dcc_single(struct si_context *sctx, struct si_texture *tex, unsigned level, enum pipe_format format, const union pipe_color_union *color, unsigned flags); @@ -1632,8 +1627,6 @@ void si_suspend_queries(struct si_context *sctx); void si_resume_queries(struct si_context *sctx); /* si_shaderlib_nir.c */ -void *si_create_copy_image_cs(struct si_context *sctx, unsigned wg_dim, - bool src_is_1d_array, bool dst_is_1d_array); void *si_create_dcc_retile_cs(struct si_context *sctx, struct radeon_surf *surf); void *gfx9_create_clear_dcc_msaa_cs(struct si_context *sctx, struct si_texture *tex); void *si_create_passthrough_tcs(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c index 26dc52c6059..7e97d057214 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c @@ -48,71 +48,6 @@ deref_ssa(nir_builder *b, nir_variable *var) return &nir_build_deref_var(b, var)->def; } -/* Create a NIR compute shader implementing copy_image. - * - * This shader can handle 1D and 2D, linear and non-linear images. - * It expects the source and destination (x,y,z) coords as user_data_amd, - * packed into 3 SGPRs as 2x16bits per component. - */ -void *si_create_copy_image_cs(struct si_context *sctx, unsigned wg_dim, - bool src_is_1d_array, bool dst_is_1d_array) -{ - const nir_shader_compiler_options *options = - sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE); - - nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "copy_image_cs"); - b.shader->info.num_images = 2; - - /* The workgroup size is either 8x8 for normal (non-linear) 2D images, - * or 64x1 for 1D and linear-2D images. - */ - b.shader->info.workgroup_size_variable = true; - - b.shader->info.cs.user_data_components_amd = 3; - nir_def *ids = nir_pad_vector_imm_int(&b, get_global_ids(&b, wg_dim), 0, 3); - - nir_def *coord_src = NULL, *coord_dst = NULL; - unpack_2x16(&b, nir_trim_vector(&b, nir_load_user_data_amd(&b), 3), - &coord_src, &coord_dst); - - coord_src = nir_iadd(&b, coord_src, ids); - coord_dst = nir_iadd(&b, coord_dst, ids); - - /* Coordinates must have 4 channels in NIR. */ - coord_src = nir_pad_vector(&b, coord_src, 4); - coord_dst = nir_pad_vector(&b, coord_dst, 4); - - static unsigned swizzle_xz[] = {0, 2, 0, 0}; - - if (src_is_1d_array) - coord_src = nir_swizzle(&b, coord_src, swizzle_xz, 4); - if (dst_is_1d_array) - coord_dst = nir_swizzle(&b, coord_dst, swizzle_xz, 4); - - const struct glsl_type *src_img_type = glsl_image_type(src_is_1d_array ? GLSL_SAMPLER_DIM_1D - : GLSL_SAMPLER_DIM_2D, - /*is_array*/ true, GLSL_TYPE_FLOAT); - const struct glsl_type *dst_img_type = glsl_image_type(dst_is_1d_array ? GLSL_SAMPLER_DIM_1D - : GLSL_SAMPLER_DIM_2D, - /*is_array*/ true, GLSL_TYPE_FLOAT); - - nir_variable *img_src = nir_variable_create(b.shader, nir_var_image, src_img_type, "img_src"); - img_src->data.binding = 0; - - nir_variable *img_dst = nir_variable_create(b.shader, nir_var_image, dst_img_type, "img_dst"); - img_dst->data.binding = 1; - - nir_def *undef32 = nir_undef(&b, 1, 32); - nir_def *zero = nir_imm_int(&b, 0); - - nir_def *data = nir_image_deref_load(&b, /*num_components*/ 4, /*bit_size*/ 32, - deref_ssa(&b, img_src), coord_src, undef32, zero); - - nir_image_deref_store(&b, deref_ssa(&b, img_dst), coord_dst, undef32, data, zero); - - return create_shader_state(sctx, b.shader); -} - void *si_create_dcc_retile_cs(struct si_context *sctx, struct radeon_surf *surf) { nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, sctx->screen->nir_options,