From 7d27de70af72b9152e2a73c37799fafc22c75fa0 Mon Sep 17 00:00:00 2001 From: David Rosca Date: Fri, 22 Nov 2024 13:03:33 +0100 Subject: [PATCH] radeonsi/vcn: Create decode DPB surfaces with PIPE_BIND_VIDEO_DECODE_DPB Always use surface for DPB now that it is not GFX12 only. Reviewed-by: Ruijing Dong Part-of: --- src/gallium/drivers/radeonsi/radeon_vcn_dec.c | 148 ++++-------------- src/gallium/drivers/radeonsi/radeon_vcn_dec.h | 3 - 2 files changed, 32 insertions(+), 119 deletions(-) diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_dec.c b/src/gallium/drivers/radeonsi/radeon_vcn_dec.c index 32f033d3757..2edcda86ae5 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_dec.c +++ b/src/gallium/drivers/radeonsi/radeon_vcn_dec.c @@ -20,7 +20,6 @@ #include #include "ac_vcn_av1_default.h" -#include "ac_drm_fourcc.h" #include "ac_debug.h" #define FB_BUFFER_OFFSET 0x2000 @@ -1367,21 +1366,17 @@ static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn { struct rvcn_dec_dynamic_dpb_t2 *dpb = NULL, *dummy = NULL; struct si_resource *res; - unsigned width, height, size; + unsigned width, height; uint64_t addr; int i; width = align(decode->width_in_samples, dec->db_alignment); height = align(decode->height_in_samples, dec->db_alignment); - size = align((width * height * 3) / 2, 256); - if (dec->ref_codec.bts == CODEC_10_BITS || dec->ref_codec.bts == CODEC_12_BITS) - size = (((struct si_screen *)dec->screen)->info.vcn_ip_version == VCN_5_0_0) ? size * 2 : size * 3 / 2; list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) { bool found = false; - res = (dec->dpb_use_surf) ? &(((struct si_texture *)((struct vl_video_buffer *)d->vbuf)->resources[0])->buffer) : - d->dpb.res; + res = &(((struct si_texture *)((struct vl_video_buffer *)d->vbuf)->resources[0])->buffer); for (i = 0; i < dec->ref_codec.ref_size; ++i) { if (((dec->ref_codec.ref_list[i] & 0x7f) != 0x7f) && (d->index == (dec->ref_codec.ref_list[i] & 0x7f))) { if (!dummy) @@ -1390,8 +1385,7 @@ static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn if (!addr && dummy) { struct si_resource *dummy_res; RVID_ERR("Ref list from application is incorrect, using dummy buffer instead.\n"); - dummy_res = (dec->dpb_use_surf) ? &(((struct si_texture *)((struct vl_video_buffer *)dummy->vbuf)->resources[0])->buffer) : - dummy->dpb.res; + dummy_res = &(((struct si_texture *)((struct vl_video_buffer *)dummy->vbuf)->resources[0])->buffer); addr = dec->ws->buffer_get_virtual_address(dummy_res->buf); } dec->ws->cs_add_buffer(&dec->cs, res->buf, RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, RADEON_DOMAIN_VRAM); @@ -1402,8 +1396,7 @@ static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn } } if (!found) { - if ((!dec->dpb_use_surf && (d->dpb.res->b.b.width0 * d->dpb.res->b.b.height0 != size)) || - ((dec->dpb_use_surf) && (d->vbuf->width != width || d->vbuf->height != height))) { + if (d->vbuf->width != width || d->vbuf->height != height) { list_del(&d->list); list_addtail(&d->list, &dec->dpb_unref_list); } else { @@ -1413,8 +1406,7 @@ static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn } list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) { - if ((!dec->dpb_use_surf && (d->dpb.res->b.b.width0 * d->dpb.res->b.b.height0 == size && d->index == dec->ref_codec.index)) || - (dec->dpb_use_surf && ((d->vbuf->width == width && d->vbuf->height == height) && d->index == dec->ref_codec.index))) { + if (d->vbuf->width == width && d->vbuf->height == height && d->index == dec->ref_codec.index) { dpb = d; break; } @@ -1434,42 +1426,28 @@ static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn if (dec->prev_fence) dec->ws->fence_wait(dec->ws, dec->prev_fence, PIPE_DEFAULT_DECODER_FEEDBACK_TIMEOUT_NS); list_del(&d->list); - if (dec->dpb_use_surf) - d->vbuf->destroy(d->vbuf); - else - si_vid_destroy_buffer(&d->dpb); + d->vbuf->destroy(d->vbuf); FREE(d); } if (!dpb) { - bool r; dpb = CALLOC_STRUCT(rvcn_dec_dynamic_dpb_t2); if (!dpb) return 1; dpb->index = dec->ref_codec.index; - if (dec->dpb_use_surf) { - struct pipe_video_buffer templat; - memset(&templat, 0, sizeof(templat)); - templat.buffer_format = get_buffer_format(dec); - templat.width = width; - templat.height = height; - if (encrypted) - templat.bind |= PIPE_BIND_PROTECTED; + struct pipe_video_buffer templat; + memset(&templat, 0, sizeof(templat)); + templat.buffer_format = get_buffer_format(dec); + templat.width = width; + templat.height = height; + templat.bind = PIPE_BIND_VIDEO_DECODE_DPB; + if (encrypted) + templat.bind |= PIPE_BIND_PROTECTED; + dpb->vbuf = dec->base.context->create_video_buffer(dec->base.context, &templat); - dpb->vbuf = - dec->base.context->create_video_buffer_with_modifiers(dec->base.context, &templat, &dec->dpb_modifier, 1); - r = (dpb->vbuf != NULL); - } else { - if (encrypted) - r = si_vid_create_tmz_buffer(dec->screen, &dpb->dpb, size, PIPE_USAGE_DEFAULT); - else - r = si_vid_create_buffer(dec->screen, &dpb->dpb, size, PIPE_USAGE_DEFAULT); - assert(encrypted == (bool)(dpb->dpb.res->flags & RADEON_FLAG_ENCRYPTED)); - } - - if (!r) { - RVID_ERR("Can't allocated dpb buffer.\n"); + if (!dpb->vbuf) { + RVID_ERR("Can't allocate dpb buffer.\n"); FREE(dpb); return 1; } @@ -1480,13 +1458,11 @@ static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn struct rvcn_dec_dynamic_dpb_t2 *d = list_first_entry(&dec->dpb_ref_list, struct rvcn_dec_dynamic_dpb_t2, list); - res = (dec->dpb_use_surf) ? &(((struct si_texture *)((struct vl_video_buffer *)d->vbuf)->resources[0])->buffer) : - d->dpb.res; + res = &(((struct si_texture *)((struct vl_video_buffer *)d->vbuf)->resources[0])->buffer); addr = dec->ws->buffer_get_virtual_address(res->buf); if (!addr && dummy) { struct si_resource *dummy_res; - dummy_res = (dec->dpb_use_surf) ? &(((struct si_texture *)((struct vl_video_buffer *)dummy->vbuf)->resources[0])->buffer) : - dummy->dpb.res; + dummy_res = &(((struct si_texture *)((struct vl_video_buffer *)dummy->vbuf)->resources[0])->buffer); addr = dec->ws->buffer_get_virtual_address(dummy_res->buf); } assert(addr); @@ -1500,48 +1476,22 @@ static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn assert(dynamic_dpb_t2->dpbArraySize == dec->ref_codec.num_refs); } - if (dec->dpb_use_surf) { - struct si_texture *dpb_luma, *dpb_chroma; + struct si_texture *dpb_luma, *dpb_chroma; - dpb_luma = (struct si_texture *)((struct vl_video_buffer *)dpb->vbuf)->resources[0]; - dpb_chroma = (struct si_texture *)((struct vl_video_buffer *)dpb->vbuf)->resources[1]; + dpb_luma = (struct si_texture *)((struct vl_video_buffer *)dpb->vbuf)->resources[0]; + dpb_chroma = (struct si_texture *)((struct vl_video_buffer *)dpb->vbuf)->resources[1]; - decode->db_swizzle_mode = dpb_luma->surface.u.gfx9.swizzle_mode; + decode->db_swizzle_mode = dpb_luma->surface.u.gfx9.swizzle_mode; - dec->ws->cs_add_buffer(&dec->cs, (dpb_luma->buffer).buf, RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, RADEON_DOMAIN_VRAM); - addr = dec->ws->buffer_get_virtual_address((dpb_luma->buffer).buf); + dec->ws->cs_add_buffer(&dec->cs, (dpb_luma->buffer).buf, RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, RADEON_DOMAIN_VRAM); + addr = dec->ws->buffer_get_virtual_address((dpb_luma->buffer).buf); - dynamic_dpb_t2->dpbLumaPitch = dpb_luma->buffer.b.b.width0; - dynamic_dpb_t2->dpbLumaAlignedHeight = dpb_luma->buffer.b.b.height0; - dynamic_dpb_t2->dpbLumaAlignedSize = dpb_luma->surface.u.gfx9.surf_slice_size; - dynamic_dpb_t2->dpbChromaPitch = dpb_chroma->buffer.b.b.width0; - dynamic_dpb_t2->dpbChromaAlignedHeight = dpb_chroma->buffer.b.b.height0; - dynamic_dpb_t2->dpbChromaAlignedSize = dpb_chroma->surface.u.gfx9.surf_slice_size; - } else { - dec->ws->cs_add_buffer(&dec->cs, dpb->dpb.res->buf, RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, RADEON_DOMAIN_VRAM); - addr = dec->ws->buffer_get_virtual_address(dpb->dpb.res->buf); - - if (((struct si_screen *)dec->screen)->info.vcn_ip_version == VCN_5_0_0) - decode->db_swizzle_mode = RDECODE_VCN5_256B_D; - dynamic_dpb_t2->dpbLumaPitch = align(decode->width_in_samples, dec->db_alignment); - dynamic_dpb_t2->dpbLumaAlignedHeight = align(decode->height_in_samples, dec->db_alignment); - dynamic_dpb_t2->dpbLumaAlignedSize = dynamic_dpb_t2->dpbLumaPitch * - dynamic_dpb_t2->dpbLumaAlignedHeight; - dynamic_dpb_t2->dpbChromaPitch = dynamic_dpb_t2->dpbLumaPitch >> 1; - dynamic_dpb_t2->dpbChromaAlignedHeight = dynamic_dpb_t2->dpbLumaAlignedHeight >> 1; - dynamic_dpb_t2->dpbChromaAlignedSize = dynamic_dpb_t2->dpbChromaPitch * - dynamic_dpb_t2->dpbChromaAlignedHeight * 2; - - if (dec->ref_codec.bts == CODEC_10_BITS || dec->ref_codec.bts == CODEC_12_BITS) { - if (((struct si_screen *)dec->screen)->info.vcn_ip_version == VCN_5_0_0) { - dynamic_dpb_t2->dpbLumaAlignedSize = dynamic_dpb_t2->dpbLumaAlignedSize * 2; - dynamic_dpb_t2->dpbChromaAlignedSize = dynamic_dpb_t2->dpbChromaAlignedSize * 2; - } else { - dynamic_dpb_t2->dpbLumaAlignedSize = dynamic_dpb_t2->dpbLumaAlignedSize * 3 / 2; - dynamic_dpb_t2->dpbChromaAlignedSize = dynamic_dpb_t2->dpbChromaAlignedSize * 3 / 2; - } - } - } + dynamic_dpb_t2->dpbLumaPitch = dpb_luma->surface.u.gfx9.surf_pitch; + dynamic_dpb_t2->dpbLumaAlignedHeight = dpb_luma->surface.u.gfx9.surf_height; + dynamic_dpb_t2->dpbLumaAlignedSize = dpb_luma->surface.u.gfx9.surf_slice_size; + dynamic_dpb_t2->dpbChromaPitch = dpb_chroma->surface.u.gfx9.surf_pitch; + dynamic_dpb_t2->dpbChromaAlignedHeight = dpb_chroma->surface.u.gfx9.surf_height; + dynamic_dpb_t2->dpbChromaAlignedSize = dpb_chroma->surface.u.gfx9.surf_slice_size; dynamic_dpb_t2->dpbCurrLo = addr; dynamic_dpb_t2->dpbCurrHi = addr >> 32; @@ -2437,10 +2387,7 @@ static void radeon_dec_destroy(struct pipe_video_codec *decoder) } else { list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) { list_del(&d->list); - if (dec->dpb_use_surf) - d->vbuf->destroy(d->vbuf); - else - si_vid_destroy_buffer(&d->dpb); + d->vbuf->destroy(d->vbuf); FREE(d); } } @@ -2659,35 +2606,6 @@ static void radeon_dec_destroy_fence(struct pipe_video_codec *decoder, dec->ws->fence_reference(dec->ws, &fence, NULL); } -static bool radeon_has_video_preferred_gfx12_swizzle_mode(struct radeon_decoder *dec, uint64_t* modifier) -{ - uint64_t *allowed_modifiers; - int allowed_mod_count, i, j; - enum pipe_format format[2] = { PIPE_FORMAT_NV12, PIPE_FORMAT_P010 }; - - for (i = 0; i < 2; ++i) { - dec->screen->query_dmabuf_modifiers(dec->screen, format[i], 0, NULL, NULL, &allowed_mod_count); - allowed_modifiers = (uint64_t *)calloc(allowed_mod_count, sizeof(uint64_t)); - if (!allowed_modifiers) - return false; - - dec->screen->query_dmabuf_modifiers(dec->screen, format[i], allowed_mod_count, - allowed_modifiers, NULL, &allowed_mod_count); - for (j = 0; j < allowed_mod_count; ++j) { - if (AMD_FMT_MOD_GET(TILE, allowed_modifiers[j]) == AMD_FMT_MOD_TILE_GFX12_256B_2D) - break; - } - if (j == allowed_mod_count) { - free(allowed_modifiers); - return false;; - } - *modifier = allowed_modifiers[j]; - free(allowed_modifiers); - } - - return true; -} - /** * create and HW decoder */ @@ -2960,8 +2878,6 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V3; dec->addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11; dec->av1_version = RDECODE_AV1_VER_1; - if (radeon_has_video_preferred_gfx12_swizzle_mode(dec, &dec->dpb_modifier)) - dec->dpb_use_surf = true; break; default: RVID_ERR("VCN is not supported.\n"); diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_dec.h b/src/gallium/drivers/radeonsi/radeon_vcn_dec.h index 98904e38792..aeb96e28034 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_dec.h +++ b/src/gallium/drivers/radeonsi/radeon_vcn_dec.h @@ -20,7 +20,6 @@ struct rvcn_dec_dynamic_dpb_t2 { struct list_head list; uint8_t index; - struct rvid_buffer dpb; struct pipe_video_buffer *vbuf; }; @@ -139,8 +138,6 @@ struct radeon_decoder { unsigned njctx; struct pipe_fence_handle *prev_fence; struct pipe_fence_handle *destroy_fence; - bool dpb_use_surf; - uint64_t dpb_modifier; struct pipe_context *ectx; };