radeonsi/vcn: Create decode DPB surfaces with PIPE_BIND_VIDEO_DECODE_DPB

Always use surface for DPB now that it is not GFX12 only.

Reviewed-by: Ruijing Dong <ruijing.dong@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32303>
This commit is contained in:
David Rosca
2024-11-22 13:03:33 +01:00
committed by Marge Bot
parent 1d206ca94c
commit 7d27de70af
2 changed files with 32 additions and 119 deletions
+32 -116
View File
@@ -20,7 +20,6 @@
#include <stdio.h>
#include "ac_vcn_av1_default.h"
#include "ac_drm_fourcc.h"
#include "ac_debug.h"
#define FB_BUFFER_OFFSET 0x2000
@@ -1367,21 +1366,17 @@ static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn
{
struct rvcn_dec_dynamic_dpb_t2 *dpb = NULL, *dummy = NULL;
struct si_resource *res;
unsigned width, height, size;
unsigned width, height;
uint64_t addr;
int i;
width = align(decode->width_in_samples, dec->db_alignment);
height = align(decode->height_in_samples, dec->db_alignment);
size = align((width * height * 3) / 2, 256);
if (dec->ref_codec.bts == CODEC_10_BITS || dec->ref_codec.bts == CODEC_12_BITS)
size = (((struct si_screen *)dec->screen)->info.vcn_ip_version == VCN_5_0_0) ? size * 2 : size * 3 / 2;
list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) {
bool found = false;
res = (dec->dpb_use_surf) ? &(((struct si_texture *)((struct vl_video_buffer *)d->vbuf)->resources[0])->buffer) :
d->dpb.res;
res = &(((struct si_texture *)((struct vl_video_buffer *)d->vbuf)->resources[0])->buffer);
for (i = 0; i < dec->ref_codec.ref_size; ++i) {
if (((dec->ref_codec.ref_list[i] & 0x7f) != 0x7f) && (d->index == (dec->ref_codec.ref_list[i] & 0x7f))) {
if (!dummy)
@@ -1390,8 +1385,7 @@ static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn
if (!addr && dummy) {
struct si_resource *dummy_res;
RVID_ERR("Ref list from application is incorrect, using dummy buffer instead.\n");
dummy_res = (dec->dpb_use_surf) ? &(((struct si_texture *)((struct vl_video_buffer *)dummy->vbuf)->resources[0])->buffer) :
dummy->dpb.res;
dummy_res = &(((struct si_texture *)((struct vl_video_buffer *)dummy->vbuf)->resources[0])->buffer);
addr = dec->ws->buffer_get_virtual_address(dummy_res->buf);
}
dec->ws->cs_add_buffer(&dec->cs, res->buf, RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, RADEON_DOMAIN_VRAM);
@@ -1402,8 +1396,7 @@ static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn
}
}
if (!found) {
if ((!dec->dpb_use_surf && (d->dpb.res->b.b.width0 * d->dpb.res->b.b.height0 != size)) ||
((dec->dpb_use_surf) && (d->vbuf->width != width || d->vbuf->height != height))) {
if (d->vbuf->width != width || d->vbuf->height != height) {
list_del(&d->list);
list_addtail(&d->list, &dec->dpb_unref_list);
} else {
@@ -1413,8 +1406,7 @@ static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn
}
list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) {
if ((!dec->dpb_use_surf && (d->dpb.res->b.b.width0 * d->dpb.res->b.b.height0 == size && d->index == dec->ref_codec.index)) ||
(dec->dpb_use_surf && ((d->vbuf->width == width && d->vbuf->height == height) && d->index == dec->ref_codec.index))) {
if (d->vbuf->width == width && d->vbuf->height == height && d->index == dec->ref_codec.index) {
dpb = d;
break;
}
@@ -1434,42 +1426,28 @@ static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn
if (dec->prev_fence)
dec->ws->fence_wait(dec->ws, dec->prev_fence, PIPE_DEFAULT_DECODER_FEEDBACK_TIMEOUT_NS);
list_del(&d->list);
if (dec->dpb_use_surf)
d->vbuf->destroy(d->vbuf);
else
si_vid_destroy_buffer(&d->dpb);
d->vbuf->destroy(d->vbuf);
FREE(d);
}
if (!dpb) {
bool r;
dpb = CALLOC_STRUCT(rvcn_dec_dynamic_dpb_t2);
if (!dpb)
return 1;
dpb->index = dec->ref_codec.index;
if (dec->dpb_use_surf) {
struct pipe_video_buffer templat;
memset(&templat, 0, sizeof(templat));
templat.buffer_format = get_buffer_format(dec);
templat.width = width;
templat.height = height;
if (encrypted)
templat.bind |= PIPE_BIND_PROTECTED;
struct pipe_video_buffer templat;
memset(&templat, 0, sizeof(templat));
templat.buffer_format = get_buffer_format(dec);
templat.width = width;
templat.height = height;
templat.bind = PIPE_BIND_VIDEO_DECODE_DPB;
if (encrypted)
templat.bind |= PIPE_BIND_PROTECTED;
dpb->vbuf = dec->base.context->create_video_buffer(dec->base.context, &templat);
dpb->vbuf =
dec->base.context->create_video_buffer_with_modifiers(dec->base.context, &templat, &dec->dpb_modifier, 1);
r = (dpb->vbuf != NULL);
} else {
if (encrypted)
r = si_vid_create_tmz_buffer(dec->screen, &dpb->dpb, size, PIPE_USAGE_DEFAULT);
else
r = si_vid_create_buffer(dec->screen, &dpb->dpb, size, PIPE_USAGE_DEFAULT);
assert(encrypted == (bool)(dpb->dpb.res->flags & RADEON_FLAG_ENCRYPTED));
}
if (!r) {
RVID_ERR("Can't allocated dpb buffer.\n");
if (!dpb->vbuf) {
RVID_ERR("Can't allocate dpb buffer.\n");
FREE(dpb);
return 1;
}
@@ -1480,13 +1458,11 @@ static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn
struct rvcn_dec_dynamic_dpb_t2 *d =
list_first_entry(&dec->dpb_ref_list, struct rvcn_dec_dynamic_dpb_t2, list);
res = (dec->dpb_use_surf) ? &(((struct si_texture *)((struct vl_video_buffer *)d->vbuf)->resources[0])->buffer) :
d->dpb.res;
res = &(((struct si_texture *)((struct vl_video_buffer *)d->vbuf)->resources[0])->buffer);
addr = dec->ws->buffer_get_virtual_address(res->buf);
if (!addr && dummy) {
struct si_resource *dummy_res;
dummy_res = (dec->dpb_use_surf) ? &(((struct si_texture *)((struct vl_video_buffer *)dummy->vbuf)->resources[0])->buffer) :
dummy->dpb.res;
dummy_res = &(((struct si_texture *)((struct vl_video_buffer *)dummy->vbuf)->resources[0])->buffer);
addr = dec->ws->buffer_get_virtual_address(dummy_res->buf);
}
assert(addr);
@@ -1500,48 +1476,22 @@ static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn
assert(dynamic_dpb_t2->dpbArraySize == dec->ref_codec.num_refs);
}
if (dec->dpb_use_surf) {
struct si_texture *dpb_luma, *dpb_chroma;
struct si_texture *dpb_luma, *dpb_chroma;
dpb_luma = (struct si_texture *)((struct vl_video_buffer *)dpb->vbuf)->resources[0];
dpb_chroma = (struct si_texture *)((struct vl_video_buffer *)dpb->vbuf)->resources[1];
dpb_luma = (struct si_texture *)((struct vl_video_buffer *)dpb->vbuf)->resources[0];
dpb_chroma = (struct si_texture *)((struct vl_video_buffer *)dpb->vbuf)->resources[1];
decode->db_swizzle_mode = dpb_luma->surface.u.gfx9.swizzle_mode;
decode->db_swizzle_mode = dpb_luma->surface.u.gfx9.swizzle_mode;
dec->ws->cs_add_buffer(&dec->cs, (dpb_luma->buffer).buf, RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, RADEON_DOMAIN_VRAM);
addr = dec->ws->buffer_get_virtual_address((dpb_luma->buffer).buf);
dec->ws->cs_add_buffer(&dec->cs, (dpb_luma->buffer).buf, RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, RADEON_DOMAIN_VRAM);
addr = dec->ws->buffer_get_virtual_address((dpb_luma->buffer).buf);
dynamic_dpb_t2->dpbLumaPitch = dpb_luma->buffer.b.b.width0;
dynamic_dpb_t2->dpbLumaAlignedHeight = dpb_luma->buffer.b.b.height0;
dynamic_dpb_t2->dpbLumaAlignedSize = dpb_luma->surface.u.gfx9.surf_slice_size;
dynamic_dpb_t2->dpbChromaPitch = dpb_chroma->buffer.b.b.width0;
dynamic_dpb_t2->dpbChromaAlignedHeight = dpb_chroma->buffer.b.b.height0;
dynamic_dpb_t2->dpbChromaAlignedSize = dpb_chroma->surface.u.gfx9.surf_slice_size;
} else {
dec->ws->cs_add_buffer(&dec->cs, dpb->dpb.res->buf, RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, RADEON_DOMAIN_VRAM);
addr = dec->ws->buffer_get_virtual_address(dpb->dpb.res->buf);
if (((struct si_screen *)dec->screen)->info.vcn_ip_version == VCN_5_0_0)
decode->db_swizzle_mode = RDECODE_VCN5_256B_D;
dynamic_dpb_t2->dpbLumaPitch = align(decode->width_in_samples, dec->db_alignment);
dynamic_dpb_t2->dpbLumaAlignedHeight = align(decode->height_in_samples, dec->db_alignment);
dynamic_dpb_t2->dpbLumaAlignedSize = dynamic_dpb_t2->dpbLumaPitch *
dynamic_dpb_t2->dpbLumaAlignedHeight;
dynamic_dpb_t2->dpbChromaPitch = dynamic_dpb_t2->dpbLumaPitch >> 1;
dynamic_dpb_t2->dpbChromaAlignedHeight = dynamic_dpb_t2->dpbLumaAlignedHeight >> 1;
dynamic_dpb_t2->dpbChromaAlignedSize = dynamic_dpb_t2->dpbChromaPitch *
dynamic_dpb_t2->dpbChromaAlignedHeight * 2;
if (dec->ref_codec.bts == CODEC_10_BITS || dec->ref_codec.bts == CODEC_12_BITS) {
if (((struct si_screen *)dec->screen)->info.vcn_ip_version == VCN_5_0_0) {
dynamic_dpb_t2->dpbLumaAlignedSize = dynamic_dpb_t2->dpbLumaAlignedSize * 2;
dynamic_dpb_t2->dpbChromaAlignedSize = dynamic_dpb_t2->dpbChromaAlignedSize * 2;
} else {
dynamic_dpb_t2->dpbLumaAlignedSize = dynamic_dpb_t2->dpbLumaAlignedSize * 3 / 2;
dynamic_dpb_t2->dpbChromaAlignedSize = dynamic_dpb_t2->dpbChromaAlignedSize * 3 / 2;
}
}
}
dynamic_dpb_t2->dpbLumaPitch = dpb_luma->surface.u.gfx9.surf_pitch;
dynamic_dpb_t2->dpbLumaAlignedHeight = dpb_luma->surface.u.gfx9.surf_height;
dynamic_dpb_t2->dpbLumaAlignedSize = dpb_luma->surface.u.gfx9.surf_slice_size;
dynamic_dpb_t2->dpbChromaPitch = dpb_chroma->surface.u.gfx9.surf_pitch;
dynamic_dpb_t2->dpbChromaAlignedHeight = dpb_chroma->surface.u.gfx9.surf_height;
dynamic_dpb_t2->dpbChromaAlignedSize = dpb_chroma->surface.u.gfx9.surf_slice_size;
dynamic_dpb_t2->dpbCurrLo = addr;
dynamic_dpb_t2->dpbCurrHi = addr >> 32;
@@ -2437,10 +2387,7 @@ static void radeon_dec_destroy(struct pipe_video_codec *decoder)
} else {
list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) {
list_del(&d->list);
if (dec->dpb_use_surf)
d->vbuf->destroy(d->vbuf);
else
si_vid_destroy_buffer(&d->dpb);
d->vbuf->destroy(d->vbuf);
FREE(d);
}
}
@@ -2659,35 +2606,6 @@ static void radeon_dec_destroy_fence(struct pipe_video_codec *decoder,
dec->ws->fence_reference(dec->ws, &fence, NULL);
}
static bool radeon_has_video_preferred_gfx12_swizzle_mode(struct radeon_decoder *dec, uint64_t* modifier)
{
uint64_t *allowed_modifiers;
int allowed_mod_count, i, j;
enum pipe_format format[2] = { PIPE_FORMAT_NV12, PIPE_FORMAT_P010 };
for (i = 0; i < 2; ++i) {
dec->screen->query_dmabuf_modifiers(dec->screen, format[i], 0, NULL, NULL, &allowed_mod_count);
allowed_modifiers = (uint64_t *)calloc(allowed_mod_count, sizeof(uint64_t));
if (!allowed_modifiers)
return false;
dec->screen->query_dmabuf_modifiers(dec->screen, format[i], allowed_mod_count,
allowed_modifiers, NULL, &allowed_mod_count);
for (j = 0; j < allowed_mod_count; ++j) {
if (AMD_FMT_MOD_GET(TILE, allowed_modifiers[j]) == AMD_FMT_MOD_TILE_GFX12_256B_2D)
break;
}
if (j == allowed_mod_count) {
free(allowed_modifiers);
return false;;
}
*modifier = allowed_modifiers[j];
free(allowed_modifiers);
}
return true;
}
/**
* create and HW decoder
*/
@@ -2960,8 +2878,6 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V3;
dec->addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11;
dec->av1_version = RDECODE_AV1_VER_1;
if (radeon_has_video_preferred_gfx12_swizzle_mode(dec, &dec->dpb_modifier))
dec->dpb_use_surf = true;
break;
default:
RVID_ERR("VCN is not supported.\n");
@@ -20,7 +20,6 @@
struct rvcn_dec_dynamic_dpb_t2 {
struct list_head list;
uint8_t index;
struct rvid_buffer dpb;
struct pipe_video_buffer *vbuf;
};
@@ -139,8 +138,6 @@ struct radeon_decoder {
unsigned njctx;
struct pipe_fence_handle *prev_fence;
struct pipe_fence_handle *destroy_fence;
bool dpb_use_surf;
uint64_t dpb_modifier;
struct pipe_context *ectx;
};