radeonsi/vcn: add encode context ib for vcn 4.0

Implement the encode context IB for VCN 4.0, based on the new reconstructed
picture structure and interface, and modify the DPB setup accordingly.

Signed-off-by: Boyuan Zhang <boyuan.zhang@amd.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16328>
This commit is contained in:
Boyuan Zhang
2022-02-26 20:14:30 -05:00
committed by Marge Bot
parent 38a95f2109
commit dbd75e0d48
3 changed files with 83 additions and 6 deletions
+14 -6
View File
@@ -476,7 +476,8 @@ static void radeon_enc_get_feedback(struct pipe_video_codec *encoder, void *feed
FREE(fb);
}
static int setup_dpb(struct radeon_encoder *enc, enum pipe_format buffer_format)
static int setup_dpb(struct radeon_encoder *enc, enum pipe_format buffer_format,
enum chip_class chip_class)
{
uint32_t aligned_width = align(enc->base.width, 16);
uint32_t aligned_height = align(enc->base.height, 16);
@@ -493,10 +494,17 @@ static int setup_dpb(struct radeon_encoder *enc, enum pipe_format buffer_format)
int i;
for (i = 0; i < num_reconstructed_pictures; i++) {
enc->enc_pic.ctx_buf.reconstructed_pictures[i].luma_offset = offset;
offset += luma_size;
enc->enc_pic.ctx_buf.reconstructed_pictures[i].chroma_offset = offset;
offset += chroma_size;
if (chip_class >= GFX11) {
enc->enc_pic.ctx_buf.reconstructed_pictures_v4_0[i].luma_offset = offset;
offset += luma_size;
enc->enc_pic.ctx_buf.reconstructed_pictures_v4_0[i].chroma_offset = offset;
offset += chroma_size;
} else {
enc->enc_pic.ctx_buf.reconstructed_pictures[i].luma_offset = offset;
offset += luma_size;
enc->enc_pic.ctx_buf.reconstructed_pictures[i].chroma_offset = offset;
offset += chroma_size;
}
}
for (; i < RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES; i++) {
enc->enc_pic.ctx_buf.reconstructed_pictures[i].luma_offset = 0;
@@ -574,7 +582,7 @@ struct pipe_video_codec *radeon_create_encoder(struct pipe_context *context,
cpb_size = cpb_size * enc->cpb_num;
tmp_buf->destroy(tmp_buf);
cpb_size += setup_dpb(enc, templat.buffer_format);
cpb_size += setup_dpb(enc, templat.buffer_format, sscreen->info.chip_class);
if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't create CPB buffer.\n");
@@ -353,6 +353,18 @@ typedef struct rvcn_enc_reconstructed_picture_s {
uint32_t chroma_offset;
} rvcn_enc_reconstructed_picture_t;
/*
 * Reconstructed-picture entry used by the VCN 4.0 encode context.
 *
 * Each entry is four 32-bit words: the luma and chroma offsets, followed by
 * two words that the 4.0 context IB writes out verbatim but that carry no
 * meaning here.
 */
typedef struct rvcn_enc_reconstructed_picture_v4_0_s {
   uint32_t luma_offset;   /* offset of the luma plane */
   uint32_t chroma_offset; /* offset of the chroma plane */
   /* NOTE(review): the anonymous union presumably leaves room for alternative
    * layouts of the trailing two words; only "unused" exists today. */
   union {
      struct {
         uint32_t unused_offset1;
         uint32_t unused_offset2;
      } unused;
   };
} rvcn_enc_reconstructed_picture_v4_0_t;
typedef struct rvcn_enc_picture_info_s
{
bool in_use;
@@ -381,12 +393,14 @@ typedef struct rvcn_enc_encode_context_buffer_s {
uint32_t rec_chroma_pitch;
uint32_t num_reconstructed_pictures;
rvcn_enc_reconstructed_picture_t reconstructed_pictures[RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES];
rvcn_enc_reconstructed_picture_v4_0_t reconstructed_pictures_v4_0[RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES];
uint32_t pre_encode_picture_luma_pitch;
uint32_t pre_encode_picture_chroma_pitch;
rvcn_enc_reconstructed_picture_t
pre_encode_reconstructed_pictures[RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES];
rvcn_enc_pre_encode_input_picture_t pre_encode_input_picture;
uint32_t two_pass_search_center_map_offset;
uint32_t colloc_buffer_offset;
} rvcn_enc_encode_context_buffer_t;
typedef struct rvcn_enc_video_bitstream_buffer_s {
@@ -38,10 +38,65 @@
#define RENCODE_FW_INTERFACE_MAJOR_VERSION 1
#define RENCODE_FW_INTERFACE_MINOR_VERSION 0
/**
 * Emit the encode-context IB for VCN 4.0.
 *
 * Fills in the context-buffer fields owned by this function (swizzle mode,
 * reconstructed-picture pitches, two-pass search map offset and collocated
 * buffer offset), sanity-checks the DPB size against enc->dpb_size, and then
 * writes the context packet to the command stream.
 *
 * The packet uses the v4_0 reconstructed-picture entries (four dwords each)
 * and zero-fills every pre-encode field.
 *
 * @param enc  encoder whose enc_pic.ctx_buf is updated and whose command
 *             stream receives the packet.
 */
static void radeon_enc_ctx(struct radeon_encoder *enc)
{
   enc->enc_pic.ctx_buf.swizzle_mode = 0;
   enc->enc_pic.ctx_buf.two_pass_search_center_map_offset = 0;
   /* The collocated buffer is placed right after the reconstructed pictures. */
   enc->enc_pic.ctx_buf.colloc_buffer_offset = enc->dpb_size;

   uint32_t aligned_width = enc->enc_pic.session_init.aligned_picture_width;
   uint32_t aligned_height = enc->enc_pic.session_init.aligned_picture_height;

   enc->enc_pic.ctx_buf.rec_luma_pitch = align(aligned_width, enc->alignment);
   enc->enc_pic.ctx_buf.rec_chroma_pitch = align(aligned_width, enc->alignment);

   /* Sizes are kept unsigned: they are byte counts compared against unsigned
    * fields, and signed arithmetic could overflow for many large pictures. */
   uint32_t luma_size =
      enc->enc_pic.ctx_buf.rec_luma_pitch * align(aligned_height, enc->alignment);
   /* bit_depth_luma_minus8 == 2 (bit depth 10) takes twice the space. */
   if (enc->enc_pic.bit_depth_luma_minus8 == 2)
      luma_size *= 2;
   uint32_t chroma_size = align(luma_size / 2, enc->alignment);

   /* Every reconstructed picture takes one luma plus one chroma plane; the
    * total must match the DPB size recorded in enc->dpb_size. */
   uint32_t expected_dpb_size =
      enc->enc_pic.ctx_buf.num_reconstructed_pictures * (luma_size + chroma_size);
   assert(expected_dpb_size == enc->dpb_size);
   (void)expected_dpb_size; /* only consumed by the assert above */

   // 2: 1 pre encode pitch * 2 (luma + chroma)
   // 136: 34 pre encode reconstructed pics * 4 (luma + chroma offsets + unused union)
   // 3: 1 pre encode input pic * 3 (r,g,b offset union)
   //----
   // 141
   const unsigned pre_encode_zero_dwords = 141;

   RADEON_ENC_BEGIN(enc->cmd.ctx);
   RADEON_ENC_READWRITE(enc->cpb.res->buf, enc->cpb.res->domains, 0);
   RADEON_ENC_CS(enc->enc_pic.ctx_buf.swizzle_mode);
   RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_luma_pitch);
   RADEON_ENC_CS(enc->enc_pic.ctx_buf.rec_chroma_pitch);
   RADEON_ENC_CS(enc->enc_pic.ctx_buf.num_reconstructed_pictures);

   /* All table slots are written, not just the ones in use. */
   for (int i = 0; i < RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES; i++) {
      RADEON_ENC_CS(enc->enc_pic.ctx_buf.reconstructed_pictures_v4_0[i].luma_offset);
      RADEON_ENC_CS(enc->enc_pic.ctx_buf.reconstructed_pictures_v4_0[i].chroma_offset);
      RADEON_ENC_CS(enc->enc_pic.ctx_buf.reconstructed_pictures_v4_0[i].unused.unused_offset1);
      RADEON_ENC_CS(enc->enc_pic.ctx_buf.reconstructed_pictures_v4_0[i].unused.unused_offset2);
   }

   /* The pre-encode fields are written as zeros. */
   for (unsigned i = 0; i < pre_encode_zero_dwords; i++)
      RADEON_ENC_CS(0x00000000);

   RADEON_ENC_CS(enc->enc_pic.ctx_buf.two_pass_search_center_map_offset);
   RADEON_ENC_CS(enc->enc_pic.ctx_buf.colloc_buffer_offset);
   RADEON_ENC_END();
}
void radeon_enc_4_0_init(struct radeon_encoder *enc)
{
radeon_enc_3_0_init(enc);
enc->ctx = radeon_enc_ctx;
enc->enc_pic.session_info.interface_version =
((RENCODE_FW_INTERFACE_MAJOR_VERSION << RENCODE_IF_MAJOR_VERSION_SHIFT) |
(RENCODE_FW_INTERFACE_MINOR_VERSION << RENCODE_IF_MINOR_VERSION_SHIFT));