radeon/vcn: add decode message for hevc codec

Signed-off-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
This commit is contained in:
Leo Liu
2017-05-03 09:40:44 -04:00
parent 9c21f6abda
commit a55d2659d9
+225
View File
@@ -180,6 +180,212 @@ static void radeon_dec_destroy_associated_data(void *data)
/* NOOP, since we only use an intptr */
}
static rvcn_dec_message_hevc_t get_h265_msg(struct radeon_decoder *dec,
struct pipe_video_buffer *target,
struct pipe_h265_picture_desc *pic)
{
rvcn_dec_message_hevc_t result;
unsigned i;
memset(&result, 0, sizeof(result));
result.sps_info_flags = 0;
result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0;
result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1;
result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2;
result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3;
result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4;
result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5;
result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO)
result.sps_info_flags |= 1 << 9;
if (pic->UseRefPicList == true)
result.sps_info_flags |= 1 << 10;
result.chroma_format = pic->pps->sps->chroma_format_idc;
result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1;
result.log2_min_luma_coding_block_size_minus3 =
pic->pps->sps->log2_min_luma_coding_block_size_minus3;
result.log2_diff_max_min_luma_coding_block_size =
pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
result.log2_min_transform_block_size_minus2 =
pic->pps->sps->log2_min_transform_block_size_minus2;
result.log2_diff_max_min_transform_block_size =
pic->pps->sps->log2_diff_max_min_transform_block_size;
result.max_transform_hierarchy_depth_inter =
pic->pps->sps->max_transform_hierarchy_depth_inter;
result.max_transform_hierarchy_depth_intra =
pic->pps->sps->max_transform_hierarchy_depth_intra;
result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1;
result.pcm_sample_bit_depth_chroma_minus1 =
pic->pps->sps->pcm_sample_bit_depth_chroma_minus1;
result.log2_min_pcm_luma_coding_block_size_minus3 =
pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3;
result.log2_diff_max_min_pcm_luma_coding_block_size =
pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size;
result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets;
result.pps_info_flags = 0;
result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0;
result.pps_info_flags |= pic->pps->output_flag_present_flag << 1;
result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2;
result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3;
result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4;
result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5;
result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6;
result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7;
result.pps_info_flags |= pic->pps->weighted_pred_flag << 8;
result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9;
result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10;
result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11;
result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12;
result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13;
result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14;
result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15;
result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16;
result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17;
result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18;
result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19;
result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits;
result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps;
result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1;
result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1;
result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset;
result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset;
result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2;
result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2;
result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth;
result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1;
result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1;
result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2;
result.init_qp_minus26 = pic->pps->init_qp_minus26;
for (i = 0; i < 19; ++i)
result.column_width_minus1[i] = pic->pps->column_width_minus1[i];
for (i = 0; i < 21; ++i)
result.row_height_minus1[i] = pic->pps->row_height_minus1[i];
result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx;
result.curr_idx = pic->CurrPicOrderCntVal;
result.curr_poc = pic->CurrPicOrderCntVal;
vl_video_buffer_set_associated_data(target, &dec->base,
(void *)(uintptr_t)pic->CurrPicOrderCntVal,
&radeon_dec_destroy_associated_data);
for (i = 0; i < 16; ++i) {
struct pipe_video_buffer *ref = pic->ref[i];
uintptr_t ref_pic = 0;
result.poc_list[i] = pic->PicOrderCntVal[i];
if (ref)
ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
else
ref_pic = 0x7F;
result.ref_pic_list[i] = ref_pic;
}
for (i = 0; i < 8; ++i) {
result.ref_pic_set_st_curr_before[i] = 0xFF;
result.ref_pic_set_st_curr_after[i] = 0xFF;
result.ref_pic_set_lt_curr[i] = 0xFF;
}
for (i = 0; i < pic->NumPocStCurrBefore; ++i)
result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i];
for (i = 0; i < pic->NumPocStCurrAfter; ++i)
result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i];
for (i = 0; i < pic->NumPocLtCurr; ++i)
result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i];
for (i = 0; i < 6; ++i)
result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i];
for (i = 0; i < 2; ++i)
result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i];
memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16);
memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64);
memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64);
memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64);
for (i = 0 ; i < 2 ; i++) {
for (int j = 0 ; j < 15 ; j++)
result.direct_reflist[i][j] = pic->RefPicList[i][j];
}
if ((pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) &&
(target->buffer_format == PIPE_FORMAT_NV12)) {
result.p010_mode = 0;
result.luma_10to8 = 5;
result.chroma_10to8 = 5;
result.hevc_reserved[0] = 4; /* sclr_luma10to8 */
result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */
}
return result;
}
/**
 * Compute the context buffer size used for HEVC streams that are not
 * Main10 (see the caller's profile check).
 *
 * The size scales with the macroblock-aligned picture dimensions and with
 * the number of references, which is raised to a minimum of 8 for pictures
 * of at least 4096*2000 pixels and 17 otherwise. A fixed 52 KiB is added.
 *
 * \param dec  decoder instance; only dec->base is read
 * \return context buffer size
 */
static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec)
{
   unsigned aligned_w = align(dec->base.width, VL_MACROBLOCK_WIDTH);
   unsigned aligned_h = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
   unsigned num_refs = dec->base.max_references + 1;
   unsigned min_refs;
   unsigned blocks_w, blocks_h;

   /* Large pictures get a lower floor on the reference count. */
   min_refs = (dec->base.width * dec->base.height >= 4096*2000) ? 8 : 17;
   num_refs = MAX2(num_refs, min_refs);

   aligned_w = align (aligned_w, 16);
   aligned_h = align (aligned_h, 16);

   blocks_w = (aligned_w + 255) / 16;
   blocks_h = (aligned_h + 255) / 16;

   return blocks_w * blocks_h * 16 * num_refs + 52 * 1024;
}
/**
 * Compute the context buffer size used for HEVC Main10 streams.
 *
 * The result is the sum of three parts computed below: a per-CTB-row
 * context area multiplied by the CTB row count and the reference count,
 * a fixed db_left_tile_ctx_size, and db_left_tile_pxl_size, which is
 * doubled when either bit depth exceeds 8.
 *
 * \param dec  decoder instance; only dec->base is read
 * \param pic  picture description; its SPS supplies the coding block sizes
 *             and bit depths
 * \return context buffer size
 */
static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec, struct pipe_h265_picture_desc *pic)
{
   unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
   unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
   unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);

   unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
   unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
   /* 2 when luma or chroma is deeper than 8 bits, 1 otherwise. */
   unsigned coeff_10bit = (pic->pps->sps->bit_depth_luma_minus8 ||
                           pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1;

   unsigned max_references = dec->base.max_references + 1;

   /* Large pictures get a lower floor on the reference count. */
   if (dec->base.width * dec->base.height >= 4096*2000)
      max_references = MAX2(max_references, 8);
   else
      max_references = MAX2(max_references, 17);

   /* log2 of the CTB size is the log2 of the minimum coding block size plus
    * the log2 max/min difference. Both terms must stay in the log domain:
    * adding the linear block size (1 << ...) instead yields a shift count
    * that can reach or exceed the width of int, which is undefined. */
   log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 +
                   pic->pps->sps->log2_diff_max_min_luma_coding_block_size;

   /* Picture size in CTBs, rounded up. */
   width_in_ctb = (width + ((1u << log2_ctb_size) - 1)) >> log2_ctb_size;
   height_in_ctb = (height + ((1u << log2_ctb_size) - 1)) >> log2_ctb_size;

   num_16x16_block_per_ctb = ((1u << log2_ctb_size) >> 4) * ((1u << log2_ctb_size) >> 4);
   context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
   max_mb_address = (unsigned) ceil(height * 8 / 2048.0);

   cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
   db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);

   return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
}
static void rvcn_dec_message_create(struct radeon_decoder *dec)
{
rvcn_dec_message_header_t *header = dec->msg;
@@ -294,6 +500,25 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
index->message_id = RDECODE_MESSAGE_AVC;
break;
}
case PIPE_VIDEO_FORMAT_HEVC: {
rvcn_dec_message_hevc_t hevc =
get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture);
memcpy(codec, (void*)&hevc, sizeof(rvcn_dec_message_hevc_t));
index->message_id = RDECODE_MESSAGE_HEVC;
if (dec->ctx.res == NULL) {
unsigned ctx_size;
if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
ctx_size = calc_ctx_size_h265_main10(dec,
(struct pipe_h265_picture_desc*)picture);
else
ctx_size = calc_ctx_size_h265_main(dec);
if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT))
RVID_ERR("Can't allocated context buffer.\n");
rvid_clear_buffer(dec->base.context, &dec->ctx);
}
break;
}
default:
assert(0);
return NULL;