diff --git a/src/amd/vpelib/inc/vpe_types.h b/src/amd/vpelib/inc/vpe_types.h
index 3fec8f8e7c0..161390099bb 100644
--- a/src/amd/vpelib/inc/vpe_types.h
+++ b/src/amd/vpelib/inc/vpe_types.h
@@ -837,6 +837,18 @@ struct vpe_stream {
     } flags;
 };
 
+enum predication_polarity {
+    PREDICATION_OP_EQUAL_ZERO = 0, /**< Enables predication if all 64-bits are zero. */
+    PREDICATION_OP_NOT_EQUAL_ZERO =
+        1, /**< Enables predication if at least one of the 64-bits are not zero.*/
+};
+
+struct vpe_predication_info {
+    bool enable;                        /**< Enable predication */
+    uint64_t gpu_va;                    /**< GPU start address of the buffer */
+    enum predication_polarity polarity; /**< Predication polarity */
+};
+
 /** @struct vpe_build_param
  * @brief Build parametrs for vpelib. Must get populated before vpe_check_support() call.
  */
@@ -852,7 +864,7 @@ struct vpe_build_param {
                                               surface */
     struct vpe_hdr_metadata hdr_metadata;  /**< HDR Metadata */
     struct vpe_reserved_param dst_reserved_param;
-
+    struct vpe_predication_info predication_info;
     // data flags
     struct {
         uint32_t hdr_metadata : 1;
diff --git a/src/amd/vpelib/inc/vpelib.h b/src/amd/vpelib/inc/vpelib.h
index d2f969fde1d..bcbd1a83163 100644
--- a/src/amd/vpelib/inc/vpelib.h
+++ b/src/amd/vpelib/inc/vpelib.h
@@ -136,6 +136,35 @@ enum vpe_status vpe_build_commands(
  */
 void vpe_get_optimal_num_of_taps(struct vpe *vpe, struct vpe_scaling_info *scaling_info);
 
+/**
+ * @brief
+ *   Build the command descriptor for timestamp operation
+ *   gets global gpu timestamp and writes it to the given gpu address
+ *
+ * @param[in,out] buf [in] memory allocated for the command buffer.
+ *   If size is 0, it reports the required size for this checked
+ *   operation. [out] the next write address and the filled sizes.
+ * @param[in] dst_address address where the data is written to
+ * @return status
+ */
+enum vpe_status vpe_build_timestamp(struct vpe_buf *buf, uint64_t dst_address);
+
+/**
+ * @brief
+ *   Build the command descriptor for resolve operation
+ *   copies the data from the read address to the write address to the number of dwords specified.
+ *
+ * @param[in,out] buf [in] memory allocated for the command buffer.
+ *   If size is 0, it reports the required size for this checked
+ *   operation. [out] the next write address and the filled sizes.
+ * @param[in] read_addr GPU virtual address where the data is read from
+ * @param[in] write_addr GPU virtual address where the data is written to
+ * @param[in] dword_count number of dwords to be copied
+ * @return status
+ */
+enum vpe_status vpe_build_resolve_query(
+    struct vpe_buf *buf, uint64_t read_addr, uint64_t write_addr, uint32_t dword_count);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/vpelib/src/core/inc/vpe_command.h b/src/amd/vpelib/src/core/inc/vpe_command.h
index 07294cc1d58..1e2d006740f 100644
--- a/src/amd/vpelib/src/core/inc/vpe_command.h
+++ b/src/amd/vpelib/src/core/inc/vpe_command.h
@@ -45,6 +45,8 @@ enum VPE_CMD_OPCODE {
     VPE_CMD_OPCODE_PLANE_FILL       = 0xB,
     VPE_CMD_OPCODE_COLLABORATE_SYNC = 0xC,
     VPE_CMD_OPCODE_TIMESTAMP        = 0xD,
+    VPE_CMD_OPCODE_QUERY_RESOLVE    = 0xF,
+    VPE_CMD_OPCODE_SET_PREDICATION  = 0x9
 };
 
 /** Generic Command Header
@@ -62,6 +64,25 @@
     (((subop << VPE_HEADER_SUB_OPCODE__SHIFT) & VPE_HEADER_SUB_OPCODE_MASK) | \
         ((op << VPE_HEADER_OPCODE__SHIFT) & VPE_HEADER_OPCODE_MASK))
 
+#define VPE_PREDICATION_SUB_OPCODE 1
+#define VPE_PREDICATION_CMD_SIZE 16
+#define VPE_PREDICATION_POLARITY_SHIFT 31
+#define VPE_PREDICATION_ADDR_SHIFT 32
+#define VPE_PREDICATION_HIGH_ADDR_MASK 0xFFFFFFFF00000000
+#define VPE_PREDICATION_LOW_ADDR_MASK 0x00000000FFFFFFFF
+
+#define VPE_TIMESTAMP_SUB_OPCODE 2
+#define VPE_TIMESTAMP_CMD_SIZE 12
+#define VPE_TIMESTAMP_ADDR_SHIFT 32
+#define VPE_TIMESTAMP_HIGH_ADDR_MASK 0xFFFFFFFF00000000
+#define VPE_TIMESTAMP_LOW_ADDR_MASK 0x00000000FFFFFFFF
+
+#define VPE_RESOLVE_QUERY_SUB_OPCODE 0
+#define VPE_RESOLVE_QUERY_CMD_SIZE 24
+#define VPE_RESOLVE_QUERY_ADDR_SHIFT 32
+#define VPE_RESOLVE_QUERY_HIGH_ADDR_MASK 0xFFFFFFFF00000000
+#define VPE_RESOLVE_QUERY_LOW_ADDR_MASK 0x00000000FFFFFFFF
+
 /************************
  * VPEP Config
  ************************/
diff --git a/src/amd/vpelib/src/core/vpelib.c b/src/amd/vpelib/src/core/vpelib.c
index e07b1f32a5f..ac3f71fc66d 100644
--- a/src/amd/vpelib/src/core/vpelib.c
+++ b/src/amd/vpelib/src/core/vpelib.c
@@ -39,6 +39,7 @@
 #include "geometric_scaling.h"
 #include
 #include
+#include
 
 static void dummy_sys_event(enum vpe_event_id eventId, ...)
 {
@@ -186,6 +187,33 @@
     destroy_output_config_vector(vpe_priv);
 }
 
+static enum vpe_status vpe_build_set_predication(uint64_t buf_cpu_va,
+    enum predication_polarity polarity, uint64_t condition_address, uint32_t execution_count)
+{
+    if (!buf_cpu_va || !condition_address || !execution_count)
+        return VPE_STATUS_ERROR;
+
+    uint32_t *buffer = (uint32_t *)(uintptr_t)buf_cpu_va;
+    uint32_t header = VPE_CMD_HEADER(VPE_CMD_OPCODE_SET_PREDICATION, VPE_PREDICATION_SUB_OPCODE);
+    header |= (polarity << VPE_PREDICATION_POLARITY_SHIFT);
+
+    uint32_t low_condition_addr = (condition_address & VPE_PREDICATION_LOW_ADDR_MASK);
+    uint32_t high_condition_addr =
+        (condition_address & VPE_PREDICATION_HIGH_ADDR_MASK) >> VPE_PREDICATION_ADDR_SHIFT;
+
+    uint32_t number_of_dwords = (execution_count + sizeof(uint32_t) - 1) / sizeof(uint32_t);
+
+    *buffer = header;
+    buffer++;
+    *buffer = low_condition_addr;
+    buffer++;
+    *buffer = high_condition_addr;
+    buffer++;
+    *buffer = number_of_dwords;
+
+    return VPE_STATUS_OK;
+}
+
 struct vpe *vpe_create(const struct vpe_init_data *params)
 {
     struct vpe_priv *vpe_priv;
@@ -681,6 +709,10 @@ enum vpe_status vpe_build_commands(
         bufs->cmd_buf.size = vpe_priv->bufs_required.cmd_buf_size;
         bufs->emb_buf.size = vpe_priv->bufs_required.emb_buf_size;
 
+        if (param->predication_info.enable == true) {
+            bufs->cmd_buf.size += VPE_PREDICATION_CMD_SIZE;
+        }
+
         return VPE_STATUS_OK;
     } else if ((bufs->cmd_buf.size < vpe_priv->bufs_required.cmd_buf_size) ||
                (bufs->emb_buf.size < vpe_priv->bufs_required.emb_buf_size)) {
@@ -758,6 +790,12 @@
         vpe_priv->output_ctx.surface.format, &vpe_priv->output_ctx.mpc_bg_color,
         &vpe_priv->output_ctx.opp_bg_color, vpe_priv->stream_ctx[0].enable_3dlut);
 
+    if (param->predication_info.enable == true) {
+        curr_bufs.cmd_buf.cpu_va += VPE_PREDICATION_CMD_SIZE;
+        curr_bufs.cmd_buf.gpu_va += VPE_PREDICATION_CMD_SIZE;
+        curr_bufs.cmd_buf.size -= VPE_PREDICATION_CMD_SIZE;
+    }
+
     if (vpe_priv->collaboration_mode == true) {
         status = builder->build_collaborate_sync_cmd(vpe_priv, &curr_bufs);
         if (status != VPE_STATUS_OK) {
@@ -821,6 +859,17 @@
         bufs->emb_buf.cpu_va = emb_buf_cpu_a;
     }
 
+    if (status == VPE_STATUS_OK && param->predication_info.enable == true) {
+        status = vpe_build_set_predication(bufs->cmd_buf.cpu_va, param->predication_info.polarity,
+            param->predication_info.gpu_va,
+            (uint32_t)(bufs->cmd_buf.size -
+                VPE_PREDICATION_CMD_SIZE)); // build cmd size - predication size
+
+        if (status != VPE_STATUS_OK) {
+            vpe_log("failed in building vpe predication cmd %d\n", (int)status);
+        }
+    }
+
     vpe_priv->ops_support = false;
 
     if (vpe_priv->init.debug.assert_when_not_support)
@@ -840,3 +889,73 @@ void vpe_get_optimal_num_of_taps(struct vpe *vpe, struct vpe_scaling_info *scali
     dpp->funcs->get_optimal_number_of_taps(
         &scaling_info->src_rect, &scaling_info->dst_rect, &scaling_info->taps);
 }
+
+enum vpe_status vpe_build_timestamp(struct vpe_buf *buf, uint64_t dst_addr)
+{
+    if (!dst_addr || !buf)
+        return VPE_STATUS_ERROR;
+
+    enum vpe_status result = VPE_STATUS_OK;
+
+    // We return required size if size is equal to 0
+    if (buf->size == 0) {
+        buf->size = VPE_TIMESTAMP_CMD_SIZE;
+    } else if (buf->size < VPE_TIMESTAMP_CMD_SIZE) {
+        result = VPE_STATUS_BUFFER_OVERFLOW;
+    } else {
+        uint32_t *buffer = (uint32_t *)(uintptr_t)buf->cpu_va;
+        uint32_t header = VPE_CMD_HEADER(VPE_CMD_OPCODE_TIMESTAMP, VPE_TIMESTAMP_SUB_OPCODE);
+        uint32_t low_addr = (dst_addr & VPE_TIMESTAMP_LOW_ADDR_MASK);
+        uint32_t high_addr = (dst_addr & VPE_TIMESTAMP_HIGH_ADDR_MASK) >> VPE_TIMESTAMP_ADDR_SHIFT;
+
+        *buffer = header;
+        buffer++;
+        *buffer = low_addr;
+        buffer++;
+        *buffer = high_addr;
+    }
+
+    return result;
+}
+
+enum vpe_status vpe_build_resolve_query(
+    struct vpe_buf *buf, uint64_t read_addr, uint64_t write_addr, uint32_t dword_count)
+{
+    if (!buf || !read_addr || !write_addr || !dword_count)
+        return VPE_STATUS_ERROR;
+
+    enum vpe_status result = VPE_STATUS_OK;
+
+    // We return required size if size is equal to 0
+    if (buf->size == 0) {
+        buf->size = VPE_RESOLVE_QUERY_CMD_SIZE;
+    } else if (buf->size < VPE_RESOLVE_QUERY_CMD_SIZE) {
+        result = VPE_STATUS_BUFFER_OVERFLOW;
+    } else {
+        uint32_t *buffer = (uint32_t *)(uintptr_t)buf->cpu_va;
+        uint32_t header =
+            VPE_CMD_HEADER(VPE_CMD_OPCODE_QUERY_RESOLVE, VPE_RESOLVE_QUERY_SUB_OPCODE);
+
+        uint32_t low_read_addr = (read_addr & VPE_RESOLVE_QUERY_LOW_ADDR_MASK);
+        uint32_t high_read_addr =
+            (read_addr & VPE_RESOLVE_QUERY_HIGH_ADDR_MASK) >> VPE_RESOLVE_QUERY_ADDR_SHIFT;
+
+        uint32_t low_write_addr = (write_addr & VPE_RESOLVE_QUERY_LOW_ADDR_MASK);
+        uint32_t high_write_addr =
+            (write_addr & VPE_RESOLVE_QUERY_HIGH_ADDR_MASK) >> VPE_RESOLVE_QUERY_ADDR_SHIFT;
+
+        *buffer = header;
+        buffer++;
+        *buffer = dword_count;
+        buffer++;
+        *buffer = low_read_addr;
+        buffer++;
+        *buffer = high_read_addr;
+        buffer++;
+        *buffer = low_write_addr;
+        buffer++;
+        *buffer = high_write_addr;
+    }
+
+    return result;
+}