winsys/amdgpu: use next_wptr as cache for userq

The userq packets are added using the _pkt_begin(), _pkt_add() and _pkt_end()
functions. As of now _pkt_begin() and _pkt_add() are called once. It
is not advisable to update the wptr value in the mqd multiple times. Hence use
next_wptr as a cache in the macros and update the mqd wptr only once, right
before job submission.

Suggested-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32700>
This commit is contained in:
Yogesh Mohan Marimuthu
2024-12-18 16:44:07 +05:30
committed by Marge Bot
parent acbfcb4d36
commit 57f28ad47f
3 changed files with 15 additions and 8 deletions
+3 -2
View File
@@ -1428,7 +1428,7 @@ static void amdgpu_cs_add_userq_packets(struct amdgpu_userq *userq,
* Calculating user_fence_seq_num this way to match the kernel fence that is
* returned by the userq_wait ioctl.
*/
userq->user_fence_seq_num = *userq->wptr_bo_map + __num_dw_written + 8 + 2;
userq->user_fence_seq_num = __next_wptr + 8 + 2;
/* add release mem for user fence */
amdgpu_pkt_add_dw(PKT3(PKT3_RELEASE_MEM, 6, 0));
@@ -1548,7 +1548,8 @@ static int amdgpu_cs_submit_ib_userq(struct amdgpu_userq *userq,
.num_bo_write_handles = num_shared_buf_write,
};
userq->doorbell_bo_map[AMDGPU_USERQ_DOORBELL_INDEX] = *userq->wptr_bo_map;
*userq->wptr_bo_map = userq->next_wptr;
userq->doorbell_bo_map[AMDGPU_USERQ_DOORBELL_INDEX] = userq->next_wptr;
r = ac_drm_userq_signal(aws->dev, &userq_signal_data);
*seq_no = userq->user_fence_seq_num;
@@ -38,6 +38,7 @@ amdgpu_userq_ring_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq)
userq->user_fence_va = amdgpu_bo_get_va(userq->gtt_bo) + AMDGPU_USERQ_RING_SIZE;
*userq->user_fence_ptr = 0;
*userq->wptr_bo_map = 0;
userq->next_wptr = 0;
userq->rptr_bo = amdgpu_bo_create(aws, aws->info.gart_page_size, 256, RADEON_DOMAIN_VRAM,
RADEON_FLAG_CLEAR_VRAM | RADEON_FLAG_GL2_BYPASS |
+11 -6
View File
@@ -19,17 +19,17 @@ extern "C" {
/* An offset into doorbell page. Any number will work. */
#define AMDGPU_USERQ_DOORBELL_INDEX 4
#define amdgpu_pkt_begin() uint32_t __num_dw_written = 0; \
uint32_t __ring_start = *userq->wptr_bo_map & AMDGPU_USERQ_RING_SIZE_DW_MASK;
#define amdgpu_pkt_begin() uint32_t *__ring_ptr = userq->ring_ptr; \
uint64_t __next_wptr = userq->next_wptr;
#define amdgpu_pkt_add_dw(value) do { \
*(userq->ring_ptr + ((__ring_start + __num_dw_written) & AMDGPU_USERQ_RING_SIZE_DW_MASK)) \
= value; \
__num_dw_written++; \
*(__ring_ptr + (__next_wptr & AMDGPU_USERQ_RING_SIZE_DW_MASK)) = value; \
__next_wptr++; \
} while (0)
#define amdgpu_pkt_end() do { \
*userq->wptr_bo_map += __num_dw_written; \
assert(__next_wptr - *userq->user_fence_ptr <= AMDGPU_USERQ_RING_SIZE_DW); \
userq->next_wptr = __next_wptr; \
} while (0)
struct amdgpu_winsys;
@@ -62,6 +62,11 @@ struct amdgpu_userq {
struct pb_buffer_lean *wptr_bo;
uint64_t *wptr_bo_map;
/* Holds the wptr value for the in-progress submission. When we're ready
* to submit it, this value will be written to the door bell.
* (this avoids writing multiple times to the door bell for the same
* submission) */
uint64_t next_wptr;
struct pb_buffer_lean *rptr_bo;
struct pb_buffer_lean *doorbell_bo;