winsys/amdgpu: use next_wptr as cache for userq
The userq packets are added using _pkt_begin(), _pkt_add(), _pkt_end() functions. As of now _pkt_begin() and _pkt_add() are called once. It is not advisable to update the wptr value in the mqd multiple times. Hence use next_wptr as a cache in the macros and update the mqd wptr only once, before job submission. Suggested-by: Christian König <christian.koenig@amd.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32700>
This commit is contained in:
committed by
Marge Bot
parent
acbfcb4d36
commit
57f28ad47f
@@ -1428,7 +1428,7 @@ static void amdgpu_cs_add_userq_packets(struct amdgpu_userq *userq,
|
||||
* Calculating userq_fence_seq_num this way to match with kernel fence that is
|
||||
* returned in userq_wait ioctl.
|
||||
*/
|
||||
userq->user_fence_seq_num = *userq->wptr_bo_map + __num_dw_written + 8 + 2;
|
||||
userq->user_fence_seq_num = __next_wptr + 8 + 2;
|
||||
|
||||
/* add release mem for user fence */
|
||||
amdgpu_pkt_add_dw(PKT3(PKT3_RELEASE_MEM, 6, 0));
|
||||
@@ -1548,7 +1548,8 @@ static int amdgpu_cs_submit_ib_userq(struct amdgpu_userq *userq,
|
||||
.num_bo_write_handles = num_shared_buf_write,
|
||||
};
|
||||
|
||||
userq->doorbell_bo_map[AMDGPU_USERQ_DOORBELL_INDEX] = *userq->wptr_bo_map;
|
||||
*userq->wptr_bo_map = userq->next_wptr;
|
||||
userq->doorbell_bo_map[AMDGPU_USERQ_DOORBELL_INDEX] = userq->next_wptr;
|
||||
r = ac_drm_userq_signal(aws->dev, &userq_signal_data);
|
||||
|
||||
*seq_no = userq->user_fence_seq_num;
|
||||
|
||||
@@ -38,6 +38,7 @@ amdgpu_userq_ring_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq)
|
||||
userq->user_fence_va = amdgpu_bo_get_va(userq->gtt_bo) + AMDGPU_USERQ_RING_SIZE;
|
||||
*userq->user_fence_ptr = 0;
|
||||
*userq->wptr_bo_map = 0;
|
||||
userq->next_wptr = 0;
|
||||
|
||||
userq->rptr_bo = amdgpu_bo_create(aws, aws->info.gart_page_size, 256, RADEON_DOMAIN_VRAM,
|
||||
RADEON_FLAG_CLEAR_VRAM | RADEON_FLAG_GL2_BYPASS |
|
||||
|
||||
@@ -19,17 +19,17 @@ extern "C" {
|
||||
/* An offset into doorbell page. Any number will work. */
|
||||
#define AMDGPU_USERQ_DOORBELL_INDEX 4
|
||||
|
||||
#define amdgpu_pkt_begin() uint32_t __num_dw_written = 0; \
|
||||
uint32_t __ring_start = *userq->wptr_bo_map & AMDGPU_USERQ_RING_SIZE_DW_MASK;
|
||||
#define amdgpu_pkt_begin() uint32_t *__ring_ptr = userq->ring_ptr; \
|
||||
uint64_t __next_wptr = userq->next_wptr;
|
||||
|
||||
#define amdgpu_pkt_add_dw(value) do { \
|
||||
*(userq->ring_ptr + ((__ring_start + __num_dw_written) & AMDGPU_USERQ_RING_SIZE_DW_MASK)) \
|
||||
= value; \
|
||||
__num_dw_written++; \
|
||||
*(__ring_ptr + (__next_wptr & AMDGPU_USERQ_RING_SIZE_DW_MASK)) = value; \
|
||||
__next_wptr++; \
|
||||
} while (0)
|
||||
|
||||
#define amdgpu_pkt_end() do { \
|
||||
*userq->wptr_bo_map += __num_dw_written; \
|
||||
assert(__next_wptr - *userq->user_fence_ptr <= AMDGPU_USERQ_RING_SIZE_DW); \
|
||||
userq->next_wptr = __next_wptr; \
|
||||
} while (0)
|
||||
|
||||
struct amdgpu_winsys;
|
||||
@@ -62,6 +62,11 @@ struct amdgpu_userq {
|
||||
|
||||
struct pb_buffer_lean *wptr_bo;
|
||||
uint64_t *wptr_bo_map;
|
||||
/* Holds the wptr value for the in-progress submission. When we're ready
|
||||
* to submit it, this value will be written to the door bell.
|
||||
* (this avoids writing multiple times to the door bell for the same
|
||||
* submission) */
|
||||
uint64_t next_wptr;
|
||||
struct pb_buffer_lean *rptr_bo;
|
||||
|
||||
struct pb_buffer_lean *doorbell_bo;
|
||||
|
||||
Reference in New Issue
Block a user