diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp index 95c233d8519..91b70b0d91a 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp @@ -1428,7 +1428,7 @@ static void amdgpu_cs_add_userq_packets(struct amdgpu_userq *userq, * Calculcating userq_fence_seq_num this way to match with kernel fence that is * returned in userq_wait iotl. */ - userq->user_fence_seq_num = *userq->wptr_bo_map + __num_dw_written + 8 + 2; + userq->user_fence_seq_num = __next_wptr + 8 + 2; /* add release mem for user fence */ amdgpu_pkt_add_dw(PKT3(PKT3_RELEASE_MEM, 6, 0)); @@ -1548,7 +1548,8 @@ static int amdgpu_cs_submit_ib_userq(struct amdgpu_userq *userq, .num_bo_write_handles = num_shared_buf_write, }; - userq->doorbell_bo_map[AMDGPU_USERQ_DOORBELL_INDEX] = *userq->wptr_bo_map; + *userq->wptr_bo_map = userq->next_wptr; + userq->doorbell_bo_map[AMDGPU_USERQ_DOORBELL_INDEX] = userq->next_wptr; r = ac_drm_userq_signal(aws->dev, &userq_signal_data); *seq_no = userq->user_fence_seq_num; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_userq.c b/src/gallium/winsys/amdgpu/drm/amdgpu_userq.c index 79955fb6daf..6ce420feb14 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_userq.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_userq.c @@ -38,6 +38,7 @@ amdgpu_userq_ring_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq) userq->user_fence_va = amdgpu_bo_get_va(userq->gtt_bo) + AMDGPU_USERQ_RING_SIZE; *userq->user_fence_ptr = 0; *userq->wptr_bo_map = 0; + userq->next_wptr = 0; userq->rptr_bo = amdgpu_bo_create(aws, aws->info.gart_page_size, 256, RADEON_DOMAIN_VRAM, RADEON_FLAG_CLEAR_VRAM | RADEON_FLAG_GL2_BYPASS | diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_userq.h b/src/gallium/winsys/amdgpu/drm/amdgpu_userq.h index 8679d2c13b7..28213778643 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_userq.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_userq.h @@ -19,17 +19,17 @@ extern "C" { /* An offset into doorbell page. Any number will work. */ #define AMDGPU_USERQ_DOORBELL_INDEX 4 -#define amdgpu_pkt_begin() uint32_t __num_dw_written = 0; \ - uint32_t __ring_start = *userq->wptr_bo_map & AMDGPU_USERQ_RING_SIZE_DW_MASK; +#define amdgpu_pkt_begin() uint32_t *__ring_ptr = userq->ring_ptr; \ + uint64_t __next_wptr = userq->next_wptr; #define amdgpu_pkt_add_dw(value) do { \ - *(userq->ring_ptr + ((__ring_start + __num_dw_written) & AMDGPU_USERQ_RING_SIZE_DW_MASK)) \ - = value; \ - __num_dw_written++; \ + *(__ring_ptr + (__next_wptr & AMDGPU_USERQ_RING_SIZE_DW_MASK)) = value; \ + __next_wptr++; \ } while (0) #define amdgpu_pkt_end() do { \ - *userq->wptr_bo_map += __num_dw_written; \ + assert(__next_wptr - *userq->user_fence_ptr <= AMDGPU_USERQ_RING_SIZE_DW); \ + userq->next_wptr = __next_wptr; \ } while (0) struct amdgpu_winsys; @@ -62,6 +62,11 @@ struct amdgpu_userq { struct pb_buffer_lean *wptr_bo; uint64_t *wptr_bo_map; + /* Holds the wptr value for the in-progress submission. When we're ready + * to submit it, this value will be written to the door bell. + * (this avoids writing multiple times to the door bell for the same + * submission) */ + uint64_t next_wptr; struct pb_buffer_lean *rptr_bo; struct pb_buffer_lean *doorbell_bo;