diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 3f1d359c4b9..5bd07650217 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -354,8 +354,7 @@ genX(simple_shader_push_state_address)(struct anv_simple_shader *state, void genX(emit_simple_shader_end)(struct anv_simple_shader *state); -VkResult genX(init_trtt_context_state)(struct anv_device *device, - struct anv_async_submit *submit); +VkResult genX(init_trtt_context_state)(struct anv_async_submit *submit); void genX(write_trtt_entries)(struct anv_async_submit *submit, struct anv_trtt_bind *l3l2_binds, diff --git a/src/intel/vulkan/anv_sparse.c b/src/intel/vulkan/anv_sparse.c index 3c5c8e751d0..b80b681d44d 100644 --- a/src/intel/vulkan/anv_sparse.c +++ b/src/intel/vulkan/anv_sparse.c @@ -405,9 +405,8 @@ trtt_get_page_table_bo(struct anv_device *device, struct anv_bo **bo, } static VkResult -anv_trtt_init_context_state(struct anv_queue *queue) +anv_trtt_init_queues_state(struct anv_device *device) { - struct anv_device *device = queue->device; struct anv_trtt *trtt = &device->trtt; struct anv_bo *l3_bo; @@ -417,43 +416,53 @@ anv_trtt_init_context_state(struct anv_queue *queue) trtt->l3_mirror = vk_zalloc(&device->vk.alloc, 4096, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (!trtt->l3_mirror) { - result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - return result; - } + if (!trtt->l3_mirror) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); /* L3 has 512 entries, so we can have up to 512 L2 tables. 
*/ trtt->l2_mirror = vk_zalloc(&device->vk.alloc, 512 * 4096, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); if (!trtt->l2_mirror) { - result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_free_l3; + vk_free(&device->vk.alloc, trtt->l3_mirror); + trtt->l3_mirror = NULL; /* do not leave a freed pointer behind in the device-level trtt state */ + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); } + struct anv_async_submit submits[device->queue_count]; /* one slot per device queue */ + uint32_t submits_used = 0; /* leading entries of submits[] that still need wait + fini */ + for (uint32_t i = 0; i < device->queue_count; i++) { + struct anv_queue *q = &device->queues[i]; - struct anv_async_submit submit; - result = anv_async_submit_init(&submit, queue, &device->batch_bo_pool, - false, true); - if (result != VK_SUCCESS) - return result; + result = anv_async_submit_init(&submits[submits_used], q, + &device->batch_bo_pool, false, true); + if (result != VK_SUCCESS) + break; - result = anv_genX(device->info, init_trtt_context_state)(device, &submit); - if (result != VK_SUCCESS) - goto fail_fini_submit; + struct anv_async_submit *submit = &submits[submits_used++]; - anv_genX(device->info, async_submit_end)(&submit); + result = anv_genX(device->info, init_trtt_context_state)(submit); + if (result != VK_SUCCESS) { + anv_async_submit_fini(submit); + submits_used--; + break; + } - result = device->kmd_backend->queue_exec_async(&submit, 0, NULL, 1, - &submit.signal); + anv_genX(device->info, async_submit_end)(submit); - anv_async_submit_wait(&submit); + result = device->kmd_backend->queue_exec_async(submit, 0, NULL, 1, + &submit->signal); + if (result != VK_SUCCESS) { + anv_async_submit_fini(submit); + submits_used--; + break; + } + } -fail_fini_submit: - anv_async_submit_fini(&submit); - return result; + for (uint32_t i = 0; i < submits_used; i++) { /* reached on success and after any break above */ + anv_async_submit_wait(&submits[i]); + anv_async_submit_fini(&submits[i]); + } -fail_free_l3: - vk_free(&device->vk.alloc, trtt->l3_mirror); return result; } @@ -645,7 +654,7 @@ anv_sparse_bind_trtt(struct anv_device *device, * submission. 
*/ if (!trtt->l3_addr) { - result = anv_trtt_init_context_state(sparse_submit->queue); + result = anv_trtt_init_queues_state(device); if (result != VK_SUCCESS) goto error_add_bind; } diff --git a/src/intel/vulkan/genX_init_state.c b/src/intel/vulkan/genX_init_state.c index e15729253aa..7f5ff759830 100644 --- a/src/intel/vulkan/genX_init_state.c +++ b/src/intel/vulkan/genX_init_state.c @@ -1442,10 +1442,11 @@ genX(apply_task_urb_workaround)(struct anv_cmd_buffer *cmd_buffer) } VkResult -genX(init_trtt_context_state)(struct anv_device *device, - struct anv_async_submit *submit) +genX(init_trtt_context_state)(struct anv_async_submit *submit) { #if GFX_VER >= 12 + struct anv_queue *queue = submit->queue; + struct anv_device *device = queue->device; /* device is now derived from the submit's queue instead of being a parameter */ struct anv_trtt *trtt = &device->trtt; struct anv_batch *batch = &submit->batch; @@ -1462,25 +1463,61 @@ genX(init_trtt_context_state)(struct anv_device *device, anv_batch_write_reg(batch, GENX(GFX_TRTT_L3_BASE_HIGH), trtt_base_high) trtt_base_high.TRVAL3PointerUpperAddress = l3_addr_high; + anv_batch_write_reg(batch, GENX(BLT_TRTT_INVAL), trtt_inval) + trtt_inval.InvalidTileDetectionValue = ANV_TRTT_L1_INVALID_TILE_VAL; + anv_batch_write_reg(batch, GENX(BLT_TRTT_NULL), trtt_null) + trtt_null.NullTileDetectionValue = ANV_TRTT_L1_NULL_TILE_VAL; + anv_batch_write_reg(batch, GENX(BLT_TRTT_L3_BASE_LOW), trtt_base_low) + trtt_base_low.TRVAL3PointerLowerAddress = l3_addr_low; + anv_batch_write_reg(batch, GENX(BLT_TRTT_L3_BASE_HIGH), trtt_base_high) + trtt_base_high.TRVAL3PointerUpperAddress = l3_addr_high; + + anv_batch_write_reg(batch, GENX(COMP_CTX0_TRTT_INVAL), trtt_inval) + trtt_inval.InvalidTileDetectionValue = ANV_TRTT_L1_INVALID_TILE_VAL; + anv_batch_write_reg(batch, GENX(COMP_CTX0_TRTT_NULL), trtt_null) + trtt_null.NullTileDetectionValue = ANV_TRTT_L1_NULL_TILE_VAL; + anv_batch_write_reg(batch, GENX(COMP_CTX0_TRTT_L3_BASE_LOW), trtt_base_low) + trtt_base_low.TRVAL3PointerLowerAddress = l3_addr_low; + 
anv_batch_write_reg(batch, GENX(COMP_CTX0_TRTT_L3_BASE_HIGH), trtt_base_high) + trtt_base_high.TRVAL3PointerUpperAddress = l3_addr_high; + #if GFX_VER >= 20 uint32_t trva_base = device->physical->va.trtt.addr >> 44; anv_batch_write_reg(batch, GENX(GFX_TRTT_VA_RANGE), trtt_va_range) trtt_va_range.TRVABase = trva_base; + anv_batch_write_reg(batch, GENX(BLT_TRTT_VA_RANGE), trtt_va_range) + trtt_va_range.TRVABase = trva_base; + anv_batch_write_reg(batch, GENX(COMP_CTX0_TRTT_VA_RANGE), trtt_va_range) + trtt_va_range.TRVABase = trva_base; #else anv_batch_write_reg(batch, GENX(GFX_TRTT_VA_RANGE), trtt_va_range) { trtt_va_range.TRVAMaskValue = 0xF; trtt_va_range.TRVADataValue = 0xF; } + anv_batch_write_reg(batch, GENX(BLT_TRTT_VA_RANGE), trtt_va_range) { + trtt_va_range.TRVAMaskValue = 0xF; + trtt_va_range.TRVADataValue = 0xF; + } + anv_batch_write_reg(batch, GENX(COMP_CTX0_TRTT_VA_RANGE), trtt_va_range) { + trtt_va_range.TRVAMaskValue = 0xF; + trtt_va_range.TRVADataValue = 0xF; + } #endif /* Enabling TR-TT needs to be done after setting up the other registers. */ anv_batch_write_reg(batch, GENX(GFX_TRTT_CR), trtt_cr) trtt_cr.TRTTEnable = true; + anv_batch_write_reg(batch, GENX(BLT_TRTT_CR), trtt_cr) + trtt_cr.TRTTEnable = true; + anv_batch_write_reg(batch, GENX(COMP_CTX0_TRTT_CR), trtt_cr) + trtt_cr.TRTTEnable = true; - genx_batch_emit_pipe_control(batch, device->info, _3D, - ANV_PIPE_CS_STALL_BIT | - ANV_PIPE_TLB_INVALIDATE_BIT); + if (queue->family->engine_class != INTEL_ENGINE_CLASS_COPY) { /* copy-class queues get no _3D pipe control, so no CS stall or TLB invalidate is emitted for them here */ + genx_batch_emit_pipe_control(batch, device->info, _3D, + ANV_PIPE_CS_STALL_BIT | + ANV_PIPE_TLB_INVALIDATE_BIT); + } #endif return VK_SUCCESS; }