anv: Make a batch decoder for each queue family

The decoder context needs to know what engine it's associated with.
Nowadays, we have render, compute, blitter, even video engines being
used from the same driver.  Rather than trying to have a single decoder
and thwacking the engine field back and forth between calls, we make
one per queue family, and stash a pointer in anv_queue for easy access.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21149>
This commit is contained in:
Kenneth Graunke
2023-01-30 14:46:26 -08:00
committed by Marge Bot
parent 1a1fa2393e
commit 79caf8a44b
4 changed files with 33 additions and 21 deletions
+4 -4
View File
@@ -1177,7 +1177,7 @@ anv_cmd_buffer_exec_batch_debug(struct anv_queue *queue,
uint64_t pass_batch_offset =
khr_perf_query_preamble_offset(perf_query_pool, perf_query_pass);
intel_print_batch(&device->decoder_ctx,
intel_print_batch(queue->decoder,
pass_batch_bo->map + pass_batch_offset, 64,
pass_batch_bo->offset + pass_batch_offset, false);
}
@@ -1185,12 +1185,12 @@ anv_cmd_buffer_exec_batch_debug(struct anv_queue *queue,
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
struct anv_batch_bo **bo = u_vector_tail(&cmd_buffers[i]->seen_bbos);
device->cmd_buffer_being_decoded = cmd_buffers[i];
intel_print_batch(&device->decoder_ctx, (*bo)->bo->map,
intel_print_batch(queue->decoder, (*bo)->bo->map,
(*bo)->bo->size, (*bo)->bo->offset, false);
device->cmd_buffer_being_decoded = NULL;
}
} else {
intel_print_batch(&device->decoder_ctx, device->trivial_batch_bo->map,
intel_print_batch(queue->decoder, device->trivial_batch_bo->map,
device->trivial_batch_bo->size,
device->trivial_batch_bo->offset, false);
}
@@ -1383,7 +1383,7 @@ anv_queue_submit_simple_batch(struct anv_queue *queue,
#endif
if (INTEL_DEBUG(DEBUG_BATCH)) {
intel_print_batch(&device->decoder_ctx,
intel_print_batch(queue->decoder,
batch_bo->map,
batch_bo->size,
batch_bo->offset, false);
+24 -16
View File
@@ -3200,22 +3200,26 @@ VkResult anv_CreateDevice(
goto fail_alloc;
if (INTEL_DEBUG(DEBUG_BATCH)) {
const unsigned decode_flags =
INTEL_BATCH_DECODE_FULL |
(INTEL_DEBUG(DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0) |
INTEL_BATCH_DECODE_OFFSETS |
INTEL_BATCH_DECODE_FLOATS;
for (unsigned i = 0; i < physical_device->queue.family_count; i++) {
struct intel_batch_decode_ctx *decoder = &device->decoder[i];
intel_batch_decode_ctx_init(&device->decoder_ctx,
&physical_device->compiler->isa,
&physical_device->info,
stderr, decode_flags, NULL,
decode_get_bo, NULL, device);
const unsigned decode_flags =
INTEL_BATCH_DECODE_FULL |
(INTEL_DEBUG(DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0) |
INTEL_BATCH_DECODE_OFFSETS |
INTEL_BATCH_DECODE_FLOATS;
device->decoder_ctx.dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS;
device->decoder_ctx.surface_base = INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS;
device->decoder_ctx.instruction_base =
INSTRUCTION_STATE_POOL_MIN_ADDRESS;
intel_batch_decode_ctx_init(decoder,
&physical_device->compiler->isa,
&physical_device->info,
stderr, decode_flags, NULL,
decode_get_bo, NULL, device);
decoder->engine = physical_device->queue.families[i].engine_class;
decoder->dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS;
decoder->surface_base = INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS;
decoder->instruction_base = INSTRUCTION_STATE_POOL_MIN_ADDRESS;
}
}
anv_device_set_physical(device, physical_device);
@@ -3635,6 +3639,8 @@ void anv_DestroyDevice(
if (!device)
return;
struct anv_physical_device *pdevice = device->physical;
anv_device_utrace_finish(device);
anv_device_finish_blorp(device);
@@ -3707,8 +3713,10 @@ void anv_DestroyDevice(
intel_gem_destroy_context(device->fd, device->context_id);
if (INTEL_DEBUG(DEBUG_BATCH))
intel_batch_decode_ctx_finish(&device->decoder_ctx);
if (INTEL_DEBUG(DEBUG_BATCH)) {
for (unsigned i = 0; i < pdevice->queue.family_count; i++)
intel_batch_decode_ctx_finish(&device->decoder[i]);
}
close(device->fd);
+3 -1
View File
@@ -1050,6 +1050,8 @@ struct anv_queue {
const struct anv_queue_family * family;
struct intel_batch_decode_ctx * decoder;
uint32_t exec_flags;
/** Synchronization object for debug purposes (DEBUG_SYNC) */
@@ -1221,7 +1223,7 @@ struct anv_device {
pthread_mutex_t mutex;
pthread_cond_t queue_submit;
struct intel_batch_decode_ctx decoder_ctx;
struct intel_batch_decode_ctx decoder[ANV_MAX_QUEUE_FAMILIES];
/*
* When decoding an anv_cmd_buffer, we might need to search for BOs through
* the cmd_buffer's list.
+2
View File
@@ -59,6 +59,8 @@ anv_queue_init(struct anv_device *device, struct anv_queue *queue,
queue->family = &pdevice->queue.families[queue->vk.queue_family_index];
queue->exec_flags = exec_flags;
queue->decoder = &device->decoder[queue->vk.queue_family_index];
return VK_SUCCESS;
}