anv: update batch chaining to Gfx9 commands

I don't know whether this fixes anything, but I noticed the generated draws
jump into addresses slightly different from CPU-generated jumps.

After checking the genxml, I noticed MI_BATCH_BUFFER_START "Batch
Buffer Start Address" fields have different sizes in Gfx8 & Gfx9+.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25705>
This commit is contained in:
Lionel Landwerlin
2023-10-13 11:57:11 +03:00
committed by Marge Bot
parent 6c268cdf05
commit cfdc57d120

View File

@@ -32,7 +32,7 @@
#include "anv_private.h"
#include "anv_measure.h"
#include "genxml/gen8_pack.h"
#include "genxml/gen9_pack.h"
#include "genxml/genX_bits.h"
#include "util/perf/u_trace.h"
@@ -346,7 +346,7 @@ anv_batch_bo_link(struct anv_cmd_buffer *cmd_buffer,
uint32_t next_bbo_offset)
{
const uint32_t bb_start_offset =
prev_bbo->length - GFX8_MI_BATCH_BUFFER_START_length * 4;
prev_bbo->length - GFX9_MI_BATCH_BUFFER_START_length * 4;
ASSERTED const uint32_t *bb_start = prev_bbo->bo->map + bb_start_offset;
/* Make sure we're looking at a MI_BATCH_BUFFER_START */
@@ -452,9 +452,9 @@ static void
emit_batch_buffer_start(struct anv_batch *batch,
struct anv_bo *bo, uint32_t offset)
{
anv_batch_emit(batch, GFX8_MI_BATCH_BUFFER_START, bbs) {
bbs.DWordLength = GFX8_MI_BATCH_BUFFER_START_length -
GFX8_MI_BATCH_BUFFER_START_length_bias;
anv_batch_emit(batch, GFX9_MI_BATCH_BUFFER_START, bbs) {
bbs.DWordLength = GFX9_MI_BATCH_BUFFER_START_length -
GFX9_MI_BATCH_BUFFER_START_length_bias;
bbs.SecondLevelBatchBuffer = Firstlevelbatch;
bbs.AddressSpaceIndicator = ASI_PPGTT;
bbs.BatchBufferStartAddress = (struct anv_address) { bo, offset };
@@ -483,7 +483,7 @@ cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer,
* have room for the chaining command. Since we're about to emit the
* chaining command, let's set it back where it should go.
*/
batch->end += GFX8_MI_BATCH_BUFFER_START_length * 4;
batch->end += GFX9_MI_BATCH_BUFFER_START_length * 4;
assert(batch->end == current_bbo->bo->map + current_bbo->bo->size);
emit_batch_buffer_start(batch, bbo->bo, 0);
@@ -507,8 +507,8 @@ anv_cmd_buffer_record_chain_submit(struct anv_cmd_buffer *cmd_buffer_from,
struct anv_batch_bo *first_bbo =
list_first_entry(&cmd_buffer_to->batch_bos, struct anv_batch_bo, link);
struct GFX8_MI_BATCH_BUFFER_START gen_bb_start = {
__anv_cmd_header(GFX8_MI_BATCH_BUFFER_START),
struct GFX9_MI_BATCH_BUFFER_START gen_bb_start = {
__anv_cmd_header(GFX9_MI_BATCH_BUFFER_START),
.SecondLevelBatchBuffer = Firstlevelbatch,
.AddressSpaceIndicator = ASI_PPGTT,
.BatchBufferStartAddress = (struct anv_address) { first_bbo->bo, 0 },
@@ -520,7 +520,7 @@ anv_cmd_buffer_record_chain_submit(struct anv_cmd_buffer *cmd_buffer_from,
.alloc = &cmd_buffer_from->vk.pool->alloc,
};
__anv_cmd_pack(GFX8_MI_BATCH_BUFFER_START)(&local_batch, bb_start, &gen_bb_start);
__anv_cmd_pack(GFX9_MI_BATCH_BUFFER_START)(&local_batch, bb_start, &gen_bb_start);
last_bbo->chained = true;
}
@@ -533,8 +533,8 @@ anv_cmd_buffer_record_end_submit(struct anv_cmd_buffer *cmd_buffer)
last_bbo->chained = false;
uint32_t *batch = cmd_buffer->batch_end;
anv_pack_struct(batch, GFX8_MI_BATCH_BUFFER_END,
__anv_cmd_header(GFX8_MI_BATCH_BUFFER_END));
anv_pack_struct(batch, GFX9_MI_BATCH_BUFFER_END,
__anv_cmd_header(GFX9_MI_BATCH_BUFFER_END));
}
static VkResult
@@ -550,7 +550,7 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, uint32_t size, void *_data)
/* Amount of reserved space at the end of the batch to account for the
* chaining instruction.
*/
const uint32_t batch_padding = GFX8_MI_BATCH_BUFFER_START_length * 4;
const uint32_t batch_padding = GFX9_MI_BATCH_BUFFER_START_length * 4;
/* Cap reallocation to chunk. */
uint32_t alloc_size = MIN2(
MAX2(batch->allocated_batch_size, size + batch_padding),
@@ -613,7 +613,7 @@ anv_cmd_buffer_chain_generation_batch(struct anv_batch *batch, uint32_t size, vo
list_addtail(&new_bbo->link, &cmd_buffer->generation_batch_bos);
anv_batch_bo_start(new_bbo, batch, GFX8_MI_BATCH_BUFFER_START_length * 4);
anv_batch_bo_start(new_bbo, batch, GFX9_MI_BATCH_BUFFER_START_length * 4);
return VK_SUCCESS;
}
@@ -840,7 +840,7 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer->batch.engine_class = cmd_buffer->queue_family->engine_class;
anv_batch_bo_start(batch_bo, &cmd_buffer->batch,
GFX8_MI_BATCH_BUFFER_START_length * 4);
GFX9_MI_BATCH_BUFFER_START_length * 4);
/* Generation batch is initialized empty since it's possible it won't be
* used.
@@ -924,7 +924,7 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer),
&cmd_buffer->batch,
GFX8_MI_BATCH_BUFFER_START_length * 4);
GFX9_MI_BATCH_BUFFER_START_length * 4);
while (u_vector_length(&cmd_buffer->bt_block_states) > 0) {
struct anv_state *bt_block = u_vector_remove(&cmd_buffer->bt_block_states);
@@ -974,7 +974,7 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
* that padding before we end the batch; otherwise, we may end up
* with our BATCH_BUFFER_END in another BO.
*/
cmd_buffer->batch.end += GFX8_MI_BATCH_BUFFER_START_length * 4;
cmd_buffer->batch.end += GFX9_MI_BATCH_BUFFER_START_length * 4;
assert(cmd_buffer->batch.start == batch_bo->bo->map);
assert(cmd_buffer->batch.end == batch_bo->bo->map + batch_bo->bo->size);
@@ -988,11 +988,11 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
if (batch_bo->chained)
emit_batch_buffer_start(&cmd_buffer->batch, batch_bo->bo, 0);
else
anv_batch_emit(&cmd_buffer->batch, GFX8_MI_BATCH_BUFFER_END, bbe);
anv_batch_emit(&cmd_buffer->batch, GFX9_MI_BATCH_BUFFER_END, bbe);
/* Round batch up to an even number of dwords. */
if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4)
anv_batch_emit(&cmd_buffer->batch, GFX8_MI_NOOP, noop);
anv_batch_emit(&cmd_buffer->batch, GFX9_MI_NOOP, noop);
cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY;
} else {
@@ -1017,16 +1017,16 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
int32_t batch_len = cmd_buffer->batch.next - cmd_buffer->batch.start;
for (int32_t i = 0; i < (prefetch_len - batch_len); i += 4)
anv_batch_emit(&cmd_buffer->batch, GFX8_MI_NOOP, noop);
anv_batch_emit(&cmd_buffer->batch, GFX9_MI_NOOP, noop);
}
void *jump_addr =
anv_batch_emitn(&cmd_buffer->batch,
GFX8_MI_BATCH_BUFFER_START_length,
GFX8_MI_BATCH_BUFFER_START,
GFX9_MI_BATCH_BUFFER_START_length,
GFX9_MI_BATCH_BUFFER_START,
.AddressSpaceIndicator = ASI_PPGTT,
.SecondLevelBatchBuffer = Firstlevelbatch) +
(GFX8_MI_BATCH_BUFFER_START_BatchBufferStartAddress_start / 8);
(GFX9_MI_BATCH_BUFFER_START_BatchBufferStartAddress_start / 8);
cmd_buffer->return_addr = anv_batch_address(&cmd_buffer->batch, jump_addr);
/* The emit above may have caused us to chain batch buffers which
@@ -1054,7 +1054,7 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
* have room for the chaining command. Since we're about to emit the
* chaining command, let's set it back where it should go.
*/
cmd_buffer->batch.end += GFX8_MI_BATCH_BUFFER_START_length * 4;
cmd_buffer->batch.end += GFX9_MI_BATCH_BUFFER_START_length * 4;
assert(cmd_buffer->batch.start == batch_bo->bo->map);
assert(cmd_buffer->batch.end == batch_bo->bo->map + batch_bo->bo->size);
@@ -1138,7 +1138,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
list_splicetail(&copy_list, &primary->batch_bos);
anv_batch_bo_continue(last_bbo, &primary->batch,
GFX8_MI_BATCH_BUFFER_START_length * 4);
GFX9_MI_BATCH_BUFFER_START_length * 4);
break;
}
case ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN: {
@@ -1147,10 +1147,10 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
uint64_t *write_return_addr =
anv_batch_emitn(&primary->batch,
GFX8_MI_STORE_DATA_IMM_length + 1 /* QWord write */,
GFX8_MI_STORE_DATA_IMM,
GFX9_MI_STORE_DATA_IMM_length + 1 /* QWord write */,
GFX9_MI_STORE_DATA_IMM,
.Address = secondary->return_addr)
+ (GFX8_MI_STORE_DATA_IMM_ImmediateData_start / 8);
+ (GFX9_MI_STORE_DATA_IMM_ImmediateData_start / 8);
emit_batch_buffer_start(&primary->batch, first_bbo->bo, 0);