From 2e9450f49fec2ce1bf9ea32194f164d946ffdeb5 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 5 Jul 2023 11:10:11 +0200 Subject: [PATCH] pan/genxml: Various CS related improvements in v10.xml Various improvements to the CS related definitions: - make the field name consistent across all instructions using the same pattern - define missing fields, - replace the CEU prefix by a CS prefix - define enums where it makes sense - re-order instruction definitions by IDs - add missing instructions While at it, extend decode_csf.c to support all known instructions. Signed-off-by: Boris Brezillon Reviewed-by: Antonino Maniscalco Reviewed-by: Erik Faye-Lund Part-of: --- src/panfrost/lib/genxml/decode_csf.c | 587 ++++++++++++++++++--------- src/panfrost/lib/genxml/v10.xml | 387 ++++++++++-------- 2 files changed, 608 insertions(+), 366 deletions(-) diff --git a/src/panfrost/lib/genxml/decode_csf.c b/src/panfrost/lib/genxml/decode_csf.c index f713b468554..603beea4f4d 100644 --- a/src/panfrost/lib/genxml/decode_csf.c +++ b/src/panfrost/lib/genxml/decode_csf.c @@ -69,14 +69,16 @@ cs_get_u64(struct queue_ctx *qctx, uint8_t reg) static void pandecode_run_compute(struct pandecode_context *ctx, FILE *fp, - struct queue_ctx *qctx, struct MALI_CEU_RUN_COMPUTE *I) + struct queue_ctx *qctx, struct MALI_CS_RUN_COMPUTE *I) { const char *axes[4] = {"x_axis", "y_axis", "z_axis"}; /* Print the instruction. Ignore the selects and the flags override * since we'll print them implicitly later. */ - fprintf(fp, "RUN_COMPUTE.%s #%u\n", axes[I->task_axis], I->task_increment); + fprintf(fp, "RUN_COMPUTE%s.%s #%u\n", + I->progress_increment ? ".progress_inc" : "", axes[I->task_axis], + I->task_increment); ctx->indent++; @@ -111,13 +113,142 @@ pandecode_run_compute(struct pandecode_context *ctx, FILE *fp, } static void -pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp, - struct queue_ctx *qctx, struct MALI_CEU_RUN_IDVS *I) +pandecode_run_compute_indirect(struct pandecode_context *ctx, FILE *fp, + struct queue_ctx *qctx, + struct MALI_CS_RUN_COMPUTE_INDIRECT *I) { /* Print the instruction. Ignore the selects and the flags override * since we'll print them implicitly later. */ - fprintf(fp, "RUN_IDVS%s", I->malloc_enable ? "" : ".no_malloc"); + fprintf(fp, "RUN_COMPUTE_INDIRECT%s #%u\n", + I->progress_increment ? ".progress_inc" : "", + I->workgroups_per_task); + + ctx->indent++; + + unsigned reg_srt = 0 + (I->srt_select * 2); + unsigned reg_fau = 8 + (I->fau_select * 2); + unsigned reg_spd = 16 + (I->spd_select * 2); + unsigned reg_tsd = 24 + (I->tsd_select * 2); + + GENX(pandecode_resource_tables)(ctx, cs_get_u64(qctx, reg_srt), "Resources"); + + mali_ptr fau = cs_get_u64(qctx, reg_fau); + + if (fau) + GENX(pandecode_fau)(ctx, fau & BITFIELD64_MASK(48), fau >> 56, "FAU"); + + GENX(pandecode_shader) + (ctx, cs_get_u64(qctx, reg_spd), "Shader", qctx->gpu_id); + + DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, reg_tsd), + "Local Storage @%" PRIx64 ":\n", cs_get_u64(qctx, reg_tsd)); + + pandecode_log(ctx, "Global attribute offset: %u\n", cs_get_u32(qctx, 32)); + DUMP_CL(ctx, COMPUTE_SIZE_WORKGROUP, &qctx->regs[33], "Workgroup size\n"); + pandecode_log(ctx, "Job offset X: %u\n", cs_get_u32(qctx, 34)); + pandecode_log(ctx, "Job offset Y: %u\n", cs_get_u32(qctx, 35)); + pandecode_log(ctx, "Job offset Z: %u\n", cs_get_u32(qctx, 36)); + pandecode_log(ctx, "Job size X: %u\n", cs_get_u32(qctx, 37)); + pandecode_log(ctx, "Job size Y: %u\n", cs_get_u32(qctx, 38)); + pandecode_log(ctx, "Job size Z: %u\n", cs_get_u32(qctx, 39)); + + ctx->indent--; +} + +static void +pandecode_run_tiling(struct pandecode_context *ctx, FILE *fp, + struct queue_ctx *qctx, struct MALI_CS_RUN_TILING *I) +{ + /* Print the instruction. Ignore the selects and the flags override + * since we'll print them implicitly later. + */ + fprintf(fp, "RUN_TILING%s", I->progress_increment ? ".progress_inc" : ""); + + fprintf(fp, "\n"); + + ctx->indent++; + + /* Merge flag overrides with the register flags */ + uint32_t tiler_flags_raw = cs_get_u64(qctx, 56); + tiler_flags_raw |= I->flags_override; + pan_unpack(&tiler_flags_raw, PRIMITIVE_FLAGS, tiler_flags); + + unsigned reg_srt = I->srt_select * 2; + unsigned reg_fau = 8 + I->fau_select * 2; + unsigned reg_spd = 16 + I->spd_select * 2; + unsigned reg_tsd = 24 + I->tsd_select; + + mali_ptr srt = cs_get_u64(qctx, reg_srt); + mali_ptr fau = cs_get_u64(qctx, reg_fau); + mali_ptr spd = cs_get_u64(qctx, reg_spd); + mali_ptr tsd = cs_get_u64(qctx, reg_tsd); + + if (srt) + GENX(pandecode_resource_tables)(ctx, srt, "Fragment resources"); + + if (fau) { + uint64_t lo = fau & BITFIELD64_MASK(48); + uint64_t hi = fau >> 56; + + GENX(pandecode_fau)(ctx, lo, hi, "Fragment FAU"); + } + + if (spd) { + GENX(pandecode_shader) + (ctx, spd, "Fragment shader", qctx->gpu_id); + } + + DUMP_ADDR(ctx, LOCAL_STORAGE, tsd, "Fragment Local Storage @%" PRIx64 ":\n", + tsd); + + pandecode_log(ctx, "Global attribute offset: %u\n", cs_get_u32(qctx, 32)); + pandecode_log(ctx, "Index count: %u\n", cs_get_u32(qctx, 33)); + pandecode_log(ctx, "Instance count: %u\n", cs_get_u32(qctx, 34)); + + if (tiler_flags.index_type) + pandecode_log(ctx, "Index offset: %u\n", cs_get_u32(qctx, 35)); + + pandecode_log(ctx, "Vertex offset: %d\n", cs_get_u32(qctx, 36)); + pandecode_log(ctx, "Tiler DCD flags2: %X\n", cs_get_u32(qctx, 38)); + + if (tiler_flags.index_type) + pandecode_log(ctx, "Index array size: %u\n", cs_get_u32(qctx, 39)); + + GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40), qctx->gpu_id); + + DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n"); + pandecode_log(ctx, "Low depth clamp: %f\n", uif(cs_get_u32(qctx, 44))); + pandecode_log(ctx, "High depth clamp: %f\n", uif(cs_get_u32(qctx, 45))); + pandecode_log(ctx, "Occlusion: %" PRIx64 "\n", cs_get_u64(qctx, 46)); + pandecode_log(ctx, "Vertex position array: %" PRIx64 "\n", + cs_get_u64(qctx, 48)); + + mali_ptr blend = cs_get_u64(qctx, 50); + GENX(pandecode_blend_descs)(ctx, blend & ~7, blend & 7, 0, qctx->gpu_id); + + DUMP_ADDR(ctx, DEPTH_STENCIL, cs_get_u64(qctx, 52), "Depth/stencil"); + + if (tiler_flags.index_type) + pandecode_log(ctx, "Indices: %" PRIx64 "\n", cs_get_u64(qctx, 54)); + + DUMP_UNPACKED(ctx, PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n"); + DUMP_CL(ctx, DCD_FLAGS_0, &qctx->regs[57], "DCD Flags 0\n"); + DUMP_CL(ctx, DCD_FLAGS_1, &qctx->regs[58], "DCD Flags 1\n"); + pandecode_log(ctx, "Vertex bounds: %u\n", cs_get_u32(qctx, 59)); + DUMP_CL(ctx, PRIMITIVE_SIZE, &qctx->regs[60], "Primitive size\n"); + + ctx->indent--; +} +static void +pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp, + struct queue_ctx *qctx, struct MALI_CS_RUN_IDVS *I) +{ + /* Print the instruction. Ignore the selects and the flags override + * since we'll print them implicitly later. + */ + fprintf(fp, "RUN_IDVS%s%s", I->progress_increment ? ".progress_inc" : "", + I->malloc_enable ? "" : ".no_malloc"); if (I->draw_id_register_enable) fprintf(fp, " r%u", I->draw_id); @@ -248,9 +379,21 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp, } static void -pandecode_run_fragment(struct pandecode_context *ctx, struct queue_ctx *qctx, - struct MALI_CEU_RUN_FRAGMENT *I) +pandecode_run_fragment(struct pandecode_context *ctx, FILE *fp, + struct queue_ctx *qctx, struct MALI_CS_RUN_FRAGMENT *I) { + static const char *tile_order[] = { + "zorder", "horizontal", "vertical", "unknown", + "unknown", "rev_horizontal", "rev_vertical", "unknown", + "unknown", "unknown", "unknown", "unknown", + "unknown", "unknown", "unknown", "unknown", + }; + + fprintf(fp, "RUN_FRAGMENT%s.tile_order=%s%s\n", + I->enable_tem ? ".tile_enable_map_enable" : "", + tile_order[I->tile_order], + I->progress_increment ? ".progress_inc" : ""); + ctx->indent++; DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n"); @@ -262,6 +405,32 @@ pandecode_run_fragment(struct pandecode_context *ctx, struct queue_ctx *qctx, ctx->indent--; } +static void +pandecode_run_fullscreen(struct pandecode_context *ctx, FILE *fp, + struct queue_ctx *qctx, + struct MALI_CS_RUN_FULLSCREEN *I) +{ + fprintf(fp, "RUN_FULLSCREEN%s\n", + I->progress_increment ? ".progress_inc" : ""); + + ctx->indent++; + + /* Merge flag overrides with the register flags */ + uint32_t tiler_flags_raw = cs_get_u64(qctx, 56); + tiler_flags_raw |= I->flags_override; + pan_unpack(&tiler_flags_raw, PRIMITIVE_FLAGS, tiler_flags); + DUMP_UNPACKED(ctx, PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n"); + + GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40), qctx->gpu_id); + + DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n"); + + pan_unpack(PANDECODE_PTR(ctx, cs_get_u64(qctx, I->dcd), void), DRAW, dcd); + GENX(pandecode_dcd)(ctx, &dcd, 0, qctx->gpu_id); + + ctx->indent--; +} + static void print_indirect(unsigned address, int16_t offset, FILE *fp) { @@ -285,6 +454,10 @@ print_reg_tuple(unsigned base, uint16_t mask, FILE *fp) fprintf(fp, "_"); } +static const char *conditions_str[] = { + "le", "gt", "eq", "ne", "lt", "ge", "always", +}; + static void disassemble_ceu_instr(struct pandecode_context *ctx, uint64_t dword, unsigned indent, bool verbose, FILE *fp, @@ -301,11 +474,11 @@ disassemble_ceu_instr(struct pandecode_context *ctx, uint64_t dword, /* Unpack the base so we get the opcode */ uint8_t *bytes = (uint8_t *)&dword; - pan_unpack(bytes, CEU_BASE, base); + pan_unpack(bytes, CS_BASE, base); switch (base.opcode) { - case MALI_CEU_OPCODE_NOP: { - pan_unpack(bytes, CEU_NOP, I); + case MALI_CS_OPCODE_NOP: { + pan_unpack(bytes, CS_NOP, I); if (I.ignored) fprintf(fp, "NOP // 0x%" PRIX64 "\n", I.ignored); @@ -314,25 +487,25 @@ disassemble_ceu_instr(struct pandecode_context *ctx, uint64_t dword, break; } - case MALI_CEU_OPCODE_MOVE: { - pan_unpack(bytes, CEU_MOVE, I); + case MALI_CS_OPCODE_MOVE: { + pan_unpack(bytes, CS_MOVE, I); fprintf(fp, "MOVE d%u, #0x%" PRIX64 "\n", I.destination, I.immediate); break; } - case MALI_CEU_OPCODE_MOVE32: { - pan_unpack(bytes, CEU_MOVE32, I); + case MALI_CS_OPCODE_MOVE32: { + pan_unpack(bytes, CS_MOVE32, I); fprintf(fp, "MOVE32 r%u, #0x%X\n", I.destination, I.immediate); break; } - case MALI_CEU_OPCODE_WAIT: { + case MALI_CS_OPCODE_WAIT: { bool first = true; - pan_unpack(bytes, CEU_WAIT, I); - fprintf(fp, "WAIT "); + pan_unpack(bytes, CS_WAIT, I); + fprintf(fp, "WAIT%s ", I.progress_increment ? ".progress_inc" : ""); - u_foreach_bit(i, I.slots) { + u_foreach_bit(i, I.wait_mask) { fprintf(fp, "%s%u", first ? "" : ",", i); first = false; } @@ -341,149 +514,137 @@ disassemble_ceu_instr(struct pandecode_context *ctx, uint64_t dword, break; } - case MALI_CEU_OPCODE_RUN_COMPUTE: { - pan_unpack(bytes, CEU_RUN_COMPUTE, I); + case MALI_CS_OPCODE_RUN_COMPUTE: { + pan_unpack(bytes, CS_RUN_COMPUTE, I); pandecode_run_compute(ctx, fp, qctx, &I); break; } - case MALI_CEU_OPCODE_RUN_IDVS: { - pan_unpack(bytes, CEU_RUN_IDVS, I); + case MALI_CS_OPCODE_RUN_TILING: { + pan_unpack(bytes, CS_RUN_TILING, I); + pandecode_run_tiling(ctx, fp, qctx, &I); + break; + } + + case MALI_CS_OPCODE_RUN_IDVS: { + pan_unpack(bytes, CS_RUN_IDVS, I); pandecode_run_idvs(ctx, fp, qctx, &I); break; } - case MALI_CEU_OPCODE_RUN_FRAGMENT: { - pan_unpack(bytes, CEU_RUN_FRAGMENT, I); - fprintf(fp, "RUN_FRAGMENT%s\n", - I.enable_tem ? ".tile_enable_map_enable" : ""); - pandecode_run_fragment(ctx, qctx, &I); + case MALI_CS_OPCODE_RUN_FRAGMENT: { + pan_unpack(bytes, CS_RUN_FRAGMENT, I); + pandecode_run_fragment(ctx, fp, qctx, &I); break; } - case MALI_CEU_OPCODE_ADD_IMMEDIATE32: { - pan_unpack(bytes, CEU_ADD_IMMEDIATE32, I); + case MALI_CS_OPCODE_RUN_FULLSCREEN: { + pan_unpack(bytes, CS_RUN_FULLSCREEN, I); + pandecode_run_fullscreen(ctx, fp, qctx, &I); + break; + } + + case MALI_CS_OPCODE_FINISH_TILING: { + pan_unpack(bytes, CS_FINISH_TILING, I); + fprintf(fp, "FINISH_TILING%s\n", + I.progress_increment ? ".progress_inc" : ""); + break; + } + + case MALI_CS_OPCODE_FINISH_FRAGMENT: { + pan_unpack(bytes, CS_FINISH_FRAGMENT, I); + fprintf(fp, "FINISH_FRAGMENT.%s, d%u, d%u, #%x, #%u\n", + I.increment_fragment_completed ? ".frag_end" : "", + I.last_heap_chunk, I.first_heap_chunk, I.wait_mask, + I.signal_slot); + break; + } + + case MALI_CS_OPCODE_ADD_IMMEDIATE32: { + pan_unpack(bytes, CS_ADD_IMMEDIATE32, I); fprintf(fp, "ADD_IMMEDIATE32 r%u, r%u, #%d\n", I.destination, I.source, I.immediate); break; } - case MALI_CEU_OPCODE_ADD_IMMEDIATE64: { - pan_unpack(bytes, CEU_ADD_IMMEDIATE64, I); + case MALI_CS_OPCODE_ADD_IMMEDIATE64: { + pan_unpack(bytes, CS_ADD_IMMEDIATE64, I); fprintf(fp, "ADD_IMMEDIATE64 d%u, d%u, #%d\n", I.destination, I.source, I.immediate); break; } - case MALI_CEU_OPCODE_LOAD_MULTIPLE: { - pan_unpack(bytes, CEU_LOAD_MULTIPLE, I); + case MALI_CS_OPCODE_UMIN32: { + pan_unpack(bytes, CS_UMIN32, I); + + fprintf(fp, "UMIN32 r%u, r%u, r%u\n", I.destination, I.source_1, + I.source_2); + break; + } + + case MALI_CS_OPCODE_LOAD_MULTIPLE: { + pan_unpack(bytes, CS_LOAD_MULTIPLE, I); fprintf(fp, "LOAD_MULTIPLE "); - print_reg_tuple(I.base, I.mask, fp); + print_reg_tuple(I.base_register, I.mask, fp); fprintf(fp, ", "); print_indirect(I.address, I.offset, fp); fprintf(fp, "\n"); break; } - case MALI_CEU_OPCODE_STORE_MULTIPLE: { - pan_unpack(bytes, CEU_STORE_MULTIPLE, I); + case MALI_CS_OPCODE_STORE_MULTIPLE: { + pan_unpack(bytes, CS_STORE_MULTIPLE, I); fprintf(fp, "STORE_MULTIPLE "); print_indirect(I.address, I.offset, fp); fprintf(fp, ", "); - print_reg_tuple(I.base, I.mask, fp); + print_reg_tuple(I.base_register, I.mask, fp); fprintf(fp, "\n"); break; } - case MALI_CEU_OPCODE_SET_SB_ENTRY: { - pan_unpack(bytes, CEU_SET_SB_ENTRY, I); + case MALI_CS_OPCODE_BRANCH: { + pan_unpack(bytes, CS_BRANCH, I); + fprintf(fp, "BRANCH.%s r%u, #%d\n", conditions_str[I.condition], I.value, + I.offset); + break; + } + case MALI_CS_OPCODE_SET_SB_ENTRY: { + pan_unpack(bytes, CS_SET_SB_ENTRY, I); fprintf(fp, "SET_SB_ENTRY #%u, #%u\n", I.endpoint_entry, I.other_entry); break; } - case MALI_CEU_OPCODE_SYNC_ADD32: { - pan_unpack(bytes, CEU_SYNC_ADD32, I); - bool first = true; - fprintf(fp, "SYNC_ADD32%s%s signal(%u), wait(", - I.error_propagate ? ".error_propagate" : "", - I.scope_csg ? ".csg" : ".system", I.scoreboard_slot); - - u_foreach_bit(i, I.wait_mask) { - fprintf(fp, "%s%u", first ? "" : ",", i); - first = false; - } - - fprintf(fp, ") [d%u], r%u\n", I.address, I.data); + case MALI_CS_OPCODE_PROGRESS_WAIT: { + pan_unpack(bytes, CS_PROGRESS_WAIT, I); + fprintf(fp, "PROGRESS_WAIT d%u, #%u\n", I.source, I.queue); break; } - case MALI_CEU_OPCODE_SYNC_ADD64: { - pan_unpack(bytes, CEU_SYNC_ADD64, I); - bool first = true; - fprintf(fp, "SYNC_ADD64%s%s signal(%u), wait(", - I.error_propagate ? ".error_propagate" : "", - I.scope_csg ? ".csg" : ".system", I.scoreboard_slot); - - u_foreach_bit(i, I.wait_mask) { - fprintf(fp, "%s%u", first ? "" : ",", i); - first = false; - } - - fprintf(fp, ") [d%u], d%u\n", I.address, I.data); + case MALI_CS_OPCODE_SET_EXCEPTION_HANDLER: { + pan_unpack(bytes, CS_SET_EXCEPTION_HANDLER, I); + fprintf(fp, "SET_EXCEPTION_HANDLER d%u, r%u\n", I.address, I.length); break; } - case MALI_CEU_OPCODE_SYNC_SET32: { - pan_unpack(bytes, CEU_SYNC_SET32, I); - bool first = true; - fprintf(fp, "SYNC_SET32.%s%s signal(%u), wait(", - I.error_propagate ? ".error_propagate" : "", - I.scope_csg ? ".csg" : ".system", I.scoreboard_slot); - - u_foreach_bit(i, I.wait_mask) { - fprintf(fp, "%s%u", first ? "" : ",", i); - first = false; - } - - fprintf(fp, ") [d%u], r%u\n", I.address, I.data); - break; - } - - case MALI_CEU_OPCODE_SYNC_SET64: { - pan_unpack(bytes, CEU_SYNC_SET64, I); - bool first = true; - fprintf(fp, "SYNC_SET64.%s%s signal(%u), wait(", - I.error_propagate ? ".error_propagate" : "", - I.scope_csg ? ".csg" : ".system", I.scoreboard_slot); - - u_foreach_bit(i, I.wait_mask) { - fprintf(fp, "%s%u", first ? "" : ",", i); - first = false; - } - - fprintf(fp, ") [d%u], d%u\n", I.address, I.data); - break; - } - - case MALI_CEU_OPCODE_CALL: { - pan_unpack(bytes, CEU_CALL, I); + case MALI_CS_OPCODE_CALL: { + pan_unpack(bytes, CS_CALL, I); fprintf(fp, "CALL d%u, r%u\n", I.address, I.length); break; } - case MALI_CEU_OPCODE_JUMP: { - pan_unpack(bytes, CEU_JUMP, I); + case MALI_CS_OPCODE_JUMP: { + pan_unpack(bytes, CS_JUMP, I); fprintf(fp, "JUMP d%u, r%u\n", I.address, I.length); break; } - case MALI_CEU_OPCODE_REQ_RESOURCE: { - pan_unpack(bytes, CEU_REQ_RESOURCE, I); + case MALI_CS_OPCODE_REQ_RESOURCE: { + pan_unpack(bytes, CS_REQ_RESOURCE, I); fprintf(fp, "REQ_RESOURCE"); if (I.compute) @@ -498,44 +659,8 @@ disassemble_ceu_instr(struct pandecode_context *ctx, uint64_t dword, break; } - case MALI_CEU_OPCODE_SYNC_WAIT32: { - pan_unpack(bytes, CEU_SYNC_WAIT32, I); - - fprintf(fp, "SYNC_WAIT32%s%s d%u, r%u\n", I.invert ? ".gt" : ".le", - I.error_reject ? ".reject" : ".inherit", I.address, I.data); - break; - } - - case MALI_CEU_OPCODE_SYNC_WAIT64: { - pan_unpack(bytes, CEU_SYNC_WAIT64, I); - - fprintf(fp, "SYNC_WAIT64%s%s d%u, d%u\n", I.invert ? ".gt" : ".le", - I.error_reject ? ".reject" : ".inherit", I.address, I.data); - break; - } - - case MALI_CEU_OPCODE_UMIN32: { - pan_unpack(bytes, CEU_UMIN32, I); - - fprintf(fp, "UMIN32 r%u, r%u, r%u\n", I.destination, I.source_1, - I.source_2); - break; - } - - case MALI_CEU_OPCODE_BRANCH: { - pan_unpack(bytes, CEU_BRANCH, I); - - static const char *condition[] = { - "le", "gt", "eq", "ne", "lt", "ge", "always", - }; - fprintf(fp, "BRANCH.%s r%u, #%d\n", condition[I.condition], I.value, - I.offset); - - break; - } - - case MALI_CEU_OPCODE_FLUSH_CACHE2: { - pan_unpack(bytes, CEU_FLUSH_CACHE2, I); + case MALI_CS_OPCODE_FLUSH_CACHE2: { + pan_unpack(bytes, CS_FLUSH_CACHE2, I); static const char *mode[] = { "nop", "clean", @@ -543,66 +668,134 @@ disassemble_ceu_instr(struct pandecode_context *ctx, uint64_t dword, "clean_invalidate", }; - fprintf(fp, "FLUSH_CACHE2.%s_l2.%s_lsc%s r%u, signal(%u), wait(", + fprintf(fp, "FLUSH_CACHE2.%s_l2.%s_lsc%s r%u, #%x, #%u\n", mode[I.l2_flush_mode], mode[I.lsc_flush_mode], - I.other_invalidate ? ".invalidate_other" : "", I.latest_flush_id, - I.scoreboard_entry); - - bool first = true; - u_foreach_bit(i, I.scoreboard_mask) { - fprintf(fp, "%s%u", first ? "" : ",", i); - first = false; - } - fprintf(fp, ")\n"); + I.other_invalidate ? ".invalidate_other" : ".nop_other", + I.latest_flush_id, I.wait_mask, I.signal_slot); break; } - case MALI_CEU_OPCODE_FINISH_TILING: { - pan_unpack(bytes, CEU_FINISH_TILING, I); - fprintf(fp, "FINISH_TILING\n"); + case MALI_CS_OPCODE_SYNC_ADD32: { + pan_unpack(bytes, CS_SYNC_ADD32, I); + fprintf(fp, "SYNC_ADD32%s%s [d%u], r%u, #%x, #%u\n", + I.error_propagate ? ".error_propagate" : "", + I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system", I.address, + I.data, I.wait_mask, I.signal_slot); break; } - case MALI_CEU_OPCODE_FINISH_FRAGMENT: { - pan_unpack(bytes, CEU_FINISH_FRAGMENT, I); - - bool first = true; - fprintf(fp, "FINISH_FRAGMENT.%s, d%u, d%u, signal(%u), wait(", - I.increment_fragment_completed ? ".frag_end" : "", - I.last_heap_chunk, I.first_heap_chunk, I.scoreboard_entry); - - u_foreach_bit(i, I.wait_mask) { - fprintf(fp, "%s%u", first ? "" : ",", i); - first = false; - } - fprintf(fp, ")\n"); + case MALI_CS_OPCODE_SYNC_SET32: { + pan_unpack(bytes, CS_SYNC_SET32, I); + fprintf(fp, "SYNC_SET32.%s%s [d%u], r%u, #%x, #%u\n", + I.error_propagate ? ".error_propagate" : "", + I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system", I.address, + I.data, I.wait_mask, I.signal_slot); break; } - case MALI_CEU_OPCODE_HEAP_OPERATION: { - pan_unpack(bytes, CEU_HEAP_OPERATION, I); - const char *counter_names[] = {"vt_start", "vt_end", NULL, "frag_end"}; - bool first = true; - fprintf(fp, "HEAP_OPERATION.%s signal(%u), wait(", - counter_names[I.operation], I.scoreboard_entry); - - u_foreach_bit(i, I.wait_mask) { - fprintf(fp, "%s%u", first ? "" : ",", i); - first = false; - } - - fprintf(fp, ")\n"); + case MALI_CS_OPCODE_SYNC_WAIT32: { + pan_unpack(bytes, CS_SYNC_WAIT32, I); + fprintf(fp, "SYNC_WAIT32%s%s d%u, r%u\n", conditions_str[I.condition], + I.error_reject ? ".reject" : ".inherit", I.address, I.data); break; } - case MALI_CEU_OPCODE_HEAP_SET: { - pan_unpack(bytes, CEU_HEAP_SET, I); + case MALI_CS_OPCODE_STORE_STATE: { + static const char *states_str[] = { + "SYSTEM_TIMESTAMP", + "CYCLE_COUNT", + "DISJOINT_COUNT", + "ERROR_STATE", + }; + + pan_unpack(bytes, CS_STORE_STATE, I); + fprintf(fp, "STORE_STATE.%s d%u, #%i, #%x, #%u\n", + I.state >= ARRAY_SIZE(states_str) ? "UNKNOWN_STATE" + : states_str[I.state], + I.address, I.offset, I.wait_mask, I.signal_slot); + break; + } + + case MALI_CS_OPCODE_PROT_REGION: { + pan_unpack(bytes, CS_PROT_REGION, I); + fprintf(fp, "PROT_REGION #%u\n", I.size); + break; + } + + case MALI_CS_OPCODE_PROGRESS_STORE: { + pan_unpack(bytes, CS_PROGRESS_STORE, I); + fprintf(fp, "PROGRESS_STORE d%u\n", I.source); + break; + } + + case MALI_CS_OPCODE_PROGRESS_LOAD: { + pan_unpack(bytes, CS_PROGRESS_LOAD, I); + fprintf(fp, "PROGRESS_LOAD d%u\n", I.destination); + break; + } + + case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT: { + pan_unpack(bytes, CS_RUN_COMPUTE_INDIRECT, I); + pandecode_run_compute_indirect(ctx, fp, qctx, &I); + break; + } + + case MALI_CS_OPCODE_ERROR_BARRIER: { + pan_unpack(bytes, CS_ERROR_BARRIER, I); + fprintf(fp, "ERROR_BARRIER"); + break; + } + + case MALI_CS_OPCODE_HEAP_SET: { + pan_unpack(bytes, CS_HEAP_SET, I); fprintf(fp, "HEAP_SET d%u\n", I.address); break; } + case MALI_CS_OPCODE_HEAP_OPERATION: { + pan_unpack(bytes, CS_HEAP_OPERATION, I); + const char *counter_names[] = {"vt_start", "vt_end", NULL, "frag_end"}; + fprintf(fp, "HEAP_OPERATION.%s #%x, #%d\n", counter_names[I.operation], + I.wait_mask, I.signal_slot); + break; + } + + case MALI_CS_OPCODE_TRACE_POINT: { + pan_unpack(bytes, CS_TRACE_POINT, I); + fprintf(fp, "TRACE_POINT r%d:r%d, #%x, #%u\n", I.base_register, + I.base_register + I.register_count - 1, I.wait_mask, + I.signal_slot); + break; + } + + case MALI_CS_OPCODE_SYNC_ADD64: { + pan_unpack(bytes, CS_SYNC_ADD64, I); + fprintf(fp, "SYNC_ADD64%s%s [d%u], d%u, #%x, #%u\n", + I.error_propagate ? ".error_propagate" : "", + I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system", I.address, + I.data, I.wait_mask, I.signal_slot); + break; + } + + case MALI_CS_OPCODE_SYNC_SET64: { + pan_unpack(bytes, CS_SYNC_SET64, I); + fprintf(fp, "SYNC_SET64.%s%s [d%u], d%u, #%x, #%u\n", + I.error_propagate ? ".error_propagate" : "", + I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system", I.address, + I.data, I.wait_mask, I.signal_slot); + break; + } + + case MALI_CS_OPCODE_SYNC_WAIT64: { + pan_unpack(bytes, CS_SYNC_WAIT64, I); + + fprintf(fp, "SYNC_WAIT64%s%s d%u, d%u\n", conditions_str[I.condition], + I.error_reject ? ".reject" : ".inherit", I.address, I.data); + break; + } + default: { - fprintf(fp, "INVALID_%u 0x%" PRIX64 "\n", base.opcode, base.data); + fprintf(fp, "UNKNOWN_%u 0x%" PRIX64 "\n", base.opcode, base.data); break; } } @@ -633,7 +826,7 @@ interpret_ceu_jump(struct pandecode_context *ctx, struct queue_ctx *qctx, } /* - * Interpret a single instruction of the CEU, updating the register file, + * Interpret a single instruction of the CS, updating the register file, * instruction pointer, and call stack. Memory access and GPU controls are * ignored for now. * @@ -644,35 +837,35 @@ interpret_ceu_instr(struct pandecode_context *ctx, struct queue_ctx *qctx) { /* Unpack the base so we get the opcode */ uint8_t *bytes = (uint8_t *)qctx->ip; - pan_unpack(bytes, CEU_BASE, base); + pan_unpack(bytes, CS_BASE, base); assert(qctx->ip < qctx->end); switch (base.opcode) { - case MALI_CEU_OPCODE_MOVE: { - pan_unpack(bytes, CEU_MOVE, I); + case MALI_CS_OPCODE_MOVE: { + pan_unpack(bytes, CS_MOVE, I); qctx->regs[I.destination + 0] = (uint32_t)I.immediate; qctx->regs[I.destination + 1] = (uint32_t)(I.immediate >> 32); break; } - case MALI_CEU_OPCODE_MOVE32: { - pan_unpack(bytes, CEU_MOVE32, I); + case MALI_CS_OPCODE_MOVE32: { + pan_unpack(bytes, CS_MOVE32, I); qctx->regs[I.destination] = I.immediate; break; } - case MALI_CEU_OPCODE_ADD_IMMEDIATE32: { - pan_unpack(bytes, CEU_ADD_IMMEDIATE32, I); + case MALI_CS_OPCODE_ADD_IMMEDIATE32: { + pan_unpack(bytes, CS_ADD_IMMEDIATE32, I); qctx->regs[I.destination] = qctx->regs[I.source] + I.immediate; break; } - case MALI_CEU_OPCODE_ADD_IMMEDIATE64: { - pan_unpack(bytes, CEU_ADD_IMMEDIATE64, I); + case MALI_CS_OPCODE_ADD_IMMEDIATE64: { + pan_unpack(bytes, CS_ADD_IMMEDIATE64, I); int64_t value = (qctx->regs[I.source] | ((int64_t)qctx->regs[I.source + 1] << 32)) + @@ -683,8 +876,8 @@ interpret_ceu_instr(struct pandecode_context *ctx, struct queue_ctx *qctx) break; } - case MALI_CEU_OPCODE_CALL: { - pan_unpack(bytes, CEU_CALL, I); + case MALI_CS_OPCODE_CALL: { + pan_unpack(bytes, CS_CALL, I); if (qctx->call_stack_depth == MAX_CALL_STACK_DEPTH) { fprintf(stderr, "CS call stack overflow\n"); @@ -706,8 +899,8 @@ interpret_ceu_instr(struct pandecode_context *ctx, struct queue_ctx *qctx) return interpret_ceu_jump(ctx, qctx, I.address, I.length); } - case MALI_CEU_OPCODE_JUMP: { - pan_unpack(bytes, CEU_JUMP, I); + case MALI_CS_OPCODE_JUMP: { + pan_unpack(bytes, CS_JUMP, I); if (qctx->call_stack_depth == 0) { fprintf(stderr, "Cannot jump from the entrypoint\n"); diff --git a/src/panfrost/lib/genxml/v10.xml b/src/panfrost/lib/genxml/v10.xml index 249aaff7850..43bb33227f9 100644 --- a/src/panfrost/lib/genxml/v10.xml +++ b/src/panfrost/lib/genxml/v10.xml @@ -449,7 +449,7 @@ - + @@ -459,26 +459,35 @@ - + - + - + - + + + + + + + + + + @@ -498,7 +507,7 @@ - + @@ -509,7 +518,7 @@ - + @@ -520,65 +529,58 @@ - + - + - + - + - - - - - - - - - - - - - - - - - - - - - + - + - + - + - - - + + + + - + + - + - + + + + + + + + + + + + @@ -587,201 +589,248 @@ - + - + - + + + - - + + + + + - + + + + + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + - + - + - + - + - + - - + + - + - - + + - + - + - + - - - - - - + - + - - - - - - + + + + + + + + + + + + + + + - - + - - - - - - + + - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + - - - + + + + + + + + + + + + + + + - + + - - - + + + + + - + + - + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +