pan/genxml: Various CS related improvements in v10.xml

Various improvements to the CS related definitions:

- make field names consistent across all instructions using the same
  pattern
- define missing fields
- replace the CEU prefix with a CS prefix
- define enums where it makes sense
- re-order instruction definitions by IDs
- add missing instructions

While at it, extend decode_csf.c to support all known instructions.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Antonino Maniscalco <antonino.maniscalco@collabora.com>
Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26358>
This commit is contained in:
Boris Brezillon
2023-07-05 11:10:11 +02:00
committed by Marge Bot
parent 729f6b28a6
commit 2e9450f49f
2 changed files with 608 additions and 366 deletions
+390 -197
View File
@@ -69,14 +69,16 @@ cs_get_u64(struct queue_ctx *qctx, uint8_t reg)
static void
pandecode_run_compute(struct pandecode_context *ctx, FILE *fp,
struct queue_ctx *qctx, struct MALI_CEU_RUN_COMPUTE *I)
struct queue_ctx *qctx, struct MALI_CS_RUN_COMPUTE *I)
{
const char *axes[4] = {"x_axis", "y_axis", "z_axis"};
/* Print the instruction. Ignore the selects and the flags override
* since we'll print them implicitly later.
*/
fprintf(fp, "RUN_COMPUTE.%s #%u\n", axes[I->task_axis], I->task_increment);
fprintf(fp, "RUN_COMPUTE%s.%s #%u\n",
I->progress_increment ? ".progress_inc" : "", axes[I->task_axis],
I->task_increment);
ctx->indent++;
@@ -111,13 +113,142 @@ pandecode_run_compute(struct pandecode_context *ctx, FILE *fp,
}
static void
pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp,
struct queue_ctx *qctx, struct MALI_CEU_RUN_IDVS *I)
pandecode_run_compute_indirect(struct pandecode_context *ctx, FILE *fp,
struct queue_ctx *qctx,
struct MALI_CS_RUN_COMPUTE_INDIRECT *I)
{
/* Print the instruction. Ignore the selects and the flags override
* since we'll print them implicitly later.
*/
fprintf(fp, "RUN_IDVS%s", I->malloc_enable ? "" : ".no_malloc");
fprintf(fp, "RUN_COMPUTE_INDIRECT%s #%u\n",
I->progress_increment ? ".progress_inc" : "",
I->workgroups_per_task);
ctx->indent++;
unsigned reg_srt = 0 + (I->srt_select * 2);
unsigned reg_fau = 8 + (I->fau_select * 2);
unsigned reg_spd = 16 + (I->spd_select * 2);
unsigned reg_tsd = 24 + (I->tsd_select * 2);
GENX(pandecode_resource_tables)(ctx, cs_get_u64(qctx, reg_srt), "Resources");
mali_ptr fau = cs_get_u64(qctx, reg_fau);
if (fau)
GENX(pandecode_fau)(ctx, fau & BITFIELD64_MASK(48), fau >> 56, "FAU");
GENX(pandecode_shader)
(ctx, cs_get_u64(qctx, reg_spd), "Shader", qctx->gpu_id);
DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, reg_tsd),
"Local Storage @%" PRIx64 ":\n", cs_get_u64(qctx, reg_tsd));
pandecode_log(ctx, "Global attribute offset: %u\n", cs_get_u32(qctx, 32));
DUMP_CL(ctx, COMPUTE_SIZE_WORKGROUP, &qctx->regs[33], "Workgroup size\n");
pandecode_log(ctx, "Job offset X: %u\n", cs_get_u32(qctx, 34));
pandecode_log(ctx, "Job offset Y: %u\n", cs_get_u32(qctx, 35));
pandecode_log(ctx, "Job offset Z: %u\n", cs_get_u32(qctx, 36));
pandecode_log(ctx, "Job size X: %u\n", cs_get_u32(qctx, 37));
pandecode_log(ctx, "Job size Y: %u\n", cs_get_u32(qctx, 38));
pandecode_log(ctx, "Job size Z: %u\n", cs_get_u32(qctx, 39));
ctx->indent--;
}
static void
pandecode_run_tiling(struct pandecode_context *ctx, FILE *fp,
struct queue_ctx *qctx, struct MALI_CS_RUN_TILING *I)
{
/* Print the instruction. Ignore the selects and the flags override
* since we'll print them implicitly later.
*/
fprintf(fp, "RUN_TILING%s", I->progress_increment ? ".progress_inc" : "");
fprintf(fp, "\n");
ctx->indent++;
/* Merge flag overrides with the register flags */
uint32_t tiler_flags_raw = cs_get_u64(qctx, 56);
tiler_flags_raw |= I->flags_override;
pan_unpack(&tiler_flags_raw, PRIMITIVE_FLAGS, tiler_flags);
unsigned reg_srt = I->srt_select * 2;
unsigned reg_fau = 8 + I->fau_select * 2;
unsigned reg_spd = 16 + I->spd_select * 2;
unsigned reg_tsd = 24 + I->tsd_select;
mali_ptr srt = cs_get_u64(qctx, reg_srt);
mali_ptr fau = cs_get_u64(qctx, reg_fau);
mali_ptr spd = cs_get_u64(qctx, reg_spd);
mali_ptr tsd = cs_get_u64(qctx, reg_tsd);
if (srt)
GENX(pandecode_resource_tables)(ctx, srt, "Fragment resources");
if (fau) {
uint64_t lo = fau & BITFIELD64_MASK(48);
uint64_t hi = fau >> 56;
GENX(pandecode_fau)(ctx, lo, hi, "Fragment FAU");
}
if (spd) {
GENX(pandecode_shader)
(ctx, spd, "Fragment shader", qctx->gpu_id);
}
DUMP_ADDR(ctx, LOCAL_STORAGE, tsd, "Fragment Local Storage @%" PRIx64 ":\n",
tsd);
pandecode_log(ctx, "Global attribute offset: %u\n", cs_get_u32(qctx, 32));
pandecode_log(ctx, "Index count: %u\n", cs_get_u32(qctx, 33));
pandecode_log(ctx, "Instance count: %u\n", cs_get_u32(qctx, 34));
if (tiler_flags.index_type)
pandecode_log(ctx, "Index offset: %u\n", cs_get_u32(qctx, 35));
pandecode_log(ctx, "Vertex offset: %d\n", cs_get_u32(qctx, 36));
pandecode_log(ctx, "Tiler DCD flags2: %X\n", cs_get_u32(qctx, 38));
if (tiler_flags.index_type)
pandecode_log(ctx, "Index array size: %u\n", cs_get_u32(qctx, 39));
GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40), qctx->gpu_id);
DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n");
pandecode_log(ctx, "Low depth clamp: %f\n", uif(cs_get_u32(qctx, 44)));
pandecode_log(ctx, "High depth clamp: %f\n", uif(cs_get_u32(qctx, 45)));
pandecode_log(ctx, "Occlusion: %" PRIx64 "\n", cs_get_u64(qctx, 46));
pandecode_log(ctx, "Vertex position array: %" PRIx64 "\n",
cs_get_u64(qctx, 48));
mali_ptr blend = cs_get_u64(qctx, 50);
GENX(pandecode_blend_descs)(ctx, blend & ~7, blend & 7, 0, qctx->gpu_id);
DUMP_ADDR(ctx, DEPTH_STENCIL, cs_get_u64(qctx, 52), "Depth/stencil");
if (tiler_flags.index_type)
pandecode_log(ctx, "Indices: %" PRIx64 "\n", cs_get_u64(qctx, 54));
DUMP_UNPACKED(ctx, PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n");
DUMP_CL(ctx, DCD_FLAGS_0, &qctx->regs[57], "DCD Flags 0\n");
DUMP_CL(ctx, DCD_FLAGS_1, &qctx->regs[58], "DCD Flags 1\n");
pandecode_log(ctx, "Vertex bounds: %u\n", cs_get_u32(qctx, 59));
DUMP_CL(ctx, PRIMITIVE_SIZE, &qctx->regs[60], "Primitive size\n");
ctx->indent--;
}
static void
pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp,
struct queue_ctx *qctx, struct MALI_CS_RUN_IDVS *I)
{
/* Print the instruction. Ignore the selects and the flags override
* since we'll print them implicitly later.
*/
fprintf(fp, "RUN_IDVS%s%s", I->progress_increment ? ".progress_inc" : "",
I->malloc_enable ? "" : ".no_malloc");
if (I->draw_id_register_enable)
fprintf(fp, " r%u", I->draw_id);
@@ -248,9 +379,21 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp,
}
static void
pandecode_run_fragment(struct pandecode_context *ctx, struct queue_ctx *qctx,
struct MALI_CEU_RUN_FRAGMENT *I)
pandecode_run_fragment(struct pandecode_context *ctx, FILE *fp,
struct queue_ctx *qctx, struct MALI_CS_RUN_FRAGMENT *I)
{
static const char *tile_order[] = {
"zorder", "horizontal", "vertical", "unknown",
"unknown", "rev_horizontal", "rev_vertical", "unknown",
"unknown", "unknown", "unknown", "unknown",
"unknown", "unknown", "unknown", "unknown",
};
fprintf(fp, "RUN_FRAGMENT%s.tile_order=%s%s\n",
I->enable_tem ? ".tile_enable_map_enable" : "",
tile_order[I->tile_order],
I->progress_increment ? ".progress_inc" : "");
ctx->indent++;
DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n");
@@ -262,6 +405,32 @@ pandecode_run_fragment(struct pandecode_context *ctx, struct queue_ctx *qctx,
ctx->indent--;
}
static void
pandecode_run_fullscreen(struct pandecode_context *ctx, FILE *fp,
struct queue_ctx *qctx,
struct MALI_CS_RUN_FULLSCREEN *I)
{
fprintf(fp, "RUN_FULLSCREEN%s\n",
I->progress_increment ? ".progress_inc" : "");
ctx->indent++;
/* Merge flag overrides with the register flags */
uint32_t tiler_flags_raw = cs_get_u64(qctx, 56);
tiler_flags_raw |= I->flags_override;
pan_unpack(&tiler_flags_raw, PRIMITIVE_FLAGS, tiler_flags);
DUMP_UNPACKED(ctx, PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n");
GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40), qctx->gpu_id);
DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n");
pan_unpack(PANDECODE_PTR(ctx, cs_get_u64(qctx, I->dcd), void), DRAW, dcd);
GENX(pandecode_dcd)(ctx, &dcd, 0, qctx->gpu_id);
ctx->indent--;
}
static void
print_indirect(unsigned address, int16_t offset, FILE *fp)
{
@@ -285,6 +454,10 @@ print_reg_tuple(unsigned base, uint16_t mask, FILE *fp)
fprintf(fp, "_");
}
/* Mnemonic suffix for each condition code, indexed by the raw field value. */
static const char *conditions_str[] = {
   [0] = "le",
   [1] = "gt",
   [2] = "eq",
   [3] = "ne",
   [4] = "lt",
   [5] = "ge",
   [6] = "always",
};
static void
disassemble_ceu_instr(struct pandecode_context *ctx, uint64_t dword,
unsigned indent, bool verbose, FILE *fp,
@@ -301,11 +474,11 @@ disassemble_ceu_instr(struct pandecode_context *ctx, uint64_t dword,
/* Unpack the base so we get the opcode */
uint8_t *bytes = (uint8_t *)&dword;
pan_unpack(bytes, CEU_BASE, base);
pan_unpack(bytes, CS_BASE, base);
switch (base.opcode) {
case MALI_CEU_OPCODE_NOP: {
pan_unpack(bytes, CEU_NOP, I);
case MALI_CS_OPCODE_NOP: {
pan_unpack(bytes, CS_NOP, I);
if (I.ignored)
fprintf(fp, "NOP // 0x%" PRIX64 "\n", I.ignored);
@@ -314,25 +487,25 @@ disassemble_ceu_instr(struct pandecode_context *ctx, uint64_t dword,
break;
}
case MALI_CEU_OPCODE_MOVE: {
pan_unpack(bytes, CEU_MOVE, I);
case MALI_CS_OPCODE_MOVE: {
pan_unpack(bytes, CS_MOVE, I);
fprintf(fp, "MOVE d%u, #0x%" PRIX64 "\n", I.destination, I.immediate);
break;
}
case MALI_CEU_OPCODE_MOVE32: {
pan_unpack(bytes, CEU_MOVE32, I);
case MALI_CS_OPCODE_MOVE32: {
pan_unpack(bytes, CS_MOVE32, I);
fprintf(fp, "MOVE32 r%u, #0x%X\n", I.destination, I.immediate);
break;
}
case MALI_CEU_OPCODE_WAIT: {
case MALI_CS_OPCODE_WAIT: {
bool first = true;
pan_unpack(bytes, CEU_WAIT, I);
fprintf(fp, "WAIT ");
pan_unpack(bytes, CS_WAIT, I);
fprintf(fp, "WAIT%s ", I.progress_increment ? ".progress_inc" : "");
u_foreach_bit(i, I.slots) {
u_foreach_bit(i, I.wait_mask) {
fprintf(fp, "%s%u", first ? "" : ",", i);
first = false;
}
@@ -341,149 +514,137 @@ disassemble_ceu_instr(struct pandecode_context *ctx, uint64_t dword,
break;
}
case MALI_CEU_OPCODE_RUN_COMPUTE: {
pan_unpack(bytes, CEU_RUN_COMPUTE, I);
case MALI_CS_OPCODE_RUN_COMPUTE: {
pan_unpack(bytes, CS_RUN_COMPUTE, I);
pandecode_run_compute(ctx, fp, qctx, &I);
break;
}
case MALI_CEU_OPCODE_RUN_IDVS: {
pan_unpack(bytes, CEU_RUN_IDVS, I);
case MALI_CS_OPCODE_RUN_TILING: {
pan_unpack(bytes, CS_RUN_TILING, I);
pandecode_run_tiling(ctx, fp, qctx, &I);
break;
}
case MALI_CS_OPCODE_RUN_IDVS: {
pan_unpack(bytes, CS_RUN_IDVS, I);
pandecode_run_idvs(ctx, fp, qctx, &I);
break;
}
case MALI_CEU_OPCODE_RUN_FRAGMENT: {
pan_unpack(bytes, CEU_RUN_FRAGMENT, I);
fprintf(fp, "RUN_FRAGMENT%s\n",
I.enable_tem ? ".tile_enable_map_enable" : "");
pandecode_run_fragment(ctx, qctx, &I);
case MALI_CS_OPCODE_RUN_FRAGMENT: {
pan_unpack(bytes, CS_RUN_FRAGMENT, I);
pandecode_run_fragment(ctx, fp, qctx, &I);
break;
}
case MALI_CEU_OPCODE_ADD_IMMEDIATE32: {
pan_unpack(bytes, CEU_ADD_IMMEDIATE32, I);
case MALI_CS_OPCODE_RUN_FULLSCREEN: {
pan_unpack(bytes, CS_RUN_FULLSCREEN, I);
pandecode_run_fullscreen(ctx, fp, qctx, &I);
break;
}
case MALI_CS_OPCODE_FINISH_TILING: {
pan_unpack(bytes, CS_FINISH_TILING, I);
fprintf(fp, "FINISH_TILING%s\n",
I.progress_increment ? ".progress_inc" : "");
break;
}
case MALI_CS_OPCODE_FINISH_FRAGMENT: {
   pan_unpack(bytes, CS_FINISH_FRAGMENT, I);
   /* The optional modifier already carries its leading dot, so the format
    * must not add another one (or leave a dangling "." when it is absent).
    * Operands: last/first heap chunk registers, wait mask, signal slot.
    */
   fprintf(fp, "FINISH_FRAGMENT%s d%u, d%u, #%x, #%u\n",
           I.increment_fragment_completed ? ".frag_end" : "",
           I.last_heap_chunk, I.first_heap_chunk, I.wait_mask,
           I.signal_slot);
   break;
}
case MALI_CS_OPCODE_ADD_IMMEDIATE32: {
pan_unpack(bytes, CS_ADD_IMMEDIATE32, I);
fprintf(fp, "ADD_IMMEDIATE32 r%u, r%u, #%d\n", I.destination, I.source,
I.immediate);
break;
}
case MALI_CEU_OPCODE_ADD_IMMEDIATE64: {
pan_unpack(bytes, CEU_ADD_IMMEDIATE64, I);
case MALI_CS_OPCODE_ADD_IMMEDIATE64: {
pan_unpack(bytes, CS_ADD_IMMEDIATE64, I);
fprintf(fp, "ADD_IMMEDIATE64 d%u, d%u, #%d\n", I.destination, I.source,
I.immediate);
break;
}
case MALI_CEU_OPCODE_LOAD_MULTIPLE: {
pan_unpack(bytes, CEU_LOAD_MULTIPLE, I);
case MALI_CS_OPCODE_UMIN32: {
pan_unpack(bytes, CS_UMIN32, I);
fprintf(fp, "UMIN32 r%u, r%u, r%u\n", I.destination, I.source_1,
I.source_2);
break;
}
case MALI_CS_OPCODE_LOAD_MULTIPLE: {
pan_unpack(bytes, CS_LOAD_MULTIPLE, I);
fprintf(fp, "LOAD_MULTIPLE ");
print_reg_tuple(I.base, I.mask, fp);
print_reg_tuple(I.base_register, I.mask, fp);
fprintf(fp, ", ");
print_indirect(I.address, I.offset, fp);
fprintf(fp, "\n");
break;
}
case MALI_CEU_OPCODE_STORE_MULTIPLE: {
pan_unpack(bytes, CEU_STORE_MULTIPLE, I);
case MALI_CS_OPCODE_STORE_MULTIPLE: {
pan_unpack(bytes, CS_STORE_MULTIPLE, I);
fprintf(fp, "STORE_MULTIPLE ");
print_indirect(I.address, I.offset, fp);
fprintf(fp, ", ");
print_reg_tuple(I.base, I.mask, fp);
print_reg_tuple(I.base_register, I.mask, fp);
fprintf(fp, "\n");
break;
}
case MALI_CEU_OPCODE_SET_SB_ENTRY: {
pan_unpack(bytes, CEU_SET_SB_ENTRY, I);
case MALI_CS_OPCODE_BRANCH: {
pan_unpack(bytes, CS_BRANCH, I);
fprintf(fp, "BRANCH.%s r%u, #%d\n", conditions_str[I.condition], I.value,
I.offset);
break;
}
case MALI_CS_OPCODE_SET_SB_ENTRY: {
pan_unpack(bytes, CS_SET_SB_ENTRY, I);
fprintf(fp, "SET_SB_ENTRY #%u, #%u\n", I.endpoint_entry, I.other_entry);
break;
}
case MALI_CEU_OPCODE_SYNC_ADD32: {
pan_unpack(bytes, CEU_SYNC_ADD32, I);
bool first = true;
fprintf(fp, "SYNC_ADD32%s%s signal(%u), wait(",
I.error_propagate ? ".error_propagate" : "",
I.scope_csg ? ".csg" : ".system", I.scoreboard_slot);
u_foreach_bit(i, I.wait_mask) {
fprintf(fp, "%s%u", first ? "" : ",", i);
first = false;
}
fprintf(fp, ") [d%u], r%u\n", I.address, I.data);
case MALI_CS_OPCODE_PROGRESS_WAIT: {
pan_unpack(bytes, CS_PROGRESS_WAIT, I);
fprintf(fp, "PROGRESS_WAIT d%u, #%u\n", I.source, I.queue);
break;
}
case MALI_CEU_OPCODE_SYNC_ADD64: {
pan_unpack(bytes, CEU_SYNC_ADD64, I);
bool first = true;
fprintf(fp, "SYNC_ADD64%s%s signal(%u), wait(",
I.error_propagate ? ".error_propagate" : "",
I.scope_csg ? ".csg" : ".system", I.scoreboard_slot);
u_foreach_bit(i, I.wait_mask) {
fprintf(fp, "%s%u", first ? "" : ",", i);
first = false;
}
fprintf(fp, ") [d%u], d%u\n", I.address, I.data);
case MALI_CS_OPCODE_SET_EXCEPTION_HANDLER: {
pan_unpack(bytes, CS_SET_EXCEPTION_HANDLER, I);
fprintf(fp, "SET_EXCEPTION_HANDLER d%u, r%u\n", I.address, I.length);
break;
}
case MALI_CEU_OPCODE_SYNC_SET32: {
pan_unpack(bytes, CEU_SYNC_SET32, I);
bool first = true;
fprintf(fp, "SYNC_SET32.%s%s signal(%u), wait(",
I.error_propagate ? ".error_propagate" : "",
I.scope_csg ? ".csg" : ".system", I.scoreboard_slot);
u_foreach_bit(i, I.wait_mask) {
fprintf(fp, "%s%u", first ? "" : ",", i);
first = false;
}
fprintf(fp, ") [d%u], r%u\n", I.address, I.data);
break;
}
case MALI_CEU_OPCODE_SYNC_SET64: {
pan_unpack(bytes, CEU_SYNC_SET64, I);
bool first = true;
fprintf(fp, "SYNC_SET64.%s%s signal(%u), wait(",
I.error_propagate ? ".error_propagate" : "",
I.scope_csg ? ".csg" : ".system", I.scoreboard_slot);
u_foreach_bit(i, I.wait_mask) {
fprintf(fp, "%s%u", first ? "" : ",", i);
first = false;
}
fprintf(fp, ") [d%u], d%u\n", I.address, I.data);
break;
}
case MALI_CEU_OPCODE_CALL: {
pan_unpack(bytes, CEU_CALL, I);
case MALI_CS_OPCODE_CALL: {
pan_unpack(bytes, CS_CALL, I);
fprintf(fp, "CALL d%u, r%u\n", I.address, I.length);
break;
}
case MALI_CEU_OPCODE_JUMP: {
pan_unpack(bytes, CEU_JUMP, I);
case MALI_CS_OPCODE_JUMP: {
pan_unpack(bytes, CS_JUMP, I);
fprintf(fp, "JUMP d%u, r%u\n", I.address, I.length);
break;
}
case MALI_CEU_OPCODE_REQ_RESOURCE: {
pan_unpack(bytes, CEU_REQ_RESOURCE, I);
case MALI_CS_OPCODE_REQ_RESOURCE: {
pan_unpack(bytes, CS_REQ_RESOURCE, I);
fprintf(fp, "REQ_RESOURCE");
if (I.compute)
@@ -498,44 +659,8 @@ disassemble_ceu_instr(struct pandecode_context *ctx, uint64_t dword,
break;
}
case MALI_CEU_OPCODE_SYNC_WAIT32: {
pan_unpack(bytes, CEU_SYNC_WAIT32, I);
fprintf(fp, "SYNC_WAIT32%s%s d%u, r%u\n", I.invert ? ".gt" : ".le",
I.error_reject ? ".reject" : ".inherit", I.address, I.data);
break;
}
case MALI_CEU_OPCODE_SYNC_WAIT64: {
pan_unpack(bytes, CEU_SYNC_WAIT64, I);
fprintf(fp, "SYNC_WAIT64%s%s d%u, d%u\n", I.invert ? ".gt" : ".le",
I.error_reject ? ".reject" : ".inherit", I.address, I.data);
break;
}
case MALI_CEU_OPCODE_UMIN32: {
pan_unpack(bytes, CEU_UMIN32, I);
fprintf(fp, "UMIN32 r%u, r%u, r%u\n", I.destination, I.source_1,
I.source_2);
break;
}
case MALI_CEU_OPCODE_BRANCH: {
pan_unpack(bytes, CEU_BRANCH, I);
static const char *condition[] = {
"le", "gt", "eq", "ne", "lt", "ge", "always",
};
fprintf(fp, "BRANCH.%s r%u, #%d\n", condition[I.condition], I.value,
I.offset);
break;
}
case MALI_CEU_OPCODE_FLUSH_CACHE2: {
pan_unpack(bytes, CEU_FLUSH_CACHE2, I);
case MALI_CS_OPCODE_FLUSH_CACHE2: {
pan_unpack(bytes, CS_FLUSH_CACHE2, I);
static const char *mode[] = {
"nop",
"clean",
@@ -543,66 +668,134 @@ disassemble_ceu_instr(struct pandecode_context *ctx, uint64_t dword,
"clean_invalidate",
};
fprintf(fp, "FLUSH_CACHE2.%s_l2.%s_lsc%s r%u, signal(%u), wait(",
fprintf(fp, "FLUSH_CACHE2.%s_l2.%s_lsc%s r%u, #%x, #%u\n",
mode[I.l2_flush_mode], mode[I.lsc_flush_mode],
I.other_invalidate ? ".invalidate_other" : "", I.latest_flush_id,
I.scoreboard_entry);
bool first = true;
u_foreach_bit(i, I.scoreboard_mask) {
fprintf(fp, "%s%u", first ? "" : ",", i);
first = false;
}
fprintf(fp, ")\n");
I.other_invalidate ? ".invalidate_other" : ".nop_other",
I.latest_flush_id, I.wait_mask, I.signal_slot);
break;
}
case MALI_CEU_OPCODE_FINISH_TILING: {
pan_unpack(bytes, CEU_FINISH_TILING, I);
fprintf(fp, "FINISH_TILING\n");
case MALI_CS_OPCODE_SYNC_ADD32: {
pan_unpack(bytes, CS_SYNC_ADD32, I);
fprintf(fp, "SYNC_ADD32%s%s [d%u], r%u, #%x, #%u\n",
I.error_propagate ? ".error_propagate" : "",
I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system", I.address,
I.data, I.wait_mask, I.signal_slot);
break;
}
case MALI_CEU_OPCODE_FINISH_FRAGMENT: {
pan_unpack(bytes, CEU_FINISH_FRAGMENT, I);
bool first = true;
fprintf(fp, "FINISH_FRAGMENT.%s, d%u, d%u, signal(%u), wait(",
I.increment_fragment_completed ? ".frag_end" : "",
I.last_heap_chunk, I.first_heap_chunk, I.scoreboard_entry);
u_foreach_bit(i, I.wait_mask) {
fprintf(fp, "%s%u", first ? "" : ",", i);
first = false;
}
fprintf(fp, ")\n");
case MALI_CS_OPCODE_SYNC_SET32: {
   pan_unpack(bytes, CS_SYNC_SET32, I);
   /* Both modifiers carry their own leading dot, so none follows the
    * mnemonic. Operands: address register, data register, wait mask,
    * signal slot.
    */
   fprintf(fp, "SYNC_SET32%s%s [d%u], r%u, #%x, #%u\n",
           I.error_propagate ? ".error_propagate" : "",
           I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system", I.address,
           I.data, I.wait_mask, I.signal_slot);
   break;
}
case MALI_CEU_OPCODE_HEAP_OPERATION: {
pan_unpack(bytes, CEU_HEAP_OPERATION, I);
const char *counter_names[] = {"vt_start", "vt_end", NULL, "frag_end"};
bool first = true;
fprintf(fp, "HEAP_OPERATION.%s signal(%u), wait(",
counter_names[I.operation], I.scoreboard_entry);
u_foreach_bit(i, I.wait_mask) {
fprintf(fp, "%s%u", first ? "" : ",", i);
first = false;
}
fprintf(fp, ")\n");
case MALI_CS_OPCODE_SYNC_WAIT32: {
   pan_unpack(bytes, CS_SYNC_WAIT32, I);
   /* conditions_str entries have no leading dot, so add the separator here
    * to keep the condition from being glued to the mnemonic.
    */
   fprintf(fp, "SYNC_WAIT32.%s%s d%u, r%u\n", conditions_str[I.condition],
           I.error_reject ? ".reject" : ".inherit", I.address, I.data);
   break;
}
case MALI_CEU_OPCODE_HEAP_SET: {
pan_unpack(bytes, CEU_HEAP_SET, I);
case MALI_CS_OPCODE_STORE_STATE: {
static const char *states_str[] = {
"SYSTEM_TIMESTAMP",
"CYCLE_COUNT",
"DISJOINT_COUNT",
"ERROR_STATE",
};
pan_unpack(bytes, CS_STORE_STATE, I);
fprintf(fp, "STORE_STATE.%s d%u, #%i, #%x, #%u\n",
I.state >= ARRAY_SIZE(states_str) ? "UNKNOWN_STATE"
: states_str[I.state],
I.address, I.offset, I.wait_mask, I.signal_slot);
break;
}
case MALI_CS_OPCODE_PROT_REGION: {
pan_unpack(bytes, CS_PROT_REGION, I);
fprintf(fp, "PROT_REGION #%u\n", I.size);
break;
}
case MALI_CS_OPCODE_PROGRESS_STORE: {
pan_unpack(bytes, CS_PROGRESS_STORE, I);
fprintf(fp, "PROGRESS_STORE d%u\n", I.source);
break;
}
case MALI_CS_OPCODE_PROGRESS_LOAD: {
pan_unpack(bytes, CS_PROGRESS_LOAD, I);
fprintf(fp, "PROGRESS_LOAD d%u\n", I.destination);
break;
}
case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT: {
pan_unpack(bytes, CS_RUN_COMPUTE_INDIRECT, I);
pandecode_run_compute_indirect(ctx, fp, qctx, &I);
break;
}
case MALI_CS_OPCODE_ERROR_BARRIER: {
   pan_unpack(bytes, CS_ERROR_BARRIER, I);
   /* Terminate the line so the next instruction starts on its own line */
   fprintf(fp, "ERROR_BARRIER\n");
   break;
}
case MALI_CS_OPCODE_HEAP_SET: {
pan_unpack(bytes, CS_HEAP_SET, I);
fprintf(fp, "HEAP_SET d%u\n", I.address);
break;
}
case MALI_CS_OPCODE_HEAP_OPERATION: {
   pan_unpack(bytes, CS_HEAP_OPERATION, I);
   /* Index 2 has no name, and the field may hold values past the table:
    * never hand a NULL or out-of-range entry to "%s".
    */
   const char *counter_names[] = {"vt_start", "vt_end", NULL, "frag_end"};
   const char *counter_name = NULL;

   if (I.operation < sizeof(counter_names) / sizeof(counter_names[0]))
      counter_name = counter_names[I.operation];

   fprintf(fp, "HEAP_OPERATION.%s #%x, #%u\n",
           counter_name ? counter_name : "unknown", I.wait_mask,
           I.signal_slot);
   break;
}
case MALI_CS_OPCODE_TRACE_POINT: {
pan_unpack(bytes, CS_TRACE_POINT, I);
fprintf(fp, "TRACE_POINT r%d:r%d, #%x, #%u\n", I.base_register,
I.base_register + I.register_count - 1, I.wait_mask,
I.signal_slot);
break;
}
case MALI_CS_OPCODE_SYNC_ADD64: {
pan_unpack(bytes, CS_SYNC_ADD64, I);
fprintf(fp, "SYNC_ADD64%s%s [d%u], d%u, #%x, #%u\n",
I.error_propagate ? ".error_propagate" : "",
I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system", I.address,
I.data, I.wait_mask, I.signal_slot);
break;
}
case MALI_CS_OPCODE_SYNC_SET64: {
   pan_unpack(bytes, CS_SYNC_SET64, I);
   /* Both modifiers carry their own leading dot, so none follows the
    * mnemonic. Operands: address register, data register, wait mask,
    * signal slot.
    */
   fprintf(fp, "SYNC_SET64%s%s [d%u], d%u, #%x, #%u\n",
           I.error_propagate ? ".error_propagate" : "",
           I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system", I.address,
           I.data, I.wait_mask, I.signal_slot);
   break;
}
case MALI_CS_OPCODE_SYNC_WAIT64: {
   pan_unpack(bytes, CS_SYNC_WAIT64, I);
   /* conditions_str entries have no leading dot, so add the separator here
    * to keep the condition from being glued to the mnemonic.
    */
   fprintf(fp, "SYNC_WAIT64.%s%s d%u, d%u\n", conditions_str[I.condition],
           I.error_reject ? ".reject" : ".inherit", I.address, I.data);
   break;
}
default: {
fprintf(fp, "INVALID_%u 0x%" PRIX64 "\n", base.opcode, base.data);
fprintf(fp, "UNKNOWN_%u 0x%" PRIX64 "\n", base.opcode, base.data);
break;
}
}
@@ -633,7 +826,7 @@ interpret_ceu_jump(struct pandecode_context *ctx, struct queue_ctx *qctx,
}
/*
* Interpret a single instruction of the CEU, updating the register file,
* Interpret a single instruction of the CS, updating the register file,
* instruction pointer, and call stack. Memory access and GPU controls are
* ignored for now.
*
@@ -644,35 +837,35 @@ interpret_ceu_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
{
/* Unpack the base so we get the opcode */
uint8_t *bytes = (uint8_t *)qctx->ip;
pan_unpack(bytes, CEU_BASE, base);
pan_unpack(bytes, CS_BASE, base);
assert(qctx->ip < qctx->end);
switch (base.opcode) {
case MALI_CEU_OPCODE_MOVE: {
pan_unpack(bytes, CEU_MOVE, I);
case MALI_CS_OPCODE_MOVE: {
pan_unpack(bytes, CS_MOVE, I);
qctx->regs[I.destination + 0] = (uint32_t)I.immediate;
qctx->regs[I.destination + 1] = (uint32_t)(I.immediate >> 32);
break;
}
case MALI_CEU_OPCODE_MOVE32: {
pan_unpack(bytes, CEU_MOVE32, I);
case MALI_CS_OPCODE_MOVE32: {
pan_unpack(bytes, CS_MOVE32, I);
qctx->regs[I.destination] = I.immediate;
break;
}
case MALI_CEU_OPCODE_ADD_IMMEDIATE32: {
pan_unpack(bytes, CEU_ADD_IMMEDIATE32, I);
case MALI_CS_OPCODE_ADD_IMMEDIATE32: {
pan_unpack(bytes, CS_ADD_IMMEDIATE32, I);
qctx->regs[I.destination] = qctx->regs[I.source] + I.immediate;
break;
}
case MALI_CEU_OPCODE_ADD_IMMEDIATE64: {
pan_unpack(bytes, CEU_ADD_IMMEDIATE64, I);
case MALI_CS_OPCODE_ADD_IMMEDIATE64: {
pan_unpack(bytes, CS_ADD_IMMEDIATE64, I);
int64_t value =
(qctx->regs[I.source] | ((int64_t)qctx->regs[I.source + 1] << 32)) +
@@ -683,8 +876,8 @@ interpret_ceu_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
break;
}
case MALI_CEU_OPCODE_CALL: {
pan_unpack(bytes, CEU_CALL, I);
case MALI_CS_OPCODE_CALL: {
pan_unpack(bytes, CS_CALL, I);
if (qctx->call_stack_depth == MAX_CALL_STACK_DEPTH) {
fprintf(stderr, "CS call stack overflow\n");
@@ -706,8 +899,8 @@ interpret_ceu_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
return interpret_ceu_jump(ctx, qctx, I.address, I.length);
}
case MALI_CEU_OPCODE_JUMP: {
pan_unpack(bytes, CEU_JUMP, I);
case MALI_CS_OPCODE_JUMP: {
pan_unpack(bytes, CS_JUMP, I);
if (qctx->call_stack_depth == 0) {
fprintf(stderr, "Cannot jump from the entrypoint\n");
+218 -169
View File
@@ -449,7 +449,7 @@
<value name="Instance" value="1"/>
</enum>
<enum name="CEU Condition">
<enum name="CS Condition">
<value name="Lequal" value="0"/>
<value name="Greater" value="1"/>
<value name="Equal" value="2"/>
@@ -459,26 +459,35 @@
<value name="Always" value="6"/>
</enum>
<enum name="CEU State">
<enum name="CS State">
<value name="Timestamp" value="0"/>
<value name="Cycle count" value="1"/>
<value name="Disjoint count" value="2"/>
<value name="Error status" value="3"/>
</enum>
<enum name="CEU Heap Operation">
<enum name="CS Heap Operation">
<value name="Vertex/Tiler Started" value="0"/>
<value name="Vertex/Tiler Completed" value="1"/>
<value name="Fragment Completed" value="3"/>
</enum>
<enum name="CEU Flush Mode">
<enum name="CS Flush Mode">
<value name="None" value="0"/>
<value name="Clean" value="1"/>
<value name="Clean and invalidate" value="3"/>
</enum>
<enum name="CEU Opcode">
<enum name="CS Sync scope">
<value name="System" value="0"/>
<value name="CSG" value="2"/>
</enum>
<enum name="CS Exception type">
<value name="Tiler OOM" value="2"/>
</enum>
<enum name="CS Opcode">
<value name="NOP" value="0"/>
<value name="MOVE" value="1"/>
<value name="MOVE32" value="2"/>
@@ -498,7 +507,7 @@
<value name="BRANCH" value="22"/>
<value name="SET_SB_ENTRY" value="23"/>
<value name="PROGRESS_WAIT" value="24"/>
<!-- SET_EXCEPTION_HANDLER -->
<value name="SET_EXCEPTION_HANDLER" value="25"/>
<value name="CALL" value="32"/>
<value name="JUMP" value="33"/>
<value name="REQ_RESOURCE" value="34"/>
@@ -509,7 +518,7 @@
<value name="STORE_STATE" value="40"/>
<value name="PROT_REGION" value="41"/>
<value name="PROGRESS_STORE" value="42"/>
<value name="PROGEESS_LOAD" value="43"/>
<value name="PROGRESS_LOAD" value="43"/>
<value name="RUN_COMPUTE_INDIRECT" value="44"/>
<value name="ERROR_BARRIER" value="47"/>
<value name="HEAP_SET" value="48"/>
@@ -520,65 +529,58 @@
<value name="SYNC_WAIT64" value="53"/>
</enum>
<struct name="CEU Base" size="2">
<struct name="CS Base" size="2">
<field name="Data" size="56" start="0" type="hex"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode"/>
<field name="Opcode" size="8" start="56" type="CS Opcode"/>
</struct>
<struct name="CEU NOP" size="2">
<struct name="CS NOP" size="2">
<field name="Ignored" size="56" start="0" type="hex"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="NOP"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="NOP"/>
</struct>
<struct name="CEU ERROR_BARRIER" size="2">
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="ERROR_BARRIER"/>
</struct>
<struct name="CEU PROGRESS_WAIT" size="2">
<field name="Trace Buffer ID" size="5" start="0" type="uint"/>
<field name="Source" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="PROGRESS_WAIT"/>
</struct>
<struct name="CEU PROGRESS_STORE" size="2">
<field name="Source" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="PROGRESS_STORE"/>
</struct>
<struct name="CEU PROGRESS_LOAD" size="2">
<field name="Destination" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="PROGRESS_LOAD"/>
</struct>
<struct name="CEU MOVE" size="2">
<struct name="CS MOVE" size="2">
<field name="Immediate" size="48" start="0" type="hex"/>
<field name="Destination" size="8" start="48" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="MOVE"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="MOVE"/>
</struct>
<struct name="CEU MOVE32" size="2">
<struct name="CS MOVE32" size="2">
<field name="Immediate" size="32" start="0" type="hex"/>
<field name="Destination" size="8" start="48" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="MOVE32"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="MOVE32"/>
</struct>
<struct name="CEU WAIT" size="2">
<field name="Slots" size="8" start="16" type="hex"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="WAIT"/>
<struct name="CS WAIT" size="2">
<field name="Wait mask" size="8" start="16" type="hex"/>
<field name="Progress increment" size="1" start="32" type="bool" default="false"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="WAIT"/>
</struct>
<struct name="CEU RUN_COMPUTE" size="2">
<struct name="CS RUN_COMPUTE" size="2">
<field name="Task increment" size="14" start="0" type="uint"/>
<field name="Task axis" size="2" start="14" type="Task Axis"/>
<field name="Progress increment" size="1" start="32" type="bool" default="false"/>
<field name="SRT select" size="2" start="40" type="uint"/>
<field name="SPD select" size="2" start="42" type="uint"/>
<field name="TSD select" size="2" start="44" type="uint"/>
<field name="FAU select" size="2" start="46" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="RUN_COMPUTE"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="RUN_COMPUTE"/>
</struct>
<struct name="CEU RUN_IDVS" size="2">
<!-- RUN_TILING instruction.
     Flags override: 32-bit value in bits 0-31 (decode_csf.c ORs it into the
     primitive flags read from register 56).
     Progress increment: bit 32.
     SRT/SPD/TSD/FAU select: 2-bit selectors in bits 40-47, used by
     decode_csf.c to pick the register holding the matching pointer. -->
<struct name="CS RUN_TILING" size="2">
<field name="Flags override" size="32" start="0" type="hex"/>
<field name="Progress increment" size="1" start="32" type="bool" default="false"/>
<field name="SRT select" size="2" start="40" type="uint"/>
<field name="SPD select" size="2" start="42" type="uint"/>
<field name="TSD select" size="2" start="44" type="uint"/>
<field name="FAU select" size="2" start="46" type="uint"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="RUN_TILING"/>
</struct>
<struct name="CS RUN_IDVS" size="2">
<!-- RUN_IDVS layout: 32-bit flags override in bits 0-31, single-bit flags from bit 32 upwards, 8-bit draw ID at bit 40, opcode in bits 56-63. NOTE(review): no field for bits 36-37 is visible here (a diff hunk marker sits where they would be), and Opcode is declared twice at bit 56; confirm against the full definition. -->
<field name="Flags override" size="32" start="0" type="hex"/>
<field name="Progress increment" size="1" start="32" type="bool" default="false"/>
<field name="Malloc enable" size="1" start="33" type="bool"/>
<field name="Draw ID register enable" size="1" start="34" type="bool"/>
<field name="Varying SRT select" size="1" start="35" type="bool"/>
@@ -587,201 +589,248 @@
<field name="Fragment SRT select" size="1" start="38" type="bool"/>
<field name="Fragment TSD select" size="1" start="39" type="bool"/>
<field name="Draw ID" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="RUN_IDVS"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="RUN_IDVS"/>
</struct>
<struct name="CEU RUN_FRAGMENT" size="2">
<struct name="CS RUN_FRAGMENT" size="2">
<!-- RUN_FRAGMENT layout: "Enable TEM" flag at bit 0, 4-bit tile order at bit 4 (defaults to "Z Order"), progress-increment flag at bit 32, opcode in bits 56-63. NOTE(review): Opcode is declared twice at bit 56 ("CEU Opcode" and "CS Opcode"); presumably only the CS-typed line should remain. -->
<field name="Enable TEM" size="1" start="0" type="bool"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="RUN_FRAGMENT"/>
<field name="Tile order" size="4" start="4" type="Tile Render Order" default="Z Order"/>
<field name="Progress increment" size="1" start="32" type="bool" default="false"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="RUN_FRAGMENT"/>
</struct>
<struct name="CEU FINISH_TILING" size="2">
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="FINISH_TILING"/>
<struct name="CS RUN_FULLSCREEN" size="2">
<!-- RUN_FULLSCREEN layout: 32-bit flags override in bits 0-31, progress-increment flag at bit 32, 8-bit "DCD" operand at bit 40 (presumably a register index; TODO confirm), opcode in bits 56-63. -->
<field name="Flags override" size="32" start="0" type="hex"/>
<field name="Progress increment" size="1" start="32" type="bool" default="false"/>
<field name="DCD" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="RUN_FULLSCREEN"/>
</struct>
<struct name="CEU FINISH_FRAGMENT" size="2">
<struct name="CS FINISH_TILING" size="2">
<!-- FINISH_TILING layout: only a progress-increment flag at bit 32 and the opcode in bits 56-63. -->
<field name="Progress increment" size="1" start="32" type="bool" default="false"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="FINISH_TILING"/>
</struct>
<struct name="CS FINISH_FRAGMENT" size="2">
<!-- FINISH_FRAGMENT layout: "Increment Fragment Completed" flag at bit 0, 16-bit wait mask at bit 16, two 8-bit heap chunk operands at bits 32 and 40, 4-bit slot at bit 48, opcode in bits 56-63. NOTE(review): bit 48 is declared twice ("Scoreboard entry" and "Signal slot") and so is Opcode; presumably only the "Signal slot" / "CS Opcode" lines should remain. -->
<field name="Increment Fragment Completed" size="1" start="0" type="bool"/>
<field name="Wait mask" size="16" start="16" type="hex"/>
<field name="Last Heap Chunk" size="8" start="32" type="hex"/>
<field name="First Heap Chunk" size="8" start="40" type="hex"/>
<field name="Scoreboard entry" size="4" start="48" type="hex"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="FINISH_FRAGMENT"/>
<field name="Signal slot" size="4" start="48" type="hex"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="FINISH_FRAGMENT"/>
</struct>
<struct name="CEU HEAP_OPERATION" size="2">
<field name="Wait mask" size="16" start="16" type="hex"/>
<field name="Operation" size="2" start="32" type="CEU Heap Operation"/>
<field name="Scoreboard entry" size="4" start="48" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="HEAP_OPERATION"/>
</struct>
<struct name="CEU TRACE_POINT" size="2">
<field name="Wait mask" size="16" start="16" type="hex"/>
<field name="Base Register" size="8" start="32" type="uint"/>
<field name="Register Count" size="8" start="40" type="uint"/>
<field name="Scoreboard slot" size="4" start="48" type="hex"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="TRACE_POINT"/>
</struct>
<struct name="CEU CALL" size="2">
<field name="Length" size="8" start="32" type="uint"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="CALL"/>
</struct>
<struct name="CEU JUMP" size="2">
<field name="Length" size="8" start="32" type="uint"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="JUMP"/>
</struct>
<struct name="CEU ADD_IMMEDIATE64" size="2">
<struct name="CS ADD_IMMEDIATE32" size="2">
<!-- ADD_IMMEDIATE32 layout: 32-bit "int" immediate in bits 0-31, 8-bit source at bit 40, 8-bit destination at bit 48, opcode in bits 56-63. NOTE(review): Opcode is declared twice at bit 56, and the CEU-typed line defaults to ADD_IMMEDIATE64; presumably only the CS-typed line should remain. -->
<field name="Immediate" size="32" start="0" type="int"/>
<field name="Source" size="8" start="40" type="uint"/>
<field name="Destination" size="8" start="48" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="ADD_IMMEDIATE64"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="ADD_IMMEDIATE32"/>
</struct>
<struct name="CEU ADD_IMMEDIATE32" size="2">
<struct name="CS ADD_IMMEDIATE64" size="2">
<!-- ADD_IMMEDIATE64 layout: 32-bit "int" immediate in bits 0-31, 8-bit source at bit 40, 8-bit destination at bit 48, opcode in bits 56-63. NOTE(review): Opcode is declared twice at bit 56, and the CEU-typed line defaults to ADD_IMMEDIATE32; presumably only the CS-typed line should remain. -->
<field name="Immediate" size="32" start="0" type="int"/>
<field name="Source" size="8" start="40" type="uint"/>
<field name="Destination" size="8" start="48" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="ADD_IMMEDIATE32"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="ADD_IMMEDIATE64"/>
</struct>
<struct name="CEU UMIN32" size="2">
<struct name="CS UMIN32" size="2">
<!-- UMIN32 layout: two 8-bit source operands at bits 32 and 40, 8-bit destination at bit 48, opcode in bits 56-63. NOTE(review): Opcode is declared twice at bit 56 ("CEU Opcode" and "CS Opcode"); presumably only the CS-typed line should remain. -->
<field name="Source 1" size="8" start="32" type="uint"/>
<field name="Source 2" size="8" start="40" type="uint"/>
<field name="Destination" size="8" start="48" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="UMIN32"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="UMIN32"/>
</struct>
<struct name="CEU LOAD_MULTIPLE" size="2">
<struct name="CS LOAD_MULTIPLE" size="2">
<!-- LOAD_MULTIPLE layout: 16-bit "int" offset in bits 0-15, 16-bit mask at bit 16, 8-bit address operand at bit 40, 8-bit base at bit 48, opcode in bits 56-63. NOTE(review): bit 48 is declared twice ("Base" and "Base register") and so is Opcode; presumably only the "Base register" / "CS Opcode" lines should remain. -->
<field name="Offset" size="16" start="0" type="int"/>
<field name="Mask" size="16" start="16" type="hex"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Base" size="8" start="48" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="LOAD_MULTIPLE"/>
<field name="Base register" size="8" start="48" type="uint"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="LOAD_MULTIPLE"/>
</struct>
<struct name="CEU STORE_MULTIPLE" size="2">
<struct name="CS STORE_MULTIPLE" size="2">
<!-- STORE_MULTIPLE layout: 16-bit "int" offset in bits 0-15, 16-bit mask at bit 16, 8-bit address operand at bit 40, 8-bit base at bit 48, opcode in bits 56-63. NOTE(review): bit 48 is declared twice ("Base" and "Base register") and so is Opcode; presumably only the "Base register" / "CS Opcode" lines should remain. -->
<field name="Offset" size="16" start="0" type="int"/>
<field name="Mask" size="16" start="16" type="hex"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Base" size="8" start="48" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="STORE_MULTIPLE"/>
<field name="Base register" size="8" start="48" type="uint"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="STORE_MULTIPLE"/>
</struct>
<struct name="CEU BRANCH" size="2">
<struct name="CS BRANCH" size="2">
<!-- BRANCH layout: 16-bit "int" offset in bits 0-15, 3-bit condition at bit 28, 8-bit value operand at bit 40, opcode in bits 56-63. NOTE(review): Condition and Opcode are each declared twice (CEU-typed and CS-typed); presumably only the CS-typed lines should remain. -->
<field name="Offset" size="16" start="0" type="int"/>
<field name="Condition" size="3" start="28" type="CEU Condition"/>
<field name="Condition" size="3" start="28" type="CS Condition"/>
<field name="Value" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="BRANCH"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="BRANCH"/>
</struct>
<struct name="CEU PROT_REGION" size="2">
<field name="Size (instructions)" size="16" start="0" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="PROT_REGION"/>
</struct>
<struct name="CEU SET_SB_ENTRY" size="2">
<struct name="CS SET_SB_ENTRY" size="2">
<!-- SET_SB_ENTRY layout: 4-bit "Endpoint entry" in bits 0-3, 4-bit "Other entry" in bits 4-7, opcode in bits 56-63. NOTE(review): Opcode is declared twice at bit 56 ("CEU Opcode" and "CS Opcode"); presumably only the CS-typed line should remain. -->
<field name="Endpoint entry" size="4" start="0" type="uint"/>
<field name="Other entry" size="4" start="4" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="SET_SB_ENTRY"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="SET_SB_ENTRY"/>
</struct>
<struct name="CEU SYNC_ADD32" size="2">
<!-- Iter status is written at [Address + 4], 0 == NO_FAULT, 1 == FAULT -->
<field name="Error propagate" size="1" start="0" type="bool"/>
<field name="Scope CSG" size="1" start="2" type="bool"/>
<field name="Wait mask" size="16" start="16" type="hex"/>
<field name="Data" size="8" start="32" type="uint"/>
<struct name="CS PROGRESS_WAIT" size="2">
<!-- PROGRESS_WAIT layout: 5-bit queue number in bits 0-4, 8-bit source operand at bit 40 (presumably a register index; TODO confirm), opcode in bits 56-63. -->
<field name="Queue" size="5" start="0" type="uint"/>
<field name="Source" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="PROGRESS_WAIT"/>
</struct>
<struct name="CS SET_EXCEPTION_HANDLER" size="2">
<!-- SET_EXCEPTION_HANDLER layout: exception type in bits 0-7, Length operand in bits 32-39, Address operand in bits 40-47, opcode in bits 56-63. Length and Address are separate 8-bit operands and must not share bits; they sit at the same positions as in CS CALL and CS JUMP. -->
<field name="Exception type" size="8" start="0" type="CS Exception type"/>
<field name="Length" size="8" start="32" type="uint"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="SET_EXCEPTION_HANDLER"/>
</struct>
<struct name="CS CALL" size="2">
<!-- CALL layout: 8-bit length operand at bit 32, 8-bit address operand at bit 40, opcode in bits 56-63. NOTE(review): the "Scoreboard slot" field and the CEU-typed Opcode line (default SYNC_ADD32) do not match this instruction; presumably they belong to a different definition and only the CS-typed Opcode should remain. -->
<field name="Length" size="8" start="32" type="uint"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Scoreboard slot" size="4" start="48" type="hex"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="SYNC_ADD32"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="CALL"/>
</struct>
<struct name="CEU SYNC_SET32" size="2">
<!-- Iter status is written at [Address + 4], 0 == NO_FAULT, 1 == FAULT -->
<field name="Error propagate" size="1" start="0" type="bool"/>
<field name="Scope CSG" size="1" start="2" type="bool"/>
<field name="Wait mask" size="16" start="16" type="hex"/>
<field name="Data" size="8" start="32" type="uint"/>
<struct name="CS JUMP" size="2">
<!-- JUMP layout: 8-bit length operand at bit 32, 8-bit address operand at bit 40, opcode in bits 56-63. NOTE(review): the "Scoreboard slot" field and the CEU-typed Opcode line (default SYNC_SET32) do not match this instruction; presumably they belong to a different definition and only the CS-typed Opcode should remain. -->
<field name="Length" size="8" start="32" type="uint"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Scoreboard slot" size="4" start="48" type="hex"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="SYNC_SET32"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="JUMP"/>
</struct>
<struct name="CEU STORE_STATE" size="2">
<!-- Iter status is written at [Address + 4], 0 == NO_FAULT, 1 == FAULT -->
<field name="Offset" size="16" start="0" type="int"/>
<field name="Wait mask" size="16" start="16" type="hex"/>
<field name="State" size="2" start="32" type="CEU State"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Scoreboard slot" size="4" start="48" type="hex"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="STORE_STATE"/>
</struct>
<struct name="CEU SYNC_ADD64" size="2">
<!-- Iter status is written at [Address + 8], 0 == NO_FAULT, 1 == FAULT -->
<field name="Error propagate" size="1" start="0" type="bool"/>
<!-- Used for inter-queue synchronization (unblocking a queue waiting on a mem-based fence) -->
<field name="Scope CSG" size="1" start="2" type="bool"/>
<field name="Wait mask" size="16" start="16" type="hex"/>
<field name="Data" size="8" start="32" type="uint"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Scoreboard slot" size="4" start="48" type="hex"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="SYNC_ADD64"/>
</struct>
<struct name="CEU SYNC_SET64" size="2">
<!-- Iter status is written at [Address + 8], 0 == NO_FAULT, 1 == FAULT -->
<field name="Error propagate" size="1" start="0" type="bool"/>
<field name="Scope CSG" size="1" start="2" type="bool"/>
<field name="Wait mask" size="16" start="16" type="hex"/>
<field name="Data" size="8" start="32" type="uint"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Scoreboard slot" size="4" start="48" type="hex"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="SYNC_SET64"/>
</struct>
<struct name="CEU FLUSH_CACHE2" size="2">
<field name="L2 flush mode" size="4" start="0" type="CEU Flush Mode"/>
<field name="LSC flush mode" size="4" start="4" type="CEU Flush Mode"/>
<field name="Other invalidate" size="1" start="9" type="bool"/>
<field name="Scoreboard mask" size="16" start="16" type="hex"/>
<field name="Latest Flush ID" size="8" start="40" type="uint"/>
<field name="Scoreboard entry" size="4" start="48" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="FLUSH_CACHE2"/>
</struct>
<struct name="CEU REQ_RESOURCE" size="2">
<struct name="CS REQ_RESOURCE" size="2">
<!-- REQ_RESOURCE layout: four single-bit flags in bits 0-3 (Compute, Fragment, Tiler, IDVS) and the opcode in bits 56-63. NOTE(review): Opcode is declared twice at bit 56 ("CEU Opcode" and "CS Opcode"); presumably only the CS-typed line should remain. -->
<field name="Compute" size="1" start="0" type="bool"/>
<field name="Fragment" size="1" start="1" type="bool"/>
<field name="Tiler" size="1" start="2" type="bool"/>
<field name="IDVS" size="1" start="3" type="bool"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="REQ_RESOURCE"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="REQ_RESOURCE"/>
</struct>
<struct name="CEU SYNC_WAIT32" size="2">
<field name="Error reject" size="1" start="0" type="bool"/>
<field name="Invert" size="1" start="28" type="bool"/>
<struct name="CS FLUSH_CACHE2" size="2">
<!-- FLUSH_CACHE2 layout: two 4-bit flush modes (L2 at bit 0, LSC at bit 4), "Other invalidate" flag at bit 9, 16-bit wait mask at bit 16, 8-bit "Latest Flush ID" operand at bit 40, 4-bit signal slot at bit 48, opcode in bits 56-63. NOTE(review): "Signal slot" is typed uint here but hex in most other instructions; the bit layout is unaffected. -->
<field name="L2 flush mode" size="4" start="0" type="CS Flush Mode"/>
<field name="LSC flush mode" size="4" start="4" type="CS Flush Mode"/>
<field name="Other invalidate" size="1" start="9" type="bool"/>
<field name="Wait mask" size="16" start="16" type="hex"/>
<field name="Latest Flush ID" size="8" start="40" type="uint"/>
<field name="Signal slot" size="4" start="48" type="uint"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="FLUSH_CACHE2"/>
</struct>
<struct name="CS SYNC_ADD32" size="2">
<!-- SYNC_ADD32 layout: error-propagate flag at bit 0, 2-bit scope at bit 1, 16-bit wait mask at bit 16, 8-bit data and address operands at bits 32 and 40, 4-bit signal slot at bit 48, opcode in bits 56-63. NOTE(review): the CEU-typed Opcode line defaulting to SYNC_WAIT32 does not match this instruction; presumably only the CS-typed line should remain. -->
<!-- Iter status is written at [Address + 4], 0 == NO_FAULT, 1 == FAULT -->
<field name="Error propagate" size="1" start="0" type="bool"/>
<field name="Scope" size="2" start="1" type="CS Sync scope"/>
<field name="Wait mask" size="16" start="16" type="hex"/>
<field name="Data" size="8" start="32" type="uint"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="SYNC_WAIT32"/>
<field name="Signal slot" size="4" start="48" type="hex"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="SYNC_ADD32"/>
</struct>
<struct name="CEU SYNC_WAIT64" size="2">
<field name="Error reject" size="1" start="0" type="bool"/>
<field name="Invert" size="1" start="28" type="bool"/>
<struct name="CS SYNC_SET32" size="2">
<!-- SYNC_SET32 layout: error-propagate flag at bit 0, 2-bit scope at bit 1, 16-bit wait mask at bit 16, 8-bit data and address operands at bits 32 and 40, 4-bit signal slot at bit 48, opcode in bits 56-63. NOTE(review): the CEU-typed Opcode line defaulting to SYNC_WAIT64 does not match this instruction; presumably only the CS-typed line should remain. -->
<!-- Iter status is written at [Address + 4], 0 == NO_FAULT, 1 == FAULT -->
<field name="Error propagate" size="1" start="0" type="bool"/>
<field name="Scope" size="2" start="1" type="CS Sync scope"/>
<field name="Wait mask" size="16" start="16" type="hex"/>
<field name="Data" size="8" start="32" type="uint"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="SYNC_WAIT64"/>
<field name="Signal slot" size="4" start="48" type="hex"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="SYNC_SET32"/>
</struct>
<struct name="CEU HEAP_SET" size="2">
<struct name="CS SYNC_WAIT32" size="2">
<!-- SYNC_WAIT32 layout: error-reject flag at bit 0, 4-bit condition at bit 28, 8-bit data and address operands at bits 32 and 40, opcode in bits 56-63. NOTE(review): Condition is 4 bits here but 3 bits in CS BRANCH with the same "CS Condition" type; confirm the width. The CEU-typed Opcode line defaulting to HEAP_SET does not match this instruction; presumably only the CS-typed line should remain. -->
<field name="Error reject" size="1" start="0" type="bool"/>
<field name="Condition" size="4" start="28" type="CS Condition"/>
<field name="Data" size="8" start="32" type="uint"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CEU Opcode" default="HEAP_SET"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="SYNC_WAIT32"/>
</struct>
<struct name="CS STORE_STATE" size="2">
<!-- STORE_STATE layout: 16-bit "int" offset in bits 0-15, 16-bit wait mask at bit 16, 2-bit state selector at bit 32, 8-bit address operand at bit 40, 4-bit signal slot at bit 48, opcode in bits 56-63. -->
<!-- Iter status is written at [Address + 4], 0 == NO_FAULT, 1 == FAULT -->
<field name="Offset" size="16" start="0" type="int"/>
<field name="Wait mask" size="16" start="16" type="hex"/>
<field name="State" size="2" start="32" type="CS State"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Signal slot" size="4" start="48" type="hex"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="STORE_STATE"/>
</struct>
<struct name="CS PROT_REGION" size="2">
<!-- PROT_REGION layout: 16-bit size in bits 0-15 and the opcode in bits 56-63. NOTE(review): modifier="shr(3)" presumably means the value is stored shifted right by 3, i.e. in units of 8; confirm against the generator and the callers' units. -->
<field name="Size" size="16" start="0" type="uint" modifier="shr(3)"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="PROT_REGION"/>
</struct>
<struct name="CS PROGRESS_STORE" size="2">
<!-- PROGRESS_STORE layout: 8-bit source operand at bit 40 (presumably a register index; TODO confirm) and the opcode in bits 56-63. -->
<field name="Source" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="PROGRESS_STORE"/>
</struct>
<struct name="CS PROGRESS_LOAD" size="2">
<!-- PROGRESS_LOAD layout: 8-bit destination operand at bit 40 (presumably a register index; TODO confirm) and the opcode in bits 56-63. NOTE(review): other instructions in this section place "Destination" at bit 48; confirm bit 40 is intended. -->
<field name="Destination" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="PROGRESS_LOAD"/>
</struct>
<struct name="CS RUN_COMPUTE_INDIRECT" size="2">
<!-- RUN_COMPUTE_INDIRECT layout: 16-bit "Workgroups per task" in bits 0-15, progress-increment flag at bit 32, four 2-bit selects (SRT/SPD/TSD/FAU) in bits 40-47, opcode in bits 56-63. -->
<field name="Workgroups per task" size="16" start="0" type="uint"/>
<field name="Progress increment" size="1" start="32" type="bool" default="false"/>
<field name="SRT select" size="2" start="40" type="uint"/>
<field name="SPD select" size="2" start="42" type="uint"/>
<field name="TSD select" size="2" start="44" type="uint"/>
<field name="FAU select" size="2" start="46" type="uint"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="RUN_COMPUTE_INDIRECT"/>
</struct>
<struct name="CS ERROR_BARRIER" size="2">
<!-- ERROR_BARRIER layout: no operands, only the opcode in bits 56-63. -->
<field name="Opcode" size="8" start="56" type="CS Opcode" default="ERROR_BARRIER"/>
</struct>
<struct name="CS HEAP_SET" size="2">
<!-- HEAP_SET layout: 8-bit address operand at bit 40 (presumably a register index; TODO confirm) and the opcode in bits 56-63. -->
<field name="Address" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="HEAP_SET"/>
</struct>
<struct name="CS HEAP_OPERATION" size="2">
<!-- HEAP_OPERATION layout: 16-bit wait mask at bit 16, 2-bit operation selector at bit 32, 4-bit signal slot at bit 48, opcode in bits 56-63. NOTE(review): "Signal slot" is typed uint here but hex in most other instructions; the bit layout is unaffected. -->
<field name="Wait mask" size="16" start="16" type="hex"/>
<field name="Operation" size="2" start="32" type="CS Heap Operation"/>
<field name="Signal slot" size="4" start="48" type="uint"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="HEAP_OPERATION"/>
</struct>
<struct name="CS TRACE_POINT" size="2">
<!-- TRACE_POINT layout: 16-bit wait mask at bit 16, 8-bit base register at bit 32, 8-bit register count at bit 40, 4-bit signal slot at bit 48, opcode in bits 56-63. -->
<field name="Wait mask" size="16" start="16" type="hex"/>
<field name="Base Register" size="8" start="32" type="uint"/>
<field name="Register Count" size="8" start="40" type="uint"/>
<field name="Signal slot" size="4" start="48" type="hex"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="TRACE_POINT"/>
</struct>
<struct name="CS SYNC_ADD64" size="2">
<!-- SYNC_ADD64 layout: error-propagate flag at bit 0, 2-bit scope at bit 1, 16-bit wait mask at bit 16, 8-bit data and address operands at bits 32 and 40, 4-bit signal slot at bit 48, opcode in bits 56-63. -->
<!-- Iter status is written at [Address + 8], 0 == NO_FAULT, 1 == FAULT -->
<field name="Error propagate" size="1" start="0" type="bool"/>
<!-- Used for inter-queue synchronization (unblocking a queue waiting on a mem-based fence) -->
<field name="Scope" size="2" start="1" type="CS Sync scope"/>
<field name="Wait mask" size="16" start="16" type="hex"/>
<field name="Data" size="8" start="32" type="uint"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Signal slot" size="4" start="48" type="hex"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="SYNC_ADD64"/>
</struct>
<struct name="CS SYNC_SET64" size="2">
<!-- SYNC_SET64 layout: error-propagate flag at bit 0, 2-bit scope at bit 1, 16-bit wait mask at bit 16, 8-bit data and address operands at bits 32 and 40, 4-bit signal slot at bit 48, opcode in bits 56-63. -->
<!-- Iter status is written at [Address + 8], 0 == NO_FAULT, 1 == FAULT -->
<field name="Error propagate" size="1" start="0" type="bool"/>
<field name="Scope" size="2" start="1" type="CS Sync scope"/>
<field name="Wait mask" size="16" start="16" type="hex"/>
<field name="Data" size="8" start="32" type="uint"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Signal slot" size="4" start="48" type="hex"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="SYNC_SET64"/>
</struct>
<struct name="CS SYNC_WAIT64" size="2">
<!-- SYNC_WAIT64 layout: error-reject flag at bit 0, 4-bit condition at bit 28, 8-bit data and address operands at bits 32 and 40, opcode in bits 56-63. NOTE(review): Condition is 4 bits here but 3 bits in CS BRANCH with the same "CS Condition" type; confirm the width. -->
<field name="Error reject" size="1" start="0" type="bool"/>
<field name="Condition" size="4" start="28" type="CS Condition"/>
<field name="Data" size="8" start="32" type="uint"/>
<field name="Address" size="8" start="40" type="uint"/>
<field name="Opcode" size="8" start="56" type="CS Opcode" default="SYNC_WAIT64"/>
</struct>
<struct name="Attribute" size="8" align="32">