From 98a5b9b45407e90f1ae076958388ca5ebacd69af Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 23 Feb 2021 23:22:13 -0600 Subject: [PATCH] intel/mi_builder: Add control-flow support Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/common/mi_builder.h | 131 ++++++++++++++- src/intel/common/tests/mi_builder_test.cpp | 180 +++++++++++++++++++++ 2 files changed, 307 insertions(+), 4 deletions(-) diff --git a/src/intel/common/mi_builder.h b/src/intel/common/mi_builder.h index 57f64369a86..1a9eb1a7691 100644 --- a/src/intel/common/mi_builder.h +++ b/src/intel/common/mi_builder.h @@ -174,11 +174,17 @@ mi_builder_flush_math(struct mi_builder *b) #if GEN_GEN >= 8 || GEN_IS_HASWELL +static inline bool /* True when val has type MI_VALUE_TYPE_REG32 or MI_VALUE_TYPE_REG64. */ +mi_value_is_reg(struct mi_value val) +{ + return val.type == MI_VALUE_TYPE_REG32 || + val.type == MI_VALUE_TYPE_REG64; +} + static inline bool mi_value_is_gpr(struct mi_value val) { - return (val.type == MI_VALUE_TYPE_REG32 || - val.type == MI_VALUE_TYPE_REG64) && + return mi_value_is_reg(val) && val.reg >= _MI_BUILDER_GPR_BASE && val.reg < _MI_BUILDER_GPR_BASE + _MI_BUILDER_NUM_HW_GPRS * 8; @@ -187,8 +193,7 @@ mi_value_is_gpr(struct mi_value val) static inline bool _mi_value_is_allocated_gpr(struct mi_value val) { - return (val.type == MI_VALUE_TYPE_REG32 || - val.type == MI_VALUE_TYPE_REG64) && + return mi_value_is_reg(val) && val.reg >= _MI_BUILDER_GPR_BASE && val.reg < _MI_BUILDER_GPR_BASE + MI_BUILDER_NUM_ALLOC_GPRS * 8; @@ -1238,6 +1243,124 @@ mi_store_mem64_offset(struct mi_builder *b, mi_builder_flush_math(b); } +/* + * Control-flow Section. 
Only available on GFX 12.5+ + */ + /* One goto emitted before its target was placed; mi_goto_target() fills it in later. */ +struct _mi_goto { + bool predicated; /* copied into PredicationEnable when the jump is packed */ + void *mi_bbs; /* batch dwords reserved for the MI_BATCH_BUFFER_START */ +}; + /* A jump destination plus the gotos that were emitted before it was placed. */ +struct mi_goto_target { + bool placed; /* set by mi_goto_target() once addr has been recorded */ + unsigned num_gotos; /* number of valid entries in gotos[] */ + struct _mi_goto gotos[8]; /* pending gotos; the capacity is checked only by an assert */ + __gen_address_type addr; /* batch address recorded by mi_goto_target() */ +}; + +#define MI_GOTO_TARGET_INIT ((struct mi_goto_target) {}) + /* Register that mi_goto_if() loads with any condition that is not already in it. */ +#define MI_BUILDER_MI_PREDICATE_RESULT_num 0x2418 + /* Emit a jump to t conditioned on cond. An immediate cond must be 0 (nothing is emitted) or ~0 (unpredicated jump); any other cond makes the jump predicated. */ +static inline void +mi_goto_if(struct mi_builder *b, struct mi_value cond, + struct mi_goto_target *t) +{ + /* First, set up the predicate, if any */ + bool predicated; + if (cond.type == MI_VALUE_TYPE_IMM) { + /* If it's an immediate, the goto either doesn't happen or happens + * unconditionally. + */ + if (mi_value_to_u64(cond) == 0) + return; /* never taken, so no commands are emitted at all */ + + assert(mi_value_to_u64(cond) == ~0ull); + predicated = false; + } else if (mi_value_is_reg(cond) && + cond.reg == MI_BUILDER_MI_PREDICATE_RESULT_num) { + /* If it's MI_PREDICATE_RESULT, we use whatever predicate the client + * provided us with + */ + assert(cond.type == MI_VALUE_TYPE_REG32); + predicated = true; + } else { + mi_store(b, mi_reg32(MI_BUILDER_MI_PREDICATE_RESULT_num), cond); /* copy cond into the predicate result register */ + predicated = true; + } + + if (predicated) { /* switch to NOOPOnResultClear for the jump; reset to NOOPNever below */ + mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) { + sp.PredicateEnable = NOOPOnResultClear; + } + } + if (t->placed) { /* destination already known: emit the jump with its final address */ + mi_builder_emit(b, GENX(MI_BATCH_BUFFER_START), bbs) { + bbs.PredicationEnable = predicated; + bbs.AddressSpaceIndicator = ASI_PPGTT; + bbs.BatchBufferStartAddress = t->addr; + } + } else { /* destination unknown: reserve zeroed dwords and record them for mi_goto_target() */ + assert(t->num_gotos < ARRAY_SIZE(t->gotos)); + struct _mi_goto g = { + .predicated = predicated, + .mi_bbs = __gen_get_batch_dwords(b->user_data, + GENX(MI_BATCH_BUFFER_START_length)), + }; + memset(g.mi_bbs, 0, 4 * GENX(MI_BATCH_BUFFER_START_length)); + t->gotos[t->num_gotos++] = g; + } + if (predicated) { /* restore NOOPNever after the jump */ + mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) { + sp.PredicateEnable = NOOPNever; + } + } +} + /* Unconditional form: mi_imm(-1) is presumably the all-ones immediate that mi_goto_if() asserts on (confirm). */ +static inline void +mi_goto(struct mi_builder *b, struct mi_goto_target *t) +{ + mi_goto_if(b, mi_imm(-1), t); +} + /* Place t at the current point in the batch and fill in every goto recorded for it. */ +static inline void 
+mi_goto_target(struct mi_builder *b, struct mi_goto_target *t) +{ + mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) { + sp.PredicateEnable = NOOPNever; + t->addr = __gen_get_batch_address(b->user_data, _dst); /* _dst is presumably where mi_builder_emit put this command (confirm) */ + } + t->placed = true; + /* Template jump to t->addr; only PredicationEnable differs per recorded goto. */ + struct GENX(MI_BATCH_BUFFER_START) bbs = { GENX(MI_BATCH_BUFFER_START_header) }; + bbs.AddressSpaceIndicator = ASI_PPGTT; + bbs.BatchBufferStartAddress = t->addr; + + for (unsigned i = 0; i < t->num_gotos; i++) { /* pack into each slot reserved by mi_goto_if() */ + bbs.PredicationEnable = t->gotos[i].predicated; + GENX(MI_BATCH_BUFFER_START_pack)(b->user_data, t->gotos[i].mi_bbs, &bbs); + } +} + /* Create a fresh target and place it immediately at the current batch position. */ +static inline struct mi_goto_target +mi_goto_target_init_and_place(struct mi_builder *b) +{ + struct mi_goto_target t = MI_GOTO_TARGET_INIT; + mi_goto_target(b, &t); + return t; +} + /* mi_loop: the C body runs once; __continue is placed before it, and the for-increment emits a jump back to __continue and then places __break. */ +#define mi_loop(b) \ + for (struct mi_goto_target __break = MI_GOTO_TARGET_INIT, \ + __continue = mi_goto_target_init_and_place(b); !__break.placed; \ + mi_goto(b, &__continue), mi_goto_target(b, &__break)) + /* The four macros below reference __break and __continue, which are declared by mi_loop(). */ +#define mi_break(b) mi_goto(b, &__break) +#define mi_break_if(b, cond) mi_goto_if(b, cond, &__break) +#define mi_continue(b) mi_goto(b, &__continue) +#define mi_continue_if(b, cond) mi_goto_if(b, cond, &__continue) + #endif /* GEN_VERSIONx10 >= 125 */ #endif /* MI_BUILDER_H */ diff --git a/src/intel/common/tests/mi_builder_test.cpp b/src/intel/common/tests/mi_builder_test.cpp index 07bb474fef3..d051f2e5c1d 100644 --- a/src/intel/common/tests/mi_builder_test.cpp +++ b/src/intel/common/tests/mi_builder_test.cpp @@ -45,6 +45,8 @@ struct address { uint64_t __gen_combine_address(mi_builder_test *test, void *location, struct address addr, uint32_t delta); void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords); +struct address __gen_get_batch_address(mi_builder_test *test, + void *location); struct address __gen_address_offset(address addr, uint64_t offset) @@ -375,6 +377,20 @@ __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords) return test->emit_dwords(num_dwords); } +struct 
address +__gen_get_batch_address(mi_builder_test *test, void *location) +{ + assert(location >= test->batch_map); + size_t offset = (char *)location - (char *)test->batch_map; /* byte offset of location from test->batch_map */ + assert(offset < BATCH_BO_SIZE); + assert(offset <= UINT32_MAX); + /* The offset is narrowed to 32 bits below; the asserts above guard that cast. */ + return (struct address) { + .gem_handle = test->batch_bo_handle, + .offset = (uint32_t)offset, + }; +} + #include "genxml/genX_pack.h" #include "mi_builder.h" @@ -1000,4 +1016,168 @@ TEST_F(mi_builder_test, store_mem64_offset) for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) EXPECT_EQ(*(uint64_t *)(output + offsets[i]), values[i]); } + +/* + * Control-flow tests. Only available on GFX 12.5+ + */ + +TEST_F(mi_builder_test, goto) +{ + const uint64_t value = 0xb453b411deadc0deull; + + mi_store(&b, out_mem64(0), mi_imm(value)); + + struct mi_goto_target t = MI_GOTO_TARGET_INIT; + mi_goto(&b, &t); /* forward jump: t has not been placed yet */ + + /* This one should be skipped */ + mi_store(&b, out_mem64(0), mi_imm(0)); + + mi_goto_target(&b, &t); + + submit_batch(); + + EXPECT_EQ(*(uint64_t *)(output + 0), value); +} + +#define MI_PREDICATE_RESULT 0x2418 /* same value as MI_BUILDER_MI_PREDICATE_RESULT_num in mi_builder.h */ + +TEST_F(mi_builder_test, goto_if) +{ + const uint64_t values[] = { + 0xb453b411deadc0deull, + 0x0123456789abcdefull, + 0, + }; + + mi_store(&b, out_mem64(0), mi_imm(values[0])); + + emit_cmd(GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOAD; + mip.CombineOperation = COMBINE_SET; + mip.CompareOperation = COMPARE_FALSE; + } + + struct mi_goto_target t = MI_GOTO_TARGET_INIT; + mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t); /* expected not to jump: the values[1] store below must still run */ + + mi_store(&b, out_mem64(0), mi_imm(values[1])); + + emit_cmd(GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOAD; + mip.CombineOperation = COMBINE_SET; + mip.CompareOperation = COMPARE_TRUE; + } + + mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t); /* expected to jump over the values[2] store */ + + /* This one should be skipped */ + mi_store(&b, out_mem64(0), mi_imm(values[2])); + + mi_goto_target(&b, &t); + + submit_batch(); + + EXPECT_EQ(*(uint64_t *)(output + 0), values[1]); +} + +TEST_F(mi_builder_test, loop_simple) +{ + 
const uint64_t loop_count = 8; + + mi_store(&b, out_mem64(0), mi_imm(0)); + + mi_loop(&b) { + mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count))); /* exit condition: the test expects the counter to end at loop_count */ + + mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1)); + } + + submit_batch(); + + EXPECT_EQ(*(uint64_t *)(output + 0), loop_count); +} + +TEST_F(mi_builder_test, loop_break) +{ + mi_loop(&b) { + mi_store(&b, out_mem64(0), mi_imm(1)); + + mi_break_if(&b, mi_imm(0)); /* zero immediate: mi_goto_if() returns without emitting anything */ + + mi_store(&b, out_mem64(0), mi_imm(2)); + + mi_break(&b); + /* Expected to be skipped: the test checks for 2, not 3. */ + mi_store(&b, out_mem64(0), mi_imm(3)); + } + + submit_batch(); + + EXPECT_EQ(*(uint64_t *)(output + 0), 2); +} + +TEST_F(mi_builder_test, loop_continue) +{ + const uint64_t loop_count = 8; + + mi_store(&b, out_mem64(0), mi_imm(0)); + mi_store(&b, out_mem64(8), mi_imm(0)); + + mi_loop(&b) { + mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count))); + + mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1)); + mi_store(&b, out_mem64(8), mi_imm(5)); + + mi_continue(&b); + /* Expected to be skipped: the test checks for 5 at offset 8, not 10. */ + mi_store(&b, out_mem64(8), mi_imm(10)); + } + + submit_batch(); + + EXPECT_EQ(*(uint64_t *)(output + 0), loop_count); + EXPECT_EQ(*(uint64_t *)(output + 8), 5); +} + +TEST_F(mi_builder_test, loop_continue_if) +{ + const uint64_t loop_count = 8; + + mi_store(&b, out_mem64(0), mi_imm(0)); + mi_store(&b, out_mem64(8), mi_imm(0)); + + mi_loop(&b) { + mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count))); + + mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1)); + mi_store(&b, out_mem64(8), mi_imm(5)); + + emit_cmd(GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOAD; + mip.CombineOperation = COMBINE_SET; + mip.CompareOperation = COMPARE_FALSE; + } + + mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT)); /* expected not to jump: the store of 10 below must still run */ + + mi_store(&b, out_mem64(8), mi_imm(10)); + + emit_cmd(GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOAD; + mip.CombineOperation = COMBINE_SET; + mip.CompareOperation = COMPARE_TRUE; + } + + mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT)); /* expected to jump, skipping the store of 15 */ + + mi_store(&b, 
out_mem64(8), mi_imm(15)); + } + + submit_batch(); + + EXPECT_EQ(*(uint64_t *)(output + 0), loop_count); + EXPECT_EQ(*(uint64_t *)(output + 8), 10); +} #endif /* GEN_VERSIONx10 >= 125 */