diff --git a/src/intel/compiler/brw_analysis_performance.cpp b/src/intel/compiler/brw_analysis_performance.cpp index 0c3f993004c..d5b73b73519 100644 --- a/src/intel/compiler/brw_analysis_performance.cpp +++ b/src/intel/compiler/brw_analysis_performance.cpp @@ -713,6 +713,7 @@ namespace { case SHADER_OPCODE_UNDEF: case SHADER_OPCODE_HALT_TARGET: case FS_OPCODE_SCHEDULING_FENCE: + case SHADER_OPCODE_FLOW: return calculate_desc(info, EU_UNIT_NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); diff --git a/src/intel/compiler/brw_builder.h b/src/intel/compiler/brw_builder.h index 5417cfdad8e..37faa045743 100644 --- a/src/intel/compiler/brw_builder.h +++ b/src/intel/compiler/brw_builder.h @@ -847,12 +847,22 @@ public: brw_shader *shader; brw_inst *BREAK() const { return emit(BRW_OPCODE_BREAK); } - brw_inst *DO() const { return emit(BRW_OPCODE_DO); } brw_inst *ENDIF() const { return emit(BRW_OPCODE_ENDIF); } brw_inst *NOP() const { return emit(BRW_OPCODE_NOP); } brw_inst *WHILE() const { return emit(BRW_OPCODE_WHILE); } brw_inst *CONTINUE() const { return emit(BRW_OPCODE_CONTINUE); } + void DO() const { + emit(BRW_OPCODE_DO); + /* Ensure that there'll always be a block after DO to add + * instructions and serve as sucessor for predicated WHILE + * and CONTINUE. + * + * See more details in brw_cfg::validate(). + */ + emit(SHADER_OPCODE_FLOW); + } + bool has_writemask_all() const { return force_writemask_all; } diff --git a/src/intel/compiler/brw_cfg.cpp b/src/intel/compiler/brw_cfg.cpp index e88895edf48..e153e978096 100644 --- a/src/intel/compiler/brw_cfg.cpp +++ b/src/intel/compiler/brw_cfg.cpp @@ -215,6 +215,13 @@ cfg_t::cfg_t(const brw_shader *s, exec_list *instructions) : inst->exec_node::remove(); switch (inst->opcode) { + case SHADER_OPCODE_FLOW: + cur->instructions.push_tail(inst); + next = new_block(); + cur->add_successor(mem_ctx, next, bblock_link_logical); + set_next_block(&cur, next, ip); + break; + case BRW_OPCODE_IF: cur->instructions.push_tail(inst); @@ -736,6 +743,8 @@ cfg_t::validate(const char *stage_abbrev) } } + cfgv_assert(!block->instructions.is_empty()); + brw_inst *first_inst = block->start(); if (first_inst->opcode == BRW_OPCODE_DO) { /* DO instructions both begin and end a block, so the DO instruction @@ -763,6 +772,24 @@ cfg_t::validate(const char *stage_abbrev) cfgv_assert(logical_block != nullptr); cfgv_assert(physical_block != nullptr); + + /* A flow block (block ending with SHADER_OPCODE_FLOW) is + * used to ensure that the block right after DO is always + * present even if it doesn't have actual instructions. + * + * This way predicated WHILE and CONTINUE don't need to be + * repaired when adding instructions right after the DO. + * They will point to the flow block whether is empty or not. + */ + cfgv_assert(logical_block->end()->opcode == SHADER_OPCODE_FLOW); + } + + brw_inst *last_inst = block->end(); + if (last_inst->opcode == SHADER_OPCODE_FLOW) { + /* A flow block only has one successor -- the instruction disappears + * when generating code. + */ + cfgv_assert(block->children.length() == 1); } } } diff --git a/src/intel/compiler/brw_cfg.h b/src/intel/compiler/brw_cfg.h index efed9f11564..2f1d9d7c015 100644 --- a/src/intel/compiler/brw_cfg.h +++ b/src/intel/compiler/brw_cfg.h @@ -214,7 +214,8 @@ bblock_ends_with_control_flow(const struct bblock_t *block) op == BRW_OPCODE_ELSE || op == BRW_OPCODE_WHILE || op == BRW_OPCODE_BREAK || - op == BRW_OPCODE_CONTINUE; + op == BRW_OPCODE_CONTINUE || + op == SHADER_OPCODE_FLOW; } static inline brw_inst * diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 9177cc98a94..63bf0bc4181 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -553,6 +553,9 @@ enum opcode { SHADER_OPCODE_MEMORY_LOAD_LOGICAL, SHADER_OPCODE_MEMORY_STORE_LOGICAL, SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL, + + /* Ends a block moving to the next one. See brw_cfg for details. */ + SHADER_OPCODE_FLOW, }; enum fb_write_logical_srcs { diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp index 2a62b9034bc..5fbd0b49106 100644 --- a/src/intel/compiler/brw_from_nir.cpp +++ b/src/intel/compiler/brw_from_nir.cpp @@ -471,6 +471,8 @@ brw_from_nir_emit_loop(nir_to_brw_state &ntb, nir_loop *loop) assert(!nir_loop_has_continue_construct(loop)); bld.DO(); + bld.emit(SHADER_OPCODE_FLOW); + brw_from_nir_emit_cf_list(ntb, &loop->body); brw_inst *peep_while = bld.emit(BRW_OPCODE_WHILE); diff --git a/src/intel/compiler/brw_generator.cpp b/src/intel/compiler/brw_generator.cpp index 115da7e5b28..27fcb03a07b 100644 --- a/src/intel/compiler/brw_generator.cpp +++ b/src/intel/compiler/brw_generator.cpp @@ -1083,6 +1083,10 @@ brw_generator::generate_code(const cfg_t *cfg, int dispatch_width, brw_DO(p, brw_get_default_exec_size(p)); break; + case SHADER_OPCODE_FLOW: + /* Do nothing. */ + break; + case BRW_OPCODE_BREAK: brw_BREAK(p); break; diff --git a/src/intel/compiler/brw_inst.cpp b/src/intel/compiler/brw_inst.cpp index e4ca30e9a78..b5cb9bb7337 100644 --- a/src/intel/compiler/brw_inst.cpp +++ b/src/intel/compiler/brw_inst.cpp @@ -808,6 +808,7 @@ brw_inst::is_control_flow_end() const case BRW_OPCODE_ELSE: case BRW_OPCODE_WHILE: case BRW_OPCODE_ENDIF: + case SHADER_OPCODE_FLOW: return true; default: return false; @@ -825,6 +826,7 @@ brw_inst::is_control_flow() const case BRW_OPCODE_ENDIF: case BRW_OPCODE_BREAK: case BRW_OPCODE_CONTINUE: + case SHADER_OPCODE_FLOW: return true; default: return false; diff --git a/src/intel/compiler/brw_lower_scoreboard.cpp b/src/intel/compiler/brw_lower_scoreboard.cpp index 941583a4740..04edd552906 100644 --- a/src/intel/compiler/brw_lower_scoreboard.cpp +++ b/src/intel/compiler/brw_lower_scoreboard.cpp @@ -185,6 +185,7 @@ namespace { case SHADER_OPCODE_UNDEF: case SHADER_OPCODE_HALT_TARGET: case FS_OPCODE_SCHEDULING_FENCE: + case SHADER_OPCODE_FLOW: return 0; default: /* Note that the following is inaccurate for virtual instructions diff --git a/src/intel/compiler/brw_print.cpp b/src/intel/compiler/brw_print.cpp index f0bf6491875..d19b4fe479e 100644 --- a/src/intel/compiler/brw_print.cpp +++ b/src/intel/compiler/brw_print.cpp @@ -294,6 +294,9 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) return "read_from_live_channel"; case SHADER_OPCODE_READ_FROM_CHANNEL: return "read_from_channel"; + + case SHADER_OPCODE_FLOW: + return "flow"; } unreachable("not reached"); diff --git a/src/intel/compiler/test_lower_scoreboard.cpp b/src/intel/compiler/test_lower_scoreboard.cpp index e60b8650fbb..288e7d27ef6 100644 --- a/src/intel/compiler/test_lower_scoreboard.cpp +++ b/src/intel/compiler/test_lower_scoreboard.cpp @@ -515,12 +515,14 @@ TEST_F(scoreboard_test, loop1) brw_calculate_cfg(*v); lower_scoreboard(v); - bblock_t *body = v->cfg->blocks[2]; + const int num_blocks = v->cfg->num_blocks; + + bblock_t *body = v->cfg->blocks[num_blocks - 2]; brw_inst *add = instruction(body, 0); EXPECT_EQ(add->opcode, BRW_OPCODE_ADD); EXPECT_EQ(add->sched, regdist(TGL_PIPE_FLOAT, 1)); - bblock_t *last_block = v->cfg->blocks[3]; + bblock_t *last_block = v->cfg->blocks[num_blocks - 1]; brw_inst *mul = instruction(last_block, 0); EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL); EXPECT_EQ(mul->sched, regdist(TGL_PIPE_FLOAT, 1)); @@ -550,12 +552,14 @@ TEST_F(scoreboard_test, loop2) /* Now the write in ADD has the tightest RegDist for both ADD and MUL. */ - bblock_t *body = v->cfg->blocks[2]; + const int num_blocks = v->cfg->num_blocks; + + bblock_t *body = v->cfg->blocks[num_blocks - 2]; brw_inst *add = instruction(body, 0); EXPECT_EQ(add->opcode, BRW_OPCODE_ADD); EXPECT_EQ(add->sched, regdist(TGL_PIPE_FLOAT, 2)); - bblock_t *last_block = v->cfg->blocks[3]; + bblock_t *last_block = v->cfg->blocks[num_blocks - 1]; brw_inst *mul = instruction(last_block, 0); EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL); EXPECT_EQ(mul->sched, regdist(TGL_PIPE_FLOAT, 2)); @@ -586,12 +590,14 @@ TEST_F(scoreboard_test, loop3) brw_calculate_cfg(*v); lower_scoreboard(v); - bblock_t *body = v->cfg->blocks[2]; + const int num_blocks = v->cfg->num_blocks; + + bblock_t *body = v->cfg->blocks[num_blocks - 2]; brw_inst *add = instruction(body, 4); EXPECT_EQ(add->opcode, BRW_OPCODE_ADD); EXPECT_EQ(add->sched, regdist(TGL_PIPE_FLOAT, 5)); - bblock_t *last_block = v->cfg->blocks[3]; + bblock_t *last_block = v->cfg->blocks[num_blocks - 1]; brw_inst *mul = instruction(last_block, 0); EXPECT_EQ(mul->opcode, BRW_OPCODE_MUL); EXPECT_EQ(mul->sched, regdist(TGL_PIPE_FLOAT, 1));