diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp
index 11bb77720e2..40b0f838d31 100644
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -15,9 +15,11 @@

 #define SMEM_WINDOW_SIZE (350 - ctx.num_waves * 35)
 #define VMEM_WINDOW_SIZE (1024 - ctx.num_waves * 64)
+#define LDS_WINDOW_SIZE 64
 #define POS_EXP_WINDOW_SIZE 512
 #define SMEM_MAX_MOVES (64 - ctx.num_waves * 4)
 #define VMEM_MAX_MOVES (256 - ctx.num_waves * 16)
+#define LDSDIR_MAX_MOVES 10
 /* creating clauses decreases def-use distances, so make it less aggressive the lower num_waves is */
 #define VMEM_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 2)
 #define VMEM_STORE_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 4)
@@ -979,6 +981,108 @@ schedule_VMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
    }
 }

+/* Schedules the instructions around an LDSDIR instruction.
+ *
+ * schedule_block() calls this for every instruction for which isLDSDIR() is
+ * true. Two phases share a budget of LDSDIR_MAX_MOVES moves, and each phase
+ * looks at no more than LDS_WINDOW_SIZE candidates:
+ *  1. Instructions before "current" are moved down until one of them fails
+ *     the hazard query or cannot be moved. DS and LDSDIR instructions stay in
+ *     place and are only recorded in the hazard query.
+ *  2. Once the first instruction depending on "current" is found, the
+ *     instructions after it are moved up where that is possible.
+ * Either phase ends at p_logical_start/p_logical_end and at any VMEM,
+ * FLAT-like or SMEM instruction.
+ *
+ * register_demand is not used here. idx is the position of "current" in
+ * block->instructions and must not be 0.
+ */
+void
+schedule_LDS(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& register_demand,
+             Instruction* current, int idx)
+{
+   assert(idx != 0);
+   int window_size = LDS_WINDOW_SIZE;
+   int max_moves = LDSDIR_MAX_MOVES;
+   /* number of instructions moved so far, counted across both phases */
+   int16_t k = 0;
+
+   /* first, check if we have instructions before current to move down */
+   hazard_query hq;
+   init_hazard_query(ctx, &hq);
+   add_to_hazard_query(&hq, current);
+
+   DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, false);
+
+   for (int i = 0; k < max_moves && i < window_size; i++) {
+      aco_ptr<Instruction>& candidate = block->instructions[cursor.source_idx];
+      bool is_mem = candidate->isVMEM() || candidate->isFlatLike() || candidate->isSMEM();
+      if (candidate->opcode == aco_opcode::p_logical_start || is_mem)
+         break;
+
+      /* DS and LDSDIR instructions are not moved: add them to the hazard query and skip them */
+      if (candidate->isDS() || candidate->isLDSDIR()) {
+         add_to_hazard_query(&hq, candidate.get());
+         ctx.mv.downwards_skip(cursor);
+         continue;
+      }
+
+      if (perform_hazard_query(&hq, candidate.get(), false) != hazard_success ||
+          ctx.mv.downwards_move(cursor, false) != move_success)
+         break;
+
+      k++;
+   }
+
+   /* second, check if we have instructions after current to move up */
+   bool found_dependency = false;
+   /* shared by both loops below, so that they scan one common window */
+   int i = 0;
+   UpwardsCursor up_cursor = ctx.mv.upwards_init(idx + 1, true);
+   /* find the first instruction depending on current */
+   for (; k < max_moves && i < window_size; i++) {
+      aco_ptr<Instruction>& candidate = block->instructions[up_cursor.source_idx];
+      bool is_mem = candidate->isVMEM() || candidate->isFlatLike() || candidate->isSMEM();
+      if (candidate->opcode == aco_opcode::p_logical_end || is_mem)
+         break;
+
+      /* check if candidate depends on current */
+      if (!ctx.mv.upwards_check_deps(up_cursor)) {
+         /* restart the hazard query from the dependent instruction */
+         init_hazard_query(ctx, &hq);
+         add_to_hazard_query(&hq, candidate.get());
+         ctx.mv.upwards_update_insert_idx(up_cursor);
+         ctx.mv.upwards_skip(up_cursor);
+         found_dependency = true;
+         /* the loop increment is bypassed by the break */
+         i++;
+         break;
+      }
+
+      ctx.mv.upwards_skip(up_cursor);
+   }
+
+   /* then, if a dependency was found, try to move the instructions after it up */
+   for (; found_dependency && k < max_moves && i < window_size; i++) {
+      aco_ptr<Instruction>& candidate = block->instructions[up_cursor.source_idx];
+      bool is_mem = candidate->isVMEM() || candidate->isFlatLike() || candidate->isSMEM();
+      if (candidate->opcode == aco_opcode::p_logical_end || is_mem)
+         break;
+
+      HazardResult haz = perform_hazard_query(&hq, candidate.get(), true);
+      if (haz == hazard_fail_exec || haz == hazard_fail_unreorderable)
+         break;
+
+      /* candidates which are not moved are added to the hazard query and skipped */
+      if (haz != hazard_success || ctx.mv.upwards_move(up_cursor) != move_success) {
+         add_to_hazard_query(&hq, candidate.get());
+         ctx.mv.upwards_skip(up_cursor);
+      } else {
+         k++;
+      }
+   }
+}
+
 void
 schedule_position_export(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& register_demand,
                          Instruction* current, int idx)
@@ -1096,6 +1200,11 @@ schedule_block(sched_ctx& ctx, Program* program, Block* block, live& live_vars)
          ctx.mv.current = current;
          schedule_SMEM(ctx, block, live_vars.register_demand[block->index], current, idx);
       }
+
+      if (current->isLDSDIR()) {
+         ctx.mv.current = current;
+         schedule_LDS(ctx, block, live_vars.register_demand[block->index], current, idx);
+      }
    }

    /* GFX11 benefits from creating VMEM store clauses. */