diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp
index 11bb77720e2..40b0f838d31 100644
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -15,9 +15,11 @@
 
 #define SMEM_WINDOW_SIZE    (350 - ctx.num_waves * 35)
 #define VMEM_WINDOW_SIZE    (1024 - ctx.num_waves * 64)
+#define LDS_WINDOW_SIZE     64
 #define POS_EXP_WINDOW_SIZE 512
 #define SMEM_MAX_MOVES      (64 - ctx.num_waves * 4)
 #define VMEM_MAX_MOVES      (256 - ctx.num_waves * 16)
+#define LDSDIR_MAX_MOVES    10
 /* creating clauses decreases def-use distances, so make it less aggressive the lower num_waves is */
 #define VMEM_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 2)
 #define VMEM_STORE_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 4)
@@ -979,6 +981,85 @@ schedule_VMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
    }
 }
 
+void
+schedule_LDS(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& register_demand,
+             Instruction* current, int idx)
+{
+   assert(idx != 0);
+   int window_size = LDS_WINDOW_SIZE;
+   int max_moves = LDSDIR_MAX_MOVES;
+   int16_t k = 0;
+
+   /* first, check if we have instructions before current to move down */
+   hazard_query hq;
+   init_hazard_query(ctx, &hq);
+   add_to_hazard_query(&hq, current);
+
+   DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, false);
+
+   for (int i = 0; k < max_moves && i < window_size; i++) {
+      aco_ptr<Instruction>& candidate = block->instructions[cursor.source_idx];
+      bool is_mem = candidate->isVMEM() || candidate->isFlatLike() || candidate->isSMEM();
+      if (candidate->opcode == aco_opcode::p_logical_start || is_mem)
+         break;
+
+      if (candidate->isDS() || candidate->isLDSDIR()) {
+         add_to_hazard_query(&hq, candidate.get());
+         ctx.mv.downwards_skip(cursor);
+         continue;
+      }
+
+      if (perform_hazard_query(&hq, candidate.get(), false) != hazard_success ||
+          ctx.mv.downwards_move(cursor, false) != move_success)
+         break;
+
+      k++;
+   }
+
+   /* second, check if we have instructions after current to move up */
+   bool found_dependency = false;
+   int i = 0;
+   UpwardsCursor up_cursor = ctx.mv.upwards_init(idx + 1, true);
+   /* find the first instruction depending on current */
+   for (; k < max_moves && i < window_size; i++) {
+      aco_ptr<Instruction>& candidate = block->instructions[up_cursor.source_idx];
+      bool is_mem = candidate->isVMEM() || candidate->isFlatLike() || candidate->isSMEM();
+      if (candidate->opcode == aco_opcode::p_logical_end || is_mem)
+         break;
+
+      /* check if candidate depends on current */
+      if (!ctx.mv.upwards_check_deps(up_cursor)) {
+         init_hazard_query(ctx, &hq);
+         add_to_hazard_query(&hq, candidate.get());
+         ctx.mv.upwards_update_insert_idx(up_cursor);
+         ctx.mv.upwards_skip(up_cursor);
+         found_dependency = true;
+         i++;
+         break;
+      }
+
+      ctx.mv.upwards_skip(up_cursor);
+   }
+
+   for (; found_dependency && k < max_moves && i < window_size; i++) {
+      aco_ptr<Instruction>& candidate = block->instructions[up_cursor.source_idx];
+      bool is_mem = candidate->isVMEM() || candidate->isFlatLike() || candidate->isSMEM();
+      if (candidate->opcode == aco_opcode::p_logical_end || is_mem)
+         break;
+
+      HazardResult haz = perform_hazard_query(&hq, candidate.get(), true);
+      if (haz == hazard_fail_exec || haz == hazard_fail_unreorderable)
+         break;
+
+      if (haz != hazard_success || ctx.mv.upwards_move(up_cursor) != move_success) {
+         add_to_hazard_query(&hq, candidate.get());
+         ctx.mv.upwards_skip(up_cursor);
+      } else {
+         k++;
+      }
+   }
+}
+
 void
 schedule_position_export(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& register_demand,
                          Instruction* current, int idx)
@@ -1096,6 +1177,11 @@ schedule_block(sched_ctx& ctx, Program* program, Block* block, live& live_vars)
          ctx.mv.current = current;
          schedule_SMEM(ctx, block, live_vars.register_demand[block->index], current, idx);
       }
+
+      if (current->isLDSDIR()) {
+         ctx.mv.current = current;
+         schedule_LDS(ctx, block, live_vars.register_demand[block->index], current, idx);
+      }
    }
 
    /* GFX11 benefits from creating VMEM store clauses. */