diff --git a/src/asahi/compiler/README.md b/src/asahi/compiler/README.md
index be35c339c60..074a112bb29 100644
--- a/src/asahi/compiler/README.md
+++ b/src/asahi/compiler/README.md
@@ -38,16 +38,17 @@ For a vertex shader reading $n$ attributes, the following layout is used:
 * The first $n$ 64-bit uniforms are the base addresses of each attribute.
 * The next $n$ 32-bit uniforms are the associated clamps (sizes). Presently
   robustness is always used.
-* The next 32-bit uniform is the base instance. This must always be reserved
-  because it is unknown at vertex shader compile-time whether any attribute will
-  use instancing.
-* For a hardware compute shader, the next 32-bit uniform is the base/first
-  vertex.
+* The next two 32-bit uniforms are the base vertex and base instance. The base
+  instance must always be reserved because it is unknown at vertex shader
+  compile-time whether any attribute will use instancing. Also reserving the
+  base vertex lets us push both conveniently with a single USC Uniform word.
+* The next 16-bit uniform is the draw ID.
+* For a hardware compute shader, the next 48 bits are padding.
 * For a hardware compute shader, the next 64-bit uniform is a pointer to the
   input assembly buffer.
 
-In total, the first $6n + 2$ 16-bit uniform slots are reserved for a hardware
-vertex shader, or $6n + 8$ for a hardware compute shader.
+In total, the first $6n + 5$ 16-bit uniform slots are reserved for a hardware
+vertex shader, or $6n + 12$ for a hardware compute shader.
 
 ## Fragment
 
diff --git a/src/asahi/lib/agx_nir_prolog_epilog.c b/src/asahi/lib/agx_nir_prolog_epilog.c
index e5b34efa485..8f584f01109 100644
--- a/src/asahi/lib/agx_nir_prolog_epilog.c
+++ b/src/asahi/lib/agx_nir_prolog_epilog.c
@@ -80,12 +80,12 @@ map_vs_part_uniform(nir_intrinsic_instr *intr, unsigned nr_attribs)
       return 4 * nir_src_as_uint(intr->src[0]);
    case nir_intrinsic_load_attrib_clamp_agx:
       return (4 * nr_attribs) + (2 * nir_src_as_uint(intr->src[0]));
-   case nir_intrinsic_load_base_instance:
-      return (6 * nr_attribs);
    case nir_intrinsic_load_first_vertex:
+      return (6 * nr_attribs);
+   case nir_intrinsic_load_base_instance:
       return (6 * nr_attribs) + 2;
    case nir_intrinsic_load_input_assembly_buffer_agx:
-      return (6 * nr_attribs) + 4;
+      return (6 * nr_attribs) + 8;
    default:
       return -1;
    }
diff --git a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c
index 88bd0651050..e1afb663d11 100644
--- a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c
+++ b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c
@@ -395,28 +395,21 @@ lay_out_uniforms(struct agx_compiled_shader *shader, struct state *state)
       shader->push[shader->push_range_count++] = (struct agx_push_range){
          .uniform = 6 * count,
          .table = AGX_SYSVAL_TABLE_PARAMS,
-         .offset = 4,
-         .length = 2,
+         .offset = 0,
+         .length = 4,
       };
 
-      uniform = (6 * count) + 2;
+      uniform = (6 * count) + 4;
 
       if (state->hw_stage == PIPE_SHADER_COMPUTE) {
          shader->push[shader->push_range_count++] = (struct agx_push_range){
-            .uniform = (6 * count) + 2,
-            .table = AGX_SYSVAL_TABLE_PARAMS,
-            .offset = 0,
-            .length = 2,
-         };
-
-         shader->push[shader->push_range_count++] = (struct agx_push_range){
-            .uniform = (6 * count) + 4,
+            .uniform = (6 * count) + 8,
             .table = AGX_SYSVAL_TABLE_ROOT,
             .offset = (uintptr_t)&u->input_assembly,
             .length = 4,
          };
 
-         uniform = (6 * count) + 8;
+         uniform = (6 * count) + 12;
       }
    } else if (state->stage == PIPE_SHADER_FRAGMENT) {
       struct agx_draw_uniforms *u = NULL;