# Patch summary (everything before the first "diff --git" line is commentary).
#
# Adds nir_texop_txf_ms / multisample-index handling to the AGX compiler:
#
# * agx_compile.c
#   - agx_lod_mode_for_nir() maps nir_texop_txf_ms to AGX_LOD_MODE_AUTO_LOD.
#   - agx_emit_tex() sets `txf` for both nir_texop_txf and nir_texop_txf_ms,
#     and reads coordinates only from nir_tex_src_backend1 (the
#     nir_tex_src_coord case label is removed). nir_tex_src_ms_index is
#     dropped from the case labels that precede `default:`.
#   - agx_preprocess_nir() adds nir_tex_src_ms_index to tex_constraints with
#     the same { true, 16 } entry as lod/bias, and moves
#     nir_legalize_16bit_sampler_srcs ahead of agx_nir_lower_array_texture
#     and agx_lower_resinfo.
# * agx_compiler.h: the `dim` bitfield grows from 3 to 4 bits.
# * agx_opcodes.py: the DIM enum gains 8: '2d_ms_array'.
# * agx_pack.c: the low 3 bits of I->dim are still placed at bit 40; the
#   remaining bit (I->dim >> 3) is placed at bit 7 of `extend`.
# * agx_lower_resinfo.c: agx_txs() wraps the LOD source in nir_u2u32 before
#   adding it to `lod`.
# * agx_nir_lower_array_texture.c is renamed to agx_nir_lower_texture.c
#   (meson.build updated to match).
#   - New static helper steal_tex_src(): returns the SSA def of the source of
#     the given type and removes that source from the instruction, or returns
#     NULL when the instruction has no such source.
#   - lower_array_texture() now handles every non-query tex instruction, not
#     only array textures. It steals the coord and ms_index sources; for array
#     textures it splits the last coord component off as the layer, rounds it
#     (except for txf / txf_ms), clamps it to (number of layers - 1) and
#     narrows it to 16 bits. The sample index and layer are packed with
#     nir_pack_32_2x16_split when both exist, otherwise whichever exists is
#     widened with nir_u2u32; the result is appended as one extra coordinate
#     component and the coordinate is re-added as nir_tex_src_backend1.
#
# NOTE(review): the pass is still invoked as agx_nir_lower_array_texture even
#   though its file is renamed and it no longer handles only arrays - confirm
#   whether the entry point should be renamed too.
# NOTE(review): lower_array_texture() dereferences `coord` and passes it to
#   nir_src_for_ssa() without a NULL check; presumably every non-query tex
#   instruction carries a coord source - confirm.
# NOTE(review): hunk line counts below match their @@ headers, but leading
#   whitespace was reconstructed - verify against the tree before applying.
#
diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index b5522af93b1..09f3de94a55 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -1094,6 +1094,7 @@ agx_lod_mode_for_nir(nir_texop op)
    case nir_texop_txd: return AGX_LOD_MODE_LOD_GRAD;
    case nir_texop_txl: return AGX_LOD_MODE_LOD_MIN;
    case nir_texop_txf: return AGX_LOD_MODE_LOD_MIN;
+   case nir_texop_txf_ms: return AGX_LOD_MODE_AUTO_LOD; /* no mipmapping */
    default: unreachable("Unhandled texture op");
    }
 }
@@ -1108,13 +1109,12 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
              compare = agx_null(),
              packed_offset = agx_null();
 
-   bool txf = instr->op == nir_texop_txf;
+   bool txf = (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms);
 
    for (unsigned i = 0; i < instr->num_srcs; ++i) {
       agx_index index = agx_src_index(&instr->src[i].src);
 
       switch (instr->src[i].src_type) {
-      case nir_tex_src_coord:
       case nir_tex_src_backend1:
          coords = index;
          break;
@@ -1175,7 +1175,6 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
          /* handled above */
          break;
 
-      case nir_tex_src_ms_index:
       case nir_tex_src_texture_offset:
       case nir_tex_src_sampler_offset:
       default:
@@ -1939,12 +1938,18 @@ agx_preprocess_nir(nir_shader *nir)
    nir_tex_src_type_constraints tex_constraints = {
       [nir_tex_src_lod] = { true, 16 },
       [nir_tex_src_bias] = { true, 16 },
+      [nir_tex_src_ms_index] = { true, 16 },
    };
 
    NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
+   NIR_PASS_V(nir, nir_legalize_16bit_sampler_srcs, tex_constraints);
+
+   /* Lower texture sources after legalizing types (as the lowering depends on
+    * 16-bit multisample indices) but before lowering queries (as the lowering
+    * generates txs for array textures).
+    */
    NIR_PASS_V(nir, agx_nir_lower_array_texture);
    NIR_PASS_V(nir, agx_lower_resinfo);
-   NIR_PASS_V(nir, nir_legalize_16bit_sampler_srcs, tex_constraints);
 
    nir->info.io_lowered = true;
 }
diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h
index 517673c182c..46f76589d6e 100644
--- a/src/asahi/compiler/agx_compiler.h
+++ b/src/asahi/compiler/agx_compiler.h
@@ -316,7 +316,7 @@ typedef struct {
    bool invert_cond : 1;
 
    /* TODO: Handle tex ops more efficient */
-   enum agx_dim dim : 3;
+   enum agx_dim dim : 4;
    bool offset : 1;
    bool shadow : 1;
 
diff --git a/src/asahi/compiler/agx_lower_resinfo.c b/src/asahi/compiler/agx_lower_resinfo.c
index 7f33f478299..a37d59b0376 100644
--- a/src/asahi/compiler/agx_lower_resinfo.c
+++ b/src/asahi/compiler/agx_lower_resinfo.c
@@ -79,7 +79,8 @@ agx_txs(nir_builder *b, nir_tex_instr *tex)
    /* Add LOD offset to first level to get the interesting LOD */
    int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
    if (lod_idx >= 0)
-      lod = nir_iadd(b, lod, nir_ssa_for_src(b, tex->src[lod_idx].src, 1));
+      lod = nir_iadd(b, lod, nir_u2u32(b, nir_ssa_for_src(b,
+                                          tex->src[lod_idx].src, 1)));
 
    /* Add 1 to width-1, height-1 to get base dimensions */
    nir_ssa_def *width = nir_iadd_imm(b, width_m1, 1);
diff --git a/src/asahi/compiler/agx_nir_lower_array_texture.c b/src/asahi/compiler/agx_nir_lower_texture.c
similarity index 52%
rename from src/asahi/compiler/agx_nir_lower_array_texture.c
rename to src/asahi/compiler/agx_nir_lower_texture.c
index 1621909d8b4..7e068f6f0d6 100644
--- a/src/asahi/compiler/agx_nir_lower_array_texture.c
+++ b/src/asahi/compiler/agx_nir_lower_texture.c
@@ -28,6 +28,19 @@
 #include "compiler/nir/nir_builder.h"
 #include "compiler/nir/nir_builtin_builder.h"
 
+static nir_ssa_def *
+steal_tex_src(nir_tex_instr *tex, nir_tex_src_type type_)
+{
+   int idx = nir_tex_instr_src_index(tex, type_);
+
+   if (idx < 0)
+      return NULL;
+
+   nir_ssa_def *ssa = tex->src[idx].src.ssa;
+   nir_tex_instr_remove_src(tex, idx);
+   return ssa;
+}
+
 /*
  * NIR indexes into array textures with unclamped floats (integer for txf). AGX
  * requires the index to be a clamped integer. Lower tex_src_coord into
@@ -42,31 +55,54 @@ lower_array_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
    nir_tex_instr *tex = nir_instr_as_tex(instr);
    b->cursor = nir_before_instr(instr);
 
-   if (!tex->is_array || nir_tex_instr_is_query(tex))
+   if (nir_tex_instr_is_query(tex))
       return false;
 
    /* Get the coordinates */
-   int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
-   nir_ssa_def *coord = tex->src[coord_idx].src.ssa;
-   unsigned nr = nir_src_num_components(tex->src[coord_idx].src);
+   nir_ssa_def *coord = steal_tex_src(tex, nir_tex_src_coord);
+   nir_ssa_def *ms_idx = steal_tex_src(tex, nir_tex_src_ms_index);
 
-   /* The layer is always the last component of the NIR coordinate */
-   unsigned lidx = nr - 1;
-   nir_ssa_def *layer = nir_channel(b, coord, lidx);
+   /* The layer is always the last component of the NIR coordinate, split it off
+    * because we'll need to swizzle.
+    */
+   nir_ssa_def *layer = NULL;
 
-   /* Round layer to nearest even */
-   if (tex->op != nir_texop_txf)
-      layer = nir_f2u32(b, nir_fround_even(b, layer));
+   if (tex->is_array) {
+      unsigned lidx = coord->num_components - 1;
+      nir_ssa_def *unclamped_layer = nir_channel(b, coord, lidx);
+      coord = nir_trim_vector(b, coord, lidx);
 
-   /* Clamp to max layer = (# of layers - 1) for out-of-bounds handling */
-   nir_ssa_def *txs = nir_get_texture_size(b, tex);
-   nir_ssa_def *nr_layers = nir_channel(b, txs, lidx);
-   layer = nir_umin(b, layer, nir_iadd_imm(b, nr_layers, -1));
+      /* Round layer to nearest even */
+      if (tex->op != nir_texop_txf && tex->op != nir_texop_txf_ms)
+         unclamped_layer = nir_f2u32(b, nir_fround_even(b, unclamped_layer));
 
-   nir_tex_instr_remove_src(tex, coord_idx);
-   nir_tex_instr_add_src(tex, nir_tex_src_backend1,
-                         nir_src_for_ssa(nir_vector_insert_imm(b, coord, layer,
-                                                               lidx)));
+      /* Clamp to max layer = (# of layers - 1) for out-of-bounds handling.
+       * Layer must be 16-bits for the hardware, drop top bits after clamping.
+       */
+      nir_ssa_def *txs = nir_get_texture_size(b, tex);
+      nir_ssa_def *nr_layers = nir_channel(b, txs, lidx);
+      nir_ssa_def *max_layer = nir_iadd_imm(b, nr_layers, -1);
+      layer = nir_u2u16(b, nir_umin(b, unclamped_layer, max_layer));
+   }
+
+   /* Combine layer and multisample index into 32-bit so we don't need a vec5 or
+    * vec6 16-bit coordinate tuple, which would be inconvenient in NIR for
+    * little benefit (a minor optimization, I guess).
+    */
+   nir_ssa_def *sample_array =
+      (ms_idx && layer) ? nir_pack_32_2x16_split(b, ms_idx, layer) :
+      ms_idx            ? nir_u2u32(b, ms_idx) :
+      layer             ? nir_u2u32(b, layer) :
+                          NULL;
+
+   /* Combine into the final 32-bit tuple */
+   if (sample_array != NULL) {
+      unsigned end = coord->num_components;
+      coord = nir_pad_vector(b, coord, end + 1);
+      coord = nir_vector_insert_imm(b, coord, sample_array, end);
+   }
+
+   nir_tex_instr_add_src(tex, nir_tex_src_backend1, nir_src_for_ssa(coord));
 
    return true;
 }
diff --git a/src/asahi/compiler/agx_opcodes.py b/src/asahi/compiler/agx_opcodes.py
index 191396d3013..0cdf1c3cd00 100644
--- a/src/asahi/compiler/agx_opcodes.py
+++ b/src/asahi/compiler/agx_opcodes.py
@@ -103,7 +103,8 @@ DIM = enum("dim", {
     4: '2d_ms',
     5: '3d',
     6: 'cube',
-    7: 'cube_array'
+    7: 'cube_array',
+    8: '2d_ms_array',
 })
 
 OFFSET = immediate("offset", "bool")
diff --git a/src/asahi/compiler/agx_pack.c b/src/asahi/compiler/agx_pack.c
index 6df6deea4e3..ca9044b894e 100644
--- a/src/asahi/compiler/agx_pack.c
+++ b/src/asahi/compiler/agx_pack.c
@@ -618,6 +618,7 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
       uint32_t extend =
          ((U & BITFIELD_MASK(5)) << 0) |
          (kill << 5) |
+         ((I->dim >> 3) << 7) |
          ((R >> 6) << 8) |
          ((C >> 6) << 10) |
          ((D >> 6) << 12) |
@@ -644,7 +645,7 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
          (q2 << 30) |
          (((uint64_t) (T & BITFIELD_MASK(6))) << 32) |
          (((uint64_t) Tt) << 38) |
-         (((uint64_t) I->dim) << 40) |
+         (((uint64_t) (I->dim & BITFIELD_MASK(3))) << 40) |
          (((uint64_t) q3) << 43) |
          (((uint64_t) I->mask) << 48) |
          (((uint64_t) I->lod_mode) << 52) |
diff --git a/src/asahi/compiler/meson.build b/src/asahi/compiler/meson.build
index 4ab662147d9..8663508a572 100644
--- a/src/asahi/compiler/meson.build
+++ b/src/asahi/compiler/meson.build
@@ -23,7 +23,7 @@ libasahi_agx_files = files(
   'agx_compile.c',
   'agx_dce.c',
   'agx_liveness.c',
-  'agx_nir_lower_array_texture.c',
+  'agx_nir_lower_texture.c',
   'agx_nir_lower_load_mask.c',
   'agx_nir_opt_preamble.c',
   'agx_lower_64bit.c',