diff --git a/src/gallium/drivers/asahi/agx_nir_lower_bindings.c b/src/gallium/drivers/asahi/agx_nir_lower_bindings.c
new file mode 100644
index 00000000000..258f67cd6ef
--- /dev/null
+++ b/src/gallium/drivers/asahi/agx_nir_lower_bindings.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright 2023 Valve Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "compiler/nir/nir_builder.h"
+#include "agx_state.h"
+
+#define AGX_TEXTURE_DESC_STRIDE 24
+
+/*
+ * Construct a bindless handle corresponding to an index into the binding
+ * tables. Our driver ABI maps everything to a table addressed by u0_u1, with
+ * indices mapped 1:1 with the binding table. So we want the bindless handle
+ * (u0_u1, index) which is encoded in NIR as (0, index).
+ */
+static nir_ssa_def *
+index_to_handle(nir_builder *b, nir_ssa_def *index)
+{
+   nir_ssa_def *table = nir_imm_int(b, 0);
+   nir_ssa_def *offset = nir_imul_imm(b, index, AGX_TEXTURE_DESC_STRIDE);
+
+   return nir_vec2(b, table, offset);
+}
+
+/*
+ * Lower binding table textures and images to texture state registers and (if
+ * necessary) bindless access into an internal table mapped like additional
+ * texture state registers. The following layout is used:
+ *
+ *    1. Textures
+ *    2. Images (read/write interleaved)
+ */
+static bool
+lower(nir_builder *b, nir_instr *instr, void *data)
+{
+   bool *internal_bindless = data;
+   b->cursor = nir_before_instr(instr);
+
+   if (instr->type == nir_instr_type_intrinsic) {
+      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+      nir_intrinsic_op bindless_op;
+
+#define CASE(op)                                                               \
+   case nir_intrinsic_##op:                                                    \
+      bindless_op = nir_intrinsic_bindless_##op;                               \
+      break;
+
+      switch (intr->intrinsic) {
+         CASE(image_load)
+         CASE(image_store)
+         CASE(image_size)
+         CASE(image_atomic)
+         CASE(image_atomic_swap)
+      default:
+         return false;
+      }
+#undef CASE
+
+      nir_ssa_def *index = intr->src[0].ssa;
+      nir_ssa_scalar index_scalar = nir_ssa_scalar_resolved(index, 0);
+
+      /* Remap according to the driver layout */
+      unsigned offset = BITSET_LAST_BIT(b->shader->info.textures_used);
+
+      /* For reads and image_size, we use the texture descriptor which is
+       * first. Writes and atomics use the PBE descriptor.
+       */
+      if (intr->intrinsic != nir_intrinsic_image_load &&
+          intr->intrinsic != nir_intrinsic_image_size)
+         offset++;
+
+      /* If we can determine statically that the image fits in texture state
+       * registers, avoid lowering to bindless access.
+       */
+      if (nir_ssa_scalar_is_const(index_scalar)) {
+         unsigned idx = (nir_ssa_scalar_as_uint(index_scalar) * 2) + offset;
+
+         if (idx < AGX_NUM_TEXTURE_STATE_REGS) {
+            nir_src_rewrite_ssa(&intr->src[0], nir_imm_intN_t(b, idx, 16));
+            return true;
+         }
+      }
+
+      /* Otherwise, lower to bindless */
+      intr->intrinsic = bindless_op;
+      *internal_bindless = true;
+
+      index = nir_iadd_imm(b, nir_imul_imm(b, index, 2), offset);
+      nir_src_rewrite_ssa(&intr->src[0], index_to_handle(b, index));
+   } else if (instr->type == nir_instr_type_tex) {
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+      /* Nothing to do for "real" bindless */
+      if (nir_tex_instr_src_index(tex, nir_tex_src_texture_handle) >= 0)
+         return false;
+
+      /* Textures are mapped 1:1, so if we can prove it fits in a texture
+       * state register, use the texture state register.
+       */
+      if (tex->texture_index < AGX_NUM_TEXTURE_STATE_REGS &&
+          nir_tex_instr_src_index(tex, nir_tex_src_texture_offset) == -1)
+         return false;
+
+      /* Otherwise, lower to bindless. Could be optimized. */
+      nir_ssa_def *index = nir_steal_tex_src(tex, nir_tex_src_texture_offset);
+      if (!index)
+         index = nir_imm_int(b, tex->texture_index);
+
+      *internal_bindless = true;
+      nir_tex_instr_add_src(tex, nir_tex_src_texture_handle,
+                            nir_src_for_ssa(index_to_handle(b, index)));
+   }
+
+   return false;
+}
+
+bool
+agx_nir_lower_bindings(nir_shader *shader, bool *internal_bindless)
+{
+   /* First lower index to offset so we can lower more naturally */
+   bool progress = nir_lower_tex(
+      shader, &(nir_lower_tex_options){.lower_index_to_offset = true});
+
+   /* Next run constant folding so the constant optimizations above have a
+    * chance.
+    */
+   progress |= nir_opt_constant_folding(shader);
+
+   progress |= nir_shader_instructions_pass(
+      shader, lower, nir_metadata_block_index | nir_metadata_dominance,
+      internal_bindless);
+   return progress;
+}
diff --git a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c
index 657701ce165..abde5f74ec7 100644
--- a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c
+++ b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c
@@ -275,6 +275,29 @@ lay_out_table(struct agx_compiled_shader *shader, struct table_state *state,
    return uniform;
 }
 
+/* Reserve u0_u1 for the texture base if needed for internal bindless
+ * operation. When we have too many textures/images for the available texture
+ * state registers, an early lowering pass in the driver spills some
+ * textures/images out of texture state registers and instead accesses them as
+ * bindless internally. That pass assumes u0_u1 points to the texture
+ * descriptors otherwise bound to texture state registers.
+ */
+static void
+reserve_internal_bindless(struct state *state)
+{
+   struct table_state *table = &state->tables[AGX_SYSVAL_TABLE_ROOT];
+   struct agx_draw_uniforms *u = NULL;
+   const unsigned len_words = sizeof(u->texture_base) / sizeof(uint16_t);
+
+   static_assert(offsetof(struct agx_draw_uniforms, texture_base) == 0, "ABI");
+   static_assert(sizeof(u->texture_base) == 8, "64-bit pointer");
+
+   BITSET_SET_RANGE(table->pushed, 0, len_words - 1);
+
+   for (unsigned i = 0; i < len_words; ++i)
+      table->element_size[i] = len_words;
+}
+
 static unsigned
 lay_out_uniforms(struct agx_compiled_shader *shader, struct state *state)
 {
@@ -301,14 +324,14 @@ lay_out_uniforms(struct agx_compiled_shader *shader, struct state *state)
 }
 
 bool
-agx_nir_lower_sysvals(nir_shader *shader, struct agx_compiled_shader *compiled,
-                      unsigned *push_size)
+agx_nir_lower_sysvals(nir_shader *shader, bool internal_bindless,
+                      struct agx_compiled_shader *compiled, unsigned *push_size)
 {
    bool progress = nir_shader_instructions_pass(
       shader, lower_sysvals, nir_metadata_block_index | nir_metadata_dominance,
       NULL);
 
-   if (!progress) {
+   if (!progress && !internal_bindless) {
       *push_size = 0;
       return false;
    }
@@ -318,6 +341,9 @@ agx_nir_lower_sysvals(nir_shader *shader, struct agx_compiled_shader *compiled,
       shader, record_loads, nir_metadata_block_index | nir_metadata_dominance,
       &state);
 
+   if (internal_bindless)
+      reserve_internal_bindless(&state);
+
    *push_size = lay_out_uniforms(compiled, &state);
 
    util_dynarray_fini(&state.load_preambles);
diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c
index da772ca4089..d4b986dbb44 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -1514,7 +1514,7 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so,
       base_key.vs.outputs_linear_shaded = key_->vs.outputs_linear_shaded;
    }
 
-   NIR_PASS_V(nir, agx_nir_lower_sysvals, compiled,
+   NIR_PASS_V(nir, agx_nir_lower_sysvals, so->internal_bindless, compiled,
               &base_key.reserved_preamble);
 
    agx_compile_shader_nir(nir, &base_key, debug, &binary, &compiled->info);
@@ -1581,6 +1581,11 @@ agx_shader_initialize(struct agx_uncompiled_shader *so, nir_shader *nir)
 {
    so->type = pipe_shader_type_from_mesa(nir->info.stage);
 
+   /* We need to lower binding tables before calling agx_preprocess_nir, since
+    * that does texture lowering that needs to know the binding model.
+    */
+   NIR_PASS_V(nir, agx_nir_lower_bindings, &so->internal_bindless);
+
    agx_preprocess_nir(nir, true, &so->info);
 
    blob_init(&so->serialized_nir);
diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h
index 909134a505a..9334e129822 100644
--- a/src/gallium/drivers/asahi/agx_state.h
+++ b/src/gallium/drivers/asahi/agx_state.h
@@ -34,6 +34,8 @@
 #define agx_msg(...) fprintf(stderr, __VA_ARGS__)
 #endif
 
+#define AGX_NUM_TEXTURE_STATE_REGS 16
+
 struct agx_streamout_target {
    struct pipe_stream_output_target base;
    uint32_t offset;
@@ -95,12 +97,15 @@ enum agx_sysval_table {
 
 /* Root system value table */
 struct PACKED agx_draw_uniforms {
+   /* Pointer to binding table for texture descriptor, or 0 if none. This must
+    * be first so that u0_u1 is always available for lowering binding
+    * tables to bindless access.
+    */
+   uint64_t texture_base;
+
    /* Pointers to the system value tables themselves (for indirection) */
    uint64_t tables[AGX_NUM_SYSVAL_TABLES];
 
-   /* Pointer to binding table for texture descriptor, or 0 if none */
-   uint64_t texture_base;
-
    /* Uniform buffer objects */
    uint64_t ubo_base[PIPE_MAX_CONSTANT_BUFFERS];
 
@@ -175,6 +180,12 @@ struct agx_uncompiled_shader {
    struct hash_table *variants;
    bool has_xfb_info;
 
+   /* If set, we need to pass the address of the texture/image table as uniform
+    * u0_u1 due to binding tables that were lowered to be internally bindless
+    * with that base address.
+    */
+   bool internal_bindless;
+
    /* For compute kernels */
    unsigned static_shared_mem;
 
@@ -659,10 +670,12 @@ agx_transfer(struct pipe_transfer *p)
 uint64_t agx_upload_uniforms(struct agx_batch *batch, uint64_t textures,
                              enum pipe_shader_type stage);
 
-bool agx_nir_lower_sysvals(nir_shader *shader,
+bool agx_nir_lower_sysvals(nir_shader *shader, bool internal_bindless,
                            struct agx_compiled_shader *compiled,
                            unsigned *push_size);
 
+bool agx_nir_lower_bindings(nir_shader *shader, bool *internal_bindless);
+
 bool agx_batch_is_active(struct agx_batch *batch);
 bool agx_batch_is_submitted(struct agx_batch *batch);
diff --git a/src/gallium/drivers/asahi/meson.build b/src/gallium/drivers/asahi/meson.build
index 6a80e1f803a..9a279e8011f 100644
--- a/src/gallium/drivers/asahi/meson.build
+++ b/src/gallium/drivers/asahi/meson.build
@@ -8,6 +8,7 @@ files_asahi = files(
   'agx_fence.c',
   'agx_pipe.c',
   'agx_nir_lower_sysvals.c',
+  'agx_nir_lower_bindings.c',
   'agx_query.c',
   'agx_state.c',
   'agx_streamout.c',