asahi: Add texture/image indexing lowering pass
Both textures and images share a unified indexing scheme in AGX. When binding tables are used, they can be mapped to texture state registers. Otherwise, there is bindless access available. It would be nice to map OpenGL's binding-table-based textures and images to AGX texture state registers 1:1. The problem is that OpenGL allows more combined textures and images than we necessarily have texture state registers. So, we use as many texture state registers as we can, and then we fall back on an internal bindless scheme mapping an extended binding table. Add and use a lowering pass to map all of the API-level texture/image indices to either texture state registers or bindless handles as required. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24258>
This commit is contained in:
committed by
Marge Bot
parent
1ad4a35a6c
commit
274d0d1c82
@@ -0,0 +1,134 @@
|
||||
/*
|
||||
* Copyright 2023 Valve Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "agx_state.h"
|
||||
|
||||
#define AGX_TEXTURE_DESC_STRIDE 24
|
||||
|
||||
/*
|
||||
* Construct a bindless handle corresponding to an index into the binding
|
||||
* tables. Our driver ABI maps everything to a table addressed by u0_u1, with
|
||||
* indices mapped 1:1 with the binding table. So we want the bindless handle
|
||||
* (u0_u1, index) which is encoded in NIR as (0, index).
|
||||
*/
|
||||
static nir_ssa_def *
|
||||
index_to_handle(nir_builder *b, nir_ssa_def *index)
|
||||
{
|
||||
nir_ssa_def *table = nir_imm_int(b, 0);
|
||||
nir_ssa_def *offset = nir_imul_imm(b, index, AGX_TEXTURE_DESC_STRIDE);
|
||||
|
||||
return nir_vec2(b, table, offset);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lower binding table textures and images to texture state registers and (if
|
||||
* necessary) bindless access into an internal table mapped like additional
|
||||
* texture state registers. The following layout is used:
|
||||
*
|
||||
* 1. Textures
|
||||
* 2. Images (read/write interleaved)
|
||||
*/
|
||||
static bool
|
||||
lower(nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
bool *internal_bindless = data;
|
||||
b->cursor = nir_before_instr(instr);
|
||||
|
||||
if (instr->type == nir_instr_type_intrinsic) {
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
nir_intrinsic_op bindless_op;
|
||||
|
||||
#define CASE(op) \
|
||||
case nir_intrinsic_##op: \
|
||||
bindless_op = nir_intrinsic_bindless_##op; \
|
||||
break;
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
CASE(image_load)
|
||||
CASE(image_store)
|
||||
CASE(image_size)
|
||||
CASE(image_atomic)
|
||||
CASE(image_atomic_swap)
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
#undef CASE
|
||||
|
||||
nir_ssa_def *index = intr->src[0].ssa;
|
||||
nir_ssa_scalar index_scalar = nir_ssa_scalar_resolved(index, 0);
|
||||
|
||||
/* Remap according to the driver layout */
|
||||
unsigned offset = BITSET_LAST_BIT(b->shader->info.textures_used);
|
||||
|
||||
/* For reads and image_size, we use the texture descriptor which is first.
|
||||
* Writes and atomics use the PBE descriptor.
|
||||
*/
|
||||
if (intr->intrinsic != nir_intrinsic_image_load &&
|
||||
intr->intrinsic != nir_intrinsic_image_size)
|
||||
offset++;
|
||||
|
||||
/* If we can determine statically that the image fits in texture state
|
||||
* registers, avoid lowering to bindless access.
|
||||
*/
|
||||
if (nir_ssa_scalar_is_const(index_scalar)) {
|
||||
unsigned idx = (nir_ssa_scalar_as_uint(index_scalar) * 2) + offset;
|
||||
|
||||
if (idx < AGX_NUM_TEXTURE_STATE_REGS) {
|
||||
nir_src_rewrite_ssa(&intr->src[0], nir_imm_intN_t(b, idx, 16));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Otherwise, lower to bindless */
|
||||
intr->intrinsic = bindless_op;
|
||||
*internal_bindless = true;
|
||||
|
||||
index = nir_iadd_imm(b, nir_imul_imm(b, index, 2), offset);
|
||||
nir_src_rewrite_ssa(&intr->src[0], index_to_handle(b, index));
|
||||
} else if (instr->type == nir_instr_type_tex) {
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
|
||||
/* Nothing to do for "real" bindless */
|
||||
if (nir_tex_instr_src_index(tex, nir_tex_src_texture_handle) >= 0)
|
||||
return false;
|
||||
|
||||
/* Textures are mapped 1:1, so if we can prove it fits in a texture state
|
||||
* register, use the texture state register.
|
||||
*/
|
||||
if (tex->texture_index < AGX_NUM_TEXTURE_STATE_REGS &&
|
||||
nir_tex_instr_src_index(tex, nir_tex_src_texture_offset) == -1)
|
||||
return false;
|
||||
|
||||
/* Otherwise, lower to bindless. Could be optimized. */
|
||||
nir_ssa_def *index = nir_steal_tex_src(tex, nir_tex_src_texture_offset);
|
||||
if (!index)
|
||||
index = nir_imm_int(b, tex->texture_index);
|
||||
|
||||
*internal_bindless = true;
|
||||
nir_tex_instr_add_src(tex, nir_tex_src_texture_handle,
|
||||
nir_src_for_ssa(index_to_handle(b, index)));
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
agx_nir_lower_bindings(nir_shader *shader, bool *internal_bindless)
|
||||
{
|
||||
/* First lower index to offset so we can lower more naturally */
|
||||
bool progress = nir_lower_tex(
|
||||
shader, &(nir_lower_tex_options){.lower_index_to_offset = true});
|
||||
|
||||
/* Next run constant folding so the constant optimizations above have a
|
||||
* chance.
|
||||
*/
|
||||
progress |= nir_opt_constant_folding(shader);
|
||||
|
||||
progress |= nir_shader_instructions_pass(
|
||||
shader, lower, nir_metadata_block_index | nir_metadata_dominance,
|
||||
internal_bindless);
|
||||
return progress;
|
||||
}
|
||||
@@ -275,6 +275,29 @@ lay_out_table(struct agx_compiled_shader *shader, struct table_state *state,
|
||||
return uniform;
|
||||
}
|
||||
|
||||
/* Reserve u0_u1 for the texture base if needed for internal bindless operation.
|
||||
* When we have too many textures/images for the available texture state
|
||||
* registers, an early lowering pass in the driver spills some textures/images
|
||||
* out of texture state registers and instead accesses them as bindless
|
||||
* internally. That pass assumes u0_u1 points to the texture descriptors
|
||||
* otherwise bound to texture state registers.
|
||||
*/
|
||||
static void
reserve_internal_bindless(struct state *state)
{
   /* The texture base pointer lives in the root table, so it is pushed from
    * there.
    */
   struct table_state *table = &state->tables[AGX_SYSVAL_TABLE_ROOT];

   /* Only used for sizeof/offsetof below; never dereferenced */
   struct agx_draw_uniforms *u = NULL;

   /* Push granularity is 16-bit words; a 64-bit pointer spans 4 of them */
   const unsigned len_words = sizeof(u->texture_base) / sizeof(uint16_t);

   /* The lowering pass hard-codes u0_u1 as the table base, so texture_base
    * must sit at offset 0 of the root table and be a 64-bit pointer.
    */
   static_assert(offsetof(struct agx_draw_uniforms, texture_base) == 0, "ABI");
   static_assert(sizeof(u->texture_base) == 8, "64-bit pointer");

   /* Mark all words of the pointer as pushed... */
   BITSET_SET_RANGE(table->pushed, 0, len_words - 1);

   /* ...as a single element covering the whole pointer, so it is laid out
    * contiguously (and hence lands exactly at u0_u1).
    */
   for (unsigned i = 0; i < len_words; ++i)
      table->element_size[i] = len_words;
}
|
||||
|
||||
static unsigned
|
||||
lay_out_uniforms(struct agx_compiled_shader *shader, struct state *state)
|
||||
{
|
||||
@@ -301,14 +324,14 @@ lay_out_uniforms(struct agx_compiled_shader *shader, struct state *state)
|
||||
}
|
||||
|
||||
bool
|
||||
agx_nir_lower_sysvals(nir_shader *shader, struct agx_compiled_shader *compiled,
|
||||
unsigned *push_size)
|
||||
agx_nir_lower_sysvals(nir_shader *shader, bool internal_bindless,
|
||||
struct agx_compiled_shader *compiled, unsigned *push_size)
|
||||
{
|
||||
bool progress = nir_shader_instructions_pass(
|
||||
shader, lower_sysvals, nir_metadata_block_index | nir_metadata_dominance,
|
||||
NULL);
|
||||
|
||||
if (!progress) {
|
||||
if (!progress && !internal_bindless) {
|
||||
*push_size = 0;
|
||||
return false;
|
||||
}
|
||||
@@ -318,6 +341,9 @@ agx_nir_lower_sysvals(nir_shader *shader, struct agx_compiled_shader *compiled,
|
||||
shader, record_loads, nir_metadata_block_index | nir_metadata_dominance,
|
||||
&state);
|
||||
|
||||
if (internal_bindless)
|
||||
reserve_internal_bindless(&state);
|
||||
|
||||
*push_size = lay_out_uniforms(compiled, &state);
|
||||
|
||||
util_dynarray_fini(&state.load_preambles);
|
||||
|
||||
@@ -1514,7 +1514,7 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so,
|
||||
base_key.vs.outputs_linear_shaded = key_->vs.outputs_linear_shaded;
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, agx_nir_lower_sysvals, compiled,
|
||||
NIR_PASS_V(nir, agx_nir_lower_sysvals, so->internal_bindless, compiled,
|
||||
&base_key.reserved_preamble);
|
||||
|
||||
agx_compile_shader_nir(nir, &base_key, debug, &binary, &compiled->info);
|
||||
@@ -1581,6 +1581,11 @@ agx_shader_initialize(struct agx_uncompiled_shader *so, nir_shader *nir)
|
||||
{
|
||||
so->type = pipe_shader_type_from_mesa(nir->info.stage);
|
||||
|
||||
/* We need to lower binding tables before calling agx_preprocess_nir, since
|
||||
* that does texture lowering that needs to know the binding model.
|
||||
*/
|
||||
NIR_PASS_V(nir, agx_nir_lower_bindings, &so->internal_bindless);
|
||||
|
||||
agx_preprocess_nir(nir, true, &so->info);
|
||||
|
||||
blob_init(&so->serialized_nir);
|
||||
|
||||
@@ -34,6 +34,8 @@
|
||||
#define agx_msg(...) fprintf(stderr, __VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#define AGX_NUM_TEXTURE_STATE_REGS 16
|
||||
|
||||
struct agx_streamout_target {
|
||||
struct pipe_stream_output_target base;
|
||||
uint32_t offset;
|
||||
@@ -95,12 +97,15 @@ enum agx_sysval_table {
|
||||
|
||||
/* Root system value table */
|
||||
struct PACKED agx_draw_uniforms {
|
||||
/* Pointer to binding table for texture descriptor, or 0 if none. This must
|
||||
* be first so that u0_u1 is always available for lowering binding
|
||||
* tables to bindless access.
|
||||
*/
|
||||
uint64_t texture_base;
|
||||
|
||||
/* Pointers to the system value tables themselves (for indirection) */
|
||||
uint64_t tables[AGX_NUM_SYSVAL_TABLES];
|
||||
|
||||
/* Pointer to binding table for texture descriptor, or 0 if none */
|
||||
uint64_t texture_base;
|
||||
|
||||
/* Uniform buffer objects */
|
||||
uint64_t ubo_base[PIPE_MAX_CONSTANT_BUFFERS];
|
||||
|
||||
@@ -175,6 +180,12 @@ struct agx_uncompiled_shader {
|
||||
struct hash_table *variants;
|
||||
bool has_xfb_info;
|
||||
|
||||
/* If set, we need to pass the address of the texture/image table as uniform
|
||||
* u0_u1 due to binding tables that were lowered to be internally bindless
|
||||
* with that base address.
|
||||
*/
|
||||
bool internal_bindless;
|
||||
|
||||
/* For compute kernels */
|
||||
unsigned static_shared_mem;
|
||||
|
||||
@@ -659,10 +670,12 @@ agx_transfer(struct pipe_transfer *p)
|
||||
uint64_t agx_upload_uniforms(struct agx_batch *batch, uint64_t textures,
|
||||
enum pipe_shader_type stage);
|
||||
|
||||
bool agx_nir_lower_sysvals(nir_shader *shader,
|
||||
bool agx_nir_lower_sysvals(nir_shader *shader, bool internal_bindless,
|
||||
struct agx_compiled_shader *compiled,
|
||||
unsigned *push_size);
|
||||
|
||||
bool agx_nir_lower_bindings(nir_shader *shader, bool *internal_bindless);
|
||||
|
||||
bool agx_batch_is_active(struct agx_batch *batch);
|
||||
bool agx_batch_is_submitted(struct agx_batch *batch);
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ files_asahi = files(
|
||||
'agx_fence.c',
|
||||
'agx_pipe.c',
|
||||
'agx_nir_lower_sysvals.c',
|
||||
'agx_nir_lower_bindings.c',
|
||||
'agx_query.c',
|
||||
'agx_state.c',
|
||||
'agx_streamout.c',
|
||||
|
||||
Reference in New Issue
Block a user