nv50/ir/nir: rework indirect function_temp handling

Signed-off-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6338>
This commit is contained in:
Karol Herbst
2020-08-16 19:33:02 +02:00
committed by Marge Bot
parent fd265bb4f1
commit d79c946fb3
@@ -59,6 +59,18 @@ type_size(const struct glsl_type *type, bool bindless)
return glsl_count_attribute_slots(type, false);
}
static void
function_temp_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
assert(glsl_type_is_vector_or_scalar(type));
unsigned comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
unsigned length = glsl_get_vector_elements(type);
*size = comp_size * length;
*align = 0x10;
}
class Converter : public ConverterCommon
{
public:
@@ -69,7 +81,6 @@ private:
typedef std::vector<LValue*> LValues;
typedef unordered_map<unsigned, LValues> NirDefMap;
typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;
typedef unordered_map<unsigned, uint32_t> NirArrayLMemOffsets;
typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;
CacheMode convert(enum gl_access_qualifier);
@@ -124,6 +135,8 @@ private:
DataType getDType(nir_intrinsic_instr *, bool isSigned);
DataType getDType(nir_op, uint8_t);
DataFile getFile(nir_intrinsic_op);
std::vector<DataType> getSTypes(nir_alu_instr *);
DataType getSType(nir_src &, bool isFloat, bool isSigned);
@@ -162,7 +175,6 @@ private:
NirDefMap ssaDefs;
NirDefMap regDefs;
ImmediateMap immediates;
NirArrayLMemOffsets regToLmemOffset;
NirBlockMap blocks;
unsigned int curLoopDepth;
unsigned int curIfDepth;
@@ -336,6 +348,28 @@ Converter::getSType(nir_src &src, bool isFloat, bool isSigned)
return ty;
}
DataFile
Converter::getFile(nir_intrinsic_op op)
{
switch (op) {
case nir_intrinsic_load_global:
case nir_intrinsic_store_global:
return FILE_MEMORY_GLOBAL;
case nir_intrinsic_load_scratch:
case nir_intrinsic_store_scratch:
return FILE_MEMORY_LOCAL;
case nir_intrinsic_load_shared:
case nir_intrinsic_store_shared:
return FILE_MEMORY_SHARED;
case nir_intrinsic_load_kernel_input:
return FILE_SHADER_INPUT;
default:
ERROR("couldn't get DateFile for op %s\n", nir_intrinsic_infos[op].name);
assert(false);
}
return FILE_NULL;
}
operation
Converter::getOperation(nir_op op)
{
@@ -702,6 +736,8 @@ Converter::convert(nir_dest *dest)
Converter::LValues&
Converter::convert(nir_register *reg)
{
assert(!reg->num_array_elems);
NirDefMap::iterator it = regDefs.find(reg->index);
if (it != regDefs.end())
return it->second;
@@ -1246,7 +1282,7 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,
bool
Converter::parseNIR()
{
info->bin.tlsSpace = 0;
info->bin.tlsSpace = nir->scratch_size;
info->io.clipDistances = nir->info.clip_distance_array_size;
info->io.cullDistances = nir->info.cull_distance_array_size;
info->io.layer_viewport_relative = nir->info.layer_viewport_relative;
@@ -1336,16 +1372,6 @@ Converter::visit(nir_function *function)
break;
}
nir_foreach_register(reg, &function->impl->registers) {
if (reg->num_array_elems) {
// TODO: packed variables would be nice, but MemoryOpt fails
// replace 4 with reg->num_components
uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8);
regToLmemOffset[reg->index] = info->bin.tlsSpace;
info->bin.tlsSpace += size;
}
}
nir_index_ssa_defs(function->impl);
foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
if (!visit(node))
@@ -1752,18 +1778,6 @@ Converter::visit(nir_intrinsic_instr *insn)
}
break;
}
case nir_intrinsic_load_kernel_input: {
assert(prog->getType() == Program::TYPE_COMPUTE);
assert(insn->num_components == 1);
LValues &newDefs = convert(&insn->dest);
const DataType dType = getDType(insn);
Value *indirect;
uint32_t idx = getIndirect(insn, 0, 0, indirect, true);
mkLoad(dType, newDefs[0], mkSymbol(FILE_SHADER_INPUT, 0, dType, idx), indirect);
break;
}
case nir_intrinsic_load_barycentric_at_offset:
case nir_intrinsic_load_barycentric_at_sample:
case nir_intrinsic_load_barycentric_centroid:
@@ -2252,6 +2266,7 @@ Converter::visit(nir_intrinsic_instr *insn)
break;
}
case nir_intrinsic_store_scratch:
case nir_intrinsic_store_shared: {
DataType sType = getSType(insn->src[0], false, false);
Value *indirectOffset;
@@ -2260,11 +2275,13 @@ Converter::visit(nir_intrinsic_instr *insn)
for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
if (!((1u << i) & nir_intrinsic_write_mask(insn)))
continue;
Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
Symbol *sym = mkSymbol(getFile(op), 0, sType, offset + i * typeSizeof(sType));
mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));
}
break;
}
case nir_intrinsic_load_kernel_input:
case nir_intrinsic_load_scratch:
case nir_intrinsic_load_shared: {
const DataType dType = getDType(insn);
LValues &newDefs = convert(&insn->dest);
@@ -2272,7 +2289,7 @@ Converter::visit(nir_intrinsic_instr *insn)
uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
for (uint8_t i = 0u; i < dest_components; ++i)
loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
loadFrom(getFile(op), 0, dType, newDefs[i], offset, i, indirectOffset);
break;
}
@@ -2567,55 +2584,7 @@ Converter::visit(nir_alu_instr *insn)
i->sType = sTypes[0];
break;
}
// those are weird ALU ops and need special handling, because
// 1. they are always componend based
// 2. they basically just merge multiple values into one data type
case nir_op_mov:
if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) {
nir_reg_dest& reg = insn->dest.dest.reg;
uint32_t goffset = regToLmemOffset[reg.reg->index];
uint8_t comps = reg.reg->num_components;
uint8_t size = reg.reg->bit_size / 8;
uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
uint32_t aoffset = csize * reg.base_offset;
Value *indirect = NULL;
if (reg.indirect)
indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS),
getSrc(reg.indirect, 0), mkImm(csize));
for (uint8_t i = 0u; i < comps; ++i) {
if (!((1u << i) & insn->dest.write_mask))
continue;
Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size);
mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i));
}
break;
} else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) {
LValues &newDefs = convert(&insn->dest);
nir_reg_src& reg = insn->src[0].src.reg;
uint32_t goffset = regToLmemOffset[reg.reg->index];
// uint8_t comps = reg.reg->num_components;
uint8_t size = reg.reg->bit_size / 8;
uint8_t csize = 4 * size; // TODO after fixing MemoryOpts: comps * size;
uint32_t aoffset = csize * reg.base_offset;
Value *indirect = NULL;
if (reg.indirect)
indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize));
for (uint8_t i = 0u; i < newDefs.size(); ++i)
loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect);
break;
} else {
LValues &newDefs = convert(&insn->dest);
for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {
mkMov(newDefs[c], getSrc(&insn->src[0], c), dType);
}
}
break;
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4:
@@ -3135,13 +3104,23 @@ Converter::run()
.ballot_bit_size = 32,
};
/* prepare for IO lowering */
NIR_PASS_V(nir, nir_opt_deref);
NIR_PASS_V(nir, nir_lower_regs_to_ssa);
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
/* codegen assumes vec4 alignment for memory */
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, function_temp_type_info);
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp, nir_address_format_32bit_offset);
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
NIR_PASS_V(nir, nir_lower_io,
(nir_variable_mode)(nir_var_shader_in | nir_var_shader_out),
type_size, (nir_lower_io_options)0);
NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
NIR_PASS_V(nir, nir_lower_regs_to_ssa);
NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
NIR_PASS_V(nir, nir_lower_phis_to_scalar);
@@ -3164,8 +3143,6 @@ Converter::run()
} while (progress);
NIR_PASS_V(nir, nir_lower_bool_to_int32);
NIR_PASS_V(nir, nir_lower_locals_to_regs);
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
NIR_PASS_V(nir, nir_convert_from_ssa, true);
// Garbage collect dead instructions