freedreno/ir3: Clean up instrlen setup.
We were calculating it with the gpu_id check in two places; do it once and use ir3_compiler for the gpu_id dependency.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5990>
This commit is contained in:
+7
-10
@@ -921,6 +921,7 @@ void * ir3_assemble(struct ir3_shader_variant *v)
|
||||
uint32_t *ptr, *dwords;
|
||||
struct ir3_info *info = &v->info;
|
||||
struct ir3 *shader = v->ir;
|
||||
const struct ir3_compiler *compiler = v->shader->compiler;
|
||||
|
||||
memset(info, 0, sizeof(*info));
|
||||
info->data = v;
|
||||
@@ -928,21 +929,17 @@ void * ir3_assemble(struct ir3_shader_variant *v)
|
||||
info->max_half_reg = -1;
|
||||
info->max_const = -1;
|
||||
|
||||
uint32_t instr_count = 0;
|
||||
foreach_block (block, &shader->block_list) {
|
||||
foreach_instr (instr, &block->instr_list) {
|
||||
info->sizedwords += 2;
|
||||
instr_count++;
|
||||
}
|
||||
}
|
||||
|
||||
/* need an integer number of instruction "groups" (sets of 16
|
||||
* instructions on a4xx or sets of 4 instructions on a3xx),
|
||||
* so pad out w/ NOPs if needed: (NOTE each instruction is 64bits)
|
||||
*/
|
||||
if (v->shader->compiler->gpu_id >= 400) {
|
||||
info->sizedwords = align(info->sizedwords, 16 * 2);
|
||||
} else {
|
||||
info->sizedwords = align(info->sizedwords, 4 * 2);
|
||||
}
|
||||
v->instrlen = DIV_ROUND_UP(instr_count, compiler->instr_align);
|
||||
|
||||
/* Pad out with NOPs to instrlen. */
|
||||
info->sizedwords = v->instrlen * compiler->instr_align * sizeof(instr_t) / 4;
|
||||
|
||||
ptr = dwords = rzalloc_size(v, 4 * info->sizedwords);
|
||||
|
||||
|
||||
@@ -115,6 +115,7 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id)
|
||||
compiler->unminify_coords = false;
|
||||
compiler->txf_ms_with_isaml = false;
|
||||
compiler->array_index_add_half = true;
|
||||
compiler->instr_align = 16;
|
||||
compiler->const_upload_unit = 4;
|
||||
} else {
|
||||
/* no special handling for "flat" */
|
||||
@@ -123,6 +124,7 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id)
|
||||
compiler->unminify_coords = true;
|
||||
compiler->txf_ms_with_isaml = true;
|
||||
compiler->array_index_add_half = false;
|
||||
compiler->instr_align = 4;
|
||||
compiler->const_upload_unit = 8;
|
||||
}
|
||||
|
||||
|
||||
@@ -96,6 +96,11 @@ struct ir3_compiler {
|
||||
/* The maximum number of constants, in vec4's, for compute shaders. */
|
||||
uint16_t max_const_compute;
|
||||
|
||||
/* Number of instructions that the shader's base address and length
|
||||
* (instrlen divides instruction count by this) must be aligned to.
|
||||
*/
|
||||
uint32_t instr_align;
|
||||
|
||||
/* on a3xx, the unit of indirect const load is higher than later gens (in
|
||||
* vec4 units):
|
||||
*/
|
||||
|
||||
@@ -124,19 +124,13 @@ fixup_regfootprint(struct ir3_shader_variant *v)
|
||||
*/
|
||||
void * ir3_shader_assemble(struct ir3_shader_variant *v)
|
||||
{
|
||||
unsigned gpu_id = v->shader->compiler->gpu_id;
|
||||
const struct ir3_compiler *compiler = v->shader->compiler;
|
||||
void *bin;
|
||||
|
||||
bin = ir3_assemble(v);
|
||||
if (!bin)
|
||||
return NULL;
|
||||
|
||||
if (gpu_id >= 400) {
|
||||
v->instrlen = v->info.sizedwords / (2 * 16);
|
||||
} else {
|
||||
v->instrlen = v->info.sizedwords / (2 * 4);
|
||||
}
|
||||
|
||||
/* NOTE: if relative addressing is used, we set constlen in
|
||||
* the compiler (to worst-case value) since we don't know in
|
||||
* the assembler what the max addr reg value can be:
|
||||
@@ -147,7 +141,7 @@ void * ir3_shader_assemble(struct ir3_shader_variant *v)
|
||||
* uploads are in units of 4 dwords. Round it up here to make calculations
|
||||
* regarding the shared constlen simpler.
|
||||
*/
|
||||
if (gpu_id >= 400)
|
||||
if (compiler->gpu_id >= 400)
|
||||
v->constlen = align(v->constlen, 4);
|
||||
|
||||
fixup_regfootprint(v);
|
||||
|
||||
Reference in New Issue
Block a user