intel/brw: Simplify variant tracking in brw_compile_fs

Remove the cfg variables and use the shader pointers directly.  Reset
the variant pointer if a shader failed or will not be used.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33541>
This commit is contained in:
Caio Oliveira
2025-02-13 23:40:19 -08:00
committed by Marge Bot
parent 834e30d244
commit c19a4150b5
+31 -44
View File
@@ -1587,8 +1587,6 @@ brw_compile_fs(const struct brw_compiler *compiler,
unsigned dispatch_width_limit = UINT_MAX;
std::unique_ptr<brw_shader> v8, v16, v32, vmulti;
cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL, *simd32_cfg = NULL,
*multi_cfg = NULL;
float throughput = 0;
bool has_spilled = false;
@@ -1628,8 +1626,6 @@ brw_compile_fs(const struct brw_compiler *compiler,
dispatch_width_limit = MIN2(dispatch_width_limit, v8->max_dispatch_width);
if (INTEL_SIMD(FS, 8)) {
simd8_cfg = v8->cfg;
assert(v8->payload().num_regs % reg_unit(devinfo) == 0);
prog_data->base.dispatch_grf_start_reg = v8->payload().num_regs / reg_unit(devinfo);
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
@@ -1639,12 +1635,14 @@ brw_compile_fs(const struct brw_compiler *compiler,
throughput = MAX2(throughput, perf.throughput);
has_spilled = v8->spilled_any_registers;
allow_spilling = false;
} else {
/* Not using SIMD8. */
v8.reset();
}
}
if (devinfo->ver >= 30) {
unsigned max_dispatch_width = reqd_dispatch_width ? reqd_dispatch_width : 32;
brw_shader *vbase = NULL;
if (max_polygons >= 2 && !key->coarse_pixel) {
if (max_polygons >= 4 && max_dispatch_width >= 32 &&
@@ -1661,14 +1659,13 @@ brw_compile_fs(const struct brw_compiler *compiler,
brw_shader_perf_log(compiler, params->base.log_data,
"Quad-SIMD8 shader failed to compile: %s\n",
vmulti->fail_msg);
vmulti.reset();
} else {
vbase = vmulti.get();
multi_cfg = vmulti->cfg;
assert(!vmulti->spilled_any_registers);
}
}
if (!vbase && max_dispatch_width >= 32 &&
if (!vmulti && max_dispatch_width >= 32 &&
2 * prog_data->num_varying_inputs <= MAX_VARYING &&
INTEL_SIMD(FS, 2X16)) {
/* Try a dual-SIMD16 compile */
@@ -1683,14 +1680,13 @@ brw_compile_fs(const struct brw_compiler *compiler,
brw_shader_perf_log(compiler, params->base.log_data,
"Dual-SIMD16 shader failed to compile: %s\n",
vmulti->fail_msg);
vmulti.reset();
} else {
vbase = vmulti.get();
multi_cfg = vmulti->cfg;
assert(!vmulti->spilled_any_registers);
}
}
if (!vbase && max_dispatch_width >= 16 &&
if (!vmulti && max_dispatch_width >= 16 &&
2 * prog_data->num_varying_inputs <= MAX_VARYING &&
INTEL_SIMD(FS, 2X8)) {
/* Try a dual-SIMD8 compile */
@@ -1705,14 +1701,12 @@ brw_compile_fs(const struct brw_compiler *compiler,
brw_shader_perf_log(compiler, params->base.log_data,
"Dual-SIMD8 shader failed to compile: %s\n",
vmulti->fail_msg);
} else {
vbase = vmulti.get();
multi_cfg = vmulti->cfg;
vmulti.reset();
}
}
}
if ((!vbase || vbase->dispatch_width < 32) &&
if ((!vmulti || vmulti->dispatch_width < 32) &&
max_dispatch_width >= 32 &&
INTEL_SIMD(FS, 32) &&
!prog_data->base.ray_queries) {
@@ -1727,11 +1721,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
brw_shader_perf_log(compiler, params->base.log_data,
"SIMD32 shader failed to compile: %s\n",
v32->fail_msg);
v32.reset();
} else {
if (!vbase)
vbase = v32.get();
simd32_cfg = v32->cfg;
assert(v32->payload().num_regs % reg_unit(devinfo) == 0);
prog_data->dispatch_grf_start_reg_32 = v32->payload().num_regs / reg_unit(devinfo);
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
@@ -1739,7 +1730,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
}
}
if (!vbase && INTEL_SIMD(FS, 16)) {
if (!vmulti && !v32 && INTEL_SIMD(FS, 16)) {
/* Try a SIMD16 compile */
brw_shader_params shader_params = base_shader_params;
shader_params.dispatch_width = 16;
@@ -1750,9 +1741,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
brw_shader_perf_log(compiler, params->base.log_data,
"SIMD16 shader failed to compile: %s\n",
v16->fail_msg);
v16.reset();
} else {
simd16_cfg = v16->cfg;
assert(v16->payload().num_regs % reg_unit(devinfo) == 0);
prog_data->dispatch_grf_start_reg_16 = v16->payload().num_regs / reg_unit(devinfo);
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
@@ -1774,9 +1764,9 @@ brw_compile_fs(const struct brw_compiler *compiler,
brw_shader_perf_log(compiler, params->base.log_data,
"SIMD16 shader failed to compile: %s\n",
v16->fail_msg);
v16.reset();
} else {
dispatch_width_limit = MIN2(dispatch_width_limit, v16->max_dispatch_width);
simd16_cfg = v16->cfg;
assert(v16->payload().num_regs % reg_unit(devinfo) == 0);
prog_data->dispatch_grf_start_reg_16 = v16->payload().num_regs / reg_unit(devinfo);
@@ -1790,7 +1780,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
}
}
const bool simd16_failed = v16 && !simd16_cfg;
const bool simd16_failed = !v16;
/* Currently, the compiler only supports SIMD32 on SNB+ */
if (!has_spilled &&
@@ -1808,15 +1798,15 @@ brw_compile_fs(const struct brw_compiler *compiler,
brw_shader_perf_log(compiler, params->base.log_data,
"SIMD32 shader failed to compile: %s\n",
v32->fail_msg);
v32.reset();
} else {
const brw_performance &perf = v32->performance_analysis.require();
if (!INTEL_DEBUG(DEBUG_DO32) && throughput >= perf.throughput) {
brw_shader_perf_log(compiler, params->base.log_data,
"SIMD32 shader inefficient\n");
v32.reset();
} else {
simd32_cfg = v32->cfg;
assert(v32->payload().num_regs % reg_unit(devinfo) == 0);
prog_data->dispatch_grf_start_reg_32 = v32->payload().num_regs / reg_unit(devinfo);
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
@@ -1830,7 +1820,6 @@ brw_compile_fs(const struct brw_compiler *compiler,
if (devinfo->ver >= 12 && !has_spilled &&
max_polygons >= 2 && !key->coarse_pixel &&
reqd_dispatch_width == SUBGROUP_SIZE_VARYING) {
assert(v8 || v16 || v32);
if (devinfo->ver >= 20 && max_polygons >= 4 &&
dispatch_width_limit >= 32 &&
@@ -1846,13 +1835,13 @@ brw_compile_fs(const struct brw_compiler *compiler,
brw_shader_perf_log(compiler, params->base.log_data,
"Quad-SIMD8 shader failed to compile: %s\n",
vmulti->fail_msg);
vmulti.reset();
} else {
multi_cfg = vmulti->cfg;
assert(!vmulti->spilled_any_registers);
}
}
if (!multi_cfg && devinfo->ver >= 20 &&
if (!vmulti && devinfo->ver >= 20 &&
dispatch_width_limit >= 32 &&
2 * prog_data->num_varying_inputs <= MAX_VARYING &&
INTEL_SIMD(FS, 2X16)) {
@@ -1866,13 +1855,13 @@ brw_compile_fs(const struct brw_compiler *compiler,
brw_shader_perf_log(compiler, params->base.log_data,
"Dual-SIMD16 shader failed to compile: %s\n",
vmulti->fail_msg);
vmulti.reset();
} else {
multi_cfg = vmulti->cfg;
assert(!vmulti->spilled_any_registers);
}
}
if (!multi_cfg && dispatch_width_limit >= 16 &&
if (!vmulti && dispatch_width_limit >= 16 &&
2 * prog_data->num_varying_inputs <= MAX_VARYING &&
INTEL_SIMD(FS, 2X8)) {
/* Try a dual-SIMD8 compile */
@@ -1886,14 +1875,13 @@ brw_compile_fs(const struct brw_compiler *compiler,
brw_shader_perf_log(compiler, params->base.log_data,
"Dual-SIMD8 shader failed to compile: %s\n",
vmulti->fail_msg);
} else {
multi_cfg = vmulti->cfg;
vmulti.reset();
}
}
}
}
if (multi_cfg) {
if (vmulti) {
assert(vmulti->payload().num_regs % reg_unit(devinfo) == 0);
prog_data->base.dispatch_grf_start_reg = vmulti->payload().num_regs / reg_unit(devinfo);
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
@@ -1904,7 +1892,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
* want SIMD16-only.
*/
if (reqd_dispatch_width == SUBGROUP_SIZE_REQUIRE_16)
simd8_cfg = NULL;
v8.reset();
brw_generator g(compiler, &params->base, &prog_data->base,
MESA_SHADER_FRAGMENT);
@@ -1920,36 +1908,35 @@ brw_compile_fs(const struct brw_compiler *compiler,
struct brw_compile_stats *stats = params->base.stats;
uint32_t max_dispatch_width = 0;
if (multi_cfg) {
if (vmulti) {
prog_data->dispatch_multi = vmulti->dispatch_width;
prog_data->max_polygons = vmulti->max_polygons;
g.generate_code(multi_cfg, vmulti->dispatch_width, vmulti->shader_stats,
g.generate_code(vmulti->cfg, vmulti->dispatch_width, vmulti->shader_stats,
vmulti->performance_analysis.require(),
stats, vmulti->max_polygons);
stats = stats ? stats + 1 : NULL;
max_dispatch_width = vmulti->dispatch_width;
} else if (simd8_cfg) {
} else if (v8) {
prog_data->dispatch_8 = true;
g.generate_code(simd8_cfg, 8, v8->shader_stats,
g.generate_code(v8->cfg, 8, v8->shader_stats,
v8->performance_analysis.require(), stats, 1);
stats = stats ? stats + 1 : NULL;
max_dispatch_width = 8;
}
if (simd16_cfg) {
if (v16) {
prog_data->dispatch_16 = true;
prog_data->prog_offset_16 = g.generate_code(
simd16_cfg, 16, v16->shader_stats,
v16->cfg, 16, v16->shader_stats,
v16->performance_analysis.require(), stats, 1);
stats = stats ? stats + 1 : NULL;
max_dispatch_width = 16;
}
if (simd32_cfg) {
if (v32) {
prog_data->dispatch_32 = true;
prog_data->prog_offset_32 = g.generate_code(
simd32_cfg, 32, v32->shader_stats,
v32->cfg, 32, v32->shader_stats,
v32->performance_analysis.require(), stats, 1);
stats = stats ? stats + 1 : NULL;
max_dispatch_width = 32;