intel/brw: Remove Gfx8- passes from optimize()
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26887>
This commit is contained in:
@@ -3167,200 +3167,6 @@ fs_visitor::opt_redundant_halt()
|
||||
return progress;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute a bitmask with GRF granularity with a bit set for each GRF starting
|
||||
* from \p r.offset which overlaps the region starting at \p s.offset and
|
||||
* spanning \p ds bytes.
|
||||
*/
|
||||
static inline unsigned
|
||||
mask_relative_to(const fs_reg &r, const fs_reg &s, unsigned ds)
|
||||
{
|
||||
const int rel_offset = reg_offset(s) - reg_offset(r);
|
||||
const int shift = rel_offset / REG_SIZE;
|
||||
const unsigned n = DIV_ROUND_UP(rel_offset % REG_SIZE + ds, REG_SIZE);
|
||||
assert(reg_space(r) == reg_space(s) &&
|
||||
shift >= 0 && shift < int(8 * sizeof(unsigned)));
|
||||
return ((1 << n) - 1) << shift;
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::compute_to_mrf()
|
||||
{
|
||||
bool progress = false;
|
||||
int next_ip = 0;
|
||||
|
||||
/* No MRFs on Gen >= 7. */
|
||||
if (devinfo->ver >= 7)
|
||||
return false;
|
||||
|
||||
const fs_live_variables &live = live_analysis.require();
|
||||
|
||||
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
|
||||
int ip = next_ip;
|
||||
next_ip++;
|
||||
|
||||
if (inst->opcode != BRW_OPCODE_MOV ||
|
||||
inst->is_partial_write() ||
|
||||
inst->dst.file != MRF || inst->src[0].file != VGRF ||
|
||||
inst->dst.type != inst->src[0].type ||
|
||||
inst->src[0].abs || inst->src[0].negate ||
|
||||
!inst->src[0].is_contiguous() ||
|
||||
inst->src[0].offset % REG_SIZE != 0)
|
||||
continue;
|
||||
|
||||
/* Can't compute-to-MRF this GRF if someone else was going to
|
||||
* read it later.
|
||||
*/
|
||||
if (live.vgrf_end[inst->src[0].nr] > ip)
|
||||
continue;
|
||||
|
||||
/* Found a move of a GRF to a MRF. Let's see if we can go rewrite the
|
||||
* things that computed the value of all GRFs of the source region. The
|
||||
* regs_left bitset keeps track of the registers we haven't yet found a
|
||||
* generating instruction for.
|
||||
*/
|
||||
unsigned regs_left = (1 << regs_read(inst, 0)) - 1;
|
||||
|
||||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
|
||||
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
|
||||
inst->src[0], inst->size_read(0))) {
|
||||
/* Found the last thing to write our reg we want to turn
|
||||
* into a compute-to-MRF.
|
||||
*/
|
||||
|
||||
/* If this one instruction didn't populate all the
|
||||
* channels, bail. We might be able to rewrite everything
|
||||
* that writes that reg, but it would require smarter
|
||||
* tracking.
|
||||
*/
|
||||
if (scan_inst->is_partial_write())
|
||||
break;
|
||||
|
||||
/* Handling things not fully contained in the source of the copy
|
||||
* would need us to understand coalescing out more than one MOV at
|
||||
* a time.
|
||||
*/
|
||||
if (!region_contained_in(scan_inst->dst, scan_inst->size_written,
|
||||
inst->src[0], inst->size_read(0)))
|
||||
break;
|
||||
|
||||
/* SEND instructions can't have MRF as a destination. */
|
||||
if (scan_inst->mlen)
|
||||
break;
|
||||
|
||||
if (devinfo->ver == 6) {
|
||||
/* gfx6 math instructions must have the destination be
|
||||
* GRF, so no compute-to-MRF for them.
|
||||
*/
|
||||
if (scan_inst->is_math()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Clear the bits for any registers this instruction overwrites. */
|
||||
regs_left &= ~mask_relative_to(
|
||||
inst->src[0], scan_inst->dst, scan_inst->size_written);
|
||||
if (!regs_left)
|
||||
break;
|
||||
}
|
||||
|
||||
/* We don't handle control flow here. Most computation of
|
||||
* values that end up in MRFs are shortly before the MRF
|
||||
* write anyway.
|
||||
*/
|
||||
if (block->start() == scan_inst)
|
||||
break;
|
||||
|
||||
/* You can't read from an MRF, so if someone else reads our
|
||||
* MRF's source GRF that we wanted to rewrite, that stops us.
|
||||
*/
|
||||
bool interfered = false;
|
||||
for (int i = 0; i < scan_inst->sources; i++) {
|
||||
if (regions_overlap(scan_inst->src[i], scan_inst->size_read(i),
|
||||
inst->src[0], inst->size_read(0))) {
|
||||
interfered = true;
|
||||
}
|
||||
}
|
||||
if (interfered)
|
||||
break;
|
||||
|
||||
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
|
||||
inst->dst, inst->size_written)) {
|
||||
/* If somebody else writes our MRF here, we can't
|
||||
* compute-to-MRF before that.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
|
||||
if (scan_inst->mlen > 0 && scan_inst->base_mrf != -1 &&
|
||||
regions_overlap(fs_reg(MRF, scan_inst->base_mrf), scan_inst->mlen * REG_SIZE,
|
||||
inst->dst, inst->size_written)) {
|
||||
/* Found a SEND instruction, which means that there are
|
||||
* live values in MRFs from base_mrf to base_mrf +
|
||||
* scan_inst->mlen - 1. Don't go pushing our MRF write up
|
||||
* above it.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (regs_left)
|
||||
continue;
|
||||
|
||||
/* Found all generating instructions of our MRF's source value, so it
|
||||
* should be safe to rewrite them to point to the MRF directly.
|
||||
*/
|
||||
regs_left = (1 << regs_read(inst, 0)) - 1;
|
||||
|
||||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
|
||||
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
|
||||
inst->src[0], inst->size_read(0))) {
|
||||
/* Clear the bits for any registers this instruction overwrites. */
|
||||
regs_left &= ~mask_relative_to(
|
||||
inst->src[0], scan_inst->dst, scan_inst->size_written);
|
||||
|
||||
const unsigned rel_offset = reg_offset(scan_inst->dst) -
|
||||
reg_offset(inst->src[0]);
|
||||
|
||||
if (inst->dst.nr & BRW_MRF_COMPR4) {
|
||||
/* Apply the same address transformation done by the hardware
|
||||
* for COMPR4 MRF writes.
|
||||
*/
|
||||
assert(rel_offset < 2 * REG_SIZE);
|
||||
scan_inst->dst.nr = inst->dst.nr + rel_offset / REG_SIZE * 4;
|
||||
|
||||
/* Clear the COMPR4 bit if the generating instruction is not
|
||||
* compressed.
|
||||
*/
|
||||
if (scan_inst->size_written < 2 * REG_SIZE)
|
||||
scan_inst->dst.nr &= ~BRW_MRF_COMPR4;
|
||||
|
||||
} else {
|
||||
/* Calculate the MRF number the result of this instruction is
|
||||
* ultimately written to.
|
||||
*/
|
||||
scan_inst->dst.nr = inst->dst.nr + rel_offset / REG_SIZE;
|
||||
}
|
||||
|
||||
scan_inst->dst.file = MRF;
|
||||
scan_inst->dst.offset = inst->dst.offset + rel_offset % REG_SIZE;
|
||||
scan_inst->saturate |= inst->saturate;
|
||||
if (!regs_left)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assert(!regs_left);
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if (progress)
|
||||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
/**
|
||||
* Eliminate FIND_LIVE_CHANNEL instructions occurring outside any control
|
||||
* flow. We could probably do better here with some form of divergence
|
||||
@@ -3494,81 +3300,6 @@ fs_visitor::emit_repclear_shader()
|
||||
lower_scoreboard();
|
||||
}
|
||||
|
||||
/**
|
||||
* Walks through basic blocks, looking for repeated MRF writes and
|
||||
* removing the later ones.
|
||||
*/
|
||||
bool
|
||||
fs_visitor::remove_duplicate_mrf_writes()
|
||||
{
|
||||
fs_inst *last_mrf_move[BRW_MAX_MRF(devinfo->ver)];
|
||||
bool progress = false;
|
||||
|
||||
/* Need to update the MRF tracking for compressed instructions. */
|
||||
if (dispatch_width >= 16)
|
||||
return false;
|
||||
|
||||
memset(last_mrf_move, 0, sizeof(last_mrf_move));
|
||||
|
||||
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
|
||||
if (inst->is_control_flow()) {
|
||||
memset(last_mrf_move, 0, sizeof(last_mrf_move));
|
||||
}
|
||||
|
||||
if (inst->opcode == BRW_OPCODE_MOV &&
|
||||
inst->dst.file == MRF) {
|
||||
fs_inst *prev_inst = last_mrf_move[inst->dst.nr];
|
||||
if (prev_inst && prev_inst->opcode == BRW_OPCODE_MOV &&
|
||||
inst->dst.equals(prev_inst->dst) &&
|
||||
inst->src[0].equals(prev_inst->src[0]) &&
|
||||
inst->saturate == prev_inst->saturate &&
|
||||
inst->predicate == prev_inst->predicate &&
|
||||
inst->conditional_mod == prev_inst->conditional_mod &&
|
||||
inst->exec_size == prev_inst->exec_size) {
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* Clear out the last-write records for MRFs that were overwritten. */
|
||||
if (inst->dst.file == MRF) {
|
||||
last_mrf_move[inst->dst.nr] = NULL;
|
||||
}
|
||||
|
||||
if (inst->mlen > 0 && inst->base_mrf != -1) {
|
||||
/* Found a SEND instruction, which will include two or fewer
|
||||
* implied MRF writes. We could do better here.
|
||||
*/
|
||||
for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
|
||||
last_mrf_move[inst->base_mrf + i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Clear out any MRF move records whose sources got overwritten. */
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(last_mrf_move); i++) {
|
||||
if (last_mrf_move[i] &&
|
||||
regions_overlap(inst->dst, inst->size_written,
|
||||
last_mrf_move[i]->src[0],
|
||||
last_mrf_move[i]->size_read(0))) {
|
||||
last_mrf_move[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->opcode == BRW_OPCODE_MOV &&
|
||||
inst->dst.file == MRF &&
|
||||
inst->src[0].file != ARF &&
|
||||
!inst->is_partial_write()) {
|
||||
last_mrf_move[inst->dst.nr] = inst;
|
||||
}
|
||||
}
|
||||
|
||||
if (progress)
|
||||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
/**
|
||||
* Rounding modes for conversion instructions are included for each
|
||||
* conversion, but right now it is a state. So once it is set,
|
||||
@@ -3618,185 +3349,6 @@ fs_visitor::remove_extra_rounding_modes()
|
||||
return progress;
|
||||
}
|
||||
|
||||
static void
|
||||
clear_deps_for_inst_src(fs_inst *inst, bool *deps, int first_grf, int grf_len)
|
||||
{
|
||||
/* Clear the flag for registers that actually got read (as expected). */
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
int grf;
|
||||
if (inst->src[i].file == VGRF || inst->src[i].file == FIXED_GRF) {
|
||||
grf = inst->src[i].nr;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (grf >= first_grf &&
|
||||
grf < first_grf + grf_len) {
|
||||
deps[grf - first_grf] = false;
|
||||
if (inst->exec_size == 16)
|
||||
deps[grf - first_grf + 1] = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements this workaround for the original 965:
|
||||
*
|
||||
* "[DevBW, DevCL] Implementation Restrictions: As the hardware does not
|
||||
* check for post destination dependencies on this instruction, software
|
||||
* must ensure that there is no destination hazard for the case of ‘write
|
||||
* followed by a posted write’ shown in the following example.
|
||||
*
|
||||
* 1. mov r3 0
|
||||
* 2. send r3.xy <rest of send instruction>
|
||||
* 3. mov r2 r3
|
||||
*
|
||||
* Due to no post-destination dependency check on the ‘send’, the above
|
||||
* code sequence could have two instructions (1 and 2) in flight at the
|
||||
* same time that both consider ‘r3’ as the target of their final writes.
|
||||
*/
|
||||
void
|
||||
fs_visitor::insert_gfx4_pre_send_dependency_workarounds(bblock_t *block,
|
||||
fs_inst *inst)
|
||||
{
|
||||
int write_len = regs_written(inst);
|
||||
int first_write_grf = inst->dst.nr;
|
||||
bool needs_dep[BRW_MAX_MRF(devinfo->ver)];
|
||||
assert(write_len < (int)sizeof(needs_dep) - 1);
|
||||
|
||||
memset(needs_dep, false, sizeof(needs_dep));
|
||||
memset(needs_dep, true, write_len);
|
||||
|
||||
clear_deps_for_inst_src(inst, needs_dep, first_write_grf, write_len);
|
||||
|
||||
/* Walk backwards looking for writes to registers we're writing which
|
||||
* aren't read since being written. If we hit the start of the program,
|
||||
* we assume that there are no outstanding dependencies on entry to the
|
||||
* program.
|
||||
*/
|
||||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
|
||||
/* If we hit control flow, assume that there *are* outstanding
|
||||
* dependencies, and force their cleanup before our instruction.
|
||||
*/
|
||||
if (block->start() == scan_inst && block->num != 0) {
|
||||
for (int i = 0; i < write_len; i++) {
|
||||
if (needs_dep[i])
|
||||
DEP_RESOLVE_MOV(fs_builder(this, block, inst),
|
||||
first_write_grf + i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* We insert our reads as late as possible on the assumption that any
|
||||
* instruction but a MOV that might have left us an outstanding
|
||||
* dependency has more latency than a MOV.
|
||||
*/
|
||||
if (scan_inst->dst.file == VGRF) {
|
||||
for (unsigned i = 0; i < regs_written(scan_inst); i++) {
|
||||
int reg = scan_inst->dst.nr + i;
|
||||
|
||||
if (reg >= first_write_grf &&
|
||||
reg < first_write_grf + write_len &&
|
||||
needs_dep[reg - first_write_grf]) {
|
||||
DEP_RESOLVE_MOV(fs_builder(this, block, inst), reg);
|
||||
needs_dep[reg - first_write_grf] = false;
|
||||
if (scan_inst->exec_size == 16)
|
||||
needs_dep[reg - first_write_grf + 1] = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Clear the flag for registers that actually got read (as expected). */
|
||||
clear_deps_for_inst_src(scan_inst, needs_dep, first_write_grf, write_len);
|
||||
|
||||
/* Continue the loop only if we haven't resolved all the dependencies */
|
||||
int i;
|
||||
for (i = 0; i < write_len; i++) {
|
||||
if (needs_dep[i])
|
||||
break;
|
||||
}
|
||||
if (i == write_len)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements this workaround for the original 965:
|
||||
*
|
||||
* "[DevBW, DevCL] Errata: A destination register from a send can not be
|
||||
* used as a destination register until after it has been sourced by an
|
||||
* instruction with a different destination register.
|
||||
*/
|
||||
void
|
||||
fs_visitor::insert_gfx4_post_send_dependency_workarounds(bblock_t *block, fs_inst *inst)
|
||||
{
|
||||
int write_len = regs_written(inst);
|
||||
unsigned first_write_grf = inst->dst.nr;
|
||||
bool needs_dep[BRW_MAX_MRF(devinfo->ver)];
|
||||
assert(write_len < (int)sizeof(needs_dep) - 1);
|
||||
|
||||
memset(needs_dep, false, sizeof(needs_dep));
|
||||
memset(needs_dep, true, write_len);
|
||||
/* Walk forwards looking for writes to registers we're writing which aren't
|
||||
* read before being written.
|
||||
*/
|
||||
foreach_inst_in_block_starting_from(fs_inst, scan_inst, inst) {
|
||||
/* If we hit control flow, force resolve all remaining dependencies. */
|
||||
if (block->end() == scan_inst && block->num != cfg->num_blocks - 1) {
|
||||
for (int i = 0; i < write_len; i++) {
|
||||
if (needs_dep[i])
|
||||
DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst),
|
||||
first_write_grf + i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* Clear the flag for registers that actually got read (as expected). */
|
||||
clear_deps_for_inst_src(scan_inst, needs_dep, first_write_grf, write_len);
|
||||
|
||||
/* We insert our reads as late as possible since they're reading the
|
||||
* result of a SEND, which has massive latency.
|
||||
*/
|
||||
if (scan_inst->dst.file == VGRF &&
|
||||
scan_inst->dst.nr >= first_write_grf &&
|
||||
scan_inst->dst.nr < first_write_grf + write_len &&
|
||||
needs_dep[scan_inst->dst.nr - first_write_grf]) {
|
||||
DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst),
|
||||
scan_inst->dst.nr);
|
||||
needs_dep[scan_inst->dst.nr - first_write_grf] = false;
|
||||
}
|
||||
|
||||
/* Continue the loop only if we haven't resolved all the dependencies */
|
||||
int i;
|
||||
for (i = 0; i < write_len; i++) {
|
||||
if (needs_dep[i])
|
||||
break;
|
||||
}
|
||||
if (i == write_len)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::insert_gfx4_send_dependency_workarounds()
|
||||
{
|
||||
if (devinfo->ver != 4 || devinfo->platform == INTEL_PLATFORM_G4X)
|
||||
return;
|
||||
|
||||
bool progress = false;
|
||||
|
||||
foreach_block_and_inst(block, fs_inst, inst, cfg) {
|
||||
if (inst->mlen != 0 && inst->dst.file == VGRF) {
|
||||
insert_gfx4_pre_send_dependency_workarounds(block, inst);
|
||||
insert_gfx4_post_send_dependency_workarounds(block, inst);
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (progress)
|
||||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::lower_load_payload()
|
||||
{
|
||||
@@ -4396,44 +3948,6 @@ fs_visitor::lower_integer_multiplication()
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::lower_minmax()
|
||||
{
|
||||
assert(devinfo->ver < 6);
|
||||
|
||||
bool progress = false;
|
||||
|
||||
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
|
||||
const fs_builder ibld(this, block, inst);
|
||||
|
||||
if (inst->opcode == BRW_OPCODE_SEL &&
|
||||
inst->predicate == BRW_PREDICATE_NONE) {
|
||||
/* If src1 is an immediate value that is not NaN, then it can't be
|
||||
* NaN. In that case, emit CMP because it is much better for cmod
|
||||
* propagation. Likewise if src1 is not float. Gfx4 and Gfx5 don't
|
||||
* support HF or DF, so it is not necessary to check for those.
|
||||
*/
|
||||
if (inst->src[1].type != BRW_REGISTER_TYPE_F ||
|
||||
(inst->src[1].file == IMM && !isnan(inst->src[1].f))) {
|
||||
ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1],
|
||||
inst->conditional_mod);
|
||||
} else {
|
||||
ibld.CMPN(ibld.null_reg_d(), inst->src[0], inst->src[1],
|
||||
inst->conditional_mod);
|
||||
}
|
||||
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||
inst->conditional_mod = BRW_CONDITIONAL_NONE;
|
||||
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (progress)
|
||||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::lower_sub_sat()
|
||||
{
|
||||
@@ -6163,8 +5677,6 @@ fs_visitor::optimize()
|
||||
pass_num = 0;
|
||||
iteration++;
|
||||
|
||||
OPT(remove_duplicate_mrf_writes);
|
||||
|
||||
OPT(opt_algebraic);
|
||||
OPT(opt_cse);
|
||||
OPT(opt_copy_propagation);
|
||||
@@ -6175,7 +5687,6 @@ fs_visitor::optimize()
|
||||
OPT(dead_control_flow_eliminate, this);
|
||||
OPT(opt_saturate_propagation);
|
||||
OPT(register_coalesce);
|
||||
OPT(compute_to_mrf);
|
||||
OPT(eliminate_find_live_channel);
|
||||
|
||||
OPT(compact_virtual_grfs);
|
||||
@@ -6201,10 +5712,8 @@ fs_visitor::optimize()
|
||||
/* Identify trailing zeros LOAD_PAYLOAD of sampler messages.
|
||||
* Do this before splitting SENDs.
|
||||
*/
|
||||
if (devinfo->ver >= 7) {
|
||||
if (OPT(opt_zero_samples) && OPT(opt_copy_propagation))
|
||||
OPT(opt_algebraic);
|
||||
}
|
||||
if (OPT(opt_zero_samples) && OPT(opt_copy_propagation))
|
||||
OPT(opt_algebraic);
|
||||
|
||||
OPT(opt_split_sends);
|
||||
OPT(fixup_nomask_control_flow);
|
||||
@@ -6220,9 +5729,7 @@ fs_visitor::optimize()
|
||||
*/
|
||||
OPT(opt_cse);
|
||||
OPT(register_coalesce);
|
||||
OPT(compute_to_mrf);
|
||||
OPT(dead_code_eliminate);
|
||||
OPT(remove_duplicate_mrf_writes);
|
||||
OPT(opt_peephole_sel);
|
||||
}
|
||||
|
||||
@@ -6237,7 +5744,6 @@ fs_visitor::optimize()
|
||||
|
||||
OPT(register_coalesce);
|
||||
OPT(lower_simd_width);
|
||||
OPT(compute_to_mrf);
|
||||
OPT(dead_code_eliminate);
|
||||
}
|
||||
|
||||
@@ -6251,14 +5757,6 @@ fs_visitor::optimize()
|
||||
}
|
||||
OPT(lower_sub_sat);
|
||||
|
||||
if (devinfo->ver <= 5 && OPT(lower_minmax)) {
|
||||
OPT(opt_cmod_propagation);
|
||||
OPT(opt_cse);
|
||||
if (OPT(opt_copy_propagation))
|
||||
OPT(opt_algebraic);
|
||||
OPT(dead_code_eliminate);
|
||||
}
|
||||
|
||||
progress = false;
|
||||
OPT(lower_derivatives);
|
||||
OPT(lower_regioning);
|
||||
@@ -6770,12 +6268,6 @@ fs_visitor::allocate_registers(bool allow_spilling)
|
||||
_mesa_shader_stage_to_string(stage));
|
||||
}
|
||||
|
||||
/* This must come after all optimization and register allocation, since
|
||||
* it inserts dead code that happens to have side effects, and it does
|
||||
* so based on the actual physical registers in use.
|
||||
*/
|
||||
insert_gfx4_send_dependency_workarounds();
|
||||
|
||||
if (failed)
|
||||
return;
|
||||
|
||||
|
||||
@@ -276,10 +276,8 @@ public:
|
||||
bool opt_bank_conflicts();
|
||||
bool opt_split_sends();
|
||||
bool register_coalesce();
|
||||
bool compute_to_mrf();
|
||||
bool eliminate_find_live_channel();
|
||||
bool dead_code_eliminate();
|
||||
bool remove_duplicate_mrf_writes();
|
||||
bool remove_extra_rounding_modes();
|
||||
|
||||
fs_instruction_scheduler *prepare_scheduler(void *mem_ctx);
|
||||
@@ -287,11 +285,6 @@ public:
|
||||
instruction_scheduler_mode mode);
|
||||
void schedule_instructions_post_ra();
|
||||
|
||||
void insert_gfx4_send_dependency_workarounds();
|
||||
void insert_gfx4_pre_send_dependency_workarounds(bblock_t *block,
|
||||
fs_inst *inst);
|
||||
void insert_gfx4_post_send_dependency_workarounds(bblock_t *block,
|
||||
fs_inst *inst);
|
||||
void vfail(const char *msg, va_list args);
|
||||
void fail(const char *msg, ...);
|
||||
void limit_dispatch_width(unsigned n, const char *msg);
|
||||
@@ -301,7 +294,6 @@ public:
|
||||
bool lower_regioning();
|
||||
bool lower_logical_sends();
|
||||
bool lower_integer_multiplication();
|
||||
bool lower_minmax();
|
||||
bool lower_simd_width();
|
||||
bool lower_barycentrics();
|
||||
bool lower_derivatives();
|
||||
|
||||
Reference in New Issue
Block a user