i965/fs: fix copy/constant propagation regioning checks
We were not accounting for subreg_offset in the check for the start of the region. Also, fs_reg::regs_read() already takes the stride into account, so we should not multiply its result by the stride again. This was making copy-propagation fail to copy-propagate cases that would otherwise be safe to copy-propagate. Again, this was observed in fp64 code, since there we use stride > 1 often. v2 (Sam): - Rename function and add comment (Jason, Curro). - Assert that register files and number are the same (Jason). - Fix code to take into account the assumption that src.subreg_offset is strictly less than the reg_offset unit (Curro). - Don't pass the registers by value to the function, use 'const fs_reg &' instead (Curro). - Remove obsolete comment in the commit log (Curro). v3 (Sam): - Remove the assert and put the condition in the return (Curro). - Fix function name (Curro). Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Francisco Jerez <currojerez@riseup.net>
This commit is contained in:
committed by
Samuel Iglesias Gonsálvez
parent
789eecdb79
commit
9149fd6817
@@ -330,6 +330,22 @@ can_take_stride(fs_inst *inst, unsigned arg, unsigned stride,
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that the register region read by src [src.reg_offset,
|
||||
* src.reg_offset + regs_read] is contained inside the register
|
||||
* region written by dst [dst.reg_offset, dst.reg_offset + regs_written]
|
||||
* Both src and dst must have the same register number and file.
|
||||
*/
|
||||
static inline bool
|
||||
region_contained_in(const fs_reg &src, unsigned regs_read,
|
||||
const fs_reg &dst, unsigned regs_written)
|
||||
{
|
||||
return src.file == dst.file && src.nr == dst.nr &&
|
||||
(src.reg_offset * REG_SIZE + src.subreg_offset >=
|
||||
dst.reg_offset * REG_SIZE + dst.subreg_offset) &&
|
||||
src.reg_offset + regs_read <= dst.reg_offset + regs_written;
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
|
||||
{
|
||||
@@ -352,10 +368,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
|
||||
/* Bail if inst is reading a range that isn't contained in the range
|
||||
* that entry is writing.
|
||||
*/
|
||||
if (inst->src[arg].reg_offset < entry->dst.reg_offset ||
|
||||
(inst->src[arg].reg_offset * 32 + inst->src[arg].subreg_offset +
|
||||
inst->regs_read(arg) * inst->src[arg].stride * 32) >
|
||||
(entry->dst.reg_offset + entry->regs_written) * 32)
|
||||
if (!region_contained_in(inst->src[arg], inst->regs_read(arg),
|
||||
entry->dst, entry->regs_written))
|
||||
return false;
|
||||
|
||||
/* we can't generally copy-propagate UD negations because we
|
||||
@@ -520,10 +534,8 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
|
||||
/* Bail if inst is reading a range that isn't contained in the range
|
||||
* that entry is writing.
|
||||
*/
|
||||
if (inst->src[i].reg_offset < entry->dst.reg_offset ||
|
||||
(inst->src[i].reg_offset * 32 + inst->src[i].subreg_offset +
|
||||
inst->regs_read(i) * inst->src[i].stride * 32) >
|
||||
(entry->dst.reg_offset + entry->regs_written) * 32)
|
||||
if (!region_contained_in(inst->src[i], inst->regs_read(i),
|
||||
entry->dst, entry->regs_written))
|
||||
continue;
|
||||
|
||||
fs_reg val = entry->src;
|
||||
|
||||
Reference in New Issue
Block a user