r300/compiler: Add peephole optimization for the 'add' presubtract operation
This commit is contained in:
@@ -38,6 +38,10 @@ struct peephole_state {
|
||||
unsigned int WriteMask;
|
||||
};
|
||||
|
||||
typedef void (*rc_presub_replace_fn)(struct peephole_state *,
|
||||
struct rc_instruction *,
|
||||
unsigned int);
|
||||
|
||||
static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
|
||||
{
|
||||
struct rc_src_register combine;
|
||||
@@ -516,6 +520,164 @@ static void peephole_scan_write(void * data, struct rc_instruction * inst,
|
||||
}
|
||||
}
|
||||
|
||||
static int presub_helper(
|
||||
struct radeon_compiler * c,
|
||||
struct peephole_state * s,
|
||||
rc_presubtract_op presub_opcode,
|
||||
rc_presub_replace_fn presub_replace)
|
||||
{
|
||||
struct rc_instruction * inst;
|
||||
unsigned int can_remove = 0;
|
||||
unsigned int cant_sub = 0;
|
||||
|
||||
for(inst = s->Inst->Next; inst != &c->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
unsigned int i;
|
||||
const struct rc_opcode_info * info =
|
||||
rc_get_opcode_info(inst->U.I.Opcode);
|
||||
|
||||
for(i = 0; i < info->NumSrcRegs; i++) {
|
||||
if(s->Inst->U.I.DstReg.WriteMask !=
|
||||
src_reads_dst_mask(inst->U.I.SrcReg[i],
|
||||
s->Inst->U.I.DstReg)) {
|
||||
continue;
|
||||
}
|
||||
if (cant_sub) {
|
||||
can_remove = 0;
|
||||
break;
|
||||
}
|
||||
/* XXX: There are some situations where instructions
|
||||
* with more than 2 src registers can use the
|
||||
* presubtract select, but to keep things simple we
|
||||
* will disable presubtract on these instructions for
|
||||
* now. Note: This if statement should not be pulled
|
||||
* outside of the loop, because it only applies to
|
||||
* instructions that could potentially use the
|
||||
* presubtract source. */
|
||||
if (info->NumSrcRegs > 2) {
|
||||
can_remove = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/* We can't use more than one presubtract value in an
|
||||
* instruction, unless the two prsubtract operations
|
||||
* are the same and read from the same registers. */
|
||||
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
|
||||
if (inst->U.I.PreSub.Opcode != presub_opcode
|
||||
|| inst->U.I.PreSub.SrcReg[0].File !=
|
||||
s->Inst->U.I.SrcReg[1].File
|
||||
|| inst->U.I.PreSub.SrcReg[0].Index !=
|
||||
s->Inst->U.I.SrcReg[1].Index) {
|
||||
|
||||
can_remove = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
presub_replace(s, inst, i);
|
||||
can_remove = 1;
|
||||
}
|
||||
if(!can_remove)
|
||||
break;
|
||||
rc_for_all_writes_mask(inst, peephole_scan_write, s);
|
||||
/* If all components of inst_add's destination register have
|
||||
* been written to by subsequent instructions, the original
|
||||
* value of the destination register is no longer valid and
|
||||
* we can't keep doing substitutions. */
|
||||
if (!s->WriteMask){
|
||||
break;
|
||||
}
|
||||
/* Make this instruction doesn't write to the presubtract source. */
|
||||
if (inst->U.I.DstReg.WriteMask &
|
||||
src_reads_dst_mask(s->Inst->U.I.SrcReg[1],
|
||||
inst->U.I.DstReg)
|
||||
|| info->IsFlowControl) {
|
||||
cant_sub = 1;
|
||||
}
|
||||
}
|
||||
return can_remove;
|
||||
}
|
||||
|
||||
static void presub_replace_add(struct peephole_state *s,
|
||||
struct rc_instruction * inst,
|
||||
unsigned int src_index)
|
||||
{
|
||||
inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0];
|
||||
inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1];
|
||||
inst->U.I.PreSub.SrcReg[0].Negate = 0;
|
||||
inst->U.I.PreSub.SrcReg[1].Negate = 0;
|
||||
inst->U.I.PreSub.Opcode = RC_PRESUB_ADD;
|
||||
inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
|
||||
inst->U.I.PreSub.SrcReg[0]);
|
||||
inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
|
||||
inst->U.I.SrcReg[src_index].Index = RC_PRESUB_ADD;
|
||||
}
|
||||
|
||||
static int peephole_add_presub_add(
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * inst_add)
|
||||
{
|
||||
struct rc_src_register * src0 = NULL;
|
||||
struct rc_src_register * src1 = NULL;
|
||||
unsigned int i;
|
||||
struct peephole_state s;
|
||||
|
||||
if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE)
|
||||
return 0;
|
||||
|
||||
if (inst_add->U.I.SaturateMode)
|
||||
return 0;
|
||||
|
||||
if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
|
||||
return 0;
|
||||
|
||||
/* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */
|
||||
for (i = 0; i < 2; i++) {
|
||||
if (inst_add->U.I.SrcReg[i].Abs)
|
||||
return 0;
|
||||
if ((inst_add->U.I.SrcReg[i].Negate
|
||||
& inst_add->U.I.DstReg.WriteMask) ==
|
||||
inst_add->U.I.DstReg.WriteMask) {
|
||||
src0 = &inst_add->U.I.SrcReg[i];
|
||||
} else if (!src1) {
|
||||
src1 = &inst_add->U.I.SrcReg[i];
|
||||
} else {
|
||||
src0 = &inst_add->U.I.SrcReg[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (!src1)
|
||||
return 0;
|
||||
|
||||
/* XXX Only do add for now. */
|
||||
if (src0->Negate)
|
||||
return 0;
|
||||
|
||||
s.Inst = inst_add;
|
||||
s.WriteMask = inst_add->U.I.DstReg.WriteMask;
|
||||
if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) {
|
||||
rc_remove_instruction(inst_add);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void presub_replace_inv(struct peephole_state * s,
|
||||
struct rc_instruction * inst,
|
||||
unsigned int src_index)
|
||||
{
|
||||
/* We must be careful not to modify s->Inst, since it
|
||||
* is possible it will remain part of the program.
|
||||
* XXX Maybe pass a struct instead of a pointer for s->Inst.*/
|
||||
inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1];
|
||||
inst->U.I.PreSub.SrcReg[0].Negate = 0;
|
||||
inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
|
||||
inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
|
||||
inst->U.I.PreSub.SrcReg[0]);
|
||||
|
||||
inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
|
||||
inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
|
||||
}
|
||||
|
||||
/**
|
||||
* PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
|
||||
* Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
|
||||
@@ -531,11 +693,11 @@ static int peephole_add_presub_inv(
|
||||
struct rc_instruction * inst_add)
|
||||
{
|
||||
unsigned int i, swz, mask;
|
||||
unsigned int can_remove = 0;
|
||||
unsigned int cant_sub = 0;
|
||||
struct rc_instruction * inst;
|
||||
struct peephole_state s;
|
||||
|
||||
if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE)
|
||||
return 0;
|
||||
|
||||
if (inst_add->U.I.SaturateMode)
|
||||
return 0;
|
||||
|
||||
@@ -567,81 +729,7 @@ static int peephole_add_presub_inv(
|
||||
s.Inst = inst_add;
|
||||
s.WriteMask = inst_add->U.I.DstReg.WriteMask;
|
||||
|
||||
/* For all instructions that read inst_add->U.I.DstReg before it is
|
||||
* written again, use the 1 - src0 presubtact instead. */
|
||||
for(inst = inst_add->Next; inst != &c->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
const struct rc_opcode_info * info =
|
||||
rc_get_opcode_info(inst->U.I.Opcode);
|
||||
|
||||
for(i = 0; i < info->NumSrcRegs; i++) {
|
||||
if(inst_add->U.I.DstReg.WriteMask !=
|
||||
src_reads_dst_mask(inst->U.I.SrcReg[i],
|
||||
inst_add->U.I.DstReg)) {
|
||||
continue;
|
||||
}
|
||||
if (cant_sub) {
|
||||
can_remove = 0;
|
||||
break;
|
||||
}
|
||||
/* XXX: There are some situations where instructions
|
||||
* with more than 2 src registers can use the
|
||||
* presubtract select, but to keep things simple we
|
||||
* will disable presubtract on these instructions for
|
||||
* now. Note: This if statement should not be pulled
|
||||
* outside of the loop, because it only applies to
|
||||
* instructions that could potentially use the
|
||||
* presubtract source. */
|
||||
if (info->NumSrcRegs > 2) {
|
||||
can_remove = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/* We can't use more than one presubtract value in an
|
||||
* instruction, unless the two prsubtract operations
|
||||
* are the same and read from the same registers. */
|
||||
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
|
||||
if (inst->U.I.PreSub.Opcode != RC_PRESUB_INV
|
||||
|| inst->U.I.PreSub.SrcReg[0].File !=
|
||||
inst_add->U.I.SrcReg[1].File
|
||||
|| inst->U.I.PreSub.SrcReg[0].Index !=
|
||||
inst_add->U.I.SrcReg[1].Index) {
|
||||
|
||||
can_remove = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* We must be careful not to modify inst_add, since it
|
||||
* is possible it will remain part of the program. */
|
||||
inst->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
|
||||
inst->U.I.PreSub.SrcReg[0].Negate = 0;
|
||||
inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
|
||||
inst->U.I.SrcReg[i] = chain_srcregs(inst->U.I.SrcReg[i],
|
||||
inst->U.I.PreSub.SrcReg[0]);
|
||||
|
||||
inst->U.I.SrcReg[i].File = RC_FILE_PRESUB;
|
||||
inst->U.I.SrcReg[i].Index = RC_PRESUB_INV;
|
||||
can_remove = 1;
|
||||
}
|
||||
if(!can_remove)
|
||||
break;
|
||||
rc_for_all_writes_mask(inst, peephole_scan_write, &s);
|
||||
/* If all components of inst_add's destination register have
|
||||
* been written to by subsequent instructions, the original
|
||||
* value of the destination register is no longer valid and
|
||||
* we can't keep doing substitutions. */
|
||||
if (!s.WriteMask){
|
||||
break;
|
||||
}
|
||||
/* Make this instruction doesn't write to the presubtract source. */
|
||||
if (inst->U.I.DstReg.WriteMask &
|
||||
src_reads_dst_mask(inst_add->U.I.SrcReg[1],
|
||||
inst->U.I.DstReg)
|
||||
|| info->IsFlowControl) {
|
||||
cant_sub = 1;
|
||||
}
|
||||
}
|
||||
if(can_remove) {
|
||||
if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) {
|
||||
rc_remove_instruction(inst_add);
|
||||
return 1;
|
||||
}
|
||||
@@ -660,6 +748,8 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
|
||||
if (c->has_presub) {
|
||||
if(peephole_add_presub_inv(c, inst))
|
||||
return 1;
|
||||
if(peephole_add_presub_add(c, inst))
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
||||
@@ -300,6 +300,7 @@ static int destructive_merge_instructions(
|
||||
for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
|
||||
unsigned int arg;
|
||||
int free_source;
|
||||
unsigned int one_way = 0;
|
||||
struct radeon_pair_instruction_source srcp =
|
||||
alpha->RGB.Src[srcp_src];
|
||||
struct radeon_pair_instruction_source temp;
|
||||
@@ -307,14 +308,27 @@ static int destructive_merge_instructions(
|
||||
* 3rd arg of 0 means this is not an alpha source. */
|
||||
free_source = rc_pair_alloc_source(rgb, 1, 0,
|
||||
srcp.File, srcp.Index);
|
||||
/* If free_source == srcp_src, then either the
|
||||
* presubtract source is already in the correct place. */
|
||||
if (free_source == srcp_src)
|
||||
continue;
|
||||
/* If free_source < 0 then there are no free source
|
||||
* slots. */
|
||||
if (free_source < 0)
|
||||
return 0;
|
||||
|
||||
temp = rgb->RGB.Src[srcp_src];
|
||||
rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source];
|
||||
/* srcp needs src0 and src1 to be the same */
|
||||
if (free_source < srcp_src) {
|
||||
if (!temp.Used)
|
||||
continue;
|
||||
free_source = rc_pair_alloc_source(rgb, 1, 0,
|
||||
srcp.File, srcp.Index);
|
||||
one_way = 1;
|
||||
} else {
|
||||
rgb->RGB.Src[free_source] = temp;
|
||||
}
|
||||
/* If free_source == srcp_src, then the presubtract
|
||||
* source is already in the correct place. */
|
||||
if (free_source == srcp_src)
|
||||
continue;
|
||||
/* Shuffle the sources, so we can put the
|
||||
* presubtract source in the correct place. */
|
||||
for (arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
|
||||
@@ -331,12 +345,11 @@ static int destructive_merge_instructions(
|
||||
/* We need to do this just in case register
|
||||
* is one of the sources already, but in the
|
||||
* wrong spot. */
|
||||
else if(rgb->RGB.Arg[arg].Source == free_source)
|
||||
else if(rgb->RGB.Arg[arg].Source == free_source
|
||||
&& !one_way) {
|
||||
rgb->RGB.Arg[arg].Source = srcp_src;
|
||||
}
|
||||
}
|
||||
temp = rgb->RGB.Src[srcp_src];
|
||||
rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source];
|
||||
rgb->RGB.Src[free_source] = temp;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -352,6 +365,7 @@ static int destructive_merge_instructions(
|
||||
for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
|
||||
unsigned int arg;
|
||||
int free_source;
|
||||
unsigned int one_way = 0;
|
||||
struct radeon_pair_instruction_source srcp =
|
||||
alpha->Alpha.Src[srcp_src];
|
||||
struct radeon_pair_instruction_source temp;
|
||||
@@ -359,14 +373,27 @@ static int destructive_merge_instructions(
|
||||
* 3rd arg of 1 means this is an alpha source. */
|
||||
free_source = rc_pair_alloc_source(rgb, 0, 1,
|
||||
srcp.File, srcp.Index);
|
||||
/* If free_source == srcp_src, then either the
|
||||
* presubtract source is already in the correct place. */
|
||||
if (free_source == srcp_src)
|
||||
continue;
|
||||
/* If free_source < 0 then there are no free source
|
||||
* slots. */
|
||||
if (free_source < 0)
|
||||
return 0;
|
||||
|
||||
temp = rgb->Alpha.Src[srcp_src];
|
||||
rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source];
|
||||
/* srcp needs src0 and src1 to be the same. */
|
||||
if (free_source < srcp_src) {
|
||||
if (!temp.Used)
|
||||
continue;
|
||||
free_source = rc_pair_alloc_source(rgb, 0, 1,
|
||||
temp.File, temp.Index);
|
||||
one_way = 1;
|
||||
} else {
|
||||
rgb->Alpha.Src[free_source] = temp;
|
||||
}
|
||||
/* If free_source == srcp_src, then the presubtract
|
||||
* source is already in the correct place. */
|
||||
if (free_source == srcp_src)
|
||||
continue;
|
||||
/* Shuffle the sources, so we can put the
|
||||
* presubtract source in the correct place. */
|
||||
for(arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
|
||||
@@ -380,12 +407,11 @@ static int destructive_merge_instructions(
|
||||
}
|
||||
if (rgb->RGB.Arg[arg].Source == srcp_src)
|
||||
rgb->RGB.Arg[arg].Source = free_source;
|
||||
else if (rgb->RGB.Arg[arg].Source == free_source)
|
||||
else if (rgb->RGB.Arg[arg].Source == free_source
|
||||
&& !one_way) {
|
||||
rgb->RGB.Arg[arg].Source = srcp_src;
|
||||
}
|
||||
}
|
||||
temp = rgb->Alpha.Src[srcp_src];
|
||||
rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source];
|
||||
rgb->Alpha.Src[free_source] = temp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user