From a64b4a05af362fff52c9e52eb51cd92fe164afcc Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 30 Aug 2010 08:59:30 -0700 Subject: [PATCH] r300/compiler: Add peephole optimization for the 'add' presubtract operation --- .../dri/r300/compiler/radeon_optimize.c | 246 ++++++++++++------ .../dri/r300/compiler/radeon_pair_schedule.c | 58 +++-- 2 files changed, 210 insertions(+), 94 deletions(-) diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index e01ba85aa56..3ff07d60396 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -38,6 +38,10 @@ struct peephole_state { unsigned int WriteMask; }; +typedef void (*rc_presub_replace_fn)(struct peephole_state *, + struct rc_instruction *, + unsigned int); + static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) { struct rc_src_register combine; @@ -516,6 +520,164 @@ static void peephole_scan_write(void * data, struct rc_instruction * inst, } } +static int presub_helper( + struct radeon_compiler * c, + struct peephole_state * s, + rc_presubtract_op presub_opcode, + rc_presub_replace_fn presub_replace) +{ + struct rc_instruction * inst; + unsigned int can_remove = 0; + unsigned int cant_sub = 0; + + for(inst = s->Inst->Next; inst != &c->Program.Instructions; + inst = inst->Next) { + unsigned int i; + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + + for(i = 0; i < info->NumSrcRegs; i++) { + if(s->Inst->U.I.DstReg.WriteMask != + src_reads_dst_mask(inst->U.I.SrcReg[i], + s->Inst->U.I.DstReg)) { + continue; + } + if (cant_sub) { + can_remove = 0; + break; + } + /* XXX: There are some situations where instructions + * with more than 2 src registers can use the + * presubtract select, but to keep things simple we + * will disable presubtract on these instructions for + * now. 
Note: This if statement should not be pulled + * outside of the loop, because it only applies to + * instructions that could potentially use the + * presubtract source. */ + if (info->NumSrcRegs > 2) { + can_remove = 0; + break; + } + + /* We can't use more than one presubtract value in an + * instruction, unless the two presubtract operations + * are the same and read from the same registers. NOTE(review): only PreSub.SrcReg[0] is compared, and always against s->Inst's SrcReg[1], while presub_replace_add stores s->Inst's SrcReg[0] there -- confirm this check is right for RC_PRESUB_ADD. */ + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + if (inst->U.I.PreSub.Opcode != presub_opcode + || inst->U.I.PreSub.SrcReg[0].File != + s->Inst->U.I.SrcReg[1].File + || inst->U.I.PreSub.SrcReg[0].Index != + s->Inst->U.I.SrcReg[1].Index) { + + can_remove = 0; + break; + } + } + presub_replace(s, inst, i); + can_remove = 1; + } + if(!can_remove) + break; + rc_for_all_writes_mask(inst, peephole_scan_write, s); + /* If all components of s->Inst's destination register have + * been written to by subsequent instructions, the original + * value of the destination register is no longer valid and + * we can't keep doing substitutions. */ + if (!s->WriteMask){ + break; + } + /* Make sure this instruction doesn't write to the presubtract source. 
*/ + if (inst->U.I.DstReg.WriteMask & + src_reads_dst_mask(s->Inst->U.I.SrcReg[1], + inst->U.I.DstReg) + || info->IsFlowControl) { + cant_sub = 1; + } + } + return can_remove; +} + +static void presub_replace_add(struct peephole_state *s, + struct rc_instruction * inst, + unsigned int src_index) +{ + inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0]; + inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1]; + inst->U.I.PreSub.SrcReg[0].Negate = 0; + inst->U.I.PreSub.SrcReg[1].Negate = 0; + inst->U.I.PreSub.Opcode = RC_PRESUB_ADD; + inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index], + inst->U.I.PreSub.SrcReg[0]); + inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst->U.I.SrcReg[src_index].Index = RC_PRESUB_ADD; +} + +static int peephole_add_presub_add( + struct radeon_compiler * c, + struct rc_instruction * inst_add) +{ + struct rc_src_register * src0 = NULL; + struct rc_src_register * src1 = NULL; + unsigned int i; + struct peephole_state s; + + if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE) + return 0; + + if (inst_add->U.I.SaturateMode) + return 0; + + if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) + return 0; + + /* src0 and src1 can't have absolute values; only one can be negative, and each must be all negative or all positive. NOTE(review): a source whose Negate covers only some of the written components is not always rejected by this loop (e.g. SrcReg[0] partially negated, SrcReg[1] not negated) -- confirm that is intended. */ + for (i = 0; i < 2; i++) { + if (inst_add->U.I.SrcReg[i].Abs) + return 0; + if ((inst_add->U.I.SrcReg[i].Negate + & inst_add->U.I.DstReg.WriteMask) == + inst_add->U.I.DstReg.WriteMask) { + src0 = &inst_add->U.I.SrcReg[i]; + } else if (!src1) { + src1 = &inst_add->U.I.SrcReg[i]; + } else { + src0 = &inst_add->U.I.SrcReg[i]; + } + } + + if (!src1) + return 0; + + /* XXX Only do add for now; bail out when src0 is negated. 
*/ + if (src0->Negate) + return 0; + + s.Inst = inst_add; + s.WriteMask = inst_add->U.I.DstReg.WriteMask; + if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) { + rc_remove_instruction(inst_add); + return 1; + } + return 0; +} + +static void presub_replace_inv(struct peephole_state * s, + struct rc_instruction * inst, + unsigned int src_index) +{ + /* We must be careful not to modify s->Inst, since it + * is possible it will remain part of the program. + * XXX Maybe pass a struct instead of a pointer for s->Inst.*/ + inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1]; + inst->U.I.PreSub.SrcReg[0].Negate = 0; + inst->U.I.PreSub.Opcode = RC_PRESUB_INV; + inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index], + inst->U.I.PreSub.SrcReg[0]); + + inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; +} + /** * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source @@ -531,11 +693,11 @@ static int peephole_add_presub_inv( struct rc_instruction * inst_add) { unsigned int i, swz, mask; - unsigned int can_remove = 0; - unsigned int cant_sub = 0; - struct rc_instruction * inst; struct peephole_state s; + if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE) + return 0; + if (inst_add->U.I.SaturateMode) return 0; @@ -567,81 +729,7 @@ static int peephole_add_presub_inv( s.Inst = inst_add; s.WriteMask = inst_add->U.I.DstReg.WriteMask; - /* For all instructions that read inst_add->U.I.DstReg before it is - * written again, use the 1 - src0 presubtact instead. 
*/ - for(inst = inst_add->Next; inst != &c->Program.Instructions; - inst = inst->Next) { - const struct rc_opcode_info * info = - rc_get_opcode_info(inst->U.I.Opcode); - - for(i = 0; i < info->NumSrcRegs; i++) { - if(inst_add->U.I.DstReg.WriteMask != - src_reads_dst_mask(inst->U.I.SrcReg[i], - inst_add->U.I.DstReg)) { - continue; - } - if (cant_sub) { - can_remove = 0; - break; - } - /* XXX: There are some situations where instructions - * with more than 2 src registers can use the - * presubtract select, but to keep things simple we - * will disable presubtract on these instructions for - * now. Note: This if statement should not be pulled - * outside of the loop, because it only applies to - * instructions that could potentially use the - * presubtract source. */ - if (info->NumSrcRegs > 2) { - can_remove = 0; - break; - } - - /* We can't use more than one presubtract value in an - * instruction, unless the two prsubtract operations - * are the same and read from the same registers. */ - if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { - if (inst->U.I.PreSub.Opcode != RC_PRESUB_INV - || inst->U.I.PreSub.SrcReg[0].File != - inst_add->U.I.SrcReg[1].File - || inst->U.I.PreSub.SrcReg[0].Index != - inst_add->U.I.SrcReg[1].Index) { - - can_remove = 0; - break; - } - } - /* We must be careful not to modify inst_add, since it - * is possible it will remain part of the program. 
*/ - inst->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; - inst->U.I.PreSub.SrcReg[0].Negate = 0; - inst->U.I.PreSub.Opcode = RC_PRESUB_INV; - inst->U.I.SrcReg[i] = chain_srcregs(inst->U.I.SrcReg[i], - inst->U.I.PreSub.SrcReg[0]); - - inst->U.I.SrcReg[i].File = RC_FILE_PRESUB; - inst->U.I.SrcReg[i].Index = RC_PRESUB_INV; - can_remove = 1; - } - if(!can_remove) - break; - rc_for_all_writes_mask(inst, peephole_scan_write, &s); - /* If all components of inst_add's destination register have - * been written to by subsequent instructions, the original - * value of the destination register is no longer valid and - * we can't keep doing substitutions. */ - if (!s.WriteMask){ - break; - } - /* Make this instruction doesn't write to the presubtract source. */ - if (inst->U.I.DstReg.WriteMask & - src_reads_dst_mask(inst_add->U.I.SrcReg[1], - inst->U.I.DstReg) - || info->IsFlowControl) { - cant_sub = 1; - } - } - if(can_remove) { + if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) { rc_remove_instruction(inst_add); return 1; } @@ -660,6 +748,8 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) if (c->has_presub) { if(peephole_add_presub_inv(c, inst)) return 1; + if(peephole_add_presub_add(c, inst)) + return 1; } break; default: diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index 32c54fd74bc..5269d659851 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -300,6 +300,7 @@ static int destructive_merge_instructions( for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { unsigned int arg; int free_source; + unsigned int one_way = 0; struct radeon_pair_instruction_source srcp = alpha->RGB.Src[srcp_src]; struct radeon_pair_instruction_source temp; @@ -307,14 +308,27 @@ static int destructive_merge_instructions( * 3rd arg of 0 means this is not an alpha source. 
*/ free_source = rc_pair_alloc_source(rgb, 1, 0, srcp.File, srcp.Index); - /* If free_source == srcp_src, then either the - * presubtract source is already in the correct place. */ - if (free_source == srcp_src) - continue; /* If free_source < 0 then there are no free source * slots. */ if (free_source < 0) return 0; + + temp = rgb->RGB.Src[srcp_src]; + rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source]; + /* srcp needs src0 and src1 to be the same. NOTE(review): the re-allocation in this RGB path passes srcp.File and srcp.Index, while the alpha path passes temp.File and temp.Index -- confirm which is intended. */ + if (free_source < srcp_src) { + if (!temp.Used) + continue; + free_source = rc_pair_alloc_source(rgb, 1, 0, + srcp.File, srcp.Index); + one_way = 1; + } else { + rgb->RGB.Src[free_source] = temp; + } + /* If free_source == srcp_src, then the presubtract + * source is already in the correct place. */ + if (free_source == srcp_src) + continue; /* Shuffle the sources, so we can put the * presubtract source in the correct place. */ for (arg = 0; arg < rgb_info->NumSrcRegs; arg++) { @@ -331,12 +345,11 @@ static int destructive_merge_instructions( /* We need to do this just in case register * is one of the sources already, but in the * wrong spot. 
*/ - else if(rgb->RGB.Arg[arg].Source == free_source) + else if(rgb->RGB.Arg[arg].Source == free_source + && !one_way) { rgb->RGB.Arg[arg].Source = srcp_src; + } } - temp = rgb->RGB.Src[srcp_src]; - rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source]; - rgb->RGB.Src[free_source] = temp; } } @@ -352,6 +365,7 @@ static int destructive_merge_instructions( for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { unsigned int arg; int free_source; + unsigned int one_way = 0; struct radeon_pair_instruction_source srcp = alpha->Alpha.Src[srcp_src]; struct radeon_pair_instruction_source temp; @@ -359,14 +373,27 @@ static int destructive_merge_instructions( * 3rd arg of 1 means this is an alpha source. */ free_source = rc_pair_alloc_source(rgb, 0, 1, srcp.File, srcp.Index); - /* If free_source == srcp_src, then either the - * presubtract source is already in the correct place. 
*/ - if (free_source == srcp_src) - continue; /* If free_source < 0 then there are no free source * slots. */ if (free_source < 0) return 0; + + temp = rgb->Alpha.Src[srcp_src]; + rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source]; + /* srcp needs src0 and src1 to be the same. */ + if (free_source < srcp_src) { + if (!temp.Used) + continue; + free_source = rc_pair_alloc_source(rgb, 0, 1, + temp.File, temp.Index); + one_way = 1; + } else { + rgb->Alpha.Src[free_source] = temp; + } + /* If free_source == srcp_src, then the presubtract + * source is already in the correct place. */ + if (free_source == srcp_src) + continue; /* Shuffle the sources, so we can put the * presubtract source in the correct place. */ for(arg = 0; arg < rgb_info->NumSrcRegs; arg++) { @@ -380,12 +407,11 @@ static int destructive_merge_instructions( } if (rgb->RGB.Arg[arg].Source == srcp_src) rgb->RGB.Arg[arg].Source = free_source; - else if (rgb->RGB.Arg[arg].Source == free_source) + else if (rgb->RGB.Arg[arg].Source == free_source + && !one_way) { rgb->RGB.Arg[arg].Source = srcp_src; + } } - temp = rgb->Alpha.Src[srcp_src]; - rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source]; - rgb->Alpha.Src[free_source] = temp; } }