r300/compiler: optimize CMP for vertex shaders a bit
This commit is contained in:
@@ -511,37 +511,26 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
|
||||
{
|
||||
/* There is no decent CMP available, so let's rig one up.
|
||||
* CMP is defined as dst = src0 < 0.0 ? src1 : src2
|
||||
* The following sequence consumes two temps and three extra slots,
|
||||
* The following sequence consumes two temps and two extra slots
|
||||
* (the second temp and the second slot are consumed by transform_LRP),
|
||||
* but should be equivalent:
|
||||
*
|
||||
* SLT tmp0, src0, 0.0
|
||||
* SGE tmp1, src0, 0.0
|
||||
* MUL tmp0, tmp0, src1
|
||||
* MAD dst, src2, tmp1, tmp0
|
||||
* LRP dst, tmp0, src1, src2
|
||||
*
|
||||
* Yes, I know, I'm a mad scientist. ~ C. */
|
||||
* Yes, I know, I'm a mad scientist. ~ C. & M. */
|
||||
int tempreg0 = rc_find_free_temporary(c);
|
||||
int tempreg1 = rc_find_free_temporary(c);
|
||||
|
||||
/* SLT tmp0, src0, 0.0 */
|
||||
emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
|
||||
dstreg(RC_FILE_TEMPORARY, tempreg0),
|
||||
inst->U.I.SrcReg[0], builtin_zero);
|
||||
|
||||
/* SGE tmp1, src0, 0.0 */
|
||||
emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
|
||||
dstreg(RC_FILE_TEMPORARY, tempreg1),
|
||||
inst->U.I.SrcReg[0], builtin_zero);
|
||||
|
||||
/* MUL tmp0, tmp0, src1 */
|
||||
emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
|
||||
dstreg(RC_FILE_TEMPORARY, tempreg0),
|
||||
srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1]);
|
||||
|
||||
/* MAD dst, src2, tmp1, tmp0 */
|
||||
emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
|
||||
inst->U.I.DstReg,
|
||||
inst->U.I.SrcReg[2], srcreg(RC_FILE_TEMPORARY, tempreg1), srcreg(RC_FILE_TEMPORARY, tempreg0));
|
||||
/* LRP dst, tmp0, src1, src2 */
|
||||
transform_LRP(c,
|
||||
emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
|
||||
inst->U.I.DstReg,
|
||||
srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]));
|
||||
|
||||
rc_remove_instruction(inst);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user