r300/compiler: optimize CMP for vertex shaders a bit

This commit is contained in:
Marek Olšák
2010-04-18 20:49:50 +02:00
parent 65fd6fb204
commit ebd05a798e
@@ -511,37 +511,26 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
{
/* There is no decent CMP available, so let's rig one up.
* CMP is defined as dst = src0 < 0.0 ? src1 : src2
* The following sequence consumes two temps and three extra slots,
* The following sequence consumes two temps and two extra slots
* (the second temp and the second slot are consumed by transform_LRP),
* but should be equivalent:
*
* SLT tmp0, src0, 0.0
* SGE tmp1, src0, 0.0
* MUL tmp0, tmp0, src1
* MAD dst, src2, tmp1, tmp0
* LRP dst, tmp0, src1, src2
*
* Yes, I know, I'm a mad scientist. ~ C. */
* Yes, I know, I'm a mad scientist. ~ C. & M. */
int tempreg0 = rc_find_free_temporary(c);
int tempreg1 = rc_find_free_temporary(c);
/* SLT tmp0, src0, 0.0 */
emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
dstreg(RC_FILE_TEMPORARY, tempreg0),
inst->U.I.SrcReg[0], builtin_zero);
/* SGE tmp1, src0, 0.0 */
emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
dstreg(RC_FILE_TEMPORARY, tempreg1),
inst->U.I.SrcReg[0], builtin_zero);
/* MUL tmp0, tmp0, src1 */
emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
dstreg(RC_FILE_TEMPORARY, tempreg0),
srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1]);
/* MAD dst, src2, tmp1, tmp0 */
emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
inst->U.I.DstReg,
inst->U.I.SrcReg[2], srcreg(RC_FILE_TEMPORARY, tempreg1), srcreg(RC_FILE_TEMPORARY, tempreg0));
/* LRP dst, tmp0, src1, src2 */
transform_LRP(c,
emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]));
rc_remove_instruction(inst);
}