gallium: remove TGSI opcode DP2A
use DP3 instead. Reviewed-by: Roland Scheidegger <sroland@vmware.com>
This commit is contained in:
@@ -159,34 +159,6 @@ static struct lp_build_tgsi_action dp2_action = {
|
||||
dp2_emit /* emit */
|
||||
};
|
||||
|
||||
/* TGSI_OPCODE_DP2A */
|
||||
static void
|
||||
dp2a_fetch_args(
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
struct lp_build_emit_data * emit_data)
|
||||
{
|
||||
dp_fetch_args(bld_base, emit_data, 2);
|
||||
emit_data->args[5] = lp_build_emit_fetch(bld_base, emit_data->inst,
|
||||
2, TGSI_CHAN_X);
|
||||
}
|
||||
|
||||
static void
|
||||
dp2a_emit(
|
||||
const struct lp_build_tgsi_action * action,
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
struct lp_build_emit_data * emit_data)
|
||||
{
|
||||
LLVMValueRef tmp;
|
||||
tmp = lp_build_emit_llvm(bld_base, TGSI_OPCODE_DP2, emit_data);
|
||||
emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD,
|
||||
emit_data->args[5], tmp);
|
||||
}
|
||||
|
||||
static struct lp_build_tgsi_action dp2a_action = {
|
||||
dp2a_fetch_args, /* fetch_args */
|
||||
dp2a_emit /* emit */
|
||||
};
|
||||
|
||||
/* TGSI_OPCODE_DP3 */
|
||||
static void
|
||||
dp3_fetch_args(
|
||||
@@ -1286,7 +1258,6 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
|
||||
bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_DP2A] = dp2a_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
|
||||
|
||||
@@ -554,9 +554,6 @@ lp_emit_instruction_aos(
|
||||
dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_DP2A:
|
||||
return FALSE;
|
||||
|
||||
case TGSI_OPCODE_FRC:
|
||||
src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
|
||||
tmp0 = lp_build_floor(&bld->bld_base.base, src0);
|
||||
|
||||
@@ -1000,15 +1000,6 @@ ttn_xpd(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
|
||||
ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
|
||||
}
|
||||
|
||||
static void
|
||||
ttn_dp2a(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
|
||||
{
|
||||
ttn_move_dest(b, dest,
|
||||
ttn_channel(b, nir_fadd(b, nir_fdot2(b, src[0], src[1]),
|
||||
src[2]),
|
||||
X));
|
||||
}
|
||||
|
||||
static void
|
||||
ttn_dp2(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
|
||||
{
|
||||
@@ -1536,7 +1527,6 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
|
||||
[TGSI_OPCODE_MAD] = nir_op_ffma,
|
||||
[TGSI_OPCODE_LRP] = 0,
|
||||
[TGSI_OPCODE_SQRT] = nir_op_fsqrt,
|
||||
[TGSI_OPCODE_DP2A] = 0,
|
||||
[TGSI_OPCODE_FRC] = nir_op_ffract,
|
||||
[TGSI_OPCODE_FLR] = nir_op_ffloor,
|
||||
[TGSI_OPCODE_ROUND] = nir_op_fround_even,
|
||||
@@ -1773,10 +1763,6 @@ ttn_emit_instruction(struct ttn_compile *c)
|
||||
ttn_dp4(b, op_trans[tgsi_op], dest, src);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_DP2A:
|
||||
ttn_dp2a(b, op_trans[tgsi_op], dest, src);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_DPH:
|
||||
ttn_dph(b, op_trans[tgsi_op], dest, src);
|
||||
break;
|
||||
|
||||
@@ -3184,31 +3184,6 @@ exec_dp4(struct tgsi_exec_machine *mach,
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
exec_dp2a(struct tgsi_exec_machine *mach,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
unsigned int chan;
|
||||
union tgsi_exec_channel arg[3];
|
||||
|
||||
fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
|
||||
fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
|
||||
micro_mul(&arg[2], &arg[0], &arg[1]);
|
||||
|
||||
fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
|
||||
fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
|
||||
micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
|
||||
|
||||
fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
|
||||
micro_add(&arg[0], &arg[0], &arg[1]);
|
||||
|
||||
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
|
||||
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
|
||||
store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
exec_dph(struct tgsi_exec_machine *mach,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
@@ -5183,10 +5158,6 @@ exec_instruction(
|
||||
exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_DP2A:
|
||||
exec_dp2a(mach, inst);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_FRC:
|
||||
exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
|
||||
break;
|
||||
|
||||
@@ -58,7 +58,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
|
||||
{ 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, REPL, "", 21 }, /* removed */
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
|
||||
|
||||
@@ -920,14 +920,11 @@ transform_log(struct tgsi_transform_context *tctx,
|
||||
* DP2 - 2-component Dot Product
|
||||
* dst = src0.x \times src1.x + src0.y \times src1.y
|
||||
*
|
||||
* DP2A - 2-component Dot Product And Add
|
||||
* dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
|
||||
*
|
||||
* NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
|
||||
* operations, which is what you'd prefer for a ISA that is natively
|
||||
* scalar. Probably a native vector ISA would at least already have
|
||||
* DP4/DP3 instructions, but perhaps there is room for an alternative
|
||||
* translation for DPH/DP2/DP2A using vector instructions.
|
||||
* translation for DPH/DP2 using vector instructions.
|
||||
*
|
||||
* ; needs: 1 tmp
|
||||
* MUL tmpA.x, src0.x, src1.x
|
||||
@@ -939,8 +936,6 @@ transform_log(struct tgsi_transform_context *tctx,
|
||||
* } else if (DP4) {
|
||||
* MAD tmpA.x, src0.w, src1.w, tmpA.x
|
||||
* }
|
||||
* } else if (DP2A) {
|
||||
* ADD tmpA.x, src2.x, tmpA.x
|
||||
* }
|
||||
* ; fixup last instruction to replicate into dst
|
||||
*/
|
||||
@@ -948,7 +943,6 @@ transform_log(struct tgsi_transform_context *tctx,
|
||||
#define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))
|
||||
#define DPH_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
|
||||
#define DP2_GROW (NINST(2) + NINST(3) - OINST(2))
|
||||
#define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
|
||||
#define DOTP_TMP 1
|
||||
static void
|
||||
transform_dotp(struct tgsi_transform_context *tctx,
|
||||
@@ -958,7 +952,6 @@ transform_dotp(struct tgsi_transform_context *tctx,
|
||||
struct tgsi_full_dst_register *dst = &inst->Dst[0];
|
||||
struct tgsi_full_src_register *src0 = &inst->Src[0];
|
||||
struct tgsi_full_src_register *src1 = &inst->Src[1];
|
||||
struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */
|
||||
struct tgsi_full_instruction new_inst;
|
||||
unsigned opcode = inst->Instruction.Opcode;
|
||||
|
||||
@@ -1026,17 +1019,6 @@ transform_dotp(struct tgsi_transform_context *tctx,
|
||||
reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
|
||||
reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
|
||||
}
|
||||
} else if (opcode == TGSI_OPCODE_DP2A) {
|
||||
tctx->emit_instruction(tctx, &new_inst);
|
||||
|
||||
/* ADD tmpA.x, src2.x, tmpA.x */
|
||||
new_inst = tgsi_default_full_instruction();
|
||||
new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
|
||||
new_inst.Instruction.NumDstRegs = 1;
|
||||
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
|
||||
new_inst.Instruction.NumSrcRegs = 2;
|
||||
reg_src(&new_inst.Src[0], src2, SWIZ(X, X, X, X));
|
||||
reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
|
||||
}
|
||||
|
||||
/* fixup last instruction to write to dst: */
|
||||
@@ -1562,11 +1544,6 @@ transform_instr(struct tgsi_transform_context *tctx,
|
||||
goto skip;
|
||||
transform_dotp(tctx, inst);
|
||||
break;
|
||||
case TGSI_OPCODE_DP2A:
|
||||
if (!ctx->config->lower_DP2A)
|
||||
goto skip;
|
||||
transform_dotp(tctx, inst);
|
||||
break;
|
||||
case TGSI_OPCODE_FLR:
|
||||
if (!ctx->config->lower_FLR)
|
||||
goto skip;
|
||||
@@ -1657,7 +1634,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
|
||||
OPCS(DP3) ||
|
||||
OPCS(DPH) ||
|
||||
OPCS(DP2) ||
|
||||
OPCS(DP2A) ||
|
||||
OPCS(FLR) ||
|
||||
OPCS(CEIL) ||
|
||||
OPCS(TRUNC) ||
|
||||
@@ -1725,10 +1701,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
|
||||
newlen += DP2_GROW * OPCS(DP2);
|
||||
numtmp = MAX2(numtmp, DOTP_TMP);
|
||||
}
|
||||
if (OPCS(DP2A)) {
|
||||
newlen += DP2A_GROW * OPCS(DP2A);
|
||||
numtmp = MAX2(numtmp, DOTP_TMP);
|
||||
}
|
||||
if (OPCS(FLR)) {
|
||||
newlen += FLR_GROW * OPCS(FLR);
|
||||
numtmp = MAX2(numtmp, FLR_TMP);
|
||||
|
||||
@@ -67,7 +67,6 @@ struct tgsi_lowering_config
|
||||
unsigned lower_DP3:1;
|
||||
unsigned lower_DPH:1;
|
||||
unsigned lower_DP2:1;
|
||||
unsigned lower_DP2A:1;
|
||||
unsigned lower_FLR:1;
|
||||
unsigned lower_CEIL:1;
|
||||
unsigned lower_TRUNC:1;
|
||||
|
||||
@@ -56,7 +56,6 @@ OP13(MAD)
|
||||
OP12_TEX(TEX_LZ)
|
||||
OP13(LRP)
|
||||
OP11(SQRT)
|
||||
OP13(DP2A)
|
||||
OP11(FRC)
|
||||
OP12_TEX(TXF_LZ)
|
||||
OP11(FLR)
|
||||
|
||||
@@ -262,10 +262,6 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
|
||||
read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0;
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_DP2A:
|
||||
read_mask = src_idx == 2 ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_XY;
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_DP2:
|
||||
read_mask = TGSI_WRITEMASK_XY;
|
||||
break;
|
||||
|
||||
@@ -285,19 +285,6 @@ Perform a * b + c with no intermediate rounding step.
|
||||
dst.w = src0.w \times src1.w + src2.w
|
||||
|
||||
|
||||
.. opcode:: DP2A - 2-component Dot Product And Add
|
||||
|
||||
.. math::
|
||||
|
||||
dst.x = src0.x \times src1.x + src0.y \times src1.y + src2.x
|
||||
|
||||
dst.y = src0.x \times src1.x + src0.y \times src1.y + src2.x
|
||||
|
||||
dst.z = src0.x \times src1.x + src0.y \times src1.y + src2.x
|
||||
|
||||
dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x
|
||||
|
||||
|
||||
.. opcode:: FRC - Fraction
|
||||
|
||||
.. math::
|
||||
|
||||
@@ -2344,7 +2344,6 @@ etna_compile_shader(struct etna_shader_variant *v)
|
||||
.lower_EXP = true,
|
||||
.lower_LOG = true,
|
||||
.lower_DP2 = true,
|
||||
.lower_DP2A = true,
|
||||
.lower_TRUNC = true,
|
||||
.lower_XPD = true
|
||||
};
|
||||
|
||||
@@ -51,7 +51,6 @@ static unsigned translate_opcode(unsigned opcode)
|
||||
case TGSI_OPCODE_SGE: return RC_OPCODE_SGE;
|
||||
case TGSI_OPCODE_MAD: return RC_OPCODE_MAD;
|
||||
case TGSI_OPCODE_LRP: return RC_OPCODE_LRP;
|
||||
/* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */
|
||||
case TGSI_OPCODE_FRC: return RC_OPCODE_FRC;
|
||||
case TGSI_OPCODE_FLR: return RC_OPCODE_FLR;
|
||||
case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND;
|
||||
|
||||
@@ -9089,7 +9089,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
|
||||
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
|
||||
[TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
|
||||
[TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[21] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[22] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[23] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
|
||||
@@ -9287,7 +9287,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
|
||||
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
|
||||
[TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3},
|
||||
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
|
||||
[TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[21] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[22] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[23] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
|
||||
@@ -9510,7 +9510,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
|
||||
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
|
||||
[TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3},
|
||||
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
|
||||
[TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[21] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[22] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[23] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
|
||||
|
||||
@@ -44,7 +44,6 @@ translate_opcode(uint opcode)
|
||||
{
|
||||
switch (opcode) {
|
||||
case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
|
||||
case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
|
||||
case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
|
||||
case TGSI_OPCODE_DP4: return SVGA3DOP_DP4;
|
||||
case TGSI_OPCODE_FRC: return SVGA3DOP_FRC;
|
||||
|
||||
@@ -3577,55 +3577,6 @@ emit_cmp(struct svga_shader_emitter_v10 *emit,
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit code for TGSI_OPCODE_DP2A instruction.
|
||||
*/
|
||||
static boolean
|
||||
emit_dp2a(struct svga_shader_emitter_v10 *emit,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
/* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
|
||||
* dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
|
||||
* dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
|
||||
* dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
|
||||
* Translate into
|
||||
* MAD tmp.x, s0.y, s1.y, s2.x
|
||||
* MAD tmp.x, s0.x, s1.x, tmp.x
|
||||
* MOV dst.xyzw, tmp.xxxx
|
||||
*/
|
||||
unsigned tmp = get_temp_index(emit);
|
||||
struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
|
||||
struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
|
||||
|
||||
struct tgsi_full_src_register tmp_src_xxxx =
|
||||
scalar_src(&tmp_src, TGSI_SWIZZLE_X);
|
||||
struct tgsi_full_dst_register tmp_dst_x =
|
||||
writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
|
||||
|
||||
struct tgsi_full_src_register src0_xxxx =
|
||||
scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
|
||||
struct tgsi_full_src_register src0_yyyy =
|
||||
scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
|
||||
struct tgsi_full_src_register src1_xxxx =
|
||||
scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
|
||||
struct tgsi_full_src_register src1_yyyy =
|
||||
scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
|
||||
struct tgsi_full_src_register src2_xxxx =
|
||||
scalar_src(&inst->Src[2], TGSI_SWIZZLE_X);
|
||||
|
||||
emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy,
|
||||
&src1_yyyy, &src2_xxxx, FALSE);
|
||||
emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx,
|
||||
&src1_xxxx, &tmp_src_xxxx, FALSE);
|
||||
emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
|
||||
&tmp_src_xxxx, inst->Instruction.Saturate);
|
||||
|
||||
free_temp_indexes(emit);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit code for TGSI_OPCODE_DPH instruction.
|
||||
*/
|
||||
@@ -5761,8 +5712,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
|
||||
return emit_cmp(emit, inst);
|
||||
case TGSI_OPCODE_COS:
|
||||
return emit_sincos(emit, inst);
|
||||
case TGSI_OPCODE_DP2A:
|
||||
return emit_dp2a(emit, inst);
|
||||
case TGSI_OPCODE_DPH:
|
||||
return emit_dph(emit, inst);
|
||||
case TGSI_OPCODE_DST:
|
||||
|
||||
@@ -358,7 +358,7 @@ struct tgsi_property_data {
|
||||
#define TGSI_OPCODE_LRP 18
|
||||
#define TGSI_OPCODE_FMA 19
|
||||
#define TGSI_OPCODE_SQRT 20
|
||||
#define TGSI_OPCODE_DP2A 21
|
||||
/* gap */
|
||||
#define TGSI_OPCODE_F2U64 22
|
||||
#define TGSI_OPCODE_F2I64 23
|
||||
#define TGSI_OPCODE_FRC 24
|
||||
|
||||
@@ -228,7 +228,6 @@ emit_special_inst(struct st_translate *t, const struct instruction_desc *desc,
|
||||
src[2] = args[0];
|
||||
ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3, 0);
|
||||
} else if (!strcmp(desc->name, "DOT2_ADD")) {
|
||||
/* note: DP2A is not implemented in most pipe drivers */
|
||||
tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); /* re-purpose a1 */
|
||||
src[0] = args[0];
|
||||
src[1] = args[1];
|
||||
|
||||
Reference in New Issue
Block a user