gallivm: Newton-Raphson step to improve precision.
Disabled as it doesn't make VS/PSPrecision DCT happy, and it would unnecessarily slow some cases where it is not needed.
This commit is contained in:
@@ -1177,9 +1177,34 @@ lp_build_rcp(struct lp_build_context *bld,
|
||||
if(LLVMIsConstant(a))
|
||||
return LLVMConstFDiv(bld->one, a);
|
||||
|
||||
if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
|
||||
/* FIXME: improve precision */
|
||||
if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
|
||||
/*
|
||||
* XXX: Added precision is not always necessary, so only enable this
|
||||
* when we have a better system in place to track minimum precision.
|
||||
*/
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* Do one Newton-Raphson step to improve precision:
|
||||
*
|
||||
* x1 = (2 - a * rcp(a)) * rcp(a)
|
||||
*/
|
||||
|
||||
LLVMValueRef two = lp_build_const_vec(bld->type, 2.0);
|
||||
LLVMValueRef rcp_a;
|
||||
LLVMValueRef res;
|
||||
|
||||
rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
|
||||
|
||||
res = LLVMBuildMul(bld->builder, a, rcp_a, "");
|
||||
res = LLVMBuildSub(bld->builder, two, res, "");
|
||||
res = LLVMBuildMul(bld->builder, res, rcp_a, "");
|
||||
|
||||
return rcp_a;
|
||||
#else
|
||||
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
|
||||
#endif
|
||||
}
|
||||
|
||||
return LLVMBuildFDiv(bld->builder, bld->one, a, "");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user