nouveau/nir: Split fewer 64-bit loads
Also adjust the lowering pass to handle the wide SSBO loads that we now emit for the NIR case. This improves generated code quality, since memoryopt can't merge SSBO loads that end up predicated on a bounds check. This also happens to fix a few test cases, but only because the simpler generated IR is less likely to trigger other compiler bugs. E.g., on Kepler with NV50_PROG_USE_NIR=1, this fixes arb_gpu_shader_fp64-fs-non-uniform-control-flow-ubo. Reviewed-by: Karol Herbst <kherbst@redhat.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16063>
This commit is contained in:
@@ -30,6 +30,7 @@
|
||||
#include "codegen/nv50_ir.h"
|
||||
#include "codegen/nv50_ir_from_common.h"
|
||||
#include "codegen/nv50_ir_lowering_helper.h"
|
||||
#include "codegen/nv50_ir_target.h"
|
||||
#include "codegen/nv50_ir_util.h"
|
||||
#include "tgsi/tgsi_from_mesa.h"
|
||||
|
||||
@@ -1238,7 +1239,7 @@ Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,
|
||||
unsigned int tySize = typeSizeof(ty);
|
||||
|
||||
if (tySize == 8 &&
|
||||
(file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {
|
||||
(indirect0 || !prog->getTarget()->isAccessSupported(file, TYPE_U64))) {
|
||||
Value *lo = getSSA();
|
||||
Value *hi = getSSA();
|
||||
|
||||
|
||||
@@ -2897,12 +2897,13 @@ NVC0LoweringPass::handleLDST(Instruction *i)
|
||||
i->setPredicate(CC_NOT_P, pred);
|
||||
if (i->defExists(0)) {
|
||||
Value *zero, *dst = i->getDef(0);
|
||||
i->setDef(0, bld.getSSA());
|
||||
uint8_t size = dst->reg.size;
|
||||
i->setDef(0, bld.getSSA(size));
|
||||
|
||||
bld.setPosition(i, true);
|
||||
bld.mkMov((zero = bld.getSSA()), bld.mkImm(0))
|
||||
bld.mkMov((zero = bld.getSSA(size)), bld.mkImm(0), i->dType)
|
||||
->setPredicate(CC_P, pred);
|
||||
bld.mkOp2(OP_UNION, TYPE_U32, dst, i->getDef(0), zero);
|
||||
bld.mkOp2(OP_UNION, i->dType, dst, i->getDef(0), zero);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user