From 696f37f5c3f4527cc204906f272bc33c5eae8ea0 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Wed, 19 Jul 2023 17:37:53 +0200 Subject: [PATCH] freedreno/regs: Rename SP_FS_CTRL_REG0.DIFF_FINE into LODPIXMASK That's the "real" name of the field. It enables ALL helper invocations in a quad, which is necessary for fine derivatives and quad subgroup ops, while PIXLODENABLE by itself enables only 3 out of 4 fragments in a quad. Cc: mesa-stable Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/registers/adreno/a6xx.xml | 15 +++++++++++++-- src/freedreno/vulkan/tu_pipeline.cc | 2 +- src/gallium/drivers/freedreno/a6xx/fd6_program.cc | 2 +- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml index e65528da4bd..61bc4508fea 100644 --- a/src/freedreno/registers/adreno/a6xx.xml +++ b/src/freedreno/registers/adreno/a6xx.xml @@ -3498,11 +3498,22 @@ to upconvert to 32b float internally? - + + + Enable ALL helper invocations in a quad. Necessary for + fine derivatives and quad subgroup ops. + + - + + + Enable helper invocations. Enables 3 out of 4 fragments, + because the coarse derivatives only use half of the quad + and so one pixel's value is always unused. 
+ + diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc index 1b24997e151..59d5ca5b8f2 100644 --- a/src/freedreno/vulkan/tu_pipeline.cc +++ b/src/freedreno/vulkan/tu_pipeline.cc @@ -496,7 +496,7 @@ tu6_emit_xs(struct tu_cs *cs, .branchstack = ir3_shader_branchstack_hw(xs), .threadsize = thrsz, .varying = xs->total_in != 0, - .diff_fine = xs->need_fine_derivatives, + .lodpixmask = xs->need_fine_derivatives, /* unknown bit, seems unnecessary */ .unk24 = true, .pixlodenable = xs->need_pixlod, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.cc b/src/gallium/drivers/freedreno/a6xx/fd6_program.cc index 818e4593331..cbb987c8edf 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.cc @@ -933,7 +933,7 @@ setup_stateobj(struct fd_screen *screen, struct fd_ringbuffer *ring, ring, A6XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | COND(enable_varyings, A6XX_SP_FS_CTRL_REG0_VARYING) | 0x1000000 | - COND(fs->need_fine_derivatives, A6XX_SP_FS_CTRL_REG0_DIFF_FINE) | + COND(fs->need_fine_derivatives, A6XX_SP_FS_CTRL_REG0_LODPIXMASK) | A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fs->info.max_reg + 1) | A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fs->info.max_half_reg + 1) | COND(fs->mergedregs, A6XX_SP_FS_CTRL_REG0_MERGEDREGS) |