llvmpipe: improve polygon-offset precision

This performs the polygon-offset addition after interpolation, which prevents floating-point cancellation issues completely. The cost is one additional floating-point addition per fragment compared to before, unless we also want to spend a bit in the fragment-shader variant key to avoid it. Reviewed-by: Roland Scheidegger <sroland@vmware.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12442>
2021-08-23 16:07:47 +02:00
parent 1fa61483de
commit 4d6e18b6cb
5 changed files with 40 additions and 47 deletions
@@ -10,7 +10,6 @@ dEQP-EGL.functional.robustness.reset_context.shaders.infinite_loop.sync_status.v
 dEQP-EGL.functional.robustness.reset_context.shaders.infinite_loop.sync_status.vertex_and_fragment,Fail
 dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_center,Fail
 dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_corner,Fail
-dEQP-GLES2.functional.polygon_offset.default_displacement_with_units,Fail
 dEQP-GLES3.functional.clipping.line.wide_line_clip_viewport_center,Fail
 dEQP-GLES3.functional.clipping.line.wide_line_clip_viewport_corner,Fail
 dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_mag,Fail
@@ -23,9 +22,6 @@ dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_dst_x,Fail
 dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_x,Fail
 dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_y,Fail
 dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_x,Fail
-dEQP-GLES3.functional.polygon_offset.default_displacement_with_units,Fail
-dEQP-GLES3.functional.polygon_offset.fixed24_displacement_with_units,Fail
-dEQP-GLES3.functional.polygon_offset.float32_displacement_with_units,Fail
 dEQP-GLES31.functional.primitive_bounding_box.wide_points.global_state.vertex_tessellation_fragment.default_framebuffer_bbox_equal,Fail
 dEQP-GLES31.functional.primitive_bounding_box.wide_points.global_state.vertex_tessellation_fragment.default_framebuffer_bbox_larger,Fail
 dEQP-GLES31.functional.primitive_bounding_box.wide_points.global_state.vertex_tessellation_fragment.fbo_bbox_equal,Fail
@@ -413,15 +413,25 @@ attribs_update_simple(struct lp_build_interp_soa_context *bld,
               break;
            }

-            if ((attrib == 0) && (chan == 2) && !bld->depth_clamp){
-               /* OpenGL requires clamping z to 0..1 range after polgon offset
-                * is applied if depth-clamping isn't enabled.
-                *
-                * This also fixes the problem that depth values can exceed 1.0,
-                * due to imprecision in the calculations.
-                */
-               a = lp_build_clamp(coeff_bld, a, coeff_bld->zero, coeff_bld->one);
+            if ((attrib == 0) && (chan == 2)) {
+               /* add polygon-offset value, stored in the X component of a0 */
+               LLVMValueRef offset =
+                  lp_build_extract_broadcast(gallivm, setup_bld->type,
+                                             coeff_bld->type, bld->a0aos[0],
+                                             lp_build_const_int32(gallivm, 0));
+               a = LLVMBuildFAdd(builder, a, offset, "");
+
+               if (!bld->depth_clamp){
+                  /* OpenGL requires clamping z to 0..1 range after polygon offset
+                   * is applied if depth-clamping isn't enabled.
+                   *
+                   * This also fixes the problem that depth values can exceed 1.0,
+                   * due to imprecision in the calculations.
+                   */
+                  a = lp_build_clamp(coeff_bld, a, coeff_bld->zero, coeff_bld->one);
+               }
            }
+
            bld->attribs[attrib][chan] = a;
         }
      }
@@ -201,7 +201,7 @@ lp_twoside(struct gallivm_state *gallivm,

 }

-static void
+static LLVMValueRef
 lp_do_offset_tri(struct gallivm_state *gallivm,
                 struct lp_setup_args *args,
                 const struct lp_setup_variant_key *key,
@@ -215,9 +215,7 @@ lp_do_offset_tri(struct gallivm_state *gallivm,
   struct lp_build_context int_scalar_bld;
   struct lp_build_context *bld = &args->bld;
   LLVMValueRef zoffset, mult;
-   LLVMValueRef z0_new, z1_new, z2_new;
   LLVMValueRef dzdxdzdy, dzdx, dzdy, dzxyz20, dyzzx01, dyzzx01_dzxyz20, dzx01_dyz20;
-   LLVMValueRef z0z1, z0z1z2;
   LLVMValueRef max, max_value, res12;
   LLVMValueRef shuffles[4];
   LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
@@ -323,29 +321,7 @@ lp_do_offset_tri(struct gallivm_state *gallivm,
                             zoffset);
   }

-   /* yuck */
-   shuffles[0] = twoi;
-   shuffles[1] = lp_build_const_int32(gallivm, 6);
-   shuffles[2] = LLVMGetUndef(shuf_type);
-   shuffles[3] = LLVMGetUndef(shuf_type);
-   z0z1 = LLVMBuildShuffleVector(b, attribv[0], attribv[1], LLVMConstVector(shuffles, 4), "");
-   shuffles[0] = zeroi;
-   shuffles[1] = onei;
-   shuffles[2] = lp_build_const_int32(gallivm, 6);
-   shuffles[3] = LLVMGetUndef(shuf_type);
-   z0z1z2 = LLVMBuildShuffleVector(b, z0z1, attribv[2], LLVMConstVector(shuffles, 4), "");
-   zoffset = lp_build_broadcast_scalar(bld, zoffset);
-
-   z0z1z2 = LLVMBuildFAdd(b, z0z1z2, zoffset, "");
-
-   /* insert into args->a0.z, a1.z, a2.z:
-    */
-   z0_new = LLVMBuildExtractElement(b, z0z1z2, zeroi, "");
-   z1_new = LLVMBuildExtractElement(b, z0z1z2, onei, "");
-   z2_new = LLVMBuildExtractElement(b, z0z1z2, twoi, "");
-   attribv[0] = LLVMBuildInsertElement(b, attribv[0], z0_new, twoi, "");
-   attribv[1] = LLVMBuildInsertElement(b, attribv[1], z1_new, twoi, "");
-   attribv[2] = LLVMBuildInsertElement(b, attribv[2], z2_new, twoi, "");
+   return zoffset;
 }

 static void
@@ -653,6 +629,7 @@ init_args(struct gallivm_state *gallivm,
   LLVMValueRef e, f, ef, ooa;
   LLVMValueRef shuffles[4], shuf10;
   LLVMValueRef attr_pos[3];
+   LLVMValueRef polygon_offset;
   struct lp_type typef4 = lp_type_float_vec(32, 128);
   struct lp_build_context bld;

@@ -693,7 +670,9 @@ init_args(struct gallivm_state *gallivm,

   /* tri offset calc shares a lot of arithmetic, do it here */
   if (key->pgon_offset_scale != 0.0f || key->pgon_offset_units != 0.0f) {
-      lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos);
+      polygon_offset = lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos);
+   } else {
+      polygon_offset = lp_build_const_float(gallivm, 0.0f);
   }

   dxy20 = LLVMBuildFMul(b, dxy20, ooa, "");
@@ -708,7 +687,22 @@ init_args(struct gallivm_state *gallivm,
   args->x0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, zeroi);
   args->y0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, onei);

-   emit_linear_coef(gallivm, args, 0, attr_pos);
+   LLVMValueRef coeffs[3];
+   calc_coef4(gallivm, args,
+              attr_pos[0], attr_pos[1], attr_pos[2],
+              coeffs);
+
+   /* This is a bit sneaky:
+    * Because we observe that the X component of the position attribute's
+    * A0 coefficient (slot 0) is otherwise unused, we can overwrite it with
+    * the computed polygon-offset value, to make sure it's available in the
+    * fragment shader without having to change the interface (which is
+    * error-prone).
+    */
+   coeffs[0] = LLVMBuildInsertElement(b, coeffs[0], polygon_offset,
+                                      lp_build_const_int32(gallivm, 0), "");
+
+   store_coef(gallivm, args, 0,
+              coeffs[0], coeffs[1], coeffs[2]);
 }

 /**
@@ -3,7 +3,6 @@ dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_corner,Fail
 dEQP-GLES2.functional.clipping.point.wide_point_clip,Fail
 dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_center,Fail
 dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_corner,Fail
-dEQP-GLES2.functional.polygon_offset.default_displacement_with_units,Fail
 dEQP-GLES3.functional.clipping.line.wide_line_clip_viewport_center,Fail
 dEQP-GLES3.functional.clipping.line.wide_line_clip_viewport_corner,Fail
 dEQP-GLES3.functional.clipping.point.wide_point_clip,Fail
@@ -19,9 +18,6 @@ dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_dst_x,Fail
 dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_x,Fail
 dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_dst_y,Fail
 dEQP-GLES3.functional.fbo.blit.rect.nearest_consistency_min_reverse_src_x,Fail
-dEQP-GLES3.functional.polygon_offset.default_displacement_with_units,Fail
-dEQP-GLES3.functional.polygon_offset.fixed24_displacement_with_units,Fail
-dEQP-GLES3.functional.polygon_offset.float32_displacement_with_units,Fail
 dEQP-GLES31.functional.draw_buffers_indexed.random.max_required_draw_buffers.4,Fail
 dEQP-GLES31.functional.draw_buffers_indexed.random.max_implementation_draw_buffers.8,Fail
 dEQP-GLES31.functional.image_load_store.buffer.image_size.writeonly_12,Fail
@@ -1,9 +1,6 @@
 dEQP-GLES2.functional.clipping.point.wide_point_clip,Fail
 dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_center,Fail
 dEQP-GLES2.functional.clipping.point.wide_point_clip_viewport_corner,Fail
-dEQP-GLES2.functional.polygon_offset.default_displacement_with_units,Fail
-dEQP-GLES3.functional.polygon_offset.default_displacement_with_units,Fail
-dEQP-GLES3.functional.polygon_offset.float32_displacement_with_units,Fail
 dEQP-GLES3.functional.clipping.line.wide_line_clip_viewport_center,Fail
 dEQP-GLES3.functional.clipping.line.wide_line_clip_viewport_corner,Fail
 dEQP-GLES3.functional.clipping.point.wide_point_clip,Fail