From bbe5136658f048b6403f31f0d01dff856dd3ff81 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Fri, 20 May 2022 11:04:11 -0400 Subject: [PATCH] zink: fix 32bit bo rewriting this was correct for 64bit loads and manually converted 32bit loads (e.g., bindless), but it was broken for the case where 64bit was not supported, as the offset wasn't being correctly adjusted; break out the offset division to hopefully make this a little clearer Fixes: 150d6ee97e3 ("zink: move all 64-32bit shader load rewriting to nir pass") Reviewed-by: Adam Jackson Part-of: --- .../drivers/zink/ci/zink-tu-a630-fails.txt | 153 ------------------ src/gallium/drivers/zink/zink_compiler.c | 29 ++-- 2 files changed, 19 insertions(+), 163 deletions(-) diff --git a/src/gallium/drivers/zink/ci/zink-tu-a630-fails.txt b/src/gallium/drivers/zink/ci/zink-tu-a630-fails.txt index 0b90047a3fb..1c4ceaba8be 100644 --- a/src/gallium/drivers/zink/ci/zink-tu-a630-fails.txt +++ b/src/gallium/drivers/zink/ci/zink-tu-a630-fails.txt @@ -19,160 +19,13 @@ GTF-GL46.gtf40.GL3Tests.transform_feedback3.transform_feedback3_streams_overflow GTF-GL46.gtf40.GL3Tests.transform_feedback3.transform_feedback3_streams_queried,Fail KHR-GL46.buffer_storage.map_persistent_draw,Fail KHR-GL46.compute_shader.fp64-case1,Crash -KHR-GL46.compute_shader.fp64-case2,Fail KHR-GL46.compute_shader.fp64-case3,Crash KHR-GL46.copy_image.functional,Fail KHR-GL46.direct_state_access.buffers_functional,Fail KHR-GL46.geometry_shader.api.max_atomic_counters,Fail -KHR-GL46.gpu_shader_fp64.builtin.abs_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.abs_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.abs_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.ceil_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.ceil_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.ceil_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.clamp_against_scalar_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.clamp_against_scalar_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.clamp_against_scalar_dvec4,Fail 
-KHR-GL46.gpu_shader_fp64.builtin.clamp_double,Fail -KHR-GL46.gpu_shader_fp64.builtin.clamp_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.clamp_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.clamp_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.cross_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.determinant_dmat2,Fail -KHR-GL46.gpu_shader_fp64.builtin.determinant_dmat3,Fail -KHR-GL46.gpu_shader_fp64.builtin.distance_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.distance_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.distance_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.dot_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.dot_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.dot_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.equal_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.equal_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.equal_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.faceforward_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.faceforward_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.faceforward_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.floor_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.floor_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.floor_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.fma_double,Fail -KHR-GL46.gpu_shader_fp64.builtin.fma_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.fma_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.fma_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.fract_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.fract_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.fract_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.frexp_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.frexp_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.frexp_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.greaterthan_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.greaterthan_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.greaterthan_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.greaterthanequal_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.greaterthanequal_dvec3,Fail 
-KHR-GL46.gpu_shader_fp64.builtin.greaterthanequal_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.inverse_dmat2,Fail -KHR-GL46.gpu_shader_fp64.builtin.inverse_dmat3,Fail -KHR-GL46.gpu_shader_fp64.builtin.inversesqrt_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.inversesqrt_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.inversesqrt_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.ldexp_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.ldexp_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.ldexp_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.length_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.length_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.length_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.lessthan_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.lessthan_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.lessthan_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.lessthanequal_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.lessthanequal_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.lessthanequal_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat2,Fail -KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat2x3,Fail -KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat2x4,Fail -KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat3,Fail -KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat3x2,Fail -KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat3x4,Fail -KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat4,Fail -KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat4x2,Fail -KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat4x3,Fail -KHR-GL46.gpu_shader_fp64.builtin.max_against_scalar_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.max_against_scalar_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.max_against_scalar_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.max_double,Fail -KHR-GL46.gpu_shader_fp64.builtin.max_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.max_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.max_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.min_against_scalar_dvec2,Fail 
-KHR-GL46.gpu_shader_fp64.builtin.min_against_scalar_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.min_against_scalar_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.min_double,Fail -KHR-GL46.gpu_shader_fp64.builtin.min_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.min_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.min_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.mix_double,Fail -KHR-GL46.gpu_shader_fp64.builtin.mix_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.mix_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.mix_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.mod_against_scalar_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.mod_against_scalar_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.mod_against_scalar_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.mod_double,Fail KHR-GL46.gpu_shader_fp64.builtin.mod_dvec2,Fail KHR-GL46.gpu_shader_fp64.builtin.mod_dvec3,Fail KHR-GL46.gpu_shader_fp64.builtin.mod_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.modf_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.modf_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.modf_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.normalize_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.normalize_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.normalize_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.notequal_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.notequal_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.notequal_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat2,Fail -KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat2x3,Fail -KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat2x4,Fail -KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat3,Fail -KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat3x2,Fail -KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat3x4,Fail -KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat4,Fail -KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat4x2,Fail -KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat4x3,Fail -KHR-GL46.gpu_shader_fp64.builtin.reflect_dvec2,Fail 
-KHR-GL46.gpu_shader_fp64.builtin.reflect_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.reflect_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.refract_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.refract_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.refract_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.round_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.round_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.round_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.roundeven_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.roundeven_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.roundeven_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.smoothstep_against_scalar_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.smoothstep_against_scalar_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.smoothstep_against_scalar_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.smoothstep_double,Fail -KHR-GL46.gpu_shader_fp64.builtin.smoothstep_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.smoothstep_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.smoothstep_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.sqrt_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.sqrt_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.sqrt_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.step_against_scalar_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.step_against_scalar_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.step_against_scalar_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.step_double,Fail -KHR-GL46.gpu_shader_fp64.builtin.step_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.step_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.step_dvec4,Fail -KHR-GL46.gpu_shader_fp64.builtin.trunc_dvec2,Fail -KHR-GL46.gpu_shader_fp64.builtin.trunc_dvec3,Fail -KHR-GL46.gpu_shader_fp64.builtin.trunc_dvec4,Fail -KHR-GL46.gpu_shader_fp64.fp64.max_uniform_components,Fail KHR-GL46.limits.max_fragment_input_components,Fail KHR-GL46.packed_pixels.pbo_rectangle.r11f_g11f_b10f,Fail KHR-GL46.packed_pixels.pbo_rectangle.r16,Fail @@ -235,14 +88,9 @@ KHR-GL46.texture_view.view_classes,Fail 
KHR-GL46.vertex_attrib_64bit.vao,Fail KHR-Single-GL46.arrays_of_arrays_gl.AtomicUsage,Fail KHR-Single-GL46.arrays_of_arrays_gl.SubroutineFunctionCalls2,Crash -KHR-Single-GL46.enhanced_layouts.ssb_member_offset_and_align,Fail -KHR-Single-GL46.enhanced_layouts.uniform_block_member_offset_and_align,Fail KHR-Single-GL46.enhanced_layouts.varying_structure_locations,Crash KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_block_member,Fail KHR-Single-GL46.enhanced_layouts.xfb_capture_struct,Fail -KHR-Single-GL46.enhanced_layouts.xfb_global_buffer,Fail -KHR-Single-GL46.enhanced_layouts.xfb_override_qualifiers_with_api,Fail -KHR-Single-GL46.enhanced_layouts.xfb_stride,Fail KHR-Single-GL46.enhanced_layouts.xfb_struct_explicit_location,Crash KHR-Single-GL46.enhanced_layouts.xfb_vertex_streams,Fail dEQP-GLES3.functional.occlusion_query.depth_clear,Fail @@ -338,7 +186,6 @@ dEQP-GLES31.functional.texture.border_clamp.formats.depth24_stencil8_sample_dept dEQP-GLES31.functional.texture.border_clamp.formats.depth24_stencil8_sample_depth.nearest_size_pot,Fail dEQP-GLES31.functional.texture.border_clamp.formats.depth24_stencil8_sample_stencil.nearest_size_npot,Fail dEQP-GLES31.functional.texture.border_clamp.formats.depth24_stencil8_sample_stencil.nearest_size_pot,Fail -dEQP-GLES31.functional.texture.border_clamp.formats.depth32f_stencil8_sample_depth.nearest_size_pot,Fail dEQP-GLES31.functional.texture.border_clamp.formats.depth32f_stencil8_sample_stencil.nearest_size_npot,Fail dEQP-GLES31.functional.texture.border_clamp.formats.depth32f_stencil8_sample_stencil.nearest_size_pot,Fail dEQP-GLES31.functional.texture.border_clamp.formats.depth_component24.gather_size_npot,Fail diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 403e801dca0..ff0ed5ba851 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -948,10 +948,11 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr 
*instr, void *data) nir_src_as_uint(intr->src[0]) == 0 && nir_dest_bit_size(intr->dest) == 64 && nir_intrinsic_align_offset(intr) % 8 != 0; - nir_instr_rewrite_src_ssa(instr, &intr->src[1], nir_udiv_imm(b, intr->src[1].ssa, - (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8)); + force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64; + nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8); + nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset); /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */ - if (force_2x32 || (nir_dest_bit_size(intr->dest) == 64 && !has_int64)) { + if (force_2x32) { /* this is always scalarized */ assert(intr->dest.ssa.num_components == 1); /* rewrite as 2x32 */ @@ -972,9 +973,11 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data) } case nir_intrinsic_load_shared: b->cursor = nir_before_instr(instr); - nir_instr_rewrite_src_ssa(instr, &intr->src[0], nir_udiv_imm(b, intr->src[0].ssa, nir_dest_bit_size(intr->dest) / 8)); + bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64; + nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 
32 : nir_dest_bit_size(intr->dest)) / 8); + nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset); /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */ - if (nir_dest_bit_size(intr->dest) == 64 && !has_int64) { + if (force_2x32) { /* this is always scalarized */ assert(intr->dest.ssa.num_components == 1); /* rewrite as 2x32 */ @@ -988,11 +991,13 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data) return true; } break; - case nir_intrinsic_store_ssbo: + case nir_intrinsic_store_ssbo: { b->cursor = nir_before_instr(instr); - nir_instr_rewrite_src_ssa(instr, &intr->src[2], nir_udiv_imm(b, intr->src[2].ssa, nir_src_bit_size(intr->src[0]) / 8)); + bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64; + nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8); + nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset); /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */ - if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) { + if (force_2x32) { /* this is always scalarized */ assert(intr->src[0].ssa->num_components == 1); nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)}; @@ -1001,9 +1006,12 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data) nir_instr_remove(instr); } return true; - case nir_intrinsic_store_shared: + } + case nir_intrinsic_store_shared: { b->cursor = nir_before_instr(instr); - nir_instr_rewrite_src_ssa(instr, &intr->src[1], nir_udiv_imm(b, intr->src[1].ssa, nir_src_bit_size(intr->src[0]) / 8)); + bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64; + nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 
32 : nir_src_bit_size(intr->src[0])) / 8); + nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset); /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */ if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) { /* this is always scalarized */ @@ -1014,6 +1022,7 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data) nir_instr_remove(instr); } return true; + } default: break; }