From de42707101cdf2dbf3a1d9939e020d9c68f3d47b Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 1 Jun 2021 20:24:31 -0400 Subject: [PATCH] pan/bi: Lower loads with component > 0 We have no native way to swizzle out a nonzero component in a load, but we can simply load extra components and do the swizzle in shader instructions. This is inefficient, since it loads data to discard immediately, but it's required for conformance in some edge cases. Signed-off-by: Alyssa Rosenzweig Part-of: --- .../panfrost/ci/deqp-panfrost-g52-fails.txt | 4 +- src/panfrost/bifrost/bifrost_compile.c | 70 ++++++++++++++----- 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/panfrost/ci/deqp-panfrost-g52-fails.txt b/src/gallium/drivers/panfrost/ci/deqp-panfrost-g52-fails.txt index a3ac4fe074f..18517e962fa 100644 --- a/src/gallium/drivers/panfrost/ci/deqp-panfrost-g52-fails.txt +++ b/src/gallium/drivers/panfrost/ci/deqp-panfrost-g52-fails.txt @@ -14,14 +14,12 @@ dEQP-GLES31.functional.draw_indirect.draw_elements_indirect.line_strip.instanced dEQP-GLES31.functional.draw_indirect.random.31,Fail dEQP-GLES31.functional.layout_binding.image.image2d.vertex_binding_max_array,Fail dEQP-GLES31.functional.layout_binding.image.image3d.vertex_binding_max_array,Fail +dEQP-GLES31.functional.separate_shader.random.22,Fail dEQP-GLES31.functional.separate_shader.random.23,Fail dEQP-GLES31.functional.separate_shader.random.35,Fail -dEQP-GLES31.functional.separate_shader.random.49,Fail dEQP-GLES31.functional.separate_shader.random.68,Fail -dEQP-GLES31.functional.separate_shader.random.6,Fail dEQP-GLES31.functional.separate_shader.random.79,Fail dEQP-GLES31.functional.separate_shader.random.80,Fail -dEQP-GLES31.functional.separate_shader.random.82,Fail dEQP-GLES31.functional.separate_shader.random.89,Fail dEQP-GLES31.functional.draw_base_vertex.draw_elements_base_vertex.builtin_variable.vertex_id,Fail dEQP-GLES31.functional.draw_base_vertex.draw_elements_instanced_base_vertex.builtin_variable.vertex_id,Fail diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index d67e685e3f8..a611b2727f9 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -183,21 +183,52 @@ bi_is_intr_immediate(nir_intrinsic_instr *instr, unsigned *immediate, unsigned m return (*immediate) < max; } +static void +bi_make_vec_to(bi_builder *b, bi_index final_dst, + bi_index *src, + unsigned *channel, + unsigned count, + unsigned bitsize); + +/* Bifrost's load instructions lack a component offset despite operating in + * terms of vec4 slots. Usually I/O vectorization avoids nonzero components, + * but they may be unavoidable with separate shaders in use. To solve this, we + * lower to a larger load and an explicit copy of the desired components. */ + +static void +bi_copy_component(bi_builder *b, nir_intrinsic_instr *instr, bi_index tmp) +{ + unsigned component = nir_intrinsic_component(instr); + + if (component == 0) + return; + + bi_index srcs[] = { tmp, tmp, tmp, tmp }; + unsigned channels[] = { component, component + 1, component + 2 }; + + bi_make_vec_to(b, + bi_dest_index(&instr->dest), + srcs, channels, instr->num_components, + nir_dest_bit_size(instr->dest)); +} + static void bi_emit_load_attr(bi_builder *b, nir_intrinsic_instr *instr) { nir_alu_type T = nir_intrinsic_dest_type(instr); enum bi_register_format regfmt = bi_reg_fmt_for_nir(T); nir_src *offset = nir_get_io_offset_src(instr); + unsigned component = nir_intrinsic_component(instr); + enum bi_vecsize vecsize = (instr->num_components + component - 1); unsigned imm_index = 0; unsigned base = nir_intrinsic_base(instr); bool constant = nir_src_is_const(*offset); bool immediate = bi_is_intr_immediate(instr, &imm_index, 16); + bi_index dest = (component == 0) ? bi_dest_index(&instr->dest) : bi_temp(b->shader); if (immediate) { - bi_ld_attr_imm_to(b, bi_dest_index(&instr->dest), - bi_register(61), bi_register(62), - regfmt, instr->num_components - 1, imm_index); + bi_ld_attr_imm_to(b, dest, bi_register(61), bi_register(62), + regfmt, vecsize, imm_index); } else { bi_index idx = bi_src_index(&instr->src[0]); @@ -206,10 +237,11 @@ bi_emit_load_attr(bi_builder *b, nir_intrinsic_instr *instr) else if (base != 0) idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false); - bi_ld_attr_to(b, bi_dest_index(&instr->dest), - bi_register(61), bi_register(62), - idx, regfmt, instr->num_components - 1); + bi_ld_attr_to(b, dest, bi_register(61), bi_register(62), + idx, regfmt, vecsize); } + + bi_copy_component(b, instr, dest); } static void @@ -218,10 +250,13 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) enum bi_sample sample = BI_SAMPLE_CENTER; enum bi_update update = BI_UPDATE_STORE; enum bi_register_format regfmt = BI_REGISTER_FORMAT_AUTO; - enum bi_vecsize vecsize = instr->num_components - 1; bool smooth = instr->intrinsic == nir_intrinsic_load_interpolated_input; bi_index src0 = bi_null(); + unsigned component = nir_intrinsic_component(instr); + enum bi_vecsize vecsize = (instr->num_components + component - 1); + bi_index dest = (component == 0) ? bi_dest_index(&instr->dest) : bi_temp(b->shader); + if (smooth) { nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]); assert(parent); @@ -243,12 +278,11 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) bool immediate = bi_is_intr_immediate(instr, &imm_index, 20); if (immediate && smooth) { - bi_ld_var_imm_to(b, bi_dest_index(&instr->dest), - src0, regfmt, sample, update, vecsize, - imm_index); + bi_ld_var_imm_to(b, dest, src0, regfmt, sample, update, + vecsize, imm_index); } else if (immediate && !smooth) { - bi_ld_var_flat_imm_to(b, bi_dest_index(&instr->dest), - BI_FUNCTION_NONE, regfmt, vecsize, imm_index); + bi_ld_var_flat_imm_to(b, dest, BI_FUNCTION_NONE, regfmt, + vecsize, imm_index); } else { bi_index idx = bi_src_index(offset); unsigned base = nir_intrinsic_base(instr); @@ -257,15 +291,15 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false); if (smooth) { - bi_ld_var_to(b, bi_dest_index(&instr->dest), - src0, idx, regfmt, sample, update, - vecsize); + bi_ld_var_to(b, dest, src0, idx, regfmt, sample, + update, vecsize); } else { - bi_ld_var_flat_to(b, bi_dest_index(&instr->dest), - idx, BI_FUNCTION_NONE, regfmt, - vecsize); + bi_ld_var_flat_to(b, dest, idx, BI_FUNCTION_NONE, + regfmt, vecsize); } } + + bi_copy_component(b, instr, dest); } static void