diff --git a/src/gallium/drivers/panfrost/ci/deqp-panfrost-g52-fails.txt b/src/gallium/drivers/panfrost/ci/deqp-panfrost-g52-fails.txt index a3ac4fe074f..18517e962fa 100644 --- a/src/gallium/drivers/panfrost/ci/deqp-panfrost-g52-fails.txt +++ b/src/gallium/drivers/panfrost/ci/deqp-panfrost-g52-fails.txt @@ -14,14 +14,12 @@ dEQP-GLES31.functional.draw_indirect.draw_elements_indirect.line_strip.instanced dEQP-GLES31.functional.draw_indirect.random.31,Fail dEQP-GLES31.functional.layout_binding.image.image2d.vertex_binding_max_array,Fail dEQP-GLES31.functional.layout_binding.image.image3d.vertex_binding_max_array,Fail +dEQP-GLES31.functional.separate_shader.random.22,Fail dEQP-GLES31.functional.separate_shader.random.23,Fail dEQP-GLES31.functional.separate_shader.random.35,Fail -dEQP-GLES31.functional.separate_shader.random.49,Fail dEQP-GLES31.functional.separate_shader.random.68,Fail -dEQP-GLES31.functional.separate_shader.random.6,Fail dEQP-GLES31.functional.separate_shader.random.79,Fail dEQP-GLES31.functional.separate_shader.random.80,Fail -dEQP-GLES31.functional.separate_shader.random.82,Fail dEQP-GLES31.functional.separate_shader.random.89,Fail dEQP-GLES31.functional.draw_base_vertex.draw_elements_base_vertex.builtin_variable.vertex_id,Fail dEQP-GLES31.functional.draw_base_vertex.draw_elements_instanced_base_vertex.builtin_variable.vertex_id,Fail diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index d67e685e3f8..a611b2727f9 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -183,21 +183,52 @@ bi_is_intr_immediate(nir_intrinsic_instr *instr, unsigned *immediate, unsigned m return (*immediate) < max; } +static void +bi_make_vec_to(bi_builder *b, bi_index final_dst, + bi_index *src, + unsigned *channel, + unsigned count, + unsigned bitsize); + +/* Bifrost's load instructions lack a component offset despite operating in + * terms of vec4 slots. Usually I/O vectorization avoids nonzero components, + * but they may be unavoidable with separate shaders in use. To solve this, we + * lower to a larger load and an explicit copy of the desired components. */ + +static void +bi_copy_component(bi_builder *b, nir_intrinsic_instr *instr, bi_index tmp) +{ + unsigned component = nir_intrinsic_component(instr); + + if (component == 0) + return; + + bi_index srcs[] = { tmp, tmp, tmp, tmp }; + unsigned channels[] = { component, component + 1, component + 2 }; + + bi_make_vec_to(b, + bi_dest_index(&instr->dest), + srcs, channels, instr->num_components, + nir_dest_bit_size(instr->dest)); +} + static void bi_emit_load_attr(bi_builder *b, nir_intrinsic_instr *instr) { nir_alu_type T = nir_intrinsic_dest_type(instr); enum bi_register_format regfmt = bi_reg_fmt_for_nir(T); nir_src *offset = nir_get_io_offset_src(instr); + unsigned component = nir_intrinsic_component(instr); + enum bi_vecsize vecsize = (instr->num_components + component - 1); unsigned imm_index = 0; unsigned base = nir_intrinsic_base(instr); bool constant = nir_src_is_const(*offset); bool immediate = bi_is_intr_immediate(instr, &imm_index, 16); + bi_index dest = (component == 0) ? bi_dest_index(&instr->dest) : bi_temp(b->shader); if (immediate) { - bi_ld_attr_imm_to(b, bi_dest_index(&instr->dest), - bi_register(61), bi_register(62), - regfmt, instr->num_components - 1, imm_index); + bi_ld_attr_imm_to(b, dest, bi_register(61), bi_register(62), + regfmt, vecsize, imm_index); } else { bi_index idx = bi_src_index(&instr->src[0]); @@ -206,10 +237,11 @@ bi_emit_load_attr(bi_builder *b, nir_intrinsic_instr *instr) else if (base != 0) idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false); - bi_ld_attr_to(b, bi_dest_index(&instr->dest), - bi_register(61), bi_register(62), - idx, regfmt, instr->num_components - 1); + bi_ld_attr_to(b, dest, bi_register(61), bi_register(62), + idx, regfmt, vecsize); } + + bi_copy_component(b, instr, dest); } static void @@ -218,10 +250,13 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) enum bi_sample sample = BI_SAMPLE_CENTER; enum bi_update update = BI_UPDATE_STORE; enum bi_register_format regfmt = BI_REGISTER_FORMAT_AUTO; - enum bi_vecsize vecsize = instr->num_components - 1; bool smooth = instr->intrinsic == nir_intrinsic_load_interpolated_input; bi_index src0 = bi_null(); + unsigned component = nir_intrinsic_component(instr); + enum bi_vecsize vecsize = (instr->num_components + component - 1); + bi_index dest = (component == 0) ? bi_dest_index(&instr->dest) : bi_temp(b->shader); + if (smooth) { nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]); assert(parent); @@ -243,12 +278,11 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) bool immediate = bi_is_intr_immediate(instr, &imm_index, 20); if (immediate && smooth) { - bi_ld_var_imm_to(b, bi_dest_index(&instr->dest), - src0, regfmt, sample, update, vecsize, - imm_index); + bi_ld_var_imm_to(b, dest, src0, regfmt, sample, update, + vecsize, imm_index); } else if (immediate && !smooth) { - bi_ld_var_flat_imm_to(b, bi_dest_index(&instr->dest), - BI_FUNCTION_NONE, regfmt, vecsize, imm_index); + bi_ld_var_flat_imm_to(b, dest, BI_FUNCTION_NONE, regfmt, + vecsize, imm_index); } else { bi_index idx = bi_src_index(offset); unsigned base = nir_intrinsic_base(instr); @@ -257,15 +291,15 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false); if (smooth) { - bi_ld_var_to(b, bi_dest_index(&instr->dest), - src0, idx, regfmt, sample, update, - vecsize); + bi_ld_var_to(b, dest, src0, idx, regfmt, sample, + update, vecsize); } else { - bi_ld_var_flat_to(b, bi_dest_index(&instr->dest), - idx, BI_FUNCTION_NONE, regfmt, - vecsize); + bi_ld_var_flat_to(b, dest, idx, BI_FUNCTION_NONE, + regfmt, vecsize); } } + + bi_copy_component(b, instr, dest); } static void