From e5ec0dc1832061a4ae11ba04b4b90685daac89a2 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 16 Dec 2020 14:24:18 -0500 Subject: [PATCH] pan/bi: Remove NIR->old IR Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bi_special.c | 278 ---- src/panfrost/bifrost/bifrost_compile.c | 2027 ------------------------ src/panfrost/bifrost/compiler.h | 6 - src/panfrost/bifrost/meson.build | 1 - 4 files changed, 2312 deletions(-) delete mode 100644 src/panfrost/bifrost/bi_special.c diff --git a/src/panfrost/bifrost/bi_special.c b/src/panfrost/bifrost/bi_special.c deleted file mode 100644 index 96938360908..00000000000 --- a/src/panfrost/bifrost/bi_special.c +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Copyright (C) 2020 Collabora Ltd. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors (Collabora): - * Alyssa Rosenzweig - */ - -#include "compiler.h" - -/* Bifrost requires special functions to be lowered in various machine specific - * ways. The routines in this file are used in codegen for this. */ - -/* New Bifrost has a FEXP2_FAST instruction but requires an auxiliary - * parameter. */ - -static void -bi_emit_fexp2_new(bi_context *ctx, nir_alu_instr *instr) -{ - /* FMA_MSCALE T, X, 1.0, 0, 0x18 */ - - bi_instruction mscale = { - .type = BI_FMA, - .op = { .mscale = true }, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_float32, - .src = { - pan_src_index(&instr->src[0].src), - BIR_INDEX_CONSTANT | 0, - BIR_INDEX_ZERO, - BIR_INDEX_CONSTANT | 32, - }, - .src_types = { - nir_type_float32, - nir_type_float32, - nir_type_float32, - nir_type_int32, - }, - .constant = { - /* 0x3f80000000 = 1.0f as fp32 - * 24 = shift to multiply by 2^24 */ - .u64 = (0x3f800000) | (24ull << 32) - }, - .swizzle = { { instr->src[0].swizzle[0] } } - }; - - /* F2I_RTE T, T */ - - bi_instruction f2i = { - .type = BI_CONVERT, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_int32, - .src = { mscale.dest }, - .src_types = { nir_type_float32 }, - .round = BI_ROUND_NONE - }; - - /* FEXP2_FAST T, T, X */ - - bi_instruction fexp = { - .type = BI_SPECIAL_ADD, - .op = { .special = BI_SPECIAL_EXP2_LOW }, - .dest = pan_dest_index(&instr->dest.dest), - .dest_type = nir_type_float32, - .src = { f2i.dest, mscale.src[0] }, - .src_types = { nir_type_int32, nir_type_float32 }, - .swizzle = { {}, { instr->src[0].swizzle[0] } } - }; - - bi_emit(ctx, mscale); - bi_emit(ctx, f2i); - bi_emit(ctx, fexp); -} - -/* Even on new Bifrost, there are a bunch of reductions to do */ - -static void -bi_emit_flog2_new(bi_context *ctx, nir_alu_instr *instr) -{ - /* LOG_FREXPE X */ - bi_instruction frexpe = { - .type = BI_FREXP, - .op = { .frexp = BI_FREXPE_LOG }, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_int32, - .src = { pan_src_index(&instr->src[0].src) }, - .src_types = { nir_type_float32 }, - .swizzle = { { instr->src[0].swizzle[0] } } - }; - - /* I32_TO_F32 m */ - bi_instruction i2f = { - .type = BI_CONVERT, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_float32, - .src = { frexpe.dest }, - .src_types = { nir_type_int32 }, - .round = BI_ROUND_RTZ - }; - - /* ADD_FREXPM (x-1), -1.0, X */ - bi_instruction x_minus_1 = { - .type = BI_REDUCE_FMA, - .op = { .reduce = BI_REDUCE_ADD_FREXPM }, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_float32, - .src = { - BIR_INDEX_CONSTANT, - pan_src_index(&instr->src[0].src), - }, - .src_types = { nir_type_float32, nir_type_float32 }, - .constant = { - .u64 = 0xBF800000 /* -1.0 */ - }, - .swizzle = { {}, { instr->src[0].swizzle[0] } } - }; - - /* FLOG2_HELP log2(x)/(x-1), x */ - bi_instruction help = { - .type = BI_TABLE, - .op = { .table = BI_TABLE_LOG2_U_OVER_U_1_LOW }, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_float32, - .src = { pan_src_index(&instr->src[0].src) }, - .src_types = { nir_type_float32 }, - .swizzle = { { instr->src[0].swizzle[0] } } - }; - - /* FMA log2(x)/(x - 1), (x - 1), M */ - bi_instruction fma = { - .type = BI_FMA, - .dest = pan_dest_index(&instr->dest.dest), - .dest_type = nir_type_float32, - .src = { - help.dest, - x_minus_1.dest, - i2f.dest - }, - .src_types = { - nir_type_float32, - nir_type_float32, - nir_type_float32 - } - }; - - bi_emit(ctx, frexpe); - bi_emit(ctx, i2f); - bi_emit(ctx, x_minus_1); - bi_emit(ctx, help); - bi_emit(ctx, fma); -} - -void -bi_emit_fexp2(bi_context *ctx, nir_alu_instr *instr) -{ - /* TODO: G71 */ - bi_emit_fexp2_new(ctx, instr); -} - -void -bi_emit_flog2(bi_context *ctx, nir_alu_instr *instr) -{ - /* TODO: G71 */ - bi_emit_flog2_new(ctx, instr); -} - -void -bi_emit_deriv(bi_context *ctx, nir_alu_instr *instr) -{ - bi_instruction cur_lane = { - .type = BI_MOV, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_uint32, - .src = { BIR_INDEX_FAU | BIR_FAU_LANE_ID }, - .src_types = { nir_type_uint32 } - }; - - bi_instruction lane1 = { - .type = BI_BITWISE, - .op.bitwise = BI_BITWISE_AND, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_uint32, - .src = { - cur_lane.dest, - BIR_INDEX_CONSTANT | 0, - BIR_INDEX_ZERO, - }, - .src_types = { - nir_type_uint32, - nir_type_uint32, - nir_type_uint8, - }, - .constant.u64 = instr->op == nir_op_fddx ? 2 : 1, - }; - - bi_instruction lane2 = { - .type = BI_IMATH, - .op.imath = BI_IMATH_ADD, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_uint32, - .src = { - lane1.dest, - BIR_INDEX_CONSTANT | 0, - BIR_INDEX_ZERO, - }, - .src_types = { - nir_type_uint32, - nir_type_uint32, - nir_type_uint32, - }, - .constant.u64 = instr->op == nir_op_fddx ? 1 : 2, - }; - - unsigned src = pan_src_index(&instr->src[0].src); - - bi_instruction clper1 = { - .type = BI_SPECIAL_ADD, - .op.special = ctx->arch == 6 ? BI_SPECIAL_CLPER_V6 : BI_SPECIAL_CLPER_V7, - .special.subgroup_sz = BI_SUBGROUP_SUBGROUP4, - .special.clper.lane_op_mod = BI_LANE_OP_NONE, - .special.clper.inactive_res = BI_INACTIVE_RESULT_ZERO, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_uint32, - .src = { src, lane1.dest }, - .src_types = { nir_type_uint32, nir_type_uint32 }, - .swizzle[0][0] = instr->src[0].swizzle[0], - }; - - bi_instruction clper2 = { - .type = BI_SPECIAL_ADD, - .op.special = ctx->arch == 6 ? BI_SPECIAL_CLPER_V6 : BI_SPECIAL_CLPER_V7, - .special.subgroup_sz = BI_SUBGROUP_SUBGROUP4, - .special.clper.lane_op_mod = BI_LANE_OP_NONE, - .special.clper.inactive_res = BI_INACTIVE_RESULT_ZERO, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_uint32, - .src = { src, lane2.dest }, - .src_types = { nir_type_uint32, nir_type_uint32 }, - .swizzle[0][0] = instr->src[0].swizzle[0], - }; - - nir_alu_type type = nir_type_float | - nir_dest_bit_size(instr->dest.dest); - - bi_instruction sub = { - .type = BI_ADD, - .src = { clper2.dest, clper1.dest }, - .src_types = { type, type }, - .src_neg[1] = true, - .dest = pan_dest_index(&instr->dest.dest), - .dest_type = type, - }; - - bi_emit(ctx, cur_lane); - bi_emit(ctx, lane1); - bi_emit(ctx, lane2); - bi_emit(ctx, clper1); - bi_emit(ctx, clper2); - bi_emit(ctx, sub); -} diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 2cd3983bbab..110734db01c 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -67,7 +67,6 @@ bi_init_builder(bi_context *ctx) } static bi_block *emit_cf_list(bi_context *ctx, struct exec_list *list); -static bi_instruction *bi_emit_branch(bi_context *ctx); static void bi_emit_jump(bi_builder *b, nir_jump_instr *instr) @@ -89,56 +88,6 @@ bi_emit_jump(bi_builder *b, nir_jump_instr *instr) b->shader->current_block->base.unconditional_jumps = true; } -static void -emit_jump(bi_context *ctx, nir_jump_instr *instr) -{ - bi_instruction *branch = bi_emit_branch(ctx); - - switch (instr->type) { - case nir_jump_break: - branch->branch_target = ctx->break_block; - break; - case nir_jump_continue: - branch->branch_target = ctx->continue_block; - break; - default: - unreachable("Unhandled jump type"); - } - - pan_block_add_successor(&ctx->current_block->base, &branch->branch_target->base); - ctx->current_block->base.unconditional_jumps = true; -} - -static bi_instruction -bi_load_old(enum bi_class T, nir_intrinsic_instr *instr, unsigned offset_idx) -{ - bi_instruction load = { - .type = T, - .vector_channels = instr->num_components, - }; - - const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; - - if (info->has_dest) - load.dest = pan_dest_index(&instr->dest); - - if (info->has_dest && nir_intrinsic_has_dest_type(instr)) - load.dest_type = nir_intrinsic_dest_type(instr); - - nir_src *offset = nir_get_io_offset_src(instr); - - load.src_types[offset_idx] = nir_type_uint32; - if (nir_src_is_const(*offset)) { - load.src[offset_idx] = BIR_INDEX_CONSTANT | 0; - load.constant.u64 = nir_src_as_uint(*offset) + - nir_intrinsic_base(instr); - } else { - load.src[offset_idx] = pan_src_index(offset); - } - - return load; -} - static void bi_emit_ld_tile(bi_builder *b, nir_intrinsic_instr *instr) { @@ -163,46 +112,6 @@ bi_emit_ld_tile(bi_builder *b, nir_intrinsic_instr *instr) } -static void -bi_emit_ld_output(bi_context *ctx, nir_intrinsic_instr *instr) -{ - assert(ctx->is_blend); - - bi_instruction ins = { - .type = BI_LOAD_TILE, - .vector_channels = instr->num_components, - .dest = pan_dest_index(&instr->dest), - .dest_type = nir_type_float16, - .src = { - /* PixelIndices */ - BIR_INDEX_CONSTANT, - /* PixelCoverage: we simply pass r60 which contains the cumulative - * coverage bitmap - */ - BIR_INDEX_REGISTER | 60, - /* InternalConversionDescriptor (see src/panfrost/lib/midgard.xml for more - * details) - */ - BIR_INDEX_CONSTANT | 32 - }, - .src_types = { nir_type_uint32, nir_type_uint32, nir_type_uint32 }, - }; - - /* We want to load the current pixel. - * FIXME: The sample to load is currently hardcoded to 0. This should - * be addressed for multi-sample FBs. - */ - struct bifrost_pixel_indices pix = { - .y = BIFROST_CURRENT_PIXEL, - }; - memcpy(&ins.constant.u64, &pix, sizeof(pix)); - - /* Only keep the conversion part of the blend descriptor. */ - ins.constant.u64 |= ctx->blend_desc & 0xffffffff00000000ULL; - - bi_emit(ctx, ins); -} - static enum bi_sample bi_interp_for_intrinsic(nir_intrinsic_op op) { @@ -332,58 +241,6 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) } } -static void -bi_emit_ld_vary(bi_context *ctx, nir_intrinsic_instr *instr) -{ - bi_instruction ins = { - .type = BI_LOAD_VAR, - .load_vary = { - .interp_mode = BI_SAMPLE_CENTER, - .update_mode = BI_UPDATE_STORE, - .reuse = false, - .flat = instr->intrinsic != nir_intrinsic_load_interpolated_input, - }, - .dest = pan_dest_index(&instr->dest), - .dest_type = nir_dest_bit_size(instr->dest), - .src_types = { nir_type_uint32, nir_type_uint32, nir_type_uint32 }, - .vector_channels = instr->num_components, - }; - - if (instr->intrinsic == nir_intrinsic_load_interpolated_input) { - nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]); - if (parent) { - ins.load_vary.interp_mode = - bi_interp_for_intrinsic(parent->intrinsic); - } - } - - if (ins.load_vary.interp_mode == BI_SAMPLE_CENTER) { - /* Zero it out for center interpolation */ - ins.src[0] = BIR_INDEX_ZERO; - } else { - /* R61 contains sample mask stuff, TODO RA XXX */ - ins.src[0] = BIR_INDEX_REGISTER | 61; - } - - nir_src *offset = nir_get_io_offset_src(instr); - if (nir_src_is_const(*offset)) { - unsigned offset_val = nir_intrinsic_base(instr) + - nir_src_as_uint(*offset); - - if (offset_val < 20) { - ins.load_vary.immediate = true; - ins.load_vary.index = offset_val; - } else { - ins.src[1] = BIR_INDEX_CONSTANT | 0; - ins.constant.u64 = offset_val; - } - } else { - ins.src[1] = pan_src_index(offset); - } - - bi_emit(ctx, ins); -} - static void bi_make_vec_to(bi_builder *b, bi_index final_dst, bi_index *src, @@ -456,59 +313,6 @@ bi_emit_load_blend_input(bi_builder *b, nir_intrinsic_instr *instr) bi_make_vec_to(b, bi_dest_index(&instr->dest), srcs, NULL, 4, 32); } -static void -bi_emit_ld_blend_input(bi_context *ctx, nir_intrinsic_instr *instr) -{ - ASSERTED nir_io_semantics sem = nir_intrinsic_io_semantics(instr); - - /* We don't support dual-source blending yet. */ - assert(sem.location == VARYING_SLOT_COL0); - - bi_instruction ins = { - .type = BI_COMBINE, - .dest_type = nir_type_uint32, - .dest = pan_dest_index(&instr->dest), - .src_types = { - nir_type_uint32, nir_type_uint32, - nir_type_uint32, nir_type_uint32, - }, - - /* Source color is passed through r0-r3. - * TODO: We should probably find a way to avoid this - * combine/mov and use r0-r3 directly. - */ - .src = { - BIR_INDEX_REGISTER | 0, - BIR_INDEX_REGISTER | 1, - BIR_INDEX_REGISTER | 2, - BIR_INDEX_REGISTER | 3, - }, - }; - - bi_emit(ctx, ins); -} - -static void -bi_emit_atest(bi_context *ctx, unsigned rgba, nir_alu_type T) -{ - bi_instruction ins = { - .type = BI_ATEST, - .src = { - BIR_INDEX_REGISTER | 60 /* TODO: RA */, - rgba, - }, - .src_types = { nir_type_uint32, T }, - .swizzle = { - { 0 }, - { 3, 0 } /* swizzle out the alpha */ - }, - .dest = BIR_INDEX_REGISTER | 60 /* TODO: RA */, - .dest_type = nir_type_uint32, - }; - - bi_emit(ctx, ins); -} - static void bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt) { @@ -534,81 +338,6 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt) b->shader->blend_types[rt] = T; } -static void -bi_emit_blend(bi_context *ctx, unsigned rgba, nir_alu_type T, unsigned rt) -{ - bi_instruction blend = { - .type = BI_BLEND, - .blend_location = rt, - .src = { - rgba, - BIR_INDEX_REGISTER | 60 /* TODO: RA */ - }, - .src_types = { - T, - nir_type_uint32, - nir_type_uint32, - nir_type_uint32, - }, - .swizzle = { - { 0, 1, 2, 3 }, - { 0 } - }, - .dest_type = nir_type_uint32, - .vector_channels = 4 - }; - - if (ctx->is_blend) { - /* Blend descriptor comes from the compile inputs */ - blend.src[2] = BIR_INDEX_CONSTANT | 0; - blend.src[3] = BIR_INDEX_CONSTANT | 32; - blend.constant.u64 = ctx->blend_desc; - - /* Put the result in r0 */ - blend.dest = BIR_INDEX_REGISTER | 0; - } else { - /* Blend descriptor comes from the FAU RAM */ - blend.src[2] = BIR_INDEX_FAU | (BIR_FAU_BLEND_0 + rt); - blend.src[3] = blend.src[2] | BIR_FAU_HI; - - /* By convention, the return address is stored in r48 and will - * be used by the blend shader to jump back to the fragment - * shader when it's done. - */ - blend.dest = BIR_INDEX_REGISTER | 48; - } - - assert(blend.blend_location < 8); - assert(ctx->blend_types); - assert(blend.src_types[0]); - ctx->blend_types[blend.blend_location] = blend.src_types[0]; - - bi_emit(ctx, blend); -} - -static void -bi_emit_zs_emit(bi_context *ctx, unsigned z, unsigned stencil) -{ - bi_instruction ins = { - .type = BI_ZS_EMIT, - .src = { - z, - stencil, - BIR_INDEX_REGISTER | 60 /* TODO: RA */, - }, - .src_types = { - nir_type_float32, - nir_type_uint8, - nir_type_uint32, - }, - .swizzle = { { 0 }, { 0 }, { 0 } }, - .dest = BIR_INDEX_REGISTER | 60 /* TODO: RA */, - .dest_type = nir_type_uint32, - }; - - bi_emit(ctx, ins); -} - static void bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr) { @@ -672,83 +401,6 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr) } } -static void -bi_emit_frag_out(bi_context *ctx, nir_intrinsic_instr *instr) -{ - bool combined = instr->intrinsic == - nir_intrinsic_store_combined_output_pan; - - unsigned writeout = combined ? nir_intrinsic_component(instr) : - PAN_WRITEOUT_C; - - bool emit_blend = writeout & (PAN_WRITEOUT_C); - bool emit_zs = writeout & (PAN_WRITEOUT_Z | PAN_WRITEOUT_S); - - const nir_variable *var = - nir_find_variable_with_driver_location(ctx->nir, nir_var_shader_out, - nir_intrinsic_base(instr)); - assert(var); - - if (!ctx->emitted_atest && !ctx->is_blend) { - bi_emit_atest(ctx, - pan_src_index(&instr->src[0]), - nir_intrinsic_src_type(instr)); - - ctx->emitted_atest = true; - } - - if (emit_zs) { - unsigned z = writeout & PAN_WRITEOUT_Z ? - pan_src_index(&instr->src[2]) : 0; - unsigned s = writeout & PAN_WRITEOUT_S ? - pan_src_index(&instr->src[3]) : 0; - - bi_emit_zs_emit(ctx, z, s); - } - - if (emit_blend) { - unsigned loc = var->data.location; - assert(loc == FRAG_RESULT_COLOR || loc >= FRAG_RESULT_DATA0); - - unsigned rt = loc == FRAG_RESULT_COLOR ? 0 : - (loc - FRAG_RESULT_DATA0); - - bi_emit_blend(ctx, - pan_src_index(&instr->src[0]), - nir_intrinsic_src_type(instr), - rt); - } - - if (ctx->is_blend) { - /* Jump back to the fragment shader, return address is stored - * in r48 (see above). - */ - bi_instruction *ret = bi_emit_branch(ctx); - ret->src[2] = BIR_INDEX_REGISTER | 48; - } -} - -static bi_instruction -bi_load_with_r61(enum bi_class T, nir_intrinsic_instr *instr) -{ - bi_instruction ld = bi_load_old(T, instr, 2); - ld.src[0] = BIR_INDEX_REGISTER | 61; /* TODO: RA */ - ld.src[1] = BIR_INDEX_REGISTER | 62; - ld.src_types[0] = nir_type_uint32; - ld.src_types[1] = nir_type_uint32; - ld.format = instr->intrinsic == nir_intrinsic_store_output ? - nir_intrinsic_src_type(instr) : - nir_intrinsic_dest_type(instr); - - /* Promote to immediate instruction if we can */ - if (ld.src[0] & BIR_INDEX_CONSTANT && ld.constant.u64 < 16) { - ld.attribute.immediate = true; - ld.attribute.index = ld.constant.u64; - } - - return ld; -} - static void bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr) { @@ -783,47 +435,6 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr) regfmt, nr - 1); } -static void -bi_emit_st_vary(bi_context *ctx, nir_intrinsic_instr *instr) -{ - bi_instruction address = bi_load_with_r61(BI_LOAD_VAR_ADDRESS, instr); - address.dest = bi_make_temp(ctx); - address.dest_type = nir_type_uint32; - address.vector_channels = 3; - - /* Only look at the total components needed. In effect, we fill in all - * the intermediate "holes" in the write mask, since we can't mask off - * stores. Since nir_lower_io_to_temporaries ensures each varying is - * written at most once, anything that's masked out is undefined, so it - * doesn't matter what we write there. So we may as well do the - * simplest thing possible. */ - unsigned nr = util_last_bit(nir_intrinsic_write_mask(instr)); - assert(nr > 0 && nr <= nir_intrinsic_src_components(instr, 0)); - - bi_instruction st = { - .type = BI_STORE_VAR, - .src = { - pan_src_index(&instr->src[0]), - address.dest, address.dest, address.dest, - }, - .src_types = { - nir_type_uint32, - nir_type_uint32, nir_type_uint32, nir_type_uint32, - }, - .swizzle = { - { 0 }, - { 0 }, { 1 }, { 2} - }, - .vector_channels = nr, - }; - - for (unsigned i = 0; i < nr; ++i) - st.swizzle[0][i] = i; - - bi_emit(ctx, address); - bi_emit(ctx, st); -} - static void bi_emit_load_ubo(bi_builder *b, nir_intrinsic_instr *instr) { @@ -860,68 +471,6 @@ bi_emit_load_ubo(bi_builder *b, nir_intrinsic_instr *instr) BI_SEG_UBO); } -static void -bi_emit_ld_ubo(bi_context *ctx, nir_intrinsic_instr *instr) -{ - /* nir_lower_uniforms_to_ubo() should have been called, reserving - * UBO #0 for uniforms even if the shaders doesn't have uniforms. - */ - assert(ctx->nir->info.first_ubo_is_default_ubo); - - bool offset_is_const = nir_src_is_const(instr->src[1]); - unsigned dyn_offset = pan_src_index(&instr->src[1]); - uint32_t const_offset = 0; - - if (nir_src_is_const(instr->src[1])) - const_offset = nir_src_as_uint(instr->src[1]); - - if (nir_src_is_const(instr->src[0]) && - nir_src_as_uint(instr->src[0]) == 0 && - ctx->sysvals.sysval_count) { - if (offset_is_const) { - const_offset += 16 * ctx->sysvals.sysval_count; - } else { - bi_instruction add = { - .type = BI_IMATH, - .op.imath = BI_IMATH_ADD, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_uint32, - .src = { dyn_offset, BIR_INDEX_CONSTANT | 0, BIR_INDEX_ZERO }, - .src_types = { nir_type_uint32, nir_type_uint32, nir_type_uint32 }, - .constant.u64 = 16 * ctx->sysvals.sysval_count, - }; - - bi_emit(ctx, add); - dyn_offset = add.dest; - } - } - - bi_instruction ld = { - .type = BI_LOAD_UNIFORM, - .segment = BI_SEG_UBO, - .vector_channels = instr->num_components, - .src_types = { nir_type_uint32, nir_type_uint32 }, - .dest = pan_dest_index(&instr->dest), - .dest_type = nir_type_uint | nir_dest_bit_size(instr->dest), - }; - - if (offset_is_const) { - ld.src[0] = BIR_INDEX_CONSTANT | 0; - ld.constant.u64 |= const_offset; - } else { - ld.src[0] = dyn_offset; - } - - if (nir_src_is_const(instr->src[0])) { - ld.src[1] = BIR_INDEX_CONSTANT | 32; - ld.constant.u64 |= nir_src_as_uint(instr->src[0]) << 32; - } else { - ld.src[1] = pan_src_index(&instr->src[0]); - } - - bi_emit(ctx, ld); -} - static void bi_load_sysval(bi_builder *b, nir_instr *instr, unsigned nr_components, unsigned offset) @@ -940,35 +489,6 @@ bi_load_sysval(bi_builder *b, nir_instr *instr, bi_imm_u32(idx), bi_zero(), BI_SEG_UBO); } -static void -bi_emit_sysval(bi_context *ctx, nir_instr *instr, - unsigned nr_components, unsigned offset) -{ - nir_dest nir_dest; - - /* Figure out which uniform this is */ - int sysval = panfrost_sysval_for_instr(instr, &nir_dest); - void *val = _mesa_hash_table_u64_search(ctx->sysvals.sysval_to_id, sysval); - - /* Sysvals are prefix uniforms */ - unsigned uniform = ((uintptr_t) val) - 1; - - /* Emit the read itself -- this is never indirect */ - - bi_instruction load = { - .type = BI_LOAD_UNIFORM, - .segment = BI_SEG_UBO, - .vector_channels = nr_components, - .src = { BIR_INDEX_CONSTANT, BIR_INDEX_ZERO }, - .src_types = { nir_type_uint32, nir_type_uint32 }, - .constant = { (uniform * 16) + offset }, - .dest = pan_dest_index(&nir_dest), - .dest_type = nir_type_uint32, /* TODO */ - }; - - bi_emit(ctx, load); -} - /* gl_FragCoord.xy = u16_to_f32(R59.xy) + 0.5 * gl_FragCoord.z = ld_vary(fragz) * gl_FragCoord.w = ld_vary(fragw) @@ -997,262 +517,6 @@ bi_emit_load_frag_coord(bi_builder *b, nir_intrinsic_instr *instr) bi_make_vec_to(b, bi_dest_index(&instr->dest), src, NULL, 4, 32); } -/* gl_FragCoord.xy = u16_to_f32(R59.xy) + 0.5 - * gl_FragCoord.z = ld_vary(fragz) - * gl_FragCoord.w = ld_vary(fragw) - */ - -static void -bi_emit_ld_frag_coord(bi_context *ctx, nir_intrinsic_instr *instr) -{ - /* Future proofing for mediump fragcoord at some point.. */ - nir_alu_type T = nir_type_float32; - - /* First, sketch a combine */ - bi_instruction combine = { - .type = BI_COMBINE, - .dest_type = nir_type_uint32, - .dest = pan_dest_index(&instr->dest), - .src_types = { T, T, T, T }, - }; - - /* Second, handle xy */ - for (unsigned i = 0; i < 2; ++i) { - bi_instruction conv = { - .type = BI_CONVERT, - .dest_type = T, - .dest = bi_make_temp(ctx), - .src = { - /* TODO: RA XXX */ - BIR_INDEX_REGISTER | 59 - }, - .src_types = { nir_type_uint16 }, - .swizzle = { { i } } - }; - - bi_instruction add = { - .type = BI_ADD, - .dest_type = T, - .dest = bi_make_temp(ctx), - .src = { conv.dest, BIR_INDEX_CONSTANT }, - .src_types = { T, T }, - }; - - float half = 0.5; - memcpy(&add.constant.u32, &half, sizeof(float)); - - bi_emit(ctx, conv); - bi_emit(ctx, add); - - combine.src[i] = add.dest; - } - - /* Third, zw */ - for (unsigned i = 0; i < 2; ++i) { - bi_instruction load = { - .type = BI_LOAD_VAR, - .load_vary = { - .interp_mode = BI_SAMPLE_CENTER, - .update_mode = BI_UPDATE_CLOBBER, - .var_id = (i == 0) ? - BI_VARYING_NAME_FRAG_Z : - BI_VARYING_NAME_FRAG_W, - .special = true, - .reuse = false, - .flat = true - }, - .vector_channels = 1, - .dest_type = nir_type_float32, - .format = nir_type_float32, - .dest = bi_make_temp(ctx), - .src[0] = BIR_INDEX_PASS | BIFROST_SRC_FAU_LO, - .src_types[0] = nir_type_uint32, - }; - - bi_emit(ctx, load); - - combine.src[i + 2] = load.dest; - } - - /* Finally, emit the combine */ - bi_emit(ctx, combine); -} - -static void -bi_emit_discard(bi_context *ctx, nir_intrinsic_instr *instr) -{ - /* Goofy lowering */ - bi_instruction discard = { - .type = BI_DISCARD, - .cond = BI_COND_EQ, - .src_types = { nir_type_uint32, nir_type_uint32 }, - .src = { BIR_INDEX_ZERO, BIR_INDEX_ZERO }, - }; - - bi_emit(ctx, discard); -} - -static void -bi_fuse_cond(bi_instruction *csel, nir_alu_src cond, - unsigned *constants_left, unsigned *constant_shift, - unsigned comps, bool float_only); - -static void -bi_emit_discard_if(bi_context *ctx, nir_intrinsic_instr *instr) -{ - nir_src cond = instr->src[0]; - nir_alu_type T = nir_type_uint | nir_src_bit_size(cond); - - bi_instruction discard = { - .type = BI_DISCARD, - .cond = BI_COND_NE, - .src_types = { T, T }, - .src = { - pan_src_index(&cond), - BIR_INDEX_ZERO - }, - }; - - /* Try to fuse in the condition */ - unsigned constants_left = 1, constant_shift = 0; - - /* Scalar so no swizzle */ - nir_alu_src wrap = { - .src = instr->src[0] - }; - - /* May or may not succeed but we're optimistic */ - bi_fuse_cond(&discard, wrap, &constants_left, &constant_shift, 1, true); - - bi_emit(ctx, discard); -} - -static void -bi_emit_blend_const(bi_context *ctx, nir_intrinsic_instr *instr) -{ - assert(ctx->is_blend); - - unsigned comp; - switch (instr->intrinsic) { - case nir_intrinsic_load_blend_const_color_r_float: comp = 0; break; - case nir_intrinsic_load_blend_const_color_g_float: comp = 1; break; - case nir_intrinsic_load_blend_const_color_b_float: comp = 2; break; - case nir_intrinsic_load_blend_const_color_a_float: comp = 3; break; - default: unreachable("Invalid load blend constant intrinsic"); - } - - bi_instruction move = { - .type = BI_MOV, - .dest = pan_dest_index(&instr->dest), - .dest_type = nir_type_uint32, - .src = { BIR_INDEX_CONSTANT }, - .src_types = { nir_type_uint32 }, - }; - - memcpy(&move.constant.u32, &ctx->blend_constants[comp], sizeof(float)); - - bi_emit(ctx, move); -} - -static void -bi_emit_sample_id(bi_context *ctx, nir_intrinsic_instr *instr) -{ - bi_instruction ins = { - .type = BI_BITWISE, - .op.bitwise = BI_BITWISE_AND, - .bitwise.rshift = true, - .dest = pan_dest_index(&instr->dest), - .dest_type = nir_type_uint32, - .src = { - /* r61[16:23] contains the sampleID */ - BIR_INDEX_REGISTER | 61, - /* mask */ - BIR_INDEX_CONSTANT | 0, - /* shift */ - BIR_INDEX_CONSTANT | 32, - }, - .src_types = { - nir_type_uint32, - nir_type_uint32, - nir_type_uint8, - }, - .constant.u64 = 0xffull | (0x10ull << 32ull) - }; - - bi_emit(ctx, ins); -} - -static void -bi_emit_front_face(bi_context *ctx, nir_intrinsic_instr *instr) -{ - bi_instruction ins = { - .type = BI_CMP, - .cond = BI_COND_EQ, - .dest = pan_dest_index(&instr->dest), - .dest_type = nir_type_uint32, - .src = { - /* r58 == 0 means primitive is front facing */ - BIR_INDEX_REGISTER | 58, - BIR_INDEX_ZERO, - }, - .src_types = { - nir_type_uint32, - nir_type_uint32, - }, - }; - - bi_emit(ctx, ins); -} - -static void -bi_emit_point_coord(bi_context *ctx, nir_intrinsic_instr *instr) -{ - bi_instruction ins = { - .type = BI_LOAD_VAR, - .load_vary = { - .update_mode = BI_UPDATE_CLOBBER, - .var_id = BI_VARYING_NAME_POINT, - .special = true, - }, - .vector_channels = 2, - .dest = pan_dest_index(&instr->dest), - .dest_type = nir_type_float32, - .format = nir_type_float32, - .src[0] = BIR_INDEX_ZERO, - .src_types[0] = nir_type_uint32, - }; - - bi_emit(ctx, ins); -} - -static void -bi_emit_vertex_id(bi_context *ctx, nir_intrinsic_instr *instr) -{ - bi_instruction mov = { - .type = BI_MOV, - .dest = pan_dest_index(&instr->dest), - .dest_type = nir_type_int32, - .src = { BIR_INDEX_REGISTER | 61 }, - .src_types = { nir_type_int32 }, - }; - - bi_emit(ctx, mov); -} - -static void -bi_emit_instance_id(bi_context *ctx, nir_intrinsic_instr *instr) -{ - bi_instruction mov = { - .type = BI_MOV, - .dest = pan_dest_index(&instr->dest), - .dest_type = nir_type_int32, - .src = { BIR_INDEX_REGISTER | 62 }, - .src_types = { nir_type_int32 }, - }; - - bi_emit(ctx, mov); -} - static void bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) { @@ -1389,111 +653,6 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) } } -static void -emit_intrinsic(bi_context *ctx, nir_intrinsic_instr *instr) -{ - - switch (instr->intrinsic) { - case nir_intrinsic_load_barycentric_pixel: - case nir_intrinsic_load_barycentric_centroid: - case nir_intrinsic_load_barycentric_sample: - /* stub */ - break; - case nir_intrinsic_load_interpolated_input: - case nir_intrinsic_load_input: - if (ctx->is_blend) - bi_emit_ld_blend_input(ctx, instr); - else if (ctx->stage == MESA_SHADER_FRAGMENT) - bi_emit_ld_vary(ctx, instr); - else if (ctx->stage == MESA_SHADER_VERTEX) - bi_emit(ctx, bi_load_with_r61(BI_LOAD_ATTR, instr)); - else { - unreachable("Unsupported shader stage"); - } - break; - - case nir_intrinsic_store_output: - if (ctx->stage == MESA_SHADER_FRAGMENT) - bi_emit_frag_out(ctx, instr); - else if (ctx->stage == MESA_SHADER_VERTEX) - bi_emit_st_vary(ctx, instr); - else - unreachable("Unsupported shader stage"); - break; - - case nir_intrinsic_store_combined_output_pan: - assert(ctx->stage == MESA_SHADER_FRAGMENT); - bi_emit_frag_out(ctx, instr); - break; - - case nir_intrinsic_load_ubo: - bi_emit_ld_ubo(ctx, instr); - break; - - case nir_intrinsic_load_frag_coord: - bi_emit_ld_frag_coord(ctx, instr); - break; - - case nir_intrinsic_discard: - bi_emit_discard(ctx, instr); - break; - - case nir_intrinsic_discard_if: - bi_emit_discard_if(ctx, instr); - break; - - case nir_intrinsic_load_ssbo_address: - bi_emit_sysval(ctx, &instr->instr, 1, 0); - break; - - case nir_intrinsic_get_ssbo_size: - bi_emit_sysval(ctx, &instr->instr, 1, 8); - break; - - case nir_intrinsic_load_output: - bi_emit_ld_output(ctx, instr); - break; - - case nir_intrinsic_load_viewport_scale: - case nir_intrinsic_load_viewport_offset: - case nir_intrinsic_load_num_work_groups: - case nir_intrinsic_load_sampler_lod_parameters_pan: - bi_emit_sysval(ctx, &instr->instr, 3, 0); - break; - - case nir_intrinsic_load_blend_const_color_r_float: - case nir_intrinsic_load_blend_const_color_g_float: - case nir_intrinsic_load_blend_const_color_b_float: - case nir_intrinsic_load_blend_const_color_a_float: - bi_emit_blend_const(ctx, instr); - break; - - case nir_intrinsic_load_sample_id: - bi_emit_sample_id(ctx, instr); - break; - - case nir_intrinsic_load_front_face: - bi_emit_front_face(ctx, instr); - break; - - case nir_intrinsic_load_point_coord: - bi_emit_point_coord(ctx, instr); - break; - - case nir_intrinsic_load_vertex_id: - bi_emit_vertex_id(ctx, instr); - break; - - case nir_intrinsic_load_instance_id: - bi_emit_instance_id(ctx, instr); - break; - - default: - unreachable("Unknown intrinsic"); - break; - } -} - static void bi_emit_load_const(bi_builder *b, nir_load_const_instr *instr) { @@ -1512,505 +671,6 @@ bi_emit_load_const(bi_builder *b, nir_load_const_instr *instr) bi_mov_i32_to(b, bi_get_index(instr->def.index, false, 0), bi_imm_u32(acc)); } -static void -emit_load_const(bi_context *ctx, nir_load_const_instr *instr) -{ - /* Make sure we've been lowered */ - assert(instr->def.num_components <= (32 / instr->def.bit_size)); - - /* Accumulate all the channels of the constant, as if we did an - * implicit SEL over them */ - uint32_t acc = 0; - - for (unsigned i = 0; i < instr->def.num_components; ++i) { - unsigned v = nir_const_value_as_uint(instr->value[i], instr->def.bit_size); - acc |= (v << (i * instr->def.bit_size)); - } - - bi_instruction move = { - .type = BI_MOV, - .dest = pan_ssa_index(&instr->def), - .dest_type = nir_type_uint32, - .src = { - BIR_INDEX_CONSTANT - }, - .src_types = { - nir_type_uint32, - }, - .constant = { - .u32 = acc - } - }; - - bi_emit(ctx, move); -} - -#define BI_CASE_CMP(op) \ - case op##8: \ - case op##16: \ - case op##32: \ - -static enum bi_class -bi_class_for_nir_alu(nir_op op) -{ - switch (op) { - case nir_op_fadd: - case nir_op_fsub: - return BI_ADD; - - case nir_op_iadd: - case nir_op_isub: - return BI_IMATH; - - case nir_op_imul: - return BI_IMUL; - - case nir_op_iand: - case nir_op_ior: - case nir_op_ixor: - case nir_op_inot: - case nir_op_ishl: - case nir_op_ishr: - case nir_op_ushr: - return BI_BITWISE; - - BI_CASE_CMP(nir_op_flt) - BI_CASE_CMP(nir_op_fge) - BI_CASE_CMP(nir_op_feq) - BI_CASE_CMP(nir_op_fneu) - BI_CASE_CMP(nir_op_ilt) - BI_CASE_CMP(nir_op_ige) - BI_CASE_CMP(nir_op_ieq) - BI_CASE_CMP(nir_op_ine) - BI_CASE_CMP(nir_op_uge) - BI_CASE_CMP(nir_op_ult) - return BI_CMP; - - case nir_op_b8csel: - case nir_op_b16csel: - case nir_op_b32csel: - return BI_CSEL; - - case nir_op_i2i8: - case nir_op_i2i16: - case nir_op_i2i32: - case nir_op_i2i64: - case nir_op_u2u8: - case nir_op_u2u16: - case nir_op_u2u32: - case nir_op_u2u64: - case nir_op_f2i16: - case nir_op_f2i32: - case nir_op_f2i64: - case nir_op_f2u16: - case nir_op_f2u32: - case nir_op_f2u64: - case nir_op_i2f16: - case nir_op_i2f32: - case nir_op_i2f64: - case nir_op_u2f16: - case nir_op_u2f32: - case nir_op_u2f64: - case nir_op_f2f16: - case nir_op_f2f32: - case nir_op_f2f64: - case nir_op_f2fmp: - return BI_CONVERT; - - case nir_op_vec2: - case nir_op_vec3: - case nir_op_vec4: - return BI_COMBINE; - - case nir_op_vec8: - case nir_op_vec16: - unreachable("should've been lowered"); - - case nir_op_ffma: - case nir_op_fmul: - return BI_FMA; - - case nir_op_imin: - case nir_op_imax: - case nir_op_umin: - case nir_op_umax: - case nir_op_fmin: - case nir_op_fmax: - return BI_MINMAX; - - case nir_op_fsat: - case nir_op_fneg: - case nir_op_fabs: - return BI_FMOV; - case nir_op_mov: - return BI_MOV; - - case nir_op_fround_even: - case nir_op_fceil: - case nir_op_ffloor: - case nir_op_ftrunc: - return BI_ROUND; - - case nir_op_frcp: - case nir_op_frsq: - case nir_op_iabs: - return BI_SPECIAL_ADD; - - default: - unreachable("Unknown ALU op"); - } -} - -/* Gets a bi_cond for a given NIR comparison opcode. In soft mode, it will - * return BI_COND_ALWAYS as a sentinel if it fails to do so (when used for - * optimizations). Otherwise it will bail (when used for primary code - * generation). */ - -static enum bi_cond -bi_cond_for_nir(nir_op op, bool soft) -{ - switch (op) { - BI_CASE_CMP(nir_op_flt) - BI_CASE_CMP(nir_op_ilt) - BI_CASE_CMP(nir_op_ult) - return BI_COND_LT; - - BI_CASE_CMP(nir_op_fge) - BI_CASE_CMP(nir_op_ige) - BI_CASE_CMP(nir_op_uge) - return BI_COND_GE; - - BI_CASE_CMP(nir_op_feq) - BI_CASE_CMP(nir_op_ieq) - return BI_COND_EQ; - - BI_CASE_CMP(nir_op_fneu) - BI_CASE_CMP(nir_op_ine) - return BI_COND_NE; - default: - if (soft) - return BI_COND_ALWAYS; - else - unreachable("Invalid compare"); - } -} - -static void -bi_copy_src(bi_instruction *alu, nir_alu_instr *instr, unsigned i, unsigned to, - unsigned *constants_left, unsigned *constant_shift) -{ - unsigned bits = nir_src_bit_size(instr->src[i].src); - unsigned dest_bits = nir_dest_bit_size(instr->dest.dest); - - alu->src_types[to] = nir_op_infos[instr->op].input_types[i] - | bits; - - /* Try to inline a constant */ - if (nir_src_is_const(instr->src[i].src) && *constants_left && (dest_bits == bits)) { - uint64_t mask = (1ull << dest_bits) - 1; - uint64_t cons = nir_src_as_uint(instr->src[i].src); - - /* Try to reuse a constant */ - for (unsigned i = 0; i < (*constant_shift); i += dest_bits) { - if (((alu->constant.u64 >> i) & mask) == cons) { - alu->src[to] = BIR_INDEX_CONSTANT | i; - return; - } - } - - alu->constant.u64 |= cons << *constant_shift; - alu->src[to] = BIR_INDEX_CONSTANT | (*constant_shift); - --(*constants_left); - (*constant_shift) += MAX2(dest_bits, 32); /* lo/hi */ - return; - } - - alu->src[to] = pan_src_index(&instr->src[i].src); - - /* Copy swizzle for all vectored components, replicating last component - * to fill undersized */ - - unsigned vec = alu->type == BI_COMBINE ? 1 : - MAX2(1, 32 / bits); - - unsigned comps = nir_ssa_alu_instr_src_components(instr, i); - for (unsigned j = 0; j < vec; ++j) - alu->swizzle[to][j] = instr->src[i].swizzle[MIN2(j, comps - 1)]; -} - -static void -bi_fuse_cond(bi_instruction *csel, nir_alu_src cond, - unsigned *constants_left, unsigned *constant_shift, - unsigned comps, bool float_only) -{ - /* Bail for vector weirdness */ - if (cond.swizzle[0] != 0) - return; - - if (!cond.src.is_ssa) - return; - - nir_ssa_def *def = cond.src.ssa; - nir_instr *parent = def->parent_instr; - - if (parent->type != nir_instr_type_alu) - return; - - nir_alu_instr *alu = nir_instr_as_alu(parent); - - /* Try to match a condition */ - enum bi_cond bcond = bi_cond_for_nir(alu->op, true); - - if (bcond == BI_COND_ALWAYS) - return; - - /* Some instructions can't compare ints */ - if (float_only) { - nir_alu_type T = nir_op_infos[alu->op].input_types[0]; - T = nir_alu_type_get_base_type(T); - - if (T != nir_type_float) - return; - } - - /* We found one, let's fuse it in */ - csel->cond = bcond; - bi_copy_src(csel, alu, 0, 0, constants_left, constant_shift); - bi_copy_src(csel, alu, 1, 1, constants_left, constant_shift); -} - -static void -emit_alu(bi_context *ctx, nir_alu_instr *instr) -{ - /* Try some special functions */ - switch (instr->op) { - case nir_op_fexp2: - bi_emit_fexp2(ctx, instr); - return; - case nir_op_flog2: - bi_emit_flog2(ctx, instr); - return; - case nir_op_fddx: - case nir_op_fddy: - bi_emit_deriv(ctx, instr); - return; - default: - break; - } - - /* Otherwise, assume it's something we can handle normally */ - bi_instruction alu = { - .type = bi_class_for_nir_alu(instr->op), - .dest = pan_dest_index(&instr->dest.dest), - .dest_type = nir_op_infos[instr->op].output_type - | nir_dest_bit_size(instr->dest.dest), - }; - - /* TODO: Implement lowering of special functions for older Bifrost */ - assert(alu.type != BI_SPECIAL_ADD || !(ctx->quirks & BIFROST_NO_FAST_OP)); - - unsigned comps = nir_dest_num_components(instr->dest.dest); - bool vector = comps > MAX2(1, 32 / nir_dest_bit_size(instr->dest.dest)); - assert(!vector || alu.type == BI_COMBINE || alu.type == BI_MOV); - - if (!instr->dest.dest.is_ssa) { - for (unsigned i = 0; i < comps; ++i) - assert(instr->dest.write_mask); - } - - /* We inline constants as we go. This tracks how many constants have - * been inlined, since we're limited to 64-bits of constants per - * instruction */ - - unsigned dest_bits = nir_dest_bit_size(instr->dest.dest); - unsigned constants_left = (64 / dest_bits); - unsigned constant_shift = 0; - - if (alu.type == BI_COMBINE) - constants_left = 0; - - /* Copy sources */ - - unsigned num_inputs = nir_op_infos[instr->op].num_inputs; - assert(num_inputs <= ARRAY_SIZE(alu.src)); - - for (unsigned i = 0; i < num_inputs; ++i) { - unsigned f = 0; - - if (i && alu.type == BI_CSEL) - f++; - - bi_copy_src(&alu, instr, i, i + f, &constants_left, &constant_shift); - } - - /* Op-specific fixup */ - switch (instr->op) { - case nir_op_fmul: - alu.src[2] = BIR_INDEX_ZERO; /* FMA */ - alu.src_types[2] = alu.src_types[1]; - break; - case nir_op_fsat: - alu.clamp = BI_CLAMP_CLAMP_0_1; /* FMOV */ - break; - case nir_op_fneg: - alu.src_neg[0] = true; /* FMOV */ - break; - case nir_op_fabs: - alu.src_abs[0] = true; /* FMOV */ - break; - case nir_op_fsub: - alu.src_neg[1] = true; /* FADD */ - break; - case nir_op_iadd: - alu.op.imath = BI_IMATH_ADD; - /* Carry */ - alu.src[2] = BIR_INDEX_ZERO; - break; - case nir_op_isub: - alu.op.imath = BI_IMATH_SUB; - /* Borrow */ - alu.src[2] = BIR_INDEX_ZERO; - break; - case nir_op_iabs: - alu.op.special = BI_SPECIAL_IABS; - break; - case nir_op_inot: - /* no dedicated bitwise not, but we can invert sources. convert to ~(a | 0) */ - alu.op.bitwise = BI_BITWISE_OR; - alu.bitwise.dest_invert = true; - alu.src[1] = BIR_INDEX_ZERO; - /* zero shift */ - alu.src[2] = BIR_INDEX_ZERO; - alu.src_types[2] = nir_type_uint8; - break; - case nir_op_ushr: - alu.bitwise.rshift = true; - /* fallthrough */ - case nir_op_ishl: - alu.op.bitwise = BI_BITWISE_OR; - /* move src1 to src2 and replace with zero. underlying op is (src0 << src2) | src1 */ - alu.src[2] = alu.src[1]; - alu.src_types[2] = nir_type_uint8; - alu.src[1] = BIR_INDEX_ZERO; - break; - case nir_op_ishr: - alu.op.bitwise = BI_BITWISE_ARSHIFT; - alu.bitwise.rshift = true; - /* move src1 to src2 and replace with zero. underlying op is (src0 >> src2) */ - alu.src[2] = alu.src[1]; - alu.src_types[2] = nir_type_uint8; - alu.src[1] = BIR_INDEX_ZERO; - break; - case nir_op_imul: - alu.op.imul = BI_IMUL_IMUL; - break; - case nir_op_fmax: - case nir_op_imax: - case nir_op_umax: - alu.op.minmax = BI_MINMAX_MAX; /* MINMAX */ - break; - case nir_op_frcp: - alu.op.special = BI_SPECIAL_FRCP; - break; - case nir_op_frsq: - alu.op.special = BI_SPECIAL_FRSQ; - break; - BI_CASE_CMP(nir_op_flt) - BI_CASE_CMP(nir_op_ilt) - BI_CASE_CMP(nir_op_fge) - BI_CASE_CMP(nir_op_ige) - BI_CASE_CMP(nir_op_feq) - BI_CASE_CMP(nir_op_ieq) - BI_CASE_CMP(nir_op_fneu) - BI_CASE_CMP(nir_op_ine) - BI_CASE_CMP(nir_op_uge) - BI_CASE_CMP(nir_op_ult) - alu.cond = bi_cond_for_nir(instr->op, false); - break; - case nir_op_fround_even: - alu.round = BI_ROUND_NONE; - break; - case nir_op_fceil: - alu.round = BI_ROUND_RTP; - break; - case nir_op_ffloor: - alu.round = BI_ROUND_RTN; - break; - case nir_op_ftrunc: - alu.round = BI_ROUND_RTZ; - break; - case nir_op_iand: - alu.op.bitwise = BI_BITWISE_AND; - /* zero shift */ - alu.src[2] = BIR_INDEX_ZERO; - alu.src_types[2] = nir_type_uint8; - break; - case nir_op_ior: - alu.op.bitwise = BI_BITWISE_OR; - /* zero shift */ - alu.src[2] = BIR_INDEX_ZERO; - alu.src_types[2] = nir_type_uint8; - break; - case nir_op_ixor: - alu.op.bitwise = BI_BITWISE_XOR; - /* zero shift */ - alu.src[2] = BIR_INDEX_ZERO; - alu.src_types[2] = nir_type_uint8; - break; - case nir_op_f2i32: - case nir_op_f2u32: - alu.round = BI_ROUND_RTZ; - break; - - case nir_op_f2f16: - case nir_op_i2i16: - case nir_op_u2u16: { - if (nir_src_bit_size(instr->src[0].src) != 32) - break; - - /* Should have been const folded */ - assert(!nir_src_is_const(instr->src[0].src)); - - alu.src_types[1] = alu.src_types[0]; - alu.src[1] = alu.src[0]; - - unsigned last = nir_dest_num_components(instr->dest.dest) - 1; - assert(last <= 1); - - alu.swizzle[1][0] = instr->src[0].swizzle[last]; - break; - } - - default: - break; - } - - if (alu.type == BI_MOV && vector) { - alu.type = BI_COMBINE; - - for (unsigned i = 0; i < comps; ++i) { - alu.src[i] = alu.src[0]; - alu.swizzle[i][0] = instr->src[0].swizzle[i]; - } - } - - if (alu.type == BI_CSEL) { - /* Default to csel3 */ - alu.cond = BI_COND_NE; - alu.src[1] = BIR_INDEX_ZERO; - alu.src_types[1] = alu.src_types[0]; - - /* TODO: Reenable cond fusing when we can split up registers - * when scheduling */ -#if 0 - bi_fuse_cond(&alu, instr->src[0], - &constants_left, &constant_shift, comps, false); -#endif - } - - bi_emit(ctx, alu); -} - static bi_index bi_alu_src_index(nir_alu_src src, unsigned comps) { @@ -2507,40 +1167,6 @@ bi_emit_texs(bi_builder *b, nir_tex_instr *instr) instr->sampler_index, instr->texture_index); } -static void -emit_texs(bi_context *ctx, nir_tex_instr *instr) -{ - bi_instruction tex = { - .type = BI_TEXS, - .texture = { - .texture_index = instr->texture_index, - .sampler_index = instr->sampler_index, - .compute_lod = instr->op == nir_texop_tex, - }, - .dest = pan_dest_index(&instr->dest), - .dest_type = instr->dest_type, - .src_types = { nir_type_float32, nir_type_float32 }, - .vector_channels = 4 - }; - - for (unsigned i = 0; i < instr->num_srcs; ++i) { - int index = pan_src_index(&instr->src[i].src); - - /* We were checked ahead-of-time */ - if (instr->src[i].src_type == nir_tex_src_lod) - continue; - - assert (instr->src[i].src_type == nir_tex_src_coord); - - tex.src[0] = index; - tex.src[1] = index; - tex.swizzle[0][0] = 0; - tex.swizzle[1][0] = 1; - } - - bi_emit(ctx, tex); -} - /* Returns dimension with 0 special casing cubemaps. Shamelessly copied from Midgard */ static unsigned bifrost_tex_format(enum glsl_sampler_dim dim) @@ -2603,38 +1229,6 @@ bi_emit_texc_array_index(bi_builder *b, bi_index idx, nir_alu_type T) return bi_f32_to_u32(b, idx, BI_ROUND_NONE); } -static unsigned -bi_emit_array_index(bi_context *ctx, unsigned idx, nir_alu_type T, unsigned *c) -{ - /* For (u)int we can just passthrough */ - nir_alu_type base = nir_alu_type_get_base_type(T); - if (base == nir_type_int || base == nir_type_uint) { - *c = 2; - return idx; - } - - /* Otherwise we convert */ - assert(T == nir_type_float16 || T == nir_type_float32); - - /* OpenGL ES 3.2 specification section 8.14.2 ("Coordinate Wrapping and - * Texel Selection") defines the layer to be taken from clamp(RNE(r), - * 0, dt - 1). So we use round RTE, clamping is handled at the data - * structure level */ - bi_instruction f2i = { - .type = BI_CONVERT, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_uint32, - .src = { idx }, - .src_types = { T }, - .swizzle = { { 2 } }, - .round = BI_ROUND_NONE - }; - - *c = 0; - bi_emit(ctx, f2i); - return f2i.dest; -} - /* TEXC's explicit and bias LOD modes requires the LOD to be transformed to a * 16-bit 8:8 fixed-point format. We lower as: * @@ -2664,70 +1258,6 @@ bi_emit_texc_lod_88(bi_builder *b, bi_index lod, bool fp16) bi_imm_u16(0)); } -static unsigned -bi_emit_lod_88(bi_context *ctx, unsigned lod, bool fp16) -{ - nir_alu_type T = fp16 ? nir_type_float16 : nir_type_float32; - - /* Sort of arbitrary. Must be less than 128.0, greater than or equal to - * the max LOD (16 since we cap at 2^16 texture dimensions), and - * preferably small to minimize precision loss */ - const float max_lod = 16.0; - - /* FMA.f16/f32.sat_signed, saturated, lod, #1.0/max_lod, #0 */ - bi_instruction fsat = { - .type = BI_FMA, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_float32, - .src = { lod, BIR_INDEX_CONSTANT, BIR_INDEX_ZERO }, - .src_types = { T, nir_type_float32, nir_type_float32 }, - .clamp = BI_CLAMP_CLAMP_M1_1, - .round = BI_ROUND_NONE, - .constant = { - .u64 = fui(1.0 / max_lod) - }, - }; - - /* FMA.f32 scaled, saturated, lod, #(max_lod * 256.0), #0 */ - bi_instruction fmul = { - .type = BI_FMA, - .dest = bi_make_temp(ctx), - .dest_type = T, - .src = { fsat.dest, BIR_INDEX_CONSTANT, BIR_INDEX_ZERO }, - .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 }, - .round = BI_ROUND_NONE, - .constant = { - .u64 = fui(max_lod * 256.0) - }, - }; - - /* F32_TO_S32 s32, scaled */ - bi_instruction f2i = { - .type = BI_CONVERT, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_int32, - .src = { fmul.dest }, - .src_types = { T }, - .round = BI_ROUND_RTZ - }; - - /* MKVEC.v2i16 s32.h0, #0 */ - bi_instruction mkvec = { - .type = BI_SELECT, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_int16, - .src = { f2i.dest, BIR_INDEX_ZERO }, - .src_types = { nir_type_int16, nir_type_int16 }, - }; - - bi_emit(ctx, fsat); - bi_emit(ctx, fmul); - bi_emit(ctx, f2i); - bi_emit(ctx, mkvec); - - return mkvec.dest; -} - /* FETCH takes a 32-bit staging register containing the LOD as an integer in * the bottom 16-bits and (if present) the cube face index in the top 16-bits. * TODO: Cube face. @@ -2739,31 +1269,6 @@ bi_emit_texc_lod_cube(bi_builder *b, bi_index lod) return bi_lshift_or_i32(b, lod, bi_zero(), bi_imm_u8(8)); } -static unsigned -bi_emit_lod_cube(bi_context *ctx, unsigned lod) -{ - bi_instruction or = { - .type = BI_BITWISE, - .op.bitwise = BI_BITWISE_OR, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_uint32, - .src = { - lod ? : BIR_INDEX_ZERO, - BIR_INDEX_ZERO, - BIR_INDEX_CONSTANT | 0, - }, - .src_types = { - nir_type_uint32, - nir_type_uint32, - nir_type_uint8, - }, - .constant.u8[0] = 8, - }; - - bi_emit(ctx, or); - return or.dest; -} - /* The hardware specifies texel offsets and multisample indices together as a * u8vec4 . By default all are zero, so if have either a * nonzero texel offset or a nonzero multisample index, we build a u8vec4 with @@ -2800,69 +1305,6 @@ bi_emit_texc_offset_ms_index(bi_builder *b, nir_tex_instr *instr) return dest; } -static unsigned -bi_emit_tex_offset_ms_index(bi_context *ctx, nir_tex_instr *instr) -{ - unsigned dest = 0; - - int offs_idx = nir_tex_instr_src_index(instr, nir_tex_src_offset); - if (offs_idx >= 0 && - (!nir_src_is_const(instr->src[offs_idx].src) || - nir_src_as_uint(instr->src[offs_idx].src) != 0)) { - bi_instruction mkvec = { - .type = BI_SELECT, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_uint32, - .src = { - BIR_INDEX_ZERO, BIR_INDEX_ZERO, - BIR_INDEX_ZERO, BIR_INDEX_ZERO - }, - .src_types = { - nir_type_uint8, nir_type_uint8, - nir_type_uint8, nir_type_uint8 - } - }; - - unsigned ncomps = nir_src_num_components(instr->src[offs_idx].src); - unsigned src = pan_src_index(&instr->src[offs_idx].src); - for (unsigned i = 0; i < ncomps; i++) { - mkvec.src[i] = src; - mkvec.swizzle[i][0] = i * 4; - } - - bi_emit(ctx, mkvec); - dest = mkvec.dest; - } - - int ms_idx = nir_tex_instr_src_index(instr, nir_tex_src_ms_index); - if (ms_idx >= 0 && - (!nir_src_is_const(instr->src[ms_idx].src) || - nir_src_as_uint(instr->src[ms_idx].src) != 0)) { - bi_instruction or = { - .type = BI_BITWISE, - .op.bitwise = BI_BITWISE_OR, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_uint32, - .src = { - pan_src_index(&instr->src[ms_idx].src), - dest ? dest : BIR_INDEX_ZERO, - BIR_INDEX_CONSTANT | 0, - }, - .src_types = { - nir_type_uint32, - nir_type_uint32, - nir_type_uint8, - }, - .constant.u8[0] = 24, - }; - - bi_emit(ctx, or); - dest = or.dest; - } - - return dest; -} - static void bi_emit_cube_coord(bi_builder *b, bi_index coord, bi_index *face, bi_index *s, bi_index *t) @@ -2924,128 +1366,6 @@ bi_emit_cube_coord(bi_builder *b, bi_index coord, *face = cubeface2->dest[0]; } -static void -bi_lower_cube_coord(bi_context *ctx, unsigned coord, - unsigned *face, unsigned *s, unsigned *t) -{ - /* Compute max { |x|, |y|, |z| } */ - bi_instruction cubeface1 = { - .type = BI_SPECIAL_FMA, - .op.special = BI_SPECIAL_CUBEFACE1, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_float32, - .src = { coord, coord, coord }, - .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 }, - .swizzle = { {0}, {1}, {2} } - }; - - /* Calculate packed exponent / face / infinity. In reality this reads - * the destination from cubeface1 but that's handled by lowering */ - bi_instruction cubeface2 = { - .type = BI_SPECIAL_ADD, - .op.special = BI_SPECIAL_CUBEFACE2, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_uint32, - .src = { coord, coord, coord }, - .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 }, - .swizzle = { {0}, {1}, {2} } - }; - - /* Select S coordinate */ - bi_instruction cube_ssel = { - .type = BI_SPECIAL_ADD, - .op.special = BI_SPECIAL_CUBE_SSEL, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_float32, - .src = { coord, coord, cubeface2.dest }, - .src_types = { nir_type_float32, nir_type_float32, nir_type_uint32 }, - .swizzle = { {2}, {0} } - }; - - /* Select T coordinate */ - bi_instruction cube_tsel = { - .type = BI_SPECIAL_ADD, - .op.special = BI_SPECIAL_CUBE_TSEL, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_float32, - .src = { coord, coord, cubeface2.dest }, - .src_types = { nir_type_float32, nir_type_float32, nir_type_uint32 }, - .swizzle = { {1}, {2} } - }; - - /* The OpenGL ES specification requires us to transform an input vector - * (x, y, z) to the coordinate, given the selected S/T: - * - * (1/2 ((s / max{x,y,z}) + 1), 1/2 ((t / max{x, y, z}) + 1)) - * - * We implement (s shown, t similar) in a form friendlier to FMA - * instructions, and clamp coordinates at the end for correct - * NaN/infinity handling: - * - * fsat(s * (0.5 * (1 / max{x, y, z})) + 0.5) - * - * Take the reciprocal of max{x, y, z} - */ - - bi_instruction frcp = { - .type = BI_SPECIAL_ADD, - .op.special = BI_SPECIAL_FRCP, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_float32, - .src = { cubeface1.dest }, - .src_types = { nir_type_float32 }, - }; - - /* Calculate 0.5 * (1.0 / max{x, y, z}) */ - bi_instruction fma1 = { - .type = BI_FMA, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_float32, - .src = { frcp.dest, BIR_INDEX_CONSTANT | 0, BIR_INDEX_ZERO }, - .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 }, - .constant.u64 = 0x3f000000, /* 0.5f */ - }; - - /* Transform the s coordinate */ - bi_instruction fma2 = { - .type = BI_FMA, - .clamp = BI_CLAMP_CLAMP_0_1, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_float32, - .src = { fma1.dest, cube_ssel.dest, BIR_INDEX_CONSTANT | 0 }, - .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 }, - .constant.u64 = 0x3f000000, /* 0.5f */ - }; - - /* Transform the t coordinate */ - bi_instruction fma3 = { - .type = BI_FMA, - .clamp = BI_CLAMP_CLAMP_0_1, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_float32, - .src = { fma1.dest, cube_tsel.dest, BIR_INDEX_CONSTANT | 0 }, - .src_types = { nir_type_float32, nir_type_float32, nir_type_float32 }, - .constant.u64 = 0x3f000000, /* 0.5f */ - }; - - bi_emit(ctx, cubeface1); - bi_emit(ctx, cubeface2); - bi_emit(ctx, cube_ssel); - bi_emit(ctx, cube_tsel); - bi_emit(ctx, frcp); - bi_emit(ctx, fma1); - bi_emit(ctx, fma2); - bi_emit(ctx, fma3); - - /* Cube face is stored in bit[29:31], we don't apply the shift here - * because the TEXS_CUBE and TEXC instructions expect the face index to - * be at this position. - */ - *face = cubeface2.dest; - *s = fma2.dest; - *t = fma3.dest; -} - /* Emits a cube map descriptor, returning lower 32-bits and putting upper * 32-bits in passed pointer t */ @@ -3064,51 +1384,6 @@ bi_emit_texc_cube_coord(bi_builder *b, bi_index coord, bi_index *t) return bi_lshift_or_i32(b, and1, and2, bi_imm_u8(0)); } -static void -texc_pack_cube_coord(bi_context *ctx, unsigned coord, - unsigned *face_s, unsigned *t) -{ - unsigned face, s; - - bi_lower_cube_coord(ctx, coord, &face, &s, t); - - bi_instruction and1 = { - .type = BI_BITWISE, - .op.bitwise = BI_BITWISE_AND, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_uint32, - .src = { face, BIR_INDEX_CONSTANT | 0, BIR_INDEX_ZERO }, - .src_types = { nir_type_uint32, nir_type_uint32, nir_type_uint8 }, - .constant.u64 = 0xe0000000, - }; - - bi_instruction and2 = { - .type = BI_BITWISE, - .op.bitwise = BI_BITWISE_AND, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_uint32, - .src = { s, BIR_INDEX_CONSTANT | 0, BIR_INDEX_ZERO }, - .src_types = { nir_type_uint32, nir_type_uint32, nir_type_uint8 }, - .constant.u64 = 0x1fffffff, - }; - - bi_instruction or = { - .type = BI_BITWISE, - .op.bitwise = BI_BITWISE_OR, - .dest = bi_make_temp(ctx), - .dest_type = nir_type_uint32, - .src = { and1.dest, and2.dest, BIR_INDEX_ZERO }, - .src_types = { nir_type_uint32, nir_type_uint32, nir_type_uint8 }, - }; - - bi_emit(ctx, and1); - bi_emit(ctx, and2); - bi_emit(ctx, or); - - /* packed cube-face + s */ - *face_s = or.dest; -} - /* Map to the main texture op used. Some of these (txd in particular) will * lower to multiple texture ops with different opcodes (GRDESC_DER + TEX in * sequence). We assume that lowering is handled elsewhere. @@ -3324,205 +1599,6 @@ bi_emit_texc(bi_builder *b, nir_tex_instr *instr) bi_make_vec_to(b, bi_dest_index(&instr->dest), srcs, channels, 4, 32); } -static void -emit_texc(bi_context *ctx, nir_tex_instr *instr) -{ - /* TODO: support more with other encodings */ - assert(instr->sampler_index < 16); - - /* TODO: support more ops */ - switch (instr->op) { - case nir_texop_tex: - case nir_texop_txl: - case nir_texop_txb: - case nir_texop_txf: - case nir_texop_txf_ms: - break; - default: - unreachable("Unsupported texture op"); - } - - bi_instruction tex = { - .type = BI_TEXC, - .dest = pan_dest_index(&instr->dest), - .dest_type = instr->dest_type, - .src_types = { - /* Staging registers */ - nir_type_uint32, - nir_type_float32, nir_type_float32, - nir_type_uint32 - }, - .vector_channels = 4 - }; - - struct bifrost_texture_operation desc = { - .sampler_index_or_mode = instr->sampler_index, - .index = instr->texture_index, - .immediate_indices = 1, /* TODO */ - .op = bi_tex_op(instr->op), - .offset_or_bias_disable = false, /* TODO */ - .shadow_or_clamp_disable = instr->is_shadow, - .array = instr->is_array, - .dimension = bifrost_tex_format(instr->sampler_dim), - .format = bi_texture_format(instr->dest_type, BI_CLAMP_NONE), /* TODO */ - .mask = (1 << tex.vector_channels) - 1 - }; - - switch (desc.op) { - case BIFROST_TEX_OP_TEX: - desc.lod_or_fetch = BIFROST_LOD_MODE_COMPUTE; - break; - case BIFROST_TEX_OP_FETCH: - /* TODO: gathers */ - desc.lod_or_fetch = BIFROST_TEXTURE_FETCH_TEXEL; - break; - default: - unreachable("texture op unsupported"); - } - - /* 32-bit indices to be allocated as consecutive data registers. */ - unsigned dregs[BIFROST_TEX_DREG_COUNT] = { 0 }; - unsigned dregs_swiz[BIFROST_TEX_DREG_COUNT] = { 0 }; - - for (unsigned i = 0; i < instr->num_srcs; ++i) { - unsigned index = pan_src_index(&instr->src[i].src); - unsigned sz = nir_src_bit_size(instr->src[i].src); - ASSERTED nir_alu_type base = nir_tex_instr_src_type(instr, i); - nir_alu_type T = base | sz; - - switch (instr->src[i].src_type) { - case nir_tex_src_coord: - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { - texc_pack_cube_coord(ctx, index, - &tex.src[1], &tex.src[2]); - } else { - unsigned components = nir_src_num_components(instr->src[i].src); - - tex.src[1] = index; - tex.src[2] = index; - tex.swizzle[1][0] = 0; - - if (components >= 2) { - tex.swizzle[2][0] = 1; - } else { - /* Dummy for reg alloc to be happy */ - tex.swizzle[2][0] = 0; - } - - assert(components >= 1 && components <= 3); - - if (components < 3) { - /* nothing to do */ - } else if (desc.array) { - /* 2D array */ - dregs[BIFROST_TEX_DREG_ARRAY] = - bi_emit_array_index(ctx, index, T, - &dregs_swiz[BIFROST_TEX_DREG_ARRAY]); - } else { - /* 3D */ - dregs[BIFROST_TEX_DREG_Z_COORD] = index; - dregs_swiz[BIFROST_TEX_DREG_Z_COORD] = 2; - } - } - break; - - case nir_tex_src_lod: - if (desc.op == BIFROST_TEX_OP_TEX && - nir_src_is_const(instr->src[i].src) && - nir_src_as_uint(instr->src[i].src) == 0) { - desc.lod_or_fetch = BIFROST_LOD_MODE_ZERO; - } else if (desc.op == BIFROST_TEX_OP_TEX) { - assert(base == nir_type_float); - - assert(sz == 16 || sz == 32); - dregs[BIFROST_TEX_DREG_LOD] = - bi_emit_lod_88(ctx, index, sz == 16); - desc.lod_or_fetch = BIFROST_LOD_MODE_EXPLICIT; - } else { - assert(desc.op == BIFROST_TEX_OP_FETCH); - assert(base == nir_type_uint || base == nir_type_int); - assert(sz == 16 || sz == 32); - - dregs[BIFROST_TEX_DREG_LOD] = - bi_emit_lod_cube(ctx, index); - } - - break; - - case nir_tex_src_bias: - /* Upper 16-bits interpreted as a clamp, leave zero */ - assert(desc.op == BIFROST_TEX_OP_TEX); - assert(base == nir_type_float); - assert(sz == 16 || sz == 32); - dregs[BIFROST_TEX_DREG_LOD] = - bi_emit_lod_88(ctx, index, sz == 16); - desc.lod_or_fetch = BIFROST_LOD_MODE_BIAS; - break; - - case nir_tex_src_ms_index: - case nir_tex_src_offset: - if (desc.offset_or_bias_disable) - break; - - dregs[BIFROST_TEX_DREG_OFFSETMS] = - bi_emit_tex_offset_ms_index(ctx, instr); - if (dregs[BIFROST_TEX_DREG_OFFSETMS]) - desc.offset_or_bias_disable = true; - break; - - case nir_tex_src_comparator: - dregs[BIFROST_TEX_DREG_SHADOW] = index; - break; - - default: - unreachable("Unhandled src type in texc emit"); - } - } - - if (desc.op == BIFROST_TEX_OP_FETCH && !dregs[BIFROST_TEX_DREG_LOD]) - dregs[BIFROST_TEX_DREG_LOD] = bi_emit_lod_cube(ctx, 0); - - /* Allocate data registers contiguously. Index must not be marked SSA - * due to a quirk of RA for tied operands, could be fixed eventually */ - bi_instruction combine = { - .type = BI_COMBINE, - .dest_type = nir_type_uint32, - .dest = bi_make_temp_reg(ctx), - .src_types = { - nir_type_uint32, nir_type_uint32, - nir_type_uint32, nir_type_uint32, - }, - }; - - unsigned dreg_index = 0; - - for (unsigned i = 0; i < ARRAY_SIZE(dregs); ++i) { - assert(dreg_index < 4); - - if (dregs[i]) { - combine.swizzle[dreg_index][0] = dregs_swiz[i]; - combine.src[dreg_index++] = dregs[i]; - } - } - - if (dreg_index >= 1) { - /* Pass combined data registers together */ - tex.src[0] = combine.dest; - bi_emit(ctx, combine); - - for (unsigned i = 0; i < dreg_index; ++i) - tex.swizzle[0][i] = i; - } else { - tex.src[0] = bi_make_temp_reg(ctx); - } - - /* Pass the texture operation descriptor in src2 */ - tex.src[3] = BIR_INDEX_CONSTANT; - memcpy(&tex.constant.u64, &desc, sizeof(desc)); - - bi_emit(ctx, tex); -} - /* Simple textures ops correspond to NIR tex or txl with LOD = 0 on 2D (or cube * map, TODO) textures. Anything else needs a complete texture op. */ @@ -3546,40 +1622,6 @@ bi_is_simple_tex(nir_tex_instr *instr) return nir_src_is_const(lod) && nir_src_as_uint(lod) == 0; } -static void -emit_tex(bi_context *ctx, nir_tex_instr *instr) -{ - switch (instr->op) { - case nir_texop_txs: - bi_emit_sysval(ctx, &instr->instr, 4, 0); - return; - - case nir_texop_tex: - case nir_texop_txl: - case nir_texop_txb: - case nir_texop_txf: - case nir_texop_txf_ms: - break; - - default: - unreachable("Invalid texture operation"); - } - - nir_alu_type base = nir_alu_type_get_base_type(instr->dest_type); - unsigned sz = nir_dest_bit_size(instr->dest); - instr->dest_type = base | sz; - - bool is_normal = bi_is_simple_tex(instr); - bool is_2d = instr->sampler_dim == GLSL_SAMPLER_DIM_2D || - instr->sampler_dim == GLSL_SAMPLER_DIM_EXTERNAL; - bool is_f = base == nir_type_float && (sz == 16 || sz == 32); - - if (is_normal && is_2d && is_f && !instr->is_shadow && !instr->is_array) - emit_texs(ctx, instr); - else - emit_texc(ctx, instr); -} - static void bi_emit_tex(bi_builder *b, nir_tex_instr *instr) { @@ -3612,43 +1654,6 @@ bi_emit_tex(bi_builder *b, nir_tex_instr *instr) } static void -emit_instr(bi_context *ctx, struct nir_instr *instr) -{ - switch (instr->type) { - case nir_instr_type_load_const: - emit_load_const(ctx, nir_instr_as_load_const(instr)); - break; - - case nir_instr_type_intrinsic: - emit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); - break; - - case nir_instr_type_alu: - emit_alu(ctx, nir_instr_as_alu(instr)); - break; - - case nir_instr_type_tex: - emit_tex(ctx, nir_instr_as_tex(instr)); - break; - - case nir_instr_type_jump: - emit_jump(ctx, nir_instr_as_jump(instr)); - break; - - case nir_instr_type_ssa_undef: - unreachable("should've been lowered"); - - default: - unreachable("Unhandled instruction type"); - break; - } -} - -/* Prototype to avoid -Wunused-function warning during merge */ -void -bi_emit_instr(bi_builder *b, struct nir_instr *instr); - -void bi_emit_instr(bi_builder *b, struct nir_instr *instr) { switch (instr->type) { @@ -3712,38 +1717,6 @@ emit_block(bi_context *ctx, nir_block *block) return ctx->current_block; } -/* Emits an unconditional branch to the end of the current block, returning a - * pointer so the user can fill in details */ - -static bi_instruction * -bi_emit_branch(bi_context *ctx) -{ - bi_instruction branch = { - .type = BI_BRANCH, - .cond = BI_COND_ALWAYS - }; - - return bi_emit(ctx, branch); -} - -/* Sets a condition for a branch by examing the NIR condition. If we're - * familiar with the condition, we unwrap it to fold it into the branch - * instruction. Otherwise, we consume the condition directly. We - * generally use 1-bit booleans which allows us to use small types for - * the conditions. - */ - -static void -bi_set_branch_cond(bi_instruction *branch, nir_src *cond, bool invert) -{ - /* TODO: Try to unwrap instead of always bailing */ - branch->src[0] = pan_src_index(cond); - branch->src[1] = BIR_INDEX_ZERO; - branch->src_types[0] = branch->src_types[1] = nir_type_uint | - nir_src_bit_size(*cond); - branch->cond = invert ? BI_COND_EQ : BI_COND_NE; -} - /* Emits a direct branch based on a given condition. TODO: try to unwrap the * condition to optimize */ diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index 9ce8bb31f03..9bc62519469 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -1059,12 +1059,6 @@ pan_next_block(pan_block *block) return list_first_entry(&(block->link), pan_block, link); } -/* Special functions */ - -void bi_emit_fexp2(bi_context *ctx, nir_alu_instr *instr); -void bi_emit_flog2(bi_context *ctx, nir_alu_instr *instr); -void bi_emit_deriv(bi_context *ctx, nir_alu_instr *instr); - /* BIR manipulation */ bool bi_has_clamp(bi_instruction *ins); diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index 4e0f2b91613..dd9e7e1a440 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -27,7 +27,6 @@ libpanfrost_bifrost_files = files( 'bi_pack.c', 'bi_ra.c', 'bi_schedule.c', - 'bi_special.c', 'bi_tables.c', 'bir.c', 'bifrost_compile.c',