diff --git a/src/panfrost/bifrost/bi_special.c b/src/panfrost/bifrost/bi_special.c index 13ce3abd13a..8ee62e9e3ca 100644 --- a/src/panfrost/bifrost/bi_special.c +++ b/src/panfrost/bifrost/bi_special.c @@ -181,3 +181,98 @@ bi_emit_flog2(bi_context *ctx, nir_alu_instr *instr) /* TODO: G71 */ bi_emit_flog2_new(ctx, instr); } + +void +bi_emit_deriv(bi_context *ctx, nir_alu_instr *instr) /* Emits six instructions for nir_op_fddx / nir_op_fddy: a MOV of the lane ID, an AND and an ADD deriving two lane indices, two CLPER ops reading source 0 with each index, and an ADD combining the two results into the NIR destination. */ +{ + bi_instruction cur_lane = { /* MOV of the BIR_FAU_LANE_ID FAU source into a fresh temp */ + .type = BI_MOV, + .dest = bi_make_temp(ctx), + .dest_type = nir_type_uint32, + .src = { BIR_INDEX_FAU | BIR_FAU_LANE_ID }, + .src_types = { nir_type_uint32 } + }; + + bi_instruction lane1 = { /* lane1 = cur_lane AND constant */ + .type = BI_BITWISE, + .op.bitwise = BI_BITWISE_AND, + .dest = bi_make_temp(ctx), + .dest_type = nir_type_uint32, + .src = { + cur_lane.dest, + BIR_INDEX_CONSTANT | 0, + BIR_INDEX_ZERO, + }, + .src_types = { + nir_type_uint32, + nir_type_uint32, + nir_type_uint8, + }, + .constant.u64 = instr->op == nir_op_fddx ? 2 : 1, /* mask keeps only bit 1 of the lane ID for fddx, only bit 0 otherwise (fddy, per the emit_alu dispatch in this patch) */ + }; + + bi_instruction lane2 = { /* lane2 = lane1 + constant */ + .type = BI_IMATH, + .op.imath = BI_IMATH_ADD, + .dest = bi_make_temp(ctx), + .dest_type = nir_type_uint32, + .src = { + lane1.dest, + BIR_INDEX_CONSTANT | 0, + BIR_INDEX_ZERO, + }, + .src_types = { + nir_type_uint32, + nir_type_uint32, + nir_type_uint32, + }, + .constant.u64 = instr->op == nir_op_fddx ? 1 : 2, /* addend sets the bit the lane1 mask cleared: bit 0 for fddx, bit 1 otherwise */ + }; + + unsigned src = pan_src_index(&instr->src[0].src); /* index of ALU source 0, shared by both CLPER ops below */ + + bi_instruction clper1 = { /* CLPER with sources { src, lane1 }; NOTE(review): the second source is presumably the lane to read from - confirm against the CLPER definition */ + .type = BI_SPECIAL_ADD, + .op.special = ctx->arch == 6 ? BI_SPECIAL_CLPER_V6 : BI_SPECIAL_CLPER_V7, + .special.subgroup_sz = BI_CLPER_SUBGROUP_SZ_4, + .special.clper.lane_op_mod = BI_CLPER_LANE_OP_MOD_NONE, + .special.clper.inactive_res = BI_CLPER_INACTIVE_RES_ZERO, + .dest = bi_make_temp(ctx), + .dest_type = nir_type_uint32, + .src = { src, lane1.dest }, + .src_types = { nir_type_uint32, nir_type_uint32 }, + .swizzle[0][0] = instr->src[0].swizzle[0], /* only component 0 of the NIR source swizzle is forwarded */ + }; + + bi_instruction clper2 = { /* same fields as clper1 except that the second source is lane2 */ + .type = BI_SPECIAL_ADD, + .op.special = ctx->arch == 6 ? 
BI_SPECIAL_CLPER_V6 : BI_SPECIAL_CLPER_V7, + .special.subgroup_sz = BI_CLPER_SUBGROUP_SZ_4, + .special.clper.lane_op_mod = BI_CLPER_LANE_OP_MOD_NONE, + .special.clper.inactive_res = BI_CLPER_INACTIVE_RES_ZERO, + .dest = bi_make_temp(ctx), + .dest_type = nir_type_uint32, + .src = { src, lane2.dest }, + .src_types = { nir_type_uint32, nir_type_uint32 }, + .swizzle[0][0] = instr->src[0].swizzle[0], + }; + + nir_alu_type type = nir_type_float | + nir_dest_bit_size(instr->dest.dest); /* float type at the destination's bit size */ + + bi_instruction sub = { /* BI_ADD of clper2 and clper1, written to the NIR destination */ + .type = BI_ADD, + .src = { clper2.dest, clper1.dest }, + .src_types = { type, type }, + .src_neg[1] = true, /* presumably negates source 1, giving clper2 - clper1 - confirm src_neg semantics */ + .dest = pan_dest_index(&instr->dest.dest), + .dest_type = type, + }; + + bi_emit(ctx, cur_lane); /* emitted in dependency order: each instruction reads only temps written by earlier ones */ + bi_emit(ctx, lane1); + bi_emit(ctx, lane2); + bi_emit(ctx, clper1); + bi_emit(ctx, clper2); + bi_emit(ctx, sub); +} diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 989f038bcaf..2e4d156e278 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -1152,6 +1152,10 @@ emit_alu(bi_context *ctx, nir_alu_instr *instr) case nir_op_flog2: bi_emit_flog2(ctx, instr); return; + case nir_op_fddx: + case nir_op_fddy: + bi_emit_deriv(ctx, instr); /* both derivative opcodes share one emitter, which branches on instr->op */ + return; default: break; } diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index 22cc615ba2b..c044b5803ed 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -706,6 +706,7 @@ pan_next_block(pan_block *block) void bi_emit_fexp2(bi_context *ctx, nir_alu_instr *instr); void bi_emit_flog2(bi_context *ctx, nir_alu_instr *instr); +void bi_emit_deriv(bi_context *ctx, nir_alu_instr *instr); /* BIR manipulation */