c2c8983cf4
The reason for doing this is two-fold:
1. These passes are likely to be shared with the Bifrost compiler
Therefore, we don't want to restrict them to Midgard
2. The coding style is different (NIR-style vs Panfrost-style)
The NIR passes are candidates for moving upstream into
compiler/nir, so don't block that off for stylistic reasons
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
346 lines
12 KiB
C
346 lines
12 KiB
C
/*
|
|
* Copyright (C) 2019 Collabora, Ltd.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*
|
|
* Authors (Collabora):
|
|
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
|
|
*/
|
|
|
|
/**
 * @file
 *
 * Implements framebuffer format conversions in software, specifically for
 * blend shaders on Midgard/Bifrost. Within the blend shader, the fragment
 * stage's load_output/store_output (more correctly, the equivalent derefs,
 * since this runs before I/O lowering) normally operate on purely vec4 float
 * ("nir") encodings. This lowering stage, to be run before I/O is lowered,
 * converts the native framebuffer format to a NIR encoding after loads and
 * vice versa before stores. This pass is designed for a single render target;
 * Midgard duplicates blend shaders for MRT to simplify everything.
 */
|
|
|
|
#include "compiler/nir/nir.h"
|
|
#include "compiler/nir/nir_builder.h"
|
|
#include "compiler/nir/nir_format_convert.h"
|
|
#include "nir_lower_blend.h"
|
|
#include "util/u_format.h"
|
|
|
|
/* Converters for UNORM8 formats, e.g. R8G8B8A8_UNORM */
|
|
|
|
static nir_ssa_def *
|
|
nir_float_to_unorm8(nir_builder *b, nir_ssa_def *c_float)
|
|
{
|
|
/* First, we degrade quality to fp16; we don't need the extra bits */
|
|
nir_ssa_def *degraded = nir_f2f16(b, c_float);
|
|
|
|
/* Scale from [0, 1] to [0, 255.0] */
|
|
nir_ssa_def *scaled = nir_fmul_imm(b, nir_fsat(b, degraded), 255.0);
|
|
|
|
/* Next, we type convert */
|
|
nir_ssa_def *converted = nir_u2u8(b, nir_f2u16(b,
|
|
nir_fround_even(b, scaled)));
|
|
|
|
return converted;
|
|
}
|
|
|
|
static nir_ssa_def *
|
|
nir_unorm8_to_float(nir_builder *b, nir_ssa_def *c_native)
|
|
{
|
|
/* First, we convert up from u8 to f16 */
|
|
nir_ssa_def *converted = nir_u2f16(b, nir_u2u16(b, c_native));
|
|
|
|
/* Next, we scale down from [0, 255.0] to [0, 1] */
|
|
nir_ssa_def *scaled = nir_fsat(b, nir_fmul_imm(b, converted, 1.0/255.0));
|
|
|
|
return scaled;
|
|
}
|
|
|
|
/* Converters for UNORM4 formats, packing the final result into 16-bit */
|
|
|
|
static nir_ssa_def *
|
|
nir_float_to_unorm4(nir_builder *b, nir_ssa_def *c_float)
|
|
{
|
|
/* First, we degrade quality to fp16; we don't need the extra bits */
|
|
nir_ssa_def *degraded = nir_f2f16(b, c_float);
|
|
|
|
/* Scale from [0, 1] to [0, 15.0] */
|
|
nir_ssa_def *scaled = nir_fmul_imm(b, nir_fsat(b, degraded), 15.0);
|
|
|
|
/* Next, we type convert to u16 */
|
|
nir_ssa_def *converted = nir_f2u16(b,
|
|
nir_fround_even(b, scaled));
|
|
|
|
/* In u16 land, we now need to pack */
|
|
nir_ssa_def *cr = nir_channel(b, converted, 0);
|
|
nir_ssa_def *cg = nir_channel(b, converted, 1);
|
|
nir_ssa_def *cb = nir_channel(b, converted, 2);
|
|
nir_ssa_def *ca = nir_channel(b, converted, 3);
|
|
|
|
nir_ssa_def *pack =
|
|
nir_ior(b,
|
|
nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 4))),
|
|
nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 8)), nir_ishl(b, ca, nir_imm_int(b, 12))));
|
|
|
|
return pack;
|
|
}
|
|
|
|
static nir_ssa_def *
|
|
nir_float_to_rgb10a2(nir_builder *b, nir_ssa_def *c_float, bool normalize)
|
|
{
|
|
nir_ssa_def *converted = c_float;
|
|
|
|
if (normalize) {
|
|
nir_ssa_def *scaled = nir_fmul(b, nir_fsat(b, c_float),
|
|
nir_imm_vec4(b, 1023.0, 1023.0, 1023.0, 3.0));
|
|
|
|
converted = nir_f2u32(b,
|
|
nir_fround_even(b, scaled));
|
|
}
|
|
|
|
nir_ssa_def *cr = nir_channel(b, converted, 0);
|
|
nir_ssa_def *cg = nir_channel(b, converted, 1);
|
|
nir_ssa_def *cb = nir_channel(b, converted, 2);
|
|
nir_ssa_def *ca = nir_channel(b, converted, 3);
|
|
|
|
nir_ssa_def *pack =
|
|
nir_ior(b,
|
|
nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 10))),
|
|
nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 20)), nir_ishl(b, ca, nir_imm_int(b, 30))));
|
|
|
|
return pack;
|
|
}
|
|
|
|
static nir_ssa_def *
|
|
nir_float_to_rgb5a1(nir_builder *b, nir_ssa_def *c_float)
|
|
{
|
|
nir_ssa_def *degraded = nir_f2f16(b, c_float);
|
|
|
|
nir_ssa_def *scaled = nir_fmul(b, nir_fsat(b, degraded),
|
|
nir_imm_vec4_16(b, 31.0, 31.0, 31.0, 1.0));
|
|
|
|
nir_ssa_def *converted = nir_f2u16(b,
|
|
nir_fround_even(b, scaled));
|
|
|
|
nir_ssa_def *cr = nir_channel(b, converted, 0);
|
|
nir_ssa_def *cg = nir_channel(b, converted, 1);
|
|
nir_ssa_def *cb = nir_channel(b, converted, 2);
|
|
nir_ssa_def *ca = nir_channel(b, converted, 3);
|
|
|
|
nir_ssa_def *pack =
|
|
nir_ior(b,
|
|
nir_ior(b, cr, nir_ishl(b, cg, nir_imm_int(b, 5))),
|
|
nir_ior(b, nir_ishl(b, cb, nir_imm_int(b, 10)), nir_ishl(b, ca, nir_imm_int(b, 15))));
|
|
|
|
return pack;
|
|
}
|
|
|
|
static nir_ssa_def *
|
|
nir_shader_to_native(nir_builder *b,
|
|
nir_ssa_def *c_shader,
|
|
const struct util_format_description *desc,
|
|
unsigned bits,
|
|
bool homogenous_bits)
|
|
{
|
|
bool float_or_pure_int =
|
|
util_format_is_float(desc->format) ||
|
|
util_format_is_pure_integer(desc->format);
|
|
|
|
if (util_format_is_unorm8(desc))
|
|
return nir_float_to_unorm8(b, c_shader);
|
|
else if (homogenous_bits && float_or_pure_int)
|
|
return c_shader; /* type is already correct */
|
|
|
|
//unsigned bgra[4] = { 2, 1, 0, 3 }; /* BGRA */
|
|
//c_shader = nir_swizzle(b, c_shader, swiz, 4);
|
|
|
|
/* Special formats */
|
|
switch (desc->format) {
|
|
case PIPE_FORMAT_B4G4R4A4_UNORM:
|
|
case PIPE_FORMAT_B4G4R4X4_UNORM:
|
|
case PIPE_FORMAT_A4R4_UNORM:
|
|
case PIPE_FORMAT_R4A4_UNORM:
|
|
case PIPE_FORMAT_A4B4G4R4_UNORM:
|
|
return nir_float_to_unorm4(b, c_shader);
|
|
|
|
case PIPE_FORMAT_R10G10B10A2_UNORM:
|
|
case PIPE_FORMAT_B10G10R10A2_UNORM:
|
|
case PIPE_FORMAT_R10G10B10X2_UNORM:
|
|
case PIPE_FORMAT_B10G10R10X2_UNORM:
|
|
return nir_float_to_rgb10a2(b, c_shader, true);
|
|
|
|
case PIPE_FORMAT_R10G10B10A2_UINT:
|
|
return nir_float_to_rgb10a2(b, c_shader, false);
|
|
|
|
case PIPE_FORMAT_B5G5R5A1_UNORM:
|
|
return nir_float_to_rgb5a1(b, c_shader);
|
|
|
|
case PIPE_FORMAT_R11G11B10_FLOAT:
|
|
return nir_format_pack_11f11f10f(b, c_shader);
|
|
|
|
default:
|
|
printf("%s\n", desc->name);
|
|
unreachable("Unknown format name");
|
|
}
|
|
}
|
|
|
|
static nir_ssa_def *
|
|
nir_native_to_shader(nir_builder *b,
|
|
nir_ssa_def *c_native,
|
|
const struct util_format_description *desc,
|
|
unsigned bits,
|
|
bool homogenous_bits)
|
|
{
|
|
bool float_or_pure_int =
|
|
util_format_is_float(desc->format) ||
|
|
util_format_is_pure_integer(desc->format);
|
|
|
|
if (util_format_is_unorm8(desc))
|
|
return nir_unorm8_to_float(b, c_native);
|
|
else if (homogenous_bits && float_or_pure_int)
|
|
return c_native; /* type is already correct */
|
|
else {
|
|
printf("%s\n", desc->name);
|
|
unreachable("Unknown format name");
|
|
}
|
|
}
|
|
|
|
void
|
|
nir_lower_framebuffer(nir_shader *shader, enum pipe_format format)
|
|
{
|
|
/* Blend shaders are represented as special fragment shaders */
|
|
assert(shader->info.stage == MESA_SHADER_FRAGMENT);
|
|
|
|
const struct util_format_description *format_desc =
|
|
util_format_description(format);
|
|
|
|
unsigned nr_channels = format_desc->nr_channels;
|
|
unsigned bits = format_desc->channel[0].size;
|
|
|
|
/* Do all channels have the same bit count? */
|
|
bool homogenous_bits = true;
|
|
|
|
for (unsigned c = 1; c < nr_channels; ++c)
|
|
homogenous_bits &= (format_desc->channel[c].size == bits);
|
|
|
|
if (format == PIPE_FORMAT_R11G11B10_FLOAT)
|
|
homogenous_bits = false;
|
|
|
|
/* Figure out the formats for the raw */
|
|
unsigned raw_bitsize_in = bits;
|
|
unsigned raw_bitsize_out = bits;
|
|
unsigned raw_out_components = 4;
|
|
|
|
/* We pack a 4-bit vec4 as 16-bit vec1 */
|
|
if ((homogenous_bits && bits == 4 && util_format_is_unorm(format)) || format == PIPE_FORMAT_B5G5R5A1_UNORM) {
|
|
raw_bitsize_out = 16;
|
|
raw_out_components = 1;
|
|
} else if (format == PIPE_FORMAT_R10G10B10A2_UNORM || format == PIPE_FORMAT_B10G10R10A2_UNORM || format == PIPE_FORMAT_R10G10B10A2_UINT || format == PIPE_FORMAT_R11G11B10_FLOAT) {
|
|
raw_bitsize_out = 32;
|
|
raw_out_components = 1;
|
|
}
|
|
|
|
nir_foreach_function(func, shader) {
|
|
nir_foreach_block(block, func->impl) {
|
|
nir_foreach_instr_safe(instr, block) {
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
continue;
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
|
|
|
bool is_load = intr->intrinsic == nir_intrinsic_load_deref;
|
|
bool is_store = intr->intrinsic == nir_intrinsic_store_deref;
|
|
|
|
if (!(is_load || is_store))
|
|
continue;
|
|
|
|
/* Don't worry about MRT */
|
|
nir_variable *var = nir_intrinsic_get_var(intr, 0);
|
|
|
|
if (var->data.location != FRAG_RESULT_COLOR)
|
|
continue;
|
|
|
|
nir_builder b;
|
|
nir_builder_init(&b, func->impl);
|
|
|
|
if (is_store) {
|
|
/* For stores, add conversion before */
|
|
b.cursor = nir_before_instr(instr);
|
|
|
|
/* Grab the input color */
|
|
nir_ssa_def *c_nir = nir_ssa_for_src(&b, intr->src[1], 4);
|
|
|
|
/* Format convert */
|
|
nir_ssa_def *converted = nir_shader_to_native(&b, c_nir, format_desc, bits, homogenous_bits);
|
|
|
|
if (util_format_is_float(format)) {
|
|
if (raw_bitsize_out == 16)
|
|
converted = nir_f2f16(&b, converted);
|
|
else if (raw_bitsize_out == 32)
|
|
converted = nir_f2f32(&b, converted);
|
|
} else {
|
|
converted = nir_i2i(&b, converted, raw_bitsize_out);
|
|
}
|
|
|
|
/* Rewrite to use a native store by creating a new intrinsic */
|
|
nir_intrinsic_instr *new =
|
|
nir_intrinsic_instr_create(shader, nir_intrinsic_store_raw_output_pan);
|
|
new->src[0] = nir_src_for_ssa(converted);
|
|
|
|
new->num_components = raw_out_components;
|
|
|
|
nir_builder_instr_insert(&b, &new->instr);
|
|
|
|
/* (And finally removing the old) */
|
|
nir_instr_remove(instr);
|
|
} else {
|
|
/* For loads, add conversion after */
|
|
b.cursor = nir_after_instr(instr);
|
|
|
|
/* Rewrite to use a native load by creating a new intrinsic */
|
|
|
|
nir_intrinsic_instr *new =
|
|
nir_intrinsic_instr_create(shader, nir_intrinsic_load_raw_output_pan);
|
|
|
|
new->num_components = 4;
|
|
|
|
unsigned bitsize = raw_bitsize_in;
|
|
nir_ssa_dest_init(&new->instr, &new->dest, 4, bitsize, NULL);
|
|
nir_builder_instr_insert(&b, &new->instr);
|
|
|
|
/* Convert the raw value */
|
|
nir_ssa_def *raw = &new->dest.ssa;
|
|
nir_ssa_def *converted = nir_native_to_shader(&b, raw, format_desc, bits, homogenous_bits);
|
|
|
|
/* Rewrite to use the converted value */
|
|
nir_src rewritten = nir_src_for_ssa(converted);
|
|
nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, rewritten, instr);
|
|
|
|
/* Finally, remove the old load */
|
|
nir_instr_remove(instr);
|
|
}
|
|
}
|
|
}
|
|
|
|
nir_metadata_preserve(func->impl, nir_metadata_block_index |
|
|
nir_metadata_dominance);
|
|
}
|
|
}
|