freedreno/ir3: sample-shading support
Add compiler support for the OES_sample_shading, OES_sample_variables, and OES_shader_multisample_interpolation extensions.
Signed-off-by: Rob Clark <robdclark@chromium.org>
This commit is contained in:
@@ -1311,6 +1311,9 @@ INSTR1(SQRT)
|
||||
/* cat5 instructions: */
|
||||
INSTR1(DSX)
|
||||
INSTR1(DSY)
|
||||
INSTR1F(3D, DSX)
|
||||
INSTR1F(3D, DSY)
|
||||
INSTR1(RGETPOS)
|
||||
|
||||
static inline struct ir3_instruction *
|
||||
ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
|
||||
|
||||
@@ -1127,6 +1127,55 @@ static void add_sysval_input(struct ir3_context *ctx, gl_system_value slot,
|
||||
add_sysval_input_compmask(ctx, slot, 0x1, instr);
|
||||
}
|
||||
|
||||
static struct ir3_instruction *
|
||||
get_barycentric_centroid(struct ir3_context *ctx)
|
||||
{
|
||||
if (!ctx->ij_centroid) {
|
||||
struct ir3_instruction *xy[2];
|
||||
struct ir3_instruction *ij;
|
||||
|
||||
ij = create_input_compmask(ctx, 0, 0x3);
|
||||
ir3_split_dest(ctx->block, xy, ij, 0, 2);
|
||||
|
||||
ctx->ij_centroid = ir3_create_collect(ctx, xy, 2);
|
||||
|
||||
add_sysval_input_compmask(ctx,
|
||||
SYSTEM_VALUE_BARYCENTRIC_CENTROID,
|
||||
0x3, ij);
|
||||
}
|
||||
|
||||
return ctx->ij_centroid;
|
||||
}
|
||||
|
||||
static struct ir3_instruction *
|
||||
get_barycentric_sample(struct ir3_context *ctx)
|
||||
{
|
||||
if (!ctx->ij_sample) {
|
||||
struct ir3_instruction *xy[2];
|
||||
struct ir3_instruction *ij;
|
||||
|
||||
ij = create_input_compmask(ctx, 0, 0x3);
|
||||
ir3_split_dest(ctx->block, xy, ij, 0, 2);
|
||||
|
||||
ctx->ij_sample = ir3_create_collect(ctx, xy, 2);
|
||||
|
||||
add_sysval_input_compmask(ctx,
|
||||
SYSTEM_VALUE_BARYCENTRIC_SAMPLE,
|
||||
0x3, ij);
|
||||
}
|
||||
|
||||
return ctx->ij_sample;
|
||||
}
|
||||
|
||||
static struct ir3_instruction *
|
||||
get_barycentric_pixel(struct ir3_context *ctx)
|
||||
{
|
||||
/* TODO when tgsi_to_nir supports "new-style" FS inputs switch
|
||||
* this to create ij_pixel only on demand:
|
||||
*/
|
||||
return ctx->ij_pixel;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
@@ -1168,13 +1217,40 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
case nir_intrinsic_load_ubo:
|
||||
emit_intrinsic_load_ubo(ctx, intr, dst);
|
||||
break;
|
||||
case nir_intrinsic_load_barycentric_centroid:
|
||||
case nir_intrinsic_load_barycentric_pixel:
|
||||
/* NOTE: we still pre-create ij_pixel just to keep things working with
|
||||
* nir producers that create "old style" frag shader inputs (ie. just
|
||||
* load_input, vs load_barycentric_* + load_interpolated_input)
|
||||
case nir_intrinsic_load_sample_pos_from_id: {
|
||||
/* NOTE: blob seems to always use TYPE_F16 and then cov.f16f32,
|
||||
* but that doesn't seem necessary.
|
||||
*/
|
||||
ir3_split_dest(b, dst, ctx->ij_pixel, 0, 2);
|
||||
struct ir3_instruction *offset =
|
||||
ir3_RGETPOS(b, ir3_get_src(ctx, &intr->src[0])[0], 0);
|
||||
offset->regs[0]->wrmask = 0x3;
|
||||
offset->cat5.type = TYPE_F32;
|
||||
|
||||
ir3_split_dest(b, dst, offset, 0, 2);
|
||||
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_size_ir3:
|
||||
if (!ctx->ij_size) {
|
||||
ctx->ij_size = create_input(ctx, 0);
|
||||
|
||||
add_sysval_input(ctx, SYSTEM_VALUE_BARYCENTRIC_SIZE,
|
||||
ctx->ij_size);
|
||||
}
|
||||
dst[0] = ctx->ij_size;
|
||||
break;
|
||||
case nir_intrinsic_load_barycentric_centroid:
|
||||
ir3_split_dest(b, dst, get_barycentric_centroid(ctx), 0, 2);
|
||||
break;
|
||||
case nir_intrinsic_load_barycentric_sample:
|
||||
if (ctx->so->key.msaa) {
|
||||
ir3_split_dest(b, dst, get_barycentric_sample(ctx), 0, 2);
|
||||
} else {
|
||||
ir3_split_dest(b, dst, get_barycentric_pixel(ctx), 0, 2);
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_barycentric_pixel:
|
||||
ir3_split_dest(b, dst, get_barycentric_pixel(ctx), 0, 2);
|
||||
break;
|
||||
case nir_intrinsic_load_interpolated_input:
|
||||
idx = nir_intrinsic_base(intr);
|
||||
@@ -1345,6 +1421,8 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
dst[0] = ctx->instance_id;
|
||||
break;
|
||||
case nir_intrinsic_load_sample_id:
|
||||
ctx->so->per_samp = true;
|
||||
/* fall-thru */
|
||||
case nir_intrinsic_load_sample_id_no_per_sample:
|
||||
if (!ctx->samp_id) {
|
||||
ctx->samp_id = create_input(ctx, 0);
|
||||
@@ -2282,6 +2360,12 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
|
||||
so->inputs[n].interpolate = in->data.interpolation;
|
||||
|
||||
if (ctx->so->type == MESA_SHADER_FRAGMENT) {
|
||||
|
||||
/* if any varyings have 'sample' qualifier, that triggers us
|
||||
* to run in per-sample mode:
|
||||
*/
|
||||
so->per_samp |= in->data.sample;
|
||||
|
||||
for (int i = 0; i < ncomp; i++) {
|
||||
struct ir3_instruction *instr = NULL;
|
||||
unsigned idx = (n * 4) + i + frac;
|
||||
@@ -2457,6 +2541,9 @@ setup_output(struct ir3_context *ctx, nir_variable *out)
|
||||
case FRAG_RESULT_COLOR:
|
||||
so->color0_mrt = 1;
|
||||
break;
|
||||
case FRAG_RESULT_SAMPLE_MASK:
|
||||
so->writes_smask = true;
|
||||
break;
|
||||
default:
|
||||
if (slot >= FRAG_RESULT_DATA0)
|
||||
break;
|
||||
|
||||
@@ -65,7 +65,7 @@ struct ir3_context {
|
||||
* inputs. So we do all the input tracking normally and fix
|
||||
* things up after compile_instructions()
|
||||
*/
|
||||
struct ir3_instruction *ij_pixel;
|
||||
struct ir3_instruction *ij_pixel, *ij_sample, *ij_centroid, *ij_size;
|
||||
|
||||
/* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */
|
||||
struct ir3_instruction *frag_face, *frag_coord;
|
||||
|
||||
@@ -262,8 +262,15 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir)
|
||||
NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size,
|
||||
(nir_lower_io_options)0);
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
/* NOTE: lower load_barycentric_at_sample first, since it
|
||||
* produces load_barycentric_at_offset:
|
||||
*/
|
||||
NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_sample);
|
||||
NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_offset);
|
||||
|
||||
NIR_PASS_V(nir, ir3_nir_move_varying_inputs);
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
|
||||
|
||||
@@ -409,6 +416,10 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
dump_reg(out, "pos (ij_pixel)",
|
||||
ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_PIXEL));
|
||||
dump_reg(out, "pos (ij_centroid)",
|
||||
ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_CENTROID));
|
||||
dump_reg(out, "pos (ij_size)",
|
||||
ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_SIZE));
|
||||
dump_output(out, so, FRAG_RESULT_DEPTH, "posz");
|
||||
if (so->color0_mrt) {
|
||||
dump_output(out, so, FRAG_RESULT_COLOR, "color");
|
||||
|
||||
@@ -154,6 +154,8 @@ struct ir3_shader_key {
|
||||
/*
|
||||
* Fragment shader variant parameters:
|
||||
*/
|
||||
unsigned sample_shading : 1;
|
||||
unsigned msaa : 1;
|
||||
unsigned color_two_side : 1;
|
||||
unsigned half_precision : 1;
|
||||
/* used when shader needs to handle flat varyings (a4xx)
|
||||
@@ -389,7 +391,7 @@ struct ir3_shader_variant {
|
||||
uint8_t slot;
|
||||
uint8_t regid;
|
||||
} outputs[16 + 2]; /* +POSITION +PSIZE */
|
||||
bool writes_pos, writes_psize;
|
||||
bool writes_pos, writes_smask, writes_psize;
|
||||
|
||||
/* attributes (VS) / varyings (FS):
|
||||
* Note that sysval's should come *after* normal inputs.
|
||||
@@ -439,6 +441,8 @@ struct ir3_shader_variant {
|
||||
/* do we have kill, image write, etc (which prevents early-z): */
|
||||
bool no_earlyz;
|
||||
|
||||
bool per_samp;
|
||||
|
||||
/* Layout of constant registers, each section (in vec4). Pointer size
|
||||
* is 32b (a3xx, a4xx), or 64b (a5xx+), which affects the size of the
|
||||
* UBO and stream-out consts.
|
||||
|
||||
Reference in New Issue
Block a user