kk: enable shaderClipDistance

Since Metal doesn't pass clip distances into the fragment shader, we have to
do it ourselves. The CLIP_DIST0/1 varying slots are used to represent the
user-defined varyings that carry the clip distances from the vertex shader to
the fragment shader, and a new intrinsic is added to represent the write to
the built-in clip_distance variable. Since the CLIP_DIST0/1 varying slots are
not affected by opt_varyings, the vertex and fragment interfaces can
potentially mismatch, so the machinery in msl_iomap.c is refactored to allow
these slots to be output as a series of scalars rather than vectors.

Reviewed-by: Aitor Camacho <aitor@lunarg.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38839>
This commit is contained in:
Arcady Goldmints-Orlov
2025-10-21 16:02:04 -04:00
parent 6d6634e805
commit 68bb5d9e49
9 changed files with 214 additions and 117 deletions

View File

@@ -2898,3 +2898,4 @@ load("depth_texture_kk", [1], [IMAGE_DIM, IMAGE_ARRAY], [CAN_ELIMINATE])
intrinsic("load_sampler_handle_kk", [1], 1, [],
flags=[CAN_ELIMINATE, CAN_REORDER],
bit_sizes=[16])
store("clip_distance_kk", [], [BASE])

View File

@@ -56,86 +56,77 @@ static const char *VERTEX_OUTPUT_TYPE = "VertexOut";
static const char *FRAGMENT_OUTPUT_TYPE = "FragmentOut";
/* Mapping from NIR's varying slots to the generated struct member name */
static const char *VARYING_SLOT_NAME[NUM_TOTAL_VARYING_SLOTS] = {
[VARYING_SLOT_POS] = "position",
[VARYING_SLOT_PSIZ] = "point_size",
[VARYING_SLOT_PRIMITIVE_ID] = "primitive_id",
[VARYING_SLOT_LAYER] = "layer",
[VARYING_SLOT_VIEWPORT] = "viewport_index",
[VARYING_SLOT_VAR0] = "vary_00",
[VARYING_SLOT_VAR1] = "vary_01",
[VARYING_SLOT_VAR2] = "vary_02",
[VARYING_SLOT_VAR3] = "vary_03",
[VARYING_SLOT_VAR4] = "vary_04",
[VARYING_SLOT_VAR5] = "vary_05",
[VARYING_SLOT_VAR6] = "vary_06",
[VARYING_SLOT_VAR7] = "vary_07",
[VARYING_SLOT_VAR8] = "vary_08",
[VARYING_SLOT_VAR9] = "vary_09",
[VARYING_SLOT_VAR10] = "vary_10",
[VARYING_SLOT_VAR11] = "vary_11",
[VARYING_SLOT_VAR12] = "vary_12",
[VARYING_SLOT_VAR13] = "vary_13",
[VARYING_SLOT_VAR14] = "vary_14",
[VARYING_SLOT_VAR15] = "vary_15",
[VARYING_SLOT_VAR16] = "vary_16",
[VARYING_SLOT_VAR17] = "vary_17",
[VARYING_SLOT_VAR18] = "vary_18",
[VARYING_SLOT_VAR19] = "vary_19",
[VARYING_SLOT_VAR20] = "vary_20",
[VARYING_SLOT_VAR21] = "vary_21",
[VARYING_SLOT_VAR22] = "vary_22",
[VARYING_SLOT_VAR23] = "vary_23",
[VARYING_SLOT_VAR24] = "vary_24",
[VARYING_SLOT_VAR25] = "vary_25",
[VARYING_SLOT_VAR26] = "vary_26",
[VARYING_SLOT_VAR27] = "vary_27",
[VARYING_SLOT_VAR28] = "vary_28",
[VARYING_SLOT_VAR29] = "vary_29",
[VARYING_SLOT_VAR30] = "vary_30",
[VARYING_SLOT_VAR31] = "vary_31",
/* Per-slot description of how a NIR varying slot is spelled in the generated
 * MSL interface structs. Indexed by NIR varying slot; slots without an entry
 * are zero-initialized (NULL name, both flags false).
 */
static const struct {
/* Base identifier printed for the struct member; also printed inside the
 * member's [[...]] attribute.
 */
const char *name;
/* When true the attribute is emitted as [[user(<name>)]], otherwise as
 * [[<name>]].
 */
bool user;
/* When true the printed name gets a per-component "_x"/"_y"/"_z"/"_w"
 * suffix, so the slot is declared as individual scalars.
 */
bool scalarized;
} VARYING_SLOT_INFO[NUM_TOTAL_VARYING_SLOTS] = {
[VARYING_SLOT_POS] = {"position"},
[VARYING_SLOT_PSIZ] = {"point_size"},
[VARYING_SLOT_PRIMITIVE_ID] = {"primitive_id"},
[VARYING_SLOT_LAYER] = {"render_target_array_index"},
[VARYING_SLOT_VIEWPORT] = {"viewport_array_index"},
/* Clip distance slots are the only scalarized entries in this table. */
[VARYING_SLOT_CLIP_DIST0] = {"clip_0", .user = true, .scalarized = true},
[VARYING_SLOT_CLIP_DIST1] = {"clip_1", .user = true, .scalarized = true},
[VARYING_SLOT_VAR0] = {"vary_00", .user = true},
[VARYING_SLOT_VAR1] = {"vary_01", .user = true},
[VARYING_SLOT_VAR2] = {"vary_02", .user = true},
[VARYING_SLOT_VAR3] = {"vary_03", .user = true},
[VARYING_SLOT_VAR4] = {"vary_04", .user = true},
[VARYING_SLOT_VAR5] = {"vary_05", .user = true},
[VARYING_SLOT_VAR6] = {"vary_06", .user = true},
[VARYING_SLOT_VAR7] = {"vary_07", .user = true},
[VARYING_SLOT_VAR8] = {"vary_08", .user = true},
[VARYING_SLOT_VAR9] = {"vary_09", .user = true},
[VARYING_SLOT_VAR10] = {"vary_10", .user = true},
[VARYING_SLOT_VAR11] = {"vary_11", .user = true},
[VARYING_SLOT_VAR12] = {"vary_12", .user = true},
[VARYING_SLOT_VAR13] = {"vary_13", .user = true},
[VARYING_SLOT_VAR14] = {"vary_14", .user = true},
[VARYING_SLOT_VAR15] = {"vary_15", .user = true},
[VARYING_SLOT_VAR16] = {"vary_16", .user = true},
[VARYING_SLOT_VAR17] = {"vary_17", .user = true},
[VARYING_SLOT_VAR18] = {"vary_18", .user = true},
[VARYING_SLOT_VAR19] = {"vary_19", .user = true},
[VARYING_SLOT_VAR20] = {"vary_20", .user = true},
[VARYING_SLOT_VAR21] = {"vary_21", .user = true},
[VARYING_SLOT_VAR22] = {"vary_22", .user = true},
[VARYING_SLOT_VAR23] = {"vary_23", .user = true},
[VARYING_SLOT_VAR24] = {"vary_24", .user = true},
[VARYING_SLOT_VAR25] = {"vary_25", .user = true},
[VARYING_SLOT_VAR26] = {"vary_26", .user = true},
[VARYING_SLOT_VAR27] = {"vary_27", .user = true},
[VARYING_SLOT_VAR28] = {"vary_28", .user = true},
[VARYING_SLOT_VAR29] = {"vary_29", .user = true},
[VARYING_SLOT_VAR30] = {"vary_30", .user = true},
[VARYING_SLOT_VAR31] = {"vary_31", .user = true},
};
/* Mapping from NIR varying slot to the MSL struct member attribute. */
static const char *VARYING_SLOT_SEMANTIC[NUM_TOTAL_VARYING_SLOTS] = {
[VARYING_SLOT_POS] = "[[position]]",
[VARYING_SLOT_PSIZ] = "[[point_size]]",
[VARYING_SLOT_PRIMITIVE_ID] = "[[primitive_id]]",
[VARYING_SLOT_LAYER] = "[[render_target_array_index]]",
[VARYING_SLOT_VIEWPORT] = "[[viewport_array_index]]",
[VARYING_SLOT_VAR0] = "[[user(vary_00)]]",
[VARYING_SLOT_VAR1] = "[[user(vary_01)]]",
[VARYING_SLOT_VAR2] = "[[user(vary_02)]]",
[VARYING_SLOT_VAR3] = "[[user(vary_03)]]",
[VARYING_SLOT_VAR4] = "[[user(vary_04)]]",
[VARYING_SLOT_VAR5] = "[[user(vary_05)]]",
[VARYING_SLOT_VAR6] = "[[user(vary_06)]]",
[VARYING_SLOT_VAR7] = "[[user(vary_07)]]",
[VARYING_SLOT_VAR8] = "[[user(vary_08)]]",
[VARYING_SLOT_VAR9] = "[[user(vary_09)]]",
[VARYING_SLOT_VAR10] = "[[user(vary_10)]]",
[VARYING_SLOT_VAR11] = "[[user(vary_11)]]",
[VARYING_SLOT_VAR12] = "[[user(vary_12)]]",
[VARYING_SLOT_VAR13] = "[[user(vary_13)]]",
[VARYING_SLOT_VAR14] = "[[user(vary_14)]]",
[VARYING_SLOT_VAR15] = "[[user(vary_15)]]",
[VARYING_SLOT_VAR16] = "[[user(vary_16)]]",
[VARYING_SLOT_VAR17] = "[[user(vary_17)]]",
[VARYING_SLOT_VAR18] = "[[user(vary_18)]]",
[VARYING_SLOT_VAR19] = "[[user(vary_19)]]",
[VARYING_SLOT_VAR20] = "[[user(vary_20)]]",
[VARYING_SLOT_VAR21] = "[[user(vary_21)]]",
[VARYING_SLOT_VAR22] = "[[user(vary_22)]]",
[VARYING_SLOT_VAR23] = "[[user(vary_23)]]",
[VARYING_SLOT_VAR24] = "[[user(vary_24)]]",
[VARYING_SLOT_VAR25] = "[[user(vary_25)]]",
[VARYING_SLOT_VAR26] = "[[user(vary_26)]]",
[VARYING_SLOT_VAR27] = "[[user(vary_27)]]",
[VARYING_SLOT_VAR28] = "[[user(vary_28)]]",
[VARYING_SLOT_VAR29] = "[[user(vary_29)]]",
[VARYING_SLOT_VAR30] = "[[user(vary_30)]]",
[VARYING_SLOT_VAR31] = "[[user(vary_31)]]",
};
/* Print the MSL struct member name for a varying slot.
 *
 * Non-scalarized slots print just the table name. Scalarized slots append
 * "_" plus the letter of "xyzw" selected by `component`, so each component
 * gets its own identifier. `component` is ignored for non-scalarized slots.
 */
static void
varying_slot_name(struct nir_to_msl_ctx *ctx, unsigned location,
                  unsigned component)
{
   const char *base = VARYING_SLOT_INFO[location].name;

   if (!VARYING_SLOT_INFO[location].scalarized) {
      P(ctx, "%s", base);
      return;
   }

   P(ctx, "%s_%c", base, "xyzw"[component]);
}
/* Print the MSL attribute that follows a varying's struct member.
 *
 * Slots flagged `user` in VARYING_SLOT_INFO are emitted as
 * [[user(<member name>)]]; every other slot is emitted as [[<member name>]].
 * The member name is produced by varying_slot_name(), so scalarized slots
 * carry their per-component suffix inside the attribute as well.
 */
static void
varying_slot_semantic(struct nir_to_msl_ctx *ctx, unsigned location,
                      unsigned component)
{
   const bool is_user = VARYING_SLOT_INFO[location].user;

   P(ctx, "%s", is_user ? "[[user(" : "[[");
   varying_slot_name(ctx, location, component);
   P(ctx, "%s", is_user ? ")]]" : "]]");
}
/* Mapping from NIR fragment output slot to MSL struct member name */
static const char *FS_OUTPUT_NAME[] = {
@@ -177,12 +168,23 @@ vs_output_block(nir_shader *shader, struct nir_to_msl_ctx *ctx)
ctx->indentlevel++;
u_foreach_bit64(location, shader->info.outputs_written) {
struct io_slot_info info = ctx->outputs_info[location];
bool scalarized = VARYING_SLOT_INFO[location].scalarized;
const char *type = alu_type_to_string(info.type);
const char *vector_suffix = vector_suffixes[info.num_components];
P_IND(ctx, "%s%s %s %s;\n", type, vector_suffix,
VARYING_SLOT_NAME[location], VARYING_SLOT_SEMANTIC[location]);
const char *vector_suffix =
scalarized ? "" : vector_suffixes[info.num_components];
unsigned components = scalarized ? info.num_components : 1;
for (int c = 0; c < components; c++) {
P_IND(ctx, "%s%s ", type, vector_suffix);
varying_slot_name(ctx, location, c);
P(ctx, " ");
varying_slot_semantic(ctx, location, c);
P(ctx, ";\n");
}
}
if (shader->info.clip_distance_array_size)
P_IND(ctx, "float gl_ClipDistance [[clip_distance]] [%d];",
shader->info.clip_distance_array_size);
ctx->indentlevel--;
P(ctx, "};\n");
}
@@ -195,8 +197,10 @@ fs_input_block(nir_shader *shader, struct nir_to_msl_ctx *ctx)
ctx->indentlevel++;
u_foreach_bit64(location, shader->info.inputs_read) {
struct io_slot_info info = ctx->inputs_info[location];
bool scalarized = VARYING_SLOT_INFO[location].scalarized;
const char *type = alu_type_to_string(info.type);
const char *vector_suffix = vector_suffixes[info.num_components];
const char *vector_suffix =
scalarized ? "" : vector_suffixes[info.num_components];
const char *interp = "";
switch (info.interpolation) {
case INTERP_MODE_NOPERSPECTIVE:
@@ -217,9 +221,14 @@ fs_input_block(nir_shader *shader, struct nir_to_msl_ctx *ctx)
interp = "[[sample_perspective]]";
break;
}
P_IND(ctx, "%s%s %s %s %s;\n", type, vector_suffix,
VARYING_SLOT_NAME[location], VARYING_SLOT_SEMANTIC[location],
interp);
unsigned components = scalarized ? info.num_components : 1;
for (int c = 0; c < components; c++) {
P_IND(ctx, "%s%s ", type, vector_suffix);
varying_slot_name(ctx, location, c);
P(ctx, " ");
varying_slot_semantic(ctx, location, c);
P(ctx, " %s;\n", interp);
}
}
/* Enable reading from framebuffer */
@@ -253,6 +262,7 @@ fs_output_block(nir_shader *shader, struct nir_to_msl_ctx *ctx)
P(ctx, "%s [[depth(%s)]];\n", FS_OUTPUT_NAME[location],
depth_layout_arg[depth_layout]);
} else {
// TODO: scalarized fs outputs
P(ctx, "%s [[%s]];\n", FS_OUTPUT_NAME[location],
FS_OUTPUT_SEMANTIC[location]);
}
@@ -422,28 +432,53 @@ msl_emit_output_var(struct nir_to_msl_ctx *ctx, nir_shader *shader)
}
}
const char *
msl_output_name(struct nir_to_msl_ctx *ctx, unsigned location)
void
msl_output_name(struct nir_to_msl_ctx *ctx, unsigned location,
unsigned component)
{
P(ctx, "out.")
switch (ctx->shader->info.stage) {
case MESA_SHADER_VERTEX:
return VARYING_SLOT_NAME[location];
varying_slot_name(ctx, location, component);
break;
case MESA_SHADER_FRAGMENT:
return FS_OUTPUT_NAME[location];
P(ctx, "%s", FS_OUTPUT_NAME[location]);
break;
default:
assert(0);
return "";
UNREACHABLE("Invalid shader stage");
}
}
const char *
msl_input_name(struct nir_to_msl_ctx *ctx, unsigned location)
void
msl_input_name(struct nir_to_msl_ctx *ctx, unsigned location,
unsigned component)
{
P(ctx, "in.");
switch (ctx->shader->info.stage) {
case MESA_SHADER_FRAGMENT:
return VARYING_SLOT_NAME[location];
varying_slot_name(ctx, location, component);
break;
default:
assert(0);
return "";
UNREACHABLE("Invalid shader stage");
}
}
/* Number of components of the MSL member backing an input location.
 *
 * In a fragment shader, a slot marked scalarized in VARYING_SLOT_INFO is
 * reported as having 1 component; otherwise the count gathered in
 * ctx->inputs_info is returned.
 */
uint32_t
msl_input_num_components(struct nir_to_msl_ctx *ctx, uint32_t location)
{
   const bool scalar_member =
      ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
      VARYING_SLOT_INFO[location].scalarized;

   return scalar_member ? 1 : ctx->inputs_info[location].num_components;
}
/* Number of components of the MSL member backing an output location.
 *
 * In a vertex shader, a slot marked scalarized in VARYING_SLOT_INFO is
 * reported as having 1 component; otherwise the count gathered in
 * ctx->outputs_info is returned.
 */
uint32_t
msl_output_num_components(struct nir_to_msl_ctx *ctx, uint32_t location)
{
   const bool scalar_member =
      ctx->shader->info.stage == MESA_SHADER_VERTEX &&
      VARYING_SLOT_INFO[location].scalarized;

   return scalar_member ? 1 : ctx->outputs_info[location].num_components;
}

View File

@@ -3,6 +3,7 @@
* Copyright 2025 Google LLC
* SPDX-License-Identifier: MIT
*/
#include "msl_private.h"
#include "nir_to_msl.h"
#include "nir.h"
@@ -281,3 +282,30 @@ msl_nir_vs_io_types(nir_shader *nir)
return nir_shader_intrinsics_pass(nir, msl_vs_io_types, nir_metadata_all,
NULL);
}
/* Intrinsic callback: mirror clip-distance output stores into the
 * store_clip_distance_kk intrinsic.
 *
 * For every store_output whose location is CLIP_DIST0 or CLIP_DIST1, a
 * store_clip_distance_kk of the same source value is inserted right after
 * the original store; the original store is left in place. Returns true
 * when such a store was handled, false for every other intrinsic.
 */
static bool
lower_clip_distance(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic != nir_intrinsic_store_output)
      return false;

   const nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
   const bool is_clip_slot = sem.location == VARYING_SLOT_CLIP_DIST0 ||
                             sem.location == VARYING_SLOT_CLIP_DIST1;
   if (!is_clip_slot)
      return false;

   /* Flatten (slot, component) into one index: four components per slot,
    * counted from CLIP_DIST0.
    */
   const unsigned slot = sem.location - VARYING_SLOT_CLIP_DIST0;
   const unsigned index = slot * 4 + nir_intrinsic_component(intr);

   b->cursor = nir_after_instr(&intr->instr);
   nir_store_clip_distance_kk(b, intr->src[0].ssa, .base = index);
   return true;
}
/* Run lower_clip_distance over every intrinsic in the shader.
 *
 * Returns whatever nir_shader_intrinsics_pass reports for the pass.
 * NOTE(review): the pass inserts instructions yet declares nir_metadata_all
 * as preserved -- confirm that is intended.
 */
bool
msl_nir_lower_clip_distance(nir_shader *nir)
{
   const bool progress = nir_shader_intrinsics_pass(
      nir, lower_clip_distance, nir_metadata_all, NULL);

   return progress;
}

View File

@@ -66,9 +66,17 @@ void msl_gather_io_info(struct nir_to_msl_ctx *ctx,
struct io_slot_info *info_array_input,
struct io_slot_info *info_array_output);
const char *msl_input_name(struct nir_to_msl_ctx *ctx, unsigned location);
void msl_input_name(struct nir_to_msl_ctx *ctx, unsigned location,
unsigned component);
const char *msl_output_name(struct nir_to_msl_ctx *ctx, unsigned location);
void msl_output_name(struct nir_to_msl_ctx *ctx, unsigned location,
unsigned component);
uint32_t msl_input_num_components(struct nir_to_msl_ctx *ctx,
uint32_t location);
uint32_t msl_output_num_components(struct nir_to_msl_ctx *ctx,
uint32_t location);
bool msl_src_is_float(struct nir_to_msl_ctx *ctx, nir_src *src);
bool msl_def_is_sampler(struct nir_to_msl_ctx *ctx, nir_def *def);
@@ -76,3 +84,5 @@ bool msl_def_is_sampler(struct nir_to_msl_ctx *ctx, nir_def *def);
void msl_nir_lower_subgroups(nir_shader *nir);
bool msl_nir_lower_algebraic_late(nir_shader *shader);
bool msl_nir_lower_clip_distance(nir_shader *nir);

View File

@@ -371,6 +371,9 @@ infer_types_from_intrinsic(struct hash_table *types, nir_intrinsic_instr *instr)
case nir_intrinsic_load_sampler_handle_kk:
set_type(types, &instr->def, TYPE_SAMPLER);
break;
case nir_intrinsic_store_clip_distance_kk:
set_type(types, &instr->src[0], TYPE_FLOAT);
break;
case nir_intrinsic_ddx:
case nir_intrinsic_ddy:
case nir_intrinsic_ddx_coarse:

View File

@@ -856,18 +856,6 @@ memory_modes_to_msl(struct nir_to_msl_ctx *ctx, nir_variable_mode modes)
P(ctx, "mem_flags::mem_none");
}
static uint32_t
get_input_num_components(struct nir_to_msl_ctx *ctx, uint32_t location)
{
return ctx->inputs_info[location].num_components;
}
static uint32_t
get_output_num_components(struct nir_to_msl_ctx *ctx, uint32_t location)
{
return ctx->outputs_info[location].num_components;
}
static void
intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
{
@@ -1001,8 +989,9 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
nir_io_semantics io = nir_intrinsic_io_semantics(instr);
uint32_t component = nir_intrinsic_component(instr);
uint32_t location = io.location + idx;
P(ctx, "in.%s", msl_input_name(ctx, location));
if (instr->num_components < get_input_num_components(ctx, location)) {
msl_input_name(ctx, location, component);
if (instr->num_components < msl_input_num_components(ctx, location)) {
P(ctx, ".");
for (unsigned i = 0; i < instr->num_components; i++)
P(ctx, "%c", "xyzw"[component + i]);
@@ -1015,8 +1004,9 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
nir_io_semantics io = nir_intrinsic_io_semantics(instr);
uint32_t component = nir_intrinsic_component(instr);
uint32_t location = io.location + idx;
P(ctx, "in.%s", msl_input_name(ctx, location));
if (instr->num_components < get_input_num_components(ctx, location)) {
msl_input_name(ctx, location, component);
if (instr->num_components < msl_input_num_components(ctx, location)) {
P(ctx, ".");
for (unsigned i = 0; i < instr->num_components; i++)
P(ctx, "%c", "xyzw"[component + i]);
@@ -1027,7 +1017,8 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
case nir_intrinsic_load_output: {
unsigned idx = nir_src_as_uint(instr->src[0]);
nir_io_semantics io = nir_intrinsic_io_semantics(instr);
P(ctx, "out.%s;\n", msl_output_name(ctx, io.location + idx));
msl_output_name(ctx, io.location + idx, 0);
P(ctx, ";\n");
break;
}
case nir_intrinsic_store_output: {
@@ -1036,10 +1027,11 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
uint32_t location = io.location + idx;
uint32_t write_mask = nir_intrinsic_write_mask(instr);
uint32_t component = nir_intrinsic_component(instr);
uint32_t dst_num_components = get_output_num_components(ctx, location);
uint32_t dst_num_components = msl_output_num_components(ctx, location);
uint32_t num_components = instr->num_components;
P_IND(ctx, "out.%s", msl_output_name(ctx, location));
P_IND(ctx, "%s", "");
msl_output_name(ctx, location, component);
if (dst_num_components > 1u) {
P(ctx, ".");
for (unsigned i = 0; i < num_components; i++)
@@ -1518,6 +1510,11 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
src_to_msl(ctx, &instr->src[0]);
P(ctx, ");\n");
break;
case nir_intrinsic_store_clip_distance_kk:
P_IND(ctx, "out.gl_ClipDistance[%d] = ", nir_intrinsic_base(instr));
src_to_msl(ctx, &instr->src[0]);
P(ctx, ";\n");
break;
default:
P_IND(ctx, "Unknown intrinsic %s\n", info->name);
}
@@ -1982,6 +1979,26 @@ msl_optimize_nir(struct nir_shader *nir)
return progress;
}
/* Filter callback selecting which intrinsics get scalarized: accepts only
 * store_output intrinsics whose location is CLIP_DIST0 or CLIP_DIST1.
 */
static bool
scalarize_clip_distance_filter(const nir_intrinsic_instr *intrin,
                               UNUSED const void *_data)
{
   if (intrin->intrinsic != nir_intrinsic_store_output)
      return false;

   const unsigned slot = nir_intrinsic_io_semantics(intrin).location;
   if (slot == VARYING_SLOT_CLIP_DIST0)
      return true;

   return slot == VARYING_SLOT_CLIP_DIST1;
}
/* Late NIR lowering: runs two passes over the shader, in this order:
 *  1. nir_lower_io_to_scalar on shader outputs, restricted by
 *     scalarize_clip_distance_filter to stores to CLIP_DIST0/1.
 *  2. msl_nir_lower_clip_distance.
 * The progress result of both passes is discarded.
 */
void
msl_lower_nir_late(nir_shader *nir)
{
/* Presumably must run first so that each clip-distance store carries a
 * single component before it is lowered -- TODO confirm.
 */
NIR_PASS(_, nir, nir_lower_io_to_scalar, nir_var_shader_out,
scalarize_clip_distance_filter, NULL);
NIR_PASS(_, nir, msl_nir_lower_clip_distance);
}
static void
msl_gather_info(struct nir_to_msl_ctx *ctx)
{

View File

@@ -55,3 +55,4 @@ bool msl_ensure_depth_write(nir_shader *nir);
bool msl_ensure_vertex_position_output(nir_shader *nir);
bool msl_nir_fs_io_types(nir_shader *nir);
bool msl_nir_vs_io_types(nir_shader *nir);
void msl_lower_nir_late(nir_shader *nir);

View File

@@ -170,6 +170,7 @@ kk_get_device_features(
.multiViewport = true,
.robustBufferAccess = true,
.samplerAnisotropy = true,
.shaderClipDistance = true,
.shaderImageGatherExtended = true,
.shaderInt16 = true,
.shaderInt64 = true,

View File

@@ -655,6 +655,7 @@ kk_compile_shader(struct kk_device *dev, struct vk_shader_compile_info *info,
if (info->stage == MESA_SHADER_VERTEX) {
kk_lower_vs_vbo(nir, state);
}
msl_lower_nir_late(nir);
msl_optimize_nir(nir);
modify_nir_info(nir);
shader->msl_code = nir_to_msl(nir, NULL, dev->disabled_workarounds);