radeonsi: move geometry shader code into si_shader_llvm_gs.c
Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3399>
This commit is contained in:
@@ -37,6 +37,7 @@ C_SOURCES := \
|
||||
si_shader_internal.h \
|
||||
si_shader_llvm.c \
|
||||
si_shader_llvm_build.c \
|
||||
si_shader_llvm_gs.c \
|
||||
si_shader_llvm_ps.c \
|
||||
si_shader_llvm_tess.c \
|
||||
si_shader_nir.c \
|
||||
|
||||
@@ -52,6 +52,7 @@ files_libradeonsi = files(
|
||||
'si_shader_internal.h',
|
||||
'si_shader_llvm.c',
|
||||
'si_shader_llvm_build.c',
|
||||
'si_shader_llvm_gs.c',
|
||||
'si_shader_llvm_ps.c',
|
||||
'si_shader_llvm_tess.c',
|
||||
'si_shader_nir.c',
|
||||
|
||||
@@ -49,8 +49,6 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f);
|
||||
|
||||
static void si_build_vs_prolog_function(struct si_shader_context *ctx,
|
||||
union si_shader_part_key *key);
|
||||
static void si_fix_resource_usage(struct si_screen *sscreen,
|
||||
struct si_shader *shader);
|
||||
|
||||
/** Whether the shader runs as a combination of multiple API shaders */
|
||||
static bool is_multi_part_shader(struct si_shader_context *ctx)
|
||||
@@ -428,122 +426,6 @@ LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx,
|
||||
}
|
||||
}
|
||||
|
||||
static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
|
||||
unsigned input_index,
|
||||
unsigned vtx_offset_param,
|
||||
LLVMTypeRef type,
|
||||
unsigned swizzle)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
struct si_shader *shader = ctx->shader;
|
||||
LLVMValueRef vtx_offset, soffset;
|
||||
struct si_shader_info *info = &shader->selector->info;
|
||||
unsigned semantic_name = info->input_semantic_name[input_index];
|
||||
unsigned semantic_index = info->input_semantic_index[input_index];
|
||||
unsigned param;
|
||||
LLVMValueRef value;
|
||||
|
||||
param = si_shader_io_get_unique_index(semantic_name, semantic_index, false);
|
||||
|
||||
/* GFX9 has the ESGS ring in LDS. */
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
unsigned index = vtx_offset_param;
|
||||
|
||||
switch (index / 2) {
|
||||
case 0:
|
||||
vtx_offset = si_unpack_param(ctx, ctx->gs_vtx01_offset,
|
||||
index % 2 ? 16 : 0, 16);
|
||||
break;
|
||||
case 1:
|
||||
vtx_offset = si_unpack_param(ctx, ctx->gs_vtx23_offset,
|
||||
index % 2 ? 16 : 0, 16);
|
||||
break;
|
||||
case 2:
|
||||
vtx_offset = si_unpack_param(ctx, ctx->gs_vtx45_offset,
|
||||
index % 2 ? 16 : 0, 16);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
unsigned offset = param * 4 + swizzle;
|
||||
vtx_offset = LLVMBuildAdd(ctx->ac.builder, vtx_offset,
|
||||
LLVMConstInt(ctx->i32, offset, false), "");
|
||||
|
||||
LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->esgs_ring, vtx_offset);
|
||||
LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, ptr, "");
|
||||
if (ac_get_type_size(type) == 64) {
|
||||
ptr = LLVMBuildGEP(ctx->ac.builder, ptr,
|
||||
&ctx->ac.i32_1, 1, "");
|
||||
LLVMValueRef values[2] = {
|
||||
value,
|
||||
LLVMBuildLoad(ctx->ac.builder, ptr, "")
|
||||
};
|
||||
value = ac_build_gather_values(&ctx->ac, values, 2);
|
||||
}
|
||||
return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
|
||||
}
|
||||
|
||||
/* GFX6: input load from the ESGS ring in memory. */
|
||||
if (swizzle == ~0) {
|
||||
LLVMValueRef values[4];
|
||||
unsigned chan;
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
values[chan] = si_llvm_load_input_gs(abi, input_index, vtx_offset_param,
|
||||
type, chan);
|
||||
}
|
||||
return ac_build_gather_values(&ctx->ac, values, 4);
|
||||
}
|
||||
|
||||
/* Get the vertex offset parameter on GFX6. */
|
||||
LLVMValueRef gs_vtx_offset = ac_get_arg(&ctx->ac,
|
||||
ctx->gs_vtx_offset[vtx_offset_param]);
|
||||
|
||||
vtx_offset = LLVMBuildMul(ctx->ac.builder, gs_vtx_offset,
|
||||
LLVMConstInt(ctx->i32, 4, 0), "");
|
||||
|
||||
soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle) * 256, 0);
|
||||
|
||||
value = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->i32_0,
|
||||
vtx_offset, soffset, 0, ac_glc, true, false);
|
||||
if (ac_get_type_size(type) == 64) {
|
||||
LLVMValueRef value2;
|
||||
soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle + 1) * 256, 0);
|
||||
|
||||
value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1,
|
||||
ctx->i32_0, vtx_offset, soffset,
|
||||
0, ac_glc, true, false);
|
||||
return si_build_gather_64bit(ctx, type, value, value2);
|
||||
}
|
||||
return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
|
||||
}
|
||||
|
||||
static LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
|
||||
unsigned location,
|
||||
unsigned driver_location,
|
||||
unsigned component,
|
||||
unsigned num_components,
|
||||
unsigned vertex_index,
|
||||
unsigned const_index,
|
||||
LLVMTypeRef type)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
|
||||
LLVMValueRef value[4];
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
unsigned offset = i;
|
||||
if (ac_get_type_size(type) == 64)
|
||||
offset *= 2;
|
||||
|
||||
offset += component;
|
||||
value[i + component] = si_llvm_load_input_gs(&ctx->abi, driver_location / 4 + const_index,
|
||||
vertex_index, type, offset);
|
||||
}
|
||||
|
||||
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
|
||||
}
|
||||
|
||||
static LLVMValueRef get_base_vertex(struct ac_shader_abi *abi)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
@@ -816,9 +698,9 @@ void si_emit_streamout_output(struct si_shader_context *ctx,
|
||||
* Write streamout data to buffers for vertex stream @p stream (different
|
||||
* vertex streams can occur for GS copy shaders).
|
||||
*/
|
||||
static void si_llvm_emit_streamout(struct si_shader_context *ctx,
|
||||
struct si_shader_output_values *outputs,
|
||||
unsigned noutput, unsigned stream)
|
||||
void si_llvm_emit_streamout(struct si_shader_context *ctx,
|
||||
struct si_shader_output_values *outputs,
|
||||
unsigned noutput, unsigned stream)
|
||||
{
|
||||
struct si_shader_selector *sel = ctx->shader->selector;
|
||||
struct pipe_stream_output_info *so = &sel->so;
|
||||
@@ -1178,141 +1060,6 @@ void si_llvm_export_vs(struct si_shader_context *ctx,
|
||||
si_build_param_exports(ctx, outputs, noutput);
|
||||
}
|
||||
|
||||
/* Pass GS inputs from ES to GS on GFX9. */
|
||||
static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
|
||||
{
|
||||
LLVMValueRef ret = ctx->return_value;
|
||||
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);
|
||||
if (ctx->shader->key.as_ngg)
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->gs_tg_info, 2);
|
||||
else
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->gs2vs_offset, 2);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->merged_wave_info, 3);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->merged_scratch_offset, 5);
|
||||
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers,
|
||||
8 + SI_SGPR_RW_BUFFERS);
|
||||
ret = si_insert_input_ptr(ctx, ret,
|
||||
ctx->bindless_samplers_and_images,
|
||||
8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
|
||||
if (ctx->screen->use_ngg) {
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->vs_state_bits,
|
||||
8 + SI_SGPR_VS_STATE_BITS);
|
||||
}
|
||||
|
||||
unsigned vgpr;
|
||||
if (ctx->type == PIPE_SHADER_VERTEX)
|
||||
vgpr = 8 + GFX9_VSGS_NUM_USER_SGPR;
|
||||
else
|
||||
vgpr = 8 + GFX9_TESGS_NUM_USER_SGPR;
|
||||
|
||||
ret = si_insert_input_ret_float(ctx, ret, ctx->gs_vtx01_offset, vgpr++);
|
||||
ret = si_insert_input_ret_float(ctx, ret, ctx->gs_vtx23_offset, vgpr++);
|
||||
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_prim_id, vgpr++);
|
||||
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_invocation_id, vgpr++);
|
||||
ret = si_insert_input_ret_float(ctx, ret, ctx->gs_vtx45_offset, vgpr++);
|
||||
ctx->return_value = ret;
|
||||
}
|
||||
|
||||
static void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi,
|
||||
unsigned max_outputs,
|
||||
LLVMValueRef *addrs)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
struct si_shader *es = ctx->shader;
|
||||
struct si_shader_info *info = &es->selector->info;
|
||||
LLVMValueRef lds_base = NULL;
|
||||
unsigned chan;
|
||||
int i;
|
||||
|
||||
if (ctx->screen->info.chip_class >= GFX9 && info->num_outputs) {
|
||||
unsigned itemsize_dw = es->selector->esgs_itemsize / 4;
|
||||
LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
|
||||
LLVMValueRef wave_idx = si_unpack_param(ctx, ctx->merged_wave_info, 24, 4);
|
||||
vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx,
|
||||
LLVMBuildMul(ctx->ac.builder, wave_idx,
|
||||
LLVMConstInt(ctx->i32, ctx->ac.wave_size, false), ""), "");
|
||||
lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx,
|
||||
LLVMConstInt(ctx->i32, itemsize_dw, 0), "");
|
||||
}
|
||||
|
||||
for (i = 0; i < info->num_outputs; i++) {
|
||||
int param;
|
||||
|
||||
if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||
|
||||
info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
|
||||
continue;
|
||||
|
||||
param = si_shader_io_get_unique_index(info->output_semantic_name[i],
|
||||
info->output_semantic_index[i], false);
|
||||
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
if (!(info->output_usagemask[i] & (1 << chan)))
|
||||
continue;
|
||||
|
||||
LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
|
||||
out_val = ac_to_integer(&ctx->ac, out_val);
|
||||
|
||||
/* GFX9 has the ESGS ring in LDS. */
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
LLVMValueRef idx = LLVMConstInt(ctx->i32, param * 4 + chan, false);
|
||||
idx = LLVMBuildAdd(ctx->ac.builder, lds_base, idx, "");
|
||||
ac_build_indexed_store(&ctx->ac, ctx->esgs_ring, idx, out_val);
|
||||
continue;
|
||||
}
|
||||
|
||||
ac_build_buffer_store_dword(&ctx->ac,
|
||||
ctx->esgs_ring,
|
||||
out_val, 1, NULL,
|
||||
ac_get_arg(&ctx->ac, ctx->es2gs_offset),
|
||||
(4 * param + chan) * 4,
|
||||
ac_glc | ac_slc | ac_swizzled);
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx->screen->info.chip_class >= GFX9)
|
||||
si_set_es_return_value_for_gs(ctx);
|
||||
}
|
||||
|
||||
static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx)
|
||||
{
|
||||
if (ctx->screen->info.chip_class >= GFX9)
|
||||
return si_unpack_param(ctx, ctx->merged_wave_info, 16, 8);
|
||||
else
|
||||
return ac_get_arg(&ctx->ac, ctx->gs_wave_id);
|
||||
}
|
||||
|
||||
static void emit_gs_epilogue(struct si_shader_context *ctx)
|
||||
{
|
||||
if (ctx->shader->key.as_ngg) {
|
||||
gfx10_ngg_gs_emit_epilogue(ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx->screen->info.chip_class >= GFX10)
|
||||
LLVMBuildFence(ctx->ac.builder, LLVMAtomicOrderingRelease, false, "");
|
||||
|
||||
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE,
|
||||
si_get_gs_wave_id(ctx));
|
||||
|
||||
if (ctx->screen->info.chip_class >= GFX9)
|
||||
ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
|
||||
}
|
||||
|
||||
static void si_llvm_emit_gs_epilogue(struct ac_shader_abi *abi,
|
||||
unsigned max_outputs,
|
||||
LLVMValueRef *addrs)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
struct si_shader_info UNUSED *info = &ctx->shader->selector->info;
|
||||
|
||||
assert(info->num_outputs <= max_outputs);
|
||||
|
||||
emit_gs_epilogue(ctx);
|
||||
}
|
||||
|
||||
static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
|
||||
unsigned max_outputs,
|
||||
LLVMValueRef *addrs)
|
||||
@@ -1389,106 +1136,6 @@ static void si_llvm_emit_prim_discard_cs_epilogue(struct ac_shader_abi *abi,
|
||||
ctx->return_value = ret;
|
||||
}
|
||||
|
||||
/* Emit one vertex from the geometry shader */
|
||||
static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
|
||||
unsigned stream,
|
||||
LLVMValueRef *addrs)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
|
||||
if (ctx->shader->key.as_ngg) {
|
||||
gfx10_ngg_gs_emit_vertex(ctx, stream, addrs);
|
||||
return;
|
||||
}
|
||||
|
||||
struct si_shader_info *info = &ctx->shader->selector->info;
|
||||
struct si_shader *shader = ctx->shader;
|
||||
LLVMValueRef soffset = ac_get_arg(&ctx->ac, ctx->gs2vs_offset);
|
||||
LLVMValueRef gs_next_vertex;
|
||||
LLVMValueRef can_emit;
|
||||
unsigned chan, offset;
|
||||
int i;
|
||||
|
||||
/* Write vertex attribute values to GSVS ring */
|
||||
gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
|
||||
ctx->gs_next_vertex[stream],
|
||||
"");
|
||||
|
||||
/* If this thread has already emitted the declared maximum number of
|
||||
* vertices, skip the write: excessive vertex emissions are not
|
||||
* supposed to have any effect.
|
||||
*
|
||||
* If the shader has no writes to memory, kill it instead. This skips
|
||||
* further memory loads and may allow LLVM to skip to the end
|
||||
* altogether.
|
||||
*/
|
||||
can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex,
|
||||
LLVMConstInt(ctx->i32,
|
||||
shader->selector->gs_max_out_vertices, 0), "");
|
||||
|
||||
bool use_kill = !info->writes_memory;
|
||||
if (use_kill) {
|
||||
ac_build_kill_if_false(&ctx->ac, can_emit);
|
||||
} else {
|
||||
ac_build_ifcc(&ctx->ac, can_emit, 6505);
|
||||
}
|
||||
|
||||
offset = 0;
|
||||
for (i = 0; i < info->num_outputs; i++) {
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
if (!(info->output_usagemask[i] & (1 << chan)) ||
|
||||
((info->output_streams[i] >> (2 * chan)) & 3) != stream)
|
||||
continue;
|
||||
|
||||
LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
|
||||
LLVMValueRef voffset =
|
||||
LLVMConstInt(ctx->i32, offset *
|
||||
shader->selector->gs_max_out_vertices, 0);
|
||||
offset++;
|
||||
|
||||
voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, "");
|
||||
voffset = LLVMBuildMul(ctx->ac.builder, voffset,
|
||||
LLVMConstInt(ctx->i32, 4, 0), "");
|
||||
|
||||
out_val = ac_to_integer(&ctx->ac, out_val);
|
||||
|
||||
ac_build_buffer_store_dword(&ctx->ac,
|
||||
ctx->gsvs_ring[stream],
|
||||
out_val, 1,
|
||||
voffset, soffset, 0,
|
||||
ac_glc | ac_slc | ac_swizzled);
|
||||
}
|
||||
}
|
||||
|
||||
gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex, ctx->i32_1, "");
|
||||
LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
|
||||
|
||||
/* Signal vertex emission if vertex data was written. */
|
||||
if (offset) {
|
||||
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
|
||||
si_get_gs_wave_id(ctx));
|
||||
}
|
||||
|
||||
if (!use_kill)
|
||||
ac_build_endif(&ctx->ac, 6505);
|
||||
}
|
||||
|
||||
/* Cut one primitive from the geometry shader */
|
||||
static void si_llvm_emit_primitive(struct ac_shader_abi *abi,
|
||||
unsigned stream)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
|
||||
if (ctx->shader->key.as_ngg) {
|
||||
LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_0, ctx->gs_curprim_verts[stream]);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Signal primitive cut */
|
||||
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8),
|
||||
si_get_gs_wave_id(ctx));
|
||||
}
|
||||
|
||||
static void declare_streamout_params(struct si_shader_context *ctx,
|
||||
struct pipe_stream_output_info *so)
|
||||
{
|
||||
@@ -1708,7 +1355,7 @@ void si_add_arg_checked(struct ac_shader_args *args,
|
||||
ac_add_arg(args, file, registers, type, arg);
|
||||
}
|
||||
|
||||
static void create_function(struct si_shader_context *ctx)
|
||||
void si_create_function(struct si_shader_context *ctx)
|
||||
{
|
||||
struct si_shader *shader = ctx->shader;
|
||||
LLVMTypeRef returns[AC_MAX_ARGS];
|
||||
@@ -2106,144 +1753,6 @@ static void create_function(struct si_shader_context *ctx)
|
||||
}
|
||||
}
|
||||
|
||||
/* Ensure that the esgs ring is declared.
|
||||
*
|
||||
* We declare it with 64KB alignment as a hint that the
|
||||
* pointer value will always be 0.
|
||||
*/
|
||||
static void declare_esgs_ring(struct si_shader_context *ctx)
|
||||
{
|
||||
if (ctx->esgs_ring)
|
||||
return;
|
||||
|
||||
assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring"));
|
||||
|
||||
ctx->esgs_ring = LLVMAddGlobalInAddressSpace(
|
||||
ctx->ac.module, LLVMArrayType(ctx->i32, 0),
|
||||
"esgs_ring",
|
||||
AC_ADDR_SPACE_LDS);
|
||||
LLVMSetLinkage(ctx->esgs_ring, LLVMExternalLinkage);
|
||||
LLVMSetAlignment(ctx->esgs_ring, 64 * 1024);
|
||||
}
|
||||
|
||||
/**
|
||||
* Load ESGS and GSVS ring buffer resource descriptors and save the variables
|
||||
* for later use.
|
||||
*/
|
||||
static void preload_ring_buffers(struct si_shader_context *ctx)
|
||||
{
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
|
||||
LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers);
|
||||
|
||||
if (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY) {
|
||||
if (ctx->screen->info.chip_class <= GFX8) {
|
||||
unsigned ring =
|
||||
ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS
|
||||
: SI_ES_RING_ESGS;
|
||||
LLVMValueRef offset = LLVMConstInt(ctx->i32, ring, 0);
|
||||
|
||||
ctx->esgs_ring =
|
||||
ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
|
||||
} else {
|
||||
if (USE_LDS_SYMBOLS && LLVM_VERSION_MAJOR >= 9) {
|
||||
/* Declare the ESGS ring as an explicit LDS symbol. */
|
||||
declare_esgs_ring(ctx);
|
||||
} else {
|
||||
ac_declare_lds_as_pointer(&ctx->ac);
|
||||
ctx->esgs_ring = ctx->ac.lds;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx->shader->is_gs_copy_shader) {
|
||||
LLVMValueRef offset = LLVMConstInt(ctx->i32, SI_RING_GSVS, 0);
|
||||
|
||||
ctx->gsvs_ring[0] =
|
||||
ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
|
||||
} else if (ctx->type == PIPE_SHADER_GEOMETRY) {
|
||||
const struct si_shader_selector *sel = ctx->shader->selector;
|
||||
LLVMValueRef offset = LLVMConstInt(ctx->i32, SI_RING_GSVS, 0);
|
||||
LLVMValueRef base_ring;
|
||||
|
||||
base_ring = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
|
||||
|
||||
/* The conceptual layout of the GSVS ring is
|
||||
* v0c0 .. vLv0 v0c1 .. vLc1 ..
|
||||
* but the real memory layout is swizzled across
|
||||
* threads:
|
||||
* t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
|
||||
* t16v0c0 ..
|
||||
* Override the buffer descriptor accordingly.
|
||||
*/
|
||||
LLVMTypeRef v2i64 = LLVMVectorType(ctx->i64, 2);
|
||||
uint64_t stream_offset = 0;
|
||||
|
||||
for (unsigned stream = 0; stream < 4; ++stream) {
|
||||
unsigned num_components;
|
||||
unsigned stride;
|
||||
unsigned num_records;
|
||||
LLVMValueRef ring, tmp;
|
||||
|
||||
num_components = sel->info.num_stream_output_components[stream];
|
||||
if (!num_components)
|
||||
continue;
|
||||
|
||||
stride = 4 * num_components * sel->gs_max_out_vertices;
|
||||
|
||||
/* Limit on the stride field for <= GFX7. */
|
||||
assert(stride < (1 << 14));
|
||||
|
||||
num_records = ctx->ac.wave_size;
|
||||
|
||||
ring = LLVMBuildBitCast(builder, base_ring, v2i64, "");
|
||||
tmp = LLVMBuildExtractElement(builder, ring, ctx->i32_0, "");
|
||||
tmp = LLVMBuildAdd(builder, tmp,
|
||||
LLVMConstInt(ctx->i64,
|
||||
stream_offset, 0), "");
|
||||
stream_offset += stride * ctx->ac.wave_size;
|
||||
|
||||
ring = LLVMBuildInsertElement(builder, ring, tmp, ctx->i32_0, "");
|
||||
ring = LLVMBuildBitCast(builder, ring, ctx->v4i32, "");
|
||||
tmp = LLVMBuildExtractElement(builder, ring, ctx->i32_1, "");
|
||||
tmp = LLVMBuildOr(builder, tmp,
|
||||
LLVMConstInt(ctx->i32,
|
||||
S_008F04_STRIDE(stride) |
|
||||
S_008F04_SWIZZLE_ENABLE(1), 0), "");
|
||||
ring = LLVMBuildInsertElement(builder, ring, tmp, ctx->i32_1, "");
|
||||
ring = LLVMBuildInsertElement(builder, ring,
|
||||
LLVMConstInt(ctx->i32, num_records, 0),
|
||||
LLVMConstInt(ctx->i32, 2, 0), "");
|
||||
|
||||
uint32_t rsrc3 =
|
||||
S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
|
||||
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
||||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
|
||||
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
|
||||
S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */
|
||||
S_008F0C_ADD_TID_ENABLE(1);
|
||||
|
||||
if (ctx->ac.chip_class >= GFX10) {
|
||||
rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
|
||||
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
|
||||
S_008F0C_RESOURCE_LEVEL(1);
|
||||
} else {
|
||||
rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
|
||||
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
|
||||
S_008F0C_ELEMENT_SIZE(1); /* element_size = 4 (bytes) */
|
||||
}
|
||||
|
||||
ring = LLVMBuildInsertElement(builder, ring,
|
||||
LLVMConstInt(ctx->i32, rsrc3, false),
|
||||
LLVMConstInt(ctx->i32, 3, 0), "");
|
||||
|
||||
ctx->gsvs_ring[stream] = ring;
|
||||
}
|
||||
} else if (ctx->type == PIPE_SHADER_TESS_EVAL) {
|
||||
si_llvm_preload_tes_rings(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
/* For the UMR disassembler. */
|
||||
#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
|
||||
#define DEBUGGER_NUM_MARKERS 5
|
||||
@@ -2656,16 +2165,16 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
|
||||
si_shader_dump_stats(sscreen, shader, file, check_debug_option);
|
||||
}
|
||||
|
||||
static int si_compile_llvm(struct si_screen *sscreen,
|
||||
struct si_shader_binary *binary,
|
||||
struct ac_shader_config *conf,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
LLVMModuleRef mod,
|
||||
struct pipe_debug_callback *debug,
|
||||
enum pipe_shader_type shader_type,
|
||||
unsigned wave_size,
|
||||
const char *name,
|
||||
bool less_optimized)
|
||||
int si_compile_llvm(struct si_screen *sscreen,
|
||||
struct si_shader_binary *binary,
|
||||
struct ac_shader_config *conf,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
LLVMModuleRef mod,
|
||||
struct pipe_debug_callback *debug,
|
||||
enum pipe_shader_type shader_type,
|
||||
unsigned wave_size,
|
||||
const char *name,
|
||||
bool less_optimized)
|
||||
{
|
||||
unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
|
||||
|
||||
@@ -2724,155 +2233,6 @@ static int si_compile_llvm(struct si_screen *sscreen,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Generate code for the hardware VS shader stage to go with a geometry shader */
|
||||
struct si_shader *
|
||||
si_generate_gs_copy_shader(struct si_screen *sscreen,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
struct si_shader_selector *gs_selector,
|
||||
struct pipe_debug_callback *debug)
|
||||
{
|
||||
struct si_shader_context ctx;
|
||||
struct si_shader *shader;
|
||||
LLVMBuilderRef builder;
|
||||
struct si_shader_output_values outputs[SI_MAX_VS_OUTPUTS];
|
||||
struct si_shader_info *gsinfo = &gs_selector->info;
|
||||
int i;
|
||||
|
||||
|
||||
shader = CALLOC_STRUCT(si_shader);
|
||||
if (!shader)
|
||||
return NULL;
|
||||
|
||||
/* We can leave the fence as permanently signaled because the GS copy
|
||||
* shader only becomes visible globally after it has been compiled. */
|
||||
util_queue_fence_init(&shader->ready);
|
||||
|
||||
shader->selector = gs_selector;
|
||||
shader->is_gs_copy_shader = true;
|
||||
|
||||
si_llvm_context_init(&ctx, sscreen, compiler,
|
||||
si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false));
|
||||
ctx.shader = shader;
|
||||
ctx.type = PIPE_SHADER_VERTEX;
|
||||
|
||||
builder = ctx.ac.builder;
|
||||
|
||||
create_function(&ctx);
|
||||
preload_ring_buffers(&ctx);
|
||||
|
||||
LLVMValueRef voffset =
|
||||
LLVMBuildMul(ctx.ac.builder, ctx.abi.vertex_id,
|
||||
LLVMConstInt(ctx.i32, 4, 0), "");
|
||||
|
||||
/* Fetch the vertex stream ID.*/
|
||||
LLVMValueRef stream_id;
|
||||
|
||||
if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs)
|
||||
stream_id = si_unpack_param(&ctx, ctx.streamout_config, 24, 2);
|
||||
else
|
||||
stream_id = ctx.i32_0;
|
||||
|
||||
/* Fill in output information. */
|
||||
for (i = 0; i < gsinfo->num_outputs; ++i) {
|
||||
outputs[i].semantic_name = gsinfo->output_semantic_name[i];
|
||||
outputs[i].semantic_index = gsinfo->output_semantic_index[i];
|
||||
|
||||
for (int chan = 0; chan < 4; chan++) {
|
||||
outputs[i].vertex_stream[chan] =
|
||||
(gsinfo->output_streams[i] >> (2 * chan)) & 3;
|
||||
}
|
||||
}
|
||||
|
||||
LLVMBasicBlockRef end_bb;
|
||||
LLVMValueRef switch_inst;
|
||||
|
||||
end_bb = LLVMAppendBasicBlockInContext(ctx.ac.context, ctx.main_fn, "end");
|
||||
switch_inst = LLVMBuildSwitch(builder, stream_id, end_bb, 4);
|
||||
|
||||
for (int stream = 0; stream < 4; stream++) {
|
||||
LLVMBasicBlockRef bb;
|
||||
unsigned offset;
|
||||
|
||||
if (!gsinfo->num_stream_output_components[stream])
|
||||
continue;
|
||||
|
||||
if (stream > 0 && !gs_selector->so.num_outputs)
|
||||
continue;
|
||||
|
||||
bb = LLVMInsertBasicBlockInContext(ctx.ac.context, end_bb, "out");
|
||||
LLVMAddCase(switch_inst, LLVMConstInt(ctx.i32, stream, 0), bb);
|
||||
LLVMPositionBuilderAtEnd(builder, bb);
|
||||
|
||||
/* Fetch vertex data from GSVS ring */
|
||||
offset = 0;
|
||||
for (i = 0; i < gsinfo->num_outputs; ++i) {
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
if (!(gsinfo->output_usagemask[i] & (1 << chan)) ||
|
||||
outputs[i].vertex_stream[chan] != stream) {
|
||||
outputs[i].values[chan] = LLVMGetUndef(ctx.f32);
|
||||
continue;
|
||||
}
|
||||
|
||||
LLVMValueRef soffset = LLVMConstInt(ctx.i32,
|
||||
offset * gs_selector->gs_max_out_vertices * 16 * 4, 0);
|
||||
offset++;
|
||||
|
||||
outputs[i].values[chan] =
|
||||
ac_build_buffer_load(&ctx.ac,
|
||||
ctx.gsvs_ring[0], 1,
|
||||
ctx.i32_0, voffset,
|
||||
soffset, 0, ac_glc | ac_slc,
|
||||
true, false);
|
||||
}
|
||||
}
|
||||
|
||||
/* Streamout and exports. */
|
||||
if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs) {
|
||||
si_llvm_emit_streamout(&ctx, outputs,
|
||||
gsinfo->num_outputs,
|
||||
stream);
|
||||
}
|
||||
|
||||
if (stream == 0)
|
||||
si_llvm_export_vs(&ctx, outputs, gsinfo->num_outputs);
|
||||
|
||||
LLVMBuildBr(builder, end_bb);
|
||||
}
|
||||
|
||||
LLVMPositionBuilderAtEnd(builder, end_bb);
|
||||
|
||||
LLVMBuildRetVoid(ctx.ac.builder);
|
||||
|
||||
ctx.type = PIPE_SHADER_GEOMETRY; /* override for shader dumping */
|
||||
si_llvm_optimize_module(&ctx);
|
||||
|
||||
bool ok = false;
|
||||
if (si_compile_llvm(sscreen, &ctx.shader->binary,
|
||||
&ctx.shader->config, ctx.compiler,
|
||||
ctx.ac.module,
|
||||
debug, PIPE_SHADER_GEOMETRY, ctx.ac.wave_size,
|
||||
"GS Copy Shader", false) == 0) {
|
||||
if (si_can_dump_shader(sscreen, PIPE_SHADER_GEOMETRY))
|
||||
fprintf(stderr, "GS Copy Shader:\n");
|
||||
si_shader_dump(sscreen, ctx.shader, debug, stderr, true);
|
||||
|
||||
if (!ctx.shader->config.scratch_bytes_per_wave)
|
||||
ok = si_shader_binary_upload(sscreen, ctx.shader, 0);
|
||||
else
|
||||
ok = true;
|
||||
}
|
||||
|
||||
si_llvm_dispose(&ctx);
|
||||
|
||||
if (!ok) {
|
||||
FREE(shader);
|
||||
shader = NULL;
|
||||
} else {
|
||||
si_fix_resource_usage(sscreen, shader);
|
||||
}
|
||||
return shader;
|
||||
}
|
||||
|
||||
static void si_dump_shader_key_vs(const struct si_shader_key *key,
|
||||
const struct si_vs_prolog_bits *prolog,
|
||||
const char *prefix, FILE *f)
|
||||
@@ -3052,22 +2412,6 @@ static bool si_vs_needs_prolog(const struct si_shader_selector *sel,
|
||||
key->unpack_instance_id_from_vertex_id;
|
||||
}
|
||||
|
||||
LLVMValueRef si_is_es_thread(struct si_shader_context *ctx)
|
||||
{
|
||||
/* Return true if the current thread should execute an ES thread. */
|
||||
return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
|
||||
ac_get_thread_id(&ctx->ac),
|
||||
si_unpack_param(ctx, ctx->merged_wave_info, 0, 8), "");
|
||||
}
|
||||
|
||||
LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx)
|
||||
{
|
||||
/* Return true if the current thread should execute a GS thread. */
|
||||
return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
|
||||
ac_get_thread_id(&ctx->ac),
|
||||
si_unpack_param(ctx, ctx->merged_wave_info, 8, 8), "");
|
||||
}
|
||||
|
||||
static bool si_build_main_function(struct si_shader_context *ctx,
|
||||
struct nir_shader *nir, bool free_nir)
|
||||
{
|
||||
@@ -3102,10 +2446,7 @@ static bool si_build_main_function(struct si_shader_context *ctx,
|
||||
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
|
||||
break;
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
ctx->abi.load_inputs = si_nir_load_input_gs;
|
||||
ctx->abi.emit_vertex = si_llvm_emit_vertex;
|
||||
ctx->abi.emit_primitive = si_llvm_emit_primitive;
|
||||
ctx->abi.emit_outputs = si_llvm_emit_gs_epilogue;
|
||||
si_llvm_init_gs_callbacks(ctx);
|
||||
break;
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
si_llvm_init_ps_callbacks(ctx);
|
||||
@@ -3121,8 +2462,15 @@ static bool si_build_main_function(struct si_shader_context *ctx,
|
||||
ctx->abi.load_ubo = load_ubo;
|
||||
ctx->abi.load_ssbo = load_ssbo;
|
||||
|
||||
create_function(ctx);
|
||||
preload_ring_buffers(ctx);
|
||||
si_create_function(ctx);
|
||||
|
||||
if (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY)
|
||||
si_preload_esgs_ring(ctx);
|
||||
|
||||
if (ctx->type == PIPE_SHADER_GEOMETRY)
|
||||
si_preload_gs_rings(ctx);
|
||||
else if (ctx->type == PIPE_SHADER_TESS_EVAL)
|
||||
si_llvm_preload_tes_rings(ctx);
|
||||
|
||||
if (ctx->type == PIPE_SHADER_TESS_CTRL &&
|
||||
sel->info.tessfactors_are_def_in_all_invocs) {
|
||||
@@ -3172,7 +2520,7 @@ static bool si_build_main_function(struct si_shader_context *ctx,
|
||||
* avoids bank conflicts for SoA accesses.
|
||||
*/
|
||||
if (!gfx10_is_ngg_passthrough(shader))
|
||||
declare_esgs_ring(ctx);
|
||||
si_llvm_declare_esgs_ring(ctx);
|
||||
|
||||
/* This is really only needed when streamout and / or vertex
|
||||
* compaction is enabled.
|
||||
@@ -3324,129 +2672,6 @@ static void si_get_vs_prolog_key(const struct si_shader_info *info,
|
||||
shader_out->info.uses_instanceid = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the GS prolog function. Rotate the input vertices for triangle strips
|
||||
* with adjacency.
|
||||
*/
|
||||
static void si_build_gs_prolog_function(struct si_shader_context *ctx,
|
||||
union si_shader_part_key *key)
|
||||
{
|
||||
unsigned num_sgprs, num_vgprs;
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
LLVMTypeRef returns[AC_MAX_ARGS];
|
||||
LLVMValueRef func, ret;
|
||||
|
||||
memset(&ctx->args, 0, sizeof(ctx->args));
|
||||
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
if (key->gs_prolog.states.gfx9_prev_is_vs)
|
||||
num_sgprs = 8 + GFX9_VSGS_NUM_USER_SGPR;
|
||||
else
|
||||
num_sgprs = 8 + GFX9_TESGS_NUM_USER_SGPR;
|
||||
num_vgprs = 5; /* ES inputs are not needed by GS */
|
||||
} else {
|
||||
num_sgprs = GFX6_GS_NUM_USER_SGPR + 2;
|
||||
num_vgprs = 8;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < num_sgprs; ++i) {
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
||||
returns[i] = ctx->i32;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < num_vgprs; ++i) {
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);
|
||||
returns[num_sgprs + i] = ctx->f32;
|
||||
}
|
||||
|
||||
/* Create the function. */
|
||||
si_llvm_create_func(ctx, "gs_prolog", returns, num_sgprs + num_vgprs, 0);
|
||||
func = ctx->main_fn;
|
||||
|
||||
/* Set the full EXEC mask for the prolog, because we are only fiddling
|
||||
* with registers here. The main shader part will set the correct EXEC
|
||||
* mask.
|
||||
*/
|
||||
if (ctx->screen->info.chip_class >= GFX9 && !key->gs_prolog.is_monolithic)
|
||||
ac_init_exec_full_mask(&ctx->ac);
|
||||
|
||||
/* Copy inputs to outputs. This should be no-op, as the registers match,
|
||||
* but it will prevent the compiler from overwriting them unintentionally.
|
||||
*/
|
||||
ret = ctx->return_value;
|
||||
for (unsigned i = 0; i < num_sgprs; i++) {
|
||||
LLVMValueRef p = LLVMGetParam(func, i);
|
||||
ret = LLVMBuildInsertValue(builder, ret, p, i, "");
|
||||
}
|
||||
for (unsigned i = 0; i < num_vgprs; i++) {
|
||||
LLVMValueRef p = LLVMGetParam(func, num_sgprs + i);
|
||||
p = ac_to_float(&ctx->ac, p);
|
||||
ret = LLVMBuildInsertValue(builder, ret, p, num_sgprs + i, "");
|
||||
}
|
||||
|
||||
if (key->gs_prolog.states.tri_strip_adj_fix) {
|
||||
/* Remap the input vertices for every other primitive. */
|
||||
const struct ac_arg gfx6_vtx_params[6] = {
|
||||
{ .used = true, .arg_index = num_sgprs },
|
||||
{ .used = true, .arg_index = num_sgprs + 1 },
|
||||
{ .used = true, .arg_index = num_sgprs + 3 },
|
||||
{ .used = true, .arg_index = num_sgprs + 4 },
|
||||
{ .used = true, .arg_index = num_sgprs + 5 },
|
||||
{ .used = true, .arg_index = num_sgprs + 6 },
|
||||
};
|
||||
const struct ac_arg gfx9_vtx_params[3] = {
|
||||
{ .used = true, .arg_index = num_sgprs },
|
||||
{ .used = true, .arg_index = num_sgprs + 1 },
|
||||
{ .used = true, .arg_index = num_sgprs + 4 },
|
||||
};
|
||||
LLVMValueRef vtx_in[6], vtx_out[6];
|
||||
LLVMValueRef prim_id, rotate;
|
||||
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
vtx_in[i*2] = si_unpack_param(ctx, gfx9_vtx_params[i], 0, 16);
|
||||
vtx_in[i*2+1] = si_unpack_param(ctx, gfx9_vtx_params[i], 16, 16);
|
||||
}
|
||||
} else {
|
||||
for (unsigned i = 0; i < 6; i++)
|
||||
vtx_in[i] = ac_get_arg(&ctx->ac, gfx6_vtx_params[i]);
|
||||
}
|
||||
|
||||
prim_id = LLVMGetParam(func, num_sgprs + 2);
|
||||
rotate = LLVMBuildTrunc(builder, prim_id, ctx->i1, "");
|
||||
|
||||
for (unsigned i = 0; i < 6; ++i) {
|
||||
LLVMValueRef base, rotated;
|
||||
base = vtx_in[i];
|
||||
rotated = vtx_in[(i + 4) % 6];
|
||||
vtx_out[i] = LLVMBuildSelect(builder, rotate, rotated, base, "");
|
||||
}
|
||||
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
LLVMValueRef hi, out;
|
||||
|
||||
hi = LLVMBuildShl(builder, vtx_out[i*2+1],
|
||||
LLVMConstInt(ctx->i32, 16, 0), "");
|
||||
out = LLVMBuildOr(builder, vtx_out[i*2], hi, "");
|
||||
out = ac_to_float(&ctx->ac, out);
|
||||
ret = LLVMBuildInsertValue(builder, ret, out,
|
||||
gfx9_vtx_params[i].arg_index, "");
|
||||
}
|
||||
} else {
|
||||
for (unsigned i = 0; i < 6; i++) {
|
||||
LLVMValueRef out;
|
||||
|
||||
out = ac_to_float(&ctx->ac, vtx_out[i]);
|
||||
ret = LLVMBuildInsertValue(builder, ret, out,
|
||||
gfx6_vtx_params[i].arg_index, "");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LLVMBuildRet(builder, ret);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a list of shader part functions, build a wrapper function that
|
||||
* runs them in sequence to form a monolithic shader.
|
||||
@@ -3900,7 +3125,7 @@ int si_compile_shader(struct si_screen *sscreen,
|
||||
gs_prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
|
||||
gs_prolog_key.gs_prolog.is_monolithic = true;
|
||||
gs_prolog_key.gs_prolog.as_ngg = shader->key.as_ngg;
|
||||
si_build_gs_prolog_function(&ctx, &gs_prolog_key);
|
||||
si_llvm_build_gs_prolog(&ctx, &gs_prolog_key);
|
||||
gs_prolog = ctx.main_fn;
|
||||
|
||||
/* ES main part */
|
||||
@@ -3959,7 +3184,7 @@ int si_compile_shader(struct si_screen *sscreen,
|
||||
|
||||
memset(&prolog_key, 0, sizeof(prolog_key));
|
||||
prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
|
||||
si_build_gs_prolog_function(&ctx, &prolog_key);
|
||||
si_llvm_build_gs_prolog(&ctx, &prolog_key);
|
||||
parts[0] = ctx.main_fn;
|
||||
|
||||
si_build_wrapper_function(&ctx, parts, 2, 1, 0);
|
||||
@@ -4431,7 +3656,7 @@ static bool si_shader_select_gs_parts(struct si_screen *sscreen,
|
||||
shader->prolog2 = si_get_shader_part(sscreen, &sscreen->gs_prologs,
|
||||
PIPE_SHADER_GEOMETRY, true,
|
||||
&prolog_key, compiler, debug,
|
||||
si_build_gs_prolog_function,
|
||||
si_llvm_build_gs_prolog,
|
||||
"Geometry Shader Prolog");
|
||||
return shader->prolog2 != NULL;
|
||||
}
|
||||
@@ -4722,8 +3947,7 @@ void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
|
||||
*lds_size = MAX2(*lds_size, 8);
|
||||
}
|
||||
|
||||
static void si_fix_resource_usage(struct si_screen *sscreen,
|
||||
struct si_shader *shader)
|
||||
void si_fix_resource_usage(struct si_screen *sscreen, struct si_shader *shader)
|
||||
{
|
||||
unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */
|
||||
|
||||
|
||||
@@ -814,11 +814,6 @@ struct si_shader_part {
|
||||
};
|
||||
|
||||
/* si_shader.c */
|
||||
struct si_shader *
|
||||
si_generate_gs_copy_shader(struct si_screen *sscreen,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
struct si_shader_selector *gs_selector,
|
||||
struct pipe_debug_callback *debug);
|
||||
int si_compile_shader(struct si_screen *sscreen,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
struct si_shader *shader,
|
||||
@@ -844,6 +839,13 @@ void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
|
||||
const char *si_get_shader_name(const struct si_shader *shader);
|
||||
void si_shader_binary_clean(struct si_shader_binary *binary);
|
||||
|
||||
/* si_shader_llvm_gs.c */
|
||||
struct si_shader *
|
||||
si_generate_gs_copy_shader(struct si_screen *sscreen,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
struct si_shader_selector *gs_selector,
|
||||
struct pipe_debug_callback *debug);
|
||||
|
||||
/* si_shader_nir.c */
|
||||
void si_nir_scan_shader(const struct nir_shader *nir,
|
||||
struct si_shader_info *info);
|
||||
|
||||
@@ -260,6 +260,7 @@ LLVMValueRef si_build_gather_64bit(struct si_shader_context *ctx,
|
||||
LLVMTypeRef type, LLVMValueRef val1,
|
||||
LLVMValueRef val2);
|
||||
void si_llvm_emit_barrier(struct si_shader_context *ctx);
|
||||
void si_llvm_declare_esgs_ring(struct si_shader_context *ctx);
|
||||
void si_declare_compute_memory(struct si_shader_context *ctx);
|
||||
LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx,
|
||||
unsigned swizzle);
|
||||
@@ -287,8 +288,6 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir);
|
||||
LLVMValueRef si_unpack_param(struct si_shader_context *ctx,
|
||||
struct ac_arg param, unsigned rshift,
|
||||
unsigned bitwidth);
|
||||
LLVMValueRef si_is_es_thread(struct si_shader_context *ctx);
|
||||
LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx);
|
||||
void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *parts,
|
||||
unsigned num_parts, unsigned main_part,
|
||||
unsigned next_shader_first_part);
|
||||
@@ -304,6 +303,21 @@ LLVMValueRef si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueR
|
||||
struct ac_arg param, unsigned return_index);
|
||||
LLVMValueRef si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret,
|
||||
struct ac_arg param, unsigned return_index);
|
||||
int si_compile_llvm(struct si_screen *sscreen,
|
||||
struct si_shader_binary *binary,
|
||||
struct ac_shader_config *conf,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
LLVMModuleRef mod,
|
||||
struct pipe_debug_callback *debug,
|
||||
enum pipe_shader_type shader_type,
|
||||
unsigned wave_size,
|
||||
const char *name,
|
||||
bool less_optimized);
|
||||
void si_fix_resource_usage(struct si_screen *sscreen, struct si_shader *shader);
|
||||
void si_llvm_emit_streamout(struct si_shader_context *ctx,
|
||||
struct si_shader_output_values *outputs,
|
||||
unsigned noutput, unsigned stream);
|
||||
void si_create_function(struct si_shader_context *ctx);
|
||||
|
||||
void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
|
||||
unsigned max_outputs,
|
||||
@@ -315,6 +329,17 @@ void gfx10_ngg_gs_emit_prologue(struct si_shader_context *ctx);
|
||||
void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx);
|
||||
void gfx10_ngg_calculate_subgroup_info(struct si_shader *shader);
|
||||
|
||||
/* si_shader_llvm_gs.c */
|
||||
LLVMValueRef si_is_es_thread(struct si_shader_context *ctx);
|
||||
LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx);
|
||||
void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi, unsigned max_outputs,
|
||||
LLVMValueRef *addrs);
|
||||
void si_preload_esgs_ring(struct si_shader_context *ctx);
|
||||
void si_preload_gs_rings(struct si_shader_context *ctx);
|
||||
void si_llvm_build_gs_prolog(struct si_shader_context *ctx,
|
||||
union si_shader_part_key *key);
|
||||
void si_llvm_init_gs_callbacks(struct si_shader_context *ctx);
|
||||
|
||||
/* si_shader_llvm_tess.c */
|
||||
void si_llvm_preload_tes_rings(struct si_shader_context *ctx);
|
||||
void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi, unsigned max_outputs,
|
||||
|
||||
@@ -242,3 +242,23 @@ void si_llvm_emit_barrier(struct si_shader_context *ctx)
|
||||
|
||||
ac_build_s_barrier(&ctx->ac);
|
||||
}
|
||||
|
||||
/* Ensure that the esgs ring is declared.
|
||||
*
|
||||
* We declare it with 64KB alignment as a hint that the
|
||||
* pointer value will always be 0.
|
||||
*/
|
||||
void si_llvm_declare_esgs_ring(struct si_shader_context *ctx)
|
||||
{
|
||||
if (ctx->esgs_ring)
|
||||
return;
|
||||
|
||||
assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring"));
|
||||
|
||||
ctx->esgs_ring = LLVMAddGlobalInAddressSpace(
|
||||
ctx->ac.module, LLVMArrayType(ctx->i32, 0),
|
||||
"esgs_ring",
|
||||
AC_ADDR_SPACE_LDS);
|
||||
LLVMSetLinkage(ctx->esgs_ring, LLVMExternalLinkage);
|
||||
LLVMSetAlignment(ctx->esgs_ring, 64 * 1024);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,780 @@
|
||||
/*
|
||||
* Copyright 2020 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "si_shader_internal.h"
|
||||
#include "si_pipe.h"
|
||||
#include "sid.h"
|
||||
#include "util/u_memory.h"
|
||||
|
||||
LLVMValueRef si_is_es_thread(struct si_shader_context *ctx)
|
||||
{
|
||||
/* Return true if the current thread should execute an ES thread. */
|
||||
return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
|
||||
ac_get_thread_id(&ctx->ac),
|
||||
si_unpack_param(ctx, ctx->merged_wave_info, 0, 8), "");
|
||||
}
|
||||
|
||||
LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx)
|
||||
{
|
||||
/* Return true if the current thread should execute a GS thread. */
|
||||
return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
|
||||
ac_get_thread_id(&ctx->ac),
|
||||
si_unpack_param(ctx, ctx->merged_wave_info, 8, 8), "");
|
||||
}
|
||||
|
||||
static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
|
||||
unsigned input_index,
|
||||
unsigned vtx_offset_param,
|
||||
LLVMTypeRef type,
|
||||
unsigned swizzle)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
struct si_shader *shader = ctx->shader;
|
||||
LLVMValueRef vtx_offset, soffset;
|
||||
struct si_shader_info *info = &shader->selector->info;
|
||||
unsigned semantic_name = info->input_semantic_name[input_index];
|
||||
unsigned semantic_index = info->input_semantic_index[input_index];
|
||||
unsigned param;
|
||||
LLVMValueRef value;
|
||||
|
||||
param = si_shader_io_get_unique_index(semantic_name, semantic_index, false);
|
||||
|
||||
/* GFX9 has the ESGS ring in LDS. */
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
unsigned index = vtx_offset_param;
|
||||
|
||||
switch (index / 2) {
|
||||
case 0:
|
||||
vtx_offset = si_unpack_param(ctx, ctx->gs_vtx01_offset,
|
||||
index % 2 ? 16 : 0, 16);
|
||||
break;
|
||||
case 1:
|
||||
vtx_offset = si_unpack_param(ctx, ctx->gs_vtx23_offset,
|
||||
index % 2 ? 16 : 0, 16);
|
||||
break;
|
||||
case 2:
|
||||
vtx_offset = si_unpack_param(ctx, ctx->gs_vtx45_offset,
|
||||
index % 2 ? 16 : 0, 16);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
unsigned offset = param * 4 + swizzle;
|
||||
vtx_offset = LLVMBuildAdd(ctx->ac.builder, vtx_offset,
|
||||
LLVMConstInt(ctx->i32, offset, false), "");
|
||||
|
||||
LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->esgs_ring, vtx_offset);
|
||||
LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, ptr, "");
|
||||
if (ac_get_type_size(type) == 64) {
|
||||
ptr = LLVMBuildGEP(ctx->ac.builder, ptr,
|
||||
&ctx->ac.i32_1, 1, "");
|
||||
LLVMValueRef values[2] = {
|
||||
value,
|
||||
LLVMBuildLoad(ctx->ac.builder, ptr, "")
|
||||
};
|
||||
value = ac_build_gather_values(&ctx->ac, values, 2);
|
||||
}
|
||||
return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
|
||||
}
|
||||
|
||||
/* GFX6: input load from the ESGS ring in memory. */
|
||||
if (swizzle == ~0) {
|
||||
LLVMValueRef values[4];
|
||||
unsigned chan;
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
values[chan] = si_llvm_load_input_gs(abi, input_index, vtx_offset_param,
|
||||
type, chan);
|
||||
}
|
||||
return ac_build_gather_values(&ctx->ac, values, 4);
|
||||
}
|
||||
|
||||
/* Get the vertex offset parameter on GFX6. */
|
||||
LLVMValueRef gs_vtx_offset = ac_get_arg(&ctx->ac,
|
||||
ctx->gs_vtx_offset[vtx_offset_param]);
|
||||
|
||||
vtx_offset = LLVMBuildMul(ctx->ac.builder, gs_vtx_offset,
|
||||
LLVMConstInt(ctx->i32, 4, 0), "");
|
||||
|
||||
soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle) * 256, 0);
|
||||
|
||||
value = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->i32_0,
|
||||
vtx_offset, soffset, 0, ac_glc, true, false);
|
||||
if (ac_get_type_size(type) == 64) {
|
||||
LLVMValueRef value2;
|
||||
soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle + 1) * 256, 0);
|
||||
|
||||
value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1,
|
||||
ctx->i32_0, vtx_offset, soffset,
|
||||
0, ac_glc, true, false);
|
||||
return si_build_gather_64bit(ctx, type, value, value2);
|
||||
}
|
||||
return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
|
||||
}
|
||||
|
||||
static LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
|
||||
unsigned location,
|
||||
unsigned driver_location,
|
||||
unsigned component,
|
||||
unsigned num_components,
|
||||
unsigned vertex_index,
|
||||
unsigned const_index,
|
||||
LLVMTypeRef type)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
|
||||
LLVMValueRef value[4];
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
unsigned offset = i;
|
||||
if (ac_get_type_size(type) == 64)
|
||||
offset *= 2;
|
||||
|
||||
offset += component;
|
||||
value[i + component] = si_llvm_load_input_gs(&ctx->abi, driver_location / 4 + const_index,
|
||||
vertex_index, type, offset);
|
||||
}
|
||||
|
||||
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
|
||||
}
|
||||
|
||||
/* Pass GS inputs from ES to GS on GFX9. */
|
||||
static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
|
||||
{
|
||||
LLVMValueRef ret = ctx->return_value;
|
||||
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);
|
||||
if (ctx->shader->key.as_ngg)
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->gs_tg_info, 2);
|
||||
else
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->gs2vs_offset, 2);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->merged_wave_info, 3);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->merged_scratch_offset, 5);
|
||||
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers,
|
||||
8 + SI_SGPR_RW_BUFFERS);
|
||||
ret = si_insert_input_ptr(ctx, ret,
|
||||
ctx->bindless_samplers_and_images,
|
||||
8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
|
||||
if (ctx->screen->use_ngg) {
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->vs_state_bits,
|
||||
8 + SI_SGPR_VS_STATE_BITS);
|
||||
}
|
||||
|
||||
unsigned vgpr;
|
||||
if (ctx->type == PIPE_SHADER_VERTEX)
|
||||
vgpr = 8 + GFX9_VSGS_NUM_USER_SGPR;
|
||||
else
|
||||
vgpr = 8 + GFX9_TESGS_NUM_USER_SGPR;
|
||||
|
||||
ret = si_insert_input_ret_float(ctx, ret, ctx->gs_vtx01_offset, vgpr++);
|
||||
ret = si_insert_input_ret_float(ctx, ret, ctx->gs_vtx23_offset, vgpr++);
|
||||
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_prim_id, vgpr++);
|
||||
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_invocation_id, vgpr++);
|
||||
ret = si_insert_input_ret_float(ctx, ret, ctx->gs_vtx45_offset, vgpr++);
|
||||
ctx->return_value = ret;
|
||||
}
|
||||
|
||||
void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi, unsigned max_outputs,
|
||||
LLVMValueRef *addrs)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
struct si_shader *es = ctx->shader;
|
||||
struct si_shader_info *info = &es->selector->info;
|
||||
LLVMValueRef lds_base = NULL;
|
||||
unsigned chan;
|
||||
int i;
|
||||
|
||||
if (ctx->screen->info.chip_class >= GFX9 && info->num_outputs) {
|
||||
unsigned itemsize_dw = es->selector->esgs_itemsize / 4;
|
||||
LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
|
||||
LLVMValueRef wave_idx = si_unpack_param(ctx, ctx->merged_wave_info, 24, 4);
|
||||
vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx,
|
||||
LLVMBuildMul(ctx->ac.builder, wave_idx,
|
||||
LLVMConstInt(ctx->i32, ctx->ac.wave_size, false), ""), "");
|
||||
lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx,
|
||||
LLVMConstInt(ctx->i32, itemsize_dw, 0), "");
|
||||
}
|
||||
|
||||
for (i = 0; i < info->num_outputs; i++) {
|
||||
int param;
|
||||
|
||||
if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||
|
||||
info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
|
||||
continue;
|
||||
|
||||
param = si_shader_io_get_unique_index(info->output_semantic_name[i],
|
||||
info->output_semantic_index[i], false);
|
||||
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
if (!(info->output_usagemask[i] & (1 << chan)))
|
||||
continue;
|
||||
|
||||
LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
|
||||
out_val = ac_to_integer(&ctx->ac, out_val);
|
||||
|
||||
/* GFX9 has the ESGS ring in LDS. */
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
LLVMValueRef idx = LLVMConstInt(ctx->i32, param * 4 + chan, false);
|
||||
idx = LLVMBuildAdd(ctx->ac.builder, lds_base, idx, "");
|
||||
ac_build_indexed_store(&ctx->ac, ctx->esgs_ring, idx, out_val);
|
||||
continue;
|
||||
}
|
||||
|
||||
ac_build_buffer_store_dword(&ctx->ac,
|
||||
ctx->esgs_ring,
|
||||
out_val, 1, NULL,
|
||||
ac_get_arg(&ctx->ac, ctx->es2gs_offset),
|
||||
(4 * param + chan) * 4,
|
||||
ac_glc | ac_slc | ac_swizzled);
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx->screen->info.chip_class >= GFX9)
|
||||
si_set_es_return_value_for_gs(ctx);
|
||||
}
|
||||
|
||||
static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx)
|
||||
{
|
||||
if (ctx->screen->info.chip_class >= GFX9)
|
||||
return si_unpack_param(ctx, ctx->merged_wave_info, 16, 8);
|
||||
else
|
||||
return ac_get_arg(&ctx->ac, ctx->gs_wave_id);
|
||||
}
|
||||
|
||||
static void emit_gs_epilogue(struct si_shader_context *ctx)
|
||||
{
|
||||
if (ctx->shader->key.as_ngg) {
|
||||
gfx10_ngg_gs_emit_epilogue(ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx->screen->info.chip_class >= GFX10)
|
||||
LLVMBuildFence(ctx->ac.builder, LLVMAtomicOrderingRelease, false, "");
|
||||
|
||||
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE,
|
||||
si_get_gs_wave_id(ctx));
|
||||
|
||||
if (ctx->screen->info.chip_class >= GFX9)
|
||||
ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
|
||||
}
|
||||
|
||||
static void si_llvm_emit_gs_epilogue(struct ac_shader_abi *abi,
|
||||
unsigned max_outputs,
|
||||
LLVMValueRef *addrs)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
struct si_shader_info UNUSED *info = &ctx->shader->selector->info;
|
||||
|
||||
assert(info->num_outputs <= max_outputs);
|
||||
|
||||
emit_gs_epilogue(ctx);
|
||||
}
|
||||
|
||||
/* Emit one vertex from the geometry shader */
|
||||
static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
|
||||
unsigned stream,
|
||||
LLVMValueRef *addrs)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
|
||||
if (ctx->shader->key.as_ngg) {
|
||||
gfx10_ngg_gs_emit_vertex(ctx, stream, addrs);
|
||||
return;
|
||||
}
|
||||
|
||||
struct si_shader_info *info = &ctx->shader->selector->info;
|
||||
struct si_shader *shader = ctx->shader;
|
||||
LLVMValueRef soffset = ac_get_arg(&ctx->ac, ctx->gs2vs_offset);
|
||||
LLVMValueRef gs_next_vertex;
|
||||
LLVMValueRef can_emit;
|
||||
unsigned chan, offset;
|
||||
int i;
|
||||
|
||||
/* Write vertex attribute values to GSVS ring */
|
||||
gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
|
||||
ctx->gs_next_vertex[stream],
|
||||
"");
|
||||
|
||||
/* If this thread has already emitted the declared maximum number of
|
||||
* vertices, skip the write: excessive vertex emissions are not
|
||||
* supposed to have any effect.
|
||||
*
|
||||
* If the shader has no writes to memory, kill it instead. This skips
|
||||
* further memory loads and may allow LLVM to skip to the end
|
||||
* altogether.
|
||||
*/
|
||||
can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex,
|
||||
LLVMConstInt(ctx->i32,
|
||||
shader->selector->gs_max_out_vertices, 0), "");
|
||||
|
||||
bool use_kill = !info->writes_memory;
|
||||
if (use_kill) {
|
||||
ac_build_kill_if_false(&ctx->ac, can_emit);
|
||||
} else {
|
||||
ac_build_ifcc(&ctx->ac, can_emit, 6505);
|
||||
}
|
||||
|
||||
offset = 0;
|
||||
for (i = 0; i < info->num_outputs; i++) {
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
if (!(info->output_usagemask[i] & (1 << chan)) ||
|
||||
((info->output_streams[i] >> (2 * chan)) & 3) != stream)
|
||||
continue;
|
||||
|
||||
LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
|
||||
LLVMValueRef voffset =
|
||||
LLVMConstInt(ctx->i32, offset *
|
||||
shader->selector->gs_max_out_vertices, 0);
|
||||
offset++;
|
||||
|
||||
voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, "");
|
||||
voffset = LLVMBuildMul(ctx->ac.builder, voffset,
|
||||
LLVMConstInt(ctx->i32, 4, 0), "");
|
||||
|
||||
out_val = ac_to_integer(&ctx->ac, out_val);
|
||||
|
||||
ac_build_buffer_store_dword(&ctx->ac,
|
||||
ctx->gsvs_ring[stream],
|
||||
out_val, 1,
|
||||
voffset, soffset, 0,
|
||||
ac_glc | ac_slc | ac_swizzled);
|
||||
}
|
||||
}
|
||||
|
||||
gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex, ctx->i32_1, "");
|
||||
LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
|
||||
|
||||
/* Signal vertex emission if vertex data was written. */
|
||||
if (offset) {
|
||||
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
|
||||
si_get_gs_wave_id(ctx));
|
||||
}
|
||||
|
||||
if (!use_kill)
|
||||
ac_build_endif(&ctx->ac, 6505);
|
||||
}
|
||||
|
||||
/* Cut one primitive from the geometry shader */
|
||||
static void si_llvm_emit_primitive(struct ac_shader_abi *abi,
|
||||
unsigned stream)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
|
||||
if (ctx->shader->key.as_ngg) {
|
||||
LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_0, ctx->gs_curprim_verts[stream]);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Signal primitive cut */
|
||||
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8),
|
||||
si_get_gs_wave_id(ctx));
|
||||
}
|
||||
|
||||
void si_preload_esgs_ring(struct si_shader_context *ctx)
|
||||
{
|
||||
if (ctx->screen->info.chip_class <= GFX8) {
|
||||
unsigned ring =
|
||||
ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS
|
||||
: SI_ES_RING_ESGS;
|
||||
LLVMValueRef offset = LLVMConstInt(ctx->i32, ring, 0);
|
||||
LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers);
|
||||
|
||||
ctx->esgs_ring =
|
||||
ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
|
||||
} else {
|
||||
if (USE_LDS_SYMBOLS && LLVM_VERSION_MAJOR >= 9) {
|
||||
/* Declare the ESGS ring as an explicit LDS symbol. */
|
||||
si_llvm_declare_esgs_ring(ctx);
|
||||
} else {
|
||||
ac_declare_lds_as_pointer(&ctx->ac);
|
||||
ctx->esgs_ring = ctx->ac.lds;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void si_preload_gs_rings(struct si_shader_context *ctx)
|
||||
{
|
||||
const struct si_shader_selector *sel = ctx->shader->selector;
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
LLVMValueRef offset = LLVMConstInt(ctx->i32, SI_RING_GSVS, 0);
|
||||
LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers);
|
||||
LLVMValueRef base_ring = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
|
||||
|
||||
/* The conceptual layout of the GSVS ring is
|
||||
* v0c0 .. vLv0 v0c1 .. vLc1 ..
|
||||
* but the real memory layout is swizzled across
|
||||
* threads:
|
||||
* t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
|
||||
* t16v0c0 ..
|
||||
* Override the buffer descriptor accordingly.
|
||||
*/
|
||||
LLVMTypeRef v2i64 = LLVMVectorType(ctx->i64, 2);
|
||||
uint64_t stream_offset = 0;
|
||||
|
||||
for (unsigned stream = 0; stream < 4; ++stream) {
|
||||
unsigned num_components;
|
||||
unsigned stride;
|
||||
unsigned num_records;
|
||||
LLVMValueRef ring, tmp;
|
||||
|
||||
num_components = sel->info.num_stream_output_components[stream];
|
||||
if (!num_components)
|
||||
continue;
|
||||
|
||||
stride = 4 * num_components * sel->gs_max_out_vertices;
|
||||
|
||||
/* Limit on the stride field for <= GFX7. */
|
||||
assert(stride < (1 << 14));
|
||||
|
||||
num_records = ctx->ac.wave_size;
|
||||
|
||||
ring = LLVMBuildBitCast(builder, base_ring, v2i64, "");
|
||||
tmp = LLVMBuildExtractElement(builder, ring, ctx->i32_0, "");
|
||||
tmp = LLVMBuildAdd(builder, tmp,
|
||||
LLVMConstInt(ctx->i64,
|
||||
stream_offset, 0), "");
|
||||
stream_offset += stride * ctx->ac.wave_size;
|
||||
|
||||
ring = LLVMBuildInsertElement(builder, ring, tmp, ctx->i32_0, "");
|
||||
ring = LLVMBuildBitCast(builder, ring, ctx->v4i32, "");
|
||||
tmp = LLVMBuildExtractElement(builder, ring, ctx->i32_1, "");
|
||||
tmp = LLVMBuildOr(builder, tmp,
|
||||
LLVMConstInt(ctx->i32,
|
||||
S_008F04_STRIDE(stride) |
|
||||
S_008F04_SWIZZLE_ENABLE(1), 0), "");
|
||||
ring = LLVMBuildInsertElement(builder, ring, tmp, ctx->i32_1, "");
|
||||
ring = LLVMBuildInsertElement(builder, ring,
|
||||
LLVMConstInt(ctx->i32, num_records, 0),
|
||||
LLVMConstInt(ctx->i32, 2, 0), "");
|
||||
|
||||
uint32_t rsrc3 =
|
||||
S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
|
||||
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
||||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
|
||||
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
|
||||
S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */
|
||||
S_008F0C_ADD_TID_ENABLE(1);
|
||||
|
||||
if (ctx->ac.chip_class >= GFX10) {
|
||||
rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
|
||||
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
|
||||
S_008F0C_RESOURCE_LEVEL(1);
|
||||
} else {
|
||||
rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
|
||||
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
|
||||
S_008F0C_ELEMENT_SIZE(1); /* element_size = 4 (bytes) */
|
||||
}
|
||||
|
||||
ring = LLVMBuildInsertElement(builder, ring,
|
||||
LLVMConstInt(ctx->i32, rsrc3, false),
|
||||
LLVMConstInt(ctx->i32, 3, 0), "");
|
||||
|
||||
ctx->gsvs_ring[stream] = ring;
|
||||
}
|
||||
}
|
||||
|
||||
/* Generate code for the hardware VS shader stage to go with a geometry shader */
|
||||
struct si_shader *
|
||||
si_generate_gs_copy_shader(struct si_screen *sscreen,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
struct si_shader_selector *gs_selector,
|
||||
struct pipe_debug_callback *debug)
|
||||
{
|
||||
struct si_shader_context ctx;
|
||||
struct si_shader *shader;
|
||||
LLVMBuilderRef builder;
|
||||
struct si_shader_output_values outputs[SI_MAX_VS_OUTPUTS];
|
||||
struct si_shader_info *gsinfo = &gs_selector->info;
|
||||
int i;
|
||||
|
||||
|
||||
shader = CALLOC_STRUCT(si_shader);
|
||||
if (!shader)
|
||||
return NULL;
|
||||
|
||||
/* We can leave the fence as permanently signaled because the GS copy
|
||||
* shader only becomes visible globally after it has been compiled. */
|
||||
util_queue_fence_init(&shader->ready);
|
||||
|
||||
shader->selector = gs_selector;
|
||||
shader->is_gs_copy_shader = true;
|
||||
|
||||
si_llvm_context_init(&ctx, sscreen, compiler,
|
||||
si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false));
|
||||
ctx.shader = shader;
|
||||
ctx.type = PIPE_SHADER_VERTEX;
|
||||
|
||||
builder = ctx.ac.builder;
|
||||
|
||||
si_create_function(&ctx);
|
||||
|
||||
LLVMValueRef buf_ptr = ac_get_arg(&ctx.ac, ctx.rw_buffers);
|
||||
ctx.gsvs_ring[0] = ac_build_load_to_sgpr(&ctx.ac, buf_ptr,
|
||||
LLVMConstInt(ctx.i32, SI_RING_GSVS, 0));
|
||||
|
||||
LLVMValueRef voffset =
|
||||
LLVMBuildMul(ctx.ac.builder, ctx.abi.vertex_id,
|
||||
LLVMConstInt(ctx.i32, 4, 0), "");
|
||||
|
||||
/* Fetch the vertex stream ID.*/
|
||||
LLVMValueRef stream_id;
|
||||
|
||||
if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs)
|
||||
stream_id = si_unpack_param(&ctx, ctx.streamout_config, 24, 2);
|
||||
else
|
||||
stream_id = ctx.i32_0;
|
||||
|
||||
/* Fill in output information. */
|
||||
for (i = 0; i < gsinfo->num_outputs; ++i) {
|
||||
outputs[i].semantic_name = gsinfo->output_semantic_name[i];
|
||||
outputs[i].semantic_index = gsinfo->output_semantic_index[i];
|
||||
|
||||
for (int chan = 0; chan < 4; chan++) {
|
||||
outputs[i].vertex_stream[chan] =
|
||||
(gsinfo->output_streams[i] >> (2 * chan)) & 3;
|
||||
}
|
||||
}
|
||||
|
||||
LLVMBasicBlockRef end_bb;
|
||||
LLVMValueRef switch_inst;
|
||||
|
||||
end_bb = LLVMAppendBasicBlockInContext(ctx.ac.context, ctx.main_fn, "end");
|
||||
switch_inst = LLVMBuildSwitch(builder, stream_id, end_bb, 4);
|
||||
|
||||
for (int stream = 0; stream < 4; stream++) {
|
||||
LLVMBasicBlockRef bb;
|
||||
unsigned offset;
|
||||
|
||||
if (!gsinfo->num_stream_output_components[stream])
|
||||
continue;
|
||||
|
||||
if (stream > 0 && !gs_selector->so.num_outputs)
|
||||
continue;
|
||||
|
||||
bb = LLVMInsertBasicBlockInContext(ctx.ac.context, end_bb, "out");
|
||||
LLVMAddCase(switch_inst, LLVMConstInt(ctx.i32, stream, 0), bb);
|
||||
LLVMPositionBuilderAtEnd(builder, bb);
|
||||
|
||||
/* Fetch vertex data from GSVS ring */
|
||||
offset = 0;
|
||||
for (i = 0; i < gsinfo->num_outputs; ++i) {
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
if (!(gsinfo->output_usagemask[i] & (1 << chan)) ||
|
||||
outputs[i].vertex_stream[chan] != stream) {
|
||||
outputs[i].values[chan] = LLVMGetUndef(ctx.f32);
|
||||
continue;
|
||||
}
|
||||
|
||||
LLVMValueRef soffset = LLVMConstInt(ctx.i32,
|
||||
offset * gs_selector->gs_max_out_vertices * 16 * 4, 0);
|
||||
offset++;
|
||||
|
||||
outputs[i].values[chan] =
|
||||
ac_build_buffer_load(&ctx.ac,
|
||||
ctx.gsvs_ring[0], 1,
|
||||
ctx.i32_0, voffset,
|
||||
soffset, 0, ac_glc | ac_slc,
|
||||
true, false);
|
||||
}
|
||||
}
|
||||
|
||||
/* Streamout and exports. */
|
||||
if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs) {
|
||||
si_llvm_emit_streamout(&ctx, outputs,
|
||||
gsinfo->num_outputs,
|
||||
stream);
|
||||
}
|
||||
|
||||
if (stream == 0)
|
||||
si_llvm_export_vs(&ctx, outputs, gsinfo->num_outputs);
|
||||
|
||||
LLVMBuildBr(builder, end_bb);
|
||||
}
|
||||
|
||||
LLVMPositionBuilderAtEnd(builder, end_bb);
|
||||
|
||||
LLVMBuildRetVoid(ctx.ac.builder);
|
||||
|
||||
ctx.type = PIPE_SHADER_GEOMETRY; /* override for shader dumping */
|
||||
si_llvm_optimize_module(&ctx);
|
||||
|
||||
bool ok = false;
|
||||
if (si_compile_llvm(sscreen, &ctx.shader->binary,
|
||||
&ctx.shader->config, ctx.compiler,
|
||||
ctx.ac.module,
|
||||
debug, PIPE_SHADER_GEOMETRY, ctx.ac.wave_size,
|
||||
"GS Copy Shader", false) == 0) {
|
||||
if (si_can_dump_shader(sscreen, PIPE_SHADER_GEOMETRY))
|
||||
fprintf(stderr, "GS Copy Shader:\n");
|
||||
si_shader_dump(sscreen, ctx.shader, debug, stderr, true);
|
||||
|
||||
if (!ctx.shader->config.scratch_bytes_per_wave)
|
||||
ok = si_shader_binary_upload(sscreen, ctx.shader, 0);
|
||||
else
|
||||
ok = true;
|
||||
}
|
||||
|
||||
si_llvm_dispose(&ctx);
|
||||
|
||||
if (!ok) {
|
||||
FREE(shader);
|
||||
shader = NULL;
|
||||
} else {
|
||||
si_fix_resource_usage(sscreen, shader);
|
||||
}
|
||||
return shader;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the GS prolog function. Rotate the input vertices for triangle strips
|
||||
* with adjacency.
|
||||
*/
|
||||
void si_llvm_build_gs_prolog(struct si_shader_context *ctx,
|
||||
union si_shader_part_key *key)
|
||||
{
|
||||
unsigned num_sgprs, num_vgprs;
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
LLVMTypeRef returns[AC_MAX_ARGS];
|
||||
LLVMValueRef func, ret;
|
||||
|
||||
memset(&ctx->args, 0, sizeof(ctx->args));
|
||||
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
if (key->gs_prolog.states.gfx9_prev_is_vs)
|
||||
num_sgprs = 8 + GFX9_VSGS_NUM_USER_SGPR;
|
||||
else
|
||||
num_sgprs = 8 + GFX9_TESGS_NUM_USER_SGPR;
|
||||
num_vgprs = 5; /* ES inputs are not needed by GS */
|
||||
} else {
|
||||
num_sgprs = GFX6_GS_NUM_USER_SGPR + 2;
|
||||
num_vgprs = 8;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < num_sgprs; ++i) {
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
||||
returns[i] = ctx->i32;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < num_vgprs; ++i) {
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);
|
||||
returns[num_sgprs + i] = ctx->f32;
|
||||
}
|
||||
|
||||
/* Create the function. */
|
||||
si_llvm_create_func(ctx, "gs_prolog", returns, num_sgprs + num_vgprs, 0);
|
||||
func = ctx->main_fn;
|
||||
|
||||
/* Set the full EXEC mask for the prolog, because we are only fiddling
|
||||
* with registers here. The main shader part will set the correct EXEC
|
||||
* mask.
|
||||
*/
|
||||
if (ctx->screen->info.chip_class >= GFX9 && !key->gs_prolog.is_monolithic)
|
||||
ac_init_exec_full_mask(&ctx->ac);
|
||||
|
||||
/* Copy inputs to outputs. This should be no-op, as the registers match,
|
||||
* but it will prevent the compiler from overwriting them unintentionally.
|
||||
*/
|
||||
ret = ctx->return_value;
|
||||
for (unsigned i = 0; i < num_sgprs; i++) {
|
||||
LLVMValueRef p = LLVMGetParam(func, i);
|
||||
ret = LLVMBuildInsertValue(builder, ret, p, i, "");
|
||||
}
|
||||
for (unsigned i = 0; i < num_vgprs; i++) {
|
||||
LLVMValueRef p = LLVMGetParam(func, num_sgprs + i);
|
||||
p = ac_to_float(&ctx->ac, p);
|
||||
ret = LLVMBuildInsertValue(builder, ret, p, num_sgprs + i, "");
|
||||
}
|
||||
|
||||
if (key->gs_prolog.states.tri_strip_adj_fix) {
|
||||
/* Remap the input vertices for every other primitive. */
|
||||
const struct ac_arg gfx6_vtx_params[6] = {
|
||||
{ .used = true, .arg_index = num_sgprs },
|
||||
{ .used = true, .arg_index = num_sgprs + 1 },
|
||||
{ .used = true, .arg_index = num_sgprs + 3 },
|
||||
{ .used = true, .arg_index = num_sgprs + 4 },
|
||||
{ .used = true, .arg_index = num_sgprs + 5 },
|
||||
{ .used = true, .arg_index = num_sgprs + 6 },
|
||||
};
|
||||
const struct ac_arg gfx9_vtx_params[3] = {
|
||||
{ .used = true, .arg_index = num_sgprs },
|
||||
{ .used = true, .arg_index = num_sgprs + 1 },
|
||||
{ .used = true, .arg_index = num_sgprs + 4 },
|
||||
};
|
||||
LLVMValueRef vtx_in[6], vtx_out[6];
|
||||
LLVMValueRef prim_id, rotate;
|
||||
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
vtx_in[i*2] = si_unpack_param(ctx, gfx9_vtx_params[i], 0, 16);
|
||||
vtx_in[i*2+1] = si_unpack_param(ctx, gfx9_vtx_params[i], 16, 16);
|
||||
}
|
||||
} else {
|
||||
for (unsigned i = 0; i < 6; i++)
|
||||
vtx_in[i] = ac_get_arg(&ctx->ac, gfx6_vtx_params[i]);
|
||||
}
|
||||
|
||||
prim_id = LLVMGetParam(func, num_sgprs + 2);
|
||||
rotate = LLVMBuildTrunc(builder, prim_id, ctx->i1, "");
|
||||
|
||||
for (unsigned i = 0; i < 6; ++i) {
|
||||
LLVMValueRef base, rotated;
|
||||
base = vtx_in[i];
|
||||
rotated = vtx_in[(i + 4) % 6];
|
||||
vtx_out[i] = LLVMBuildSelect(builder, rotate, rotated, base, "");
|
||||
}
|
||||
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
LLVMValueRef hi, out;
|
||||
|
||||
hi = LLVMBuildShl(builder, vtx_out[i*2+1],
|
||||
LLVMConstInt(ctx->i32, 16, 0), "");
|
||||
out = LLVMBuildOr(builder, vtx_out[i*2], hi, "");
|
||||
out = ac_to_float(&ctx->ac, out);
|
||||
ret = LLVMBuildInsertValue(builder, ret, out,
|
||||
gfx9_vtx_params[i].arg_index, "");
|
||||
}
|
||||
} else {
|
||||
for (unsigned i = 0; i < 6; i++) {
|
||||
LLVMValueRef out;
|
||||
|
||||
out = ac_to_float(&ctx->ac, vtx_out[i]);
|
||||
ret = LLVMBuildInsertValue(builder, ret, out,
|
||||
gfx6_vtx_params[i].arg_index, "");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LLVMBuildRet(builder, ret);
|
||||
}
|
||||
|
||||
void si_llvm_init_gs_callbacks(struct si_shader_context *ctx)
|
||||
{
|
||||
ctx->abi.load_inputs = si_nir_load_input_gs;
|
||||
ctx->abi.emit_vertex = si_llvm_emit_vertex;
|
||||
ctx->abi.emit_primitive = si_llvm_emit_primitive;
|
||||
ctx->abi.emit_outputs = si_llvm_emit_gs_epilogue;
|
||||
}
|
||||
Reference in New Issue
Block a user