llvmpipe: Do not use coroutines when they are unnecessary
This speeds up compilation, and the generated shader code should also run faster.
Reviewed-by: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32963>
This commit is contained in:
committed by
Marge Bot
parent
28c594701e
commit
92083fc70d
@@ -327,6 +327,7 @@ generate_compute(struct llvmpipe_context *lp,
|
|||||||
LLVMValueRef grid_x_arg, grid_y_arg, grid_z_arg;
|
LLVMValueRef grid_x_arg, grid_y_arg, grid_z_arg;
|
||||||
LLVMValueRef grid_size_x_arg, grid_size_y_arg, grid_size_z_arg;
|
LLVMValueRef grid_size_x_arg, grid_size_y_arg, grid_size_z_arg;
|
||||||
LLVMValueRef work_dim_arg, draw_id_arg, thread_data_ptr, io_ptr;
|
LLVMValueRef work_dim_arg, draw_id_arg, thread_data_ptr, io_ptr;
|
||||||
|
LLVMValueRef num_subgroup_loop, partials, subgroup_id, coro_mem;
|
||||||
LLVMBasicBlockRef block;
|
LLVMBasicBlockRef block;
|
||||||
LLVMBuilderRef builder;
|
LLVMBuilderRef builder;
|
||||||
struct lp_build_sampler_soa *sampler;
|
struct lp_build_sampler_soa *sampler;
|
||||||
@@ -337,6 +338,8 @@ generate_compute(struct llvmpipe_context *lp,
|
|||||||
bool is_mesh = nir->info.stage == MESA_SHADER_MESH;
|
bool is_mesh = nir->info.stage == MESA_SHADER_MESH;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
|
bool use_coro = nir->info.uses_memory_barrier || is_mesh;
|
||||||
|
|
||||||
LLVMValueRef output_array = NULL;
|
LLVMValueRef output_array = NULL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -392,9 +395,13 @@ generate_compute(struct llvmpipe_context *lp,
|
|||||||
function = LLVMAddFunction(gallivm->module, func_name, func_type);
|
function = LLVMAddFunction(gallivm->module, func_name, func_type);
|
||||||
LLVMSetFunctionCallConv(function, LLVMCCallConv);
|
LLVMSetFunctionCallConv(function, LLVMCCallConv);
|
||||||
|
|
||||||
coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type);
|
if (use_coro) {
|
||||||
LLVMSetFunctionCallConv(coro, LLVMCCallConv);
|
coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type);
|
||||||
lp_build_coro_add_presplit(coro);
|
LLVMSetFunctionCallConv(coro, LLVMCCallConv);
|
||||||
|
lp_build_coro_add_presplit(coro);
|
||||||
|
} else {
|
||||||
|
coro = function;
|
||||||
|
}
|
||||||
|
|
||||||
variant->function = function;
|
variant->function = function;
|
||||||
variant->function_name = MALLOC(strlen(func_name)+1);
|
variant->function_name = MALLOC(strlen(func_name)+1);
|
||||||
@@ -403,7 +410,8 @@ generate_compute(struct llvmpipe_context *lp,
|
|||||||
|
|
||||||
for (i = 0; i < CS_ARG_MAX - !is_mesh; ++i) {
|
for (i = 0; i < CS_ARG_MAX - !is_mesh; ++i) {
|
||||||
if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
|
if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
|
||||||
lp_add_function_attr(coro, i + 1, LP_FUNC_ATTR_NOALIAS);
|
if (use_coro)
|
||||||
|
lp_add_function_attr(coro, i + 1, LP_FUNC_ATTR_NOALIAS);
|
||||||
if (i < CS_ARG_OUTER_COUNT)
|
if (i < CS_ARG_OUTER_COUNT)
|
||||||
lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
|
lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
|
||||||
}
|
}
|
||||||
@@ -411,7 +419,8 @@ generate_compute(struct llvmpipe_context *lp,
|
|||||||
|
|
||||||
if (variant->gallivm->cache->data_size) {
|
if (variant->gallivm->cache->data_size) {
|
||||||
gallivm_stub_func(gallivm, function);
|
gallivm_stub_func(gallivm, function);
|
||||||
gallivm_stub_func(gallivm, coro);
|
if (use_coro)
|
||||||
|
gallivm_stub_func(gallivm, coro);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -560,140 +569,161 @@ generate_compute(struct llvmpipe_context *lp,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
|
LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
|
||||||
builder = gallivm->builder;
|
|
||||||
assert(builder);
|
|
||||||
LLVMPositionBuilderAtEnd(builder, block);
|
|
||||||
|
|
||||||
if (is_mesh) {
|
|
||||||
LLVMTypeRef output_type = create_mesh_jit_output_type_deref(gallivm);
|
|
||||||
output_array = lp_build_array_alloca(gallivm, output_type, lp_build_const_int32(gallivm, align(MAX2(nir->info.mesh.max_primitives_out, nir->info.mesh.max_vertices_out), 8)), "outputs");
|
|
||||||
}
|
|
||||||
|
|
||||||
struct lp_build_loop_state loop_state[2];
|
|
||||||
|
|
||||||
LLVMValueRef vec_length = lp_build_const_int32(gallivm, cs_type.length);
|
LLVMValueRef vec_length = lp_build_const_int32(gallivm, cs_type.length);
|
||||||
|
|
||||||
LLVMValueRef invocation_count = LLVMBuildMul(gallivm->builder, block_x_size_arg, block_y_size_arg, "");
|
if (use_coro) {
|
||||||
invocation_count = LLVMBuildMul(gallivm->builder, invocation_count, block_z_size_arg, "");
|
block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
|
||||||
|
builder = gallivm->builder;
|
||||||
|
assert(builder);
|
||||||
|
LLVMPositionBuilderAtEnd(builder, block);
|
||||||
|
|
||||||
LLVMValueRef partials = LLVMBuildURem(gallivm->builder, invocation_count, vec_length, "");
|
if (is_mesh) {
|
||||||
|
LLVMTypeRef output_type = create_mesh_jit_output_type_deref(gallivm);
|
||||||
|
output_array = lp_build_array_alloca(gallivm, output_type, lp_build_const_int32(gallivm, align(MAX2(nir->info.mesh.max_primitives_out, nir->info.mesh.max_vertices_out), 8)), "outputs");
|
||||||
|
}
|
||||||
|
|
||||||
LLVMValueRef num_subgroup_loop = LLVMBuildAdd(gallivm->builder, invocation_count, lp_build_const_int32(gallivm, cs_type.length - 1), "");
|
struct lp_build_loop_state loop_state[2];
|
||||||
num_subgroup_loop = LLVMBuildUDiv(gallivm->builder, num_subgroup_loop, vec_length, "");
|
|
||||||
|
|
||||||
/* build a ptr in memory to store all the frames in later. */
|
LLVMValueRef invocation_count = LLVMBuildMul(gallivm->builder, block_x_size_arg, block_y_size_arg, "");
|
||||||
LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
|
invocation_count = LLVMBuildMul(gallivm->builder, invocation_count, block_z_size_arg, "");
|
||||||
LLVMValueRef coro_mem = LLVMBuildAlloca(gallivm->builder, hdl_ptr_type, "coro_mem");
|
|
||||||
LLVMBuildStore(builder, LLVMConstNull(hdl_ptr_type), coro_mem);
|
|
||||||
|
|
||||||
LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, num_subgroup_loop, "coro_hdls");
|
partials = LLVMBuildURem(gallivm->builder, invocation_count, vec_length, "");
|
||||||
|
|
||||||
unsigned end_coroutine = INT_MAX;
|
num_subgroup_loop = LLVMBuildAdd(gallivm->builder, invocation_count, lp_build_const_int32(gallivm, cs_type.length - 1), "");
|
||||||
|
num_subgroup_loop = LLVMBuildUDiv(gallivm->builder, num_subgroup_loop, vec_length, "");
|
||||||
|
|
||||||
/*
|
/* build a ptr in memory to store all the frames in later. */
|
||||||
* This is the main coroutine execution loop. It iterates over the dimensions
|
coro_mem = LLVMBuildAlloca(gallivm->builder, hdl_ptr_type, "coro_mem");
|
||||||
* and calls the coroutine main entrypoint on the first pass, but in subsequent
|
LLVMBuildStore(builder, LLVMConstNull(hdl_ptr_type), coro_mem);
|
||||||
* passes it checks if the coroutine has completed and resumes it if not.
|
|
||||||
*/
|
|
||||||
lp_build_loop_begin(&loop_state[1], gallivm,
|
|
||||||
lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */
|
|
||||||
lp_build_loop_begin(&loop_state[0], gallivm,
|
|
||||||
lp_build_const_int32(gallivm, 0)); /* subgroup loop */
|
|
||||||
{
|
|
||||||
LLVMValueRef args[CS_ARG_MAX];
|
|
||||||
args[CS_ARG_CONTEXT] = context_ptr;
|
|
||||||
args[CS_ARG_RESOURCES] = resources_ptr;
|
|
||||||
args[CS_ARG_BLOCK_X_SIZE] = LLVMGetUndef(int32_type);
|
|
||||||
args[CS_ARG_BLOCK_Y_SIZE] = LLVMGetUndef(int32_type);
|
|
||||||
args[CS_ARG_BLOCK_Z_SIZE] = LLVMGetUndef(int32_type);
|
|
||||||
args[CS_ARG_GRID_X] = grid_x_arg;
|
|
||||||
args[CS_ARG_GRID_Y] = grid_y_arg;
|
|
||||||
args[CS_ARG_GRID_Z] = grid_z_arg;
|
|
||||||
args[CS_ARG_GRID_SIZE_X] = grid_size_x_arg;
|
|
||||||
args[CS_ARG_GRID_SIZE_Y] = grid_size_y_arg;
|
|
||||||
args[CS_ARG_GRID_SIZE_Z] = grid_size_z_arg;
|
|
||||||
args[CS_ARG_WORK_DIM] = work_dim_arg;
|
|
||||||
args[CS_ARG_DRAW_ID] = draw_id_arg;
|
|
||||||
args[CS_ARG_VERTEX_DATA] = io_ptr;
|
|
||||||
args[CS_ARG_PER_THREAD_DATA] = thread_data_ptr;
|
|
||||||
args[CS_ARG_CORO_SUBGROUP_COUNT] = num_subgroup_loop;
|
|
||||||
args[CS_ARG_CORO_PARTIALS] = partials;
|
|
||||||
args[CS_ARG_CORO_BLOCK_X_SIZE] = block_x_size_arg;
|
|
||||||
args[CS_ARG_CORO_BLOCK_Y_SIZE] = block_y_size_arg;
|
|
||||||
args[CS_ARG_CORO_BLOCK_Z_SIZE] = block_z_size_arg;
|
|
||||||
|
|
||||||
args[CS_ARG_CORO_IDX] = loop_state[0].counter;
|
LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, num_subgroup_loop, "coro_hdls");
|
||||||
|
|
||||||
args[CS_ARG_CORO_MEM] = coro_mem;
|
unsigned end_coroutine = INT_MAX;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is the main coroutine execution loop. It iterates over the dimensions
|
||||||
|
* and calls the coroutine main entrypoint on the first pass, but in subsequent
|
||||||
|
* passes it checks if the coroutine has completed and resumes it if not.
|
||||||
|
*/
|
||||||
|
lp_build_loop_begin(&loop_state[1], gallivm,
|
||||||
|
lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */
|
||||||
|
lp_build_loop_begin(&loop_state[0], gallivm,
|
||||||
|
lp_build_const_int32(gallivm, 0)); /* subgroup loop */
|
||||||
|
{
|
||||||
|
LLVMValueRef args[CS_ARG_MAX];
|
||||||
|
args[CS_ARG_CONTEXT] = context_ptr;
|
||||||
|
args[CS_ARG_RESOURCES] = resources_ptr;
|
||||||
|
args[CS_ARG_BLOCK_X_SIZE] = LLVMGetUndef(int32_type);
|
||||||
|
args[CS_ARG_BLOCK_Y_SIZE] = LLVMGetUndef(int32_type);
|
||||||
|
args[CS_ARG_BLOCK_Z_SIZE] = LLVMGetUndef(int32_type);
|
||||||
|
args[CS_ARG_GRID_X] = grid_x_arg;
|
||||||
|
args[CS_ARG_GRID_Y] = grid_y_arg;
|
||||||
|
args[CS_ARG_GRID_Z] = grid_z_arg;
|
||||||
|
args[CS_ARG_GRID_SIZE_X] = grid_size_x_arg;
|
||||||
|
args[CS_ARG_GRID_SIZE_Y] = grid_size_y_arg;
|
||||||
|
args[CS_ARG_GRID_SIZE_Z] = grid_size_z_arg;
|
||||||
|
args[CS_ARG_WORK_DIM] = work_dim_arg;
|
||||||
|
args[CS_ARG_DRAW_ID] = draw_id_arg;
|
||||||
|
args[CS_ARG_VERTEX_DATA] = io_ptr;
|
||||||
|
args[CS_ARG_PER_THREAD_DATA] = thread_data_ptr;
|
||||||
|
args[CS_ARG_CORO_SUBGROUP_COUNT] = num_subgroup_loop;
|
||||||
|
args[CS_ARG_CORO_PARTIALS] = partials;
|
||||||
|
args[CS_ARG_CORO_BLOCK_X_SIZE] = block_x_size_arg;
|
||||||
|
args[CS_ARG_CORO_BLOCK_Y_SIZE] = block_y_size_arg;
|
||||||
|
args[CS_ARG_CORO_BLOCK_Z_SIZE] = block_z_size_arg;
|
||||||
|
|
||||||
|
args[CS_ARG_CORO_IDX] = loop_state[0].counter;
|
||||||
|
|
||||||
|
args[CS_ARG_CORO_MEM] = coro_mem;
|
||||||
|
|
||||||
|
if (is_mesh)
|
||||||
|
args[CS_ARG_CORO_OUTPUTS] = output_array;
|
||||||
|
|
||||||
|
LLVMValueRef coro_entry = LLVMBuildGEP2(gallivm->builder, hdl_ptr_type, coro_hdls, &loop_state[0].counter, 1, "");
|
||||||
|
|
||||||
|
LLVMValueRef coro_hdl = LLVMBuildLoad2(gallivm->builder, hdl_ptr_type, coro_entry, "coro_hdl");
|
||||||
|
|
||||||
|
struct lp_build_if_state ifstate;
|
||||||
|
LLVMValueRef cmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, loop_state[1].counter,
|
||||||
|
lp_build_const_int32(gallivm, 0), "");
|
||||||
|
/* first time here - call the coroutine function entry point */
|
||||||
|
lp_build_if(&ifstate, gallivm, cmp);
|
||||||
|
LLVMValueRef coro_ret = LLVMBuildCall2(gallivm->builder, coro_func_type, coro, args, CS_ARG_MAX - !is_mesh, "");
|
||||||
|
LLVMBuildStore(gallivm->builder, coro_ret, coro_entry);
|
||||||
|
lp_build_else(&ifstate);
|
||||||
|
/* subsequent calls for this invocation - check if done. */
|
||||||
|
LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl);
|
||||||
|
struct lp_build_if_state ifstate2;
|
||||||
|
lp_build_if(&ifstate2, gallivm, coro_done);
|
||||||
|
/* if done destroy and force loop exit */
|
||||||
|
lp_build_coro_destroy(gallivm, coro_hdl);
|
||||||
|
lp_build_loop_force_set_counter(&loop_state[1], lp_build_const_int32(gallivm, end_coroutine - 1));
|
||||||
|
lp_build_else(&ifstate2);
|
||||||
|
/* otherwise resume the coroutine */
|
||||||
|
lp_build_coro_resume(gallivm, coro_hdl);
|
||||||
|
lp_build_endif(&ifstate2);
|
||||||
|
lp_build_endif(&ifstate);
|
||||||
|
lp_build_loop_force_reload_counter(&loop_state[1]);
|
||||||
|
}
|
||||||
|
lp_build_loop_end_cond(&loop_state[0],
|
||||||
|
num_subgroup_loop,
|
||||||
|
NULL, LLVMIntUGE);
|
||||||
|
lp_build_loop_end_cond(&loop_state[1],
|
||||||
|
lp_build_const_int32(gallivm, end_coroutine),
|
||||||
|
NULL, LLVMIntEQ);
|
||||||
|
|
||||||
|
LLVMValueRef coro_mem_ptr = LLVMBuildLoad2(builder, hdl_ptr_type, coro_mem, "");
|
||||||
|
LLVMTypeRef mem_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
|
||||||
|
LLVMTypeRef free_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), &mem_ptr_type, 1, 0);
|
||||||
|
LLVMBuildCall2(gallivm->builder, free_type, gallivm->coro_free_hook, &coro_mem_ptr, 1, "");
|
||||||
|
|
||||||
|
LLVMBuildRetVoid(builder);
|
||||||
|
|
||||||
|
/* This is stage (b) - generate the compute shader code inside the coroutine. */
|
||||||
|
context_ptr = LLVMGetParam(coro, CS_ARG_CONTEXT);
|
||||||
|
resources_ptr = LLVMGetParam(coro, CS_ARG_RESOURCES);
|
||||||
|
grid_x_arg = LLVMGetParam(coro, CS_ARG_GRID_X);
|
||||||
|
grid_y_arg = LLVMGetParam(coro, CS_ARG_GRID_Y);
|
||||||
|
grid_z_arg = LLVMGetParam(coro, CS_ARG_GRID_Z);
|
||||||
|
grid_size_x_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_X);
|
||||||
|
grid_size_y_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_Y);
|
||||||
|
grid_size_z_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_Z);
|
||||||
|
work_dim_arg = LLVMGetParam(coro, CS_ARG_WORK_DIM);
|
||||||
|
draw_id_arg = LLVMGetParam(coro, CS_ARG_DRAW_ID);
|
||||||
|
io_ptr = LLVMGetParam(coro, CS_ARG_VERTEX_DATA);
|
||||||
|
thread_data_ptr = LLVMGetParam(coro, CS_ARG_PER_THREAD_DATA);
|
||||||
|
num_subgroup_loop = LLVMGetParam(coro, CS_ARG_CORO_SUBGROUP_COUNT);
|
||||||
|
partials = LLVMGetParam(coro, CS_ARG_CORO_PARTIALS);
|
||||||
|
block_x_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_X_SIZE);
|
||||||
|
block_y_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_Y_SIZE);
|
||||||
|
block_z_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_Z_SIZE);
|
||||||
|
subgroup_id = LLVMGetParam(coro, CS_ARG_CORO_IDX);
|
||||||
|
coro_mem = LLVMGetParam(coro, CS_ARG_CORO_MEM);
|
||||||
if (is_mesh)
|
if (is_mesh)
|
||||||
args[CS_ARG_CORO_OUTPUTS] = output_array;
|
output_array = LLVMGetParam(coro, CS_ARG_CORO_OUTPUTS);
|
||||||
|
|
||||||
LLVMValueRef coro_entry = LLVMBuildGEP2(gallivm->builder, hdl_ptr_type, coro_hdls, &loop_state[0].counter, 1, "");
|
|
||||||
|
|
||||||
LLVMValueRef coro_hdl = LLVMBuildLoad2(gallivm->builder, hdl_ptr_type, coro_entry, "coro_hdl");
|
|
||||||
|
|
||||||
struct lp_build_if_state ifstate;
|
|
||||||
LLVMValueRef cmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, loop_state[1].counter,
|
|
||||||
lp_build_const_int32(gallivm, 0), "");
|
|
||||||
/* first time here - call the coroutine function entry point */
|
|
||||||
lp_build_if(&ifstate, gallivm, cmp);
|
|
||||||
LLVMValueRef coro_ret = LLVMBuildCall2(gallivm->builder, coro_func_type, coro, args, CS_ARG_MAX - !is_mesh, "");
|
|
||||||
LLVMBuildStore(gallivm->builder, coro_ret, coro_entry);
|
|
||||||
lp_build_else(&ifstate);
|
|
||||||
/* subsequent calls for this invocation - check if done. */
|
|
||||||
LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl);
|
|
||||||
struct lp_build_if_state ifstate2;
|
|
||||||
lp_build_if(&ifstate2, gallivm, coro_done);
|
|
||||||
/* if done destroy and force loop exit */
|
|
||||||
lp_build_coro_destroy(gallivm, coro_hdl);
|
|
||||||
lp_build_loop_force_set_counter(&loop_state[1], lp_build_const_int32(gallivm, end_coroutine - 1));
|
|
||||||
lp_build_else(&ifstate2);
|
|
||||||
/* otherwise resume the coroutine */
|
|
||||||
lp_build_coro_resume(gallivm, coro_hdl);
|
|
||||||
lp_build_endif(&ifstate2);
|
|
||||||
lp_build_endif(&ifstate);
|
|
||||||
lp_build_loop_force_reload_counter(&loop_state[1]);
|
|
||||||
}
|
}
|
||||||
lp_build_loop_end_cond(&loop_state[0],
|
|
||||||
num_subgroup_loop,
|
|
||||||
NULL, LLVMIntUGE);
|
|
||||||
lp_build_loop_end_cond(&loop_state[1],
|
|
||||||
lp_build_const_int32(gallivm, end_coroutine),
|
|
||||||
NULL, LLVMIntEQ);
|
|
||||||
|
|
||||||
LLVMValueRef coro_mem_ptr = LLVMBuildLoad2(builder, hdl_ptr_type, coro_mem, "");
|
|
||||||
LLVMTypeRef mem_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
|
|
||||||
LLVMTypeRef free_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), &mem_ptr_type, 1, 0);
|
|
||||||
LLVMBuildCall2(gallivm->builder, free_type, gallivm->coro_free_hook, &coro_mem_ptr, 1, "");
|
|
||||||
|
|
||||||
LLVMBuildRetVoid(builder);
|
|
||||||
|
|
||||||
/* This is stage (b) - generate the compute shader code inside the coroutine. */
|
|
||||||
context_ptr = LLVMGetParam(coro, CS_ARG_CONTEXT);
|
|
||||||
resources_ptr = LLVMGetParam(coro, CS_ARG_RESOURCES);
|
|
||||||
grid_x_arg = LLVMGetParam(coro, CS_ARG_GRID_X);
|
|
||||||
grid_y_arg = LLVMGetParam(coro, CS_ARG_GRID_Y);
|
|
||||||
grid_z_arg = LLVMGetParam(coro, CS_ARG_GRID_Z);
|
|
||||||
grid_size_x_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_X);
|
|
||||||
grid_size_y_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_Y);
|
|
||||||
grid_size_z_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_Z);
|
|
||||||
work_dim_arg = LLVMGetParam(coro, CS_ARG_WORK_DIM);
|
|
||||||
draw_id_arg = LLVMGetParam(coro, CS_ARG_DRAW_ID);
|
|
||||||
io_ptr = LLVMGetParam(coro, CS_ARG_VERTEX_DATA);
|
|
||||||
thread_data_ptr = LLVMGetParam(coro, CS_ARG_PER_THREAD_DATA);
|
|
||||||
num_subgroup_loop = LLVMGetParam(coro, CS_ARG_CORO_SUBGROUP_COUNT);
|
|
||||||
partials = LLVMGetParam(coro, CS_ARG_CORO_PARTIALS);
|
|
||||||
block_x_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_X_SIZE);
|
|
||||||
block_y_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_Y_SIZE);
|
|
||||||
block_z_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_Z_SIZE);
|
|
||||||
LLVMValueRef subgroup_id = LLVMGetParam(coro, CS_ARG_CORO_IDX);
|
|
||||||
coro_mem = LLVMGetParam(coro, CS_ARG_CORO_MEM);
|
|
||||||
if (is_mesh)
|
|
||||||
output_array = LLVMGetParam(coro, CS_ARG_CORO_OUTPUTS);
|
|
||||||
block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "entry");
|
block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "entry");
|
||||||
|
builder = gallivm->builder;
|
||||||
LLVMPositionBuilderAtEnd(builder, block);
|
LLVMPositionBuilderAtEnd(builder, block);
|
||||||
|
|
||||||
|
struct lp_build_loop_state loop_state;
|
||||||
|
|
||||||
|
if (!use_coro) {
|
||||||
|
LLVMValueRef invocation_count = LLVMBuildMul(gallivm->builder, block_x_size_arg, block_y_size_arg, "");
|
||||||
|
invocation_count = LLVMBuildMul(gallivm->builder, invocation_count, block_z_size_arg, "");
|
||||||
|
|
||||||
|
partials = LLVMBuildURem(gallivm->builder, invocation_count, vec_length, "");
|
||||||
|
|
||||||
|
num_subgroup_loop = LLVMBuildAdd(gallivm->builder, invocation_count, lp_build_const_int32(gallivm, cs_type.length - 1), "");
|
||||||
|
num_subgroup_loop = LLVMBuildUDiv(gallivm->builder, num_subgroup_loop, vec_length, "");
|
||||||
|
|
||||||
|
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
|
||||||
|
|
||||||
|
subgroup_id = loop_state.counter;
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
LLVMValueRef consts_ptr;
|
LLVMValueRef consts_ptr;
|
||||||
LLVMValueRef ssbo_ptr;
|
LLVMValueRef ssbo_ptr;
|
||||||
@@ -718,12 +748,16 @@ generate_compute(struct llvmpipe_context *lp,
|
|||||||
thread_data_ptr);
|
thread_data_ptr);
|
||||||
|
|
||||||
/* these are coroutine entrypoint necessities */
|
/* these are coroutine entrypoint necessities */
|
||||||
LLVMValueRef coro_id = lp_build_coro_id(gallivm);
|
LLVMValueRef coro_hdl = NULL;
|
||||||
LLVMValueRef coro_entry = lp_build_coro_alloc_mem_array(gallivm, coro_mem, subgroup_id, num_subgroup_loop);
|
if (use_coro) {
|
||||||
LLVMTypeRef mem_ptr_type = LLVMInt8TypeInContext(gallivm->context);
|
LLVMValueRef coro_id = lp_build_coro_id(gallivm);
|
||||||
LLVMValueRef alloced_ptr = LLVMBuildLoad2(gallivm->builder, hdl_ptr_type, coro_mem, "");
|
LLVMValueRef coro_entry = lp_build_coro_alloc_mem_array(gallivm, coro_mem, subgroup_id, num_subgroup_loop);
|
||||||
alloced_ptr = LLVMBuildGEP2(gallivm->builder, mem_ptr_type, alloced_ptr, &coro_entry, 1, "");
|
LLVMTypeRef mem_ptr_type = LLVMInt8TypeInContext(gallivm->context);
|
||||||
LLVMValueRef coro_hdl = lp_build_coro_begin(gallivm, coro_id, alloced_ptr);
|
LLVMValueRef alloced_ptr = LLVMBuildLoad2(gallivm->builder, hdl_ptr_type, coro_mem, "");
|
||||||
|
alloced_ptr = LLVMBuildGEP2(gallivm->builder, mem_ptr_type, alloced_ptr, &coro_entry, 1, "");
|
||||||
|
coro_hdl = lp_build_coro_begin(gallivm, coro_id, alloced_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
LLVMValueRef has_partials = LLVMBuildICmp(gallivm->builder, LLVMIntNE, partials, lp_build_const_int32(gallivm, 0), "");
|
LLVMValueRef has_partials = LLVMBuildICmp(gallivm->builder, LLVMIntNE, partials, lp_build_const_int32(gallivm, 0), "");
|
||||||
|
|
||||||
struct lp_build_context bld;
|
struct lp_build_context bld;
|
||||||
@@ -798,13 +832,11 @@ generate_compute(struct llvmpipe_context *lp,
|
|||||||
mask_val = LLVMBuildLoad2(gallivm->builder, mask_type, mask_val, "");
|
mask_val = LLVMBuildLoad2(gallivm->builder, mask_type, mask_val, "");
|
||||||
lp_build_mask_begin(&mask, gallivm, cs_type, mask_val);
|
lp_build_mask_begin(&mask, gallivm, cs_type, mask_val);
|
||||||
|
|
||||||
struct lp_build_coro_suspend_info coro_info;
|
struct lp_build_coro_suspend_info coro_info = {0};
|
||||||
|
if (use_coro) {
|
||||||
LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "suspend");
|
coro_info.suspend = LLVMAppendBasicBlockInContext(gallivm->context, coro, "suspend");
|
||||||
LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "cleanup");
|
coro_info.cleanup = LLVMAppendBasicBlockInContext(gallivm->context, coro, "cleanup");
|
||||||
|
}
|
||||||
coro_info.suspend = sus_block;
|
|
||||||
coro_info.cleanup = clean_block;
|
|
||||||
|
|
||||||
if (is_mesh) {
|
if (is_mesh) {
|
||||||
LLVMValueRef vertex_count = lp_build_alloca(gallivm, LLVMInt32TypeInContext(gallivm->context), "vertex_count");
|
LLVMValueRef vertex_count = lp_build_alloca(gallivm, LLVMInt32TypeInContext(gallivm->context), "vertex_count");
|
||||||
@@ -908,16 +940,23 @@ generate_compute(struct llvmpipe_context *lp,
|
|||||||
NULL, LLVMIntUGE);
|
NULL, LLVMIntUGE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!use_coro)
|
||||||
|
lp_build_loop_end_cond(&loop_state, num_subgroup_loop, NULL, LLVMIntUGE);
|
||||||
|
|
||||||
mask_val = lp_build_mask_end(&mask);
|
mask_val = lp_build_mask_end(&mask);
|
||||||
|
|
||||||
lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true);
|
if (use_coro) {
|
||||||
LLVMPositionBuilderAtEnd(builder, clean_block);
|
lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true);
|
||||||
|
LLVMPositionBuilderAtEnd(builder, coro_info.cleanup);
|
||||||
|
|
||||||
LLVMBuildBr(builder, sus_block);
|
LLVMBuildBr(builder, coro_info.suspend);
|
||||||
LLVMPositionBuilderAtEnd(builder, sus_block);
|
LLVMPositionBuilderAtEnd(builder, coro_info.suspend);
|
||||||
|
|
||||||
lp_build_coro_end(gallivm, coro_hdl);
|
lp_build_coro_end(gallivm, coro_hdl);
|
||||||
LLVMBuildRet(builder, coro_hdl);
|
LLVMBuildRet(builder, coro_hdl);
|
||||||
|
} else {
|
||||||
|
LLVMBuildRetVoid(builder);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lp_bld_llvm_sampler_soa_destroy(sampler);
|
lp_bld_llvm_sampler_soa_destroy(sampler);
|
||||||
|
|||||||
Reference in New Issue
Block a user