gallivm: Only guard tex/image ops if the exec mask can be zero
Reviewed-by: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32963>
This commit is contained in:
committed by
Marge Bot
parent
d9db40208d
commit
bd88edcf81
@@ -156,23 +156,25 @@ lp_bld_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base
|
||||
LLVMTypeRef out_residency_type = lp_build_vec_type(gallivm, lp_int_type(params->type));
|
||||
|
||||
LLVMValueRef out_data[5];
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(out_data) - 1; i++) {
|
||||
out_data[i] = lp_build_alloca(gallivm, out_data_type, "");
|
||||
}
|
||||
out_data[ARRAY_SIZE(out_data) - 1] = lp_build_alloca(gallivm, out_residency_type, "");
|
||||
|
||||
struct lp_type uint_type = lp_uint_type(params->type);
|
||||
LLVMValueRef uint_zero = lp_build_const_int_vec(gallivm, uint_type, 0);
|
||||
|
||||
LLVMValueRef bitmask = LLVMBuildICmp(builder, LLVMIntNE, params->exec_mask, uint_zero, "exec_bitvec");
|
||||
|
||||
LLVMTypeRef bitmask_type = LLVMIntTypeInContext(gallivm->context, uint_type.length);
|
||||
bitmask = LLVMBuildBitCast(builder, bitmask, bitmask_type, "exec_bitmask");
|
||||
|
||||
LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, LLVMConstInt(bitmask_type, 0, false), "any_active");
|
||||
|
||||
struct lp_build_if_state if_state;
|
||||
lp_build_if(&if_state, gallivm, any_active);
|
||||
if (!params->exec_mask_nz) {
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(out_data) - 1; i++) {
|
||||
out_data[i] = lp_build_alloca(gallivm, out_data_type, "");
|
||||
}
|
||||
out_data[ARRAY_SIZE(out_data) - 1] = lp_build_alloca(gallivm, out_residency_type, "");
|
||||
|
||||
struct lp_type uint_type = lp_uint_type(params->type);
|
||||
LLVMValueRef uint_zero = lp_build_const_int_vec(gallivm, uint_type, 0);
|
||||
|
||||
LLVMValueRef bitmask = LLVMBuildICmp(builder, LLVMIntNE, params->exec_mask, uint_zero, "exec_bitvec");
|
||||
|
||||
LLVMTypeRef bitmask_type = LLVMIntTypeInContext(gallivm->context, uint_type.length);
|
||||
bitmask = LLVMBuildBitCast(builder, bitmask, bitmask_type, "exec_bitmask");
|
||||
|
||||
LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, LLVMConstInt(bitmask_type, 0, false), "any_active");
|
||||
|
||||
lp_build_if(&if_state, gallivm, any_active);
|
||||
}
|
||||
|
||||
enum lp_sampler_op_type op_type = (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >> LP_SAMPLER_OP_TYPE_SHIFT;
|
||||
uint32_t functions_offset = op_type == LP_SAMPLER_OP_FETCH ? offsetof(struct lp_texture_functions, fetch_functions)
|
||||
@@ -265,15 +267,18 @@ lp_bld_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base
|
||||
if (params->type.length != lp_native_vector_width / 32)
|
||||
params->texel[i] = truncate_to_type_width(gallivm, params->texel[i], params->type);
|
||||
|
||||
LLVMBuildStore(builder, params->texel[i], out_data[i]);
|
||||
if (!params->exec_mask_nz)
|
||||
LLVMBuildStore(builder, params->texel[i], out_data[i]);
|
||||
}
|
||||
|
||||
lp_build_endif(&if_state);
|
||||
if (!params->exec_mask_nz) {
|
||||
lp_build_endif(&if_state);
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(out_data) - 1; i++)
|
||||
params->texel[i] = LLVMBuildLoad2(builder, out_data_type, out_data[i], "");
|
||||
params->texel[ARRAY_SIZE(out_data) - 1] =
|
||||
LLVMBuildLoad2(builder, out_residency_type, out_data[ARRAY_SIZE(out_data) - 1], "");
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(out_data) - 1; i++)
|
||||
params->texel[i] = LLVMBuildLoad2(builder, out_data_type, out_data[i], "");
|
||||
params->texel[ARRAY_SIZE(out_data) - 1] =
|
||||
LLVMBuildLoad2(builder, out_residency_type, out_data[ARRAY_SIZE(out_data) - 1], "");
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
@@ -330,22 +335,24 @@ lp_bld_llvm_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base,
|
||||
LLVMTypeRef out_data_type = lp_build_vec_type(gallivm, params->int_type);
|
||||
|
||||
LLVMValueRef out_data[4];
|
||||
for (uint32_t i = 0; i < 4; i++) {
|
||||
out_data[i] = lp_build_alloca(gallivm, out_data_type, "");
|
||||
}
|
||||
|
||||
struct lp_type uint_type = lp_uint_type(params->int_type);
|
||||
LLVMValueRef uint_zero = lp_build_const_int_vec(gallivm, uint_type, 0);
|
||||
|
||||
LLVMValueRef bitmask = LLVMBuildICmp(builder, LLVMIntNE, params->exec_mask, uint_zero, "exec_bitvec");
|
||||
|
||||
LLVMTypeRef bitmask_type = LLVMIntTypeInContext(gallivm->context, uint_type.length);
|
||||
bitmask = LLVMBuildBitCast(builder, bitmask, bitmask_type, "exec_bitmask");
|
||||
|
||||
LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, LLVMConstInt(bitmask_type, 0, false), "any_active");
|
||||
|
||||
struct lp_build_if_state if_state;
|
||||
lp_build_if(&if_state, gallivm, any_active);
|
||||
if (!params->exec_mask_nz) {
|
||||
for (uint32_t i = 0; i < 4; i++) {
|
||||
out_data[i] = lp_build_alloca(gallivm, out_data_type, "");
|
||||
}
|
||||
|
||||
struct lp_type uint_type = lp_uint_type(params->int_type);
|
||||
LLVMValueRef uint_zero = lp_build_const_int_vec(gallivm, uint_type, 0);
|
||||
|
||||
LLVMValueRef bitmask = LLVMBuildICmp(builder, LLVMIntNE, params->exec_mask, uint_zero, "exec_bitvec");
|
||||
|
||||
LLVMTypeRef bitmask_type = LLVMIntTypeInContext(gallivm->context, uint_type.length);
|
||||
bitmask = LLVMBuildBitCast(builder, bitmask, bitmask_type, "exec_bitmask");
|
||||
|
||||
LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, LLVMConstInt(bitmask_type, 0, false), "any_active");
|
||||
|
||||
lp_build_if(&if_state, gallivm, any_active);
|
||||
}
|
||||
|
||||
uint32_t functions_offset = params->samples_only ? offsetof(struct lp_texture_functions, samples_function)
|
||||
: offsetof(struct lp_texture_functions, size_function);
|
||||
@@ -380,13 +387,16 @@ lp_bld_llvm_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base,
|
||||
if (params->int_type.length != lp_native_vector_width / 32)
|
||||
params->sizes_out[i] = truncate_to_type_width(gallivm, params->sizes_out[i], params->int_type);
|
||||
|
||||
LLVMBuildStore(builder, params->sizes_out[i], out_data[i]);
|
||||
if (!params->exec_mask_nz)
|
||||
LLVMBuildStore(builder, params->sizes_out[i], out_data[i]);
|
||||
}
|
||||
|
||||
lp_build_endif(&if_state);
|
||||
if (!params->exec_mask_nz) {
|
||||
lp_build_endif(&if_state);
|
||||
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
params->sizes_out[i] = LLVMBuildLoad2(gallivm->builder, out_data_type, out_data[i], "");
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
params->sizes_out[i] = LLVMBuildLoad2(gallivm->builder, out_data_type, out_data[i], "");
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
@@ -438,23 +448,25 @@ lp_bld_llvm_image_soa_emit_op(const struct lp_build_image_soa *base,
|
||||
LLVMTypeRef out_residency_type = lp_build_vec_type(gallivm, lp_int_type(texel_type));
|
||||
|
||||
LLVMValueRef out_data[5];
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(out_data) - 1; i++) {
|
||||
out_data[i] = lp_build_alloca(gallivm, out_data_type, "");
|
||||
}
|
||||
out_data[ARRAY_SIZE(out_data) - 1] = lp_build_alloca(gallivm, out_residency_type, "");
|
||||
|
||||
struct lp_type uint_type = lp_uint_type(params->type);
|
||||
LLVMValueRef uint_zero = lp_build_const_int_vec(gallivm, uint_type, 0);
|
||||
|
||||
LLVMValueRef bitmask = LLVMBuildICmp(builder, LLVMIntNE, params->exec_mask, uint_zero, "exec_bitvec");
|
||||
|
||||
LLVMTypeRef bitmask_type = LLVMIntTypeInContext(gallivm->context, uint_type.length);
|
||||
bitmask = LLVMBuildBitCast(builder, bitmask, bitmask_type, "exec_bitmask");
|
||||
|
||||
LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, LLVMConstInt(bitmask_type, 0, false), "any_active");
|
||||
|
||||
struct lp_build_if_state if_state;
|
||||
lp_build_if(&if_state, gallivm, any_active);
|
||||
if (!params->exec_mask_nz) {
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(out_data) - 1; i++) {
|
||||
out_data[i] = lp_build_alloca(gallivm, out_data_type, "");
|
||||
}
|
||||
out_data[ARRAY_SIZE(out_data) - 1] = lp_build_alloca(gallivm, out_residency_type, "");
|
||||
|
||||
struct lp_type uint_type = lp_uint_type(params->type);
|
||||
LLVMValueRef uint_zero = lp_build_const_int_vec(gallivm, uint_type, 0);
|
||||
|
||||
LLVMValueRef bitmask = LLVMBuildICmp(builder, LLVMIntNE, params->exec_mask, uint_zero, "exec_bitvec");
|
||||
|
||||
LLVMTypeRef bitmask_type = LLVMIntTypeInContext(gallivm->context, uint_type.length);
|
||||
bitmask = LLVMBuildBitCast(builder, bitmask, bitmask_type, "exec_bitmask");
|
||||
|
||||
LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, LLVMConstInt(bitmask_type, 0, false), "any_active");
|
||||
|
||||
lp_build_if(&if_state, gallivm, any_active);
|
||||
}
|
||||
|
||||
LLVMValueRef image_base_ptr = load_texture_functions_ptr(
|
||||
gallivm, params->resource, offsetof(struct lp_descriptor, functions),
|
||||
@@ -521,22 +533,25 @@ lp_bld_llvm_image_soa_emit_op(const struct lp_build_image_soa *base,
|
||||
if (params->img_op != LP_IMG_STORE) {
|
||||
uint32_t channel_count = params->img_op == LP_IMG_LOAD_SPARSE ? 5 : 4;
|
||||
for (unsigned i = 0; i < channel_count; i++) {
|
||||
LLVMValueRef channel = LLVMBuildExtractValue(builder, result, i, "");
|
||||
params->outdata[i] = LLVMBuildExtractValue(builder, result, i, "");
|
||||
if (params->type.length != lp_native_vector_width / 32)
|
||||
channel = truncate_to_type_width(gallivm, channel, params->type);
|
||||
params->outdata[i] = truncate_to_type_width(gallivm, params->outdata[i], params->type);
|
||||
|
||||
LLVMBuildStore(builder, channel, out_data[i]);
|
||||
if (!params->exec_mask_nz)
|
||||
LLVMBuildStore(builder, params->outdata[i], out_data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
lp_build_endif(&if_state);
|
||||
if (!params->exec_mask_nz) {
|
||||
lp_build_endif(&if_state);
|
||||
|
||||
if (params->img_op != LP_IMG_STORE) {
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(out_data) - 1; i++) {
|
||||
params->outdata[i] = LLVMBuildLoad2(builder, out_data_type, out_data[i], "");
|
||||
if (params->img_op != LP_IMG_STORE) {
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(out_data) - 1; i++) {
|
||||
params->outdata[i] = LLVMBuildLoad2(builder, out_data_type, out_data[i], "");
|
||||
}
|
||||
params->outdata[ARRAY_SIZE(out_data) - 1] =
|
||||
LLVMBuildLoad2(builder, out_residency_type, out_data[ARRAY_SIZE(out_data) - 1], "");
|
||||
}
|
||||
params->outdata[ARRAY_SIZE(out_data) - 1] =
|
||||
LLVMBuildLoad2(builder, out_residency_type, out_data[ARRAY_SIZE(out_data) - 1], "");
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
@@ -350,6 +350,15 @@ mask_vec_with_helpers(struct lp_build_nir_soa_context *bld)
|
||||
bld->uint_bld.type, -1);
|
||||
}
|
||||
|
||||
/*
 * Decide the value that the tex/image emitters store in
 * params->exec_mask_nz.
 *
 * Returns false for a fragment shader whose info.fs.uses_discard is set.
 * For every other shader it returns true exactly when
 * bld->exec_mask.has_mask is false.
 *
 * NOTE(review): "nz" presumably stands for "non-zero", i.e. the execution
 * mask is known to have at least one active lane -- confirm.
 */
static bool
|
||||
lp_exec_mask_is_nz(struct lp_build_nir_soa_context *bld)
|
||||
{
|
||||
/* NOTE(review): presumably discard can deactivate every lane without
 * exec_mask.has_mask being set, so this case is answered conservatively
 * -- confirm. */
if (bld->shader->info.stage == MESA_SHADER_FRAGMENT && bld->shader->info.fs.uses_discard)
|
||||
return false;
|
||||
|
||||
/* Otherwise the answer is just the negation of exec_mask.has_mask. */
return !bld->exec_mask.has_mask;
|
||||
}
|
||||
|
||||
static bool
|
||||
invocation_0_must_be_active(struct lp_build_nir_soa_context *bld)
|
||||
{
|
||||
@@ -1653,6 +1662,7 @@ static void emit_image_op(struct lp_build_nir_soa_context *bld,
|
||||
params->thread_data_type = bld->thread_data_type;
|
||||
params->thread_data_ptr = bld->thread_data_ptr;
|
||||
params->exec_mask = mask_vec(bld);
|
||||
params->exec_mask_nz = lp_exec_mask_is_nz(bld);
|
||||
|
||||
bld->image->emit_op(bld->image,
|
||||
bld->base.gallivm,
|
||||
@@ -1721,6 +1731,7 @@ static void emit_tex(struct lp_build_nir_soa_context *bld,
|
||||
params->thread_data_type = bld->thread_data_type;
|
||||
params->thread_data_ptr = bld->thread_data_ptr;
|
||||
params->exec_mask = mask_vec(bld);
|
||||
params->exec_mask_nz = lp_exec_mask_is_nz(bld);
|
||||
|
||||
if (params->texture_index_offset && bld->shader->info.stage != MESA_SHADER_FRAGMENT) {
|
||||
/* this is horrible but this can be dynamic */
|
||||
@@ -1793,6 +1804,7 @@ static void emit_tex_size(struct lp_build_nir_soa_context *bld,
|
||||
lp_build_const_int32(bld->base.gallivm, 0), "");
|
||||
|
||||
params->exec_mask = mask_vec(bld);
|
||||
params->exec_mask_nz = lp_exec_mask_is_nz(bld);
|
||||
|
||||
bld->sampler->emit_size_query(bld->sampler,
|
||||
bld->base.gallivm,
|
||||
|
||||
@@ -124,6 +124,7 @@ struct lp_sampler_params
|
||||
LLVMValueRef texture_resource;
|
||||
LLVMValueRef sampler_resource;
|
||||
LLVMValueRef exec_mask;
|
||||
bool exec_mask_nz;
|
||||
};
|
||||
|
||||
/* Parameters used to handle sampler_size instructions */
|
||||
@@ -144,6 +145,7 @@ struct lp_sampler_size_query_params
|
||||
|
||||
LLVMValueRef resource;
|
||||
LLVMValueRef exec_mask;
|
||||
bool exec_mask_nz;
|
||||
enum pipe_format format;
|
||||
};
|
||||
|
||||
@@ -163,6 +165,7 @@ struct lp_img_params
|
||||
unsigned target;
|
||||
LLVMAtomicRMWBinOp op;
|
||||
LLVMValueRef exec_mask;
|
||||
bool exec_mask_nz;
|
||||
LLVMTypeRef resources_type;
|
||||
LLVMValueRef resources_ptr;
|
||||
LLVMTypeRef thread_data_type;
|
||||
|
||||
Reference in New Issue
Block a user