diff --git a/src/egl/drivers/dri2/platform_surfaceless.c b/src/egl/drivers/dri2/platform_surfaceless.c index 1f3a196fd57..d9d24852113 100644 --- a/src/egl/drivers/dri2/platform_surfaceless.c +++ b/src/egl/drivers/dri2/platform_surfaceless.c @@ -43,34 +43,11 @@ #include "loader_dri_helper.h" #if defined(__APPLE__) && defined(VK_USE_PLATFORM_METAL_EXT) -#include #include -#include #include #include #include #include - -static void -crash_handler(int sig) -{ - void *array[50]; - int size = backtrace(array, 50); - fprintf(stderr, "\n\n=== CRASH HANDLER: Signal %d ===\n", sig); - fprintf(stderr, "Stack trace:\n"); - backtrace_symbols_fd(array, size, STDERR_FILENO); - fprintf(stderr, "=== END STACK TRACE ===\n\n"); - signal(sig, SIG_DFL); - raise(sig); -} - -__attribute__((constructor)) static void -install_crash_handler(void) -{ - signal(SIGSEGV, crash_handler); - signal(SIGBUS, crash_handler); - signal(SIGABRT, crash_handler); -} #endif static struct dri_image * @@ -345,70 +322,24 @@ static const __DRIextension *kopper_loader_extensions[] = { #ifdef VK_USE_PLATFORM_METAL_EXT -struct get_size_ctx { - void *layer; - double w; - double h; -}; - -static void -get_drawable_size_main_thread(void *data) -{ - struct get_size_ctx *ctx = data; - typedef struct { - double width; - double height; - } MGLSize; - - /* Check superlayer to verify attachment */ - id superlayer = ((id(*)(id, SEL))objc_msgSend)( - (id)ctx->layer, sel_registerName("superlayer")); - - MGLSize (*msgSendSize)(id, SEL) = (MGLSize(*)(id, SEL))objc_msgSend; - MGLSize size = msgSendSize((id)ctx->layer, sel_registerName("drawableSize")); - ctx->w = size.width; - ctx->h = size.height; -} - static void surfaceless_metal_kopper_get_drawable_info(struct dri_drawable *draw, int *w, int *h, void *loaderPrivate) { struct dri2_egl_surface *dri2_surf = loaderPrivate; - void *layer = dri2_surf->base.NativeSurface; - if (layer) { - /* Debugging SIGBUS: Validate layer state */ - - /* Check class */ - const char *cls = 
object_getClassName((id)layer); - - /* Check device property */ - id device = - ((id(*)(id, SEL))objc_msgSend)((id)layer, sel_registerName("device")); - - /* [layer drawableSize] */ - /* Query on Main Thread to avoid race conditions with CoreAnimation which - * can cause SIGBUS */ - - struct get_size_ctx ctx; - ctx.layer = layer; - ctx.w = 0; - ctx.h = 0; - - if (pthread_main_np()) { - get_drawable_size_main_thread(&ctx); - } else { - dispatch_sync_f(dispatch_get_main_queue(), &ctx, - get_drawable_size_main_thread); - } - - *w = (int)ctx.w; - *h = (int)ctx.h; - } else { - *w = dri2_surf->base.Width; - *h = dri2_surf->base.Height; - } + /* Report the cached surface dimensions instead of asking the + * CAMetalLayer for its drawableSize on every call. + * + * The removed code issued that query via dispatch_sync_f() on the main + * queue whenever it ran off the main thread, stalling the caller until + * the main thread serviced it. + * + * NOTE(review): assumes base.Width/base.Height are refreshed on resize + * (expected to happen via kopper_update_size()) -- TODO confirm. 
+ */ + *w = dri2_surf->base.Width; + *h = dri2_surf->base.Height; } #include diff --git a/src/gallium/drivers/zink/driinfo_zink.h b/src/gallium/drivers/zink/driinfo_zink.h index cdf1596cce0..e5e6b6b68d6 100644 --- a/src/gallium/drivers/zink/driinfo_zink.h +++ b/src/gallium/drivers/zink/driinfo_zink.h @@ -3,6 +3,7 @@ DRI_CONF_SECTION_DEBUG DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION(false) DRI_CONF_OPT_B(radeonsi_inline_uniforms, false, "Optimize shaders by replacing uniforms with literals") + DRI_CONF_ALLOW_GLSL_COMPAT_SHADERS(true) DRI_CONF_SECTION_END DRI_CONF_SECTION_PERFORMANCE diff --git a/src/gallium/drivers/zink/zink_screen.c b/src/gallium/drivers/zink/zink_screen.c index 3b3e6af7a6f..13f3667c838 100644 --- a/src/gallium/drivers/zink/zink_screen.c +++ b/src/gallium/drivers/zink/zink_screen.c @@ -1236,7 +1236,10 @@ zink_init_screen_caps(struct zink_screen *screen) caps->mesh.pipeline_statistic_queries = screen->info.mesh_feats.meshShaderQueries; - if (screen->info.feats12.subgroupBroadcastDynamicId && screen->info.feats12.shaderSubgroupExtendedTypes && screen->info.feats.features.shaderFloat64) { + /* Enable subgroup operations if the Vulkan driver supports them. + * Note: shaderFloat64 was previously required here for GL_ARB_shader_ballot's + * uint64 ballot masks, but subgroup shuffle/basic ops don't need it. 
*/ + if (screen->info.feats12.subgroupBroadcastDynamicId && screen->info.feats12.shaderSubgroupExtendedTypes) { caps->shader_subgroup_size = screen->info.subgroup.subgroupSize; if (screen->info.have_EXT_mesh_shader) caps->shader_subgroup_supported_stages = screen->info.subgroup.supportedStages & BITFIELD_MASK(MESA_SHADER_MESH_STAGES); @@ -2886,6 +2889,7 @@ init_driver_workarounds(struct zink_screen *screen) case VK_DRIVER_ID_MESA_V3DV: case VK_DRIVER_ID_MESA_PANVK: case VK_DRIVER_ID_MESA_NVK: + case VK_DRIVER_ID_MESA_KOSMICKRISP: screen->driver_workarounds.implicit_sync = false; break; default: diff --git a/src/kosmickrisp/compiler/msl_type_inference.c b/src/kosmickrisp/compiler/msl_type_inference.c index 6258ea5f593..72f39523cef 100644 --- a/src/kosmickrisp/compiler/msl_type_inference.c +++ b/src/kosmickrisp/compiler/msl_type_inference.c @@ -453,6 +453,8 @@ infer_types_from_intrinsic(struct hash_table *types, nir_intrinsic_instr *instr) set_type(types, &instr->src[1], TYPE_UINT); break; case nir_intrinsic_reduce: + case nir_intrinsic_inclusive_scan: + case nir_intrinsic_exclusive_scan: switch (nir_intrinsic_reduction_op(instr)) { case nir_op_iand: case nir_op_ior: diff --git a/src/kosmickrisp/compiler/nir_to_msl.c b/src/kosmickrisp/compiler/nir_to_msl.c index 96ae7cfac44..87dfbb14a7a 100644 --- a/src/kosmickrisp/compiler/nir_to_msl.c +++ b/src/kosmickrisp/compiler/nir_to_msl.c @@ -1513,6 +1513,42 @@ intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr) UNREACHABLE("Bad reduction op"); } + src_to_msl(ctx, &instr->src[0]); + P(ctx, ");\n"); + break; + case nir_intrinsic_inclusive_scan: + switch (nir_intrinsic_reduction_op(instr)) { + case nir_op_iadd: + case nir_op_fadd: + P(ctx, "simd_prefix_inclusive_sum("); + break; + case nir_op_imul: + case nir_op_fmul: + P(ctx, "simd_prefix_inclusive_product("); + break; + default: + /* Only sum and product scans are emitted as MSL simd_prefix_* calls; + * all other ops must be lowered in NIR. NOTE(review): confirm that, since kk_physical_device.c advertises VK_SUBGROUP_FEATURE_ARITHMETIC_BIT. 
+ */ + UNREACHABLE("Unsupported inclusive_scan op"); + } + + src_to_msl(ctx, &instr->src[0]); + P(ctx, ");\n"); + break; + case nir_intrinsic_exclusive_scan: + switch (nir_intrinsic_reduction_op(instr)) { + case nir_op_iadd: + case nir_op_fadd: + P(ctx, "simd_prefix_exclusive_sum("); + break; + case nir_op_imul: + case nir_op_fmul: + P(ctx, "simd_prefix_exclusive_product("); + break; + default: + UNREACHABLE("Unsupported exclusive_scan op"); + } + src_to_msl(ctx, &instr->src[0]); P(ctx, ");\n"); break; diff --git a/src/kosmickrisp/vulkan/kk_physical_device.c b/src/kosmickrisp/vulkan/kk_physical_device.c index c5d74cfaeff..2079b97799a 100644 --- a/src/kosmickrisp/vulkan/kk_physical_device.c +++ b/src/kosmickrisp/vulkan/kk_physical_device.c @@ -452,8 +452,8 @@ kk_get_device_properties(const struct kk_physical_device *pdev, VK_SUBGROUP_FEATURE_VOTE_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT | - VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR, // | TODO_KOSMICKRISP - // VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | + VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR | + VK_SUBGROUP_FEATURE_ARITHMETIC_BIT, // | TODO_KOSMICKRISP // VK_SUBGROUP_FEATURE_CLUSTERED_BIT | // VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR, .subgroupQuadOperationsInAllStages = true,