diff --git a/docs/features.txt b/docs/features.txt index 356f22cfa2a..81e8fad8b0c 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -492,7 +492,7 @@ Vulkan 1.3 -- all DONE: anv, radv, tu, lvp, vn VK_EXT_pipeline_creation_feedback DONE (anv, hasvk, lvp, radv, tu, v3dv, vn) VK_EXT_private_data DONE (anv, hasvk, lvp, nvk, pvr, radv, tu, v3dv, vn) VK_EXT_image_robustness DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn) - VK_EXT_shader_demote_to_helper_invocation DONE (anv, hasvk, lvp, nvk, radv, tu, vn) + VK_EXT_shader_demote_to_helper_invocation DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn) VK_EXT_subgroup_size_control DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_EXT_texel_buffer_alignment DONE (anv, hasvk, lvp, nvk, pvr, radv, tu, v3dv, vn) VK_EXT_texture_compression_astc_hdr DONE (vn) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 782d015592b..a4b904f9d80 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -649,7 +649,10 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr, V3D_TMU_OP_TYPE_ATOMIC; /* Only load per-quad if we can be certain that all - * lines in the quad are active. + * lines in the quad are active. Notice that demoted + * invocations, unlike terminated ones, are still + * active: we want to skip memory writes for them but + * loads should still work. 
*/ uint32_t perquad = is_load && !vir_in_nonuniform_control_flow(c) && @@ -1908,6 +1911,7 @@ emit_frag_end(struct v3d_compile *c) if (c->output_position_index == -1 && !(c->s->info.num_images || c->s->info.num_ssbos) && !c->s->info.fs.uses_discard && + !c->s->info.fs.uses_demote && !c->fs_key->sample_alpha_to_coverage && c->output_sample_mask_index == -1 && has_any_tlb_color_write) { @@ -3426,8 +3430,19 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) ntq_emit_image_size(c, instr); break; + /* FIXME: the Vulkan and SPIR-V specs specify that OpTerminateInvocation (which + * is intended to match the semantics of GLSL's discard) should + * terminate the invocation immediately. Our implementation doesn't + * do that. What we do is actually a demote by removing the invocations + * from the sample mask. Maybe we could be more strict and force an + * early termination by emitting a (maybe conditional) jump to the + * end section of the fragment shader for affected invocations. + */ case nir_intrinsic_discard: case nir_intrinsic_terminate: + c->emitted_discard = true; + FALLTHROUGH; + case nir_intrinsic_demote: ntq_flush_tmu(c); if (vir_in_nonuniform_control_flow(c)) { @@ -3440,11 +3455,13 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) vir_SETMSF_dest(c, vir_nop_reg(), vir_uniform_ui(c, 0)); } - c->emitted_discard = true; break; case nir_intrinsic_discard_if: - case nir_intrinsic_terminate_if: { + case nir_intrinsic_terminate_if: + c->emitted_discard = true; + FALLTHROUGH; + case nir_intrinsic_demote_if: { ntq_flush_tmu(c); enum v3d_qpu_cond cond = ntq_emit_bool_to_cond(c, instr->src[0]); @@ -3462,7 +3479,6 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) vir_set_cond(vir_SETMSF_dest(c, vir_nop_reg(), vir_uniform_ui(c, 0)), cond); - c->emitted_discard = true; break; } diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 48d08a9ee0c..09190db9b9b 100644 --- a/src/broadcom/compiler/vir.c 
+++ b/src/broadcom/compiler/vir.c @@ -730,6 +730,7 @@ v3d_lower_nir(struct v3d_compile *c) nir_var_function_temp, 0, glsl_get_natural_size_align_bytes); + NIR_PASS(_, c->s, nir_lower_is_helper_invocation); NIR_PASS(_, c->s, v3d_nir_lower_scratch); NIR_PASS(_, c->s, v3d_nir_lower_null_pointers); } diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c index 1b37529fc21..4497942ac4c 100644 --- a/src/broadcom/vulkan/v3dv_device.c +++ b/src/broadcom/vulkan/v3dv_device.c @@ -205,6 +205,7 @@ get_device_extensions(const struct v3dv_physical_device *device, .EXT_private_data = true, .EXT_provoking_vertex = true, .EXT_separate_stencil_usage = true, + .EXT_shader_demote_to_helper_invocation = true, .EXT_shader_module_identifier = true, .EXT_texel_buffer_alignment = true, .EXT_tooling_info = true, @@ -444,6 +445,9 @@ get_features(const struct v3dv_physical_device *physical_device, /* VK_KHR_shader_terminate_invocation */ .shaderTerminateInvocation = true, + + /* VK_EXT_shader_demote_to_helper_invocation */ + .shaderDemoteToHelperInvocation = true, }; } diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c index fe343c7268c..756d1d52e55 100644 --- a/src/broadcom/vulkan/v3dv_pipeline.c +++ b/src/broadcom/vulkan/v3dv_pipeline.c @@ -174,6 +174,7 @@ static const struct spirv_to_nir_options default_spirv_options = { .physical_storage_buffer_address = true, .workgroup_memory_explicit_layout = true, .image_read_without_format = true, + .demote_to_helper_invocation = true, }, .ubo_addr_format = nir_address_format_32bit_index_offset, .ssbo_addr_format = nir_address_format_32bit_index_offset,