diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index d62e097956b..a877eb291d5 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -566,7 +566,6 @@ anv_cmd_buffer_flush_pipeline_state(struct anv_cmd_buffer *cmd_buffer,
    diff_fix_state(VS, final.vs);
    diff_fix_state(HS, final.hs);
    diff_fix_state(DS, final.ds);
-   diff_fix_state(PS, final.ps);
 
    diff_fix_state(CLIP, partial.clip);
    diff_fix_state(SF, partial.sf);
@@ -576,6 +575,7 @@ anv_cmd_buffer_flush_pipeline_state(struct anv_cmd_buffer *cmd_buffer,
    diff_fix_state(GS, partial.gs);
    diff_fix_state(TE, partial.te);
    diff_fix_state(VFG, partial.vfg);
+   diff_fix_state(PS, partial.ps);
    diff_fix_state(PS_EXTRA, partial.ps_extra);
 
    if (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader) {
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index bd7dbf4b01c..5f18db16e96 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1553,6 +1553,32 @@ struct anv_gfx_dynamic_state {
       uint32_t LineStippleRepeatCount;
    } ls;
 
+   /* 3DSTATE_PS (fields merged into partial.ps at emission time) */
+   struct {
+      uint32_t PositionXYOffsetSelect;
+
+      uint32_t KernelStartPointer0;
+      uint32_t KernelStartPointer1;
+      uint32_t KernelStartPointer2;
+
+      uint32_t DispatchGRFStartRegisterForConstantSetupData0;
+      uint32_t DispatchGRFStartRegisterForConstantSetupData1;
+      uint32_t DispatchGRFStartRegisterForConstantSetupData2;
+
+      /* Pre-Gfx20 only */
+      bool _8PixelDispatchEnable;
+      bool _16PixelDispatchEnable;
+      bool _32PixelDispatchEnable;
+
+      /* Gfx20+ only */
+      bool Kernel0Enable;
+      bool Kernel1Enable;
+      uint32_t Kernel0SIMDWidth;
+      uint32_t Kernel1SIMDWidth;
+      uint32_t Kernel0PolyPackingPolicy;
+      uint32_t Kernel0MaximumPolysperThread;
+   } ps;
+
    /* 3DSTATE_PS_EXTRA */
    struct {
       bool PixelShaderIsPerSample;
@@ -4635,7 +4661,6 @@ struct anv_graphics_pipeline {
       struct anv_gfx_state_ptr vs;
       struct anv_gfx_state_ptr hs;
       struct anv_gfx_state_ptr ds;
-      struct anv_gfx_state_ptr ps;
 
       struct anv_gfx_state_ptr task_control;
       struct anv_gfx_state_ptr task_shader;
@@ -4659,6 +4684,7 @@ struct anv_graphics_pipeline {
       struct anv_gfx_state_ptr so;
       struct anv_gfx_state_ptr gs;
       struct anv_gfx_state_ptr te;
+      struct anv_gfx_state_ptr ps;
       struct anv_gfx_state_ptr vfg;
    } partial;
 };
diff --git a/src/intel/vulkan/genX_gfx_state.c b/src/intel/vulkan/genX_gfx_state.c
index 8a9011f1dd8..689cc1cf247 100644
--- a/src/intel/vulkan/genX_gfx_state.c
+++ b/src/intel/vulkan/genX_gfx_state.c
@@ -31,6 +31,7 @@
 
 #include "genxml/gen_macros.h"
 #include "genxml/genX_pack.h"
+#include "common/intel_genX_state_brw.h"
 #include "common/intel_guardband.h"
 #include "common/intel_tiled_render.h"
 #include "compiler/brw_prim.h"
@@ -580,6 +581,57 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
    if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
        (gfx->dirty & ANV_CMD_DIRTY_FS_MSAA_FLAGS)) {
       if (wm_prog_data) {
+         const struct anv_shader_bin *fs_bin =
+            pipeline->base.shaders[MESA_SHADER_FRAGMENT];
+
+         /* The dispatch configuration is computed into a scratch 3DSTATE_PS;
+          * each field needed at emission is then copied to the dynamic state.
+          */
+         struct GENX(3DSTATE_PS) ps = {};
+         intel_set_ps_dispatch_state(&ps, device->info, wm_prog_data,
+                                     MAX2(dyn->ms.rasterization_samples, 1),
+                                     gfx->fs_msaa_flags);
+
+         SET(PS, ps.KernelStartPointer0,
+             fs_bin->kernel.offset +
+             brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0));
+         SET(PS, ps.KernelStartPointer1,
+             fs_bin->kernel.offset +
+             brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1));
+#if GFX_VER < 20
+         SET(PS, ps.KernelStartPointer2,
+             fs_bin->kernel.offset +
+             brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2));
+#endif
+
+         SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData0,
+             brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0));
+         SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData1,
+             brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1));
+#if GFX_VER < 20
+         SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData2,
+             brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2));
+#endif
+
+#if GFX_VER < 20
+         SET(PS, ps._8PixelDispatchEnable, ps._8PixelDispatchEnable);
+         SET(PS, ps._16PixelDispatchEnable, ps._16PixelDispatchEnable);
+         SET(PS, ps._32PixelDispatchEnable, ps._32PixelDispatchEnable);
+#else
+         SET(PS, ps.Kernel0Enable, ps.Kernel0Enable);
+         SET(PS, ps.Kernel1Enable, ps.Kernel1Enable);
+         SET(PS, ps.Kernel0SIMDWidth, ps.Kernel0SIMDWidth);
+         SET(PS, ps.Kernel1SIMDWidth, ps.Kernel1SIMDWidth);
+         SET(PS, ps.Kernel0PolyPackingPolicy, ps.Kernel0PolyPackingPolicy);
+         SET(PS, ps.Kernel0MaximumPolysperThread,
+             ps.Kernel0MaximumPolysperThread);
+#endif
+
+         SET(PS, ps.PositionXYOffsetSelect,
+                 !wm_prog_data->uses_pos_offset ? POSOFFSET_NONE :
+                 brw_wm_prog_data_is_persample(wm_prog_data, gfx->fs_msaa_flags) ?
+                 POSOFFSET_SAMPLE : POSOFFSET_CENTROID);
+
          SET(PS_EXTRA, ps_extra.PixelShaderIsPerSample,
              brw_wm_prog_data_is_persample(wm_prog_data, gfx->fs_msaa_flags));
 #if GFX_VER >= 11
@@ -595,6 +647,15 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
 #endif
          SET(WM, wm.BarycentricInterpolationMode,
              wm_prog_data_barycentric_modes(wm_prog_data, gfx->fs_msaa_flags));
+      } else {
+#if GFX_VER < 20
+         SET(PS, ps._8PixelDispatchEnable, false);
+         SET(PS, ps._16PixelDispatchEnable, false);
+         SET(PS, ps._32PixelDispatchEnable, false);
+#else
+         SET(PS, ps.Kernel0Enable, false);
+         SET(PS, ps.Kernel1Enable, false);
+#endif
       }
    }
 
@@ -1609,9 +1670,6 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
 #endif
    }
 
-   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS))
-      anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.ps);
-
    if (device->vk.enabled_extensions.EXT_mesh_shader) {
       if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL))
          anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_control);
@@ -1654,6 +1712,33 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
 
    /* Now the potentially dynamic instructions */
 
+   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS)) {
+      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS),
+                           pipeline, partial.ps, ps) {
+         SET(ps, ps, KernelStartPointer0);
+         SET(ps, ps, KernelStartPointer1);
+         SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData0);
+         SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData1);
+
+#if GFX_VER < 20
+         SET(ps, ps, KernelStartPointer2);
+         SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData2);
+
+         SET(ps, ps, _8PixelDispatchEnable);
+         SET(ps, ps, _16PixelDispatchEnable);
+         SET(ps, ps, _32PixelDispatchEnable);
+#else
+         SET(ps, ps, Kernel0Enable);
+         SET(ps, ps, Kernel1Enable);
+         SET(ps, ps, Kernel0SIMDWidth);
+         SET(ps, ps, Kernel1SIMDWidth);
+         SET(ps, ps, Kernel0PolyPackingPolicy);
+         SET(ps, ps, Kernel0MaximumPolysperThread);
+#endif
+         SET(ps, ps, PositionXYOffsetSelect);
+      }
+   }
+
    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA)) {
       anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS_EXTRA),
                            pipeline, partial.ps_extra, pse) {
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 7f19124f3af..c93c99f8b91 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1578,20 +1578,13 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
       pipeline->base.shaders[MESA_SHADER_FRAGMENT];
 
    if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
-      anv_pipeline_emit(pipeline, final.ps, GENX(3DSTATE_PS), ps);
+      anv_pipeline_emit(pipeline, partial.ps, GENX(3DSTATE_PS), ps);
       return;
    }
 
    const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
 
-   anv_pipeline_emit(pipeline, final.ps, GENX(3DSTATE_PS), ps) {
-      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
-                                  ms != NULL ? ms->rasterization_samples : 1,
-                                  pipeline->fs_msaa_flags);
-
-      const bool persample =
-         brw_wm_prog_data_is_persample(wm_prog_data, pipeline->fs_msaa_flags);
-
+   anv_pipeline_emit(pipeline, partial.ps, GENX(3DSTATE_PS), ps) {
 #if GFX_VER == 12
       assert(wm_prog_data->dispatch_multi == 0 ||
              (wm_prog_data->dispatch_multi == 16 && wm_prog_data->max_polygons == 2));
@@ -1604,15 +1597,6 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
       ps.OverlappingSubspansEnable = false;
 #endif
 
-      ps.KernelStartPointer0 = fs_bin->kernel.offset +
-         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
-      ps.KernelStartPointer1 = fs_bin->kernel.offset +
-         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
-#if GFX_VER < 20
-      ps.KernelStartPointer2 = fs_bin->kernel.offset +
-         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
-#endif
-
       ps.SingleProgramFlow = false;
       ps.VectorMaskEnable = wm_prog_data->uses_vmask;
       /* Wa_1606682166 */
@@ -1622,21 +1606,9 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
       ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
                               wm_prog_data->base.ubo_ranges[0].length;
 #endif
-      ps.PositionXYOffsetSelect =
-         !wm_prog_data->uses_pos_offset ? POSOFFSET_NONE :
-         persample ? POSOFFSET_SAMPLE : POSOFFSET_CENTROID;
 
       ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;
 
-      ps.DispatchGRFStartRegisterForConstantSetupData0 =
-         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
-      ps.DispatchGRFStartRegisterForConstantSetupData1 =
-         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
-#if GFX_VER < 20
-      ps.DispatchGRFStartRegisterForConstantSetupData2 =
-         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
-#endif
-
 #if GFX_VERx10 >= 125
       ps.ScratchSpaceBuffer =
          get_scratch_surf(&pipeline->base.base, MESA_SHADER_FRAGMENT, fs_bin);