diff --git a/src/amd/vulkan/meta/radv_meta.h b/src/amd/vulkan/meta/radv_meta.h index e7a01a28821..8f338ef967b 100644 --- a/src/amd/vulkan/meta/radv_meta.h +++ b/src/amd/vulkan/meta/radv_meta.h @@ -309,6 +309,13 @@ nir_shader *radv_meta_nir_build_blit2d_copy_fragment_shader_stencil(struct radv_ radv_meta_nir_texel_fetch_build_func txf_func, const char *name, bool is_3d, bool is_multisampled); +void radv_meta_nir_build_clear_color_shaders(struct radv_device *dev, struct nir_shader **out_vs, + struct nir_shader **out_fs, uint32_t frag_output); +void radv_meta_nir_build_clear_depthstencil_shaders(struct radv_device *dev, struct nir_shader **out_vs, + struct nir_shader **out_fs, bool unrestricted); +nir_shader *radv_meta_nir_build_clear_htile_mask_shader(struct radv_device *dev); +nir_shader *radv_meta_nir_build_clear_dcc_comp_to_single_shader(struct radv_device *dev, bool is_msaa); + uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, struct radeon_winsys_bo *bo, uint64_t va, uint64_t size, uint32_t value); diff --git a/src/amd/vulkan/meta/radv_meta_clear.c b/src/amd/vulkan/meta/radv_meta_clear.c index b7957a01193..5d836f6e70d 100644 --- a/src/amd/vulkan/meta/radv_meta_clear.c +++ b/src/amd/vulkan/meta/radv_meta_clear.c @@ -4,7 +4,6 @@ * SPDX-License-Identifier: MIT */ -#include "nir/nir_builder.h" #include "radv_debug.h" #include "radv_entrypoints.h" #include "radv_formats.h" @@ -17,43 +16,6 @@ #include "ac_formats.h" -static void -build_color_shaders(struct radv_device *dev, struct nir_shader **out_vs, struct nir_shader **out_fs, - uint32_t frag_output) -{ - nir_builder vs_b = radv_meta_init_shader(dev, MESA_SHADER_VERTEX, "meta_clear_color_vs"); - nir_builder fs_b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_clear_color_fs-%d", frag_output); - - const struct glsl_type *position_type = glsl_vec4_type(); - const struct glsl_type *color_type = glsl_vec4_type(); - - nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position"); - vs_out_pos->data.location = VARYING_SLOT_POS; - - nir_def *in_color_load = nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16); - - nir_variable *fs_out_color = nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color"); - fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output; - - nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf); - - nir_def *outvec = nir_gen_rect_vertices(&vs_b, NULL, NULL); - nir_store_var(&vs_b, vs_out_pos, outvec, 0xf); - - const struct glsl_type *layer_type = glsl_int_type(); - nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer"); - vs_out_layer->data.location = VARYING_SLOT_LAYER; - vs_out_layer->data.interpolation = INTERP_MODE_FLAT; - nir_def *inst_id = nir_load_instance_id(&vs_b); - nir_def *base_instance = nir_load_base_instance(&vs_b); - - nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance); - nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1); - - *out_vs = vs_b.shader; - *out_fs = fs_b.shader; -} - static VkResult get_color_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout_out) { @@ -101,7 +63,7 @@ get_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_o nir_shader *vs_module, *fs_module; - build_color_shaders(device, &vs_module, &fs_module, frag_output); + radv_meta_nir_build_clear_color_shaders(device, &vs_module, &fs_module, frag_output); VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = {0}; blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState){ @@ -278,51 +240,6 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *cl } } -static void -build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs, struct nir_shader **out_fs, - bool unrestricted) -{ - nir_builder vs_b = radv_meta_init_shader( - dev, MESA_SHADER_VERTEX, unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs"); - nir_builder fs_b = - radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, - unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs"); - - const struct glsl_type *position_out_type = glsl_vec4_type(); - - nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position"); - vs_out_pos->data.location = VARYING_SLOT_POS; - - nir_def *z; - if (unrestricted) { - nir_def *in_color_load = nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4); - - nir_variable *fs_out_depth = nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth"); - fs_out_depth->data.location = FRAG_RESULT_DEPTH; - nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1); - - z = nir_imm_float(&vs_b, 0.0); - } else { - z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range = 4); - } - - nir_def *outvec = nir_gen_rect_vertices(&vs_b, z, NULL); - nir_store_var(&vs_b, vs_out_pos, outvec, 0xf); - - const struct glsl_type *layer_type = glsl_int_type(); - nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer"); - vs_out_layer->data.location = VARYING_SLOT_LAYER; - vs_out_layer->data.interpolation = INTERP_MODE_FLAT; - nir_def *inst_id = nir_load_instance_id(&vs_b); - nir_def *base_instance = nir_load_base_instance(&vs_b); - - nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance); - nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1); - - *out_vs = vs_b.shader; - *out_fs = fs_b.shader; -} - static bool radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview, VkImageLayout image_layout, VkImageAspectFlags aspects, const VkClearRect *clear_rect, const VkClearDepthStencilValue clear_value, @@ -386,7 +303,7 @@ get_depth_stencil_pipeline(struct radv_device *device, int samples, VkImageAspec nir_shader *vs_module, *fs_module; - build_depthstencil_shader(device, &vs_module, &fs_module, unrestricted); + radv_meta_nir_build_clear_depthstencil_shaders(device, &vs_module, &fs_module, unrestricted); VkGraphicsPipelineCreateInfoRADV radv_info = { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO_RADV, @@ -586,32 +503,6 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, VkClearDepthStencilV } } -static nir_shader * -build_clear_htile_mask_shader(struct radv_device *dev) -{ - nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_htile_mask"); - b.shader->info.workgroup_size[0] = 64; - - nir_def *global_id = get_global_ids(&b, 1); - - nir_def *offset = nir_imul_imm(&b, global_id, 16); - offset = nir_channel(&b, offset, 0); - - nir_def *constants = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16); - nir_def *va = nir_pack_64_2x32(&b, nir_channels(&b, constants, 0x3)); - va = nir_iadd(&b, va, nir_u2u64(&b, offset)); - - nir_def *load = nir_build_load_global(&b, 4, 32, va, .align_mul = 16); - - /* data = (data & ~htile_mask) | (htile_value & htile_mask) */ - nir_def *data = nir_iand(&b, load, nir_channel(&b, constants, 3)); - data = nir_ior(&b, data, nir_channel(&b, constants, 2)); - - nir_build_store_global(&b, data, va, .access = ACCESS_NON_READABLE, .align_mul = 16); - - return b.shader; -} - static VkResult get_clear_htile_mask_pipeline(struct radv_device *device, VkPipeline *pipeline_out, VkPipelineLayout *layout_out) { @@ -634,7 +525,7 @@ get_clear_htile_mask_pipeline(struct radv_device *device, VkPipeline *pipeline_o return VK_SUCCESS; } - nir_shader *cs = build_clear_htile_mask_shader(device); + nir_shader *cs = radv_meta_nir_build_clear_htile_mask_shader(device); const VkPipelineShaderStageCreateInfo stage_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, @@ -884,49 +775,6 @@ radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_imag } } -/* Clear DCC using comp-to-single by storing the clear value at the beginning of every 256B block. - * For MSAA images, clearing the first sample should be enough as long as CMASK is also cleared. - */ -static nir_shader * -build_clear_dcc_comp_to_single_shader(struct radv_device *dev, bool is_msaa) -{ - enum glsl_sampler_dim dim = is_msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D; - const struct glsl_type *img_type = glsl_image_type(dim, true, GLSL_TYPE_FLOAT); - - nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_dcc_comp_to_single-%s", - is_msaa ? "multisampled" : "singlesampled"); - b.shader->info.workgroup_size[0] = 8; - b.shader->info.workgroup_size[1] = 8; - - nir_def *global_id = get_global_ids(&b, 3); - - /* Load the dimensions in pixels of a block that gets compressed to one DCC byte. */ - nir_def *dcc_block_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8); - - /* Compute the coordinates. */ - nir_def *coord = nir_trim_vector(&b, global_id, 2); - coord = nir_imul(&b, coord, dcc_block_size); - coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), nir_channel(&b, global_id, 2), - nir_undef(&b, 1, 32)); - - nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img"); - output_img->data.descriptor_set = 0; - output_img->data.binding = 0; - - /* Load the clear color values. */ - nir_def *clear_values = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 8), .range = 24); - - nir_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0), nir_channel(&b, clear_values, 1), - nir_channel(&b, clear_values, 2), nir_channel(&b, clear_values, 3)); - - /* Store the clear color values. */ - nir_def *sample_id = is_msaa ? nir_imm_int(&b, 0) : nir_undef(&b, 1, 32); - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, sample_id, data, nir_imm_int(&b, 0), - .image_dim = dim, .image_array = true); - - return b.shader; -} - static uint32_t radv_get_cmask_fast_clear_value(const struct radv_image *image) { @@ -1089,7 +937,7 @@ get_clear_dcc_comp_to_single_pipeline(struct radv_device *device, bool is_msaa, return VK_SUCCESS; } - nir_shader *cs = build_clear_dcc_comp_to_single_shader(device, is_msaa); + nir_shader *cs = radv_meta_nir_build_clear_dcc_comp_to_single_shader(device, is_msaa); const VkPipelineShaderStageCreateInfo stage_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, diff --git a/src/amd/vulkan/nir/radv_meta_nir.c b/src/amd/vulkan/nir/radv_meta_nir.c index be7dbcf95ab..3889127e4c4 100644 --- a/src/amd/vulkan/nir/radv_meta_nir.c +++ b/src/amd/vulkan/nir/radv_meta_nir.c @@ -668,3 +668,155 @@ radv_meta_nir_build_cleari_r32g32b32_compute_shader(struct radv_device *dev) return b.shader; } + +void +radv_meta_nir_build_clear_color_shaders(struct radv_device *dev, struct nir_shader **out_vs, struct nir_shader **out_fs, + uint32_t frag_output) +{ + nir_builder vs_b = radv_meta_init_shader(dev, MESA_SHADER_VERTEX, "meta_clear_color_vs"); + nir_builder fs_b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_clear_color_fs-%d", frag_output); + + const struct glsl_type *position_type = glsl_vec4_type(); + const struct glsl_type *color_type = glsl_vec4_type(); + + nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position"); + vs_out_pos->data.location = VARYING_SLOT_POS; + + nir_def *in_color_load = nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16); + + nir_variable *fs_out_color = nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color"); + fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output; + + nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf); + + nir_def *outvec = nir_gen_rect_vertices(&vs_b, NULL, NULL); + nir_store_var(&vs_b, vs_out_pos, outvec, 0xf); + + const struct glsl_type *layer_type = glsl_int_type(); + nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer"); + vs_out_layer->data.location = VARYING_SLOT_LAYER; + vs_out_layer->data.interpolation = INTERP_MODE_FLAT; + nir_def *inst_id = nir_load_instance_id(&vs_b); + nir_def *base_instance = nir_load_base_instance(&vs_b); + + nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance); + nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1); + + *out_vs = vs_b.shader; + *out_fs = fs_b.shader; +} + +void +radv_meta_nir_build_clear_depthstencil_shaders(struct radv_device *dev, struct nir_shader **out_vs, + struct nir_shader **out_fs, bool unrestricted) +{ + nir_builder vs_b = radv_meta_init_shader( + dev, MESA_SHADER_VERTEX, unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs"); + nir_builder fs_b = + radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, + unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs"); + + const struct glsl_type *position_out_type = glsl_vec4_type(); + + nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position"); + vs_out_pos->data.location = VARYING_SLOT_POS; + + nir_def *z; + if (unrestricted) { + nir_def *in_color_load = nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4); + + nir_variable *fs_out_depth = nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth"); + fs_out_depth->data.location = FRAG_RESULT_DEPTH; + nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1); + + z = nir_imm_float(&vs_b, 0.0); + } else { + z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range = 4); + } + + nir_def *outvec = nir_gen_rect_vertices(&vs_b, z, NULL); + nir_store_var(&vs_b, vs_out_pos, outvec, 0xf); + + const struct glsl_type *layer_type = glsl_int_type(); + nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer"); + vs_out_layer->data.location = VARYING_SLOT_LAYER; + vs_out_layer->data.interpolation = INTERP_MODE_FLAT; + nir_def *inst_id = nir_load_instance_id(&vs_b); + nir_def *base_instance = nir_load_base_instance(&vs_b); + + nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance); + nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1); + + *out_vs = vs_b.shader; + *out_fs = fs_b.shader; +} + +nir_shader * +radv_meta_nir_build_clear_htile_mask_shader(struct radv_device *dev) +{ + nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_htile_mask"); + b.shader->info.workgroup_size[0] = 64; + + nir_def *global_id = get_global_ids(&b, 1); + + nir_def *offset = nir_imul_imm(&b, global_id, 16); + offset = nir_channel(&b, offset, 0); + + nir_def *constants = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16); + nir_def *va = nir_pack_64_2x32(&b, nir_channels(&b, constants, 0x3)); + va = nir_iadd(&b, va, nir_u2u64(&b, offset)); + + nir_def *load = nir_build_load_global(&b, 4, 32, va, .align_mul = 16); + + /* data = (data & ~htile_mask) | (htile_value & htile_mask) */ + nir_def *data = nir_iand(&b, load, nir_channel(&b, constants, 3)); + data = nir_ior(&b, data, nir_channel(&b, constants, 2)); + + nir_build_store_global(&b, data, va, .access = ACCESS_NON_READABLE, .align_mul = 16); + + return b.shader; +} + +/** + * Clear DCC using comp-to-single by storing the clear value at the beginning of every 256B block. + * For MSAA images, clearing the first sample should be enough as long as CMASK is also cleared. + */ +nir_shader * +radv_meta_nir_build_clear_dcc_comp_to_single_shader(struct radv_device *dev, bool is_msaa) +{ + enum glsl_sampler_dim dim = is_msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D; + const struct glsl_type *img_type = glsl_image_type(dim, true, GLSL_TYPE_FLOAT); + + nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_dcc_comp_to_single-%s", + is_msaa ? "multisampled" : "singlesampled"); + b.shader->info.workgroup_size[0] = 8; + b.shader->info.workgroup_size[1] = 8; + + nir_def *global_id = get_global_ids(&b, 3); + + /* Load the dimensions in pixels of a block that gets compressed to one DCC byte. */ + nir_def *dcc_block_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8); + + /* Compute the coordinates. */ + nir_def *coord = nir_trim_vector(&b, global_id, 2); + coord = nir_imul(&b, coord, dcc_block_size); + coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), nir_channel(&b, global_id, 2), + nir_undef(&b, 1, 32)); + + nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img"); + output_img->data.descriptor_set = 0; + output_img->data.binding = 0; + + /* Load the clear color values. */ + nir_def *clear_values = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 8), .range = 24); + + nir_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0), nir_channel(&b, clear_values, 1), + nir_channel(&b, clear_values, 2), nir_channel(&b, clear_values, 3)); + + /* Store the clear color values. */ + nir_def *sample_id = is_msaa ? nir_imm_int(&b, 0) : nir_undef(&b, 1, 32); + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, sample_id, data, nir_imm_int(&b, 0), + .image_dim = dim, .image_array = true); + + return b.shader; +}