radv/shader_info: repack and compact struct radv_shader_info

using pahole.

Reduces the size of radv_shader_info from 760 bytes to 640 bytes.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37931>
This commit is contained in:
Daniel Schürmann
2025-10-17 10:35:18 +02:00
parent e1bcbbf3dd
commit 43f1ad308a

View File

@@ -79,98 +79,88 @@ struct gfx10_ngg_info {
};
struct radv_shader_info {
uint32_t workgroup_size;
uint32_t nir_shared_size;
uint64_t inline_push_constant_mask;
uint32_t push_constant_size;
bool can_inline_all_push_constants;
bool loads_push_constants;
bool loads_dynamic_offsets;
uint32_t desc_set_used_mask;
bool uses_view_index;
bool uses_invocation_id;
bool uses_prim_id;
uint8_t wave_size;
struct radv_userdata_locations user_sgprs_locs;
bool is_ngg;
bool is_ngg_passthrough;
bool has_ngg_culling;
bool has_ngg_early_prim_export;
bool has_prim_query;
bool has_xfb_query;
uint8_t ngg_lds_scratch_size;
uint32_t user_data_0;
uint32_t num_tess_patches;
uint32_t esgs_itemsize; /* Only for VS or TES as ES */
uint32_t ngg_lds_vertex_size; /* VS,TES: Cull+XFB, GS: GSVS size */
struct radv_vs_output_info outinfo;
unsigned workgroup_size;
bool force_vrs_per_vertex;
mesa_shader_stage stage;
mesa_shader_stage next_stage;
enum radv_shader_type type;
uint32_t user_data_0;
bool inputs_linked;
bool outputs_linked;
bool merged_shader_compiled_separately; /* GFX9+ */
bool force_indirect_descriptors;
uint64_t gs_inputs_read; /* Mask of GS inputs read (only used by linked ES) */
unsigned nir_shared_size;
struct radv_userdata_locations user_sgprs_locs;
struct radv_vs_output_info outinfo;
uint8_t wave_size;
uint8_t ngg_lds_scratch_size;
mesa_shader_stage stage : 8;
mesa_shader_stage next_stage : 8;
enum radv_shader_type type : 4;
bool can_inline_all_push_constants : 1;
bool loads_push_constants : 1;
bool loads_dynamic_offsets : 1;
bool uses_view_index : 1;
bool uses_invocation_id : 1;
bool uses_prim_id : 1;
bool is_ngg : 1;
bool is_ngg_passthrough : 1;
bool has_ngg_culling : 1;
bool has_ngg_early_prim_export : 1;
bool has_prim_query : 1;
bool has_xfb_query : 1;
bool force_vrs_per_vertex : 1;
bool inputs_linked : 1;
bool outputs_linked : 1;
bool merged_shader_compiled_separately : 1; /* GFX9+ */
bool force_indirect_descriptors : 1;
struct {
bool needs_draw_id;
bool needs_instance_id;
bool as_es;
bool as_ls;
bool tcs_in_out_eq;
uint64_t tcs_inputs_via_temp;
uint64_t tcs_inputs_via_lds;
uint8_t num_linked_outputs;
bool needs_base_instance;
bool use_per_attribute_vb_descs;
uint32_t vb_desc_usage_mask;
uint32_t input_slot_usage_mask;
uint8_t num_attributes;
bool has_prolog;
bool dynamic_inputs;
bool dynamic_num_verts_per_prim;
uint32_t num_outputs; /* For NGG streamout only */
uint8_t num_linked_outputs;
uint8_t num_attributes;
bool needs_draw_id : 1;
bool needs_instance_id : 1;
bool as_es : 1;
bool as_ls : 1;
bool tcs_in_out_eq : 1;
bool needs_base_instance : 1;
bool use_per_attribute_vb_descs : 1;
bool has_prolog : 1;
bool dynamic_inputs : 1;
bool dynamic_num_verts_per_prim : 1;
} vs;
struct {
uint8_t num_components_per_stream[4];
unsigned vertices_in;
unsigned vertices_out;
unsigned input_prim;
unsigned output_prim;
unsigned invocations;
unsigned es_type; /* GFX9: VS or TES */
uint32_t vertices_in;
uint32_t vertices_out;
uint32_t input_prim;
uint32_t output_prim;
uint32_t invocations;
uint32_t es_type; /* GFX9: VS or TES */
uint8_t num_linked_inputs;
bool has_pipeline_stat_query;
} gs;
struct {
bool as_es;
enum tess_primitive_mode _primitive_mode;
enum gl_tess_spacing spacing;
bool ccw;
bool point_mode;
bool reads_tess_factors;
unsigned tcs_vertices_out;
uint32_t tcs_vertices_out;
uint32_t num_outputs; /* For NGG streamout only */
uint8_t num_linked_inputs; /* Number of reserved per-vertex input slots in VRAM. */
uint8_t num_linked_patch_inputs; /* Number of reserved per-patch input slots in VRAM. */
uint8_t num_linked_outputs;
uint32_t num_outputs; /* For NGG streamout only */
enum tess_primitive_mode _primitive_mode : 2;
enum gl_tess_spacing spacing : 2;
bool as_es : 1;
bool ccw : 1;
bool point_mode : 1;
bool reads_tess_factors : 1;
} tes;
struct {
bool uses_sample_shading;
bool needs_sample_positions;
bool needs_poly_line_smooth;
bool writes_memory;
bool writes_z;
bool writes_stencil;
bool writes_sample_mask;
bool writes_mrt0_alpha;
bool exports_mrtz_via_epilog;
bool has_pcoord;
bool prim_id_input;
bool viewport_index_input;
uint8_t input_clips_culls_mask;
uint32_t input_mask;
uint32_t input_per_primitive_mask;
uint32_t float32_shaded_mask;
@@ -179,68 +169,82 @@ struct radv_shader_info {
uint32_t float16_shaded_mask;
uint32_t float16_hi_shaded_mask;
uint32_t num_inputs;
bool can_discard;
bool early_fragment_test;
bool post_depth_coverage;
bool reads_sample_mask_in;
bool reads_front_face;
bool reads_sample_id;
bool reads_frag_shading_rate;
bool reads_barycentric_model;
bool reads_persp_sample;
bool reads_persp_center;
bool reads_persp_centroid;
bool reads_linear_sample;
bool reads_linear_center;
bool reads_linear_centroid;
bool reads_fully_covered;
bool reads_pixel_coord;
bool reads_layer;
uint8_t input_clips_culls_mask;
bool uses_sample_shading : 1;
bool needs_sample_positions : 1;
bool needs_poly_line_smooth : 1;
bool writes_memory : 1;
bool writes_z : 1;
bool writes_stencil : 1;
bool writes_sample_mask : 1;
bool writes_mrt0_alpha : 1;
bool mrt0_is_dual_src : 1;
bool exports_mrtz_via_epilog : 1;
bool has_pcoord : 1;
bool prim_id_input : 1;
bool viewport_index_input : 1;
bool can_discard : 1;
bool early_fragment_test : 1;
bool post_depth_coverage : 1;
uint8_t reads_frag_coord_mask;
uint8_t reads_sample_pos_mask;
uint8_t depth_layout;
bool allow_flat_shading;
bool pops; /* Uses Primitive Ordered Pixel Shading (fragment shader interlock) */
bool pops_is_per_sample;
bool mrt0_is_dual_src;
bool reads_sample_mask_in : 1;
bool reads_front_face : 1;
bool reads_sample_id : 1;
bool reads_frag_shading_rate : 1;
bool reads_barycentric_model : 1;
bool reads_persp_sample : 1;
bool reads_persp_center : 1;
bool reads_persp_centroid : 1;
bool reads_linear_sample : 1;
bool reads_linear_center : 1;
bool reads_linear_centroid : 1;
bool reads_fully_covered : 1;
bool reads_pixel_coord : 1;
bool reads_layer : 1;
bool pops : 1; /* Uses Primitive Ordered Pixel Shading (fragment shader interlock) */
bool pops_is_per_sample : 1;
uint32_t spi_ps_input_ena;
uint32_t spi_ps_input_addr;
uint32_t colors_written; /* Mask of outputs written */
uint32_t spi_shader_col_format;
uint32_t cb_shader_mask;
uint8_t color0_written;
bool load_provoking_vtx;
bool load_rasterization_prim;
bool force_sample_iter_shading_rate;
bool uses_fbfetch_output;
bool has_epilog;
bool load_provoking_vtx : 1;
bool load_rasterization_prim : 1;
bool force_sample_iter_shading_rate : 1;
bool uses_fbfetch_output : 1;
bool allow_flat_shading : 1;
bool has_epilog : 1;
} ps;
struct {
bool uses_grid_size;
uint32_t block_size[3];
bool uses_block_id[3];
bool uses_thread_id[3];
bool uses_local_invocation_idx;
unsigned block_size[3];
bool uses_grid_size : 1;
bool uses_local_invocation_idx : 1;
bool uses_full_subgroups;
bool linear_taskmesh_dispatch;
bool has_query; /* Task shader only */
bool uses_full_subgroups : 1;
bool linear_taskmesh_dispatch : 1;
bool has_query : 1; /* Task shader only */
bool regalloc_hang_bug;
bool regalloc_hang_bug : 1;
unsigned derivative_group : 2;
uint32_t derivative_group : 2;
} cs;
struct {
ac_nir_tess_io_info io_info;
uint64_t tes_inputs_read;
uint64_t tes_patch_inputs_read;
unsigned tcs_vertices_out;
uint32_t tcs_vertices_out;
uint32_t lds_size; /* in bytes */
uint8_t num_linked_inputs; /* Number of reserved per-vertex input slots in LDS. */
bool tes_reads_tess_factors : 1;
} tcs;
struct {
enum mesa_prim output_prim;
enum mesa_prim output_prim : 8; /* byte-size aligned */
bool needs_ms_scratch_ring;
bool has_task; /* If mesh shader is used together with a task shader. */
bool has_query;