diff --git a/src/gallium/drivers/iris/iris_program_cache.c b/src/gallium/drivers/iris/iris_program_cache.c index 144173c9593..5b51aacdf2f 100644 --- a/src/gallium/drivers/iris/iris_program_cache.c +++ b/src/gallium/drivers/iris/iris_program_cache.c @@ -462,7 +462,7 @@ iris_ensure_indirect_generation_shader(struct iris_batch *batch) brw_nir_analyze_ubo_ranges(screen->brw, nir, prog_data->base.ubo_ranges); - struct brw_compile_stats stats[3]; + struct genisa_stats stats[3]; struct brw_compile_fs_params params = { .base = { .nir = nir, diff --git a/src/intel/compiler/brw_compile_bs.cpp b/src/intel/compiler/brw_compile_bs.cpp index 7a71f863c48..f279d9c2a77 100644 --- a/src/intel/compiler/brw_compile_bs.cpp +++ b/src/intel/compiler/brw_compile_bs.cpp @@ -69,7 +69,7 @@ compile_single_bs(const struct brw_compiler *compiler, struct brw_bs_prog_data *prog_data, nir_shader *shader, brw_generator *g, - struct brw_compile_stats *stats, + struct genisa_stats *stats, int *prog_offset, uint64_t *bsr) { diff --git a/src/intel/compiler/brw_compile_cs.cpp b/src/intel/compiler/brw_compile_cs.cpp index 250d2513b94..502fe101067 100644 --- a/src/intel/compiler/brw_compile_cs.cpp +++ b/src/intel/compiler/brw_compile_cs.cpp @@ -291,7 +291,7 @@ brw_compile_cs(const struct brw_compiler *compiler, uint32_t max_dispatch_width = 8u << (util_last_bit(prog_data->prog_mask) - 1); - struct brw_compile_stats *stats = params->base.stats; + struct genisa_stats *stats = params->base.stats; for (unsigned simd = 0; simd < 3; simd++) { if (prog_data->prog_mask & (1u << simd)) { assert(v[simd]); diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp index 60b9c2cd009..bcb7cfc4aa6 100644 --- a/src/intel/compiler/brw_compile_fs.cpp +++ b/src/intel/compiler/brw_compile_fs.cpp @@ -1917,7 +1917,7 @@ brw_compile_fs(const struct brw_compiler *compiler, nir->info.name)); } - struct brw_compile_stats *stats = params->base.stats; + struct genisa_stats *stats = params->base.stats; uint32_t max_dispatch_width = 0; if (vmulti) { @@ -1947,7 +1947,7 @@ brw_compile_fs(const struct brw_compiler *compiler, max_dispatch_width = 32; } - for (struct brw_compile_stats *s = params->base.stats; s != NULL && s != stats; s++) + for (struct genisa_stats *s = params->base.stats; s != NULL && s != stats; s++) s->max_dispatch_width = max_dispatch_width; g.add_const_data(nir->constant_data, nir->constant_data_size); diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 9daaa30610a..47d00f0a156 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -33,6 +33,7 @@ #include "util/mesa-sha1.h" #include "util/enum_operators.h" #include "util/ralloc.h" +#include "util/shader_stats.h" #include "util/u_math.h" #include "brw_isa_info.h" #include "intel_shader_enums.h" @@ -1427,20 +1428,6 @@ DEFINE_PROG_DATA_DOWNCAST(mesh, prog_data->stage == MESA_SHADER_MESH) #undef DEFINE_PROG_DATA_DOWNCAST -struct brw_compile_stats { - uint32_t dispatch_width; /**< 0 for vec4 */ - uint32_t max_polygons; - uint32_t max_dispatch_width; - uint32_t instructions; - uint32_t sends; - uint32_t loops; - uint32_t cycles; - uint32_t spills; - uint32_t fills; - uint32_t max_live_registers; - uint32_t non_ssa_registers_after_nir; -}; - /** @} */ struct brw_compiler * @@ -1483,7 +1470,7 @@ struct brw_compile_params { nir_shader *nir; - struct brw_compile_stats *stats; + struct genisa_stats *stats; void *log_data; diff --git a/src/intel/compiler/brw_generator.cpp b/src/intel/compiler/brw_generator.cpp index a52f79f19cf..48ade37e738 100644 --- a/src/intel/compiler/brw_generator.cpp +++ b/src/intel/compiler/brw_generator.cpp @@ -733,7 +733,7 @@ brw_generator::enable_debug(const char *shader_name) int brw_generator::generate_code(const brw_shader &s, - struct brw_compile_stats *stats) + struct genisa_stats *stats) { const int dispatch_width = s.dispatch_width; struct brw_shader_stats shader_stats = s.shader_stats; @@ -1512,15 +1512,30 @@ brw_generator::generate_code(const brw_shader &s, if (stats) { stats->dispatch_width = dispatch_width; stats->max_polygons = s.max_polygons; - stats->max_dispatch_width = dispatch_width; - stats->instructions = before_size / 16 - nop_count - sync_nop_count; - stats->sends = send_count; - stats->loops = loop_count; - stats->cycles = perf.latency; - stats->spills = shader_stats.spill_count; - stats->fills = shader_stats.fill_count; + stats->instrs = before_size / 16 - nop_count - sync_nop_count; + stats->send_messages = send_count; + stats->loop_count = loop_count; + stats->cycle_count = perf.latency; + stats->spill_count = shader_stats.spill_count; + stats->fill_count = shader_stats.fill_count; stats->max_live_registers = shader_stats.max_register_pressure; - stats->non_ssa_registers_after_nir = shader_stats.non_ssa_registers_after_nir; + stats->non_ssa_regs_after_nir = shader_stats.non_ssa_registers_after_nir; + stats->source_hash = prog_data->source_hash; + stats->grf_registers = devinfo->ver >= 30 ? prog_data->grf_used : 0; + + /* Report the max dispatch width only on the smallest SIMD variant. + * + * XXX: SIMD8 is not the smallest on Xe2. This logic should be adjusted. + */ + if (stage != MESA_SHADER_FRAGMENT || dispatch_width == 8) + stats->max_dispatch_width = dispatch_width; + else + stats->max_dispatch_width = 0; + + if (mesa_shader_stage_uses_workgroup(stage)) + stats->workgroup_memory_size = prog_data->total_shared; + else + stats->workgroup_memory_size = 0; } return start_offset; diff --git a/src/intel/compiler/brw_generator.h b/src/intel/compiler/brw_generator.h index 4c3437c38f0..b6fbcfbdcca 100644 --- a/src/intel/compiler/brw_generator.h +++ b/src/intel/compiler/brw_generator.h @@ -19,7 +19,7 @@ public: void enable_debug(const char *shader_name); int generate_code(const brw_shader &s, - struct brw_compile_stats *stats); + struct genisa_stats *stats); void add_const_data(void *data, unsigned size); void add_resume_sbt(unsigned num_resume_shaders, uint64_t *sbt); const unsigned *get_assembly(); diff --git a/src/intel/vulkan/anv_internal_kernels.c b/src/intel/vulkan/anv_internal_kernels.c index 7aa8243e7a2..d645c20534c 100644 --- a/src/intel/vulkan/anv_internal_kernels.c +++ b/src/intel/vulkan/anv_internal_kernels.c @@ -48,6 +48,14 @@ lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin, return true; } +static void +check_sends(struct genisa_stats *stats, unsigned send_count) +{ + assert(stats->spill_count == 0); + assert(stats->fill_count == 0); + assert(stats->send_messages == send_count); +} + static struct anv_shader_bin * compile_shader(struct anv_device *device, enum anv_internal_kernel_name shader_name, @@ -153,7 +161,7 @@ compile_shader(struct anv_device *device, const unsigned *program; if (stage == MESA_SHADER_FRAGMENT) { - struct brw_compile_stats stats[3]; + struct genisa_stats stats[3]; struct brw_compile_fs_params params = { .base = { .nir = nir, @@ -170,28 +178,18 @@ compile_shader(struct anv_device *device, if (!INTEL_DEBUG(DEBUG_SHADER_PRINT)) { unsigned stat_idx = 0; if (prog_data.wm.dispatch_8) { - assert(stats[stat_idx].spills == 0); - assert(stats[stat_idx].fills == 0); - assert(stats[stat_idx].sends == sends_count_expectation); - stat_idx++; + check_sends(&stats[stat_idx++], sends_count_expectation); } if (prog_data.wm.dispatch_16) { - assert(stats[stat_idx].spills == 0); - assert(stats[stat_idx].fills == 0); - assert(stats[stat_idx].sends == sends_count_expectation); - stat_idx++; + check_sends(&stats[stat_idx++], sends_count_expectation); } if (prog_data.wm.dispatch_32) { - assert(stats[stat_idx].spills == 0); - assert(stats[stat_idx].fills == 0); - assert(stats[stat_idx].sends == - sends_count_expectation * - (device->info->ver < 20 ? 2 : 1)); - stat_idx++; + check_sends(&stats[stat_idx++], sends_count_expectation * + (device->info->ver < 20 ? 2 : 1)); } } } else { - struct brw_compile_stats stats; + struct genisa_stats stats; struct brw_compile_cs_params params = { .base = { .nir = nir, @@ -206,9 +204,7 @@ compile_shader(struct anv_device *device, program = brw_compile_cs(compiler, ¶ms); if (!INTEL_DEBUG(DEBUG_SHADER_PRINT)) { - assert(stats.spills == 0); - assert(stats.fills == 0); - assert(stats.sends == sends_count_expectation); + check_sends(&stats, sends_count_expectation); } } diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index efa87399be3..fa093a2a82a 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -103,7 +103,7 @@ anv_shader_bin_create(struct anv_device *device, const void *kernel_data, uint32_t kernel_size, const struct brw_stage_prog_data *prog_data_in, uint32_t prog_data_size, - const struct brw_compile_stats *stats, uint32_t num_stats, + const struct genisa_stats *stats, uint32_t num_stats, const nir_xfb_info *xfb_info_in, const struct anv_pipeline_bind_map *bind_map, const struct anv_push_descriptor_info *push_desc_info) @@ -381,7 +381,7 @@ anv_shader_bin_deserialize(struct vk_pipeline_cache *cache, void *mem_ctx = ralloc_context(NULL); uint32_t num_stats = blob_read_uint32(blob); - const struct brw_compile_stats *stats = + const struct genisa_stats *stats = blob_read_bytes(blob, num_stats * sizeof(stats[0])); const nir_xfb_info *xfb_info = NULL; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index a48792fa36c..ac7f587448e 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1225,7 +1225,7 @@ struct anv_shader { const struct brw_stage_prog_data *prog_data; - struct brw_compile_stats stats[3]; + struct genisa_stats stats[3]; uint32_t num_stats; char *nir_str; @@ -5097,7 +5097,7 @@ struct anv_shader_upload_params { const struct brw_stage_prog_data *prog_data; uint32_t prog_data_size; - const struct brw_compile_stats *stats; + const struct genisa_stats *stats; uint32_t num_stats; const struct nir_xfb_info *xfb_info; @@ -5145,7 +5145,7 @@ struct anv_shader_bin { const struct brw_stage_prog_data *prog_data; uint32_t prog_data_size; - struct brw_compile_stats stats[3]; + struct genisa_stats stats[3]; uint32_t num_stats; struct nir_xfb_info *xfb_info; @@ -5178,7 +5178,7 @@ anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader) struct anv_pipeline_executable { mesa_shader_stage stage; - struct brw_compile_stats stats; + struct genisa_stats stats; char *nir; char *disasm; diff --git a/src/intel/vulkan/anv_shader.c b/src/intel/vulkan/anv_shader.c index 1161dc249c5..489d198f3f1 100644 --- a/src/intel/vulkan/anv_shader.c +++ b/src/intel/vulkan/anv_shader.c @@ -8,6 +8,7 @@ #include "nir/nir_serialize.h" #include "compiler/brw_disasm.h" +#include "util/shader_stats.h" static void anv_shader_destroy(struct vk_device *vk_device, @@ -174,7 +175,7 @@ anv_shader_get_executable_properties(struct vk_device *device, container_of(vk_shader, struct anv_shader, vk); for (uint32_t i = 0; i < shader->num_stats; i++) { - const struct brw_compile_stats *stats = &shader->stats[i]; + const struct genisa_stats *stats = &shader->stats[i]; vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) { mesa_shader_stage stage = vk_shader->stage; @@ -219,145 +220,11 @@ anv_shader_get_executable_statistics(struct vk_device *vk_device, { VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out, statistics, statistic_count); - struct anv_device *device = - container_of(vk_device, struct anv_device, vk); struct anv_shader *shader = container_of(vk_shader, struct anv_shader, vk); assert(executable_index < shader->num_stats); - - const struct brw_compile_stats *stats = &shader->stats[executable_index]; - const struct brw_stage_prog_data *prog_data = shader->prog_data; - - vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { - VK_COPY_STR(stat->name, "Instruction Count"); - VK_COPY_STR(stat->description, - "Number of GEN instructions in the final generated " - "shader executable."); - stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; - stat->value.u64 = stats->instructions; - } - - vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { - VK_COPY_STR(stat->name, "SEND Count"); - VK_COPY_STR(stat->description, - "Number of instructions in the final generated shader " - "executable which access external units such as the " - "constant cache or the sampler."); - stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; - stat->value.u64 = stats->sends; - } - - vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { - VK_COPY_STR(stat->name, "Loop Count"); - VK_COPY_STR(stat->description, - "Number of loops (not unrolled) in the final generated " - "shader executable."); - stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; - stat->value.u64 = stats->loops; - } - - vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { - VK_COPY_STR(stat->name, "Cycle Count"); - VK_COPY_STR(stat->description, - "Estimate of the number of EU cycles required to execute " - "the final generated executable. This is an estimate only " - "and may vary greatly from actual run-time performance."); - stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; - stat->value.u64 = stats->cycles; - } - - vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { - VK_COPY_STR(stat->name, "Spill Count"); - VK_COPY_STR(stat->description, - "Number of scratch spill operations. This gives a rough " - "estimate of the cost incurred due to spilling temporary " - "values to memory. If this is non-zero, you may want to " - "adjust your shader to reduce register pressure."); - stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; - stat->value.u64 = stats->spills; - } - - vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { - VK_COPY_STR(stat->name, "Fill Count"); - VK_COPY_STR(stat->description, - "Number of scratch fill operations. This gives a rough " - "estimate of the cost incurred due to spilling temporary " - "values to memory. If this is non-zero, you may want to " - "adjust your shader to reduce register pressure."); - stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; - stat->value.u64 = stats->fills; - } - - vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { - VK_COPY_STR(stat->name, "Scratch Memory Size"); - VK_COPY_STR(stat->description, - "Number of bytes of scratch memory required by the " - "generated shader executable. If this is non-zero, you " - "may want to adjust your shader to reduce register " - "pressure."); - stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; - stat->value.u64 = prog_data->total_scratch; - } - - if (device->info->ver >= 30) { - vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { - VK_COPY_STR(stat->name, "GRF registers"); - VK_COPY_STR(stat->description, - "Number of GRF registers required by the shader."); - stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; - stat->value.u64 = prog_data->grf_used; - } - } - - vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { - VK_COPY_STR(stat->name, "Max dispatch width"); - VK_COPY_STR(stat->description, - "Largest SIMD dispatch width."); - stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; - /* Report the max dispatch width only on the smallest SIMD variant */ - if (vk_shader->stage != MESA_SHADER_FRAGMENT || stats->dispatch_width == 8) - stat->value.u64 = stats->max_dispatch_width; - else - stat->value.u64 = 0; - } - - vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { - VK_COPY_STR(stat->name, "Max live registers"); - VK_COPY_STR(stat->description, - "Maximum number of registers used across the entire shader."); - stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; - stat->value.u64 = stats->max_live_registers; - } - - vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { - VK_COPY_STR(stat->name, "Workgroup Memory Size"); - VK_COPY_STR(stat->description, - "Number of bytes of workgroup shared memory used by this " - "shader including any padding."); - stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; - if (mesa_shader_stage_uses_workgroup(vk_shader->stage)) - stat->value.u64 = prog_data->total_shared; - else - stat->value.u64 = 0; - } - - vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { - VK_COPY_STR(stat->name, "Source hash"); - VK_PRINT_STR(stat->description, - "hash = 0x%08x. Hash generated from shader source.", - prog_data->source_hash); - stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; - stat->value.u64 = prog_data->source_hash; - } - - vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { - VK_COPY_STR(stat->name, "Non SSA regs after NIR"); - VK_COPY_STR(stat->description, "Non SSA regs after NIR translation to BRW."); - stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; - stat->value.u64 = stats->non_ssa_registers_after_nir; - } - + vk_add_genisa_stats(out, &shader->stats[executable_index]); return VK_SUCCESS; } diff --git a/src/intel/vulkan/anv_shader.h b/src/intel/vulkan/anv_shader.h index 3210f6c9ab0..5bfb54ae5d9 100644 --- a/src/intel/vulkan/anv_shader.h +++ b/src/intel/vulkan/anv_shader.h @@ -93,7 +93,7 @@ struct anv_shader_data { const nir_xfb_info *xfb_info; uint32_t num_stats; - struct brw_compile_stats stats[3]; + struct genisa_stats stats[3]; char *disasm[3]; bool use_primitive_replication; diff --git a/src/util/shader_stats.xml b/src/util/shader_stats.xml index 314bb0022e0..1122d48dd00 100644 --- a/src/util/shader_stats.xml +++ b/src/util/shader_stats.xml @@ -121,4 +121,63 @@ Number of SMEM instructions Number of VOPD instructions + + + + + +