diff --git a/src/gallium/drivers/iris/iris_program_cache.c b/src/gallium/drivers/iris/iris_program_cache.c
index 144173c9593..5b51aacdf2f 100644
--- a/src/gallium/drivers/iris/iris_program_cache.c
+++ b/src/gallium/drivers/iris/iris_program_cache.c
@@ -462,7 +462,7 @@ iris_ensure_indirect_generation_shader(struct iris_batch *batch)
brw_nir_analyze_ubo_ranges(screen->brw, nir, prog_data->base.ubo_ranges);
- struct brw_compile_stats stats[3];
+ struct genisa_stats stats[3];
struct brw_compile_fs_params params = {
.base = {
.nir = nir,
diff --git a/src/intel/compiler/brw_compile_bs.cpp b/src/intel/compiler/brw_compile_bs.cpp
index 7a71f863c48..f279d9c2a77 100644
--- a/src/intel/compiler/brw_compile_bs.cpp
+++ b/src/intel/compiler/brw_compile_bs.cpp
@@ -69,7 +69,7 @@ compile_single_bs(const struct brw_compiler *compiler,
struct brw_bs_prog_data *prog_data,
nir_shader *shader,
brw_generator *g,
- struct brw_compile_stats *stats,
+ struct genisa_stats *stats,
int *prog_offset,
uint64_t *bsr)
{
diff --git a/src/intel/compiler/brw_compile_cs.cpp b/src/intel/compiler/brw_compile_cs.cpp
index 250d2513b94..502fe101067 100644
--- a/src/intel/compiler/brw_compile_cs.cpp
+++ b/src/intel/compiler/brw_compile_cs.cpp
@@ -291,7 +291,7 @@ brw_compile_cs(const struct brw_compiler *compiler,
uint32_t max_dispatch_width = 8u << (util_last_bit(prog_data->prog_mask) - 1);
- struct brw_compile_stats *stats = params->base.stats;
+ struct genisa_stats *stats = params->base.stats;
for (unsigned simd = 0; simd < 3; simd++) {
if (prog_data->prog_mask & (1u << simd)) {
assert(v[simd]);
diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp
index 60b9c2cd009..bcb7cfc4aa6 100644
--- a/src/intel/compiler/brw_compile_fs.cpp
+++ b/src/intel/compiler/brw_compile_fs.cpp
@@ -1917,7 +1917,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
nir->info.name));
}
- struct brw_compile_stats *stats = params->base.stats;
+ struct genisa_stats *stats = params->base.stats;
uint32_t max_dispatch_width = 0;
if (vmulti) {
@@ -1947,7 +1947,11 @@ brw_compile_fs(const struct brw_compiler *compiler,
max_dispatch_width = 32;
}
- for (struct brw_compile_stats *s = params->base.stats; s != NULL && s != stats; s++)
- s->max_dispatch_width = max_dispatch_width;
+ /* Report the largest compiled SIMD width on the SIMD8 variant only and
+ * leave it at 0 on the others, matching what generate_code() stores for
+ * fragment shaders, so it is not repeated on every variant.
+ */
+ for (struct genisa_stats *s = params->base.stats; s != NULL && s != stats; s++)
+ s->max_dispatch_width = s->dispatch_width == 8 ? max_dispatch_width : 0;
g.add_const_data(nir->constant_data, nir->constant_data_size);
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 9daaa30610a..47d00f0a156 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -33,6 +33,7 @@
#include "util/mesa-sha1.h"
#include "util/enum_operators.h"
#include "util/ralloc.h"
+#include "util/shader_stats.h"
#include "util/u_math.h"
#include "brw_isa_info.h"
#include "intel_shader_enums.h"
@@ -1427,20 +1428,6 @@ DEFINE_PROG_DATA_DOWNCAST(mesh, prog_data->stage == MESA_SHADER_MESH)
#undef DEFINE_PROG_DATA_DOWNCAST
-struct brw_compile_stats {
- uint32_t dispatch_width; /**< 0 for vec4 */
- uint32_t max_polygons;
- uint32_t max_dispatch_width;
- uint32_t instructions;
- uint32_t sends;
- uint32_t loops;
- uint32_t cycles;
- uint32_t spills;
- uint32_t fills;
- uint32_t max_live_registers;
- uint32_t non_ssa_registers_after_nir;
-};
-
/** @} */
struct brw_compiler *
@@ -1483,7 +1470,7 @@ struct brw_compile_params {
nir_shader *nir;
- struct brw_compile_stats *stats;
+ struct genisa_stats *stats;
void *log_data;
diff --git a/src/intel/compiler/brw_generator.cpp b/src/intel/compiler/brw_generator.cpp
index a52f79f19cf..48ade37e738 100644
--- a/src/intel/compiler/brw_generator.cpp
+++ b/src/intel/compiler/brw_generator.cpp
@@ -733,7 +733,7 @@ brw_generator::enable_debug(const char *shader_name)
int
brw_generator::generate_code(const brw_shader &s,
- struct brw_compile_stats *stats)
+ struct genisa_stats *stats)
{
const int dispatch_width = s.dispatch_width;
struct brw_shader_stats shader_stats = s.shader_stats;
@@ -1512,15 +1512,32 @@ brw_generator::generate_code(const brw_shader &s,
if (stats) {
stats->dispatch_width = dispatch_width;
stats->max_polygons = s.max_polygons;
- stats->max_dispatch_width = dispatch_width;
- stats->instructions = before_size / 16 - nop_count - sync_nop_count;
- stats->sends = send_count;
- stats->loops = loop_count;
- stats->cycles = perf.latency;
- stats->spills = shader_stats.spill_count;
- stats->fills = shader_stats.fill_count;
+ stats->instrs = before_size / 16 - nop_count - sync_nop_count;
+ stats->send_messages = send_count;
+ stats->loop_count = loop_count;
+ stats->cycle_count = perf.latency;
+ stats->spill_count = shader_stats.spill_count;
+ stats->fill_count = shader_stats.fill_count;
+ /* Scratch size comes from prog_data; copy it so the stat is not left unset. */
+ stats->scratch_memory_size = prog_data->total_scratch;
stats->max_live_registers = shader_stats.max_register_pressure;
- stats->non_ssa_registers_after_nir = shader_stats.non_ssa_registers_after_nir;
+ stats->non_ssa_regs_after_nir = shader_stats.non_ssa_registers_after_nir;
+ stats->source_hash = prog_data->source_hash;
+ stats->grf_registers = devinfo->ver >= 30 ? prog_data->grf_used : 0;
+
+ /* Report the max dispatch width only on the smallest SIMD variant.
+ *
+ * XXX: SIMD8 is not the smallest on Xe2. This logic should be adjusted.
+ */
+ if (stage != MESA_SHADER_FRAGMENT || dispatch_width == 8)
+ stats->max_dispatch_width = dispatch_width;
+ else
+ stats->max_dispatch_width = 0;
+
+ if (mesa_shader_stage_uses_workgroup(stage))
+ stats->workgroup_memory_size = prog_data->total_shared;
+ else
+ stats->workgroup_memory_size = 0;
}
return start_offset;
diff --git a/src/intel/compiler/brw_generator.h b/src/intel/compiler/brw_generator.h
index 4c3437c38f0..b6fbcfbdcca 100644
--- a/src/intel/compiler/brw_generator.h
+++ b/src/intel/compiler/brw_generator.h
@@ -19,7 +19,7 @@ public:
void enable_debug(const char *shader_name);
int generate_code(const brw_shader &s,
- struct brw_compile_stats *stats);
+ struct genisa_stats *stats);
void add_const_data(void *data, unsigned size);
void add_resume_sbt(unsigned num_resume_shaders, uint64_t *sbt);
const unsigned *get_assembly();
diff --git a/src/intel/vulkan/anv_internal_kernels.c b/src/intel/vulkan/anv_internal_kernels.c
index 7aa8243e7a2..d645c20534c 100644
--- a/src/intel/vulkan/anv_internal_kernels.c
+++ b/src/intel/vulkan/anv_internal_kernels.c
@@ -48,6 +48,19 @@ lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
return true;
}
+/**
+ * Assert-only sanity check on the stats of one compiled variant of an
+ * internal kernel: the variant must have no spills and no fills, and must
+ * contain exactly send_count SEND messages. The stats are only read.
+ */
+static void
+check_sends(const struct genisa_stats *stats, unsigned send_count)
+{
+ assert(stats->spill_count == 0);
+ assert(stats->fill_count == 0);
+ assert(stats->send_messages == send_count);
+}
+
static struct anv_shader_bin *
compile_shader(struct anv_device *device,
enum anv_internal_kernel_name shader_name,
@@ -153,7 +161,7 @@ compile_shader(struct anv_device *device,
const unsigned *program;
if (stage == MESA_SHADER_FRAGMENT) {
- struct brw_compile_stats stats[3];
+ struct genisa_stats stats[3];
struct brw_compile_fs_params params = {
.base = {
.nir = nir,
@@ -170,28 +178,18 @@ compile_shader(struct anv_device *device,
if (!INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
unsigned stat_idx = 0;
if (prog_data.wm.dispatch_8) {
- assert(stats[stat_idx].spills == 0);
- assert(stats[stat_idx].fills == 0);
- assert(stats[stat_idx].sends == sends_count_expectation);
- stat_idx++;
+ check_sends(&stats[stat_idx++], sends_count_expectation);
}
if (prog_data.wm.dispatch_16) {
- assert(stats[stat_idx].spills == 0);
- assert(stats[stat_idx].fills == 0);
- assert(stats[stat_idx].sends == sends_count_expectation);
- stat_idx++;
+ check_sends(&stats[stat_idx++], sends_count_expectation);
}
if (prog_data.wm.dispatch_32) {
- assert(stats[stat_idx].spills == 0);
- assert(stats[stat_idx].fills == 0);
- assert(stats[stat_idx].sends ==
- sends_count_expectation *
- (device->info->ver < 20 ? 2 : 1));
- stat_idx++;
+ check_sends(&stats[stat_idx++], sends_count_expectation *
+ (device->info->ver < 20 ? 2 : 1));
}
}
} else {
- struct brw_compile_stats stats;
+ struct genisa_stats stats;
struct brw_compile_cs_params params = {
.base = {
.nir = nir,
@@ -206,9 +204,7 @@ compile_shader(struct anv_device *device,
program = brw_compile_cs(compiler, &params);
if (!INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
- assert(stats.spills == 0);
- assert(stats.fills == 0);
- assert(stats.sends == sends_count_expectation);
+ check_sends(&stats, sends_count_expectation);
}
}
diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c
index efa87399be3..fa093a2a82a 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -103,7 +103,7 @@ anv_shader_bin_create(struct anv_device *device,
const void *kernel_data, uint32_t kernel_size,
const struct brw_stage_prog_data *prog_data_in,
uint32_t prog_data_size,
- const struct brw_compile_stats *stats, uint32_t num_stats,
+ const struct genisa_stats *stats, uint32_t num_stats,
const nir_xfb_info *xfb_info_in,
const struct anv_pipeline_bind_map *bind_map,
const struct anv_push_descriptor_info *push_desc_info)
@@ -381,7 +381,7 @@ anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
void *mem_ctx = ralloc_context(NULL);
uint32_t num_stats = blob_read_uint32(blob);
- const struct brw_compile_stats *stats =
+ const struct genisa_stats *stats =
blob_read_bytes(blob, num_stats * sizeof(stats[0]));
const nir_xfb_info *xfb_info = NULL;
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index a48792fa36c..ac7f587448e 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1225,7 +1225,7 @@ struct anv_shader {
const struct brw_stage_prog_data *prog_data;
- struct brw_compile_stats stats[3];
+ struct genisa_stats stats[3];
uint32_t num_stats;
char *nir_str;
@@ -5097,7 +5097,7 @@ struct anv_shader_upload_params {
const struct brw_stage_prog_data *prog_data;
uint32_t prog_data_size;
- const struct brw_compile_stats *stats;
+ const struct genisa_stats *stats;
uint32_t num_stats;
const struct nir_xfb_info *xfb_info;
@@ -5145,7 +5145,7 @@ struct anv_shader_bin {
const struct brw_stage_prog_data *prog_data;
uint32_t prog_data_size;
- struct brw_compile_stats stats[3];
+ struct genisa_stats stats[3];
uint32_t num_stats;
struct nir_xfb_info *xfb_info;
@@ -5178,7 +5178,7 @@ anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
struct anv_pipeline_executable {
mesa_shader_stage stage;
- struct brw_compile_stats stats;
+ struct genisa_stats stats;
char *nir;
char *disasm;
diff --git a/src/intel/vulkan/anv_shader.c b/src/intel/vulkan/anv_shader.c
index 1161dc249c5..489d198f3f1 100644
--- a/src/intel/vulkan/anv_shader.c
+++ b/src/intel/vulkan/anv_shader.c
@@ -8,6 +8,7 @@
#include "nir/nir_serialize.h"
#include "compiler/brw_disasm.h"
+#include "util/shader_stats.h"
static void
anv_shader_destroy(struct vk_device *vk_device,
@@ -174,7 +175,7 @@ anv_shader_get_executable_properties(struct vk_device *device,
container_of(vk_shader, struct anv_shader, vk);
for (uint32_t i = 0; i < shader->num_stats; i++) {
- const struct brw_compile_stats *stats = &shader->stats[i];
+ const struct genisa_stats *stats = &shader->stats[i];
vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
mesa_shader_stage stage = vk_shader->stage;
@@ -219,145 +220,11 @@ anv_shader_get_executable_statistics(struct vk_device *vk_device,
{
VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
statistics, statistic_count);
- struct anv_device *device =
- container_of(vk_device, struct anv_device, vk);
struct anv_shader *shader =
container_of(vk_shader, struct anv_shader, vk);
assert(executable_index < shader->num_stats);
-
- const struct brw_compile_stats *stats = &shader->stats[executable_index];
- const struct brw_stage_prog_data *prog_data = shader->prog_data;
-
- vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
- VK_COPY_STR(stat->name, "Instruction Count");
- VK_COPY_STR(stat->description,
- "Number of GEN instructions in the final generated "
- "shader executable.");
- stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- stat->value.u64 = stats->instructions;
- }
-
- vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
- VK_COPY_STR(stat->name, "SEND Count");
- VK_COPY_STR(stat->description,
- "Number of instructions in the final generated shader "
- "executable which access external units such as the "
- "constant cache or the sampler.");
- stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- stat->value.u64 = stats->sends;
- }
-
- vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
- VK_COPY_STR(stat->name, "Loop Count");
- VK_COPY_STR(stat->description,
- "Number of loops (not unrolled) in the final generated "
- "shader executable.");
- stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- stat->value.u64 = stats->loops;
- }
-
- vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
- VK_COPY_STR(stat->name, "Cycle Count");
- VK_COPY_STR(stat->description,
- "Estimate of the number of EU cycles required to execute "
- "the final generated executable. This is an estimate only "
- "and may vary greatly from actual run-time performance.");
- stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- stat->value.u64 = stats->cycles;
- }
-
- vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
- VK_COPY_STR(stat->name, "Spill Count");
- VK_COPY_STR(stat->description,
- "Number of scratch spill operations. This gives a rough "
- "estimate of the cost incurred due to spilling temporary "
- "values to memory. If this is non-zero, you may want to "
- "adjust your shader to reduce register pressure.");
- stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- stat->value.u64 = stats->spills;
- }
-
- vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
- VK_COPY_STR(stat->name, "Fill Count");
- VK_COPY_STR(stat->description,
- "Number of scratch fill operations. This gives a rough "
- "estimate of the cost incurred due to spilling temporary "
- "values to memory. If this is non-zero, you may want to "
- "adjust your shader to reduce register pressure.");
- stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- stat->value.u64 = stats->fills;
- }
-
- vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
- VK_COPY_STR(stat->name, "Scratch Memory Size");
- VK_COPY_STR(stat->description,
- "Number of bytes of scratch memory required by the "
- "generated shader executable. If this is non-zero, you "
- "may want to adjust your shader to reduce register "
- "pressure.");
- stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- stat->value.u64 = prog_data->total_scratch;
- }
-
- if (device->info->ver >= 30) {
- vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
- VK_COPY_STR(stat->name, "GRF registers");
- VK_COPY_STR(stat->description,
- "Number of GRF registers required by the shader.");
- stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- stat->value.u64 = prog_data->grf_used;
- }
- }
-
- vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
- VK_COPY_STR(stat->name, "Max dispatch width");
- VK_COPY_STR(stat->description,
- "Largest SIMD dispatch width.");
- stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- /* Report the max dispatch width only on the smallest SIMD variant */
- if (vk_shader->stage != MESA_SHADER_FRAGMENT || stats->dispatch_width == 8)
- stat->value.u64 = stats->max_dispatch_width;
- else
- stat->value.u64 = 0;
- }
-
- vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
- VK_COPY_STR(stat->name, "Max live registers");
- VK_COPY_STR(stat->description,
- "Maximum number of registers used across the entire shader.");
- stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- stat->value.u64 = stats->max_live_registers;
- }
-
- vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
- VK_COPY_STR(stat->name, "Workgroup Memory Size");
- VK_COPY_STR(stat->description,
- "Number of bytes of workgroup shared memory used by this "
- "shader including any padding.");
- stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- if (mesa_shader_stage_uses_workgroup(vk_shader->stage))
- stat->value.u64 = prog_data->total_shared;
- else
- stat->value.u64 = 0;
- }
-
- vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
- VK_COPY_STR(stat->name, "Source hash");
- VK_PRINT_STR(stat->description,
- "hash = 0x%08x. Hash generated from shader source.",
- prog_data->source_hash);
- stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- stat->value.u64 = prog_data->source_hash;
- }
-
- vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
- VK_COPY_STR(stat->name, "Non SSA regs after NIR");
- VK_COPY_STR(stat->description, "Non SSA regs after NIR translation to BRW.");
- stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- stat->value.u64 = stats->non_ssa_registers_after_nir;
- }
-
+ vk_add_genisa_stats(out, &shader->stats[executable_index]);
return VK_SUCCESS;
}
diff --git a/src/intel/vulkan/anv_shader.h b/src/intel/vulkan/anv_shader.h
index 3210f6c9ab0..5bfb54ae5d9 100644
--- a/src/intel/vulkan/anv_shader.h
+++ b/src/intel/vulkan/anv_shader.h
@@ -93,7 +93,7 @@ struct anv_shader_data {
const nir_xfb_info *xfb_info;
uint32_t num_stats;
- struct brw_compile_stats stats[3];
+ struct genisa_stats stats[3];
char *disasm[3];
bool use_primitive_replication;
diff --git a/src/util/shader_stats.xml b/src/util/shader_stats.xml
index 314bb0022e0..1122d48dd00 100644
--- a/src/util/shader_stats.xml
+++ b/src/util/shader_stats.xml
@@ -121,4 +121,63 @@
Number of SMEM instructions
Number of VOPD instructions
+
+
+
+ 0 for vec4
+
+
+ Number of GEN instructions in the final generated shader executable.
+
+
+ Number of instructions in the final generated shader executable
+ which access external units such as the constant cache or the sampler.
+
+
+ Number of loops (not unrolled) in the final generated shader
+ executable.
+
+
+ Estimate of the number of EU cycles required to execute the final
+ generated executable. This is an estimate only and may vary greatly
+ from actual run-time performance.
+
+
+ Number of scratch spill operations. This gives a rough estimate of
+ the cost incurred due to spilling temporary values to memory. If
+ this is non-zero, you may want to adjust your shader to reduce
+ register pressure.
+
+
+ Number of scratch fill operations. This gives a rough estimate of
+ the cost incurred due to spilling temporary values to memory. If
+ this is non-zero, you may want to adjust your shader to reduce
+ register pressure.
+
+
+ Number of bytes of scratch memory required by the generated shader
+ executable. If this is non-zero, you may want to adjust your shader
+ to reduce register pressure.
+
+
+ Number of GRF registers required by the shader.
+
+
+ Largest SIMD dispatch width.
+
+
+ Maximum number of registers used across the entire shader.
+
+
+ Number of bytes of workgroup shared memory used by this shader
+ including any padding.
+
+
+ Non SSA regs after NIR translation to BRW.
+
+
+ Hash generated from shader source.
+
+
+