3fa2220838
RGP expects shaders to be contiguous in memory, otherwise it explodes because we have to generate huge captures with lot of holes. This reduces capture sizes of Cyberpunk 2077 from ~3.5GiB to ~180MiB. This should also help for future pipeline libraries. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13690>
1009 lines
31 KiB
C
1009 lines
31 KiB
C
/*
|
|
* Copyright © 2016 Red Hat.
|
|
* Copyright © 2016 Bas Nieuwenhuizen
|
|
*
|
|
* based in part on anv driver which is:
|
|
* Copyright © 2015 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#ifndef _WIN32
|
|
#include <sys/utsname.h>
|
|
#endif
|
|
#include <sys/stat.h>
|
|
|
|
#include "util/mesa-sha1.h"
|
|
#include "ac_debug.h"
|
|
#include "radv_debug.h"
|
|
#include "radv_shader.h"
|
|
#include "sid.h"
|
|
|
|
#define TRACE_BO_SIZE 4096
|
|
#define TMA_BO_SIZE 4096
|
|
|
|
#define COLOR_RESET "\033[0m"
|
|
#define COLOR_RED "\033[31m"
|
|
#define COLOR_GREEN "\033[1;32m"
|
|
#define COLOR_YELLOW "\033[1;33m"
|
|
#define COLOR_CYAN "\033[1;36m"
|
|
|
|
#define RADV_DUMP_DIR "radv_dumps"
|
|
|
|
/* Trace BO layout (offsets are 4 bytes):
|
|
*
|
|
* [0]: primary trace ID
|
|
* [1]: secondary trace ID
|
|
* [2-3]: 64-bit GFX ring pipeline pointer
|
|
* [4-5]: 64-bit COMPUTE ring pipeline pointer
|
|
* [6-7]: Vertex descriptors pointer
|
|
* [8-9]: 64-bit descriptor set #0 pointer
|
|
* ...
|
|
* [68-69]: 64-bit descriptor set #31 pointer
|
|
*/
|
|
|
|
bool
|
|
radv_init_trace(struct radv_device *device)
|
|
{
|
|
struct radeon_winsys *ws = device->ws;
|
|
VkResult result;
|
|
|
|
result = ws->buffer_create(
|
|
ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM,
|
|
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
|
|
RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo);
|
|
if (result != VK_SUCCESS)
|
|
return false;
|
|
|
|
result = ws->buffer_make_resident(ws, device->trace_bo, true);
|
|
if (result != VK_SUCCESS)
|
|
return false;
|
|
|
|
device->trace_id_ptr = ws->buffer_map(device->trace_bo);
|
|
if (!device->trace_id_ptr)
|
|
return false;
|
|
|
|
ac_vm_fault_occured(device->physical_device->rad_info.chip_class, &device->dmesg_timestamp,
|
|
NULL);
|
|
|
|
return true;
|
|
}
|
|
|
|
void
|
|
radv_finish_trace(struct radv_device *device)
|
|
{
|
|
struct radeon_winsys *ws = device->ws;
|
|
|
|
if (unlikely(device->trace_bo)) {
|
|
ws->buffer_make_resident(ws, device->trace_bo, false);
|
|
ws->buffer_destroy(ws, device->trace_bo);
|
|
}
|
|
}
|
|
|
|
static void
|
|
radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
|
|
{
|
|
fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
|
|
device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2);
|
|
}
|
|
|
|
static void
|
|
radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)
|
|
{
|
|
struct radeon_winsys *ws = device->ws;
|
|
uint32_t value;
|
|
|
|
if (ws->read_registers(ws, offset, 1, &value))
|
|
ac_dump_reg(f, device->physical_device->rad_info.chip_class, offset, value, ~0);
|
|
}
|
|
|
|
static void
|
|
radv_dump_debug_registers(struct radv_device *device, FILE *f)
|
|
{
|
|
struct radeon_info *info = &device->physical_device->rad_info;
|
|
|
|
fprintf(f, "Memory-mapped registers:\n");
|
|
radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
|
|
|
|
radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
|
|
radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
|
|
radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
|
|
radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
|
|
radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
|
|
radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
|
|
radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
|
|
if (info->chip_class <= GFX8) {
|
|
radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
|
|
radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
|
|
radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
|
|
}
|
|
radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
|
|
radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
|
|
radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
|
|
radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
|
|
radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
|
|
radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
|
|
radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
|
|
radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
|
|
radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
|
|
radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
|
|
fprintf(f, "\n");
|
|
}
|
|
|
|
static void
|
|
radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
|
|
{
|
|
fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
|
|
for (unsigned j = 0; j < 4; j++)
|
|
ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);
|
|
}
|
|
|
|
static void
|
|
radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
|
|
{
|
|
unsigned sq_img_rsrc_word0 =
|
|
chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
|
|
|
|
fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n");
|
|
for (unsigned j = 0; j < 8; j++)
|
|
ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);
|
|
|
|
fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
|
|
for (unsigned j = 0; j < 8; j++)
|
|
ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);
|
|
}
|
|
|
|
static void
|
|
radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
|
|
{
|
|
fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
|
|
for (unsigned j = 0; j < 4; j++) {
|
|
ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);
|
|
}
|
|
}
|
|
|
|
static void
|
|
radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc,
|
|
FILE *f)
|
|
{
|
|
radv_dump_image_descriptor(chip_class, desc, f);
|
|
radv_dump_sampler_descriptor(chip_class, desc + 16, f);
|
|
}
|
|
|
|
static void
|
|
radv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set *set, unsigned id,
|
|
FILE *f)
|
|
{
|
|
enum chip_class chip_class = device->physical_device->rad_info.chip_class;
|
|
const struct radv_descriptor_set_layout *layout;
|
|
int i;
|
|
|
|
if (!set)
|
|
return;
|
|
layout = set->header.layout;
|
|
|
|
for (i = 0; i < set->header.layout->binding_count; i++) {
|
|
uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4;
|
|
|
|
switch (layout->binding[i].type) {
|
|
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
|
|
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
|
|
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
|
|
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
|
|
radv_dump_buffer_descriptor(chip_class, desc, f);
|
|
break;
|
|
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
|
|
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
|
|
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
|
|
radv_dump_image_descriptor(chip_class, desc, f);
|
|
break;
|
|
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
|
|
radv_dump_combined_image_sampler_descriptor(chip_class, desc, f);
|
|
break;
|
|
case VK_DESCRIPTOR_TYPE_SAMPLER:
|
|
radv_dump_sampler_descriptor(chip_class, desc, f);
|
|
break;
|
|
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
|
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
|
|
case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
|
|
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
|
|
/* todo */
|
|
break;
|
|
default:
|
|
assert(!"unknown descriptor type");
|
|
break;
|
|
}
|
|
fprintf(f, "\n");
|
|
}
|
|
fprintf(f, "\n\n");
|
|
}
|
|
|
|
static void
|
|
radv_dump_descriptors(struct radv_device *device, FILE *f)
|
|
{
|
|
uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
|
|
int i;
|
|
|
|
fprintf(f, "Descriptors:\n");
|
|
for (i = 0; i < MAX_SETS; i++) {
|
|
struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 4);
|
|
|
|
radv_dump_descriptor_set(device, set, i, f);
|
|
}
|
|
}
|
|
|
|
struct radv_shader_inst {
|
|
char text[160]; /* one disasm line */
|
|
unsigned offset; /* instruction offset */
|
|
unsigned size; /* instruction size = 4 or 8 */
|
|
};
|
|
|
|
/* Split a disassembly string into lines and add them to the array pointed
|
|
* to by "instructions". */
|
|
static void
|
|
si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num,
|
|
struct radv_shader_inst *instructions)
|
|
{
|
|
struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
|
|
char *next;
|
|
|
|
while ((next = strchr(disasm, '\n'))) {
|
|
struct radv_shader_inst *inst = &instructions[*num];
|
|
unsigned len = next - disasm;
|
|
|
|
if (!memchr(disasm, ';', len)) {
|
|
/* Ignore everything that is not an instruction. */
|
|
disasm = next + 1;
|
|
continue;
|
|
}
|
|
|
|
assert(len < ARRAY_SIZE(inst->text));
|
|
memcpy(inst->text, disasm, len);
|
|
inst->text[len] = 0;
|
|
inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;
|
|
|
|
const char *semicolon = strchr(disasm, ';');
|
|
assert(semicolon);
|
|
/* More than 16 chars after ";" means the instruction is 8 bytes long. */
|
|
inst->size = next - semicolon > 16 ? 8 : 4;
|
|
|
|
snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
|
|
" [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset,
|
|
inst->size);
|
|
|
|
last_inst = inst;
|
|
(*num)++;
|
|
disasm = next + 1;
|
|
}
|
|
}
|
|
|
|
static void
|
|
radv_dump_annotated_shader(struct radv_shader *shader, gl_shader_stage stage,
|
|
struct ac_wave_info *waves, unsigned num_waves, FILE *f)
|
|
{
|
|
uint64_t start_addr, end_addr;
|
|
unsigned i;
|
|
|
|
if (!shader)
|
|
return;
|
|
|
|
start_addr = radv_shader_get_va(shader);
|
|
end_addr = start_addr + shader->code_size;
|
|
|
|
/* See if any wave executes the shader. */
|
|
for (i = 0; i < num_waves; i++) {
|
|
if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
|
|
break;
|
|
}
|
|
|
|
if (i == num_waves)
|
|
return; /* the shader is not being executed */
|
|
|
|
/* Remember the first found wave. The waves are sorted according to PC. */
|
|
waves = &waves[i];
|
|
num_waves -= i;
|
|
|
|
/* Get the list of instructions.
|
|
* Buffer size / 4 is the upper bound of the instruction count.
|
|
*/
|
|
unsigned num_inst = 0;
|
|
struct radv_shader_inst *instructions =
|
|
calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
|
|
|
|
si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
|
|
|
|
fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
|
|
radv_get_shader_name(&shader->info, stage));
|
|
|
|
/* Print instructions with annotations. */
|
|
for (i = 0; i < num_inst; i++) {
|
|
struct radv_shader_inst *inst = &instructions[i];
|
|
|
|
fprintf(f, "%s\n", inst->text);
|
|
|
|
/* Print which waves execute the instruction right now. */
|
|
while (num_waves && start_addr + inst->offset == waves->pc) {
|
|
fprintf(f,
|
|
" " COLOR_GREEN "^ SE%u SH%u CU%u "
|
|
"SIMD%u WAVE%u EXEC=%016" PRIx64 " ",
|
|
waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec);
|
|
|
|
if (inst->size == 4) {
|
|
fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0);
|
|
} else {
|
|
fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1);
|
|
}
|
|
|
|
waves->matched = true;
|
|
waves = &waves[1];
|
|
num_waves--;
|
|
}
|
|
}
|
|
|
|
fprintf(f, "\n\n");
|
|
free(instructions);
|
|
}
|
|
|
|
static void
|
|
radv_dump_annotated_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
|
|
FILE *f)
|
|
{
|
|
struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
|
|
enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class;
|
|
unsigned num_waves = ac_get_wave_info(chip_class, waves);
|
|
|
|
fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
|
|
|
|
/* Dump annotated active graphics shaders. */
|
|
unsigned stages = active_stages;
|
|
while (stages) {
|
|
int stage = u_bit_scan(&stages);
|
|
|
|
radv_dump_annotated_shader(pipeline->shaders[stage], stage, waves, num_waves, f);
|
|
}
|
|
|
|
/* Print waves executing shaders that are not currently bound. */
|
|
unsigned i;
|
|
bool found = false;
|
|
for (i = 0; i < num_waves; i++) {
|
|
if (waves[i].matched)
|
|
continue;
|
|
|
|
if (!found) {
|
|
fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");
|
|
found = true;
|
|
}
|
|
fprintf(f,
|
|
" SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64
|
|
"\n",
|
|
waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,
|
|
waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);
|
|
}
|
|
if (found)
|
|
fprintf(f, "\n\n");
|
|
}
|
|
|
|
static void
|
|
radv_dump_spirv(struct radv_shader *shader, const char *sha1, const char *dump_dir)
|
|
{
|
|
char dump_path[512];
|
|
FILE *f;
|
|
|
|
snprintf(dump_path, sizeof(dump_path), "%s/%s.spv", dump_dir, sha1);
|
|
|
|
f = fopen(dump_path, "w+");
|
|
if (f) {
|
|
fwrite(shader->spirv, shader->spirv_size, 1, f);
|
|
fclose(f);
|
|
}
|
|
}
|
|
|
|
static void
|
|
radv_dump_shader(struct radv_pipeline *pipeline, struct radv_shader *shader,
|
|
gl_shader_stage stage, const char *dump_dir, FILE *f)
|
|
{
|
|
if (!shader)
|
|
return;
|
|
|
|
fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
|
|
|
|
if (shader->spirv) {
|
|
unsigned char sha1[21];
|
|
char sha1buf[41];
|
|
|
|
_mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
|
|
_mesa_sha1_format(sha1buf, sha1);
|
|
|
|
fprintf(f, "SPIRV (see %s.spv)\n\n", sha1buf);
|
|
radv_dump_spirv(shader, sha1buf, dump_dir);
|
|
}
|
|
|
|
if (shader->nir_string) {
|
|
fprintf(f, "NIR:\n%s\n", shader->nir_string);
|
|
}
|
|
|
|
fprintf(f, "%s IR:\n%s\n", pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO",
|
|
shader->ir_string);
|
|
fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
|
|
|
|
radv_dump_shader_stats(pipeline->device, pipeline, stage, f);
|
|
}
|
|
|
|
static void
|
|
radv_dump_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
|
|
const char *dump_dir, FILE *f)
|
|
{
|
|
/* Dump active graphics shaders. */
|
|
unsigned stages = active_stages;
|
|
while (stages) {
|
|
int stage = u_bit_scan(&stages);
|
|
|
|
radv_dump_shader(pipeline, pipeline->shaders[stage], stage, dump_dir, f);
|
|
}
|
|
}
|
|
|
|
static void
|
|
radv_dump_vertex_descriptors(struct radv_pipeline *pipeline, FILE *f)
|
|
{
|
|
void *ptr = (uint64_t *)pipeline->device->trace_id_ptr;
|
|
uint32_t count = util_bitcount(pipeline->vb_desc_usage_mask);
|
|
uint32_t *vb_ptr = &((uint32_t *)ptr)[3];
|
|
|
|
if (!count)
|
|
return;
|
|
|
|
fprintf(f, "Num vertex %s: %d\n",
|
|
pipeline->use_per_attribute_vb_descs ? "attributes" : "bindings", count);
|
|
for (uint32_t i = 0; i < count; i++) {
|
|
uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
|
|
uint64_t va = 0;
|
|
|
|
va |= desc[0];
|
|
va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;
|
|
|
|
fprintf(f, "VBO#%d:\n", i);
|
|
fprintf(f, "\tVA: 0x%" PRIx64 "\n", va);
|
|
fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));
|
|
fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);
|
|
}
|
|
}
|
|
|
|
static struct radv_pipeline *
|
|
radv_get_saved_pipeline(struct radv_device *device, enum ring_type ring)
|
|
{
|
|
uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
|
|
int offset = ring == RING_GFX ? 1 : 2;
|
|
|
|
return *(struct radv_pipeline **)(ptr + offset);
|
|
}
|
|
|
|
static void
|
|
radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f)
|
|
{
|
|
enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
|
|
struct radv_pipeline *pipeline;
|
|
|
|
fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE");
|
|
|
|
pipeline = radv_get_saved_pipeline(queue->device, ring);
|
|
if (pipeline) {
|
|
radv_dump_shaders(pipeline, pipeline->active_stages, dump_dir, f);
|
|
if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR))
|
|
radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f);
|
|
radv_dump_vertex_descriptors(pipeline, f);
|
|
radv_dump_descriptors(queue->device, f);
|
|
}
|
|
}
|
|
|
|
static void
|
|
radv_dump_cmd(const char *cmd, FILE *f)
|
|
{
|
|
#ifndef _WIN32
|
|
char line[2048];
|
|
FILE *p;
|
|
|
|
p = popen(cmd, "r");
|
|
if (p) {
|
|
while (fgets(line, sizeof(line), p))
|
|
fputs(line, f);
|
|
fprintf(f, "\n");
|
|
pclose(p);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void
|
|
radv_dump_dmesg(FILE *f)
|
|
{
|
|
fprintf(f, "\nLast 60 lines of dmesg:\n\n");
|
|
radv_dump_cmd("dmesg | tail -n60", f);
|
|
}
|
|
|
|
void
|
|
radv_dump_enabled_options(struct radv_device *device, FILE *f)
|
|
{
|
|
uint64_t mask;
|
|
|
|
if (device->instance->debug_flags) {
|
|
fprintf(f, "Enabled debug options: ");
|
|
|
|
mask = device->instance->debug_flags;
|
|
while (mask) {
|
|
int i = u_bit_scan64(&mask);
|
|
fprintf(f, "%s, ", radv_get_debug_option_name(i));
|
|
}
|
|
fprintf(f, "\n");
|
|
}
|
|
|
|
if (device->instance->perftest_flags) {
|
|
fprintf(f, "Enabled perftest options: ");
|
|
|
|
mask = device->instance->perftest_flags;
|
|
while (mask) {
|
|
int i = u_bit_scan64(&mask);
|
|
fprintf(f, "%s, ", radv_get_perftest_option_name(i));
|
|
}
|
|
fprintf(f, "\n");
|
|
}
|
|
}
|
|
|
|
static void
|
|
radv_dump_app_info(struct radv_device *device, FILE *f)
|
|
{
|
|
struct radv_instance *instance = device->instance;
|
|
|
|
fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);
|
|
fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);
|
|
fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
|
|
fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
|
|
fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version),
|
|
VK_VERSION_MINOR(instance->vk.app_info.api_version),
|
|
VK_VERSION_PATCH(instance->vk.app_info.api_version));
|
|
|
|
radv_dump_enabled_options(device, f);
|
|
}
|
|
|
|
static void
|
|
radv_dump_device_name(struct radv_device *device, FILE *f)
|
|
{
|
|
struct radeon_info *info = &device->physical_device->rad_info;
|
|
#ifndef _WIN32
|
|
char kernel_version[128] = {0};
|
|
struct utsname uname_data;
|
|
#endif
|
|
const char *chip_name;
|
|
|
|
chip_name = device->ws->get_chip_name(device->ws);
|
|
|
|
#ifdef _WIN32
|
|
fprintf(f, "Device name: %s (%s / DRM %i.%i.%i)\n\n", chip_name, device->physical_device->name,
|
|
info->drm_major, info->drm_minor, info->drm_patchlevel);
|
|
#else
|
|
if (uname(&uname_data) == 0)
|
|
snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
|
|
|
|
fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n", chip_name, device->physical_device->name,
|
|
info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version);
|
|
#endif
|
|
}
|
|
|
|
static void
|
|
radv_dump_umr_ring(struct radv_queue *queue, FILE *f)
|
|
{
|
|
enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
|
|
struct radv_device *device = queue->device;
|
|
char cmd[128];
|
|
|
|
/* TODO: Dump compute ring. */
|
|
if (ring != RING_GFX)
|
|
return;
|
|
|
|
sprintf(cmd, "umr -R %s 2>&1",
|
|
device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
|
|
|
|
fprintf(f, "\nUMR GFX ring:\n\n");
|
|
radv_dump_cmd(cmd, f);
|
|
}
|
|
|
|
static void
|
|
radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
|
|
{
|
|
enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
|
|
struct radv_device *device = queue->device;
|
|
char cmd[128];
|
|
|
|
/* TODO: Dump compute ring. */
|
|
if (ring != RING_GFX)
|
|
return;
|
|
|
|
sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1",
|
|
device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
|
|
|
|
fprintf(f, "\nUMR GFX waves:\n\n");
|
|
radv_dump_cmd(cmd, f);
|
|
}
|
|
|
|
static bool
|
|
radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
|
|
{
|
|
struct radeon_winsys *ws = queue->device->ws;
|
|
|
|
if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
void
|
|
radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
|
|
{
|
|
struct radv_device *device = queue->device;
|
|
enum ring_type ring;
|
|
uint64_t addr;
|
|
|
|
ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
|
|
|
|
bool hang_occurred = radv_gpu_hang_occured(queue, ring);
|
|
bool vm_fault_occurred = false;
|
|
if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
|
|
vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
|
|
&device->dmesg_timestamp, &addr);
|
|
if (!hang_occurred && !vm_fault_occurred)
|
|
return;
|
|
|
|
fprintf(stderr, "radv: GPU hang detected...\n");
|
|
|
|
#ifndef _WIN32
|
|
/* Create a directory into $HOME/radv_dumps_<pid>_<time> to save
|
|
* various debugging info about that GPU hang.
|
|
*/
|
|
struct tm *timep, result;
|
|
time_t raw_time;
|
|
FILE *f;
|
|
char dump_dir[256], dump_path[512], buf_time[128];
|
|
|
|
time(&raw_time);
|
|
timep = os_localtime(&raw_time, &result);
|
|
strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);
|
|
|
|
snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."),
|
|
getpid(), buf_time);
|
|
if (mkdir(dump_dir, 0774) && errno != EEXIST) {
|
|
fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno);
|
|
abort();
|
|
}
|
|
|
|
fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);
|
|
|
|
/* Dump trace file. */
|
|
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
|
|
f = fopen(dump_path, "w+");
|
|
if (f) {
|
|
radv_dump_trace(queue->device, cs, f);
|
|
fclose(f);
|
|
}
|
|
|
|
/* Dump pipeline state. */
|
|
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
|
|
f = fopen(dump_path, "w+");
|
|
if (f) {
|
|
radv_dump_queue_state(queue, dump_dir, f);
|
|
fclose(f);
|
|
}
|
|
|
|
if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
|
|
/* Dump UMR ring. */
|
|
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log");
|
|
f = fopen(dump_path, "w+");
|
|
if (f) {
|
|
radv_dump_umr_ring(queue, f);
|
|
fclose(f);
|
|
}
|
|
|
|
/* Dump UMR waves. */
|
|
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log");
|
|
f = fopen(dump_path, "w+");
|
|
if (f) {
|
|
radv_dump_umr_waves(queue, f);
|
|
fclose(f);
|
|
}
|
|
}
|
|
|
|
/* Dump debug registers. */
|
|
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log");
|
|
f = fopen(dump_path, "w+");
|
|
if (f) {
|
|
radv_dump_debug_registers(device, f);
|
|
fclose(f);
|
|
}
|
|
|
|
/* Dump BO ranges. */
|
|
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_ranges.log");
|
|
f = fopen(dump_path, "w+");
|
|
if (f) {
|
|
device->ws->dump_bo_ranges(device->ws, f);
|
|
fclose(f);
|
|
}
|
|
|
|
/* Dump BO log. */
|
|
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log");
|
|
f = fopen(dump_path, "w+");
|
|
if (f) {
|
|
device->ws->dump_bo_log(device->ws, f);
|
|
fclose(f);
|
|
}
|
|
|
|
/* Dump VM fault info. */
|
|
if (vm_fault_occurred) {
|
|
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");
|
|
f = fopen(dump_path, "w+");
|
|
if (f) {
|
|
fprintf(f, "VM fault report.\n\n");
|
|
fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n\n", addr);
|
|
fclose(f);
|
|
}
|
|
}
|
|
|
|
/* Dump app info. */
|
|
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "app_info.log");
|
|
f = fopen(dump_path, "w+");
|
|
if (f) {
|
|
radv_dump_app_info(device, f);
|
|
fclose(f);
|
|
}
|
|
|
|
/* Dump GPU info. */
|
|
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log");
|
|
f = fopen(dump_path, "w+");
|
|
if (f) {
|
|
radv_dump_device_name(device, f);
|
|
ac_print_gpu_info(&device->physical_device->rad_info, f);
|
|
fclose(f);
|
|
}
|
|
|
|
/* Dump dmesg. */
|
|
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log");
|
|
f = fopen(dump_path, "w+");
|
|
if (f) {
|
|
radv_dump_dmesg(f);
|
|
fclose(f);
|
|
}
|
|
#endif
|
|
|
|
fprintf(stderr, "radv: GPU hang report saved successfully!\n");
|
|
abort();
|
|
}
|
|
|
|
void
|
|
radv_print_spirv(const char *data, uint32_t size, FILE *fp)
|
|
{
|
|
#ifndef _WIN32
|
|
char path[] = "/tmp/fileXXXXXX";
|
|
char command[128];
|
|
int fd;
|
|
|
|
/* Dump the binary into a temporary file. */
|
|
fd = mkstemp(path);
|
|
if (fd < 0)
|
|
return;
|
|
|
|
if (write(fd, data, size) == -1)
|
|
goto fail;
|
|
|
|
/* Disassemble using spirv-dis if installed. */
|
|
sprintf(command, "spirv-dis %s", path);
|
|
radv_dump_cmd(command, fp);
|
|
|
|
fail:
|
|
close(fd);
|
|
unlink(path);
|
|
#endif
|
|
}
|
|
|
|
bool
|
|
radv_trap_handler_init(struct radv_device *device)
|
|
{
|
|
struct radeon_winsys *ws = device->ws;
|
|
VkResult result;
|
|
|
|
/* Create the trap handler shader and upload it like other shaders. */
|
|
device->trap_handler_shader = radv_create_trap_handler_shader(device);
|
|
if (!device->trap_handler_shader) {
|
|
fprintf(stderr, "radv: failed to create the trap handler shader.\n");
|
|
return false;
|
|
}
|
|
|
|
result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,
|
|
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
|
|
RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
|
|
RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo);
|
|
if (result != VK_SUCCESS)
|
|
return false;
|
|
|
|
result = ws->buffer_make_resident(ws, device->tma_bo, true);
|
|
if (result != VK_SUCCESS)
|
|
return false;
|
|
|
|
device->tma_ptr = ws->buffer_map(device->tma_bo);
|
|
if (!device->tma_ptr)
|
|
return false;
|
|
|
|
/* Upload a buffer descriptor to store various info from the trap. */
|
|
uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
|
|
uint32_t desc[4];
|
|
|
|
desc[0] = tma_va;
|
|
desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
|
|
desc[2] = TMA_BO_SIZE;
|
|
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
|
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
|
|
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
|
|
|
|
memcpy(device->tma_ptr, desc, sizeof(desc));
|
|
|
|
return true;
|
|
}
|
|
|
|
void
|
|
radv_trap_handler_finish(struct radv_device *device)
|
|
{
|
|
struct radeon_winsys *ws = device->ws;
|
|
|
|
if (unlikely(device->trap_handler_shader)) {
|
|
radv_shader_destroy(device, device->trap_handler_shader);
|
|
}
|
|
|
|
if (unlikely(device->tma_bo)) {
|
|
ws->buffer_make_resident(ws, device->tma_bo, false);
|
|
ws->buffer_destroy(ws, device->tma_bo);
|
|
}
|
|
}
|
|
|
|
static void
|
|
radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
|
|
{
|
|
struct radv_shader *shader;
|
|
uint64_t start_addr, end_addr;
|
|
uint32_t instr_offset;
|
|
|
|
shader = radv_find_shader(device, faulty_pc);
|
|
if (!shader)
|
|
return;
|
|
|
|
start_addr = radv_shader_get_va(shader);
|
|
end_addr = start_addr + shader->code_size;
|
|
instr_offset = faulty_pc - start_addr;
|
|
|
|
fprintf(stderr,
|
|
"Faulty shader found "
|
|
"VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n",
|
|
start_addr, end_addr, instr_offset);
|
|
|
|
/* Get the list of instructions.
|
|
* Buffer size / 4 is the upper bound of the instruction count.
|
|
*/
|
|
unsigned num_inst = 0;
|
|
struct radv_shader_inst *instructions =
|
|
calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
|
|
|
|
/* Split the disassembly string into instructions. */
|
|
si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
|
|
|
|
/* Print instructions with annotations. */
|
|
for (unsigned i = 0; i < num_inst; i++) {
|
|
struct radv_shader_inst *inst = &instructions[i];
|
|
|
|
if (start_addr + inst->offset == faulty_pc) {
|
|
fprintf(stderr, "\n!!! Faulty instruction below !!!\n");
|
|
fprintf(stderr, "%s\n", inst->text);
|
|
fprintf(stderr, "\n");
|
|
} else {
|
|
fprintf(stderr, "%s\n", inst->text);
|
|
}
|
|
}
|
|
|
|
free(instructions);
|
|
}
|
|
|
|
struct radv_sq_hw_reg {
|
|
uint32_t status;
|
|
uint32_t trap_sts;
|
|
uint32_t hw_id;
|
|
uint32_t ib_sts;
|
|
};
|
|
|
|
static void
|
|
radv_dump_sq_hw_regs(struct radv_device *device)
|
|
{
|
|
struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
|
|
|
|
fprintf(stderr, "\nHardware registers:\n");
|
|
if (device->physical_device->rad_info.chip_class >= GFX10) {
|
|
ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000408_SQ_WAVE_STATUS,
|
|
regs->status, ~0);
|
|
ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00040C_SQ_WAVE_TRAPSTS,
|
|
regs->trap_sts, ~0);
|
|
ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00045C_SQ_WAVE_HW_ID1,
|
|
regs->hw_id, ~0);
|
|
ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00041C_SQ_WAVE_IB_STS,
|
|
regs->ib_sts, ~0);
|
|
} else {
|
|
ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000048_SQ_WAVE_STATUS,
|
|
regs->status, ~0);
|
|
ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00004C_SQ_WAVE_TRAPSTS,
|
|
regs->trap_sts, ~0);
|
|
ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000050_SQ_WAVE_HW_ID,
|
|
regs->hw_id, ~0);
|
|
ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00005C_SQ_WAVE_IB_STS,
|
|
regs->ib_sts, ~0);
|
|
}
|
|
fprintf(stderr, "\n\n");
|
|
}
|
|
|
|
void
|
|
radv_check_trap_handler(struct radv_queue *queue)
|
|
{
|
|
enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
|
|
struct radv_device *device = queue->device;
|
|
struct radeon_winsys *ws = device->ws;
|
|
|
|
/* Wait for the context to be idle in a finite time. */
|
|
ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family);
|
|
|
|
/* Try to detect if the trap handler has been reached by the hw by
|
|
* looking at ttmp0 which should be non-zero if a shader exception
|
|
* happened.
|
|
*/
|
|
if (!device->tma_ptr[4])
|
|
return;
|
|
|
|
#if 0
|
|
fprintf(stderr, "tma_ptr:\n");
|
|
for (unsigned i = 0; i < 10; i++)
|
|
fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);
|
|
#endif
|
|
|
|
radv_dump_sq_hw_regs(device);
|
|
|
|
uint32_t ttmp0 = device->tma_ptr[4];
|
|
uint32_t ttmp1 = device->tma_ptr[5];
|
|
|
|
/* According to the ISA docs, 3.10 Trap and Exception Registers:
|
|
*
|
|
* "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
|
|
*
|
|
* "When the trap handler is entered, the PC of the faulting
|
|
* instruction is: (PC - PC_rewind * 4)."
|
|
* */
|
|
uint8_t trap_id = (ttmp1 >> 16) & 0xff;
|
|
uint8_t ht = (ttmp1 >> 24) & 0x1;
|
|
uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
|
|
uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
|
|
|
|
fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht,
|
|
pc_rewind);
|
|
|
|
radv_dump_faulty_shader(device, pc);
|
|
|
|
abort();
|
|
}
|