amd: query cache sizes from the kernel

Also rename l1_cache_size -> tcp_cache_size. L1 means shader array cache.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21641>
This commit is contained in:
Marek Olšák
2023-02-18 03:54:27 -05:00
committed by Marge Bot
parent 6e2e89e6d8
commit 79732416fd
3 changed files with 73 additions and 46 deletions
+68 -45
View File
@@ -990,42 +990,64 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info)
info->tcc_rb_non_coherent = !util_is_power_of_two_or_zero(info->num_tcc_blocks);
switch (info->family) {
case CHIP_TAHITI:
case CHIP_PITCAIRN:
case CHIP_OLAND:
case CHIP_HAWAII:
case CHIP_KABINI:
case CHIP_TONGA:
case CHIP_STONEY:
case CHIP_RAVEN2:
info->l2_cache_size = info->num_tcc_blocks * 64 * 1024;
break;
case CHIP_VERDE:
case CHIP_HAINAN:
case CHIP_BONAIRE:
case CHIP_KAVERI:
case CHIP_ICELAND:
case CHIP_CARRIZO:
case CHIP_FIJI:
case CHIP_POLARIS12:
case CHIP_VEGAM:
case CHIP_GFX1036:
info->l2_cache_size = info->num_tcc_blocks * 128 * 1024;
break;
default:
info->l2_cache_size = info->num_tcc_blocks * 256 * 1024;
break;
case CHIP_REMBRANDT:
case CHIP_GFX1103_R1:
info->l2_cache_size = info->num_tcc_blocks * 512 * 1024;
break;
if (info->drm_minor >= 52) {
info->sqc_inst_cache_size = device_info.sqc_inst_cache_size;
info->sqc_scalar_cache_size = device_info.sqc_data_cache_size;
info->num_sqc_per_wgp = device_info.num_sqc_per_wgp;
}
if (info->gfx_level >= GFX11)
info->l1_cache_size = 32768;
else
info->l1_cache_size = 16384;
if (info->gfx_level >= GFX11 && info->drm_minor >= 52) {
info->tcp_cache_size = device_info.tcp_cache_size;
info->l1_cache_size = device_info.gl1c_cache_size;
info->l2_cache_size = device_info.gl2c_cache_size;
info->l3_cache_size_mb = DIV_ROUND_UP(device_info.mall_size, 1024 * 1024);
} else {
if (info->gfx_level >= GFX11) {
info->tcp_cache_size = 32768;
info->l1_cache_size = 256 * 1024;
} else {
info->tcp_cache_size = 16384;
info->l1_cache_size = 128 * 1024;
}
if (info->gfx_level >= GFX10_3 && info->has_dedicated_vram) {
info->l3_cache_size_mb = info->num_tcc_blocks *
(info->family == CHIP_NAVI21 ||
info->family == CHIP_NAVI22 ? 8 : 4);
}
switch (info->family) {
case CHIP_TAHITI:
case CHIP_PITCAIRN:
case CHIP_OLAND:
case CHIP_HAWAII:
case CHIP_KABINI:
case CHIP_TONGA:
case CHIP_STONEY:
case CHIP_RAVEN2:
info->l2_cache_size = info->num_tcc_blocks * 64 * 1024;
break;
case CHIP_VERDE:
case CHIP_HAINAN:
case CHIP_BONAIRE:
case CHIP_KAVERI:
case CHIP_ICELAND:
case CHIP_CARRIZO:
case CHIP_FIJI:
case CHIP_POLARIS12:
case CHIP_VEGAM:
case CHIP_GFX1036:
info->l2_cache_size = info->num_tcc_blocks * 128 * 1024;
break;
default:
info->l2_cache_size = info->num_tcc_blocks * 256 * 1024;
break;
case CHIP_REMBRANDT:
case CHIP_GFX1103_R1:
info->l2_cache_size = info->num_tcc_blocks * 512 * 1024;
break;
}
}
info->mc_arb_ramcfg = amdinfo.mc_arb_ramcfg;
info->gb_addr_config = amdinfo.gb_addr_cfg;
@@ -1412,12 +1434,6 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info)
}
}
if (info->gfx_level >= GFX10_3 && info->has_dedicated_vram) {
info->l3_cache_size_mb = info->num_tcc_blocks *
(info->family == CHIP_NAVI21 ||
info->family == CHIP_NAVI22 ? 8 : 4);
}
if (info->gfx_level >= GFX11) {
switch (info->family) {
case CHIP_GFX1103_R1:
@@ -1517,12 +1533,19 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
fprintf(f, " max_gpu_freq = %i MHz\n", info->max_gpu_freq_mhz);
fprintf(f, " max_gflops = %u GFLOPS\n", info->max_gflops);
if (info->gfx_level >= GFX10) {
fprintf(f, " l0_cache_size = %i KB\n", DIV_ROUND_UP(info->l1_cache_size, 1024));
fprintf(f, " l1_cache_size = %i KB\n", info->gfx_level >= GFX11 ? 256 : 128);
} else {
fprintf(f, " l1_cache_size = %i KB\n", DIV_ROUND_UP(info->l1_cache_size, 1024));
if (info->sqc_inst_cache_size) {
fprintf(f, " sqc_inst_cache_size = %i KB (%u per WGP)\n",
DIV_ROUND_UP(info->sqc_inst_cache_size, 1024), info->num_sqc_per_wgp);
}
if (info->sqc_scalar_cache_size) {
fprintf(f, " sqc_scalar_cache_size = %i KB (%u per WGP)\n",
DIV_ROUND_UP(info->sqc_scalar_cache_size, 1024), info->num_sqc_per_wgp);
}
fprintf(f, " tcp_cache_size = %i KB\n", DIV_ROUND_UP(info->tcp_cache_size, 1024));
if (info->gfx_level >= GFX10)
fprintf(f, " l1_cache_size = %i KB\n", DIV_ROUND_UP(info->l1_cache_size, 1024));
fprintf(f, " l2_cache_size = %i KB\n", DIV_ROUND_UP(info->l2_cache_size, 1024));
+4
View File
@@ -60,6 +60,10 @@ struct radeon_info {
uint32_t num_cu; /* only enabled CUs */
uint32_t max_gpu_freq_mhz; /* also known as the shader clock */
uint32_t max_gflops;
uint32_t sqc_inst_cache_size;
uint32_t sqc_scalar_cache_size;
uint32_t num_sqc_per_wgp;
uint32_t tcp_cache_size;
uint32_t l1_cache_size;
uint32_t l2_cache_size;
uint32_t l3_cache_size_mb;
+1 -1
View File
@@ -462,7 +462,7 @@ static void ac_sqtt_fill_asic_info(struct radeon_info *rad_info,
chunk->vram_bus_width = rad_info->memory_bus_width;
chunk->vram_size = (uint64_t)rad_info->vram_size_kb * 1024;
chunk->l2_cache_size = rad_info->l2_cache_size;
chunk->l1_cache_size = rad_info->l1_cache_size;
chunk->l1_cache_size = rad_info->tcp_cache_size;
chunk->lds_size = rad_info->lds_size_per_workgroup;
if (rad_info->gfx_level >= GFX10) {
/* RGP expects the LDS size in CU mode. */