ac/gpu_info: create separate function ac_fill_cu_info() to fill out CU info

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38701>
This commit is contained in:
Daniel Schürmann
2025-11-26 19:34:07 +01:00
committed by Marge Bot
parent 749c619c45
commit 6f4e8046b5
5 changed files with 81 additions and 94 deletions

View File

@@ -233,6 +233,74 @@ static bool handle_env_var_force_family(struct radeon_info *info)
return true;
}
void
ac_fill_cu_info(struct radeon_info *info, struct drm_amdgpu_info_device *device_info)
{
struct ac_cu_info *cu_info = &info->cu_info;
if (info->gfx_level >= GFX10_3)
cu_info->max_waves_per_simd = 16;
else if (info->gfx_level == GFX10)
cu_info->max_waves_per_simd = 20;
else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
cu_info->max_waves_per_simd = 8;
else
cu_info->max_waves_per_simd = 10;
if (info->gfx_level >= GFX10) {
cu_info->num_physical_sgprs_per_simd = 108 * cu_info->max_waves_per_simd;
cu_info->min_sgpr_alloc = 108;
cu_info->max_sgpr_alloc = 108; /* includes VCC, which can be treated as s[106-107] on GFX10+ */
cu_info->sgpr_alloc_granularity = 108;
} else if (info->family == CHIP_TONGA || info->family == CHIP_ICELAND) {
/* SGPRInitBug: Due to a HW bug, we always have to allocate the same amount of SGPRs. */
cu_info->num_physical_sgprs_per_simd = 800;
cu_info->min_sgpr_alloc = 96;
cu_info->max_sgpr_alloc = 96;
cu_info->sgpr_alloc_granularity = 96;
} else if (info->gfx_level >= GFX8) {
cu_info->num_physical_sgprs_per_simd = 800;
cu_info->min_sgpr_alloc = 16;
cu_info->max_sgpr_alloc = 102;
cu_info->sgpr_alloc_granularity = 16;
} else {
cu_info->num_physical_sgprs_per_simd = 512;
cu_info->min_sgpr_alloc = 8;
cu_info->max_sgpr_alloc = 104;
cu_info->sgpr_alloc_granularity = 8;
}
/* Some GPU info was broken before DRM 3.45.0. */
if (info->drm_minor >= 45 && device_info && device_info->num_shader_visible_vgprs) {
/* The Gfx10 VGPR count is in Wave32, so divide it by 2 for Wave64.
* Gfx6-9 numbers are in Wave64. CDNA also includes Accumulation VGPRs.
*/
if (info->gfx_level >= GFX10 || (info->gfx_level == GFX9 && info->family >= CHIP_MI100))
cu_info->num_physical_wave64_vgprs_per_simd = device_info->num_shader_visible_vgprs / 2;
else
cu_info->num_physical_wave64_vgprs_per_simd = device_info->num_shader_visible_vgprs;
} else {
if (info->family == CHIP_NAVI31 || info->family == CHIP_NAVI32 ||
info->family == CHIP_STRIX_HALO || info->gfx_level == GFX12) {
cu_info->num_physical_wave64_vgprs_per_simd = 768;
} else if (info->gfx_level >= GFX10) {
cu_info->num_physical_wave64_vgprs_per_simd = 512;
} else {
cu_info->num_physical_wave64_vgprs_per_simd = 256;
}
}
if (info->gfx_level >= GFX10_3)
cu_info->wave64_vgpr_alloc_granularity = cu_info->num_physical_wave64_vgprs_per_simd / 64;
else if (info->gfx_level == GFX9 && info->family >= CHIP_MI200)
cu_info->wave64_vgpr_alloc_granularity = 8;
else
cu_info->wave64_vgpr_alloc_granularity = 4;
cu_info->min_wave64_vgpr_alloc = cu_info->wave64_vgpr_alloc_granularity;
cu_info->max_vgpr_alloc = 256;
cu_info->num_simd_per_compute_unit = info->gfx_level >= GFX10 ? 2 : 4;
}
enum ac_query_gpu_info_result
ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
bool require_pci_bus_info)
@@ -1259,39 +1327,6 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
}
}
/* Fill ac_cu_info */
if (info->gfx_level >= GFX10_3)
info->cu_info.max_waves_per_simd = 16;
else if (info->gfx_level == GFX10)
info->cu_info.max_waves_per_simd = 20;
else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
info->cu_info.max_waves_per_simd = 8;
else
info->cu_info.max_waves_per_simd = 10;
if (info->gfx_level >= GFX10) {
info->cu_info.num_physical_sgprs_per_simd = 108 * info->cu_info.max_waves_per_simd;
info->cu_info.min_sgpr_alloc = 108;
info->cu_info.max_sgpr_alloc = 108; /* includes VCC, which can be treated as s[106-107] on GFX10+ */
info->cu_info.sgpr_alloc_granularity = 108;
} else if (info->family == CHIP_TONGA || info->family == CHIP_ICELAND) {
/* SGPRInitBug: Due to a HW bug, we always have to allocate the same amount of SGPRs. */
info->cu_info.num_physical_sgprs_per_simd = 800;
info->cu_info.min_sgpr_alloc = 96;
info->cu_info.max_sgpr_alloc = 96;
info->cu_info.sgpr_alloc_granularity = 96;
} else if (info->gfx_level >= GFX8) {
info->cu_info.num_physical_sgprs_per_simd = 800;
info->cu_info.min_sgpr_alloc = 16;
info->cu_info.max_sgpr_alloc = 102;
info->cu_info.sgpr_alloc_granularity = 16;
} else {
info->cu_info.num_physical_sgprs_per_simd = 512;
info->cu_info.min_sgpr_alloc = 8;
info->cu_info.max_sgpr_alloc = 104;
info->cu_info.sgpr_alloc_granularity = 8;
}
info->has_3d_cube_border_color_mipmap = info->has_graphics || info->family == CHIP_MI100;
info->has_image_opcodes = debug_get_bool_option("AMD_IMAGE_OPCODES",
info->has_graphics || info->family < CHIP_GFX940);
@@ -1308,35 +1343,7 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
/* On GFX10.3, the polarity of AUTO_FLUSH_MODE is inverted. */
info->has_sqtt_auto_flush_mode_bug = info->gfx_level == GFX10_3;
/* Some GPU info was broken before DRM 3.45.0. */
if (info->drm_minor >= 45 && device_info.num_shader_visible_vgprs) {
/* The Gfx10 VGPR count is in Wave32, so divide it by 2 for Wave64.
* Gfx6-9 numbers are in Wave64. CDNA also includes Accumulation VGPRs.
*/
if (info->gfx_level >= GFX10 || (info->gfx_level == GFX9 && info->family >= CHIP_MI100))
info->cu_info.num_physical_wave64_vgprs_per_simd = device_info.num_shader_visible_vgprs / 2;
else
info->cu_info.num_physical_wave64_vgprs_per_simd = device_info.num_shader_visible_vgprs;
} else {
if (info->family == CHIP_NAVI31 || info->family == CHIP_NAVI32 ||
info->family == CHIP_STRIX_HALO || info->gfx_level == GFX12) {
info->cu_info.num_physical_wave64_vgprs_per_simd = 768;
} else if (info->gfx_level >= GFX10) {
info->cu_info.num_physical_wave64_vgprs_per_simd = 512;
} else {
info->cu_info.num_physical_wave64_vgprs_per_simd = 256;
}
}
if (info->gfx_level >= GFX10_3)
info->cu_info.wave64_vgpr_alloc_granularity = info->cu_info.num_physical_wave64_vgprs_per_simd / 64;
else if (info->gfx_level == GFX9 && info->family >= CHIP_MI200)
info->cu_info.wave64_vgpr_alloc_granularity = 8;
else
info->cu_info.wave64_vgpr_alloc_granularity = 4;
info->cu_info.min_wave64_vgpr_alloc = info->cu_info.wave64_vgpr_alloc_granularity;
info->cu_info.max_vgpr_alloc = 256;
info->cu_info.num_simd_per_compute_unit = info->gfx_level >= GFX10 ? 2 : 4;
ac_fill_cu_info(info, &device_info);
/* BIG_PAGE is supported since gfx10.3 and requires VRAM. VRAM is only guaranteed
* with AMDGPU_GEM_CREATE_DISCARDABLE. DISCARDABLE was added in DRM 3.47.0.

View File

@@ -20,6 +20,7 @@ extern "C" {
#define AMD_MAX_WGP 60
struct amdgpu_gpu_info;
struct drm_amdgpu_info_device;
struct amd_ip_info {
uint8_t ver_major;
@@ -364,6 +365,7 @@ enum ac_query_gpu_info_result {
enum ac_query_gpu_info_result ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
bool require_pci_bus_info);
/* Fill info->cu_info (wave, SGPR and VGPR limits) from info->gfx_level,
 * info->family and info->drm_minor. device_info may be NULL, in which case
 * built-in per-chip VGPR counts are used instead of the kernel-reported one.
 */
void ac_fill_cu_info(struct radeon_info *info, struct drm_amdgpu_info_device *device_info);
void ac_compute_driver_uuid(char *uuid, size_t size);

View File

@@ -46,21 +46,6 @@ ac_null_device_create(struct radeon_info *gpu_info, const char *family)
gpu_info->pci_id = pci_ids[gpu_info->family].pci_id;
gpu_info->max_se = pci_ids[gpu_info->family].has_dedicated_vram ? 4 : 1;
gpu_info->num_se = gpu_info->max_se;
if (gpu_info->gfx_level >= GFX10_3)
gpu_info->cu_info.max_waves_per_simd = 16;
else if (gpu_info->gfx_level >= GFX10)
gpu_info->cu_info.max_waves_per_simd = 20;
else if (gpu_info->family >= CHIP_POLARIS10 && gpu_info->family <= CHIP_VEGAM)
gpu_info->cu_info.max_waves_per_simd = 8;
else
gpu_info->cu_info.max_waves_per_simd = 10;
if (gpu_info->gfx_level >= GFX10)
gpu_info->cu_info.num_physical_sgprs_per_simd = 128 * gpu_info->cu_info.max_waves_per_simd;
else if (gpu_info->gfx_level >= GFX8)
gpu_info->cu_info.num_physical_sgprs_per_simd = 800;
else
gpu_info->cu_info.num_physical_sgprs_per_simd = 512;
gpu_info->has_timeline_syncobj = true;
gpu_info->has_vm_always_valid = true;
@@ -71,13 +56,6 @@ ac_null_device_create(struct radeon_info *gpu_info, const char *family)
gpu_info->has_ngg_fully_culled_bug = gpu_info->gfx_level == GFX10;
gpu_info->has_ngg_passthru_no_msg = gpu_info->family >= CHIP_NAVI23;
if (gpu_info->family == CHIP_NAVI31 || gpu_info->family == CHIP_NAVI32 || gpu_info->gfx_level >= GFX12)
gpu_info->cu_info.num_physical_wave64_vgprs_per_simd = 768;
else if (gpu_info->gfx_level >= GFX10)
gpu_info->cu_info.num_physical_wave64_vgprs_per_simd = 512;
else
gpu_info->cu_info.num_physical_wave64_vgprs_per_simd = 256;
gpu_info->cu_info.num_simd_per_compute_unit = gpu_info->gfx_level >= GFX10 ? 2 : 4;
gpu_info->lds_size_per_workgroup = gpu_info->gfx_level >= GFX7 ? 64 * 1024 : 32 * 1024;
gpu_info->max_render_backends = pci_ids[gpu_info->family].num_render_backends;
@@ -114,6 +92,9 @@ ac_null_device_create(struct radeon_info *gpu_info, const char *family)
gpu_info->ip[AMD_IP_GFX].num_queues = 1;
gpu_info->gart_page_size = 4096;
ac_fill_cu_info(gpu_info, NULL);
gpu_info->family_overridden = true;
return true;

View File

@@ -3,10 +3,12 @@
libradeonwinsys_deps = [idep_mesautil, dep_libdrm]
libradeonwinsys_c_args = []
amd_common_libs = []
if with_gallium_radeonsi
libradeonwinsys_deps += [idep_amdgfxregs_h]
libradeonwinsys_c_args = ['-DHAVE_GALLIUM_RADEONSI']
amd_common_libs += [libamd_common]
endif
libradeonwinsys = static_library(
@@ -22,6 +24,7 @@ libradeonwinsys = static_library(
'radeon_surface.h'),
include_directories : [inc_src, inc_include, inc_gallium, inc_gallium_aux],
gnu_symbol_visibility : 'hidden',
link_with : amd_common_libs,
c_args : libradeonwinsys_c_args,
dependencies : libradeonwinsys_deps,
)

View File

@@ -632,9 +632,6 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
ws->info.max_alignment = 1024*1024;
ws->info.has_graphics = true;
ws->info.cpdma_prefetch_writes_memory = true;
ws->info.cu_info.max_waves_per_simd = 10;
ws->info.cu_info.num_physical_sgprs_per_simd = 512;
ws->info.cu_info.num_physical_wave64_vgprs_per_simd = 256;
ws->info.has_3d_cube_border_color_mipmap = true;
ws->info.has_image_opcodes = true;
ws->info.spi_cu_en_has_effect = false;
@@ -644,15 +641,12 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
ws->info.max_gflops = 128 * ws->info.num_cu * ws->info.max_gpu_freq_mhz / 1000;
ws->info.num_tcc_blocks = ws->info.max_tcc_blocks;
ws->info.tcp_cache_size = 16 * 1024;
ws->info.cu_info.num_simd_per_compute_unit = 4;
ws->info.cu_info.min_sgpr_alloc = 8;
ws->info.cu_info.max_sgpr_alloc = 104;
ws->info.cu_info.sgpr_alloc_granularity = 8;
ws->info.cu_info.min_wave64_vgpr_alloc = 4;
ws->info.cu_info.max_vgpr_alloc = 256;
ws->info.cu_info.wave64_vgpr_alloc_granularity = 4;
ws->info.lds_size_per_workgroup = ws->info.gfx_level == GFX7 ? 64 * 1024 : 32 * 1024;
#ifdef HAVE_GALLIUM_RADEONSI
ac_fill_cu_info(&ws->info, NULL);
#endif
for (unsigned se = 0; se < ws->info.max_se; se++) {
for (unsigned sa = 0; sa < ws->info.max_sa_per_se; sa++)
ws->info.cu_mask[se][sa] = BITFIELD_MASK(ws->info.max_good_cu_per_sa);