ac/gpu_info: create separate function ac_fill_cu_info() to fill out CU info
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38701>
This commit is contained in:
committed by
Marge Bot
parent
749c619c45
commit
6f4e8046b5
@@ -233,6 +233,74 @@ static bool handle_env_var_force_family(struct radeon_info *info)
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
ac_fill_cu_info(struct radeon_info *info, struct drm_amdgpu_info_device *device_info)
|
||||
{
|
||||
struct ac_cu_info *cu_info = &info->cu_info;
|
||||
|
||||
if (info->gfx_level >= GFX10_3)
|
||||
cu_info->max_waves_per_simd = 16;
|
||||
else if (info->gfx_level == GFX10)
|
||||
cu_info->max_waves_per_simd = 20;
|
||||
else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
|
||||
cu_info->max_waves_per_simd = 8;
|
||||
else
|
||||
cu_info->max_waves_per_simd = 10;
|
||||
|
||||
if (info->gfx_level >= GFX10) {
|
||||
cu_info->num_physical_sgprs_per_simd = 108 * cu_info->max_waves_per_simd;
|
||||
cu_info->min_sgpr_alloc = 108;
|
||||
cu_info->max_sgpr_alloc = 108; /* includes VCC, which can be treated as s[106-107] on GFX10+ */
|
||||
cu_info->sgpr_alloc_granularity = 108;
|
||||
} else if (info->family == CHIP_TONGA || info->family == CHIP_ICELAND) {
|
||||
/* SGPRInitBug: Due to a HW bug, we always have to allocate the same amount of SGPRs. */
|
||||
cu_info->num_physical_sgprs_per_simd = 800;
|
||||
cu_info->min_sgpr_alloc = 96;
|
||||
cu_info->max_sgpr_alloc = 96;
|
||||
cu_info->sgpr_alloc_granularity = 96;
|
||||
} else if (info->gfx_level >= GFX8) {
|
||||
cu_info->num_physical_sgprs_per_simd = 800;
|
||||
cu_info->min_sgpr_alloc = 16;
|
||||
cu_info->max_sgpr_alloc = 102;
|
||||
cu_info->sgpr_alloc_granularity = 16;
|
||||
} else {
|
||||
cu_info->num_physical_sgprs_per_simd = 512;
|
||||
cu_info->min_sgpr_alloc = 8;
|
||||
cu_info->max_sgpr_alloc = 104;
|
||||
cu_info->sgpr_alloc_granularity = 8;
|
||||
}
|
||||
|
||||
/* Some GPU info was broken before DRM 3.45.0. */
|
||||
if (info->drm_minor >= 45 && device_info && device_info->num_shader_visible_vgprs) {
|
||||
/* The Gfx10 VGPR count is in Wave32, so divide it by 2 for Wave64.
|
||||
* Gfx6-9 numbers are in Wave64. CDNA also includes Accumulation VGPRs.
|
||||
*/
|
||||
if (info->gfx_level >= GFX10 || (info->gfx_level == GFX9 && info->family >= CHIP_MI100))
|
||||
cu_info->num_physical_wave64_vgprs_per_simd = device_info->num_shader_visible_vgprs / 2;
|
||||
else
|
||||
cu_info->num_physical_wave64_vgprs_per_simd = device_info->num_shader_visible_vgprs;
|
||||
} else {
|
||||
if (info->family == CHIP_NAVI31 || info->family == CHIP_NAVI32 ||
|
||||
info->family == CHIP_STRIX_HALO || info->gfx_level == GFX12) {
|
||||
cu_info->num_physical_wave64_vgprs_per_simd = 768;
|
||||
} else if (info->gfx_level >= GFX10) {
|
||||
cu_info->num_physical_wave64_vgprs_per_simd = 512;
|
||||
} else {
|
||||
cu_info->num_physical_wave64_vgprs_per_simd = 256;
|
||||
}
|
||||
}
|
||||
if (info->gfx_level >= GFX10_3)
|
||||
cu_info->wave64_vgpr_alloc_granularity = cu_info->num_physical_wave64_vgprs_per_simd / 64;
|
||||
else if (info->gfx_level == GFX9 && info->family >= CHIP_MI200)
|
||||
cu_info->wave64_vgpr_alloc_granularity = 8;
|
||||
else
|
||||
cu_info->wave64_vgpr_alloc_granularity = 4;
|
||||
cu_info->min_wave64_vgpr_alloc = cu_info->wave64_vgpr_alloc_granularity;
|
||||
cu_info->max_vgpr_alloc = 256;
|
||||
|
||||
cu_info->num_simd_per_compute_unit = info->gfx_level >= GFX10 ? 2 : 4;
|
||||
}
|
||||
|
||||
enum ac_query_gpu_info_result
|
||||
ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
||||
bool require_pci_bus_info)
|
||||
@@ -1259,39 +1327,6 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
||||
}
|
||||
}
|
||||
|
||||
/* Fill ac_cu_info */
|
||||
if (info->gfx_level >= GFX10_3)
|
||||
info->cu_info.max_waves_per_simd = 16;
|
||||
else if (info->gfx_level == GFX10)
|
||||
info->cu_info.max_waves_per_simd = 20;
|
||||
else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
|
||||
info->cu_info.max_waves_per_simd = 8;
|
||||
else
|
||||
info->cu_info.max_waves_per_simd = 10;
|
||||
|
||||
if (info->gfx_level >= GFX10) {
|
||||
info->cu_info.num_physical_sgprs_per_simd = 108 * info->cu_info.max_waves_per_simd;
|
||||
info->cu_info.min_sgpr_alloc = 108;
|
||||
info->cu_info.max_sgpr_alloc = 108; /* includes VCC, which can be treated as s[106-107] on GFX10+ */
|
||||
info->cu_info.sgpr_alloc_granularity = 108;
|
||||
} else if (info->family == CHIP_TONGA || info->family == CHIP_ICELAND) {
|
||||
/* SGPRInitBug: Due to a HW bug, we always have to allocate the same amount of SGPRs. */
|
||||
info->cu_info.num_physical_sgprs_per_simd = 800;
|
||||
info->cu_info.min_sgpr_alloc = 96;
|
||||
info->cu_info.max_sgpr_alloc = 96;
|
||||
info->cu_info.sgpr_alloc_granularity = 96;
|
||||
} else if (info->gfx_level >= GFX8) {
|
||||
info->cu_info.num_physical_sgprs_per_simd = 800;
|
||||
info->cu_info.min_sgpr_alloc = 16;
|
||||
info->cu_info.max_sgpr_alloc = 102;
|
||||
info->cu_info.sgpr_alloc_granularity = 16;
|
||||
} else {
|
||||
info->cu_info.num_physical_sgprs_per_simd = 512;
|
||||
info->cu_info.min_sgpr_alloc = 8;
|
||||
info->cu_info.max_sgpr_alloc = 104;
|
||||
info->cu_info.sgpr_alloc_granularity = 8;
|
||||
}
|
||||
|
||||
info->has_3d_cube_border_color_mipmap = info->has_graphics || info->family == CHIP_MI100;
|
||||
info->has_image_opcodes = debug_get_bool_option("AMD_IMAGE_OPCODES",
|
||||
info->has_graphics || info->family < CHIP_GFX940);
|
||||
@@ -1308,35 +1343,7 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
||||
/* On GFX10.3, the polarity of AUTO_FLUSH_MODE is inverted. */
|
||||
info->has_sqtt_auto_flush_mode_bug = info->gfx_level == GFX10_3;
|
||||
|
||||
/* Some GPU info was broken before DRM 3.45.0. */
|
||||
if (info->drm_minor >= 45 && device_info.num_shader_visible_vgprs) {
|
||||
/* The Gfx10 VGPR count is in Wave32, so divide it by 2 for Wave64.
|
||||
* Gfx6-9 numbers are in Wave64. CDNA also includes Accumulation VGPRs.
|
||||
*/
|
||||
if (info->gfx_level >= GFX10 || (info->gfx_level == GFX9 && info->family >= CHIP_MI100))
|
||||
info->cu_info.num_physical_wave64_vgprs_per_simd = device_info.num_shader_visible_vgprs / 2;
|
||||
else
|
||||
info->cu_info.num_physical_wave64_vgprs_per_simd = device_info.num_shader_visible_vgprs;
|
||||
} else {
|
||||
if (info->family == CHIP_NAVI31 || info->family == CHIP_NAVI32 ||
|
||||
info->family == CHIP_STRIX_HALO || info->gfx_level == GFX12) {
|
||||
info->cu_info.num_physical_wave64_vgprs_per_simd = 768;
|
||||
} else if (info->gfx_level >= GFX10) {
|
||||
info->cu_info.num_physical_wave64_vgprs_per_simd = 512;
|
||||
} else {
|
||||
info->cu_info.num_physical_wave64_vgprs_per_simd = 256;
|
||||
}
|
||||
}
|
||||
if (info->gfx_level >= GFX10_3)
|
||||
info->cu_info.wave64_vgpr_alloc_granularity = info->cu_info.num_physical_wave64_vgprs_per_simd / 64;
|
||||
else if (info->gfx_level == GFX9 && info->family >= CHIP_MI200)
|
||||
info->cu_info.wave64_vgpr_alloc_granularity = 8;
|
||||
else
|
||||
info->cu_info.wave64_vgpr_alloc_granularity = 4;
|
||||
info->cu_info.min_wave64_vgpr_alloc = info->cu_info.wave64_vgpr_alloc_granularity;
|
||||
info->cu_info.max_vgpr_alloc = 256;
|
||||
|
||||
info->cu_info.num_simd_per_compute_unit = info->gfx_level >= GFX10 ? 2 : 4;
|
||||
ac_fill_cu_info(info, &device_info);
|
||||
|
||||
/* BIG_PAGE is supported since gfx10.3 and requires VRAM. VRAM is only guaranteed
|
||||
* with AMDGPU_GEM_CREATE_DISCARDABLE. DISCARDABLE was added in DRM 3.47.0.
|
||||
|
||||
@@ -20,6 +20,7 @@ extern "C" {
|
||||
#define AMD_MAX_WGP 60
|
||||
|
||||
struct amdgpu_gpu_info;
|
||||
struct drm_amdgpu_info_device;
|
||||
|
||||
struct amd_ip_info {
|
||||
uint8_t ver_major;
|
||||
@@ -364,6 +365,7 @@ enum ac_query_gpu_info_result {
|
||||
|
||||
enum ac_query_gpu_info_result ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
||||
bool require_pci_bus_info);
|
||||
void ac_fill_cu_info(struct radeon_info *info, struct drm_amdgpu_info_device *device_info);
|
||||
|
||||
void ac_compute_driver_uuid(char *uuid, size_t size);
|
||||
|
||||
|
||||
@@ -46,21 +46,6 @@ ac_null_device_create(struct radeon_info *gpu_info, const char *family)
|
||||
gpu_info->pci_id = pci_ids[gpu_info->family].pci_id;
|
||||
gpu_info->max_se = pci_ids[gpu_info->family].has_dedicated_vram ? 4 : 1;
|
||||
gpu_info->num_se = gpu_info->max_se;
|
||||
if (gpu_info->gfx_level >= GFX10_3)
|
||||
gpu_info->cu_info.max_waves_per_simd = 16;
|
||||
else if (gpu_info->gfx_level >= GFX10)
|
||||
gpu_info->cu_info.max_waves_per_simd = 20;
|
||||
else if (gpu_info->family >= CHIP_POLARIS10 && gpu_info->family <= CHIP_VEGAM)
|
||||
gpu_info->cu_info.max_waves_per_simd = 8;
|
||||
else
|
||||
gpu_info->cu_info.max_waves_per_simd = 10;
|
||||
|
||||
if (gpu_info->gfx_level >= GFX10)
|
||||
gpu_info->cu_info.num_physical_sgprs_per_simd = 128 * gpu_info->cu_info.max_waves_per_simd;
|
||||
else if (gpu_info->gfx_level >= GFX8)
|
||||
gpu_info->cu_info.num_physical_sgprs_per_simd = 800;
|
||||
else
|
||||
gpu_info->cu_info.num_physical_sgprs_per_simd = 512;
|
||||
|
||||
gpu_info->has_timeline_syncobj = true;
|
||||
gpu_info->has_vm_always_valid = true;
|
||||
@@ -71,13 +56,6 @@ ac_null_device_create(struct radeon_info *gpu_info, const char *family)
|
||||
gpu_info->has_ngg_fully_culled_bug = gpu_info->gfx_level == GFX10;
|
||||
gpu_info->has_ngg_passthru_no_msg = gpu_info->family >= CHIP_NAVI23;
|
||||
|
||||
if (gpu_info->family == CHIP_NAVI31 || gpu_info->family == CHIP_NAVI32 || gpu_info->gfx_level >= GFX12)
|
||||
gpu_info->cu_info.num_physical_wave64_vgprs_per_simd = 768;
|
||||
else if (gpu_info->gfx_level >= GFX10)
|
||||
gpu_info->cu_info.num_physical_wave64_vgprs_per_simd = 512;
|
||||
else
|
||||
gpu_info->cu_info.num_physical_wave64_vgprs_per_simd = 256;
|
||||
gpu_info->cu_info.num_simd_per_compute_unit = gpu_info->gfx_level >= GFX10 ? 2 : 4;
|
||||
gpu_info->lds_size_per_workgroup = gpu_info->gfx_level >= GFX7 ? 64 * 1024 : 32 * 1024;
|
||||
gpu_info->max_render_backends = pci_ids[gpu_info->family].num_render_backends;
|
||||
|
||||
@@ -114,6 +92,9 @@ ac_null_device_create(struct radeon_info *gpu_info, const char *family)
|
||||
gpu_info->ip[AMD_IP_GFX].num_queues = 1;
|
||||
|
||||
gpu_info->gart_page_size = 4096;
|
||||
|
||||
ac_fill_cu_info(gpu_info, NULL);
|
||||
|
||||
gpu_info->family_overridden = true;
|
||||
|
||||
return true;
|
||||
|
||||
@@ -3,10 +3,12 @@
|
||||
|
||||
libradeonwinsys_deps = [idep_mesautil, dep_libdrm]
|
||||
libradeonwinsys_c_args = []
|
||||
amd_common_libs = []
|
||||
|
||||
if with_gallium_radeonsi
|
||||
libradeonwinsys_deps += [idep_amdgfxregs_h]
|
||||
libradeonwinsys_c_args = ['-DHAVE_GALLIUM_RADEONSI']
|
||||
amd_common_libs += [libamd_common]
|
||||
endif
|
||||
|
||||
libradeonwinsys = static_library(
|
||||
@@ -22,6 +24,7 @@ libradeonwinsys = static_library(
|
||||
'radeon_surface.h'),
|
||||
include_directories : [inc_src, inc_include, inc_gallium, inc_gallium_aux],
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
link_with : amd_common_libs,
|
||||
c_args : libradeonwinsys_c_args,
|
||||
dependencies : libradeonwinsys_deps,
|
||||
)
|
||||
|
||||
@@ -632,9 +632,6 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
|
||||
ws->info.max_alignment = 1024*1024;
|
||||
ws->info.has_graphics = true;
|
||||
ws->info.cpdma_prefetch_writes_memory = true;
|
||||
ws->info.cu_info.max_waves_per_simd = 10;
|
||||
ws->info.cu_info.num_physical_sgprs_per_simd = 512;
|
||||
ws->info.cu_info.num_physical_wave64_vgprs_per_simd = 256;
|
||||
ws->info.has_3d_cube_border_color_mipmap = true;
|
||||
ws->info.has_image_opcodes = true;
|
||||
ws->info.spi_cu_en_has_effect = false;
|
||||
@@ -644,15 +641,12 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
|
||||
ws->info.max_gflops = 128 * ws->info.num_cu * ws->info.max_gpu_freq_mhz / 1000;
|
||||
ws->info.num_tcc_blocks = ws->info.max_tcc_blocks;
|
||||
ws->info.tcp_cache_size = 16 * 1024;
|
||||
ws->info.cu_info.num_simd_per_compute_unit = 4;
|
||||
ws->info.cu_info.min_sgpr_alloc = 8;
|
||||
ws->info.cu_info.max_sgpr_alloc = 104;
|
||||
ws->info.cu_info.sgpr_alloc_granularity = 8;
|
||||
ws->info.cu_info.min_wave64_vgpr_alloc = 4;
|
||||
ws->info.cu_info.max_vgpr_alloc = 256;
|
||||
ws->info.cu_info.wave64_vgpr_alloc_granularity = 4;
|
||||
ws->info.lds_size_per_workgroup = ws->info.gfx_level == GFX7 ? 64 * 1024 : 32 * 1024;
|
||||
|
||||
#ifdef HAVE_GALLIUM_RADEONSI
|
||||
ac_fill_cu_info(&ws->info, NULL);
|
||||
#endif
|
||||
|
||||
for (unsigned se = 0; se < ws->info.max_se; se++) {
|
||||
for (unsigned sa = 0; sa < ws->info.max_sa_per_se; sa++)
|
||||
ws->info.cu_mask[se][sa] = BITFIELD_MASK(ws->info.max_good_cu_per_sa);
|
||||
|
||||
Reference in New Issue
Block a user