ac/gpu_info: create separate function ac_fill_cu_info() to fill out CU info
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38701>
This commit is contained in:
committed by
Marge Bot
parent
749c619c45
commit
6f4e8046b5
@@ -233,6 +233,74 @@ static bool handle_env_var_force_family(struct radeon_info *info)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ac_fill_cu_info(struct radeon_info *info, struct drm_amdgpu_info_device *device_info)
|
||||||
|
{
|
||||||
|
struct ac_cu_info *cu_info = &info->cu_info;
|
||||||
|
|
||||||
|
if (info->gfx_level >= GFX10_3)
|
||||||
|
cu_info->max_waves_per_simd = 16;
|
||||||
|
else if (info->gfx_level == GFX10)
|
||||||
|
cu_info->max_waves_per_simd = 20;
|
||||||
|
else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
|
||||||
|
cu_info->max_waves_per_simd = 8;
|
||||||
|
else
|
||||||
|
cu_info->max_waves_per_simd = 10;
|
||||||
|
|
||||||
|
if (info->gfx_level >= GFX10) {
|
||||||
|
cu_info->num_physical_sgprs_per_simd = 108 * cu_info->max_waves_per_simd;
|
||||||
|
cu_info->min_sgpr_alloc = 108;
|
||||||
|
cu_info->max_sgpr_alloc = 108; /* includes VCC, which can be treated as s[106-107] on GFX10+ */
|
||||||
|
cu_info->sgpr_alloc_granularity = 108;
|
||||||
|
} else if (info->family == CHIP_TONGA || info->family == CHIP_ICELAND) {
|
||||||
|
/* SGPRInitBug: Due to a HW bug, we always have to allocate the same amount of SGPRs. */
|
||||||
|
cu_info->num_physical_sgprs_per_simd = 800;
|
||||||
|
cu_info->min_sgpr_alloc = 96;
|
||||||
|
cu_info->max_sgpr_alloc = 96;
|
||||||
|
cu_info->sgpr_alloc_granularity = 96;
|
||||||
|
} else if (info->gfx_level >= GFX8) {
|
||||||
|
cu_info->num_physical_sgprs_per_simd = 800;
|
||||||
|
cu_info->min_sgpr_alloc = 16;
|
||||||
|
cu_info->max_sgpr_alloc = 102;
|
||||||
|
cu_info->sgpr_alloc_granularity = 16;
|
||||||
|
} else {
|
||||||
|
cu_info->num_physical_sgprs_per_simd = 512;
|
||||||
|
cu_info->min_sgpr_alloc = 8;
|
||||||
|
cu_info->max_sgpr_alloc = 104;
|
||||||
|
cu_info->sgpr_alloc_granularity = 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Some GPU info was broken before DRM 3.45.0. */
|
||||||
|
if (info->drm_minor >= 45 && device_info && device_info->num_shader_visible_vgprs) {
|
||||||
|
/* The Gfx10 VGPR count is in Wave32, so divide it by 2 for Wave64.
|
||||||
|
* Gfx6-9 numbers are in Wave64. CDNA also includes Accumulation VGPRs.
|
||||||
|
*/
|
||||||
|
if (info->gfx_level >= GFX10 || (info->gfx_level == GFX9 && info->family >= CHIP_MI100))
|
||||||
|
cu_info->num_physical_wave64_vgprs_per_simd = device_info->num_shader_visible_vgprs / 2;
|
||||||
|
else
|
||||||
|
cu_info->num_physical_wave64_vgprs_per_simd = device_info->num_shader_visible_vgprs;
|
||||||
|
} else {
|
||||||
|
if (info->family == CHIP_NAVI31 || info->family == CHIP_NAVI32 ||
|
||||||
|
info->family == CHIP_STRIX_HALO || info->gfx_level == GFX12) {
|
||||||
|
cu_info->num_physical_wave64_vgprs_per_simd = 768;
|
||||||
|
} else if (info->gfx_level >= GFX10) {
|
||||||
|
cu_info->num_physical_wave64_vgprs_per_simd = 512;
|
||||||
|
} else {
|
||||||
|
cu_info->num_physical_wave64_vgprs_per_simd = 256;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (info->gfx_level >= GFX10_3)
|
||||||
|
cu_info->wave64_vgpr_alloc_granularity = cu_info->num_physical_wave64_vgprs_per_simd / 64;
|
||||||
|
else if (info->gfx_level == GFX9 && info->family >= CHIP_MI200)
|
||||||
|
cu_info->wave64_vgpr_alloc_granularity = 8;
|
||||||
|
else
|
||||||
|
cu_info->wave64_vgpr_alloc_granularity = 4;
|
||||||
|
cu_info->min_wave64_vgpr_alloc = cu_info->wave64_vgpr_alloc_granularity;
|
||||||
|
cu_info->max_vgpr_alloc = 256;
|
||||||
|
|
||||||
|
cu_info->num_simd_per_compute_unit = info->gfx_level >= GFX10 ? 2 : 4;
|
||||||
|
}
|
||||||
|
|
||||||
enum ac_query_gpu_info_result
|
enum ac_query_gpu_info_result
|
||||||
ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
||||||
bool require_pci_bus_info)
|
bool require_pci_bus_info)
|
||||||
@@ -1259,39 +1327,6 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Fill ac_cu_info */
|
|
||||||
if (info->gfx_level >= GFX10_3)
|
|
||||||
info->cu_info.max_waves_per_simd = 16;
|
|
||||||
else if (info->gfx_level == GFX10)
|
|
||||||
info->cu_info.max_waves_per_simd = 20;
|
|
||||||
else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
|
|
||||||
info->cu_info.max_waves_per_simd = 8;
|
|
||||||
else
|
|
||||||
info->cu_info.max_waves_per_simd = 10;
|
|
||||||
|
|
||||||
if (info->gfx_level >= GFX10) {
|
|
||||||
info->cu_info.num_physical_sgprs_per_simd = 108 * info->cu_info.max_waves_per_simd;
|
|
||||||
info->cu_info.min_sgpr_alloc = 108;
|
|
||||||
info->cu_info.max_sgpr_alloc = 108; /* includes VCC, which can be treated as s[106-107] on GFX10+ */
|
|
||||||
info->cu_info.sgpr_alloc_granularity = 108;
|
|
||||||
} else if (info->family == CHIP_TONGA || info->family == CHIP_ICELAND) {
|
|
||||||
/* SGPRInitBug: Due to a HW bug, we always have to allocate the same amount of SGPRs. */
|
|
||||||
info->cu_info.num_physical_sgprs_per_simd = 800;
|
|
||||||
info->cu_info.min_sgpr_alloc = 96;
|
|
||||||
info->cu_info.max_sgpr_alloc = 96;
|
|
||||||
info->cu_info.sgpr_alloc_granularity = 96;
|
|
||||||
} else if (info->gfx_level >= GFX8) {
|
|
||||||
info->cu_info.num_physical_sgprs_per_simd = 800;
|
|
||||||
info->cu_info.min_sgpr_alloc = 16;
|
|
||||||
info->cu_info.max_sgpr_alloc = 102;
|
|
||||||
info->cu_info.sgpr_alloc_granularity = 16;
|
|
||||||
} else {
|
|
||||||
info->cu_info.num_physical_sgprs_per_simd = 512;
|
|
||||||
info->cu_info.min_sgpr_alloc = 8;
|
|
||||||
info->cu_info.max_sgpr_alloc = 104;
|
|
||||||
info->cu_info.sgpr_alloc_granularity = 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
info->has_3d_cube_border_color_mipmap = info->has_graphics || info->family == CHIP_MI100;
|
info->has_3d_cube_border_color_mipmap = info->has_graphics || info->family == CHIP_MI100;
|
||||||
info->has_image_opcodes = debug_get_bool_option("AMD_IMAGE_OPCODES",
|
info->has_image_opcodes = debug_get_bool_option("AMD_IMAGE_OPCODES",
|
||||||
info->has_graphics || info->family < CHIP_GFX940);
|
info->has_graphics || info->family < CHIP_GFX940);
|
||||||
@@ -1308,35 +1343,7 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||||||
/* On GFX10.3, the polarity of AUTO_FLUSH_MODE is inverted. */
|
/* On GFX10.3, the polarity of AUTO_FLUSH_MODE is inverted. */
|
||||||
info->has_sqtt_auto_flush_mode_bug = info->gfx_level == GFX10_3;
|
info->has_sqtt_auto_flush_mode_bug = info->gfx_level == GFX10_3;
|
||||||
|
|
||||||
/* Some GPU info was broken before DRM 3.45.0. */
|
ac_fill_cu_info(info, &device_info);
|
||||||
if (info->drm_minor >= 45 && device_info.num_shader_visible_vgprs) {
|
|
||||||
/* The Gfx10 VGPR count is in Wave32, so divide it by 2 for Wave64.
|
|
||||||
* Gfx6-9 numbers are in Wave64. CDNA also includes Accumulation VGPRs.
|
|
||||||
*/
|
|
||||||
if (info->gfx_level >= GFX10 || (info->gfx_level == GFX9 && info->family >= CHIP_MI100))
|
|
||||||
info->cu_info.num_physical_wave64_vgprs_per_simd = device_info.num_shader_visible_vgprs / 2;
|
|
||||||
else
|
|
||||||
info->cu_info.num_physical_wave64_vgprs_per_simd = device_info.num_shader_visible_vgprs;
|
|
||||||
} else {
|
|
||||||
if (info->family == CHIP_NAVI31 || info->family == CHIP_NAVI32 ||
|
|
||||||
info->family == CHIP_STRIX_HALO || info->gfx_level == GFX12) {
|
|
||||||
info->cu_info.num_physical_wave64_vgprs_per_simd = 768;
|
|
||||||
} else if (info->gfx_level >= GFX10) {
|
|
||||||
info->cu_info.num_physical_wave64_vgprs_per_simd = 512;
|
|
||||||
} else {
|
|
||||||
info->cu_info.num_physical_wave64_vgprs_per_simd = 256;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (info->gfx_level >= GFX10_3)
|
|
||||||
info->cu_info.wave64_vgpr_alloc_granularity = info->cu_info.num_physical_wave64_vgprs_per_simd / 64;
|
|
||||||
else if (info->gfx_level == GFX9 && info->family >= CHIP_MI200)
|
|
||||||
info->cu_info.wave64_vgpr_alloc_granularity = 8;
|
|
||||||
else
|
|
||||||
info->cu_info.wave64_vgpr_alloc_granularity = 4;
|
|
||||||
info->cu_info.min_wave64_vgpr_alloc = info->cu_info.wave64_vgpr_alloc_granularity;
|
|
||||||
info->cu_info.max_vgpr_alloc = 256;
|
|
||||||
|
|
||||||
info->cu_info.num_simd_per_compute_unit = info->gfx_level >= GFX10 ? 2 : 4;
|
|
||||||
|
|
||||||
/* BIG_PAGE is supported since gfx10.3 and requires VRAM. VRAM is only guaranteed
|
/* BIG_PAGE is supported since gfx10.3 and requires VRAM. VRAM is only guaranteed
|
||||||
* with AMDGPU_GEM_CREATE_DISCARDABLE. DISCARDABLE was added in DRM 3.47.0.
|
* with AMDGPU_GEM_CREATE_DISCARDABLE. DISCARDABLE was added in DRM 3.47.0.
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ extern "C" {
|
|||||||
#define AMD_MAX_WGP 60
|
#define AMD_MAX_WGP 60
|
||||||
|
|
||||||
struct amdgpu_gpu_info;
|
struct amdgpu_gpu_info;
|
||||||
|
struct drm_amdgpu_info_device;
|
||||||
|
|
||||||
struct amd_ip_info {
|
struct amd_ip_info {
|
||||||
uint8_t ver_major;
|
uint8_t ver_major;
|
||||||
@@ -364,6 +365,7 @@ enum ac_query_gpu_info_result {
|
|||||||
|
|
||||||
enum ac_query_gpu_info_result ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
enum ac_query_gpu_info_result ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
||||||
bool require_pci_bus_info);
|
bool require_pci_bus_info);
|
||||||
|
void ac_fill_cu_info(struct radeon_info *info, struct drm_amdgpu_info_device *device_info);
|
||||||
|
|
||||||
void ac_compute_driver_uuid(char *uuid, size_t size);
|
void ac_compute_driver_uuid(char *uuid, size_t size);
|
||||||
|
|
||||||
|
|||||||
@@ -46,21 +46,6 @@ ac_null_device_create(struct radeon_info *gpu_info, const char *family)
|
|||||||
gpu_info->pci_id = pci_ids[gpu_info->family].pci_id;
|
gpu_info->pci_id = pci_ids[gpu_info->family].pci_id;
|
||||||
gpu_info->max_se = pci_ids[gpu_info->family].has_dedicated_vram ? 4 : 1;
|
gpu_info->max_se = pci_ids[gpu_info->family].has_dedicated_vram ? 4 : 1;
|
||||||
gpu_info->num_se = gpu_info->max_se;
|
gpu_info->num_se = gpu_info->max_se;
|
||||||
if (gpu_info->gfx_level >= GFX10_3)
|
|
||||||
gpu_info->cu_info.max_waves_per_simd = 16;
|
|
||||||
else if (gpu_info->gfx_level >= GFX10)
|
|
||||||
gpu_info->cu_info.max_waves_per_simd = 20;
|
|
||||||
else if (gpu_info->family >= CHIP_POLARIS10 && gpu_info->family <= CHIP_VEGAM)
|
|
||||||
gpu_info->cu_info.max_waves_per_simd = 8;
|
|
||||||
else
|
|
||||||
gpu_info->cu_info.max_waves_per_simd = 10;
|
|
||||||
|
|
||||||
if (gpu_info->gfx_level >= GFX10)
|
|
||||||
gpu_info->cu_info.num_physical_sgprs_per_simd = 128 * gpu_info->cu_info.max_waves_per_simd;
|
|
||||||
else if (gpu_info->gfx_level >= GFX8)
|
|
||||||
gpu_info->cu_info.num_physical_sgprs_per_simd = 800;
|
|
||||||
else
|
|
||||||
gpu_info->cu_info.num_physical_sgprs_per_simd = 512;
|
|
||||||
|
|
||||||
gpu_info->has_timeline_syncobj = true;
|
gpu_info->has_timeline_syncobj = true;
|
||||||
gpu_info->has_vm_always_valid = true;
|
gpu_info->has_vm_always_valid = true;
|
||||||
@@ -71,13 +56,6 @@ ac_null_device_create(struct radeon_info *gpu_info, const char *family)
|
|||||||
gpu_info->has_ngg_fully_culled_bug = gpu_info->gfx_level == GFX10;
|
gpu_info->has_ngg_fully_culled_bug = gpu_info->gfx_level == GFX10;
|
||||||
gpu_info->has_ngg_passthru_no_msg = gpu_info->family >= CHIP_NAVI23;
|
gpu_info->has_ngg_passthru_no_msg = gpu_info->family >= CHIP_NAVI23;
|
||||||
|
|
||||||
if (gpu_info->family == CHIP_NAVI31 || gpu_info->family == CHIP_NAVI32 || gpu_info->gfx_level >= GFX12)
|
|
||||||
gpu_info->cu_info.num_physical_wave64_vgprs_per_simd = 768;
|
|
||||||
else if (gpu_info->gfx_level >= GFX10)
|
|
||||||
gpu_info->cu_info.num_physical_wave64_vgprs_per_simd = 512;
|
|
||||||
else
|
|
||||||
gpu_info->cu_info.num_physical_wave64_vgprs_per_simd = 256;
|
|
||||||
gpu_info->cu_info.num_simd_per_compute_unit = gpu_info->gfx_level >= GFX10 ? 2 : 4;
|
|
||||||
gpu_info->lds_size_per_workgroup = gpu_info->gfx_level >= GFX7 ? 64 * 1024 : 32 * 1024;
|
gpu_info->lds_size_per_workgroup = gpu_info->gfx_level >= GFX7 ? 64 * 1024 : 32 * 1024;
|
||||||
gpu_info->max_render_backends = pci_ids[gpu_info->family].num_render_backends;
|
gpu_info->max_render_backends = pci_ids[gpu_info->family].num_render_backends;
|
||||||
|
|
||||||
@@ -114,6 +92,9 @@ ac_null_device_create(struct radeon_info *gpu_info, const char *family)
|
|||||||
gpu_info->ip[AMD_IP_GFX].num_queues = 1;
|
gpu_info->ip[AMD_IP_GFX].num_queues = 1;
|
||||||
|
|
||||||
gpu_info->gart_page_size = 4096;
|
gpu_info->gart_page_size = 4096;
|
||||||
|
|
||||||
|
ac_fill_cu_info(gpu_info, NULL);
|
||||||
|
|
||||||
gpu_info->family_overridden = true;
|
gpu_info->family_overridden = true;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@@ -3,10 +3,12 @@
|
|||||||
|
|
||||||
libradeonwinsys_deps = [idep_mesautil, dep_libdrm]
|
libradeonwinsys_deps = [idep_mesautil, dep_libdrm]
|
||||||
libradeonwinsys_c_args = []
|
libradeonwinsys_c_args = []
|
||||||
|
amd_common_libs = []
|
||||||
|
|
||||||
if with_gallium_radeonsi
|
if with_gallium_radeonsi
|
||||||
libradeonwinsys_deps += [idep_amdgfxregs_h]
|
libradeonwinsys_deps += [idep_amdgfxregs_h]
|
||||||
libradeonwinsys_c_args = ['-DHAVE_GALLIUM_RADEONSI']
|
libradeonwinsys_c_args = ['-DHAVE_GALLIUM_RADEONSI']
|
||||||
|
amd_common_libs += [libamd_common]
|
||||||
endif
|
endif
|
||||||
|
|
||||||
libradeonwinsys = static_library(
|
libradeonwinsys = static_library(
|
||||||
@@ -22,6 +24,7 @@ libradeonwinsys = static_library(
|
|||||||
'radeon_surface.h'),
|
'radeon_surface.h'),
|
||||||
include_directories : [inc_src, inc_include, inc_gallium, inc_gallium_aux],
|
include_directories : [inc_src, inc_include, inc_gallium, inc_gallium_aux],
|
||||||
gnu_symbol_visibility : 'hidden',
|
gnu_symbol_visibility : 'hidden',
|
||||||
|
link_with : amd_common_libs,
|
||||||
c_args : libradeonwinsys_c_args,
|
c_args : libradeonwinsys_c_args,
|
||||||
dependencies : libradeonwinsys_deps,
|
dependencies : libradeonwinsys_deps,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -632,9 +632,6 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
|
|||||||
ws->info.max_alignment = 1024*1024;
|
ws->info.max_alignment = 1024*1024;
|
||||||
ws->info.has_graphics = true;
|
ws->info.has_graphics = true;
|
||||||
ws->info.cpdma_prefetch_writes_memory = true;
|
ws->info.cpdma_prefetch_writes_memory = true;
|
||||||
ws->info.cu_info.max_waves_per_simd = 10;
|
|
||||||
ws->info.cu_info.num_physical_sgprs_per_simd = 512;
|
|
||||||
ws->info.cu_info.num_physical_wave64_vgprs_per_simd = 256;
|
|
||||||
ws->info.has_3d_cube_border_color_mipmap = true;
|
ws->info.has_3d_cube_border_color_mipmap = true;
|
||||||
ws->info.has_image_opcodes = true;
|
ws->info.has_image_opcodes = true;
|
||||||
ws->info.spi_cu_en_has_effect = false;
|
ws->info.spi_cu_en_has_effect = false;
|
||||||
@@ -644,15 +641,12 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
|
|||||||
ws->info.max_gflops = 128 * ws->info.num_cu * ws->info.max_gpu_freq_mhz / 1000;
|
ws->info.max_gflops = 128 * ws->info.num_cu * ws->info.max_gpu_freq_mhz / 1000;
|
||||||
ws->info.num_tcc_blocks = ws->info.max_tcc_blocks;
|
ws->info.num_tcc_blocks = ws->info.max_tcc_blocks;
|
||||||
ws->info.tcp_cache_size = 16 * 1024;
|
ws->info.tcp_cache_size = 16 * 1024;
|
||||||
ws->info.cu_info.num_simd_per_compute_unit = 4;
|
|
||||||
ws->info.cu_info.min_sgpr_alloc = 8;
|
|
||||||
ws->info.cu_info.max_sgpr_alloc = 104;
|
|
||||||
ws->info.cu_info.sgpr_alloc_granularity = 8;
|
|
||||||
ws->info.cu_info.min_wave64_vgpr_alloc = 4;
|
|
||||||
ws->info.cu_info.max_vgpr_alloc = 256;
|
|
||||||
ws->info.cu_info.wave64_vgpr_alloc_granularity = 4;
|
|
||||||
ws->info.lds_size_per_workgroup = ws->info.gfx_level == GFX7 ? 64 * 1024 : 32 * 1024;
|
ws->info.lds_size_per_workgroup = ws->info.gfx_level == GFX7 ? 64 * 1024 : 32 * 1024;
|
||||||
|
|
||||||
|
#ifdef HAVE_GALLIUM_RADEONSI
|
||||||
|
ac_fill_cu_info(&ws->info, NULL);
|
||||||
|
#endif
|
||||||
|
|
||||||
for (unsigned se = 0; se < ws->info.max_se; se++) {
|
for (unsigned se = 0; se < ws->info.max_se; se++) {
|
||||||
for (unsigned sa = 0; sa < ws->info.max_sa_per_se; sa++)
|
for (unsigned sa = 0; sa < ws->info.max_sa_per_se; sa++)
|
||||||
ws->info.cu_mask[se][sa] = BITFIELD_MASK(ws->info.max_good_cu_per_sa);
|
ws->info.cu_mask[se][sa] = BITFIELD_MASK(ws->info.max_good_cu_per_sa);
|
||||||
|
|||||||
Reference in New Issue
Block a user