freedreno/common: Make max tile dimensions a param

These values may differ between SKUs within a given generation, so make
them parameters.

Signed-off-by: Rob Clark <rob.clark@oss.qualcomm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38450>
This commit is contained in:
Rob Clark
2025-11-03 14:15:18 -08:00
committed by Marge Bot
parent 96d6755e1f
commit aa112be635
+41 -5
View File
@@ -133,18 +133,14 @@ class A6xxGPUInfo(GPUInfo):
duplication of parameters that are unique to the sub-generation.
"""
def __init__(self, chip, template, num_ccu,
tile_align_w, tile_align_h, num_vsc_pipes,
tile_align_w, tile_align_h, tile_max_w, tile_max_h, num_vsc_pipes,
cs_shared_mem_size, wave_granularity, fibers_per_sp,
magic_regs, raw_magic_regs = None, highest_bank_bit = 15,
ubwc_swizzle = 0x6, macrotile_mode = 1,
threadsize_base = 64, max_waves = 16):
if chip == CHIP.A6XX:
tile_max_w = 1024 # max_bitfield_val(5, 0, 5)
tile_max_h = max_bitfield_val(14, 8, 4) # 1008
compute_lb_size = 0
else:
tile_max_w = 1728
tile_max_h = 1728
# on a7xx the compute_lb_size is 40KB for all known parts for now.
# We have a parameter for it in case some low-end parts cut it down.
compute_lb_size = 40 * 1024
@@ -458,6 +454,8 @@ add_gpus([
num_ccu = 1,
tile_align_w = 32,
tile_align_h = 16,
tile_max_w = 1024,
tile_max_h = 1024,
num_vsc_pipes = 16,
cs_shared_mem_size = 16 * 1024,
wave_granularity = 1,
@@ -496,6 +494,8 @@ add_gpus([
num_ccu = 1,
tile_align_w = 32,
tile_align_h = 32,
tile_max_w = 1024,
tile_max_h = 1024,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -530,6 +530,8 @@ add_gpus([
num_ccu = 1,
tile_align_w = 32,
tile_align_h = 16,
tile_max_w = 1024,
tile_max_h = 1024,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -563,6 +565,8 @@ add_gpus([
num_ccu = 2,
tile_align_w = 96,
tile_align_h = 16,
tile_max_w = 1024,
tile_max_h = 1024,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -595,6 +599,8 @@ add_gpus([
num_ccu = 2,
tile_align_w = 32,
tile_align_h = 16,
tile_max_w = 1024,
tile_max_h = 1024,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -629,6 +635,8 @@ add_gpus([
num_ccu = 2,
tile_align_w = 32,
tile_align_h = 16,
tile_max_w = 1024,
tile_max_h = 1024,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -663,6 +671,8 @@ add_gpus([
num_ccu = 4,
tile_align_w = 64,
tile_align_h = 32,
tile_max_w = 1024,
tile_max_h = 1024,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -697,6 +707,8 @@ add_gpus([
num_ccu = 3,
tile_align_w = 96,
tile_align_h = 16,
tile_max_w = 1024,
tile_max_h = 1024,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -737,6 +749,8 @@ add_gpus([
num_ccu = 2,
tile_align_w = 32,
tile_align_h = 16,
tile_max_w = 1024,
tile_max_h = 1024,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -770,6 +784,8 @@ add_gpus([
num_ccu = 3,
tile_align_w = 96,
tile_align_h = 16,
tile_max_w = 1024,
tile_max_h = 1024,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -804,6 +820,8 @@ add_gpus([
num_ccu = 3,
tile_align_w = 96,
tile_align_h = 16,
tile_max_w = 1024,
tile_max_h = 1024,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -837,6 +855,8 @@ add_gpus([
num_ccu = 8,
tile_align_w = 64,
tile_align_h = 32,
tile_max_w = 1024,
tile_max_h = 1024,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -884,6 +904,8 @@ add_gpus([
num_ccu = 1,
tile_align_w = 32,
tile_align_h = 16,
tile_max_w = 1024,
tile_max_h = 1024,
num_vsc_pipes = 16,
cs_shared_mem_size = 16 * 1024,
wave_granularity = 1,
@@ -1131,6 +1153,8 @@ add_gpus([
num_ccu = 4,
tile_align_w = 64,
tile_align_h = 32,
tile_max_w = 1024,
tile_max_h = 1024,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -1149,6 +1173,8 @@ add_gpus([
num_ccu = 4,
tile_align_w = 64,
tile_align_h = 32,
tile_max_w = 1024,
tile_max_h = 1024,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -1167,6 +1193,8 @@ add_gpus([
num_ccu = 3,
tile_align_w = 96,
tile_align_h = 32,
tile_max_w = 2016,
tile_max_h = 2032,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -1233,6 +1261,8 @@ add_gpus([
num_ccu = 6,
tile_align_w = 96,
tile_align_h = 32,
tile_max_w = 2016,
tile_max_h = 2032,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -1252,6 +1282,8 @@ add_gpus([
num_ccu = 6,
tile_align_w = 96,
tile_align_h = 32,
tile_max_w = 2016,
tile_max_h = 2032,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -1313,6 +1345,8 @@ add_gpus([
num_ccu = 6,
tile_align_w = 96,
tile_align_h = 32,
tile_max_w = 2016,
tile_max_h = 2032,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,
@@ -1334,6 +1368,8 @@ add_gpus([
num_ccu = 6,
tile_align_w = 96,
tile_align_h = 32,
tile_max_w = 2016,
tile_max_h = 2032,
num_vsc_pipes = 32,
cs_shared_mem_size = 32 * 1024,
wave_granularity = 2,