diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py index 667afe50a22..8604791411c 100644 --- a/src/freedreno/common/freedreno_devices.py +++ b/src/freedreno/common/freedreno_devices.py @@ -133,18 +133,14 @@ class A6xxGPUInfo(GPUInfo): duplication of parameters that are unique to the sub-generation. """ def __init__(self, chip, template, num_ccu, - tile_align_w, tile_align_h, num_vsc_pipes, + tile_align_w, tile_align_h, tile_max_w, tile_max_h, num_vsc_pipes, cs_shared_mem_size, wave_granularity, fibers_per_sp, magic_regs, raw_magic_regs = None, highest_bank_bit = 15, ubwc_swizzle = 0x6, macrotile_mode = 1, threadsize_base = 64, max_waves = 16): if chip == CHIP.A6XX: - tile_max_w = 1024 # max_bitfield_val(5, 0, 5) - tile_max_h = max_bitfield_val(14, 8, 4) # 1008 compute_lb_size = 0 else: - tile_max_w = 1728 - tile_max_h = 1728 # on a7xx the compute_lb_size is 40KB for all known parts for now. # We have a parameter for it in case some low-end parts cut it down. compute_lb_size = 40 * 1024 @@ -458,6 +454,8 @@ add_gpus([ num_ccu = 1, tile_align_w = 32, tile_align_h = 16, + tile_max_w = 1024, + tile_max_h = 1024, num_vsc_pipes = 16, cs_shared_mem_size = 16 * 1024, wave_granularity = 1, @@ -496,6 +494,8 @@ add_gpus([ num_ccu = 1, tile_align_w = 32, tile_align_h = 32, + tile_max_w = 1024, + tile_max_h = 1024, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -530,6 +530,8 @@ add_gpus([ num_ccu = 1, tile_align_w = 32, tile_align_h = 16, + tile_max_w = 1024, + tile_max_h = 1024, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -563,6 +565,8 @@ add_gpus([ num_ccu = 2, tile_align_w = 96, tile_align_h = 16, + tile_max_w = 1024, + tile_max_h = 1024, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -595,6 +599,8 @@ add_gpus([ num_ccu = 2, tile_align_w = 32, tile_align_h = 16, + tile_max_w = 1024, + tile_max_h = 1024, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -629,6 +635,8 @@ add_gpus([ num_ccu = 2, tile_align_w = 32, tile_align_h = 16, + tile_max_w = 1024, + tile_max_h = 1024, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -663,6 +671,8 @@ add_gpus([ num_ccu = 4, tile_align_w = 64, tile_align_h = 32, + tile_max_w = 1024, + tile_max_h = 1024, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -697,6 +707,8 @@ add_gpus([ num_ccu = 3, tile_align_w = 96, tile_align_h = 16, + tile_max_w = 1024, + tile_max_h = 1024, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -737,6 +749,8 @@ add_gpus([ num_ccu = 2, tile_align_w = 32, tile_align_h = 16, + tile_max_w = 1024, + tile_max_h = 1024, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -770,6 +784,8 @@ add_gpus([ num_ccu = 3, tile_align_w = 96, tile_align_h = 16, + tile_max_w = 1024, + tile_max_h = 1024, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -804,6 +820,8 @@ add_gpus([ num_ccu = 3, tile_align_w = 96, tile_align_h = 16, + tile_max_w = 1024, + tile_max_h = 1024, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -837,6 +855,8 @@ add_gpus([ num_ccu = 8, tile_align_w = 64, tile_align_h = 32, + tile_max_w = 1024, + tile_max_h = 1024, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -884,6 +904,8 @@ add_gpus([ num_ccu = 1, tile_align_w = 32, tile_align_h = 16, + tile_max_w = 1024, + tile_max_h = 1024, num_vsc_pipes = 16, cs_shared_mem_size = 16 * 1024, wave_granularity = 1, @@ -1131,6 +1153,8 @@ add_gpus([ num_ccu = 4, tile_align_w = 64, tile_align_h = 32, + tile_max_w = 1024, + tile_max_h = 1024, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -1149,6 +1173,8 @@ add_gpus([ num_ccu = 4, tile_align_w = 64, tile_align_h = 32, + tile_max_w = 1024, + tile_max_h = 1024, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -1167,6 +1193,8 @@ add_gpus([ num_ccu = 3, tile_align_w = 96, tile_align_h = 32, + tile_max_w = 2016, + tile_max_h = 2032, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -1233,6 +1261,8 @@ add_gpus([ num_ccu = 6, tile_align_w = 96, tile_align_h = 32, + tile_max_w = 2016, + tile_max_h = 2032, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -1252,6 +1282,8 @@ add_gpus([ num_ccu = 6, tile_align_w = 96, tile_align_h = 32, + tile_max_w = 2016, + tile_max_h = 2032, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -1313,6 +1345,8 @@ add_gpus([ num_ccu = 6, tile_align_w = 96, tile_align_h = 32, + tile_max_w = 2016, + tile_max_h = 2032, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2, @@ -1334,6 +1368,8 @@ add_gpus([ num_ccu = 6, tile_align_w = 96, tile_align_h = 32, + tile_max_w = 2016, + tile_max_h = 2032, num_vsc_pipes = 32, cs_shared_mem_size = 32 * 1024, wave_granularity = 2,