diff --git a/src/gallium/include/winsys/radeon_winsys.h b/src/gallium/include/winsys/radeon_winsys.h index 78ee0b49d34..c4df071a71a 100644 --- a/src/gallium/include/winsys/radeon_winsys.h +++ b/src/gallium/include/winsys/radeon_winsys.h @@ -708,186 +708,137 @@ radeon_bo_reference(struct radeon_winsys *rws, struct pb_buffer **dst, struct pb pb_reference_with_winsys(rws, dst, src); } -enum radeon_heap -{ - RADEON_HEAP_VRAM_NO_CPU_ACCESS, - RADEON_HEAP_VRAM_READ_ONLY, - RADEON_HEAP_VRAM_READ_ONLY_32BIT, - RADEON_HEAP_VRAM_32BIT, - RADEON_HEAP_VRAM, - RADEON_HEAP_GTT_WC, - RADEON_HEAP_GTT_WC_READ_ONLY, - RADEON_HEAP_GTT_WC_READ_ONLY_32BIT, - RADEON_HEAP_GTT_WC_32BIT, - RADEON_HEAP_GTT, - RADEON_HEAP_GTT_GL2_BYPASS_WC, - RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY, - RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY_32BIT, - RADEON_HEAP_GTT_GL2_BYPASS_WC_32BIT, - RADEON_HEAP_GTT_GL2_BYPASS, - RADEON_MAX_SLAB_HEAPS, - RADEON_MAX_CACHED_HEAPS = RADEON_MAX_SLAB_HEAPS, -}; +/* The following bits describe the heaps managed by slab allocators (pb_slab) and + * the allocation cache (pb_cache). + */ +#define RADEON_HEAP_BIT_VRAM (1 << 0) /* if false, it's GTT */ +#define RADEON_HEAP_BIT_READ_ONLY (1 << 1) /* both VRAM and GTT */ +#define RADEON_HEAP_BIT_32BIT (1 << 2) /* both VRAM and GTT */ -static inline enum radeon_bo_domain radeon_domain_from_heap(enum radeon_heap heap) +#define RADEON_HEAP_BIT_NO_CPU_ACCESS (1 << 3) /* VRAM only */ + +#define RADEON_HEAP_BIT_WC (1 << 3) /* GTT only, VRAM implies this to be true */ +#define RADEON_HEAP_BIT_GL2_BYPASS (1 << 4) /* GTT only */ + +/* The number of all possible heap descriptions using the bits above. */ +#define RADEON_NUM_HEAPS (1 << 5) + +static inline enum radeon_bo_domain radeon_domain_from_heap(int heap) { - switch (heap) { - case RADEON_HEAP_VRAM_NO_CPU_ACCESS: - case RADEON_HEAP_VRAM_READ_ONLY: - case RADEON_HEAP_VRAM_READ_ONLY_32BIT: - case RADEON_HEAP_VRAM_32BIT: - case RADEON_HEAP_VRAM: + assert(heap >= 0); + + if (heap & RADEON_HEAP_BIT_VRAM) return RADEON_DOMAIN_VRAM; - case RADEON_HEAP_GTT_WC: - case RADEON_HEAP_GTT_WC_READ_ONLY: - case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT: - case RADEON_HEAP_GTT_WC_32BIT: - case RADEON_HEAP_GTT: - case RADEON_HEAP_GTT_GL2_BYPASS_WC: - case RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY: - case RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY_32BIT: - case RADEON_HEAP_GTT_GL2_BYPASS_WC_32BIT: - case RADEON_HEAP_GTT_GL2_BYPASS: + else return RADEON_DOMAIN_GTT; - default: - assert(0); - return (enum radeon_bo_domain)0; - } } -static inline unsigned radeon_flags_from_heap(enum radeon_heap heap) +static inline unsigned radeon_flags_from_heap(int heap) { + assert(heap >= 0); + unsigned flags = RADEON_FLAG_NO_INTERPROCESS_SHARING; - switch (heap) { - case RADEON_HEAP_GTT: - case RADEON_HEAP_GTT_GL2_BYPASS: - break; - default: - flags |= RADEON_FLAG_GTT_WC; - } - - switch (heap) { - case RADEON_HEAP_GTT_GL2_BYPASS_WC: - case RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY: - case RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY_32BIT: - case RADEON_HEAP_GTT_GL2_BYPASS_WC_32BIT: - case RADEON_HEAP_GTT_GL2_BYPASS: - flags |= RADEON_FLAG_GL2_BYPASS; - break; - default: - break; - } - - switch (heap) { - case RADEON_HEAP_VRAM_READ_ONLY: - case RADEON_HEAP_VRAM_READ_ONLY_32BIT: - case RADEON_HEAP_GTT_WC_READ_ONLY: - case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT: - case RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY: - case RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY_32BIT: + if (heap & RADEON_HEAP_BIT_READ_ONLY) flags |= RADEON_FLAG_READ_ONLY; - break; - default: - break; - } - - switch (heap) { - case RADEON_HEAP_VRAM_READ_ONLY_32BIT: - case RADEON_HEAP_VRAM_32BIT: - case RADEON_HEAP_GTT_WC_READ_ONLY_32BIT: - case RADEON_HEAP_GTT_WC_32BIT: - case RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY_32BIT: - case RADEON_HEAP_GTT_GL2_BYPASS_WC_32BIT: + if (heap & RADEON_HEAP_BIT_32BIT) flags |= RADEON_FLAG_32BIT; - FALLTHROUGH; - default: - break; - } - switch (heap) { - case RADEON_HEAP_VRAM_NO_CPU_ACCESS: - flags |= RADEON_FLAG_NO_CPU_ACCESS; - break; - default: - break; + if (heap & RADEON_HEAP_BIT_VRAM) { + flags |= RADEON_FLAG_GTT_WC; + if (heap & RADEON_HEAP_BIT_NO_CPU_ACCESS) + flags |= RADEON_FLAG_NO_CPU_ACCESS; + } else { + /* GTT only */ + if (heap & RADEON_HEAP_BIT_WC) + flags |= RADEON_FLAG_GTT_WC; + if (heap & RADEON_HEAP_BIT_GL2_BYPASS) + flags |= RADEON_FLAG_GL2_BYPASS; } return flags; } +/* This cleans up flags, so that we can comfortably assume that no invalid flag combinations + * are set. + */ +static void radeon_canonicalize_bo_flags(enum radeon_bo_domain *_domain, + enum radeon_bo_flag *_flags) +{ + unsigned domain = *_domain; + unsigned flags = *_flags; + + /* Only set 1 domain, e.g. ignore GTT if VRAM is set. */ + if (domain) + domain = BITFIELD_BIT(ffs(domain) - 1); + else + domain = RADEON_DOMAIN_VRAM; + + switch (domain) { + case RADEON_DOMAIN_VRAM: + flags |= RADEON_FLAG_GTT_WC; + flags &= ~RADEON_FLAG_GL2_BYPASS; + break; + case RADEON_DOMAIN_GTT: + flags &= ~RADEON_FLAG_NO_CPU_ACCESS; + break; + case RADEON_DOMAIN_GDS: + case RADEON_DOMAIN_OA: + flags |= RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_NO_CPU_ACCESS; + flags &= ~RADEON_FLAG_SPARSE; + break; + } + + /* Sparse buffers must have NO_CPU_ACCESS set. */ + if (flags & RADEON_FLAG_SPARSE) + flags |= RADEON_FLAG_NO_CPU_ACCESS; + + *_domain = (enum radeon_bo_domain)domain; + *_flags = (enum radeon_bo_flag)flags; +} + /* Return the heap index for winsys allocators, or -1 on failure. */ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeon_bo_flag flags) { - bool uncached; - - /* VRAM implies WC (write combining) */ - assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC); - /* NO_CPU_ACCESS implies VRAM only. */ - assert(!(flags & RADEON_FLAG_NO_CPU_ACCESS) || domain == RADEON_DOMAIN_VRAM); + radeon_canonicalize_bo_flags(&domain, &flags); /* Resources with interprocess sharing don't use any winsys allocators. */ if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING)) return -1; - /* Unsupported flags: NO_SUBALLOC, SPARSE. */ - if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GL2_BYPASS | - RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT | - RADEON_FLAG_DRIVER_INTERNAL)) + /* These are unsupported flags. */ + /* RADEON_FLAG_DRIVER_INTERNAL is ignored. It doesn't affect allocators. */ + /* TODO: handle ENCRYPTED better */ + if (flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE | RADEON_FLAG_ENCRYPTED)) return -1; - switch (domain) { - case RADEON_DOMAIN_VRAM: - switch (flags & (RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT)) { - case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: - case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY: - assert(!"NO_CPU_ACCESS | READ_ONLY doesn't make sense"); - return -1; - case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_32BIT: - assert(!"NO_CPU_ACCESS with 32BIT is disallowed"); - return -1; - case RADEON_FLAG_NO_CPU_ACCESS: - return RADEON_HEAP_VRAM_NO_CPU_ACCESS; - case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: - return RADEON_HEAP_VRAM_READ_ONLY_32BIT; - case RADEON_FLAG_READ_ONLY: - return RADEON_HEAP_VRAM_READ_ONLY; - case RADEON_FLAG_32BIT: - return RADEON_HEAP_VRAM_32BIT; - case 0: - return RADEON_HEAP_VRAM; - } - break; - case RADEON_DOMAIN_GTT: - uncached = flags & RADEON_FLAG_GL2_BYPASS; + int heap = 0; - switch (flags & (RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT)) { - case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: - return uncached ? RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY_32BIT - : RADEON_HEAP_GTT_WC_READ_ONLY_32BIT; - case RADEON_FLAG_GTT_WC | RADEON_FLAG_READ_ONLY: - return uncached ? RADEON_HEAP_GTT_GL2_BYPASS_WC_READ_ONLY - : RADEON_HEAP_GTT_WC_READ_ONLY; - case RADEON_FLAG_GTT_WC | RADEON_FLAG_32BIT: - return uncached ? RADEON_HEAP_GTT_GL2_BYPASS_WC_32BIT - : RADEON_HEAP_GTT_WC_32BIT; - case RADEON_FLAG_GTT_WC: - return uncached ? RADEON_HEAP_GTT_GL2_BYPASS_WC : RADEON_HEAP_GTT_WC; - case RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT: - case RADEON_FLAG_READ_ONLY: - assert(!"READ_ONLY without WC is disallowed"); - return -1; - case RADEON_FLAG_32BIT: - assert(!"32BIT without WC is disallowed"); - return -1; - case 0: - return uncached ? RADEON_HEAP_GTT_GL2_BYPASS : RADEON_HEAP_GTT; - } - break; - default: - break; + if (flags & RADEON_FLAG_READ_ONLY) + heap |= RADEON_HEAP_BIT_READ_ONLY; + if (flags & RADEON_FLAG_32BIT) + heap |= RADEON_HEAP_BIT_32BIT; + + if (domain == RADEON_DOMAIN_VRAM) { + /* VRAM | GTT shouldn't occur, but if it does, ignore GTT. */ + heap |= RADEON_HEAP_BIT_VRAM; + if (flags & RADEON_FLAG_NO_CPU_ACCESS) + heap |= RADEON_HEAP_BIT_NO_CPU_ACCESS; + /* RADEON_FLAG_WC is ignored and implied to be true for VRAM */ + /* RADEON_FLAG_GL2_BYPASS is ignored and implied to be false for VRAM */ + } else if (domain == RADEON_DOMAIN_GTT) { + /* GTT is implied by RADEON_HEAP_BIT_VRAM not being set. */ + if (flags & RADEON_FLAG_GTT_WC) + heap |= RADEON_HEAP_BIT_WC; + if (flags & RADEON_FLAG_GL2_BYPASS) + heap |= RADEON_HEAP_BIT_GL2_BYPASS; + /* RADEON_FLAG_NO_CPU_ACCESS is ignored and implied to be false for GTT */ + } else { + return -1; /* */ } - return -1; + + assert(heap < RADEON_NUM_HEAPS); + return heap; } typedef struct pipe_screen *(*radeon_screen_create_t)(struct radeon_winsys *, diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 54e32d1be4a..59b4943c980 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -1362,17 +1362,7 @@ amdgpu_bo_create(struct amdgpu_winsys *ws, struct amdgpu_winsys_bo *bo; int heap = -1; - if (domain & (RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA)) - flags |= RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_SUBALLOC; - - /* VRAM implies WC. This is not optional. */ - assert(!(domain & RADEON_DOMAIN_VRAM) || flags & RADEON_FLAG_GTT_WC); - - /* NO_CPU_ACCESS is not valid with GTT. */ - assert(!(domain & RADEON_DOMAIN_GTT) || !(flags & RADEON_FLAG_NO_CPU_ACCESS)); - - /* Sparse buffers must have NO_CPU_ACCESS set. */ - assert(!(flags & RADEON_FLAG_SPARSE) || flags & RADEON_FLAG_NO_CPU_ACCESS); + radeon_canonicalize_bo_flags(&domain, &flags); struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && ws->info.has_tmz_support) ? ws->bo_slabs_encrypted : ws->bo_slabs; @@ -1385,7 +1375,7 @@ amdgpu_bo_create(struct amdgpu_winsys *ws, struct pb_slab_entry *entry; int heap = radeon_get_heap_index(domain, flags); - if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS) + if (heap < 0 || heap >= RADEON_NUM_HEAPS) goto no_slab; unsigned alloc_size = size; @@ -1457,7 +1447,7 @@ no_slab: if (use_reusable_pool) { heap = radeon_get_heap_index(domain, flags & ~RADEON_FLAG_ENCRYPTED); - assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS); + assert(heap >= 0 && heap < RADEON_NUM_HEAPS); /* Get a buffer from the cache. */ bo = (struct amdgpu_winsys_bo*) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index 6ea421fcb6f..65873d0a8fa 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -451,7 +451,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, goto fail_alloc; /* Create managers. */ - pb_cache_init(&aws->bo_cache, RADEON_MAX_CACHED_HEAPS, + pb_cache_init(&aws->bo_cache, RADEON_NUM_HEAPS, 500000, aws->check_vm ? 1.0f : 2.0f, 0, (aws->info.vram_size + aws->info.gart_size) / 8, aws, /* Cast to void* because one of the function parameters @@ -471,7 +471,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, if (!pb_slabs_init(&aws->bo_slabs[i], min_order, max_order, - RADEON_MAX_SLAB_HEAPS, true, + RADEON_NUM_HEAPS, true, aws, amdgpu_bo_can_reclaim_slab, amdgpu_bo_slab_alloc_normal, @@ -486,7 +486,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, if (aws->info.has_tmz_support && !pb_slabs_init(&aws->bo_slabs_encrypted[i], min_order, max_order, - RADEON_MAX_SLAB_HEAPS, true, + RADEON_NUM_HEAPS, true, aws, amdgpu_bo_can_reclaim_slab, amdgpu_bo_slab_alloc_encrypted, diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index a85b44179b5..a251b5f3b4d 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -1013,19 +1013,14 @@ radeon_winsys_bo_create(struct radeon_winsys *rws, struct radeon_bo *bo; int heap = -1; + radeon_canonicalize_bo_flags(&domain, &flags); + assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */ /* Only 32-bit sizes are supported. */ if (size > UINT_MAX) return NULL; - /* VRAM implies WC. This is not optional. */ - if (domain & RADEON_DOMAIN_VRAM) - flags |= RADEON_FLAG_GTT_WC; - /* NO_CPU_ACCESS is valid with VRAM only. */ - if (domain != RADEON_DOMAIN_VRAM) - flags &= ~RADEON_FLAG_NO_CPU_ACCESS; - /* Sub-allocate small buffers from slabs. */ if (!(flags & RADEON_FLAG_NO_SUBALLOC) && size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) && @@ -1034,7 +1029,7 @@ radeon_winsys_bo_create(struct radeon_winsys *rws, struct pb_slab_entry *entry; int heap = radeon_get_heap_index(domain, flags); - if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS) + if (heap < 0 || heap >= RADEON_NUM_HEAPS) goto no_slab; entry = pb_slab_alloc(&ws->bo_slabs, size, heap); @@ -1070,7 +1065,7 @@ no_slab: /* Shared resources don't use cached heaps. */ if (use_reusable_pool) { heap = radeon_get_heap_index(domain, flags); - assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS); + assert(heap >= 0 && heap < RADEON_NUM_HEAPS); bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, 0, heap)); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 76e7e90a12b..b40f9bed945 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -851,7 +851,7 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config, if (!do_winsys_init(ws)) goto fail1; - pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS, + pb_cache_init(&ws->bo_cache, RADEON_NUM_HEAPS, 500000, ws->check_vm ? 1.0f : 2.0f, 0, MIN2(ws->info.vram_size, ws->info.gart_size), NULL, radeon_bo_destroy, @@ -864,7 +864,7 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config, */ if (!pb_slabs_init(&ws->bo_slabs, RADEON_SLAB_MIN_SIZE_LOG2, RADEON_SLAB_MAX_SIZE_LOG2, - RADEON_MAX_SLAB_HEAPS, false, + RADEON_NUM_HEAPS, false, ws, radeon_bo_can_reclaim_slab, radeon_bo_slab_alloc,