radeonsi: rework RADEON_PRIO flags to be <= 31
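Keeping every RADEON_PRIO_* value at or below 31 lets the priority_usage bitmask (1 << RADEON_PRIO_*) be stored in a uint32_t instead of a uint64_t. This decreases sizeof(struct amdgpu_cs_buffer) from 24 to 16 bytes.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>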
This commit is contained in:
@@ -115,50 +115,51 @@ enum radeon_value_id {
|
||||
RADEON_CS_THREAD_TIME,
|
||||
};
|
||||
|
||||
/* Each group of four has the same priority. */
|
||||
enum radeon_bo_priority {
|
||||
/* Each group of two has the same priority. */
|
||||
RADEON_PRIO_FENCE = 0,
|
||||
RADEON_PRIO_TRACE,
|
||||
RADEON_PRIO_SO_FILLED_SIZE,
|
||||
|
||||
RADEON_PRIO_SO_FILLED_SIZE = 2,
|
||||
RADEON_PRIO_QUERY,
|
||||
|
||||
RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */
|
||||
RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */
|
||||
RADEON_PRIO_DRAW_INDIRECT,
|
||||
|
||||
RADEON_PRIO_DRAW_INDIRECT = 6,
|
||||
RADEON_PRIO_INDEX_BUFFER,
|
||||
|
||||
RADEON_PRIO_CP_DMA = 12,
|
||||
|
||||
RADEON_PRIO_CONST_BUFFER = 16,
|
||||
RADEON_PRIO_DESCRIPTORS,
|
||||
RADEON_PRIO_CP_DMA = 8,
|
||||
RADEON_PRIO_BORDER_COLORS,
|
||||
|
||||
RADEON_PRIO_SAMPLER_BUFFER = 20,
|
||||
RADEON_PRIO_CONST_BUFFER = 10,
|
||||
RADEON_PRIO_DESCRIPTORS,
|
||||
|
||||
RADEON_PRIO_SAMPLER_BUFFER = 12,
|
||||
RADEON_PRIO_VERTEX_BUFFER,
|
||||
|
||||
RADEON_PRIO_SHADER_RW_BUFFER = 24,
|
||||
RADEON_PRIO_SHADER_RW_BUFFER = 14,
|
||||
RADEON_PRIO_COMPUTE_GLOBAL,
|
||||
|
||||
RADEON_PRIO_SAMPLER_TEXTURE = 28,
|
||||
RADEON_PRIO_SAMPLER_TEXTURE = 16,
|
||||
RADEON_PRIO_SHADER_RW_IMAGE,
|
||||
|
||||
RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 32,
|
||||
RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 18,
|
||||
RADEON_PRIO_COLOR_BUFFER,
|
||||
|
||||
RADEON_PRIO_COLOR_BUFFER = 36,
|
||||
RADEON_PRIO_DEPTH_BUFFER = 20,
|
||||
|
||||
RADEON_PRIO_DEPTH_BUFFER = 40,
|
||||
RADEON_PRIO_COLOR_BUFFER_MSAA = 22,
|
||||
|
||||
RADEON_PRIO_COLOR_BUFFER_MSAA = 44,
|
||||
RADEON_PRIO_DEPTH_BUFFER_MSAA = 24,
|
||||
|
||||
RADEON_PRIO_DEPTH_BUFFER_MSAA = 48,
|
||||
|
||||
RADEON_PRIO_SEPARATE_META = 52,
|
||||
RADEON_PRIO_SEPARATE_META = 26,
|
||||
RADEON_PRIO_SHADER_BINARY, /* the hw can't hide instruction cache misses */
|
||||
|
||||
RADEON_PRIO_SHADER_RINGS = 56,
|
||||
RADEON_PRIO_SHADER_RINGS = 28,
|
||||
|
||||
RADEON_PRIO_SCRATCH_BUFFER = 60,
|
||||
/* 63 is the maximum value */
|
||||
RADEON_PRIO_SCRATCH_BUFFER = 30,
|
||||
/* 31 is the maximum value */
|
||||
};
|
||||
|
||||
struct winsys_handle;
|
||||
@@ -223,7 +224,7 @@ enum radeon_feature_id {
|
||||
struct radeon_bo_list_item {
|
||||
uint64_t bo_size;
|
||||
uint64_t vm_address;
|
||||
uint64_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */
|
||||
uint32_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */
|
||||
};
|
||||
|
||||
struct radeon_winsys {
|
||||
|
||||
@@ -568,8 +568,8 @@ static void si_dump_bo_list(struct si_context *sctx,
|
||||
size / page_size, va / page_size, (va + size) / page_size);
|
||||
|
||||
/* Print the usage. */
|
||||
for (j = 0; j < 64; j++) {
|
||||
if (!(saved->bo_list[i].priority_usage & (1ull << j)))
|
||||
for (j = 0; j < 32; j++) {
|
||||
if (!(saved->bo_list[i].priority_usage & (1u << j)))
|
||||
continue;
|
||||
|
||||
fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
|
||||
|
||||
@@ -629,7 +629,7 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs,
|
||||
*/
|
||||
if (bo == cs->last_added_bo &&
|
||||
(usage & cs->last_added_bo_usage) == usage &&
|
||||
(1ull << priority) & cs->last_added_bo_priority_usage)
|
||||
(1u << priority) & cs->last_added_bo_priority_usage)
|
||||
return cs->last_added_bo_index;
|
||||
|
||||
if (!bo->sparse) {
|
||||
@@ -658,7 +658,7 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs,
|
||||
buffer = &cs->sparse_buffers[index];
|
||||
}
|
||||
|
||||
buffer->u.real.priority_usage |= 1ull << priority;
|
||||
buffer->u.real.priority_usage |= 1u << priority;
|
||||
buffer->usage |= usage;
|
||||
|
||||
cs->last_added_bo = bo;
|
||||
@@ -1339,7 +1339,7 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
|
||||
assert(buffer->u.real.priority_usage != 0);
|
||||
|
||||
handles[num_handles] = buffer->bo->bo;
|
||||
flags[num_handles] = (util_last_bit64(buffer->u.real.priority_usage) - 1) / 4;
|
||||
flags[num_handles] = (util_last_bit(buffer->u.real.priority_usage) - 1) / 2;
|
||||
++num_handles;
|
||||
}
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ struct amdgpu_cs_buffer {
|
||||
struct amdgpu_winsys_bo *bo;
|
||||
union {
|
||||
struct {
|
||||
uint64_t priority_usage;
|
||||
uint32_t priority_usage;
|
||||
} real;
|
||||
struct {
|
||||
uint32_t real_idx; /* index of underlying real BO */
|
||||
@@ -94,7 +94,7 @@ struct amdgpu_cs_context {
|
||||
struct amdgpu_winsys_bo *last_added_bo;
|
||||
unsigned last_added_bo_index;
|
||||
unsigned last_added_bo_usage;
|
||||
uint64_t last_added_bo_priority_usage;
|
||||
uint32_t last_added_bo_priority_usage;
|
||||
|
||||
struct pipe_fence_handle **fence_dependencies;
|
||||
unsigned num_fence_dependencies;
|
||||
|
||||
@@ -366,7 +366,7 @@ static unsigned radeon_drm_cs_add_buffer(struct radeon_cmdbuf *rcs,
|
||||
reloc->read_domains |= rd;
|
||||
reloc->write_domain |= wd;
|
||||
reloc->flags = MAX2(reloc->flags, priority);
|
||||
cs->csc->relocs_bo[index].u.real.priority_usage |= 1ull << priority;
|
||||
cs->csc->relocs_bo[index].u.real.priority_usage |= 1u << priority;
|
||||
|
||||
if (added_domains & RADEON_DOMAIN_VRAM)
|
||||
cs->base.used_vram += bo->base.size;
|
||||
|
||||
@@ -33,7 +33,7 @@ struct radeon_bo_item {
|
||||
struct radeon_bo *bo;
|
||||
union {
|
||||
struct {
|
||||
uint64_t priority_usage;
|
||||
uint32_t priority_usage;
|
||||
} real;
|
||||
struct {
|
||||
unsigned real_idx;
|
||||
|
||||
Reference in New Issue
Block a user