intel: Move slm functions from brw_compiler.h to intel_compute_slm.c/h
These functions were inlined in a header and duplicated between brw and elk. That alone would be reason enough to move them to a C file, but upcoming patches will add more code to support Xe2 platforms, which would cause even more code to be inlined, duplicating even more code and increasing library size. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: José Roberto de Souza <jose.souza@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28910>
This commit is contained in:
committed by
Marge Bot
parent
357dde47a5
commit
f5f71bae02
@@ -82,6 +82,7 @@
|
||||
#endif
|
||||
|
||||
#include "drm-uapi/i915_drm.h"
|
||||
#include "intel/common/intel_compute_slm.h"
|
||||
#include "intel/common/intel_l3_config.h"
|
||||
#include "intel/common/intel_sample_positions.h"
|
||||
#include "intel/compiler/elk/elk_compiler.h"
|
||||
@@ -8165,8 +8166,8 @@ crocus_upload_compute_state(struct crocus_context *ice,
|
||||
idd.NumberofThreadsinGPGPUThreadGroup = dispatch.threads;
|
||||
idd.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs;
|
||||
idd.BarrierEnable = cs_prog_data->uses_barrier;
|
||||
idd.SharedLocalMemorySize = elk_encode_slm_size(GFX_VER,
|
||||
prog_data->total_shared);
|
||||
idd.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER,
|
||||
prog_data->total_shared);
|
||||
#if GFX_VERx10 >= 75
|
||||
idd.CrossThreadConstantDataReadLength = cs_prog_data->push.cross_thread.regs;
|
||||
#endif
|
||||
|
||||
@@ -96,6 +96,7 @@
|
||||
#include "util/u_trace_gallium.h"
|
||||
#include "nir.h"
|
||||
#include "intel/common/intel_aux_map.h"
|
||||
#include "intel/common/intel_compute_slm.h"
|
||||
#include "intel/common/intel_l3_config.h"
|
||||
#include "intel/common/intel_sample_positions.h"
|
||||
#include "intel/ds/intel_tracepoints.h"
|
||||
@@ -311,17 +312,6 @@ translate_wrap(unsigned pipe_wrap)
|
||||
return map[pipe_wrap];
|
||||
}
|
||||
|
||||
|
||||
/* Encode an SLM byte count for INTERFACE_DESCRIPTOR_DATA by forwarding to
 * the encoder selected at compile time from GFX_VER: the elk variant for
 * versions below 9, the brw variant otherwise.
 */
static inline uint32_t
iris_encode_slm_size(unsigned gen, uint32_t bytes)
{
#if GFX_VER < 9
   return elk_encode_slm_size(gen, bytes);
#else
   return encode_slm_size(gen, bytes);
#endif
}
|
||||
|
||||
/**
|
||||
* Allocate space for some indirect state.
|
||||
*
|
||||
@@ -8871,7 +8861,7 @@ iris_upload_compute_walker(struct iris_context *ice,
|
||||
idd.KernelStartPointer = KSP(shader);
|
||||
idd.NumberofThreadsinGPGPUThreadGroup = dispatch.threads;
|
||||
idd.SharedLocalMemorySize =
|
||||
iris_encode_slm_size(GFX_VER, shader->total_shared);
|
||||
intel_compute_slm_encode_size(GFX_VER, shader->total_shared);
|
||||
idd.SamplerStatePointer = shs->sampler_table.offset;
|
||||
idd.SamplerCount = encode_sampler_count(shader),
|
||||
idd.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE];
|
||||
@@ -9029,7 +9019,7 @@ iris_upload_gpgpu_walker(struct iris_context *ice,
|
||||
|
||||
iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) {
|
||||
idd.SharedLocalMemorySize =
|
||||
iris_encode_slm_size(GFX_VER, ish->kernel_shared_size + grid->variable_shared_mem);
|
||||
intel_compute_slm_encode_size(GFX_VER, ish->kernel_shared_size + grid->variable_shared_mem);
|
||||
idd.KernelStartPointer =
|
||||
KSP(shader) + iris_cs_data_prog_offset(cs_data, dispatch.simd_size);
|
||||
idd.SamplerStatePointer = shs->sampler_table.offset;
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
|
||||
#include "blorp_priv.h"
|
||||
#include "dev/intel_device_info.h"
|
||||
#include "common/intel_compute_slm.h"
|
||||
#include "common/intel_sample_positions.h"
|
||||
#include "common/intel_l3_config.h"
|
||||
#include "genxml/gen_macros.h"
|
||||
@@ -1735,7 +1736,7 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
||||
.BindingTablePointer = surfaces_offset,
|
||||
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
||||
.SharedLocalMemorySize =
|
||||
encode_slm_size(GFX_VER, prog_data->total_shared),
|
||||
intel_compute_slm_encode_size(GFX_VER, prog_data->total_shared),
|
||||
.PreferredSLMAllocationSize = preferred_slm_allocation_size(devinfo),
|
||||
.NumberOfBarriers = cs_prog_data->uses_barrier,
|
||||
};
|
||||
@@ -1798,8 +1799,8 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
||||
.BindingTablePointer = surfaces_offset,
|
||||
.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
|
||||
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
||||
.SharedLocalMemorySize = encode_slm_size(GFX_VER,
|
||||
prog_data->total_shared),
|
||||
.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER,
|
||||
prog_data->total_shared),
|
||||
.BarrierEnable = cs_prog_data->uses_barrier,
|
||||
.CrossThreadConstantDataReadLength =
|
||||
cs_prog_data->push.cross_thread.regs,
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
|
||||
#include "blorp_priv.h"
|
||||
#include "dev/intel_device_info.h"
|
||||
#include "common/intel_compute_slm.h"
|
||||
#include "common/intel_sample_positions.h"
|
||||
#include "common/intel_l3_config.h"
|
||||
#include "genxml/gen_macros.h"
|
||||
@@ -2060,8 +2061,8 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
||||
.BindingTablePointer = surfaces_offset,
|
||||
.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
|
||||
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
||||
.SharedLocalMemorySize = elk_encode_slm_size(GFX_VER,
|
||||
prog_data->total_shared),
|
||||
.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER,
|
||||
prog_data->total_shared),
|
||||
.BarrierEnable = cs_prog_data->uses_barrier,
|
||||
#if GFX_VER >= 8 || GFX_VERx10 == 75
|
||||
.CrossThreadConstantDataReadLength =
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright 2024 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "intel_compute_slm.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "util/macros.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
/* The Shared Local Memory size is always a power of two and, when non-zero,
 * has a Gen-dependent minimum: 1024 bytes on Gfx9+ and 4096 bytes before.
 *
 * Returns the size in bytes that a request of `bytes` rounds up to, or 0
 * when no shared memory is requested.
 */
uint32_t
intel_compute_slm_calculate_size(unsigned gen, uint32_t bytes)
{
   assert(bytes <= 64 * 1024);

   if (bytes == 0)
      return 0;

   const uint32_t min_size = gen >= 9 ? 1024 : 4096;
   const uint32_t pow2_size = util_next_power_of_two(bytes);

   return MAX2(pow2_size, min_size);
}
|
||||
|
||||
/* Translate an SLM byte count into the value programmed in
 * INTERFACE_DESCRIPTOR_DATA. Shared Local Memory is specified as powers of
 * two, with the following representations:
 *
 * Size   | 0 kB | 1 kB | 2 kB | 4 kB | 8 kB | 16 kB | 32 kB | 64 kB |
 * -------------------------------------------------------------------
 * Gfx7-8 |    0 | none | none |    1 |    2 |     4 |     8 |    16 |
 * -------------------------------------------------------------------
 * Gfx9+  |    0 |    1 |    2 |    3 |    4 |     5 |     6 |     7 |
 */
uint32_t
intel_compute_slm_encode_size(unsigned gen, uint32_t bytes)
{
   /* No shared memory requested: encoded as 0 on every generation. */
   if (bytes == 0)
      return 0;

   const uint32_t rounded = intel_compute_slm_calculate_size(gen, bytes);
   assert(util_is_power_of_two_nonzero(rounded));

   if (gen < 9) {
      /* Pre-Gfx9 representation: the size in units of 4 kB. */
      assert(rounded >= 4096);
      return rounded / 4096;
   }

   /* Gfx9+: log2(size) - 9, so 1024 bytes (1 kB, bit index 10) becomes 1. */
   assert(rounded >= 1024);
   return ffs(rounded) - 10;
}
|
||||
@@ -0,0 +1,11 @@
|
||||
/*
|
||||
* Copyright 2024 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
uint32_t intel_compute_slm_calculate_size(unsigned gen, uint32_t bytes);
|
||||
uint32_t intel_compute_slm_encode_size(unsigned gen, uint32_t bytes);
|
||||
@@ -36,6 +36,8 @@ files_libintel_common = files(
|
||||
'intel_bind_timeline.c',
|
||||
'intel_bind_timeline.h',
|
||||
'intel_buffer_alloc.h',
|
||||
'intel_compute_slm.c',
|
||||
'intel_compute_slm.h',
|
||||
'intel_debug_identifier.h',
|
||||
'intel_debug_identifier.c',
|
||||
'intel_engine.c',
|
||||
|
||||
@@ -1556,52 +1556,6 @@ void brw_debug_key_recompile(const struct brw_compiler *c, void *log,
|
||||
const struct brw_base_prog_key *old_key,
|
||||
const struct brw_base_prog_key *key);
|
||||
|
||||
/* The Shared Local Memory size is always a power of two and, when non-zero,
 * has a Gen-dependent minimum: 1024 bytes on Gfx9+ and 4096 bytes before.
 *
 * Returns the size in bytes that a request of `bytes` rounds up to, or 0
 * when no shared memory is requested.
 */
static inline uint32_t
intel_calculate_slm_size(unsigned gen, uint32_t bytes)
{
   assert(bytes <= 64 * 1024);

   if (bytes == 0)
      return 0;

   const uint32_t min_size = gen >= 9 ? 1024 : 4096;
   const uint32_t pow2_size = util_next_power_of_two(bytes);

   return MAX2(pow2_size, min_size);
}
|
||||
|
||||
/* Translate an SLM byte count into the value programmed in
 * INTERFACE_DESCRIPTOR_DATA. Shared Local Memory is specified as powers of
 * two, with the following representations:
 *
 * Size   | 0 kB | 1 kB | 2 kB | 4 kB | 8 kB | 16 kB | 32 kB | 64 kB |
 * -------------------------------------------------------------------
 * Gfx7-8 |    0 | none | none |    1 |    2 |     4 |     8 |    16 |
 * -------------------------------------------------------------------
 * Gfx9+  |    0 |    1 |    2 |    3 |    4 |     5 |     6 |     7 |
 */
static inline uint32_t
encode_slm_size(unsigned gen, uint32_t bytes)
{
   /* No shared memory requested: encoded as 0 on every generation. */
   if (bytes == 0)
      return 0;

   const uint32_t rounded = intel_calculate_slm_size(gen, bytes);
   assert(util_is_power_of_two_nonzero(rounded));

   if (gen < 9) {
      /* Pre-Gfx9 representation: the size in units of 4 kB. */
      assert(rounded >= 4096);
      return rounded / 4096;
   }

   /* Gfx9+: log2(size) - 9, so 1024 bytes (1 kB, bit index 10) becomes 1. */
   assert(rounded >= 1024);
   return ffs(rounded) - 10;
}
|
||||
|
||||
unsigned
|
||||
brw_cs_push_const_total_size(const struct brw_cs_prog_data *cs_prog_data,
|
||||
unsigned threads);
|
||||
|
||||
@@ -1652,46 +1652,6 @@ void elk_debug_key_recompile(const struct elk_compiler *c, void *log,
|
||||
const struct elk_base_prog_key *old_key,
|
||||
const struct elk_base_prog_key *key);
|
||||
|
||||
/* The Shared Local Memory size is always a power of two and, when non-zero,
 * has a Gen-dependent minimum: 1024 bytes on Gfx9+ and 4096 bytes before.
 *
 * Returns the size in bytes that a request of `bytes` rounds up to, or 0
 * when no shared memory is requested.
 */
static inline uint32_t
elk_calculate_slm_size(unsigned gen, uint32_t bytes)
{
   assert(bytes <= 64 * 1024);

   if (bytes == 0)
      return 0;

   const uint32_t min_size = gen >= 9 ? 1024 : 4096;
   const uint32_t pow2_size = util_next_power_of_two(bytes);

   return MAX2(pow2_size, min_size);
}
|
||||
|
||||
/* Translate an SLM byte count into the value programmed in
 * INTERFACE_DESCRIPTOR_DATA. Shared Local Memory is specified as powers of
 * two, with the following representations:
 *
 * Size   | 0 kB | 1 kB | 2 kB | 4 kB | 8 kB | 16 kB | 32 kB | 64 kB |
 * -------------------------------------------------------------------
 * Gfx7-8 |    0 | none | none |    1 |    2 |     4 |     8 |    16 |
 * -------------------------------------------------------------------
 * Gfx9+  |    0 |    1 |    2 |    3 |    4 |     5 |     6 |     7 |
 *
 * This variant always produces the pre-Gfx9 (Gfx7-8) representation.
 */
static inline uint32_t
elk_encode_slm_size(unsigned gen, uint32_t bytes)
{
   /* No shared memory requested: encoded as 0. */
   if (bytes == 0)
      return 0;

   const uint32_t rounded = elk_calculate_slm_size(gen, bytes);
   assert(util_is_power_of_two_nonzero(rounded));
   assert(rounded >= 4096);

   /* Pre-Gfx9 representation: the size in units of 4 kB. */
   return rounded / 4096;
}
|
||||
|
||||
unsigned
|
||||
elk_cs_push_const_total_size(const struct elk_cs_prog_data *cs_prog_data,
|
||||
unsigned threads);
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
|
||||
#include "util/mesa-sha1.h"
|
||||
#include "util/os_time.h"
|
||||
#include "common/intel_compute_slm.h"
|
||||
#include "common/intel_l3_config.h"
|
||||
#include "common/intel_sample_positions.h"
|
||||
#include "compiler/brw_disasm.h"
|
||||
@@ -1162,7 +1163,7 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
|
||||
const unsigned chunk_size = 16;
|
||||
const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
|
||||
assert(shared_size <=
|
||||
intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));
|
||||
intel_compute_slm_calculate_size(compiler->devinfo->ver, nir->info.shared_size));
|
||||
|
||||
NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
|
||||
shared_size, chunk_size);
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include "vk_util.h"
|
||||
|
||||
#include "common/intel_aux_map.h"
|
||||
#include "common/intel_compute_slm.h"
|
||||
#include "genxml/gen_macros.h"
|
||||
#include "genxml/genX_pack.h"
|
||||
#include "genxml/genX_rt_pack.h"
|
||||
@@ -283,7 +284,7 @@ get_interface_descriptor_data(struct anv_cmd_buffer *cmd_buffer,
|
||||
.BindingTableEntryCount = devinfo->verx10 == 125 ?
|
||||
0 : 1 + MIN2(shader->bind_map.surface_count, 30),
|
||||
.NumberofThreadsinGPGPUThreadGroup = dispatch->threads,
|
||||
.SharedLocalMemorySize = encode_slm_size(GFX_VER, prog_data->base.total_shared),
|
||||
.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER, prog_data->base.total_shared),
|
||||
.PreferredSLMAllocationSize = preferred_slm_allocation_size(devinfo),
|
||||
.NumberOfBarriers = prog_data->uses_barrier,
|
||||
};
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include "genxml/genX_pack.h"
|
||||
#include "genxml/genX_rt_pack.h"
|
||||
|
||||
#include "common/intel_compute_slm.h"
|
||||
#include "common/intel_genX_state_brw.h"
|
||||
#include "common/intel_l3_config.h"
|
||||
#include "common/intel_sample_positions.h"
|
||||
@@ -1792,7 +1793,7 @@ emit_task_state(struct anv_graphics_pipeline *pipeline)
|
||||
|
||||
task.NumberofBarriers = task_prog_data->base.uses_barrier;
|
||||
task.SharedLocalMemorySize =
|
||||
encode_slm_size(GFX_VER, task_prog_data->base.base.total_shared);
|
||||
intel_compute_slm_encode_size(GFX_VER, task_prog_data->base.base.total_shared);
|
||||
task.PreferredSLMAllocationSize =
|
||||
preferred_slm_allocation_size(devinfo);
|
||||
|
||||
@@ -1873,7 +1874,7 @@ emit_mesh_state(struct anv_graphics_pipeline *pipeline)
|
||||
|
||||
mesh.NumberofBarriers = mesh_prog_data->base.uses_barrier;
|
||||
mesh.SharedLocalMemorySize =
|
||||
encode_slm_size(GFX_VER, mesh_prog_data->base.base.total_shared);
|
||||
intel_compute_slm_encode_size(GFX_VER, mesh_prog_data->base.base.total_shared);
|
||||
mesh.PreferredSLMAllocationSize =
|
||||
preferred_slm_allocation_size(devinfo);
|
||||
|
||||
@@ -2080,7 +2081,7 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
|
||||
0 : 1 + MIN2(pipeline->cs->bind_map.surface_count, 30),
|
||||
.BarrierEnable = cs_prog_data->uses_barrier,
|
||||
.SharedLocalMemorySize =
|
||||
encode_slm_size(GFX_VER, cs_prog_data->base.total_shared),
|
||||
intel_compute_slm_encode_size(GFX_VER, cs_prog_data->base.total_shared),
|
||||
|
||||
.ConstantURBEntryReadOffset = 0,
|
||||
.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
|
||||
#include "genxml/gen_macros.h"
|
||||
#include "genxml/genX_pack.h"
|
||||
#include "common/intel_compute_slm.h"
|
||||
#include "common/intel_genX_state_brw.h"
|
||||
|
||||
static void
|
||||
@@ -580,8 +581,8 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
|
||||
.BindingTablePointer = 0,
|
||||
.BindingTableEntryCount = 0,
|
||||
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
||||
.SharedLocalMemorySize = encode_slm_size(GFX_VER,
|
||||
prog_data->base.total_shared),
|
||||
.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER,
|
||||
prog_data->base.total_shared),
|
||||
.NumberOfBarriers = prog_data->uses_barrier,
|
||||
};
|
||||
}
|
||||
@@ -649,8 +650,8 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
|
||||
.SamplerCount = 0,
|
||||
.BindingTableEntryCount = 0,
|
||||
.BarrierEnable = prog_data->uses_barrier,
|
||||
.SharedLocalMemorySize = encode_slm_size(GFX_VER,
|
||||
prog_data->base.total_shared),
|
||||
.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER,
|
||||
prog_data->base.total_shared),
|
||||
|
||||
.ConstantURBEntryReadOffset = 0,
|
||||
.ConstantURBEntryReadLength = prog_data->push.per_thread.regs,
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
|
||||
#include "util/mesa-sha1.h"
|
||||
#include "util/os_time.h"
|
||||
#include "common/intel_compute_slm.h"
|
||||
#include "common/intel_l3_config.h"
|
||||
#include "common/intel_sample_positions.h"
|
||||
#include "compiler/elk/elk_disasm.h"
|
||||
@@ -568,7 +569,7 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
|
||||
const unsigned chunk_size = 16;
|
||||
const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
|
||||
assert(shared_size <=
|
||||
elk_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));
|
||||
intel_compute_slm_calculate_size(compiler->devinfo->ver, nir->info.shared_size));
|
||||
|
||||
NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
|
||||
shared_size, chunk_size);
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include "genxml/genX_pack.h"
|
||||
#include "genxml/genX_rt_pack.h"
|
||||
|
||||
#include "common/intel_compute_slm.h"
|
||||
#include "common/intel_genX_state_elk.h"
|
||||
#include "common/intel_l3_config.h"
|
||||
#include "common/intel_sample_positions.h"
|
||||
@@ -1939,8 +1940,7 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
|
||||
*/
|
||||
.BindingTableEntryCount = 1 + MIN2(cs_bin->bind_map.surface_count, 30),
|
||||
.BarrierEnable = cs_prog_data->uses_barrier,
|
||||
.SharedLocalMemorySize =
|
||||
elk_encode_slm_size(GFX_VER, cs_prog_data->base.total_shared),
|
||||
.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER, cs_prog_data->base.total_shared),
|
||||
|
||||
#if GFX_VERx10 != 75
|
||||
.ConstantURBEntryReadOffset = 0,
|
||||
|
||||
Reference in New Issue
Block a user