intel: Move slm functions from brw_compiler.h to intel_compute_slm.c/h

These functions were inlined in a header and duplicated between brw and
elk.
That alone would be reason enough to move them to a C file, but upcoming
patches will add more code to support Xe2 platforms, which would cause
even more code to be inlined and duplicated, increasing the library
size.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28910>
This commit is contained in:
José Roberto de Souza
2024-04-19 11:29:22 -07:00
committed by Marge Bot
parent 357dde47a5
commit f5f71bae02
15 changed files with 100 additions and 118 deletions
+3 -2
View File
@@ -82,6 +82,7 @@
#endif
#include "drm-uapi/i915_drm.h"
#include "intel/common/intel_compute_slm.h"
#include "intel/common/intel_l3_config.h"
#include "intel/common/intel_sample_positions.h"
#include "intel/compiler/elk/elk_compiler.h"
@@ -8165,8 +8166,8 @@ crocus_upload_compute_state(struct crocus_context *ice,
idd.NumberofThreadsinGPGPUThreadGroup = dispatch.threads;
idd.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs;
idd.BarrierEnable = cs_prog_data->uses_barrier;
idd.SharedLocalMemorySize = elk_encode_slm_size(GFX_VER,
prog_data->total_shared);
idd.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER,
prog_data->total_shared);
#if GFX_VERx10 >= 75
idd.CrossThreadConstantDataReadLength = cs_prog_data->push.cross_thread.regs;
#endif
+3 -13
View File
@@ -96,6 +96,7 @@
#include "util/u_trace_gallium.h"
#include "nir.h"
#include "intel/common/intel_aux_map.h"
#include "intel/common/intel_compute_slm.h"
#include "intel/common/intel_l3_config.h"
#include "intel/common/intel_sample_positions.h"
#include "intel/ds/intel_tracepoints.h"
@@ -311,17 +312,6 @@ translate_wrap(unsigned pipe_wrap)
return map[pipe_wrap];
}
/* Encode a Shared Local Memory size for the generation being compiled.
 *
 * The implementation is selected at preprocessing time: encode_slm_size()
 * when GFX_VER >= 9, elk_encode_slm_size() otherwise. Both arguments are
 * forwarded unchanged and the callee's result is returned as-is.
 */
static inline uint32_t
iris_encode_slm_size(unsigned gen, uint32_t bytes)
{
#if GFX_VER >= 9
return encode_slm_size(gen, bytes);
#else
return elk_encode_slm_size(gen, bytes);
#endif
}
/**
* Allocate space for some indirect state.
*
@@ -8871,7 +8861,7 @@ iris_upload_compute_walker(struct iris_context *ice,
idd.KernelStartPointer = KSP(shader);
idd.NumberofThreadsinGPGPUThreadGroup = dispatch.threads;
idd.SharedLocalMemorySize =
iris_encode_slm_size(GFX_VER, shader->total_shared);
intel_compute_slm_encode_size(GFX_VER, shader->total_shared);
idd.SamplerStatePointer = shs->sampler_table.offset;
idd.SamplerCount = encode_sampler_count(shader),
idd.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE];
@@ -9029,7 +9019,7 @@ iris_upload_gpgpu_walker(struct iris_context *ice,
iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) {
idd.SharedLocalMemorySize =
iris_encode_slm_size(GFX_VER, ish->kernel_shared_size + grid->variable_shared_mem);
intel_compute_slm_encode_size(GFX_VER, ish->kernel_shared_size + grid->variable_shared_mem);
idd.KernelStartPointer =
KSP(shader) + iris_cs_data_prog_offset(cs_data, dispatch.simd_size);
idd.SamplerStatePointer = shs->sampler_table.offset;
+4 -3
View File
@@ -26,6 +26,7 @@
#include "blorp_priv.h"
#include "dev/intel_device_info.h"
#include "common/intel_compute_slm.h"
#include "common/intel_sample_positions.h"
#include "common/intel_l3_config.h"
#include "genxml/gen_macros.h"
@@ -1735,7 +1736,7 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
.BindingTablePointer = surfaces_offset,
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
.SharedLocalMemorySize =
encode_slm_size(GFX_VER, prog_data->total_shared),
intel_compute_slm_encode_size(GFX_VER, prog_data->total_shared),
.PreferredSLMAllocationSize = preferred_slm_allocation_size(devinfo),
.NumberOfBarriers = cs_prog_data->uses_barrier,
};
@@ -1798,8 +1799,8 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
.BindingTablePointer = surfaces_offset,
.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
.SharedLocalMemorySize = encode_slm_size(GFX_VER,
prog_data->total_shared),
.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER,
prog_data->total_shared),
.BarrierEnable = cs_prog_data->uses_barrier,
.CrossThreadConstantDataReadLength =
cs_prog_data->push.cross_thread.regs,
+3 -2
View File
@@ -30,6 +30,7 @@
#include "blorp_priv.h"
#include "dev/intel_device_info.h"
#include "common/intel_compute_slm.h"
#include "common/intel_sample_positions.h"
#include "common/intel_l3_config.h"
#include "genxml/gen_macros.h"
@@ -2060,8 +2061,8 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
.BindingTablePointer = surfaces_offset,
.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
.SharedLocalMemorySize = elk_encode_slm_size(GFX_VER,
prog_data->total_shared),
.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER,
prog_data->total_shared),
.BarrierEnable = cs_prog_data->uses_barrier,
#if GFX_VER >= 8 || GFX_VERx10 == 75
.CrossThreadConstantDataReadLength =
+57
View File
@@ -0,0 +1,57 @@
/*
* Copyright 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "intel_compute_slm.h"
#include <assert.h>
#include "util/macros.h"
#include "util/u_math.h"
/* Compute the Shared Local Memory size, in bytes, used for a request of
 * `bytes` bytes.
 *
 * A non-zero request is rounded up to the next power of two and then
 * clamped to a Gen-dependent minimum (1024 bytes on gen >= 9, 4096 bytes
 * before that). A request of zero stays zero. Requests larger than 64 kB
 * are rejected by an assert.
 */
uint32_t
intel_compute_slm_calculate_size(unsigned gen, uint32_t bytes)
{
   assert(bytes <= 64 * 1024);

   /* No SLM requested: nothing to round. */
   if (!(bytes > 0))
      return 0;

   const uint32_t pot_size = util_next_power_of_two(bytes);
   const uint32_t min_size = gen >= 9 ? 1024 : 4096;

   return MAX2(pot_size, min_size);
}
/* Encode a Shared Local Memory size for INTERFACE_DESCRIPTOR_DATA.
 *
 * Shared Local Memory is specified as powers of two, and encoded with the
 * following representations:
 *
 * Size   | 0 kB | 1 kB | 2 kB | 4 kB | 8 kB | 16 kB | 32 kB | 64 kB |
 * -------------------------------------------------------------------
 * Gfx7-8 |    0 | none | none |    1 |    2 |     4 |     8 |    16 |
 * -------------------------------------------------------------------
 * Gfx9+  |    0 |    1 |    2 |    3 |    4 |     5 |     6 |     7 |
 *
 * Returns the encoded value for `bytes` after it has been rounded by
 * intel_compute_slm_calculate_size(); a request of zero encodes as zero.
 */
uint32_t
intel_compute_slm_encode_size(unsigned gen, uint32_t bytes)
{
   if (!(bytes > 0))
      return 0;

   const uint32_t rounded = intel_compute_slm_calculate_size(gen, bytes);
   assert(util_is_power_of_two_nonzero(rounded));

   if (!(gen >= 9)) {
      /* Pre-Gfx9 representation: the size expressed in 4 kB units. */
      assert(rounded >= 4096);
      return rounded / 4096;
   }

   /* Gfx9+ representation: ffs() gives the 1-based index of the lowest set
    * bit, so 1024 bytes (1 kB, bit 10) yields 11 and encodes as 1.
    */
   assert(rounded >= 1024);
   return ffs(rounded) - 10;
}
+11
View File
@@ -0,0 +1,11 @@
/*
* Copyright 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#pragma once
#include <stdint.h>
/* Returns `bytes` rounded up to a power of two and clamped to the
 * Gen-dependent minimum SLM size (1024 for gen >= 9, 4096 otherwise);
 * returns 0 when `bytes` is 0. `bytes` must not exceed 64 kB.
 */
uint32_t intel_compute_slm_calculate_size(unsigned gen, uint32_t bytes);
/* Returns the INTERFACE_DESCRIPTOR_DATA encoding of the rounded SLM size
 * for `bytes`: the size in 4 kB units before Gfx9, or (log2(size) - 9) on
 * Gfx9+; returns 0 when `bytes` is 0.
 */
uint32_t intel_compute_slm_encode_size(unsigned gen, uint32_t bytes);
+2
View File
@@ -36,6 +36,8 @@ files_libintel_common = files(
'intel_bind_timeline.c',
'intel_bind_timeline.h',
'intel_buffer_alloc.h',
'intel_compute_slm.c',
'intel_compute_slm.h',
'intel_debug_identifier.h',
'intel_debug_identifier.c',
'intel_engine.c',
-46
View File
@@ -1556,52 +1556,6 @@ void brw_debug_key_recompile(const struct brw_compiler *c, void *log,
const struct brw_base_prog_key *old_key,
const struct brw_base_prog_key *key);
/* Compute the Shared Local Memory size, in bytes, for a request of `bytes`.
 *
 * Shared Local Memory Size is specified as a power of two and, when it is
 * not zero, also has a Gen-dependent minimum value: 1024 bytes for
 * gen >= 9 and 4096 bytes otherwise.
 *
 * Returns 0 when `bytes` is 0. `bytes` must not exceed 64 kB (asserted).
 */
static inline uint32_t
intel_calculate_slm_size(unsigned gen, uint32_t bytes)
{
assert(bytes <= 64 * 1024);
if (bytes > 0)
return MAX2(util_next_power_of_two(bytes), gen >= 9 ? 1024 : 4096);
else
return 0;
}
/* Encode a Shared Local Memory size for INTERFACE_DESCRIPTOR_DATA.
 *
 * `bytes` is first rounded by intel_calculate_slm_size() and the result is
 * then converted to the representation used by `gen` (see the table in the
 * body). Returns 0 when `bytes` is 0.
 */
static inline uint32_t
encode_slm_size(unsigned gen, uint32_t bytes)
{
uint32_t slm_size = 0;
/* Shared Local Memory is specified as powers of two, and encoded in
 * INTERFACE_DESCRIPTOR_DATA with the following representations:
 *
 * Size   | 0 kB | 1 kB | 2 kB | 4 kB | 8 kB | 16 kB | 32 kB | 64 kB |
 * -------------------------------------------------------------------
 * Gfx7-8 |    0 | none | none |    1 |    2 |     4 |     8 |    16 |
 * -------------------------------------------------------------------
 * Gfx9+  |    0 |    1 |    2 |    3 |    4 |     5 |     6 |     7 |
 */
if (bytes > 0) {
slm_size = intel_calculate_slm_size(gen, bytes);
assert(util_is_power_of_two_nonzero(slm_size));
if (gen >= 9) {
/* Turn an exponent of 10 (1024 B, i.e. 1 kB) into 1: ffs() returns the
 * 1-based index of the lowest set bit, so 1024 yields 11.
 */
assert(slm_size >= 1024);
slm_size = ffs(slm_size) - 10;
} else {
assert(slm_size >= 4096);
/* Convert to the pre-Gfx9 representation (size in 4 kB units). */
slm_size = slm_size / 4096;
}
}
return slm_size;
}
unsigned
brw_cs_push_const_total_size(const struct brw_cs_prog_data *cs_prog_data,
unsigned threads);
-40
View File
@@ -1652,46 +1652,6 @@ void elk_debug_key_recompile(const struct elk_compiler *c, void *log,
const struct elk_base_prog_key *old_key,
const struct elk_base_prog_key *key);
/* Compute the Shared Local Memory size, in bytes, for a request of `bytes`.
 *
 * Shared Local Memory Size is specified as a power of two and, when it is
 * not zero, also has a Gen-dependent minimum value: 1024 bytes for
 * gen >= 9 and 4096 bytes otherwise.
 *
 * Returns 0 when `bytes` is 0. `bytes` must not exceed 64 kB (asserted).
 */
static inline uint32_t
elk_calculate_slm_size(unsigned gen, uint32_t bytes)
{
assert(bytes <= 64 * 1024);
if (bytes > 0)
return MAX2(util_next_power_of_two(bytes), gen >= 9 ? 1024 : 4096);
else
return 0;
}
/* Encode a Shared Local Memory size for INTERFACE_DESCRIPTOR_DATA using the
 * pre-Gfx9 representation.
 *
 * Returns 0 when `bytes` is 0; otherwise returns the value of
 * elk_calculate_slm_size(gen, bytes) divided by 4096.
 */
static inline uint32_t
elk_encode_slm_size(unsigned gen, uint32_t bytes)
{
uint32_t slm_size = 0;
/* Shared Local Memory is specified as powers of two, and encoded in
 * INTERFACE_DESCRIPTOR_DATA with the following representations:
 *
 * Size   | 0 kB | 1 kB | 2 kB | 4 kB | 8 kB | 16 kB | 32 kB | 64 kB |
 * -------------------------------------------------------------------
 * Gfx7-8 |    0 | none | none |    1 |    2 |     4 |     8 |    16 |
 * -------------------------------------------------------------------
 * Gfx9+  |    0 |    1 |    2 |    3 |    4 |     5 |     6 |     7 |
 */
if (bytes > 0) {
slm_size = elk_calculate_slm_size(gen, bytes);
assert(util_is_power_of_two_nonzero(slm_size));
/* NOTE(review): only the pre-Gfx9 encoding is implemented here; this
 * assert would fire for the 1 kB / 2 kB sizes that gen >= 9 allows, so
 * callers presumably only pass gen < 9 -- confirm.
 */
assert(slm_size >= 4096);
/* Convert to the pre-Gfx9 representation (size in 4 kB units). */
slm_size = slm_size / 4096;
}
return slm_size;
}
unsigned
elk_cs_push_const_total_size(const struct elk_cs_prog_data *cs_prog_data,
unsigned threads);
+2 -1
View File
@@ -29,6 +29,7 @@
#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "common/intel_compute_slm.h"
#include "common/intel_l3_config.h"
#include "common/intel_sample_positions.h"
#include "compiler/brw_disasm.h"
@@ -1162,7 +1163,7 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
const unsigned chunk_size = 16;
const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
assert(shared_size <=
intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));
intel_compute_slm_calculate_size(compiler->devinfo->ver, nir->info.shared_size));
NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
shared_size, chunk_size);
+2 -1
View File
@@ -30,6 +30,7 @@
#include "vk_util.h"
#include "common/intel_aux_map.h"
#include "common/intel_compute_slm.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "genxml/genX_rt_pack.h"
@@ -283,7 +284,7 @@ get_interface_descriptor_data(struct anv_cmd_buffer *cmd_buffer,
.BindingTableEntryCount = devinfo->verx10 == 125 ?
0 : 1 + MIN2(shader->bind_map.surface_count, 30),
.NumberofThreadsinGPGPUThreadGroup = dispatch->threads,
.SharedLocalMemorySize = encode_slm_size(GFX_VER, prog_data->base.total_shared),
.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER, prog_data->base.total_shared),
.PreferredSLMAllocationSize = preferred_slm_allocation_size(devinfo),
.NumberOfBarriers = prog_data->uses_barrier,
};
+4 -3
View File
@@ -27,6 +27,7 @@
#include "genxml/genX_pack.h"
#include "genxml/genX_rt_pack.h"
#include "common/intel_compute_slm.h"
#include "common/intel_genX_state_brw.h"
#include "common/intel_l3_config.h"
#include "common/intel_sample_positions.h"
@@ -1792,7 +1793,7 @@ emit_task_state(struct anv_graphics_pipeline *pipeline)
task.NumberofBarriers = task_prog_data->base.uses_barrier;
task.SharedLocalMemorySize =
encode_slm_size(GFX_VER, task_prog_data->base.base.total_shared);
intel_compute_slm_encode_size(GFX_VER, task_prog_data->base.base.total_shared);
task.PreferredSLMAllocationSize =
preferred_slm_allocation_size(devinfo);
@@ -1873,7 +1874,7 @@ emit_mesh_state(struct anv_graphics_pipeline *pipeline)
mesh.NumberofBarriers = mesh_prog_data->base.uses_barrier;
mesh.SharedLocalMemorySize =
encode_slm_size(GFX_VER, mesh_prog_data->base.base.total_shared);
intel_compute_slm_encode_size(GFX_VER, mesh_prog_data->base.base.total_shared);
mesh.PreferredSLMAllocationSize =
preferred_slm_allocation_size(devinfo);
@@ -2080,7 +2081,7 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
0 : 1 + MIN2(pipeline->cs->bind_map.surface_count, 30),
.BarrierEnable = cs_prog_data->uses_barrier,
.SharedLocalMemorySize =
encode_slm_size(GFX_VER, cs_prog_data->base.total_shared),
intel_compute_slm_encode_size(GFX_VER, cs_prog_data->base.total_shared),
.ConstantURBEntryReadOffset = 0,
.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
+5 -4
View File
@@ -30,6 +30,7 @@
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "common/intel_compute_slm.h"
#include "common/intel_genX_state_brw.h"
static void
@@ -580,8 +581,8 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
.BindingTablePointer = 0,
.BindingTableEntryCount = 0,
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
.SharedLocalMemorySize = encode_slm_size(GFX_VER,
prog_data->base.total_shared),
.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER,
prog_data->base.total_shared),
.NumberOfBarriers = prog_data->uses_barrier,
};
}
@@ -649,8 +650,8 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
.SamplerCount = 0,
.BindingTableEntryCount = 0,
.BarrierEnable = prog_data->uses_barrier,
.SharedLocalMemorySize = encode_slm_size(GFX_VER,
prog_data->base.total_shared),
.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER,
prog_data->base.total_shared),
.ConstantURBEntryReadOffset = 0,
.ConstantURBEntryReadLength = prog_data->push.per_thread.regs,
+2 -1
View File
@@ -29,6 +29,7 @@
#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "common/intel_compute_slm.h"
#include "common/intel_l3_config.h"
#include "common/intel_sample_positions.h"
#include "compiler/elk/elk_disasm.h"
@@ -568,7 +569,7 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
const unsigned chunk_size = 16;
const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
assert(shared_size <=
elk_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));
intel_compute_slm_calculate_size(compiler->devinfo->ver, nir->info.shared_size));
NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
shared_size, chunk_size);
+2 -2
View File
@@ -27,6 +27,7 @@
#include "genxml/genX_pack.h"
#include "genxml/genX_rt_pack.h"
#include "common/intel_compute_slm.h"
#include "common/intel_genX_state_elk.h"
#include "common/intel_l3_config.h"
#include "common/intel_sample_positions.h"
@@ -1939,8 +1940,7 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
*/
.BindingTableEntryCount = 1 + MIN2(cs_bin->bind_map.surface_count, 30),
.BarrierEnable = cs_prog_data->uses_barrier,
.SharedLocalMemorySize =
elk_encode_slm_size(GFX_VER, cs_prog_data->base.total_shared),
.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER, cs_prog_data->base.total_shared),
#if GFX_VERx10 != 75
.ConstantURBEntryReadOffset = 0,