panfrost: Fix shader_clock support for v6+
Set PANFROST_JD_REQ_CYCLE_COUNT on the job when a shader uses LD_GCLK,
which is required for shader_clock to work.
Fixes: c1ce2dcc ("pan/bi: Enable ARB_shader_clock extension support")
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Signed-off-by: Ashley Smith <ashley.smith@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35363>
This commit is contained in:
@@ -307,7 +307,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
|
||||
GL_ARB_sample_locations DONE (freedreno/a6xx, nvc0, zink)
|
||||
GL_ARB_seamless_cubemap_per_texture DONE (etnaviv/SEAMLESS_CUBE_MAP, freedreno, nvc0, r600, radeonsi, softpipe, virgl, zink, asahi, iris, crocus)
|
||||
GL_ARB_shader_ballot DONE (nvc0, radeonsi, zink, iris, crocus/gen8, d3d12, asahi)
|
||||
GL_ARB_shader_clock DONE (freedreno/a6xx, nv50, nvc0, r600, radeonsi, llvmpipe, virgl, panfrost, zink, iris, crocus/gen7+)
|
||||
GL_ARB_shader_clock DONE (freedreno/a6xx, nv50, nvc0, r600, radeonsi, llvmpipe, virgl, panfrost/v6+, zink, iris, crocus/gen7+)
|
||||
GL_ARB_shader_stencil_export DONE (r600, radeonsi, softpipe, llvmpipe, virgl, panfrost, zink, asahi, iris/gen9+)
|
||||
GL_ARB_shader_viewport_layer_array DONE (freedreno/a6xx, nvc0, radeonsi, zink, iris, crocus/gen6+, asahi)
|
||||
GL_ARB_shading_language_include DONE
|
||||
|
||||
@@ -37,3 +37,4 @@ VK_EXT_load_store_op_none on panvk
|
||||
VK_EXT_scalar_block_layout on radv/gfx6
|
||||
VK_EXT_inline_uniform_block on panvk
|
||||
cl_khr_fp16 on asahi, freedreno, llvmpipe, panfrost, radeonsi and zink
|
||||
GL_ARB_shader_clock on panfrost/v6+
|
||||
|
||||
@@ -2978,6 +2978,9 @@ panfrost_update_shader_state(struct panfrost_batch *batch,
|
||||
unsigned dirty_3d = ctx->dirty;
|
||||
unsigned dirty = ctx->dirty_shader[st];
|
||||
|
||||
if (ss->info.has_shader_clk_instr)
|
||||
batch->need_job_req_cycle_count = true;
|
||||
|
||||
if (dirty & (PAN_DIRTY_STAGE_TEXTURE | PAN_DIRTY_STAGE_SHADER)) {
|
||||
batch->textures[st] = panfrost_emit_texture_descriptors(batch, st);
|
||||
}
|
||||
|
||||
@@ -667,11 +667,9 @@ panfrost_init_screen_caps(struct panfrost_screen *screen)
|
||||
caps->anisotropic_filter =
|
||||
panfrost_device_gpu_rev(dev) >= dev->model->min_rev_anisotropic;
|
||||
|
||||
/* Compile side is done for Bifrost, Midgard TODO. Needs some kernel
|
||||
* work to turn on, since CYCLE_COUNT_START needs to be issued. In
|
||||
* kbase, userspace requests this via BASE_JD_REQ_PERMON. There is not
|
||||
* yet a way to request this with mainline TODO */
|
||||
caps->shader_clock = dev->arch >= 6;
|
||||
/* Compile side is TODO for Midgard. */
|
||||
caps->shader_clock = dev->arch >= 6 &&
|
||||
dev->kmod.props.gpu_can_query_timestamp;
|
||||
|
||||
caps->vs_instanceid = true;
|
||||
caps->texture_multisample = true;
|
||||
|
||||
@@ -2318,9 +2318,9 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
|
||||
break;
|
||||
|
||||
case nir_intrinsic_shader_clock:
|
||||
assert(nir_intrinsic_memory_scope(instr) == SCOPE_SUBGROUP);
|
||||
bi_ld_gclk_u64_to(b, dst, BI_SOURCE_CYCLE_COUNTER);
|
||||
bi_split_def(b, &instr->def);
|
||||
b->shader->info.has_ld_gclk_instr = true;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_ddx:
|
||||
@@ -6326,6 +6326,7 @@ bi_compile_variant(nir_shader *nir,
|
||||
|
||||
info->ubo_mask |= ctx->ubo_mask;
|
||||
info->tls_size = MAX2(info->tls_size, ctx->info.tls_size);
|
||||
info->has_shader_clk_instr = ctx->info.has_ld_gclk_instr;
|
||||
|
||||
if (idvs == BI_IDVS_VARYING) {
|
||||
info->vs.secondary_enable = (binary->size > offset);
|
||||
|
||||
@@ -886,6 +886,7 @@ struct bi_shader_info {
|
||||
unsigned tls_size;
|
||||
unsigned work_reg_count;
|
||||
unsigned push_offset;
|
||||
bool has_ld_gclk_instr;
|
||||
};
|
||||
|
||||
/* State of index-driven vertex shading for current shader */
|
||||
|
||||
@@ -281,6 +281,9 @@ struct pan_shader_info {
|
||||
/* Floating point controls that the driver should try to honour */
|
||||
bool ftz_fp16, ftz_fp32;
|
||||
|
||||
/* True if the shader contains a shader_clock instruction. */
|
||||
bool has_shader_clk_instr;
|
||||
|
||||
unsigned sampler_count;
|
||||
unsigned texture_count;
|
||||
unsigned ubo_count;
|
||||
|
||||
Reference in New Issue
Block a user