diff --git a/src/mesa/main/feedback.c b/src/mesa/main/feedback.c index e6d925b274d..3a035b67eeb 100644 --- a/src/mesa/main/feedback.c +++ b/src/mesa/main/feedback.c @@ -637,9 +637,11 @@ _mesa_RenderMode( GLenum mode ) return 0; } - ctx->RenderMode = mode; st_RenderMode( ctx, mode ); + /* finally update render mode to new one */ + ctx->RenderMode = mode; + return result; } diff --git a/src/mesa/meson.build b/src/mesa/meson.build index 522a4c2eb11..8fba301efc5 100644 --- a/src/mesa/meson.build +++ b/src/mesa/meson.build @@ -351,6 +351,7 @@ files_libmesa = files( 'state_tracker/st_draw.c', 'state_tracker/st_draw.h', 'state_tracker/st_draw_feedback.c', + 'state_tracker/st_draw_hw_select.c', 'state_tracker/st_extensions.c', 'state_tracker/st_extensions.h', 'state_tracker/st_format.c', diff --git a/src/mesa/state_tracker/st_cb_feedback.c b/src/mesa/state_tracker/st_cb_feedback.c index c5e6f779cd0..e937386fde0 100644 --- a/src/mesa/state_tracker/st_cb_feedback.c +++ b/src/mesa/state_tracker/st_cb_feedback.c @@ -291,12 +291,16 @@ st_RenderMode(struct gl_context *ctx, GLenum newMode ) st_init_draw_functions(st->screen, &ctx->Driver); } else if (newMode == GL_SELECT) { - if (!st->selection_stage) - st->selection_stage = draw_glselect_stage(ctx, draw); - draw_set_rasterize_stage(draw, st->selection_stage); - /* Plug in new vbo draw function */ - ctx->Driver.DrawGallium = _mesa_draw_gallium_fallback; - ctx->Driver.DrawGalliumMultiMode = _mesa_draw_gallium_multimode_fallback; + if (ctx->Const.HardwareAcceleratedSelect) + st_init_hw_select_draw_functions(st->screen, &ctx->Driver); + else { + if (!st->selection_stage) + st->selection_stage = draw_glselect_stage(ctx, draw); + draw_set_rasterize_stage(draw, st->selection_stage); + /* Plug in new vbo draw function */ + ctx->Driver.DrawGallium = _mesa_draw_gallium_fallback; + ctx->Driver.DrawGalliumMultiMode = _mesa_draw_gallium_multimode_fallback; + } } else { struct gl_program *vp = st->ctx->VertexProgram._Current; @@ -311,4 +315,8 
@@ st_RenderMode(struct gl_context *ctx, GLenum newMode ) if (vp) st->dirty |= ST_NEW_VERTEX_PROGRAM(st, vp); } + + /* Restore geometry shader states when leaving GL_SELECT mode. */ + if (ctx->RenderMode == GL_SELECT && ctx->Const.HardwareAcceleratedSelect) + st->dirty |= ST_NEW_GS_SSBOS | ST_NEW_GS_CONSTANTS | ST_NEW_GS_STATE; } diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 7c889727dbc..ffd50ec3e17 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -64,6 +64,7 @@ #include "util/u_upload_mgr.h" #include "util/u_vbuf.h" #include "util/u_memory.h" +#include "util/hash_table.h" #include "cso_cache/cso_context.h" #include "compiler/glsl/glsl_parser_extras.h" #include "nir/nir_to_tgsi.h" @@ -958,6 +959,12 @@ st_destroy_context(struct st_context *st) st_release_program(st, &st->tep); st_release_program(st, &st->cp); + if (st->hw_select_shaders) { + hash_table_foreach(st->hw_select_shaders, entry) + st->pipe->delete_gs_state(st->pipe, entry->data); + _mesa_hash_table_destroy(st->hw_select_shaders, NULL); + } + /* release framebuffer in the winsys buffers list */ LIST_FOR_EACH_ENTRY_SAFE_REV(stfb, next, &st->winsys_buffers, head) { _mesa_reference_framebuffer(&stfb, NULL); diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index d3eb1075617..749d3a4fea5 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -383,6 +383,8 @@ struct st_context struct st_zombie_shader_node list; simple_mtx_t mutex; } zombie_shaders; + + struct hash_table *hw_select_shaders; }; diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index cd0b8d6e8b2..1fbfc5c1170 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -519,3 +519,70 @@ st_draw_quad(struct st_context *st, return true; } + +static void +st_hw_select_draw_gallium(struct gl_context *ctx, + struct pipe_draw_info 
*info, + unsigned drawid_offset, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) +{ + struct st_context *st = st_context(ctx); + + prepare_draw(st, ctx, ST_PIPELINE_RENDER_STATE_MASK, ST_PIPELINE_RENDER); + + if (!prepare_indexed_draw(st, ctx, info, draws, num_draws)) + return; + + if (!st_draw_hw_select_prepare_common(ctx) || + !st_draw_hw_select_prepare_mode(ctx, info)) + return; + + cso_multi_draw(st->cso_context, info, drawid_offset, draws, num_draws); +} + +static void +st_hw_select_draw_gallium_multimode(struct gl_context *ctx, + struct pipe_draw_info *info, + const struct pipe_draw_start_count_bias *draws, + const unsigned char *mode, + unsigned num_draws) +{ + struct st_context *st = st_context(ctx); + + prepare_draw(st, ctx, ST_PIPELINE_RENDER_STATE_MASK, ST_PIPELINE_RENDER); + + if (!prepare_indexed_draw(st, ctx, info, draws, num_draws)) + return; + + if (!st_draw_hw_select_prepare_common(ctx)) + return; + + unsigned i, first; + struct cso_context *cso = st->cso_context; + + /* Find consecutive draws where mode doesn't vary. */ + for (i = 0, first = 0; i <= num_draws; i++) { + if (i == num_draws || mode[i] != mode[first]) { + info->mode = mode[first]; + + if (st_draw_hw_select_prepare_mode(ctx, info)) + cso_multi_draw(cso, info, 0, &draws[first], i - first); + + first = i; + + /* We can pass the reference only once. st_buffer_object keeps + * the reference alive for later draws. 
+ */ + info->take_index_buffer_ownership = false; + } + } +} + +void +st_init_hw_select_draw_functions(struct pipe_screen *screen, + struct dd_function_table *functions) +{ + functions->DrawGallium = st_hw_select_draw_gallium; + functions->DrawGalliumMultiMode = st_hw_select_draw_gallium_multimode; +} diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h index e0e6e472bbd..46ee64df6fa 100644 --- a/src/mesa/state_tracker/st_draw.h +++ b/src/mesa/state_tracker/st_draw.h @@ -99,4 +99,13 @@ st_indirect_draw_vbo(struct gl_context *ctx, const struct _mesa_index_buffer *ib, bool primitive_restart, unsigned restart_index); + +bool +st_draw_hw_select_prepare_common(struct gl_context *ctx); +bool +st_draw_hw_select_prepare_mode(struct gl_context *ctx, struct pipe_draw_info *info); +void +st_init_hw_select_draw_functions(struct pipe_screen *screen, + struct dd_function_table *functions); + #endif diff --git a/src/mesa/state_tracker/st_draw_hw_select.c b/src/mesa/state_tracker/st_draw_hw_select.c new file mode 100644 index 00000000000..5a032da046a --- /dev/null +++ b/src/mesa/state_tracker/st_draw_hw_select.c @@ -0,0 +1,821 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "main/enums.h" +#include "main/context.h" + +#include "st_context.h" +#include "st_nir.h" +#include "st_draw.h" + +#include "nir.h" +#include "nir_builtin_builder.h" + +#include "u_memory.h" + +union state_key { + struct { + unsigned num_user_clip_planes:4; + unsigned face_culling_enabled:1; + unsigned result_offset_from_attribute:1; + unsigned primitive:4; + }; + uint32_t u32; +}; + +enum primitive_state { + HW_SELECT_PRIM_NONE, + HW_SELECT_PRIM_POINTS, + HW_SELECT_PRIM_LINES, + HW_SELECT_PRIM_TRIANGLES, + HW_SELECT_PRIM_QUADS, +}; + +struct geometry_constant { + float depth_scale; + float depth_transport; + uint32_t culling_config; + uint32_t result_offset; + float clip_planes[MAX_CLIP_PLANES][4]; +}; + +#define set_uniform_location(var, field, packed) \ + do { \ + unsigned offset = Offset(struct geometry_constant, field); \ + var->data.driver_location = offset >> (packed ? 
2 : 4); \ + var->data.location_frac = (offset >> 2) & 0x3; \ + } while (0) + +static nir_ssa_def * +has_nan_or_inf(nir_builder *b, nir_ssa_def *v) +{ + nir_ssa_def *nan = nir_bany_fnequal4(b, v, v); + + nir_ssa_def *imm = nir_imm_float(b, INFINITY); + nir_ssa_def *inf = nir_bany(b, nir_feq(b, nir_fabs(b, v), imm)); + + return nir_ior(b, nan, inf); +} + +static void +return_if_true(nir_builder *b, nir_ssa_def *cond) +{ + nir_if *if_cond = nir_push_if(b, cond); + nir_jump(b, nir_jump_return); + nir_pop_if(b, if_cond); +} + +static void +get_input_vertices(nir_builder *b, nir_ssa_def **v) +{ + const int num_in_vert = b->shader->info.gs.vertices_in; + + nir_variable *in_pos = nir_variable_create( + b->shader, nir_var_shader_in, glsl_array_type(glsl_vec4_type(), num_in_vert, 0), + "gl_Position"); + in_pos->data.location = VARYING_SLOT_POS; + + nir_ssa_def *is_nan_or_inf = NULL; + for (int i = 0; i < num_in_vert; i++) { + v[i] = nir_load_array_var_imm(b, in_pos, i); + nir_ssa_def *r = has_nan_or_inf(b, v[i]); + is_nan_or_inf = i ? 
nir_ior(b, is_nan_or_inf, r) : r; + } + return_if_true(b, is_nan_or_inf); +} + +static void +face_culling(nir_builder *b, nir_ssa_def **v, bool packed) +{ + /* use the z value of the face normal to determine if the face points to us: + * Nz = (x1 - x0) * (y2 - y0) - (y1 - y0) * (x2 - x0) + * + * it should be in NDC (Normalized Device Coordinate), but now we are in clip + * space (Vd = Vc / Vc.w), so multiply Nz with w0*w1*w2 to get the clip space + * value: + * det = x0 * (y1 * w2 - y2 * w1) + + * x1 * (y2 * w0 - y0 * w2) + + * x2 * (y0 * w1 - y1 * w0) + * + * we only care about the sign of the det, but also need to account for the signs of + * w0/w1/w2 as a negative w would change the direction of Nz < 0 + */ + nir_ssa_def *y1w2 = nir_fmul(b, nir_channel(b, v[1], 1), nir_channel(b, v[2], 3)); + nir_ssa_def *y2w1 = nir_fmul(b, nir_channel(b, v[2], 1), nir_channel(b, v[1], 3)); + nir_ssa_def *y2w0 = nir_fmul(b, nir_channel(b, v[2], 1), nir_channel(b, v[0], 3)); + nir_ssa_def *y0w2 = nir_fmul(b, nir_channel(b, v[0], 1), nir_channel(b, v[2], 3)); + nir_ssa_def *y0w1 = nir_fmul(b, nir_channel(b, v[0], 1), nir_channel(b, v[1], 3)); + nir_ssa_def *y1w0 = nir_fmul(b, nir_channel(b, v[1], 1), nir_channel(b, v[0], 3)); + nir_ssa_def *t0 = nir_fmul(b, nir_channel(b, v[0], 0), nir_fsub(b, y1w2, y2w1)); + nir_ssa_def *t1 = nir_fmul(b, nir_channel(b, v[1], 0), nir_fsub(b, y2w0, y0w2)); + nir_ssa_def *t2 = nir_fmul(b, nir_channel(b, v[2], 0), nir_fsub(b, y0w1, y1w0)); + nir_ssa_def *det = nir_fadd(b, nir_fadd(b, t0, t1), t2); + + /* invert det sign when an odd number of vertices have w < 0 (xor of the three sign flags) */ + nir_ssa_def *n0 = nir_flt(b, nir_channel(b, v[0], 3), nir_imm_float(b, 0)); + nir_ssa_def *n1 = nir_flt(b, nir_channel(b, v[1], 3), nir_imm_float(b, 0)); + nir_ssa_def *n2 = nir_flt(b, nir_channel(b, v[2], 3), nir_imm_float(b, 0)); + nir_ssa_def *cond = nir_ixor(b, nir_ixor(b, n0, n1), n2); + det = nir_bcsel(b, cond, nir_fneg(b, det), det); + + nir_variable *culling_config = nir_variable_create( + b->shader, 
nir_var_uniform, glsl_uint_type(), "culling_config"); + set_uniform_location(culling_config, culling_config, packed); + nir_ssa_def *config = nir_i2b(b, nir_load_var(b, culling_config)); + + /* det < 0 then z points to camera */ + nir_ssa_def *zero = nir_imm_zero(b, 1, det->bit_size); + nir_ssa_def *is_zero = nir_feq(b, det, zero); + nir_ssa_def *is_neg = nir_flt(b, det, zero); + nir_ssa_def *cull = nir_ixor(b, is_neg, config); + return_if_true(b, nir_ior(b, is_zero, cull)); +} + +static void +fast_frustum_culling(nir_builder *b, nir_ssa_def **v) +{ + nir_ssa_def *cull = NULL; + + /* there are six culling planes for the visible volume: + * 1. x + w = 0 + * 2. -x + w = 0 + * 3. y + w = 0 + * 4. -y + w = 0 + * 5. z + w = 0 + * 6. -z + w = 0 + * + * if all vertices of the primitive are outside (plane equation <0) of + * any plane, the primitive must be invisible. + */ + for (int i = 0; i < 6; i++) { + nir_ssa_def *outside = NULL; + + for (int j = 0; j < b->shader->info.gs.vertices_in; j++) { + nir_ssa_def *c = nir_channel(b, v[j], i >> 1); + if (i & 1) + c = nir_fneg(b, c); + + nir_ssa_def *r = nir_flt(b, nir_channel(b, v[j], 3), c); + outside = j ? nir_iand(b, outside, r) : r; + } + + cull = i ? 
nir_ior(b, cull, outside) : outside; + } + + return_if_true(b, cull); +} + +static nir_ssa_def * +get_intersection(nir_builder *b, nir_ssa_def *v1, nir_ssa_def *v2, + nir_ssa_def *d1, nir_ssa_def *d2) +{ + nir_ssa_def *factor = nir_fdiv(b, d1, nir_fsub(b, d1, d2)); + return nir_fmad(b, nir_fsub(b, v2, v1), factor, v1); +} + +#define begin_for_loop(name, max) \ + nir_variable *name##_index = \ + nir_local_variable_create(b->impl, glsl_int_type(), #name "_i"); \ + nir_store_var(b, name##_index, nir_imm_int(b, 0), 1); \ + \ + nir_loop *name = nir_push_loop(b); \ + { \ + nir_ssa_def *idx = nir_load_var(b, name##_index); \ + nir_if *if_in_loop = nir_push_if(b, nir_ilt(b, idx, max)); + +#define end_for_loop(name) \ + nir_store_var(b, name##_index, nir_iadd_imm(b, idx, 1), 1); \ + nir_push_else(b, if_in_loop); \ + nir_jump(b, nir_jump_break); \ + nir_pop_if(b, if_in_loop); \ + } \ + nir_pop_loop(b, name); + +static void +clip_with_plane(nir_builder *b, nir_variable *vert, nir_variable *num_vert, + int max_vert, nir_ssa_def *plane) +{ + nir_variable *all_clipped = nir_local_variable_create( + b->impl, glsl_bool_type(), "all_clipped"); + nir_store_var(b, all_clipped, nir_imm_true(b), 1); + + nir_variable *dist = nir_local_variable_create( + b->impl, glsl_array_type(glsl_float_type(), max_vert, 0), "dist"); + + nir_ssa_def *num = nir_load_var(b, num_vert); + begin_for_loop(dist_loop, num) + { + nir_ssa_def *v = nir_load_array_var(b, vert, idx); + nir_ssa_def *d = nir_fdot(b, v, plane); + nir_store_array_var(b, dist, idx, d, 1); + + nir_ssa_def *clipped = nir_flt(b, d, nir_imm_float(b, 0)); + nir_store_var(b, all_clipped, + nir_iand(b, nir_load_var(b, all_clipped), clipped), 1); + } + end_for_loop(dist_loop) + + return_if_true(b, nir_load_var(b, all_clipped)); + + /* Use +/0/- to denote the dist[i] sign, which means: + * +: inside plane + * -: outside plane + * 0: just on the plane + * + * Some examples: + * ++++: no vertex clipped + * ----: all vertices clipped + * +-++: 
one vertex clipped, need to insert two vertices at '-', array grows + * +--+: two vertices clipped, need to insert two vertices at '--', array stays the same size + * +---: three vertices clipped, need to insert two vertices at '---', array is trimmed + * +-0+: one vertex clipped, need to insert one vertex at '-', array stays the same size + * + * Plane clipping only produces convex polygons, so '-' must be contiguous; there's + * no '+-+-', so one clip plane can only grow the array by 1. + */ + + /* when the array grows or '-' has been replaced with an inserted vertex, save the + * original vert to be used by the following calculation. + */ + nir_variable *saved = + nir_local_variable_create(b->impl, glsl_vec4_type(), "saved"); + + nir_variable *vert_index = + nir_local_variable_create(b->impl, glsl_int_type(), "vert_index"); + nir_store_var(b, vert_index, nir_imm_int(b, 0), 1); + + begin_for_loop(vert_loop, num) + { + nir_ssa_def *di = nir_load_array_var(b, dist, idx); + nir_if *if_clipped = nir_push_if(b, nir_flt(b, di, nir_imm_float(b, 0))); + { + /* - case, we need to take care of sign change and insert vertex */ + + nir_ssa_def *prev = nir_bcsel(b, nir_ieq_imm(b, idx, 0), + nir_iadd_imm(b, num, -1), + nir_iadd_imm(b, idx, -1)); + nir_ssa_def *dp = nir_load_array_var(b, dist, prev); + nir_if *prev_if = nir_push_if(b, nir_flt(b, nir_imm_float(b, 0), dp)); + { + /* +- case, replace - with inserted vertex + * assert(vert_index <= idx), array is sure to not grow here + * but need to save vert[idx] when vert_index==idx + */ + + nir_ssa_def *vi = nir_load_array_var(b, vert, idx); + nir_store_var(b, saved, vi, 0xf); + + nir_ssa_def *vp = nir_load_array_var(b, vert, prev); + nir_ssa_def *iv = get_intersection(b, vp, vi, dp, di); + nir_ssa_def *index = nir_load_var(b, vert_index); + nir_store_array_var(b, vert, index, iv, 0xf); + + nir_store_var(b, vert_index, nir_iadd_imm(b, index, 1), 1); + } + nir_pop_if(b, prev_if); + + nir_ssa_def *next = nir_bcsel(b, nir_ieq(b, idx, nir_iadd_imm(b, num, -1)), + nir_imm_int(b, 0), nir_iadd_imm(b, 
idx, 1)); + nir_ssa_def *dn = nir_load_array_var(b, dist, next); + nir_if *next_if = nir_push_if(b, nir_flt(b, nir_imm_float(b, 0), dn)); + { + /* -+ case, may grow array: + * vert_index > idx: +-+ case, grow array, current vertex in 'saved', + * save next + to 'saved', will replace it with inserted vertex. + * vert_index <= idx: --+ case, will replace last - with inserted vertex, + * no need to save last -, because + case won't use - value. + */ + + nir_ssa_def *index = nir_load_var(b, vert_index); + nir_ssa_def *vi = nir_bcsel(b, nir_flt(b, idx, index), + nir_load_var(b, saved), + nir_load_array_var(b, vert, idx)); + nir_ssa_def *vn = nir_load_array_var(b, vert, next); + nir_ssa_def *iv = get_intersection(b, vn, vi, dn, di); + + nir_store_var(b, saved, nir_load_array_var(b, vert, index), 0xf); + nir_store_array_var(b, vert, index, iv, 0xf); + + nir_store_var(b, vert_index, nir_iadd_imm(b, index, 1), 1); + } + nir_pop_if(b, next_if); + } + nir_push_else(b, if_clipped); + { + /* +/0 case, just keep the vert + * vert_index > idx: array grew case, vert[idx] is inserted vertex or prev + * +/0 vertex, current vertex is in 'saved', need to save next vertex + * vert_index < idx: array trim case + */ + + nir_ssa_def *index = nir_load_var(b, vert_index); + nir_ssa_def *vi = nir_bcsel(b, nir_flt(b, idx, index), + nir_load_var(b, saved), + nir_load_array_var(b, vert, idx)); + + nir_store_var(b, saved, nir_load_array_var(b, vert, index), 0xf); + nir_store_array_var(b, vert, index, vi, 0xf); + + nir_store_var(b, vert_index, nir_iadd_imm(b, index, 1), 1); + } + nir_pop_if(b, if_clipped); + } + end_for_loop(vert_loop); + + nir_copy_var(b, num_vert, vert_index); +} + +static nir_ssa_def * +get_user_clip_plane(nir_builder *b, int index, bool packed) +{ + char name[16]; + snprintf(name, sizeof(name), "gl_ClipPlane%d", index); + nir_variable *plane = nir_variable_create( + b->shader, nir_var_uniform, glsl_vec4_type(), name); + + set_uniform_location(plane, clip_planes[index][0], 
packed); + + return nir_load_var(b, plane); +} + +static void +get_depth_range_transform(nir_builder *b, bool packed, nir_ssa_def **trans) +{ + nir_variable *depth_scale = nir_variable_create( + b->shader, nir_var_uniform, glsl_float_type(), "depth_scale"); + set_uniform_location(depth_scale, depth_scale, packed); + + nir_variable *depth_transport = nir_variable_create( + b->shader, nir_var_uniform, glsl_float_type(), "depth_transport"); + set_uniform_location(depth_transport, depth_transport, packed); + + trans[0] = nir_load_var(b, depth_scale); + trans[1] = nir_load_var(b, depth_transport); +} + +static nir_ssa_def * +get_window_space_depth(nir_builder *b, nir_ssa_def *v, nir_ssa_def **trans) +{ + nir_ssa_def *z = nir_channel(b, v, 2); + nir_ssa_def *w = nir_channel(b, v, 3); + + /* do perspective division, if w==0, xyz must be 0 too (otherwise can't pass + * the clip test), 0/0=NaN, but we want it to be the nearest point. + */ + nir_ssa_def *c = nir_feq(b, w, nir_imm_float(b, 0)); + nir_ssa_def *d = nir_bcsel(b, c, nir_imm_float(b, -1), nir_fdiv(b, z, w)); + + /* map [-1, 1] to [near, far] set by glDepthRange(near, far) */ + return nir_fmad(b, trans[0], d, trans[1]); +} + +static void +update_result_buffer(nir_builder *b, nir_ssa_def *dmin, nir_ssa_def *dmax, + bool offset_from_attribute, bool packed) +{ + nir_ssa_def *offset; + if (offset_from_attribute) { + nir_variable *in_offset = nir_variable_create( + b->shader, nir_var_shader_in, + glsl_array_type(glsl_uint_type(), b->shader->info.gs.vertices_in, 0), + "result_offset"); + in_offset->data.location = VARYING_SLOT_VAR0; + offset = nir_load_array_var_imm(b, in_offset, 0); + } else { + nir_variable *uni_offset = nir_variable_create( + b->shader, nir_var_uniform, glsl_uint_type(), "result_offset"); + set_uniform_location(uni_offset, result_offset, packed); + offset = nir_load_var(b, uni_offset); + } + + nir_variable_create(b->shader, nir_var_mem_ssbo, + glsl_array_type(glsl_uint_type(), 0, 0), "result"); + /* 
driver_location = 0 (slot 0) */ + + nir_ssa_def *ssbo = nir_imm_int(b, 0); + nir_ssbo_atomic_exchange(b, 32, ssbo, offset, nir_imm_int(b, 1)); + nir_ssbo_atomic_umin(b, 32, ssbo, nir_iadd_imm(b, offset, 4), dmin); + nir_ssbo_atomic_umax(b, 32, ssbo, nir_iadd_imm(b, offset, 8), dmax); +} + +static void +build_point_nir_shader(nir_builder *b, union state_key state, bool packed) +{ + assert(b->shader->info.gs.vertices_in == 1); + + nir_ssa_def *v; + get_input_vertices(b, &v); + + fast_frustum_culling(b, &v); + + nir_ssa_def *outside = NULL; + for (int i = 0; i < state.num_user_clip_planes; i++) { + nir_ssa_def *p = get_user_clip_plane(b, i, packed); + nir_ssa_def *d = nir_fdot(b, v, p); + nir_ssa_def *r = nir_flt(b, d, nir_imm_float(b, 0)); + outside = i ? nir_ior(b, outside, r) : r; + } + if (outside) + return_if_true(b, outside); + + nir_ssa_def *trans[2]; + get_depth_range_transform(b, packed, trans); + + nir_ssa_def *depth = get_window_space_depth(b, v, trans); + nir_ssa_def *fdepth = nir_fmul_imm(b, depth, 4294967295.0); + nir_ssa_def *idepth = nir_f2uN(b, fdepth, 32); + + update_result_buffer(b, idepth, idepth, state.result_offset_from_attribute, packed); +} + +static nir_variable * +create_clip_planes(nir_builder *b, int num_clip_planes, bool packed) +{ + nir_variable *clip_planes = nir_local_variable_create( + b->impl, glsl_array_type(glsl_vec4_type(), num_clip_planes, 0), "clip_planes"); + + nir_ssa_def *unit_clip_planes[6] = { + nir_imm_vec4(b, 1, 0, 0, 1), + nir_imm_vec4(b, -1, 0, 0, 1), + nir_imm_vec4(b, 0, 1, 0, 1), + nir_imm_vec4(b, 0, -1, 0, 1), + nir_imm_vec4(b, 0, 0, 1, 1), + nir_imm_vec4(b, 0, 0, -1, 1), + }; + for (int i = 0; i < 6; i++) + nir_store_array_var_imm(b, clip_planes, i, unit_clip_planes[i], 0xf); + + for (int i = 6; i < num_clip_planes; i++) { + nir_ssa_def *p = get_user_clip_plane(b, i - 6, packed); + nir_store_array_var_imm(b, clip_planes, i, p, 0xf); + } + + return clip_planes; +} + +static void +build_line_nir_shader(nir_builder *b, 
union state_key state, bool packed) +{ + assert(b->shader->info.gs.vertices_in == 2); + + nir_ssa_def *v[2]; + get_input_vertices(b, v); + + fast_frustum_culling(b, v); + + nir_variable *vert0 = nir_local_variable_create(b->impl, glsl_vec4_type(), "vert0"); + nir_store_var(b, vert0, v[0], 0xf); + + nir_variable *vert1 = nir_local_variable_create(b->impl, glsl_vec4_type(), "vert1"); + nir_store_var(b, vert1, v[1], 0xf); + + const int num_clip_planes = 6 + state.num_user_clip_planes; + nir_variable *clip_planes = create_clip_planes(b, num_clip_planes, packed); + + begin_for_loop(clip_loop, nir_imm_int(b, num_clip_planes)) + { + nir_ssa_def *plane = nir_load_array_var(b, clip_planes, idx); + nir_ssa_def *v0 = nir_load_var(b, vert0); + nir_ssa_def *v1 = nir_load_var(b, vert1); + nir_ssa_def *d0 = nir_fdot(b, v0, plane); + nir_ssa_def *d1 = nir_fdot(b, v1, plane); + nir_ssa_def *n0 = nir_flt(b, d0, nir_imm_float(b, 0)); + nir_ssa_def *n1 = nir_flt(b, d1, nir_imm_float(b, 0)); + + return_if_true(b, nir_iand(b, n0, n1)); + + nir_if *clip_if = nir_push_if(b, nir_ior(b, n0, n1)); + { + nir_ssa_def *iv = get_intersection(b, v0, v1, d0, d1); + nir_store_var(b, vert0, nir_bcsel(b, n0, iv, v0), 0xf); + nir_store_var(b, vert1, nir_bcsel(b, n1, iv, v1), 0xf); + } + nir_pop_if(b, clip_if); + } + end_for_loop(clip_loop) + + nir_ssa_def *trans[2]; + get_depth_range_transform(b, packed, trans); + + nir_ssa_def *d0 = get_window_space_depth(b, nir_load_var(b, vert0), trans); + nir_ssa_def *d1 = get_window_space_depth(b, nir_load_var(b, vert1), trans); + + nir_ssa_def *dmin = nir_fmin(b, d0, d1); + nir_ssa_def *dmax = nir_fmax(b, d0, d1); + + nir_ssa_def *fdmin = nir_fmul_imm(b, dmin, 4294967295.0); + nir_ssa_def *idmin = nir_f2uN(b, fdmin, 32); + + nir_ssa_def *fdmax = nir_fmul_imm(b, dmax, 4294967295.0); + nir_ssa_def *idmax = nir_f2uN(b, fdmax, 32); + + update_result_buffer(b, idmin, idmax, state.result_offset_from_attribute, packed); +} + +static void 
+build_planar_primitive_nir_shader(nir_builder *b, union state_key state, bool packed) +{ + const int num_in_vert = b->shader->info.gs.vertices_in; + assert(num_in_vert == 3 || num_in_vert == 4); + + nir_ssa_def *v[4]; + get_input_vertices(b, v); + + if (state.face_culling_enabled) + face_culling(b, v, packed); + + /* fast frustum culling, this should filter out most primitives */ + fast_frustum_culling(b, v); + + const int num_clip_planes = 6 + state.num_user_clip_planes; + const int max_vert = num_in_vert + num_clip_planes; + + /* TODO: could use shared memory (ie. AMD GPU LDS) for this array + * to reduce register usage. + */ + nir_variable *vert = nir_local_variable_create( + b->impl, glsl_array_type(glsl_vec4_type(), max_vert, 0), "vert"); + for (int i = 0; i < num_in_vert; i++) + nir_store_array_var_imm(b, vert, i, v[i], 0xf); + + nir_variable *num_vert = + nir_local_variable_create(b->impl, glsl_int_type(), "num_vert"); + nir_store_var(b, num_vert, nir_imm_int(b, num_in_vert), 1); + + nir_variable *clip_planes = create_clip_planes(b, num_clip_planes, packed); + + /* accurate clipping with all clip planes */ + begin_for_loop(clip_loop, nir_imm_int(b, num_clip_planes)) + { + nir_ssa_def *plane = nir_load_array_var(b, clip_planes, idx); + clip_with_plane(b, vert, num_vert, max_vert, plane); + } + end_for_loop(clip_loop) + + nir_ssa_def *trans[2]; + get_depth_range_transform(b, packed, trans); + + nir_variable *dmin = + nir_local_variable_create(b->impl, glsl_float_type(), "dmin"); + nir_store_var(b, dmin, nir_imm_float(b, 1), 1); + + nir_variable *dmax = + nir_local_variable_create(b->impl, glsl_float_type(), "dmax"); + nir_store_var(b, dmax, nir_imm_float(b, 0), 1); + + begin_for_loop(depth_loop, nir_load_var(b, num_vert)) + { + nir_ssa_def *vtx = nir_load_array_var(b, vert, idx); + nir_ssa_def *depth = get_window_space_depth(b, vtx, trans); + nir_store_var(b, dmin, nir_fmin(b, nir_load_var(b, dmin), depth), 1); + nir_store_var(b, dmax, nir_fmax(b, 
nir_load_var(b, dmax), depth), 1); + } + end_for_loop(depth_loop) + + nir_ssa_def *fdmin = nir_fmul_imm(b, nir_load_var(b, dmin), 4294967295.0); + nir_ssa_def *idmin = nir_f2uN(b, fdmin, 32); + + nir_ssa_def *fdmax = nir_fmul_imm(b, nir_load_var(b, dmax), 4294967295.0); + nir_ssa_def *idmax = nir_f2uN(b, fdmax, 32); + + update_result_buffer(b, idmin, idmax, state.result_offset_from_attribute, packed); +} + +static void * +hw_select_create_gs(struct st_context *st, union state_key state) +{ + const nir_shader_compiler_options *options = + st_get_nir_compiler_options(st, MESA_SHADER_GEOMETRY); + + nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options, + "hw select GS"); + + nir_shader *nir = b.shader; + nir->info.inputs_read = VARYING_BIT_POS; + nir->info.num_ssbos = 1; + nir->info.gs.output_primitive = SHADER_PRIM_POINTS; + nir->info.gs.vertices_out = 1; + nir->info.gs.invocations = 1; + nir->info.gs.active_stream_mask = 1; + + if (state.result_offset_from_attribute) + nir->info.inputs_read |= VARYING_BIT_VAR(0); + + bool packed = st->ctx->Const.PackedDriverUniformStorage; + + switch (state.primitive) { + case HW_SELECT_PRIM_POINTS: + nir->info.gs.input_primitive = SHADER_PRIM_POINTS; + nir->info.gs.vertices_in = 1; + build_point_nir_shader(&b, state, packed); + break; + case HW_SELECT_PRIM_LINES: + nir->info.gs.input_primitive = SHADER_PRIM_LINES; + nir->info.gs.vertices_in = 2; + build_line_nir_shader(&b, state, packed); + break; + case HW_SELECT_PRIM_TRIANGLES: + nir->info.gs.input_primitive = SHADER_PRIM_TRIANGLES; + nir->info.gs.vertices_in = 3; + build_planar_primitive_nir_shader(&b, state, packed); + break; + case HW_SELECT_PRIM_QUADS: + /* geometry shader has no quad primitive, use lines_adjacency instead */ + nir->info.gs.input_primitive = SHADER_PRIM_LINES_ADJACENCY; + nir->info.gs.vertices_in = 4; + build_planar_primitive_nir_shader(&b, state, packed); + break; + default: + unreachable("unexpected primitive"); + } + + 
nir_lower_returns(nir); + + return st_nir_finish_builtin_shader(st, nir); +} + +bool +st_draw_hw_select_prepare_common(struct gl_context *ctx) +{ + struct st_context *st = st_context(ctx); + if (st->gp || st->tcp || st->tep) { + fprintf(stderr, "HW GL_SELECT does not support user geometry/tessellation shader\n"); + return false; + } + + struct geometry_constant consts; + + float n = ctx->ViewportArray[0].Near; + float f = ctx->ViewportArray[0].Far; + consts.depth_scale = (f - n) / 2; + consts.depth_transport = (f + n) / 2; + + /* this field is not used when face culling disabled */ + consts.culling_config = + (ctx->Polygon.CullFaceMode == GL_BACK) ^ + (ctx->Polygon.FrontFace == GL_CCW); + + /* this field is not used when passing result offset by attribute */ + consts.result_offset = st->ctx->Select.ResultOffset; + + int num_planes = 0; + u_foreach_bit(i, ctx->Transform.ClipPlanesEnabled) { + COPY_4V(consts.clip_planes[num_planes], ctx->Transform._ClipUserPlane[i]); + num_planes++; + } + + struct pipe_constant_buffer cb; + cb.buffer = NULL; + cb.user_buffer = &consts; + cb.buffer_offset = 0; + cb.buffer_size = sizeof(consts) - (MAX_CLIP_PLANES - num_planes) * 4 * sizeof(float); + + struct pipe_context *pipe = st->pipe; + pipe->set_constant_buffer(pipe, PIPE_SHADER_GEOMETRY, 0, false, &cb); + + struct pipe_shader_buffer buffer; + memset(&buffer, 0, sizeof(buffer)); + buffer.buffer = ctx->Select.Result->buffer; + buffer.buffer_size = MAX_NAME_STACK_RESULT_NUM * 3 * sizeof(int); + + pipe->set_shader_buffers(pipe, PIPE_SHADER_GEOMETRY, 0, 1, &buffer, 0x1); + + return true; +} + +static union state_key +make_state_key(struct gl_context *ctx, int mode) +{ + union state_key state = {0}; + + switch (mode) { + case GL_POINTS: + state.primitive = HW_SELECT_PRIM_POINTS; + break; + case GL_LINES: + case GL_LINE_STRIP: + case GL_LINE_LOOP: + state.primitive = HW_SELECT_PRIM_LINES; + break; + case GL_QUADS: + state.primitive = HW_SELECT_PRIM_QUADS; + break; + case GL_TRIANGLES: + 
case GL_TRIANGLE_STRIP: + case GL_TRIANGLE_FAN: + /* These will be broken into triangles. */ + case GL_QUAD_STRIP: + case GL_POLYGON: + state.primitive = HW_SELECT_PRIM_TRIANGLES; + break; + default: + fprintf(stderr, "HW GL_SELECT does not support draw mode %s\n", + _mesa_enum_to_string(mode)); + return (union state_key){0}; + } + + /* TODO: support gl_ClipDistance/gl_CullDistance, but it costs more regs */ + struct gl_program *vp = ctx->st->vp; + if (vp->info.clip_distance_array_size || vp->info.cull_distance_array_size) { + fprintf(stderr, "HW GL_SELECT does not support gl_ClipDistance/gl_CullDistance\n"); + return (union state_key){0}; + } + + state.num_user_clip_planes = util_bitcount(ctx->Transform.ClipPlanesEnabled); + + /* face culling only applies to 2D primitives */ + if (state.primitive == HW_SELECT_PRIM_QUADS || + state.primitive == HW_SELECT_PRIM_TRIANGLES) + state.face_culling_enabled = ctx->Polygon.CullFlag; + + state.result_offset_from_attribute = + ctx->VertexProgram._VPMode == VP_MODE_FF && + (ctx->VertexProgram._VaryingInputs & VERT_BIT_SELECT_RESULT_OFFSET); + + return state; +} + +bool +st_draw_hw_select_prepare_mode(struct gl_context *ctx, struct pipe_draw_info *info) +{ + union state_key key = make_state_key(ctx, info->mode); + if (!key.u32) + return false; + + struct st_context *st = st_context(ctx); + if (!st->hw_select_shaders) + st->hw_select_shaders = _mesa_hash_table_create_u32_keys(NULL); + + struct hash_entry *he = _mesa_hash_table_search(st->hw_select_shaders, + (void*)(uintptr_t)key.u32); + void *gs; + if (!he) { + gs = hw_select_create_gs(st, key); + if (!gs) + return false; + + _mesa_hash_table_insert(st->hw_select_shaders, (void*)(uintptr_t)key.u32, gs); + } else + gs = he->data; + + struct cso_context *cso = st->cso_context; + cso_set_geometry_shader_handle(cso, gs); + + /* Replace the draw mode with an equivalent one that the geometry shader supports. 
+ * + * The new mode consumes the same vertex buffer structure and produces primitives with + * the same vertices (no need to be the same type of primitive, because the geometry shader + * operates on vertices and emits nothing). + * + * We can break QUAD_STRIP and POLYGON into triangles with the same shape. But we can't further + * break them into single lines or points because the new primitive needs to contain >=3 + * vertices so that it's still handled in a 2D (planar) way instead of a 1D (line) or + * 0D (point) way, which use different algorithms. + */ + switch (info->mode) { + case GL_QUADS: + info->mode = GL_LINES_ADJACENCY; + break; + case GL_QUAD_STRIP: + info->mode = GL_TRIANGLE_STRIP; + break; + case GL_POLYGON: + info->mode = GL_TRIANGLE_FAN; + break; + default: + break; + } + + /* Only normal glBegin/End draws pass the result offset by attribute to avoid flushing + * vertices when the name stack changes, so multiple glBegin/End sections before/after + * name stack calls can be merged into a single draw call. To achieve this we mark the + * name stack result buffer used in glEnd instead of the last draw call. + * + * Other cases like glDrawArrays and display list replay won't merge draws across + * name stack calls, so we just mark the name stack result buffer used here. + */ + if (!key.result_offset_from_attribute) + ctx->Select.ResultUsed = GL_TRUE; + + return true; +}