Merge remote-tracking branch 'public/master' into vulkan
This commit is contained in:
+5
-16
@@ -704,8 +704,10 @@ test "x$enable_asm" = xno && AC_MSG_RESULT([no])
|
||||
if test "x$enable_asm" = xyes -a "x$cross_compiling" = xyes; then
|
||||
case "$host_cpu" in
|
||||
i?86 | x86_64 | amd64)
|
||||
enable_asm=no
|
||||
AC_MSG_RESULT([no, cross compiling])
|
||||
if test "x$host_cpu" != "x$target_cpu"; then
|
||||
enable_asm=no
|
||||
AC_MSG_RESULT([no, cross compiling])
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
@@ -929,12 +931,6 @@ AC_ARG_ENABLE([xlib-glx],
|
||||
[enable_xlib_glx="$enableval"],
|
||||
[enable_xlib_glx=no])
|
||||
|
||||
AC_ARG_ENABLE([r600-llvm-compiler],
|
||||
[AS_HELP_STRING([--enable-r600-llvm-compiler],
|
||||
[Enable experimental LLVM backend for graphics shaders @<:@default=disabled@:>@])],
|
||||
[enable_r600_llvm="$enableval"],
|
||||
[enable_r600_llvm=no])
|
||||
|
||||
AC_ARG_ENABLE([gallium-tests],
|
||||
[AS_HELP_STRING([--enable-gallium-tests],
|
||||
[Enable optional Gallium tests) @<:@default=disabled@:>@])],
|
||||
@@ -2238,14 +2234,8 @@ if test -n "$with_gallium_drivers"; then
|
||||
PKG_CHECK_MODULES([RADEON], [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED])
|
||||
gallium_require_drm "Gallium R600"
|
||||
gallium_require_drm_loader
|
||||
if test "x$enable_r600_llvm" = xyes -o "x$enable_opencl" = xyes; then
|
||||
radeon_llvm_check "r600g"
|
||||
LLVM_COMPONENTS="${LLVM_COMPONENTS} bitreader asmparser"
|
||||
fi
|
||||
if test "x$enable_r600_llvm" = xyes; then
|
||||
USE_R600_LLVM_COMPILER=yes;
|
||||
fi
|
||||
if test "x$enable_opencl" = xyes; then
|
||||
radeon_llvm_check "r600g"
|
||||
LLVM_COMPONENTS="${LLVM_COMPONENTS} bitreader asmparser"
|
||||
fi
|
||||
;;
|
||||
@@ -2416,7 +2406,6 @@ AM_CONDITIONAL(NEED_RADEON_DRM_WINSYS, test "x$HAVE_GALLIUM_R300" = xyes -o \
|
||||
"x$HAVE_GALLIUM_RADEONSI" = xyes)
|
||||
AM_CONDITIONAL(NEED_WINSYS_XLIB, test "x$NEED_WINSYS_XLIB" = xyes)
|
||||
AM_CONDITIONAL(NEED_RADEON_LLVM, test x$NEED_RADEON_LLVM = xyes)
|
||||
AM_CONDITIONAL(USE_R600_LLVM_COMPILER, test x$USE_R600_LLVM_COMPILER = xyes)
|
||||
AM_CONDITIONAL(HAVE_GALLIUM_COMPUTE, test x$enable_opencl = xyes)
|
||||
AM_CONDITIONAL(HAVE_MESA_LLVM, test x$MESA_LLVM = x1)
|
||||
AM_CONDITIONAL(USE_VC4_SIMULATOR, test x$USE_VC4_SIMULATOR = xyes)
|
||||
|
||||
+184
-169
@@ -1,13 +1,28 @@
|
||||
# Status of OpenGL extensions in Mesa
|
||||
|
||||
Status of OpenGL 3.x features in Mesa
|
||||
Here's how to read this file:
|
||||
|
||||
all DONE: <driver>, ...
|
||||
All the extensions are done for the given list of drivers.
|
||||
|
||||
Note: when an item is marked as "DONE" it means all the core Mesa
|
||||
infrastructure is complete but it may be the case that few (if any) drivers
|
||||
implement the features.
|
||||
DONE
|
||||
The extension is done for Mesa and no implementation is necessary on the
|
||||
driver-side.
|
||||
|
||||
DONE ()
|
||||
The extension is done for Mesa and all the drivers in the "all DONE" list.
|
||||
|
||||
OpenGL Core and Compatibility context support
|
||||
DONE (<driver>, ...)
|
||||
The extension is done for Mesa, all the drivers in the "all DONE" list, and
|
||||
all the drivers in the brackets.
|
||||
|
||||
in progress
|
||||
The extension is started but not finished yet.
|
||||
|
||||
not started
|
||||
The extension isn't started yet.
|
||||
|
||||
# OpenGL Core and Compatibility context support
|
||||
|
||||
OpenGL 3.1 and later versions are only supported with the Core profile.
|
||||
There are no plans to support GL_ARB_compatibility. The last supported OpenGL
|
||||
@@ -15,30 +30,30 @@ version with all deprecated features is 3.0. Some of the later GL features
|
||||
are exposed in the 3.0 context as extensions.
|
||||
|
||||
|
||||
Feature Status
|
||||
----------------------------------------------------- ------------------------
|
||||
Feature Status
|
||||
------------------------------------------------------- ------------------------
|
||||
|
||||
GL 3.0, GLSL 1.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe
|
||||
|
||||
glBindFragDataLocation, glGetFragDataLocation DONE
|
||||
Conditional rendering (GL_NV_conditional_render) DONE ()
|
||||
Map buffer subranges (GL_ARB_map_buffer_range) DONE ()
|
||||
Clamping controls (GL_ARB_color_buffer_float) DONE ()
|
||||
Float textures, renderbuffers (GL_ARB_texture_float) DONE ()
|
||||
GL_NV_conditional_render (Conditional rendering) DONE ()
|
||||
GL_ARB_map_buffer_range (Map buffer subranges) DONE ()
|
||||
GL_ARB_color_buffer_float (Clamping controls) DONE ()
|
||||
GL_ARB_texture_float (Float textures, renderbuffers) DONE ()
|
||||
GL_EXT_packed_float DONE ()
|
||||
GL_EXT_texture_shared_exponent DONE ()
|
||||
Float depth buffers (GL_ARB_depth_buffer_float) DONE ()
|
||||
Framebuffer objects (GL_ARB_framebuffer_object) DONE ()
|
||||
GL_ARB_depth_buffer_float (Float depth buffers) DONE ()
|
||||
GL_ARB_framebuffer_object (Framebuffer objects) DONE ()
|
||||
GL_ARB_half_float_pixel DONE (all drivers)
|
||||
GL_ARB_half_float_vertex DONE ()
|
||||
GL_EXT_texture_integer DONE ()
|
||||
GL_EXT_texture_array DONE ()
|
||||
Per-buffer blend and masks (GL_EXT_draw_buffers2) DONE ()
|
||||
GL_EXT_draw_buffers2 (Per-buffer blend and masks) DONE ()
|
||||
GL_EXT_texture_compression_rgtc DONE ()
|
||||
GL_ARB_texture_rg DONE ()
|
||||
Transform feedback (GL_EXT_transform_feedback) DONE ()
|
||||
Vertex array objects (GL_ARB_vertex_array_object) DONE ()
|
||||
sRGB framebuffer format (GL_EXT_framebuffer_sRGB) DONE ()
|
||||
GL_EXT_transform_feedback (Transform feedback) DONE ()
|
||||
GL_ARB_vertex_array_object (Vertex array objects) DONE ()
|
||||
GL_EXT_framebuffer_sRGB (sRGB framebuffer format) DONE ()
|
||||
glClearBuffer commands DONE
|
||||
glGetStringi command DONE
|
||||
glTexParameterI, glGetTexParameterI commands DONE
|
||||
@@ -53,28 +68,28 @@ GL 3.0, GLSL 1.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, soft
|
||||
GL 3.1, GLSL 1.40 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe
|
||||
|
||||
Forward compatible context support/deprecations DONE ()
|
||||
Instanced drawing (GL_ARB_draw_instanced) DONE ()
|
||||
Buffer copying (GL_ARB_copy_buffer) DONE ()
|
||||
Primitive restart (GL_NV_primitive_restart) DONE ()
|
||||
GL_ARB_draw_instanced (Instanced drawing) DONE ()
|
||||
GL_ARB_copy_buffer (Buffer copying) DONE ()
|
||||
GL_NV_primitive_restart (Primitive restart) DONE ()
|
||||
16 vertex texture image units DONE ()
|
||||
Texture buffer objs (GL_ARB_texture_buffer_object) DONE for OpenGL 3.1 contexts ()
|
||||
Rectangular textures (GL_ARB_texture_rectangle) DONE ()
|
||||
Uniform buffer objs (GL_ARB_uniform_buffer_object) DONE ()
|
||||
Signed normalized textures (GL_EXT_texture_snorm) DONE ()
|
||||
GL_ARB_texture_buffer_object (Texture buffer objs) DONE (for OpenGL 3.1 contexts)
|
||||
GL_ARB_texture_rectangle (Rectangular textures) DONE ()
|
||||
GL_ARB_uniform_buffer_object (Uniform buffer objs) DONE ()
|
||||
GL_EXT_texture_snorm (Signed normalized textures) DONE ()
|
||||
|
||||
|
||||
GL 3.2, GLSL 1.50 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe
|
||||
|
||||
Core/compatibility profiles DONE
|
||||
Geometry shaders DONE ()
|
||||
BGRA vertex order (GL_ARB_vertex_array_bgra) DONE ()
|
||||
Base vertex offset(GL_ARB_draw_elements_base_vertex) DONE ()
|
||||
Frag shader coord (GL_ARB_fragment_coord_conventions) DONE ()
|
||||
Provoking vertex (GL_ARB_provoking_vertex) DONE ()
|
||||
Seamless cubemaps (GL_ARB_seamless_cube_map) DONE ()
|
||||
Multisample textures (GL_ARB_texture_multisample) DONE ()
|
||||
Frag depth clamp (GL_ARB_depth_clamp) DONE ()
|
||||
Fence objects (GL_ARB_sync) DONE ()
|
||||
GL_ARB_vertex_array_bgra (BGRA vertex order) DONE ()
|
||||
GL_ARB_draw_elements_base_vertex (Base vertex offset) DONE ()
|
||||
GL_ARB_fragment_coord_conventions (Frag shader coord) DONE ()
|
||||
GL_ARB_provoking_vertex (Provoking vertex) DONE ()
|
||||
GL_ARB_seamless_cube_map (Seamless cubemaps) DONE ()
|
||||
GL_ARB_texture_multisample (Multisample textures) DONE ()
|
||||
GL_ARB_depth_clamp (Frag depth clamp) DONE ()
|
||||
GL_ARB_sync (Fence objects) DONE ()
|
||||
GLX_ARB_create_context_profile DONE
|
||||
|
||||
|
||||
@@ -94,170 +109,170 @@ GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, soft
|
||||
|
||||
GL 4.0, GLSL 4.00 --- all DONE: nvc0, r600, radeonsi
|
||||
|
||||
GL_ARB_draw_buffers_blend DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_draw_indirect DONE (i965, llvmpipe, softpipe)
|
||||
GL_ARB_gpu_shader5 DONE (i965)
|
||||
- 'precise' qualifier DONE
|
||||
- Dynamically uniform sampler array indices DONE (softpipe)
|
||||
- Dynamically uniform UBO array indices DONE ()
|
||||
- Implicit signed -> unsigned conversions DONE
|
||||
- Fused multiply-add DONE ()
|
||||
- Packing/bitfield/conversion functions DONE (softpipe)
|
||||
- Enhanced textureGather DONE (softpipe)
|
||||
- Geometry shader instancing DONE (llvmpipe, softpipe)
|
||||
- Geometry shader multiple streams DONE ()
|
||||
- Enhanced per-sample shading DONE ()
|
||||
- Interpolation functions DONE ()
|
||||
- New overload resolution rules DONE
|
||||
GL_ARB_gpu_shader_fp64 DONE (llvmpipe, softpipe)
|
||||
GL_ARB_sample_shading DONE (i965, nv50)
|
||||
GL_ARB_shader_subroutine DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_tessellation_shader DONE (i965)
|
||||
GL_ARB_texture_buffer_object_rgb32 DONE (i965, llvmpipe, softpipe)
|
||||
GL_ARB_texture_cube_map_array DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_texture_gather DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_texture_query_lod DONE (i965, nv50, softpipe)
|
||||
GL_ARB_transform_feedback2 DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_transform_feedback3 DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_draw_buffers_blend DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_draw_indirect DONE (i965, llvmpipe, softpipe)
|
||||
GL_ARB_gpu_shader5 DONE (i965)
|
||||
- 'precise' qualifier DONE
|
||||
- Dynamically uniform sampler array indices DONE (softpipe)
|
||||
- Dynamically uniform UBO array indices DONE ()
|
||||
- Implicit signed -> unsigned conversions DONE
|
||||
- Fused multiply-add DONE ()
|
||||
- Packing/bitfield/conversion functions DONE (softpipe)
|
||||
- Enhanced textureGather DONE (softpipe)
|
||||
- Geometry shader instancing DONE (llvmpipe, softpipe)
|
||||
- Geometry shader multiple streams DONE ()
|
||||
- Enhanced per-sample shading DONE ()
|
||||
- Interpolation functions DONE ()
|
||||
- New overload resolution rules DONE
|
||||
GL_ARB_gpu_shader_fp64 DONE (llvmpipe, softpipe)
|
||||
GL_ARB_sample_shading DONE (i965, nv50)
|
||||
GL_ARB_shader_subroutine DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_tessellation_shader DONE (i965)
|
||||
GL_ARB_texture_buffer_object_rgb32 DONE (i965, llvmpipe, softpipe)
|
||||
GL_ARB_texture_cube_map_array DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_texture_gather DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_texture_query_lod DONE (i965, nv50, softpipe)
|
||||
GL_ARB_transform_feedback2 DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_transform_feedback3 DONE (i965, nv50, llvmpipe, softpipe)
|
||||
|
||||
|
||||
GL 4.1, GLSL 4.10 --- all DONE: nvc0, r600, radeonsi
|
||||
|
||||
GL_ARB_ES2_compatibility DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_get_program_binary DONE (0 binary formats)
|
||||
GL_ARB_separate_shader_objects DONE (all drivers)
|
||||
GL_ARB_shader_precision DONE (all drivers that support GLSL 4.10)
|
||||
GL_ARB_vertex_attrib_64bit DONE (llvmpipe, softpipe)
|
||||
GL_ARB_viewport_array DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_ES2_compatibility DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_get_program_binary DONE (0 binary formats)
|
||||
GL_ARB_separate_shader_objects DONE (all drivers)
|
||||
GL_ARB_shader_precision DONE (all drivers that support GLSL 4.10)
|
||||
GL_ARB_vertex_attrib_64bit DONE (llvmpipe, softpipe)
|
||||
GL_ARB_viewport_array DONE (i965, nv50, llvmpipe, softpipe)
|
||||
|
||||
|
||||
GL 4.2, GLSL 4.20:
|
||||
|
||||
GL_ARB_texture_compression_bptc DONE (i965, nvc0, r600, radeonsi)
|
||||
GL_ARB_compressed_texture_pixel_storage DONE (all drivers)
|
||||
GL_ARB_shader_atomic_counters DONE (i965, nvc0)
|
||||
GL_ARB_texture_storage DONE (all drivers)
|
||||
GL_ARB_transform_feedback_instanced DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_base_instance DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_shader_image_load_store DONE (i965)
|
||||
GL_ARB_conservative_depth DONE (all drivers that support GLSL 1.30)
|
||||
GL_ARB_shading_language_420pack DONE (all drivers that support GLSL 1.30)
|
||||
GL_ARB_shading_language_packing DONE (all drivers)
|
||||
GL_ARB_internalformat_query DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_map_buffer_alignment DONE (all drivers)
|
||||
GL_ARB_texture_compression_bptc DONE (i965, nvc0, r600, radeonsi)
|
||||
GL_ARB_compressed_texture_pixel_storage DONE (all drivers)
|
||||
GL_ARB_shader_atomic_counters DONE (i965, nvc0)
|
||||
GL_ARB_texture_storage DONE (all drivers)
|
||||
GL_ARB_transform_feedback_instanced DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_base_instance DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_shader_image_load_store DONE (i965, radeonsi)
|
||||
GL_ARB_conservative_depth DONE (all drivers that support GLSL 1.30)
|
||||
GL_ARB_shading_language_420pack DONE (all drivers that support GLSL 1.30)
|
||||
GL_ARB_shading_language_packing DONE (all drivers)
|
||||
GL_ARB_internalformat_query DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_map_buffer_alignment DONE (all drivers)
|
||||
|
||||
|
||||
GL 4.3, GLSL 4.30:
|
||||
|
||||
GL_ARB_arrays_of_arrays DONE (all drivers that support GLSL 1.30)
|
||||
GL_ARB_ES3_compatibility DONE (all drivers that support GLSL 3.30)
|
||||
GL_ARB_clear_buffer_object DONE (all drivers)
|
||||
GL_ARB_compute_shader DONE (i965)
|
||||
GL_ARB_copy_image DONE (i965, nv50, nvc0, r600, radeonsi)
|
||||
GL_KHR_debug DONE (all drivers)
|
||||
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
|
||||
GL_ARB_fragment_layer_viewport DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe)
|
||||
GL_ARB_framebuffer_no_attachments DONE (i965)
|
||||
GL_ARB_internalformat_query2 DONE (i965)
|
||||
GL_ARB_invalidate_subdata DONE (all drivers)
|
||||
GL_ARB_multi_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_program_interface_query DONE (all drivers)
|
||||
GL_ARB_robust_buffer_access_behavior not started
|
||||
GL_ARB_shader_image_size DONE (i965)
|
||||
GL_ARB_shader_storage_buffer_object DONE (i965, nvc0)
|
||||
GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
|
||||
GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30)
|
||||
GL_ARB_texture_storage_multisample DONE (all drivers that support GL_ARB_texture_multisample)
|
||||
GL_ARB_texture_view DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_vertex_attrib_binding DONE (all drivers)
|
||||
GL_ARB_arrays_of_arrays DONE (all drivers that support GLSL 1.30)
|
||||
GL_ARB_ES3_compatibility DONE (all drivers that support GLSL 3.30)
|
||||
GL_ARB_clear_buffer_object DONE (all drivers)
|
||||
GL_ARB_compute_shader DONE (i965)
|
||||
GL_ARB_copy_image DONE (i965, nv50, nvc0, r600, radeonsi)
|
||||
GL_KHR_debug DONE (all drivers)
|
||||
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
|
||||
GL_ARB_fragment_layer_viewport DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe)
|
||||
GL_ARB_framebuffer_no_attachments DONE (i965)
|
||||
GL_ARB_internalformat_query2 DONE (all drivers)
|
||||
GL_ARB_invalidate_subdata DONE (all drivers)
|
||||
GL_ARB_multi_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_program_interface_query DONE (all drivers)
|
||||
GL_ARB_robust_buffer_access_behavior not started
|
||||
GL_ARB_shader_image_size DONE (i965, radeonsi)
|
||||
GL_ARB_shader_storage_buffer_object DONE (i965, nvc0)
|
||||
GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
|
||||
GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30)
|
||||
GL_ARB_texture_storage_multisample DONE (all drivers that support GL_ARB_texture_multisample)
|
||||
GL_ARB_texture_view DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_vertex_attrib_binding DONE (all drivers)
|
||||
|
||||
|
||||
GL 4.4, GLSL 4.40:
|
||||
|
||||
GL_MAX_VERTEX_ATTRIB_STRIDE DONE (all drivers)
|
||||
GL_ARB_buffer_storage DONE (i965, nv50, nvc0, r600, radeonsi)
|
||||
GL_ARB_clear_texture DONE (i965, nv50, nvc0)
|
||||
GL_ARB_enhanced_layouts in progress (Timothy)
|
||||
- compile-time constant expressions DONE
|
||||
- explicit byte offsets for blocks DONE
|
||||
- forced alignment within blocks DONE
|
||||
- specified vec4-slot component numbers in progress
|
||||
- specified transform/feedback layout in progress
|
||||
- input/output block locations DONE
|
||||
GL_ARB_multi_bind DONE (all drivers)
|
||||
GL_ARB_query_buffer_object DONE (nvc0)
|
||||
GL_ARB_texture_mirror_clamp_to_edge DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_texture_stencil8 DONE (nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_vertex_type_10f_11f_11f_rev DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_MAX_VERTEX_ATTRIB_STRIDE DONE (all drivers)
|
||||
GL_ARB_buffer_storage DONE (i965, nv50, nvc0, r600, radeonsi)
|
||||
GL_ARB_clear_texture DONE (i965, nv50, nvc0)
|
||||
GL_ARB_enhanced_layouts in progress (Timothy)
|
||||
- compile-time constant expressions DONE
|
||||
- explicit byte offsets for blocks DONE
|
||||
- forced alignment within blocks DONE
|
||||
- specified vec4-slot component numbers in progress
|
||||
- specified transform/feedback layout in progress
|
||||
- input/output block locations DONE
|
||||
GL_ARB_multi_bind DONE (all drivers)
|
||||
GL_ARB_query_buffer_object DONE (nvc0)
|
||||
GL_ARB_texture_mirror_clamp_to_edge DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_texture_stencil8 DONE (nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_vertex_type_10f_11f_11f_rev DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
|
||||
GL 4.5, GLSL 4.50:
|
||||
|
||||
GL_ARB_ES3_1_compatibility not started
|
||||
GL_ARB_clip_control DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_conditional_render_inverted DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_cull_distance in progress (Tobias)
|
||||
GL_ARB_derivative_control DONE (i965, nv50, nvc0, r600, radeonsi)
|
||||
GL_ARB_direct_state_access DONE (all drivers)
|
||||
GL_ARB_get_texture_sub_image DONE (all drivers)
|
||||
GL_ARB_shader_texture_image_samples DONE (i965, nv50, nvc0, r600, radeonsi)
|
||||
GL_ARB_texture_barrier DONE (i965, nv50, nvc0, r600, radeonsi)
|
||||
GL_KHR_context_flush_control DONE (all - but needs GLX/EGL extension to be useful)
|
||||
GL_KHR_robust_buffer_access_behavior not started
|
||||
GL_KHR_robustness 90% done (the ARB variant)
|
||||
GL_EXT_shader_integer_mix DONE (all drivers that support GLSL)
|
||||
GL_ARB_ES3_1_compatibility not started
|
||||
GL_ARB_clip_control DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_conditional_render_inverted DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_cull_distance in progress (Tobias)
|
||||
GL_ARB_derivative_control DONE (i965, nv50, nvc0, r600, radeonsi)
|
||||
GL_ARB_direct_state_access DONE (all drivers)
|
||||
GL_ARB_get_texture_sub_image DONE (all drivers)
|
||||
GL_ARB_shader_texture_image_samples DONE (i965, nv50, nvc0, r600, radeonsi)
|
||||
GL_ARB_texture_barrier DONE (i965, nv50, nvc0, r600, radeonsi)
|
||||
GL_KHR_context_flush_control DONE (all - but needs GLX/EGL extension to be useful)
|
||||
GL_KHR_robust_buffer_access_behavior not started
|
||||
GL_KHR_robustness not started (90% done with the ARB variant)
|
||||
GL_EXT_shader_integer_mix DONE (all drivers that support GLSL)
|
||||
|
||||
These are the extensions cherry-picked to make GLES 3.1
|
||||
GLES3.1, GLSL ES 3.1
|
||||
GL_ARB_arrays_of_arrays DONE (all drivers that support GLSL 1.30)
|
||||
GL_ARB_compute_shader DONE (i965)
|
||||
GL_ARB_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
|
||||
GL_ARB_framebuffer_no_attachments DONE (i965)
|
||||
GL_ARB_program_interface_query DONE (all drivers)
|
||||
GL_ARB_shader_atomic_counters DONE (i965, nvc0)
|
||||
GL_ARB_shader_image_load_store DONE (i965)
|
||||
GL_ARB_shader_image_size DONE (i965)
|
||||
GL_ARB_shader_storage_buffer_object DONE (i965, nvc0)
|
||||
GL_ARB_shading_language_packing DONE (all drivers)
|
||||
GL_ARB_separate_shader_objects DONE (all drivers)
|
||||
GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
Multisample textures (GL_ARB_texture_multisample) DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_texture_storage_multisample DONE (all drivers that support GL_ARB_texture_multisample)
|
||||
GL_ARB_vertex_attrib_binding DONE (all drivers)
|
||||
GS5 Enhanced textureGather DONE (i965, nvc0, r600, radeonsi)
|
||||
GS5 Packing/bitfield/conversion functions DONE (i965, nvc0, r600, radeonsi)
|
||||
GL_EXT_shader_integer_mix DONE (all drivers that support GLSL)
|
||||
GL_ARB_arrays_of_arrays DONE (all drivers that support GLSL 1.30)
|
||||
GL_ARB_compute_shader DONE (i965)
|
||||
GL_ARB_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
|
||||
GL_ARB_framebuffer_no_attachments DONE (i965)
|
||||
GL_ARB_program_interface_query DONE (all drivers)
|
||||
GL_ARB_shader_atomic_counters DONE (i965, nvc0)
|
||||
GL_ARB_shader_image_load_store DONE (i965)
|
||||
GL_ARB_shader_image_size DONE (i965)
|
||||
GL_ARB_shader_storage_buffer_object DONE (i965, nvc0)
|
||||
GL_ARB_shading_language_packing DONE (all drivers)
|
||||
GL_ARB_separate_shader_objects DONE (all drivers)
|
||||
GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_texture_multisample (Multisample textures) DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_texture_storage_multisample DONE (all drivers that support GL_ARB_texture_multisample)
|
||||
GL_ARB_vertex_attrib_binding DONE (all drivers)
|
||||
GS5 Enhanced textureGather DONE (i965, nvc0, r600, radeonsi)
|
||||
GS5 Packing/bitfield/conversion functions DONE (i965, nvc0, r600, radeonsi)
|
||||
GL_EXT_shader_integer_mix DONE (all drivers that support GLSL)
|
||||
|
||||
Additional functionality not covered above:
|
||||
glMemoryBarrierByRegion DONE
|
||||
glGetTexLevelParameter[fi]v - needs updates DONE
|
||||
glMemoryBarrierByRegion DONE
|
||||
glGetTexLevelParameter[fi]v - needs updates DONE
|
||||
glGetBooleani_v - restrict to GLES enums
|
||||
gl_HelperInvocation support DONE (i965, nvc0, r600)
|
||||
gl_HelperInvocation support DONE (i965, nvc0, r600)
|
||||
|
||||
GLES3.2, GLSL ES 3.2
|
||||
GL_EXT_color_buffer_float DONE (all drivers)
|
||||
GL_KHR_blend_equation_advanced not started
|
||||
GL_KHR_debug DONE (all drivers)
|
||||
GL_KHR_robustness 90% done (the ARB variant)
|
||||
GL_KHR_texture_compression_astc_ldr DONE (i965/gen9+)
|
||||
GL_OES_copy_image not started (based on GL_ARB_copy_image, which is done for some drivers)
|
||||
GL_OES_draw_buffers_indexed not started
|
||||
GL_OES_draw_elements_base_vertex DONE (all drivers)
|
||||
GL_OES_geometry_shader started (Marta)
|
||||
GL_OES_gpu_shader5 DONE (all drivers that support GL_ARB_gpu_shader5)
|
||||
GL_OES_primitive_bounding_box not started
|
||||
GL_OES_sample_shading not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
|
||||
GL_OES_sample_variables not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
|
||||
GL_OES_shader_image_atomic DONE (all drivers that support GL_ARB_shader_image_load_store)
|
||||
GL_OES_shader_io_blocks not started (based on parts of GLSL 1.50, which is done)
|
||||
GL_OES_shader_multisample_interpolation not started (based on parts of GL_ARB_gpu_shader5, which is done)
|
||||
GL_OES_tessellation_shader not started (based on GL_ARB_tessellation_shader, which is done for some drivers)
|
||||
GL_OES_texture_border_clamp DONE (all drivers)
|
||||
GL_OES_texture_buffer not started (based on GL_ARB_texture_buffer_object, GL_ARB_texture_buffer_range, and GL_ARB_texture_buffer_object_rgb32 that are all done)
|
||||
GL_OES_texture_cube_map_array not started (based on GL_ARB_texture_cube_map_array, which is done for all drivers)
|
||||
GL_OES_texture_stencil8 DONE (all drivers that support GL_ARB_texture_stencil8)
|
||||
GL_OES_texture_storage_multisample_2d_array DONE (all drivers that support GL_ARB_texture_multisample)
|
||||
GL_EXT_color_buffer_float DONE (all drivers)
|
||||
GL_KHR_blend_equation_advanced not started
|
||||
GL_KHR_debug DONE (all drivers)
|
||||
GL_KHR_robustness not started (90% done with the ARB variant)
|
||||
GL_KHR_texture_compression_astc_ldr DONE (i965/gen9+)
|
||||
GL_OES_copy_image not started (based on GL_ARB_copy_image, which is done for some drivers)
|
||||
GL_OES_draw_buffers_indexed not started
|
||||
GL_OES_draw_elements_base_vertex DONE (all drivers)
|
||||
GL_OES_geometry_shader started (Marta)
|
||||
GL_OES_gpu_shader5 DONE (all drivers that support GL_ARB_gpu_shader5)
|
||||
GL_OES_primitive_bounding_box not started
|
||||
GL_OES_sample_shading not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
|
||||
GL_OES_sample_variables not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
|
||||
GL_OES_shader_image_atomic DONE (all drivers that support GL_ARB_shader_image_load_store)
|
||||
GL_OES_shader_io_blocks not started (based on parts of GLSL 1.50, which is done)
|
||||
GL_OES_shader_multisample_interpolation not started (based on parts of GL_ARB_gpu_shader5, which is done)
|
||||
GL_OES_tessellation_shader not started (based on GL_ARB_tessellation_shader, which is done for some drivers)
|
||||
GL_OES_texture_border_clamp DONE (all drivers)
|
||||
GL_OES_texture_buffer not started (based on GL_ARB_texture_buffer_object, GL_ARB_texture_buffer_range, and GL_ARB_texture_buffer_object_rgb32 that are all done)
|
||||
GL_OES_texture_cube_map_array not started (based on GL_ARB_texture_cube_map_array, which is done for all drivers)
|
||||
GL_OES_texture_stencil8 DONE (all drivers that support GL_ARB_texture_stencil8)
|
||||
GL_OES_texture_storage_multisample_2d_array DONE (all drivers that support GL_ARB_texture_multisample)
|
||||
|
||||
More info about these features and the work involved can be found at
|
||||
http://dri.freedesktop.org/wiki/MissingFunctionality
|
||||
|
||||
@@ -163,6 +163,9 @@ See the <a href="xlibdriver.html">Xlib software driver page</a> for details.
|
||||
<li>blorp - emit messages about the blorp operations (blits & clears)</li>
|
||||
<li>nodualobj - suppress generation of dual-object geometry shader code</li>
|
||||
<li>optimizer - dump shader assembly to files at each optimization pass and iteration that make progress</li>
|
||||
<li>vec4 - force vec4 mode in vertex shader</li>
|
||||
<li>spill_fs - force spilling of all registers in the scalar backend (useful to debug spilling code)</li>
|
||||
<li>spill_vec4 - force spilling of all registers in the vec4 backend (useful to debug spilling code)</li>
|
||||
</ul>
|
||||
</ul>
|
||||
|
||||
|
||||
+1
-2
@@ -73,8 +73,7 @@ The following are required for DRI-based hardware acceleration with Mesa:
|
||||
<ul>
|
||||
<li><a href="http://xorg.freedesktop.org/releases/individual/proto/">
|
||||
dri2proto</a> version 2.6 or later
|
||||
<li><a href="http://dri.freedesktop.org/libdrm/">libDRM</a>
|
||||
version 2.4.33 or later
|
||||
<li><a href="http://dri.freedesktop.org/libdrm/">libDRM</a> latest version
|
||||
<li>Xorg server version 1.5 or later
|
||||
<li>Linux 2.6.28 or later
|
||||
</ul>
|
||||
|
||||
@@ -44,8 +44,10 @@ Note: some of the new features are only available with certain drivers.
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li>GL_ARB_internalformat_query2 on i965</li>
|
||||
<li>GL_ARB_internalformat_query2 on all drivers</li>
|
||||
<li>GL_ARB_shader_atomic_counter_ops on nvc0</li>
|
||||
<li>GL_ARB_shader_image_load_store on radeonsi</li>
|
||||
<li>GL_ARB_shader_image_size on radeonsi</li>
|
||||
<li>GL_OES_texture_border_clamp and GL_EXT_texture_border_clamp on all drivers that support GL_ARB_texture_border_clamp</li>
|
||||
<li>GL_OES_shader_image_atomic on all drivers that support GL_ARB_shader_image_load_store</li>
|
||||
</ul>
|
||||
|
||||
@@ -129,6 +129,7 @@ LIBGLSL_FILES = \
|
||||
glsl/opt_tree_grafting.cpp \
|
||||
glsl/opt_vectorize.cpp \
|
||||
glsl/program.h \
|
||||
glsl/propagate_invariance.cpp \
|
||||
glsl/s_expression.cpp \
|
||||
glsl/s_expression.h
|
||||
|
||||
|
||||
@@ -217,6 +217,7 @@ LIBGLSL_FILES = \
|
||||
opt_tree_grafting.cpp \
|
||||
opt_vectorize.cpp \
|
||||
program.h \
|
||||
propagate_invariance.cpp \
|
||||
s_expression.cpp \
|
||||
s_expression.h
|
||||
|
||||
|
||||
@@ -2125,7 +2125,9 @@ process_array_size(exec_node *node,
|
||||
}
|
||||
|
||||
ir_constant *const size = ir->constant_expression_value();
|
||||
if (size == NULL || array_size->has_sequence_subexpression()) {
|
||||
if (size == NULL ||
|
||||
(state->is_version(120, 300) &&
|
||||
array_size->has_sequence_subexpression())) {
|
||||
_mesa_glsl_error(& loc, state, "array size must be a "
|
||||
"constant valued expression");
|
||||
return 0;
|
||||
|
||||
@@ -1887,6 +1887,7 @@ do_common_optimization(exec_list *ir, bool linked,
|
||||
OPT(do_dead_functions, ir);
|
||||
OPT(do_structure_splitting, ir);
|
||||
}
|
||||
propagate_invariance(ir);
|
||||
OPT(do_if_simplification, ir);
|
||||
OPT(opt_flatten_nested_if_blocks, ir);
|
||||
OPT(opt_conditional_discard, ir);
|
||||
|
||||
@@ -719,6 +719,13 @@ public:
|
||||
*/
|
||||
unsigned is_unmatched_generic_inout:1;
|
||||
|
||||
/**
|
||||
* Is this varying used only by transform feedback?
|
||||
*
|
||||
* This is used by the linker to decide if its safe to pack the varying.
|
||||
*/
|
||||
unsigned is_xfb_only:1;
|
||||
|
||||
/**
|
||||
* If non-zero, then this variable may be packed along with other variables
|
||||
* into a single varying slot, so this offset should be applied when
|
||||
|
||||
@@ -124,7 +124,8 @@ void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size);
|
||||
void lower_ubo_reference(struct gl_shader *shader);
|
||||
void lower_packed_varyings(void *mem_ctx,
|
||||
unsigned locations_used, ir_variable_mode mode,
|
||||
unsigned gs_input_vertices, gl_shader *shader);
|
||||
unsigned gs_input_vertices, gl_shader *shader,
|
||||
bool disable_varying_packing, bool xfb_enabled);
|
||||
bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index);
|
||||
bool lower_vector_derefs(gl_shader *shader);
|
||||
void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader);
|
||||
@@ -138,6 +139,7 @@ bool lower_tess_level(gl_shader *shader);
|
||||
bool lower_vertex_id(gl_shader *shader);
|
||||
|
||||
bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state);
|
||||
void propagate_invariance(exec_list *instructions);
|
||||
|
||||
ir_rvalue *
|
||||
compare_index_block(exec_list *instructions, ir_variable *index,
|
||||
|
||||
@@ -826,7 +826,7 @@ namespace {
|
||||
class varying_matches
|
||||
{
|
||||
public:
|
||||
varying_matches(bool disable_varying_packing,
|
||||
varying_matches(bool disable_varying_packing, bool xfb_enabled,
|
||||
gl_shader_stage producer_stage,
|
||||
gl_shader_stage consumer_stage);
|
||||
~varying_matches();
|
||||
@@ -836,13 +836,29 @@ public:
|
||||
void store_locations() const;
|
||||
|
||||
private:
|
||||
bool is_varying_packing_safe(const glsl_type *type,
|
||||
const ir_variable *var);
|
||||
|
||||
/**
|
||||
* If true, this driver disables varying packing, so all varyings need to
|
||||
* be aligned on slot boundaries, and take up a number of slots equal to
|
||||
* their number of matrix columns times their array size.
|
||||
*
|
||||
* Packing may also be disabled because our current packing method is not
|
||||
* safe in SSO or versions of OpenGL where interpolation qualifiers are not
|
||||
* guaranteed to match across stages.
|
||||
*/
|
||||
const bool disable_varying_packing;
|
||||
|
||||
/**
|
||||
* If true, this driver has transform feedback enabled. The transform
|
||||
* feedback code requires at least some packing be done even when varying
|
||||
* packing is disabled, fortunately where transform feedback requires
|
||||
* packing it's safe to override the disabled setting. See
|
||||
* is_varying_packing_safe().
|
||||
*/
|
||||
const bool xfb_enabled;
|
||||
|
||||
/**
|
||||
* Enum representing the order in which varyings are packed within a
|
||||
* packing class.
|
||||
@@ -862,6 +878,7 @@ private:
|
||||
static unsigned compute_packing_class(const ir_variable *var);
|
||||
static packing_order_enum compute_packing_order(const ir_variable *var);
|
||||
static int match_comparator(const void *x_generic, const void *y_generic);
|
||||
static int xfb_comparator(const void *x_generic, const void *y_generic);
|
||||
|
||||
/**
|
||||
* Structure recording the relationship between a single producer output
|
||||
@@ -917,9 +934,11 @@ private:
|
||||
} /* anonymous namespace */
|
||||
|
||||
varying_matches::varying_matches(bool disable_varying_packing,
|
||||
bool xfb_enabled,
|
||||
gl_shader_stage producer_stage,
|
||||
gl_shader_stage consumer_stage)
|
||||
: disable_varying_packing(disable_varying_packing),
|
||||
xfb_enabled(xfb_enabled),
|
||||
producer_stage(producer_stage),
|
||||
consumer_stage(consumer_stage)
|
||||
{
|
||||
@@ -941,6 +960,24 @@ varying_matches::~varying_matches()
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Packing is always safe on individual arrays, structures and matrices. It is
|
||||
* also safe if the varying is only used for transform feedback.
*
* Returns false whenever the consumer stage is a tessellation evaluation or
* tessellation control shader, or the producer stage is a tessellation
* control shader. Otherwise returns true only when xfb_enabled is set and
* either \p type is an array, record or matrix, or \p var is flagged
* is_xfb_only.
|
||||
*/
|
||||
bool
|
||||
varying_matches::is_varying_packing_safe(const glsl_type *type,
|
||||
const ir_variable *var)
|
||||
{
|
||||
/* Tessellation interfaces never get the packing override. */
if (consumer_stage == MESA_SHADER_TESS_EVAL ||
|
||||
consumer_stage == MESA_SHADER_TESS_CTRL ||
|
||||
producer_stage == MESA_SHADER_TESS_CTRL)
|
||||
return false;
|
||||
|
||||
/* The override is only granted while transform feedback is enabled. */
return xfb_enabled && (type->is_array() || type->is_record() ||
|
||||
type->is_matrix() || var->data.is_xfb_only);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Record the given producer/consumer variable pair in the list of variables
|
||||
* that should later be assigned locations.
|
||||
@@ -1020,7 +1057,7 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
|
||||
= this->compute_packing_class(var);
|
||||
this->matches[this->num_matches].packing_order
|
||||
= this->compute_packing_order(var);
|
||||
if (this->disable_varying_packing) {
|
||||
if (this->disable_varying_packing && !is_varying_packing_safe(type, var)) {
|
||||
unsigned slots = type->count_attribute_slots(false);
|
||||
this->matches[this->num_matches].num_components = slots * 4;
|
||||
} else {
|
||||
@@ -1046,37 +1083,28 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
|
||||
uint64_t reserved_slots,
|
||||
bool separate_shader)
|
||||
{
|
||||
/* We disable varying sorting for separate shader programs for the
|
||||
* following reasons:
|
||||
*
|
||||
* 1/ All programs must sort the code in the same order to guarantee the
|
||||
* interface matching. However varying_matches::record() will change the
|
||||
* interpolation qualifier of some stages.
|
||||
*
|
||||
* 2/ GLSL version 4.50 removes the matching constrain on the interpolation
|
||||
* qualifier.
|
||||
*
|
||||
* From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.40 spec:
|
||||
*
|
||||
* "The type and presence of interpolation qualifiers of variables with
|
||||
* the same name declared in all linked shaders for the same cross-stage
|
||||
* interface must match, otherwise the link command will fail.
|
||||
*
|
||||
* When comparing an output from one stage to an input of a subsequent
|
||||
* stage, the input and output don't match if their interpolation
|
||||
* qualifiers (or lack thereof) are not the same."
|
||||
*
|
||||
* "It is a link-time error if, within the same stage, the interpolation
|
||||
* qualifiers of variables of the same name do not match."
|
||||
/* If packing has been disabled then we cannot safely sort the varyings by
|
||||
* class as it may mean we are using a version of OpenGL where
|
||||
* interpolation qualifiers are not guaranteed to be matching across
|
||||
* shaders, sorting in this case could result in mismatching shader
|
||||
* interfaces.
|
||||
* When packing is disabled the sort orders varyings used by transform
|
||||
* feedback first, but also depends on *undefined behaviour* of qsort to
|
||||
* reverse the order of the varyings. See: xfb_comparator().
|
||||
*/
|
||||
if (!separate_shader) {
|
||||
if (!this->disable_varying_packing) {
|
||||
/* Sort varying matches into an order that makes them easy to pack. */
|
||||
qsort(this->matches, this->num_matches, sizeof(*this->matches),
|
||||
&varying_matches::match_comparator);
|
||||
} else {
|
||||
/* Only sort varyings that are only used by transform feedback. */
|
||||
qsort(this->matches, this->num_matches, sizeof(*this->matches),
|
||||
&varying_matches::xfb_comparator);
|
||||
}
|
||||
|
||||
unsigned generic_location = 0;
|
||||
unsigned generic_patch_location = MAX_VARYING*4;
|
||||
bool previous_var_xfb_only = false;
|
||||
|
||||
for (unsigned i = 0; i < this->num_matches; i++) {
|
||||
unsigned *location = &generic_location;
|
||||
@@ -1100,16 +1128,30 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
|
||||
/* Advance to the next slot if this varying has a different packing
|
||||
* class than the previous one, and we're not already on a slot
|
||||
* boundary.
|
||||
*
|
||||
* Also advance to the next slot if packing is disabled. This makes sure
|
||||
* we don't assign varyings the same locations which is possible
|
||||
* because we still pack individual arrays, records and matrices even
|
||||
* when packing is disabled. Note we don't advance to the next slot if
|
||||
* we can pack varyings together that are only used for transform
|
||||
* feedback.
|
||||
*/
|
||||
if (i > 0 &&
|
||||
this->matches[i - 1].packing_class
|
||||
!= this->matches[i].packing_class) {
|
||||
if ((this->disable_varying_packing &&
|
||||
!(previous_var_xfb_only && var->data.is_xfb_only)) ||
|
||||
(i > 0 && this->matches[i - 1].packing_class
|
||||
!= this->matches[i].packing_class )) {
|
||||
*location = ALIGN(*location, 4);
|
||||
}
|
||||
|
||||
previous_var_xfb_only = var->data.is_xfb_only;
|
||||
|
||||
unsigned num_elements = type->count_attribute_slots(is_vertex_input);
|
||||
unsigned slot_end = this->disable_varying_packing ? 4 :
|
||||
type->without_array()->vector_elements;
|
||||
unsigned slot_end;
|
||||
if (this->disable_varying_packing &&
|
||||
!is_varying_packing_safe(type, var))
|
||||
slot_end = 4;
|
||||
else
|
||||
slot_end = type->without_array()->vector_elements;
|
||||
slot_end += *location - 1;
|
||||
|
||||
/* FIXME: We could be smarter in the below code and loop back over
|
||||
@@ -1133,7 +1175,8 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
|
||||
/* Increase the slot to make sure there is enough room for next
|
||||
* array element.
|
||||
*/
|
||||
if (this->disable_varying_packing)
|
||||
if (this->disable_varying_packing &&
|
||||
!is_varying_packing_safe(type, var))
|
||||
slot_end += 4;
|
||||
else
|
||||
slot_end += type->without_array()->vector_elements;
|
||||
@@ -1258,6 +1301,32 @@ varying_matches::match_comparator(const void *x_generic, const void *y_generic)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Comparison function passed to qsort() to sort varyings used only by
|
||||
* transform feedback when packing of other varyings is disabled.
*
* Only the first element is inspected: when its producer variable exists and
* is flagged is_xfb_only the result of match_comparator() is returned,
* otherwise the two elements are reported as equivalent (0).
*
* NOTE(review): because \p y_generic is never examined here, swapping the
* arguments can change a non-zero result into 0, so the ordering is not
* symmetric. Confirm this is acceptable for the qsort() implementation in
* use.
|
||||
*/
|
||||
int
|
||||
varying_matches::xfb_comparator(const void *x_generic, const void *y_generic)
|
||||
{
|
||||
const match *x = (const match *) x_generic;
|
||||
|
||||
/* Defer to the regular packing comparator for xfb-only varyings. */
if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
|
||||
return match_comparator(x_generic, y_generic);
|
||||
|
||||
/* FIXME: When the comparator returns 0 it means the elements being
|
||||
* compared are equivalent. However the qsort documentation says:
|
||||
*
|
||||
* "The order of equivalent elements is undefined."
|
||||
*
|
||||
* In practice the sort ends up reversing the order of the varyings which
|
||||
* means locations are also assigned in this reversed order and happens to
|
||||
* be what we want. This is also what is happening in
|
||||
* varying_matches::match_comparator().
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Is the given variable a varying variable to be counted against the
|
||||
* limit in ctx->Const.MaxVarying?
|
||||
@@ -1573,26 +1642,60 @@ assign_varying_locations(struct gl_context *ctx,
|
||||
unsigned num_tfeedback_decls,
|
||||
tfeedback_decl *tfeedback_decls)
|
||||
{
|
||||
if (ctx->Const.DisableVaryingPacking) {
|
||||
/* Transform feedback code assumes varyings are packed, so if the driver
|
||||
* has disabled varying packing, make sure it does not support transform
|
||||
* feedback.
|
||||
*/
|
||||
assert(!ctx->Extensions.EXT_transform_feedback);
|
||||
}
|
||||
|
||||
/* Tessellation shaders treat inputs and outputs as shared memory and can
|
||||
* access inputs and outputs of other invocations.
|
||||
* Therefore, they can't be lowered to temps easily (and definitely not
|
||||
* efficiently).
|
||||
*/
|
||||
bool disable_varying_packing =
|
||||
ctx->Const.DisableVaryingPacking ||
|
||||
bool unpackable_tess =
|
||||
(consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
|
||||
(consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
|
||||
(producer && producer->Stage == MESA_SHADER_TESS_CTRL);
|
||||
|
||||
varying_matches matches(disable_varying_packing,
|
||||
/* Transform feedback code assumes varying arrays are packed, so if the
|
||||
* driver has disabled varying packing, make sure to at least enable
|
||||
* packing required by transform feedback.
|
||||
*/
|
||||
bool xfb_enabled =
|
||||
ctx->Extensions.EXT_transform_feedback && !unpackable_tess;
|
||||
|
||||
/* Disable varying packing for GL 4.4+ as there is no guarantee
|
||||
* that interpolation qualifiers will match between shaders in these
|
||||
* versions. We also disable packing on outward facing interfaces for
|
||||
* SSO because in ES we need to retain the unpacked varying information
|
||||
* for draw time validation. For desktop GL we could allow packing for
|
||||
* versions < 4.4 but it's just safer not to do packing.
|
||||
*
|
||||
* Packing is still enabled on individual arrays, structs, and matrices as
|
||||
* these are required by the transform feedback code and it is still safe
|
||||
* to do so. We also enable packing when a varying is only used for
|
||||
* transform feedback and it's not an SSO.
|
||||
*
|
||||
* Varying packing currently only packs together varyings with matching
|
||||
* interpolation qualifiers as the backends assume all packed components
|
||||
* are to be processed in the same way. Therefore we cannot do packing in
|
||||
* these versions of GL without the risk of mismatching interfaces.
|
||||
*
|
||||
* From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
|
||||
*
|
||||
* "The type and presence of interpolation qualifiers of variables with
|
||||
* the same name declared in all linked shaders for the same cross-stage
|
||||
* interface must match, otherwise the link command will fail.
|
||||
*
|
||||
* When comparing an output from one stage to an input of a subsequent
|
||||
* stage, the input and output don't match if their interpolation
|
||||
* qualifiers (or lack thereof) are not the same."
|
||||
*
|
||||
* This text was also in at least revision 7 of the 4.40 spec but is no
|
||||
* longer in revision 9 and not in the 4.50 spec.
|
||||
*/
|
||||
bool disable_varying_packing =
|
||||
ctx->Const.DisableVaryingPacking || unpackable_tess;
|
||||
if ((ctx->API == API_OPENGL_CORE && ctx->Version >= 44) ||
|
||||
(prog->SeparateShader && (producer == NULL || consumer == NULL)))
|
||||
disable_varying_packing = true;
|
||||
|
||||
varying_matches matches(disable_varying_packing, xfb_enabled,
|
||||
producer ? producer->Stage : (gl_shader_stage)-1,
|
||||
consumer ? consumer->Stage : (gl_shader_stage)-1);
|
||||
hash_table *tfeedback_candidates
|
||||
@@ -1711,8 +1814,10 @@ assign_varying_locations(struct gl_context *ctx,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout)
|
||||
if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
|
||||
matched_candidate->toplevel_var->data.is_xfb_only = 1;
|
||||
matches.record(matched_candidate->toplevel_var, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
const uint64_t reserved_slots =
|
||||
@@ -1784,15 +1889,16 @@ assign_varying_locations(struct gl_context *ctx,
|
||||
ir_var_shader_in);
|
||||
}
|
||||
|
||||
if (!disable_varying_packing) {
|
||||
if (producer) {
|
||||
lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out,
|
||||
0, producer);
|
||||
}
|
||||
if (consumer) {
|
||||
lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in,
|
||||
consumer_vertices, consumer);
|
||||
}
|
||||
if (producer) {
|
||||
lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out,
|
||||
0, producer, disable_varying_packing,
|
||||
xfb_enabled);
|
||||
}
|
||||
|
||||
if (consumer) {
|
||||
lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in,
|
||||
consumer_vertices, consumer,
|
||||
disable_varying_packing, xfb_enabled);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
@@ -168,7 +168,9 @@ public:
|
||||
ir_variable_mode mode,
|
||||
unsigned gs_input_vertices,
|
||||
exec_list *out_instructions,
|
||||
exec_list *out_variables);
|
||||
exec_list *out_variables,
|
||||
bool disable_varying_packing,
|
||||
bool xfb_enabled);
|
||||
|
||||
void run(struct gl_shader *shader);
|
||||
|
||||
@@ -231,6 +233,9 @@ private:
|
||||
* Exec list into which the visitor should insert any new variables.
|
||||
*/
|
||||
exec_list *out_variables;
|
||||
|
||||
bool disable_varying_packing;
|
||||
bool xfb_enabled;
|
||||
};
|
||||
|
||||
} /* anonymous namespace */
|
||||
@@ -238,7 +243,8 @@ private:
|
||||
lower_packed_varyings_visitor::lower_packed_varyings_visitor(
|
||||
void *mem_ctx, unsigned locations_used, ir_variable_mode mode,
|
||||
unsigned gs_input_vertices, exec_list *out_instructions,
|
||||
exec_list *out_variables)
|
||||
exec_list *out_variables, bool disable_varying_packing,
|
||||
bool xfb_enabled)
|
||||
: mem_ctx(mem_ctx),
|
||||
locations_used(locations_used),
|
||||
packed_varyings((ir_variable **)
|
||||
@@ -247,7 +253,9 @@ lower_packed_varyings_visitor::lower_packed_varyings_visitor(
|
||||
mode(mode),
|
||||
gs_input_vertices(gs_input_vertices),
|
||||
out_instructions(out_instructions),
|
||||
out_variables(out_variables)
|
||||
out_variables(out_variables),
|
||||
disable_varying_packing(disable_varying_packing),
|
||||
xfb_enabled(xfb_enabled)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -656,7 +664,18 @@ lower_packed_varyings_visitor::needs_lowering(ir_variable *var)
|
||||
if (var->data.explicit_location)
|
||||
return false;
|
||||
|
||||
const glsl_type *type = var->type->without_array();
|
||||
/* Override disable_varying_packing if the var is only used by transform
|
||||
* feedback. Also override it if transform feedback is enabled and the
|
||||
* variable is an array, struct or matrix as the elements of these types
|
||||
* will always have the same interpolation and are therefore safe to pack.
|
||||
*/
|
||||
const glsl_type *type = var->type;
|
||||
if (disable_varying_packing && !var->data.is_xfb_only &&
|
||||
!((type->is_array() || type->is_record() || type->is_matrix()) &&
|
||||
xfb_enabled))
|
||||
return false;
|
||||
|
||||
type = type->without_array();
|
||||
if (type->vector_elements == 4 && !type->is_double())
|
||||
return false;
|
||||
return true;
|
||||
@@ -709,7 +728,8 @@ lower_packed_varyings_gs_splicer::visit_leave(ir_emit_vertex *ev)
|
||||
void
|
||||
lower_packed_varyings(void *mem_ctx, unsigned locations_used,
|
||||
ir_variable_mode mode, unsigned gs_input_vertices,
|
||||
gl_shader *shader)
|
||||
gl_shader *shader, bool disable_varying_packing,
|
||||
bool xfb_enabled)
|
||||
{
|
||||
exec_list *instructions = shader->ir;
|
||||
ir_function *main_func = shader->symbols->get_function("main");
|
||||
@@ -720,7 +740,9 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
|
||||
lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode,
|
||||
gs_input_vertices,
|
||||
&new_instructions,
|
||||
&new_variables);
|
||||
&new_variables,
|
||||
disable_varying_packing,
|
||||
xfb_enabled);
|
||||
visitor.run(shader);
|
||||
if (mode == ir_var_shader_out) {
|
||||
if (shader->Stage == MESA_SHADER_GEOMETRY) {
|
||||
|
||||
@@ -58,6 +58,8 @@ public:
|
||||
{
|
||||
}
|
||||
|
||||
virtual ir_visitor_status visit_enter(ir_assignment *ir);
|
||||
|
||||
ir_rvalue *handle_expression(ir_expression *ir);
|
||||
void handle_rvalue(ir_rvalue **rvalue);
|
||||
bool reassociate_constant(ir_expression *ir1,
|
||||
@@ -80,6 +82,23 @@ public:
|
||||
|
||||
} /* unnamed namespace */
|
||||
|
||||
/**
* Decide whether the algebraic pass may look inside an assignment.
*
* Returns visit_continue_with_parent when the variable referenced by the
* left-hand side is flagged invariant or precise, and visit_continue
* otherwise.
*
* NOTE(review): the result of variable_referenced() is dereferenced without
* a NULL check; presumably an assignment's lhs always references a
* variable - confirm.
*/
ir_visitor_status
|
||||
ir_algebraic_visitor::visit_enter(ir_assignment *ir)
|
||||
{
|
||||
ir_variable *var = ir->lhs->variable_referenced();
|
||||
if (var->data.invariant || var->data.precise) {
|
||||
/* If we're assigning to an invariant or precise variable, just bail.
|
||||
* Most of the algebraic optimizations aren't precision-safe.
|
||||
*
|
||||
* FINISHME: Find out which optimizations are precision-safe and enable
|
||||
* them only for invariant or precise trees.
|
||||
*/
|
||||
return visit_continue_with_parent;
|
||||
} else {
|
||||
return visit_continue;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_vec_zero(ir_constant *ir)
|
||||
{
|
||||
|
||||
@@ -131,6 +131,8 @@ public:
|
||||
progress = false;
|
||||
}
|
||||
|
||||
virtual ir_visitor_status visit_enter(ir_assignment *ir);
|
||||
|
||||
void handle_rvalue(ir_rvalue **rvalue);
|
||||
|
||||
bool progress;
|
||||
@@ -146,6 +148,20 @@ struct is_reduction_data {
|
||||
|
||||
} /* anonymous namespace */
|
||||
|
||||
/**
* Decide whether the rebalancing pass may look inside an assignment.
*
* Returns visit_continue_with_parent when the variable referenced by the
* left-hand side is flagged invariant or precise, and visit_continue
* otherwise.
*
* NOTE(review): the result of variable_referenced() is dereferenced without
* a NULL check; presumably an assignment's lhs always references a
* variable - confirm.
*/
ir_visitor_status
|
||||
ir_rebalance_visitor::visit_enter(ir_assignment *ir)
|
||||
{
|
||||
ir_variable *var = ir->lhs->variable_referenced();
|
||||
if (var->data.invariant || var->data.precise) {
|
||||
/* If we're assigning to an invariant or precise variable, just bail.
|
||||
* Tree rebalancing (reassociation) isn't precision-safe.
|
||||
*/
|
||||
return visit_continue_with_parent;
|
||||
} else {
|
||||
return visit_continue;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
is_reduction_operation(ir_expression_operation operation)
|
||||
{
|
||||
|
||||
@@ -0,0 +1,125 @@
|
||||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file propagate_invariance.cpp
|
||||
* Propagate the "invariant" and "precise" qualifiers to variables used to
|
||||
* compute invariant or precise values.
|
||||
*
|
||||
* The GLSL spec (depending on what version you read) says, among the
|
||||
* conditions for getting bit-for-bit the same values on an invariant output:
|
||||
*
|
||||
* "All operations in the consuming expressions and any intermediate
|
||||
* expressions must be the same, with the same order of operands and same
|
||||
* associativity, to give the same order of evaluation."
|
||||
*
|
||||
* This effectively means that if a variable is used to compute an invariant
|
||||
* value then that variable becomes invariant. The same should apply to the
|
||||
* "precise" qualifier.
|
||||
*/
|
||||
|
||||
#include "ir.h"
|
||||
#include "ir_visitor.h"
|
||||
#include "ir_rvalue_visitor.h"
|
||||
#include "ir_optimization.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
|
||||
namespace {
|
||||
|
||||
/**
* Hierarchical visitor that copies the "invariant" and "precise" flags from
* the destination of an assignment onto every variable dereferenced while
* that assignment is being visited.
*/
class ir_invariance_propagation_visitor : public ir_hierarchical_visitor {
|
||||
public:
|
||||
ir_invariance_propagation_visitor()
|
||||
{
|
||||
this->progress = false;
|
||||
this->dst_var = NULL;
|
||||
}
|
||||
|
||||
virtual ~ir_invariance_propagation_visitor()
|
||||
{
|
||||
/* empty */
|
||||
}
|
||||
|
||||
virtual ir_visitor_status visit_enter(ir_assignment *ir);
|
||||
virtual ir_visitor_status visit_leave(ir_assignment *ir);
|
||||
virtual ir_visitor_status visit(ir_dereference_variable *ir);
|
||||
|
||||
/* Destination of the assignment currently being visited when that
* destination is invariant or precise; NULL otherwise.
*/
ir_variable *dst_var;
|
||||
/* Set to true by visit() whenever a variable newly gains a flag. */
bool progress;
|
||||
};
|
||||
|
||||
} /* unnamed namespace */
|
||||
|
||||
/**
* Start visiting an assignment.
*
* When the variable referenced by the left-hand side is invariant or precise
* it is remembered in dst_var and visit_continue is returned; otherwise
* visit_continue_with_parent is returned and dst_var stays NULL.
*
* NOTE(review): the result of variable_referenced() is dereferenced without
* a NULL check - confirm it cannot be NULL for an assignment's lhs.
*/
ir_visitor_status
|
||||
ir_invariance_propagation_visitor::visit_enter(ir_assignment *ir)
|
||||
{
|
||||
/* dst_var is expected to have been cleared since the last assignment. */
assert(this->dst_var == NULL);
|
||||
ir_variable *var = ir->lhs->variable_referenced();
|
||||
if (var->data.invariant || var->data.precise) {
|
||||
this->dst_var = var;
|
||||
return visit_continue;
|
||||
} else {
|
||||
return visit_continue_with_parent;
|
||||
}
|
||||
}
|
||||
|
||||
/**
* Finish visiting an assignment: clear dst_var so that later dereferences
* are not attributed to this assignment. Always returns visit_continue.
*/
ir_visitor_status
|
||||
ir_invariance_propagation_visitor::visit_leave(ir_assignment *ir)
|
||||
{
|
||||
this->dst_var = NULL;
|
||||
|
||||
return visit_continue;
|
||||
}
|
||||
|
||||
/**
* Visit a variable dereference.
*
* Does nothing when no invariant/precise destination is recorded in dst_var.
* Otherwise each flag (invariant, precise) set on dst_var is also set on the
* dereferenced variable, and progress is raised when the variable did not
* already carry that flag. Always returns visit_continue.
*/
ir_visitor_status
|
||||
ir_invariance_propagation_visitor::visit(ir_dereference_variable *ir)
|
||||
{
|
||||
if (this->dst_var == NULL)
|
||||
return visit_continue;
|
||||
|
||||
if (this->dst_var->data.invariant) {
|
||||
/* Only report progress when the flag actually changes. */
if (!ir->var->data.invariant)
|
||||
this->progress = true;
|
||||
|
||||
ir->var->data.invariant = true;
|
||||
}
|
||||
|
||||
if (this->dst_var->data.precise) {
|
||||
/* Only report progress when the flag actually changes. */
if (!ir->var->data.precise)
|
||||
this->progress = true;
|
||||
|
||||
ir->var->data.precise = true;
|
||||
}
|
||||
|
||||
return visit_continue;
|
||||
}
|
||||
|
||||
/**
* Propagate the invariant and precise flags through \p instructions.
*
* Runs ir_invariance_propagation_visitor over the list repeatedly and stops
* after the first pass in which the visitor reports no progress.
*/
void
|
||||
propagate_invariance(exec_list *instructions)
|
||||
{
|
||||
ir_invariance_propagation_visitor visitor;
|
||||
|
||||
do {
|
||||
/* Reset before each pass so only this pass's changes are counted. */
visitor.progress = false;
|
||||
visit_list_elements(&visitor, instructions);
|
||||
} while (visitor.progress);
|
||||
}
|
||||
@@ -731,7 +731,7 @@ nir_visitor::visit(ir_call *ir)
|
||||
ir_dereference *param =
|
||||
(ir_dereference *) ir->actual_parameters.get_head();
|
||||
instr->variables[0] = evaluate_deref(&instr->instr, param);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
|
||||
nir_builder_instr_insert(&b, &instr->instr);
|
||||
break;
|
||||
}
|
||||
@@ -765,7 +765,7 @@ nir_visitor::visit(ir_call *ir)
|
||||
const nir_intrinsic_info *info =
|
||||
&nir_intrinsic_infos[instr->intrinsic];
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest,
|
||||
info->dest_components, NULL);
|
||||
info->dest_components, 32, NULL);
|
||||
}
|
||||
|
||||
if (op == nir_intrinsic_image_size ||
|
||||
@@ -826,7 +826,7 @@ nir_visitor::visit(ir_call *ir)
|
||||
nir_builder_instr_insert(&b, &instr->instr);
|
||||
break;
|
||||
case nir_intrinsic_shader_clock:
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
|
||||
nir_builder_instr_insert(&b, &instr->instr);
|
||||
break;
|
||||
case nir_intrinsic_store_ssbo: {
|
||||
@@ -867,7 +867,7 @@ nir_visitor::visit(ir_call *ir)
|
||||
|
||||
/* Setup destination register */
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest,
|
||||
type->vector_elements, NULL);
|
||||
type->vector_elements, 32, NULL);
|
||||
|
||||
/* Insert the created nir instruction now since in the case of boolean
|
||||
* result we will need to emit another instruction after it
|
||||
@@ -890,7 +890,7 @@ nir_visitor::visit(ir_call *ir)
|
||||
load_ssbo_compare->src[1].swizzle[i] = 0;
|
||||
nir_ssa_dest_init(&load_ssbo_compare->instr,
|
||||
&load_ssbo_compare->dest.dest,
|
||||
type->vector_elements, NULL);
|
||||
type->vector_elements, 32, NULL);
|
||||
load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1;
|
||||
nir_builder_instr_insert(&b, &load_ssbo_compare->instr);
|
||||
dest = &load_ssbo_compare->dest.dest;
|
||||
@@ -936,7 +936,7 @@ nir_visitor::visit(ir_call *ir)
|
||||
/* Atomic result */
|
||||
assert(ir->return_deref);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest,
|
||||
ir->return_deref->type->vector_elements, NULL);
|
||||
ir->return_deref->type->vector_elements, 32, NULL);
|
||||
nir_builder_instr_insert(&b, &instr->instr);
|
||||
break;
|
||||
}
|
||||
@@ -951,8 +951,9 @@ nir_visitor::visit(ir_call *ir)
|
||||
instr->num_components = type->vector_elements;
|
||||
|
||||
/* Setup destination register */
|
||||
unsigned bit_size = glsl_get_bit_size(type->base_type);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest,
|
||||
type->vector_elements, NULL);
|
||||
type->vector_elements, bit_size, NULL);
|
||||
|
||||
nir_builder_instr_insert(&b, &instr->instr);
|
||||
break;
|
||||
@@ -1013,8 +1014,10 @@ nir_visitor::visit(ir_call *ir)
|
||||
|
||||
/* Atomic result */
|
||||
assert(ir->return_deref);
|
||||
unsigned bit_size = glsl_get_bit_size(ir->return_deref->type->base_type);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest,
|
||||
ir->return_deref->type->vector_elements, NULL);
|
||||
ir->return_deref->type->vector_elements,
|
||||
bit_size, NULL);
|
||||
nir_builder_instr_insert(&b, &instr->instr);
|
||||
break;
|
||||
}
|
||||
@@ -1061,6 +1064,9 @@ nir_visitor::visit(ir_assignment *ir)
|
||||
{
|
||||
unsigned num_components = ir->lhs->type->vector_elements;
|
||||
|
||||
b.exact = ir->lhs->variable_referenced()->data.invariant ||
|
||||
ir->lhs->variable_referenced()->data.precise;
|
||||
|
||||
if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) &&
|
||||
(ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) {
|
||||
/* We're doing a plain-as-can-be copy, so emit a copy_var */
|
||||
@@ -1163,7 +1169,7 @@ nir_visitor::add_instr(nir_instr *instr, unsigned num_components)
|
||||
nir_dest *dest = get_instr_dest(instr);
|
||||
|
||||
if (dest)
|
||||
nir_ssa_dest_init(instr, dest, num_components, NULL);
|
||||
nir_ssa_dest_init(instr, dest, num_components, 32, NULL);
|
||||
|
||||
nir_builder_instr_insert(&b, instr);
|
||||
|
||||
@@ -1203,6 +1209,7 @@ nir_visitor::visit(ir_expression *ir)
|
||||
nir_intrinsic_instr *load =
|
||||
nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo);
|
||||
load->num_components = ir->type->vector_elements;
|
||||
load->dest.ssa.bit_size = glsl_get_bit_size(ir->type->base_type);
|
||||
load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
|
||||
load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
|
||||
add_instr(&load->instr, ir->type->vector_elements);
|
||||
|
||||
+16
-17
@@ -70,6 +70,7 @@ reg_create(void *mem_ctx, struct exec_list *list)
|
||||
list_inithead(&reg->if_uses);
|
||||
|
||||
reg->num_components = 0;
|
||||
reg->bit_size = 32;
|
||||
reg->num_array_elems = 0;
|
||||
reg->is_packed = false;
|
||||
reg->name = NULL;
|
||||
@@ -473,7 +474,7 @@ nir_load_const_instr_create(nir_shader *shader, unsigned num_components)
|
||||
nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr);
|
||||
instr_init(&instr->instr, nir_instr_type_load_const);
|
||||
|
||||
nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
|
||||
nir_ssa_def_init(&instr->instr, &instr->def, num_components, 32, NULL);
|
||||
|
||||
return instr;
|
||||
}
|
||||
@@ -562,7 +563,7 @@ nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components)
|
||||
nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
|
||||
instr_init(&instr->instr, nir_instr_type_ssa_undef);
|
||||
|
||||
nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
|
||||
nir_ssa_def_init(&instr->instr, &instr->def, num_components, 32, NULL);
|
||||
|
||||
return instr;
|
||||
}
|
||||
@@ -699,10 +700,10 @@ nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref)
|
||||
case GLSL_TYPE_FLOAT:
|
||||
case GLSL_TYPE_INT:
|
||||
case GLSL_TYPE_UINT:
|
||||
load->value.u[i] = constant->value.u[matrix_offset + i];
|
||||
load->value.u32[i] = constant->value.u[matrix_offset + i];
|
||||
break;
|
||||
case GLSL_TYPE_BOOL:
|
||||
load->value.u[i] = constant->value.b[matrix_offset + i] ?
|
||||
load->value.u32[i] = constant->value.b[matrix_offset + i] ?
|
||||
NIR_TRUE : NIR_FALSE;
|
||||
break;
|
||||
default:
|
||||
@@ -731,18 +732,11 @@ reduce_cursor(nir_cursor cursor)
|
||||
{
|
||||
switch (cursor.option) {
|
||||
case nir_cursor_before_block:
|
||||
assert(nir_cf_node_prev(&cursor.block->cf_node) == NULL ||
|
||||
nir_cf_node_prev(&cursor.block->cf_node)->type != nir_cf_node_block);
|
||||
if (exec_list_is_empty(&cursor.block->instr_list)) {
|
||||
/* Empty block. After is as good as before. */
|
||||
cursor.option = nir_cursor_after_block;
|
||||
} else {
|
||||
/* Try to switch to after the previous block if there is one.
|
||||
* (This isn't likely, but it can happen.)
|
||||
*/
|
||||
nir_cf_node *prev_node = nir_cf_node_prev(&cursor.block->cf_node);
|
||||
if (prev_node && prev_node->type == nir_cf_node_block) {
|
||||
cursor.block = nir_cf_node_as_block(prev_node);
|
||||
cursor.option = nir_cursor_after_block;
|
||||
}
|
||||
}
|
||||
return cursor;
|
||||
|
||||
@@ -1379,15 +1373,18 @@ nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
|
||||
src_add_all_uses(dest->reg.indirect, instr, NULL);
|
||||
}
|
||||
|
||||
/* note: does *not* take ownership of 'name' */
|
||||
void
|
||||
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
|
||||
unsigned num_components, const char *name)
|
||||
unsigned num_components,
|
||||
unsigned bit_size, const char *name)
|
||||
{
|
||||
def->name = name;
|
||||
def->name = ralloc_strdup(instr, name);
|
||||
def->parent_instr = instr;
|
||||
list_inithead(&def->uses);
|
||||
list_inithead(&def->if_uses);
|
||||
def->num_components = num_components;
|
||||
def->bit_size = bit_size;
|
||||
|
||||
if (instr->block) {
|
||||
nir_function_impl *impl =
|
||||
@@ -1399,12 +1396,14 @@ nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
|
||||
}
|
||||
}
|
||||
|
||||
/* note: does *not* take ownership of 'name' */
|
||||
void
|
||||
nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
|
||||
unsigned num_components, const char *name)
|
||||
unsigned num_components, unsigned bit_size,
|
||||
const char *name)
|
||||
{
|
||||
dest->is_ssa = true;
|
||||
nir_ssa_def_init(instr, &dest->ssa, num_components, name);
|
||||
nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size, name);
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
+70
-7
@@ -101,6 +101,7 @@ union nir_constant_data {
|
||||
int i[16];
|
||||
float f[16];
|
||||
bool b[16];
|
||||
double d[16];
|
||||
};
|
||||
|
||||
typedef struct nir_constant {
|
||||
@@ -381,6 +382,9 @@ typedef struct nir_register {
|
||||
unsigned num_components; /** < number of vector components */
|
||||
unsigned num_array_elems; /** < size of array (0 for no array) */
|
||||
|
||||
/* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
|
||||
uint8_t bit_size;
|
||||
|
||||
/** generic register index. */
|
||||
unsigned index;
|
||||
|
||||
@@ -488,6 +492,9 @@ typedef struct nir_ssa_def {
|
||||
struct list_head if_uses;
|
||||
|
||||
uint8_t num_components;
|
||||
|
||||
/* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
|
||||
uint8_t bit_size;
|
||||
} nir_ssa_def;
|
||||
|
||||
struct nir_src;
|
||||
@@ -594,6 +601,18 @@ nir_dest_for_reg(nir_register *reg)
|
||||
return dest;
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
nir_src_bit_size(nir_src src)
|
||||
{
|
||||
return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size;
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
nir_dest_bit_size(nir_dest dest)
|
||||
{
|
||||
return dest.is_ssa ? dest.ssa.bit_size : dest.reg.reg->bit_size;
|
||||
}
|
||||
|
||||
void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
|
||||
void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
|
||||
|
||||
@@ -649,9 +668,36 @@ typedef enum {
|
||||
nir_type_float,
|
||||
nir_type_int,
|
||||
nir_type_uint,
|
||||
nir_type_bool
|
||||
nir_type_bool,
|
||||
nir_type_bool32 = 32 | nir_type_bool,
|
||||
nir_type_int8 = 8 | nir_type_int,
|
||||
nir_type_int16 = 16 | nir_type_int,
|
||||
nir_type_int32 = 32 | nir_type_int,
|
||||
nir_type_int64 = 64 | nir_type_int,
|
||||
nir_type_uint8 = 8 | nir_type_uint,
|
||||
nir_type_uint16 = 16 | nir_type_uint,
|
||||
nir_type_uint32 = 32 | nir_type_uint,
|
||||
nir_type_uint64 = 64 | nir_type_uint,
|
||||
nir_type_float16 = 16 | nir_type_float,
|
||||
nir_type_float32 = 32 | nir_type_float,
|
||||
nir_type_float64 = 64 | nir_type_float,
|
||||
} nir_alu_type;
|
||||
|
||||
#define NIR_ALU_TYPE_SIZE_MASK 0xfffffff8
|
||||
#define NIR_ALU_TYPE_BASE_TYPE_MASK 0x00000007
|
||||
|
||||
static inline unsigned
|
||||
nir_alu_type_get_type_size(nir_alu_type type)
|
||||
{
|
||||
return type & NIR_ALU_TYPE_SIZE_MASK;
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
nir_alu_type_get_base_type(nir_alu_type type)
|
||||
{
|
||||
return type & NIR_ALU_TYPE_BASE_TYPE_MASK;
|
||||
}
|
||||
|
||||
typedef enum {
|
||||
NIR_OP_IS_COMMUTATIVE = (1 << 0),
|
||||
NIR_OP_IS_ASSOCIATIVE = (1 << 1),
|
||||
@@ -708,6 +754,17 @@ extern const nir_op_info nir_op_infos[nir_num_opcodes];
|
||||
typedef struct nir_alu_instr {
|
||||
nir_instr instr;
|
||||
nir_op op;
|
||||
|
||||
/** Indicates that this ALU instruction generates an exact value
|
||||
*
|
||||
* This is kind of a mixture of GLSL "precise" and "invariant" and not
|
||||
* really equivalent to either. This indicates that the value generated by
|
||||
* this operation is high-precision and any code transformations that touch
|
||||
* it must ensure that the resulting value is bit-for-bit identical to the
|
||||
* original.
|
||||
*/
|
||||
bool exact;
|
||||
|
||||
nir_alu_dest dest;
|
||||
nir_alu_src src[];
|
||||
} nir_alu_instr;
|
||||
@@ -1218,9 +1275,12 @@ nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type)
|
||||
|
||||
typedef struct {
|
||||
union {
|
||||
float f[4];
|
||||
int32_t i[4];
|
||||
uint32_t u[4];
|
||||
float f32[4];
|
||||
double f64[4];
|
||||
int32_t i32[4];
|
||||
uint32_t u32[4];
|
||||
int64_t i64[4];
|
||||
uint64_t u64[4];
|
||||
};
|
||||
} nir_const_value;
|
||||
|
||||
@@ -2061,9 +2121,11 @@ void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
|
||||
nir_dest new_dest);
|
||||
|
||||
void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
|
||||
unsigned num_components, const char *name);
|
||||
unsigned num_components, unsigned bit_size,
|
||||
const char *name);
|
||||
void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
|
||||
unsigned num_components, const char *name);
|
||||
unsigned num_components, unsigned bit_size,
|
||||
const char *name);
|
||||
void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
|
||||
void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
|
||||
nir_instr *after_me);
|
||||
@@ -2094,9 +2156,10 @@ void nir_index_blocks(nir_function_impl *impl);
|
||||
void nir_print_shader(nir_shader *shader, FILE *fp);
|
||||
void nir_print_instr(const nir_instr *instr, FILE *fp);
|
||||
|
||||
nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s);
|
||||
nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s);
|
||||
nir_function_impl *nir_function_impl_clone(const nir_function_impl *fi);
|
||||
nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var);
|
||||
nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader);
|
||||
|
||||
#ifdef DEBUG
|
||||
void nir_validate_shader(nir_shader *shader);
|
||||
|
||||
@@ -63,12 +63,13 @@ class Value(object):
|
||||
static const ${val.c_type} ${val.name} = {
|
||||
{ ${val.type_enum} },
|
||||
% if isinstance(val, Constant):
|
||||
{ ${hex(val)} /* ${val.value} */ },
|
||||
${val.type()}, { ${hex(val)} /* ${val.value} */ },
|
||||
% elif isinstance(val, Variable):
|
||||
${val.index}, /* ${val.var_name} */
|
||||
${'true' if val.is_constant else 'false'},
|
||||
nir_type_${ val.required_type or 'invalid' },
|
||||
${val.type() or 'nir_type_invalid' },
|
||||
% elif isinstance(val, Expression):
|
||||
${'true' if val.inexact else 'false'},
|
||||
nir_op_${val.opcode},
|
||||
{ ${', '.join(src.c_ptr for src in val.sources)} },
|
||||
% endif
|
||||
@@ -107,10 +108,18 @@ class Constant(Value):
|
||||
if isinstance(self.value, (int, long)):
|
||||
return hex(self.value)
|
||||
elif isinstance(self.value, float):
|
||||
return hex(struct.unpack('I', struct.pack('f', self.value))[0])
|
||||
return hex(struct.unpack('Q', struct.pack('d', self.value))[0])
|
||||
else:
|
||||
assert False
|
||||
|
||||
def type(self):
    """Return the nir_alu_type name matching this constant's Python value.

    bool maps to "nir_type_bool32", int/long to "nir_type_int" and float
    to "nir_type_float".  Any other value type yields None.
    """
    value = self.value
    # bool must be tested first: in Python a bool is also an int.
    if isinstance(value, bool):
        return "nir_type_bool32"
    if isinstance(value, (int, long)):
        return "nir_type_int"
    if isinstance(value, float):
        return "nir_type_float"
    return None
|
||||
|
||||
_var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)(?:@(?P<type>\w+))?")
|
||||
|
||||
class Variable(Value):
|
||||
@@ -129,12 +138,26 @@ class Variable(Value):
|
||||
|
||||
self.index = varset[self.var_name]
|
||||
|
||||
def type(self):
    """Return the nir_alu_type name for this variable's required type.

    'bool' maps to "nir_type_bool32", 'int' and 'unsigned' both map to
    "nir_type_int", and 'float' maps to "nir_type_float".  Any other
    required_type (including None) yields None.
    """
    required = self.required_type
    if required == 'bool':
        return "nir_type_bool32"
    if required == 'int' or required == 'unsigned':
        return "nir_type_int"
    if required == 'float':
        return "nir_type_float"
    return None
|
||||
|
||||
_opcode_re = re.compile(r"(?P<inexact>~)?(?P<opcode>\w+)")
|
||||
|
||||
class Expression(Value):
|
||||
def __init__(self, expr, name_base, varset):
|
||||
Value.__init__(self, name_base, "expression")
|
||||
assert isinstance(expr, tuple)
|
||||
|
||||
self.opcode = expr[0]
|
||||
m = _opcode_re.match(expr[0])
|
||||
assert m and m.group('opcode') is not None
|
||||
|
||||
self.opcode = m.group('opcode')
|
||||
self.inexact = m.group('inexact') is not None
|
||||
self.sources = [ Value.create(src, "{0}_{1}".format(name_base, i), varset)
|
||||
for (i, src) in enumerate(expr[1:]) ]
|
||||
|
||||
|
||||
@@ -31,6 +31,9 @@ struct exec_list;
|
||||
typedef struct nir_builder {
|
||||
nir_cursor cursor;
|
||||
|
||||
/* Whether new ALU instructions will be marked "exact" */
|
||||
bool exact;
|
||||
|
||||
nir_shader *shader;
|
||||
nir_function_impl *impl;
|
||||
} nir_builder;
|
||||
@@ -39,6 +42,7 @@ static inline void
|
||||
nir_builder_init(nir_builder *build, nir_function_impl *impl)
|
||||
{
|
||||
memset(build, 0, sizeof(*build));
|
||||
build->exact = false;
|
||||
build->impl = impl;
|
||||
build->shader = impl->function->shader;
|
||||
}
|
||||
@@ -50,6 +54,7 @@ nir_builder_init_simple_shader(nir_builder *build, void *mem_ctx,
|
||||
{
|
||||
build->shader = nir_shader_create(mem_ctx, stage, options);
|
||||
nir_function *func = nir_function_create(build->shader, "main");
|
||||
build->exact = false;
|
||||
build->impl = nir_function_impl_create(func);
|
||||
build->cursor = nir_after_cf_list(&build->impl->body);
|
||||
}
|
||||
@@ -104,7 +109,7 @@ nir_imm_float(nir_builder *build, float x)
|
||||
nir_const_value v;
|
||||
|
||||
memset(&v, 0, sizeof(v));
|
||||
v.f[0] = x;
|
||||
v.f32[0] = x;
|
||||
|
||||
return nir_build_imm(build, 1, v);
|
||||
}
|
||||
@@ -115,10 +120,10 @@ nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
|
||||
nir_const_value v;
|
||||
|
||||
memset(&v, 0, sizeof(v));
|
||||
v.f[0] = x;
|
||||
v.f[1] = y;
|
||||
v.f[2] = z;
|
||||
v.f[3] = w;
|
||||
v.f32[0] = x;
|
||||
v.f32[1] = y;
|
||||
v.f32[2] = z;
|
||||
v.f32[3] = w;
|
||||
|
||||
return nir_build_imm(build, 4, v);
|
||||
}
|
||||
@@ -129,7 +134,7 @@ nir_imm_int(nir_builder *build, int x)
|
||||
nir_const_value v;
|
||||
|
||||
memset(&v, 0, sizeof(v));
|
||||
v.i[0] = x;
|
||||
v.i32[0] = x;
|
||||
|
||||
return nir_build_imm(build, 1, v);
|
||||
}
|
||||
@@ -140,10 +145,10 @@ nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w)
|
||||
nir_const_value v;
|
||||
|
||||
memset(&v, 0, sizeof(v));
|
||||
v.i[0] = x;
|
||||
v.i[1] = y;
|
||||
v.i[2] = z;
|
||||
v.i[3] = w;
|
||||
v.i32[0] = x;
|
||||
v.i32[1] = y;
|
||||
v.i32[2] = z;
|
||||
v.i32[3] = w;
|
||||
|
||||
return nir_build_imm(build, 4, v);
|
||||
}
|
||||
@@ -157,6 +162,8 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
|
||||
if (!instr)
|
||||
return NULL;
|
||||
|
||||
instr->exact = build->exact;
|
||||
|
||||
instr->src[0].src = nir_src_for_ssa(src0);
|
||||
if (src1)
|
||||
instr->src[1].src = nir_src_for_ssa(src1);
|
||||
@@ -178,6 +185,25 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
|
||||
}
|
||||
assert(num_components != 0);
|
||||
|
||||
/* Figure out the bitwidth based on the source bitwidth if the instruction
|
||||
* is variable-width.
|
||||
*/
|
||||
unsigned bit_size = nir_alu_type_get_type_size(op_info->output_type);
|
||||
if (bit_size == 0) {
|
||||
for (unsigned i = 0; i < op_info->num_inputs; i++) {
|
||||
unsigned src_bit_size = instr->src[i].src.ssa->bit_size;
|
||||
if (nir_alu_type_get_type_size(op_info->input_types[i]) == 0) {
|
||||
if (bit_size)
|
||||
assert(src_bit_size == bit_size);
|
||||
else
|
||||
bit_size = src_bit_size;
|
||||
} else {
|
||||
assert(src_bit_size ==
|
||||
nir_alu_type_get_type_size(op_info->input_types[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Make sure we don't swizzle from outside of our source vector (like if a
|
||||
* scalar value was passed into a multiply with a vector).
|
||||
*/
|
||||
@@ -187,7 +213,8 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
|
||||
}
|
||||
}
|
||||
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
|
||||
bit_size, NULL);
|
||||
instr->dest.write_mask = (1 << num_components) - 1;
|
||||
|
||||
nir_builder_instr_insert(build, &instr->instr);
|
||||
@@ -252,7 +279,9 @@ static inline nir_ssa_def *
|
||||
nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
|
||||
{
|
||||
nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_fmov);
|
||||
nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
|
||||
nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components,
|
||||
nir_src_bit_size(src.src), NULL);
|
||||
mov->exact = build->exact;
|
||||
mov->dest.write_mask = (1 << num_components) - 1;
|
||||
mov->src[0] = src;
|
||||
nir_builder_instr_insert(build, &mov->instr);
|
||||
@@ -264,7 +293,9 @@ static inline nir_ssa_def *
|
||||
nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
|
||||
{
|
||||
nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_imov);
|
||||
nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
|
||||
nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components,
|
||||
nir_src_bit_size(src.src), NULL);
|
||||
mov->exact = build->exact;
|
||||
mov->dest.write_mask = (1 << num_components) - 1;
|
||||
mov->src[0] = src;
|
||||
nir_builder_instr_insert(build, &mov->instr);
|
||||
@@ -360,7 +391,8 @@ nir_load_var(nir_builder *build, nir_variable *var)
|
||||
nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_var);
|
||||
load->num_components = num_components;
|
||||
load->variables[0] = nir_deref_var_create(load, var);
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, num_components,
|
||||
glsl_get_bit_size(glsl_get_base_type(var->type)), NULL);
|
||||
nir_builder_instr_insert(build, &load->instr);
|
||||
return &load->dest.ssa;
|
||||
}
|
||||
@@ -426,7 +458,7 @@ nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index)
|
||||
load->num_components = nir_intrinsic_infos[op].dest_components;
|
||||
load->const_index[0] = index;
|
||||
nir_ssa_dest_init(&load->instr, &load->dest,
|
||||
nir_intrinsic_infos[op].dest_components, NULL);
|
||||
nir_intrinsic_infos[op].dest_components, 32, NULL);
|
||||
nir_builder_instr_insert(build, &load->instr);
|
||||
return &load->dest.ssa;
|
||||
}
|
||||
|
||||
@@ -127,11 +127,10 @@ nir_constant_clone(const nir_constant *c, nir_variable *nvar)
|
||||
/* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid
|
||||
* having to deal with locals and globals separately:
|
||||
*/
|
||||
static nir_variable *
|
||||
clone_variable(clone_state *state, const nir_variable *var)
|
||||
nir_variable *
|
||||
nir_variable_clone(const nir_variable *var, nir_shader *shader)
|
||||
{
|
||||
nir_variable *nvar = rzalloc(state->ns, nir_variable);
|
||||
add_remap(state, nvar, var);
|
||||
nir_variable *nvar = rzalloc(shader, nir_variable);
|
||||
|
||||
nvar->type = var->type;
|
||||
nvar->name = ralloc_strdup(nvar, var->name);
|
||||
@@ -149,6 +148,15 @@ clone_variable(clone_state *state, const nir_variable *var)
|
||||
return nvar;
|
||||
}
|
||||
|
||||
static nir_variable *
|
||||
clone_variable(clone_state *state, const nir_variable *var)
|
||||
{
|
||||
nir_variable *nvar = nir_variable_clone(var, state->ns);
|
||||
add_remap(state, nvar, var);
|
||||
|
||||
return nvar;
|
||||
}
|
||||
|
||||
/* clone list of nir_variable: */
|
||||
static void
|
||||
clone_var_list(clone_state *state, struct exec_list *dst,
|
||||
@@ -220,7 +228,8 @@ __clone_dst(clone_state *state, nir_instr *ninstr,
|
||||
{
|
||||
ndst->is_ssa = dst->is_ssa;
|
||||
if (dst->is_ssa) {
|
||||
nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name);
|
||||
nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components,
|
||||
dst->ssa.bit_size, dst->ssa.name);
|
||||
add_remap(state, &ndst->ssa, &dst->ssa);
|
||||
} else {
|
||||
ndst->reg.reg = remap_reg(state, dst->reg.reg);
|
||||
@@ -303,6 +312,7 @@ static nir_alu_instr *
|
||||
clone_alu(clone_state *state, const nir_alu_instr *alu)
|
||||
{
|
||||
nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op);
|
||||
nalu->exact = alu->exact;
|
||||
|
||||
__clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest);
|
||||
nalu->dest.saturate = alu->dest.saturate;
|
||||
|
||||
@@ -28,4 +28,4 @@
|
||||
#include "nir.h"
|
||||
|
||||
nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components,
|
||||
nir_const_value *src);
|
||||
unsigned bit_size, nir_const_value *src);
|
||||
|
||||
@@ -1,4 +1,43 @@
|
||||
#! /usr/bin/python2
|
||||
|
||||
def type_has_size(type_):
    """Return True when the type name ends in a digit (e.g. "int32").

    An empty name has no last character and therefore reports False.
    """
    last_char = type_[-1:]
    return last_char.isdigit()
|
||||
|
||||
def type_sizes(type_):
    """Return the list of bit sizes a type name can take.

    A name carrying an explicit 8/16/32/64 suffix yields just that size;
    any other name yields the default [32, 64].
    """
    # Checked in the same order as the suffixes are listed.
    for size in (8, 16, 32, 64):
        if type_.endswith(str(size)):
            return [size]
    return [32, 64]
|
||||
|
||||
def type_add_size(type_, size):
    """Return the type name with a size suffix.

    A name that type_has_size() already reports as sized is returned
    unchanged; otherwise str(size) is appended to it.
    """
    sized = type_ if type_has_size(type_) else type_ + str(size)
    return sized
|
||||
|
||||
def get_const_field(type_):
    """Return the nir_const_value union member name for a sized type.

    For example "int32" -> "i32" and "float64" -> "f64".  "bool32" maps
    to "u32".

    Raises Exception(str(type_)) for any type name not in the table.
    """
    fields = (
        ("int32", "i32"),
        ("uint32", "u32"),
        ("int64", "i64"),
        ("uint64", "u64"),
        ("bool32", "u32"),
        ("float32", "f32"),
        ("float64", "f64"),
    )
    for sized_type, field in fields:
        if type_ == sized_type:
            return field
    # The original had an unreachable assert(0) after this raise; it has
    # been dropped.
    raise Exception(str(type_))
|
||||
|
||||
template = """\
|
||||
/*
|
||||
* Copyright (C) 2014 Intel Corporation
|
||||
@@ -205,110 +244,140 @@ unpack_half_1x16(uint16_t u)
|
||||
}
|
||||
|
||||
/* Some typed vector structures to make things like src0.y work */
|
||||
% for type in ["float", "int", "uint", "bool"]:
|
||||
struct ${type}_vec {
|
||||
${type} x;
|
||||
${type} y;
|
||||
${type} z;
|
||||
${type} w;
|
||||
typedef float float32_t;
|
||||
typedef double float64_t;
|
||||
typedef bool bool32_t;
|
||||
% for type in ["float", "int", "uint"]:
|
||||
% for width in [32, 64]:
|
||||
struct ${type}${width}_vec {
|
||||
${type}${width}_t x;
|
||||
${type}${width}_t y;
|
||||
${type}${width}_t z;
|
||||
${type}${width}_t w;
|
||||
};
|
||||
% endfor
|
||||
% endfor
|
||||
|
||||
struct bool32_vec {
|
||||
bool x;
|
||||
bool y;
|
||||
bool z;
|
||||
bool w;
|
||||
};
|
||||
|
||||
% for name, op in sorted(opcodes.iteritems()):
|
||||
static nir_const_value
|
||||
evaluate_${name}(unsigned num_components, nir_const_value *_src)
|
||||
evaluate_${name}(unsigned num_components, unsigned bit_size,
|
||||
nir_const_value *_src)
|
||||
{
|
||||
nir_const_value _dst_val = { { {0, 0, 0, 0} } };
|
||||
|
||||
## For each non-per-component input, create a variable srcN that
|
||||
## contains x, y, z, and w elements which are filled in with the
|
||||
## appropriately-typed values.
|
||||
% for j in range(op.num_inputs):
|
||||
% if op.input_sizes[j] == 0:
|
||||
<% continue %>
|
||||
% elif "src" + str(j) not in op.const_expr:
|
||||
## Avoid unused variable warnings
|
||||
<% continue %>
|
||||
%endif
|
||||
switch (bit_size) {
|
||||
% for bit_size in [32, 64]:
|
||||
case ${bit_size}: {
|
||||
<%
|
||||
output_type = type_add_size(op.output_type, bit_size)
|
||||
input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
|
||||
%>
|
||||
|
||||
struct ${op.input_types[j]}_vec src${j} = {
|
||||
% for k in range(op.input_sizes[j]):
|
||||
% if op.input_types[j] == "bool":
|
||||
_src[${j}].u[${k}] != 0,
|
||||
% else:
|
||||
_src[${j}].${op.input_types[j][:1]}[${k}],
|
||||
% endif
|
||||
% endfor
|
||||
};
|
||||
% endfor
|
||||
## For each non-per-component input, create a variable srcN that
|
||||
## contains x, y, z, and w elements which are filled in with the
|
||||
## appropriately-typed values.
|
||||
% for j in range(op.num_inputs):
|
||||
% if op.input_sizes[j] == 0:
|
||||
<% continue %>
|
||||
% elif "src" + str(j) not in op.const_expr:
|
||||
## Avoid unused variable warnings
|
||||
<% continue %>
|
||||
%endif
|
||||
|
||||
% if op.output_size == 0:
|
||||
## For per-component instructions, we need to iterate over the
|
||||
## components and apply the constant expression one component
|
||||
## at a time.
|
||||
for (unsigned _i = 0; _i < num_components; _i++) {
|
||||
## For each per-component input, create a variable srcN that
|
||||
## contains the value of the current (_i'th) component.
|
||||
% for j in range(op.num_inputs):
|
||||
% if op.input_sizes[j] != 0:
|
||||
<% continue %>
|
||||
% elif "src" + str(j) not in op.const_expr:
|
||||
## Avoid unused variable warnings
|
||||
<% continue %>
|
||||
% elif op.input_types[j] == "bool":
|
||||
bool src${j} = _src[${j}].u[_i] != 0;
|
||||
struct ${input_types[j]}_vec src${j} = {
|
||||
% for k in range(op.input_sizes[j]):
|
||||
% if input_types[j] == "bool32":
|
||||
_src[${j}].u32[${k}] != 0,
|
||||
% else:
|
||||
${op.input_types[j]} src${j} = _src[${j}].${op.input_types[j][:1]}[_i];
|
||||
_src[${j}].${get_const_field(input_types[j])}[${k}],
|
||||
% endif
|
||||
% endfor
|
||||
};
|
||||
% endfor
|
||||
|
||||
% if op.output_size == 0:
|
||||
## For per-component instructions, we need to iterate over the
|
||||
## components and apply the constant expression one component
|
||||
## at a time.
|
||||
for (unsigned _i = 0; _i < num_components; _i++) {
|
||||
## For each per-component input, create a variable srcN that
|
||||
## contains the value of the current (_i'th) component.
|
||||
% for j in range(op.num_inputs):
|
||||
% if op.input_sizes[j] != 0:
|
||||
<% continue %>
|
||||
% elif "src" + str(j) not in op.const_expr:
|
||||
## Avoid unused variable warnings
|
||||
<% continue %>
|
||||
% elif input_types[j] == "bool32":
|
||||
bool src${j} = _src[${j}].u32[_i] != 0;
|
||||
% else:
|
||||
${input_types[j]}_t src${j} =
|
||||
_src[${j}].${get_const_field(input_types[j])}[_i];
|
||||
% endif
|
||||
% endfor
|
||||
|
||||
## Create an appropriately-typed variable dst and assign the
|
||||
## result of the const_expr to it. If const_expr already contains
|
||||
## writes to dst, just include const_expr directly.
|
||||
% if "dst" in op.const_expr:
|
||||
${output_type}_t dst;
|
||||
${op.const_expr}
|
||||
% else:
|
||||
${output_type}_t dst = ${op.const_expr};
|
||||
% endif
|
||||
|
||||
## Store the current component of the actual destination to the
|
||||
## value of dst.
|
||||
% if output_type == "bool32":
|
||||
## Sanitize the C value to a proper NIR bool
|
||||
_dst_val.u32[_i] = dst ? NIR_TRUE : NIR_FALSE;
|
||||
% else:
|
||||
_dst_val.${get_const_field(output_type)}[_i] = dst;
|
||||
% endif
|
||||
}
|
||||
% else:
|
||||
## In the non-per-component case, create a struct dst with
|
||||
## appropriately-typed elements x, y, z, and w and assign the result
|
||||
## of the const_expr to all components of dst, or include the
|
||||
## const_expr directly if it writes to dst already.
|
||||
struct ${output_type}_vec dst;
|
||||
|
||||
## Create an appropriately-typed variable dst and assign the
|
||||
## result of the const_expr to it. If const_expr already contains
|
||||
## writes to dst, just include const_expr directly.
|
||||
% if "dst" in op.const_expr:
|
||||
${op.output_type} dst;
|
||||
${op.const_expr}
|
||||
% else:
|
||||
${op.output_type} dst = ${op.const_expr};
|
||||
## Splat the value to all components. This way expressions which
|
||||
## write the same value to all components don't need to explicitly
|
||||
## write to dest. One such example is fnoise which has a
|
||||
## const_expr of 0.0f.
|
||||
dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
|
||||
% endif
|
||||
|
||||
## Store the current component of the actual destination to the
|
||||
## value of dst.
|
||||
% if op.output_type == "bool":
|
||||
## Sanitize the C value to a proper NIR bool
|
||||
_dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
|
||||
% else:
|
||||
_dst_val.${op.output_type[:1]}[_i] = dst;
|
||||
% endif
|
||||
}
|
||||
% else:
|
||||
## In the non-per-component case, create a struct dst with
|
||||
## appropriately-typed elements x, y, z, and w and assign the result
|
||||
## of the const_expr to all components of dst, or include the
|
||||
## const_expr directly if it writes to dst already.
|
||||
struct ${op.output_type}_vec dst;
|
||||
|
||||
% if "dst" in op.const_expr:
|
||||
${op.const_expr}
|
||||
% else:
|
||||
## Splat the value to all components. This way expressions which
|
||||
## write the same value to all components don't need to explicitly
|
||||
## write to dest. One such example is fnoise which has a
|
||||
## const_expr of 0.0f.
|
||||
dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
|
||||
## For each component in the destination, copy the value of dst to
|
||||
## the actual destination.
|
||||
% for k in range(op.output_size):
|
||||
% if output_type == "bool32":
|
||||
## Sanitize the C value to a proper NIR bool
|
||||
_dst_val.u32[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
|
||||
% else:
|
||||
_dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
|
||||
% endif
|
||||
% endfor
|
||||
% endif
|
||||
|
||||
## For each component in the destination, copy the value of dst to
|
||||
## the actual destination.
|
||||
% for k in range(op.output_size):
|
||||
% if op.output_type == "bool":
|
||||
## Sanitize the C value to a proper NIR bool
|
||||
_dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
|
||||
% else:
|
||||
_dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]};
|
||||
% endif
|
||||
% endfor
|
||||
% endif
|
||||
break;
|
||||
}
|
||||
% endfor
|
||||
|
||||
default:
|
||||
unreachable("unknown bit width");
|
||||
}
|
||||
|
||||
return _dst_val;
|
||||
}
|
||||
@@ -316,12 +385,12 @@ evaluate_${name}(unsigned num_components, nir_const_value *_src)
|
||||
|
||||
nir_const_value
|
||||
nir_eval_const_opcode(nir_op op, unsigned num_components,
|
||||
nir_const_value *src)
|
||||
unsigned bit_width, nir_const_value *src)
|
||||
{
|
||||
switch (op) {
|
||||
% for name in sorted(opcodes.iterkeys()):
|
||||
case nir_op_${name}: {
|
||||
return evaluate_${name}(num_components, src);
|
||||
return evaluate_${name}(num_components, bit_width, src);
|
||||
break;
|
||||
}
|
||||
% endfor
|
||||
@@ -333,4 +402,7 @@ nir_eval_const_opcode(nir_op op, unsigned num_components,
|
||||
from nir_opcodes import opcodes
|
||||
from mako.template import Template
|
||||
|
||||
print Template(template).render(opcodes=opcodes)
|
||||
print Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
|
||||
type_has_size=type_has_size,
|
||||
type_add_size=type_add_size,
|
||||
get_const_field=get_const_field)
|
||||
|
||||
@@ -342,7 +342,8 @@ isolate_phi_nodes_block(nir_block *block, void *void_state)
|
||||
nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx,
|
||||
nir_parallel_copy_entry);
|
||||
nir_ssa_dest_init(&pcopy->instr, &entry->dest,
|
||||
phi->dest.ssa.num_components, src->src.ssa->name);
|
||||
phi->dest.ssa.num_components,
|
||||
phi->dest.ssa.bit_size, src->src.ssa->name);
|
||||
exec_list_push_tail(&pcopy->entries, &entry->node);
|
||||
|
||||
assert(src->src.is_ssa);
|
||||
@@ -355,7 +356,8 @@ isolate_phi_nodes_block(nir_block *block, void *void_state)
|
||||
nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx,
|
||||
nir_parallel_copy_entry);
|
||||
nir_ssa_dest_init(&block_pcopy->instr, &entry->dest,
|
||||
phi->dest.ssa.num_components, phi->dest.ssa.name);
|
||||
phi->dest.ssa.num_components, phi->dest.ssa.bit_size,
|
||||
phi->dest.ssa.name);
|
||||
exec_list_push_tail(&block_pcopy->entries, &entry->node);
|
||||
|
||||
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
|
||||
|
||||
@@ -77,13 +77,13 @@ nir_gs_count_vertices(const nir_shader *shader)
|
||||
return -1;
|
||||
|
||||
if (count == -1)
|
||||
count = val->i[0];
|
||||
count = val->i32[0];
|
||||
|
||||
/* We've found contradictory set_vertex_count intrinsics.
|
||||
* This can happen if there are early-returns in main() and
|
||||
* different paths emit different numbers of vertices.
|
||||
*/
|
||||
if (count != val->i[0])
|
||||
if (count != val->i32[0])
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,6 +52,7 @@ hash_alu(uint32_t hash, const nir_alu_instr *instr)
|
||||
{
|
||||
hash = HASH(hash, instr->op);
|
||||
hash = HASH(hash, instr->dest.dest.ssa.num_components);
|
||||
/* We explicitly don't hash instr->exact */
|
||||
|
||||
if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
|
||||
assert(nir_op_infos[instr->op].num_inputs == 2);
|
||||
@@ -81,9 +82,9 @@ hash_load_const(uint32_t hash, const nir_load_const_instr *instr)
|
||||
{
|
||||
hash = HASH(hash, instr->def.num_components);
|
||||
|
||||
hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f,
|
||||
hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f32,
|
||||
instr->def.num_components
|
||||
* sizeof(instr->value.f[0]));
|
||||
* sizeof(instr->value.f32[0]));
|
||||
|
||||
return hash;
|
||||
}
|
||||
@@ -267,6 +268,8 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
|
||||
if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
|
||||
return false;
|
||||
|
||||
/* We explicitly don't compare the exact flags of alu1 and alu2 */
|
||||
|
||||
if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
|
||||
assert(nir_op_infos[alu1->op].num_inputs == 2);
|
||||
return (nir_alu_srcs_equal(alu1, alu2, 0, 0) &&
|
||||
@@ -322,8 +325,8 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
|
||||
if (load1->def.num_components != load2->def.num_components)
|
||||
return false;
|
||||
|
||||
return memcmp(load1->value.f, load2->value.f,
|
||||
load1->def.num_components * sizeof(*load2->value.f)) == 0;
|
||||
return memcmp(load1->value.f32, load2->value.f32,
|
||||
load1->def.num_components * sizeof(*load2->value.f32)) == 0;
|
||||
}
|
||||
case nir_instr_type_phi: {
|
||||
nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
|
||||
@@ -496,8 +499,17 @@ nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr)
|
||||
struct set_entry *entry = _mesa_set_search(instr_set, instr);
|
||||
if (entry) {
|
||||
nir_ssa_def *def = nir_instr_get_dest_ssa_def(instr);
|
||||
nir_ssa_def *new_def =
|
||||
nir_instr_get_dest_ssa_def((nir_instr *) entry->key);
|
||||
nir_instr *match = (nir_instr *) entry->key;
|
||||
nir_ssa_def *new_def = nir_instr_get_dest_ssa_def(match);
|
||||
|
||||
/* It's safe to replace an exact instruction with an inexact one as
|
||||
* long as we make it exact. If we got here, the two instructions are
|
||||
* exactly identical in every other way so, once we've set the exact
|
||||
* bit, they are the same.
|
||||
*/
|
||||
if (instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->exact)
|
||||
nir_instr_as_alu(match)->exact = true;
|
||||
|
||||
nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def));
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -31,9 +31,11 @@
|
||||
*/
|
||||
|
||||
static void
|
||||
nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components)
|
||||
nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components,
|
||||
unsigned bit_size)
|
||||
{
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
|
||||
bit_size, NULL);
|
||||
instr->dest.write_mask = (1 << num_components) - 1;
|
||||
}
|
||||
|
||||
@@ -46,7 +48,7 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
|
||||
nir_ssa_def *last = NULL;
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
nir_alu_instr *chan = nir_alu_instr_create(builder->shader, chan_op);
|
||||
nir_alu_ssa_dest_init(chan, 1);
|
||||
nir_alu_ssa_dest_init(chan, 1, instr->dest.dest.ssa.bit_size);
|
||||
nir_alu_src_copy(&chan->src[0], &instr->src[0], chan);
|
||||
chan->src[0].swizzle[0] = chan->src[0].swizzle[i];
|
||||
if (nir_op_infos[chan_op].num_inputs > 1) {
|
||||
@@ -80,6 +82,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
|
||||
assert(instr->dest.write_mask != 0);
|
||||
|
||||
b->cursor = nir_before_instr(&instr->instr);
|
||||
b->exact = instr->exact;
|
||||
|
||||
#define LOWER_REDUCTION(name, chan, merge) \
|
||||
case name##2: \
|
||||
@@ -220,7 +223,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
|
||||
lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan];
|
||||
}
|
||||
|
||||
nir_alu_ssa_dest_init(lower, 1);
|
||||
nir_alu_ssa_dest_init(lower, 1, instr->dest.dest.ssa.bit_size);
|
||||
lower->dest.saturate = instr->dest.saturate;
|
||||
comps[chan] = &lower->dest.dest.ssa;
|
||||
|
||||
|
||||
@@ -75,7 +75,7 @@ lower_instr(nir_intrinsic_instr *instr,
|
||||
state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index);
|
||||
|
||||
nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
|
||||
offset_const->value.u[0] = instr->variables[0]->var->data.offset;
|
||||
offset_const->value.u32[0] = instr->variables[0]->var->data.offset;
|
||||
|
||||
nir_instr_insert_before(&instr->instr, &offset_const->instr);
|
||||
|
||||
@@ -90,17 +90,17 @@ lower_instr(nir_intrinsic_instr *instr,
|
||||
unsigned child_array_elements = tail->child != NULL ?
|
||||
glsl_get_aoa_size(tail->type) : 1;
|
||||
|
||||
offset_const->value.u[0] += deref_array->base_offset *
|
||||
offset_const->value.u32[0] += deref_array->base_offset *
|
||||
child_array_elements * ATOMIC_COUNTER_SIZE;
|
||||
|
||||
if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
|
||||
nir_load_const_instr *atomic_counter_size =
|
||||
nir_load_const_instr_create(mem_ctx, 1);
|
||||
atomic_counter_size->value.u[0] = child_array_elements * ATOMIC_COUNTER_SIZE;
|
||||
atomic_counter_size->value.u32[0] = child_array_elements * ATOMIC_COUNTER_SIZE;
|
||||
nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr);
|
||||
|
||||
nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
|
||||
nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
|
||||
nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, 32, NULL);
|
||||
mul->dest.write_mask = 0x1;
|
||||
nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul);
|
||||
mul->src[1].src.is_ssa = true;
|
||||
@@ -108,7 +108,7 @@ lower_instr(nir_intrinsic_instr *instr,
|
||||
nir_instr_insert_before(&instr->instr, &mul->instr);
|
||||
|
||||
nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd);
|
||||
nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
|
||||
nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, 32, NULL);
|
||||
add->dest.write_mask = 0x1;
|
||||
add->src[0].src.is_ssa = true;
|
||||
add->src[0].src.ssa = &mul->dest.dest.ssa;
|
||||
@@ -125,7 +125,7 @@ lower_instr(nir_intrinsic_instr *instr,
|
||||
|
||||
if (instr->dest.is_ssa) {
|
||||
nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
|
||||
instr->dest.ssa.num_components, NULL);
|
||||
instr->dest.ssa.num_components, 32, NULL);
|
||||
nir_ssa_def_rewrite_uses(&instr->dest.ssa,
|
||||
nir_src_for_ssa(&new_instr->dest.ssa));
|
||||
} else {
|
||||
|
||||
@@ -88,7 +88,7 @@ load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val)
|
||||
load->num_components = 4;
|
||||
nir_intrinsic_set_base(load, in->data.driver_location);
|
||||
load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
|
||||
nir_builder_instr_insert(b, &load->instr);
|
||||
|
||||
val[0] = nir_channel(b, &load->dest.ssa, 0);
|
||||
|
||||
@@ -75,8 +75,9 @@ emit_indirect_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
|
||||
if (src == NULL) {
|
||||
/* We're a load. We need to insert a phi node */
|
||||
nir_phi_instr *phi = nir_phi_instr_create(b->shader);
|
||||
unsigned bit_size = then_dest->bit_size;
|
||||
nir_ssa_dest_init(&phi->instr, &phi->dest,
|
||||
then_dest->num_components, NULL);
|
||||
then_dest->num_components, bit_size, NULL);
|
||||
|
||||
nir_phi_src *src0 = ralloc(phi, nir_phi_src);
|
||||
src0->pred = nir_cf_node_as_block(nir_if_last_then_node(if_stmt));
|
||||
@@ -125,8 +126,9 @@ emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
|
||||
load->num_components = orig_instr->num_components;
|
||||
load->variables[0] =
|
||||
nir_deref_as_var(nir_copy_deref(load, &deref->deref));
|
||||
unsigned bit_size = orig_instr->dest.ssa.bit_size;
|
||||
nir_ssa_dest_init(&load->instr, &load->dest,
|
||||
load->num_components, NULL);
|
||||
load->num_components, bit_size, NULL);
|
||||
nir_builder_instr_insert(b, &load->instr);
|
||||
*dest = &load->dest.ssa;
|
||||
} else {
|
||||
|
||||
@@ -289,7 +289,8 @@ nir_lower_io_block(nir_block *block, void *void_state)
|
||||
|
||||
if (intrin->dest.is_ssa) {
|
||||
nir_ssa_dest_init(&load->instr, &load->dest,
|
||||
intrin->num_components, NULL);
|
||||
intrin->num_components,
|
||||
intrin->dest.ssa.bit_size, NULL);
|
||||
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
|
||||
nir_src_for_ssa(&load->dest.ssa));
|
||||
} else {
|
||||
@@ -369,7 +370,8 @@ nir_lower_io_block(nir_block *block, void *void_state)
|
||||
|
||||
if (intrin->dest.is_ssa) {
|
||||
nir_ssa_dest_init(&atomic->instr, &atomic->dest,
|
||||
intrin->dest.ssa.num_components, NULL);
|
||||
intrin->dest.ssa.num_components,
|
||||
intrin->dest.ssa.bit_size, NULL);
|
||||
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
|
||||
nir_src_for_ssa(&atomic->dest.ssa));
|
||||
} else {
|
||||
|
||||
@@ -49,7 +49,7 @@ lower_load_const_instr_scalar(nir_load_const_instr *lower)
|
||||
nir_ssa_def *loads[4];
|
||||
for (unsigned i = 0; i < lower->def.num_components; i++) {
|
||||
nir_load_const_instr *load_comp = nir_load_const_instr_create(b.shader, 1);
|
||||
load_comp->value.u[0] = lower->value.u[i];
|
||||
load_comp->value.u32[0] = lower->value.u32[i];
|
||||
nir_builder_instr_insert(&b, &load_comp->instr);
|
||||
loads[i] = &load_comp->def;
|
||||
}
|
||||
|
||||
@@ -161,7 +161,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
|
||||
if (src.reg.indirect) {
|
||||
nir_load_const_instr *load_const =
|
||||
nir_load_const_instr_create(state->shader, 1);
|
||||
load_const->value.u[0] = glsl_get_length(parent_type);
|
||||
load_const->value.u32[0] = glsl_get_length(parent_type);
|
||||
nir_instr_insert_before(instr, &load_const->instr);
|
||||
|
||||
nir_alu_instr *mul = nir_alu_instr_create(state->shader, nir_op_imul);
|
||||
@@ -169,7 +169,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
|
||||
mul->src[1].src.is_ssa = true;
|
||||
mul->src[1].src.ssa = &load_const->def;
|
||||
mul->dest.write_mask = 1;
|
||||
nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
|
||||
nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, 32, NULL);
|
||||
nir_instr_insert_before(instr, &mul->instr);
|
||||
|
||||
src.reg.indirect->is_ssa = true;
|
||||
@@ -187,7 +187,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
|
||||
add->src[0].src = *src.reg.indirect;
|
||||
nir_src_copy(&add->src[1].src, &deref_array->indirect, add);
|
||||
add->dest.write_mask = 1;
|
||||
nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
|
||||
nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, 32, NULL);
|
||||
nir_instr_insert_before(instr, &add->instr);
|
||||
|
||||
src.reg.indirect->is_ssa = true;
|
||||
@@ -221,7 +221,8 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
|
||||
mov->dest.write_mask = (1 << intrin->num_components) - 1;
|
||||
if (intrin->dest.is_ssa) {
|
||||
nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
|
||||
intrin->num_components, NULL);
|
||||
intrin->num_components,
|
||||
intrin->dest.ssa.bit_size, NULL);
|
||||
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
|
||||
nir_src_for_ssa(&mov->dest.dest.ssa));
|
||||
} else {
|
||||
|
||||
@@ -188,6 +188,8 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state)
|
||||
if (!should_lower_phi(phi, state))
|
||||
continue;
|
||||
|
||||
unsigned bit_size = phi->dest.ssa.bit_size;
|
||||
|
||||
/* Create a vecN operation to combine the results. Most of these
|
||||
* will be redundant, but copy propagation should clean them up for
|
||||
* us. No need to add the complexity here.
|
||||
@@ -202,12 +204,14 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state)
|
||||
|
||||
nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, vec_op);
|
||||
nir_ssa_dest_init(&vec->instr, &vec->dest.dest,
|
||||
phi->dest.ssa.num_components, NULL);
|
||||
phi->dest.ssa.num_components,
|
||||
bit_size, NULL);
|
||||
vec->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
|
||||
|
||||
for (unsigned i = 0; i < phi->dest.ssa.num_components; i++) {
|
||||
nir_phi_instr *new_phi = nir_phi_instr_create(state->mem_ctx);
|
||||
nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1, NULL);
|
||||
nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1,
|
||||
phi->dest.ssa.bit_size, NULL);
|
||||
|
||||
vec->src[i].src = nir_src_for_ssa(&new_phi->dest.ssa);
|
||||
|
||||
@@ -215,7 +219,7 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state)
|
||||
/* We need to insert a mov to grab the i'th component of src */
|
||||
nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
|
||||
nir_op_imov);
|
||||
nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
|
||||
nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, bit_size, NULL);
|
||||
mov->dest.write_mask = 1;
|
||||
nir_src_copy(&mov->src[0].src, &src->src, state->mem_ctx);
|
||||
mov->src[0].swizzle[0] = i;
|
||||
|
||||
@@ -65,9 +65,9 @@ convert_block(nir_block *block, void *void_state)
|
||||
*/
|
||||
|
||||
nir_const_value local_size;
|
||||
local_size.u[0] = b->shader->info.cs.local_size[0];
|
||||
local_size.u[1] = b->shader->info.cs.local_size[1];
|
||||
local_size.u[2] = b->shader->info.cs.local_size[2];
|
||||
local_size.u32[0] = b->shader->info.cs.local_size[0];
|
||||
local_size.u32[1] = b->shader->info.cs.local_size[1];
|
||||
local_size.u32[2] = b->shader->info.cs.local_size[2];
|
||||
|
||||
nir_ssa_def *group_id =
|
||||
nir_load_system_value(b, nir_intrinsic_load_work_group_id, 0);
|
||||
|
||||
@@ -140,7 +140,7 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex)
|
||||
txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0));
|
||||
txs->src[0].src_type = nir_tex_src_lod;
|
||||
|
||||
nir_ssa_dest_init(&txs->instr, &txs->dest, 2, NULL);
|
||||
nir_ssa_dest_init(&txs->instr, &txs->dest, 2, 32, NULL);
|
||||
nir_builder_instr_insert(b, &txs->instr);
|
||||
|
||||
return nir_i2f(b, &txs->dest.ssa);
|
||||
@@ -223,13 +223,13 @@ get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
|
||||
memset(&v, 0, sizeof(v));
|
||||
|
||||
if (swizzle_val == 4) {
|
||||
v.u[0] = v.u[1] = v.u[2] = v.u[3] = 0;
|
||||
v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 0;
|
||||
} else {
|
||||
assert(swizzle_val == 5);
|
||||
if (type == nir_type_float)
|
||||
v.f[0] = v.f[1] = v.f[2] = v.f[3] = 1.0;
|
||||
v.f32[0] = v.f32[1] = v.f32[2] = v.f32[3] = 1.0;
|
||||
else
|
||||
v.u[0] = v.u[1] = v.u[2] = v.u[3] = 1;
|
||||
v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 1;
|
||||
}
|
||||
|
||||
return nir_build_imm(b, 4, v);
|
||||
|
||||
@@ -74,7 +74,7 @@ load_input(nir_builder *b, nir_variable *in)
|
||||
load->num_components = 4;
|
||||
nir_intrinsic_set_base(load, in->data.driver_location);
|
||||
load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
|
||||
nir_builder_instr_insert(b, &load->instr);
|
||||
|
||||
return &load->dest.ssa;
|
||||
|
||||
@@ -116,12 +116,15 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
|
||||
assert(src_tail->type == dest_tail->type);
|
||||
|
||||
unsigned num_components = glsl_get_vector_elements(src_tail->type);
|
||||
unsigned bit_size =
|
||||
glsl_get_bit_size(glsl_get_base_type(src_tail->type));
|
||||
|
||||
nir_intrinsic_instr *load =
|
||||
nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var);
|
||||
load->num_components = num_components;
|
||||
load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src_head->deref));
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, num_components, bit_size,
|
||||
NULL);
|
||||
|
||||
nir_instr_insert_before(©_instr->instr, &load->instr);
|
||||
|
||||
|
||||
@@ -505,6 +505,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
|
||||
nir_ssa_undef_instr *undef =
|
||||
nir_ssa_undef_instr_create(state->shader,
|
||||
intrin->num_components);
|
||||
undef->def.bit_size = intrin->dest.ssa.bit_size;
|
||||
|
||||
nir_instr_insert_before(&intrin->instr, &undef->instr);
|
||||
nir_instr_remove(&intrin->instr);
|
||||
@@ -528,7 +529,8 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
|
||||
|
||||
mov->dest.write_mask = (1 << intrin->num_components) - 1;
|
||||
nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
|
||||
intrin->num_components, NULL);
|
||||
intrin->num_components,
|
||||
intrin->dest.ssa.bit_size, NULL);
|
||||
|
||||
nir_instr_insert_before(&intrin->instr, &mov->instr);
|
||||
nir_instr_remove(&intrin->instr);
|
||||
@@ -719,6 +721,7 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
|
||||
node->pb_value =
|
||||
nir_phi_builder_add_value(state.phi_builder,
|
||||
glsl_get_vector_elements(node->type),
|
||||
glsl_get_bit_size(glsl_get_base_type(node->type)),
|
||||
store_blocks);
|
||||
|
||||
if (node->deref->var->constant_initializer) {
|
||||
|
||||
@@ -90,8 +90,12 @@ class Opcode(object):
|
||||
# helper variables for strings
|
||||
tfloat = "float"
|
||||
tint = "int"
|
||||
tbool = "bool"
|
||||
tbool = "bool32"
|
||||
tuint = "uint"
|
||||
tfloat32 = "float32"
|
||||
tint32 = "int32"
|
||||
tuint32 = "uint32"
|
||||
tfloat64 = "float64"
|
||||
|
||||
commutative = "commutative "
|
||||
associative = "associative "
|
||||
@@ -155,57 +159,57 @@ unop("frsq", tfloat, "1.0f / sqrtf(src0)")
|
||||
unop("fsqrt", tfloat, "sqrtf(src0)")
|
||||
unop("fexp2", tfloat, "exp2f(src0)")
|
||||
unop("flog2", tfloat, "log2f(src0)")
|
||||
unop_convert("f2i", tint, tfloat, "src0") # Float-to-integer conversion.
|
||||
unop_convert("f2u", tuint, tfloat, "src0") # Float-to-unsigned conversion
|
||||
unop_convert("i2f", tfloat, tint, "src0") # Integer-to-float conversion.
|
||||
unop_convert("f2i", tint32, tfloat32, "src0") # Float-to-integer conversion.
|
||||
unop_convert("f2u", tuint32, tfloat32, "src0") # Float-to-unsigned conversion
|
||||
unop_convert("i2f", tfloat32, tint32, "src0") # Integer-to-float conversion.
|
||||
# Float-to-boolean conversion
|
||||
unop_convert("f2b", tbool, tfloat, "src0 != 0.0f")
|
||||
unop_convert("f2b", tbool, tfloat32, "src0 != 0.0f")
|
||||
# Boolean-to-float conversion
|
||||
unop_convert("b2f", tfloat, tbool, "src0 ? 1.0f : 0.0f")
|
||||
unop_convert("b2f", tfloat32, tbool, "src0 ? 1.0f : 0.0f")
|
||||
# Int-to-boolean conversion
|
||||
unop_convert("i2b", tbool, tint, "src0 != 0")
|
||||
unop_convert("b2i", tint, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
|
||||
unop_convert("u2f", tfloat, tuint, "src0") # Unsigned-to-float conversion.
|
||||
unop_convert("i2b", tbool, tint32, "src0 != 0")
|
||||
unop_convert("b2i", tint32, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
|
||||
unop_convert("u2f", tfloat32, tuint32, "src0") # Unsigned-to-float conversion.
|
||||
|
||||
# Unary floating-point rounding operations.
|
||||
|
||||
|
||||
unop("ftrunc", tfloat, "truncf(src0)")
|
||||
unop("fceil", tfloat, "ceilf(src0)")
|
||||
unop("ffloor", tfloat, "floorf(src0)")
|
||||
unop("ffract", tfloat, "src0 - floorf(src0)")
|
||||
unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
|
||||
unop("ftrunc", tfloat, "bit_size == 64 ? trunc(src0) : truncf(src0)")
|
||||
unop("fceil", tfloat, "bit_size == 64 ? ceil(src0) : ceilf(src0)")
|
||||
unop("ffloor", tfloat, "bit_size == 64 ? floor(src0) : floorf(src0)")
|
||||
unop("ffract", tfloat, "src0 - (bit_size == 64 ? floor(src0) : floorf(src0))")
|
||||
unop("fround_even", tfloat, "bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0)")
|
||||
|
||||
unop("fquantize2f16", tfloat, "(fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0))")
|
||||
|
||||
# Trigonometric operations.
|
||||
|
||||
|
||||
unop("fsin", tfloat, "sinf(src0)")
|
||||
unop("fcos", tfloat, "cosf(src0)")
|
||||
unop("fsin", tfloat, "bit_size == 64 ? sin(src0) : sinf(src0)")
|
||||
unop("fcos", tfloat, "bit_size == 64 ? cos(src0) : cosf(src0)")
|
||||
|
||||
|
||||
# Partial derivatives.
|
||||
|
||||
|
||||
unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0.
|
||||
unop("fddy", tfloat, "0.0f")
|
||||
unop("fddx_fine", tfloat, "0.0f")
|
||||
unop("fddy_fine", tfloat, "0.0f")
|
||||
unop("fddx_coarse", tfloat, "0.0f")
|
||||
unop("fddy_coarse", tfloat, "0.0f")
|
||||
unop("fddx", tfloat, "0.0") # the derivative of a constant is 0.
|
||||
unop("fddy", tfloat, "0.0")
|
||||
unop("fddx_fine", tfloat, "0.0")
|
||||
unop("fddy_fine", tfloat, "0.0")
|
||||
unop("fddx_coarse", tfloat, "0.0")
|
||||
unop("fddy_coarse", tfloat, "0.0")
|
||||
|
||||
|
||||
# Floating point pack and unpack operations.
|
||||
|
||||
def pack_2x16(fmt):
|
||||
unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """
|
||||
unop_horiz("pack_" + fmt + "_2x16", 1, tuint32, 2, tfloat32, """
|
||||
dst.x = (uint32_t) pack_fmt_1x16(src0.x);
|
||||
dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
|
||||
""".replace("fmt", fmt))
|
||||
|
||||
def pack_4x8(fmt):
|
||||
unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """
|
||||
unop_horiz("pack_" + fmt + "_4x8", 1, tuint32, 4, tfloat32, """
|
||||
dst.x = (uint32_t) pack_fmt_1x8(src0.x);
|
||||
dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
|
||||
dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
|
||||
@@ -213,13 +217,13 @@ dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
|
||||
""".replace("fmt", fmt))
|
||||
|
||||
def unpack_2x16(fmt):
|
||||
unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """
|
||||
unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat32, 1, tuint32, """
|
||||
dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
|
||||
/* The second component comes from the upper 16 bits of the source word. */
dst.y = unpack_fmt_1x16((uint16_t)(src0.x >> 16));
|
||||
""".replace("fmt", fmt))
|
||||
|
||||
def unpack_4x8(fmt):
|
||||
unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """
|
||||
unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat32, 1, tuint32, """
|
||||
dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
|
||||
dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
|
||||
dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
|
||||
@@ -238,11 +242,11 @@ unpack_2x16("unorm")
|
||||
unpack_4x8("unorm")
|
||||
unpack_2x16("half")
|
||||
|
||||
unop_horiz("pack_uvec2_to_uint", 1, tuint, 2, tuint, """
|
||||
unop_horiz("pack_uvec2_to_uint", 1, tuint32, 2, tuint32, """
|
||||
/* y fills bits 16-31, above the 16 bits kept from x. */
dst.x = (src0.x & 0xffff) | (src0.y << 16);
|
||||
""")
|
||||
|
||||
unop_horiz("pack_uvec4_to_uint", 1, tuint, 4, tuint, """
|
||||
unop_horiz("pack_uvec4_to_uint", 1, tuint32, 4, tuint32, """
|
||||
dst.x = (src0.x << 0) |
|
||||
(src0.y << 8) |
|
||||
(src0.z << 16) |
|
||||
@@ -252,22 +256,22 @@ dst.x = (src0.x << 0) |
|
||||
# Lowered floating point unpacking operations.
|
||||
|
||||
|
||||
unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint,
|
||||
unop_horiz("unpack_half_2x16_split_x", 1, tfloat32, 1, tuint32,
|
||||
"unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
|
||||
unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint,
|
||||
unop_horiz("unpack_half_2x16_split_y", 1, tfloat32, 1, tuint32,
|
||||
"unpack_half_1x16((uint16_t)(src0.x >> 16))")
|
||||
|
||||
|
||||
# Bit operations, part of ARB_gpu_shader5.
|
||||
|
||||
|
||||
unop("bitfield_reverse", tuint, """
|
||||
unop("bitfield_reverse", tuint32, """
|
||||
/* we're not winning any awards for speed here, but that's ok */
|
||||
dst = 0;
|
||||
for (unsigned bit = 0; bit < 32; bit++)
|
||||
dst |= ((src0 >> bit) & 1) << (31 - bit);
|
||||
""")
|
||||
unop("bit_count", tuint, """
|
||||
unop("bit_count", tuint32, """
|
||||
dst = 0;
|
||||
for (unsigned bit = 0; bit < 32; bit++) {
|
||||
if ((src0 >> bit) & 1)
|
||||
@@ -275,7 +279,7 @@ for (unsigned bit = 0; bit < 32; bit++) {
|
||||
}
|
||||
""")
|
||||
|
||||
unop_convert("ufind_msb", tint, tuint, """
|
||||
unop_convert("ufind_msb", tint32, tuint32, """
|
||||
dst = -1;
|
||||
for (int bit = 31; bit > 0; bit--) {
|
||||
if ((src0 >> bit) & 1) {
|
||||
@@ -285,7 +289,7 @@ for (int bit = 31; bit > 0; bit--) {
|
||||
}
|
||||
""")
|
||||
|
||||
unop("ifind_msb", tint, """
|
||||
unop("ifind_msb", tint32, """
|
||||
dst = -1;
|
||||
for (int bit = 31; bit >= 0; bit--) {
|
||||
/* If src0 < 0, we're looking for the first 0 bit.
|
||||
@@ -299,7 +303,7 @@ for (int bit = 31; bit >= 0; bit--) {
|
||||
}
|
||||
""")
|
||||
|
||||
unop("find_lsb", tint, """
|
||||
unop("find_lsb", tint32, """
|
||||
dst = -1;
|
||||
for (unsigned bit = 0; bit < 32; bit++) {
|
||||
if ((src0 >> bit) & 1) {
|
||||
@@ -359,10 +363,10 @@ binop("fmul", tfloat, commutative + associative, "src0 * src1")
|
||||
# low 32-bits of signed/unsigned integer multiply
|
||||
binop("imul", tint, commutative + associative, "src0 * src1")
|
||||
# high 32-bits of signed integer multiply
|
||||
binop("imul_high", tint, commutative,
|
||||
binop("imul_high", tint32, commutative,
|
||||
"(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)")
|
||||
# high 32-bits of unsigned integer multiply
|
||||
binop("umul_high", tuint, commutative,
|
||||
binop("umul_high", tuint32, commutative,
|
||||
"(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
|
||||
|
||||
binop("fdiv", tfloat, "", "src0 / src1")
|
||||
@@ -427,18 +431,18 @@ binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
|
||||
|
||||
# non-integer-aware GLSL-style comparisons that return 0.0 or 1.0
|
||||
|
||||
binop_reduce("fall_equal", 1, tfloat, tfloat, "{src0} == {src1}",
|
||||
binop_reduce("fall_equal", 1, tfloat32, tfloat32, "{src0} == {src1}",
|
||||
"{src0} && {src1}", "{src} ? 1.0f : 0.0f")
|
||||
binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}",
|
||||
binop_reduce("fany_nequal", 1, tfloat32, tfloat32, "{src0} != {src1}",
|
||||
"{src0} || {src1}", "{src} ? 1.0f : 0.0f")
|
||||
|
||||
# These comparisons for integer-less hardware return 1.0 and 0.0 for true
|
||||
# and false respectively
|
||||
|
||||
binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
|
||||
binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
|
||||
binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
|
||||
binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
|
||||
binop("slt", tfloat32, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
|
||||
binop("sge", tfloat32, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
|
||||
binop("seq", tfloat32, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
|
||||
binop("sne", tfloat32, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
|
||||
|
||||
|
||||
binop("ishl", tint, "", "src0 << src1")
|
||||
@@ -461,11 +465,11 @@ binop("ixor", tuint, commutative + associative, "src0 ^ src1")
|
||||
# These use (src != 0.0) for testing the truth of the input, and output 1.0
|
||||
# for true and 0.0 for false
|
||||
|
||||
binop("fand", tfloat, commutative,
|
||||
binop("fand", tfloat32, commutative,
|
||||
"((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f")
|
||||
binop("for", tfloat, commutative,
|
||||
binop("for", tfloat32, commutative,
|
||||
"((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f")
|
||||
binop("fxor", tfloat, commutative,
|
||||
binop("fxor", tfloat32, commutative,
|
||||
"(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f")
|
||||
|
||||
binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
|
||||
@@ -487,7 +491,7 @@ binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
|
||||
binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0")
|
||||
|
||||
# Saturated vector add for 4 8bit ints.
|
||||
binop("usadd_4x8", tint, commutative + associative, """
|
||||
binop("usadd_4x8", tint32, commutative + associative, """
|
||||
dst = 0;
|
||||
for (int i = 0; i < 32; i += 8) {
|
||||
dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
|
||||
@@ -495,7 +499,7 @@ for (int i = 0; i < 32; i += 8) {
|
||||
""")
|
||||
|
||||
# Saturated vector subtract for 4 8bit ints.
|
||||
binop("ussub_4x8", tint, "", """
|
||||
binop("ussub_4x8", tint32, "", """
|
||||
dst = 0;
|
||||
for (int i = 0; i < 32; i += 8) {
|
||||
int src0_chan = (src0 >> i) & 0xff;
|
||||
@@ -506,7 +510,7 @@ for (int i = 0; i < 32; i += 8) {
|
||||
""")
|
||||
|
||||
# vector min for 4 8bit ints.
|
||||
binop("umin_4x8", tint, commutative + associative, """
|
||||
binop("umin_4x8", tint32, commutative + associative, """
|
||||
dst = 0;
|
||||
for (int i = 0; i < 32; i += 8) {
|
||||
dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
|
||||
@@ -514,7 +518,7 @@ for (int i = 0; i < 32; i += 8) {
|
||||
""")
|
||||
|
||||
# vector max for 4 8bit ints.
|
||||
binop("umax_4x8", tint, commutative + associative, """
|
||||
binop("umax_4x8", tint32, commutative + associative, """
|
||||
dst = 0;
|
||||
for (int i = 0; i < 32; i += 8) {
|
||||
dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
|
||||
@@ -522,7 +526,7 @@ for (int i = 0; i < 32; i += 8) {
|
||||
""")
|
||||
|
||||
# unorm multiply: (a * b) / 255.
|
||||
binop("umul_unorm_4x8", tint, commutative + associative, """
|
||||
binop("umul_unorm_4x8", tint32, commutative + associative, """
|
||||
dst = 0;
|
||||
for (int i = 0; i < 32; i += 8) {
|
||||
int src0_chan = (src0 >> i) & 0xff;
|
||||
@@ -531,15 +535,15 @@ for (int i = 0; i < 32; i += 8) {
|
||||
}
|
||||
""")
|
||||
|
||||
binop("fpow", tfloat, "", "powf(src0, src1)")
|
||||
# Double-precision operands need pow(); powf() is the single-precision variant.
binop("fpow", tfloat, "", "bit_size == 64 ? pow(src0, src1) : powf(src0, src1)")
|
||||
|
||||
binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat,
|
||||
binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32,
|
||||
"pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
|
||||
|
||||
# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
|
||||
# and that of the "bfi1" i965 instruction. That is, it has undefined behavior
|
||||
# if either of its arguments are 32.
|
||||
binop_convert("bfm", tuint, tint, "", """
|
||||
binop_convert("bfm", tuint32, tint32, "", """
|
||||
int bits = src0, offset = src1;
|
||||
if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32)
|
||||
dst = 0; /* undefined */
|
||||
@@ -548,7 +552,7 @@ else
|
||||
""")
|
||||
|
||||
opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """
|
||||
dst = ldexpf(src0, src1);
|
||||
dst = (bit_size == 64) ? ldexp(src0, src1) : ldexpf(src0, src1);
|
||||
/* flush denormals to zero. */
|
||||
if (!isnormal(dst))
|
||||
dst = copysignf(0.0f, src0);
|
||||
@@ -588,12 +592,12 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
|
||||
# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).
|
||||
|
||||
|
||||
triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2")
|
||||
triop("fcsel", tfloat32, "(src0 != 0.0f) ? src1 : src2")
|
||||
opcode("bcsel", 0, tuint, [0, 0, 0],
|
||||
[tbool, tuint, tuint], "", "src0 ? src1 : src2")
|
||||
|
||||
# SM5 bfi assembly
|
||||
triop("bfi", tuint, """
|
||||
triop("bfi", tuint32, """
|
||||
unsigned mask = src0, insert = src1, base = src2;
|
||||
if (mask == 0) {
|
||||
dst = base;
|
||||
@@ -608,8 +612,8 @@ if (mask == 0) {
|
||||
""")
|
||||
|
||||
# SM5 ubfe/ibfe assembly
|
||||
opcode("ubfe", 0, tuint,
|
||||
[0, 0, 0], [tuint, tint, tint], "", """
|
||||
opcode("ubfe", 0, tuint32,
|
||||
[0, 0, 0], [tuint32, tint32, tint32], "", """
|
||||
unsigned base = src0;
|
||||
int offset = src1, bits = src2;
|
||||
if (bits == 0) {
|
||||
@@ -622,8 +626,8 @@ if (bits == 0) {
|
||||
dst = base >> offset;
|
||||
}
|
||||
""")
|
||||
opcode("ibfe", 0, tint,
|
||||
[0, 0, 0], [tint, tint, tint], "", """
|
||||
opcode("ibfe", 0, tint32,
|
||||
[0, 0, 0], [tint32, tint32, tint32], "", """
|
||||
int base = src0;
|
||||
int offset = src1, bits = src2;
|
||||
if (bits == 0) {
|
||||
@@ -638,8 +642,8 @@ if (bits == 0) {
|
||||
""")
|
||||
|
||||
# GLSL bitfieldExtract()
|
||||
opcode("ubitfield_extract", 0, tuint,
|
||||
[0, 0, 0], [tuint, tint, tint], "", """
|
||||
opcode("ubitfield_extract", 0, tuint32,
|
||||
[0, 0, 0], [tuint32, tint32, tint32], "", """
|
||||
unsigned base = src0;
|
||||
int offset = src1, bits = src2;
|
||||
if (bits == 0) {
|
||||
@@ -650,8 +654,8 @@ if (bits == 0) {
|
||||
dst = (base >> offset) & ((1ull << bits) - 1);
|
||||
}
|
||||
""")
|
||||
opcode("ibitfield_extract", 0, tint,
|
||||
[0, 0, 0], [tint, tint, tint], "", """
|
||||
opcode("ibitfield_extract", 0, tint32,
|
||||
[0, 0, 0], [tint32, tint32, tint32], "", """
|
||||
int base = src0;
|
||||
int offset = src1, bits = src2;
|
||||
if (bits == 0) {
|
||||
@@ -678,8 +682,8 @@ def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
|
||||
[tuint, tuint, tuint, tuint],
|
||||
"", const_expr)
|
||||
|
||||
opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0],
|
||||
[tuint, tuint, tint, tint], "", """
|
||||
opcode("bitfield_insert", 0, tuint32, [0, 0, 0, 0],
|
||||
[tuint32, tuint32, tint32, tint32], "", """
|
||||
unsigned base = src0, insert = src1;
|
||||
int offset = src2, bits = src3;
|
||||
if (bits == 0) {
|
||||
|
||||
@@ -35,10 +35,17 @@ d = 'd'
|
||||
|
||||
# Written in the form (<search>, <replace>) where <search> is an expression
|
||||
# and <replace> is either an expression or a value. An expression is
|
||||
# defined as a tuple of the form (<op>, <src0>, <src1>, <src2>, <src3>)
|
||||
# defined as a tuple of the form ([~]<op>, <src0>, <src1>, <src2>, <src3>)
|
||||
# where each source is either an expression or a value. A value can be
|
||||
# either a numeric constant or a string representing a variable name.
|
||||
#
|
||||
# If the opcode in a search expression is prefixed by a '~' character, this
|
||||
# indicates that the operation is inexact. Such operations will only get
|
||||
# applied to SSA values that do not have the exact bit set. This should be
|
||||
# used by any optimizations that are not bit-for-bit exact. It should not,
|
||||
# however, be used for backend-requested lowering operations as those need to
|
||||
# happen regardless of precision.
|
||||
#
|
||||
# Variable names are specified as "[#]name[@type]" where "#" indicates that
|
||||
# the given variable will only match constants and the type indicates that
|
||||
# the given variable will only match values from ALU instructions with the
|
||||
@@ -55,19 +62,19 @@ optimizations = [
|
||||
(('fabs', ('fneg', a)), ('fabs', a)),
|
||||
(('iabs', ('iabs', a)), ('iabs', a)),
|
||||
(('iabs', ('ineg', a)), ('iabs', a)),
|
||||
(('fadd', a, 0.0), a),
|
||||
(('~fadd', a, 0.0), a),
|
||||
(('iadd', a, 0), a),
|
||||
(('usadd_4x8', a, 0), a),
|
||||
(('usadd_4x8', a, ~0), ~0),
|
||||
(('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
|
||||
(('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
|
||||
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
|
||||
(('fadd', ('fneg', a), a), 0.0),
|
||||
(('~fadd', ('fneg', a), a), 0.0),
|
||||
(('iadd', ('ineg', a), a), 0),
|
||||
(('iadd', ('ineg', a), ('iadd', a, b)), b),
|
||||
(('iadd', a, ('iadd', ('ineg', a), b)), b),
|
||||
(('fadd', ('fneg', a), ('fadd', a, b)), b),
|
||||
(('fadd', a, ('fadd', ('fneg', a), b)), b),
|
||||
(('fmul', a, 0.0), 0.0),
|
||||
(('~fadd', ('fneg', a), ('fadd', a, b)), b),
|
||||
(('~fadd', a, ('fadd', ('fneg', a), b)), b),
|
||||
(('~fmul', a, 0.0), 0.0),
|
||||
(('imul', a, 0), 0),
|
||||
(('umul_unorm_4x8', a, 0), 0),
|
||||
(('umul_unorm_4x8', a, ~0), a),
|
||||
@@ -76,32 +83,48 @@ optimizations = [
|
||||
(('fmul', a, -1.0), ('fneg', a)),
|
||||
(('imul', a, -1), ('ineg', a)),
|
||||
(('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
|
||||
(('ffma', 0.0, a, b), b),
|
||||
(('ffma', a, 0.0, b), b),
|
||||
(('ffma', a, b, 0.0), ('fmul', a, b)),
|
||||
(('~ffma', 0.0, a, b), b),
|
||||
(('~ffma', a, 0.0, b), b),
|
||||
(('~ffma', a, b, 0.0), ('fmul', a, b)),
|
||||
(('ffma', a, 1.0, b), ('fadd', a, b)),
|
||||
(('ffma', 1.0, a, b), ('fadd', a, b)),
|
||||
(('flrp', a, b, 0.0), a),
|
||||
(('flrp', a, b, 1.0), b),
|
||||
(('flrp', a, a, b), a),
|
||||
(('flrp', 0.0, a, b), ('fmul', a, b)),
|
||||
(('~flrp', a, b, 0.0), a),
|
||||
(('~flrp', a, b, 1.0), b),
|
||||
(('~flrp', a, a, b), a),
|
||||
(('~flrp', 0.0, a, b), ('fmul', a, b)),
|
||||
(('~flrp', a, b, ('b2f', c)), ('bcsel', c, b, a), 'options->lower_flrp'),
|
||||
(('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
|
||||
(('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
|
||||
(('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
|
||||
(('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
|
||||
(('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', c)))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp'),
|
||||
(('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c ))), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp'),
|
||||
(('~fadd', a, ('fmul', ('b2f', c), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp'),
|
||||
(('~fadd', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
|
||||
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
|
||||
(('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
|
||||
(('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
|
||||
# Comparison simplifications
|
||||
(('inot', ('flt', a, b)), ('fge', a, b)),
|
||||
(('inot', ('fge', a, b)), ('flt', a, b)),
|
||||
(('inot', ('feq', a, b)), ('fne', a, b)),
|
||||
(('inot', ('fne', a, b)), ('feq', a, b)),
|
||||
(('~inot', ('flt', a, b)), ('fge', a, b)),
|
||||
(('~inot', ('fge', a, b)), ('flt', a, b)),
|
||||
(('~inot', ('feq', a, b)), ('fne', a, b)),
|
||||
(('~inot', ('fne', a, b)), ('feq', a, b)),
|
||||
(('inot', ('ilt', a, b)), ('ige', a, b)),
|
||||
(('inot', ('ige', a, b)), ('ilt', a, b)),
|
||||
(('inot', ('ieq', a, b)), ('ine', a, b)),
|
||||
(('inot', ('ine', a, b)), ('ieq', a, b)),
|
||||
|
||||
# 0.0 >= b2f(a)
|
||||
# b2f(a) <= 0.0
|
||||
# b2f(a) == 0.0 because b2f(a) can only be 0 or 1
|
||||
# inot(a)
|
||||
(('fge', 0.0, ('b2f', a)), ('inot', a)),
|
||||
|
||||
# 0.0 < fabs(a)
|
||||
# fabs(a) > 0.0
|
||||
# fabs(a) != 0.0 because fabs(a) must be >= 0
|
||||
# a != 0.0
|
||||
(('flt', 0.0, ('fabs', a)), ('fne', a, 0.0)),
|
||||
|
||||
(('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
|
||||
(('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
|
||||
(('bcsel', ('flt', b, a), b, a), ('fmin', a, b)),
|
||||
(('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
|
||||
(('bcsel', ('inot', 'a@bool'), b, c), ('bcsel', a, c, b)),
|
||||
(('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
|
||||
@@ -111,15 +134,19 @@ optimizations = [
|
||||
(('imax', a, a), a),
|
||||
(('umin', a, a), a),
|
||||
(('umax', a, a), a),
|
||||
(('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
|
||||
(('fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
|
||||
(('~fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
|
||||
(('~fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
|
||||
(('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
|
||||
(('fsat', ('fsat', a)), ('fsat', a)),
|
||||
(('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
|
||||
(('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
|
||||
(('ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)),
|
||||
(('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
|
||||
(('ior', ('fge', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)),
|
||||
(('~ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
|
||||
(('~ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)),
|
||||
(('~ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
|
||||
(('~ior', ('fge', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)),
|
||||
(('fabs', ('slt', a, b)), ('slt', a, b)),
|
||||
(('fabs', ('sge', a, b)), ('sge', a, b)),
|
||||
(('fabs', ('seq', a, b)), ('seq', a, b)),
|
||||
(('fabs', ('sne', a, b)), ('sne', a, b)),
|
||||
(('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'),
|
||||
(('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'),
|
||||
(('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'),
|
||||
@@ -151,7 +178,6 @@ optimizations = [
|
||||
(('ior', a, 0), a),
|
||||
(('fxor', a, a), 0.0),
|
||||
(('ixor', a, a), 0),
|
||||
(('fxor', a, 0.0), a),
|
||||
(('ixor', a, 0), a),
|
||||
(('inot', ('inot', a)), a),
|
||||
# DeMorgan's Laws
|
||||
@@ -167,35 +193,35 @@ optimizations = [
|
||||
(('iand', 0xff, ('ushr', a, 24)), ('ushr', a, 24)),
|
||||
(('iand', 0xffff, ('ushr', a, 16)), ('ushr', a, 16)),
|
||||
# Exponential/logarithmic identities
|
||||
(('fexp2', ('flog2', a)), a), # 2^lg2(a) = a
|
||||
(('flog2', ('fexp2', a)), a), # lg2(2^a) = a
|
||||
(('~fexp2', ('flog2', a)), a), # 2^lg2(a) = a
|
||||
(('~flog2', ('fexp2', a)), a), # lg2(2^a) = a
|
||||
(('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
|
||||
(('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
|
||||
(('fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))),
|
||||
('fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) * d) = a^b * c^d
|
||||
(('fpow', a, 1.0), a),
|
||||
(('fpow', a, 2.0), ('fmul', a, a)),
|
||||
(('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
|
||||
(('fpow', 2.0, a), ('fexp2', a)),
|
||||
(('fpow', ('fpow', a, 2.2), 0.454545), a),
|
||||
(('fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)),
|
||||
(('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
|
||||
(('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
|
||||
(('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
|
||||
(('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
|
||||
(('flog2', ('frcp', a)), ('fneg', ('flog2', a))),
|
||||
(('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
|
||||
(('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
|
||||
(('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
|
||||
(('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
|
||||
(('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
|
||||
(('~fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
|
||||
(('~fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))),
|
||||
('~fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) * d) = a^b * c^d
|
||||
(('~fpow', a, 1.0), a),
|
||||
(('~fpow', a, 2.0), ('fmul', a, a)),
|
||||
(('~fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
|
||||
(('~fpow', 2.0, a), ('fexp2', a)),
|
||||
(('~fpow', ('fpow', a, 2.2), 0.454545), a),
|
||||
(('~fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)),
|
||||
(('~fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
|
||||
(('~frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
|
||||
(('~frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
|
||||
(('~flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
|
||||
(('~flog2', ('frcp', a)), ('fneg', ('flog2', a))),
|
||||
(('~flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
|
||||
(('~flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
|
||||
(('~fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
|
||||
(('~fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
|
||||
(('~fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
|
||||
# Division and reciprocal
|
||||
(('fdiv', 1.0, a), ('frcp', a)),
|
||||
(('~fdiv', 1.0, a), ('frcp', a)),
|
||||
(('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
|
||||
(('frcp', ('frcp', a)), a),
|
||||
(('frcp', ('fsqrt', a)), ('frsq', a)),
|
||||
(('~frcp', ('frcp', a)), a),
|
||||
(('~frcp', ('fsqrt', a)), ('frsq', a)),
|
||||
(('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
|
||||
(('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
|
||||
(('~frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
|
||||
# Boolean simplifications
|
||||
(('ieq', 'a@bool', True), a),
|
||||
(('ine', 'a@bool', True), ('inot', a)),
|
||||
@@ -216,6 +242,10 @@ optimizations = [
|
||||
(('i2b', ('b2i', a)), a),
|
||||
(('f2i', ('ftrunc', a)), ('f2i', a)),
|
||||
(('f2u', ('ftrunc', a)), ('f2u', a)),
|
||||
(('i2b', ('ineg', a)), ('i2b', a)),
|
||||
(('i2b', ('iabs', a)), ('i2b', a)),
|
||||
(('fabs', ('b2f', a)), ('b2f', a)),
|
||||
(('iabs', ('b2i', a)), ('b2i', a)),
|
||||
|
||||
# Byte extraction
|
||||
(('ushr', a, 24), ('extract_u8', a, 3), '!options->lower_extract_byte'),
|
||||
@@ -228,7 +258,7 @@ optimizations = [
|
||||
(('iand', 0xffff, a), ('extract_u16', a, 0), '!options->lower_extract_word'),
|
||||
|
||||
# Subtracts
|
||||
(('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
|
||||
(('~fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
|
||||
(('isub', a, ('isub', 0, b)), ('iadd', a, b)),
|
||||
(('ussub_4x8', a, 0), a),
|
||||
(('ussub_4x8', a, ~0), 0),
|
||||
@@ -236,7 +266,7 @@ optimizations = [
|
||||
(('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
|
||||
(('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
|
||||
(('ineg', a), ('isub', 0, a), 'options->lower_negate'),
|
||||
(('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
|
||||
(('~fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
|
||||
(('iadd', a, ('isub', 0, b)), ('isub', a, b)),
|
||||
(('fabs', ('fsub', 0.0, a)), ('fabs', a)),
|
||||
(('iabs', ('isub', 0, a)), ('iabs', a)),
|
||||
@@ -368,10 +398,13 @@ for op in ['flt', 'fge', 'feq', 'fne',
|
||||
# they help code generation but do not necessarily produce code that is
|
||||
# more easily optimizable.
|
||||
late_optimizations = [
|
||||
# Most of these optimizations aren't quite safe when you get infinity or
|
||||
# NaN involved but the first one should be fine.
|
||||
(('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
|
||||
(('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
|
||||
(('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
|
||||
(('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
|
||||
(('~fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
|
||||
(('~feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
|
||||
(('~fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
|
||||
|
||||
(('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
|
||||
(('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
|
||||
(('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
|
||||
|
||||
@@ -46,10 +46,28 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
|
||||
if (!instr->dest.dest.is_ssa)
|
||||
return false;
|
||||
|
||||
/* In the case that any outputs/inputs have unsized types, then we need to
|
||||
* guess the bit-size. In this case, the validator ensures that all
|
||||
* bit-sizes match so we can just take the bit-size from first
|
||||
* output/input with an unsized type. If all the outputs/inputs are sized
|
||||
* then we don't need to guess the bit-size at all because the code we
|
||||
* generate for constant opcodes in this case already knows the sizes of
|
||||
* the types involved and does not need the provided bit-size for anything
|
||||
* (although it still requires to receive a valid bit-size).
|
||||
*/
|
||||
unsigned bit_size = 0;
|
||||
if (!nir_alu_type_get_type_size(nir_op_infos[instr->op].output_type))
|
||||
bit_size = instr->dest.dest.ssa.bit_size;
|
||||
|
||||
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
|
||||
if (!instr->src[i].src.is_ssa)
|
||||
return false;
|
||||
|
||||
if (bit_size == 0 &&
|
||||
!nir_alu_type_get_type_size(nir_op_infos[instr->op].input_sizes[i])) {
|
||||
bit_size = instr->src[i].src.ssa->bit_size;
|
||||
}
|
||||
|
||||
nir_instr *src_instr = instr->src[i].src.ssa->parent_instr;
|
||||
|
||||
if (src_instr->type != nir_instr_type_load_const)
|
||||
@@ -58,24 +76,31 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
|
||||
|
||||
for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(instr, i);
|
||||
j++) {
|
||||
src[i].u[j] = load_const->value.u[instr->src[i].swizzle[j]];
|
||||
if (load_const->def.bit_size == 64)
|
||||
src[i].u64[j] = load_const->value.u64[instr->src[i].swizzle[j]];
|
||||
else
|
||||
src[i].u32[j] = load_const->value.u32[instr->src[i].swizzle[j]];
|
||||
}
|
||||
|
||||
/* We shouldn't have any source modifiers in the optimization loop. */
|
||||
assert(!instr->src[i].abs && !instr->src[i].negate);
|
||||
}
|
||||
|
||||
if (bit_size == 0)
|
||||
bit_size = 32;
|
||||
|
||||
/* We shouldn't have any saturate modifiers in the optimization loop. */
|
||||
assert(!instr->dest.saturate);
|
||||
|
||||
nir_const_value dest =
|
||||
nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components,
|
||||
src);
|
||||
bit_size, src);
|
||||
|
||||
nir_load_const_instr *new_instr =
|
||||
nir_load_const_instr_create(mem_ctx,
|
||||
instr->dest.dest.ssa.num_components);
|
||||
|
||||
new_instr->def.bit_size = instr->dest.dest.ssa.bit_size;
|
||||
new_instr->value = dest;
|
||||
|
||||
nir_instr_insert_before(&instr->instr, &new_instr->instr);
|
||||
@@ -106,7 +131,7 @@ constant_fold_deref(nir_instr *instr, nir_deref_var *deref)
|
||||
nir_load_const_instr *indirect =
|
||||
nir_instr_as_load_const(arr->indirect.ssa->parent_instr);
|
||||
|
||||
arr->base_offset += indirect->value.u[0];
|
||||
arr->base_offset += indirect->value.u32[0];
|
||||
|
||||
/* Clear out the source */
|
||||
nir_instr_rewrite_src(instr, &arr->indirect, nir_src_for_ssa(NULL));
|
||||
|
||||
@@ -228,7 +228,7 @@ dead_cf_block(nir_block *block)
|
||||
if (!const_value)
|
||||
return false;
|
||||
|
||||
opt_constant_if(following_if, const_value->u[0] != 0);
|
||||
opt_constant_if(following_if, const_value->u32[0] != 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -210,7 +210,8 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)
|
||||
}
|
||||
|
||||
nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
|
||||
phi->dest.ssa.num_components, phi->dest.ssa.name);
|
||||
phi->dest.ssa.num_components,
|
||||
phi->dest.ssa.bit_size, phi->dest.ssa.name);
|
||||
sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
|
||||
|
||||
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
|
||||
|
||||
@@ -52,6 +52,7 @@ struct nir_phi_builder_value {
|
||||
|
||||
/* Needed so we can create phis and undefs */
|
||||
unsigned num_components;
|
||||
unsigned bit_size;
|
||||
|
||||
/* The list of phi nodes associated with this value. Phi nodes are not
|
||||
* added directly. Instead, they are created, the instr->block pointer
|
||||
@@ -61,8 +62,18 @@ struct nir_phi_builder_value {
|
||||
*/
|
||||
struct exec_list phis;
|
||||
|
||||
/* Array of SSA defs, indexed by block. If a phi needs to be inserted
|
||||
* in a given block, it will have the magic value NEEDS_PHI.
|
||||
/* Array of SSA defs, indexed by block. For each block, this array has
|
||||
* one of three types of values:
|
||||
*
|
||||
* - NULL. Indicates that there is no known definition in this block. If
|
||||
* you need to find one, look at the block's immediate dominator.
|
||||
*
|
||||
* - NEEDS_PHI. Indicates that the block may need a phi node but none has
|
||||
* been created yet. If a def is requested for a block, a phi will need
|
||||
* to be created.
|
||||
*
|
||||
* - A regular SSA def. This will be either the result of a phi node or
|
||||
* one of the defs provided by nir_phi_builder_value_set_block_def().
|
||||
*/
|
||||
nir_ssa_def *defs[0];
|
||||
};
|
||||
@@ -101,7 +112,7 @@ nir_phi_builder_create(nir_function_impl *impl)
|
||||
|
||||
struct nir_phi_builder_value *
|
||||
nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
|
||||
const BITSET_WORD *defs)
|
||||
unsigned bit_size, const BITSET_WORD *defs)
|
||||
{
|
||||
struct nir_phi_builder_value *val;
|
||||
unsigned i, w_start = 0, w_end = 0;
|
||||
@@ -109,6 +120,7 @@ nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
|
||||
val = rzalloc_size(pb, sizeof(*val) + sizeof(val->defs[0]) * pb->num_blocks);
|
||||
val->builder = pb;
|
||||
val->num_components = num_components;
|
||||
val->bit_size = bit_size;
|
||||
exec_list_make_empty(&val->phis);
|
||||
exec_list_push_tail(&pb->values, &val->node);
|
||||
|
||||
@@ -127,8 +139,7 @@ nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
|
||||
set_foreach(cur->dom_frontier, dom_entry) {
|
||||
nir_block *next = (nir_block *) dom_entry->key;
|
||||
|
||||
/*
|
||||
* If there's more than one return statement, then the end block
|
||||
/* If there's more than one return statement, then the end block
|
||||
* can be a join point for some definitions. However, there are
|
||||
* no instructions in the end block, so nothing would use those
|
||||
* phi nodes. Of course, we couldn't place those phi nodes
|
||||
@@ -139,6 +150,10 @@ nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
|
||||
continue;
|
||||
|
||||
if (val->defs[next->index] == NULL) {
|
||||
/* Instead of creating a phi node immediately, we simply set the
|
||||
* value to the magic value NEEDS_PHI. Later, we create phi nodes
|
||||
* on demand in nir_phi_builder_value_get_block_def().
|
||||
*/
|
||||
val->defs[next->index] = NEEDS_PHI;
|
||||
|
||||
if (pb->work[next->index] < pb->iter_count) {
|
||||
@@ -163,7 +178,9 @@ nir_ssa_def *
|
||||
nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val,
|
||||
nir_block *block)
|
||||
{
|
||||
/* For each block, we have one of three types of values */
|
||||
if (val->defs[block->index] == NULL) {
|
||||
/* NULL indicates that we have no SSA def for this block. */
|
||||
if (block->imm_dom) {
|
||||
/* Grab it from our immediate dominator. We'll stash it here for
|
||||
* easy access later.
|
||||
@@ -185,17 +202,36 @@ nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val,
|
||||
return &undef->def;
|
||||
}
|
||||
} else if (val->defs[block->index] == NEEDS_PHI) {
|
||||
/* If we need a phi instruction, go ahead and create one but don't
|
||||
* add it to the program yet. Later, we'll go through and set up phi
|
||||
* sources and the instructions will be added at that time.
|
||||
/* The magic value NEEDS_PHI indicates that the block needs a phi node
|
||||
* but none has been created. We need to create one now so we can
|
||||
* return it to the caller.
|
||||
*
|
||||
* Because a phi node may use SSA defs that it does not dominate (this
|
||||
* happens in loops), we do not yet have enough information to fully
|
||||
* fill out the phi node. Instead, the phi nodes we create here will be
|
||||
* empty (have no sources) and won't actually be placed in the block's
|
||||
* instruction list yet. Later, in nir_phi_builder_finish(), we walk
|
||||
* over all of the phi instructions, fill out the sources lists, and
|
||||
* place them at the top of their respective block's instruction list.
|
||||
*
|
||||
* Creating phi nodes on-demand allows us to avoid creating dead phi
|
||||
* nodes that will just get deleted later. While this probably isn't a
|
||||
* big win for a full into-SSA pass, other users may use the phi builder
|
||||
* to make small SSA form repairs where most of the phi nodes will never
|
||||
* be used.
|
||||
*/
|
||||
nir_phi_instr *phi = nir_phi_instr_create(val->builder->shader);
|
||||
nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components, NULL);
|
||||
nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components,
|
||||
val->bit_size, NULL);
|
||||
phi->instr.block = block;
|
||||
exec_list_push_tail(&val->phis, &phi->instr.node);
|
||||
val->defs[block->index] = &phi->dest.ssa;
|
||||
return &phi->dest.ssa;
|
||||
} else {
|
||||
/* In this case, we have an actual SSA def. It's either the result of a
|
||||
* phi node created by the case above or one passed to us through
|
||||
* nir_phi_builder_value_set_block_def().
|
||||
*/
|
||||
return val->defs[block->index];
|
||||
}
|
||||
}
|
||||
@@ -216,9 +252,14 @@ nir_phi_builder_finish(struct nir_phi_builder *pb)
|
||||
NIR_VLA(nir_block *, preds, num_blocks);
|
||||
|
||||
foreach_list_typed(struct nir_phi_builder_value, val, node, &pb->values) {
|
||||
/* We can't iterate over the list of phis normally because we are
|
||||
* removing them as we go and, in some cases, adding new phis as we
|
||||
* build the source lists of others.
|
||||
/* We treat the linked list of phi nodes like a worklist. The list is
|
||||
* pre-populated by calls to nir_phi_builder_value_get_block_def() that
|
||||
* create phi nodes. As we fill in the sources of phi nodes, more may
|
||||
* be created and are added to the end of the list.
|
||||
*
|
||||
* Because we are adding and removing phi nodes from the list as we go,
|
||||
* we can't iterate over it normally. Instead, we just iterate until
|
||||
* the list is empty.
|
||||
*/
|
||||
while (!exec_list_is_empty(&val->phis)) {
|
||||
struct exec_node *head = exec_list_get_head(&val->phis);
|
||||
|
||||
@@ -25,7 +25,38 @@
|
||||
|
||||
#include "nir.h"
|
||||
|
||||
/** A helper for placing phi nodes in a NIR shader
|
||||
*
|
||||
* Basic usage goes something like this:
|
||||
*
|
||||
* each variable, var, has:
|
||||
* a bitset var.defs of blocks where the variable is defined
|
||||
* a struct nir_phi_builder_value *pb_val
|
||||
*
|
||||
* // initialize bitsets
|
||||
* foreach block:
|
||||
* foreach def of variable var:
|
||||
* var.defs[def.block] = true;
|
||||
*
|
||||
* // initialize phi builder
|
||||
* pb = nir_phi_builder_create()
|
||||
* foreach var:
|
||||
* var.pb_val = nir_phi_builder_add_value(pb, var.defs)
|
||||
*
|
||||
* // Visit each block. This needs to visit dominators first;
|
||||
* // nir_for_each_block() will be ok.
|
||||
* foreach block:
|
||||
* foreach instruction:
|
||||
* foreach use of variable var:
|
||||
* replace use with nir_phi_builder_value_get_block_def(var.pb_val)
|
||||
* foreach def of variable var:
|
||||
* create ssa def, register with
|
||||
* nir_phi_builder_value_set_block_def(var.pb_val)
|
||||
*
|
||||
* nir_phi_builder_finish(pb)
|
||||
*/
|
||||
struct nir_phi_builder;
|
||||
|
||||
struct nir_phi_builder_value;
|
||||
|
||||
/* Create a new phi builder.
|
||||
@@ -43,7 +74,7 @@ struct nir_phi_builder *nir_phi_builder_create(nir_function_impl *impl);
|
||||
*/
|
||||
struct nir_phi_builder_value *
|
||||
nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
|
||||
const BITSET_WORD *defs);
|
||||
unsigned bit_size, const BITSET_WORD *defs);
|
||||
|
||||
/* Register a definition for the given value and block.
|
||||
*
|
||||
|
||||
@@ -207,6 +207,8 @@ print_alu_instr(nir_alu_instr *instr, print_state *state)
|
||||
print_alu_dest(&instr->dest, state);
|
||||
|
||||
fprintf(fp, " = %s", nir_op_infos[instr->op].name);
|
||||
if (instr->exact)
|
||||
fprintf(fp, "!");
|
||||
if (instr->dest.saturate)
|
||||
fprintf(fp, ".sat");
|
||||
fprintf(fp, " ");
|
||||
@@ -714,7 +716,7 @@ print_load_const_instr(nir_load_const_instr *instr, print_state *state)
|
||||
* and then print the float in a comment for readability.
|
||||
*/
|
||||
|
||||
fprintf(fp, "0x%08x /* %f */", instr->value.u[i], instr->value.f[i]);
|
||||
fprintf(fp, "0x%08x /* %f */", instr->value.u32[i], instr->value.f32[i]);
|
||||
}
|
||||
|
||||
fprintf(fp, ")");
|
||||
|
||||
@@ -85,7 +85,8 @@ repair_ssa_def(nir_ssa_def *def, void *void_state)
|
||||
BITSET_SET(state->def_set, def->parent_instr->block->index);
|
||||
|
||||
struct nir_phi_builder_value *val =
|
||||
nir_phi_builder_add_value(pb, def->num_components, state->def_set);
|
||||
nir_phi_builder_add_value(pb, def->num_components, def->bit_size,
|
||||
state->def_set);
|
||||
|
||||
nir_phi_builder_value_set_block_def(val, def->parent_instr->block, def);
|
||||
|
||||
|
||||
+232
-24
@@ -62,7 +62,8 @@ alu_instr_is_bool(nir_alu_instr *instr)
|
||||
case nir_op_inot:
|
||||
return src_is_bool(instr->src[0].src);
|
||||
default:
|
||||
return nir_op_infos[instr->op].output_type == nir_type_bool;
|
||||
return (nir_alu_type_get_base_type(nir_op_infos[instr->op].output_type)
|
||||
== nir_type_bool);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,8 +126,10 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
|
||||
nir_alu_instr *src_alu =
|
||||
nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);
|
||||
|
||||
if (nir_op_infos[src_alu->op].output_type != var->type &&
|
||||
!(var->type == nir_type_bool && alu_instr_is_bool(src_alu)))
|
||||
if (nir_alu_type_get_base_type(nir_op_infos[src_alu->op].output_type) !=
|
||||
var->type &&
|
||||
!(nir_alu_type_get_base_type(var->type) == nir_type_bool &&
|
||||
alu_instr_is_bool(src_alu)))
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -158,21 +161,65 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
|
||||
nir_load_const_instr *load =
|
||||
nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);
|
||||
|
||||
switch (nir_op_infos[instr->op].input_types[src]) {
|
||||
switch (const_val->type) {
|
||||
case nir_type_float:
|
||||
for (unsigned i = 0; i < num_components; ++i) {
|
||||
if (load->value.f[new_swizzle[i]] != const_val->data.f)
|
||||
double val;
|
||||
switch (load->def.bit_size) {
|
||||
case 32:
|
||||
val = load->value.f32[new_swizzle[i]];
|
||||
break;
|
||||
case 64:
|
||||
val = load->value.f64[new_swizzle[i]];
|
||||
break;
|
||||
default:
|
||||
unreachable("unknown bit size");
|
||||
}
|
||||
|
||||
if (val != const_val->data.d)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
case nir_type_int:
|
||||
case nir_type_uint:
|
||||
case nir_type_bool:
|
||||
for (unsigned i = 0; i < num_components; ++i) {
|
||||
if (load->value.i[new_swizzle[i]] != const_val->data.i)
|
||||
int64_t val;
|
||||
switch (load->def.bit_size) {
|
||||
case 32:
|
||||
val = load->value.i32[new_swizzle[i]];
|
||||
break;
|
||||
case 64:
|
||||
val = load->value.i64[new_swizzle[i]];
|
||||
break;
|
||||
default:
|
||||
unreachable("unknown bit size");
|
||||
}
|
||||
|
||||
if (val != const_val->data.i)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
case nir_type_uint:
|
||||
case nir_type_bool32:
|
||||
for (unsigned i = 0; i < num_components; ++i) {
|
||||
uint64_t val;
|
||||
switch (load->def.bit_size) {
|
||||
case 32:
|
||||
val = load->value.u32[new_swizzle[i]];
|
||||
break;
|
||||
case 64:
|
||||
val = load->value.u64[new_swizzle[i]];
|
||||
break;
|
||||
default:
|
||||
unreachable("unknown bit size");
|
||||
}
|
||||
|
||||
if (val != const_val->data.u)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
default:
|
||||
unreachable("Invalid alu source type");
|
||||
}
|
||||
@@ -191,6 +238,10 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
|
||||
if (instr->op != expr->opcode)
|
||||
return false;
|
||||
|
||||
assert(instr->dest.dest.is_ssa);
|
||||
if (expr->inexact && instr->exact)
|
||||
return false;
|
||||
|
||||
assert(!instr->dest.saturate);
|
||||
assert(nir_op_infos[instr->op].num_inputs > 0);
|
||||
|
||||
@@ -244,9 +295,123 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct bitsize_tree bitsize_tree;

/* One node of the tree used to work out the bit size of every value in a
 * replacement expression.  A size of 0 means "not known yet".
 */
struct bitsize_tree {
   /* Number of valid entries in srcs[], is_src_sized[] and src_size[]. */
   unsigned num_srcs;
   /* Child nodes, one per source of the expression. */
   bitsize_tree *srcs[4];

   /* Bit size shared by the sources (and destination) whose opcode type
    * carries no explicit size.
    */
   unsigned common_size;
   /* Whether the opcode's type for source i has an explicit size. */
   bool is_src_sized[4];
   /* Whether the destination's size is fixed rather than inferred. */
   bool is_dest_sized;

   /* Resolved bit size of the destination. */
   unsigned dest_size;
   /* Resolved bit size of each source. */
   unsigned src_size[4];
};
|
||||
|
||||
static bitsize_tree *
|
||||
build_bitsize_tree(void *mem_ctx, struct match_state *state,
|
||||
const nir_search_value *value)
|
||||
{
|
||||
bitsize_tree *tree = ralloc(mem_ctx, bitsize_tree);
|
||||
|
||||
switch (value->type) {
|
||||
case nir_search_value_expression: {
|
||||
nir_search_expression *expr = nir_search_value_as_expression(value);
|
||||
nir_op_info info = nir_op_infos[expr->opcode];
|
||||
tree->num_srcs = info.num_inputs;
|
||||
tree->common_size = 0;
|
||||
for (unsigned i = 0; i < info.num_inputs; i++) {
|
||||
tree->is_src_sized[i] = !!nir_alu_type_get_type_size(info.input_types[i]);
|
||||
if (tree->is_src_sized[i])
|
||||
tree->src_size[i] = nir_alu_type_get_type_size(info.input_types[i]);
|
||||
tree->srcs[i] = build_bitsize_tree(mem_ctx, state, expr->srcs[i]);
|
||||
}
|
||||
tree->is_dest_sized = !!nir_alu_type_get_type_size(info.output_type);
|
||||
if (tree->is_dest_sized)
|
||||
tree->dest_size = nir_alu_type_get_type_size(info.output_type);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_search_value_variable: {
|
||||
nir_search_variable *var = nir_search_value_as_variable(value);
|
||||
tree->num_srcs = 0;
|
||||
tree->is_dest_sized = true;
|
||||
tree->dest_size = nir_src_bit_size(state->variables[var->variable].src);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_search_value_constant: {
|
||||
tree->num_srcs = 0;
|
||||
tree->is_dest_sized = false;
|
||||
tree->common_size = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return tree;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
bitsize_tree_filter_up(bitsize_tree *tree)
|
||||
{
|
||||
for (unsigned i = 0; i < tree->num_srcs; i++) {
|
||||
unsigned src_size = bitsize_tree_filter_up(tree->srcs[i]);
|
||||
if (src_size == 0)
|
||||
continue;
|
||||
|
||||
if (tree->is_src_sized[i]) {
|
||||
assert(src_size == tree->src_size[i]);
|
||||
} else if (tree->common_size != 0) {
|
||||
assert(src_size == tree->common_size);
|
||||
tree->src_size[i] = src_size;
|
||||
} else {
|
||||
tree->common_size = src_size;
|
||||
tree->src_size[i] = src_size;
|
||||
}
|
||||
}
|
||||
|
||||
if (tree->num_srcs && tree->common_size) {
|
||||
if (tree->dest_size == 0)
|
||||
tree->dest_size = tree->common_size;
|
||||
else if (!tree->is_dest_sized)
|
||||
assert(tree->dest_size == tree->common_size);
|
||||
|
||||
for (unsigned i = 0; i < tree->num_srcs; i++) {
|
||||
if (!tree->src_size[i])
|
||||
tree->src_size[i] = tree->common_size;
|
||||
}
|
||||
}
|
||||
|
||||
return tree->dest_size;
|
||||
}
|
||||
|
||||
static void
|
||||
bitsize_tree_filter_down(bitsize_tree *tree, unsigned size)
|
||||
{
|
||||
if (tree->dest_size)
|
||||
assert(tree->dest_size == size);
|
||||
else
|
||||
tree->dest_size = size;
|
||||
|
||||
if (!tree->is_dest_sized) {
|
||||
if (tree->common_size)
|
||||
assert(tree->common_size == size);
|
||||
else
|
||||
tree->common_size = size;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < tree->num_srcs; i++) {
|
||||
if (!tree->src_size[i]) {
|
||||
assert(tree->common_size);
|
||||
tree->src_size[i] = tree->common_size;
|
||||
}
|
||||
bitsize_tree_filter_down(tree->srcs[i], tree->src_size[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static nir_alu_src
|
||||
construct_value(const nir_search_value *value, nir_alu_type type,
|
||||
unsigned num_components, struct match_state *state,
|
||||
construct_value(const nir_search_value *value,
|
||||
unsigned num_components, bitsize_tree *bitsize, bool exact,
|
||||
struct match_state *state,
|
||||
nir_instr *instr, void *mem_ctx)
|
||||
{
|
||||
switch (value->type) {
|
||||
@@ -257,7 +422,9 @@ construct_value(const nir_search_value *value, nir_alu_type type,
|
||||
num_components = nir_op_infos[expr->opcode].output_size;
|
||||
|
||||
nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode);
|
||||
nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, NULL);
|
||||
nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components,
|
||||
bitsize->dest_size, NULL);
|
||||
alu->exact = exact;
|
||||
alu->dest.write_mask = (1 << num_components) - 1;
|
||||
alu->dest.saturate = false;
|
||||
|
||||
@@ -269,8 +436,7 @@ construct_value(const nir_search_value *value, nir_alu_type type,
|
||||
num_components = nir_op_infos[alu->op].input_sizes[i];
|
||||
|
||||
alu->src[i] = construct_value(expr->srcs[i],
|
||||
nir_op_infos[alu->op].input_types[i],
|
||||
num_components,
|
||||
num_components, bitsize->srcs[i], exact,
|
||||
state, instr, mem_ctx);
|
||||
}
|
||||
|
||||
@@ -301,23 +467,57 @@ construct_value(const nir_search_value *value, nir_alu_type type,
|
||||
const nir_search_constant *c = nir_search_value_as_constant(value);
|
||||
nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1);
|
||||
|
||||
switch (type) {
|
||||
switch (c->type) {
|
||||
case nir_type_float:
|
||||
load->def.name = ralloc_asprintf(mem_ctx, "%f", c->data.f);
|
||||
load->value.f[0] = c->data.f;
|
||||
load->def.name = ralloc_asprintf(load, "%f", c->data.d);
|
||||
switch (bitsize->dest_size) {
|
||||
case 32:
|
||||
load->value.f32[0] = c->data.d;
|
||||
break;
|
||||
case 64:
|
||||
load->value.f64[0] = c->data.d;
|
||||
break;
|
||||
default:
|
||||
unreachable("unknown bit size");
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_type_int:
|
||||
load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i);
|
||||
load->value.i[0] = c->data.i;
|
||||
load->def.name = ralloc_asprintf(load, "%ld", c->data.i);
|
||||
switch (bitsize->dest_size) {
|
||||
case 32:
|
||||
load->value.i32[0] = c->data.i;
|
||||
break;
|
||||
case 64:
|
||||
load->value.i64[0] = c->data.i;
|
||||
break;
|
||||
default:
|
||||
unreachable("unknown bit size");
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_type_uint:
|
||||
case nir_type_bool:
|
||||
load->value.u[0] = c->data.u;
|
||||
load->def.name = ralloc_asprintf(load, "%lu", c->data.u);
|
||||
switch (bitsize->dest_size) {
|
||||
case 32:
|
||||
load->value.u32[0] = c->data.u;
|
||||
break;
|
||||
case 64:
|
||||
load->value.u64[0] = c->data.u;
|
||||
break;
|
||||
default:
|
||||
unreachable("unknown bit size");
|
||||
}
|
||||
|
||||
case nir_type_bool32:
|
||||
load->value.u32[0] = c->data.u;
|
||||
break;
|
||||
default:
|
||||
unreachable("Invalid alu source type");
|
||||
}
|
||||
|
||||
load->def.bit_size = bitsize->dest_size;
|
||||
|
||||
nir_instr_insert_before(instr, &load->instr);
|
||||
|
||||
nir_alu_src val;
|
||||
@@ -352,6 +552,11 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
|
||||
swizzle, &state))
|
||||
return NULL;
|
||||
|
||||
void *bitsize_ctx = ralloc_context(NULL);
|
||||
bitsize_tree *tree = build_bitsize_tree(bitsize_ctx, &state, replace);
|
||||
bitsize_tree_filter_up(tree);
|
||||
bitsize_tree_filter_down(tree, instr->dest.dest.ssa.bit_size);
|
||||
|
||||
/* Inserting a mov may be unnecessary. However, it's much easier to
|
||||
* simply let copy propagation clean this up than to try to go through
|
||||
* and rewrite swizzles ourselves.
|
||||
@@ -359,11 +564,12 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
|
||||
nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
|
||||
mov->dest.write_mask = instr->dest.write_mask;
|
||||
nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
|
||||
instr->dest.dest.ssa.num_components, NULL);
|
||||
instr->dest.dest.ssa.num_components,
|
||||
instr->dest.dest.ssa.bit_size, NULL);
|
||||
|
||||
mov->src[0] = construct_value(replace, nir_op_infos[instr->op].output_type,
|
||||
instr->dest.dest.ssa.num_components, &state,
|
||||
&instr->instr, mem_ctx);
|
||||
mov->src[0] = construct_value(replace,
|
||||
instr->dest.dest.ssa.num_components, tree,
|
||||
instr->exact, &state, &instr->instr, mem_ctx);
|
||||
nir_instr_insert_before(&instr->instr, &mov->instr);
|
||||
|
||||
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
|
||||
@@ -375,5 +581,7 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
|
||||
*/
|
||||
nir_instr_remove(&instr->instr);
|
||||
|
||||
ralloc_free(bitsize_ctx);
|
||||
|
||||
return mov;
|
||||
}
|
||||
|
||||
@@ -71,16 +71,24 @@ typedef struct {
|
||||
typedef struct {
|
||||
nir_search_value value;
|
||||
|
||||
nir_alu_type type;
|
||||
|
||||
union {
|
||||
uint32_t u;
|
||||
int32_t i;
|
||||
float f;
|
||||
uint64_t u;
|
||||
int64_t i;
|
||||
double d;
|
||||
} data;
|
||||
} nir_search_constant;
|
||||
|
||||
typedef struct {
|
||||
nir_search_value value;
|
||||
|
||||
/* When set on a search expression, the expression will only match an SSA
|
||||
* value that does *not* have the exact bit set. If unset, the exact bit
|
||||
* on the SSA value is ignored.
|
||||
*/
|
||||
bool inexact;
|
||||
|
||||
nir_op opcode;
|
||||
const nir_search_value *srcs[4];
|
||||
} nir_search_expression;
|
||||
|
||||
@@ -219,7 +219,9 @@ rewrite_def_forwards(nir_dest *dest, void *_state)
|
||||
state->states[index].num_defs);
|
||||
|
||||
list_del(&dest->reg.def_link);
|
||||
nir_ssa_dest_init(state->parent_instr, dest, reg->num_components, name);
|
||||
nir_ssa_dest_init(state->parent_instr, dest, reg->num_components,
|
||||
reg->bit_size, name);
|
||||
ralloc_free(name);
|
||||
|
||||
/* push our SSA destination on the stack */
|
||||
state->states[index].index++;
|
||||
@@ -271,7 +273,9 @@ rewrite_alu_instr_forward(nir_alu_instr *instr, rewrite_state *state)
|
||||
|
||||
instr->dest.write_mask = (1 << num_components) - 1;
|
||||
list_del(&instr->dest.dest.reg.def_link);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, name);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
|
||||
reg->bit_size, name);
|
||||
ralloc_free(name);
|
||||
|
||||
if (nir_op_infos[instr->op].output_size == 0) {
|
||||
/*
|
||||
|
||||
@@ -179,9 +179,12 @@ validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state)
|
||||
nir_alu_src *src = &instr->src[index];
|
||||
|
||||
unsigned num_components;
|
||||
if (src->src.is_ssa)
|
||||
unsigned src_bit_size;
|
||||
if (src->src.is_ssa) {
|
||||
src_bit_size = src->src.ssa->bit_size;
|
||||
num_components = src->src.ssa->num_components;
|
||||
else {
|
||||
} else {
|
||||
src_bit_size = src->src.reg.reg->bit_size;
|
||||
if (src->src.reg.reg->is_packed)
|
||||
num_components = 4; /* can't check anything */
|
||||
else
|
||||
@@ -194,6 +197,24 @@ validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state)
|
||||
assert(src->swizzle[i] < num_components);
|
||||
}
|
||||
|
||||
nir_alu_type src_type = nir_op_infos[instr->op].input_types[index];
|
||||
|
||||
/* 8-bit float isn't a thing */
|
||||
if (nir_alu_type_get_base_type(src_type) == nir_type_float)
|
||||
assert(src_bit_size == 16 || src_bit_size == 32 || src_bit_size == 64);
|
||||
|
||||
if (nir_alu_type_get_type_size(src_type)) {
|
||||
/* This source has an explicit bit size */
|
||||
assert(nir_alu_type_get_type_size(src_type) == src_bit_size);
|
||||
} else {
|
||||
if (!nir_alu_type_get_type_size(nir_op_infos[instr->op].output_type)) {
|
||||
unsigned dest_bit_size =
|
||||
instr->dest.dest.is_ssa ? instr->dest.dest.ssa.bit_size
|
||||
: instr->dest.dest.reg.reg->bit_size;
|
||||
assert(dest_bit_size == src_bit_size);
|
||||
}
|
||||
}
|
||||
|
||||
validate_src(&src->src, state);
|
||||
}
|
||||
|
||||
@@ -263,8 +284,10 @@ validate_dest(nir_dest *dest, validate_state *state)
|
||||
}
|
||||
|
||||
static void
|
||||
validate_alu_dest(nir_alu_dest *dest, validate_state *state)
|
||||
validate_alu_dest(nir_alu_instr *instr, validate_state *state)
|
||||
{
|
||||
nir_alu_dest *dest = &instr->dest;
|
||||
|
||||
unsigned dest_size =
|
||||
dest->dest.is_ssa ? dest->dest.ssa.num_components
|
||||
: dest->dest.reg.reg->num_components;
|
||||
@@ -282,6 +305,17 @@ validate_alu_dest(nir_alu_dest *dest, validate_state *state)
|
||||
assert(nir_op_infos[alu->op].output_type == nir_type_float ||
|
||||
!dest->saturate);
|
||||
|
||||
unsigned bit_size = dest->dest.is_ssa ? dest->dest.ssa.bit_size
|
||||
: dest->dest.reg.reg->bit_size;
|
||||
nir_alu_type type = nir_op_infos[instr->op].output_type;
|
||||
|
||||
/* 8-bit float isn't a thing */
|
||||
if (nir_alu_type_get_base_type(type) == nir_type_float)
|
||||
assert(bit_size == 16 || bit_size == 32 || bit_size == 64);
|
||||
|
||||
assert(nir_alu_type_get_type_size(type) == 0 ||
|
||||
nir_alu_type_get_type_size(type) == bit_size);
|
||||
|
||||
validate_dest(&dest->dest, state);
|
||||
}
|
||||
|
||||
@@ -294,7 +328,7 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state)
|
||||
validate_alu_src(instr, i, state);
|
||||
}
|
||||
|
||||
validate_alu_dest(&instr->dest, state);
|
||||
validate_alu_dest(instr, state);
|
||||
}
|
||||
|
||||
static void
|
||||
|
||||
@@ -92,7 +92,7 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
|
||||
nir_load_const_instr_create(b->shader, num_components);
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++)
|
||||
load->value.u[i] = constant->value.u[i];
|
||||
load->value.u32[i] = constant->value.u[i];
|
||||
|
||||
nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
|
||||
val->def = &load->def;
|
||||
@@ -109,7 +109,7 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
|
||||
nir_load_const_instr_create(b->shader, rows);
|
||||
|
||||
for (unsigned j = 0; j < rows; j++)
|
||||
load->value.u[j] = constant->value.u[rows * i + j];
|
||||
load->value.u32[j] = constant->value.u[rows * i + j];
|
||||
|
||||
nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
|
||||
col_val->def = &load->def;
|
||||
@@ -1035,6 +1035,8 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
|
||||
nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap);
|
||||
|
||||
unsigned num_components = glsl_get_vector_elements(val->const_type);
|
||||
unsigned bit_size =
|
||||
glsl_get_bit_size(glsl_get_base_type(val->const_type));
|
||||
|
||||
nir_const_value src[3];
|
||||
assert(count <= 7);
|
||||
@@ -1043,14 +1045,16 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
|
||||
vtn_value(b, w[4 + i], vtn_value_type_constant)->constant;
|
||||
|
||||
unsigned j = swap ? 1 - i : i;
|
||||
assert(bit_size == 32);
|
||||
for (unsigned k = 0; k < num_components; k++)
|
||||
src[j].u[k] = c->value.u[k];
|
||||
src[j].u32[k] = c->value.u[k];
|
||||
}
|
||||
|
||||
nir_const_value res = nir_eval_const_opcode(op, num_components, src);
|
||||
nir_const_value res = nir_eval_const_opcode(op, num_components,
|
||||
bit_size, src);
|
||||
|
||||
for (unsigned k = 0; k < num_components; k++)
|
||||
val->constant->value.u[k] = res.u[k];
|
||||
val->constant->value.u[k] = res.u32[k];
|
||||
|
||||
return;
|
||||
} /* default */
|
||||
@@ -1414,7 +1418,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
|
||||
}
|
||||
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest,
|
||||
nir_tex_instr_dest_size(instr), NULL);
|
||||
nir_tex_instr_dest_size(instr), 32, NULL);
|
||||
|
||||
assert(glsl_get_vector_elements(ret_type->type) ==
|
||||
nir_tex_instr_dest_size(instr));
|
||||
@@ -1600,7 +1604,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
|
||||
if (opcode != SpvOpImageWrite) {
|
||||
struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
|
||||
struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
|
||||
nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, NULL);
|
||||
nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, 32, NULL);
|
||||
|
||||
nir_builder_instr_insert(&b->nb, &intrin->instr);
|
||||
|
||||
@@ -1738,7 +1742,7 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
|
||||
fill_common_atomic_sources(b, opcode, w, &atomic->src[2]);
|
||||
}
|
||||
|
||||
nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL);
|
||||
nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL);
|
||||
|
||||
struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
|
||||
struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
|
||||
@@ -1750,7 +1754,7 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
|
||||
}
|
||||
|
||||
static nir_alu_instr *
|
||||
create_vec(nir_shader *shader, unsigned num_components)
|
||||
create_vec(nir_shader *shader, unsigned num_components, unsigned bit_size)
|
||||
{
|
||||
nir_op op;
|
||||
switch (num_components) {
|
||||
@@ -1762,7 +1766,8 @@ create_vec(nir_shader *shader, unsigned num_components)
|
||||
}
|
||||
|
||||
nir_alu_instr *vec = nir_alu_instr_create(shader, op);
|
||||
nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL);
|
||||
nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components,
|
||||
bit_size, NULL);
|
||||
vec->dest.write_mask = (1 << num_components) - 1;
|
||||
|
||||
return vec;
|
||||
@@ -1779,7 +1784,8 @@ vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
|
||||
|
||||
for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
|
||||
nir_alu_instr *vec = create_vec(b->shader,
|
||||
glsl_get_matrix_columns(src->type));
|
||||
glsl_get_matrix_columns(src->type),
|
||||
glsl_get_bit_size(glsl_get_base_type(src->type)));
|
||||
if (glsl_type_is_vector_or_scalar(src->type)) {
|
||||
vec->src[0].src = nir_src_for_ssa(src->def);
|
||||
vec->src[0].swizzle[0] = i;
|
||||
@@ -1809,7 +1815,8 @@ nir_ssa_def *
|
||||
vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert,
|
||||
unsigned index)
|
||||
{
|
||||
nir_alu_instr *vec = create_vec(b->shader, src->num_components);
|
||||
nir_alu_instr *vec = create_vec(b->shader, src->num_components,
|
||||
src->bit_size);
|
||||
|
||||
for (unsigned i = 0; i < src->num_components; i++) {
|
||||
if (i == index) {
|
||||
@@ -1854,7 +1861,7 @@ vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
|
||||
nir_ssa_def *src0, nir_ssa_def *src1,
|
||||
const uint32_t *indices)
|
||||
{
|
||||
nir_alu_instr *vec = create_vec(b->shader, num_components);
|
||||
nir_alu_instr *vec = create_vec(b->shader, num_components, src0->bit_size);
|
||||
|
||||
nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1);
|
||||
nir_builder_instr_insert(&b->nb, &undef->instr);
|
||||
@@ -1884,7 +1891,8 @@ static nir_ssa_def *
|
||||
vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
|
||||
unsigned num_srcs, nir_ssa_def **srcs)
|
||||
{
|
||||
nir_alu_instr *vec = create_vec(b->shader, num_components);
|
||||
nir_alu_instr *vec = create_vec(b->shader, num_components,
|
||||
srcs[0]->bit_size);
|
||||
|
||||
unsigned dest_idx = 0;
|
||||
for (unsigned i = 0; i < num_srcs; i++) {
|
||||
|
||||
@@ -627,7 +627,9 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
|
||||
|
||||
nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest.dest,
|
||||
glsl_get_vector_elements(val->ssa->type), val->name);
|
||||
glsl_get_vector_elements(val->ssa->type),
|
||||
glsl_get_bit_size(glsl_get_base_type(val->ssa->type)),
|
||||
val->name);
|
||||
instr->dest.write_mask = (1 << instr->dest.dest.ssa.num_components) - 1;
|
||||
val->ssa->def = &instr->dest.dest.ssa;
|
||||
|
||||
|
||||
@@ -190,7 +190,9 @@ _vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref,
|
||||
|
||||
if (load) {
|
||||
nir_ssa_dest_init(&intrin->instr, &intrin->dest,
|
||||
intrin->num_components, NULL);
|
||||
intrin->num_components,
|
||||
glsl_get_bit_size(glsl_get_base_type(tail->type)),
|
||||
NULL);
|
||||
inout->def = &intrin->dest.ssa;
|
||||
} else {
|
||||
nir_intrinsic_set_write_mask(intrin, (1 << intrin->num_components) - 1);
|
||||
@@ -322,7 +324,7 @@ get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain,
|
||||
nir_intrinsic_set_desc_set(instr, chain->var->descriptor_set);
|
||||
nir_intrinsic_set_binding(instr, chain->var->binding);
|
||||
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
|
||||
nir_builder_instr_insert(&b->nb, &instr->instr);
|
||||
|
||||
return &instr->dest.ssa;
|
||||
@@ -411,7 +413,8 @@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
|
||||
|
||||
if (load) {
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest,
|
||||
instr->num_components, NULL);
|
||||
instr->num_components,
|
||||
glsl_get_bit_size(glsl_get_base_type(type)), NULL);
|
||||
(*inout)->def = &instr->dest.ssa;
|
||||
}
|
||||
|
||||
@@ -1385,7 +1388,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
|
||||
nir_intrinsic_instr_create(b->nb.shader,
|
||||
nir_intrinsic_get_buffer_size);
|
||||
instr->src[0] = nir_src_for_ssa(index);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
|
||||
nir_builder_instr_insert(&b->nb, &instr->instr);
|
||||
nir_ssa_def *buf_size = &instr->dest.ssa;
|
||||
|
||||
|
||||
@@ -80,6 +80,27 @@ enum glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type);
|
||||
unsigned glsl_get_record_location_offset(const struct glsl_type *type,
|
||||
unsigned length);
|
||||
|
||||
static inline unsigned
|
||||
glsl_get_bit_size(enum glsl_base_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case GLSL_TYPE_INT:
|
||||
case GLSL_TYPE_UINT:
|
||||
case GLSL_TYPE_BOOL:
|
||||
case GLSL_TYPE_FLOAT: /* TODO handle mediump */
|
||||
case GLSL_TYPE_SUBROUTINE:
|
||||
return 32;
|
||||
|
||||
case GLSL_TYPE_DOUBLE:
|
||||
return 64;
|
||||
|
||||
default:
|
||||
unreachable("unknown base type");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool glsl_type_is_void(const struct glsl_type *type);
|
||||
bool glsl_type_is_error(const struct glsl_type *type);
|
||||
bool glsl_type_is_vector(const struct glsl_type *type);
|
||||
|
||||
@@ -44,7 +44,6 @@
|
||||
#include "egllog.h"
|
||||
|
||||
|
||||
#define MIN2(A, B) (((A) < (B)) ? (A) : (B))
|
||||
|
||||
|
||||
/**
|
||||
|
||||
@@ -40,9 +40,16 @@ extern "C" {
|
||||
|
||||
#define _EGL_MAX_EXTENSIONS_LEN 1000
|
||||
|
||||
/* Hardcoded, conservative upper bound on pbuffer dimensions; a surface
|
||||
* that requests EGL_LARGEST_PBUFFER has its width and height clamped to it.
|
||||
*/
|
||||
#define _EGL_MAX_PBUFFER_WIDTH 4096
|
||||
#define _EGL_MAX_PBUFFER_HEIGHT 4096
|
||||
|
||||
#define _EGL_VENDOR_STRING "Mesa Project"
|
||||
|
||||
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
|
||||
#define MIN2(A, B) (((A) < (B)) ? (A) : (B))
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -307,6 +307,12 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
|
||||
if (err != EGL_SUCCESS)
|
||||
return _eglError(err, func);
|
||||
|
||||
/* if EGL_LARGEST_PBUFFER in use, clamp width and height */
|
||||
if (surf->LargestPbuffer) {
|
||||
surf->Width = MIN2(surf->Width, _EGL_MAX_PBUFFER_WIDTH);
|
||||
surf->Height = MIN2(surf->Height, _EGL_MAX_PBUFFER_HEIGHT);
|
||||
}
|
||||
|
||||
return EGL_TRUE;
|
||||
}
|
||||
|
||||
|
||||
@@ -206,12 +206,6 @@ static unsigned tgsi_gs_run(struct draw_geometry_shader *shader,
|
||||
{
|
||||
struct tgsi_exec_machine *machine = shader->machine;
|
||||
|
||||
tgsi_set_exec_mask(machine,
|
||||
1,
|
||||
input_primitives > 1,
|
||||
input_primitives > 2,
|
||||
input_primitives > 3);
|
||||
|
||||
/* run interpreter */
|
||||
tgsi_exec_machine_run(machine);
|
||||
|
||||
|
||||
@@ -264,11 +264,11 @@ aa_transform_epilog(struct tgsi_transform_context *ctx)
|
||||
if (aactx->colorOutput != -1) {
|
||||
/* insert texture sampling code for antialiasing. */
|
||||
|
||||
/* TEX texTemp, input_coord, sampler */
|
||||
tgsi_transform_tex_2d_inst(ctx,
|
||||
TGSI_FILE_TEMPORARY, aactx->texTemp,
|
||||
TGSI_FILE_INPUT, aactx->maxInput + 1,
|
||||
aactx->freeSampler);
|
||||
/* TEX texTemp, input_coord, sampler, 2D */
|
||||
tgsi_transform_tex_inst(ctx,
|
||||
TGSI_FILE_TEMPORARY, aactx->texTemp,
|
||||
TGSI_FILE_INPUT, aactx->maxInput + 1,
|
||||
TGSI_TEXTURE_2D, aactx->freeSampler);
|
||||
|
||||
/* MOV rgb */
|
||||
tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
|
||||
|
||||
@@ -159,12 +159,6 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
|
||||
input = (const float (*)[4])((const char *)input + input_stride);
|
||||
}
|
||||
|
||||
tgsi_set_exec_mask(machine,
|
||||
1,
|
||||
max_vertices > 1,
|
||||
max_vertices > 2,
|
||||
max_vertices > 3);
|
||||
|
||||
/* run interpreter */
|
||||
tgsi_exec_machine_run( machine );
|
||||
|
||||
|
||||
@@ -1191,6 +1191,7 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso)
|
||||
"FRAG\n"
|
||||
"DCL IN[0], GENERIC[0], LINEAR\n"
|
||||
"DCL SAMP[0]\n"
|
||||
"DCL SVIEW[0], RECT, FLOAT\n"
|
||||
"DCL OUT[0], COLOR[0]\n"
|
||||
"DCL TEMP[0]\n"
|
||||
|
||||
|
||||
@@ -459,7 +459,7 @@ ttn_emit_immediate(struct ttn_compile *c)
|
||||
c->next_imm++;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
load_const->value.u[i] = tgsi_imm->u[i].Uint;
|
||||
load_const->value.u32[i] = tgsi_imm->u[i].Uint;
|
||||
|
||||
nir_builder_instr_insert(b, &load_const->instr);
|
||||
}
|
||||
@@ -515,8 +515,8 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
|
||||
nir_intrinsic_load_var);
|
||||
load->num_components = 4;
|
||||
load->variables[0] = ttn_array_deref(c, load, var, offset, indirect);
|
||||
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
|
||||
nir_ssa_dest_init(&load->instr, &load->dest,
|
||||
4, 32, NULL);
|
||||
nir_builder_instr_insert(b, &load->instr);
|
||||
|
||||
src = nir_src_for_ssa(&load->dest.ssa);
|
||||
@@ -567,7 +567,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
|
||||
load = nir_intrinsic_instr_create(b->shader, op);
|
||||
load->num_components = ncomp;
|
||||
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, ncomp, NULL);
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, ncomp, 32, NULL);
|
||||
nir_builder_instr_insert(b, &load->instr);
|
||||
|
||||
src = nir_src_for_ssa(&load->dest.ssa);
|
||||
@@ -632,7 +632,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
|
||||
}
|
||||
load->src[srcn++] = nir_src_for_ssa(offset);
|
||||
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
|
||||
nir_builder_instr_insert(b, &load->instr);
|
||||
|
||||
src = nir_src_for_ssa(&load->dest.ssa);
|
||||
@@ -1425,7 +1425,7 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
|
||||
|
||||
assert(src_number == num_srcs);
|
||||
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
|
||||
nir_builder_instr_insert(b, &instr->instr);
|
||||
|
||||
/* Resolve the writemask on the texture op. */
|
||||
@@ -1464,10 +1464,10 @@ ttn_txq(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
|
||||
txs->src[0].src = nir_src_for_ssa(ttn_channel(b, src[0], X));
|
||||
txs->src[0].src_type = nir_tex_src_lod;
|
||||
|
||||
nir_ssa_dest_init(&txs->instr, &txs->dest, 3, NULL);
|
||||
nir_ssa_dest_init(&txs->instr, &txs->dest, 3, 32, NULL);
|
||||
nir_builder_instr_insert(b, &txs->instr);
|
||||
|
||||
nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, NULL);
|
||||
nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, 32, NULL);
|
||||
nir_builder_instr_insert(b, &qlv->instr);
|
||||
|
||||
ttn_move_dest_masked(b, dest, &txs->dest.ssa, TGSI_WRITEMASK_XYZ);
|
||||
|
||||
@@ -23,8 +23,6 @@
|
||||
|
||||
#include "compiler/nir/nir.h"
|
||||
|
||||
struct nir_shader_compiler_options *options;
|
||||
|
||||
struct nir_shader *
|
||||
tgsi_to_nir(const void *tgsi_tokens,
|
||||
const struct nir_shader_compiler_options *options);
|
||||
|
||||
@@ -33,6 +33,7 @@ static const char nored[] = "FRAG\n"
|
||||
"DCL IN[0], GENERIC[0], PERSPECTIVE\n"
|
||||
"DCL OUT[0], COLOR\n"
|
||||
"DCL SAMP[0]\n"
|
||||
"DCL SVIEW[0], 2D, FLOAT\n"
|
||||
"DCL TEMP[0]\n"
|
||||
"IMM FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}\n"
|
||||
" 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n"
|
||||
@@ -46,6 +47,7 @@ static const char nogreen[] = "FRAG\n"
|
||||
"DCL IN[0], GENERIC[0], PERSPECTIVE\n"
|
||||
"DCL OUT[0], COLOR\n"
|
||||
"DCL SAMP[0]\n"
|
||||
"DCL SVIEW[0], 2D, FLOAT\n"
|
||||
"DCL TEMP[0]\n"
|
||||
"IMM FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}\n"
|
||||
" 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n"
|
||||
@@ -59,6 +61,7 @@ static const char noblue[] = "FRAG\n"
|
||||
"DCL IN[0], GENERIC[0], PERSPECTIVE\n"
|
||||
"DCL OUT[0], COLOR\n"
|
||||
"DCL SAMP[0]\n"
|
||||
"DCL SVIEW[0], 2D, FLOAT\n"
|
||||
"DCL TEMP[0]\n"
|
||||
"IMM FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}\n"
|
||||
" 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n"
|
||||
|
||||
@@ -50,6 +50,7 @@ static const char depth1fs[] = "FRAG\n"
|
||||
"DCL IN[2], GENERIC[11], PERSPECTIVE\n"
|
||||
"DCL OUT[0], COLOR\n"
|
||||
"DCL SAMP[0]\n"
|
||||
"DCL SVIEW[0], 2D, FLOAT\n"
|
||||
"DCL TEMP[0..2]\n"
|
||||
"IMM FLT32 { 0.0030, 0.0000, 1.0000, 0.0000}\n"
|
||||
" 0: TEX TEMP[0].x, IN[1].xyyy, SAMP[0], 2D\n"
|
||||
@@ -80,6 +81,7 @@ static const char color1fs[] = "FRAG\n"
|
||||
"DCL IN[2], GENERIC[11], PERSPECTIVE\n"
|
||||
"DCL OUT[0], COLOR\n"
|
||||
"DCL SAMP[0]\n"
|
||||
"DCL SVIEW[0], 2D, FLOAT\n"
|
||||
"DCL TEMP[0..2]\n"
|
||||
"IMM FLT32 { 0.2126, 0.7152, 0.0722, 0.1000}\n"
|
||||
"IMM FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}\n"
|
||||
@@ -112,6 +114,7 @@ static const char neigh3fs[] = "FRAG\n"
|
||||
"DCL IN[2], GENERIC[11], PERSPECTIVE\n"
|
||||
"DCL OUT[0], COLOR\n"
|
||||
"DCL SAMP[0]\n"
|
||||
"DCL SVIEW[0], 2D, FLOAT\n"
|
||||
"DCL SAMP[1]\n"
|
||||
"DCL TEMP[0..8]\n"
|
||||
"IMM FLT32 { 1.0000, 0.00001, 0.0000, 0.0000}\n"
|
||||
@@ -175,8 +178,11 @@ static const char blend2fs_1[] = "FRAG\n"
|
||||
"DCL IN[0], GENERIC[0], PERSPECTIVE\n"
|
||||
"DCL OUT[0], COLOR\n"
|
||||
"DCL SAMP[0]\n"
|
||||
"DCL SVIEW[0], 2D, FLOAT\n"
|
||||
"DCL SAMP[1]\n"
|
||||
"DCL SVIEW[1], 2D, FLOAT\n"
|
||||
"DCL SAMP[2]\n"
|
||||
"DCL SVIEW[2], 2D, FLOAT\n"
|
||||
"DCL CONST[0]\n"
|
||||
"DCL TEMP[0..6]\n"
|
||||
"IMM FLT32 { 0.0000, -0.2500, 0.00609756, 0.5000}\n"
|
||||
|
||||
@@ -111,7 +111,7 @@ tgsi_default_declaration( void )
|
||||
declaration.Local = 0;
|
||||
declaration.Array = 0;
|
||||
declaration.Atomic = 0;
|
||||
declaration.Shared = 0;
|
||||
declaration.MemType = TGSI_MEMORY_TYPE_GLOBAL;
|
||||
declaration.Padding = 0;
|
||||
|
||||
return declaration;
|
||||
@@ -127,6 +127,8 @@ tgsi_build_declaration(
|
||||
unsigned invariant,
|
||||
unsigned local,
|
||||
unsigned array,
|
||||
unsigned atomic,
|
||||
unsigned mem_type,
|
||||
struct tgsi_header *header )
|
||||
{
|
||||
struct tgsi_declaration declaration;
|
||||
@@ -143,6 +145,8 @@ tgsi_build_declaration(
|
||||
declaration.Invariant = invariant;
|
||||
declaration.Local = local;
|
||||
declaration.Array = array;
|
||||
declaration.Atomic = atomic;
|
||||
declaration.MemType = mem_type;
|
||||
header_bodysize_grow( header );
|
||||
|
||||
return declaration;
|
||||
@@ -401,6 +405,8 @@ tgsi_build_full_declaration(
|
||||
full_decl->Declaration.Invariant,
|
||||
full_decl->Declaration.Local,
|
||||
full_decl->Declaration.Array,
|
||||
full_decl->Declaration.Atomic,
|
||||
full_decl->Declaration.MemType,
|
||||
header );
|
||||
|
||||
if (maxsize <= size)
|
||||
@@ -775,6 +781,8 @@ tgsi_default_instruction_memory( void )
|
||||
struct tgsi_instruction_memory instruction_memory;
|
||||
|
||||
instruction_memory.Qualifier = 0;
|
||||
instruction_memory.Texture = 0;
|
||||
instruction_memory.Format = 0;
|
||||
instruction_memory.Padding = 0;
|
||||
|
||||
return instruction_memory;
|
||||
@@ -790,6 +798,8 @@ tgsi_build_instruction_memory(
|
||||
struct tgsi_instruction_memory instruction_memory;
|
||||
|
||||
instruction_memory.Qualifier = qualifier;
|
||||
instruction_memory.Texture = 0;
|
||||
instruction_memory.Format = 0;
|
||||
instruction_memory.Padding = 0;
|
||||
instruction->Memory = 1;
|
||||
|
||||
|
||||
@@ -365,8 +365,13 @@ iter_declaration(
|
||||
}
|
||||
|
||||
if (decl->Declaration.File == TGSI_FILE_MEMORY) {
|
||||
if (decl->Declaration.Shared)
|
||||
TXT(", SHARED");
|
||||
switch (decl->Declaration.MemType) {
|
||||
/* Note: ,GLOBAL is optional / the default */
|
||||
case TGSI_MEMORY_TYPE_GLOBAL: TXT(", GLOBAL"); break;
|
||||
case TGSI_MEMORY_TYPE_SHARED: TXT(", SHARED"); break;
|
||||
case TGSI_MEMORY_TYPE_PRIVATE: TXT(", PRIVATE"); break;
|
||||
case TGSI_MEMORY_TYPE_INPUT: TXT(", INPUT"); break;
|
||||
}
|
||||
}
|
||||
|
||||
if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
|
||||
|
||||
@@ -196,10 +196,6 @@ struct tgsi_sampler
|
||||
#define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3)
|
||||
#define TGSI_EXEC_TEMP_HALF_C 0
|
||||
|
||||
/* execution mask, each value is either 0 or ~0 */
|
||||
#define TGSI_EXEC_MASK_I (TGSI_EXEC_NUM_TEMPS + 3)
|
||||
#define TGSI_EXEC_MASK_C 1
|
||||
|
||||
/* 4 register buffer for various purposes */
|
||||
#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4)
|
||||
#define TGSI_EXEC_NUM_TEMP_R 4
|
||||
@@ -397,27 +393,6 @@ boolean
|
||||
tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst);
|
||||
|
||||
|
||||
static inline void
|
||||
tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask)
|
||||
{
|
||||
mach->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] =
|
||||
mask;
|
||||
}
|
||||
|
||||
|
||||
/** Set execution mask values prior to executing the shader */
|
||||
static inline void
|
||||
tgsi_set_exec_mask(struct tgsi_exec_machine *mach,
|
||||
boolean ch0, boolean ch1, boolean ch2, boolean ch3)
|
||||
{
|
||||
int *mask = mach->Temps[TGSI_EXEC_MASK_I].xyzw[TGSI_EXEC_MASK_C].i;
|
||||
mask[0] = ch0 ? ~0 : 0;
|
||||
mask[1] = ch1 ? ~0 : 0;
|
||||
mask[2] = ch2 ? ~0 : 0;
|
||||
mask[3] = ch3 ? ~0 : 0;
|
||||
}
|
||||
|
||||
|
||||
extern void
|
||||
tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
|
||||
unsigned num_bufs,
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_prim.h"
|
||||
#include "tgsi/tgsi_info.h"
|
||||
#include "tgsi/tgsi_parse.h"
|
||||
#include "tgsi/tgsi_util.h"
|
||||
#include "tgsi/tgsi_scan.h"
|
||||
@@ -192,8 +193,17 @@ scan_instruction(struct tgsi_shader_info *info,
|
||||
}
|
||||
}
|
||||
|
||||
if (is_memory_file(src->Register.File))
|
||||
if (is_memory_file(src->Register.File)) {
|
||||
is_mem_inst = true;
|
||||
|
||||
if (tgsi_get_opcode_info(fullinst->Instruction.Opcode)->is_store) {
|
||||
info->writes_memory = TRUE;
|
||||
|
||||
if (src->Register.File == TGSI_FILE_IMAGE &&
|
||||
!src->Register.Indirect)
|
||||
info->images_writemask |= 1 << src->Register.Index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* check for indirect register writes */
|
||||
@@ -204,8 +214,16 @@ scan_instruction(struct tgsi_shader_info *info,
|
||||
info->indirect_files_written |= (1 << dst->Register.File);
|
||||
}
|
||||
|
||||
if (is_memory_file(dst->Register.File))
|
||||
if (is_memory_file(dst->Register.File)) {
|
||||
assert(fullinst->Instruction.Opcode == TGSI_OPCODE_STORE);
|
||||
|
||||
is_mem_inst = true;
|
||||
info->writes_memory = TRUE;
|
||||
|
||||
if (dst->Register.File == TGSI_FILE_IMAGE &&
|
||||
!dst->Register.Indirect)
|
||||
info->images_writemask |= 1 << dst->Register.Index;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_mem_inst)
|
||||
@@ -413,6 +431,9 @@ scan_declaration(struct tgsi_shader_info *info,
|
||||
}
|
||||
} else if (file == TGSI_FILE_SAMPLER) {
|
||||
info->samplers_declared |= 1 << reg;
|
||||
} else if (file == TGSI_FILE_IMAGE) {
|
||||
if (fulldecl->Image.Resource == TGSI_TEXTURE_BUFFER)
|
||||
info->images_buffers |= 1 << reg;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -111,12 +111,22 @@ struct tgsi_shader_info
|
||||
boolean writes_clipvertex;
|
||||
boolean writes_viewport_index;
|
||||
boolean writes_layer;
|
||||
boolean writes_memory; /**< contains stores or atomics to buffers or images */
|
||||
boolean is_msaa_sampler[PIPE_MAX_SAMPLERS];
|
||||
boolean uses_doubles; /**< uses any of the double instructions */
|
||||
unsigned clipdist_writemask;
|
||||
unsigned culldist_writemask;
|
||||
unsigned num_written_culldistance;
|
||||
unsigned num_written_clipdistance;
|
||||
/**
|
||||
* Bitmask indicating which images are written to (STORE / ATOM*).
|
||||
* Indirect image accesses are not reflected in this mask.
|
||||
*/
|
||||
unsigned images_writemask;
|
||||
/**
|
||||
* Bitmask indicating which declared image is a buffer.
|
||||
*/
|
||||
unsigned images_buffers;
|
||||
/**
|
||||
* Bitmask indicating which register files are accessed with
|
||||
* indirect addressing. The bits are (1 << TGSI_FILE_x), etc.
|
||||
|
||||
@@ -145,6 +145,7 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] =
|
||||
"NUM_CLIPDIST_ENABLED",
|
||||
"NUM_CULLDIST_ENABLED",
|
||||
"FS_EARLY_DEPTH_STENCIL",
|
||||
"NEXT_SHADER",
|
||||
};
|
||||
|
||||
const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] =
|
||||
|
||||
@@ -1390,8 +1390,18 @@ static boolean parse_declaration( struct translate_ctx *ctx )
|
||||
ctx->cur = cur;
|
||||
}
|
||||
} else if (file == TGSI_FILE_MEMORY) {
|
||||
if (str_match_nocase_whole(&cur, "SHARED")) {
|
||||
decl.Declaration.Shared = 1;
|
||||
if (str_match_nocase_whole(&cur, "GLOBAL")) {
|
||||
/* Note: this is a no-op, since GLOBAL is the default */
|
||||
decl.Declaration.MemType = TGSI_MEMORY_TYPE_GLOBAL;
|
||||
ctx->cur = cur;
|
||||
} else if (str_match_nocase_whole(&cur, "SHARED")) {
|
||||
decl.Declaration.MemType = TGSI_MEMORY_TYPE_SHARED;
|
||||
ctx->cur = cur;
|
||||
} else if (str_match_nocase_whole(&cur, "PRIVATE")) {
|
||||
decl.Declaration.MemType = TGSI_MEMORY_TYPE_PRIVATE;
|
||||
ctx->cur = cur;
|
||||
} else if (str_match_nocase_whole(&cur, "INPUT")) {
|
||||
decl.Declaration.MemType = TGSI_MEMORY_TYPE_INPUT;
|
||||
ctx->cur = cur;
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -301,6 +301,40 @@ tgsi_transform_op2_inst(struct tgsi_transform_context *ctx,
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
tgsi_transform_op3_inst(struct tgsi_transform_context *ctx,
|
||||
unsigned opcode,
|
||||
unsigned dst_file,
|
||||
unsigned dst_index,
|
||||
unsigned dst_writemask,
|
||||
unsigned src0_file,
|
||||
unsigned src0_index,
|
||||
unsigned src1_file,
|
||||
unsigned src1_index,
|
||||
unsigned src2_file,
|
||||
unsigned src2_index)
|
||||
{
|
||||
struct tgsi_full_instruction inst;
|
||||
|
||||
inst = tgsi_default_full_instruction();
|
||||
inst.Instruction.Opcode = opcode;
|
||||
inst.Instruction.NumDstRegs = 1;
|
||||
inst.Dst[0].Register.File = dst_file,
|
||||
inst.Dst[0].Register.Index = dst_index;
|
||||
inst.Dst[0].Register.WriteMask = dst_writemask;
|
||||
inst.Instruction.NumSrcRegs = 3;
|
||||
inst.Src[0].Register.File = src0_file;
|
||||
inst.Src[0].Register.Index = src0_index;
|
||||
inst.Src[1].Register.File = src1_file;
|
||||
inst.Src[1].Register.Index = src1_index;
|
||||
inst.Src[2].Register.File = src2_file;
|
||||
inst.Src[2].Register.Index = src2_index;
|
||||
|
||||
ctx->emit_instruction(ctx, &inst);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static inline void
|
||||
tgsi_transform_op1_swz_inst(struct tgsi_transform_context *ctx,
|
||||
unsigned opcode,
|
||||
@@ -482,15 +516,18 @@ tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
|
||||
|
||||
|
||||
static inline void
|
||||
tgsi_transform_tex_2d_inst(struct tgsi_transform_context *ctx,
|
||||
unsigned dst_file,
|
||||
unsigned dst_index,
|
||||
unsigned src_file,
|
||||
unsigned src_index,
|
||||
unsigned sampler_index)
|
||||
tgsi_transform_tex_inst(struct tgsi_transform_context *ctx,
|
||||
unsigned dst_file,
|
||||
unsigned dst_index,
|
||||
unsigned src_file,
|
||||
unsigned src_index,
|
||||
unsigned tex_target,
|
||||
unsigned sampler_index)
|
||||
{
|
||||
struct tgsi_full_instruction inst;
|
||||
|
||||
assert(tex_target < TGSI_TEXTURE_COUNT);
|
||||
|
||||
inst = tgsi_default_full_instruction();
|
||||
inst.Instruction.Opcode = TGSI_OPCODE_TEX;
|
||||
inst.Instruction.NumDstRegs = 1;
|
||||
@@ -498,7 +535,7 @@ tgsi_transform_tex_2d_inst(struct tgsi_transform_context *ctx,
|
||||
inst.Dst[0].Register.Index = dst_index;
|
||||
inst.Instruction.NumSrcRegs = 2;
|
||||
inst.Instruction.Texture = TRUE;
|
||||
inst.Texture.Texture = TGSI_TEXTURE_2D;
|
||||
inst.Texture.Texture = tex_target;
|
||||
inst.Src[0].Register.File = src_file;
|
||||
inst.Src[0].Register.Index = src_index;
|
||||
inst.Src[1].Register.File = TGSI_FILE_SAMPLER;
|
||||
|
||||
@@ -101,6 +101,7 @@ struct ureg_program
|
||||
{
|
||||
unsigned processor;
|
||||
bool supports_any_inout_decl_range;
|
||||
int next_shader_processor;
|
||||
|
||||
struct {
|
||||
unsigned semantic_name;
|
||||
@@ -190,7 +191,7 @@ struct ureg_program
|
||||
|
||||
struct ureg_tokens domain[2];
|
||||
|
||||
bool use_shared_memory;
|
||||
bool use_memory[TGSI_MEMORY_TYPE_COUNT];
|
||||
};
|
||||
|
||||
static union tgsi_any_token error_tokens[32];
|
||||
@@ -729,13 +730,14 @@ struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr,
|
||||
return reg;
|
||||
}
|
||||
|
||||
/* Allocate a shared memory area.
|
||||
/* Allocate a memory area.
|
||||
*/
|
||||
struct ureg_src ureg_DECL_shared_memory(struct ureg_program *ureg)
|
||||
struct ureg_src ureg_DECL_memory(struct ureg_program *ureg,
|
||||
unsigned memory_type)
|
||||
{
|
||||
struct ureg_src reg = ureg_src_register(TGSI_FILE_MEMORY, 0);
|
||||
struct ureg_src reg = ureg_src_register(TGSI_FILE_MEMORY, memory_type);
|
||||
|
||||
ureg->use_shared_memory = true;
|
||||
ureg->use_memory[memory_type] = true;
|
||||
return reg;
|
||||
}
|
||||
|
||||
@@ -1672,7 +1674,7 @@ emit_decl_buffer(struct ureg_program *ureg,
|
||||
}
|
||||
|
||||
static void
|
||||
emit_decl_shared_memory(struct ureg_program *ureg)
|
||||
emit_decl_memory(struct ureg_program *ureg, unsigned memory_type)
|
||||
{
|
||||
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
|
||||
|
||||
@@ -1681,11 +1683,11 @@ emit_decl_shared_memory(struct ureg_program *ureg)
|
||||
out[0].decl.NrTokens = 2;
|
||||
out[0].decl.File = TGSI_FILE_MEMORY;
|
||||
out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
|
||||
out[0].decl.Shared = true;
|
||||
out[0].decl.MemType = memory_type;
|
||||
|
||||
out[1].value = 0;
|
||||
out[1].decl_range.First = 0;
|
||||
out[1].decl_range.Last = 0;
|
||||
out[1].decl_range.First = memory_type;
|
||||
out[1].decl_range.Last = memory_type;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -1860,8 +1862,10 @@ static void emit_decls( struct ureg_program *ureg )
|
||||
emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic);
|
||||
}
|
||||
|
||||
if (ureg->use_shared_memory)
|
||||
emit_decl_shared_memory(ureg);
|
||||
for (i = 0; i < TGSI_MEMORY_TYPE_COUNT; i++) {
|
||||
if (ureg->use_memory[i])
|
||||
emit_decl_memory(ureg, i);
|
||||
}
|
||||
|
||||
if (ureg->const_decls.nr_constant_ranges) {
|
||||
for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) {
|
||||
@@ -1966,6 +1970,16 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg )
|
||||
{
|
||||
const struct tgsi_token *tokens;
|
||||
|
||||
switch (ureg->processor) {
|
||||
case TGSI_PROCESSOR_VERTEX:
|
||||
case TGSI_PROCESSOR_TESS_EVAL:
|
||||
ureg_property(ureg, TGSI_PROPERTY_NEXT_SHADER,
|
||||
ureg->next_shader_processor == -1 ?
|
||||
TGSI_PROCESSOR_FRAGMENT :
|
||||
ureg->next_shader_processor);
|
||||
break;
|
||||
}
|
||||
|
||||
emit_header( ureg );
|
||||
emit_decls( ureg );
|
||||
copy_instructions( ureg );
|
||||
@@ -2079,6 +2093,7 @@ ureg_create_with_screen(unsigned processor, struct pipe_screen *screen)
|
||||
screen->get_shader_param(screen,
|
||||
util_pipe_shader_from_tgsi_processor(processor),
|
||||
PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE) != 0;
|
||||
ureg->next_shader_processor = -1;
|
||||
|
||||
for (i = 0; i < Elements(ureg->properties); i++)
|
||||
ureg->properties[i] = ~0;
|
||||
@@ -2108,6 +2123,13 @@ no_ureg:
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ureg_set_next_shader_processor(struct ureg_program *ureg, unsigned processor)
|
||||
{
|
||||
ureg->next_shader_processor = processor;
|
||||
}
|
||||
|
||||
|
||||
unsigned
|
||||
ureg_get_nr_outputs( const struct ureg_program *ureg )
|
||||
{
|
||||
|
||||
@@ -114,6 +114,8 @@ ureg_create_shader( struct ureg_program *,
|
||||
struct pipe_context *pipe,
|
||||
const struct pipe_stream_output_info *so );
|
||||
|
||||
void
|
||||
ureg_set_next_shader_processor(struct ureg_program *ureg, unsigned processor);
|
||||
|
||||
/* Alternately, return the built token stream and hand ownership of
|
||||
* that memory to the caller:
|
||||
@@ -338,7 +340,7 @@ struct ureg_src
|
||||
ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr, bool atomic);
|
||||
|
||||
struct ureg_src
|
||||
ureg_DECL_shared_memory(struct ureg_program *ureg);
|
||||
ureg_DECL_memory(struct ureg_program *ureg, unsigned memory_type);
|
||||
|
||||
static inline struct ureg_src
|
||||
ureg_imm4f( struct ureg_program *ureg,
|
||||
|
||||
@@ -344,11 +344,11 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
|
||||
pctx->wincoordFile, wincoordInput,
|
||||
TGSI_FILE_IMMEDIATE, pctx->numImmed);
|
||||
|
||||
/* TEX texTemp, texTemp, sampler; */
|
||||
tgsi_transform_tex_2d_inst(ctx,
|
||||
TGSI_FILE_TEMPORARY, texTemp,
|
||||
TGSI_FILE_TEMPORARY, texTemp,
|
||||
sampIdx);
|
||||
/* TEX texTemp, texTemp, sampler, 2D; */
|
||||
tgsi_transform_tex_inst(ctx,
|
||||
TGSI_FILE_TEMPORARY, texTemp,
|
||||
TGSI_FILE_TEMPORARY, texTemp,
|
||||
TGSI_TEXTURE_2D, sampIdx);
|
||||
|
||||
/* KILL_IF -texTemp; # if -texTemp < 0, kill fragment */
|
||||
tgsi_transform_kill_inst(ctx,
|
||||
|
||||
@@ -646,6 +646,7 @@ util_make_fs_blit_msaa_depthstencil(struct pipe_context *pipe,
|
||||
"FRAG\n"
|
||||
"DCL IN[0], GENERIC[0], LINEAR\n"
|
||||
"DCL SAMP[0..1]\n"
|
||||
"DCL SVIEW[0..1], %s, FLOAT\n"
|
||||
"DCL OUT[0], POSITION\n"
|
||||
"DCL OUT[1], STENCIL\n"
|
||||
"DCL TEMP[0]\n"
|
||||
@@ -663,7 +664,7 @@ util_make_fs_blit_msaa_depthstencil(struct pipe_context *pipe,
|
||||
assert(tgsi_tex == TGSI_TEXTURE_2D_MSAA ||
|
||||
tgsi_tex == TGSI_TEXTURE_2D_ARRAY_MSAA);
|
||||
|
||||
sprintf(text, shader_templ, type, type);
|
||||
sprintf(text, shader_templ, type, type, type);
|
||||
|
||||
if (!tgsi_text_translate(text, tokens, Elements(tokens))) {
|
||||
assert(0);
|
||||
|
||||
@@ -3213,6 +3213,14 @@ Whether depth test, stencil test, and occlusion query should run before
|
||||
the fragment shader (regardless of fragment shader side effects). Corresponds
|
||||
to GLSL early_fragment_tests.
|
||||
|
||||
NEXT_SHADER
|
||||
"""""""""""
|
||||
|
||||
Which shader stage will MOST LIKELY follow after this shader when the shader
|
||||
is bound. This is only a hint to the driver and doesn't have to be precise.
|
||||
Only set for VS and TES.
|
||||
|
||||
|
||||
Texture Sampling and Texture Formats
|
||||
------------------------------------
|
||||
|
||||
|
||||
@@ -1017,7 +1017,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
|
||||
|
||||
const_offset = nir_src_as_const_value(intr->src[1]);
|
||||
if (const_offset) {
|
||||
off += const_offset->u[0];
|
||||
off += const_offset->u32[0];
|
||||
} else {
|
||||
/* For load_ubo_indirect, second src is indirect offset: */
|
||||
src1 = get_src(ctx, &intr->src[1])[0];
|
||||
@@ -1159,7 +1159,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
||||
idx = nir_intrinsic_base(intr);
|
||||
const_offset = nir_src_as_const_value(intr->src[0]);
|
||||
if (const_offset) {
|
||||
idx += const_offset->u[0];
|
||||
idx += const_offset->u32[0];
|
||||
for (int i = 0; i < intr->num_components; i++) {
|
||||
unsigned n = idx * 4 + i;
|
||||
dst[i] = create_uniform(ctx, n);
|
||||
@@ -1186,7 +1186,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
||||
idx = nir_intrinsic_base(intr);
|
||||
const_offset = nir_src_as_const_value(intr->src[0]);
|
||||
if (const_offset) {
|
||||
idx += const_offset->u[0];
|
||||
idx += const_offset->u32[0];
|
||||
for (int i = 0; i < intr->num_components; i++) {
|
||||
unsigned n = idx * 4 + i;
|
||||
dst[i] = ctx->ir->inputs[n];
|
||||
@@ -1213,7 +1213,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
||||
idx = nir_intrinsic_base(intr);
|
||||
const_offset = nir_src_as_const_value(intr->src[1]);
|
||||
compile_assert(ctx, const_offset != NULL);
|
||||
idx += const_offset->u[0];
|
||||
idx += const_offset->u32[0];
|
||||
|
||||
src = get_src(ctx, &intr->src[0]);
|
||||
for (int i = 0; i < intr->num_components; i++) {
|
||||
@@ -1301,7 +1301,7 @@ emit_load_const(struct ir3_compile *ctx, nir_load_const_instr *instr)
|
||||
struct ir3_instruction **dst = get_dst_ssa(ctx, &instr->def,
|
||||
instr->def.num_components);
|
||||
for (int i = 0; i < instr->def.num_components; i++)
|
||||
dst[i] = create_immed(ctx->block, instr->value.u[i]);
|
||||
dst[i] = create_immed(ctx->block, instr->value.u32[i]);
|
||||
}
|
||||
|
||||
static void
|
||||
|
||||
@@ -290,7 +290,7 @@ lower_if_else_block(nir_block *block, void *void_state)
|
||||
}
|
||||
|
||||
nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
|
||||
phi->dest.ssa.num_components, phi->dest.ssa.name);
|
||||
phi->dest.ssa.num_components, 32, phi->dest.ssa.name);
|
||||
sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
|
||||
|
||||
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
|
||||
|
||||
@@ -160,7 +160,7 @@ struct nv50_ir_prog_info
|
||||
uint8_t clipDistances; /* number of clip distance outputs */
|
||||
uint8_t cullDistances; /* number of cull distance outputs */
|
||||
int8_t genUserClip; /* request user clip planes for ClipVertex */
|
||||
uint8_t auxCBSlot; /* constant buffer index of UCP/draw data */
|
||||
uint8_t auxCBSlot; /* driver constant buffer slot */
|
||||
uint16_t ucpBase; /* base address for UCPs */
|
||||
uint16_t drawInfoBase; /* base address for draw parameters */
|
||||
uint8_t pointSize; /* output index for PointSize */
|
||||
@@ -175,7 +175,6 @@ struct nv50_ir_prog_info
|
||||
uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */
|
||||
bool fp64; /* program uses fp64 math */
|
||||
bool nv50styleSurfaces; /* generate gX[] access for raw buffers */
|
||||
uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */
|
||||
uint16_t texBindBase; /* base address for tex handles (nve4) */
|
||||
uint16_t suInfoBase; /* base address for surface info (nve4) */
|
||||
uint16_t sampleInfoBase; /* base address for sample positions */
|
||||
|
||||
@@ -1655,10 +1655,8 @@ CodeEmitterGK110::emitSTORE(const Instruction *i)
|
||||
break;
|
||||
}
|
||||
|
||||
if (i->src(0).getFile() != FILE_MEMORY_GLOBAL)
|
||||
offset &= 0xffffff;
|
||||
|
||||
if (code[0] & 0x2) {
|
||||
offset &= 0xffffff;
|
||||
emitLoadStoreType(i->dType, 0x33);
|
||||
if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
|
||||
emitCachingMode(i->cache, 0x2f);
|
||||
|
||||
@@ -1634,7 +1634,9 @@ CodeEmitterNV50::emitTEX(const TexInstruction *i)
|
||||
code[1] |= (i->tex.mask & 0xc) << 12;
|
||||
|
||||
if (i->tex.liveOnly)
|
||||
code[1] |= 4;
|
||||
code[1] |= 1 << 2;
|
||||
if (i->tex.derivAll)
|
||||
code[1] |= 1 << 3;
|
||||
|
||||
defId(i->def(0), 2);
|
||||
|
||||
|
||||
@@ -856,15 +856,17 @@ public:
|
||||
};
|
||||
std::vector<TextureView> textureViews;
|
||||
|
||||
/*
|
||||
struct Resource {
|
||||
uint8_t target; // TGSI_TEXTURE_*
|
||||
bool raw;
|
||||
uint8_t slot; // $surface index
|
||||
};
|
||||
std::vector<Resource> resources;
|
||||
*/
|
||||
|
||||
struct MemoryFile {
|
||||
bool shared;
|
||||
uint8_t mem_type; // TGSI_MEMORY_TYPE_*
|
||||
};
|
||||
std::vector<MemoryFile> memoryFiles;
|
||||
|
||||
@@ -1037,6 +1039,9 @@ void Source::scanProperty(const struct tgsi_full_property *prop)
|
||||
case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
|
||||
info->io.cullDistances = prop->u[0].Data;
|
||||
break;
|
||||
case TGSI_PROPERTY_NEXT_SHADER:
|
||||
/* Do not need to know the next shader stage. */
|
||||
break;
|
||||
default:
|
||||
INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
|
||||
break;
|
||||
@@ -1222,7 +1227,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
|
||||
break;
|
||||
case TGSI_FILE_MEMORY:
|
||||
for (i = first; i <= last; ++i)
|
||||
memoryFiles[i].shared = decl->Declaration.Shared;
|
||||
memoryFiles[i].mem_type = decl->Declaration.MemType;
|
||||
break;
|
||||
case TGSI_FILE_NULL:
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
@@ -1261,9 +1266,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
|
||||
info->numBarriers = 1;
|
||||
|
||||
if (insn.dstCount()) {
|
||||
if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) {
|
||||
Instruction::DstRegister dst = insn.getDst(0);
|
||||
Instruction::DstRegister dst = insn.getDst(0);
|
||||
|
||||
if (dst.getFile() == TGSI_FILE_OUTPUT) {
|
||||
if (dst.isIndirect(0))
|
||||
for (unsigned i = 0; i < info->numOutputs; ++i)
|
||||
info->out[i].mask = 0xf;
|
||||
@@ -1280,11 +1285,11 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
|
||||
if (isEdgeFlagPassthrough(insn))
|
||||
info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
|
||||
} else
|
||||
if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
|
||||
if (insn.getDst(0).isIndirect(0))
|
||||
indirectTempArrays.insert(insn.getDst(0).getArrayId());
|
||||
if (dst.getFile() == TGSI_FILE_TEMPORARY) {
|
||||
if (dst.isIndirect(0))
|
||||
indirectTempArrays.insert(dst.getArrayId());
|
||||
} else
|
||||
if (insn.getDst(0).getFile() == TGSI_FILE_BUFFER) {
|
||||
if (dst.getFile() == TGSI_FILE_BUFFER) {
|
||||
info->io.globalAccess |= 0x2;
|
||||
}
|
||||
}
|
||||
@@ -1419,8 +1424,8 @@ private:
|
||||
void handleLIT(Value *dst0[4]);
|
||||
void handleUserClipPlanes();
|
||||
|
||||
Symbol *getResourceBase(int r);
|
||||
void getResourceCoords(std::vector<Value *>&, int r, int s);
|
||||
// Symbol *getResourceBase(int r);
|
||||
// void getResourceCoords(std::vector<Value *>&, int r, int s);
|
||||
|
||||
void handleLOAD(Value *dst0[4]);
|
||||
void handleSTORE();
|
||||
@@ -1527,8 +1532,21 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
|
||||
|
||||
sym->reg.fileIndex = fileIdx;
|
||||
|
||||
if (tgsiFile == TGSI_FILE_MEMORY && code->memoryFiles[fileIdx].shared)
|
||||
sym->setFile(FILE_MEMORY_SHARED);
|
||||
if (tgsiFile == TGSI_FILE_MEMORY) {
|
||||
switch (code->memoryFiles[fileIdx].mem_type) {
|
||||
case TGSI_MEMORY_TYPE_SHARED:
|
||||
sym->setFile(FILE_MEMORY_SHARED);
|
||||
break;
|
||||
case TGSI_MEMORY_TYPE_INPUT:
|
||||
assert(prog->getType() == Program::TYPE_COMPUTE);
|
||||
assert(idx == -1);
|
||||
sym->setFile(FILE_SHADER_INPUT);
|
||||
address += info->prop.cp.inputOffset;
|
||||
break;
|
||||
default:
|
||||
assert(0); /* TODO: Add support for global and private memory */
|
||||
}
|
||||
}
|
||||
|
||||
if (idx >= 0) {
|
||||
if (sym->reg.file == FILE_SHADER_INPUT)
|
||||
@@ -1989,7 +2007,6 @@ Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
|
||||
void
|
||||
Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
|
||||
{
|
||||
Value *val;
|
||||
Value *arg[4], *src[8];
|
||||
Value *lod = NULL, *shd = NULL;
|
||||
unsigned int s, c, d;
|
||||
@@ -2032,17 +2049,6 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
|
||||
shd = src[n - 1];
|
||||
}
|
||||
|
||||
if (tgt.isCube()) {
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
|
||||
val = getScratch();
|
||||
mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
|
||||
mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
|
||||
mkOp1(OP_RCP, TYPE_F32, val, val);
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
|
||||
}
|
||||
|
||||
for (c = 0, d = 0; c < 4; ++c) {
|
||||
if (dst[c]) {
|
||||
texi->setDef(d++, dst[c]);
|
||||
@@ -2148,6 +2154,7 @@ Converter::handleLIT(Value *dst0[4])
|
||||
}
|
||||
}
|
||||
|
||||
/* Keep this around for now as reference when adding img support
|
||||
static inline bool
|
||||
isResourceSpecial(const int r)
|
||||
{
|
||||
@@ -2178,7 +2185,8 @@ Converter::getResourceBase(const int r)
|
||||
|
||||
switch (r) {
|
||||
case TGSI_RESOURCE_GLOBAL:
|
||||
sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15);
|
||||
sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL,
|
||||
info->io.auxCBSlot);
|
||||
break;
|
||||
case TGSI_RESOURCE_LOCAL:
|
||||
assert(prog->getType() == Program::TYPE_COMPUTE);
|
||||
@@ -2243,6 +2251,7 @@ partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
|
||||
}
|
||||
return n + 1;
|
||||
}
|
||||
*/
|
||||
|
||||
// For raw loads, granularity is 4 byte.
|
||||
// Usage of the texture read mask on OP_SULDP is not allowed.
|
||||
@@ -2253,8 +2262,9 @@ Converter::handleLOAD(Value *dst0[4])
|
||||
int c;
|
||||
std::vector<Value *> off, src, ldv, def;
|
||||
|
||||
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER ||
|
||||
tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) {
|
||||
switch (tgsi.getSrc(0).getFile()) {
|
||||
case TGSI_FILE_BUFFER:
|
||||
case TGSI_FILE_MEMORY:
|
||||
for (c = 0; c < 4; ++c) {
|
||||
if (!dst0[c])
|
||||
continue;
|
||||
@@ -2274,9 +2284,12 @@ Converter::handleLOAD(Value *dst0[4])
|
||||
if (tgsi.getSrc(0).isIndirect(0))
|
||||
ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
|
||||
}
|
||||
return;
|
||||
break;
|
||||
default:
|
||||
assert(!"Unsupported srcFile for LOAD");
|
||||
}
|
||||
|
||||
/* Keep this around for now as reference when adding img support
|
||||
getResourceCoords(off, r, 1);
|
||||
|
||||
if (isResourceRaw(code, r)) {
|
||||
@@ -2342,6 +2355,7 @@ Converter::handleLOAD(Value *dst0[4])
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
|
||||
if (dst0[c] != def[c])
|
||||
mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
|
||||
*/
|
||||
}
|
||||
|
||||
// For formatted stores, the write mask on OP_SUSTP can be used.
|
||||
@@ -2353,8 +2367,9 @@ Converter::handleSTORE()
|
||||
int c;
|
||||
std::vector<Value *> off, src, dummy;
|
||||
|
||||
if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER ||
|
||||
tgsi.getDst(0).getFile() == TGSI_FILE_MEMORY) {
|
||||
switch (tgsi.getDst(0).getFile()) {
|
||||
case TGSI_FILE_BUFFER:
|
||||
case TGSI_FILE_MEMORY:
|
||||
for (c = 0; c < 4; ++c) {
|
||||
if (!(tgsi.getDst(0).getMask() & (1 << c)))
|
||||
continue;
|
||||
@@ -2375,9 +2390,12 @@ Converter::handleSTORE()
|
||||
if (tgsi.getDst(0).isIndirect(0))
|
||||
st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0));
|
||||
}
|
||||
return;
|
||||
break;
|
||||
default:
|
||||
assert(!"Unsupported dstFile for STORE");
|
||||
}
|
||||
|
||||
/* Keep this around for now as reference when adding img support
|
||||
getResourceCoords(off, r, 0);
|
||||
src = off;
|
||||
const int s = src.size();
|
||||
@@ -2425,6 +2443,7 @@ Converter::handleSTORE()
|
||||
mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
|
||||
dummy, src)->tex.mask = tgsi.getDst(0).getMask();
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
// XXX: These only work on resources with the single-component u32/s32 formats.
|
||||
@@ -2439,8 +2458,9 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
|
||||
std::vector<Value *> defv;
|
||||
LValue *dst = getScratch();
|
||||
|
||||
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER ||
|
||||
tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) {
|
||||
switch (tgsi.getSrc(0).getFile()) {
|
||||
case TGSI_FILE_BUFFER:
|
||||
case TGSI_FILE_MEMORY:
|
||||
for (int c = 0; c < 4; ++c) {
|
||||
if (!dst0[c])
|
||||
continue;
|
||||
@@ -2468,10 +2488,12 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
|
||||
for (int c = 0; c < 4; ++c)
|
||||
if (dst0[c])
|
||||
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
|
||||
return;
|
||||
break;
|
||||
default:
|
||||
assert(!"Unsupported srcFile for ATOM");
|
||||
}
|
||||
|
||||
|
||||
/* Keep this around for now as reference when adding img support
|
||||
getResourceCoords(srcv, r, 1);
|
||||
|
||||
if (isResourceSpecial(r)) {
|
||||
@@ -2499,6 +2521,7 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
|
||||
for (int c = 0; c < 4; ++c)
|
||||
if (dst0[c])
|
||||
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
|
||||
*/
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@@ -67,6 +67,7 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
|
||||
tmp = bld.getScratch();
|
||||
|
||||
for (l = 0; l < 4; ++l) {
|
||||
Value *src[3], *val;
|
||||
// mov coordinates from lane l to all lanes
|
||||
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
|
||||
for (c = 0; c < dim; ++c) {
|
||||
@@ -92,10 +93,25 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
|
||||
add->lanes = 1; /* abused for .ndv */
|
||||
}
|
||||
|
||||
// normalize cube coordinates if necessary
|
||||
if (i->tex.target.isCube()) {
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
|
||||
val = bld.getScratch();
|
||||
bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
|
||||
bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
|
||||
bld.mkOp1(OP_RCP, TYPE_F32, val, val);
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
|
||||
} else {
|
||||
for (c = 0; c < dim; ++c)
|
||||
src[c] = crd[c];
|
||||
}
|
||||
|
||||
// texture
|
||||
bld.insert(tex = cloneForward(func, i));
|
||||
for (c = 0; c < dim; ++c)
|
||||
tex->setSrc(c + array, crd[c]);
|
||||
tex->setSrc(c + array, src[c]);
|
||||
bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
|
||||
|
||||
// save results
|
||||
|
||||
@@ -682,7 +682,7 @@ void NV50LoweringPreSSA::loadTexMsInfo(uint32_t off, Value **ms,
|
||||
Value **ms_x, Value **ms_y) {
|
||||
// This loads the texture-indexed ms setting from the constant buffer
|
||||
Value *tmp = new_LValue(func, FILE_GPR);
|
||||
uint8_t b = prog->driver->io.resInfoCBSlot;
|
||||
uint8_t b = prog->driver->io.auxCBSlot;
|
||||
off += prog->driver->io.suInfoBase;
|
||||
if (prog->getType() > Program::TYPE_VERTEX)
|
||||
off += 16 * 2 * 4;
|
||||
@@ -724,6 +724,23 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i)
|
||||
const int dref = arg;
|
||||
const int lod = i->tex.target.isShadow() ? (arg + 1) : arg;
|
||||
|
||||
/* Only normalize in the non-explicit derivatives case.
|
||||
*/
|
||||
if (i->tex.target.isCube() && i->op != OP_TXD) {
|
||||
Value *src[3], *val;
|
||||
int c;
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), i->getSrc(c));
|
||||
val = bld.getScratch();
|
||||
bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
|
||||
bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
|
||||
bld.mkOp1(OP_RCP, TYPE_F32, val, val);
|
||||
for (c = 0; c < 3; ++c) {
|
||||
i->setSrc(c, bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(),
|
||||
i->getSrc(c), val));
|
||||
}
|
||||
}
|
||||
|
||||
// handle MS, which means looking up the MS params for this texture, and
|
||||
// adjusting the input coordinates to point at the right sample.
|
||||
if (i->tex.target.isMS()) {
|
||||
@@ -934,12 +951,14 @@ NV50LoweringPreSSA::handleTXD(TexInstruction *i)
|
||||
|
||||
handleTEX(i);
|
||||
i->op = OP_TEX; // no need to clone dPdx/dPdy later
|
||||
i->tex.derivAll = true;
|
||||
|
||||
for (c = 0; c < dim; ++c)
|
||||
crd[c] = bld.getScratch();
|
||||
|
||||
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
|
||||
for (l = 0; l < 4; ++l) {
|
||||
Value *src[3], *val;
|
||||
// mov coordinates from lane l to all lanes
|
||||
for (c = 0; c < dim; ++c)
|
||||
bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero);
|
||||
@@ -949,10 +968,24 @@ NV50LoweringPreSSA::handleTXD(TexInstruction *i)
|
||||
// add dPdy from lane l to lanes dy
|
||||
for (c = 0; c < dim; ++c)
|
||||
bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
|
||||
// normalize cube coordinates if necessary
|
||||
if (i->tex.target.isCube()) {
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
|
||||
val = bld.getScratch();
|
||||
bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
|
||||
bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
|
||||
bld.mkOp1(OP_RCP, TYPE_F32, val, val);
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
|
||||
} else {
|
||||
for (c = 0; c < dim; ++c)
|
||||
src[c] = crd[c];
|
||||
}
|
||||
// texture
|
||||
bld.insert(tex = cloneForward(func, i));
|
||||
for (c = 0; c < dim; ++c)
|
||||
tex->setSrc(c, crd[c]);
|
||||
tex->setSrc(c, src[c]);
|
||||
// save results
|
||||
for (c = 0; i->defExists(c); ++c) {
|
||||
Instruction *mov;
|
||||
@@ -1174,7 +1207,7 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i)
|
||||
bld.mkLoad(TYPE_F32,
|
||||
def,
|
||||
bld.mkSymbol(
|
||||
FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot,
|
||||
FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
|
||||
TYPE_U32, prog->driver->io.sampleInfoBase + 4 * idx),
|
||||
off);
|
||||
break;
|
||||
|
||||
@@ -600,7 +600,7 @@ NVC0LoweringPass::visit(BasicBlock *bb)
|
||||
inline Value *
|
||||
NVC0LoweringPass::loadTexHandle(Value *ptr, unsigned int slot)
|
||||
{
|
||||
uint8_t b = prog->driver->io.resInfoCBSlot;
|
||||
uint8_t b = prog->driver->io.auxCBSlot;
|
||||
uint32_t off = prog->driver->io.texBindBase + slot * 4;
|
||||
return bld.
|
||||
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
|
||||
@@ -615,6 +615,24 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
|
||||
const int lyr = arg - (i->tex.target.isMS() ? 2 : 1);
|
||||
const int chipset = prog->getTarget()->getChipset();
|
||||
|
||||
/* Only normalize in the non-explicit derivatives case. For explicit
|
||||
* derivatives, this is handled in handleManualTXD.
|
||||
*/
|
||||
if (i->tex.target.isCube() && i->dPdx[0].get() == NULL) {
|
||||
Value *src[3], *val;
|
||||
int c;
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), i->getSrc(c));
|
||||
val = bld.getScratch();
|
||||
bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
|
||||
bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
|
||||
bld.mkOp1(OP_RCP, TYPE_F32, val, val);
|
||||
for (c = 0; c < 3; ++c) {
|
||||
i->setSrc(c, bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(),
|
||||
i->getSrc(c), val));
|
||||
}
|
||||
}
|
||||
|
||||
// Arguments to the TEX instruction are a little insane. Even though the
|
||||
// encoding is identical between SM20 and SM30, the arguments mean
|
||||
// different things between Fermi and Kepler+. A lot of arguments are
|
||||
@@ -728,9 +746,13 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
|
||||
}
|
||||
|
||||
Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL;
|
||||
for (int s = dim; s >= 1; --s)
|
||||
i->setSrc(s, i->getSrc(s - 1));
|
||||
i->setSrc(0, arrayIndex);
|
||||
if (arrayIndex) {
|
||||
for (int s = dim; s >= 1; --s)
|
||||
i->setSrc(s, i->getSrc(s - 1));
|
||||
i->setSrc(0, arrayIndex);
|
||||
} else {
|
||||
i->moveSources(0, 1);
|
||||
}
|
||||
|
||||
if (arrayIndex) {
|
||||
int sat = (i->op == OP_TXF) ? 1 : 0;
|
||||
@@ -861,6 +883,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
|
||||
|
||||
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
|
||||
for (l = 0; l < 4; ++l) {
|
||||
Value *src[3], *val;
|
||||
// mov coordinates from lane l to all lanes
|
||||
for (c = 0; c < dim; ++c)
|
||||
bld.mkQuadop(0x00, crd[c], l, i->getSrc(c + array), zero);
|
||||
@@ -870,10 +893,24 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
|
||||
// add dPdy from lane l to lanes dy
|
||||
for (c = 0; c < dim; ++c)
|
||||
bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
|
||||
// normalize cube coordinates
|
||||
if (i->tex.target.isCube()) {
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
|
||||
val = bld.getScratch();
|
||||
bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
|
||||
bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
|
||||
bld.mkOp1(OP_RCP, TYPE_F32, val, val);
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
|
||||
} else {
|
||||
for (c = 0; c < dim; ++c)
|
||||
src[c] = crd[c];
|
||||
}
|
||||
// texture
|
||||
bld.insert(tex = cloneForward(func, i));
|
||||
for (c = 0; c < dim; ++c)
|
||||
tex->setSrc(c + array, crd[c]);
|
||||
tex->setSrc(c + array, src[c]);
|
||||
// save results
|
||||
for (c = 0; i->defExists(c); ++c) {
|
||||
Instruction *mov;
|
||||
@@ -1098,6 +1135,7 @@ NVC0LoweringPass::handleSharedATOM(Instruction *atom)
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
return;
|
||||
}
|
||||
|
||||
Instruction *i =
|
||||
@@ -1204,7 +1242,7 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
|
||||
inline Value *
|
||||
NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
|
||||
{
|
||||
uint8_t b = prog->driver->io.resInfoCBSlot;
|
||||
uint8_t b = prog->driver->io.auxCBSlot;
|
||||
off += prog->driver->io.suInfoBase;
|
||||
return bld.
|
||||
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
|
||||
@@ -1213,7 +1251,7 @@ NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
|
||||
inline Value *
|
||||
NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
|
||||
{
|
||||
uint8_t b = prog->driver->io.resInfoCBSlot;
|
||||
uint8_t b = prog->driver->io.auxCBSlot;
|
||||
off += prog->driver->io.suInfoBase;
|
||||
|
||||
if (ptr)
|
||||
@@ -1226,7 +1264,7 @@ NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
|
||||
inline Value *
|
||||
NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off)
|
||||
{
|
||||
uint8_t b = prog->driver->io.resInfoCBSlot;
|
||||
uint8_t b = prog->driver->io.auxCBSlot;
|
||||
off += prog->driver->io.suInfoBase;
|
||||
|
||||
if (ptr)
|
||||
@@ -1540,7 +1578,7 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
|
||||
call->indirect = 1;
|
||||
call->absolute = 1;
|
||||
call->setSrc(0, bld.mkSymbol(FILE_MEMORY_CONST,
|
||||
prog->driver->io.resInfoCBSlot, TYPE_U32,
|
||||
prog->driver->io.auxCBSlot, TYPE_U32,
|
||||
prog->driver->io.suInfoBase + base));
|
||||
call->setSrc(1, r[2]);
|
||||
call->setSrc(2, r[4]);
|
||||
@@ -1698,7 +1736,8 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
|
||||
}
|
||||
addr += prog->driver->prop.cp.gridInfoBase;
|
||||
bld.mkLoad(TYPE_U32, i->getDef(0),
|
||||
bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL);
|
||||
bld.mkSymbol(FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
|
||||
TYPE_U32, addr), NULL);
|
||||
break;
|
||||
case SV_SAMPLE_INDEX:
|
||||
// TODO: Properly pass source as an address in the PIX address space
|
||||
@@ -1715,7 +1754,7 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
|
||||
bld.mkLoad(TYPE_F32,
|
||||
i->getDef(0),
|
||||
bld.mkSymbol(
|
||||
FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot,
|
||||
FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
|
||||
TYPE_U32, prog->driver->io.sampleInfoBase +
|
||||
4 * sym->reg.data.sv.index),
|
||||
off);
|
||||
@@ -1780,7 +1819,7 @@ NVC0LoweringPass::handleSQRT(Instruction *i)
|
||||
{
|
||||
if (i->dType == TYPE_F64) {
|
||||
Value *pred = bld.getSSA(1, FILE_PREDICATE);
|
||||
Value *zero = bld.loadImm(NULL, 0.0d);
|
||||
Value *zero = bld.loadImm(NULL, 0.0);
|
||||
Value *dst = bld.getSSA(8);
|
||||
bld.mkOp1(OP_RSQ, i->dType, dst, i->getSrc(0));
|
||||
bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero);
|
||||
|
||||
@@ -204,6 +204,11 @@ static const char *ldstSubOpStr[] =
|
||||
"", "lock", "unlock"
|
||||
};
|
||||
|
||||
static const char *subfmOpStr[] =
|
||||
{
|
||||
"", "3d"
|
||||
};
|
||||
|
||||
static const char *DataTypeStr[] =
|
||||
{
|
||||
"-",
|
||||
@@ -548,6 +553,10 @@ void Instruction::print() const
|
||||
if (subOp < Elements(ldstSubOpStr))
|
||||
PRINT("%s ", ldstSubOpStr[subOp]);
|
||||
break;
|
||||
case OP_SUBFM:
|
||||
if (subOp < Elements(subfmOpStr))
|
||||
PRINT("%s ", subfmOpStr[subOp]);
|
||||
break;
|
||||
default:
|
||||
if (subOp)
|
||||
PRINT("(SUBOP:%u) ", subOp);
|
||||
|
||||
@@ -114,8 +114,6 @@ nouveau_codegen(int chipset, int type, struct tgsi_token tokens[],
|
||||
|
||||
info.io.auxCBSlot = 15;
|
||||
info.io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
|
||||
|
||||
info.io.resInfoCBSlot = 15;
|
||||
info.io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
|
||||
info.io.msInfoCBSlot = 15;
|
||||
info.io.msInfoBase = NV50_CB_AUX_MS_OFFSET;
|
||||
|
||||
@@ -67,122 +67,94 @@ nv50_screen_compute_setup(struct nv50_screen *screen,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
|
||||
BEGIN_NV04(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
|
||||
PUSH_DATA (push, screen->compute->handle);
|
||||
|
||||
BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1);
|
||||
BEGIN_NV04(push, NV50_CP(UNK02A0), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1);
|
||||
BEGIN_NV04(push, NV50_CP(DMA_STACK), 1);
|
||||
PUSH_DATA (push, fifo->vram);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2);
|
||||
BEGIN_NV04(push, NV50_CP(STACK_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, screen->stack_bo->offset);
|
||||
PUSH_DATA (push, screen->stack_bo->offset);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1);
|
||||
BEGIN_NV04(push, NV50_CP(STACK_SIZE_LOG), 1);
|
||||
PUSH_DATA (push, 4);
|
||||
|
||||
BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1);
|
||||
BEGIN_NV04(push, NV50_CP(UNK0290), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1);
|
||||
BEGIN_NV04(push, NV50_CP(LANES32_ENABLE), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1);
|
||||
BEGIN_NV04(push, NV50_CP(REG_MODE), 1);
|
||||
PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1);
|
||||
BEGIN_NV04(push, NV50_CP(UNK0384), 1);
|
||||
PUSH_DATA (push, 0x100);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1);
|
||||
BEGIN_NV04(push, NV50_CP(DMA_GLOBAL), 1);
|
||||
PUSH_DATA (push, fifo->vram);
|
||||
|
||||
for (i = 0; i < 15; i++) {
|
||||
BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2);
|
||||
BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(i)), 2);
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1);
|
||||
BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(i)), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1);
|
||||
BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(i)), 1);
|
||||
PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
|
||||
}
|
||||
|
||||
BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2);
|
||||
BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(15)), 2);
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1);
|
||||
BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(15)), 1);
|
||||
PUSH_DATA (push, ~0);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1);
|
||||
BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(15)), 1);
|
||||
PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
|
||||
|
||||
BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1);
|
||||
BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_LOG_ALLOC), 1);
|
||||
PUSH_DATA (push, 7);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1);
|
||||
BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_NO_CLAMP), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1);
|
||||
BEGIN_NV04(push, NV50_CP(STACK_WARPS_LOG_ALLOC), 1);
|
||||
PUSH_DATA (push, 7);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1);
|
||||
BEGIN_NV04(push, NV50_CP(STACK_WARPS_NO_CLAMP), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
|
||||
BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
|
||||
BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1);
|
||||
BEGIN_NV04(push, NV50_CP(DMA_TEXTURE), 1);
|
||||
PUSH_DATA (push, fifo->vram);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1);
|
||||
BEGIN_NV04(push, NV50_CP(TEX_LIMITS), 1);
|
||||
PUSH_DATA (push, 0x54);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1);
|
||||
BEGIN_NV04(push, NV50_CP(LINKED_TSC), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
|
||||
BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1);
|
||||
BEGIN_NV04(push, NV50_CP(DMA_TIC), 1);
|
||||
PUSH_DATA (push, fifo->vram);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3);
|
||||
BEGIN_NV04(push, NV50_CP(TIC_ADDRESS_HIGH), 3);
|
||||
PUSH_DATAh(push, screen->txc->offset);
|
||||
PUSH_DATA (push, screen->txc->offset);
|
||||
PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
|
||||
|
||||
BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1);
|
||||
BEGIN_NV04(push, NV50_CP(DMA_TSC), 1);
|
||||
PUSH_DATA (push, fifo->vram);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3);
|
||||
BEGIN_NV04(push, NV50_CP(TSC_ADDRESS_HIGH), 3);
|
||||
PUSH_DATAh(push, screen->txc->offset + 65536);
|
||||
PUSH_DATA (push, screen->txc->offset + 65536);
|
||||
PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
|
||||
|
||||
BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1);
|
||||
BEGIN_NV04(push, NV50_CP(DMA_CODE_CB), 1);
|
||||
PUSH_DATA (push, fifo->vram);
|
||||
|
||||
BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1);
|
||||
BEGIN_NV04(push, NV50_CP(DMA_LOCAL), 1);
|
||||
PUSH_DATA (push, fifo->vram);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2);
|
||||
BEGIN_NV04(push, NV50_CP(LOCAL_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, screen->tls_bo->offset + 65536);
|
||||
PUSH_DATA (push, screen->tls_bo->offset + 65536);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1);
|
||||
BEGIN_NV04(push, NV50_CP(LOCAL_SIZE_LOG), 1);
|
||||
PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool
|
||||
nv50_compute_validate_program(struct nv50_context *nv50)
|
||||
{
|
||||
struct nv50_program *prog = nv50->compprog;
|
||||
|
||||
if (prog->mem)
|
||||
return true;
|
||||
|
||||
if (!prog->translated) {
|
||||
prog->translated = nv50_program_translate(
|
||||
prog, nv50->screen->base.device->chipset, &nv50->base.debug);
|
||||
if (!prog->translated)
|
||||
return false;
|
||||
}
|
||||
if (unlikely(!prog->code_size))
|
||||
return false;
|
||||
|
||||
if (likely(prog->code_size)) {
|
||||
if (nv50_program_upload_code(nv50, prog)) {
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_compute_validate_globals(struct nv50_context *nv50)
|
||||
{
|
||||
@@ -198,26 +170,25 @@ nv50_compute_validate_globals(struct nv50_context *nv50)
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
nv50_compute_state_validate(struct nv50_context *nv50)
|
||||
{
|
||||
if (!nv50_compute_validate_program(nv50))
|
||||
return false;
|
||||
static struct nv50_state_validate
|
||||
validate_list_cp[] = {
|
||||
{ nv50_compprog_validate, NV50_NEW_CP_PROGRAM },
|
||||
{ nv50_compute_validate_globals, NV50_NEW_CP_GLOBALS },
|
||||
};
|
||||
|
||||
if (nv50->dirty_cp & NV50_NEW_CP_GLOBALS)
|
||||
nv50_compute_validate_globals(nv50);
|
||||
static bool
|
||||
nv50_state_validate_cp(struct nv50_context *nv50, uint32_t mask)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
/* TODO: validate textures, samplers, surfaces */
|
||||
ret = nv50_state_validate(nv50, mask, validate_list_cp,
|
||||
ARRAY_SIZE(validate_list_cp), &nv50->dirty_cp,
|
||||
nv50->bufctx_cp);
|
||||
|
||||
nv50_bufctx_fence(nv50->bufctx_cp, false);
|
||||
|
||||
nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_cp);
|
||||
if (unlikely(nouveau_pushbuf_validate(nv50->base.pushbuf)))
|
||||
return false;
|
||||
if (unlikely(nv50->state.flushed))
|
||||
nv50_bufctx_fence(nv50->bufctx_cp, true);
|
||||
|
||||
return true;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -227,7 +198,7 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
|
||||
struct nouveau_pushbuf *push = screen->base.pushbuf;
|
||||
unsigned size = align(nv50->compprog->parm_size, 0x4);
|
||||
|
||||
BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
|
||||
BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
|
||||
PUSH_DATA (push, (size / 4) << 8);
|
||||
|
||||
if (size) {
|
||||
@@ -245,7 +216,7 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
|
||||
nouveau_pushbuf_bufctx(push, nv50->bufctx);
|
||||
nouveau_pushbuf_validate(push);
|
||||
|
||||
BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), size / 4);
|
||||
BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), size / 4);
|
||||
nouveau_pushbuf_data(push, bo, offset, size);
|
||||
|
||||
nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
|
||||
@@ -278,7 +249,7 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
|
||||
struct nv50_program *cp = nv50->compprog;
|
||||
bool ret;
|
||||
|
||||
ret = !nv50_compute_state_validate(nv50);
|
||||
ret = !nv50_state_validate_cp(nv50, ~0);
|
||||
if (ret) {
|
||||
NOUVEAU_ERR("Failed to launch grid !\n");
|
||||
return;
|
||||
@@ -286,33 +257,33 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
|
||||
|
||||
nv50_compute_upload_input(nv50, info->input);
|
||||
|
||||
BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1);
|
||||
BEGIN_NV04(push, NV50_CP(CP_START_ID), 1);
|
||||
PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc));
|
||||
|
||||
BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1);
|
||||
BEGIN_NV04(push, NV50_CP(SHARED_SIZE), 1);
|
||||
PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
|
||||
BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1);
|
||||
BEGIN_NV04(push, NV50_CP(CP_REG_ALLOC_TEMP), 1);
|
||||
PUSH_DATA (push, cp->max_gpr);
|
||||
|
||||
/* grid/block setup */
|
||||
BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2);
|
||||
BEGIN_NV04(push, NV50_CP(BLOCKDIM_XY), 2);
|
||||
PUSH_DATA (push, info->block[1] << 16 | info->block[0]);
|
||||
PUSH_DATA (push, info->block[2]);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1);
|
||||
BEGIN_NV04(push, NV50_CP(BLOCK_ALLOC), 1);
|
||||
PUSH_DATA (push, 1 << 16 | block_size);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1);
|
||||
BEGIN_NV04(push, NV50_CP(BLOCKDIM_LATCH), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1);
|
||||
BEGIN_NV04(push, NV50_CP(GRIDDIM), 1);
|
||||
PUSH_DATA (push, info->grid[1] << 16 | info->grid[0]);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1);
|
||||
BEGIN_NV04(push, NV50_CP(GRIDID), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
|
||||
/* kernel launching */
|
||||
BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1);
|
||||
BEGIN_NV04(push, NV50_CP(LAUNCH), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
|
||||
BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
|
||||
/* bind a compute shader clobbers fragment shader state */
|
||||
nv50->dirty |= NV50_NEW_FRAGPROG;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;
|
||||
}
|
||||
|
||||
@@ -176,8 +176,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
|
||||
for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) {
|
||||
if (nv50->framebuffer.cbufs[i] &&
|
||||
nv50->framebuffer.cbufs[i]->texture == res) {
|
||||
nv50->dirty |= NV50_NEW_FRAMEBUFFER;
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
|
||||
nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER;
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
|
||||
if (!--ref)
|
||||
return ref;
|
||||
}
|
||||
@@ -186,8 +186,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
|
||||
if (bind & PIPE_BIND_DEPTH_STENCIL) {
|
||||
if (nv50->framebuffer.zsbuf &&
|
||||
nv50->framebuffer.zsbuf->texture == res) {
|
||||
nv50->dirty |= NV50_NEW_FRAMEBUFFER;
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
|
||||
nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER;
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
|
||||
if (!--ref)
|
||||
return ref;
|
||||
}
|
||||
@@ -202,8 +202,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
|
||||
assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
|
||||
for (i = 0; i < nv50->num_vtxbufs; ++i) {
|
||||
if (nv50->vtxbuf[i].buffer == res) {
|
||||
nv50->dirty |= NV50_NEW_ARRAYS;
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX);
|
||||
nv50->dirty_3d |= NV50_NEW_3D_ARRAYS;
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX);
|
||||
if (!--ref)
|
||||
return ref;
|
||||
}
|
||||
@@ -211,8 +211,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
|
||||
|
||||
if (nv50->idxbuf.buffer == res) {
|
||||
/* Just rebind to the bufctx as there is no separate dirty bit */
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX);
|
||||
BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(res), RD);
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_INDEX);
|
||||
BCTX_REFN(nv50->bufctx_3d, 3D_INDEX, nv04_resource(res), RD);
|
||||
if (!--ref)
|
||||
return ref;
|
||||
}
|
||||
@@ -222,8 +222,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
|
||||
for (i = 0; i < nv50->num_textures[s]; ++i) {
|
||||
if (nv50->textures[s][i] &&
|
||||
nv50->textures[s][i]->texture == res) {
|
||||
nv50->dirty |= NV50_NEW_TEXTURES;
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
|
||||
nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
|
||||
if (!--ref)
|
||||
return ref;
|
||||
}
|
||||
@@ -236,9 +236,9 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
|
||||
continue;
|
||||
if (!nv50->constbuf[s][i].user &&
|
||||
nv50->constbuf[s][i].u.buf == res) {
|
||||
nv50->dirty |= NV50_NEW_CONSTBUF;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
|
||||
nv50->constbuf_dirty[s] |= 1 << i;
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i));
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i));
|
||||
if (!--ref)
|
||||
return ref;
|
||||
}
|
||||
@@ -345,10 +345,10 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
|
||||
|
||||
flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
|
||||
|
||||
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->code);
|
||||
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms);
|
||||
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc);
|
||||
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo);
|
||||
BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->code);
|
||||
BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->uniforms);
|
||||
BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->txc);
|
||||
BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->stack_bo);
|
||||
if (screen->compute) {
|
||||
BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code);
|
||||
BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc);
|
||||
@@ -357,7 +357,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
|
||||
|
||||
flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
|
||||
|
||||
BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo);
|
||||
BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->fence.bo);
|
||||
BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo);
|
||||
if (screen->compute)
|
||||
BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);
|
||||
|
||||
@@ -26,43 +26,43 @@
|
||||
#include "nv50/nv50_3d.xml.h"
|
||||
#include "nv50/nv50_2d.xml.h"
|
||||
|
||||
#define NV50_NEW_BLEND (1 << 0)
|
||||
#define NV50_NEW_RASTERIZER (1 << 1)
|
||||
#define NV50_NEW_ZSA (1 << 2)
|
||||
#define NV50_NEW_VERTPROG (1 << 3)
|
||||
#define NV50_NEW_GMTYPROG (1 << 6)
|
||||
#define NV50_NEW_FRAGPROG (1 << 7)
|
||||
#define NV50_NEW_BLEND_COLOUR (1 << 8)
|
||||
#define NV50_NEW_STENCIL_REF (1 << 9)
|
||||
#define NV50_NEW_CLIP (1 << 10)
|
||||
#define NV50_NEW_SAMPLE_MASK (1 << 11)
|
||||
#define NV50_NEW_FRAMEBUFFER (1 << 12)
|
||||
#define NV50_NEW_STIPPLE (1 << 13)
|
||||
#define NV50_NEW_SCISSOR (1 << 14)
|
||||
#define NV50_NEW_VIEWPORT (1 << 15)
|
||||
#define NV50_NEW_ARRAYS (1 << 16)
|
||||
#define NV50_NEW_VERTEX (1 << 17)
|
||||
#define NV50_NEW_CONSTBUF (1 << 18)
|
||||
#define NV50_NEW_TEXTURES (1 << 19)
|
||||
#define NV50_NEW_SAMPLERS (1 << 20)
|
||||
#define NV50_NEW_STRMOUT (1 << 21)
|
||||
#define NV50_NEW_MIN_SAMPLES (1 << 22)
|
||||
#define NV50_NEW_CONTEXT (1 << 31)
|
||||
#define NV50_NEW_3D_BLEND (1 << 0)
|
||||
#define NV50_NEW_3D_RASTERIZER (1 << 1)
|
||||
#define NV50_NEW_3D_ZSA (1 << 2)
|
||||
#define NV50_NEW_3D_VERTPROG (1 << 3)
|
||||
#define NV50_NEW_3D_GMTYPROG (1 << 6)
|
||||
#define NV50_NEW_3D_FRAGPROG (1 << 7)
|
||||
#define NV50_NEW_3D_BLEND_COLOUR (1 << 8)
|
||||
#define NV50_NEW_3D_STENCIL_REF (1 << 9)
|
||||
#define NV50_NEW_3D_CLIP (1 << 10)
|
||||
#define NV50_NEW_3D_SAMPLE_MASK (1 << 11)
|
||||
#define NV50_NEW_3D_FRAMEBUFFER (1 << 12)
|
||||
#define NV50_NEW_3D_STIPPLE (1 << 13)
|
||||
#define NV50_NEW_3D_SCISSOR (1 << 14)
|
||||
#define NV50_NEW_3D_VIEWPORT (1 << 15)
|
||||
#define NV50_NEW_3D_ARRAYS (1 << 16)
|
||||
#define NV50_NEW_3D_VERTEX (1 << 17)
|
||||
#define NV50_NEW_3D_CONSTBUF (1 << 18)
|
||||
#define NV50_NEW_3D_TEXTURES (1 << 19)
|
||||
#define NV50_NEW_3D_SAMPLERS (1 << 20)
|
||||
#define NV50_NEW_3D_STRMOUT (1 << 21)
|
||||
#define NV50_NEW_3D_MIN_SAMPLES (1 << 22)
|
||||
#define NV50_NEW_3D_CONTEXT (1 << 31)
|
||||
|
||||
#define NV50_NEW_CP_PROGRAM (1 << 0)
|
||||
#define NV50_NEW_CP_GLOBALS (1 << 1)
|
||||
|
||||
/* 3d bufctx (during draw_vbo, blit_3d) */
|
||||
#define NV50_BIND_FB 0
|
||||
#define NV50_BIND_VERTEX 1
|
||||
#define NV50_BIND_VERTEX_TMP 2
|
||||
#define NV50_BIND_INDEX 3
|
||||
#define NV50_BIND_TEXTURES 4
|
||||
#define NV50_BIND_CB(s, i) (5 + 16 * (s) + (i))
|
||||
#define NV50_BIND_SO 53
|
||||
#define NV50_BIND_SCREEN 54
|
||||
#define NV50_BIND_TLS 55
|
||||
#define NV50_BIND_3D_COUNT 56
|
||||
#define NV50_BIND_3D_FB 0
|
||||
#define NV50_BIND_3D_VERTEX 1
|
||||
#define NV50_BIND_3D_VERTEX_TMP 2
|
||||
#define NV50_BIND_3D_INDEX 3
|
||||
#define NV50_BIND_3D_TEXTURES 4
|
||||
#define NV50_BIND_3D_CB(s, i) (5 + 16 * (s) + (i))
|
||||
#define NV50_BIND_3D_SO 53
|
||||
#define NV50_BIND_3D_SCREEN 54
|
||||
#define NV50_BIND_3D_TLS 55
|
||||
#define NV50_BIND_3D_COUNT 56
|
||||
|
||||
/* compute bufctx (during launch_grid) */
|
||||
#define NV50_BIND_CP_GLOBAL 0
|
||||
@@ -115,7 +115,7 @@ struct nv50_context {
|
||||
struct nouveau_bufctx *bufctx;
|
||||
struct nouveau_bufctx *bufctx_cp;
|
||||
|
||||
uint32_t dirty;
|
||||
uint32_t dirty_3d; /* dirty flags for 3d state */
|
||||
uint32_t dirty_cp; /* dirty flags for compute state */
|
||||
bool cb_dirty;
|
||||
|
||||
@@ -221,6 +221,7 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
|
||||
void nv50_vertprog_validate(struct nv50_context *);
|
||||
void nv50_gmtyprog_validate(struct nv50_context *);
|
||||
void nv50_fragprog_validate(struct nv50_context *);
|
||||
void nv50_compprog_validate(struct nv50_context *);
|
||||
void nv50_fp_linkage_validate(struct nv50_context *);
|
||||
void nv50_gp_linkage_validate(struct nv50_context *);
|
||||
void nv50_constbufs_validate(struct nv50_context *);
|
||||
@@ -231,7 +232,15 @@ void nv50_stream_output_validate(struct nv50_context *);
|
||||
extern void nv50_init_state_functions(struct nv50_context *);
|
||||
|
||||
/* nv50_state_validate.c */
|
||||
bool nv50_state_validate(struct nv50_context *, uint32_t state_mask);
|
||||
struct nv50_state_validate {
|
||||
void (*func)(struct nv50_context *);
|
||||
uint32_t states;
|
||||
};
|
||||
|
||||
bool nv50_state_validate(struct nv50_context *, uint32_t,
|
||||
struct nv50_state_validate *, int, uint32_t *,
|
||||
struct nouveau_bufctx *);
|
||||
bool nv50_state_validate_3d(struct nv50_context *, uint32_t);
|
||||
|
||||
/* nv50_surface.c */
|
||||
extern void nv50_clear(struct pipe_context *, unsigned buffers,
|
||||
|
||||
@@ -335,7 +335,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
|
||||
info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
|
||||
info->io.genUserClip = prog->vp.clpd_nr;
|
||||
|
||||
info->io.resInfoCBSlot = 15;
|
||||
info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
|
||||
info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET;
|
||||
info->io.msInfoCBSlot = 15;
|
||||
|
||||
@@ -202,10 +202,10 @@ nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
|
||||
func = nv50_hw_sm_get_func(c);
|
||||
|
||||
/* configure and reset the counter(s) */
|
||||
BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
|
||||
BEGIN_NV04(push, NV50_CP(MP_PM_CONTROL(c)), 1);
|
||||
PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8)
|
||||
| cfg->ctr[i].unit | cfg->ctr[i].mode);
|
||||
BEGIN_NV04(push, NV50_COMPUTE(MP_PM_SET(c)), 1);
|
||||
BEGIN_NV04(push, NV50_CP(MP_PM_SET(c)), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
}
|
||||
return true;
|
||||
@@ -240,7 +240,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
|
||||
PUSH_SPACE(push, 8);
|
||||
for (c = 0; c < 4; c++) {
|
||||
if (screen->pm.mp_counter[c]) {
|
||||
BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
|
||||
BEGIN_NV04(push, NV50_CP(MP_PM_CONTROL(c)), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
}
|
||||
}
|
||||
@@ -257,7 +257,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
|
||||
hq->bo);
|
||||
|
||||
PUSH_SPACE(push, 2);
|
||||
BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
|
||||
BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
|
||||
pipe->bind_compute_state(pipe, screen->pm.prog);
|
||||
@@ -295,7 +295,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
|
||||
mask |= 1 << hsq->ctr[i];
|
||||
func = nv50_hw_sm_get_func(hsq->ctr[i]);
|
||||
|
||||
BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(hsq->ctr[i])), 1);
|
||||
BEGIN_NV04(push, NV50_CP(MP_PM_CONTROL(hsq->ctr[i])), 1);
|
||||
PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8)
|
||||
| cfg->ctr[i].unit | cfg->ctr[i].mode);
|
||||
}
|
||||
|
||||
@@ -29,6 +29,8 @@
|
||||
#include "nv50/nv50_context.h"
|
||||
#include "nv50/nv50_query_hw.h"
|
||||
|
||||
#include "nv50/nv50_compute.xml.h"
|
||||
|
||||
void
|
||||
nv50_constbufs_validate(struct nv50_context *nv50)
|
||||
{
|
||||
@@ -94,7 +96,7 @@ nv50_constbufs_validate(struct nv50_context *nv50)
|
||||
BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
|
||||
PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
|
||||
|
||||
BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD);
|
||||
BCTX_REFN(nv50->bufctx_3d, 3D_CB(s, i), res, RD);
|
||||
|
||||
nv50->cb_dirty = 1; /* Force cache flush for UBO. */
|
||||
} else {
|
||||
@@ -131,14 +133,14 @@ nv50_program_update_context_state(struct nv50_context *nv50,
|
||||
|
||||
if (prog && prog->tls_space) {
|
||||
if (nv50->state.new_tls_space)
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TLS);
|
||||
if (!nv50->state.tls_required || nv50->state.new_tls_space)
|
||||
BCTX_REFN_bo(nv50->bufctx_3d, TLS, flags, nv50->screen->tls_bo);
|
||||
BCTX_REFN_bo(nv50->bufctx_3d, 3D_TLS, flags, nv50->screen->tls_bo);
|
||||
nv50->state.new_tls_space = false;
|
||||
nv50->state.tls_required |= 1 << stage;
|
||||
} else {
|
||||
if (nv50->state.tls_required == (1 << stage))
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TLS);
|
||||
nv50->state.tls_required &= ~(1 << stage);
|
||||
}
|
||||
}
|
||||
@@ -181,7 +183,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
|
||||
fp->fp.force_persample_interp = rast->force_persample_interp;
|
||||
}
|
||||
|
||||
if (fp->mem && !(nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_MIN_SAMPLES)))
|
||||
if (fp->mem && !(nv50->dirty_3d & (NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_MIN_SAMPLES)))
|
||||
return;
|
||||
|
||||
if (!nv50_program_validate(nv50, fp))
|
||||
@@ -238,6 +240,19 @@ nv50_gmtyprog_validate(struct nv50_context *nv50)
|
||||
/* GP_ENABLE is updated in linkage validation */
|
||||
}
|
||||
|
||||
void
|
||||
nv50_compprog_validate(struct nv50_context *nv50)
|
||||
{
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
struct nv50_program *cp = nv50->compprog;
|
||||
|
||||
if (cp && !nv50_program_validate(nv50, cp))
|
||||
return;
|
||||
|
||||
BEGIN_NV04(push, NV50_CP(CODE_CB_FLUSH), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_sprite_coords_validate(struct nv50_context *nv50)
|
||||
{
|
||||
@@ -309,7 +324,7 @@ nv50_validate_derived_rs(struct nv50_context *nv50)
|
||||
PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard);
|
||||
}
|
||||
|
||||
if (nv50->dirty & NV50_NEW_FRAGPROG)
|
||||
if (nv50->dirty_3d & NV50_NEW_3D_FRAGPROG)
|
||||
return;
|
||||
psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
|
||||
color = nv50->state.semantic_color & ~NV50_3D_SEMANTIC_COLOR_CLMP_EN;
|
||||
@@ -378,9 +393,9 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
|
||||
uint8_t map[64];
|
||||
uint8_t so_map[64];
|
||||
|
||||
if (!(nv50->dirty & (NV50_NEW_VERTPROG |
|
||||
NV50_NEW_FRAGPROG |
|
||||
NV50_NEW_GMTYPROG))) {
|
||||
if (!(nv50->dirty_3d & (NV50_NEW_3D_VERTPROG |
|
||||
NV50_NEW_3D_FRAGPROG |
|
||||
NV50_NEW_3D_GMTYPROG))) {
|
||||
uint8_t bfc, ffc;
|
||||
ffc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK);
|
||||
bfc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK)
|
||||
@@ -633,8 +648,6 @@ nv50_stream_output_validate(struct nv50_context *nv50)
|
||||
BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1);
|
||||
PUSH_DATA (push, ctrl);
|
||||
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO);
|
||||
|
||||
for (i = 0; i < nv50->num_so_targets; ++i) {
|
||||
struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]);
|
||||
struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);
|
||||
@@ -664,7 +677,7 @@ nv50_stream_output_validate(struct nv50_context *nv50)
|
||||
prims = MIN2(prims, limit);
|
||||
}
|
||||
targ->stride = so->stride[i];
|
||||
BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
|
||||
BCTX_REFN(nv50->bufctx_3d, 3D_SO, buf, WR);
|
||||
}
|
||||
if (prims != ~0) {
|
||||
BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
|
||||
|
||||
@@ -200,7 +200,7 @@ nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso)
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
|
||||
nv50->blend = hwcso;
|
||||
nv50->dirty |= NV50_NEW_BLEND;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_BLEND;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -337,7 +337,7 @@ nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
|
||||
nv50->rast = hwcso;
|
||||
nv50->dirty |= NV50_NEW_RASTERIZER;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_RASTERIZER;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -426,7 +426,7 @@ nv50_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
|
||||
nv50->zsa = hwcso;
|
||||
nv50->dirty |= NV50_NEW_ZSA;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_ZSA;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -605,7 +605,7 @@ nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s,
|
||||
|
||||
nv50->num_samplers[s] = nr;
|
||||
|
||||
nv50->dirty |= NV50_NEW_SAMPLERS;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -698,9 +698,9 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s,
|
||||
|
||||
nv50->num_textures[s] = nr;
|
||||
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
|
||||
|
||||
nv50->dirty |= NV50_NEW_TEXTURES;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -776,7 +776,7 @@ nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso)
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
|
||||
nv50->vertprog = hwcso;
|
||||
nv50->dirty |= NV50_NEW_VERTPROG;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_VERTPROG;
|
||||
}
|
||||
|
||||
static void *
|
||||
@@ -792,7 +792,7 @@ nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso)
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
|
||||
nv50->fragprog = hwcso;
|
||||
nv50->dirty |= NV50_NEW_FRAGPROG;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;
|
||||
}
|
||||
|
||||
static void *
|
||||
@@ -808,7 +808,7 @@ nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso)
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
|
||||
nv50->gmtyprog = hwcso;
|
||||
nv50->dirty |= NV50_NEW_GMTYPROG;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_GMTYPROG;
|
||||
}
|
||||
|
||||
static void *
|
||||
@@ -857,7 +857,7 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
|
||||
nv50->constbuf[s][i].u.buf = NULL;
|
||||
else
|
||||
if (nv50->constbuf[s][i].u.buf)
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i));
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i));
|
||||
|
||||
pipe_resource_reference(&nv50->constbuf[s][i].u.buf, res);
|
||||
|
||||
@@ -882,7 +882,7 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
|
||||
}
|
||||
nv50->constbuf_dirty[s] |= 1 << i;
|
||||
|
||||
nv50->dirty |= NV50_NEW_CONSTBUF;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
|
||||
}
|
||||
|
||||
/* =============================================================================
|
||||
@@ -895,7 +895,7 @@ nv50_set_blend_color(struct pipe_context *pipe,
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
|
||||
nv50->blend_colour = *bcol;
|
||||
nv50->dirty |= NV50_NEW_BLEND_COLOUR;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_BLEND_COLOUR;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -905,7 +905,7 @@ nv50_set_stencil_ref(struct pipe_context *pipe,
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
|
||||
nv50->stencil_ref = *sr;
|
||||
nv50->dirty |= NV50_NEW_STENCIL_REF;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_STENCIL_REF;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -916,7 +916,7 @@ nv50_set_clip_state(struct pipe_context *pipe,
|
||||
|
||||
memcpy(nv50->clip.ucp, clip->ucp, sizeof(clip->ucp));
|
||||
|
||||
nv50->dirty |= NV50_NEW_CLIP;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_CLIP;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -925,7 +925,7 @@ nv50_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
|
||||
nv50->sample_mask = sample_mask;
|
||||
nv50->dirty |= NV50_NEW_SAMPLE_MASK;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_SAMPLE_MASK;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -935,7 +935,7 @@ nv50_set_min_samples(struct pipe_context *pipe, unsigned min_samples)
|
||||
|
||||
if (nv50->min_samples != min_samples) {
|
||||
nv50->min_samples = min_samples;
|
||||
nv50->dirty |= NV50_NEW_MIN_SAMPLES;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_MIN_SAMPLES;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -945,11 +945,11 @@ nv50_set_framebuffer_state(struct pipe_context *pipe,
|
||||
{
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
|
||||
|
||||
util_copy_framebuffer_state(&nv50->framebuffer, fb);
|
||||
|
||||
nv50->dirty |= NV50_NEW_FRAMEBUFFER;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -959,7 +959,7 @@ nv50_set_polygon_stipple(struct pipe_context *pipe,
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
|
||||
nv50->stipple = *stipple;
|
||||
nv50->dirty |= NV50_NEW_STIPPLE;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_STIPPLE;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -977,7 +977,7 @@ nv50_set_scissor_states(struct pipe_context *pipe,
|
||||
continue;
|
||||
nv50->scissors[start_slot + i] = scissor[i];
|
||||
nv50->scissors_dirty |= 1 << (start_slot + i);
|
||||
nv50->dirty |= NV50_NEW_SCISSOR;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_SCISSOR;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -996,7 +996,7 @@ nv50_set_viewport_states(struct pipe_context *pipe,
|
||||
continue;
|
||||
nv50->viewports[start_slot + i] = vpt[i];
|
||||
nv50->viewports_dirty |= 1 << (start_slot + i);
|
||||
nv50->dirty |= NV50_NEW_VIEWPORT;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_VIEWPORT;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1008,8 +1008,8 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
unsigned i;
|
||||
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX);
|
||||
nv50->dirty |= NV50_NEW_ARRAYS;
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX);
|
||||
nv50->dirty_3d |= NV50_NEW_3D_ARRAYS;
|
||||
|
||||
util_set_vertex_buffers_count(nv50->vtxbuf, &nv50->num_vtxbufs, vb,
|
||||
start_slot, count);
|
||||
@@ -1051,14 +1051,14 @@ nv50_set_index_buffer(struct pipe_context *pipe,
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
|
||||
if (nv50->idxbuf.buffer)
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX);
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_INDEX);
|
||||
|
||||
if (ib) {
|
||||
pipe_resource_reference(&nv50->idxbuf.buffer, ib->buffer);
|
||||
nv50->idxbuf.index_size = ib->index_size;
|
||||
if (ib->buffer) {
|
||||
nv50->idxbuf.offset = ib->offset;
|
||||
BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(ib->buffer), RD);
|
||||
BCTX_REFN(nv50->bufctx_3d, 3D_INDEX, nv04_resource(ib->buffer), RD);
|
||||
} else {
|
||||
nv50->idxbuf.user_buffer = ib->user_buffer;
|
||||
}
|
||||
@@ -1073,7 +1073,7 @@ nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
|
||||
nv50->vertex = hwcso;
|
||||
nv50->dirty |= NV50_NEW_VERTEX;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_VERTEX;
|
||||
}
|
||||
|
||||
static struct pipe_stream_output_target *
|
||||
@@ -1180,8 +1180,10 @@ nv50_set_stream_output_targets(struct pipe_context *pipe,
|
||||
}
|
||||
nv50->num_so_targets = num_targets;
|
||||
|
||||
if (nv50->so_targets_dirty)
|
||||
nv50->dirty |= NV50_NEW_STRMOUT;
|
||||
if (nv50->so_targets_dirty) {
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_SO);
|
||||
nv50->dirty_3d |= NV50_NEW_3D_STRMOUT;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
||||
@@ -25,7 +25,7 @@ nv50_validate_fb(struct nv50_context *nv50)
|
||||
unsigned ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1;
|
||||
uint32_t array_size = 0xffff, array_mode = 0;
|
||||
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
|
||||
|
||||
BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
|
||||
PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs);
|
||||
@@ -90,7 +90,7 @@ nv50_validate_fb(struct nv50_context *nv50)
|
||||
mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
|
||||
|
||||
/* only register for writing, otherwise we'd always serialize here */
|
||||
BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR);
|
||||
BCTX_REFN(nv50->bufctx_3d, 3D_FB, &mt->base, WR);
|
||||
}
|
||||
|
||||
if (fb->zsbuf) {
|
||||
@@ -118,7 +118,7 @@ nv50_validate_fb(struct nv50_context *nv50)
|
||||
mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
|
||||
mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
|
||||
|
||||
BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR);
|
||||
BCTX_REFN(nv50->bufctx_3d, 3D_FB, &mt->base, WR);
|
||||
} else {
|
||||
BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
@@ -187,8 +187,8 @@ nv50_validate_scissor(struct nv50_context *nv50)
|
||||
#ifdef NV50_SCISSORS_CLIPPING
|
||||
int minx, maxx, miny, maxy, i;
|
||||
|
||||
if (!(nv50->dirty &
|
||||
(NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | NV50_NEW_FRAMEBUFFER)) &&
|
||||
if (!(nv50->dirty_3d &
|
||||
(NV50_NEW_3D_SCISSOR | NV50_NEW_3D_VIEWPORT | NV50_NEW_3D_FRAMEBUFFER)) &&
|
||||
nv50->state.scissor == nv50->rast->pipe.scissor)
|
||||
return;
|
||||
|
||||
@@ -197,7 +197,7 @@ nv50_validate_scissor(struct nv50_context *nv50)
|
||||
|
||||
nv50->state.scissor = nv50->rast->pipe.scissor;
|
||||
|
||||
if ((nv50->dirty & NV50_NEW_FRAMEBUFFER) && !nv50->state.scissor)
|
||||
if ((nv50->dirty_3d & NV50_NEW_3D_FRAMEBUFFER) && !nv50->state.scissor)
|
||||
nv50->scissors_dirty = (1 << NV50_MAX_VIEWPORTS) - 1;
|
||||
|
||||
for (i = 0; i < NV50_MAX_VIEWPORTS; i++) {
|
||||
@@ -290,10 +290,10 @@ nv50_check_program_ucps(struct nv50_context *nv50,
|
||||
|
||||
vp->vp.clpd_nr = n;
|
||||
if (likely(vp == nv50->vertprog)) {
|
||||
nv50->dirty |= NV50_NEW_VERTPROG;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_VERTPROG;
|
||||
nv50_vertprog_validate(nv50);
|
||||
} else {
|
||||
nv50->dirty |= NV50_NEW_GMTYPROG;
|
||||
nv50->dirty_3d |= NV50_NEW_3D_GMTYPROG;
|
||||
nv50_gmtyprog_validate(nv50);
|
||||
}
|
||||
nv50_fp_linkage_validate(nv50);
|
||||
@@ -342,7 +342,7 @@ nv50_validate_clip(struct nv50_context *nv50)
|
||||
struct nv50_program *vp;
|
||||
uint8_t clip_enable;
|
||||
|
||||
if (nv50->dirty & NV50_NEW_CLIP) {
|
||||
if (nv50->dirty_3d & NV50_NEW_3D_CLIP) {
|
||||
BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
|
||||
PUSH_DATA (push, (NV50_CB_AUX_UCP_OFFSET << 8) | NV50_CB_AUX);
|
||||
BEGIN_NI04(push, NV50_3D(CB_DATA(0)), PIPE_MAX_CLIP_PLANES * 4);
|
||||
@@ -436,7 +436,8 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to)
|
||||
else
|
||||
ctx_to->state = ctx_to->screen->save_state;
|
||||
|
||||
ctx_to->dirty = ~0;
|
||||
ctx_to->dirty_3d = ~0;
|
||||
ctx_to->dirty_cp = ~0;
|
||||
ctx_to->viewports_dirty = ~0;
|
||||
ctx_to->scissors_dirty = ~0;
|
||||
|
||||
@@ -445,71 +446,71 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to)
|
||||
ctx_to->constbuf_dirty[2] = (1 << NV50_MAX_PIPE_CONSTBUFS) - 1;
|
||||
|
||||
if (!ctx_to->vertex)
|
||||
ctx_to->dirty &= ~(NV50_NEW_VERTEX | NV50_NEW_ARRAYS);
|
||||
ctx_to->dirty_3d &= ~(NV50_NEW_3D_VERTEX | NV50_NEW_3D_ARRAYS);
|
||||
|
||||
if (!ctx_to->vertprog)
|
||||
ctx_to->dirty &= ~NV50_NEW_VERTPROG;
|
||||
ctx_to->dirty_3d &= ~NV50_NEW_3D_VERTPROG;
|
||||
if (!ctx_to->fragprog)
|
||||
ctx_to->dirty &= ~NV50_NEW_FRAGPROG;
|
||||
ctx_to->dirty_3d &= ~NV50_NEW_3D_FRAGPROG;
|
||||
|
||||
if (!ctx_to->blend)
|
||||
ctx_to->dirty &= ~NV50_NEW_BLEND;
|
||||
ctx_to->dirty_3d &= ~NV50_NEW_3D_BLEND;
|
||||
if (!ctx_to->rast)
|
||||
#ifdef NV50_SCISSORS_CLIPPING
|
||||
ctx_to->dirty &= ~(NV50_NEW_RASTERIZER | NV50_NEW_SCISSOR);
|
||||
ctx_to->dirty_3d &= ~(NV50_NEW_3D_RASTERIZER | NV50_NEW_3D_SCISSOR);
|
||||
#else
|
||||
ctx_to->dirty &= ~NV50_NEW_RASTERIZER;
|
||||
ctx_to->dirty_3d &= ~NV50_NEW_3D_RASTERIZER;
|
||||
#endif
|
||||
if (!ctx_to->zsa)
|
||||
ctx_to->dirty &= ~NV50_NEW_ZSA;
|
||||
ctx_to->dirty_3d &= ~NV50_NEW_3D_ZSA;
|
||||
|
||||
ctx_to->screen->cur_ctx = ctx_to;
|
||||
}
|
||||
|
||||
static struct state_validate {
|
||||
void (*func)(struct nv50_context *);
|
||||
uint32_t states;
|
||||
} validate_list[] = {
|
||||
{ nv50_validate_fb, NV50_NEW_FRAMEBUFFER },
|
||||
{ nv50_validate_blend, NV50_NEW_BLEND },
|
||||
{ nv50_validate_zsa, NV50_NEW_ZSA },
|
||||
{ nv50_validate_sample_mask, NV50_NEW_SAMPLE_MASK },
|
||||
{ nv50_validate_rasterizer, NV50_NEW_RASTERIZER },
|
||||
{ nv50_validate_blend_colour, NV50_NEW_BLEND_COLOUR },
|
||||
{ nv50_validate_stencil_ref, NV50_NEW_STENCIL_REF },
|
||||
{ nv50_validate_stipple, NV50_NEW_STIPPLE },
|
||||
static struct nv50_state_validate
|
||||
validate_list_3d[] = {
|
||||
{ nv50_validate_fb, NV50_NEW_3D_FRAMEBUFFER },
|
||||
{ nv50_validate_blend, NV50_NEW_3D_BLEND },
|
||||
{ nv50_validate_zsa, NV50_NEW_3D_ZSA },
|
||||
{ nv50_validate_sample_mask, NV50_NEW_3D_SAMPLE_MASK },
|
||||
{ nv50_validate_rasterizer, NV50_NEW_3D_RASTERIZER },
|
||||
{ nv50_validate_blend_colour, NV50_NEW_3D_BLEND_COLOUR },
|
||||
{ nv50_validate_stencil_ref, NV50_NEW_3D_STENCIL_REF },
|
||||
{ nv50_validate_stipple, NV50_NEW_3D_STIPPLE },
|
||||
#ifdef NV50_SCISSORS_CLIPPING
|
||||
{ nv50_validate_scissor, NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT |
|
||||
NV50_NEW_RASTERIZER |
|
||||
NV50_NEW_FRAMEBUFFER },
|
||||
{ nv50_validate_scissor, NV50_NEW_3D_SCISSOR | NV50_NEW_3D_VIEWPORT |
|
||||
NV50_NEW_3D_RASTERIZER |
|
||||
NV50_NEW_3D_FRAMEBUFFER },
|
||||
#else
|
||||
{ nv50_validate_scissor, NV50_NEW_SCISSOR },
|
||||
{ nv50_validate_scissor, NV50_NEW_3D_SCISSOR },
|
||||
#endif
|
||||
{ nv50_validate_viewport, NV50_NEW_VIEWPORT },
|
||||
{ nv50_vertprog_validate, NV50_NEW_VERTPROG },
|
||||
{ nv50_gmtyprog_validate, NV50_NEW_GMTYPROG },
|
||||
{ nv50_fragprog_validate, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
|
||||
NV50_NEW_MIN_SAMPLES },
|
||||
{ nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
|
||||
NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER },
|
||||
{ nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG },
|
||||
{ nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
|
||||
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
|
||||
{ nv50_validate_derived_2, NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER },
|
||||
{ nv50_validate_derived_3, NV50_NEW_BLEND | NV50_NEW_FRAMEBUFFER },
|
||||
{ nv50_validate_clip, NV50_NEW_CLIP | NV50_NEW_RASTERIZER |
|
||||
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
|
||||
{ nv50_constbufs_validate, NV50_NEW_CONSTBUF },
|
||||
{ nv50_validate_textures, NV50_NEW_TEXTURES },
|
||||
{ nv50_validate_samplers, NV50_NEW_SAMPLERS },
|
||||
{ nv50_stream_output_validate, NV50_NEW_STRMOUT |
|
||||
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
|
||||
{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
|
||||
{ nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES },
|
||||
{ nv50_validate_viewport, NV50_NEW_3D_VIEWPORT },
|
||||
{ nv50_vertprog_validate, NV50_NEW_3D_VERTPROG },
|
||||
{ nv50_gmtyprog_validate, NV50_NEW_3D_GMTYPROG },
|
||||
{ nv50_fragprog_validate, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_RASTERIZER |
|
||||
NV50_NEW_3D_MIN_SAMPLES },
|
||||
{ nv50_fp_linkage_validate, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_VERTPROG |
|
||||
NV50_NEW_3D_GMTYPROG | NV50_NEW_3D_RASTERIZER },
|
||||
{ nv50_gp_linkage_validate, NV50_NEW_3D_GMTYPROG | NV50_NEW_3D_VERTPROG },
|
||||
{ nv50_validate_derived_rs, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_RASTERIZER |
|
||||
NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG },
|
||||
{ nv50_validate_derived_2, NV50_NEW_3D_ZSA | NV50_NEW_3D_FRAMEBUFFER },
|
||||
{ nv50_validate_derived_3, NV50_NEW_3D_BLEND | NV50_NEW_3D_FRAMEBUFFER },
|
||||
{ nv50_validate_clip, NV50_NEW_3D_CLIP | NV50_NEW_3D_RASTERIZER |
|
||||
NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG },
|
||||
{ nv50_constbufs_validate, NV50_NEW_3D_CONSTBUF },
|
||||
{ nv50_validate_textures, NV50_NEW_3D_TEXTURES },
|
||||
{ nv50_validate_samplers, NV50_NEW_3D_SAMPLERS },
|
||||
{ nv50_stream_output_validate, NV50_NEW_3D_STRMOUT |
|
||||
NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG },
|
||||
{ nv50_vertex_arrays_validate, NV50_NEW_3D_VERTEX | NV50_NEW_3D_ARRAYS },
|
||||
{ nv50_validate_min_samples, NV50_NEW_3D_MIN_SAMPLES },
|
||||
};
|
||||
|
||||
bool
|
||||
nv50_state_validate(struct nv50_context *nv50, uint32_t mask)
|
||||
nv50_state_validate(struct nv50_context *nv50, uint32_t mask,
|
||||
struct nv50_state_validate *validate_list, int size,
|
||||
uint32_t *dirty, struct nouveau_bufctx *bufctx)
|
||||
{
|
||||
uint32_t state_mask;
|
||||
int ret;
|
||||
@@ -518,16 +519,16 @@ nv50_state_validate(struct nv50_context *nv50, uint32_t mask)
|
||||
if (nv50->screen->cur_ctx != nv50)
|
||||
nv50_switch_pipe_context(nv50);
|
||||
|
||||
state_mask = nv50->dirty & mask;
|
||||
state_mask = *dirty & mask;
|
||||
|
||||
if (state_mask) {
|
||||
for (i = 0; i < ARRAY_SIZE(validate_list); ++i) {
|
||||
struct state_validate *validate = &validate_list[i];
|
||||
for (i = 0; i < size; i++) {
|
||||
struct nv50_state_validate *validate = &validate_list[i];
|
||||
|
||||
if (state_mask & validate->states)
|
||||
validate->func(nv50);
|
||||
}
|
||||
nv50->dirty &= ~state_mask;
|
||||
*dirty &= ~state_mask;
|
||||
|
||||
if (nv50->state.rt_serialize) {
|
||||
nv50->state.rt_serialize = false;
|
||||
@@ -535,14 +536,26 @@ nv50_state_validate(struct nv50_context *nv50, uint32_t mask)
|
||||
PUSH_DATA (nv50->base.pushbuf, 0);
|
||||
}
|
||||
|
||||
nv50_bufctx_fence(nv50->bufctx_3d, false);
|
||||
nv50_bufctx_fence(bufctx, false);
|
||||
}
|
||||
nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_3d);
|
||||
nouveau_pushbuf_bufctx(nv50->base.pushbuf, bufctx);
|
||||
ret = nouveau_pushbuf_validate(nv50->base.pushbuf);
|
||||
|
||||
return !ret;
|
||||
}
|
||||
|
||||
bool
|
||||
nv50_state_validate_3d(struct nv50_context *nv50, uint32_t mask)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
ret = nv50_state_validate(nv50, mask, validate_list_3d,
|
||||
ARRAY_SIZE(validate_list_3d), &nv50->dirty_3d,
|
||||
nv50->bufctx_3d);
|
||||
|
||||
if (unlikely(nv50->state.flushed)) {
|
||||
nv50->state.flushed = false;
|
||||
nv50_bufctx_fence(nv50->bufctx_3d, true);
|
||||
}
|
||||
return !ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user