a6123a80da
If the backend supports it, intrinsics with a component() are straightforward to shrink from the start. Notably helps vectorized I/O. v2: add an option for this and enable only on grown up backends, because some backends ignore the component() parameter. RADV GFX11: Totals from 921 (1.16% of 79439) affected shaders: Instrs: 616558 -> 615529 (-0.17%); split: -0.30%, +0.14% CodeSize: 3099864 -> 3095632 (-0.14%); split: -0.25%, +0.11% Latency: 2177075 -> 2160966 (-0.74%); split: -0.79%, +0.05% InvThroughput: 299997 -> 298664 (-0.44%); split: -0.47%, +0.02% VClause: 16343 -> 16395 (+0.32%); split: -0.01%, +0.32% SClause: 10715 -> 10714 (-0.01%) Copies: 24736 -> 24701 (-0.14%); split: -0.37%, +0.23% PreVGPRs: 30179 -> 30173 (-0.02%) VALU: 353472 -> 353439 (-0.01%); split: -0.03%, +0.02% SALU: 40323 -> 40322 (-0.00%) VMEM: 25353 -> 25352 (-0.00%) AGX: total instructions in shared programs: 2038217 -> 2038049 (<.01%) instructions in affected programs: 10249 -> 10081 (-1.64%) total alu in shared programs: 1593094 -> 1592939 (<.01%) alu in affected programs: 7145 -> 6990 (-2.17%) total fscib in shared programs: 1589254 -> 1589102 (<.01%) fscib in affected programs: 7217 -> 7065 (-2.11%) total bytes in shared programs: 13975666 -> 13974722 (<.01%) bytes in affected programs: 65942 -> 64998 (-1.43%) total regs in shared programs: 592758 -> 591187 (-0.27%) regs in affected programs: 6936 -> 5365 (-22.65%) Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> (v1) Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28004>
580 lines
20 KiB
C++
580 lines
20 KiB
C++
/*
|
|
* Copyright © 2022 Pavel Ondračka
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "nir_test.h"
|
|
|
|
class nir_opt_shrink_vectors_test : public nir_test {
|
|
protected:
|
|
nir_opt_shrink_vectors_test();
|
|
|
|
nir_def *in_def;
|
|
nir_variable *out_var;
|
|
};
|
|
|
|
/* Sets up the variables every test relies on: a two-component shader input
 * (loaded immediately into in_def) and a one-component shader output.
 */
nir_opt_shrink_vectors_test::nir_opt_shrink_vectors_test()
   : nir_test::nir_test("nir_opt_shrink_vectors_test")
{
   /* Input first, and load it right away so in_def is available as a source. */
   nir_variable *input = nir_variable_create(b->shader, nir_var_shader_in,
                                             glsl_vec_type(2), "in");
   in_def = nir_load_var(b, input);

   /* Output the tests write their final value to. */
   out_var = nir_variable_create(b->shader, nir_var_shader_out,
                                 glsl_vec_type(1), "out");
}
|
|
|
|
/* Converts one swizzle letter into its component index.
 *
 * Two alphabets are accepted: "xyzw" maps to 0..3 and "abcdefghijklmnop"
 * maps to 0..15. The short alphabet is consulted first.
 *
 * Returns ~0u for any other character, including '\0'. Previously an unknown
 * character led to pointer arithmetic on a NULL strchr() result, which is
 * undefined behaviour, and '\0' matched the terminator of "xyzw" and yielded
 * 4. ~0u is never produced for a valid letter, so a comparison against a
 * translated valid swizzle cannot succeed by accident.
 */
static unsigned translate_swizzle(char swz)
{
   const char *swizzles_dict = "xyzw";
   const char *extended_swizzles_dict = "abcdefghijklmnop";

   /* strchr() would happily match the string terminator; reject it here. */
   if (swz == '\0')
      return ~0u;

   const char *ptr = strchr(swizzles_dict, swz);
   if (ptr)
      return (unsigned)(ptr - swizzles_dict);

   ptr = strchr(extended_swizzles_dict, swz);
   if (ptr)
      return (unsigned)(ptr - extended_swizzles_dict);

   /* Not a swizzle letter from either alphabet. */
   return ~0u;
}
|
|
|
|
/* Writes the swizzle described by a string of swizzle letters into src.
 *
 * Character N of the string sets src->swizzle[N]; entries beyond the length
 * of the string are left as they were. Letters are decoded with
 * translate_swizzle().
 */
static void set_swizzle(nir_alu_src * src, const char * swizzle)
{
   for (unsigned chan = 0; swizzle[chan] != '\0'; chan++)
      src->swizzle[chan] = translate_swizzle(swizzle[chan]);
}
|
|
|
|
/* Asserts that the leading entries of src->swizzle match the given string
 * of swizzle letters; only as many entries as the string has characters are
 * compared.
 *
 * NOTE(review): ASSERT_TRUE is used inside a helper here. With GoogleTest a
 * fatal assertion returns from the function it is written in, so a mismatch
 * presumably marks the test failed but does not stop the calling test body.
 */
static void check_swizzle(nir_alu_src * src, const char * swizzle)
{
   for (unsigned i = 0; swizzle[i] != '\0'; i++) {
      ASSERT_TRUE(src->swizzle[i] == translate_swizzle(swizzle[i]));
   }
}
|
|
|
|
TEST_F(nir_opt_shrink_vectors_test, opt_shrink_vectors_load_const_trailing_component_only)
{
   /* Unused trailing channels of a load_const have to be dropped:
    *
    *    vec4 32 ssa_1 = load_const (1.0, 2.0, 3.0, 4.0)
    *    vec1 32 ssa_2 = mov ssa_1.x
    *
    * is expected to become
    *
    *    vec1 32 ssa_1 = load_const (1.0)
    *    vec1 32 ssa_2 = mov ssa_1.x
    */
   nir_def *constants = nir_imm_vec4(b, 1.0, 2.0, 3.0, 4.0);

   /* Scalar mov that reads only the first channel of the constant. */
   nir_def *mov = nir_build_alu1(b, nir_op_mov, constants);
   nir_alu_instr *mov_instr = nir_instr_as_alu(mov->parent_instr);
   mov->num_components = 1;
   set_swizzle(&mov_instr->src[0], "x");

   nir_store_var(b, out_var, mov, 1);

   /* The first run must report progress and leave a valid shader. */
   ASSERT_TRUE(nir_opt_shrink_vectors(b->shader, true));
   nir_validate_shader(b->shader, NULL);

   /* Only the channel holding 1.0 survives. */
   ASSERT_TRUE(constants->num_components == 1);
   nir_load_const_instr *constants_instr = nir_instr_as_load_const(constants->parent_instr);
   ASSERT_TRUE(nir_const_value_as_float(constants_instr->value[0], 32) == 1.0);

   /* A second run must find nothing left to shrink. */
   ASSERT_FALSE(nir_opt_shrink_vectors(b->shader, true));
}
|
|
|
|
TEST_F(nir_opt_shrink_vectors_test, opt_shrink_vectors_alu_trailing_component_only)
{
   /* Unused trailing channels of an ALU instruction have to be dropped:
    *
    *    vec4 32 ssa_1 = mov ssa_0.xyxx
    *    vec1 32 ssa_2 = mov ssa_1.x
    *
    * is expected to become
    *
    *    vec1 32 ssa_1 = mov ssa_0.x
    *    vec1 32 ssa_2 = mov ssa_1.x
    */

   /* Wide mov: four channels built from the two-component input. */
   nir_def *wide = nir_build_alu1(b, nir_op_mov, in_def);
   nir_alu_instr *wide_instr = nir_instr_as_alu(wide->parent_instr);
   set_swizzle(&wide_instr->src[0], "xyxx");
   wide->num_components = 4;

   /* Narrow mov: the only consumer, reading just channel x of the wide mov. */
   nir_def *narrow = nir_build_alu1(b, nir_op_mov, wide);
   nir_alu_instr *narrow_instr = nir_instr_as_alu(narrow->parent_instr);
   narrow->num_components = 1;
   set_swizzle(&narrow_instr->src[0], "x");

   nir_store_var(b, out_var, narrow, 1);

   /* The first run must report progress and leave a valid shader. */
   ASSERT_TRUE(nir_opt_shrink_vectors(b->shader, true));
   nir_validate_shader(b->shader, NULL);

   /* The wide mov is now a scalar that reads input channel x. */
   check_swizzle(&wide_instr->src[0], "x");
   ASSERT_TRUE(wide->num_components == 1);

   /* A second run must find nothing left to shrink. */
   ASSERT_FALSE(nir_opt_shrink_vectors(b->shader, true));
}
|
|
|
|
TEST_F(nir_opt_shrink_vectors_test, opt_shrink_vectors_simple)
{
   /* A simple chain where both unused and duplicated channels can go away:
    *
    *    vec4 32 ssa_2 = load_const (3.0, 1.0, 2.0, 1.0)
    *    vec4 32 ssa_3 = fadd ssa_1.xxxy, ssa_2.ywyz
    *    vec1 32 ssa_4 = fdot3 ssa_3.xzw ssa_3.xzw
    *
    * is expected to become
    *
    *    vec2 32 ssa_2 = load_const (1.0, 2.0)
    *    vec2 32 ssa_3 = fadd ssa_1, ssa_2
    *    vec1 32 ssa_4 = fdot3 ssa_3.xxy ssa_3.xxy
    */
   nir_def *constants = nir_imm_vec4(b, 3.0, 1.0, 2.0, 1.0);

   /* vec4 fadd of the swizzled input and the swizzled constant. */
   nir_def *sum = nir_build_alu2(b, nir_op_fadd, in_def, constants);
   nir_alu_instr *sum_instr = nir_instr_as_alu(sum->parent_instr);
   set_swizzle(&sum_instr->src[0], "xxxy");
   set_swizzle(&sum_instr->src[1], "ywyz");
   sum->num_components = 4;

   /* fdot3 of the sum with itself, reading channels x, z and w only. */
   nir_def *dot = nir_build_alu2(b, nir_op_fdot3, sum, sum);
   nir_alu_instr *dot_instr = nir_instr_as_alu(dot->parent_instr);
   for (unsigned s = 0; s < 2; s++)
      set_swizzle(&dot_instr->src[s], "xzw");

   nir_store_var(b, out_var, dot, 1);

   /* The first run must report progress and leave a valid shader. */
   ASSERT_TRUE(nir_opt_shrink_vectors(b->shader, true));
   nir_validate_shader(b->shader, NULL);

   /* The constant keeps only the two values that are actually consumed. */
   ASSERT_TRUE(constants->num_components == 2);
   nir_load_const_instr *constants_instr = nir_instr_as_load_const(constants->parent_instr);
   ASSERT_TRUE(nir_const_value_as_float(constants_instr->value[0], 32) == 1.0);
   ASSERT_TRUE(nir_const_value_as_float(constants_instr->value[1], 32) == 2.0);

   /* The fadd is a plain vec2 add with identity swizzles on both sources. */
   ASSERT_TRUE(sum->num_components == 2);
   check_swizzle(&sum_instr->src[0], "xy");
   check_swizzle(&sum_instr->src[1], "xy");

   /* The fdot3 now reads the merged channel twice. */
   check_swizzle(&dot_instr->src[0], "xxy");
   check_swizzle(&dot_instr->src[1], "xxy");

   /* A second run must find nothing left to shrink. */
   ASSERT_FALSE(nir_opt_shrink_vectors(b->shader, true));
   nir_validate_shader(b->shader, NULL);
}
|
|
|
|
TEST_F(nir_opt_shrink_vectors_test, opt_shrink_vectors_vec8)
{
   /* vec8 case. Shrinking would give a vec6 load_const and a vec7 fadd,
    * which is not allowed, so both stay vec8. The channels must still be
    * reused where possible, with the unused ones moved to the end.
    *
    *    vec8 32 ssa_2 = load_const (1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 6.0)
    *    vec8 32 ssa_3 = fadd ssa_1.xxxxxxxy, ssa_2.afhdefgh
    *    vec1 32 ssa_4 = fdot8 ssa_3.accdefgh ssa_3.accdefgh
    *
    * is expected to become
    *
    *    vec8 32 ssa_2 = load_const (1.0, 3.0, 4.0, 5.0, 2.0, 6.0, .., ..)
    *    vec8 32 ssa_3 = fadd ssa_1.xxxxxxy_ ssa_2.afbcdef_
    *    vec1 32 ssa_4 = fdot8 ssa_3.abbcdefg ssa_3.abbcdefg
    */
   const double initial[8] = { 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 6.0 };
   nir_const_value v[8];
   for (unsigned c = 0; c < 8; c++)
      v[c] = nir_const_value_for_float(initial[c], 32);
   nir_def *constants = nir_build_imm(b, 8, 32, v);

   /* vec8 fadd of the swizzled input and the swizzled constant. */
   nir_def *sum = nir_build_alu2(b, nir_op_fadd, in_def, constants);
   nir_alu_instr *sum_instr = nir_instr_as_alu(sum->parent_instr);
   set_swizzle(&sum_instr->src[0], "xxxxxxxy");
   set_swizzle(&sum_instr->src[1], "afhdefgh");
   sum->num_components = 8;

   /* fdot8 of the sum with itself; channel b of the sum is never read. */
   nir_def *dot = nir_build_alu2(b, nir_op_fdot8, sum, sum);
   nir_alu_instr *dot_instr = nir_instr_as_alu(dot->parent_instr);
   for (unsigned s = 0; s < 2; s++)
      set_swizzle(&dot_instr->src[s], "accdefgh");

   nir_store_var(b, out_var, dot, 1);

   /* The first run must report progress and leave a valid shader. */
   ASSERT_TRUE(nir_opt_shrink_vectors(b->shader, true));
   nir_validate_shader(b->shader, NULL);

   /* Still a vec8, but the six live values are packed at the front. */
   ASSERT_TRUE(constants->num_components == 8);
   nir_load_const_instr *constants_instr = nir_instr_as_load_const(constants->parent_instr);
   const double packed[6] = { 1.0, 3.0, 4.0, 5.0, 2.0, 6.0 };
   for (unsigned c = 0; c < 6; c++) {
      ASSERT_TRUE(nir_const_value_as_float(constants_instr->value[c], 32) == packed[c]);
   }

   /* Only the first seven fadd channels are meaningful afterwards. */
   ASSERT_TRUE(sum->num_components == 8);
   check_swizzle(&sum_instr->src[0], "xxxxxxy");
   check_swizzle(&sum_instr->src[1], "afbcdef");

   check_swizzle(&dot_instr->src[0], "abbcdefg");
   check_swizzle(&dot_instr->src[1], "abbcdefg");

   /* A second run must find nothing left to do. */
   ASSERT_FALSE(nir_opt_shrink_vectors(b->shader, true));
   nir_validate_shader(b->shader, NULL);
}
|
|
|
|
TEST_F(nir_opt_shrink_vectors_test, opt_shrink_phis_loop_simple)
{
   /* A loop-carried vec4 phi of which only .y is ever read has to be shrunk
    * to a single component:
    *
    *    v = vec4(0.0, 0.0, 0.0, 0.0);
    *    while (v.y < 3) {
    *       v.y += 1.0;
    *    }
    *
    * This mimics nir for loops that come out of nine+ttn.
    */
   nir_def *init = nir_imm_vec4(b, 0.0, 0.0, 0.0, 0.0);
   nir_def *step = nir_imm_float(b, 1.0);
   nir_def *limit = nir_imm_float(b, 3.0);

   /* The phi is created now so the loop body can refer to it, but it is
    * only inserted into the loop header once the loop is complete.
    */
   nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
   nir_def *phi_def = &phi->def;

   nir_loop *loop = nir_push_loop(b);

   nir_def_init(&phi->instr, &phi->def, init->num_components, init->bit_size);

   /* Value entering the loop. */
   nir_phi_instr_add_src(phi, init->parent_instr->block, init);

   /* Exit condition, reading channel y of the phi. */
   nir_def *exit_cond = nir_fge(b, phi_def, limit);
   nir_alu_instr *fge_alu_instr = nir_instr_as_alu(exit_cond->parent_instr);
   exit_cond->num_components = 1;
   fge_alu_instr->src[0].swizzle[0] = 1;

   nir_if *nif = nir_push_if(b, exit_cond);
   {
      nir_jump_instr *brk = nir_jump_instr_create(b->shader, nir_jump_break);
      nir_builder_instr_insert(b, &brk->instr);
   }
   nir_pop_if(b, nif);

   /* Increment, also reading channel y of the phi. */
   nir_def *next_y = nir_fadd(b, phi_def, step);
   nir_alu_instr *fadd_alu_instr = nir_instr_as_alu(next_y->parent_instr);
   next_y->num_components = 1;
   fadd_alu_instr->src[0].swizzle[0] = 1;

   /* Back-edge value: the phi with channel y replaced by the increment. */
   nir_scalar srcs[4] = {
      nir_get_scalar(phi_def, 0),
      nir_get_scalar(next_y, 0),
      nir_get_scalar(phi_def, 2),
      nir_get_scalar(phi_def, 3),
   };
   nir_def *vec = nir_vec_scalars(b, srcs, 4);

   nir_phi_instr_add_src(phi, vec->parent_instr->block, vec);

   nir_pop_loop(b, loop);

   /* Finally place the phi at the top of the loop header block. */
   b->cursor = nir_before_block(nir_loop_first_block(loop));
   nir_builder_instr_insert(b, &phi->instr);

   /* Generated nir:
    *
    * impl main {
    *    block block_0:
    *    * preds: *
    *    vec1 32 ssa_0 = deref_var &in (shader_in vec2)
    *    vec2 32 ssa_1 = intrinsic load_deref (ssa_0) (access=0)
    *    vec4 32 ssa_2 = load_const (0x00000000, 0x00000000, 0x00000000, 0x00000000) = (0.000000, 0.000000, 0.000000, 0.000000)
    *    vec1 32 ssa_3 = load_const (0x3f800000 = 1.000000)
    *    vec1 32 ssa_4 = load_const (0x40400000 = 3.000000)
    *    * succs: block_1 *
    *    loop {
    *       block block_1:
    *       * preds: block_0 block_4 *
    *       vec4 32 ssa_8 = phi block_0: ssa_2, block_4: ssa_7
    *       vec1  1 ssa_5 = fge ssa_8.y, ssa_4
    *       * succs: block_2 block_3 *
    *       if ssa_5 {
    *          block block_2:
    *          * preds: block_1 *
    *          break
    *          * succs: block_5 *
    *       } else {
    *          block block_3:
    *          * preds: block_1 *
    *          * succs: block_4 *
    *       }
    *       block block_4:
    *       * preds: block_3 *
    *       vec1 32 ssa_6 = fadd ssa_8.y, ssa_3
    *       vec4 32 ssa_7 = vec4 ssa_8.x, ssa_6, ssa_8.z, ssa_8.w
    *       * succs: block_1 *
    *    }
    *    block block_5:
    *    * preds: block_2 *
    *    * succs: block_6 *
    *    block block_6:
    * }
    */

   nir_validate_shader(b->shader, NULL);

   /* The phi must end up scalar, with both readers switched to channel x. */
   ASSERT_TRUE(nir_opt_shrink_vectors(b->shader, true));
   ASSERT_TRUE(phi_def->num_components == 1);
   check_swizzle(&fge_alu_instr->src[0], "x");
   check_swizzle(&fadd_alu_instr->src[0], "x");

   nir_validate_shader(b->shader, NULL);
}
|
|
|
|
TEST_F(nir_opt_shrink_vectors_test, opt_shrink_phis_loop_swizzle)
{
   /* Same idea as the simple loop test, but the back-edge value shuffles
    * channels, so the phi can only be shrunk to two components:
    *
    *    v = vec4(0.0, 0.0, 0.0, 0.0);
    *    while (v.z < 3) {
    *       v = vec4(v.x, v.z + 1, v.y, v.w);
    *    }
    */
   nir_def *init = nir_imm_vec4(b, 0.0, 0.0, 0.0, 0.0);
   nir_def *step = nir_imm_float(b, 1.0);
   nir_def *limit = nir_imm_float(b, 3.0);

   /* The phi is created now so the loop body can refer to it, but it is
    * only inserted into the loop header once the loop is complete.
    */
   nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
   nir_def *phi_def = &phi->def;

   nir_loop *loop = nir_push_loop(b);

   nir_def_init(&phi->instr, &phi->def, init->num_components, init->bit_size);

   /* Value entering the loop. */
   nir_phi_instr_add_src(phi, init->parent_instr->block, init);

   /* Exit condition, reading channel z of the phi. */
   nir_def *exit_cond = nir_fge(b, phi_def, limit);
   nir_alu_instr *fge_alu_instr = nir_instr_as_alu(exit_cond->parent_instr);
   exit_cond->num_components = 1;
   fge_alu_instr->src[0].swizzle[0] = 2;

   nir_if *nif = nir_push_if(b, exit_cond);
   {
      nir_jump_instr *brk = nir_jump_instr_create(b->shader, nir_jump_break);
      nir_builder_instr_insert(b, &brk->instr);
   }
   nir_pop_if(b, nif);

   /* Increment, also reading channel z of the phi. */
   nir_def *next = nir_fadd(b, phi_def, step);
   nir_alu_instr *fadd_alu_instr = nir_instr_as_alu(next->parent_instr);
   next->num_components = 1;
   fadd_alu_instr->src[0].swizzle[0] = 2;

   /* Back-edge value: (phi.x, incremented z, phi.y, phi.w). */
   nir_scalar srcs[4] = {
      nir_get_scalar(phi_def, 0),
      nir_get_scalar(next, 0),
      nir_get_scalar(phi_def, 1),
      nir_get_scalar(phi_def, 3),
   };
   nir_def *vec = nir_vec_scalars(b, srcs, 4);

   nir_phi_instr_add_src(phi, vec->parent_instr->block, vec);

   nir_pop_loop(b, loop);

   /* Finally place the phi at the top of the loop header block. */
   b->cursor = nir_before_block(nir_loop_first_block(loop));
   nir_builder_instr_insert(b, &phi->instr);

   /* Generated nir:
    *
    * impl main {
    *    block block_0:
    *    * preds: *
    *    vec1 32 ssa_0 = deref_var &in (shader_in vec2)
    *    vec2 32 ssa_1 = intrinsic load_deref (ssa_0) (access=0)
    *    vec4 32 ssa_2 = load_const (0x00000000, 0x00000000, 0x00000000, 0x00000000) = (0.000000, 0.000000, 0.000000, 0.000000)
    *    vec1 32 ssa_3 = load_const (0x3f800000 = 1.000000)
    *    vec1 32 ssa_4 = load_const (0x40400000 = 3.000000)
    *    * succs: block_1 *
    *    loop {
    *       block block_1:
    *       * preds: block_0 block_4 *
    *       vec4 32 ssa_8 = phi block_0: ssa_2, block_4: ssa_7
    *       vec1  1 ssa_5 = fge ssa_8.z, ssa_4
    *       * succs: block_2 block_3 *
    *       if ssa_5 {
    *          block block_2:
    *          * preds: block_1 *
    *          break
    *          * succs: block_5 *
    *       } else {
    *          block block_3:
    *          * preds: block_1 *
    *          * succs: block_4 *
    *       }
    *       block block_4:
    *       * preds: block_3 *
    *       vec1 32 ssa_6 = fadd ssa_8.z, ssa_3
    *       vec4 32 ssa_7 = vec4 ssa_8.x, ssa_6, ssa_8.y, ssa_8.w
    *       * succs: block_1 *
    *    }
    *    block block_5:
    *    * preds: block_2 *
    *    * succs: block_6 *
    *    block block_6:
    * }
    */

   nir_validate_shader(b->shader, NULL);

   /* Two phi channels must remain, and both readers move to channel y. */
   ASSERT_TRUE(nir_opt_shrink_vectors(b->shader, true));
   ASSERT_TRUE(phi_def->num_components == 2);

   check_swizzle(&fge_alu_instr->src[0], "y");
   check_swizzle(&fadd_alu_instr->src[0], "y");

   nir_validate_shader(b->shader, NULL);
}
|
|
|
|
TEST_F(nir_opt_shrink_vectors_test, opt_shrink_phis_loop_phi_out)
{
   /* A phi that is consumed by an intrinsic must be left alone:
    *
    *    v = vec4(0.0, 0.0, 0.0, 0.0);
    *    while (v.y < 3) {
    *       v.y += 1.0;
    *    }
    *    out = v;
    */
   nir_def *init = nir_imm_vec4(b, 0.0, 0.0, 0.0, 0.0);
   nir_def *step = nir_imm_float(b, 1.0);
   nir_def *limit = nir_imm_float(b, 3.0);

   /* The phi is created now so the loop body can refer to it, but it is
    * only inserted into the loop header once the loop is complete.
    */
   nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
   nir_def *phi_def = &phi->def;

   nir_loop *loop = nir_push_loop(b);

   nir_def_init(&phi->instr, &phi->def, init->num_components, init->bit_size);

   /* Value entering the loop. */
   nir_phi_instr_add_src(phi, init->parent_instr->block, init);

   /* Exit condition, reading channel y of the phi. */
   nir_def *exit_cond = nir_fge(b, phi_def, limit);
   nir_alu_instr *fge_alu_instr = nir_instr_as_alu(exit_cond->parent_instr);
   exit_cond->num_components = 1;
   fge_alu_instr->src[0].swizzle[0] = 1;

   nir_if *nif = nir_push_if(b, exit_cond);
   {
      nir_jump_instr *brk = nir_jump_instr_create(b->shader, nir_jump_break);
      nir_builder_instr_insert(b, &brk->instr);
   }
   nir_pop_if(b, nif);

   /* Increment, also reading channel y of the phi. */
   nir_def *next_y = nir_fadd(b, phi_def, step);
   nir_alu_instr *fadd_alu_instr = nir_instr_as_alu(next_y->parent_instr);
   next_y->num_components = 1;
   fadd_alu_instr->src[0].swizzle[0] = 1;

   /* Back-edge value: the phi with channel y replaced by the increment. */
   nir_scalar srcs[4] = {
      nir_get_scalar(phi_def, 0),
      nir_get_scalar(next_y, 0),
      nir_get_scalar(phi_def, 2),
      nir_get_scalar(phi_def, 3),
   };
   nir_def *vec = nir_vec_scalars(b, srcs, 4);

   nir_phi_instr_add_src(phi, vec->parent_instr->block, vec);

   nir_pop_loop(b, loop);

   /* After the loop, store all four phi channels to a dedicated vec4
    * output; this replaces the fixture's default out_var for this test.
    */
   out_var = nir_variable_create(b->shader, nir_var_shader_out,
                                 glsl_vec_type(4), "out4");
   nir_store_var(b, out_var, phi_def, BITFIELD_MASK(4));

   /* Finally place the phi at the top of the loop header block. */
   b->cursor = nir_before_block(nir_loop_first_block(loop));
   nir_builder_instr_insert(b, &phi->instr);

   /* Generated nir:
    *
    * impl main {
    *    block block_0:
    *    * preds: *
    *    vec1 32 ssa_0 = deref_var &in (shader_in vec2)
    *    vec2 32 ssa_1 = intrinsic load_deref (ssa_0) (access=0)
    *    vec4 32 ssa_2 = load_const (0x00000000, 0x00000000, 0x00000000, 0x00000000) = (0.000000, 0.000000, 0.000000, 0.000000)
    *    vec1 32 ssa_3 = load_const (0x3f800000 = 1.000000)
    *    vec1 32 ssa_4 = load_const (0x40400000 = 3.000000)
    *    * succs: block_1 *
    *    loop {
    *       block block_1:
    *       * preds: block_0 block_4 *
    *       vec4 32 ssa_9 = phi block_0: ssa_2, block_4: ssa_7
    *       vec1  1 ssa_5 = fge ssa_9.y, ssa_4
    *       * succs: block_2 block_3 *
    *       if ssa_5 {
    *          block block_2:
    *          * preds: block_1 *
    *          break
    *          * succs: block_5 *
    *       } else {
    *          block block_3:
    *          * preds: block_1 *
    *          * succs: block_4 *
    *       }
    *       block block_4:
    *       * preds: block_3 *
    *       vec1 32 ssa_6 = fadd ssa_9.y, ssa_3
    *       vec4 32 ssa_7 = vec4 ssa_9.x, ssa_6, ssa_9.z, ssa_9.w
    *       * succs: block_1 *
    *    }
    *    block block_5:
    *    * preds: block_2 *
    *    vec1 32 ssa_8 = deref_var &out4 (shader_out vec4)
    *    intrinsic store_deref (ssa_8, ssa_9) (wrmask=xyzw *15*, access=0)
    *    * succs: block_6 *
    *    block block_6:
    * }
    */

   nir_validate_shader(b->shader, NULL);

   /* No progress is expected and the phi keeps all four components. */
   ASSERT_FALSE(nir_opt_shrink_vectors(b->shader, true));
   ASSERT_TRUE(phi_def->num_components == 4);
}
|