From 06ab73768612084b0ea614c0da87f2f63587b788 Mon Sep 17 00:00:00 2001 From: Sagar Ghuge Date: Mon, 28 Jun 2021 17:41:20 -0700 Subject: [PATCH] nir: Add optimizations for iadd3 This patch also adds has_iadd3 bit to give more control if backend supports ternary add instruction or not. v2: - Add patterns in late optimization (Connor Abbott) Suggested-by: Alyssa/Jason Signed-off-by: Sagar Ghuge Reviewed-by: Alyssa Rosenzweig Reviewed-by: Jason Ekstrand Part-of: --- src/compiler/nir/nir.h | 3 +++ src/compiler/nir/nir_opt_algebraic.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 10af740ca9d..7da57d549d1 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3433,6 +3433,9 @@ typedef struct nir_shader_compiler_options { /* Lowers when rotate instruction is not supported */ bool lower_rotate; + /** Backend supports ternary addition */ + bool has_iadd3; + /** * Backend supports imul24, and would like to use it (when possible) * for address/offset calculation. If true, driver should call diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index eef8027c7f6..6bb9884acbd 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -2269,6 +2269,10 @@ late_optimizations = [ (('~fadd@32', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma32'), (('~fadd@64', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma64'), + (('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), 'options->has_iadd3'), + (('iadd', ('isub(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, ('ineg', b), c), 'options->has_iadd3'), + (('isub', ('isub(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, ('ineg', b), ('ineg', c)), 'options->has_iadd3'), + # These are duplicated from the main optimizations table. The late # patterns that rearrange expressions like x - .5 < 0 to x < .5 can create # new patterns like these. The patterns that compare with zero are removed