From da77188d7db58bda61769646596eda2f4c3412fa Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Tue, 12 Nov 2024 13:24:17 +0100 Subject: [PATCH] etnaviv/ml: Implement FullyConnected Lower FullyConnected to a regular convolution so it executes in the NN cores. Reviewed-by: Philipp Zabel Part-of: --- src/gallium/drivers/etnaviv/etnaviv_ml.c | 10 ++++ src/gallium/drivers/etnaviv/etnaviv_ml.h | 1 + src/gallium/drivers/etnaviv/etnaviv_ml_nn.c | 60 +++++++++++++++++++++ src/gallium/drivers/etnaviv/etnaviv_ml_nn.h | 5 ++ 4 files changed, 76 insertions(+) diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml.c b/src/gallium/drivers/etnaviv/etnaviv_ml.c index 9f839038284..46ef5eb1d8c 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml.c +++ b/src/gallium/drivers/etnaviv/etnaviv_ml.c @@ -413,6 +413,12 @@ lower_operations(struct etna_ml_subgraph *subgraph, break; } + case PIPE_ML_OPERATION_TYPE_FULLY_CONNECTED: { + struct etna_operation *operation = calloc(1, sizeof(*operation)); + etna_ml_lower_fully_connected(subgraph, poperation, operation); + list_addtail(&operation->link, etna_operations); + break; + } default: unreachable("Unsupported ML operation type"); } @@ -493,6 +499,10 @@ count_tensors(const struct pipe_ml_operation *poperations, tensor_count = MAX2(tensor_count, poperation->conv.weight_tensor->index); tensor_count = MAX2(tensor_count, poperation->conv.bias_tensor->index); break; + case PIPE_ML_OPERATION_TYPE_FULLY_CONNECTED: + tensor_count = MAX2(tensor_count, poperation->fcon.weight_tensor->index); + tensor_count = MAX2(tensor_count, poperation->fcon.bias_tensor->index); + break; case PIPE_ML_OPERATION_TYPE_PAD: case PIPE_ML_OPERATION_TYPE_ADD: case PIPE_ML_OPERATION_TYPE_CONCATENATION: diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml.h b/src/gallium/drivers/etnaviv/etnaviv_ml.h index 382ed1615d6..69422990b47 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml.h +++ b/src/gallium/drivers/etnaviv/etnaviv_ml.h @@ -71,6 +71,7 @@ struct etna_operation { bool 
addition; bool depthwise; bool pointwise; + bool fully_connected; bool pooling_first_pixel; bool padding_same; bool relu; diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c index 99250b5a6d8..d5ea56b65fd 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c +++ b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c @@ -656,6 +656,52 @@ etna_ml_lower_add(struct etna_ml_subgraph *subgraph, } } +void +etna_ml_lower_fully_connected(struct etna_ml_subgraph *subgraph, + const struct pipe_ml_operation *poperation, + struct etna_operation *operation) +{ + assert(poperation->type == PIPE_ML_OPERATION_TYPE_FULLY_CONNECTED); + + operation->type = ETNA_JOB_TYPE_NN; + operation->addition = false; + operation->depthwise = false; + operation->pointwise = false; + operation->fully_connected = true; + operation->pooling_first_pixel = false; + operation->padding_same = false; + operation->stride = 1; + + operation->input_tensors[0] = poperation->input_tensors[0]->index; + operation->input_count = 1; + operation->input_width = poperation->input_tensors[0]->dims[1]; + operation->input_height = 1; + operation->input_channels = 1; + operation->input_zero_point = poperation->input_tensors[0]->zero_point; + operation->input_scale = poperation->input_tensors[0]->scale; + operation->input_tensor_sizes[0] = operation->input_width * + operation->input_height * + operation->input_channels; + + operation->output_tensors[0] = poperation->output_tensors[0]->index; + operation->output_width = 1; + operation->output_height = 1; + operation->output_channels = poperation->output_tensors[0]->dims[1]; + operation->output_zero_point = poperation->output_tensors[0]->zero_point; + operation->output_scale = poperation->output_tensors[0]->scale; + operation->output_tensor_sizes[0] = operation->output_width * + operation->output_height * + operation->output_channels; + + pipe_resource_reference(&operation->weight_tensor, poperation->fcon.weight_tensor->resource); 
+ operation->weight_width = poperation->fcon.weight_tensor->dims[1]; + operation->weight_height = 1; + operation->weight_zero_point = poperation->fcon.weight_tensor->zero_point; + operation->weight_scale = poperation->fcon.weight_tensor->scale; + + pipe_resource_reference(&operation->bias_tensor, poperation->fcon.bias_tensor->resource); +} + void etna_ml_calc_addition_sizes(unsigned *input_width, unsigned *input_height, unsigned *input_channels, unsigned *output_width, unsigned *output_height, unsigned *output_channels) @@ -729,6 +775,20 @@ create_nn_config(struct etna_ml_subgraph *subgraph, const struct etna_operation SWAP(output_width, output_height); } + if (operation->fully_connected) { + unsigned original_input_width = input_width; + input_width = 15; + while (original_input_width % input_width) + input_width--; + unsigned original_input_height = original_input_width / input_width; + input_height = 15; + while (original_input_height % input_height) + input_height--; + input_channels = original_input_height / input_height; + weight_width = input_width; + weight_height = input_height; + } + etna_bo_cpu_prep(bo, DRM_ETNA_PREP_WRITE); struct etna_nn_params *map = etna_bo_map(bo); diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.h b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.h index 6f0c546cf9e..329acbe6431 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.h +++ b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.h @@ -32,6 +32,11 @@ etna_ml_lower_add(struct etna_ml_subgraph *subgraph, const struct pipe_ml_operation *poperation, struct etna_operation *operation); +void +etna_ml_lower_fully_connected(struct etna_ml_subgraph *subgraph, + const struct pipe_ml_operation *poperation, + struct etna_operation *operation); + void etna_ml_compile_operation_nn(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation,