From c75b512673d95e032028fa2de7d2339df9f84b32 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Fri, 9 Feb 2024 16:26:39 +0100 Subject: [PATCH] etnaviv/nn: Fix calculation of remaining out channels We were wrongly counting the number of output channels remaining for the last superblock when the number of output channels isn't divisible by the number of superblocks. MobileNetV1: 9.991ms -> 9.991ms SSDLite MobileDet: 32.692ms -> 27ms Reviewed-by: Philipp Zabel Part-of: --- src/gallium/drivers/etnaviv/etnaviv_ml_nn.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c index 6eda18c86ca..7ea075e7ff7 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c +++ b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c @@ -532,10 +532,6 @@ calc_superblocks(struct etna_context *ctx, const struct etna_operation *operatio unsigned num_kernels = DIV_ROUND_UP(output_channels, kernels_per_core * nn_core_count); unsigned superblocks = DIV_ROUND_UP(DIV_ROUND_UP(output_channels, nn_core_count), num_kernels); - /* TODO: Remove this once we support superblocks that don't divide output_channels in the compressed buffer */ - while(output_channels % superblocks) - superblocks++; - return superblocks; } @@ -984,7 +980,7 @@ write_core_6(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigned core, co unsigned kernels_in_superblock = DIV_ROUND_UP(kernels_per_core, superblocks); if (superblock == superblocks - 1) - kernels_in_superblock = DIV_ROUND_UP(kernels_per_core, superblocks) - kernels_per_core % superblocks; + kernels_in_superblock = kernels_per_core - kernels_in_superblock * (superblocks - 1); for (unsigned kernel = 0; kernel < kernels_in_superblock; kernel++) { unsigned out_channel = core * kernels_in_superblock + kernel + superblock * DIV_ROUND_UP(kernels_per_core, superblocks) * cores_used; @@ -1063,7 +1059,7 @@ write_core_interleaved(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigne unsigned 
kernels_in_superblock = DIV_ROUND_UP(kernels_per_core, superblocks); if (superblock == superblocks - 1) - kernels_in_superblock = DIV_ROUND_UP(kernels_per_core, superblocks) - kernels_per_core % superblocks; + kernels_in_superblock = kernels_per_core - kernels_in_superblock * (superblocks - 1); for (unsigned z = 0; z < input_channels; z++) { for (unsigned kernel = 0; kernel < kernels_in_superblock; kernel++) { @@ -1148,7 +1144,7 @@ write_core_sequential(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigned unsigned kernels_in_superblock = DIV_ROUND_UP(kernels_per_core, superblocks); if (superblock == superblocks - 1) - kernels_in_superblock = DIV_ROUND_UP(kernels_per_core, superblocks) - kernels_per_core % superblocks; + kernels_in_superblock = kernels_per_core - kernels_in_superblock * (superblocks - 1); for (unsigned kernel = 0; kernel < kernels_in_superblock; kernel++) { unsigned out_channel = core * kernels_in_superblock + kernel + superblock * DIV_ROUND_UP(kernels_per_core, superblocks) * cores_used;