diff --git a/src/etnaviv/ci/etnaviv-vipnano-fails.txt b/src/etnaviv/ci/etnaviv-vipnano-fails.txt index 3645e91ea16..7b482a816c8 100644 --- a/src/etnaviv/ci/etnaviv-vipnano-fails.txt +++ b/src/etnaviv/ci/etnaviv-vipnano-fails.txt @@ -290,80 +290,3 @@ DepthwiseConv2D.Op/input_size_5_weight_size_5_channels_256_stride_2_padding_same DepthwiseConv2D.Op/input_size_5_weight_size_5_channels_32_stride_2_padding_same_1_is_signed_0,Fail YoloX.Whole,Fail -YoloXParam.Op/yolox005,Fail -YoloXParam.Op/yolox007,Fail -YoloXParam.Op/yolox008,Fail -YoloXParam.Op/yolox009,Fail -YoloXParam.Op/yolox010,Fail -YoloXParam.Op/yolox012,Fail -YoloXParam.Op/yolox014,Fail -YoloXParam.Op/yolox016,Fail -YoloXParam.Op/yolox017,Fail -YoloXParam.Op/yolox018,Fail -YoloXParam.Op/yolox019,Fail -YoloXParam.Op/yolox021,Fail -YoloXParam.Op/yolox022,Fail -YoloXParam.Op/yolox024,Fail -YoloXParam.Op/yolox025,Fail -YoloXParam.Op/yolox027,Fail -YoloXParam.Op/yolox029,Fail -YoloXParam.Op/yolox031,Fail -YoloXParam.Op/yolox032,Fail -YoloXParam.Op/yolox033,Fail -YoloXParam.Op/yolox034,Fail -YoloXParam.Op/yolox036,Fail -YoloXParam.Op/yolox037,Fail -YoloXParam.Op/yolox039,Fail -YoloXParam.Op/yolox040,Fail -YoloXParam.Op/yolox042,Fail -YoloXParam.Op/yolox044,Fail -YoloXParam.Op/yolox046,Fail -YoloXParam.Op/yolox047,Fail -YoloXParam.Op/yolox052,Fail -YoloXParam.Op/yolox053,Fail -YoloXParam.Op/yolox054,Fail -YoloXParam.Op/yolox055,Fail -YoloXParam.Op/yolox056,Fail -YoloXParam.Op/yolox058,Fail -YoloXParam.Op/yolox059,Fail -YoloXParam.Op/yolox062,Fail -YoloXParam.Op/yolox063,Fail -YoloXParam.Op/yolox064,Fail -YoloXParam.Op/yolox065,Fail -YoloXParam.Op/yolox067,Fail -YoloXParam.Op/yolox068,Fail -YoloXParam.Op/yolox071,Fail -YoloXParam.Op/yolox072,Fail -YoloXParam.Op/yolox073,Fail -YoloXParam.Op/yolox074,Fail -YoloXParam.Op/yolox076,Fail -YoloXParam.Op/yolox078,Fail -YoloXParam.Op/yolox080,Fail -YoloXParam.Op/yolox081,Fail -YoloXParam.Op/yolox082,Fail -YoloXParam.Op/yolox083,Fail -YoloXParam.Op/yolox085,Fail 
-YoloXParam.Op/yolox087,Fail -YoloXParam.Op/yolox089,Fail -YoloXParam.Op/yolox090,Fail -YoloXParam.Op/yolox091,Fail -YoloXParam.Op/yolox092,Fail -YoloXParam.Op/yolox094,Fail -YoloXParam.Op/yolox095,Fail -YoloXParam.Op/yolox096,Fail -YoloXParam.Op/yolox097,Fail -YoloXParam.Op/yolox100,Fail -YoloXParam.Op/yolox101,Fail -YoloXParam.Op/yolox102,Fail -YoloXParam.Op/yolox106,Fail -YoloXParam.Op/yolox107,Fail -YoloXParam.Op/yolox108,Fail -YoloXParam.Op/yolox111,Fail -YoloXParam.Op/yolox112,Fail -YoloXParam.Op/yolox113,Fail -YoloXParam.Op/yolox117,Fail -YoloXParam.Op/yolox118,Fail -YoloXParam.Op/yolox119,Fail -YoloXParam.Op/yolox122,Fail -YoloXParam.Op/yolox123,Fail -YoloXParam.Op/yolox124,Fail diff --git a/src/etnaviv/ci/etnaviv-vipnano-si-plus-fails.txt b/src/etnaviv/ci/etnaviv-vipnano-si-plus-fails.txt index 84a12ab57e9..ae777e39c2a 100644 --- a/src/etnaviv/ci/etnaviv-vipnano-si-plus-fails.txt +++ b/src/etnaviv/ci/etnaviv-vipnano-si-plus-fails.txt @@ -12,80 +12,3 @@ MobileDetParam.Op/mobiledet082,Fail MobileDet.Whole,Fail YoloX.Whole,Fail -YoloXParam.Op/yolox005,Fail -YoloXParam.Op/yolox007,Fail -YoloXParam.Op/yolox008,Fail -YoloXParam.Op/yolox009,Fail -YoloXParam.Op/yolox010,Fail -YoloXParam.Op/yolox012,Fail -YoloXParam.Op/yolox014,Fail -YoloXParam.Op/yolox016,Fail -YoloXParam.Op/yolox017,Fail -YoloXParam.Op/yolox018,Fail -YoloXParam.Op/yolox019,Fail -YoloXParam.Op/yolox021,Fail -YoloXParam.Op/yolox022,Fail -YoloXParam.Op/yolox024,Fail -YoloXParam.Op/yolox025,Fail -YoloXParam.Op/yolox027,Fail -YoloXParam.Op/yolox029,Fail -YoloXParam.Op/yolox031,Fail -YoloXParam.Op/yolox032,Fail -YoloXParam.Op/yolox033,Fail -YoloXParam.Op/yolox034,Fail -YoloXParam.Op/yolox036,Fail -YoloXParam.Op/yolox037,Fail -YoloXParam.Op/yolox039,Fail -YoloXParam.Op/yolox040,Fail -YoloXParam.Op/yolox042,Fail -YoloXParam.Op/yolox044,Fail -YoloXParam.Op/yolox046,Fail -YoloXParam.Op/yolox047,Fail -YoloXParam.Op/yolox052,Fail -YoloXParam.Op/yolox053,Fail -YoloXParam.Op/yolox054,Fail 
-YoloXParam.Op/yolox055,Fail -YoloXParam.Op/yolox056,Fail -YoloXParam.Op/yolox058,Fail -YoloXParam.Op/yolox059,Fail -YoloXParam.Op/yolox062,Fail -YoloXParam.Op/yolox063,Fail -YoloXParam.Op/yolox064,Fail -YoloXParam.Op/yolox065,Fail -YoloXParam.Op/yolox067,Fail -YoloXParam.Op/yolox068,Fail -YoloXParam.Op/yolox071,Fail -YoloXParam.Op/yolox072,Fail -YoloXParam.Op/yolox073,Fail -YoloXParam.Op/yolox074,Fail -YoloXParam.Op/yolox076,Fail -YoloXParam.Op/yolox078,Fail -YoloXParam.Op/yolox080,Fail -YoloXParam.Op/yolox081,Fail -YoloXParam.Op/yolox082,Fail -YoloXParam.Op/yolox083,Fail -YoloXParam.Op/yolox085,Fail -YoloXParam.Op/yolox087,Fail -YoloXParam.Op/yolox089,Fail -YoloXParam.Op/yolox090,Fail -YoloXParam.Op/yolox091,Fail -YoloXParam.Op/yolox092,Fail -YoloXParam.Op/yolox094,Fail -YoloXParam.Op/yolox095,Fail -YoloXParam.Op/yolox096,Fail -YoloXParam.Op/yolox097,Fail -YoloXParam.Op/yolox100,Fail -YoloXParam.Op/yolox101,Fail -YoloXParam.Op/yolox102,Fail -YoloXParam.Op/yolox106,Fail -YoloXParam.Op/yolox107,Fail -YoloXParam.Op/yolox108,Fail -YoloXParam.Op/yolox111,Fail -YoloXParam.Op/yolox112,Fail -YoloXParam.Op/yolox113,Fail -YoloXParam.Op/yolox117,Fail -YoloXParam.Op/yolox118,Fail -YoloXParam.Op/yolox119,Fail -YoloXParam.Op/yolox122,Fail -YoloXParam.Op/yolox123,Fail -YoloXParam.Op/yolox124,Fail diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml.c b/src/gallium/drivers/etnaviv/etnaviv_ml.c index 799d3913647..716c5526bd9 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml.c +++ b/src/gallium/drivers/etnaviv/etnaviv_ml.c @@ -55,7 +55,7 @@ etna_ml_create_tensor(struct etna_ml_subgraph *subgraph, unsigned idx, unsigned struct pipe_resource *res = tensors[idx]; if (res != NULL) { - assert(size == pipe_buffer_size(res) || size == pipe_buffer_size(res) / 2); + assert(size == sizes[idx]); return; } @@ -239,11 +239,19 @@ dump_graph(struct list_head *etna_operations) switch(operation->type) { case ETNA_JOB_TYPE_TP: ML_DBG("%3d %-4s %3d %3d", - i, "TP", operation->input_tensors[0], 
operation->output_tensor); + i, "TP", operation->input_tensors[0], operation->output_tensors[0]); break; case ETNA_JOB_TYPE_NN: ML_DBG("%3d %-4s %3d %3d in2: %3d", - i, "NN", operation->input_tensors[0], operation->output_tensor, operation->input_tensors[1]); + i, "NN", operation->input_tensors[0], operation->output_tensors[0], operation->input_tensors[1]); + break; + case ETNA_JOB_TYPE_CONCAT: + ML_DBG("%3d %-4s %3d %3d in2: %3d", + i, "CONC", operation->input_tensors[0], operation->output_tensors[0], operation->input_tensors[1]); + break; + case ETNA_JOB_TYPE_SPLIT: + ML_DBG("%3d %-4s %3d %3d out2: %3d", + i, "SPLIT", operation->input_tensors[0], operation->output_tensors[0], operation->output_tensors[1]); break; } ML_DBG("\n"); @@ -292,7 +300,7 @@ lower_operations(struct etna_ml_subgraph *subgraph, ML_DBG("Adding detranspose for convolution operation.\n"); struct etna_operation *detranspose = calloc(1, sizeof(*operation)); etna_ml_lower_detranspose(subgraph, operation, detranspose); - operation->output_tensor = detranspose->input_tensors[0]; + operation->output_tensors[0] = detranspose->input_tensors[0]; list_addtail(&detranspose->link, etna_operations); } break; @@ -305,38 +313,118 @@ lower_operations(struct etna_ml_subgraph *subgraph, if (needs_detranspose(poperations, count, poperation)) { struct etna_operation *detranspose = calloc(1, sizeof(*operation)); etna_ml_lower_detranspose(subgraph, operation, detranspose); - operation->output_tensor = detranspose->input_tensors[0]; + operation->output_tensors[0] = detranspose->input_tensors[0]; list_addtail(&detranspose->link, etna_operations); } break; } + case PIPE_ML_OPERATION_TYPE_CONCATENATION: { + bool do_transpose = needs_transpose(poperations, count, poperation); + + struct etna_operation *operation = calloc(1, sizeof(*operation)); + operation->type = ETNA_JOB_TYPE_CONCAT; + assert(poperation->input_count <= MAX_TENSORS); + unsigned input_size = 0; + for (int i = 0; i < poperation->input_count; i++) { + 
unsigned input_tensor = poperation->input_tensors[i]->index; + + if (do_transpose) { + struct etna_operation *operation = calloc(1, sizeof(*operation)); + etna_ml_lower_transpose(subgraph, poperation->input_tensors[i], operation, &input_tensor); + list_addtail(&operation->link, etna_operations); + } + + operation->input_tensors[i] = input_tensor; + operation->input_tensor_sizes[i] = poperation->input_tensors[i]->dims[1] * + poperation->input_tensors[i]->dims[2] * + poperation->input_tensors[i]->dims[3]; + input_size += operation->input_tensor_sizes[i]; /* running total of the input sizes; adding input_size to itself left it at 0 */ + } + operation->input_count = poperation->input_count; + + operation->output_tensors[0] = poperation->output_tensors[0]->index; + operation->output_width = poperation->output_tensors[0]->dims[1]; + operation->output_height = poperation->output_tensors[0]->dims[2]; + operation->output_channels = poperation->output_tensors[0]->dims[3]; + operation->output_tensor_sizes[0] = operation->output_width * + operation->output_height * + operation->output_channels; + + list_addtail(&operation->link, etna_operations); + + if (needs_detranspose(poperations, count, poperation)) { + struct etna_operation *detranspose = calloc(1, sizeof(*operation)); + etna_ml_lower_detranspose(subgraph, operation, detranspose); + operation->output_tensors[0] = detranspose->input_tensors[0]; + list_addtail(&detranspose->link, etna_operations); + } + + break; + } + case PIPE_ML_OPERATION_TYPE_SPLIT: { + struct etna_operation *operation = calloc(1, sizeof(*operation)); + operation->type = ETNA_JOB_TYPE_SPLIT; + + operation->input_tensors[0] = poperation->input_tensors[1]->index; + operation->input_tensor_sizes[0] = poperation->input_tensors[1]->dims[1] * + poperation->input_tensors[1]->dims[2] * + poperation->input_tensors[1]->dims[3]; + + assert(poperation->output_count <= MAX_TENSORS); + for (int i = 0; i < poperation->output_count; i++) { + operation->output_tensors[i] = poperation->output_tensors[i]->index; + operation->output_tensor_sizes[i] = 
poperation->output_tensors[i]->dims[1] * + poperation->output_tensors[i]->dims[2] * + poperation->output_tensors[i]->dims[3]; + } + operation->output_count = poperation->output_count; + + list_addtail(&operation->link, etna_operations); + + break; + } default: unreachable("Unsupported ML operation type"); } } - /* Create combined input tensors first */ list_for_each_entry(struct etna_operation, operation, etna_operations, link) { - if (operation->input_count == 1) - continue; + if (operation->type == ETNA_JOB_TYPE_CONCAT) { + etna_ml_create_tensor(subgraph, operation->output_tensors[0], operation->output_tensor_sizes[0]); - etna_ml_create_tensor(subgraph, operation->input_tensors[0], operation->input_tensor_size); + unsigned offset = 0; + for (int i = 0; i < operation->input_count; i++) { + reference_tensor_with_offset(subgraph, + operation->output_tensors[0], + operation->input_tensors[i], + offset, + operation->input_tensor_sizes[i]); + offset += operation->input_tensor_sizes[i]; + } + } else if (operation->type == ETNA_JOB_TYPE_SPLIT) { + etna_ml_create_tensor(subgraph, operation->input_tensors[0], operation->input_tensor_sizes[0]); - for (int i = 1; i < operation->input_count; i++) + unsigned offset = 0; + for (int i = 0; i < operation->output_count; i++) { + reference_tensor_with_offset(subgraph, + operation->input_tensors[0], + operation->output_tensors[i], + offset, + operation->output_tensor_sizes[i]); + offset += operation->output_tensor_sizes[i]; + } + } else if (operation->type == ETNA_JOB_TYPE_NN && operation->input_count > 1) { /* Add */ + etna_ml_destroy_tensor(subgraph, operation->input_tensors[0]); + etna_ml_create_tensor(subgraph, operation->input_tensors[0], operation->input_tensor_sizes[0] + + operation->input_tensor_sizes[1]); reference_tensor_with_offset(subgraph, operation->input_tensors[0], - operation->input_tensors[i], - i * operation->input_tensor_size / operation->input_count, - operation->input_tensor_size / operation->input_count); - - 
} - - /* Create all other input tensors */ - list_for_each_entry(struct etna_operation, operation, etna_operations, link) { - if (operation->input_count != 1) - continue; - - etna_ml_create_tensor(subgraph, operation->input_tensors[0], operation->input_tensor_size); + operation->input_tensors[1], + operation->input_tensor_sizes[0], + operation->input_tensor_sizes[1]); + } else { + etna_ml_create_tensor(subgraph, operation->input_tensors[0], operation->input_tensor_sizes[0]); + } } /* Create any output tensors that aren't inputs to other operations, these @@ -344,12 +432,11 @@ lower_operations(struct etna_ml_subgraph *subgraph, */ ML_DBG("Ensuring all output tensors have their memory backing.\n"); list_for_each_entry(struct etna_operation, operation, etna_operations, link) { - struct pipe_resource *res = etna_ml_get_tensor(subgraph, operation->output_tensor); + struct pipe_resource *res = etna_ml_get_tensor(subgraph, operation->output_tensors[0]); if (res != NULL) continue; - unsigned size = operation->output_width * operation->output_height * operation->output_channels; - etna_ml_create_tensor(subgraph, operation->output_tensor, size); + etna_ml_create_tensor(subgraph, operation->output_tensors[0], operation->output_tensor_sizes[0]); } if (DBG_ENABLED(ETNA_DBG_ML_MSGS)) @@ -377,6 +464,8 @@ count_tensors(const struct pipe_ml_operation *poperations, tensor_count = MAX2(tensor_count, poperation->conv.bias_tensor->index); break; case PIPE_ML_OPERATION_TYPE_ADD: + case PIPE_ML_OPERATION_TYPE_CONCATENATION: + case PIPE_ML_OPERATION_TYPE_SPLIT: break; default: unreachable("Unsupported ML operation type"); @@ -437,6 +526,9 @@ etna_ml_subgraph_create(struct pipe_context *pcontext, case ETNA_JOB_TYPE_TP: etna_ml_compile_operation_tp(subgraph, operation, &instruction); break; + case ETNA_JOB_TYPE_CONCAT: + case ETNA_JOB_TYPE_SPLIT: + continue; } util_dynarray_append(&subgraph->operations, struct etna_vip_instruction, instruction); @@ -452,17 +544,17 @@ 
etna_ml_subgraph_create(struct pipe_context *pcontext, } static void -dump_buffer(const uint32_t *ptr, unsigned size, char *name, int operation_nr, int suboperation_nr) +dump_buffer(const uint8_t *ptr, char *name, int operation_nr, int suboperation_nr, int offset, unsigned size) { char buffer[255]; snprintf(buffer, sizeof(buffer), "mesa-%s-%03u-%03u.bin", name, operation_nr, suboperation_nr); - ML_DBG("Dumping buffer from 0x%lx to %s\n", ptr, buffer); + ML_DBG("Dumping buffer from %p at offset %d with size %u to %s\n", (const void *)ptr, offset, size, buffer); /* %p/%u match the pointer and unsigned arguments */ FILE *f = fopen(buffer, "wb"); assert(f); - fwrite(ptr, 1, size, f); + fwrite(ptr + offset, 1, size, f); if(ferror(f)) { ML_DBG("Error in writing to file: %s\n", strerror(errno)); } @@ -471,10 +563,12 @@ dump_buffer(const uint32_t *ptr, unsigned size, char *name, int operation_nr, in } static void -dump_bo(struct etna_bo *bo, char *name, int operation_nr, int suboperation_nr) +dump_bo(struct etna_bo *bo, char *name, int operation_nr, int suboperation_nr, int offset, int size) { - const uint32_t *map = etna_bo_map(bo); - dump_buffer(map, etna_bo_size(bo), name, operation_nr, suboperation_nr); + const uint8_t *map = etna_bo_map(bo); + if (size == 0) + size = etna_bo_size(bo) - offset; + dump_buffer(map, name, operation_nr, suboperation_nr, offset, size); } static void @@ -576,12 +670,12 @@ etna_ml_subgraph_invoke(struct pipe_context *pctx, struct pipe_ml_subgraph *psub switch (operation->type) { case ETNA_JOB_TYPE_TP: for (unsigned j = 0; j < tp_core_count && operation->configs[j]; j++) { - dump_bo(operation->configs[j], "tp", i, j); + dump_bo(operation->configs[j], "tp", i, j, 0, 0); } break; case ETNA_JOB_TYPE_NN: - dump_bo(operation->configs[0], "nn", i, 0); - dump_bo(operation->coefficients, "compressed", i, 0); + dump_bo(operation->configs[0], "nn", i, 0, 0, 0); + dump_bo(operation->coefficients, "compressed", i, 0, 0, 0); break; default: unreachable("Unsupported ML operation type"); @@ -623,7 +717,7 @@ 
etna_ml_subgraph_invoke(struct pipe_context *pctx, struct pipe_ml_subgraph *psub close_batch(pctx); if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS)) - dump_buffer(ctx->stream->buffer, ctx->stream->offset * 4, "cmd", i, 0); + dump_buffer((uint8_t *)ctx->stream->buffer, "cmd", i, 0, 0, ctx->stream->offset * 4); pctx->flush(pctx, NULL, 0); @@ -631,11 +725,11 @@ etna_ml_subgraph_invoke(struct pipe_context *pctx, struct pipe_ml_subgraph *psub struct pipe_transfer *transfer = NULL; pipe_buffer_map(pctx, operation->input, PIPE_MAP_READ, &transfer); - dump_bo(etna_resource(operation->input)->bo, "input", i, 0); + dump_bo(etna_resource(operation->input)->bo, "input", i, 0, operation->input_offset, 0); pipe_buffer_unmap(pctx, transfer); pipe_buffer_map(pctx, operation->output, PIPE_MAP_READ, &transfer); - dump_bo(etna_resource(operation->output)->bo, "output", i, 0); + dump_bo(etna_resource(operation->output)->bo, "output", i, 0, operation->output_offset, 0); pipe_buffer_unmap(pctx, transfer); } diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml.h b/src/gallium/drivers/etnaviv/etnaviv_ml.h index 3b05618f13d..5e287eb0f5c 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml.h +++ b/src/gallium/drivers/etnaviv/etnaviv_ml.h @@ -25,6 +25,8 @@ enum etna_job_type { ETNA_JOB_TYPE_NN, ETNA_JOB_TYPE_TP, + ETNA_JOB_TYPE_CONCAT, /* Fake operation, won't execute on HW. Hack will go away after the move to NIR. */ + ETNA_JOB_TYPE_SPLIT, /* Fake operation, won't execute on HW. Hack will go away after the move to NIR. 
*/ }; enum etna_ml_tp_type { @@ -57,7 +59,7 @@ struct etna_vip_instruction { struct etna_bo *kernel; }; -#define MAX_INPUTS 10 +#define MAX_TENSORS 10 struct etna_operation { struct list_head link; @@ -73,16 +75,22 @@ struct etna_operation { unsigned stride; - unsigned input_tensors[MAX_INPUTS]; + unsigned input_tensors[MAX_TENSORS]; unsigned input_count; - unsigned input_tensor_size; + unsigned input_tensor_sizes[MAX_TENSORS]; + + /* The following apply to the first input tensor only */ unsigned input_width; unsigned input_height; unsigned input_channels; uint8_t input_zero_point; float input_scale; - unsigned output_tensor; + unsigned output_tensors[MAX_TENSORS]; + unsigned output_count; + unsigned output_tensor_sizes[MAX_TENSORS]; + + /* The following apply to the first output tensor only */ unsigned output_width; unsigned output_height; unsigned output_channels; diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c index 858409f990d..ee55f6451a8 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c +++ b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c @@ -3,6 +3,7 @@ * SPDX-License-Identifier: MIT */ +#include "pipe/p_state.h" #include "util/u_inlines.h" #include "etnaviv_context.h" @@ -473,7 +474,7 @@ etna_ml_lower_convolution(struct etna_ml_subgraph *subgraph, operation->input_zero_point = etna_tensor_zero_point(poperation->input_tensors[0]); operation->input_scale = poperation->input_tensors[0]->scale; - operation->output_tensor = poperation->output_tensors[0]->index; + operation->output_tensors[0] = poperation->output_tensors[0]->index; operation->output_width = poperation->output_tensors[0]->dims[1]; operation->output_height = poperation->output_tensors[0]->dims[2]; operation->output_channels = poperation->output_tensors[0]->dims[3]; @@ -506,10 +507,14 @@ etna_ml_lower_convolution(struct etna_ml_subgraph *subgraph, else if (operation->input_channels > 1) transpose(subgraph, operation); - 
operation->input_tensor_size = operation->input_width * - operation->input_height * - operation->input_channels; + operation->input_tensor_sizes[0] = operation->input_width * + operation->input_height * + operation->input_channels; ML_DBG("%dx%dx%d\n", operation->input_width, operation->input_height, operation->input_channels); + + operation->output_tensor_sizes[0] = operation->output_width * + operation->output_height * + operation->output_channels; } static float @@ -560,6 +565,7 @@ etna_ml_lower_add(struct etna_ml_subgraph *subgraph, assert(poperation->type == PIPE_ML_OPERATION_TYPE_ADD); + operation->type = ETNA_JOB_TYPE_NN; operation->addition = true; operation->depthwise = false; operation->pointwise = false; @@ -567,26 +573,33 @@ etna_ml_lower_add(struct etna_ml_subgraph *subgraph, operation->padding_same = false; operation->stride = 1; - operation->input_tensors[0] = poperation->input_tensors[0]->index; - operation->input_tensors[1] = poperation->input_tensors[1]->index; - operation->input_count = 2; operation->input_width = poperation->input_tensors[0]->dims[1]; operation->input_height = poperation->input_tensors[0]->dims[2]; operation->input_channels = poperation->input_tensors[0]->dims[3]; operation->input_zero_point = etna_tensor_zero_point(poperation->input_tensors[0]); operation->input_scale = poperation->input_tensors[0]->scale; - operation->input_tensor_size = operation->input_width * - operation->input_height * - operation->input_channels * - 2; - operation->output_tensor = poperation->output_tensors[0]->index; + operation->input_tensors[0] = poperation->input_tensors[0]->index; + operation->input_tensor_sizes[0] = operation->input_width * + operation->input_height * + operation->input_channels; + operation->input_tensors[1] = poperation->input_tensors[1]->index; + operation->input_tensor_sizes[1] = operation->input_width * + operation->input_height * + operation->input_channels; + operation->input_count = 2; + + operation->output_tensors[0] = 
poperation->output_tensors[0]->index; operation->output_width = poperation->output_tensors[0]->dims[1]; operation->output_height = poperation->output_tensors[0]->dims[2]; operation->output_channels = poperation->output_tensors[0]->dims[3]; operation->output_zero_point = etna_tensor_zero_point(poperation->output_tensors[0]); operation->output_scale = poperation->output_tensors[0]->scale; + operation->output_tensor_sizes[0] = operation->output_width * + operation->output_height * + operation->output_channels; + if (nn_core_version < 8) { operation->weight_tensor = etna_ml_create_resource(context, 8); operation->weight_width = 2; @@ -807,8 +820,8 @@ create_nn_config(struct etna_ml_subgraph *subgraph, const struct etna_operation } } - struct pipe_resource *output = etna_ml_get_tensor(subgraph, operation->output_tensor); - offset = etna_ml_get_offset(subgraph, operation->output_tensor); + struct pipe_resource *output = etna_ml_get_tensor(subgraph, operation->output_tensors[0]); + offset = etna_ml_get_offset(subgraph, operation->output_tensors[0]); map->out_image_address = etna_bo_gpu_va(etna_resource(output)->bo) + offset; map->out_image_x_size = output_width; map->out_image_y_size = output_height; @@ -983,7 +996,7 @@ etna_ml_compile_operation_nn(struct etna_ml_subgraph *subgraph, const struct etn assert(input); pipe_resource_reference(&instruction->input, input); - struct pipe_resource *output = etna_ml_get_tensor(subgraph, operation->output_tensor); + struct pipe_resource *output = etna_ml_get_tensor(subgraph, operation->output_tensors[0]); assert(output); pipe_resource_reference(&instruction->output, output); diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c index bd8e7037175..371de498531 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c +++ b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c @@ -266,8 +266,8 @@ create_transpose_config(struct etna_ml_subgraph *subgraph, const struct etna_ope unsigned offset = 
etna_ml_get_offset(subgraph, operation->input_tensors[0]); map->in_image_base_address = etna_bo_gpu_va(etna_resource(input)->bo) + offset; - struct pipe_resource *output = etna_ml_get_tensor(subgraph, operation->output_tensor); - offset = etna_ml_get_offset(subgraph, operation->output_tensor); + struct pipe_resource *output = etna_ml_get_tensor(subgraph, operation->output_tensors[0]); + offset = etna_ml_get_offset(subgraph, operation->output_tensors[0]); map->out_image_base_address = etna_bo_gpu_va(etna_resource(output)->bo) + offset; map->out_loop_1_inc = operation->input_width * operation->input_height; @@ -316,8 +316,8 @@ create_detranspose_config(struct etna_ml_subgraph *subgraph, const struct etna_o unsigned offset = etna_ml_get_offset(subgraph, operation->input_tensors[0]); map->in_image_base_address = etna_bo_gpu_va(etna_resource(input)->bo) + offset; - struct pipe_resource *output = etna_ml_get_tensor(subgraph, operation->output_tensor); - offset = etna_ml_get_offset(subgraph, operation->output_tensor); + struct pipe_resource *output = etna_ml_get_tensor(subgraph, operation->output_tensors[0]); + offset = etna_ml_get_offset(subgraph, operation->output_tensors[0]); map->out_image_base_address = etna_bo_gpu_va(etna_resource(output)->bo) + offset; map->out_loop_0_inc = input_channels; @@ -468,8 +468,8 @@ create_reshuffle_config(struct etna_ml_subgraph *subgraph, const struct etna_ope unsigned offset = etna_ml_get_offset(subgraph, operation->input_tensors[0]); map->in_image_base_address = etna_bo_gpu_va(etna_resource(input)->bo) + offset; - struct pipe_resource *output = etna_ml_get_tensor(subgraph, operation->output_tensor); - offset = etna_ml_get_offset(subgraph, operation->output_tensor); + struct pipe_resource *output = etna_ml_get_tensor(subgraph, operation->output_tensors[0]); + offset = etna_ml_get_offset(subgraph, operation->output_tensors[0]); map->out_image_base_address = etna_bo_gpu_va(etna_resource(output)->bo) + offset; for (unsigned i = 0; i < 
tp_core; i++) { @@ -566,17 +566,20 @@ etna_ml_lower_transpose(struct etna_ml_subgraph *subgraph, operation->input_channels = input_tensor->dims[3]; operation->input_zero_point = etna_tensor_zero_point(input_tensor); operation->input_scale = input_tensor->scale; - operation->input_tensor_size = operation->input_width * - operation->input_height * - operation->input_channels; + operation->input_tensor_sizes[0] = operation->input_width * + operation->input_height * + operation->input_channels; *output_tensor = etna_ml_allocate_tensor(subgraph); - operation->output_tensor = *output_tensor; + operation->output_tensors[0] = *output_tensor; operation->output_width = operation->input_width; operation->output_height = operation->input_height; operation->output_channels = operation->input_channels; operation->output_zero_point = operation->input_zero_point; operation->output_scale = operation->input_scale; + operation->output_tensor_sizes[0] = operation->output_width * + operation->output_height * + operation->output_channels; } void @@ -594,16 +597,20 @@ etna_ml_lower_detranspose(struct etna_ml_subgraph *subgraph, operation->input_channels = convolution->output_channels; operation->input_zero_point = convolution->output_zero_point; operation->input_scale = convolution->output_scale; - operation->input_tensor_size = operation->input_width * - operation->input_height * - operation->input_channels; + operation->input_tensor_sizes[0] = operation->input_width * + operation->input_height * + operation->input_channels; - operation->output_tensor = convolution->output_tensor; + operation->output_tensors[0] = convolution->output_tensors[0]; + operation->output_count = 1; operation->output_width = convolution->output_width; operation->output_height = convolution->output_height; operation->output_channels = convolution->output_channels; operation->output_zero_point = convolution->output_zero_point; operation->output_scale = convolution->output_scale; + 
operation->output_tensor_sizes[0] = operation->output_width * + operation->output_height * + operation->output_channels; } void @@ -624,17 +631,20 @@ etna_ml_lower_reshuffle(struct etna_ml_subgraph *subgraph, operation->input_channels = convolution->input_tensors[0]->dims[3]; operation->input_zero_point = etna_tensor_zero_point(convolution->input_tensors[0]); operation->input_scale = convolution->input_tensors[0]->scale; - operation->input_tensor_size = operation->input_width * - operation->input_height * - operation->input_channels; + operation->input_tensor_sizes[0] = operation->input_width * + operation->input_height * + operation->input_channels; *output_tensor = etna_ml_allocate_tensor(subgraph); - operation->output_tensor = *output_tensor; + operation->output_tensors[0] = *output_tensor; operation->output_width = DIV_ROUND_UP(operation->input_width, operation->stride); operation->output_height = DIV_ROUND_UP(operation->input_height, operation->stride); operation->output_channels = operation->input_channels * operation->stride * operation->stride; operation->output_zero_point = etna_tensor_zero_point(convolution->input_tensors[0]); operation->output_scale = convolution->input_tensors[0]->scale; + operation->output_tensor_sizes[0] = operation->output_width * + operation->output_height * + operation->output_channels; /* When destriding a convolution, the transformation to be made to the input * tensor will depend on the size of the weight tensor. 
@@ -663,10 +673,13 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph, assert(input); pipe_resource_reference(&instruction->input, input); - struct pipe_resource *output = etna_ml_get_tensor(subgraph, operation->output_tensor); + struct pipe_resource *output = etna_ml_get_tensor(subgraph, operation->output_tensors[0]); assert(output); pipe_resource_reference(&instruction->output, output); + instruction->input_offset = etna_ml_get_offset(subgraph, operation->input_tensors[0]); + instruction->output_offset = etna_ml_get_offset(subgraph, operation->output_tensors[0]); + switch (operation->tp_type) { case ETNA_ML_TP_TRANSPOSE: instruction->configs[0] = create_transpose_config(subgraph, operation);