From 2425d31c1a9075cb9e820257be484f71d959f816 Mon Sep 17 00:00:00 2001 From: Shorin Sergey Date: Fri, 11 Apr 2025 16:34:50 +0300 Subject: [PATCH 01/11] [fnx] update onnxlib --- modules/fnxext/onnx_engine.cpp | 2 +- modules/fnxext/onnx_engine/src/Makefile | 61 -- modules/fnxext/onnx_engine/src/default/Acos.c | 49 +- .../fnxext/onnx_engine/src/default/Acosh.c | 50 +- modules/fnxext/onnx_engine/src/default/Add.c | 2 +- .../fnxext/onnx_engine/src/default/ArgMax.c | 6 +- .../fnxext/onnx_engine/src/default/ArgMin.c | 6 +- modules/fnxext/onnx_engine/src/default/Asin.c | 49 +- .../fnxext/onnx_engine/src/default/Asinh.c | 49 +- modules/fnxext/onnx_engine/src/default/Atan.c | 49 +- .../fnxext/onnx_engine/src/default/Atanh.c | 49 +- .../onnx_engine/src/default/AveragePool.c | 88 +- .../src/default/BatchNormalization.c | 9 +- .../fnxext/onnx_engine/src/default/BitShift.c | 6 +- .../onnx_engine/src/default/BitwiseAnd.c | 218 +++++ .../onnx_engine/src/default/BitwiseNot.c | 169 ++++ .../onnx_engine/src/default/BitwiseOr.c | 218 +++++ .../onnx_engine/src/default/BitwiseXor.c | 218 +++++ modules/fnxext/onnx_engine/src/default/Cast.c | 60 +- modules/fnxext/onnx_engine/src/default/Celu.c | 6 +- modules/fnxext/onnx_engine/src/default/Clip.c | 10 +- .../fnxext/onnx_engine/src/default/Concat.c | 25 +- .../fnxext/onnx_engine/src/default/Constant.c | 16 +- .../onnx_engine/src/default/ConstantOfShape.c | 24 +- modules/fnxext/onnx_engine/src/default/Conv.c | 104 +-- .../onnx_engine/src/default/ConvInteger.c | 484 +++++++++++ .../src/default/DynamicQuantizeLSTM.c | 765 ++++++++++++++++++ .../src/default/DynamicQuantizeLinear.c | 133 +++ modules/fnxext/onnx_engine/src/default/Elu.c | 4 +- .../fnxext/onnx_engine/src/default/Expand.c | 6 +- .../fnxext/onnx_engine/src/default/Flatten.c | 10 +- .../fnxext/onnx_engine/src/default/Gather.c | 132 +++ modules/fnxext/onnx_engine/src/default/Gemm.c | 4 +- .../src/default/GlobalAveragePool.c | 6 +- .../onnx_engine/src/default/GlobalLpPool.c | 4 +- 
.../onnx_engine/src/default/HardSigmoid.c | 10 +- .../fnxext/onnx_engine/src/default/Identity.c | 6 +- modules/fnxext/onnx_engine/src/default/If.c | 16 +- .../src/default/InstanceNormalization.c | 4 +- .../fnxext/onnx_engine/src/default/IsInf.c | 4 +- modules/fnxext/onnx_engine/src/default/LRN.c | 4 +- modules/fnxext/onnx_engine/src/default/LSTM.c | 570 ++++++++++++- .../onnx_engine/src/default/LeakyRelu.c | 4 +- .../onnx_engine/src/default/LogSoftmax.c | 8 +- .../fnxext/onnx_engine/src/default/MatMul.c | 105 +-- .../onnx_engine/src/default/MatMulInteger.c | 321 +++++++- .../fnxext/onnx_engine/src/default/MaxPool.c | 50 +- modules/fnxext/onnx_engine/src/default/Mod.c | 4 +- .../onnx_engine/src/default/Multinomial.c | 4 +- .../onnx_engine/src/default/RandomNormal.c | 10 +- .../src/default/RandomNormalLike.c | 4 +- .../onnx_engine/src/default/RandomUniform.c | 10 +- .../src/default/RandomUniformLike.c | 4 +- .../fnxext/onnx_engine/src/default/Range.c | 4 +- .../fnxext/onnx_engine/src/default/ReduceL1.c | 60 +- .../fnxext/onnx_engine/src/default/ReduceL2.c | 60 +- .../onnx_engine/src/default/ReduceLogSum.c | 60 +- .../onnx_engine/src/default/ReduceLogSumExp.c | 60 +- .../onnx_engine/src/default/ReduceMax.c | 60 +- .../onnx_engine/src/default/ReduceMean.c | 60 +- .../onnx_engine/src/default/ReduceMin.c | 60 +- .../onnx_engine/src/default/ReduceProd.c | 60 +- .../onnx_engine/src/default/ReduceSum.c | 50 +- .../onnx_engine/src/default/ReduceSumSquare.c | 60 +- .../fnxext/onnx_engine/src/default/Reshape.c | 16 +- modules/fnxext/onnx_engine/src/default/Selu.c | 4 +- .../fnxext/onnx_engine/src/default/Shape.c | 2 +- .../fnxext/onnx_engine/src/default/Shrink.c | 4 +- .../fnxext/onnx_engine/src/default/Softmax.c | 8 +- .../fnxext/onnx_engine/src/default/Squeeze.c | 86 +- .../onnx_engine/src/default/ThresholdedRelu.c | 4 +- modules/fnxext/onnx_engine/src/default/Tile.c | 4 +- .../onnx_engine/src/default/Transpose.c | 14 +- .../onnx_engine/src/default/Unsqueeze.c | 96 ++- 
.../fnxext/onnx_engine/src/default/Where.c | 4 +- .../fnxext/onnx_engine/src/default/default.c | 5 + .../fnxext/onnx_engine/src/default/default.h | 5 + modules/fnxext/onnx_engine/src/hmap.c | 36 +- modules/fnxext/onnx_engine/src/matrix.c | 177 ++++ modules/fnxext/onnx_engine/src/matrix.h | 16 + modules/fnxext/onnx_engine/src/onnx.c | 727 +++++++++++------ modules/fnxext/onnx_engine/src/onnx.h | 73 +- .../fnxext/onnx_engine/src/onnx.proto3.pb-c.c | 741 ++++++++++++++--- .../fnxext/onnx_engine/src/onnx.proto3.pb-c.h | 313 ++++++- modules/fnxext/onnx_engine/src/onnxconf.h | 49 +- modules/fnxext/onnx_engine/src/protobuf-c.c | 143 ++-- modules/fnxext/onnx_engine/src/protobuf-c.h | 13 +- 87 files changed, 6108 insertions(+), 1255 deletions(-) delete mode 100644 modules/fnxext/onnx_engine/src/Makefile create mode 100644 modules/fnxext/onnx_engine/src/default/BitwiseAnd.c create mode 100644 modules/fnxext/onnx_engine/src/default/BitwiseNot.c create mode 100644 modules/fnxext/onnx_engine/src/default/BitwiseOr.c create mode 100644 modules/fnxext/onnx_engine/src/default/BitwiseXor.c create mode 100644 modules/fnxext/onnx_engine/src/default/DynamicQuantizeLSTM.c create mode 100644 modules/fnxext/onnx_engine/src/matrix.c create mode 100644 modules/fnxext/onnx_engine/src/matrix.h diff --git a/modules/fnxext/onnx_engine.cpp b/modules/fnxext/onnx_engine.cpp index 7e3700da8b0..05eb67bdee5 100644 --- a/modules/fnxext/onnx_engine.cpp +++ b/modules/fnxext/onnx_engine.cpp @@ -66,7 +66,7 @@ Array OnnxEngine::run(const Array& data) { Variant OnnxEngine::load_from_file(const String &file_path) { Vector onnx_data = FileAccess::get_file_as_array(file_path); - ctx = onnx_context_alloc(onnx_data.ptr(), onnx_data.size(), NULL, 0); + ctx = onnx_context_alloc(onnx_data.ptr(), onnx_data.size(), NULL, 0, 0); if (ctx) { const char *input_layer_name = _get_input_layer_name(); diff --git a/modules/fnxext/onnx_engine/src/Makefile b/modules/fnxext/onnx_engine/src/Makefile deleted file mode 100644 index 
39e40c9f67b..00000000000 --- a/modules/fnxext/onnx_engine/src/Makefile +++ /dev/null @@ -1,61 +0,0 @@ -# -# Makefile for library -# - -CROSS_COMPILE ?= - -AS := $(CROSS_COMPILE)gcc -x assembler-with-cpp -CC := $(CROSS_COMPILE)gcc -CXX := $(CROSS_COMPILE)g++ -LD := $(CROSS_COMPILE)ld -AR := $(CROSS_COMPILE)ar -OC := $(CROSS_COMPILE)objcopy -OD := $(CROSS_COMPILE)objdump -RM := rm -fr - -ASFLAGS := -g -ggdb -Wall -O3 -CFLAGS := -g -ggdb -Wall -O3 -CXXFLAGS := -g -ggdb -Wall -O3 -INCDIRS := -I . -SRCDIRS := . default - -SFILES := $(foreach dir, $(SRCDIRS), $(wildcard $(dir)/*.S)) -CFILES := $(foreach dir, $(SRCDIRS), $(wildcard $(dir)/*.c)) -CPPFILES := $(foreach dir, $(SRCDIRS), $(wildcard $(dir)/*.cpp)) - -SDEPS := $(patsubst %, %, $(SFILES:.S=.o.d)) -CDEPS := $(patsubst %, %, $(CFILES:.c=.o.d)) -CPPDEPS := $(patsubst %, %, $(CPPFILES:.cpp=.o.d)) -DEPS := $(SDEPS) $(CDEPS) $(CPPDEPS) - -SOBJS := $(patsubst %, %, $(SFILES:.S=.o)) -COBJS := $(patsubst %, %, $(CFILES:.c=.o)) -CPPOBJS := $(patsubst %, %, $(CPPFILES:.cpp=.o)) -OBJS := $(SOBJS) $(COBJS) $(CPPOBJS) - -NAME := libonnx.a - -.PHONY: all clean - -all : $(NAME) - -$(NAME) : $(OBJS) - @echo [AR] Archiving $@ - @$(AR) -rcs $@ $(OBJS) - -$(SOBJS) : %.o : %.S - @echo [AS] $< - @$(AS) $(ASFLAGS) -MD -MP -MF $@.d $(INCDIRS) -c $< -o $@ - -$(COBJS) : %.o : %.c - @echo [CC] $< - @$(CC) $(CFLAGS) -MD -MP -MF $@.d $(INCDIRS) -c $< -o $@ - -$(CPPOBJS) : %.o : %.cpp - @echo [CXX] $< - @$(CXX) $(CXXFLAGS) -MD -MP -MF $@.d $(INCDIRS) -c $< -o $@ - -clean: - @$(RM) $(DEPS) $(OBJS) $(NAME) - -sinclude $(DEPS) diff --git a/modules/fnxext/onnx_engine/src/default/Acos.c b/modules/fnxext/onnx_engine/src/default/Acos.c index cda7d94a160..47b723bb136 100644 --- a/modules/fnxext/onnx_engine/src/default/Acos.c +++ b/modules/fnxext/onnx_engine/src/default/Acos.c @@ -20,6 +20,21 @@ static int Acos_reshape(struct onnx_node_t * n) return onnx_tensor_reshape_identity(y, x, x->type); } +static void Acos_bfloat16(struct onnx_node_t * n) +{ + 
struct onnx_tensor_t * x = n->inputs[0]; + struct onnx_tensor_t * y = n->outputs[0]; + uint16_t * px = (uint16_t *)x->datas; + uint16_t * py = (uint16_t *)y->datas; + float v; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + v = bfloat16_to_float32(px[i]); + py[i] = float32_to_bfloat16(acosf(v)); + } +} + static void Acos_float16(struct onnx_node_t * n) { struct onnx_tensor_t * x = n->inputs[0]; @@ -59,7 +74,39 @@ static void Acos_float64(struct onnx_node_t * n) void resolver_default_op_Acos(struct onnx_node_t * n) { - if(n->opset >= 7) + if(n->opset >= 22) + { + switch(n->inputs[0]->type) + { + case ONNX_TENSOR_TYPE_BFLOAT16: + n->init = Acos_init; + n->exit = Acos_exit; + n->reshape = Acos_reshape; + n->operator_ = Acos_bfloat16; + break; + case ONNX_TENSOR_TYPE_FLOAT16: + n->init = Acos_init; + n->exit = Acos_exit; + n->reshape = Acos_reshape; + n->operator_ = Acos_float16; + break; + case ONNX_TENSOR_TYPE_FLOAT32: + n->init = Acos_init; + n->exit = Acos_exit; + n->reshape = Acos_reshape; + n->operator_ = Acos_float32; + break; + case ONNX_TENSOR_TYPE_FLOAT64: + n->init = Acos_init; + n->exit = Acos_exit; + n->reshape = Acos_reshape; + n->operator_ = Acos_float64; + break; + default: + break; + } + } + else if(n->opset >= 7) { switch(n->inputs[0]->type) { diff --git a/modules/fnxext/onnx_engine/src/default/Acosh.c b/modules/fnxext/onnx_engine/src/default/Acosh.c index a4fb03c9844..341ba4bd248 100644 --- a/modules/fnxext/onnx_engine/src/default/Acosh.c +++ b/modules/fnxext/onnx_engine/src/default/Acosh.c @@ -20,6 +20,22 @@ static int Acosh_reshape(struct onnx_node_t * n) return onnx_tensor_reshape_identity(y, x, x->type); } +static void Acosh_bfloat16(struct onnx_node_t * n) +{ + struct onnx_tensor_t * x = n->inputs[0]; + struct onnx_tensor_t * y = n->outputs[0]; + uint16_t * px = (uint16_t *)x->datas; + uint16_t * py = (uint16_t *)y->datas; + float v; + size_t i, l; + + for(i = 0, l = y->ndata; i < l; i++) + { + v = bfloat16_to_float32(px[i]); + py[i] = 
float32_to_bfloat16(acoshf(v)); + } +} + static void Acosh_float16(struct onnx_node_t * n) { struct onnx_tensor_t * x = n->inputs[0]; @@ -62,7 +78,39 @@ static void Acosh_float64(struct onnx_node_t * n) void resolver_default_op_Acosh(struct onnx_node_t * n) { - if(n->opset >= 9) + if(n->opset >= 22) + { + switch(n->inputs[0]->type) + { + case ONNX_TENSOR_TYPE_BFLOAT16: + n->init = Acosh_init; + n->exit = Acosh_exit; + n->reshape = Acosh_reshape; + n->operator_ = Acosh_bfloat16; + break; + case ONNX_TENSOR_TYPE_FLOAT16: + n->init = Acosh_init; + n->exit = Acosh_exit; + n->reshape = Acosh_reshape; + n->operator_ = Acosh_float16; + break; + case ONNX_TENSOR_TYPE_FLOAT32: + n->init = Acosh_init; + n->exit = Acosh_exit; + n->reshape = Acosh_reshape; + n->operator_ = Acosh_float32; + break; + case ONNX_TENSOR_TYPE_FLOAT64: + n->init = Acosh_init; + n->exit = Acosh_exit; + n->reshape = Acosh_reshape; + n->operator_ = Acosh_float64; + break; + default: + break; + } + } + else if(n->opset >= 9) { switch(n->inputs[0]->type) { diff --git a/modules/fnxext/onnx_engine/src/default/Add.c b/modules/fnxext/onnx_engine/src/default/Add.c index 575226a8fb5..6337da48091 100644 --- a/modules/fnxext/onnx_engine/src/default/Add.c +++ b/modules/fnxext/onnx_engine/src/default/Add.c @@ -229,7 +229,7 @@ void resolver_default_op_Add(struct onnx_node_t * n) { if(n->opset >= 14) { - switch(n->inputs[0]->type) + switch(n->inputs[0]->type) { case ONNX_TENSOR_TYPE_INT8: n->init = Add_init; diff --git a/modules/fnxext/onnx_engine/src/default/ArgMax.c b/modules/fnxext/onnx_engine/src/default/ArgMax.c index 78bc2aa5c30..13e04733b30 100644 --- a/modules/fnxext/onnx_engine/src/default/ArgMax.c +++ b/modules/fnxext/onnx_engine/src/default/ArgMax.c @@ -15,7 +15,7 @@ static int ArgMax_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->axis = 
onnx_attribute_read_int(n, "axis", 0); @@ -33,7 +33,7 @@ static int ArgMax_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } @@ -55,7 +55,7 @@ static int ArgMax_reshape(struct onnx_node_t * n) pdat->stride = x->strides[axis]; if(pdat->keepdims) { - memcpy(dims, x->dims, sizeof(int) * ndim); + onnx_memcpy(dims, x->dims, sizeof(int) * ndim); dims[axis] = 1; } else diff --git a/modules/fnxext/onnx_engine/src/default/ArgMin.c b/modules/fnxext/onnx_engine/src/default/ArgMin.c index a40f376fd11..a2e1a351553 100644 --- a/modules/fnxext/onnx_engine/src/default/ArgMin.c +++ b/modules/fnxext/onnx_engine/src/default/ArgMin.c @@ -15,7 +15,7 @@ static int ArgMin_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->axis = onnx_attribute_read_int(n, "axis", 0); @@ -33,7 +33,7 @@ static int ArgMin_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } @@ -55,7 +55,7 @@ static int ArgMin_reshape(struct onnx_node_t * n) pdat->stride = x->strides[axis]; if(pdat->keepdims) { - memcpy(dims, x->dims, sizeof(int) * ndim); + onnx_memcpy(dims, x->dims, sizeof(int) * ndim); dims[axis] = 1; } else diff --git a/modules/fnxext/onnx_engine/src/default/Asin.c b/modules/fnxext/onnx_engine/src/default/Asin.c index 6eba4cedb72..1282a40202a 100644 --- a/modules/fnxext/onnx_engine/src/default/Asin.c +++ b/modules/fnxext/onnx_engine/src/default/Asin.c @@ -20,6 +20,21 @@ static int Asin_reshape(struct onnx_node_t * n) return onnx_tensor_reshape_identity(y, x, x->type); } +static void Asin_bfloat16(struct onnx_node_t * n) +{ + struct onnx_tensor_t * x = n->inputs[0]; + struct onnx_tensor_t * y = n->outputs[0]; + uint16_t * px = (uint16_t *)x->datas; + uint16_t * py = 
(uint16_t *)y->datas; + float v; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + v = bfloat16_to_float32(px[i]); + py[i] = float32_to_bfloat16(asinf(v)); + } +} + static void Asin_float16(struct onnx_node_t * n) { struct onnx_tensor_t * x = n->inputs[0]; @@ -59,7 +74,39 @@ static void Asin_float64(struct onnx_node_t * n) void resolver_default_op_Asin(struct onnx_node_t * n) { - if(n->opset >= 7) + if(n->opset >= 22) + { + switch(n->inputs[0]->type) + { + case ONNX_TENSOR_TYPE_BFLOAT16: + n->init = Asin_init; + n->exit = Asin_exit; + n->reshape = Asin_reshape; + n->operator_ = Asin_bfloat16; + break; + case ONNX_TENSOR_TYPE_FLOAT16: + n->init = Asin_init; + n->exit = Asin_exit; + n->reshape = Asin_reshape; + n->operator_ = Asin_float16; + break; + case ONNX_TENSOR_TYPE_FLOAT32: + n->init = Asin_init; + n->exit = Asin_exit; + n->reshape = Asin_reshape; + n->operator_ = Asin_float32; + break; + case ONNX_TENSOR_TYPE_FLOAT64: + n->init = Asin_init; + n->exit = Asin_exit; + n->reshape = Asin_reshape; + n->operator_ = Asin_float64; + break; + default: + break; + } + } + else if(n->opset >= 7) { switch(n->inputs[0]->type) { diff --git a/modules/fnxext/onnx_engine/src/default/Asinh.c b/modules/fnxext/onnx_engine/src/default/Asinh.c index 742080b7ab8..ec799a0b1a0 100644 --- a/modules/fnxext/onnx_engine/src/default/Asinh.c +++ b/modules/fnxext/onnx_engine/src/default/Asinh.c @@ -20,6 +20,21 @@ static int Asinh_reshape(struct onnx_node_t * n) return onnx_tensor_reshape_identity(y, x, x->type); } +static void Asinh_bfloat16(struct onnx_node_t * n) +{ + struct onnx_tensor_t * x = n->inputs[0]; + struct onnx_tensor_t * y = n->outputs[0]; + uint16_t * px = (uint16_t *)x->datas; + uint16_t * py = (uint16_t *)y->datas; + float v; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + v = bfloat16_to_float32(px[i]); + py[i] = float32_to_bfloat16(asinhf(v)); + } +} + static void Asinh_float16(struct onnx_node_t * n) { struct onnx_tensor_t * x = n->inputs[0]; @@ -59,7 +74,39 @@ 
static void Asinh_float64(struct onnx_node_t * n) void resolver_default_op_Asinh(struct onnx_node_t * n) { - if(n->opset >= 9) + if(n->opset >= 22) + { + switch(n->inputs[0]->type) + { + case ONNX_TENSOR_TYPE_BFLOAT16: + n->init = Asinh_init; + n->exit = Asinh_exit; + n->reshape = Asinh_reshape; + n->operator_ = Asinh_bfloat16; + break; + case ONNX_TENSOR_TYPE_FLOAT16: + n->init = Asinh_init; + n->exit = Asinh_exit; + n->reshape = Asinh_reshape; + n->operator_ = Asinh_float16; + break; + case ONNX_TENSOR_TYPE_FLOAT32: + n->init = Asinh_init; + n->exit = Asinh_exit; + n->reshape = Asinh_reshape; + n->operator_ = Asinh_float32; + break; + case ONNX_TENSOR_TYPE_FLOAT64: + n->init = Asinh_init; + n->exit = Asinh_exit; + n->reshape = Asinh_reshape; + n->operator_ = Asinh_float64; + break; + default: + break; + } + } + else if(n->opset >= 9) { switch(n->inputs[0]->type) { diff --git a/modules/fnxext/onnx_engine/src/default/Atan.c b/modules/fnxext/onnx_engine/src/default/Atan.c index cd0b14d2fae..9ac47464dc2 100644 --- a/modules/fnxext/onnx_engine/src/default/Atan.c +++ b/modules/fnxext/onnx_engine/src/default/Atan.c @@ -20,6 +20,21 @@ static int Atan_reshape(struct onnx_node_t * n) return onnx_tensor_reshape_identity(y, x, x->type); } +static void Atan_bfloat16(struct onnx_node_t * n) +{ + struct onnx_tensor_t * x = n->inputs[0]; + struct onnx_tensor_t * y = n->outputs[0]; + uint16_t * px = (uint16_t *)x->datas; + uint16_t * py = (uint16_t *)y->datas; + float v; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + v = bfloat16_to_float32(px[i]); + py[i] = float32_to_bfloat16(atanf(v)); + } +} + static void Atan_float16(struct onnx_node_t * n) { struct onnx_tensor_t * x = n->inputs[0]; @@ -59,7 +74,39 @@ static void Atan_float64(struct onnx_node_t * n) void resolver_default_op_Atan(struct onnx_node_t * n) { - if(n->opset >= 7) + if(n->opset >= 22) + { + switch(n->inputs[0]->type) + { + case ONNX_TENSOR_TYPE_BFLOAT16: + n->init = Atan_init; + n->exit = Atan_exit; + 
n->reshape = Atan_reshape; + n->operator_ = Atan_bfloat16; + break; + case ONNX_TENSOR_TYPE_FLOAT16: + n->init = Atan_init; + n->exit = Atan_exit; + n->reshape = Atan_reshape; + n->operator_ = Atan_float16; + break; + case ONNX_TENSOR_TYPE_FLOAT32: + n->init = Atan_init; + n->exit = Atan_exit; + n->reshape = Atan_reshape; + n->operator_ = Atan_float32; + break; + case ONNX_TENSOR_TYPE_FLOAT64: + n->init = Atan_init; + n->exit = Atan_exit; + n->reshape = Atan_reshape; + n->operator_ = Atan_float64; + break; + default: + break; + } + } + else if(n->opset >= 7) { switch(n->inputs[0]->type) { diff --git a/modules/fnxext/onnx_engine/src/default/Atanh.c b/modules/fnxext/onnx_engine/src/default/Atanh.c index 1f97d50b39d..ff52ae764f3 100644 --- a/modules/fnxext/onnx_engine/src/default/Atanh.c +++ b/modules/fnxext/onnx_engine/src/default/Atanh.c @@ -20,6 +20,21 @@ static int Atanh_reshape(struct onnx_node_t * n) return onnx_tensor_reshape_identity(y, x, x->type); } +static void Atanh_bfloat16(struct onnx_node_t * n) +{ + struct onnx_tensor_t * x = n->inputs[0]; + struct onnx_tensor_t * y = n->outputs[0]; + uint16_t * px = (uint16_t *)x->datas; + uint16_t * py = (uint16_t *)y->datas; + float v; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + v = bfloat16_to_float32(px[i]); + py[i] = float32_to_bfloat16(atanhf(v)); + } +} + static void Atanh_float16(struct onnx_node_t * n) { struct onnx_tensor_t * x = n->inputs[0]; @@ -59,7 +74,39 @@ static void Atanh_float64(struct onnx_node_t * n) void resolver_default_op_Atanh(struct onnx_node_t * n) { - if(n->opset >= 9) + if(n->opset >= 22) + { + switch(n->inputs[0]->type) + { + case ONNX_TENSOR_TYPE_BFLOAT16: + n->init = Atanh_init; + n->exit = Atanh_exit; + n->reshape = Atanh_reshape; + n->operator_ = Atanh_bfloat16; + break; + case ONNX_TENSOR_TYPE_FLOAT16: + n->init = Atanh_init; + n->exit = Atanh_exit; + n->reshape = Atanh_reshape; + n->operator_ = Atanh_float16; + break; + case ONNX_TENSOR_TYPE_FLOAT32: + n->init = 
Atanh_init; + n->exit = Atanh_exit; + n->reshape = Atanh_reshape; + n->operator_ = Atanh_float32; + break; + case ONNX_TENSOR_TYPE_FLOAT64: + n->init = Atanh_init; + n->exit = Atanh_exit; + n->reshape = Atanh_reshape; + n->operator_ = Atanh_float64; + break; + default: + break; + } + } + else if(n->opset >= 9) { switch(n->inputs[0]->type) { diff --git a/modules/fnxext/onnx_engine/src/default/AveragePool.c b/modules/fnxext/onnx_engine/src/default/AveragePool.c index d0862d38507..48c7d6a4713 100644 --- a/modules/fnxext/onnx_engine/src/default/AveragePool.c +++ b/modules/fnxext/onnx_engine/src/default/AveragePool.c @@ -29,10 +29,10 @@ static int AveragePool_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { - memset(pdat, 0, sizeof(struct operator_pdata_t)); + onnx_memset(pdat, 0, sizeof(struct operator_pdata_t)); switch(shash(onnx_attribute_read_string(n, "auto_pad", "NOTSET"))) { case 0xc3966fc2: /* "NOTSET" */ @@ -56,12 +56,12 @@ static int AveragePool_init(struct onnx_node_t * n) pdat->nkernel = onnx_attribute_read_ints(n, "kernel_shape", &ints); if(pdat->nkernel > 0) { - pdat->kernels = malloc(sizeof(int) * pdat->nkernel); + pdat->kernels = onnx_malloc(sizeof(int) * pdat->nkernel); for(i = 0; i < pdat->nkernel; i++) pdat->kernels[i] = ints[i]; } pdat->npad = pdat->nkernel * 2; - pdat->pads = malloc(sizeof(int) * pdat->npad); + pdat->pads = onnx_malloc(sizeof(int) * pdat->npad); if(pdat->pads) { l = onnx_attribute_read_ints(n, "pads", &ints); @@ -71,7 +71,7 @@ static int AveragePool_init(struct onnx_node_t * n) pdat->pads[i] = 0; } pdat->nstride = pdat->nkernel; - pdat->strides = malloc(sizeof(int) * pdat->nstride); + pdat->strides = onnx_malloc(sizeof(int) * pdat->nstride); if(pdat->strides) { l = onnx_attribute_read_ints(n, "strides", &ints); @@ -94,12 +94,12 @@ static int AveragePool_exit(struct onnx_node_t * n) if(pdat) { 
if(pdat->kernels) - free(pdat->kernels); + onnx_free(pdat->kernels); if(pdat->pads) - free(pdat->pads); + onnx_free(pdat->pads); if(pdat->strides) - free(pdat->strides); - free(pdat); + onnx_free(pdat->strides); + onnx_free(pdat); } return 1; } @@ -117,7 +117,7 @@ static int AveragePool_reshape(struct onnx_node_t * n) switch(pdat->auto_pad) { case AUTO_PAD_NOTSET: - memcpy(pdat->cpads, pdat->pads, sizeof(int) * pdat->npad); + onnx_memcpy(pdat->cpads, pdat->pads, sizeof(int) * pdat->npad); break; case AUTO_PAD_SAME_UPPER: for(i = 0; i < pdat->npad / 2; i++) @@ -136,7 +136,7 @@ static int AveragePool_reshape(struct onnx_node_t * n) } break; case AUTO_PAD_VALID: - memset(pdat->cpads, 0, sizeof(int) * pdat->npad); + onnx_memset(pdat->cpads, 0, sizeof(int) * pdat->npad); break; default: break; @@ -216,13 +216,13 @@ static void AveragePool_float16(struct onnx_node_t * n) for(i = 0, size = 1; i < x->ndim - 2; ++i) size *= pdat->kernels[i]; - memset(o_dim, 0, sizeof(o_dim)); + onnx_memset(o_dim, 0, sizeof(o_dim)); do { for(i = 2; i < x->ndim; i++) b_dim[i] = o_dim[i] * pdat->strides[i - 2] - pdat->cpads[i - 2]; sum = 0; padcnt = 0; - memset(k_dim, 0, sizeof(k_dim)); + onnx_memset(k_dim, 0, sizeof(k_dim)); do { i_dim[0] = o_dim[0]; i_dim[1] = o_dim[1]; @@ -267,13 +267,13 @@ static void AveragePool_float32(struct onnx_node_t * n) for(i = 0, size = 1; i < x->ndim - 2; ++i) size *= pdat->kernels[i]; - memset(o_dim, 0, sizeof(o_dim)); + onnx_memset(o_dim, 0, sizeof(o_dim)); do { for(i = 2; i < x->ndim; i++) b_dim[i] = o_dim[i] * pdat->strides[i - 2] - pdat->cpads[i - 2]; sum = 0; padcnt = 0; - memset(k_dim, 0, sizeof(k_dim)); + onnx_memset(k_dim, 0, sizeof(k_dim)); do { i_dim[0] = o_dim[0]; i_dim[1] = o_dim[1]; @@ -318,13 +318,13 @@ static void AveragePool_float64(struct onnx_node_t * n) for(i = 0, size = 1; i < x->ndim - 2; ++i) size *= pdat->kernels[i]; - memset(o_dim, 0, sizeof(o_dim)); + onnx_memset(o_dim, 0, sizeof(o_dim)); do { for(i = 2; i < x->ndim; i++) b_dim[i] = 
o_dim[i] * pdat->strides[i - 2] - pdat->cpads[i - 2]; sum = 0; padcnt = 0; - memset(k_dim, 0, sizeof(k_dim)); + onnx_memset(k_dim, 0, sizeof(k_dim)); do { i_dim[0] = o_dim[0]; i_dim[1] = o_dim[1]; @@ -354,7 +354,59 @@ static void AveragePool_float64(struct onnx_node_t * n) void resolver_default_op_AveragePool(struct onnx_node_t * n) { - if(n->opset >= 11) + if(n->opset >= 22) + { + switch(n->inputs[0]->type) + { + case ONNX_TENSOR_TYPE_FLOAT16: + n->init = AveragePool_init; + n->exit = AveragePool_exit; + n->reshape = AveragePool_reshape; + n->operator_ = AveragePool_float16; + break; + case ONNX_TENSOR_TYPE_FLOAT32: + n->init = AveragePool_init; + n->exit = AveragePool_exit; + n->reshape = AveragePool_reshape; + n->operator_ = AveragePool_float32; + break; + case ONNX_TENSOR_TYPE_FLOAT64: + n->init = AveragePool_init; + n->exit = AveragePool_exit; + n->reshape = AveragePool_reshape; + n->operator_ = AveragePool_float64; + break; + default: + break; + } + } + else if(n->opset >= 19) + { + switch(n->inputs[0]->type) + { + case ONNX_TENSOR_TYPE_FLOAT16: + n->init = AveragePool_init; + n->exit = AveragePool_exit; + n->reshape = AveragePool_reshape; + n->operator_ = AveragePool_float16; + break; + case ONNX_TENSOR_TYPE_FLOAT32: + n->init = AveragePool_init; + n->exit = AveragePool_exit; + n->reshape = AveragePool_reshape; + n->operator_ = AveragePool_float32; + break; + case ONNX_TENSOR_TYPE_FLOAT64: + n->init = AveragePool_init; + n->exit = AveragePool_exit; + n->reshape = AveragePool_reshape; + n->operator_ = AveragePool_float64; + break; + default: + break; + } + } + else if(n->opset >= 11) { switch(n->inputs[0]->type) { diff --git a/modules/fnxext/onnx_engine/src/default/BatchNormalization.c b/modules/fnxext/onnx_engine/src/default/BatchNormalization.c index 84d63995172..6c8aef3907c 100644 --- a/modules/fnxext/onnx_engine/src/default/BatchNormalization.c +++ b/modules/fnxext/onnx_engine/src/default/BatchNormalization.c @@ -11,7 +11,7 @@ static int 
BatchNormalization_init(struct onnx_node_t * n) if((n->ninput == 5) && (n->noutput >= 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->epsilon = onnx_attribute_read_float(n, "epsilon", 1e-05); @@ -28,7 +28,7 @@ static int BatchNormalization_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } @@ -138,7 +138,10 @@ static void BatchNormalization_float64(struct onnx_node_t * n) void resolver_default_op_BatchNormalization(struct onnx_node_t * n) { - if(n->opset >= 14) + if(n->opset >= 15) + { + } + else if(n->opset >= 14) { } else if(n->opset >= 9) diff --git a/modules/fnxext/onnx_engine/src/default/BitShift.c b/modules/fnxext/onnx_engine/src/default/BitShift.c index 238e7654861..8927d630bd0 100644 --- a/modules/fnxext/onnx_engine/src/default/BitShift.c +++ b/modules/fnxext/onnx_engine/src/default/BitShift.c @@ -10,10 +10,10 @@ static int BitShift_init(struct onnx_node_t * n) if((n->ninput == 2) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { - pdat->isleft = (strcmp(onnx_attribute_read_string(n, "direction", "LEFT"), "LEFT") == 0) ? 1 : 0; + pdat->isleft = (onnx_strcmp(onnx_attribute_read_string(n, "direction", "LEFT"), "LEFT") == 0) ? 
1 : 0; n->priv = pdat; return 1; } @@ -26,7 +26,7 @@ static int BitShift_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/BitwiseAnd.c b/modules/fnxext/onnx_engine/src/default/BitwiseAnd.c new file mode 100644 index 00000000000..9128a29dda5 --- /dev/null +++ b/modules/fnxext/onnx_engine/src/default/BitwiseAnd.c @@ -0,0 +1,218 @@ +#include "../onnx.h" + +static int BitwiseAnd_init(struct onnx_node_t * n) +{ + if((n->ninput == 2) && (n->noutput == 1)) + return 1; + return 0; +} + +static int BitwiseAnd_exit(struct onnx_node_t * n) +{ + return 1; +} + +static int BitwiseAnd_reshape(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + + return onnx_tensor_reshape_multi_broadcast(y, a, b, a->type); +} + +static void BitwiseAnd_int8(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + int8_t * py = (int8_t *)y->datas; + int8_t * pa; + int8_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa & *pb; + } +} + +static void BitwiseAnd_int16(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + int16_t * py = (int16_t *)y->datas; + int16_t * pa; + int16_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa & *pb; + } +} + +static void BitwiseAnd_int32(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct 
onnx_tensor_t * b = n->inputs[1]; + int32_t * py = (int32_t *)y->datas; + int32_t * pa; + int32_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa & *pb; + } +} + +static void BitwiseAnd_int64(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + int64_t * py = (int64_t *)y->datas; + int64_t * pa; + int64_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa & *pb; + } +} + +static void BitwiseAnd_uint8(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + uint8_t * py = (uint8_t *)y->datas; + uint8_t * pa; + uint8_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa & *pb; + } +} + +static void BitwiseAnd_uint16(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + uint16_t * py = (uint16_t *)y->datas; + uint16_t * pa; + uint16_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa & *pb; + } +} + +static void BitwiseAnd_uint32(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + uint32_t * py = (uint32_t *)y->datas; + uint32_t * pa; + uint32_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = 
onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa & *pb; + } +} + +static void BitwiseAnd_uint64(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + uint64_t * py = (uint64_t *)y->datas; + uint64_t * pa; + uint64_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa & *pb; + } +} + +void resolver_default_op_BitwiseAnd(struct onnx_node_t * n) +{ + if(n->opset >= 18) + { + switch(n->inputs[0]->type) + { + case ONNX_TENSOR_TYPE_INT8: + n->init = BitwiseAnd_init; + n->exit = BitwiseAnd_exit; + n->reshape = BitwiseAnd_reshape; + n->operator_ = BitwiseAnd_int8; + break; + case ONNX_TENSOR_TYPE_INT16: + n->init = BitwiseAnd_init; + n->exit = BitwiseAnd_exit; + n->reshape = BitwiseAnd_reshape; + n->operator_ = BitwiseAnd_int16; + break; + case ONNX_TENSOR_TYPE_INT32: + n->init = BitwiseAnd_init; + n->exit = BitwiseAnd_exit; + n->reshape = BitwiseAnd_reshape; + n->operator_ = BitwiseAnd_int32; + break; + case ONNX_TENSOR_TYPE_INT64: + n->init = BitwiseAnd_init; + n->exit = BitwiseAnd_exit; + n->reshape = BitwiseAnd_reshape; + n->operator_ = BitwiseAnd_int64; + break; + case ONNX_TENSOR_TYPE_UINT8: + n->init = BitwiseAnd_init; + n->exit = BitwiseAnd_exit; + n->reshape = BitwiseAnd_reshape; + n->operator_ = BitwiseAnd_uint8; + break; + case ONNX_TENSOR_TYPE_UINT16: + n->init = BitwiseAnd_init; + n->exit = BitwiseAnd_exit; + n->reshape = BitwiseAnd_reshape; + n->operator_ = BitwiseAnd_uint16; + break; + case ONNX_TENSOR_TYPE_UINT32: + n->init = BitwiseAnd_init; + n->exit = BitwiseAnd_exit; + n->reshape = BitwiseAnd_reshape; + n->operator_ = BitwiseAnd_uint32; + break; + case ONNX_TENSOR_TYPE_UINT64: + n->init = BitwiseAnd_init; + n->exit = BitwiseAnd_exit; + n->reshape = BitwiseAnd_reshape; + n->operator_ = BitwiseAnd_uint64; + break; + 
default: + break; + } + } +} diff --git a/modules/fnxext/onnx_engine/src/default/BitwiseNot.c b/modules/fnxext/onnx_engine/src/default/BitwiseNot.c new file mode 100644 index 00000000000..f05814a8d05 --- /dev/null +++ b/modules/fnxext/onnx_engine/src/default/BitwiseNot.c @@ -0,0 +1,169 @@ +#include "../onnx.h" + +static int BitwiseNot_init(struct onnx_node_t * n) +{ + if((n->ninput == 1) && (n->noutput == 1)) + return 1; + return 0; +} + +static int BitwiseNot_exit(struct onnx_node_t * n) +{ + return 1; +} + +static int BitwiseNot_reshape(struct onnx_node_t * n) +{ + struct onnx_tensor_t * x = n->inputs[0]; + struct onnx_tensor_t * y = n->outputs[0]; + + return onnx_tensor_reshape_identity(y, x, x->type); +} + +static void BitwiseNot_int8(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * x = n->inputs[0]; + int8_t * py = (int8_t *)y->datas; + int8_t * px = (int8_t *)x->datas; + + for(size_t i = 0, l = y->ndata; i < l; i++) + py[i] = ~px[i]; +} + +static void BitwiseNot_int16(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * x = n->inputs[0]; + int16_t * py = (int16_t *)y->datas; + int16_t * px = (int16_t *)x->datas; + + for(size_t i = 0, l = y->ndata; i < l; i++) + py[i] = ~px[i]; +} + +static void BitwiseNot_int32(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * x = n->inputs[0]; + int32_t * py = (int32_t *)y->datas; + int32_t * px = (int32_t *)x->datas; + + for(size_t i = 0, l = y->ndata; i < l; i++) + py[i] = ~px[i]; +} + +static void BitwiseNot_int64(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * x = n->inputs[0]; + int64_t * py = (int64_t *)y->datas; + int64_t * px = (int64_t *)x->datas; + + for(size_t i = 0, l = y->ndata; i < l; i++) + py[i] = ~px[i]; +} + +static void BitwiseNot_uint8(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct 
onnx_tensor_t * x = n->inputs[0]; + uint8_t * py = (uint8_t *)y->datas; + uint8_t * px = (uint8_t *)x->datas; + + for(size_t i = 0, l = y->ndata; i < l; i++) + py[i] = ~px[i]; +} + +static void BitwiseNot_uint16(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * x = n->inputs[0]; + uint16_t * py = (uint16_t *)y->datas; + uint16_t * px = (uint16_t *)x->datas; + + for(size_t i = 0, l = y->ndata; i < l; i++) + py[i] = ~px[i]; +} + +static void BitwiseNot_uint32(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * x = n->inputs[0]; + uint32_t * py = (uint32_t *)y->datas; + uint32_t * px = (uint32_t *)x->datas; + + for(size_t i = 0, l = y->ndata; i < l; i++) + py[i] = ~px[i]; +} + +static void BitwiseNot_uint64(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * x = n->inputs[0]; + uint64_t * py = (uint64_t *)y->datas; + uint64_t * px = (uint64_t *)x->datas; + + for(size_t i = 0, l = y->ndata; i < l; i++) + py[i] = ~px[i]; +} + +void resolver_default_op_BitwiseNot(struct onnx_node_t * n) +{ + if(n->opset >= 18) + { + switch(n->inputs[0]->type) + { + case ONNX_TENSOR_TYPE_INT8: + n->init = BitwiseNot_init; + n->exit = BitwiseNot_exit; + n->reshape = BitwiseNot_reshape; + n->operator_ = BitwiseNot_int8; + break; + case ONNX_TENSOR_TYPE_INT16: + n->init = BitwiseNot_init; + n->exit = BitwiseNot_exit; + n->reshape = BitwiseNot_reshape; + n->operator_ = BitwiseNot_int16; + break; + case ONNX_TENSOR_TYPE_INT32: + n->init = BitwiseNot_init; + n->exit = BitwiseNot_exit; + n->reshape = BitwiseNot_reshape; + n->operator_ = BitwiseNot_int32; + break; + case ONNX_TENSOR_TYPE_INT64: + n->init = BitwiseNot_init; + n->exit = BitwiseNot_exit; + n->reshape = BitwiseNot_reshape; + n->operator_ = BitwiseNot_int64; + break; + case ONNX_TENSOR_TYPE_UINT8: + n->init = BitwiseNot_init; + n->exit = BitwiseNot_exit; + n->reshape = BitwiseNot_reshape; + n->operator_ 
= BitwiseNot_uint8; + break; + case ONNX_TENSOR_TYPE_UINT16: + n->init = BitwiseNot_init; + n->exit = BitwiseNot_exit; + n->reshape = BitwiseNot_reshape; + n->operator_ = BitwiseNot_uint16; + break; + case ONNX_TENSOR_TYPE_UINT32: + n->init = BitwiseNot_init; + n->exit = BitwiseNot_exit; + n->reshape = BitwiseNot_reshape; + n->operator_ = BitwiseNot_uint32; + break; + case ONNX_TENSOR_TYPE_UINT64: + n->init = BitwiseNot_init; + n->exit = BitwiseNot_exit; + n->reshape = BitwiseNot_reshape; + n->operator_ = BitwiseNot_uint64; + break; + default: + break; + } + } +} diff --git a/modules/fnxext/onnx_engine/src/default/BitwiseOr.c b/modules/fnxext/onnx_engine/src/default/BitwiseOr.c new file mode 100644 index 00000000000..d1adbde4b0f --- /dev/null +++ b/modules/fnxext/onnx_engine/src/default/BitwiseOr.c @@ -0,0 +1,218 @@ +#include "../onnx.h" + +static int BitwiseOr_init(struct onnx_node_t * n) +{ + if((n->ninput == 2) && (n->noutput == 1)) + return 1; + return 0; +} + +static int BitwiseOr_exit(struct onnx_node_t * n) +{ + return 1; +} + +static int BitwiseOr_reshape(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + + return onnx_tensor_reshape_multi_broadcast(y, a, b, a->type); +} + +static void BitwiseOr_int8(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + int8_t * py = (int8_t *)y->datas; + int8_t * pa; + int8_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa | *pb; + } +} + +static void BitwiseOr_int16(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + int16_t * py = (int16_t *)y->datas; + int16_t * pa; + int16_t * pb; 
+ + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa | *pb; + } +} + +static void BitwiseOr_int32(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + int32_t * py = (int32_t *)y->datas; + int32_t * pa; + int32_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa | *pb; + } +} + +static void BitwiseOr_int64(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + int64_t * py = (int64_t *)y->datas; + int64_t * pa; + int64_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa | *pb; + } +} + +static void BitwiseOr_uint8(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + uint8_t * py = (uint8_t *)y->datas; + uint8_t * pa; + uint8_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa | *pb; + } +} + +static void BitwiseOr_uint16(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + uint16_t * py = (uint16_t *)y->datas; + uint16_t * pa; + uint16_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa | *pb; + } +} + +static void BitwiseOr_uint32(struct onnx_node_t * 
n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + uint32_t * py = (uint32_t *)y->datas; + uint32_t * pa; + uint32_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa | *pb; + } +} + +static void BitwiseOr_uint64(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + uint64_t * py = (uint64_t *)y->datas; + uint64_t * pa; + uint64_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa | *pb; + } +} + +void resolver_default_op_BitwiseOr(struct onnx_node_t * n) +{ + if(n->opset >= 18) + { + switch(n->inputs[0]->type) + { + case ONNX_TENSOR_TYPE_INT8: + n->init = BitwiseOr_init; + n->exit = BitwiseOr_exit; + n->reshape = BitwiseOr_reshape; + n->operator_ = BitwiseOr_int8; + break; + case ONNX_TENSOR_TYPE_INT16: + n->init = BitwiseOr_init; + n->exit = BitwiseOr_exit; + n->reshape = BitwiseOr_reshape; + n->operator_ = BitwiseOr_int16; + break; + case ONNX_TENSOR_TYPE_INT32: + n->init = BitwiseOr_init; + n->exit = BitwiseOr_exit; + n->reshape = BitwiseOr_reshape; + n->operator_ = BitwiseOr_int32; + break; + case ONNX_TENSOR_TYPE_INT64: + n->init = BitwiseOr_init; + n->exit = BitwiseOr_exit; + n->reshape = BitwiseOr_reshape; + n->operator_ = BitwiseOr_int64; + break; + case ONNX_TENSOR_TYPE_UINT8: + n->init = BitwiseOr_init; + n->exit = BitwiseOr_exit; + n->reshape = BitwiseOr_reshape; + n->operator_ = BitwiseOr_uint8; + break; + case ONNX_TENSOR_TYPE_UINT16: + n->init = BitwiseOr_init; + n->exit = BitwiseOr_exit; + n->reshape = BitwiseOr_reshape; + n->operator_ = BitwiseOr_uint16; + break; + case ONNX_TENSOR_TYPE_UINT32: + n->init = 
BitwiseOr_init; + n->exit = BitwiseOr_exit; + n->reshape = BitwiseOr_reshape; + n->operator_ = BitwiseOr_uint32; + break; + case ONNX_TENSOR_TYPE_UINT64: + n->init = BitwiseOr_init; + n->exit = BitwiseOr_exit; + n->reshape = BitwiseOr_reshape; + n->operator_ = BitwiseOr_uint64; + break; + default: + break; + } + } +} diff --git a/modules/fnxext/onnx_engine/src/default/BitwiseXor.c b/modules/fnxext/onnx_engine/src/default/BitwiseXor.c new file mode 100644 index 00000000000..befacad528d --- /dev/null +++ b/modules/fnxext/onnx_engine/src/default/BitwiseXor.c @@ -0,0 +1,218 @@ +#include "../onnx.h" + +static int BitwiseXor_init(struct onnx_node_t * n) +{ + if((n->ninput == 2) && (n->noutput == 1)) + return 1; + return 0; +} + +static int BitwiseXor_exit(struct onnx_node_t * n) +{ + return 1; +} + +static int BitwiseXor_reshape(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + + return onnx_tensor_reshape_multi_broadcast(y, a, b, a->type); +} + +static void BitwiseXor_int8(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + int8_t * py = (int8_t *)y->datas; + int8_t * pa; + int8_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa ^ *pb; + } +} + +static void BitwiseXor_int16(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + int16_t * py = (int16_t *)y->datas; + int16_t * pa; + int16_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa ^ *pb; + } +} + +static void BitwiseXor_int32(struct onnx_node_t * n) +{ + 
struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + int32_t * py = (int32_t *)y->datas; + int32_t * pa; + int32_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa ^ *pb; + } +} + +static void BitwiseXor_int64(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + int64_t * py = (int64_t *)y->datas; + int64_t * pa; + int64_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa ^ *pb; + } +} + +static void BitwiseXor_uint8(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + uint8_t * py = (uint8_t *)y->datas; + uint8_t * pa; + uint8_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa ^ *pb; + } +} + +static void BitwiseXor_uint16(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + uint16_t * py = (uint16_t *)y->datas; + uint16_t * pa; + uint16_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa ^ *pb; + } +} + +static void BitwiseXor_uint32(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + uint32_t * py = (uint32_t *)y->datas; + uint32_t * pa; + uint32_t * pb; + + for(size_t i = 0, l = 
y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa ^ *pb; + } +} + +static void BitwiseXor_uint64(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + uint64_t * py = (uint64_t *)y->datas; + uint64_t * pa; + uint64_t * pb; + + for(size_t i = 0, l = y->ndata; i < l; i++) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + py[i] = *pa ^ *pb; + } +} + +void resolver_default_op_BitwiseXor(struct onnx_node_t * n) +{ + if(n->opset >= 18) + { + switch(n->inputs[0]->type) + { + case ONNX_TENSOR_TYPE_INT8: + n->init = BitwiseXor_init; + n->exit = BitwiseXor_exit; + n->reshape = BitwiseXor_reshape; + n->operator_ = BitwiseXor_int8; + break; + case ONNX_TENSOR_TYPE_INT16: + n->init = BitwiseXor_init; + n->exit = BitwiseXor_exit; + n->reshape = BitwiseXor_reshape; + n->operator_ = BitwiseXor_int16; + break; + case ONNX_TENSOR_TYPE_INT32: + n->init = BitwiseXor_init; + n->exit = BitwiseXor_exit; + n->reshape = BitwiseXor_reshape; + n->operator_ = BitwiseXor_int32; + break; + case ONNX_TENSOR_TYPE_INT64: + n->init = BitwiseXor_init; + n->exit = BitwiseXor_exit; + n->reshape = BitwiseXor_reshape; + n->operator_ = BitwiseXor_int64; + break; + case ONNX_TENSOR_TYPE_UINT8: + n->init = BitwiseXor_init; + n->exit = BitwiseXor_exit; + n->reshape = BitwiseXor_reshape; + n->operator_ = BitwiseXor_uint8; + break; + case ONNX_TENSOR_TYPE_UINT16: + n->init = BitwiseXor_init; + n->exit = BitwiseXor_exit; + n->reshape = BitwiseXor_reshape; + n->operator_ = BitwiseXor_uint16; + break; + case ONNX_TENSOR_TYPE_UINT32: + n->init = BitwiseXor_init; + n->exit = BitwiseXor_exit; + n->reshape = BitwiseXor_reshape; + n->operator_ = BitwiseXor_uint32; + break; + case ONNX_TENSOR_TYPE_UINT64: + n->init = BitwiseXor_init; + n->exit = BitwiseXor_exit; + 
n->reshape = BitwiseXor_reshape; + n->operator_ = BitwiseXor_uint64; + break; + default: + break; + } + } +} diff --git a/modules/fnxext/onnx_engine/src/default/Cast.c b/modules/fnxext/onnx_engine/src/default/Cast.c index 6d292779e8d..237dde6f143 100644 --- a/modules/fnxext/onnx_engine/src/default/Cast.c +++ b/modules/fnxext/onnx_engine/src/default/Cast.c @@ -10,7 +10,7 @@ static int Cast_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->to = (enum onnx_tensor_type_t)onnx_attribute_read_int(n, "to", n->inputs[0]->type); @@ -26,7 +26,7 @@ static int Cast_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } @@ -147,9 +147,9 @@ static void Cast_bool(struct onnx_node_t * n) for(i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); + onnx_free(py[i]); sprintf(buf, "%u", (px[i] != 0) ? 
1 : 0); - py[i] = strdup(buf); + py[i] = onnx_strdup(buf); } } break; @@ -266,9 +266,9 @@ static void Cast_int8(struct onnx_node_t * n) for(i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); + onnx_free(py[i]); sprintf(buf, "%d", px[i]); - py[i] = strdup(buf); + py[i] = onnx_strdup(buf); } } break; @@ -385,9 +385,9 @@ static void Cast_int16(struct onnx_node_t * n) for(i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); + onnx_free(py[i]); sprintf(buf, "%d", px[i]); - py[i] = strdup(buf); + py[i] = onnx_strdup(buf); } } break; @@ -504,9 +504,9 @@ static void Cast_int32(struct onnx_node_t * n) for(i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); + onnx_free(py[i]); sprintf(buf, "%d", px[i]); - py[i] = strdup(buf); + py[i] = onnx_strdup(buf); } } break; @@ -623,9 +623,9 @@ static void Cast_int64(struct onnx_node_t * n) for(i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); + onnx_free(py[i]); sprintf(buf, "%ld", px[i]); - py[i] = strdup(buf); + py[i] = onnx_strdup(buf); } } break; @@ -742,9 +742,9 @@ static void Cast_uint8(struct onnx_node_t * n) for(i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); + onnx_free(py[i]); sprintf(buf, "%u", px[i]); - py[i] = strdup(buf); + py[i] = onnx_strdup(buf); } } break; @@ -861,9 +861,9 @@ static void Cast_uint16(struct onnx_node_t * n) for(i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); + onnx_free(py[i]); sprintf(buf, "%u", px[i]); - py[i] = strdup(buf); + py[i] = onnx_strdup(buf); } } break; @@ -980,9 +980,9 @@ static void Cast_uint32(struct onnx_node_t * n) for(i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); + onnx_free(py[i]); sprintf(buf, "%u", px[i]); - py[i] = strdup(buf); + py[i] = onnx_strdup(buf); } } break; @@ -1099,9 +1099,9 @@ static void Cast_uint64(struct onnx_node_t * n) for(i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); + onnx_free(py[i]); sprintf(buf, "%lu", px[i]); - py[i] = strdup(buf); + py[i] = onnx_strdup(buf); } } break; @@ 
-1218,9 +1218,9 @@ static void Cast_bfloat16(struct onnx_node_t * n) for(i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); + onnx_free(py[i]); sprintf(buf, "%g", bfloat16_to_float32(px[i])); - py[i] = strdup(buf); + py[i] = onnx_strdup(buf); } } break; @@ -1337,9 +1337,9 @@ static void Cast_float16(struct onnx_node_t * n) for(i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); + onnx_free(py[i]); sprintf(buf, "%g", float16_to_float32(px[i])); - py[i] = strdup(buf); + py[i] = onnx_strdup(buf); } } break; @@ -1456,9 +1456,9 @@ static void Cast_float32(struct onnx_node_t * n) for(i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); + onnx_free(py[i]); sprintf(buf, "%g", px[i]); - py[i] = strdup(buf); + py[i] = onnx_strdup(buf); } } break; @@ -1575,9 +1575,9 @@ static void Cast_float64(struct onnx_node_t * n) for(i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); + onnx_free(py[i]); sprintf(buf, "%g", px[i]); - py[i] = strdup(buf); + py[i] = onnx_strdup(buf); } } break; @@ -1693,8 +1693,8 @@ static void Cast_string(struct onnx_node_t * n) for(i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); - py[i] = strdup(px[i]); + onnx_free(py[i]); + py[i] = onnx_strdup(px[i]); } } break; diff --git a/modules/fnxext/onnx_engine/src/default/Celu.c b/modules/fnxext/onnx_engine/src/default/Celu.c index 538cb83d04e..e8641ac43e3 100644 --- a/modules/fnxext/onnx_engine/src/default/Celu.c +++ b/modules/fnxext/onnx_engine/src/default/Celu.c @@ -10,7 +10,7 @@ static int Celu_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->alpha = onnx_attribute_read_float(n, "alpha", 1.0); @@ -26,7 +26,7 @@ static int Celu_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } @@ -47,7 +47,7 @@ static void 
Celu_float32(struct onnx_node_t * n) float * py = (float *)y->datas; for(size_t i = 0, l = y->ndata; i < l; i++) - py[i] = maxx((float)0.0, (float)px[i]) + minn((float)0.0, (float)pdat->alpha * (expf(px[i] / pdat->alpha) - 1)); + py[i] = XMAX((float)0.0, (float)px[i]) + XMIN((float)0.0, (float)pdat->alpha * (expf(px[i] / pdat->alpha) - 1)); } void resolver_default_op_Celu(struct onnx_node_t * n) diff --git a/modules/fnxext/onnx_engine/src/default/Clip.c b/modules/fnxext/onnx_engine/src/default/Clip.c index 1419ef03aa6..cf5f2b96688 100644 --- a/modules/fnxext/onnx_engine/src/default/Clip.c +++ b/modules/fnxext/onnx_engine/src/default/Clip.c @@ -35,7 +35,7 @@ static int Clip_init(struct onnx_node_t * n) if((n->ninput >= 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->pmin = NULL; @@ -52,7 +52,7 @@ static int Clip_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } @@ -65,13 +65,13 @@ static int Clip_reshape(struct onnx_node_t * n) pdat->pmin = NULL; pdat->pmax = NULL; - for(i = 1; i < minn(3, n->ninput); i++) + for(i = 1; i < XMIN(3, n->ninput); i++) { if(n->inputs[i]->ndim == 0) { - if(strcmp(n->inputs[i]->name, "min") == 0) + if(onnx_strcmp(n->inputs[i]->name, "min") == 0) pdat->pmin = (union onnx_scalar_t *)n->inputs[i]->datas; - else if(strcmp(n->inputs[i]->name, "max") == 0) + else if(onnx_strcmp(n->inputs[i]->name, "max") == 0) pdat->pmax = (union onnx_scalar_t *)n->inputs[i]->datas; } } diff --git a/modules/fnxext/onnx_engine/src/default/Concat.c b/modules/fnxext/onnx_engine/src/default/Concat.c index 4cd37d85e67..2369c31b4e7 100644 --- a/modules/fnxext/onnx_engine/src/default/Concat.c +++ b/modules/fnxext/onnx_engine/src/default/Concat.c @@ -11,7 +11,7 @@ static int Concat_init(struct onnx_node_t * n) if((n->ninput >= 1) && (n->noutput == 1)) { - pdat = 
malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->axis = onnx_attribute_read_int(n, "axis", 1); @@ -27,7 +27,7 @@ static int Concat_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } @@ -50,13 +50,16 @@ static int Concat_reshape(struct onnx_node_t * n) for(i = 1; i < n->ninput; i++) { pdims = n->inputs[i]->dims; - for(j = 0; j < ndim; j++) + if(pdims) { - if(j == pdat->caxis) - s += pdims[j]; - else if(x->dims[j] != pdims[j]) - return 0; - dims[j] = pdims[j]; + for(j = 0; j < ndim; j++) + { + if(j == pdat->caxis) + s += pdims[j]; + else if(x->dims[j] != pdims[j]) + return 0; + dims[j] = pdims[j]; + } } } dims[pdat->caxis] = s; @@ -90,8 +93,8 @@ static void Concat_operator(struct onnx_node_t * n) for(o = 0, j = 0, k = ybase, l = x->ndata; o < l; o++) { if(py[k + o]) - free(py[k + o]); - py[k + o] = strdup(px[o]); + onnx_free(py[k + o]); + py[k + o] = onnx_strdup(px[o]); if(++j == xpitch) { k += (ypitch - xpitch); @@ -116,7 +119,7 @@ static void Concat_operator(struct onnx_node_t * n) xpitch *= x->dims[i]; for(o = 0, j = 0, k = ybase, l = x->ndata; o < l; o++) { - memcpy(py + (k + o) * sz, px + o * sz, sz); + onnx_memcpy(py + (k + o) * sz, px + o * sz, sz); if(++j == xpitch) { k += (ypitch - xpitch); diff --git a/modules/fnxext/onnx_engine/src/default/Constant.c b/modules/fnxext/onnx_engine/src/default/Constant.c index 3422b200ad8..b62bc6e18d4 100644 --- a/modules/fnxext/onnx_engine/src/default/Constant.c +++ b/modules/fnxext/onnx_engine/src/default/Constant.c @@ -14,7 +14,7 @@ static int Constant_init(struct onnx_node_t * n) switch(attr->type) { case ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__FLOAT: - if(strcmp(attr->name, "value_float") == 0) + if(onnx_strcmp(attr->name, "value_float") == 0) { if((y->ndim != 0) || (y->type != ONNX_TENSOR_TYPE_FLOAT32)) onnx_tensor_reinit(y, ONNX_TENSOR_TYPE_FLOAT32, NULL, 
0); @@ -23,7 +23,7 @@ static int Constant_init(struct onnx_node_t * n) } break; case ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__INT: - if(strcmp(attr->name, "value_int") == 0) + if(onnx_strcmp(attr->name, "value_int") == 0) { if((y->ndim != 0) || (y->type != ONNX_TENSOR_TYPE_INT64)) onnx_tensor_reinit(y, ONNX_TENSOR_TYPE_INT64, NULL, 0); @@ -34,7 +34,7 @@ static int Constant_init(struct onnx_node_t * n) case ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__STRING: break; case ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__FLOATS: - if((strcmp(attr->name, "value_floats") == 0) && (attr->n_floats > 0)) + if((onnx_strcmp(attr->name, "value_floats") == 0) && (attr->n_floats > 0)) { if((y->ndim != 1) || (y->dims[0] != attr->n_floats) || (y->type != ONNX_TENSOR_TYPE_FLOAT32)) onnx_tensor_reinit(y, ONNX_TENSOR_TYPE_FLOAT32, (int[]){ attr->n_floats }, 1); @@ -43,7 +43,7 @@ static int Constant_init(struct onnx_node_t * n) } break; case ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__INTS: - if((strcmp(attr->name, "value_ints") == 0) && (attr->n_ints > 0)) + if((onnx_strcmp(attr->name, "value_ints") == 0) && (attr->n_ints > 0)) { if((y->ndim != 1) || (y->dims[0] != attr->n_ints) || (y->type != ONNX_TENSOR_TYPE_INT64)) onnx_tensor_reinit(y, ONNX_TENSOR_TYPE_INT64, (int[]){ attr->n_ints }, 1); @@ -52,7 +52,7 @@ static int Constant_init(struct onnx_node_t * n) } break; case ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__STRINGS: - if((strcmp(attr->name, "value_strings") == 0) && (attr->n_strings > 0)) + if((onnx_strcmp(attr->name, "value_strings") == 0) && (attr->n_strings > 0)) { if((y->ndim != 1) || (y->dims[0] != attr->n_strings) || (y->type != ONNX_TENSOR_TYPE_STRING)) onnx_tensor_reinit(y, ONNX_TENSOR_TYPE_STRING, (int[]){ attr->n_strings }, 1); @@ -63,17 +63,17 @@ static int Constant_init(struct onnx_node_t * n) { if(str[i]) { - free(str[i]); + onnx_free(str[i]); str[i] = NULL; } } for(size_t i = 0; i < y->ndata; i++) { - str[i] = malloc(attr->strings[i].len + 1); + str[i] = onnx_malloc(attr->strings[i].len + 
1); if(str[i]) { str[i][attr->strings[i].len] = 0; - memcpy(str[i], attr->strings[i].data, attr->strings[i].len); + onnx_memcpy(str[i], attr->strings[i].data, attr->strings[i].len); } } } diff --git a/modules/fnxext/onnx_engine/src/default/ConstantOfShape.c b/modules/fnxext/onnx_engine/src/default/ConstantOfShape.c index 985bad519f6..da150e10fbd 100644 --- a/modules/fnxext/onnx_engine/src/default/ConstantOfShape.c +++ b/modules/fnxext/onnx_engine/src/default/ConstantOfShape.c @@ -39,13 +39,13 @@ static int ConstantOfShape_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { for(i = 0; i < n->proto->n_attribute; i++) { attr = n->proto->attribute[i]; - if((attr->type == ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__TENSOR) && (strcmp(attr->name, "value") == 0)) + if((attr->type == ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__TENSOR) && (onnx_strcmp(attr->name, "value") == 0)) { t = attr->t; break; @@ -57,7 +57,17 @@ static int ConstantOfShape_init(struct onnx_node_t * n) switch(t->data_type) { case ONNX__TENSOR_PROTO__DATA_TYPE__FLOAT: - pdat->scalar.v_float32 = t->float_data[0]; + if(t->n_float_data > 0) + pdat->scalar.v_float32 = t->float_data[0]; + else if(t->raw_data.len >= sizeof(float)) + { + uint32_t * q = (uint32_t *)t->raw_data.data; + union { uint32_t u; float f; } v; + v.u = le32_to_cpu(q[0]); + pdat->scalar.v_float32 = v.f; + } + else + pdat->scalar.v_float32 = 0.0f; break; case ONNX__TENSOR_PROTO__DATA_TYPE__UINT8: pdat->scalar.v_uint8 = t->int32_data[0]; @@ -104,14 +114,14 @@ static int ConstantOfShape_init(struct onnx_node_t * n) pdat->scalar.v_complex128.imaginary = t->double_data[1]; break; default: - memset(&pdat->scalar, 0, sizeof(union onnx_scalar_t)); + onnx_memset(&pdat->scalar, 0, sizeof(union onnx_scalar_t)); break; } } else { pdat->type = ONNX_TENSOR_TYPE_FLOAT32; - memset(&pdat->scalar, 0, sizeof(union onnx_scalar_t)); + 
pdat->scalar.v_float32 = 0.0f; } pdat->size = onnx_tensor_type_sizeof(pdat->type); n->priv = pdat; @@ -126,7 +136,7 @@ static int ConstantOfShape_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } @@ -155,7 +165,7 @@ static void ConstantOfShape_operator(struct onnx_node_t * n) onnx_tensor_reinit(y, pdat->type, NULL, 0); } for(i = 0, l = y->ndata, p = y->datas; i < l; i++, p += pdat->size) - memcpy(p, &pdat->scalar, pdat->size); + onnx_memcpy(p, &pdat->scalar, pdat->size); } void resolver_default_op_ConstantOfShape(struct onnx_node_t * n) diff --git a/modules/fnxext/onnx_engine/src/default/Conv.c b/modules/fnxext/onnx_engine/src/default/Conv.c index 35748311127..5e52e2f390f 100644 --- a/modules/fnxext/onnx_engine/src/default/Conv.c +++ b/modules/fnxext/onnx_engine/src/default/Conv.c @@ -36,10 +36,10 @@ static int Conv_init(struct onnx_node_t * n) if((n->ninput >= 2) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { - memset(pdat, 0, sizeof(struct operator_pdata_t)); + onnx_memset(pdat, 0, sizeof(struct operator_pdata_t)); switch(shash(onnx_attribute_read_string(n, "auto_pad", "NOTSET"))) { case 0xc3966fc2: /* "NOTSET" */ @@ -62,12 +62,12 @@ static int Conv_init(struct onnx_node_t * n) pdat->nkernel = onnx_attribute_read_ints(n, "kernel_shape", &ints); if(pdat->nkernel > 0) { - pdat->kernels = malloc(sizeof(int) * pdat->nkernel); + pdat->kernels = onnx_malloc(sizeof(int) * pdat->nkernel); for(i = 0; i < pdat->nkernel; i++) pdat->kernels[i] = ints[i]; } pdat->ndilation = pdat->nkernel; - pdat->dilations = malloc(sizeof(int) * pdat->ndilation); + pdat->dilations = onnx_malloc(sizeof(int) * pdat->ndilation); if(pdat->dilations) { l = onnx_attribute_read_ints(n, "dilations", &ints); @@ -77,7 +77,7 @@ static int Conv_init(struct onnx_node_t * n) pdat->dilations[i] = 1; } pdat->npad = 
pdat->nkernel * 2; - pdat->pads = malloc(sizeof(int) * pdat->npad); + pdat->pads = onnx_malloc(sizeof(int) * pdat->npad); if(pdat->pads) { l = onnx_attribute_read_ints(n, "pads", &ints); @@ -87,7 +87,7 @@ static int Conv_init(struct onnx_node_t * n) pdat->pads[i] = 0; } pdat->nstride = pdat->nkernel; - pdat->strides = malloc(sizeof(int) * pdat->nstride); + pdat->strides = onnx_malloc(sizeof(int) * pdat->nstride); if(pdat->strides) { l = onnx_attribute_read_ints(n, "strides", &ints); @@ -110,14 +110,14 @@ static int Conv_exit(struct onnx_node_t * n) if(pdat) { if(pdat->kernels) - free(pdat->kernels); + onnx_free(pdat->kernels); if(pdat->dilations) - free(pdat->dilations); + onnx_free(pdat->dilations); if(pdat->pads) - free(pdat->pads); + onnx_free(pdat->pads); if(pdat->strides) - free(pdat->strides); - free(pdat); + onnx_free(pdat->strides); + onnx_free(pdat); } return 1; } @@ -136,7 +136,7 @@ static int Conv_reshape(struct onnx_node_t * n) switch(pdat->auto_pad) { case AUTO_PAD_NOTSET: - memcpy(pdat->cpads, pdat->pads, sizeof(int) * pdat->npad); + onnx_memcpy(pdat->cpads, pdat->pads, sizeof(int) * pdat->npad); break; case AUTO_PAD_SAME_UPPER: for(i = 0; i < pdat->npad / 2; i++) @@ -155,7 +155,7 @@ static int Conv_reshape(struct onnx_node_t * n) } break; case AUTO_PAD_VALID: - memset(pdat->cpads, 0, sizeof(int) * pdat->npad); + onnx_memset(pdat->cpads, 0, sizeof(int) * pdat->npad); break; default: break; @@ -320,21 +320,21 @@ static void Conv_float16(struct onnx_node_t * n) typedef float (*mytype)/*[oH * oW]*/[MM]; /* try im2col first */ - matw = malloc(MM * H * W * C * sizeof(float)); - matx = malloc(oH * oW * H * W * C * sizeof(float)); - maty = malloc(oH * oW * MM * sizeof(float)); + matw = onnx_malloc(MM * H * W * C * sizeof(float)); + matx = onnx_malloc(oH * oW * H * W * C * sizeof(float)); + maty = onnx_malloc(oH * oW * MM * sizeof(float)); if (matw && matx && maty) { conv_mode = CONV_IM2COL; } else { - if (matw) free(matw); - if (matx) free(matx); - if (maty) 
free(maty); + if (matw) onnx_free(matw); + if (matx) onnx_free(matx); + if (maty) onnx_free(maty); /* then try cached conv */ - pxcache = malloc(oN * (oC * pdat->group / M) * C * H * W * sizeof(float)); + pxcache = onnx_malloc(oN * (oC * pdat->group / M) * C * H * W * sizeof(float)); if (pxcache) { conv_mode = CONV_CACHED; @@ -419,7 +419,7 @@ static void Conv_float16(struct onnx_node_t * n) } if (pxcache) { - free(pxcache); + onnx_free(pxcache); } } else if (conv_mode == CONV_IM2COL) @@ -487,9 +487,9 @@ static void Conv_float16(struct onnx_node_t * n) } } } - free(matw); - free(matx); - free(maty); + onnx_free(matw); + onnx_free(matx); + onnx_free(maty); } else { @@ -503,13 +503,13 @@ static void Conv_float16(struct onnx_node_t * n) int w_dim[ndim]; int b_dim[ndim]; - memset(o_dim, 0, sizeof(o_dim)); + onnx_memset(o_dim, 0, sizeof(o_dim)); do { b_dim[0] = o_dim[0]; for(i = 2; i < ndim; i++) b_dim[i] = o_dim[i] * pdat->strides[i - 2] - pdat->cpads[i - 2]; sum = 0; - memset(w_dim, 0, sizeof(w_dim)); + onnx_memset(w_dim, 0, sizeof(w_dim)); w_dim[0] = o_dim[1]; do { if(w_dim[1] == 1) @@ -606,21 +606,21 @@ static void Conv_float32(struct onnx_node_t * n) typedef float (*mytype)/*[oH * oW]*/[MM]; /* try im2col first */ - matw = malloc(MM * H * W * C * sizeof(float)); - matx = malloc(oH * oW * H * W * C * sizeof(float)); - maty = malloc(oH * oW * MM * sizeof(float)); + matw = onnx_malloc(MM * H * W * C * sizeof(float)); + matx = onnx_malloc(oH * oW * H * W * C * sizeof(float)); + maty = onnx_malloc(oH * oW * MM * sizeof(float)); if (matw && matx && maty) { conv_mode = CONV_IM2COL; } else { - if (matw) free(matw); - if (matx) free(matx); - if (maty) free(maty); + if (matw) onnx_free(matw); + if (matx) onnx_free(matx); + if (maty) onnx_free(maty); /* then try cached conv */ - pxcache = malloc(oN * (oC * pdat->group / M) * C * H * W * sizeof(float)); + pxcache = onnx_malloc(oN * (oC * pdat->group / M) * C * H * W * sizeof(float)); if (pxcache) { conv_mode = CONV_CACHED; @@ 
-705,7 +705,7 @@ static void Conv_float32(struct onnx_node_t * n) } if (pxcache) { - free(pxcache); + onnx_free(pxcache); } } else if (conv_mode == CONV_IM2COL) @@ -773,9 +773,9 @@ static void Conv_float32(struct onnx_node_t * n) } } } - free(matw); - free(matx); - free(maty); + onnx_free(matw); + onnx_free(matx); + onnx_free(maty); } else { @@ -789,13 +789,13 @@ static void Conv_float32(struct onnx_node_t * n) int w_dim[ndim]; int b_dim[ndim]; - memset(o_dim, 0, sizeof(o_dim)); + onnx_memset(o_dim, 0, sizeof(o_dim)); do { b_dim[0] = o_dim[0]; for(i = 2; i < ndim; i++) b_dim[i] = o_dim[i] * pdat->strides[i - 2] - pdat->cpads[i - 2]; sum = 0; - memset(w_dim, 0, sizeof(w_dim)); + onnx_memset(w_dim, 0, sizeof(w_dim)); w_dim[0] = o_dim[1]; do { if(w_dim[1] == 1) @@ -892,21 +892,21 @@ static void Conv_float64(struct onnx_node_t * n) typedef double (*mytype)/*[oH * oW]*/[MM]; /* try im2col first */ - matw = malloc(MM * H * W * C * sizeof(double)); - matx = malloc(oH * oW * H * W * C * sizeof(double)); - maty = malloc(oH * oW * MM * sizeof(double)); + matw = onnx_malloc(MM * H * W * C * sizeof(double)); + matx = onnx_malloc(oH * oW * H * W * C * sizeof(double)); + maty = onnx_malloc(oH * oW * MM * sizeof(double)); if (matw && matx && maty) { conv_mode = CONV_IM2COL; } else { - if (matw) free(matw); - if (matx) free(matx); - if (maty) free(maty); + if (matw) onnx_free(matw); + if (matx) onnx_free(matx); + if (maty) onnx_free(maty); /* then try cached conv */ - pxcache = malloc(oN * (oC * pdat->group / M) * C * H * W * sizeof(double)); + pxcache = onnx_malloc(oN * (oC * pdat->group / M) * C * H * W * sizeof(double)); if (pxcache) { conv_mode = CONV_CACHED; @@ -991,7 +991,7 @@ static void Conv_float64(struct onnx_node_t * n) } if (pxcache) { - free(pxcache); + onnx_free(pxcache); } } else if (conv_mode == CONV_IM2COL) @@ -1059,9 +1059,9 @@ static void Conv_float64(struct onnx_node_t * n) } } } - free(matw); - free(matx); - free(maty); + onnx_free(matw); + onnx_free(matx); + 
onnx_free(maty); } else { @@ -1075,13 +1075,13 @@ static void Conv_float64(struct onnx_node_t * n) int w_dim[ndim]; int b_dim[ndim]; - memset(o_dim, 0, sizeof(o_dim)); + onnx_memset(o_dim, 0, sizeof(o_dim)); do { b_dim[0] = o_dim[0]; for(i = 2; i < ndim; i++) b_dim[i] = o_dim[i] * pdat->strides[i - 2] - pdat->cpads[i - 2]; sum = 0; - memset(w_dim, 0, sizeof(w_dim)); + onnx_memset(w_dim, 0, sizeof(w_dim)); w_dim[0] = o_dim[1]; do { if(w_dim[1] == 1) diff --git a/modules/fnxext/onnx_engine/src/default/ConvInteger.c b/modules/fnxext/onnx_engine/src/default/ConvInteger.c index 147f4843437..68d4e3a8f4e 100644 --- a/modules/fnxext/onnx_engine/src/default/ConvInteger.c +++ b/modules/fnxext/onnx_engine/src/default/ConvInteger.c @@ -1,8 +1,492 @@ #include "../onnx.h" +enum auto_pad_t { + AUTO_PAD_NOTSET = 0, + AUTO_PAD_SAME_UPPER = 1, + AUTO_PAD_SAME_LOWER = 2, + AUTO_PAD_VALID = 3, +}; + +struct operator_pdata_t { + enum auto_pad_t auto_pad; + int group; + int * kernels; + int nkernel; + int * dilations; + int ndilation; + int * pads; + int npad; + int * strides; + int nstride; + + int cpads[32]; +}; + +static int ConvInteger_init(struct onnx_node_t * n) +{ + struct operator_pdata_t * pdat; + int64_t * ints; + int i, l; + + if((n->ninput >= 2) && (n->noutput == 1)) + { + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); + if(pdat) + { + onnx_memset(pdat, 0, sizeof(struct operator_pdata_t)); + pdat->auto_pad = AUTO_PAD_NOTSET; + // switch(shash(onnx_attribute_read_string(n, "auto_pad", "NOTSET"))) + // { + // case 0xc3966fc2: /* "NOTSET" */ + // pdat->auto_pad = AUTO_PAD_NOTSET; + // break; + // case 0xcbbc7856: /* "SAME_UPPER" */ + // pdat->auto_pad = AUTO_PAD_SAME_UPPER; + // break; + // case 0xcb192d33: /* "SAME_LOWER" */ + // pdat->auto_pad = AUTO_PAD_SAME_LOWER; + // break; + // case 0x0e382d15: /* "VALID" */ + // pdat->auto_pad = AUTO_PAD_VALID; + // break; + // default: + // pdat->auto_pad = AUTO_PAD_NOTSET; + // break; + // } + pdat->group = 
onnx_attribute_read_int(n, "group", 1); + pdat->nkernel = onnx_attribute_read_ints(n, "kernel_shape", &ints); + if(pdat->nkernel > 0) + { + pdat->kernels = onnx_malloc(sizeof(int) * pdat->nkernel); + for(i = 0; i < pdat->nkernel; i++) + pdat->kernels[i] = ints[i]; + } + pdat->ndilation = pdat->nkernel; + pdat->dilations = onnx_malloc(sizeof(int) * pdat->ndilation); + if(pdat->dilations) + { + l = onnx_attribute_read_ints(n, "dilations", &ints); + for(i = 0; i < l; i++) + pdat->dilations[i] = ints[i]; + for(; i < pdat->ndilation; i++) + pdat->dilations[i] = 1; + } + pdat->npad = pdat->nkernel * 2; + pdat->pads = onnx_malloc(sizeof(int) * pdat->npad); + if(pdat->pads) + { + l = onnx_attribute_read_ints(n, "pads", &ints); + for(i = 0; i < l; i++) + pdat->pads[i] = ints[i]; + for(; i < pdat->npad; i++) + pdat->pads[i] = 0; + } + pdat->nstride = pdat->nkernel; + pdat->strides = onnx_malloc(sizeof(int) * pdat->nstride); + if(pdat->strides) + { + l = onnx_attribute_read_ints(n, "strides", &ints); + for(i = 0; i < l; i++) + pdat->strides[i] = ints[i]; + for(; i < pdat->nstride; i++) + pdat->strides[i] = 1; + } + n->priv = pdat; + return 1; + } + } + return 0; +} + +static int ConvInteger_exit(struct onnx_node_t * n) +{ + struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; + + if(pdat) + { + if(pdat->kernels) + onnx_free(pdat->kernels); + if(pdat->dilations) + onnx_free(pdat->dilations); + if(pdat->pads) + onnx_free(pdat->pads); + if(pdat->strides) + onnx_free(pdat->strides); + onnx_free(pdat); + } + return 1; +} + +static int ConvInteger_reshape(struct onnx_node_t * n) +{ + struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * x = n->inputs[0]; + struct onnx_tensor_t * w = n->inputs[1]; + int ndim = x->ndim; + int dims[ndim]; + int pad; + int i; + + switch(pdat->auto_pad) + { + case AUTO_PAD_NOTSET: + onnx_memcpy(pdat->cpads, pdat->pads, sizeof(int) * pdat->npad); + break; 
+ case AUTO_PAD_SAME_UPPER: + for(i = 0; i < pdat->npad / 2; i++) + { + pad = (ceilf(x->dims[i + 2] / (float)pdat->strides[i]) - 1) * pdat->strides[i] + ((pdat->kernels[i] - 1) * pdat->dilations[i] + 1) - x->dims[i + 2]; + pdat->cpads[i] = pad / 2; + pdat->cpads[i + pdat->nkernel] = pad - pdat->cpads[i]; + } + break; + case AUTO_PAD_SAME_LOWER: + for(i = 0; i < pdat->npad / 2; i++) + { + pad = (ceilf(x->dims[i + 2] / (float)pdat->strides[i]) - 1) * pdat->strides[i] + ((pdat->kernels[i] - 1) * pdat->dilations[i] + 1) - x->dims[i + 2]; + pdat->cpads[i + pdat->nkernel] = pad / 2; + pdat->cpads[i] = pad - pdat->cpads[i + pdat->nkernel]; + } + break; + case AUTO_PAD_VALID: + onnx_memset(pdat->cpads, 0, sizeof(int) * pdat->npad); + break; + default: + break; + } + dims[0] = x->dims[0]; + dims[1] = w->dims[0]; + for(i = 0; i < ndim - 2; i++) + { + switch(pdat->auto_pad) + { + case AUTO_PAD_NOTSET: + dims[i + 2] = floorf((x->dims[i + 2] + pdat->cpads[i] + pdat->cpads[i + pdat->nkernel] - ((pdat->kernels[i] - 1) * pdat->dilations[i] + 1)) / (float)pdat->strides[i] + 1); + break; + case AUTO_PAD_SAME_UPPER: + case AUTO_PAD_SAME_LOWER: + dims[i + 2] = ceilf(x->dims[i + 2] / (float)pdat->strides[i]); + break; + case AUTO_PAD_VALID: + dims[i + 2] = ceilf((x->dims[i + 2] - ((pdat->kernels[i] - 1) * pdat->dilations[i] + 1) + 1) / (float)pdat->strides[i]); + break; + default: + break; + } + } + return onnx_tensor_reshape(y, dims, ndim, ONNX_TENSOR_TYPE_INT32); +} + +static inline int dim_next(int ndim, int * dims, int * dim_max) +{ + if(ndim == 0) + return 0; + while(1) + { + ndim = ndim - 1; + dims[ndim] += 1; + if(dims[ndim] < dim_max[ndim]) + return 1; + else + { + if(ndim == 0) + return 0; + dims[ndim] = 0; + } + } +} + +static inline int dim_offset(int ndim, int * dims, int * dim_max) +{ + int o, s; + int i; + + for(i = ndim - 1, o = 0, s = 1; i >= 0; i--) + { + o += dims[i] * s; + s *= dim_max[i]; + } + return o; +} + +static void ConvInteger_int8_uint8(struct onnx_node_t * 
n) +{ + struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * x = n->inputs[0]; + struct onnx_tensor_t * w = n->inputs[1]; + struct onnx_tensor_t * x_zero_point = (n->ninput > 2) ? n->inputs[2] : NULL; + struct onnx_tensor_t * w_zero_point = (n->ninput > 3) ? n->inputs[3] : NULL; + int32_t * py = (int32_t *)y->datas; + int8_t * px = (int8_t *)x->datas; + int8_t * pw = (int8_t *)w->datas; + int8_t x_zp = x_zero_point ? *((int8_t *)x_zero_point->datas) : 0; + int8_t w_zp = w_zero_point ? *((int8_t *)w_zero_point->datas) : 0; + + int32_t sum; + int v, weight; + int ndim = x->ndim; + int M = w->dims[0]; + int C = w->dims[1]; + int H = w->dims[2]; + int W = w->dims[3]; + int ch, i; + + if(ndim == 4) + { + int iC = x->dims[1]; + int iH = x->dims[2]; + int iW = x->dims[3]; + + int oN = y->dims[0]; + int oC = w->dims[0]; + int oH = y->dims[2]; + int oW = y->dims[3]; + + typedef int8_t (*pxtype)[iC][iH][iW]; + typedef int8_t (*pwtype)[C][H][W]; + typedef int32_t (*pytype)[M][oH][oW]; + + for(int h = 0; h < oH; ++h) + { + for(int w = 0; w < oW; ++w) + { + int base_h = h * pdat->strides[0] - pdat->cpads[0]; + int base_w = w * pdat->strides[1] - pdat->cpads[1]; + + for(int n = 0; n < oN; ++n) + { + for(int c = 0; c < oC; ++c) + { + int base_c = (c * pdat->group / M) * C; + sum = 0; + for(int i = (base_h < 0 ? (-base_h) / pdat->dilations[0] : 0); i < H; ++i) + { + int input_h = base_h + i * pdat->dilations[0]; + if(input_h >= iH) + break; + for(int j = (base_w < 0 ? 
(-base_w) / pdat->dilations[1] : 0); j < W; ++j) + { + int input_w = base_w + j * pdat->dilations[1]; + if(input_w >= iW) + break; + for(int w_channel = 0; w_channel < C; ++w_channel) + { + ch = base_c + w_channel; + v = ((pxtype)px)[n][ch][input_h][input_w] - x_zp; + weight = ((pwtype)pw)[c][w_channel][i][j] - w_zp; + sum += v * weight; + } + } + } + ((pytype)py)[n][c][h][w] = sum; + } + } + } + } + } + else + { + int i_dim[ndim]; + int o_dim[ndim]; + int w_dim[ndim]; + int b_dim[ndim]; + + onnx_memset(o_dim, 0, sizeof(o_dim)); + do { + b_dim[0] = o_dim[0]; + for(i = 2; i < ndim; i++) + b_dim[i] = o_dim[i] * pdat->strides[i - 2] - pdat->cpads[i - 2]; + sum = 0; + onnx_memset(w_dim, 0, sizeof(w_dim)); + w_dim[0] = o_dim[1]; + do { + if(w_dim[1] == 1) + break; + i_dim[0] = b_dim[0]; + for(i = 2; i < ndim; i++) + i_dim[i] = b_dim[i] + w_dim[i] * pdat->dilations[i - 2]; + for(ch = 0; ch < C; ch++) + { + i_dim[1] = (o_dim[1] * pdat->group / M) * C + ch; + w_dim[1] = ch; + for(i = 0; i < ndim; i++) + { + if((i_dim[i] < 0) || (i_dim[i] >= x->dims[i])) + { + v = -x_zp; + break; + } + } + if(i >= ndim) + v = px[dim_offset(ndim, i_dim, x->dims)] - x_zp; + for(i = 0; i < ndim; i++) + { + if((w_dim[i] < 0) || (w_dim[i] >= w->dims[i])) + { + weight = -w_zp; + break; + } + } + if(i >= ndim) + weight = pw[dim_offset(ndim, w_dim, w->dims)] - w_zp; + sum += v * weight; + } + w_dim[1] = 0; + } while(dim_next(ndim, w_dim, w->dims)); + py[dim_offset(ndim, o_dim, y->dims)] = sum; + } while(dim_next(ndim, o_dim, y->dims)); + } +} + +static void ConvInteger_uint8_uint8(struct onnx_node_t * n) +{ + struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * x = n->inputs[0]; + struct onnx_tensor_t * w = n->inputs[1]; + struct onnx_tensor_t * x_zero_point = (n->ninput > 2) ? n->inputs[2] : NULL; + struct onnx_tensor_t * w_zero_point = (n->ninput > 3) ? 
n->inputs[3] : NULL; + int32_t * py = (int32_t *)y->datas; + uint8_t * px = (uint8_t *)x->datas; + uint8_t * pw = (uint8_t *)w->datas; + uint8_t x_zp = x_zero_point ? *((uint8_t *)x_zero_point->datas) : 0; + uint8_t w_zp = w_zero_point ? *((uint8_t *)w_zero_point->datas) : 0; + + int32_t sum; + int v, weight; + int ndim = x->ndim; + int M = w->dims[0]; + int C = w->dims[1]; + int H = w->dims[2]; + int W = w->dims[3]; + int ch, i; + + if(ndim == 4) + { + int iC = x->dims[1]; + int iH = x->dims[2]; + int iW = x->dims[3]; + + int oN = y->dims[0]; + int oC = w->dims[0]; + int oH = y->dims[2]; + int oW = y->dims[3]; + + typedef uint8_t (*pxtype)[iC][iH][iW]; + typedef uint8_t (*pwtype)[C][H][W]; + typedef int32_t (*pytype)[M][oH][oW]; + + for(int h = 0; h < oH; ++h) + { + for(int w = 0; w < oW; ++w) + { + int base_h = h * pdat->strides[0] - pdat->cpads[0]; + int base_w = w * pdat->strides[1] - pdat->cpads[1]; + + for(int n = 0; n < oN; ++n) + { + for(int c = 0; c < oC; ++c) + { + int base_c = (c * pdat->group / M) * C; + sum = 0; + for(int i = (base_h < 0 ? (-base_h) / pdat->dilations[0] : 0); i < H; ++i) + { + int input_h = base_h + i * pdat->dilations[0]; + if(input_h >= iH) + break; + for(int j = (base_w < 0 ? 
(-base_w) / pdat->dilations[1] : 0); j < W; ++j) + { + int input_w = base_w + j * pdat->dilations[1]; + if(input_w >= iW) + break; + for(int w_channel = 0; w_channel < C; ++w_channel) + { + ch = base_c + w_channel; + v = ((pxtype)px)[n][ch][input_h][input_w] - x_zp; + weight = ((pwtype)pw)[c][w_channel][i][j] - w_zp; + sum += v * weight; + } + } + } + ((pytype)py)[n][c][h][w] = sum; + } + } + } + } + } + else + { + int i_dim[ndim]; + int o_dim[ndim]; + int w_dim[ndim]; + int b_dim[ndim]; + + onnx_memset(o_dim, 0, sizeof(o_dim)); + do { + b_dim[0] = o_dim[0]; + for(i = 2; i < ndim; i++) + b_dim[i] = o_dim[i] * pdat->strides[i - 2] - pdat->cpads[i - 2]; + sum = 0; + onnx_memset(w_dim, 0, sizeof(w_dim)); + w_dim[0] = o_dim[1]; + do { + if(w_dim[1] == 1) + break; + i_dim[0] = b_dim[0]; + for(i = 2; i < ndim; i++) + i_dim[i] = b_dim[i] + w_dim[i] * pdat->dilations[i - 2]; + for(ch = 0; ch < C; ch++) + { + i_dim[1] = (o_dim[1] * pdat->group / M) * C + ch; + w_dim[1] = ch; + for(i = 0; i < ndim; i++) + { + if((i_dim[i] < 0) || (i_dim[i] >= x->dims[i])) + { + v = -x_zp; + break; + } + } + if(i >= ndim) + v = px[dim_offset(ndim, i_dim, x->dims)] - x_zp; + for(i = 0; i < ndim; i++) + { + if((w_dim[i] < 0) || (w_dim[i] >= w->dims[i])) + { + weight = -w_zp; + break; + } + } + if(i >= ndim) + weight = pw[dim_offset(ndim, w_dim, w->dims)] - w_zp; + sum += v * weight; + } + w_dim[1] = 0; + } while(dim_next(ndim, w_dim, w->dims)); + py[dim_offset(ndim, o_dim, y->dims)] = sum; + } while(dim_next(ndim, o_dim, y->dims)); + } +} + void resolver_default_op_ConvInteger(struct onnx_node_t * n) { if(n->opset >= 10) { + if((n->inputs[0]->type == ONNX_TENSOR_TYPE_INT8) && (n->inputs[1]->type == ONNX_TENSOR_TYPE_INT8)) + { + n->init = ConvInteger_init; + n->exit = ConvInteger_exit; + n->reshape = ConvInteger_reshape; + n->operator_ = ConvInteger_int8_uint8; + } + else if((n->inputs[0]->type == ONNX_TENSOR_TYPE_UINT8) && (n->inputs[1]->type == ONNX_TENSOR_TYPE_UINT8)) + { + n->init = 
ConvInteger_init; + n->exit = ConvInteger_exit; + n->reshape = ConvInteger_reshape; + n->operator_ = ConvInteger_uint8_uint8; + } } } diff --git a/modules/fnxext/onnx_engine/src/default/DynamicQuantizeLSTM.c b/modules/fnxext/onnx_engine/src/default/DynamicQuantizeLSTM.c new file mode 100644 index 00000000000..6113b7ca0fc --- /dev/null +++ b/modules/fnxext/onnx_engine/src/default/DynamicQuantizeLSTM.c @@ -0,0 +1,765 @@ +#include "../onnx.h" +#include +#include + +enum { + DQLSTM_INPUT_X = 0, + DQLSTM_INPUT_W = 1, + DQLSTM_INPUT_R = 2, + DQLSTM_INPUT_B = 3, + DQLSTM_INPUT_SEQUENCE_LENS = 4, + DQLSTM_INPUT_INITIAL_H = 5, + DQLSTM_INPUT_INITIAL_C = 6, + DQLSTM_INPUT_P = 7, + DQLSTM_INPUT_W_SCALE = 8, + DQLSTM_INPUT_W_ZERO_POINT = 9, + DQLSTM_INPUT_R_SCALE = 10, + DQLSTM_INPUT_R_ZERO_POINT = 11, +}; + +enum { + DQLSTM_OUTPUT_Y = 0, + DQLSTM_OUTPUT_Y_H = 1, + DQLSTM_OUTPUT_Y_C = 2, +}; + +struct dqlstm_param_t { + int has_bias; + int has_sequence_lens; + int has_initial_h; + int has_initial_c; + int has_peephole; + int hidden_size; + char * direction; + float clip; + int layout; + int input_forget; + int num_directions; +}; + +static inline float sigmoid(float x) +{ + if (x >= 0) + return 1.0f / (1.0f + expf(-x)); + else + return expf(x) / (1.0f + expf(x)); +} + +static int DynamicQuantizeLSTM_init(struct onnx_node_t * n) +{ + struct dqlstm_param_t * p; + char * dir; + + if (n->ninput >= 12 && n->noutput >= 1) + { + p = malloc(sizeof(struct dqlstm_param_t)); + if (p) + { + memset(p, 0, sizeof(struct dqlstm_param_t)); + + // Set default values + p->clip = 0.0; + p->layout = 0; + p->input_forget = 0; + + // Parse attributes using the API functions + p->hidden_size = (int)onnx_attribute_read_int(n, "hidden_size", 0); + p->clip = onnx_attribute_read_float(n, "clip", 0.0f); + p->layout = (int)onnx_attribute_read_int(n, "layout", 0); + p->input_forget = (int)onnx_attribute_read_int(n, "input_forget", 0); + + dir = onnx_attribute_read_string(n, "direction", "forward"); + if 
(dir) + { + p->direction = dir; + } + else + { + p->direction = strdup("forward"); + } + + // Determine number of directions based on direction attribute + if (strncmp(p->direction, "bidirectional", 13) == 0) + { + p->num_directions = 2; + } + else + { + p->num_directions = 1; + } + + // Check optional inputs + p->has_bias = (n->ninput > DQLSTM_INPUT_B && n->inputs[DQLSTM_INPUT_B]) ? 1 : 0; + p->has_sequence_lens = (n->ninput > DQLSTM_INPUT_SEQUENCE_LENS && n->inputs[DQLSTM_INPUT_SEQUENCE_LENS]) ? 1 : 0; + p->has_initial_h = (n->ninput > DQLSTM_INPUT_INITIAL_H && n->inputs[DQLSTM_INPUT_INITIAL_H]) ? 1 : 0; + p->has_initial_c = (n->ninput > DQLSTM_INPUT_INITIAL_C && n->inputs[DQLSTM_INPUT_INITIAL_C]) ? 1 : 0; + p->has_peephole = (n->ninput > DQLSTM_INPUT_P && n->inputs[DQLSTM_INPUT_P]) ? 1 : 0; + + n->priv = p; + return 1; + } + } + return 0; +} + +static int DynamicQuantizeLSTM_exit(struct onnx_node_t * n) +{ + struct dqlstm_param_t * p = (struct dqlstm_param_t *)n->priv; + + if (p) + { + free(p); + } + return 1; +} + +static int DynamicQuantizeLSTM_reshape(struct onnx_node_t * n) +{ + struct onnx_tensor_t * x = n->inputs[DQLSTM_INPUT_X]; + struct dqlstm_param_t * p = (struct dqlstm_param_t *)n->priv; + int seq_length, batch_size; + + if (!p) + return 0; + + // Parse dimensions based on layout + if (p->layout == 0) + { + // Layout 0: [seq_length, batch_size, input_size] + seq_length = x->dims[0]; + batch_size = x->dims[1]; + } + else + { + // Layout 1: [batch_size, seq_length, input_size] + batch_size = x->dims[0]; + seq_length = x->dims[1]; + } + + // Reshape Y if requested + if (n->noutput > DQLSTM_OUTPUT_Y && n->outputs[DQLSTM_OUTPUT_Y]) + { + struct onnx_tensor_t * y = n->outputs[DQLSTM_OUTPUT_Y]; + int dims[4]; + + if (p->layout == 0) + { + // Y shape is [seq_length, num_directions, batch_size, hidden_size] + dims[0] = seq_length; + dims[1] = p->num_directions; + dims[2] = batch_size; + dims[3] = p->hidden_size; + } + else + { + // Y shape is [batch_size, 
seq_length, num_directions, hidden_size] + dims[0] = batch_size; + dims[1] = seq_length; + dims[2] = p->num_directions; + dims[3] = p->hidden_size; + } + + if (!onnx_tensor_reshape(y, dims, 4, ONNX_TENSOR_TYPE_FLOAT32)) + return 0; + } + + // Reshape Y_h if requested + if (n->noutput > DQLSTM_OUTPUT_Y_H && n->outputs[DQLSTM_OUTPUT_Y_H]) + { + struct onnx_tensor_t * y_h = n->outputs[DQLSTM_OUTPUT_Y_H]; + int dims[3]; + + if (p->layout == 0) + { + // Y_h shape is [num_directions, batch_size, hidden_size] + dims[0] = p->num_directions; + dims[1] = batch_size; + dims[2] = p->hidden_size; + } + else + { + // Y_h shape is [batch_size, num_directions, hidden_size] + dims[0] = batch_size; + dims[1] = p->num_directions; + dims[2] = p->hidden_size; + } + + if (!onnx_tensor_reshape(y_h, dims, 3, ONNX_TENSOR_TYPE_FLOAT32)) + return 0; + } + + // Reshape Y_c if requested + if (n->noutput > DQLSTM_OUTPUT_Y_C && n->outputs[DQLSTM_OUTPUT_Y_C]) + { + struct onnx_tensor_t * y_c = n->outputs[DQLSTM_OUTPUT_Y_C]; + int dims[3]; + + if (p->layout == 0) + { + // Y_c shape is [num_directions, batch_size, hidden_size] + dims[0] = p->num_directions; + dims[1] = batch_size; + dims[2] = p->hidden_size; + } + else + { + // Y_c shape is [batch_size, num_directions, hidden_size] + dims[0] = batch_size; + dims[1] = p->num_directions; + dims[2] = p->hidden_size; + } + + if (!onnx_tensor_reshape(y_c, dims, 3, ONNX_TENSOR_TYPE_FLOAT32)) + return 0; + } + + return 1; +} + +// Исправленная функция для дэквантизации весов +static void dequantize_weights(const void* quantized_data, float* dequantized_data, + const float* scale, const uint8_t* zero_point, + int size, int scale_size, int is_signed, int num_directions, + int input_or_hidden_size, int hidden_size) +{ + // Размер 4*hidden_size для каждого направления + int gate_size = 4 * hidden_size; + + if (scale_size == 1) { + // Поэлементное квантование (per-tensor) + float s = scale[0]; + int zp = (int)zero_point[0]; + + if (is_signed) { + const int8_t* 
signed_data = (const int8_t*)quantized_data; + for (int i = 0; i < size; i++) { + dequantized_data[i] = s * (float)(signed_data[i]); // Для signed весов zp обычно 0 + } + } else { + const uint8_t* unsigned_data = (const uint8_t*)quantized_data; + for (int i = 0; i < size; i++) { + dequantized_data[i] = s * (float)(unsigned_data[i] - zp); + } + } + } else if (scale_size == num_directions) { + // Поканальное квантование по направлениям + if (is_signed) { + const int8_t* signed_data = (const int8_t*)quantized_data; + for (int d = 0; d < num_directions; d++) { + float s = scale[d]; + // Для signed весов zp обычно 0 + for (int i = 0; i < size / num_directions; i++) { + int idx = d * (size / num_directions) + i; + dequantized_data[idx] = s * (float)(signed_data[idx]); + } + } + } else { + const uint8_t* unsigned_data = (const uint8_t*)quantized_data; + for (int d = 0; d < num_directions; d++) { + float s = scale[d]; + int zp = (int)zero_point[d]; + for (int i = 0; i < size / num_directions; i++) { + int idx = d * (size / num_directions) + i; + dequantized_data[idx] = s * (float)(unsigned_data[idx] - zp); + } + } + } + } else if (scale_size == num_directions * gate_size / hidden_size) { + // Поканальное квантование по гейтам (4 гейта для каждого направления) + // scale_size должно быть равно num_directions * 4 + + // Количество элементов на один канал (на один гейт) + int elements_per_gate = input_or_hidden_size * hidden_size; + + if (is_signed) { + const int8_t* signed_data = (const int8_t*)quantized_data; + for (int d = 0; d < num_directions; d++) { + for (int g = 0; g < 4; g++) { // 4 гейта: i, o, f, c + float s = scale[d * 4 + g]; + // Для signed весов zp обычно 0 + + for (int i = 0; i < elements_per_gate; i++) { + int idx = d * 4 * elements_per_gate + g * elements_per_gate + i; + dequantized_data[idx] = s * (float)(signed_data[idx]); + } + } + } + } else { + const uint8_t* unsigned_data = (const uint8_t*)quantized_data; + for (int d = 0; d < num_directions; d++) { + 
for (int g = 0; g < 4; g++) { // 4 гейта: i, o, f, c + float s = scale[d * 4 + g]; + int zp = (int)zero_point[d * 4 + g]; + + for (int i = 0; i < elements_per_gate; i++) { + int idx = d * 4 * elements_per_gate + g * elements_per_gate + i; + dequantized_data[idx] = s * (float)(unsigned_data[idx] - zp); + } + } + } + } + } else { + // Поканальное квантование по другому принципу, применяем упрощенную схему + // где каждый scale и zero_point применяются к блоку элементов + int items_per_channel = size / scale_size; + + if (is_signed) { + const int8_t* signed_data = (const int8_t*)quantized_data; + for (int c = 0; c < scale_size; c++) { + float s = scale[c]; + // Для signed весов zp обычно 0 + + for (int i = 0; i < items_per_channel; i++) { + int idx = c * items_per_channel + i; + dequantized_data[idx] = s * (float)(signed_data[idx]); + } + } + } else { + const uint8_t* unsigned_data = (const uint8_t*)quantized_data; + for (int c = 0; c < scale_size; c++) { + float s = scale[c]; + int zp = (int)zero_point[c]; + + for (int i = 0; i < items_per_channel; i++) { + int idx = c * items_per_channel + i; + dequantized_data[idx] = s * (float)(unsigned_data[idx] - zp); + } + } + } + } +} + +static void DynamicQuantizeLSTM_float32(struct onnx_node_t * n) +{ + struct dqlstm_param_t * p = (struct dqlstm_param_t *)n->priv; + struct onnx_tensor_t * x = n->inputs[DQLSTM_INPUT_X]; + struct onnx_tensor_t * w = n->inputs[DQLSTM_INPUT_W]; + struct onnx_tensor_t * r = n->inputs[DQLSTM_INPUT_R]; + struct onnx_tensor_t * b = p->has_bias ? n->inputs[DQLSTM_INPUT_B] : NULL; + struct onnx_tensor_t * sequence_lens = p->has_sequence_lens ? n->inputs[DQLSTM_INPUT_SEQUENCE_LENS] : NULL; + struct onnx_tensor_t * initial_h = p->has_initial_h ? n->inputs[DQLSTM_INPUT_INITIAL_H] : NULL; + struct onnx_tensor_t * initial_c = p->has_initial_c ? n->inputs[DQLSTM_INPUT_INITIAL_C] : NULL; + struct onnx_tensor_t * p_tensor = p->has_peephole ? 
n->inputs[DQLSTM_INPUT_P] : NULL; + struct onnx_tensor_t * w_scale = n->inputs[DQLSTM_INPUT_W_SCALE]; + struct onnx_tensor_t * w_zero_point = n->inputs[DQLSTM_INPUT_W_ZERO_POINT]; + struct onnx_tensor_t * r_scale = n->inputs[DQLSTM_INPUT_R_SCALE]; + struct onnx_tensor_t * r_zero_point = n->inputs[DQLSTM_INPUT_R_ZERO_POINT]; + struct onnx_tensor_t * y = (n->noutput > DQLSTM_OUTPUT_Y) ? n->outputs[DQLSTM_OUTPUT_Y] : NULL; + struct onnx_tensor_t * y_h = (n->noutput > DQLSTM_OUTPUT_Y_H) ? n->outputs[DQLSTM_OUTPUT_Y_H] : NULL; + struct onnx_tensor_t * y_c = (n->noutput > DQLSTM_OUTPUT_Y_C) ? n->outputs[DQLSTM_OUTPUT_Y_C] : NULL; + float * px, * py, * py_h, * py_c; + float * ph_t, * pc_t, * pbx, * pbh; + float * gates, * it, * ot, * ft, * ct; + float * ppi, * ppo, * ppf; + int seq_length, batch_size, input_size, hidden_size; + int num_directions = p->num_directions; + int layout = p->layout; + int i, j, d, b_idx, s_idx, k; + + // Указатель на sequence_lens для проверки длин последовательностей + const int* sequence_lengths = sequence_lens ? (const int*)sequence_lens->datas : NULL; + int max_sequence_length = 0; + + // Get dimensions + if (layout == 0) + { + // [seq_length, batch_size, input_size] + seq_length = x->dims[0]; + batch_size = x->dims[1]; + input_size = x->dims[2]; + } + else + { + // [batch_size, seq_length, input_size] + batch_size = x->dims[0]; + seq_length = x->dims[1]; + input_size = x->dims[2]; + } + hidden_size = p->hidden_size; + + // Если предоставлены sequence_lengths, определите максимальную длину + if (sequence_lengths != NULL) { + max_sequence_length = 0; + for (int b = 0; b < batch_size; b++) { + if (sequence_lengths[b] > max_sequence_length) { + max_sequence_length = sequence_lengths[b]; + } + } + + // Используем max_sequence_length вместо seq_length, если она меньше + if (max_sequence_length > 0 && max_sequence_length < seq_length) { + seq_length = max_sequence_length; + } + } + + // Get scale sizes + int w_scale_size = w_scale->ndim > 0 ? 
(w_scale->ndim > 1 ? w_scale->dims[0] * w_scale->dims[1] : w_scale->dims[0]) : 1; + int r_scale_size = r_scale->ndim > 0 ? (r_scale->ndim > 1 ? r_scale->dims[0] * r_scale->dims[1] : r_scale->dims[0]) : 1; + + // Check if weights are signed or unsigned + int is_w_signed = (w->type == ONNX_TENSOR_TYPE_INT8); + int is_r_signed = (r->type == ONNX_TENSOR_TYPE_INT8); + + // Allocate memory for intermediate calculations and hidden/cell states + gates = (float *)malloc(batch_size * 4 * hidden_size * sizeof(float)); + ph_t = (float *)malloc(batch_size * hidden_size * sizeof(float)); + pc_t = (float *)malloc(batch_size * hidden_size * sizeof(float)); + + // Allocate memory for dequantized weights + float* w_dequantized = (float*)malloc(w->ndata * sizeof(float)); + float* r_dequantized = (float*)malloc(r->ndata * sizeof(float)); + + if (!gates || !ph_t || !pc_t || !w_dequantized || !r_dequantized) + { + if (gates) free(gates); + if (ph_t) free(ph_t); + if (pc_t) free(pc_t); + if (w_dequantized) free(w_dequantized); + if (r_dequantized) free(r_dequantized); + return; + } + + // Set up gate pointers (как в LSTM) + it = gates; + ot = gates + batch_size * hidden_size; + ft = gates + 2 * batch_size * hidden_size; + ct = gates + 3 * batch_size * hidden_size; + + // Get weight scale and zero point data + float* w_scale_data = (float*)w_scale->datas; + uint8_t* w_zp_data = (uint8_t*)w_zero_point->datas; + float* r_scale_data = (float*)r_scale->datas; + uint8_t* r_zp_data = (uint8_t*)r_zero_point->datas; + + // Дэквантизация весов (преобразование int8/uint8 в float) + dequantize_weights(w->datas, w_dequantized, w_scale_data, w_zp_data, + w->ndata, w_scale_size, is_w_signed, num_directions, input_size, hidden_size); + dequantize_weights(r->datas, r_dequantized, r_scale_data, r_zp_data, + r->ndata, r_scale_size, is_r_signed, num_directions, hidden_size, hidden_size); + + // Process each direction + for (d = 0; d < num_directions; d++) + { + // Initialize hidden and cell states + if 
(initial_h) + { + if (layout == 0) + memcpy(ph_t, (float *)initial_h->datas + d * batch_size * hidden_size, batch_size * hidden_size * sizeof(float)); + else + for (b_idx = 0; b_idx < batch_size; b_idx++) + memcpy(ph_t + b_idx * hidden_size, + (float *)initial_h->datas + b_idx * num_directions * hidden_size + d * hidden_size, + hidden_size * sizeof(float)); + } + else + { + memset(ph_t, 0, batch_size * hidden_size * sizeof(float)); + } + + if (initial_c) + { + if (layout == 0) + memcpy(pc_t, (float *)initial_c->datas + d * batch_size * hidden_size, batch_size * hidden_size * sizeof(float)); + else + for (b_idx = 0; b_idx < batch_size; b_idx++) + memcpy(pc_t + b_idx * hidden_size, + (float *)initial_c->datas + b_idx * num_directions * hidden_size + d * hidden_size, + hidden_size * sizeof(float)); + } + else + { + memset(pc_t, 0, batch_size * hidden_size * sizeof(float)); + } + + // Получаем указатели на весовые матрицы для текущего направления + float* pw_base = w_dequantized + d * input_size * 4 * hidden_size; + float* pr_base = r_dequantized + d * hidden_size * 4 * hidden_size; + + // Указатели на bias + if (b) + { + pbx = (float *)b->datas + d * 8 * hidden_size; // W bias + pbh = (float *)b->datas + d * 8 * hidden_size + 4 * hidden_size; // R bias + } + + // Указатели на peephole коэффициенты + if (p_tensor) + { + ppi = (float *)p_tensor->datas + d * 3 * hidden_size; // i peephole + ppo = (float *)p_tensor->datas + d * 3 * hidden_size + hidden_size; // o peephole + ppf = (float *)p_tensor->datas + d * 3 * hidden_size + 2 * hidden_size; // f peephole + } + + // Process sequence + for (s_idx = 0; s_idx < seq_length; s_idx++) + { + // Determine the actual sequence index based on direction + int seq_idx = s_idx; + if (strcmp(p->direction, "reverse") == 0 || + (strncmp(p->direction, "bidirectional", 13) == 0 && d == 1)) + { + seq_idx = seq_length - 1 - s_idx; + } + + // Get input data pointer for this sequence step + if (layout == 0) + { + // [seq_length, batch_size, 
input_size] + px = (float *)x->datas + seq_idx * batch_size * input_size; + } + else + { + // [batch_size, seq_length, input_size] + px = (float *)x->datas + batch_size * seq_idx * input_size; + } + + // Вычисление гейтов для каждого batch элемента + for (b_idx = 0; b_idx < batch_size; b_idx++) + { + // Проверяем sequence_lengths для текущего batch элемента + if (sequence_lengths != NULL) { + // Для обратного направления проверяем иначе + if (strcmp(p->direction, "reverse") == 0 || + (strncmp(p->direction, "bidirectional", 13) == 0 && d == 1)) { + if (seq_length - 1 - seq_idx >= sequence_lengths[b_idx]) { + // Пропускаем этот шаг для текущего batch элемента + continue; + } + } else { + // Для прямого направления + if (seq_idx >= sequence_lengths[b_idx]) { + continue; + } + } + } + + float * x_b = px + b_idx * input_size; + float * h_b = ph_t + b_idx * hidden_size; + float * c_b = pc_t + b_idx * hidden_size; + + // Получаем указатели на гейты для текущего элемента батча + float * it_b = it + b_idx * hidden_size; + float * ot_b = ot + b_idx * hidden_size; + float * ft_b = ft + b_idx * hidden_size; + float * ct_b = ct + b_idx * hidden_size; + + // Инициализация гейтов нулями перед суммированием + memset(it_b, 0, hidden_size * sizeof(float)); + memset(ot_b, 0, hidden_size * sizeof(float)); + memset(ft_b, 0, hidden_size * sizeof(float)); + memset(ct_b, 0, hidden_size * sizeof(float)); + + // Вычисляем все гейты (input, output, forget, cell) + // X * W + bias для каждого гейта + for (i = 0; i < 4; i++) + { + // Выбираем соответствующий гейт + float * g; + switch(i) { + case 0: g = it_b; break; // input gate + case 1: g = ot_b; break; // output gate + case 2: g = ft_b; break; // forget gate + case 3: g = ct_b; break; // cell gate + default: g = NULL; break; + } + + for (j = 0; j < hidden_size; j++) + { + float sum = 0.0f; + for (k = 0; k < input_size; k++) + { + + sum += x_b[k] * pw_base[k * 4 * hidden_size + i * hidden_size + j]; + } + g[j] = sum; + } + + // Add bias if 
available + if (b) + { + float * bi = pbx + i * hidden_size; + for (j = 0; j < hidden_size; j++) + { + g[j] += bi[j]; + } + } + } + + // H * R + bias для каждого гейта + for (i = 0; i < 4; i++) + { + // Выбираем соответствующий гейт + float * g; + switch(i) { + case 0: g = it_b; break; // input gate + case 1: g = ot_b; break; // output gate + case 2: g = ft_b; break; // forget gate + case 3: g = ct_b; break; // cell gate + default: g = NULL; break; + } + + for (j = 0; j < hidden_size; j++) + { + float sum = 0.0f; + for (k = 0; k < hidden_size; k++) + { + + sum += h_b[k] * pr_base[k * 4 * hidden_size + i * hidden_size + j]; + } + g[j] += sum; + } + + // Add bias if available + if (b) + { + float * bi = pbh + i * hidden_size; + for (j = 0; j < hidden_size; j++) + { + g[j] += bi[j]; + } + } + } + + // Добавляем peephole для input и forget гейтов + if (p->has_peephole) + { + // Input gate: + Pi (.) Ct-1 + for (j = 0; j < hidden_size; j++) + { + it_b[j] += ppi[j] * c_b[j]; + } + + // Forget gate: + Pf (.) Ct-1 + for (j = 0; j < hidden_size; j++) + { + ft_b[j] += ppf[j] * c_b[j]; + } + } + + // Применяем активацию более консистентно с LSTM имплементацией + for (j = 0; j < hidden_size; j++) + { + float i_val = sigmoid(it_b[j]); + float f_val; + + if (p->input_forget) { + // Если включен input_forget, то forget gate = 1 - input gate + f_val = 1.0f - i_val; + } else { + f_val = sigmoid(ft_b[j]); + } + + float c_val = tanhf(ct_b[j]); + + // Update cell state: Ct = ft (.) Ct-1 + it (.) ct + c_b[j] = f_val * c_b[j] + i_val * c_val; + + // Apply clip if specified + if (p->clip > 0) { + if (c_b[j] > p->clip) + c_b[j] = p->clip; + else if (c_b[j] < -p->clip) + c_b[j] = -p->clip; + } + } + + // Добавляем peephole для output gate после обновления cell state + if (p->has_peephole) { + // Output gate: + Po (.) 
Ct + for (j = 0; j < hidden_size; j++) { + ot_b[j] += ppo[j] * c_b[j]; + } + } + + // Применяем активацию к output gate и обновляем hidden state + for (j = 0; j < hidden_size; j++) { + float o_val = sigmoid(ot_b[j]); + h_b[j] = o_val * tanhf(c_b[j]); + } + + // Store the results to output tensors if needed + if (y) + { + if (layout == 0) + { + // Y shape is [seq_length, num_directions, batch_size, hidden_size] + py = (float *)y->datas + + seq_idx * num_directions * batch_size * hidden_size + + d * batch_size * hidden_size + + b_idx * hidden_size; + } + else + { + // Y shape is [batch_size, seq_length, num_directions, hidden_size] + py = (float *)y->datas + + b_idx * seq_length * num_directions * hidden_size + + seq_idx * num_directions * hidden_size + + d * hidden_size; + } + + memcpy(py, h_b, hidden_size * sizeof(float)); + } + } + } + + // Store final hidden and cell states + if (y_h) + { + if (layout == 0) + { + // Y_h shape is [num_directions, batch_size, hidden_size] + py_h = (float *)y_h->datas + d * batch_size * hidden_size; + memcpy(py_h, ph_t, batch_size * hidden_size * sizeof(float)); + } + else + { + // Y_h shape is [batch_size, num_directions, hidden_size] + for (b_idx = 0; b_idx < batch_size; b_idx++) + { + py_h = (float *)y_h->datas + b_idx * num_directions * hidden_size + d * hidden_size; + memcpy(py_h, ph_t + b_idx * hidden_size, hidden_size * sizeof(float)); + } + } + } + + if (y_c) + { + if (layout == 0) + { + // Y_c shape is [num_directions, batch_size, hidden_size] + py_c = (float *)y_c->datas + d * batch_size * hidden_size; + memcpy(py_c, pc_t, batch_size * hidden_size * sizeof(float)); + } + else + { + // Y_c shape is [batch_size, num_directions, hidden_size] + for (b_idx = 0; b_idx < batch_size; b_idx++) + { + py_c = (float *)y_c->datas + b_idx * num_directions * hidden_size + d * hidden_size; + memcpy(py_c, pc_t + b_idx * hidden_size, hidden_size * sizeof(float)); + } + } + } + } + + // Free allocated memory + free(gates); + free(ph_t); + 
free(pc_t); + free(w_dequantized); + free(r_dequantized); +} + +void resolver_default_op_DynamicQuantizeLSTM(struct onnx_node_t * n) +{ + if (n->opset >= 1) + { + if (n->inputs[0]->type == ONNX_TENSOR_TYPE_FLOAT32) + { + n->init = DynamicQuantizeLSTM_init; + n->exit = DynamicQuantizeLSTM_exit; + n->reshape = DynamicQuantizeLSTM_reshape; + n->operator_ = DynamicQuantizeLSTM_float32; + } + } +} \ No newline at end of file diff --git a/modules/fnxext/onnx_engine/src/default/DynamicQuantizeLinear.c b/modules/fnxext/onnx_engine/src/default/DynamicQuantizeLinear.c index 8b358d1d363..c739c3ab616 100644 --- a/modules/fnxext/onnx_engine/src/default/DynamicQuantizeLinear.c +++ b/modules/fnxext/onnx_engine/src/default/DynamicQuantizeLinear.c @@ -1,8 +1,141 @@ #include "../onnx.h" +#include + +struct operator_pdata_t { + int dummy; +}; + +static int DynamicQuantizeLinear_init(struct onnx_node_t * n) +{ + struct operator_pdata_t * pdat; + + if((n->ninput == 1) && (n->noutput == 3)) + { + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); + if(pdat) + { + pdat->dummy = 0; + n->priv = pdat; + return 1; + } + } + return 0; +} + +static int DynamicQuantizeLinear_exit(struct onnx_node_t * n) +{ + struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; + + if(pdat) + onnx_free(pdat); + return 1; +} + +static int DynamicQuantizeLinear_reshape(struct onnx_node_t * n) +{ + struct onnx_tensor_t * x = n->inputs[0]; + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * y_scale = n->outputs[1]; + struct onnx_tensor_t * y_zero_point = n->outputs[2]; + + if(x->type != ONNX_TENSOR_TYPE_FLOAT32) + return 0; + + if(!onnx_tensor_reshape(y, x->dims, x->ndim, ONNX_TENSOR_TYPE_UINT8)) + return 0; + + if(!onnx_tensor_reshape(y_scale, NULL, 0, ONNX_TENSOR_TYPE_FLOAT32)) + return 0; + + if(!onnx_tensor_reshape(y_zero_point, NULL, 0, ONNX_TENSOR_TYPE_UINT8)) + return 0; + + return 1; +} + +static void DynamicQuantizeLinear_float32(struct onnx_node_t * n) +{ + struct 
onnx_tensor_t * x = n->inputs[0]; + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * y_scale = n->outputs[1]; + struct onnx_tensor_t * y_zero_point = n->outputs[2]; + + float * px = (float *)x->datas; + uint8_t * py = (uint8_t *)y->datas; + float * py_scale = (float *)y_scale->datas; + uint8_t * py_zero_point = (uint8_t *)y_zero_point->datas; + + // Find min and max values in input tensor + float x_min = px[0]; + float x_max = px[0]; + + for(size_t i = 1; i < x->ndata; i++) + { + if(px[i] < x_min) + x_min = px[i]; + if(px[i] > x_max) + x_max = px[i]; + } + + // Adjust data range to include 0 + x_min = fmin(0, x_min); + x_max = fmax(0, x_max); + + // Calculate scale and zero point + float scale = (x_max - x_min) / 255.0f; + float zero_point_float = 0.0f; + + // Handle special case: if input range is zero (all inputs are the same value) + if(scale == 0.0f) + { + scale = 1.0f; + zero_point_float = 0.0f; + } + else + { + // Calculate zero point + zero_point_float = (0.0f - x_min) / scale; + } + + // Round to nearest even and saturate + int zero_point_int = (int)roundf(zero_point_float); + + // Saturate to uint8 range [0, 255] + if(zero_point_int < 0) + zero_point_int = 0; + else if(zero_point_int > 255) + zero_point_int = 255; + + // Set output scale and zero point + py_scale[0] = scale; + py_zero_point[0] = (uint8_t)zero_point_int; + + // Quantize input data + for(size_t i = 0; i < x->ndata; i++) + { + // Apply quantization formula: y = saturate(round(x / scale) + zero_point) + float quantized_float = roundf(px[i] / scale) + zero_point_int; + + // Saturate to uint8 range [0, 255] + if(quantized_float < 0.0f) + py[i] = 0; + else if(quantized_float > 255.0f) + py[i] = 255; + else + py[i] = (uint8_t)quantized_float; + } +} void resolver_default_op_DynamicQuantizeLinear(struct onnx_node_t * n) { if(n->opset >= 11) { + if(n->inputs[0]->type == ONNX_TENSOR_TYPE_FLOAT32) + { + n->init = DynamicQuantizeLinear_init; + n->exit = DynamicQuantizeLinear_exit; + 
n->reshape = DynamicQuantizeLinear_reshape; + n->operator_ = DynamicQuantizeLinear_float32; + } } } diff --git a/modules/fnxext/onnx_engine/src/default/Elu.c b/modules/fnxext/onnx_engine/src/default/Elu.c index f9febe37503..780abd3e40f 100644 --- a/modules/fnxext/onnx_engine/src/default/Elu.c +++ b/modules/fnxext/onnx_engine/src/default/Elu.c @@ -10,7 +10,7 @@ static int Elu_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->alpha = onnx_attribute_read_float(n, "alpha", 1.0); @@ -26,7 +26,7 @@ static int Elu_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/Expand.c b/modules/fnxext/onnx_engine/src/default/Expand.c index 7e7d2fc585f..d948d50121e 100644 --- a/modules/fnxext/onnx_engine/src/default/Expand.c +++ b/modules/fnxext/onnx_engine/src/default/Expand.c @@ -18,7 +18,7 @@ static int Expand_reshape(struct onnx_node_t * n) struct onnx_tensor_t * x = n->inputs[0]; struct onnx_tensor_t * s = n->inputs[1]; int64_t * ps = (int64_t *)s->datas; - int ndim = maxx(x->ndim, (int)s->ndata); + int ndim = XMAX(x->ndim, (int)s->ndata); int dims[ndim]; int i, j, k; @@ -266,8 +266,8 @@ static void Expand_string(struct onnx_node_t * n) { px = onnx_tensor_broadcast_map_address(x, y, i); if(py[i]) - free(py[i]); - py[i] = strdup(px[i]); + onnx_free(py[i]); + py[i] = onnx_strdup(px[i]); } } diff --git a/modules/fnxext/onnx_engine/src/default/Flatten.c b/modules/fnxext/onnx_engine/src/default/Flatten.c index d30fc23b093..aa2d40dfbe2 100644 --- a/modules/fnxext/onnx_engine/src/default/Flatten.c +++ b/modules/fnxext/onnx_engine/src/default/Flatten.c @@ -10,7 +10,7 @@ static int Flatten_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct 
operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->axis = onnx_attribute_read_int(n, "axis", 1); @@ -26,7 +26,7 @@ static int Flatten_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } @@ -70,13 +70,13 @@ static void Flatten_operator(struct onnx_node_t * n) for(size_t i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); - py[i] = strdup(px[i]); + onnx_free(py[i]); + py[i] = onnx_strdup(px[i]); } } else { - memcpy(y->datas, x->datas, x->ndata * onnx_tensor_type_sizeof(x->type)); + onnx_memcpy(y->datas, x->datas, x->ndata * onnx_tensor_type_sizeof(x->type)); } } diff --git a/modules/fnxext/onnx_engine/src/default/Gather.c b/modules/fnxext/onnx_engine/src/default/Gather.c index f452d421741..4a82aba18c5 100644 --- a/modules/fnxext/onnx_engine/src/default/Gather.c +++ b/modules/fnxext/onnx_engine/src/default/Gather.c @@ -1,5 +1,111 @@ #include "../onnx.h" +static int Gather_init(struct onnx_node_t * n) +{ + if((n->ninput == 2) && (n->noutput == 1)) + return 1; + return 0; +} + +static int Gather_exit(struct onnx_node_t * n) +{ + return 1; +} + +static int Gather_reshape(struct onnx_node_t * n) +{ + struct onnx_tensor_t * data = n->inputs[0]; + struct onnx_tensor_t * indices = n->inputs[1]; + struct onnx_tensor_t * y = n->outputs[0]; + int64_t axis = onnx_attribute_read_int(n, "axis", 0); + int r = data->ndim; + int q = indices->ndim; + + if(axis < 0) + axis += r; + if(axis < 0 || axis >= r) + return 0; + + int ndim = q + (r - 1); + int dims[ndim]; + int i, k; + + k = 0; + for(i = 0; i < axis; i++) + dims[k++] = data->dims[i]; + for(i = 0; i < q; i++) + dims[k++] = indices->dims[i]; + for(i = axis + 1; i < r; i++) + dims[k++] = data->dims[i]; + + return onnx_tensor_reshape(y, dims, ndim, data->type); +} + +static void Gather_operator(struct onnx_node_t * n) +{ + struct onnx_tensor_t * data = n->inputs[0]; + struct 
onnx_tensor_t * indices = n->inputs[1]; + struct onnx_tensor_t * y = n->outputs[0]; + int32_t * pindices = (int32_t *)indices->datas; + int64_t axis = onnx_attribute_read_int(n, "axis", 0); + int r = data->ndim; + int typesize = onnx_tensor_type_sizeof(data->type); + + if(axis < 0) + axis += r; + + int outer_size = 1; + int indices_size = 1; + int inner_size = 1; + int i; + + for(i = 0; i < axis; i++) + outer_size *= data->dims[i]; + for(i = 0; i < indices->ndim; i++) + indices_size *= indices->dims[i]; + for(i = axis + 1; i < r; i++) + inner_size *= data->dims[i]; + + if(data->type == ONNX_TENSOR_TYPE_STRING) + { + char ** pdata = (char **)data->datas; + char ** py = (char **)y->datas; + + for(int out = 0; out < outer_size; out++) { + for(int idx = 0; idx < indices_size; idx++) { + int src_offset = out * data->dims[axis] * inner_size + + pindices[idx] * inner_size; + int dst_offset = out * indices_size * inner_size + + idx * inner_size; + + for(int in = 0; in < inner_size; in++) { + if(py[dst_offset + in]) + onnx_free(py[dst_offset + in]); + py[dst_offset + in] = pdata[src_offset + in] ? 
onnx_strdup(pdata[src_offset + in]) : NULL; + } + } + } + } + else + { + char * pdata = (char *)data->datas; + char * py = (char *)y->datas; + + for(int out = 0; out < outer_size; out++) { + for(int idx = 0; idx < indices_size; idx++) { + int src_offset = out * data->dims[axis] * inner_size + + pindices[idx] * inner_size; + int dst_offset = out * indices_size * inner_size + + idx * inner_size; + + onnx_memcpy(py + (dst_offset * typesize), + pdata + (src_offset * typesize), + inner_size * typesize); + } + } + } +} + void resolver_default_op_Gather(struct onnx_node_t * n) { if(n->opset >= 13) @@ -7,6 +113,32 @@ void resolver_default_op_Gather(struct onnx_node_t * n) } else if(n->opset >= 11) { + switch(n->inputs[0]->type) + { + case ONNX_TENSOR_TYPE_BOOL: + case ONNX_TENSOR_TYPE_INT8: + case ONNX_TENSOR_TYPE_INT16: + case ONNX_TENSOR_TYPE_INT32: + case ONNX_TENSOR_TYPE_INT64: + case ONNX_TENSOR_TYPE_UINT8: + case ONNX_TENSOR_TYPE_UINT16: + case ONNX_TENSOR_TYPE_UINT32: + case ONNX_TENSOR_TYPE_UINT64: + case ONNX_TENSOR_TYPE_BFLOAT16: + case ONNX_TENSOR_TYPE_FLOAT16: + case ONNX_TENSOR_TYPE_FLOAT32: + case ONNX_TENSOR_TYPE_FLOAT64: + case ONNX_TENSOR_TYPE_COMPLEX64: + case ONNX_TENSOR_TYPE_COMPLEX128: + case ONNX_TENSOR_TYPE_STRING: + n->init = Gather_init; + n->exit = Gather_exit; + n->reshape = Gather_reshape; + n->operator_ = Gather_operator; + break; + default: + break; + } } else if(n->opset >= 1) { diff --git a/modules/fnxext/onnx_engine/src/default/Gemm.c b/modules/fnxext/onnx_engine/src/default/Gemm.c index b5ef6e4329b..348a478c386 100644 --- a/modules/fnxext/onnx_engine/src/default/Gemm.c +++ b/modules/fnxext/onnx_engine/src/default/Gemm.c @@ -17,7 +17,7 @@ static int Gemm_init(struct onnx_node_t * n) if((n->ninput >= 2) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->alpha = onnx_attribute_read_float(n, "alpha", 1.0); @@ -39,7 +39,7 @@ static int 
Gemm_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/GlobalAveragePool.c b/modules/fnxext/onnx_engine/src/default/GlobalAveragePool.c index 25bacec168c..f43d7444157 100644 --- a/modules/fnxext/onnx_engine/src/default/GlobalAveragePool.c +++ b/modules/fnxext/onnx_engine/src/default/GlobalAveragePool.c @@ -43,7 +43,7 @@ static void GlobalAveragePool_float16(struct onnx_node_t * n) int idx[2], cnt; size_t i, j, l; - memset(sum, 0, sizeof(sum)); + onnx_memset(sum, 0, sizeof(sum)); for(i = 0, l = x->ndata; i < l; i++) { cnt = i; @@ -73,7 +73,7 @@ static void GlobalAveragePool_float32(struct onnx_node_t * n) int idx[2], cnt; size_t i, j, l; - memset(sum, 0, sizeof(sum)); + onnx_memset(sum, 0, sizeof(sum)); for(i = 0, l = x->ndata; i < l; i++) { cnt = i; @@ -103,7 +103,7 @@ static void GlobalAveragePool_float64(struct onnx_node_t * n) int idx[2], cnt; size_t i, j, l; - memset(sum, 0, sizeof(sum)); + onnx_memset(sum, 0, sizeof(sum)); for(i = 0, l = x->ndata; i < l; i++) { cnt = i; diff --git a/modules/fnxext/onnx_engine/src/default/GlobalLpPool.c b/modules/fnxext/onnx_engine/src/default/GlobalLpPool.c index 1826d651ca7..1b2a84055f7 100644 --- a/modules/fnxext/onnx_engine/src/default/GlobalLpPool.c +++ b/modules/fnxext/onnx_engine/src/default/GlobalLpPool.c @@ -10,7 +10,7 @@ static int GlobalLpPool_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { if(n->opset >= 2) @@ -29,7 +29,7 @@ static int GlobalLpPool_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/HardSigmoid.c b/modules/fnxext/onnx_engine/src/default/HardSigmoid.c index 
7be78be3047..1e69a7c37d8 100644 --- a/modules/fnxext/onnx_engine/src/default/HardSigmoid.c +++ b/modules/fnxext/onnx_engine/src/default/HardSigmoid.c @@ -11,7 +11,7 @@ static int HardSigmoid_init(struct onnx_node_t * n) if((n->ninput > 0) && (n->noutput > 0)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->alpha = onnx_attribute_read_float(n, "alpha", 0.2); @@ -28,7 +28,7 @@ static int HardSigmoid_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } @@ -52,7 +52,7 @@ static void HardSigmoid_float16(struct onnx_node_t * n) for(size_t i = 0, l = y->ndata; i < l; i++) { v = float16_to_float32(px[i]); - py[i] = float32_to_float16(maxx((float)0.0, minn((float)1.0, (float)(pdat->alpha * v + pdat->beta)))); + py[i] = float32_to_float16(XMAX((float)0.0, XMIN((float)1.0, (float)(pdat->alpha * v + pdat->beta)))); } } @@ -65,7 +65,7 @@ static void HardSigmoid_float32(struct onnx_node_t * n) float * py = (float *)y->datas; for(size_t i = 0, l = y->ndata; i < l; i++) - py[i] = maxx((float)0.0, minn((float)1.0, (float)(pdat->alpha * px[i] + pdat->beta))); + py[i] = XMAX((float)0.0, XMIN((float)1.0, (float)(pdat->alpha * px[i] + pdat->beta))); } static void HardSigmoid_float64(struct onnx_node_t * n) @@ -77,7 +77,7 @@ static void HardSigmoid_float64(struct onnx_node_t * n) double * py = (double *)y->datas; for(size_t i = 0, l = y->ndata; i < l; i++) - py[i] = maxx((double)0.0, minn((double)1.0, (double)(pdat->alpha * px[i] + pdat->beta))); + py[i] = XMAX((double)0.0, XMIN((double)1.0, (double)(pdat->alpha * px[i] + pdat->beta))); } void resolver_default_op_HardSigmoid(struct onnx_node_t * n) diff --git a/modules/fnxext/onnx_engine/src/default/Identity.c b/modules/fnxext/onnx_engine/src/default/Identity.c index df723e89b95..a68cda0b5d3 100644 --- a/modules/fnxext/onnx_engine/src/default/Identity.c +++ 
b/modules/fnxext/onnx_engine/src/default/Identity.c @@ -32,13 +32,13 @@ static void Identity_operator(struct onnx_node_t * n) for(size_t i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); - py[i] = strdup(px[i]); + onnx_free(py[i]); + py[i] = onnx_strdup(px[i]); } } else { - memcpy(y->datas, x->datas, x->ndata * onnx_tensor_type_sizeof(x->type)); + onnx_memcpy(y->datas, x->datas, x->ndata * onnx_tensor_type_sizeof(x->type)); } } diff --git a/modules/fnxext/onnx_engine/src/default/If.c b/modules/fnxext/onnx_engine/src/default/If.c index 73151578534..5b343402422 100644 --- a/modules/fnxext/onnx_engine/src/default/If.c +++ b/modules/fnxext/onnx_engine/src/default/If.c @@ -11,7 +11,7 @@ static int If_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput >= 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->else_branch = onnx_graph_alloc(n->ctx, onnx_attribute_read_graph(n, "else_branch", NULL)); @@ -22,7 +22,7 @@ static int If_init(struct onnx_node_t * n) onnx_graph_free(pdat->else_branch); if(pdat->then_branch) onnx_graph_free(pdat->then_branch); - free(pdat); + onnx_free(pdat); return 0; } n->priv = pdat; @@ -42,7 +42,7 @@ static int If_exit(struct onnx_node_t * n) onnx_graph_free(pdat->else_branch); if(pdat->then_branch) onnx_graph_free(pdat->then_branch); - free(pdat); + onnx_free(pdat); } return 1; } @@ -69,7 +69,7 @@ static int If_reshape(struct onnx_node_t * n) } if(t) { - for(i = 0; i < minn(t->noutput, n->noutput); i++) + for(i = 0; i < XMIN(t->noutput, n->noutput); i++) { struct onnx_tensor_t * a = t->outputs[i]; struct onnx_tensor_t * b = n->outputs[i]; @@ -102,7 +102,7 @@ static void If_operator(struct onnx_node_t * n) } if(t) { - for(i = 0; i < minn(t->noutput, n->noutput); i++) + for(i = 0; i < XMIN(t->noutput, n->noutput); i++) { struct onnx_tensor_t * a = t->outputs[i]; struct onnx_tensor_t * b = n->outputs[i]; @@ -113,13 +113,13 @@ static void 
If_operator(struct onnx_node_t * n) for(size_t o = 0; o < b->ndata; o++) { if(pb[o]) - free(pb[o]); - pb[o] = strdup(pa[o]); + onnx_free(pb[o]); + pb[o] = onnx_strdup(pa[o]); } } else { - memcpy(b->datas, a->datas, a->ndata * onnx_tensor_type_sizeof(a->type)); + onnx_memcpy(b->datas, a->datas, a->ndata * onnx_tensor_type_sizeof(a->type)); } } } diff --git a/modules/fnxext/onnx_engine/src/default/InstanceNormalization.c b/modules/fnxext/onnx_engine/src/default/InstanceNormalization.c index eb63337b8e1..bfb304307a1 100644 --- a/modules/fnxext/onnx_engine/src/default/InstanceNormalization.c +++ b/modules/fnxext/onnx_engine/src/default/InstanceNormalization.c @@ -10,7 +10,7 @@ static int InstanceNormalization_init(struct onnx_node_t * n) if((n->ninput == 3) && (n->noutput >= 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->epsilon = onnx_attribute_read_float(n, "epsilon", 1e-05); @@ -26,7 +26,7 @@ static int InstanceNormalization_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/IsInf.c b/modules/fnxext/onnx_engine/src/default/IsInf.c index 9d731987e52..4383d433278 100644 --- a/modules/fnxext/onnx_engine/src/default/IsInf.c +++ b/modules/fnxext/onnx_engine/src/default/IsInf.c @@ -11,7 +11,7 @@ static int IsInf_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->detect_negative = onnx_attribute_read_int(n, "detect_negative", 1); @@ -28,7 +28,7 @@ static int IsInf_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/LRN.c 
b/modules/fnxext/onnx_engine/src/default/LRN.c index 2657606d5f2..40ae03562d1 100644 --- a/modules/fnxext/onnx_engine/src/default/LRN.c +++ b/modules/fnxext/onnx_engine/src/default/LRN.c @@ -13,7 +13,7 @@ static int LRN_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->alpha = onnx_attribute_read_float(n, "alpha", 0.0001); @@ -32,7 +32,7 @@ static int LRN_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/LSTM.c b/modules/fnxext/onnx_engine/src/default/LSTM.c index 7cee3121f76..be7e22cf8d6 100644 --- a/modules/fnxext/onnx_engine/src/default/LSTM.c +++ b/modules/fnxext/onnx_engine/src/default/LSTM.c @@ -1,14 +1,566 @@ #include "../onnx.h" +#include "../matrix.h" +#include +#include + +enum { + LSTM_INPUT_X = 0, + LSTM_INPUT_W = 1, + LSTM_INPUT_R = 2, + LSTM_INPUT_B = 3, + LSTM_INPUT_SEQUENCE_LENS = 4, + LSTM_INPUT_INITIAL_H = 5, + LSTM_INPUT_INITIAL_C = 6, + LSTM_INPUT_P = 7, +}; + +enum { + LSTM_OUTPUT_Y = 0, + LSTM_OUTPUT_Y_H = 1, + LSTM_OUTPUT_Y_C = 2, +}; + +struct lstm_param_t { + int has_bias; + int has_sequence_lens; + int has_initial_h; + int has_initial_c; + int has_peephole; + int hidden_size; + char * direction; + float clip; + int layout; + int input_forget; + int num_directions; +}; + +static inline float sigmoid(float x) +{ + if (x >= 0) + return 1.0f / (1.0f + expf(-x)); + else + return expf(x) / (1.0f + expf(x)); +} + +static int LSTM_init(struct onnx_node_t * n) +{ + struct lstm_param_t * p; + char * dir; + + if (n->ninput >= 3 && n->noutput >= 1) + { + p = malloc(sizeof(struct lstm_param_t)); + if (p) + { + memset(p, 0, sizeof(struct lstm_param_t)); + + // Set default values + p->clip = 0.0; + p->layout = 0; + p->input_forget = 0; + + // Parse attributes 
using the API functions + p->hidden_size = (int)onnx_attribute_read_int(n, "hidden_size", 0); + p->clip = onnx_attribute_read_float(n, "clip", 0.0f); + p->layout = (int)onnx_attribute_read_int(n, "layout", 0); + p->input_forget = (int)onnx_attribute_read_int(n, "input_forget", 0); + + dir = onnx_attribute_read_string(n, "direction", "forward"); + if (dir) + { + p->direction = dir; + } + else + { + p->direction = strdup("forward"); + } + + // Determine number of directions based on direction attribute + if (strncmp(p->direction, "bidirectional", 13) == 0) + { + p->num_directions = 2; + } + else + { + p->num_directions = 1; + } + + // Check optional inputs + p->has_bias = (n->ninput > LSTM_INPUT_B && n->inputs[LSTM_INPUT_B]) ? 1 : 0; + p->has_sequence_lens = (n->ninput > LSTM_INPUT_SEQUENCE_LENS && n->inputs[LSTM_INPUT_SEQUENCE_LENS]) ? 1 : 0; + p->has_initial_h = (n->ninput > LSTM_INPUT_INITIAL_H && n->inputs[LSTM_INPUT_INITIAL_H]) ? 1 : 0; + p->has_initial_c = (n->ninput > LSTM_INPUT_INITIAL_C && n->inputs[LSTM_INPUT_INITIAL_C]) ? 1 : 0; + p->has_peephole = (n->ninput > LSTM_INPUT_P && n->inputs[LSTM_INPUT_P]) ? 
1 : 0; + + n->priv = p; + return 1; + } + } + return 0; +} + +static int LSTM_exit(struct onnx_node_t * n) +{ + struct lstm_param_t * p = (struct lstm_param_t *)n->priv; + + if (p) + { + free(p); + } + return 1; +} + +static int LSTM_reshape(struct onnx_node_t * n) +{ + struct onnx_tensor_t * x = n->inputs[LSTM_INPUT_X]; + struct lstm_param_t * p = (struct lstm_param_t *)n->priv; + int seq_length, batch_size; + + if (!p) + return 0; + + // Parse dimensions based on layout + if (p->layout == 0) + { + // Layout 0: [seq_length, batch_size, input_size] + seq_length = x->dims[0]; + batch_size = x->dims[1]; + // input_size = x->dims[2] - не используется в этой функции + } + else + { + // Layout 1: [batch_size, seq_length, input_size] + batch_size = x->dims[0]; + seq_length = x->dims[1]; + // input_size = x->dims[2] - не используется в этой функции + } + + // Reshape Y if requested + if (n->noutput > LSTM_OUTPUT_Y && n->outputs[LSTM_OUTPUT_Y]) + { + struct onnx_tensor_t * y = n->outputs[LSTM_OUTPUT_Y]; + int dims[4]; + + if (p->layout == 0) + { + // Y shape is [seq_length, num_directions, batch_size, hidden_size] + dims[0] = seq_length; + dims[1] = p->num_directions; + dims[2] = batch_size; + dims[3] = p->hidden_size; + } + else + { + // Y shape is [batch_size, seq_length, num_directions, hidden_size] + dims[0] = batch_size; + dims[1] = seq_length; + dims[2] = p->num_directions; + dims[3] = p->hidden_size; + } + + if (!onnx_tensor_reshape(y, dims, 4, x->type)) + return 0; + } + + // Reshape Y_h if requested + if (n->noutput > LSTM_OUTPUT_Y_H && n->outputs[LSTM_OUTPUT_Y_H]) + { + struct onnx_tensor_t * y_h = n->outputs[LSTM_OUTPUT_Y_H]; + int dims[3]; + + if (p->layout == 0) + { + // Y_h shape is [num_directions, batch_size, hidden_size] + dims[0] = p->num_directions; + dims[1] = batch_size; + dims[2] = p->hidden_size; + } + else + { + // Y_h shape is [batch_size, num_directions, hidden_size] + dims[0] = batch_size; + dims[1] = p->num_directions; + dims[2] = 
p->hidden_size; + } + + if (!onnx_tensor_reshape(y_h, dims, 3, x->type)) + return 0; + } + + // Reshape Y_c if requested + if (n->noutput > LSTM_OUTPUT_Y_C && n->outputs[LSTM_OUTPUT_Y_C]) + { + struct onnx_tensor_t * y_c = n->outputs[LSTM_OUTPUT_Y_C]; + int dims[3]; + + if (p->layout == 0) + { + // Y_c shape is [num_directions, batch_size, hidden_size] + dims[0] = p->num_directions; + dims[1] = batch_size; + dims[2] = p->hidden_size; + } + else + { + // Y_c shape is [batch_size, num_directions, hidden_size] + dims[0] = batch_size; + dims[1] = p->num_directions; + dims[2] = p->hidden_size; + } + + if (!onnx_tensor_reshape(y_c, dims, 3, x->type)) + return 0; + } + + return 1; +} + +static void LSTM_float32(struct onnx_node_t * n) +{ + struct lstm_param_t * p = (struct lstm_param_t *)n->priv; + struct onnx_tensor_t * x = n->inputs[LSTM_INPUT_X]; + struct onnx_tensor_t * w = n->inputs[LSTM_INPUT_W]; + struct onnx_tensor_t * r = n->inputs[LSTM_INPUT_R]; + struct onnx_tensor_t * b = p->has_bias ? n->inputs[LSTM_INPUT_B] : NULL; + struct onnx_tensor_t * initial_h = p->has_initial_h ? n->inputs[LSTM_INPUT_INITIAL_H] : NULL; + struct onnx_tensor_t * initial_c = p->has_initial_c ? n->inputs[LSTM_INPUT_INITIAL_C] : NULL; + struct onnx_tensor_t * p_tensor = p->has_peephole ? n->inputs[LSTM_INPUT_P] : NULL; + struct onnx_tensor_t * y = (n->noutput > LSTM_OUTPUT_Y) ? n->outputs[LSTM_OUTPUT_Y] : NULL; + struct onnx_tensor_t * y_h = (n->noutput > LSTM_OUTPUT_Y_H) ? n->outputs[LSTM_OUTPUT_Y_H] : NULL; + struct onnx_tensor_t * y_c = (n->noutput > LSTM_OUTPUT_Y_C) ? 
n->outputs[LSTM_OUTPUT_Y_C] : NULL; + float * px, * pw, * pr, * ppi, * ppo, * ppf, * py, * py_h, * py_c; + float * ph_t, * pc_t, * pbx, * pbh; + float * gates, * it, * ot, * ft, * ct; + int seq_length, batch_size, input_size, hidden_size; + int num_directions = p->num_directions; + int layout = p->layout; + int i, j, d, b_idx, s_idx; + + // Get dimensions + if (layout == 0) + { + // [seq_length, batch_size, input_size] + seq_length = x->dims[0]; + batch_size = x->dims[1]; + input_size = x->dims[2]; + } + else + { + // [batch_size, seq_length, input_size] + batch_size = x->dims[0]; + seq_length = x->dims[1]; + input_size = x->dims[2]; + } + hidden_size = p->hidden_size; + + // Allocate memory for intermediate calculations and hidden/cell states + gates = (float *)malloc(batch_size * 4 * hidden_size * sizeof(float)); + ph_t = (float *)malloc(batch_size * hidden_size * sizeof(float)); + pc_t = (float *)malloc(batch_size * hidden_size * sizeof(float)); + + if (!gates || !ph_t || !pc_t) + { + if (gates) free(gates); + if (ph_t) free(ph_t); + if (pc_t) free(pc_t); + return; + } + + // Set up gate pointers + it = gates; + ot = gates + batch_size * hidden_size; + ft = gates + 2 * batch_size * hidden_size; + ct = gates + 3 * batch_size * hidden_size; + + // Process each direction + for (d = 0; d < num_directions; d++) + { + // Initialize hidden and cell states + if (initial_h) + { + if (layout == 0) + memcpy(ph_t, (float *)initial_h->datas + d * batch_size * hidden_size, batch_size * hidden_size * sizeof(float)); + else + for (b_idx = 0; b_idx < batch_size; b_idx++) + memcpy(ph_t + b_idx * hidden_size, + (float *)initial_h->datas + b_idx * num_directions * hidden_size + d * hidden_size, + hidden_size * sizeof(float)); + } + else + { + memset(ph_t, 0, batch_size * hidden_size * sizeof(float)); + } + + if (initial_c) + { + if (layout == 0) + memcpy(pc_t, (float *)initial_c->datas + d * batch_size * hidden_size, batch_size * hidden_size * sizeof(float)); + else + for (b_idx = 
0; b_idx < batch_size; b_idx++) + memcpy(pc_t + b_idx * hidden_size, + (float *)initial_c->datas + b_idx * num_directions * hidden_size + d * hidden_size, + hidden_size * sizeof(float)); + } + else + { + memset(pc_t, 0, batch_size * hidden_size * sizeof(float)); + } + + // Get weight and bias pointers for this direction + pw = (float *)w->datas + d * 4 * hidden_size * input_size; + pr = (float *)r->datas + d * 4 * hidden_size * hidden_size; + + if (b) + { + pbx = (float *)b->datas + d * 8 * hidden_size; // W bias + pbh = (float *)b->datas + d * 8 * hidden_size + 4 * hidden_size; // R bias + } + + if (p_tensor) + { + ppi = (float *)p_tensor->datas + d * 3 * hidden_size; // i peephole + ppo = (float *)p_tensor->datas + d * 3 * hidden_size + hidden_size; // o peephole + ppf = (float *)p_tensor->datas + d * 3 * hidden_size + 2 * hidden_size; // f peephole + } + + // Process sequence + for (s_idx = 0; s_idx < seq_length; s_idx++) + { + // Determine the actual sequence index based on direction + int seq_idx = s_idx; + if (strcmp(p->direction, "reverse") == 0) + { + seq_idx = seq_length - 1 - s_idx; + } + + // Get input data pointer for this sequence step + if (layout == 0) + { + // [seq_length, batch_size, input_size] + px = (float *)x->datas + seq_idx * batch_size * input_size; + } + else + { + // [batch_size, seq_length, input_size] + px = (float *)x->datas + batch_size * seq_idx * input_size; + } + + // Calculate gates: Xt*(Wi^T) + Ht-1*(Ri^T) + Wbi + Rbi + for (b_idx = 0; b_idx < batch_size; b_idx++) + { + float * x_b = px + b_idx * input_size; + float * h_b = ph_t + b_idx * hidden_size; + float * c_b = pc_t + b_idx * hidden_size; + float * gates_b = gates + b_idx * hidden_size; + + // X * W^T for all gates (i, o, f, c) + for (i = 0; i < 4; i++) + { + float * g = gates_b + i * batch_size * hidden_size; + float * wi = pw + i * hidden_size * input_size; + + // Matrix multiplication X * W^T for this gate + for (j = 0; j < hidden_size; j++) + { + float sum = 0.0f; + for 
(int k = 0; k < input_size; k++) + { + sum += x_b[k] * wi[j * input_size + k]; + } + g[j] = sum; + } + + // Add bias if available + if (b) + { + float * bi = pbx + i * hidden_size; + for (j = 0; j < hidden_size; j++) + { + g[j] += bi[j]; + } + } + } + + // H * R^T for all gates (i, o, f, c) + for (i = 0; i < 4; i++) + { + float * g = gates_b + i * batch_size * hidden_size; + float * ri = pr + i * hidden_size * hidden_size; + + // Matrix multiplication H * R^T for this gate + for (j = 0; j < hidden_size; j++) + { + float sum = 0.0f; + for (int k = 0; k < hidden_size; k++) + { + sum += h_b[k] * ri[j * hidden_size + k]; + } + g[j] += sum; + } + + // Add bias if available + if (b) + { + float * bi = pbh + i * hidden_size; + for (j = 0; j < hidden_size; j++) + { + g[j] += bi[j]; + } + } + } + + // Add peephole connections if available + if (p->has_peephole) + { + // Input gate: + Pi (.) Ct-1 + for (j = 0; j < hidden_size; j++) + { + it[j] += ppi[j] * c_b[j]; + } + + // Forget gate: + Pf (.) Ct-1 + for (j = 0; j < hidden_size; j++) + { + ft[j] += ppf[j] * c_b[j]; + } + } + + // Apply activation functions and calculate cell & hidden states + // it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi) + // ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf) + // ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc) + // Ct = ft (.) Ct-1 + it (.) ct + // ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo) + // Ht = ot (.) h(Ct) + + // Apply activations and update cell state + for (j = 0; j < hidden_size; j++) + { + float i_val = sigmoid(it[j]); + float f_val = sigmoid(ft[j]); + float c_val = tanhf(ct[j]); + + // Update cell state: Ct = ft (.) Ct-1 + it (.) ct + c_b[j] = f_val * c_b[j] + i_val * c_val; + + // Apply clip if specified + if (p->clip > 0) + { + if (c_b[j] > p->clip) + c_b[j] = p->clip; + else if (c_b[j] < -p->clip) + c_b[j] = -p->clip; + } + } + + // Add peephole to output gate if available: Po (.) 
Ct + if (p->has_peephole) + { + for (j = 0; j < hidden_size; j++) + { + ot[j] += ppo[j] * c_b[j]; + } + } + + // Update hidden state: Ht = ot (.) h(Ct) + for (j = 0; j < hidden_size; j++) + { + float o_val = sigmoid(ot[j]); + h_b[j] = o_val * tanhf(c_b[j]); + } + + // Store the results to output tensors if needed + if (y) + { + if (layout == 0) + { + // Y shape is [seq_length, num_directions, batch_size, hidden_size] + py = (float *)y->datas + + seq_idx * num_directions * batch_size * hidden_size + + d * batch_size * hidden_size + + b_idx * hidden_size; + } + else + { + // Y shape is [batch_size, seq_length, num_directions, hidden_size] + py = (float *)y->datas + + b_idx * seq_length * num_directions * hidden_size + + seq_idx * num_directions * hidden_size + + d * hidden_size; + } + + memcpy(py, h_b, hidden_size * sizeof(float)); + } + } + } + + // Store final hidden and cell states + if (y_h) + { + if (layout == 0) + { + // Y_h shape is [num_directions, batch_size, hidden_size] + py_h = (float *)y_h->datas + d * batch_size * hidden_size; + memcpy(py_h, ph_t, batch_size * hidden_size * sizeof(float)); + } + else + { + // Y_h shape is [batch_size, num_directions, hidden_size] + for (b_idx = 0; b_idx < batch_size; b_idx++) + { + py_h = (float *)y_h->datas + b_idx * num_directions * hidden_size + d * hidden_size; + memcpy(py_h, ph_t + b_idx * hidden_size, hidden_size * sizeof(float)); + } + } + } + + if (y_c) + { + if (layout == 0) + { + // Y_c shape is [num_directions, batch_size, hidden_size] + py_c = (float *)y_c->datas + d * batch_size * hidden_size; + memcpy(py_c, pc_t, batch_size * hidden_size * sizeof(float)); + } + else + { + // Y_c shape is [batch_size, num_directions, hidden_size] + for (b_idx = 0; b_idx < batch_size; b_idx++) + { + py_c = (float *)y_c->datas + b_idx * num_directions * hidden_size + d * hidden_size; + memcpy(py_c, pc_t + b_idx * hidden_size, hidden_size * sizeof(float)); + } + } + } + } + + // Free allocated memory + free(gates); + 
free(ph_t); + free(pc_t); +} void resolver_default_op_LSTM(struct onnx_node_t * n) { - if(n->opset >= 14) - { - } - else if(n->opset >= 7) - { - } - else if(n->opset >= 1) - { - } + if(n->opset >= 14) + { + } + else if(n->opset >= 7) + { + switch(n->inputs[0]->type) + { + case ONNX_TENSOR_TYPE_FLOAT32: + n->init = LSTM_init; + n->exit = LSTM_exit; + n->reshape = LSTM_reshape; + n->operator_ = LSTM_float32; + break; + default: + break; + } + } + else if(n->opset >= 1) + { + // Handle older opsets if needed + } } diff --git a/modules/fnxext/onnx_engine/src/default/LeakyRelu.c b/modules/fnxext/onnx_engine/src/default/LeakyRelu.c index 3d463978fc6..947d0c46b41 100644 --- a/modules/fnxext/onnx_engine/src/default/LeakyRelu.c +++ b/modules/fnxext/onnx_engine/src/default/LeakyRelu.c @@ -10,7 +10,7 @@ static int LeakyRelu_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->alpha = onnx_attribute_read_float(n, "alpha", 0.01); @@ -26,7 +26,7 @@ static int LeakyRelu_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/LogSoftmax.c b/modules/fnxext/onnx_engine/src/default/LogSoftmax.c index 288ba37beea..e044da56271 100644 --- a/modules/fnxext/onnx_engine/src/default/LogSoftmax.c +++ b/modules/fnxext/onnx_engine/src/default/LogSoftmax.c @@ -16,7 +16,7 @@ static int LogSoftmax_13_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_13_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_13_pdata_t)); if(pdat) { pdat->axis = onnx_attribute_read_int(n, "axis", -1); @@ -32,7 +32,7 @@ static int LogSoftmax_13_exit(struct onnx_node_t * n) struct operator_13_pdata_t * pdat = (struct operator_13_pdata_t *)n->priv; if(pdat) - free(pdat); + 
onnx_free(pdat); return 1; } @@ -239,7 +239,7 @@ static int LogSoftmax_1_11_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_1_11_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_1_11_pdata_t)); if(pdat) { pdat->axis = onnx_attribute_read_int(n, "axis", 1); @@ -255,7 +255,7 @@ static int LogSoftmax_1_11_exit(struct onnx_node_t * n) struct operator_1_11_pdata_t * pdat = (struct operator_1_11_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/MatMul.c b/modules/fnxext/onnx_engine/src/default/MatMul.c index b9e8c0019ed..5234a993462 100644 --- a/modules/fnxext/onnx_engine/src/default/MatMul.c +++ b/modules/fnxext/onnx_engine/src/default/MatMul.c @@ -1,4 +1,5 @@ #include "../onnx.h" +#include "../matrix.h" struct operator_pdata_t { int m; @@ -12,7 +13,7 @@ static int MatMul_init(struct onnx_node_t * n) if((n->ninput == 2) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->m = 0; @@ -30,7 +31,7 @@ static int MatMul_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } @@ -65,7 +66,7 @@ static int MatMul_reshape(struct onnx_node_t * n) bdims = b->dims; bndim = b->ndim; } - int ndim = maxx(andim, bndim); + int ndim = XMAX(andim, bndim); int dims[ndim]; if(andim < 2 || bndim < 2) return 0; @@ -79,7 +80,7 @@ static int MatMul_reshape(struct onnx_node_t * n) int blen = (bndim - i) < 0 ? 
1 : bdims[bndim - i]; if(alen != blen && alen > 1 && blen > 1) return 0; - dims[ndim - i] = maxx(alen, blen); + dims[ndim - i] = XMAX(alen, blen); } pdat->m = adims[andim - 2]; pdat->n = bdims[bndim - 1]; @@ -96,22 +97,12 @@ static void MatMul_int32(struct onnx_node_t * n) int32_t * py = (int32_t *)y->datas; int32_t * pa; int32_t * pb; - int32_t sum; for(size_t i = 0, l = y->ndata; i < l; i += pdat->m * pdat->n) { pa = onnx_tensor_broadcast_map_address(a, y, i); pb = onnx_tensor_broadcast_map_address(b, y, i); - for(int u = 0; u < pdat->m; u++) - { - for(int v = 0; v < pdat->n; v++) - { - sum = 0; - for(int w = 0; w < pdat->k; w++) - sum += pa[u * pdat->k + w] * pb[w * pdat->n + v]; - py[i + u * pdat->n + v] = sum; - } - } + matrix_mul(pdat->m, pdat->n, pdat->k, pa, pb, &py[i], ONNX_TENSOR_TYPE_INT32); } } @@ -124,22 +115,12 @@ static void MatMul_int64(struct onnx_node_t * n) int64_t * py = (int64_t *)y->datas; int64_t * pa; int64_t * pb; - int64_t sum; for(size_t i = 0, l = y->ndata; i < l; i += pdat->m * pdat->n) { pa = onnx_tensor_broadcast_map_address(a, y, i); pb = onnx_tensor_broadcast_map_address(b, y, i); - for(int u = 0; u < pdat->m; u++) - { - for(int v = 0; v < pdat->n; v++) - { - sum = 0; - for(int w = 0; w < pdat->k; w++) - sum += pa[u * pdat->k + w] * pb[w * pdat->n + v]; - py[i + u * pdat->n + v] = sum; - } - } + matrix_mul(pdat->m, pdat->n, pdat->k, pa, pb, &py[i], ONNX_TENSOR_TYPE_INT64); } } @@ -152,22 +133,12 @@ static void MatMul_uint32(struct onnx_node_t * n) uint32_t * py = (uint32_t *)y->datas; uint32_t * pa; uint32_t * pb; - uint32_t sum; for(size_t i = 0, l = y->ndata; i < l; i += pdat->m * pdat->n) { pa = onnx_tensor_broadcast_map_address(a, y, i); pb = onnx_tensor_broadcast_map_address(b, y, i); - for(int u = 0; u < pdat->m; u++) - { - for(int v = 0; v < pdat->n; v++) - { - sum = 0; - for(int w = 0; w < pdat->k; w++) - sum += pa[u * pdat->k + w] * pb[w * pdat->n + v]; - py[i + u * pdat->n + v] = sum; - } - } + matrix_mul(pdat->m, pdat->n, 
pdat->k, pa, pb, &py[i], ONNX_TENSOR_TYPE_UINT32); } } @@ -180,22 +151,12 @@ static void MatMul_uint64(struct onnx_node_t * n) uint64_t * py = (uint64_t *)y->datas; uint64_t * pa; uint64_t * pb; - uint64_t sum; for(size_t i = 0, l = y->ndata; i < l; i += pdat->m * pdat->n) { pa = onnx_tensor_broadcast_map_address(a, y, i); pb = onnx_tensor_broadcast_map_address(b, y, i); - for(int u = 0; u < pdat->m; u++) - { - for(int v = 0; v < pdat->n; v++) - { - sum = 0; - for(int w = 0; w < pdat->k; w++) - sum += pa[u * pdat->k + w] * pb[w * pdat->n + v]; - py[i + u * pdat->n + v] = sum; - } - } + matrix_mul(pdat->m, pdat->n, pdat->k, pa, pb, &py[i], ONNX_TENSOR_TYPE_UINT64); } } @@ -208,22 +169,12 @@ static void MatMul_bfloat16(struct onnx_node_t * n) uint16_t * py = (uint16_t *)y->datas; uint16_t * pa; uint16_t * pb; - float sum; for(size_t i = 0, l = y->ndata; i < l; i += pdat->m * pdat->n) { pa = onnx_tensor_broadcast_map_address(a, y, i); pb = onnx_tensor_broadcast_map_address(b, y, i); - for(int u = 0; u < pdat->m; u++) - { - for(int v = 0; v < pdat->n; v++) - { - sum = 0; - for(int w = 0; w < pdat->k; w++) - sum += bfloat16_to_float32(pa[u * pdat->k + w]) * bfloat16_to_float32(pb[w * pdat->n + v]); - py[i + u * pdat->n + v] = float32_to_bfloat16(sum); - } - } + matrix_mul(pdat->m, pdat->n, pdat->k, pa, pb, &py[i], ONNX_TENSOR_TYPE_BFLOAT16); } } @@ -236,22 +187,12 @@ static void MatMul_float16(struct onnx_node_t * n) uint16_t * py = (uint16_t *)y->datas; uint16_t * pa; uint16_t * pb; - float sum; for(size_t i = 0, l = y->ndata; i < l; i += pdat->m * pdat->n) { pa = onnx_tensor_broadcast_map_address(a, y, i); pb = onnx_tensor_broadcast_map_address(b, y, i); - for(int u = 0; u < pdat->m; u++) - { - for(int v = 0; v < pdat->n; v++) - { - sum = 0; - for(int w = 0; w < pdat->k; w++) - sum += float16_to_float32(pa[u * pdat->k + w]) * float16_to_float32(pb[w * pdat->n + v]); - py[i + u * pdat->n + v] = float32_to_float16(sum); - } - } + matrix_mul(pdat->m, pdat->n, pdat->k, 
pa, pb, &py[i], ONNX_TENSOR_TYPE_FLOAT16); } } @@ -264,22 +205,12 @@ static void MatMul_float32(struct onnx_node_t * n) float * py = (float *)y->datas; float * pa; float * pb; - float sum; for(size_t i = 0, l = y->ndata; i < l; i += pdat->m * pdat->n) { pa = onnx_tensor_broadcast_map_address(a, y, i); pb = onnx_tensor_broadcast_map_address(b, y, i); - for(int u = 0; u < pdat->m; u++) - { - for(int v = 0; v < pdat->n; v++) - { - sum = 0; - for(int w = 0; w < pdat->k; w++) - sum += pa[u * pdat->k + w] * pb[w * pdat->n + v]; - py[i + u * pdat->n + v] = sum; - } - } + matrix_mul(pdat->m, pdat->n, pdat->k, pa, pb, &py[i], ONNX_TENSOR_TYPE_FLOAT32); } } @@ -292,22 +223,12 @@ static void MatMul_float64(struct onnx_node_t * n) double * py = (double *)y->datas; double * pa; double * pb; - double sum; for(size_t i = 0, l = y->ndata; i < l; i += pdat->m * pdat->n) { pa = onnx_tensor_broadcast_map_address(a, y, i); pb = onnx_tensor_broadcast_map_address(b, y, i); - for(int u = 0; u < pdat->m; u++) - { - for(int v = 0; v < pdat->n; v++) - { - sum = 0; - for(int w = 0; w < pdat->k; w++) - sum += pa[u * pdat->k + w] * pb[w * pdat->n + v]; - py[i + u * pdat->n + v] = sum; - } - } + matrix_mul(pdat->m, pdat->n, pdat->k, pa, pb, &py[i], ONNX_TENSOR_TYPE_FLOAT64); } } diff --git a/modules/fnxext/onnx_engine/src/default/MatMulInteger.c b/modules/fnxext/onnx_engine/src/default/MatMulInteger.c index 4f6287f42f5..e6df22180ef 100644 --- a/modules/fnxext/onnx_engine/src/default/MatMulInteger.c +++ b/modules/fnxext/onnx_engine/src/default/MatMulInteger.c @@ -1,8 +1,327 @@ #include "../onnx.h" +#include "../matrix.h" + +struct operator_pdata_t { + int m; + int n; + int k; + int8_t * a_zero_point; + int8_t * b_zero_point; + int has_a_zero_point; + int has_b_zero_point; +}; + +static int MatMulInteger_init(struct onnx_node_t * n) +{ + struct operator_pdata_t * pdat; + + if((n->ninput >= 2) && (n->ninput <= 4) && (n->noutput == 1)) + { + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); + 
if(pdat) + { + pdat->m = 0; + pdat->n = 0; + pdat->k = 0; + pdat->a_zero_point = NULL; + pdat->b_zero_point = NULL; + pdat->has_a_zero_point = (n->ninput >= 3) ? 1 : 0; + pdat->has_b_zero_point = (n->ninput >= 4) ? 1 : 0; + n->priv = pdat; + return 1; + } + } + return 0; +} + +static int MatMulInteger_exit(struct onnx_node_t * n) +{ + struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; + + if(pdat) + { + if(pdat->a_zero_point) + onnx_free(pdat->a_zero_point); + if(pdat->b_zero_point) + onnx_free(pdat->b_zero_point); + onnx_free(pdat); + } + return 1; +} + +static int MatMulInteger_reshape(struct onnx_node_t * n) +{ + struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + struct onnx_tensor_t * azp = pdat->has_a_zero_point ? n->inputs[2] : NULL; + struct onnx_tensor_t * bzp = pdat->has_b_zero_point ? n->inputs[3] : NULL; + int andim; + int * adims; + int bndim; + int * bdims; + + if(a->ndim == 1) + { + adims = (int[]){ 1, a->dims[0] }; + andim = 2; + } + else + { + adims = a->dims; + andim = a->ndim; + } + if(b->ndim == 1) + { + bdims = (int[]){ b->dims[0], 1 }; + bndim = 2; + } + else + { + bdims = b->dims; + bndim = b->ndim; + } + int ndim = XMAX(andim, bndim); + int dims[ndim]; + if(andim < 2 || bndim < 2) + return 0; + if(adims[andim - 1] != bdims[bndim - 2]) + return 0; + dims[ndim - 2] = adims[andim - 2]; + dims[ndim - 1] = bdims[bndim - 1]; + for(int i = 3; i <= ndim; i++) + { + int alen = (andim - i) < 0 ? 1 : adims[andim - i]; + int blen = (bndim - i) < 0 ? 
1 : bdims[bndim - i]; + if(alen != blen && alen > 1 && blen > 1) + return 0; + dims[ndim - i] = XMAX(alen, blen); + } + pdat->m = adims[andim - 2]; + pdat->n = bdims[bndim - 1]; + pdat->k = adims[andim - 1]; + + // Handle zero points + if(pdat->a_zero_point) + { + onnx_free(pdat->a_zero_point); + pdat->a_zero_point = NULL; + } + if(pdat->b_zero_point) + { + onnx_free(pdat->b_zero_point); + pdat->b_zero_point = NULL; + } + + if(azp && azp->ndata > 0) + { + if(azp->type == ONNX_TENSOR_TYPE_INT8) + pdat->a_zero_point = onnx_malloc(sizeof(int8_t) * azp->ndata); + else // UINT8 + pdat->a_zero_point = onnx_malloc(sizeof(int8_t) * azp->ndata); + + if(pdat->a_zero_point) + { + if(azp->type == ONNX_TENSOR_TYPE_INT8) + memcpy(pdat->a_zero_point, azp->datas, sizeof(int8_t) * azp->ndata); + else // UINT8 + { + // Convert uint8_t to int8_t + uint8_t * p = (uint8_t *)azp->datas; + for(size_t i = 0; i < azp->ndata; i++) + pdat->a_zero_point[i] = (int8_t)(p[i]); + } + } + } + + if(bzp && bzp->ndata > 0) + { + if(bzp->type == ONNX_TENSOR_TYPE_INT8) + pdat->b_zero_point = onnx_malloc(sizeof(int8_t) * bzp->ndata); + else // UINT8 + pdat->b_zero_point = onnx_malloc(sizeof(int8_t) * bzp->ndata); + + if(pdat->b_zero_point) + { + if(bzp->type == ONNX_TENSOR_TYPE_INT8) + memcpy(pdat->b_zero_point, bzp->datas, sizeof(int8_t) * bzp->ndata); + else // UINT8 + { + // Convert uint8_t to int8_t + uint8_t * p = (uint8_t *)bzp->datas; + for(size_t i = 0; i < bzp->ndata; i++) + pdat->b_zero_point[i] = (int8_t)(p[i]); + } + } + } + + return onnx_tensor_reshape(y, dims, ndim, ONNX_TENSOR_TYPE_INT32); +} + +static void MatMulInteger_int8(struct onnx_node_t * n) +{ + struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + int32_t * py = (int32_t *)y->datas; + int8_t * pa; + int8_t * pb; + int8_t a_zp = pdat->a_zero_point ? 
pdat->a_zero_point[0] : 0; + int8_t b_zp = pdat->b_zero_point ? pdat->b_zero_point[0] : 0; + + for(size_t i = 0, l = y->ndata; i < l; i += pdat->m * pdat->n) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + + // MatMul with zero point handling + for(int u = 0; u < pdat->m; u++) + { + for(int v = 0; v < pdat->n; v++) + { + int32_t sum = 0; + for(int w = 0; w < pdat->k; w++) + { + sum += ((int32_t)pa[u * pdat->k + w] - a_zp) * ((int32_t)pb[w * pdat->n + v] - b_zp); + } + py[i + u * pdat->n + v] = sum; + } + } + } +} + +static void MatMulInteger_uint8(struct onnx_node_t * n) +{ + struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + int32_t * py = (int32_t *)y->datas; + uint8_t * pa; + uint8_t * pb; + uint8_t a_zp = pdat->a_zero_point ? (uint8_t)pdat->a_zero_point[0] : 0; + uint8_t b_zp = pdat->b_zero_point ? (uint8_t)pdat->b_zero_point[0] : 0; + + for(size_t i = 0, l = y->ndata; i < l; i += pdat->m * pdat->n) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + + // MatMul with zero point handling + for(int u = 0; u < pdat->m; u++) + { + for(int v = 0; v < pdat->n; v++) + { + int32_t sum = 0; + for(int w = 0; w < pdat->k; w++) + { + sum += ((int32_t)pa[u * pdat->k + w] - a_zp) * ((int32_t)pb[w * pdat->n + v] - b_zp); + } + py[i + u * pdat->n + v] = sum; + } + } + } +} + +static void MatMulInteger_int8_uint8(struct onnx_node_t * n) +{ + struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + int32_t * py = (int32_t *)y->datas; + int8_t * pa; + uint8_t * pb; + int8_t a_zp = pdat->a_zero_point ? pdat->a_zero_point[0] : 0; + uint8_t b_zp = pdat->b_zero_point ? 
(uint8_t)pdat->b_zero_point[0] : 0; + + for(size_t i = 0, l = y->ndata; i < l; i += pdat->m * pdat->n) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + + // MatMul with zero point handling + for(int u = 0; u < pdat->m; u++) + { + for(int v = 0; v < pdat->n; v++) + { + int32_t sum = 0; + for(int w = 0; w < pdat->k; w++) + { + sum += ((int32_t)pa[u * pdat->k + w] - a_zp) * ((int32_t)pb[w * pdat->n + v] - b_zp); + } + py[i + u * pdat->n + v] = sum; + } + } + } +} + +static void MatMulInteger_uint8_int8(struct onnx_node_t * n) +{ + struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * a = n->inputs[0]; + struct onnx_tensor_t * b = n->inputs[1]; + int32_t * py = (int32_t *)y->datas; + uint8_t * pa; + int8_t * pb; + uint8_t a_zp = pdat->a_zero_point ? (uint8_t)pdat->a_zero_point[0] : 0; + int8_t b_zp = pdat->b_zero_point ? pdat->b_zero_point[0] : 0; + + for(size_t i = 0, l = y->ndata; i < l; i += pdat->m * pdat->n) + { + pa = onnx_tensor_broadcast_map_address(a, y, i); + pb = onnx_tensor_broadcast_map_address(b, y, i); + + // MatMul with zero point handling + for(int u = 0; u < pdat->m; u++) + { + for(int v = 0; v < pdat->n; v++) + { + int32_t sum = 0; + for(int w = 0; w < pdat->k; w++) + { + sum += ((int32_t)pa[u * pdat->k + w] - a_zp) * ((int32_t)pb[w * pdat->n + v] - b_zp); + } + py[i + u * pdat->n + v] = sum; + } + } + } +} void resolver_default_op_MatMulInteger(struct onnx_node_t * n) { if(n->opset >= 10) { + if((n->inputs[0]->type == ONNX_TENSOR_TYPE_INT8) && (n->inputs[1]->type == ONNX_TENSOR_TYPE_INT8)) + { + n->init = MatMulInteger_init; + n->exit = MatMulInteger_exit; + n->reshape = MatMulInteger_reshape; + n->operator_ = MatMulInteger_int8; + } + else if((n->inputs[0]->type == ONNX_TENSOR_TYPE_UINT8) && (n->inputs[1]->type == ONNX_TENSOR_TYPE_UINT8)) + { + n->init = MatMulInteger_init; + n->exit = MatMulInteger_exit; + 
n->reshape = MatMulInteger_reshape; + n->operator_ = MatMulInteger_uint8; + } + else if((n->inputs[0]->type == ONNX_TENSOR_TYPE_INT8) && (n->inputs[1]->type == ONNX_TENSOR_TYPE_UINT8)) + { + n->init = MatMulInteger_init; + n->exit = MatMulInteger_exit; + n->reshape = MatMulInteger_reshape; + n->operator_ = MatMulInteger_int8_uint8; + } + else if((n->inputs[0]->type == ONNX_TENSOR_TYPE_UINT8) && (n->inputs[1]->type == ONNX_TENSOR_TYPE_INT8)) + { + n->init = MatMulInteger_init; + n->exit = MatMulInteger_exit; + n->reshape = MatMulInteger_reshape; + n->operator_ = MatMulInteger_uint8_int8; + } } -} +} \ No newline at end of file diff --git a/modules/fnxext/onnx_engine/src/default/MaxPool.c b/modules/fnxext/onnx_engine/src/default/MaxPool.c index 6ea41284fc5..b4b1f78d2f3 100644 --- a/modules/fnxext/onnx_engine/src/default/MaxPool.c +++ b/modules/fnxext/onnx_engine/src/default/MaxPool.c @@ -31,10 +31,10 @@ static int MaxPool_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput >= 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { - memset(pdat, 0, sizeof(struct operator_pdata_t)); + onnx_memset(pdat, 0, sizeof(struct operator_pdata_t)); switch(shash(onnx_attribute_read_string(n, "auto_pad", "NOTSET"))) { case 0xc3966fc2: /* "NOTSET" */ @@ -58,12 +58,12 @@ static int MaxPool_init(struct onnx_node_t * n) pdat->nkernel = onnx_attribute_read_ints(n, "kernel_shape", &ints); if(pdat->nkernel > 0) { - pdat->kernels = malloc(sizeof(int) * pdat->nkernel); + pdat->kernels = onnx_malloc(sizeof(int) * pdat->nkernel); for(i = 0; i < pdat->nkernel; i++) pdat->kernels[i] = ints[i]; } pdat->ndilation = pdat->nkernel; - pdat->dilations = malloc(sizeof(int) * pdat->ndilation); + pdat->dilations = onnx_malloc(sizeof(int) * pdat->ndilation); if(pdat->dilations) { l = onnx_attribute_read_ints(n, "dilations", &ints); @@ -73,7 +73,7 @@ static int MaxPool_init(struct onnx_node_t * n) pdat->dilations[i] = 1; } 
pdat->npad = pdat->nkernel * 2; - pdat->pads = malloc(sizeof(int) * pdat->npad); + pdat->pads = onnx_malloc(sizeof(int) * pdat->npad); if(pdat->pads) { l = onnx_attribute_read_ints(n, "pads", &ints); @@ -83,7 +83,7 @@ static int MaxPool_init(struct onnx_node_t * n) pdat->pads[i] = 0; } pdat->nstride = pdat->nkernel; - pdat->strides = malloc(sizeof(int) * pdat->nstride); + pdat->strides = onnx_malloc(sizeof(int) * pdat->nstride); if(pdat->strides) { l = onnx_attribute_read_ints(n, "strides", &ints); @@ -106,14 +106,14 @@ static int MaxPool_exit(struct onnx_node_t * n) if(pdat) { if(pdat->kernels) - free(pdat->kernels); + onnx_free(pdat->kernels); if(pdat->dilations) - free(pdat->dilations); + onnx_free(pdat->dilations); if(pdat->pads) - free(pdat->pads); + onnx_free(pdat->pads); if(pdat->strides) - free(pdat->strides); - free(pdat); + onnx_free(pdat->strides); + onnx_free(pdat); } return 1; } @@ -131,7 +131,7 @@ static int MaxPool_reshape(struct onnx_node_t * n) switch(pdat->auto_pad) { case AUTO_PAD_NOTSET: - memcpy(pdat->cpads, pdat->pads, sizeof(int) * pdat->npad); + onnx_memcpy(pdat->cpads, pdat->pads, sizeof(int) * pdat->npad); break; case AUTO_PAD_SAME_UPPER: for(i = 0; i < pdat->npad / 2; i++) @@ -150,7 +150,7 @@ static int MaxPool_reshape(struct onnx_node_t * n) } break; case AUTO_PAD_VALID: - memset(pdat->cpads, 0, sizeof(int) * pdat->npad); + onnx_memset(pdat->cpads, 0, sizeof(int) * pdat->npad); break; default: break; @@ -227,12 +227,12 @@ static void MaxPool_int8(struct onnx_node_t * n) int b_dim[x->ndim]; int i; - memset(o_dim, 0, sizeof(o_dim)); + onnx_memset(o_dim, 0, sizeof(o_dim)); do { for(i = 2; i < x->ndim; ++i) b_dim[i] = o_dim[i] * pdat->strides[i - 2] - pdat->cpads[i - 2]; maxv = INT8_MIN; - memset(k_dim, 0, sizeof(k_dim)); + onnx_memset(k_dim, 0, sizeof(k_dim)); do { i_dim[0] = o_dim[0]; i_dim[1] = o_dim[1]; @@ -246,7 +246,7 @@ static void MaxPool_int8(struct onnx_node_t * n) if(i >= x->ndim) { v = px[dim_offset(x->ndim, i_dim, x->dims)]; - 
maxv = maxx(v, maxv); + maxv = XMAX(v, maxv); } } while(dim_next(x->ndim - 2, k_dim, pdat->kernels)); py[dim_offset(x->ndim, o_dim, y->dims)] = maxv; @@ -267,12 +267,12 @@ static void MaxPool_uint8(struct onnx_node_t * n) int b_dim[x->ndim]; int i; - memset(o_dim, 0, sizeof(o_dim)); + onnx_memset(o_dim, 0, sizeof(o_dim)); do { for(i = 2; i < x->ndim; ++i) b_dim[i] = o_dim[i] * pdat->strides[i - 2] - pdat->cpads[i - 2]; maxv = 0; - memset(k_dim, 0, sizeof(k_dim)); + onnx_memset(k_dim, 0, sizeof(k_dim)); do { i_dim[0] = o_dim[0]; i_dim[1] = o_dim[1]; @@ -286,7 +286,7 @@ static void MaxPool_uint8(struct onnx_node_t * n) if(i >= x->ndim) { v = px[dim_offset(x->ndim, i_dim, x->dims)]; - maxv = maxx(v, maxv); + maxv = XMAX(v, maxv); } } while(dim_next(x->ndim - 2, k_dim, pdat->kernels)); py[dim_offset(x->ndim, o_dim, y->dims)] = maxv; @@ -307,12 +307,12 @@ static void MaxPool_float16(struct onnx_node_t * n) int b_dim[x->ndim]; int i; - memset(o_dim, 0, sizeof(o_dim)); + onnx_memset(o_dim, 0, sizeof(o_dim)); do { for(i = 2; i < x->ndim; ++i) b_dim[i] = o_dim[i] * pdat->strides[i - 2] - pdat->cpads[i - 2]; maxv = -FLT_MAX; - memset(k_dim, 0, sizeof(k_dim)); + onnx_memset(k_dim, 0, sizeof(k_dim)); do { i_dim[0] = o_dim[0]; i_dim[1] = o_dim[1]; @@ -347,12 +347,12 @@ static void MaxPool_float32(struct onnx_node_t * n) int b_dim[x->ndim]; int i; - memset(o_dim, 0, sizeof(o_dim)); + onnx_memset(o_dim, 0, sizeof(o_dim)); do { for(i = 2; i < x->ndim; ++i) b_dim[i] = o_dim[i] * pdat->strides[i - 2] - pdat->cpads[i - 2]; maxv = -FLT_MAX; - memset(k_dim, 0, sizeof(k_dim)); + onnx_memset(k_dim, 0, sizeof(k_dim)); do { i_dim[0] = o_dim[0]; i_dim[1] = o_dim[1]; @@ -387,12 +387,12 @@ static void MaxPool_float64(struct onnx_node_t * n) int b_dim[x->ndim]; int i; - memset(o_dim, 0, sizeof(o_dim)); + onnx_memset(o_dim, 0, sizeof(o_dim)); do { for(i = 2; i < x->ndim; ++i) b_dim[i] = o_dim[i] * pdat->strides[i - 2] - pdat->cpads[i - 2]; maxv = -DBL_MAX; - memset(k_dim, 0, sizeof(k_dim)); + 
onnx_memset(k_dim, 0, sizeof(k_dim)); do { i_dim[0] = o_dim[0]; i_dim[1] = o_dim[1]; diff --git a/modules/fnxext/onnx_engine/src/default/Mod.c b/modules/fnxext/onnx_engine/src/default/Mod.c index 5da0f108fe4..2f065b369ab 100644 --- a/modules/fnxext/onnx_engine/src/default/Mod.c +++ b/modules/fnxext/onnx_engine/src/default/Mod.c @@ -10,7 +10,7 @@ static int Mod_init(struct onnx_node_t * n) if((n->ninput == 2) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->fmod = onnx_attribute_read_int(n, "fmod", 0); @@ -26,7 +26,7 @@ static int Mod_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/Multinomial.c b/modules/fnxext/onnx_engine/src/default/Multinomial.c index 2a7c6576a80..3d97011f489 100644 --- a/modules/fnxext/onnx_engine/src/default/Multinomial.c +++ b/modules/fnxext/onnx_engine/src/default/Multinomial.c @@ -12,7 +12,7 @@ static int Multinomial_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->dtype = (enum onnx_tensor_type_t)onnx_attribute_read_int(n, "dtype", 6); @@ -30,7 +30,7 @@ static int Multinomial_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/RandomNormal.c b/modules/fnxext/onnx_engine/src/default/RandomNormal.c index 343bd97a100..d73a5da829b 100644 --- a/modules/fnxext/onnx_engine/src/default/RandomNormal.c +++ b/modules/fnxext/onnx_engine/src/default/RandomNormal.c @@ -17,11 +17,11 @@ static int RandomNormal_init(struct onnx_node_t * n) if(n->noutput == 1) { - pdat = malloc(sizeof(struct operator_pdata_t)); + 
pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->nshape = onnx_attribute_read_ints(n, "shape", &ints); - if((pdat->nshape > 0) && (pdat->shape = malloc(sizeof(int) * pdat->nshape))) + if((pdat->nshape > 0) && (pdat->shape = onnx_malloc(sizeof(int) * pdat->nshape))) { pdat->dtype = (enum onnx_tensor_type_t)onnx_attribute_read_int(n, "dtype", 1); pdat->mean = onnx_attribute_read_float(n, "mean", 0.0); @@ -34,7 +34,7 @@ static int RandomNormal_init(struct onnx_node_t * n) } else { - free(pdat); + onnx_free(pdat); return 0; } } @@ -49,8 +49,8 @@ static int RandomNormal_exit(struct onnx_node_t * n) if(pdat) { if(pdat->shape) - free(pdat->shape); - free(pdat); + onnx_free(pdat->shape); + onnx_free(pdat); } return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/RandomNormalLike.c b/modules/fnxext/onnx_engine/src/default/RandomNormalLike.c index 29e03fc4284..3937e2c693a 100644 --- a/modules/fnxext/onnx_engine/src/default/RandomNormalLike.c +++ b/modules/fnxext/onnx_engine/src/default/RandomNormalLike.c @@ -13,7 +13,7 @@ static int RandomNormalLike_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->dtype = (enum onnx_tensor_type_t)onnx_attribute_read_int(n, "dtype", 0); @@ -32,7 +32,7 @@ static int RandomNormalLike_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/RandomUniform.c b/modules/fnxext/onnx_engine/src/default/RandomUniform.c index 3984717b535..e510db90ef1 100644 --- a/modules/fnxext/onnx_engine/src/default/RandomUniform.c +++ b/modules/fnxext/onnx_engine/src/default/RandomUniform.c @@ -17,11 +17,11 @@ static int RandomUniform_init(struct onnx_node_t * n) if(n->noutput == 1) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = 
onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->nshape = onnx_attribute_read_ints(n, "shape", &ints); - if((pdat->nshape > 0) && (pdat->shape = malloc(sizeof(int) * pdat->nshape))) + if((pdat->nshape > 0) && (pdat->shape = onnx_malloc(sizeof(int) * pdat->nshape))) { pdat->dtype = (enum onnx_tensor_type_t)onnx_attribute_read_int(n, "dtype", 1); pdat->high = onnx_attribute_read_float(n, "high", 1.0); @@ -34,7 +34,7 @@ static int RandomUniform_init(struct onnx_node_t * n) } else { - free(pdat); + onnx_free(pdat); return 0; } } @@ -49,8 +49,8 @@ static int RandomUniform_exit(struct onnx_node_t * n) if(pdat) { if(pdat->shape) - free(pdat->shape); - free(pdat); + onnx_free(pdat->shape); + onnx_free(pdat); } return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/RandomUniformLike.c b/modules/fnxext/onnx_engine/src/default/RandomUniformLike.c index 54248744d01..93a5f6614eb 100644 --- a/modules/fnxext/onnx_engine/src/default/RandomUniformLike.c +++ b/modules/fnxext/onnx_engine/src/default/RandomUniformLike.c @@ -13,7 +13,7 @@ static int RandomUniformLike_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->dtype = (enum onnx_tensor_type_t)onnx_attribute_read_int(n, "dtype", 0); @@ -32,7 +32,7 @@ static int RandomUniformLike_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/Range.c b/modules/fnxext/onnx_engine/src/default/Range.c index 8506254f39b..c8ecff87203 100644 --- a/modules/fnxext/onnx_engine/src/default/Range.c +++ b/modules/fnxext/onnx_engine/src/default/Range.c @@ -12,7 +12,7 @@ static int Range_init(struct onnx_node_t * n) if((n->ninput == 3) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct 
operator_pdata_t)); if(pdat) { pdat->start = 0; @@ -30,7 +30,7 @@ static int Range_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/ReduceL1.c b/modules/fnxext/onnx_engine/src/default/ReduceL1.c index de4822ea22b..b0f7ae2a65e 100644 --- a/modules/fnxext/onnx_engine/src/default/ReduceL1.c +++ b/modules/fnxext/onnx_engine/src/default/ReduceL1.c @@ -17,7 +17,7 @@ static int ReduceL1_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { nint = onnx_attribute_read_ints(n, "axes", &ints); @@ -25,8 +25,8 @@ static int ReduceL1_init(struct onnx_node_t * n) pdat->naxes = nint; else pdat->naxes = n->inputs[0]->ndim; - pdat->axes = malloc(sizeof(int) * pdat->naxes); - pdat->caxes = malloc(sizeof(int) * pdat->naxes); + pdat->axes = onnx_malloc(sizeof(int) * pdat->naxes); + pdat->caxes = onnx_malloc(sizeof(int) * pdat->naxes); if(pdat->axes && pdat->caxes) { if(nint > 0) @@ -46,10 +46,10 @@ static int ReduceL1_init(struct onnx_node_t * n) else { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } } } @@ -63,10 +63,10 @@ static int ReduceL1_exit(struct onnx_node_t * n) if(pdat) { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } return 1; } @@ -92,7 +92,7 @@ static int ReduceL1_reshape(struct onnx_node_t * n) } if(pdat->keepdims) { - memcpy(dims, x->dims, sizeof(int) * ndim); + onnx_memcpy(dims, x->dims, sizeof(int) * ndim); for(i = 0; i < pdat->naxes; i++) dims[pdat->caxes[i]] = 1; } @@ -179,10 +179,10 @@ static void ReduceL1_int8(struct onnx_node_t * n) k += 1; } i = 0; - 
memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -227,10 +227,10 @@ static void ReduceL1_int32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -275,10 +275,10 @@ static void ReduceL1_int64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -323,10 +323,10 @@ static void ReduceL1_uint8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -371,10 +371,10 @@ static void ReduceL1_uint32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, 
not_in_axes_axis_dis); sum = 0; do @@ -419,10 +419,10 @@ static void ReduceL1_uint64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -467,10 +467,10 @@ static void ReduceL1_bfloat16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -515,10 +515,10 @@ static void ReduceL1_float16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -563,10 +563,10 @@ static void ReduceL1_float32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -611,10 +611,10 @@ static void ReduceL1_float64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * 
pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do diff --git a/modules/fnxext/onnx_engine/src/default/ReduceL2.c b/modules/fnxext/onnx_engine/src/default/ReduceL2.c index c7efad36c2f..20efea3368f 100644 --- a/modules/fnxext/onnx_engine/src/default/ReduceL2.c +++ b/modules/fnxext/onnx_engine/src/default/ReduceL2.c @@ -17,7 +17,7 @@ static int ReduceL2_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { nint = onnx_attribute_read_ints(n, "axes", &ints); @@ -25,8 +25,8 @@ static int ReduceL2_init(struct onnx_node_t * n) pdat->naxes = nint; else pdat->naxes = n->inputs[0]->ndim; - pdat->axes = malloc(sizeof(int) * pdat->naxes); - pdat->caxes = malloc(sizeof(int) * pdat->naxes); + pdat->axes = onnx_malloc(sizeof(int) * pdat->naxes); + pdat->caxes = onnx_malloc(sizeof(int) * pdat->naxes); if(pdat->axes && pdat->caxes) { if(nint > 0) @@ -46,10 +46,10 @@ static int ReduceL2_init(struct onnx_node_t * n) else { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } } } @@ -63,10 +63,10 @@ static int ReduceL2_exit(struct onnx_node_t * n) if(pdat) { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } return 1; } @@ -92,7 +92,7 @@ static int ReduceL2_reshape(struct onnx_node_t * n) } if(pdat->keepdims) { - memcpy(dims, x->dims, sizeof(int) * ndim); + onnx_memcpy(dims, x->dims, sizeof(int) * ndim); for(i = 0; i < pdat->naxes; i++) dims[pdat->caxes[i]] = 1; } @@ -180,10 +180,10 @@ static void ReduceL2_int8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + 
onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -230,10 +230,10 @@ static void ReduceL2_int32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -280,10 +280,10 @@ static void ReduceL2_int64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -330,10 +330,10 @@ static void ReduceL2_uint8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -380,10 +380,10 @@ static void ReduceL2_uint32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -430,10 +430,10 @@ static void 
ReduceL2_uint64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -479,10 +479,10 @@ static void ReduceL2_bfloat16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -528,10 +528,10 @@ static void ReduceL2_float16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -577,10 +577,10 @@ static void ReduceL2_float32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -626,10 +626,10 @@ static void ReduceL2_float64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * 
pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do diff --git a/modules/fnxext/onnx_engine/src/default/ReduceLogSum.c b/modules/fnxext/onnx_engine/src/default/ReduceLogSum.c index f5f03817904..9c7ea576bef 100644 --- a/modules/fnxext/onnx_engine/src/default/ReduceLogSum.c +++ b/modules/fnxext/onnx_engine/src/default/ReduceLogSum.c @@ -17,7 +17,7 @@ static int ReduceLogSum_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { nint = onnx_attribute_read_ints(n, "axes", &ints); @@ -25,8 +25,8 @@ static int ReduceLogSum_init(struct onnx_node_t * n) pdat->naxes = nint; else pdat->naxes = n->inputs[0]->ndim; - pdat->axes = malloc(sizeof(int) * pdat->naxes); - pdat->caxes = malloc(sizeof(int) * pdat->naxes); + pdat->axes = onnx_malloc(sizeof(int) * pdat->naxes); + pdat->caxes = onnx_malloc(sizeof(int) * pdat->naxes); if(pdat->axes && pdat->caxes) { if(nint > 0) @@ -46,10 +46,10 @@ static int ReduceLogSum_init(struct onnx_node_t * n) else { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } } } @@ -63,10 +63,10 @@ static int ReduceLogSum_exit(struct onnx_node_t * n) if(pdat) { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } return 1; } @@ -92,7 +92,7 @@ static int ReduceLogSum_reshape(struct onnx_node_t * n) } if(pdat->keepdims) { - memcpy(dims, x->dims, sizeof(int) * ndim); + onnx_memcpy(dims, x->dims, sizeof(int) * ndim); for(i = 0; i < pdat->naxes; i++) dims[pdat->caxes[i]] = 1; } @@ -179,10 +179,10 @@ static void ReduceLogSum_int8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) 
* not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -227,10 +227,10 @@ static void ReduceLogSum_int32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -275,10 +275,10 @@ static void ReduceLogSum_int64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -323,10 +323,10 @@ static void ReduceLogSum_uint8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -371,10 +371,10 @@ static void ReduceLogSum_uint32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -419,10 +419,10 @@ static void ReduceLogSum_uint64(struct 
onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -467,10 +467,10 @@ static void ReduceLogSum_bfloat16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -515,10 +515,10 @@ static void ReduceLogSum_float16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -563,10 +563,10 @@ static void ReduceLogSum_float32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -611,10 +611,10 @@ static void ReduceLogSum_float64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = 
dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do diff --git a/modules/fnxext/onnx_engine/src/default/ReduceLogSumExp.c b/modules/fnxext/onnx_engine/src/default/ReduceLogSumExp.c index c92126553b8..a76f05b96e9 100644 --- a/modules/fnxext/onnx_engine/src/default/ReduceLogSumExp.c +++ b/modules/fnxext/onnx_engine/src/default/ReduceLogSumExp.c @@ -17,7 +17,7 @@ static int ReduceLogSumExp_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { nint = onnx_attribute_read_ints(n, "axes", &ints); @@ -25,8 +25,8 @@ static int ReduceLogSumExp_init(struct onnx_node_t * n) pdat->naxes = nint; else pdat->naxes = n->inputs[0]->ndim; - pdat->axes = malloc(sizeof(int) * pdat->naxes); - pdat->caxes = malloc(sizeof(int) * pdat->naxes); + pdat->axes = onnx_malloc(sizeof(int) * pdat->naxes); + pdat->caxes = onnx_malloc(sizeof(int) * pdat->naxes); if(pdat->axes && pdat->caxes) { if(nint > 0) @@ -46,10 +46,10 @@ static int ReduceLogSumExp_init(struct onnx_node_t * n) else { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } } } @@ -63,10 +63,10 @@ static int ReduceLogSumExp_exit(struct onnx_node_t * n) if(pdat) { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } return 1; } @@ -92,7 +92,7 @@ static int ReduceLogSumExp_reshape(struct onnx_node_t * n) } if(pdat->keepdims) { - memcpy(dims, x->dims, sizeof(int) * ndim); + onnx_memcpy(dims, x->dims, sizeof(int) * ndim); for(i = 0; i < pdat->naxes; i++) dims[pdat->caxes[i]] = 1; } @@ -179,10 +179,10 @@ static void ReduceLogSumExp_int8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, 
sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -227,10 +227,10 @@ static void ReduceLogSumExp_int32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -275,10 +275,10 @@ static void ReduceLogSumExp_int64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -323,10 +323,10 @@ static void ReduceLogSumExp_uint8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -371,10 +371,10 @@ static void ReduceLogSumExp_uint32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -419,10 +419,10 @@ static void 
ReduceLogSumExp_uint64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -467,10 +467,10 @@ static void ReduceLogSumExp_bfloat16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -515,10 +515,10 @@ static void ReduceLogSumExp_float16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -563,10 +563,10 @@ static void ReduceLogSumExp_float32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -611,10 +611,10 @@ static void ReduceLogSumExp_float64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + 
onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do diff --git a/modules/fnxext/onnx_engine/src/default/ReduceMax.c b/modules/fnxext/onnx_engine/src/default/ReduceMax.c index aff5e1eb951..f74ec5eb512 100644 --- a/modules/fnxext/onnx_engine/src/default/ReduceMax.c +++ b/modules/fnxext/onnx_engine/src/default/ReduceMax.c @@ -17,7 +17,7 @@ static int ReduceMax_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { nint = onnx_attribute_read_ints(n, "axes", &ints); @@ -25,8 +25,8 @@ static int ReduceMax_init(struct onnx_node_t * n) pdat->naxes = nint; else pdat->naxes = n->inputs[0]->ndim; - pdat->axes = malloc(sizeof(int) * pdat->naxes); - pdat->caxes = malloc(sizeof(int) * pdat->naxes); + pdat->axes = onnx_malloc(sizeof(int) * pdat->naxes); + pdat->caxes = onnx_malloc(sizeof(int) * pdat->naxes); if(pdat->axes && pdat->caxes) { if(nint > 0) @@ -46,10 +46,10 @@ static int ReduceMax_init(struct onnx_node_t * n) else { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } } } @@ -63,10 +63,10 @@ static int ReduceMax_exit(struct onnx_node_t * n) if(pdat) { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } return 1; } @@ -92,7 +92,7 @@ static int ReduceMax_reshape(struct onnx_node_t * n) } if(pdat->keepdims) { - memcpy(dims, x->dims, sizeof(int) * ndim); + onnx_memcpy(dims, x->dims, sizeof(int) * ndim); for(i = 0; i < pdat->naxes; i++) dims[pdat->caxes[i]] = 1; } @@ -179,10 +179,10 @@ static void ReduceMax_int8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 
0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); maxv = px[o]; do @@ -229,10 +229,10 @@ static void ReduceMax_int32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); maxv = px[o]; do @@ -279,10 +279,10 @@ static void ReduceMax_int64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); maxv = px[o]; do @@ -329,10 +329,10 @@ static void ReduceMax_uint8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); maxv = px[o]; do @@ -379,10 +379,10 @@ static void ReduceMax_uint32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); maxv = px[o]; do @@ -429,10 +429,10 @@ static void 
ReduceMax_uint64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); maxv = px[o]; do @@ -479,10 +479,10 @@ static void ReduceMax_bfloat16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); maxv = bfloat16_to_float32(px[o]); do @@ -529,10 +529,10 @@ static void ReduceMax_float16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); maxv = float16_to_float32(px[o]); do @@ -579,10 +579,10 @@ static void ReduceMax_float32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); maxv = px[o]; do @@ -629,10 +629,10 @@ static void ReduceMax_float64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * 
pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); maxv = px[o]; do diff --git a/modules/fnxext/onnx_engine/src/default/ReduceMean.c b/modules/fnxext/onnx_engine/src/default/ReduceMean.c index 197459c79a6..1e6057b05a4 100644 --- a/modules/fnxext/onnx_engine/src/default/ReduceMean.c +++ b/modules/fnxext/onnx_engine/src/default/ReduceMean.c @@ -17,7 +17,7 @@ static int ReduceMean_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { nint = onnx_attribute_read_ints(n, "axes", &ints); @@ -25,8 +25,8 @@ static int ReduceMean_init(struct onnx_node_t * n) pdat->naxes = nint; else pdat->naxes = n->inputs[0]->ndim; - pdat->axes = malloc(sizeof(int) * pdat->naxes); - pdat->caxes = malloc(sizeof(int) * pdat->naxes); + pdat->axes = onnx_malloc(sizeof(int) * pdat->naxes); + pdat->caxes = onnx_malloc(sizeof(int) * pdat->naxes); if(pdat->axes && pdat->caxes) { if(nint > 0) @@ -46,10 +46,10 @@ static int ReduceMean_init(struct onnx_node_t * n) else { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } } } @@ -63,10 +63,10 @@ static int ReduceMean_exit(struct onnx_node_t * n) if(pdat) { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } return 1; } @@ -92,7 +92,7 @@ static int ReduceMean_reshape(struct onnx_node_t * n) } if(pdat->keepdims) { - memcpy(dims, x->dims, sizeof(int) * ndim); + onnx_memcpy(dims, x->dims, sizeof(int) * ndim); for(i = 0; i < pdat->naxes; i++) dims[pdat->caxes[i]] = 1; } @@ -181,10 +181,10 @@ static void ReduceMean_int8(struct onnx_node_t * n) for(i = 0, mean = 1; i < pdat->naxes; i++) mean *= iter_in_axes_max[i]; i = 0; - 
memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -231,10 +231,10 @@ static void ReduceMean_int32(struct onnx_node_t * n) for(i = 0, mean = 1; i < pdat->naxes; i++) mean *= iter_in_axes_max[i]; i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -281,10 +281,10 @@ static void ReduceMean_int64(struct onnx_node_t * n) for(i = 0, mean = 1; i < pdat->naxes; i++) mean *= iter_in_axes_max[i]; i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -331,10 +331,10 @@ static void ReduceMean_uint8(struct onnx_node_t * n) for(i = 0, mean = 1; i < pdat->naxes; i++) mean *= iter_in_axes_max[i]; i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -381,10 +381,10 @@ static void ReduceMean_uint32(struct onnx_node_t * n) for(i = 0, mean = 1; i < pdat->naxes; i++) mean *= iter_in_axes_max[i]; i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * 
not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -431,10 +431,10 @@ static void ReduceMean_uint64(struct onnx_node_t * n) for(i = 0, mean = 1; i < pdat->naxes; i++) mean *= iter_in_axes_max[i]; i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -481,10 +481,10 @@ static void ReduceMean_bfloat16(struct onnx_node_t * n) for(i = 0, mean = 1; i < pdat->naxes; i++) mean *= iter_in_axes_max[i]; i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -531,10 +531,10 @@ static void ReduceMean_float16(struct onnx_node_t * n) for(i = 0, mean = 1; i < pdat->naxes; i++) mean *= iter_in_axes_max[i]; i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -581,10 +581,10 @@ static void ReduceMean_float32(struct onnx_node_t * n) for(i = 0, mean = 1; i < pdat->naxes; i++) mean *= iter_in_axes_max[i]; i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + 
onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -631,10 +631,10 @@ static void ReduceMean_float64(struct onnx_node_t * n) for(i = 0, mean = 1; i < pdat->naxes; i++) mean *= iter_in_axes_max[i]; i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do diff --git a/modules/fnxext/onnx_engine/src/default/ReduceMin.c b/modules/fnxext/onnx_engine/src/default/ReduceMin.c index 7c003547ac9..eafbe208455 100644 --- a/modules/fnxext/onnx_engine/src/default/ReduceMin.c +++ b/modules/fnxext/onnx_engine/src/default/ReduceMin.c @@ -17,7 +17,7 @@ static int ReduceMin_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { nint = onnx_attribute_read_ints(n, "axes", &ints); @@ -25,8 +25,8 @@ static int ReduceMin_init(struct onnx_node_t * n) pdat->naxes = nint; else pdat->naxes = n->inputs[0]->ndim; - pdat->axes = malloc(sizeof(int) * pdat->naxes); - pdat->caxes = malloc(sizeof(int) * pdat->naxes); + pdat->axes = onnx_malloc(sizeof(int) * pdat->naxes); + pdat->caxes = onnx_malloc(sizeof(int) * pdat->naxes); if(pdat->axes && pdat->caxes) { if(nint > 0) @@ -46,10 +46,10 @@ static int ReduceMin_init(struct onnx_node_t * n) else { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } } } @@ -63,10 +63,10 @@ static int ReduceMin_exit(struct onnx_node_t * n) 
if(pdat) { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } return 1; } @@ -92,7 +92,7 @@ static int ReduceMin_reshape(struct onnx_node_t * n) } if(pdat->keepdims) { - memcpy(dims, x->dims, sizeof(int) * ndim); + onnx_memcpy(dims, x->dims, sizeof(int) * ndim); for(i = 0; i < pdat->naxes; i++) dims[pdat->caxes[i]] = 1; } @@ -179,10 +179,10 @@ static void ReduceMin_int8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); minv = px[o]; do @@ -229,10 +229,10 @@ static void ReduceMin_int32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); minv = px[o]; do @@ -279,10 +279,10 @@ static void ReduceMin_int64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); minv = px[o]; do @@ -329,10 +329,10 @@ static void ReduceMin_uint8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + 
onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); minv = px[o]; do @@ -379,10 +379,10 @@ static void ReduceMin_uint32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); minv = px[o]; do @@ -429,10 +429,10 @@ static void ReduceMin_uint64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); minv = px[o]; do @@ -479,10 +479,10 @@ static void ReduceMin_bfloat16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); minv = bfloat16_to_float32(px[o]); do @@ -529,10 +529,10 @@ static void ReduceMin_float16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); minv = float16_to_float32(px[o]); do @@ -579,10 +579,10 @@ static void ReduceMin_float32(struct onnx_node_t * n) k += 1; } i = 0; 
- memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); minv = px[o]; do @@ -629,10 +629,10 @@ static void ReduceMin_float64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); minv = px[o]; do diff --git a/modules/fnxext/onnx_engine/src/default/ReduceProd.c b/modules/fnxext/onnx_engine/src/default/ReduceProd.c index f412e1b402b..273eb7e6a32 100644 --- a/modules/fnxext/onnx_engine/src/default/ReduceProd.c +++ b/modules/fnxext/onnx_engine/src/default/ReduceProd.c @@ -17,7 +17,7 @@ static int ReduceProd_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { nint = onnx_attribute_read_ints(n, "axes", &ints); @@ -25,8 +25,8 @@ static int ReduceProd_init(struct onnx_node_t * n) pdat->naxes = nint; else pdat->naxes = n->inputs[0]->ndim; - pdat->axes = malloc(sizeof(int) * pdat->naxes); - pdat->caxes = malloc(sizeof(int) * pdat->naxes); + pdat->axes = onnx_malloc(sizeof(int) * pdat->naxes); + pdat->caxes = onnx_malloc(sizeof(int) * pdat->naxes); if(pdat->axes && pdat->caxes) { if(nint > 0) @@ -46,10 +46,10 @@ static int ReduceProd_init(struct onnx_node_t * n) else { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } } } @@ -63,10 +63,10 @@ static int ReduceProd_exit(struct 
onnx_node_t * n) if(pdat) { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } return 1; } @@ -92,7 +92,7 @@ static int ReduceProd_reshape(struct onnx_node_t * n) } if(pdat->keepdims) { - memcpy(dims, x->dims, sizeof(int) * ndim); + onnx_memcpy(dims, x->dims, sizeof(int) * ndim); for(i = 0; i < pdat->naxes; i++) dims[pdat->caxes[i]] = 1; } @@ -179,10 +179,10 @@ static void ReduceProd_int8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); prod = 1; do @@ -227,10 +227,10 @@ static void ReduceProd_int32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); prod = 1; do @@ -275,10 +275,10 @@ static void ReduceProd_int64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); prod = 1; do @@ -323,10 +323,10 @@ static void ReduceProd_uint8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * 
pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); prod = 1; do @@ -371,10 +371,10 @@ static void ReduceProd_uint32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); prod = 1; do @@ -419,10 +419,10 @@ static void ReduceProd_uint64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); prod = 1; do @@ -467,10 +467,10 @@ static void ReduceProd_bfloat16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); prod = 1; do @@ -515,10 +515,10 @@ static void ReduceProd_float16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); prod = 1; do @@ -563,10 +563,10 @@ static void ReduceProd_float32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, 
sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); prod = 1; do @@ -611,10 +611,10 @@ static void ReduceProd_float64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); prod = 1; do diff --git a/modules/fnxext/onnx_engine/src/default/ReduceSum.c b/modules/fnxext/onnx_engine/src/default/ReduceSum.c index 7d6fdcbb673..cb5432aca6a 100644 --- a/modules/fnxext/onnx_engine/src/default/ReduceSum.c +++ b/modules/fnxext/onnx_engine/src/default/ReduceSum.c @@ -14,7 +14,7 @@ static int ReduceSum_init(struct onnx_node_t * n) if((n->ninput >= 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->keepdims = onnx_attribute_read_int(n, "keepdims", 1); @@ -31,7 +31,7 @@ static int ReduceSum_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } @@ -49,7 +49,7 @@ static int ReduceSum_reshape(struct onnx_node_t * n) { struct onnx_tensor_t * a = n->inputs[1]; int64_t * pa = (int64_t *)a->datas; - pdat->naxes = minn(minn(x->ndim, 32), (int)a->ndata); + pdat->naxes = XMIN(XMIN(x->ndim, 32), (int)a->ndata); for(i = 0; i < pdat->naxes; i++) { axis = pa[i]; @@ -62,7 +62,7 @@ static int ReduceSum_reshape(struct onnx_node_t * n) } else if(pdat->noop_with_empty_axes == 0) { - pdat->naxes = minn(x->ndim, 32); + pdat->naxes = XMIN(x->ndim, 32); for(i = 0; i < 
pdat->naxes; i++) pdat->caxes[i] = i; } @@ -72,7 +72,7 @@ static int ReduceSum_reshape(struct onnx_node_t * n) } if(pdat->keepdims) { - memcpy(dims, x->dims, sizeof(int) * ndim); + onnx_memcpy(dims, x->dims, sizeof(int) * ndim); for(i = 0; i < pdat->naxes; i++) dims[pdat->caxes[i]] = 1; } @@ -159,10 +159,10 @@ static void ReduceSum_int8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -207,10 +207,10 @@ static void ReduceSum_int32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -255,10 +255,10 @@ static void ReduceSum_int64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -303,10 +303,10 @@ static void ReduceSum_uint8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -351,10 +351,10 
@@ static void ReduceSum_uint32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -399,10 +399,10 @@ static void ReduceSum_uint64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -447,10 +447,10 @@ static void ReduceSum_bfloat16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -495,10 +495,10 @@ static void ReduceSum_float16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -543,10 +543,10 @@ static void ReduceSum_float32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, 
sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -591,10 +591,10 @@ static void ReduceSum_float64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do diff --git a/modules/fnxext/onnx_engine/src/default/ReduceSumSquare.c b/modules/fnxext/onnx_engine/src/default/ReduceSumSquare.c index 78f4f15845f..f7e1c4d252f 100644 --- a/modules/fnxext/onnx_engine/src/default/ReduceSumSquare.c +++ b/modules/fnxext/onnx_engine/src/default/ReduceSumSquare.c @@ -17,7 +17,7 @@ static int ReduceSumSquare_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { nint = onnx_attribute_read_ints(n, "axes", &ints); @@ -25,8 +25,8 @@ static int ReduceSumSquare_init(struct onnx_node_t * n) pdat->naxes = nint; else pdat->naxes = n->inputs[0]->ndim; - pdat->axes = malloc(sizeof(int) * pdat->naxes); - pdat->caxes = malloc(sizeof(int) * pdat->naxes); + pdat->axes = onnx_malloc(sizeof(int) * pdat->naxes); + pdat->caxes = onnx_malloc(sizeof(int) * pdat->naxes); if(pdat->axes && pdat->caxes) { if(nint > 0) @@ -46,10 +46,10 @@ static int ReduceSumSquare_init(struct onnx_node_t * n) else { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } } } @@ -63,10 +63,10 @@ static int ReduceSumSquare_exit(struct onnx_node_t * n) if(pdat) { if(pdat->axes) - free(pdat->axes); + onnx_free(pdat->axes); if(pdat->caxes) - free(pdat->caxes); - free(pdat); + onnx_free(pdat->caxes); + onnx_free(pdat); } 
return 1; } @@ -92,7 +92,7 @@ static int ReduceSumSquare_reshape(struct onnx_node_t * n) } if(pdat->keepdims) { - memcpy(dims, x->dims, sizeof(int) * ndim); + onnx_memcpy(dims, x->dims, sizeof(int) * ndim); for(i = 0; i < pdat->naxes; i++) dims[pdat->caxes[i]] = 1; } @@ -180,10 +180,10 @@ static void ReduceSumSquare_int8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -230,10 +230,10 @@ static void ReduceSumSquare_int32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -280,10 +280,10 @@ static void ReduceSumSquare_int64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -330,10 +330,10 @@ static void ReduceSumSquare_uint8(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -380,10 
+380,10 @@ static void ReduceSumSquare_uint32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -430,10 +430,10 @@ static void ReduceSumSquare_uint64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -479,10 +479,10 @@ static void ReduceSumSquare_bfloat16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -528,10 +528,10 @@ static void ReduceSumSquare_float16(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -577,10 +577,10 @@ static void ReduceSumSquare_float32(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); 
+ onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do @@ -626,10 +626,10 @@ static void ReduceSumSquare_float64(struct onnx_node_t * n) k += 1; } i = 0; - memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); + onnx_memset(iter_not_in_axes, 0, sizeof(int) * not_in_axes_num); do { - memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); + onnx_memset(iter_in_axes, 0, sizeof(int) * pdat->naxes); o = dim_offset(not_in_axes_num, iter_not_in_axes, not_in_axes_axis_dis); sum = 0; do diff --git a/modules/fnxext/onnx_engine/src/default/Reshape.c b/modules/fnxext/onnx_engine/src/default/Reshape.c index cf8766f43ee..04d778a6fe3 100644 --- a/modules/fnxext/onnx_engine/src/default/Reshape.c +++ b/modules/fnxext/onnx_engine/src/default/Reshape.c @@ -2,17 +2,8 @@ static int Reshape_init(struct onnx_node_t * n) { - struct onnx_tensor_t * x; - struct onnx_tensor_t * s; - if((n->ninput == 2) && (n->noutput == 1)) { - x = n->inputs[0]; - s = n->inputs[1]; - if((x->ndim == 0) || (x->type == ONNX_TENSOR_TYPE_UNDEFINED)) - return 0; - if((s->ndim == 0) || (s->type != ONNX_TENSOR_TYPE_INT64)) - return 0; return 1; } return 0; @@ -61,6 +52,7 @@ static void Reshape_operator(struct onnx_node_t * n) { struct onnx_tensor_t * y = n->outputs[0]; struct onnx_tensor_t * x = n->inputs[0]; + struct onnx_tensor_t * new_shape = n->inputs[1]; char ** py = (char **)y->datas; char ** px = (char **)x->datas; @@ -69,13 +61,13 @@ static void Reshape_operator(struct onnx_node_t * n) for(size_t i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); - py[i] = strdup(px[i]); + onnx_free(py[i]); + py[i] = onnx_strdup(px[i]); } } else { - memcpy(y->datas, x->datas, x->ndata * onnx_tensor_type_sizeof(x->type)); + onnx_memcpy(y->datas, x->datas, x->ndata * onnx_tensor_type_sizeof(x->type)); } } diff --git a/modules/fnxext/onnx_engine/src/default/Selu.c b/modules/fnxext/onnx_engine/src/default/Selu.c index 
e46589e482a..1a48a480d06 100644 --- a/modules/fnxext/onnx_engine/src/default/Selu.c +++ b/modules/fnxext/onnx_engine/src/default/Selu.c @@ -11,7 +11,7 @@ static int Selu_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->alpha = onnx_attribute_read_float(n, "alpha", 1.67326); @@ -28,7 +28,7 @@ static int Selu_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/Shape.c b/modules/fnxext/onnx_engine/src/default/Shape.c index f12b2853dee..3752d97d0fc 100644 --- a/modules/fnxext/onnx_engine/src/default/Shape.c +++ b/modules/fnxext/onnx_engine/src/default/Shape.c @@ -27,7 +27,7 @@ static void Shape_operator(struct onnx_node_t * n) int64_t * py = (int64_t *)y->datas; size_t i, l; - for(i = 0, l = minn(y->ndata, (size_t)x->ndim); i < l; i++) + for(i = 0, l = XMIN(y->ndata, (size_t)x->ndim); i < l; i++) py[i] = x->dims[i]; } diff --git a/modules/fnxext/onnx_engine/src/default/Shrink.c b/modules/fnxext/onnx_engine/src/default/Shrink.c index fb00b0521b0..05826e2e315 100644 --- a/modules/fnxext/onnx_engine/src/default/Shrink.c +++ b/modules/fnxext/onnx_engine/src/default/Shrink.c @@ -11,7 +11,7 @@ static int Shrink_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->bias = onnx_attribute_read_float(n, "bias", 0.0); @@ -28,7 +28,7 @@ static int Shrink_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/Softmax.c b/modules/fnxext/onnx_engine/src/default/Softmax.c index 3c0024c57a3..31752c391e1 
100644 --- a/modules/fnxext/onnx_engine/src/default/Softmax.c +++ b/modules/fnxext/onnx_engine/src/default/Softmax.c @@ -16,7 +16,7 @@ static int Softmax_13_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_13_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_13_pdata_t)); if(pdat) { pdat->axis = onnx_attribute_read_int(n, "axis", -1); @@ -32,7 +32,7 @@ static int Softmax_13_exit(struct onnx_node_t * n) struct operator_13_pdata_t * pdat = (struct operator_13_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } @@ -239,7 +239,7 @@ static int Softmax_1_11_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_1_11_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_1_11_pdata_t)); if(pdat) { pdat->axis = onnx_attribute_read_int(n, "axis", 1); @@ -255,7 +255,7 @@ static int Softmax_1_11_exit(struct onnx_node_t * n) struct operator_1_11_pdata_t * pdat = (struct operator_1_11_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/Squeeze.c b/modules/fnxext/onnx_engine/src/default/Squeeze.c index 3f8cecf8f65..dd9400bdff6 100644 --- a/modules/fnxext/onnx_engine/src/default/Squeeze.c +++ b/modules/fnxext/onnx_engine/src/default/Squeeze.c @@ -72,16 +72,70 @@ static void Squeeze_operator(struct onnx_node_t * n) for(size_t i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); - py[i] = strdup(px[i]); + onnx_free(py[i]); + py[i] = onnx_strdup(px[i]); } } else { - memcpy(y->datas, x->datas, x->ndata * onnx_tensor_type_sizeof(x->type)); + onnx_memcpy(y->datas, x->datas, x->ndata * onnx_tensor_type_sizeof(x->type)); } } +static int Squeeze_init_v11(struct onnx_node_t * n) +{ + if((n->ninput == 1) && (n->noutput == 1)) + return 1; + return 0; +} + +static int Squeeze_reshape_v11(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct 
onnx_tensor_t * x = n->inputs[0]; + int64_t * axes = NULL; + int naxes = 0; + int dims[x->ndim]; + int ndim = 0; + int axis, flag; + int i, j; + + naxes = onnx_attribute_read_ints(n, "axes", &axes); + + if(naxes > 0) + { + for(i = 0, ndim = 0; i < x->ndim; i++) + { + if(x->dims[i] == 1) + { + for(j = 0, flag = 0; j < naxes; j++) + { + axis = axes[j]; + if(axis < 0) + axis += x->ndim; + if(i == axis) + { + flag = 1; + break; + } + } + if(flag) + continue; + } + dims[ndim++] = x->dims[i]; + } + } + else + { + for(i = 0, ndim = 0; i < x->ndim; i++) + { + if(x->dims[i] > 1) + dims[ndim++] = x->dims[i]; + } + } + return onnx_tensor_reshape(y, dims, ndim, x->type); +} + void resolver_default_op_Squeeze(struct onnx_node_t * n) { if(n->opset >= 13) @@ -115,6 +169,32 @@ void resolver_default_op_Squeeze(struct onnx_node_t * n) } else if(n->opset >= 11) { + switch(n->inputs[0]->type) + { + case ONNX_TENSOR_TYPE_BOOL: + case ONNX_TENSOR_TYPE_INT8: + case ONNX_TENSOR_TYPE_INT16: + case ONNX_TENSOR_TYPE_INT32: + case ONNX_TENSOR_TYPE_INT64: + case ONNX_TENSOR_TYPE_UINT8: + case ONNX_TENSOR_TYPE_UINT16: + case ONNX_TENSOR_TYPE_UINT32: + case ONNX_TENSOR_TYPE_UINT64: + case ONNX_TENSOR_TYPE_BFLOAT16: + case ONNX_TENSOR_TYPE_FLOAT16: + case ONNX_TENSOR_TYPE_FLOAT32: + case ONNX_TENSOR_TYPE_FLOAT64: + case ONNX_TENSOR_TYPE_COMPLEX64: + case ONNX_TENSOR_TYPE_COMPLEX128: + case ONNX_TENSOR_TYPE_STRING: + n->init = Squeeze_init_v11; + n->exit = Squeeze_exit; + n->reshape = Squeeze_reshape_v11; + n->operator_ = Squeeze_operator; + break; + default: + break; + } } else if(n->opset >= 1) { diff --git a/modules/fnxext/onnx_engine/src/default/ThresholdedRelu.c b/modules/fnxext/onnx_engine/src/default/ThresholdedRelu.c index ce9b46d1b51..f50051ade07 100644 --- a/modules/fnxext/onnx_engine/src/default/ThresholdedRelu.c +++ b/modules/fnxext/onnx_engine/src/default/ThresholdedRelu.c @@ -10,7 +10,7 @@ static int ThresholdedRelu_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 
1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->alpha = onnx_attribute_read_float(n, "alpha", 1.0); @@ -26,7 +26,7 @@ static int ThresholdedRelu_exit(struct onnx_node_t * n) struct operator_pdata_t * pdat = (struct operator_pdata_t *)n->priv; if(pdat) - free(pdat); + onnx_free(pdat); return 1; } diff --git a/modules/fnxext/onnx_engine/src/default/Tile.c b/modules/fnxext/onnx_engine/src/default/Tile.c index 99a1d71988a..a668f534f3f 100644 --- a/modules/fnxext/onnx_engine/src/default/Tile.c +++ b/modules/fnxext/onnx_engine/src/default/Tile.c @@ -250,8 +250,8 @@ static void Tile_string(struct onnx_node_t * n) { px = onnx_tensor_broadcast_map_address(x, y, i); if(py[i]) - free(py[i]); - py[i] = strdup(px[i]); + onnx_free(py[i]); + py[i] = onnx_strdup(px[i]); } } diff --git a/modules/fnxext/onnx_engine/src/default/Transpose.c b/modules/fnxext/onnx_engine/src/default/Transpose.c index 6c79d5a04ee..9a49417a2cf 100644 --- a/modules/fnxext/onnx_engine/src/default/Transpose.c +++ b/modules/fnxext/onnx_engine/src/default/Transpose.c @@ -13,11 +13,11 @@ static int Transpose_init(struct onnx_node_t * n) if((n->ninput == 1) && (n->noutput == 1)) { - pdat = malloc(sizeof(struct operator_pdata_t)); + pdat = onnx_malloc(sizeof(struct operator_pdata_t)); if(pdat) { pdat->nperm = n->inputs[0]->ndim; - pdat->perm = malloc(sizeof(int) * pdat->nperm); + pdat->perm = onnx_malloc(sizeof(int) * pdat->nperm); if(pdat->perm) { if(pdat->nperm == onnx_attribute_read_ints(n, "perm", &ints)) @@ -35,7 +35,7 @@ static int Transpose_init(struct onnx_node_t * n) } else { - free(pdat); + onnx_free(pdat); } } } @@ -49,8 +49,8 @@ static int Transpose_exit(struct onnx_node_t * n) if(pdat) { if(pdat->perm) - free(pdat->perm); - free(pdat); + onnx_free(pdat->perm); + onnx_free(pdat); } return 1; } @@ -422,8 +422,8 @@ static void Transpose_string(struct onnx_node_t * n) ix[pdat->perm[i]] = iy[i]; ox = 
onnx_tensor_indices_to_offset(x, ix); if(py[oy]) - free(py[oy]); - py[oy] = strdup(px[ox]); + onnx_free(py[oy]); + py[oy] = onnx_strdup(px[ox]); } } diff --git a/modules/fnxext/onnx_engine/src/default/Unsqueeze.c b/modules/fnxext/onnx_engine/src/default/Unsqueeze.c index b2c4b45b309..6864aef143b 100644 --- a/modules/fnxext/onnx_engine/src/default/Unsqueeze.c +++ b/modules/fnxext/onnx_engine/src/default/Unsqueeze.c @@ -23,7 +23,7 @@ static int Unsqueeze_reshape(struct onnx_node_t * n) int axis; int i, j; - memset(dims, 0, sizeof(int) * ndim); + onnx_memset(dims, 0, sizeof(int) * ndim); for(i = 0; i < a->ndata; i++) { axis = pa[i]; @@ -52,13 +52,75 @@ static void Unsqueeze_operator(struct onnx_node_t * n) for(size_t i = 0, l = y->ndata; i < l; i++) { if(py[i]) - free(py[i]); - py[i] = strdup(px[i]); + onnx_free(py[i]); + py[i] = onnx_strdup(px[i]); } } else { - memcpy(y->datas, x->datas, x->ndata * onnx_tensor_type_sizeof(x->type)); + onnx_memcpy(y->datas, x->datas, x->ndata * onnx_tensor_type_sizeof(x->type)); + } +} + +static int Unsqueeze_11_init(struct onnx_node_t * n) +{ + if((n->ninput == 1) && (n->noutput == 1)) + return 1; + return 0; +} + +static int Unsqueeze_11_exit(struct onnx_node_t * n) +{ + return 1; +} + +static int Unsqueeze_11_reshape(struct onnx_node_t * n) +{ + struct onnx_tensor_t * y = n->outputs[0]; + struct onnx_tensor_t * x = n->inputs[0]; + int64_t * axes = NULL; + int naxes = onnx_attribute_read_ints(n, "axes", &axes); + int ndim = x->ndim + naxes; + int dims[ndim]; + int axis; + int i, j; + + onnx_memset(dims, 0, sizeof(int) * ndim); + for(i = 0; i < naxes; i++) + { + axis = axes[i]; + if(axis < 0) + axis += ndim; + if(axis >= 0 && axis < ndim) + dims[axis] = 1; + } + for(i = 0, j = 0; i < ndim; i++) + { + if(dims[i] != 1) + dims[i] = x->dims[j++]; + } + return onnx_tensor_reshape(y, dims, ndim, x->type); +} + +static void Unsqueeze_11_operator(struct onnx_node_t * n) +{ + struct onnx_tensor_t * x = n->inputs[0]; + struct onnx_tensor_t * y 
= n->outputs[0]; + char ** px = (char **)x->datas; + char ** py = (char **)y->datas; + + if(x->type == ONNX_TENSOR_TYPE_STRING) + { + for(size_t i = 0, l = y->ndata; i < l; i++) + { + if(py[i]) + onnx_free(py[i]); + py[i] = onnx_strdup(px[i]); + } + } + else + { + onnx_memcpy(y->datas, x->datas, x->ndata * onnx_tensor_type_sizeof(x->type)); } } @@ -95,6 +157,32 @@ void resolver_default_op_Unsqueeze(struct onnx_node_t * n) } else if(n->opset >= 11) { + switch(n->inputs[0]->type) + { + case ONNX_TENSOR_TYPE_BOOL: + case ONNX_TENSOR_TYPE_INT8: + case ONNX_TENSOR_TYPE_INT16: + case ONNX_TENSOR_TYPE_INT32: + case ONNX_TENSOR_TYPE_INT64: + case ONNX_TENSOR_TYPE_UINT8: + case ONNX_TENSOR_TYPE_UINT16: + case ONNX_TENSOR_TYPE_UINT32: + case ONNX_TENSOR_TYPE_UINT64: + case ONNX_TENSOR_TYPE_BFLOAT16: + case ONNX_TENSOR_TYPE_FLOAT16: + case ONNX_TENSOR_TYPE_FLOAT32: + case ONNX_TENSOR_TYPE_FLOAT64: + case ONNX_TENSOR_TYPE_COMPLEX64: + case ONNX_TENSOR_TYPE_COMPLEX128: + case ONNX_TENSOR_TYPE_STRING: + n->init = Unsqueeze_11_init; + n->exit = Unsqueeze_11_exit; + n->reshape = Unsqueeze_11_reshape; + n->operator_ = Unsqueeze_11_operator; + break; + default: + break; + } } else if(n->opset >= 1) { diff --git a/modules/fnxext/onnx_engine/src/default/Where.c b/modules/fnxext/onnx_engine/src/default/Where.c index f471845c86f..c72b922a1d4 100644 --- a/modules/fnxext/onnx_engine/src/default/Where.c +++ b/modules/fnxext/onnx_engine/src/default/Where.c @@ -362,8 +362,8 @@ static void Where_string(struct onnx_node_t * n) else px = (char **)onnx_tensor_broadcast_map_address(x2, y, i); if(py[i]) - free(py[i]); - py[i] = strdup(px[i]); + onnx_free(py[i]); + py[i] = onnx_strdup(px[i]); } } diff --git a/modules/fnxext/onnx_engine/src/default/default.c b/modules/fnxext/onnx_engine/src/default/default.c index 3a0e305beac..2bac8bdfb69 100644 --- a/modules/fnxext/onnx_engine/src/default/default.c +++ b/modules/fnxext/onnx_engine/src/default/default.c @@ -29,6 +29,10 @@ struct onnx_resolver_t 
resolver_default = { .op_AveragePool = resolver_default_op_AveragePool, .op_BatchNormalization = resolver_default_op_BatchNormalization, .op_BitShift = resolver_default_op_BitShift, + .op_BitwiseAnd = resolver_default_op_BitwiseAnd, + .op_BitwiseNot = resolver_default_op_BitwiseNot, + .op_BitwiseOr = resolver_default_op_BitwiseOr, + .op_BitwiseXor = resolver_default_op_BitwiseXor, .op_Cast = resolver_default_op_Cast, .op_Ceil = resolver_default_op_Ceil, .op_Clip = resolver_default_op_Clip, @@ -180,5 +184,6 @@ struct onnx_resolver_t resolver_default = { .op_Range = resolver_default_op_Range, .op_Softmax = resolver_default_op_Softmax, .op_SoftmaxCrossEntropyLoss = resolver_default_op_SoftmaxCrossEntropyLoss, + .op_DynamicQuantizeLSTM = resolver_default_op_DynamicQuantizeLSTM, }; diff --git a/modules/fnxext/onnx_engine/src/default/default.h b/modules/fnxext/onnx_engine/src/default/default.h index 1fa126b2fe5..ae8ae148fed 100644 --- a/modules/fnxext/onnx_engine/src/default/default.h +++ b/modules/fnxext/onnx_engine/src/default/default.h @@ -24,6 +24,10 @@ void resolver_default_op_Atanh(struct onnx_node_t * n); void resolver_default_op_AveragePool(struct onnx_node_t * n); void resolver_default_op_BatchNormalization(struct onnx_node_t * n); void resolver_default_op_BitShift(struct onnx_node_t * n); +void resolver_default_op_BitwiseAnd(struct onnx_node_t * n); +void resolver_default_op_BitwiseNot(struct onnx_node_t * n); +void resolver_default_op_BitwiseOr(struct onnx_node_t * n); +void resolver_default_op_BitwiseXor(struct onnx_node_t * n); void resolver_default_op_Cast(struct onnx_node_t * n); void resolver_default_op_Ceil(struct onnx_node_t * n); void resolver_default_op_Clip(struct onnx_node_t * n); @@ -175,6 +179,7 @@ void resolver_default_op_NegativeLogLikelihoodLoss(struct onnx_node_t * n); void resolver_default_op_Range(struct onnx_node_t * n); void resolver_default_op_Softmax(struct onnx_node_t * n); void resolver_default_op_SoftmaxCrossEntropyLoss(struct 
onnx_node_t * n); +void resolver_default_op_DynamicQuantizeLSTM(struct onnx_node_t * n); extern struct onnx_resolver_t resolver_default; diff --git a/modules/fnxext/onnx_engine/src/hmap.c b/modules/fnxext/onnx_engine/src/hmap.c index 43538127f9a..b8c4330f5ef 100644 --- a/modules/fnxext/onnx_engine/src/hmap.c +++ b/modules/fnxext/onnx_engine/src/hmap.c @@ -58,14 +58,14 @@ struct hmap_t * hmap_alloc(int size, void (*cb)(struct hmap_t *, struct hmap_ent if(size & (size - 1)) size = roundup_pow_of_two(size); - m = malloc(sizeof(struct hmap_t)); + m = onnx_malloc(sizeof(struct hmap_t)); if(!m) return NULL; - m->hash = malloc(sizeof(struct hlist_head) * size); + m->hash = onnx_malloc(sizeof(struct hlist_head) * size); if(!m->hash) { - free(m); + onnx_free(m); return NULL; } for(i = 0; i < size; i++) @@ -83,8 +83,8 @@ void hmap_free(struct hmap_t * m) if(m) { hmap_clear(m); - free(m->hash); - free(m); + onnx_free(m->hash); + onnx_free(m); } } @@ -101,8 +101,8 @@ void hmap_clear(struct hmap_t * m) m->n--; if(m->callback) m->callback(m, pos); - free(pos->key); - free(pos); + onnx_free(pos->key); + onnx_free(pos); } } } @@ -121,7 +121,7 @@ static void hmap_resize(struct hmap_t * m, unsigned int size) if(size & (size - 1)) size = roundup_pow_of_two(size); - hash = malloc(sizeof(struct hlist_head) * size); + hash = onnx_malloc(sizeof(struct hlist_head) * size); if(!hash) return; for(i = 0; i < size; i++) @@ -131,7 +131,7 @@ static void hmap_resize(struct hmap_t * m, unsigned int size) { hlist_del(&pos->node); } - free(m->hash); + onnx_free(m->hash); m->hash = hash; m->size = size; @@ -151,7 +151,7 @@ void hmap_add(struct hmap_t * m, const char * key, void * value) hlist_for_each_entry_safe(pos, n, &m->hash[shash(key) & (m->size - 1)], node) { - if(strcmp(pos->key, key) == 0) + if(onnx_strcmp(pos->key, key) == 0) { if(pos->value != value) pos->value = value; @@ -162,11 +162,11 @@ void hmap_add(struct hmap_t * m, const char * key, void * value) if(m->n > (m->size >> 1)) 
hmap_resize(m, m->size << 1); - pos = malloc(sizeof(struct hmap_entry_t)); + pos = onnx_malloc(sizeof(struct hmap_entry_t)); if(!pos) return; - pos->key = strdup(key); + pos->key = onnx_strdup(key); pos->value = value; init_hlist_node(&pos->node); hlist_add_head(&pos->node, &m->hash[shash(pos->key) & (m->size - 1)]); @@ -188,13 +188,13 @@ void hmap_remove(struct hmap_t * m, const char * key) hlist_for_each_entry_safe(pos, n, &m->hash[shash(key) & (m->size - 1)], node) { - if(strcmp(pos->key, key) == 0) + if(onnx_strcmp(pos->key, key) == 0) { hlist_del(&pos->node); list_del(&pos->head); m->n--; - free(pos->key); - free(pos); + onnx_free(pos->key); + onnx_free(pos); return; } } @@ -266,7 +266,7 @@ static void lsort(void * priv, struct list_head * head, int (*cmp)(void * priv, if(list_empty(head)) return; - memset(part, 0, sizeof(part)); + onnx_memset(part, 0, sizeof(part)); head->prev->next = NULL; list = head->next; @@ -301,7 +301,7 @@ static int hmap_compare(void * priv, struct list_head * a, struct list_head * b) { char * keya = (char *)list_entry(a, struct hmap_entry_t, head)->key; char * keyb = (char *)list_entry(b, struct hmap_entry_t, head)->key; - return strcmp(keya, keyb); + return onnx_strcmp(keya, keyb); } void hmap_sort(struct hmap_t * m) @@ -320,7 +320,7 @@ void * hmap_search(struct hmap_t * m, const char * key) hlist_for_each_entry_safe(pos, n, &m->hash[shash(key) & (m->size - 1)], node) { - if(strcmp(pos->key, key) == 0) + if(onnx_strcmp(pos->key, key) == 0) return pos->value; } return NULL; diff --git a/modules/fnxext/onnx_engine/src/matrix.c b/modules/fnxext/onnx_engine/src/matrix.c new file mode 100644 index 00000000000..e10996cbf42 --- /dev/null +++ b/modules/fnxext/onnx_engine/src/matrix.c @@ -0,0 +1,177 @@ + + +#include "matrix.h" + +/** + * Performs matrix multiplication for various data types + * + * X = A * B + * + * @param m Number of rows in A + * @param n Number of columns in B + * @param k Number of columns in A / rows in B + * @param A 
Matrix A data + * @param B Matrix B data + * @param X Output matrix data + * @param type Tensor data type + */ +void matrix_mul(int m, int n, int k, void * A, void * B, void * X, enum onnx_tensor_type_t type) +{ + switch(type) + { + case ONNX_TENSOR_TYPE_INT32: + { + int32_t * pa = (int32_t *)A; + int32_t * pb = (int32_t *)B; + int32_t * px = (int32_t *)X; + int32_t sum; + + for(int u = 0; u < m; u++) + { + for(int v = 0; v < n; v++) + { + sum = 0; + for(int w = 0; w < k; w++) + sum += pa[u * k + w] * pb[w * n + v]; + px[u * n + v] = sum; + } + } + break; + } + case ONNX_TENSOR_TYPE_INT64: + { + int64_t * pa = (int64_t *)A; + int64_t * pb = (int64_t *)B; + int64_t * px = (int64_t *)X; + int64_t sum; + + for(int u = 0; u < m; u++) + { + for(int v = 0; v < n; v++) + { + sum = 0; + for(int w = 0; w < k; w++) + sum += pa[u * k + w] * pb[w * n + v]; + px[u * n + v] = sum; + } + } + break; + } + case ONNX_TENSOR_TYPE_UINT32: + { + uint32_t * pa = (uint32_t *)A; + uint32_t * pb = (uint32_t *)B; + uint32_t * px = (uint32_t *)X; + uint32_t sum; + + for(int u = 0; u < m; u++) + { + for(int v = 0; v < n; v++) + { + sum = 0; + for(int w = 0; w < k; w++) + sum += pa[u * k + w] * pb[w * n + v]; + px[u * n + v] = sum; + } + } + break; + } + case ONNX_TENSOR_TYPE_UINT64: + { + uint64_t * pa = (uint64_t *)A; + uint64_t * pb = (uint64_t *)B; + uint64_t * px = (uint64_t *)X; + uint64_t sum; + + for(int u = 0; u < m; u++) + { + for(int v = 0; v < n; v++) + { + sum = 0; + for(int w = 0; w < k; w++) + sum += pa[u * k + w] * pb[w * n + v]; + px[u * n + v] = sum; + } + } + break; + } + case ONNX_TENSOR_TYPE_BFLOAT16: + { + uint16_t * pa = (uint16_t *)A; + uint16_t * pb = (uint16_t *)B; + uint16_t * px = (uint16_t *)X; + float sum; + + for(int u = 0; u < m; u++) + { + for(int v = 0; v < n; v++) + { + sum = 0; + for(int w = 0; w < k; w++) + sum += bfloat16_to_float32(pa[u * k + w]) * bfloat16_to_float32(pb[w * n + v]); + px[u * n + v] = float32_to_bfloat16(sum); + } + } + break; + } + case 
ONNX_TENSOR_TYPE_FLOAT16: + { + uint16_t * pa = (uint16_t *)A; + uint16_t * pb = (uint16_t *)B; + uint16_t * px = (uint16_t *)X; + float sum; + + for(int u = 0; u < m; u++) + { + for(int v = 0; v < n; v++) + { + sum = 0; + for(int w = 0; w < k; w++) + sum += float16_to_float32(pa[u * k + w]) * float16_to_float32(pb[w * n + v]); + px[u * n + v] = float32_to_float16(sum); + } + } + break; + } + case ONNX_TENSOR_TYPE_FLOAT32: + { + float * pa = (float *)A; + float * pb = (float *)B; + float * px = (float *)X; + float sum; + + for(int u = 0; u < m; u++) + { + for(int v = 0; v < n; v++) + { + sum = 0; + for(int w = 0; w < k; w++) + sum += pa[u * k + w] * pb[w * n + v]; + px[u * n + v] = sum; + } + } + break; + } + case ONNX_TENSOR_TYPE_FLOAT64: + { + double * pa = (double *)A; + double * pb = (double *)B; + double * px = (double *)X; + double sum; + + for(int u = 0; u < m; u++) + { + for(int v = 0; v < n; v++) + { + sum = 0; + for(int w = 0; w < k; w++) + sum += pa[u * k + w] * pb[w * n + v]; + px[u * n + v] = sum; + } + } + break; + } + default: + break; + } +} \ No newline at end of file diff --git a/modules/fnxext/onnx_engine/src/matrix.h b/modules/fnxext/onnx_engine/src/matrix.h new file mode 100644 index 00000000000..2a902348aed --- /dev/null +++ b/modules/fnxext/onnx_engine/src/matrix.h @@ -0,0 +1,16 @@ +#ifndef __MATRIX_H__ +#define __MATRIX_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "onnx.h" + +void matrix_mul(int m, int n, int k, void * A, void * B, void * X, enum onnx_tensor_type_t type); + +#ifdef __cplusplus +} +#endif + +#endif /* __MATRIX_H__ */ \ No newline at end of file diff --git a/modules/fnxext/onnx_engine/src/onnx.c b/modules/fnxext/onnx_engine/src/onnx.c index c79af7d4eb8..a8b035d7d41 100644 --- a/modules/fnxext/onnx_engine/src/onnx.c +++ b/modules/fnxext/onnx_engine/src/onnx.c @@ -28,15 +28,13 @@ #include "onnx.h" #include "default/default.h" -#define ONNX_LOG(...) 
printf(__VA_ARGS__) - static void hmap_entry_callback(struct hmap_t * m, struct hmap_entry_t * e) { if(e && e->value) onnx_tensor_free((struct onnx_tensor_t *)e->value); } -struct onnx_context_t * onnx_context_alloc(const void * buf, size_t len, struct onnx_resolver_t ** r, int rlen) +struct onnx_context_t * onnx_context_alloc(const void * buf, size_t len, struct onnx_resolver_t ** r, int rlen, struct hmap_t * shape_params) { struct onnx_context_t * ctx; int i; @@ -44,15 +42,15 @@ struct onnx_context_t * onnx_context_alloc(const void * buf, size_t len, struct if(!buf || len <= 0) return NULL; - ctx = malloc(sizeof(struct onnx_context_t)); + ctx = onnx_malloc(sizeof(struct onnx_context_t)); if(!ctx) return NULL; - + ctx->shape_params = shape_params; ctx->model = onnx__model_proto__unpack(NULL, len, buf); if(!ctx->model) { if(ctx) - free(ctx); + onnx_free(ctx); return NULL; } @@ -62,27 +60,27 @@ struct onnx_context_t * onnx_context_alloc(const void * buf, size_t len, struct if(ctx->model) onnx__model_proto__free_unpacked(ctx->model, NULL); if(ctx) - free(ctx); + onnx_free(ctx); return NULL; } ctx->rlen = rlen; if(r && (ctx->rlen > 0)) { - ctx->r = malloc(sizeof(struct onnx_resolver_t *) * ctx->rlen); - ctx->rctx = malloc(sizeof(void *) * ctx->rlen); + ctx->r = onnx_malloc(sizeof(struct onnx_resolver_t *) * ctx->rlen); + ctx->rctx = onnx_malloc(sizeof(void *) * ctx->rlen); if(!ctx->r || !ctx->rctx) { if(ctx->rctx) - free(ctx->rctx); + onnx_free(ctx->rctx); if(ctx->r) - free(ctx->r); + onnx_free(ctx->r); if(ctx->map) hmap_free(ctx->map); if(ctx->model) onnx__model_proto__free_unpacked(ctx->model, NULL); if(ctx) - free(ctx); + onnx_free(ctx); return NULL; } } @@ -108,22 +106,22 @@ struct onnx_context_t * onnx_context_alloc(const void * buf, size_t len, struct ctx->r[i]->destroy(ctx->rctx[i]); } if(ctx->rctx) - free(ctx->rctx); + onnx_free(ctx->rctx); if(ctx->r) - free(ctx->r); + onnx_free(ctx->r); if(ctx->map) hmap_free(ctx->map); if(ctx->model) 
onnx__model_proto__free_unpacked(ctx->model, NULL); if(ctx) - free(ctx); + onnx_free(ctx); return NULL; } return ctx; } -struct onnx_context_t * onnx_context_alloc_from_file(const char * filename, struct onnx_resolver_t ** r, int rlen) +struct onnx_context_t * onnx_context_alloc_from_file(const char * filename, struct onnx_resolver_t ** r, int rlen, struct hmap_t * shape_params) { struct onnx_context_t * ctx = NULL; FILE * fp; @@ -138,12 +136,12 @@ struct onnx_context_t * onnx_context_alloc_from_file(const char * filename, stru fseek(fp, 0L, SEEK_SET); if(l > 0) { - buf = malloc(l); + buf = onnx_malloc(l); if(buf) { for(len = 0; len < l; len += fread(buf + len, 1, l - len, fp)); - ctx = onnx_context_alloc(buf, len, r, rlen); - free(buf); + ctx = onnx_context_alloc(buf, len, r, rlen, shape_params); + onnx_free(buf); } } fclose(fp); @@ -165,23 +163,23 @@ void onnx_context_free(struct onnx_context_t * ctx) ctx->r[i]->destroy(ctx->rctx[i]); } if(ctx->rctx) - free(ctx->rctx); + onnx_free(ctx->rctx); if(ctx->r) - free(ctx->r); + onnx_free(ctx->r); if(ctx->map) hmap_free(ctx->map); if(ctx->model) onnx__model_proto__free_unpacked(ctx->model, NULL); - free(ctx); + onnx_free(ctx); } } -static struct onnx_tensor_t * onnx_tensor_alloc_from_value_info(Onnx__ValueInfoProto * v) +static struct onnx_tensor_t * onnx_tensor_alloc_from_value_info(struct onnx_context_t * ctx, Onnx__ValueInfoProto * v) { struct onnx_tensor_t * t; enum onnx_tensor_type_t type; int * dims = NULL; - int ndim; + int ndim = 0; int i; if(!v || !v->name) @@ -191,35 +189,44 @@ static struct onnx_tensor_t * onnx_tensor_alloc_from_value_info(Onnx__ValueInfoP { case ONNX__TYPE_PROTO__VALUE_TENSOR_TYPE: type = (enum onnx_tensor_type_t)v->type->tensor_type->elem_type; - ndim = v->type->tensor_type->shape->n_dim; - if(ndim > 0) - { - dims = malloc(sizeof(int) * ndim); - if(dims) + if(v->type->tensor_type->shape) { + ndim = v->type->tensor_type->shape->n_dim; + if(ndim > 0) { - for(i = 0; i < ndim; i++) + dims = 
onnx_malloc(sizeof(int) * ndim); + if(dims) { - switch(v->type->tensor_type->shape->dim[i]->value_case) + for(i = 0; i < ndim; i++) { - case ONNX__TENSOR_SHAPE_PROTO__DIMENSION__VALUE_DIM_VALUE: - dims[i] = v->type->tensor_type->shape->dim[i]->dim_value; - break; - case ONNX__TENSOR_SHAPE_PROTO__DIMENSION__VALUE_DIM_PARAM: - if(strcmp(v->type->tensor_type->shape->dim[i]->dim_param, "batch_size") == 0) - dims[i] = 1; - else - dims[i] = 1; - break; - default: - dims[i] = 1; - break; + switch(v->type->tensor_type->shape->dim[i]->value_case) + { + case ONNX__TENSOR_SHAPE_PROTO__DIMENSION__VALUE_DIM_VALUE: + dims[i] = v->type->tensor_type->shape->dim[i]->dim_value; + break; + case ONNX__TENSOR_SHAPE_PROTO__DIMENSION__VALUE_DIM_PARAM: + { + const char* dim_param = v->type->tensor_type->shape->dim[i]->dim_param; + if(ctx->shape_params) { + int64_t* value = hmap_search(ctx->shape_params, dim_param); + if(value) { + dims[i] = *value; + break; + } + } + dims[i] = -1; + } + break; + default: + dims[i] = -1; + break; + } } } } } t = onnx_tensor_alloc(v->name, type, dims, ndim); if(dims) - free(dims); + onnx_free(dims); break; case ONNX__TYPE_PROTO__VALUE_SEQUENCE_TYPE: t = NULL; @@ -257,7 +264,7 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T union { uint32_t u; float f; } v; if(t->ndata > 0) { - n = minn(t->ndata, (size_t)o->raw_data.len / sz); + n = XMIN(t->ndata, (size_t)o->raw_data.len / sz); for(i = 0; i < n; i++) { v.u = le32_to_cpu(q[i]); @@ -272,8 +279,8 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T uint8_t * q = (uint8_t *)o->raw_data.data; if(t->ndata > 0) { - n = minn(t->ndata, (size_t)o->raw_data.len); - memcpy(p, q, n); + n = XMIN(t->ndata, (size_t)o->raw_data.len); + onnx_memcpy(p, q, n); } } break; @@ -283,8 +290,8 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T int8_t * q = (int8_t *)o->raw_data.data; if(t->ndata > 0) { - n = minn(t->ndata, 
(size_t)o->raw_data.len); - memcpy(p, q, n); + n = XMIN(t->ndata, (size_t)o->raw_data.len); + onnx_memcpy(p, q, n); } } break; @@ -294,7 +301,7 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T uint16_t * q = (uint16_t *)o->raw_data.data; if(t->ndata > 0) { - n = minn(t->ndata, (size_t)o->raw_data.len / sz); + n = XMIN(t->ndata, (size_t)o->raw_data.len / sz); for(i = 0; i < n; i++) p[i] = le16_to_cpu(q[i]); } @@ -306,7 +313,7 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T int16_t * q = (int16_t *)o->raw_data.data; if(t->ndata > 0) { - n = minn(t->ndata, (size_t)o->raw_data.len / sz); + n = XMIN(t->ndata, (size_t)o->raw_data.len / sz); for(i = 0; i < n; i++) p[i] = le16_to_cpu(q[i]); } @@ -318,7 +325,7 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T int32_t * q = (int32_t *)o->raw_data.data; if(t->ndata > 0) { - n = minn(t->ndata, (size_t)o->raw_data.len / sz); + n = XMIN(t->ndata, (size_t)o->raw_data.len / sz); for(i = 0; i < n; i++) p[i] = le32_to_cpu(q[i]); } @@ -330,7 +337,7 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T int64_t * q = (int64_t *)o->raw_data.data; if(t->ndata > 0) { - n = minn(t->ndata, (size_t)o->raw_data.len / sz); + n = XMIN(t->ndata, (size_t)o->raw_data.len / sz); for(i = 0; i < n; i++) p[i] = le64_to_cpu(q[i]); } @@ -344,8 +351,8 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T uint8_t * q = (uint8_t *)o->raw_data.data; if(t->ndata > 0) { - n = minn(t->ndata, (size_t)o->raw_data.len); - memcpy(p, q, n); + n = XMIN(t->ndata, (size_t)o->raw_data.len); + onnx_memcpy(p, q, n); } } break; @@ -355,7 +362,7 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T uint16_t * q = (uint16_t *)o->raw_data.data; if(t->ndata > 0) { - n = minn(t->ndata, (size_t)o->raw_data.len / sz); + n = XMIN(t->ndata, (size_t)o->raw_data.len / sz); for(i = 0; i < n; 
i++) p[i] = le16_to_cpu(q[i]); } @@ -368,7 +375,7 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T union { uint64_t u; double f; } v; if(t->ndata > 0) { - n = minn(t->ndata, (size_t)o->raw_data.len / sz); + n = XMIN(t->ndata, (size_t)o->raw_data.len / sz); for(i = 0; i < n; i++) { v.u = le64_to_cpu(q[i]); @@ -383,7 +390,7 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T uint32_t * q = (uint32_t *)o->raw_data.data; if(t->ndata > 0) { - n = minn(t->ndata, (size_t)o->raw_data.len / sz); + n = XMIN(t->ndata, (size_t)o->raw_data.len / sz); for(i = 0; i < n; i++) p[i] = le32_to_cpu(q[i]); } @@ -395,7 +402,7 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T uint64_t * q = (uint64_t *)o->raw_data.data; if(t->ndata > 0) { - n = minn(t->ndata, (size_t)o->raw_data.len / sz); + n = XMIN(t->ndata, (size_t)o->raw_data.len / sz); for(i = 0; i < n; i++) p[i] = le64_to_cpu(q[i]); } @@ -408,7 +415,7 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T union { uint32_t u; float f; } v; if(t->ndata > 0) { - n = minn(t->ndata, (size_t)o->raw_data.len / sz) * 2; + n = XMIN(t->ndata, (size_t)o->raw_data.len / sz) * 2; for(i = 0; i < n; i++) { v.u = le32_to_cpu(q[i]); @@ -424,7 +431,7 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T union { uint64_t u; double f; } v; if(t->ndata > 0) { - n = minn(t->ndata, (size_t)o->raw_data.len / sz) * 2; + n = XMIN(t->ndata, (size_t)o->raw_data.len / sz) * 2; for(i = 0; i < n; i++) { v.u = le64_to_cpu(q[i]); @@ -439,7 +446,7 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T uint16_t * q = (uint16_t *)o->raw_data.data; if(t->ndata > 0) { - n = minn(t->ndata, (size_t)o->raw_data.len / sz); + n = XMIN(t->ndata, (size_t)o->raw_data.len / sz); for(i = 0; i < n; i++) p[i] = le16_to_cpu(q[i]); } @@ -454,9 +461,9 @@ static void 
onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T switch(o->data_type) { case ONNX__TENSOR_PROTO__DATA_TYPE__FLOAT: - n = minn(t->ndata, (size_t)o->n_float_data); + n = XMIN(t->ndata, (size_t)o->n_float_data); if((n > 0) && t->datas && o->float_data) - memcpy(t->datas, o->float_data, sizeof(float) * n); + onnx_memcpy(t->datas, o->float_data, sizeof(float) * n); break; case ONNX__TENSOR_PROTO__DATA_TYPE__UINT8: case ONNX__TENSOR_PROTO__DATA_TYPE__INT8: @@ -467,12 +474,12 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T case ONNX__TENSOR_PROTO__DATA_TYPE__FLOAT16: case ONNX__TENSOR_PROTO__DATA_TYPE__BFLOAT16: //TODO - n = minn(t->ndata, (size_t)o->n_int32_data); + n = XMIN(t->ndata, (size_t)o->n_int32_data); if((n > 0) && t->datas && o->int32_data) - memcpy(t->datas, o->int32_data, sz * n); + onnx_memcpy(t->datas, o->int32_data, sz * n); break; case ONNX__TENSOR_PROTO__DATA_TYPE__STRING: - n = minn(t->ndata, (size_t)o->n_string_data); + n = XMIN(t->ndata, (size_t)o->n_string_data); if((n > 0) && t->datas && o->string_data) { char ** str = (char **)t->datas; @@ -480,47 +487,47 @@ static void onnx_tensor_copy_from_tensor_proto(struct onnx_tensor_t * t, Onnx__T { if(str[i]) { - free(str[i]); + onnx_free(str[i]); str[i] = NULL; } } for(i = 0; i < n; i++) { - str[i] = malloc(o->string_data[i].len + 1); + str[i] = onnx_malloc(o->string_data[i].len + 1); if(str[i]) { str[i][o->string_data[i].len] = 0; - memcpy(str[i], o->string_data[i].data, o->string_data[i].len); + onnx_memcpy(str[i], o->string_data[i].data, o->string_data[i].len); } } } break; case ONNX__TENSOR_PROTO__DATA_TYPE__INT64: - n = minn(t->ndata, (size_t)o->n_int64_data); + n = XMIN(t->ndata, (size_t)o->n_int64_data); if((n > 0) && t->datas && o->int64_data) - memcpy(t->datas, o->int64_data, sizeof(int64_t) * n); + onnx_memcpy(t->datas, o->int64_data, sizeof(int64_t) * n); break; case ONNX__TENSOR_PROTO__DATA_TYPE__DOUBLE: - n = minn(t->ndata, 
(size_t)o->n_double_data); + n = XMIN(t->ndata, (size_t)o->n_double_data); if((n > 0) && t->datas && o->double_data) - memcpy(t->datas, o->double_data, sizeof(double) * n); + onnx_memcpy(t->datas, o->double_data, sizeof(double) * n); break; case ONNX__TENSOR_PROTO__DATA_TYPE__UINT32: case ONNX__TENSOR_PROTO__DATA_TYPE__UINT64: //TODO - n = minn(t->ndata, (size_t)o->n_uint64_data); + n = XMIN(t->ndata, (size_t)o->n_uint64_data); if((n > 0) && t->datas && o->uint64_data) - memcpy(t->datas, o->uint64_data, sz * n); + onnx_memcpy(t->datas, o->uint64_data, sz * n); break; case ONNX__TENSOR_PROTO__DATA_TYPE__COMPLEX64: - n = minn(t->ndata, (size_t)(o->n_float_data / 2)); + n = XMIN(t->ndata, (size_t)(o->n_float_data / 2)); if((n > 0) && t->datas && o->float_data) - memcpy(t->datas, o->float_data, sizeof(float) * 2 * n); + onnx_memcpy(t->datas, o->float_data, sizeof(float) * 2 * n); break; case ONNX__TENSOR_PROTO__DATA_TYPE__COMPLEX128: - n = minn(t->ndata, (size_t)(o->n_double_data / 2)); + n = XMIN(t->ndata, (size_t)(o->n_double_data / 2)); if((n > 0) && t->datas && o->double_data) - memcpy(t->datas, o->double_data, sizeof(double) * 2 * n); + onnx_memcpy(t->datas, o->double_data, sizeof(double) * 2 * n); break; default: break; @@ -538,7 +545,7 @@ static int reshape_dummy(struct onnx_node_t * n) static void operator_dummy(struct onnx_node_t * n) { - ONNX_LOG("\033[45;37mUnsupported opset\033[0m => %s-%d (%s)\r\n", n->proto->op_type, n->opset, (strlen(n->proto->domain) > 0) ? n->proto->domain : "ai.onnx"); + onnx_printf("\033[45;37mUnsupported opset\033[0m => %s-%d (%s)\r\n", n->proto->op_type, n->opset, (onnx_strlen(n->proto->domain) > 0) ? 
n->proto->domain : "ai.onnx"); } static void resolver_solve_operator(struct onnx_resolver_t * r, struct onnx_node_t * n) @@ -591,6 +598,18 @@ static void resolver_solve_operator(struct onnx_resolver_t * r, struct onnx_node case 0x0bfe45a2: /* "BitShift" */ rop = r->op_BitShift; break; + case 0xdadf882f: /* "BitwiseAnd" */ + rop = r->op_BitwiseAnd; + break; + case 0xdadfbfad: /* "BitwiseNot" */ + rop = r->op_BitwiseNot; + break; + case 0xdfd83c3d: /* "BitwiseOr" */ + rop = r->op_BitwiseOr; + break; + case 0xdadfea35: /* "BitwiseXor" */ + rop = r->op_BitwiseXor; + break; case 0x7c8378d0: /* "Cast" */ rop = r->op_Cast; break; @@ -1036,13 +1055,14 @@ static void resolver_solve_operator(struct onnx_resolver_t * r, struct onnx_node case 0x522154a3: /* "SoftmaxCrossEntropyLoss" */ rop = r->op_SoftmaxCrossEntropyLoss; break; - + case 0xbe84199b: /* "DynamicQuantizeLSTM" */ + rop = r->op_DynamicQuantizeLSTM; + break; default: rop = NULL; break; } - if(rop) - rop(n); + n->rop = rop; } } @@ -1060,16 +1080,16 @@ struct onnx_graph_t * onnx_graph_alloc(struct onnx_context_t * ctx, Onnx__GraphP if(!graph) return NULL; - g = malloc(sizeof(struct onnx_graph_t)); + g = onnx_malloc(sizeof(struct onnx_graph_t)); if(!g) return NULL; - memset(g, 0, sizeof(struct onnx_graph_t)); + onnx_memset(g, 0, sizeof(struct onnx_graph_t)); g->nlen = graph->n_node; - g->nodes = malloc(sizeof(struct onnx_node_t) * g->nlen); + g->nodes = onnx_malloc(sizeof(struct onnx_node_t) * g->nlen); if(!g->nodes) { - free(g); + onnx_free(g); return NULL; } @@ -1078,12 +1098,12 @@ struct onnx_graph_t * onnx_graph_alloc(struct onnx_context_t * ctx, Onnx__GraphP v = graph->input[i]; if(!onnx_tensor_search(ctx, v->name)) { - t = onnx_tensor_alloc_from_value_info(v); + t = onnx_tensor_alloc_from_value_info(ctx, v); if(t) { for(j = 0; j < graph->n_initializer; j++) { - if(strcmp(graph->initializer[j]->name, t->name) == 0) + if(onnx_strcmp(graph->initializer[j]->name, t->name) == 0) { 
onnx_tensor_copy_from_tensor_proto(t, graph->initializer[j]); break; @@ -1099,7 +1119,7 @@ struct onnx_graph_t * onnx_graph_alloc(struct onnx_context_t * ctx, Onnx__GraphP v = graph->output[i]; if(!onnx_tensor_search(ctx, v->name)) { - t = onnx_tensor_alloc_from_value_info(v); + t = onnx_tensor_alloc_from_value_info(ctx, v); if(t) hmap_add(ctx->map, t->name, t); } @@ -1110,7 +1130,7 @@ struct onnx_graph_t * onnx_graph_alloc(struct onnx_context_t * ctx, Onnx__GraphP v = graph->value_info[i]; if(!onnx_tensor_search(ctx, v->name)) { - t = onnx_tensor_alloc_from_value_info(v); + t = onnx_tensor_alloc_from_value_info(ctx, v); if(t) hmap_add(ctx->map, t->name, t); } @@ -1139,7 +1159,7 @@ struct onnx_graph_t * onnx_graph_alloc(struct onnx_context_t * ctx, Onnx__GraphP { for(k = 0; k < graph->n_initializer; k++) { - if(strcmp(graph->initializer[k]->name, name) == 0) + if(onnx_strcmp(graph->initializer[k]->name, name) == 0) { o = graph->initializer[k]; if(o) @@ -1158,13 +1178,6 @@ struct onnx_graph_t * onnx_graph_alloc(struct onnx_context_t * ctx, Onnx__GraphP } } } - if(!onnx_tensor_search(ctx, name)) - { - if(g->nodes) - free(g->nodes); - free(g); - return NULL; - } } } } @@ -1172,19 +1185,20 @@ struct onnx_graph_t * onnx_graph_alloc(struct onnx_context_t * ctx, Onnx__GraphP for(i = 0; i < g->nlen; i++) { n = &g->nodes[i]; - memset(n, 0, sizeof(struct onnx_node_t)); + onnx_memset(n, 0, sizeof(struct onnx_node_t)); n->ctx = ctx; n->proto = graph->node[i]; + n->index = i; domain = n->proto->domain; - if(!domain || (strlen(domain) == 0)) + if(!domain || (onnx_strlen(domain) == 0)) domain = "ai.onnx"; for(j = 0; j < ctx->model->n_opset_import; j++) { p = ctx->model->opset_import[j]->domain; - if(!p || (strlen(p) == 0)) + if(!p || (onnx_strlen(p) == 0)) p = "ai.onnx"; - if(strcmp(domain, p) == 0) + if(onnx_strcmp(domain, p) == 0) { n->opset = ctx->model->opset_import[j]->version; break; @@ -1192,17 +1206,27 @@ struct onnx_graph_t * onnx_graph_alloc(struct onnx_context_t * ctx, 
Onnx__GraphP } if(n->proto->n_input > 0) { - n->inputs = malloc(sizeof(struct onnx_tensor_t *) * n->proto->n_input); + if (n->index == 23) { + n->index = 23; + } + + + + n->inputs = onnx_malloc(sizeof(struct onnx_tensor_t *) * n->proto->n_input); if(n->inputs) { n->ninput = n->proto->n_input; - for(j = 0; j < n->ninput; j++) + for(j = 0; j < n->ninput; j++) + { + n->inputs[j] = onnx_tensor_search(ctx, n->proto->input[j]); + + } } } if(n->proto->n_output > 0) { - n->outputs = malloc(sizeof(struct onnx_tensor_t *) * n->proto->n_output); + n->outputs = onnx_malloc(sizeof(struct onnx_tensor_t *) * n->proto->n_output); if(n->outputs) { n->noutput = n->proto->n_output; @@ -1213,58 +1237,80 @@ struct onnx_graph_t * onnx_graph_alloc(struct onnx_context_t * ctx, Onnx__GraphP for(j = 0; j < ctx->rlen; j++) { resolver_solve_operator(ctx->r[j], n); - if(n->operator_) + if(n->rop) { n->r = ctx->r[j]; n->rctx = ctx->rctx[j]; break; } } - if(!n->operator_) + if(!n->rop) { resolver_solve_operator(&resolver_default, n); - if(n->operator_) + if(n->rop) { n->r = &resolver_default; n->rctx = NULL; } } - if(!n->reshape) - n->reshape = reshape_dummy; - if(!n->operator_) - n->operator_ = operator_dummy; - if(n->init) - { - if(n->init(n) <= 0) + + if (initialize_input_states(n) <= 0) { + if(g->nodes) { - if(g->nodes) + for(j = 0; j <= i; j++) { - for(j = 0; j <= i; j++) - { - n = &g->nodes[j]; - if(n->exit) - n->exit(n); - if(n->inputs) - free(n->inputs); - if(n->outputs) - free(n->outputs); - } - free(g->nodes); + free_node(&g->nodes[j]); } - free(g); - return NULL; + onnx_free(g->nodes); } + onnx_free(g); + return NULL; } - if(n->reshape) - n->reshape(n); } return g; } +void free_node(struct onnx_node_t * n) { + if(n->exit) + n->exit(n); + + if(n->inputs) + onnx_free(n->inputs); + if(n->outputs) + onnx_free(n->outputs); + + if(n->last_input_states) + { + for(int j = 0; j < n->ninput; j++) + { + if(n->last_input_states[j].dims) + onnx_free(n->last_input_states[j].dims); + } + 
onnx_free(n->last_input_states); + } + +} + +int is_all_inputs_ready(struct onnx_node_t * n) +{ + if(n->ninput > 0) + { + for(int i = 0; i < n->ninput; i++) + { + struct onnx_tensor_t * x = n->inputs[i]; + if (!x) + continue; + if(!x || x->type == ONNX_TENSOR_TYPE_UNDEFINED || x->ndata == 0) { + return 0; + } + } + } + return 1; +} + void onnx_graph_free(struct onnx_graph_t * g) { - struct onnx_node_t * n; int i; if(g) @@ -1273,23 +1319,18 @@ void onnx_graph_free(struct onnx_graph_t * g) { for(i = 0; i < g->nlen; i++) { - n = &g->nodes[i]; - if(n->exit) - n->exit(n); - if(n->inputs) - free(n->inputs); - if(n->outputs) - free(n->outputs); + free_node(&g->nodes[i]); + } - free(g->nodes); + onnx_free(g->nodes); } - free(g); + onnx_free(g); } } const char * onnx_tensor_type_tostring(enum onnx_tensor_type_t type) { - static const char * typestr[17] = { + static const char * typestr[] = { "undefined", "float32", "uint8", @@ -1307,6 +1348,12 @@ const char * onnx_tensor_type_tostring(enum onnx_tensor_type_t type) "complex64", "complex128", "bfloat16", + "float8e4m3fn", + "float8e4m3fnuz", + "float8e5m2", + "float8e5m2fnuz", + "uint4", + "int4", }; if((type > 0) && (type < (sizeof(typestr) / sizeof((typestr)[0])))) return typestr[type]; @@ -1315,7 +1362,7 @@ const char * onnx_tensor_type_tostring(enum onnx_tensor_type_t type) int onnx_tensor_type_sizeof(enum onnx_tensor_type_t type) { - static const int typesz[17] = { + static const int typesz[] = { 0, sizeof(float), sizeof(uint8_t), @@ -1333,6 +1380,12 @@ int onnx_tensor_type_sizeof(enum onnx_tensor_type_t type) sizeof(float) * 2, sizeof(double) * 2, sizeof(uint16_t), + sizeof(uint8_t), + sizeof(uint8_t), + sizeof(uint8_t), + sizeof(uint8_t), + sizeof(uint8_t), + sizeof(uint8_t), }; if((type > 0) && (type < (sizeof(typesz) / sizeof((typesz)[0])))) return typesz[type]; @@ -1353,12 +1406,12 @@ struct onnx_tensor_t * onnx_tensor_alloc(const char * name, enum onnx_tensor_typ if(!name) return NULL; - t = malloc(sizeof(struct 
onnx_tensor_t)); + t = onnx_malloc(sizeof(struct onnx_tensor_t)); if(!t) return NULL; - memset(t, 0, sizeof(struct onnx_tensor_t)); + onnx_memset(t, 0, sizeof(struct onnx_tensor_t)); - t->name = strdup(name); + t->name = onnx_strdup(name); onnx_tensor_reinit(t, type, dims, ndim); return t; } @@ -1382,17 +1435,17 @@ struct onnx_tensor_t * onnx_tensor_alloc_from_file(const char * filename) fseek(fp, 0L, SEEK_SET); if(l > 0) { - buf = malloc(l); + buf = onnx_malloc(l); if(buf) { for(len = 0; len < l; len += fread(buf + len, 1, l - len, fp)); pb = onnx__tensor_proto__unpack(NULL, len, buf); - free(buf); + onnx_free(buf); if(pb) { if(pb->n_dims > 0) { - dims = malloc(sizeof(int) * pb->n_dims); + dims = onnx_malloc(sizeof(int) * pb->n_dims); if(dims) { for(i = 0; i < pb->n_dims; i++) @@ -1402,7 +1455,7 @@ struct onnx_tensor_t * onnx_tensor_alloc_from_file(const char * filename) } t = onnx_tensor_alloc(pb->name, (enum onnx_tensor_type_t)pb->data_type, dims, ndim); if((ndim > 0) && dims) - free(dims); + onnx_free(dims); onnx_tensor_copy_from_tensor_proto(t, pb); onnx__tensor_proto__free_unpacked(pb, NULL); } @@ -1420,13 +1473,13 @@ void onnx_tensor_free(struct onnx_tensor_t * t) if(t) { if(t->name) - free(t->name); + onnx_free(t->name); if(t->ndim > 0) { if(t->strides) - free(t->strides); + onnx_free(t->strides); if(t->dims) - free(t->dims); + onnx_free(t->dims); } if((t->ndata > 0) && t->datas) { @@ -1436,12 +1489,12 @@ void onnx_tensor_free(struct onnx_tensor_t * t) for(size_t idx = 0; idx < t->ndata; idx++) { if(str[idx]) - free(str[idx]); + onnx_free(str[idx]); } } - free(t->datas); + onnx_free(t->datas); } - free(t); + onnx_free(t); } } @@ -1465,10 +1518,12 @@ int onnx_tensor_equal(struct onnx_tensor_t * a, struct onnx_tensor_t * b) switch(a->type) { case ONNX_TENSOR_TYPE_BOOL: + case ONNX_TENSOR_TYPE_INT4: case ONNX_TENSOR_TYPE_INT8: case ONNX_TENSOR_TYPE_INT16: case ONNX_TENSOR_TYPE_INT32: case ONNX_TENSOR_TYPE_INT64: + case ONNX_TENSOR_TYPE_UINT4: case 
ONNX_TENSOR_TYPE_UINT8: case ONNX_TENSOR_TYPE_UINT16: case ONNX_TENSOR_TYPE_UINT32: @@ -1476,6 +1531,13 @@ int onnx_tensor_equal(struct onnx_tensor_t * a, struct onnx_tensor_t * b) if(memcmp(a->datas, b->datas, a->ndata * onnx_tensor_type_sizeof(a->type)) != 0) return 0; break; + case ONNX_TENSOR_TYPE_FLOAT8E4M3FN: + case ONNX_TENSOR_TYPE_FLOAT8E4M3FNUZ: + case ONNX_TENSOR_TYPE_FLOAT8E5M2: + case ONNX_TENSOR_TYPE_FLOAT8E5M2FNUZ: + if(memcmp(a->datas, b->datas, a->ndata * onnx_tensor_type_sizeof(a->type)) != 0) + return 0; + break; case ONNX_TENSOR_TYPE_BFLOAT16: { uint16_t * p = (uint16_t *)a->datas; @@ -1548,7 +1610,7 @@ int onnx_tensor_equal(struct onnx_tensor_t * a, struct onnx_tensor_t * b) char ** q = (char **)b->datas; for(i = 0; i < a->ndata; i++) { - if(p[i] && q[i] && (strcmp(p[i], q[i]) != 0)) + if(p[i] && q[i] && (onnx_strcmp(p[i], q[i]) != 0)) return 0; } } @@ -1571,12 +1633,12 @@ void onnx_tensor_reinit(struct onnx_tensor_t * t, enum onnx_tensor_type_t type, { if(t->strides) { - free(t->strides); + onnx_free(t->strides); t->strides = NULL; } if(t->dims) { - free(t->dims); + onnx_free(t->dims); t->dims = NULL; } t->ndim = 0; @@ -1590,12 +1652,12 @@ void onnx_tensor_reinit(struct onnx_tensor_t * t, enum onnx_tensor_type_t type, { if(str[idx]) { - free(str[idx]); + onnx_free(str[idx]); str[idx] = NULL; } } } - free(t->datas); + onnx_free(t->datas); t->datas = NULL; t->ndata = 0; } @@ -1606,27 +1668,32 @@ void onnx_tensor_reinit(struct onnx_tensor_t * t, enum onnx_tensor_type_t type, { for(i = 0; i < ndim; i++) { - if(dims[i] <= 0) + if(dims[i] < 0 && dims[i] != -1) return; } - t->strides = malloc(sizeof(int) * ndim); - t->dims = malloc(sizeof(int) * ndim); + t->strides = onnx_malloc(sizeof(int) * ndim); + t->dims = onnx_malloc(sizeof(int) * ndim); if(t->strides && t->dims) { t->strides[ndim - 1] = 1; for(i = ndim - 2; i >= 0; i--) - t->strides[i] = dims[i + 1] * t->strides[i + 1]; - memcpy(t->dims, dims, sizeof(int) * ndim); + { + if(dims[i+1] > 0) + 
t->strides[i] = dims[i + 1] * t->strides[i + 1]; + else + t->strides[i] = t->strides[i + 1]; + } + onnx_memcpy(t->dims, dims, sizeof(int) * ndim); t->ndim = ndim; for(i = 0, n = 1; i < t->ndim; i++) - n *= t->dims[i]; + n *= (t->dims[i] > 0) ? t->dims[i] : 1; sz = onnx_tensor_type_sizeof(t->type); if(sz > 0) { - t->datas = malloc(n * sz); + t->datas = onnx_malloc(n * sz); if(t->datas) { - memset(t->datas, 0, n * sz); + onnx_memset(t->datas, 0, n * sz); t->ndata = n; } } @@ -1635,12 +1702,12 @@ void onnx_tensor_reinit(struct onnx_tensor_t * t, enum onnx_tensor_type_t type, { if(t->strides) { - free(t->strides); + onnx_free(t->strides); t->strides = NULL; } if(t->dims) { - free(t->dims); + onnx_free(t->dims); t->dims = NULL; } } @@ -1650,10 +1717,10 @@ void onnx_tensor_reinit(struct onnx_tensor_t * t, enum onnx_tensor_type_t type, sz = onnx_tensor_type_sizeof(t->type); if(sz > 0) { - t->datas = malloc(sz); + t->datas = onnx_malloc(sz); if(t->datas) { - memset(t->datas, 0, sz); + onnx_memset(t->datas, 0, sz); t->ndata = 1; } } @@ -1682,19 +1749,19 @@ void onnx_tensor_apply(struct onnx_tensor_t * t, void * buf, size_t len) { if(p[idx]) { - free(p[idx]); + onnx_free(p[idx]); p[idx] = NULL; } } - l = minn(t->ndata, (size_t)len); + l = XMIN(t->ndata, (size_t)len); for(size_t idx = 0; idx < l; idx++) - p[idx] = strdup(q[idx]); + p[idx] = onnx_strdup(q[idx]); } else { l = t->ndata * sz; if(l > 0) - memcpy(t->datas, buf, minn(l, len)); + onnx_memcpy(t->datas, buf, XMIN(l, len)); } } } @@ -1711,7 +1778,7 @@ static Onnx__AttributeProto * onnx_search_attribute(struct onnx_node_t * n, cons for(i = 0; i < n->proto->n_attribute; i++) { attr = n->proto->attribute[i]; - if(strcmp(attr->name, name) == 0) + if(onnx_strcmp(attr->name, name) == 0) return attr; } } @@ -1751,6 +1818,40 @@ char * onnx_attribute_read_string(struct onnx_node_t * n, const char * name, cha return def; } +int onnx_attribute_read_strings(struct onnx_node_t * n, const char * name, char ***strings) +{ + 
Onnx__AttributeProto * attr = onnx_search_attribute(n, name); + + if(attr && (attr->type == ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__STRINGS)) + { + if(attr->n_strings > 0 && attr->strings) + { + *strings = onnx_malloc(sizeof(char*) * attr->n_strings); + if(*strings) + { + for(size_t i = 0; i < attr->n_strings; i++) + { + if(attr->strings[i].len > 0) + { + (*strings)[i] = onnx_malloc(attr->strings[i].len + 1); + if((*strings)[i]) + { + onnx_memcpy((*strings)[i], attr->strings[i].data, attr->strings[i].len); + (*strings)[i][attr->strings[i].len] = '\0'; + } + } + else + { + (*strings)[i] = NULL; + } + } + return attr->n_strings; + } + } + } + return 0; +} + int onnx_attribute_read_floats(struct onnx_node_t * n, const char * name, float ** floats) { Onnx__AttributeProto * attr = onnx_search_attribute(n, name); @@ -1788,7 +1889,7 @@ int onnx_attribute_read_tensor(struct onnx_node_t * n, const char * name, struct { if(attr->t->n_dims > 0) { - dims = malloc(sizeof(int) * attr->t->n_dims); + dims = onnx_malloc(sizeof(int) * attr->t->n_dims); if(dims) { for(i = 0; i < attr->t->n_dims; i++) @@ -1799,7 +1900,7 @@ int onnx_attribute_read_tensor(struct onnx_node_t * n, const char * name, struct if((t->ndim != ndim) || (memcmp(t->dims, dims, sizeof(int) * ndim) != 0) || (t->type != (enum onnx_tensor_type_t)attr->t->data_type)) onnx_tensor_reinit(t, (enum onnx_tensor_type_t)attr->t->data_type, dims, ndim); if((ndim > 0) && dims) - free(dims); + onnx_free(dims); onnx_tensor_copy_from_tensor_proto(t, attr->t); return 1; } @@ -1841,31 +1942,32 @@ void onnx_tensor_dump(struct onnx_tensor_t * t, int detail) if(t) { - ONNX_LOG("%s: %s", t->name, onnx_tensor_type_tostring(t->type)); + onnx_printf("%s: %s", t->name, onnx_tensor_type_tostring(t->type)); if(t->ndim > 0) { - ONNX_LOG("["); + onnx_printf("["); for(i = 0; i < t->ndim; i++) { - ONNX_LOG("%d", t->dims[i]); + onnx_printf("%d", t->dims[i]); if(i != t->ndim - 1) - ONNX_LOG(" x "); + onnx_printf(" x "); } - ONNX_LOG("]"); + 
onnx_printf("]"); + int print_limit = 100; if(detail) { - ONNX_LOG(" = \r\n"); + onnx_printf(" = \r\n"); for(i = 0; i < t->ndim; i++) { if(t->dims[i] <= 0) return; } - sizes = malloc(sizeof(int) * t->ndim); - levels = malloc(sizeof(int) * t->ndim); + sizes = onnx_malloc(sizeof(int) * t->ndim); + levels = onnx_malloc(sizeof(int) * t->ndim); sizes[t->ndim - 1] = t->dims[t->ndim - 1]; levels[t->ndim - 1] = 0; - lbuf = malloc(sizeof(char) * (t->ndim + 1)); - rbuf = malloc(sizeof(char) * (t->ndim + 1)); + lbuf = onnx_malloc(sizeof(char) * (t->ndim + 1)); + rbuf = onnx_malloc(sizeof(char) * (t->ndim + 1)); lp = lbuf; rp = rbuf; for(i = t->ndim - 2; i >= 0; i--) @@ -1873,7 +1975,7 @@ void onnx_tensor_dump(struct onnx_tensor_t * t, int detail) sizes[i] = t->dims[i] * sizes[i + 1]; levels[i] = 0; } - for(size_t idx = 0; idx < t->ndata; idx++) + for(size_t idx = 0; (idx < t->ndata) && (print_limit > 0); idx++) { for(j = 0; j < t->ndim; j++) { @@ -1899,154 +2001,155 @@ void onnx_tensor_dump(struct onnx_tensor_t * t, int detail) } } *lp = *rp = '\0'; - ONNX_LOG("%s", rbuf); + onnx_printf("%s", rbuf); if(*rbuf != '\0') { - ONNX_LOG("\r\n"); - for(k = t->ndim - strlen(rbuf); k > 0; k--) - ONNX_LOG(" "); + onnx_printf("\r\n"); + for(k = t->ndim - onnx_strlen(rbuf); k > 0; k--) + onnx_printf(" "); } - ONNX_LOG("%s", lbuf); + onnx_printf("%s", lbuf); if(*lbuf == '\0') - ONNX_LOG(" "); + onnx_printf(" "); p = (void *)(t->datas + onnx_tensor_type_sizeof(t->type) * idx); switch(t->type) { case ONNX_TENSOR_TYPE_BOOL: - ONNX_LOG("%s,", *((uint8_t *)p) ? "true" : "false"); + onnx_printf("%s,", *((uint8_t *)p) ? 
"true" : "false"); break; case ONNX_TENSOR_TYPE_INT8: - ONNX_LOG("%d,", *((int8_t *)p)); + onnx_printf("%d,", *((int8_t *)p)); break; case ONNX_TENSOR_TYPE_INT16: - ONNX_LOG("%d,", *((int16_t *)p)); + onnx_printf("%d,", *((int16_t *)p)); break; case ONNX_TENSOR_TYPE_INT32: - ONNX_LOG("%d,", *((int32_t *)p)); + onnx_printf("%d,", *((int32_t *)p)); break; case ONNX_TENSOR_TYPE_INT64: - ONNX_LOG("%ld,", *((int64_t *)p)); + onnx_printf("%ld,", *((int64_t *)p)); break; case ONNX_TENSOR_TYPE_UINT8: - ONNX_LOG("%u,", *((uint8_t *)p)); + onnx_printf("%u,", *((uint8_t *)p)); break; case ONNX_TENSOR_TYPE_UINT16: - ONNX_LOG("%u,", *((uint16_t *)p)); + onnx_printf("%u,", *((uint16_t *)p)); break; case ONNX_TENSOR_TYPE_UINT32: - ONNX_LOG("%u,", *((uint32_t *)p)); + onnx_printf("%u,", *((uint32_t *)p)); break; case ONNX_TENSOR_TYPE_UINT64: - ONNX_LOG("%lu,", *((uint64_t *)p)); + onnx_printf("%lu,", *((uint64_t *)p)); break; case ONNX_TENSOR_TYPE_BFLOAT16: - ONNX_LOG("%g,", bfloat16_to_float32(*((uint16_t *)p))); + onnx_printf("%g,", bfloat16_to_float32(*((uint16_t *)p))); break; case ONNX_TENSOR_TYPE_FLOAT16: - ONNX_LOG("%g,", float16_to_float32(*((uint16_t *)p))); + onnx_printf("%g,", float16_to_float32(*((uint16_t *)p))); break; case ONNX_TENSOR_TYPE_FLOAT32: - ONNX_LOG("%g,", *((float *)p)); + onnx_printf("%g,", *((float *)p)); break; case ONNX_TENSOR_TYPE_FLOAT64: - ONNX_LOG("%g,", *((double *)p)); + onnx_printf("%g,", *((double *)p)); break; case ONNX_TENSOR_TYPE_COMPLEX64: - ONNX_LOG("%g + %gi,", *((float *)p), *((float *)(p + sizeof(float)))); + onnx_printf("%g + %gi,", *((float *)p), *((float *)(p + sizeof(float)))); break; case ONNX_TENSOR_TYPE_COMPLEX128: - ONNX_LOG("%g + %gi,", *((double *)p), *((double *)(p + sizeof(double)))); + onnx_printf("%g + %gi,", *((double *)p), *((double *)(p + sizeof(double)))); break; case ONNX_TENSOR_TYPE_STRING: - ONNX_LOG("%s,", (char *)(((char **)p)[0])); + onnx_printf("%s,", (char *)(((char **)p)[0])); break; default: - 
ONNX_LOG("?,"); + onnx_printf("?,"); break; } lp = lbuf; rp = rbuf; + print_limit--; } for(j = 0; j < t->ndim; j++) - ONNX_LOG("]"); - free(sizes); - free(levels); - free(lbuf); - free(rbuf); - ONNX_LOG("\r\n"); + onnx_printf("]"); + onnx_free(sizes); + onnx_free(levels); + onnx_free(lbuf); + onnx_free(rbuf); + onnx_printf("\r\n"); } else { - ONNX_LOG(" = "); - ONNX_LOG("[...]"); - ONNX_LOG("\r\n"); + onnx_printf(" = "); + onnx_printf("[...]"); + onnx_printf("\r\n"); } } else if(t->ndata == 1) { - ONNX_LOG(" = "); + onnx_printf(" = "); p = (void *)(t->datas); switch(t->type) { case ONNX_TENSOR_TYPE_BOOL: - ONNX_LOG("%s", *((uint8_t *)p) ? "true" : "false"); + onnx_printf("%s", *((uint8_t *)p) ? "true" : "false"); break; case ONNX_TENSOR_TYPE_INT8: - ONNX_LOG("%d", *((int8_t *)p)); + onnx_printf("%d", *((int8_t *)p)); break; case ONNX_TENSOR_TYPE_INT16: - ONNX_LOG("%d", *((int16_t *)p)); + onnx_printf("%d", *((int16_t *)p)); break; case ONNX_TENSOR_TYPE_INT32: - ONNX_LOG("%d", *((int32_t *)p)); + onnx_printf("%d", *((int32_t *)p)); break; case ONNX_TENSOR_TYPE_INT64: - ONNX_LOG("%ld", *((int64_t *)p)); + onnx_printf("%ld", *((int64_t *)p)); break; case ONNX_TENSOR_TYPE_UINT8: - ONNX_LOG("%u", *((uint8_t *)p)); + onnx_printf("%u", *((uint8_t *)p)); break; case ONNX_TENSOR_TYPE_UINT16: - ONNX_LOG("%u", *((uint16_t *)p)); + onnx_printf("%u", *((uint16_t *)p)); break; case ONNX_TENSOR_TYPE_UINT32: - ONNX_LOG("%u", *((uint32_t *)p)); + onnx_printf("%u", *((uint32_t *)p)); break; case ONNX_TENSOR_TYPE_UINT64: - ONNX_LOG("%lu", *((uint64_t *)p)); + onnx_printf("%lu", *((uint64_t *)p)); break; case ONNX_TENSOR_TYPE_BFLOAT16: - ONNX_LOG("%g", bfloat16_to_float32(*((uint16_t *)p))); + onnx_printf("%g", bfloat16_to_float32(*((uint16_t *)p))); break; case ONNX_TENSOR_TYPE_FLOAT16: - ONNX_LOG("%g", float16_to_float32(*((uint16_t *)p))); + onnx_printf("%g", float16_to_float32(*((uint16_t *)p))); break; case ONNX_TENSOR_TYPE_FLOAT32: - ONNX_LOG("%g", *((float *)p)); + 
onnx_printf("%g", *((float *)p)); break; case ONNX_TENSOR_TYPE_FLOAT64: - ONNX_LOG("%g", *((double *)p)); + onnx_printf("%g", *((double *)p)); break; case ONNX_TENSOR_TYPE_COMPLEX64: - ONNX_LOG("%g + %gi", *((float *)p), *((float *)(p + sizeof(float)))); + onnx_printf("%g + %gi", *((float *)p), *((float *)(p + sizeof(float)))); break; case ONNX_TENSOR_TYPE_COMPLEX128: - ONNX_LOG("%g + %gi", *((double *)p), *((double *)(p + sizeof(double)))); + onnx_printf("%g + %gi", *((double *)p), *((double *)(p + sizeof(double)))); break; case ONNX_TENSOR_TYPE_STRING: - ONNX_LOG("%s", (char *)(((char **)p)[0])); + onnx_printf("%s", (char *)(((char **)p)[0])); break; default: - ONNX_LOG("?"); + onnx_printf("?"); break; } - ONNX_LOG("\r\n"); + onnx_printf("\r\n"); } else { - ONNX_LOG(" = "); - ONNX_LOG("null"); - ONNX_LOG("\r\n"); + onnx_printf(" = "); + onnx_printf("null"); + onnx_printf("\r\n"); } } } @@ -2057,22 +2160,22 @@ void onnx_node_dump(struct onnx_node_t * n, int detail) if(n) { - ONNX_LOG("%s: %s-%d (%s)\r\n", n->proto->name, n->proto->op_type, n->opset, (strlen(n->proto->domain) > 0) ? n->proto->domain : "ai.onnx"); + onnx_printf("%s: %s-%d (%s)\r\n", n->proto->name, n->proto->op_type, n->opset, (onnx_strlen(n->proto->domain) > 0) ? 
n->proto->domain : "ai.onnx"); if(n->ninput > 0) { - ONNX_LOG("\tInputs:\r\n"); + onnx_printf("\tInputs:\r\n"); for(i = 0; i < n->ninput; i++) { - ONNX_LOG("\t\t"); + onnx_printf("\t\t"); onnx_tensor_dump(n->inputs[i], detail); } } if(n->noutput > 0) { - ONNX_LOG("\tOutputs:\r\n"); + onnx_printf("\tOutputs:\r\n"); for(i = 0; i < n->noutput; i++) { - ONNX_LOG("\t\t"); + onnx_printf("\t\t"); onnx_tensor_dump(n->outputs[i], detail); } } @@ -2098,18 +2201,96 @@ void onnx_context_dump(struct onnx_context_t * ctx, int detail) { if(ctx->model) { - ONNX_LOG("IR Version: v%ld\r\n", ctx->model->ir_version); - ONNX_LOG("Producer: %s %s\r\n", ctx->model->producer_name, ctx->model->producer_version); - ONNX_LOG("Domain: %s\r\n", ctx->model->domain); - ONNX_LOG("Imports:\r\n"); + onnx_printf("IR Version: v%ld\r\n", ctx->model->ir_version); + onnx_printf("Producer: %s %s\r\n", ctx->model->producer_name, ctx->model->producer_version); + onnx_printf("Domain: %s\r\n", ctx->model->domain); + onnx_printf("Imports:\r\n"); for(i = 0; i < ctx->model->n_opset_import; i++) - ONNX_LOG("\t%s v%ld\r\n", (strlen(ctx->model->opset_import[i]->domain) > 0) ? ctx->model->opset_import[i]->domain : "ai.onnx", ctx->model->opset_import[i]->version); + onnx_printf("\t%s v%ld\r\n", (onnx_strlen(ctx->model->opset_import[i]->domain) > 0) ? 
ctx->model->opset_import[i]->domain : "ai.onnx", ctx->model->opset_import[i]->version); } if(ctx->g) onnx_graph_dump(ctx->g, detail); } } + +int initialize_input_states(struct onnx_node_t * n) +{ + if (!n->last_input_states && n->ninput > 0) { + n->last_input_states = onnx_malloc(sizeof(struct onnx_input_state_t) * n->ninput); + if (!n->last_input_states) { + return 0; + } + + for (int i = 0; i < n->ninput; i++) { + n->last_input_states[i].type = ONNX_TENSOR_TYPE_UNDEFINED; + n->last_input_states[i].dims = NULL; + n->last_input_states[i].ndim = -1; + } + } + return 1; +} + + +int have_inputs_changed(struct onnx_node_t * n) +{ + for (int i = 0; i < n->ninput; i++) { + struct onnx_tensor_t * input = n->inputs[i]; + struct onnx_input_state_t * state = &n->last_input_states[i]; + + if (!input) + continue; + + if (input->type != state->type) { + return 1; + } + + if (input->ndim != state->ndim) { + return 1; + } + + for (int j = 0; j < input->ndim; j++) { + if (input->dims[j] != state->dims[j]) { + return 1; + } + } + } + + return 0; +} + + +static void update_input_states(struct onnx_node_t * n) +{ + + for (int i = 0; i < n->ninput; i++) { + struct onnx_tensor_t * input = n->inputs[i]; + struct onnx_input_state_t * state = &n->last_input_states[i]; + + if (input) + state->type = input->type; + + if (state->dims) { + onnx_free(state->dims); + } + + if (input && input->ndim > 0) { + state->ndim = input->ndim; + state->dims = onnx_malloc(sizeof(int) * input->ndim); + if (state->dims) { + for (int j = 0; j < input->ndim; j++) { + state->dims[j] = input->dims[j]; + } + } + } else { + state->ndim = 0; + state->dims = NULL; + } + } + +} + + void onnx_run(struct onnx_context_t * ctx) { struct onnx_node_t * n; @@ -2120,8 +2301,36 @@ void onnx_run(struct onnx_context_t * ctx) for(i = 0; i < ctx->g->nlen; i++) { n = &ctx->g->nodes[i]; - if(n->reshape(n)) - n->operator_(n); + + + onnx_printf("Node %s\r\n", n->proto->op_type); + + if (have_inputs_changed(n) || !n->initialized) { + 
if (n->exit) { + n->exit(n); + } + n->initialized = 1; + if (n->rop) + n->rop(n); + else { + n->reshape = reshape_dummy; + n->operator_ = operator_dummy; + } + if(n->init) + n->init(n); + update_input_states(n); + } + + if (is_all_inputs_ready(n)) { + if (n->reshape && n->reshape(n)) { + n->operator_(n); + } else { + onnx_printf("Reshape problem"); + } + } else { + onnx_printf("Not all inputs are ready for node %s\r\n", n->proto->op_type); + } + onnx_tensor_dump(n->outputs[0], 1); } } } diff --git a/modules/fnxext/onnx_engine/src/onnx.h b/modules/fnxext/onnx_engine/src/onnx.h index 3b090782e76..3d6cee3ed4e 100644 --- a/modules/fnxext/onnx_engine/src/onnx.h +++ b/modules/fnxext/onnx_engine/src/onnx.h @@ -20,23 +20,29 @@ struct onnx_context_t; struct onnx_resolver_t; enum onnx_tensor_type_t { - ONNX_TENSOR_TYPE_UNDEFINED = 0, - ONNX_TENSOR_TYPE_BOOL = 9, - ONNX_TENSOR_TYPE_INT8 = 3, - ONNX_TENSOR_TYPE_INT16 = 5, - ONNX_TENSOR_TYPE_INT32 = 6, - ONNX_TENSOR_TYPE_INT64 = 7, - ONNX_TENSOR_TYPE_UINT8 = 2, - ONNX_TENSOR_TYPE_UINT16 = 4, - ONNX_TENSOR_TYPE_UINT32 = 12, - ONNX_TENSOR_TYPE_UINT64 = 13, - ONNX_TENSOR_TYPE_BFLOAT16 = 16, - ONNX_TENSOR_TYPE_FLOAT16 = 10, - ONNX_TENSOR_TYPE_FLOAT32 = 1, - ONNX_TENSOR_TYPE_FLOAT64 = 11, - ONNX_TENSOR_TYPE_COMPLEX64 = 14, - ONNX_TENSOR_TYPE_COMPLEX128 = 15, - ONNX_TENSOR_TYPE_STRING = 8, + ONNX_TENSOR_TYPE_UNDEFINED = 0, + ONNX_TENSOR_TYPE_BOOL = 9, + ONNX_TENSOR_TYPE_INT4 = 22, + ONNX_TENSOR_TYPE_INT8 = 3, + ONNX_TENSOR_TYPE_INT16 = 5, + ONNX_TENSOR_TYPE_INT32 = 6, + ONNX_TENSOR_TYPE_INT64 = 7, + ONNX_TENSOR_TYPE_UINT4 = 21, + ONNX_TENSOR_TYPE_UINT8 = 2, + ONNX_TENSOR_TYPE_UINT16 = 4, + ONNX_TENSOR_TYPE_UINT32 = 12, + ONNX_TENSOR_TYPE_UINT64 = 13, + ONNX_TENSOR_TYPE_FLOAT8E4M3FN = 17, + ONNX_TENSOR_TYPE_FLOAT8E4M3FNUZ = 18, + ONNX_TENSOR_TYPE_FLOAT8E5M2 = 19, + ONNX_TENSOR_TYPE_FLOAT8E5M2FNUZ = 20, + ONNX_TENSOR_TYPE_BFLOAT16 = 16, + ONNX_TENSOR_TYPE_FLOAT16 = 10, + ONNX_TENSOR_TYPE_FLOAT32 = 1, + ONNX_TENSOR_TYPE_FLOAT64 = 11, + 
ONNX_TENSOR_TYPE_COMPLEX64 = 14, + ONNX_TENSOR_TYPE_COMPLEX128 = 15, + ONNX_TENSOR_TYPE_STRING = 8, }; struct onnx_tensor_t { @@ -49,6 +55,12 @@ struct onnx_tensor_t { size_t ndata; }; +struct onnx_input_state_t { + enum onnx_tensor_type_t type; + int * dims; + int ndim; +}; + struct onnx_node_t { struct onnx_context_t * ctx; struct onnx_resolver_t * r; @@ -59,11 +71,16 @@ struct onnx_node_t { struct onnx_tensor_t ** outputs; int noutput; Onnx__NodeProto * proto; + int index; + + struct onnx_input_state_t * last_input_states; + int initialized; - void (*operator_)(struct onnx_node_t * n); int (*init)(struct onnx_node_t * n); int (*exit)(struct onnx_node_t * n); int (*reshape)(struct onnx_node_t * n); + void (*operator_)(struct onnx_node_t * n); + void (*rop)(struct onnx_node_t *); void * priv; }; @@ -79,6 +96,7 @@ struct onnx_context_t { void ** rctx; int rlen; struct onnx_graph_t * g; + struct hmap_t * shape_params; }; struct onnx_resolver_t { @@ -101,6 +119,10 @@ struct onnx_resolver_t { void (*op_AveragePool)(struct onnx_node_t * n); void (*op_BatchNormalization)(struct onnx_node_t * n); void (*op_BitShift)(struct onnx_node_t * n); + void (*op_BitwiseAnd)(struct onnx_node_t * n); + void (*op_BitwiseNot)(struct onnx_node_t * n); + void (*op_BitwiseOr)(struct onnx_node_t * n); + void (*op_BitwiseXor)(struct onnx_node_t * n); void (*op_Cast)(struct onnx_node_t * n); void (*op_Ceil)(struct onnx_node_t * n); void (*op_Clip)(struct onnx_node_t * n); @@ -252,15 +274,20 @@ struct onnx_resolver_t { void (*op_Range)(struct onnx_node_t * n); void (*op_Softmax)(struct onnx_node_t * n); void (*op_SoftmaxCrossEntropyLoss)(struct onnx_node_t * n); + void (*op_DynamicQuantizeLSTM)(struct onnx_node_t * n); }; -struct onnx_context_t * onnx_context_alloc(const void * buf, size_t len, struct onnx_resolver_t ** r, int rlen); -struct onnx_context_t * onnx_context_alloc_from_file(const char * filename, struct onnx_resolver_t ** r, int rlen); +struct onnx_context_t * 
onnx_context_alloc(const void * buf, size_t len, struct onnx_resolver_t ** r, int rlen, struct hmap_t * shape_params); +struct onnx_context_t * onnx_context_alloc_from_file(const char * filename, struct onnx_resolver_t ** r, int rlen, struct hmap_t * shape_params); void onnx_context_free(struct onnx_context_t * ctx); struct onnx_graph_t * onnx_graph_alloc(struct onnx_context_t * ctx, Onnx__GraphProto * graph); void onnx_graph_free(struct onnx_graph_t * g); +void free_node(struct onnx_node_t * n); + +int is_all_inputs_ready(struct onnx_node_t * n); + const char * onnx_tensor_type_tostring(enum onnx_tensor_type_t type); int onnx_tensor_type_sizeof(enum onnx_tensor_type_t type); struct onnx_tensor_t * onnx_tensor_search(struct onnx_context_t * ctx, const char * name); @@ -326,7 +353,7 @@ static inline int onnx_tensor_reshape_identity(struct onnx_tensor_t * y, struct static inline int onnx_tensor_reshape_multi_broadcast(struct onnx_tensor_t * y, struct onnx_tensor_t * a, struct onnx_tensor_t * b, enum onnx_tensor_type_t type) { - int ndim = maxx(a->ndim, b->ndim); + int ndim = XMAX(a->ndim, b->ndim); int dims[ndim]; int i, j, k; @@ -379,6 +406,8 @@ static inline void * onnx_tensor_broadcast_map_address(struct onnx_tensor_t * x, float onnx_attribute_read_float(struct onnx_node_t * n, const char * name, float def); int64_t onnx_attribute_read_int(struct onnx_node_t * n, const char * name, int64_t def); char * onnx_attribute_read_string(struct onnx_node_t * n, const char * name, char * def); +int onnx_attribute_read_strings(struct onnx_node_t * n, const char * name, char ***strings); + int onnx_attribute_read_ints(struct onnx_node_t * n, const char * name, int64_t ** ints); int onnx_attribute_read_floats(struct onnx_node_t * n, const char * name, float ** floats); int onnx_attribute_read_tensor(struct onnx_node_t * n, const char * name, struct onnx_tensor_t * t); @@ -390,6 +419,8 @@ void onnx_node_dump(struct onnx_node_t * n, int detail); void onnx_graph_dump(struct 
onnx_graph_t * g, int detail); void onnx_context_dump(struct onnx_context_t * ctx, int detail); +int initialize_input_states(struct onnx_node_t * n); + void onnx_run(struct onnx_context_t * ctx); #ifdef __cplusplus diff --git a/modules/fnxext/onnx_engine/src/onnx.proto3.pb-c.c b/modules/fnxext/onnx_engine/src/onnx.proto3.pb-c.c index 486f4eff5ee..c2a3804046b 100644 --- a/modules/fnxext/onnx_engine/src/onnx.proto3.pb-c.c +++ b/modules/fnxext/onnx_engine/src/onnx.proto3.pb-c.c @@ -16,21 +16,21 @@ void onnx__attribute_proto__init size_t onnx__attribute_proto__get_packed_size (const Onnx__AttributeProto *message) { - assert(message->base.descriptor == &onnx__attribute_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__attribute_proto__descriptor); return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); } size_t onnx__attribute_proto__pack (const Onnx__AttributeProto *message, uint8_t *out) { - assert(message->base.descriptor == &onnx__attribute_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__attribute_proto__descriptor); return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); } size_t onnx__attribute_proto__pack_to_buffer (const Onnx__AttributeProto *message, ProtobufCBuffer *buffer) { - assert(message->base.descriptor == &onnx__attribute_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__attribute_proto__descriptor); return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } Onnx__AttributeProto * @@ -49,7 +49,7 @@ void onnx__attribute_proto__free_unpacked { if(!message) return; - assert(message->base.descriptor == &onnx__attribute_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__attribute_proto__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } void onnx__value_info_proto__init @@ -61,21 +61,21 @@ void onnx__value_info_proto__init size_t onnx__value_info_proto__get_packed_size (const 
Onnx__ValueInfoProto *message) { - assert(message->base.descriptor == &onnx__value_info_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__value_info_proto__descriptor); return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); } size_t onnx__value_info_proto__pack (const Onnx__ValueInfoProto *message, uint8_t *out) { - assert(message->base.descriptor == &onnx__value_info_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__value_info_proto__descriptor); return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); } size_t onnx__value_info_proto__pack_to_buffer (const Onnx__ValueInfoProto *message, ProtobufCBuffer *buffer) { - assert(message->base.descriptor == &onnx__value_info_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__value_info_proto__descriptor); return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } Onnx__ValueInfoProto * @@ -94,7 +94,7 @@ void onnx__value_info_proto__free_unpacked { if(!message) return; - assert(message->base.descriptor == &onnx__value_info_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__value_info_proto__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } void onnx__node_proto__init @@ -106,21 +106,21 @@ void onnx__node_proto__init size_t onnx__node_proto__get_packed_size (const Onnx__NodeProto *message) { - assert(message->base.descriptor == &onnx__node_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__node_proto__descriptor); return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); } size_t onnx__node_proto__pack (const Onnx__NodeProto *message, uint8_t *out) { - assert(message->base.descriptor == &onnx__node_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__node_proto__descriptor); return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); } size_t onnx__node_proto__pack_to_buffer 
(const Onnx__NodeProto *message, ProtobufCBuffer *buffer) { - assert(message->base.descriptor == &onnx__node_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__node_proto__descriptor); return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } Onnx__NodeProto * @@ -139,7 +139,7 @@ void onnx__node_proto__free_unpacked { if(!message) return; - assert(message->base.descriptor == &onnx__node_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__node_proto__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } void onnx__training_info_proto__init @@ -151,21 +151,21 @@ void onnx__training_info_proto__init size_t onnx__training_info_proto__get_packed_size (const Onnx__TrainingInfoProto *message) { - assert(message->base.descriptor == &onnx__training_info_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__training_info_proto__descriptor); return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); } size_t onnx__training_info_proto__pack (const Onnx__TrainingInfoProto *message, uint8_t *out) { - assert(message->base.descriptor == &onnx__training_info_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__training_info_proto__descriptor); return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); } size_t onnx__training_info_proto__pack_to_buffer (const Onnx__TrainingInfoProto *message, ProtobufCBuffer *buffer) { - assert(message->base.descriptor == &onnx__training_info_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__training_info_proto__descriptor); return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } Onnx__TrainingInfoProto * @@ -184,7 +184,7 @@ void onnx__training_info_proto__free_unpacked { if(!message) return; - assert(message->base.descriptor == &onnx__training_info_proto__descriptor); + onnx_assert(message->base.descriptor == 
&onnx__training_info_proto__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } void onnx__model_proto__init @@ -196,21 +196,21 @@ void onnx__model_proto__init size_t onnx__model_proto__get_packed_size (const Onnx__ModelProto *message) { - assert(message->base.descriptor == &onnx__model_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__model_proto__descriptor); return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); } size_t onnx__model_proto__pack (const Onnx__ModelProto *message, uint8_t *out) { - assert(message->base.descriptor == &onnx__model_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__model_proto__descriptor); return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); } size_t onnx__model_proto__pack_to_buffer (const Onnx__ModelProto *message, ProtobufCBuffer *buffer) { - assert(message->base.descriptor == &onnx__model_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__model_proto__descriptor); return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } Onnx__ModelProto * @@ -229,7 +229,7 @@ void onnx__model_proto__free_unpacked { if(!message) return; - assert(message->base.descriptor == &onnx__model_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__model_proto__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } void onnx__string_string_entry_proto__init @@ -241,21 +241,21 @@ void onnx__string_string_entry_proto__init size_t onnx__string_string_entry_proto__get_packed_size (const Onnx__StringStringEntryProto *message) { - assert(message->base.descriptor == &onnx__string_string_entry_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__string_string_entry_proto__descriptor); return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); } size_t onnx__string_string_entry_proto__pack (const 
Onnx__StringStringEntryProto *message, uint8_t *out) { - assert(message->base.descriptor == &onnx__string_string_entry_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__string_string_entry_proto__descriptor); return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); } size_t onnx__string_string_entry_proto__pack_to_buffer (const Onnx__StringStringEntryProto *message, ProtobufCBuffer *buffer) { - assert(message->base.descriptor == &onnx__string_string_entry_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__string_string_entry_proto__descriptor); return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } Onnx__StringStringEntryProto * @@ -274,7 +274,7 @@ void onnx__string_string_entry_proto__free_unpacked { if(!message) return; - assert(message->base.descriptor == &onnx__string_string_entry_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__string_string_entry_proto__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } void onnx__tensor_annotation__init @@ -286,21 +286,21 @@ void onnx__tensor_annotation__init size_t onnx__tensor_annotation__get_packed_size (const Onnx__TensorAnnotation *message) { - assert(message->base.descriptor == &onnx__tensor_annotation__descriptor); + onnx_assert(message->base.descriptor == &onnx__tensor_annotation__descriptor); return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); } size_t onnx__tensor_annotation__pack (const Onnx__TensorAnnotation *message, uint8_t *out) { - assert(message->base.descriptor == &onnx__tensor_annotation__descriptor); + onnx_assert(message->base.descriptor == &onnx__tensor_annotation__descriptor); return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); } size_t onnx__tensor_annotation__pack_to_buffer (const Onnx__TensorAnnotation *message, ProtobufCBuffer *buffer) { - assert(message->base.descriptor == &onnx__tensor_annotation__descriptor); 
+ onnx_assert(message->base.descriptor == &onnx__tensor_annotation__descriptor); return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } Onnx__TensorAnnotation * @@ -319,7 +319,7 @@ void onnx__tensor_annotation__free_unpacked { if(!message) return; - assert(message->base.descriptor == &onnx__tensor_annotation__descriptor); + onnx_assert(message->base.descriptor == &onnx__tensor_annotation__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } void onnx__graph_proto__init @@ -331,21 +331,21 @@ void onnx__graph_proto__init size_t onnx__graph_proto__get_packed_size (const Onnx__GraphProto *message) { - assert(message->base.descriptor == &onnx__graph_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__graph_proto__descriptor); return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); } size_t onnx__graph_proto__pack (const Onnx__GraphProto *message, uint8_t *out) { - assert(message->base.descriptor == &onnx__graph_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__graph_proto__descriptor); return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); } size_t onnx__graph_proto__pack_to_buffer (const Onnx__GraphProto *message, ProtobufCBuffer *buffer) { - assert(message->base.descriptor == &onnx__graph_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__graph_proto__descriptor); return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } Onnx__GraphProto * @@ -364,7 +364,7 @@ void onnx__graph_proto__free_unpacked { if(!message) return; - assert(message->base.descriptor == &onnx__graph_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__graph_proto__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } void onnx__tensor_proto__segment__init @@ -382,21 +382,21 @@ void onnx__tensor_proto__init size_t onnx__tensor_proto__get_packed_size (const 
Onnx__TensorProto *message) { - assert(message->base.descriptor == &onnx__tensor_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__tensor_proto__descriptor); return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); } size_t onnx__tensor_proto__pack (const Onnx__TensorProto *message, uint8_t *out) { - assert(message->base.descriptor == &onnx__tensor_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__tensor_proto__descriptor); return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); } size_t onnx__tensor_proto__pack_to_buffer (const Onnx__TensorProto *message, ProtobufCBuffer *buffer) { - assert(message->base.descriptor == &onnx__tensor_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__tensor_proto__descriptor); return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } Onnx__TensorProto * @@ -415,7 +415,7 @@ void onnx__tensor_proto__free_unpacked { if(!message) return; - assert(message->base.descriptor == &onnx__tensor_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__tensor_proto__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } void onnx__sparse_tensor_proto__init @@ -427,21 +427,21 @@ void onnx__sparse_tensor_proto__init size_t onnx__sparse_tensor_proto__get_packed_size (const Onnx__SparseTensorProto *message) { - assert(message->base.descriptor == &onnx__sparse_tensor_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__sparse_tensor_proto__descriptor); return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); } size_t onnx__sparse_tensor_proto__pack (const Onnx__SparseTensorProto *message, uint8_t *out) { - assert(message->base.descriptor == &onnx__sparse_tensor_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__sparse_tensor_proto__descriptor); return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); } size_t 
onnx__sparse_tensor_proto__pack_to_buffer (const Onnx__SparseTensorProto *message, ProtobufCBuffer *buffer) { - assert(message->base.descriptor == &onnx__sparse_tensor_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__sparse_tensor_proto__descriptor); return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } Onnx__SparseTensorProto * @@ -460,7 +460,7 @@ void onnx__sparse_tensor_proto__free_unpacked { if(!message) return; - assert(message->base.descriptor == &onnx__sparse_tensor_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__sparse_tensor_proto__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } void onnx__tensor_shape_proto__dimension__init @@ -478,21 +478,21 @@ void onnx__tensor_shape_proto__init size_t onnx__tensor_shape_proto__get_packed_size (const Onnx__TensorShapeProto *message) { - assert(message->base.descriptor == &onnx__tensor_shape_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__tensor_shape_proto__descriptor); return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); } size_t onnx__tensor_shape_proto__pack (const Onnx__TensorShapeProto *message, uint8_t *out) { - assert(message->base.descriptor == &onnx__tensor_shape_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__tensor_shape_proto__descriptor); return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); } size_t onnx__tensor_shape_proto__pack_to_buffer (const Onnx__TensorShapeProto *message, ProtobufCBuffer *buffer) { - assert(message->base.descriptor == &onnx__tensor_shape_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__tensor_shape_proto__descriptor); return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } Onnx__TensorShapeProto * @@ -511,7 +511,7 @@ void onnx__tensor_shape_proto__free_unpacked { if(!message) return; - assert(message->base.descriptor == 
&onnx__tensor_shape_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__tensor_shape_proto__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } void onnx__type_proto__tensor__init @@ -532,6 +532,18 @@ void onnx__type_proto__map__init static const Onnx__TypeProto__Map init_value = ONNX__TYPE_PROTO__MAP__INIT; *message = init_value; } +void onnx__type_proto__optional__init + (Onnx__TypeProto__Optional *message) +{ + static const Onnx__TypeProto__Optional init_value = ONNX__TYPE_PROTO__OPTIONAL__INIT; + *message = init_value; +} +void onnx__type_proto__sparse_tensor__init + (Onnx__TypeProto__SparseTensor *message) +{ + static const Onnx__TypeProto__SparseTensor init_value = ONNX__TYPE_PROTO__SPARSE_TENSOR__INIT; + *message = init_value; +} void onnx__type_proto__init (Onnx__TypeProto *message) { @@ -541,21 +553,21 @@ void onnx__type_proto__init size_t onnx__type_proto__get_packed_size (const Onnx__TypeProto *message) { - assert(message->base.descriptor == &onnx__type_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__type_proto__descriptor); return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); } size_t onnx__type_proto__pack (const Onnx__TypeProto *message, uint8_t *out) { - assert(message->base.descriptor == &onnx__type_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__type_proto__descriptor); return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); } size_t onnx__type_proto__pack_to_buffer (const Onnx__TypeProto *message, ProtobufCBuffer *buffer) { - assert(message->base.descriptor == &onnx__type_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__type_proto__descriptor); return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } Onnx__TypeProto * @@ -574,7 +586,7 @@ void onnx__type_proto__free_unpacked { if(!message) return; - assert(message->base.descriptor == 
&onnx__type_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__type_proto__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } void onnx__operator_set_id_proto__init @@ -586,21 +598,21 @@ void onnx__operator_set_id_proto__init size_t onnx__operator_set_id_proto__get_packed_size (const Onnx__OperatorSetIdProto *message) { - assert(message->base.descriptor == &onnx__operator_set_id_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__operator_set_id_proto__descriptor); return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); } size_t onnx__operator_set_id_proto__pack (const Onnx__OperatorSetIdProto *message, uint8_t *out) { - assert(message->base.descriptor == &onnx__operator_set_id_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__operator_set_id_proto__descriptor); return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); } size_t onnx__operator_set_id_proto__pack_to_buffer (const Onnx__OperatorSetIdProto *message, ProtobufCBuffer *buffer) { - assert(message->base.descriptor == &onnx__operator_set_id_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__operator_set_id_proto__descriptor); return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); } Onnx__OperatorSetIdProto * @@ -619,10 +631,55 @@ void onnx__operator_set_id_proto__free_unpacked { if(!message) return; - assert(message->base.descriptor == &onnx__operator_set_id_proto__descriptor); + onnx_assert(message->base.descriptor == &onnx__operator_set_id_proto__descriptor); + protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); +} +void onnx__function_proto__init + (Onnx__FunctionProto *message) +{ + static const Onnx__FunctionProto init_value = ONNX__FUNCTION_PROTO__INIT; + *message = init_value; +} +size_t onnx__function_proto__get_packed_size + (const Onnx__FunctionProto *message) +{ + onnx_assert(message->base.descriptor 
== &onnx__function_proto__descriptor); + return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message)); +} +size_t onnx__function_proto__pack + (const Onnx__FunctionProto *message, + uint8_t *out) +{ + onnx_assert(message->base.descriptor == &onnx__function_proto__descriptor); + return protobuf_c_message_pack ((const ProtobufCMessage*)message, out); +} +size_t onnx__function_proto__pack_to_buffer + (const Onnx__FunctionProto *message, + ProtobufCBuffer *buffer) +{ + onnx_assert(message->base.descriptor == &onnx__function_proto__descriptor); + return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer); +} +Onnx__FunctionProto * + onnx__function_proto__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) +{ + return (Onnx__FunctionProto *) + protobuf_c_message_unpack (&onnx__function_proto__descriptor, + allocator, len, data); +} +void onnx__function_proto__free_unpacked + (Onnx__FunctionProto *message, + ProtobufCAllocator *allocator) +{ + if(!message) + return; + onnx_assert(message->base.descriptor == &onnx__function_proto__descriptor); protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator); } -static const ProtobufCEnumValue onnx__attribute_proto__attribute_type__enum_values_by_number[13] = +static const ProtobufCEnumValue onnx__attribute_proto__attribute_type__enum_values_by_number[15] = { { "UNDEFINED", "ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__UNDEFINED", 0 }, { "FLOAT", "ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__FLOAT", 1 }, @@ -637,11 +694,13 @@ static const ProtobufCEnumValue onnx__attribute_proto__attribute_type__enum_valu { "GRAPHS", "ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__GRAPHS", 10 }, { "SPARSE_TENSOR", "ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__SPARSE_TENSOR", 11 }, { "SPARSE_TENSORS", "ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__SPARSE_TENSORS", 12 }, + { "TYPE_PROTO", "ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__TYPE_PROTO", 13 }, + { "TYPE_PROTOS", 
"ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__TYPE_PROTOS", 14 }, }; static const ProtobufCIntRange onnx__attribute_proto__attribute_type__value_ranges[] = { -{0, 0},{0, 13} +{0, 0},{0, 15} }; -static const ProtobufCEnumValueIndex onnx__attribute_proto__attribute_type__enum_values_by_name[13] = +static const ProtobufCEnumValueIndex onnx__attribute_proto__attribute_type__enum_values_by_name[15] = { { "FLOAT", 1 }, { "FLOATS", 6 }, @@ -655,6 +714,8 @@ static const ProtobufCEnumValueIndex onnx__attribute_proto__attribute_type__enum { "STRINGS", 8 }, { "TENSOR", 4 }, { "TENSORS", 9 }, + { "TYPE_PROTO", 13 }, + { "TYPE_PROTOS", 14 }, { "UNDEFINED", 0 }, }; const ProtobufCEnumDescriptor onnx__attribute_proto__attribute_type__descriptor = @@ -664,15 +725,15 @@ const ProtobufCEnumDescriptor onnx__attribute_proto__attribute_type__descriptor "AttributeType", "Onnx__AttributeProto__AttributeType", "onnx", - 13, + 15, onnx__attribute_proto__attribute_type__enum_values_by_number, - 13, + 15, onnx__attribute_proto__attribute_type__enum_values_by_name, 1, onnx__attribute_proto__attribute_type__value_ranges, NULL,NULL,NULL,NULL /* reserved[1234] */ }; -static const ProtobufCFieldDescriptor onnx__attribute_proto__field_descriptors[16] = +static const ProtobufCFieldDescriptor onnx__attribute_proto__field_descriptors[18] = { { "name", @@ -755,7 +816,7 @@ static const ProtobufCFieldDescriptor onnx__attribute_proto__field_descriptors[1 offsetof(Onnx__AttributeProto, floats), NULL, NULL, - 0 | PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ + PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, { @@ -767,7 +828,7 @@ static const ProtobufCFieldDescriptor onnx__attribute_proto__field_descriptors[1 offsetof(Onnx__AttributeProto, ints), NULL, NULL, - 0 | PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ + PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, { @@ -818,6 +879,30 @@ static const ProtobufCFieldDescriptor 
onnx__attribute_proto__field_descriptors[1 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "tp", + 14, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(Onnx__AttributeProto, tp), + &onnx__type_proto__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "type_protos", + 15, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Onnx__AttributeProto, n_type_protos), + offsetof(Onnx__AttributeProto, type_protos), + &onnx__type_proto__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, { "type", 20, @@ -876,21 +961,23 @@ static const unsigned onnx__attribute_proto__field_indices_by_name[] = { 2, /* field[2] = i */ 7, /* field[7] = ints */ 0, /* field[0] = name */ - 13, /* field[13] = ref_attr_name */ + 15, /* field[15] = ref_attr_name */ 3, /* field[3] = s */ - 14, /* field[14] = sparse_tensor */ - 15, /* field[15] = sparse_tensors */ + 16, /* field[16] = sparse_tensor */ + 17, /* field[17] = sparse_tensors */ 8, /* field[8] = strings */ 4, /* field[4] = t */ 9, /* field[9] = tensors */ - 12, /* field[12] = type */ + 12, /* field[12] = tp */ + 14, /* field[14] = type */ + 13, /* field[13] = type_protos */ }; static const ProtobufCIntRange onnx__attribute_proto__number_ranges[3 + 1] = { { 1, 0 }, { 13, 11 }, - { 20, 12 }, - { 0, 16 } + { 20, 14 }, + { 0, 18 } }; const ProtobufCMessageDescriptor onnx__attribute_proto__descriptor = { @@ -900,14 +987,14 @@ const ProtobufCMessageDescriptor onnx__attribute_proto__descriptor = "Onnx__AttributeProto", "onnx", sizeof(Onnx__AttributeProto), - 16, + 18, onnx__attribute_proto__field_descriptors, onnx__attribute_proto__field_indices_by_name, 3, onnx__attribute_proto__number_ranges, (ProtobufCMessageInit) onnx__attribute_proto__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor onnx__value_info_proto__field_descriptors[3] = +static const 
ProtobufCFieldDescriptor onnx__value_info_proto__field_descriptors[4] = { { "name", @@ -945,16 +1032,29 @@ static const ProtobufCFieldDescriptor onnx__value_info_proto__field_descriptors[ 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "metadata_props", + 4, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Onnx__ValueInfoProto, n_metadata_props), + offsetof(Onnx__ValueInfoProto, metadata_props), + &onnx__string_string_entry_proto__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned onnx__value_info_proto__field_indices_by_name[] = { 2, /* field[2] = doc_string */ + 3, /* field[3] = metadata_props */ 0, /* field[0] = name */ 1, /* field[1] = type */ }; static const ProtobufCIntRange onnx__value_info_proto__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 3 } + { 0, 4 } }; const ProtobufCMessageDescriptor onnx__value_info_proto__descriptor = { @@ -964,14 +1064,14 @@ const ProtobufCMessageDescriptor onnx__value_info_proto__descriptor = "Onnx__ValueInfoProto", "onnx", sizeof(Onnx__ValueInfoProto), - 3, + 4, onnx__value_info_proto__field_descriptors, onnx__value_info_proto__field_indices_by_name, 1, onnx__value_info_proto__number_ranges, (ProtobufCMessageInit) onnx__value_info_proto__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor onnx__node_proto__field_descriptors[7] = +static const ProtobufCFieldDescriptor onnx__node_proto__field_descriptors[9] = { { "input", @@ -1057,20 +1157,46 @@ static const ProtobufCFieldDescriptor onnx__node_proto__field_descriptors[7] = 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "overload", + 8, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Onnx__NodeProto, overload), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "metadata_props", + 9, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, 
+ offsetof(Onnx__NodeProto, n_metadata_props), + offsetof(Onnx__NodeProto, metadata_props), + &onnx__string_string_entry_proto__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned onnx__node_proto__field_indices_by_name[] = { 4, /* field[4] = attribute */ 5, /* field[5] = doc_string */ 6, /* field[6] = domain */ 0, /* field[0] = input */ + 8, /* field[8] = metadata_props */ 2, /* field[2] = name */ 3, /* field[3] = op_type */ 1, /* field[1] = output */ + 7, /* field[7] = overload */ }; static const ProtobufCIntRange onnx__node_proto__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 7 } + { 0, 9 } }; const ProtobufCMessageDescriptor onnx__node_proto__descriptor = { @@ -1080,7 +1206,7 @@ const ProtobufCMessageDescriptor onnx__node_proto__descriptor = "Onnx__NodeProto", "onnx", sizeof(Onnx__NodeProto), - 7, + 9, onnx__node_proto__field_descriptors, onnx__node_proto__field_indices_by_name, 1, onnx__node_proto__number_ranges, @@ -1164,7 +1290,7 @@ const ProtobufCMessageDescriptor onnx__training_info_proto__descriptor = (ProtobufCMessageInit) onnx__training_info_proto__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor onnx__model_proto__field_descriptors[10] = +static const ProtobufCFieldDescriptor onnx__model_proto__field_descriptors[11] = { { "ir_version", @@ -1286,10 +1412,23 @@ static const ProtobufCFieldDescriptor onnx__model_proto__field_descriptors[10] = 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "functions", + 25, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Onnx__ModelProto, n_functions), + offsetof(Onnx__ModelProto, functions), + &onnx__function_proto__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned onnx__model_proto__field_indices_by_name[] = { 5, /* field[5] = doc_string */ 3, /* field[3] = domain */ + 10, /* field[10] = functions */ 6, /* field[6] = graph */ 0, /* 
field[0] = ir_version */ 8, /* field[8] = metadata_props */ @@ -1299,12 +1438,13 @@ static const unsigned onnx__model_proto__field_indices_by_name[] = { 2, /* field[2] = producer_version */ 9, /* field[9] = training_info */ }; -static const ProtobufCIntRange onnx__model_proto__number_ranges[3 + 1] = +static const ProtobufCIntRange onnx__model_proto__number_ranges[4 + 1] = { { 1, 0 }, { 14, 8 }, { 20, 9 }, - { 0, 10 } + { 25, 10 }, + { 0, 11 } }; const ProtobufCMessageDescriptor onnx__model_proto__descriptor = { @@ -1314,10 +1454,10 @@ const ProtobufCMessageDescriptor onnx__model_proto__descriptor = "Onnx__ModelProto", "onnx", sizeof(Onnx__ModelProto), - 10, + 11, onnx__model_proto__field_descriptors, onnx__model_proto__field_indices_by_name, - 3, onnx__model_proto__number_ranges, + 4, onnx__model_proto__number_ranges, (ProtobufCMessageInit) onnx__model_proto__init, NULL,NULL,NULL /* reserved[123] */ }; @@ -1423,7 +1563,7 @@ const ProtobufCMessageDescriptor onnx__tensor_annotation__descriptor = (ProtobufCMessageInit) onnx__tensor_annotation__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor onnx__graph_proto__field_descriptors[9] = +static const ProtobufCFieldDescriptor onnx__graph_proto__field_descriptors[10] = { { "node", @@ -1533,11 +1673,24 @@ static const ProtobufCFieldDescriptor onnx__graph_proto__field_descriptors[9] = 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "metadata_props", + 16, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Onnx__GraphProto, n_metadata_props), + offsetof(Onnx__GraphProto, metadata_props), + &onnx__string_string_entry_proto__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned onnx__graph_proto__field_indices_by_name[] = { 3, /* field[3] = doc_string */ 2, /* field[2] = initializer */ 4, /* field[4] = input */ + 9, /* field[9] = metadata_props */ 1, /* field[1] = name */ 0, /* field[0] = node */ 5, /* 
field[5] = output */ @@ -1550,7 +1703,7 @@ static const ProtobufCIntRange onnx__graph_proto__number_ranges[3 + 1] = { 1, 0 }, { 5, 2 }, { 10, 3 }, - { 0, 9 } + { 0, 10 } }; const ProtobufCMessageDescriptor onnx__graph_proto__descriptor = { @@ -1560,7 +1713,7 @@ const ProtobufCMessageDescriptor onnx__graph_proto__descriptor = "Onnx__GraphProto", "onnx", sizeof(Onnx__GraphProto), - 9, + 10, onnx__graph_proto__field_descriptors, onnx__graph_proto__field_indices_by_name, 3, onnx__graph_proto__number_ranges, @@ -1618,7 +1771,7 @@ const ProtobufCMessageDescriptor onnx__tensor_proto__segment__descriptor = (ProtobufCMessageInit) onnx__tensor_proto__segment__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCEnumValue onnx__tensor_proto__data_type__enum_values_by_number[17] = +static const ProtobufCEnumValue onnx__tensor_proto__data_type__enum_values_by_number[23] = { { "UNDEFINED", "ONNX__TENSOR_PROTO__DATA_TYPE__UNDEFINED", 0 }, { "FLOAT", "ONNX__TENSOR_PROTO__DATA_TYPE__FLOAT", 1 }, @@ -1637,11 +1790,17 @@ static const ProtobufCEnumValue onnx__tensor_proto__data_type__enum_values_by_nu { "COMPLEX64", "ONNX__TENSOR_PROTO__DATA_TYPE__COMPLEX64", 14 }, { "COMPLEX128", "ONNX__TENSOR_PROTO__DATA_TYPE__COMPLEX128", 15 }, { "BFLOAT16", "ONNX__TENSOR_PROTO__DATA_TYPE__BFLOAT16", 16 }, + { "FLOAT8E4M3FN", "ONNX__TENSOR_PROTO__DATA_TYPE__FLOAT8E4M3FN", 17 }, + { "FLOAT8E4M3FNUZ", "ONNX__TENSOR_PROTO__DATA_TYPE__FLOAT8E4M3FNUZ", 18 }, + { "FLOAT8E5M2", "ONNX__TENSOR_PROTO__DATA_TYPE__FLOAT8E5M2", 19 }, + { "FLOAT8E5M2FNUZ", "ONNX__TENSOR_PROTO__DATA_TYPE__FLOAT8E5M2FNUZ", 20 }, + { "UINT4", "ONNX__TENSOR_PROTO__DATA_TYPE__UINT4", 21 }, + { "INT4", "ONNX__TENSOR_PROTO__DATA_TYPE__INT4", 22 }, }; static const ProtobufCIntRange onnx__tensor_proto__data_type__value_ranges[] = { -{0, 0},{0, 17} +{0, 0},{0, 23} }; -static const ProtobufCEnumValueIndex onnx__tensor_proto__data_type__enum_values_by_name[17] = +static const ProtobufCEnumValueIndex 
onnx__tensor_proto__data_type__enum_values_by_name[23] = { { "BFLOAT16", 16 }, { "BOOL", 9 }, @@ -1650,13 +1809,19 @@ static const ProtobufCEnumValueIndex onnx__tensor_proto__data_type__enum_values_ { "DOUBLE", 11 }, { "FLOAT", 1 }, { "FLOAT16", 10 }, + { "FLOAT8E4M3FN", 17 }, + { "FLOAT8E4M3FNUZ", 18 }, + { "FLOAT8E5M2", 19 }, + { "FLOAT8E5M2FNUZ", 20 }, { "INT16", 5 }, { "INT32", 6 }, + { "INT4", 22 }, { "INT64", 7 }, { "INT8", 3 }, { "STRING", 8 }, { "UINT16", 4 }, { "UINT32", 12 }, + { "UINT4", 21 }, { "UINT64", 13 }, { "UINT8", 2 }, { "UNDEFINED", 0 }, @@ -1668,9 +1833,9 @@ const ProtobufCEnumDescriptor onnx__tensor_proto__data_type__descriptor = "DataType", "Onnx__TensorProto__DataType", "onnx", - 17, + 23, onnx__tensor_proto__data_type__enum_values_by_number, - 17, + 23, onnx__tensor_proto__data_type__enum_values_by_name, 1, onnx__tensor_proto__data_type__value_ranges, @@ -1704,7 +1869,7 @@ const ProtobufCEnumDescriptor onnx__tensor_proto__data_location__descriptor = onnx__tensor_proto__data_location__value_ranges, NULL,NULL,NULL,NULL /* reserved[1234] */ }; -static const ProtobufCFieldDescriptor onnx__tensor_proto__field_descriptors[14] = +static const ProtobufCFieldDescriptor onnx__tensor_proto__field_descriptors[15] = { { "dims", @@ -1715,7 +1880,7 @@ static const ProtobufCFieldDescriptor onnx__tensor_proto__field_descriptors[14] offsetof(Onnx__TensorProto, dims), NULL, NULL, - 0 | PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ + PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, { @@ -1751,7 +1916,7 @@ static const ProtobufCFieldDescriptor onnx__tensor_proto__field_descriptors[14] offsetof(Onnx__TensorProto, float_data), NULL, NULL, - 0 | PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ + PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, { @@ -1763,7 +1928,7 @@ static const ProtobufCFieldDescriptor onnx__tensor_proto__field_descriptors[14] offsetof(Onnx__TensorProto, int32_data), NULL, NULL, 
- 0 | PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ + PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, { @@ -1787,7 +1952,7 @@ static const ProtobufCFieldDescriptor onnx__tensor_proto__field_descriptors[14] offsetof(Onnx__TensorProto, int64_data), NULL, NULL, - 0 | PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ + PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, { @@ -1823,7 +1988,7 @@ static const ProtobufCFieldDescriptor onnx__tensor_proto__field_descriptors[14] offsetof(Onnx__TensorProto, double_data), NULL, NULL, - 0 | PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ + PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, { @@ -1835,7 +2000,7 @@ static const ProtobufCFieldDescriptor onnx__tensor_proto__field_descriptors[14] offsetof(Onnx__TensorProto, uint64_data), NULL, NULL, - 0 | PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ + PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, { @@ -1874,6 +2039,18 @@ static const ProtobufCFieldDescriptor onnx__tensor_proto__field_descriptors[14] 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "metadata_props", + 16, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Onnx__TensorProto, n_metadata_props), + offsetof(Onnx__TensorProto, metadata_props), + &onnx__string_string_entry_proto__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned onnx__tensor_proto__field_indices_by_name[] = { 13, /* field[13] = data_location */ @@ -1885,16 +2062,18 @@ static const unsigned onnx__tensor_proto__field_indices_by_name[] = { 3, /* field[3] = float_data */ 4, /* field[4] = int32_data */ 6, /* field[6] = int64_data */ + 14, /* field[14] = metadata_props */ 7, /* field[7] = name */ 8, /* field[8] = raw_data */ 2, /* field[2] = segment */ 5, /* field[5] = string_data */ 10, /* field[10] = uint64_data */ }; -static const 
ProtobufCIntRange onnx__tensor_proto__number_ranges[1 + 1] = +static const ProtobufCIntRange onnx__tensor_proto__number_ranges[2 + 1] = { { 1, 0 }, - { 0, 14 } + { 16, 14 }, + { 0, 15 } }; const ProtobufCMessageDescriptor onnx__tensor_proto__descriptor = { @@ -1904,10 +2083,10 @@ const ProtobufCMessageDescriptor onnx__tensor_proto__descriptor = "Onnx__TensorProto", "onnx", sizeof(Onnx__TensorProto), - 14, + 15, onnx__tensor_proto__field_descriptors, onnx__tensor_proto__field_indices_by_name, - 1, onnx__tensor_proto__number_ranges, + 2, onnx__tensor_proto__number_ranges, (ProtobufCMessageInit) onnx__tensor_proto__init, NULL,NULL,NULL /* reserved[123] */ }; @@ -1946,7 +2125,7 @@ static const ProtobufCFieldDescriptor onnx__sparse_tensor_proto__field_descripto offsetof(Onnx__SparseTensorProto, dims), NULL, NULL, - 0 | PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ + PROTOBUF_C_FIELD_FLAG_PACKED, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, }; @@ -1986,7 +2165,7 @@ static const ProtobufCFieldDescriptor onnx__tensor_shape_proto__dimension__field offsetof(Onnx__TensorShapeProto__Dimension, dim_value), NULL, NULL, - 0 | PROTOBUF_C_FIELD_FLAG_ONEOF, /* flags */ + PROTOBUF_C_FIELD_FLAG_ONEOF, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, { @@ -1998,7 +2177,7 @@ static const ProtobufCFieldDescriptor onnx__tensor_shape_proto__dimension__field offsetof(Onnx__TensorShapeProto__Dimension, dim_param), NULL, &protobuf_c_empty_string, - 0 | PROTOBUF_C_FIELD_FLAG_ONEOF, /* flags */ + PROTOBUF_C_FIELD_FLAG_ONEOF, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, { @@ -2217,7 +2396,96 @@ const ProtobufCMessageDescriptor onnx__type_proto__map__descriptor = (ProtobufCMessageInit) onnx__type_proto__map__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCFieldDescriptor onnx__type_proto__field_descriptors[4] = +static const ProtobufCFieldDescriptor onnx__type_proto__optional__field_descriptors[1] = +{ + { + "elem_type", + 1, + 
PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(Onnx__TypeProto__Optional, elem_type), + &onnx__type_proto__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned onnx__type_proto__optional__field_indices_by_name[] = { + 0, /* field[0] = elem_type */ +}; +static const ProtobufCIntRange onnx__type_proto__optional__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 1 } +}; +const ProtobufCMessageDescriptor onnx__type_proto__optional__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "onnx.TypeProto.Optional", + "Optional", + "Onnx__TypeProto__Optional", + "onnx", + sizeof(Onnx__TypeProto__Optional), + 1, + onnx__type_proto__optional__field_descriptors, + onnx__type_proto__optional__field_indices_by_name, + 1, onnx__type_proto__optional__number_ranges, + (ProtobufCMessageInit) onnx__type_proto__optional__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor onnx__type_proto__sparse_tensor__field_descriptors[2] = +{ + { + "elem_type", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(Onnx__TypeProto__SparseTensor, elem_type), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "shape", + 2, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(Onnx__TypeProto__SparseTensor, shape), + &onnx__tensor_shape_proto__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned onnx__type_proto__sparse_tensor__field_indices_by_name[] = { + 0, /* field[0] = elem_type */ + 1, /* field[1] = shape */ +}; +static const ProtobufCIntRange onnx__type_proto__sparse_tensor__number_ranges[1 + 1] = +{ + { 1, 0 }, + { 0, 2 } +}; +const ProtobufCMessageDescriptor onnx__type_proto__sparse_tensor__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "onnx.TypeProto.SparseTensor", + 
"SparseTensor", + "Onnx__TypeProto__SparseTensor", + "onnx", + sizeof(Onnx__TypeProto__SparseTensor), + 2, + onnx__type_proto__sparse_tensor__field_descriptors, + onnx__type_proto__sparse_tensor__field_indices_by_name, + 1, onnx__type_proto__sparse_tensor__number_ranges, + (ProtobufCMessageInit) onnx__type_proto__sparse_tensor__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor onnx__type_proto__field_descriptors[6] = { { "tensor_type", @@ -2228,7 +2496,7 @@ static const ProtobufCFieldDescriptor onnx__type_proto__field_descriptors[4] = offsetof(Onnx__TypeProto, tensor_type), &onnx__type_proto__tensor__descriptor, NULL, - 0 | PROTOBUF_C_FIELD_FLAG_ONEOF, /* flags */ + PROTOBUF_C_FIELD_FLAG_ONEOF, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, { @@ -2240,7 +2508,7 @@ static const ProtobufCFieldDescriptor onnx__type_proto__field_descriptors[4] = offsetof(Onnx__TypeProto, sequence_type), &onnx__type_proto__sequence__descriptor, NULL, - 0 | PROTOBUF_C_FIELD_FLAG_ONEOF, /* flags */ + PROTOBUF_C_FIELD_FLAG_ONEOF, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, { @@ -2252,7 +2520,7 @@ static const ProtobufCFieldDescriptor onnx__type_proto__field_descriptors[4] = offsetof(Onnx__TypeProto, map_type), &onnx__type_proto__map__descriptor, NULL, - 0 | PROTOBUF_C_FIELD_FLAG_ONEOF, /* flags */ + PROTOBUF_C_FIELD_FLAG_ONEOF, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, { @@ -2267,18 +2535,45 @@ static const ProtobufCFieldDescriptor onnx__type_proto__field_descriptors[4] = 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "sparse_tensor_type", + 8, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Onnx__TypeProto, value_case), + offsetof(Onnx__TypeProto, sparse_tensor_type), + &onnx__type_proto__sparse_tensor__descriptor, + NULL, + PROTOBUF_C_FIELD_FLAG_ONEOF, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "optional_type", + 9, + PROTOBUF_C_LABEL_NONE, + 
PROTOBUF_C_TYPE_MESSAGE, + offsetof(Onnx__TypeProto, value_case), + offsetof(Onnx__TypeProto, optional_type), + &onnx__type_proto__optional__descriptor, + NULL, + PROTOBUF_C_FIELD_FLAG_ONEOF, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned onnx__type_proto__field_indices_by_name[] = { 3, /* field[3] = denotation */ 2, /* field[2] = map_type */ + 5, /* field[5] = optional_type */ 1, /* field[1] = sequence_type */ + 4, /* field[4] = sparse_tensor_type */ 0, /* field[0] = tensor_type */ }; -static const ProtobufCIntRange onnx__type_proto__number_ranges[2 + 1] = +static const ProtobufCIntRange onnx__type_proto__number_ranges[3 + 1] = { { 1, 0 }, { 4, 1 }, - { 0, 4 } + { 8, 4 }, + { 0, 6 } }; const ProtobufCMessageDescriptor onnx__type_proto__descriptor = { @@ -2288,10 +2583,10 @@ const ProtobufCMessageDescriptor onnx__type_proto__descriptor = "Onnx__TypeProto", "onnx", sizeof(Onnx__TypeProto), - 4, + 6, onnx__type_proto__field_descriptors, onnx__type_proto__field_indices_by_name, - 2, onnx__type_proto__number_ranges, + 3, onnx__type_proto__number_ranges, (ProtobufCMessageInit) onnx__type_proto__init, NULL,NULL,NULL /* reserved[123] */ }; @@ -2346,7 +2641,189 @@ const ProtobufCMessageDescriptor onnx__operator_set_id_proto__descriptor = (ProtobufCMessageInit) onnx__operator_set_id_proto__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCEnumValue onnx__version__enum_values_by_number[8] = +static const ProtobufCFieldDescriptor onnx__function_proto__field_descriptors[12] = +{ + { + "name", + 1, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Onnx__FunctionProto, name), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "input", + 4, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_STRING, + offsetof(Onnx__FunctionProto, n_input), + offsetof(Onnx__FunctionProto, input), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ 
+ 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "output", + 5, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_STRING, + offsetof(Onnx__FunctionProto, n_output), + offsetof(Onnx__FunctionProto, output), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "attribute", + 6, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_STRING, + offsetof(Onnx__FunctionProto, n_attribute), + offsetof(Onnx__FunctionProto, attribute), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "node", + 7, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Onnx__FunctionProto, n_node), + offsetof(Onnx__FunctionProto, node), + &onnx__node_proto__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "doc_string", + 8, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Onnx__FunctionProto, doc_string), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "opset_import", + 9, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Onnx__FunctionProto, n_opset_import), + offsetof(Onnx__FunctionProto, opset_import), + &onnx__operator_set_id_proto__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "domain", + 10, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Onnx__FunctionProto, domain), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "attribute_proto", + 11, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Onnx__FunctionProto, n_attribute_proto), + offsetof(Onnx__FunctionProto, attribute_proto), + &onnx__attribute_proto__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "value_info", + 12, + 
PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Onnx__FunctionProto, n_value_info), + offsetof(Onnx__FunctionProto, value_info), + &onnx__value_info_proto__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "overload", + 13, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(Onnx__FunctionProto, overload), + NULL, + &protobuf_c_empty_string, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, + { + "metadata_props", + 14, + PROTOBUF_C_LABEL_REPEATED, + PROTOBUF_C_TYPE_MESSAGE, + offsetof(Onnx__FunctionProto, n_metadata_props), + offsetof(Onnx__FunctionProto, metadata_props), + &onnx__string_string_entry_proto__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned onnx__function_proto__field_indices_by_name[] = { + 3, /* field[3] = attribute */ + 8, /* field[8] = attribute_proto */ + 5, /* field[5] = doc_string */ + 7, /* field[7] = domain */ + 1, /* field[1] = input */ + 11, /* field[11] = metadata_props */ + 0, /* field[0] = name */ + 4, /* field[4] = node */ + 6, /* field[6] = opset_import */ + 2, /* field[2] = output */ + 10, /* field[10] = overload */ + 9, /* field[9] = value_info */ +}; +static const ProtobufCIntRange onnx__function_proto__number_ranges[2 + 1] = +{ + { 1, 0 }, + { 4, 1 }, + { 0, 12 } +}; +const ProtobufCMessageDescriptor onnx__function_proto__descriptor = +{ + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "onnx.FunctionProto", + "FunctionProto", + "Onnx__FunctionProto", + "onnx", + sizeof(Onnx__FunctionProto), + 12, + onnx__function_proto__field_descriptors, + onnx__function_proto__field_indices_by_name, + 2, onnx__function_proto__number_ranges, + (ProtobufCMessageInit) onnx__function_proto__init, + NULL,NULL,NULL /* reserved[123] */ +}; +static const ProtobufCEnumValue onnx__version__enum_values_by_number[11] = { { "_START_VERSION", "ONNX__VERSION___START_VERSION", 0 }, { 
"IR_VERSION_2017_10_10", "ONNX__VERSION__IR_VERSION_2017_10_10", 1 }, @@ -2355,20 +2832,26 @@ static const ProtobufCEnumValue onnx__version__enum_values_by_number[8] = { "IR_VERSION_2019_1_22", "ONNX__VERSION__IR_VERSION_2019_1_22", 4 }, { "IR_VERSION_2019_3_18", "ONNX__VERSION__IR_VERSION_2019_3_18", 5 }, { "IR_VERSION_2019_9_19", "ONNX__VERSION__IR_VERSION_2019_9_19", 6 }, - { "IR_VERSION", "ONNX__VERSION__IR_VERSION", 7 }, + { "IR_VERSION_2020_5_8", "ONNX__VERSION__IR_VERSION_2020_5_8", 7 }, + { "IR_VERSION_2021_7_30", "ONNX__VERSION__IR_VERSION_2021_7_30", 8 }, + { "IR_VERSION_2023_5_5", "ONNX__VERSION__IR_VERSION_2023_5_5", 9 }, + { "IR_VERSION", "ONNX__VERSION__IR_VERSION", 10 }, }; static const ProtobufCIntRange onnx__version__value_ranges[] = { -{0, 0},{0, 8} +{0, 0},{0, 11} }; -static const ProtobufCEnumValueIndex onnx__version__enum_values_by_name[8] = +static const ProtobufCEnumValueIndex onnx__version__enum_values_by_name[11] = { - { "IR_VERSION", 7 }, + { "IR_VERSION", 10 }, { "IR_VERSION_2017_10_10", 1 }, { "IR_VERSION_2017_10_30", 2 }, { "IR_VERSION_2017_11_3", 3 }, { "IR_VERSION_2019_1_22", 4 }, { "IR_VERSION_2019_3_18", 5 }, { "IR_VERSION_2019_9_19", 6 }, + { "IR_VERSION_2020_5_8", 7 }, + { "IR_VERSION_2021_7_30", 8 }, + { "IR_VERSION_2023_5_5", 9 }, { "_START_VERSION", 0 }, }; const ProtobufCEnumDescriptor onnx__version__descriptor = @@ -2378,11 +2861,39 @@ const ProtobufCEnumDescriptor onnx__version__descriptor = "Version", "Onnx__Version", "onnx", - 8, + 11, onnx__version__enum_values_by_number, - 8, + 11, onnx__version__enum_values_by_name, 1, onnx__version__value_ranges, NULL,NULL,NULL,NULL /* reserved[1234] */ }; +static const ProtobufCEnumValue onnx__operator_status__enum_values_by_number[2] = +{ + { "EXPERIMENTAL", "ONNX__OPERATOR_STATUS__EXPERIMENTAL", 0 }, + { "STABLE", "ONNX__OPERATOR_STATUS__STABLE", 1 }, +}; +static const ProtobufCIntRange onnx__operator_status__value_ranges[] = { +{0, 0},{0, 2} +}; +static const 
ProtobufCEnumValueIndex onnx__operator_status__enum_values_by_name[2] = +{ + { "EXPERIMENTAL", 0 }, + { "STABLE", 1 }, +}; +const ProtobufCEnumDescriptor onnx__operator_status__descriptor = +{ + PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, + "onnx.OperatorStatus", + "OperatorStatus", + "Onnx__OperatorStatus", + "onnx", + 2, + onnx__operator_status__enum_values_by_number, + 2, + onnx__operator_status__enum_values_by_name, + 1, + onnx__operator_status__value_ranges, + NULL,NULL,NULL,NULL /* reserved[1234] */ +}; diff --git a/modules/fnxext/onnx_engine/src/onnx.proto3.pb-c.h b/modules/fnxext/onnx_engine/src/onnx.proto3.pb-c.h index a2a585b8c34..5719097b705 100644 --- a/modules/fnxext/onnx_engine/src/onnx.proto3.pb-c.h +++ b/modules/fnxext/onnx_engine/src/onnx.proto3.pb-c.h @@ -10,7 +10,7 @@ PROTOBUF_C__BEGIN_DECLS #if PROTOBUF_C_VERSION_NUMBER < 1003000 # error This file was generated by a newer version of protoc-c which is incompatible with your libprotobuf-c headers. Please update your headers. -#elif 1004000 < PROTOBUF_C_MIN_COMPILER_VERSION +#elif 1005000 < PROTOBUF_C_MIN_COMPILER_VERSION # error This file was generated by an older version of protoc-c which is incompatible with your libprotobuf-c headers. Please regenerate this file with a newer version of protoc-c. 
#endif @@ -32,7 +32,10 @@ typedef struct Onnx__TypeProto Onnx__TypeProto; typedef struct Onnx__TypeProto__Tensor Onnx__TypeProto__Tensor; typedef struct Onnx__TypeProto__Sequence Onnx__TypeProto__Sequence; typedef struct Onnx__TypeProto__Map Onnx__TypeProto__Map; +typedef struct Onnx__TypeProto__Optional Onnx__TypeProto__Optional; +typedef struct Onnx__TypeProto__SparseTensor Onnx__TypeProto__SparseTensor; typedef struct Onnx__OperatorSetIdProto Onnx__OperatorSetIdProto; +typedef struct Onnx__FunctionProto Onnx__FunctionProto; /* --- enums --- */ @@ -49,12 +52,14 @@ typedef enum _Onnx__AttributeProto__AttributeType { ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__TENSOR = 4, ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__GRAPH = 5, ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__SPARSE_TENSOR = 11, + ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__TYPE_PROTO = 13, ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__FLOATS = 6, ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__INTS = 7, ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__STRINGS = 8, ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__TENSORS = 9, ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__GRAPHS = 10, - ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__SPARSE_TENSORS = 12 + ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__SPARSE_TENSORS = 12, + ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__TYPE_PROTOS = 14 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE) } Onnx__AttributeProto__AttributeType; typedef enum _Onnx__TensorProto__DataType { @@ -119,7 +124,42 @@ typedef enum _Onnx__TensorProto__DataType { * floating-point number truncated to 16 bits. * This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits. */ - ONNX__TENSOR_PROTO__DATA_TYPE__BFLOAT16 = 16 + ONNX__TENSOR_PROTO__DATA_TYPE__BFLOAT16 = 16, + /* + * Non-IEEE floating-point format based on papers + * FP8 Formats for Deep Learning, https://arxiv.org/abs/2209.05433, + * 8-bit Numerical Formats For Deep Neural Networks, https://arxiv.org/pdf/2206.02915.pdf. 
+ * Operators supported FP8 are Cast, CastLike, QuantizeLinear, DequantizeLinear. + * The computation usually happens inside a block quantize / dequantize + * fused by the runtime. + */ + /* + * float 8, mostly used for coefficients, supports nan, not inf + */ + ONNX__TENSOR_PROTO__DATA_TYPE__FLOAT8E4M3FN = 17, + /* + * float 8, mostly used for coefficients, supports nan, not inf, no negative zero + */ + ONNX__TENSOR_PROTO__DATA_TYPE__FLOAT8E4M3FNUZ = 18, + /* + * follows IEEE 754, supports nan, inf, mostly used for gradients + */ + ONNX__TENSOR_PROTO__DATA_TYPE__FLOAT8E5M2 = 19, + /* + * follows IEEE 754, supports nan, not inf, mostly used for gradients, no negative zero + */ + ONNX__TENSOR_PROTO__DATA_TYPE__FLOAT8E5M2FNUZ = 20, + /* + * 4-bit data-types + */ + /* + * Unsigned integer in range [0, 15] + */ + ONNX__TENSOR_PROTO__DATA_TYPE__UINT4 = 21, + /* + * Signed integer in range [-8, 7], using two's-complement representation + */ + ONNX__TENSOR_PROTO__DATA_TYPE__INT4 = 22 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(ONNX__TENSOR_PROTO__DATA_TYPE) } Onnx__TensorProto__DataType; /* @@ -185,7 +225,7 @@ typedef enum _Onnx__Version { */ ONNX__VERSION__IR_VERSION_2019_9_19 = 6, /* - * IR VERSION 7 published on + * IR VERSION 7 published on May 8, 2020 * - Add support to allow function body graph to rely on multiple external opreator sets. * - Add a list to promote inference graph's initializers to global and * mutable variables. Global variables are visible in all graphs of the @@ -195,9 +235,36 @@ typedef enum _Onnx__Version { * can modify the values of mutable variables. * - Implicitly add inference graph into each TrainingInfoProto's algorithm. 
*/ - ONNX__VERSION__IR_VERSION = 7 + ONNX__VERSION__IR_VERSION_2020_5_8 = 7, + /* + * IR VERSION 8 published on July 30, 2021 + * Introduce TypeProto.SparseTensor + * Introduce TypeProto.Optional + * Added a list of FunctionProtos local to the model + * Deprecated since_version and operator status from FunctionProto + */ + ONNX__VERSION__IR_VERSION_2021_7_30 = 8, + /* + * IR VERSION 9 published on May 5, 2023 + * Added AttributeProto to FunctionProto so that default attribute values can be set. + * Added FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ. + */ + ONNX__VERSION__IR_VERSION_2023_5_5 = 9, + /* + * IR VERSION 10 published on TBD + * Added UINT4, INT4. + */ + ONNX__VERSION__IR_VERSION = 10 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(ONNX__VERSION) } Onnx__Version; +/* + * Operator/function status. + */ +typedef enum _Onnx__OperatorStatus { + ONNX__OPERATOR_STATUS__EXPERIMENTAL = 0, + ONNX__OPERATOR_STATUS__STABLE = 1 + PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(ONNX__OPERATOR_STATUS) +} Onnx__OperatorStatus; /* --- messages --- */ @@ -268,6 +335,14 @@ struct Onnx__AttributeProto * sparse tensor value */ Onnx__SparseTensorProto *sparse_tensor; + /* + * Do not use field below, it's deprecated. 
+ * optional ValueProto v = 12; // value - subsumes everything but graph + */ + /* + * type proto + */ + Onnx__TypeProto *tp; /* * list of floats */ @@ -298,10 +373,15 @@ struct Onnx__AttributeProto */ size_t n_sparse_tensors; Onnx__SparseTensorProto **sparse_tensors; + /* + * list of type protos + */ + size_t n_type_protos; + Onnx__TypeProto **type_protos; }; #define ONNX__ATTRIBUTE_PROTO__INIT \ { PROTOBUF_C_MESSAGE_INIT (&onnx__attribute_proto__descriptor) \ - , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__UNDEFINED, 0, 0, {0,NULL}, NULL, NULL, NULL, 0,NULL, 0,NULL, 0,NULL, 0,NULL, 0,NULL, 0,NULL } + , (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, ONNX__ATTRIBUTE_PROTO__ATTRIBUTE_TYPE__UNDEFINED, 0, 0, {0,NULL}, NULL, NULL, NULL, NULL, 0,NULL, 0,NULL, 0,NULL, 0,NULL, 0,NULL, 0,NULL, 0,NULL } /* @@ -327,10 +407,15 @@ struct Onnx__ValueInfoProto * A human-readable documentation for this value. Markdown is allowed. */ char *doc_string; + /* + * Named metadata values; keys should be distinct. + */ + size_t n_metadata_props; + Onnx__StringStringEntryProto **metadata_props; }; #define ONNX__VALUE_INFO_PROTO__INIT \ { PROTOBUF_C_MESSAGE_INIT (&onnx__value_info_proto__descriptor) \ - , (char *)protobuf_c_empty_string, NULL, (char *)protobuf_c_empty_string } + , (char *)protobuf_c_empty_string, NULL, (char *)protobuf_c_empty_string, 0,NULL } /* @@ -355,7 +440,7 @@ struct Onnx__NodeProto char **output; /* * An optional identifier for this node in a graph. - * This field MAY be absent in ths version of the IR. + * This field MAY be absent in this version of the IR. */ /* * namespace Node @@ -375,6 +460,10 @@ struct Onnx__NodeProto * namespace Domain */ char *domain; + /* + * Overload identifier, used only to map this to a model-local function. + */ + char *overload; /* * Additional named attributes. 
*/ @@ -384,10 +473,15 @@ struct Onnx__NodeProto * A human-readable documentation for this node. Markdown is allowed. */ char *doc_string; + /* + * Named metadata values; keys should be distinct. + */ + size_t n_metadata_props; + Onnx__StringStringEntryProto **metadata_props; }; #define ONNX__NODE_PROTO__INIT \ { PROTOBUF_C_MESSAGE_INIT (&onnx__node_proto__descriptor) \ - , 0,NULL, 0,NULL, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0,NULL, (char *)protobuf_c_empty_string } + , 0,NULL, 0,NULL, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0,NULL, (char *)protobuf_c_empty_string, 0,NULL } /* @@ -434,7 +528,7 @@ struct Onnx__TrainingInfoProto * optimizer node, increment of iteration count. * An execution of the training algorithm step is performed by executing the * graph obtained by combining the inference graph (namely "ModelProto.graph") - * and the "algorithm" graph. That is, the actual the actual + * and the "algorithm" graph. That is, the actual * input/initializer/output/node/value_info/sparse_initializer list of * the training graph is the concatenation of * "ModelProto.graph.input/initializer/output/node/value_info/sparse_initializer" @@ -582,10 +676,27 @@ struct Onnx__ModelProto */ size_t n_training_info; Onnx__TrainingInfoProto **training_info; + /* + * A list of function protos local to the model. + * The (domain, name, overload) tuple must be unique across the function protos in this list. + * In case of any conflicts the behavior (whether the model local functions are given higher priority, + * or standard operator sets are given higher priotity or this is treated as error) is defined by + * the runtimes. + * The operator sets imported by FunctionProto should be compatible with the ones + * imported by ModelProto and other model local FunctionProtos. 
+ * Example, if same operator set say 'A' is imported by a FunctionProto and ModelProto + * or by 2 FunctionProtos then versions for the operator set may be different but, + * the operator schema returned for op_type, domain, version combination + * for both the versions should be same for every node in the function body. + * One FunctionProto can reference other FunctionProto in the model, however, recursive reference + * is not allowed. + */ + size_t n_functions; + Onnx__FunctionProto **functions; }; #define ONNX__MODEL_PROTO__INIT \ { PROTOBUF_C_MESSAGE_INIT (&onnx__model_proto__descriptor) \ - , 0, 0,NULL, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, NULL, 0,NULL, 0,NULL } + , 0, 0,NULL, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0, (char *)protobuf_c_empty_string, NULL, 0,NULL, 0,NULL, 0,NULL } /* @@ -681,10 +792,15 @@ struct Onnx__GraphProto */ size_t n_quantization_annotation; Onnx__TensorAnnotation **quantization_annotation; + /* + * Named metadata values; keys should be distinct. 
+ */ + size_t n_metadata_props; + Onnx__StringStringEntryProto **metadata_props; }; #define ONNX__GRAPH_PROTO__INIT \ { PROTOBUF_C_MESSAGE_INIT (&onnx__graph_proto__descriptor) \ - , 0,NULL, (char *)protobuf_c_empty_string, 0,NULL, 0,NULL, (char *)protobuf_c_empty_string, 0,NULL, 0,NULL, 0,NULL, 0,NULL } + , 0,NULL, (char *)protobuf_c_empty_string, 0,NULL, 0,NULL, (char *)protobuf_c_empty_string, 0,NULL, 0,NULL, 0,NULL, 0,NULL, 0,NULL } /* @@ -733,11 +849,13 @@ struct Onnx__TensorProto size_t n_float_data; float *float_data; /* - * For int32, uint8, int8, uint16, int16, bool, and float16 values - * float16 values must be bit-wise converted to an uint16_t prior + * For int32, uint8, int8, uint16, int16, uint4, int4, bool, float8 and float16 values + * float16 and float8 values must be bit-wise converted to an uint16_t prior * to writing to the buffer. + * uint4 and int4 values must be packed to 4bitx2 prior to writing to the buffer, the first element is stored in + * the 4 LSB and the second element is stored in the 4 MSB. * When this field is present, the data_type field MUST be - * INT32, INT16, INT8, UINT16, UINT8, BOOL, or FLOAT16 + * INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ */ size_t n_int32_data; int32_t *int32_data; @@ -777,6 +895,7 @@ struct Onnx__TensorProto * Complex64 elements must be written as two consecutive FLOAT values, real component first. * Complex128 elements must be written as two consecutive DOUBLE values, real component first. * Boolean type MUST be written one byte per tensor element (00000001 for true, 00000000 for false). + * uint4 and int4 values must be packed to 4bitx2, the first element is stored in the 4 LSB and the second element is stored in the 4 MSB. * Note: the advantage of specific field rather than the raw_data field is * that in some cases (e.g. 
int data), protobuf does a better packing via * variable length storage, and may lead to smaller binary footprint. @@ -818,10 +937,15 @@ struct Onnx__TensorProto */ size_t n_uint64_data; uint64_t *uint64_data; + /* + * Named metadata values; keys should be distinct. + */ + size_t n_metadata_props; + Onnx__StringStringEntryProto **metadata_props; }; #define ONNX__TENSOR_PROTO__INIT \ { PROTOBUF_C_MESSAGE_INIT (&onnx__tensor_proto__descriptor) \ - , 0,NULL, 0, NULL, 0,NULL, 0,NULL, 0,NULL, 0,NULL, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, {0,NULL}, 0,NULL, ONNX__TENSOR_PROTO__DATA_LOCATION__DEFAULT, 0,NULL, 0,NULL } + , 0,NULL, 0, NULL, 0,NULL, 0,NULL, 0,NULL, 0,NULL, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, {0,NULL}, 0,NULL, ONNX__TENSOR_PROTO__DATA_LOCATION__DEFAULT, 0,NULL, 0,NULL, 0,NULL } /* @@ -875,7 +999,7 @@ struct Onnx__TensorShapeProto__Dimension * Standard denotation can optionally be used to denote tensor * dimensions with standard semantic descriptions to ensure * that operations are applied to the correct axis of a tensor. - * Refer to https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition + * Refer to https://github.com/onnx/onnx/blob/main/docs/DimensionDenotation.md#denotation-definition * for pre-defined dimension denotations. */ char *denotation; @@ -964,11 +1088,47 @@ struct Onnx__TypeProto__Map , 0, NULL } +/* + * wrapper for Tensor, Sequence, or Map + */ +struct Onnx__TypeProto__Optional +{ + ProtobufCMessage base; + /* + * The type and optional shape of the element wrapped. + * This field MUST be present for this version of the IR. 
+ * Possible values correspond to OptionalProto.DataType enum + */ + Onnx__TypeProto *elem_type; +}; +#define ONNX__TYPE_PROTO__OPTIONAL__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&onnx__type_proto__optional__descriptor) \ + , NULL } + + +struct Onnx__TypeProto__SparseTensor +{ + ProtobufCMessage base; + /* + * This field MUST NOT have the value of UNDEFINED + * This field MUST have a valid TensorProto.DataType value + * This field MUST be present for this version of the IR. + */ + int32_t elem_type; + Onnx__TensorShapeProto *shape; +}; +#define ONNX__TYPE_PROTO__SPARSE_TENSOR__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&onnx__type_proto__sparse_tensor__descriptor) \ + , 0, NULL } + + typedef enum { ONNX__TYPE_PROTO__VALUE__NOT_SET = 0, ONNX__TYPE_PROTO__VALUE_TENSOR_TYPE = 1, ONNX__TYPE_PROTO__VALUE_SEQUENCE_TYPE = 4, - ONNX__TYPE_PROTO__VALUE_MAP_TYPE = 5 + ONNX__TYPE_PROTO__VALUE_MAP_TYPE = 5, + ONNX__TYPE_PROTO__VALUE_OPTIONAL_TYPE = 9, + ONNX__TYPE_PROTO__VALUE_SPARSE_TENSOR_TYPE = 8 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(ONNX__TYPE_PROTO__VALUE__CASE) } Onnx__TypeProto__ValueCase; @@ -982,7 +1142,7 @@ struct Onnx__TypeProto /* * An optional denotation can be used to denote the whole * type with a standard semantic description as to what is - * stored inside. Refer to https://github.com/onnx/onnx/blob/master/docs/TypeDenotation.md#type-denotation-definition + * stored inside. Refer to https://github.com/onnx/onnx/blob/main/docs/TypeDenotation.md#type-denotation-definition * for pre-defined type denotations. */ char *denotation; @@ -1000,6 +1160,14 @@ struct Onnx__TypeProto * The type of a map. */ Onnx__TypeProto__Map *map_type; + /* + * The type of an optional. 
+ */ + Onnx__TypeProto__Optional *optional_type; + /* + * Type of the sparse tensor + */ + Onnx__TypeProto__SparseTensor *sparse_tensor_type; }; }; #define ONNX__TYPE_PROTO__INIT \ @@ -1032,6 +1200,75 @@ struct Onnx__OperatorSetIdProto , (char *)protobuf_c_empty_string, 0 } +struct Onnx__FunctionProto +{ + ProtobufCMessage base; + /* + * The name of the function, similar to op_type in NodeProto. + * This is part of the unique-id (domain, name, overload) of FunctionProtos in a model. + */ + char *name; + /* + * The inputs and outputs of the function. + */ + size_t n_input; + char **input; + size_t n_output; + char **output; + /* + * The attribute parameters of the function. + * It is for function parameters without default values. + */ + size_t n_attribute; + char **attribute; + /* + * The attribute protos of the function. + * It is for function attributes with default values. + * A function attribute shall be represented either as + * a string attribute or an AttributeProto, not both. + */ + size_t n_attribute_proto; + Onnx__AttributeProto **attribute_proto; + /* + * The nodes in the function. + */ + size_t n_node; + Onnx__NodeProto **node; + /* + * A human-readable documentation for this function. Markdown is allowed. + */ + char *doc_string; + size_t n_opset_import; + Onnx__OperatorSetIdProto **opset_import; + /* + * The domain which this function belongs to. + * This is part of the unique-id (domain, name, overload) of FunctionProtos in a model. + */ + char *domain; + /* + * The overload identifier of the function. + * This is part of the unique-id (domain, name, overload) of FunctionProtos in a model. + */ + char *overload; + /* + * Information for the values in the function. The ValueInfoProto.name's + * must be distinct and refer to names in the function (including inputs, + * outputs, and intermediate values). It is optional for a value to appear + * in value_info list. 
+ */ + size_t n_value_info; + Onnx__ValueInfoProto **value_info; + /* + * Named metadata values; keys should be distinct. + */ + size_t n_metadata_props; + Onnx__StringStringEntryProto **metadata_props; +}; +#define ONNX__FUNCTION_PROTO__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&onnx__function_proto__descriptor) \ + , (char *)protobuf_c_empty_string, 0,NULL, 0,NULL, 0,NULL, 0,NULL, 0,NULL, (char *)protobuf_c_empty_string, 0,NULL, (char *)protobuf_c_empty_string, (char *)protobuf_c_empty_string, 0,NULL, 0,NULL } + + /* Onnx__AttributeProto methods */ void onnx__attribute_proto__init (Onnx__AttributeProto *message); @@ -1256,6 +1493,12 @@ void onnx__type_proto__sequence__init /* Onnx__TypeProto__Map methods */ void onnx__type_proto__map__init (Onnx__TypeProto__Map *message); +/* Onnx__TypeProto__Optional methods */ +void onnx__type_proto__optional__init + (Onnx__TypeProto__Optional *message); +/* Onnx__TypeProto__SparseTensor methods */ +void onnx__type_proto__sparse_tensor__init + (Onnx__TypeProto__SparseTensor *message); /* Onnx__TypeProto methods */ void onnx__type_proto__init (Onnx__TypeProto *message); @@ -1294,6 +1537,25 @@ Onnx__OperatorSetIdProto * void onnx__operator_set_id_proto__free_unpacked (Onnx__OperatorSetIdProto *message, ProtobufCAllocator *allocator); +/* Onnx__FunctionProto methods */ +void onnx__function_proto__init + (Onnx__FunctionProto *message); +size_t onnx__function_proto__get_packed_size + (const Onnx__FunctionProto *message); +size_t onnx__function_proto__pack + (const Onnx__FunctionProto *message, + uint8_t *out); +size_t onnx__function_proto__pack_to_buffer + (const Onnx__FunctionProto *message, + ProtobufCBuffer *buffer); +Onnx__FunctionProto * + onnx__function_proto__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void onnx__function_proto__free_unpacked + (Onnx__FunctionProto *message, + ProtobufCAllocator *allocator); /* --- per-message closures --- */ typedef void (*Onnx__AttributeProto_Closure) @@ 
-1344,12 +1606,21 @@ typedef void (*Onnx__TypeProto__Sequence_Closure) typedef void (*Onnx__TypeProto__Map_Closure) (const Onnx__TypeProto__Map *message, void *closure_data); +typedef void (*Onnx__TypeProto__Optional_Closure) + (const Onnx__TypeProto__Optional *message, + void *closure_data); +typedef void (*Onnx__TypeProto__SparseTensor_Closure) + (const Onnx__TypeProto__SparseTensor *message, + void *closure_data); typedef void (*Onnx__TypeProto_Closure) (const Onnx__TypeProto *message, void *closure_data); typedef void (*Onnx__OperatorSetIdProto_Closure) (const Onnx__OperatorSetIdProto *message, void *closure_data); +typedef void (*Onnx__FunctionProto_Closure) + (const Onnx__FunctionProto *message, + void *closure_data); /* --- services --- */ @@ -1357,6 +1628,7 @@ typedef void (*Onnx__OperatorSetIdProto_Closure) /* --- descriptors --- */ extern const ProtobufCEnumDescriptor onnx__version__descriptor; +extern const ProtobufCEnumDescriptor onnx__operator_status__descriptor; extern const ProtobufCMessageDescriptor onnx__attribute_proto__descriptor; extern const ProtobufCEnumDescriptor onnx__attribute_proto__attribute_type__descriptor; extern const ProtobufCMessageDescriptor onnx__value_info_proto__descriptor; @@ -1377,7 +1649,10 @@ extern const ProtobufCMessageDescriptor onnx__type_proto__descriptor; extern const ProtobufCMessageDescriptor onnx__type_proto__tensor__descriptor; extern const ProtobufCMessageDescriptor onnx__type_proto__sequence__descriptor; extern const ProtobufCMessageDescriptor onnx__type_proto__map__descriptor; +extern const ProtobufCMessageDescriptor onnx__type_proto__optional__descriptor; +extern const ProtobufCMessageDescriptor onnx__type_proto__sparse_tensor__descriptor; extern const ProtobufCMessageDescriptor onnx__operator_set_id_proto__descriptor; +extern const ProtobufCMessageDescriptor onnx__function_proto__descriptor; PROTOBUF_C__END_DECLS diff --git a/modules/fnxext/onnx_engine/src/onnxconf.h b/modules/fnxext/onnx_engine/src/onnxconf.h 
index 9b27d524f39..7f1ccc059b2 100644 --- a/modules/fnxext/onnx_engine/src/onnxconf.h +++ b/modules/fnxext/onnx_engine/src/onnxconf.h @@ -10,17 +10,60 @@ extern "C" { #include #include #include +#include +#include + #include #include #include "list.h" #include "hmap.h" +#ifndef onnx_malloc +#define onnx_malloc malloc +#endif + +#ifndef onnx_free +#define onnx_free free +#endif + +#ifndef onnx_memcpy +#define onnx_memcpy memcpy +#endif + +#ifndef onnx_memset +#define onnx_memset memset +#endif + +#ifndef onnx_memmove +#define onnx_memmove memmove +#endif + +#ifndef onnx_strdup +#define onnx_strdup strdup +#endif + +#ifndef onnx_strcmp +#define onnx_strcmp strcmp +#endif + +#ifndef onnx_strlen +#define onnx_strlen strlen +#endif + +#ifndef onnx_printf +#define onnx_printf printf +#endif + +#ifndef onnx_assert +#define onnx_assert assert +#endif + /* * Macro */ -#define minn(a, b) ({typeof(a) _amin = (a); typeof(b) _bmin = (b); (void)(&_amin == &_bmin); _amin < _bmin ? _amin : _bmin;}) -#define maxx(a, b) ({typeof(a) _amax = (a); typeof(b) _bmax = (b); (void)(&_amax == &_bmax); _amax > _bmax ? _amax : _bmax;}) -#define clamp(v, a, b) min(max(a, v), b) +#define XMIN(a, b) ({typeof(a) _amin = (a); typeof(b) _bmin = (b); (void)(&_amin == &_bmin); _amin < _bmin ? _amin : _bmin;}) +#define XMAX(a, b) ({typeof(a) _amax = (a); typeof(b) _bmax = (b); (void)(&_amax == &_bmax); _amax > _bmax ? _amax : _bmax;}) +#define XCLAMP(v, a, b) XMIN(XMAX(a, v), b) /* * little or big endian diff --git a/modules/fnxext/onnx_engine/src/protobuf-c.c b/modules/fnxext/onnx_engine/src/protobuf-c.c index ad1bdb14f82..28f02c31304 100644 --- a/modules/fnxext/onnx_engine/src/protobuf-c.c +++ b/modules/fnxext/onnx_engine/src/protobuf-c.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2015, Dave Benson and the protobuf-c authors. + * Copyright (c) 2008-2023, Dave Benson and the protobuf-c authors. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -53,7 +53,7 @@ #define TRUE 1 #define FALSE 0 -#define PROTOBUF_C__ASSERT_NOT_REACHED() assert(0) +#define PROTOBUF_C__ASSERT_NOT_REACHED() onnx_assert(0) /* Workaround for Microsoft compilers. */ #ifdef _MSC_VER @@ -118,16 +118,16 @@ const char protobuf_c_empty_string[] = ""; /* Assertions for magic numbers. */ #define ASSERT_IS_ENUM_DESCRIPTOR(desc) \ - assert((desc)->magic == PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC) + onnx_assert((desc)->magic == PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC) #define ASSERT_IS_MESSAGE_DESCRIPTOR(desc) \ - assert((desc)->magic == PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC) + onnx_assert((desc)->magic == PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC) #define ASSERT_IS_MESSAGE(message) \ ASSERT_IS_MESSAGE_DESCRIPTOR((message)->descriptor) #define ASSERT_IS_SERVICE_DESCRIPTOR(desc) \ - assert((desc)->magic == PROTOBUF_C__SERVICE_DESCRIPTOR_MAGIC) + onnx_assert((desc)->magic == PROTOBUF_C__SERVICE_DESCRIPTOR_MAGIC) /**@}*/ @@ -151,14 +151,14 @@ static void * system_alloc(void *allocator_data, size_t size) { (void)allocator_data; - return malloc(size); + return onnx_malloc(size); } static void system_free(void *allocator_data, void *data) { (void)allocator_data; - free(data); + onnx_free(data); } static inline void * @@ -206,7 +206,7 @@ protobuf_c_buffer_simple_append(ProtobufCBuffer *buffer, new_data = do_alloc(allocator, new_alloced); if (!new_data) return; - memcpy(new_data, simp->data, simp->len); + onnx_memcpy(new_data, simp->data, simp->len); if (simp->must_free_data) do_free(allocator, simp->data); else @@ -214,7 +214,7 @@ protobuf_c_buffer_simple_append(ProtobufCBuffer *buffer, simp->data = new_data; simp->alloced = new_alloced; } - memcpy(simp->data + simp->len, data, len); + onnx_memcpy(simp->data + simp->len, data, len); simp->len = new_len; } @@ -316,9 +316,8 @@ int32_size(int32_t v) static inline uint32_t zigzag32(int32_t v) { - // Note: the right-shift must be arithmetic - // 
Note: left shift must be unsigned because of overflow - return ((uint32_t)(v) << 1) ^ (uint32_t)(v >> 31); + // Note: Using unsigned types prevents undefined behavior + return ((uint32_t)v << 1) ^ -((uint32_t)v >> 31); } /** @@ -380,9 +379,8 @@ uint64_size(uint64_t v) static inline uint64_t zigzag64(int64_t v) { - // Note: the right-shift must be arithmetic - // Note: left shift must be unsigned because of overflow - return ((uint64_t)(v) << 1) ^ (uint64_t)(v >> 63); + // Note: Using unsigned types prevents undefined behavior + return ((uint64_t)v << 1) ^ -((uint64_t)v >> 63); } /** @@ -445,7 +443,7 @@ required_field_get_packed_size(const ProtobufCFieldDescriptor *field, return rv + 8; case PROTOBUF_C_TYPE_STRING: { const char *str = *(char * const *) member; - size_t len = str ? strlen(str) : 0; + size_t len = str ? onnx_strlen(str) : 0; return rv + uint32_size(len) + len; } case PROTOBUF_C_TYPE_BYTES: { @@ -662,7 +660,7 @@ repeated_field_get_packed_size(const ProtobufCFieldDescriptor *field, break; case PROTOBUF_C_TYPE_STRING: for (i = 0; i < count; i++) { - size_t len = strlen(((char **) array)[i]); + size_t len = onnx_strlen(((char **) array)[i]); rv += uint32_size(len) + len; } break; @@ -796,13 +794,14 @@ uint32_pack(uint32_t value, uint8_t *out) } } } - /* assert: value<128 */ + /* onnx_assert: value<128 */ out[rv++] = value; return rv; } /** - * Pack a signed 32-bit integer and return the number of bytes written. + * Pack a signed 32-bit integer and return the number of bytes written, + * passed as unsigned to avoid implementation-specific behavior. * Negative numbers are encoded as two's complement 64-bit integers. * * \param value @@ -813,14 +812,14 @@ uint32_pack(uint32_t value, uint8_t *out) * Number of bytes written to `out`. 
*/ static inline size_t -int32_pack(int32_t value, uint8_t *out) +int32_pack(uint32_t value, uint8_t *out) { - if (value < 0) { + if ((int32_t)value < 0) { out[0] = value | 0x80; out[1] = (value >> 7) | 0x80; out[2] = (value >> 14) | 0x80; out[3] = (value >> 21) | 0x80; - out[4] = (value >> 28) | 0x80; + out[4] = (value >> 28) | 0xf0; out[5] = out[6] = out[7] = out[8] = 0xff; out[9] = 0x01; return 10; @@ -918,7 +917,7 @@ static inline size_t fixed32_pack(uint32_t value, void *out) { #if !defined(WORDS_BIGENDIAN) - memcpy(out, &value, 4); + onnx_memcpy(out, &value, 4); #else uint8_t *buf = out; @@ -949,7 +948,7 @@ static inline size_t fixed64_pack(uint64_t value, void *out) { #if !defined(WORDS_BIGENDIAN) - memcpy(out, &value, 8); + onnx_memcpy(out, &value, 8); #else fixed32_pack(value, out); fixed32_pack(value >> 32, ((char *) out) + 4); @@ -999,9 +998,9 @@ string_pack(const char *str, uint8_t *out) out[0] = 0; return 1; } else { - size_t len = strlen(str); + size_t len = onnx_strlen(str); size_t rv = uint32_pack(len, out); - memcpy(out + rv, str, len); + onnx_memcpy(out + rv, str, len); return rv + len; } } @@ -1022,7 +1021,7 @@ binary_data_pack(const ProtobufCBinaryData *bd, uint8_t *out) { size_t len = bd->len; size_t rv = uint32_pack(len, out); - memcpy(out + rv, bd->data, len); + onnx_memcpy(out + rv, bd->data, len); return rv + len; } @@ -1047,7 +1046,7 @@ prefixed_message_pack(const ProtobufCMessage *message, uint8_t *out) size_t rv = protobuf_c_message_pack(message, out + 1); uint32_t rv_packed_size = uint32_size(rv); if (rv_packed_size != 1) - memmove(out + rv_packed_size, out + 1, rv); + onnx_memmove(out + rv_packed_size, out + 1, rv); return uint32_pack(rv, out) + rv; } } @@ -1279,7 +1278,7 @@ static void copy_to_little_endian_32(void *out, const void *in, const unsigned n) { #if !defined(WORDS_BIGENDIAN) - memcpy(out, in, n * 4); + onnx_memcpy(out, in, n * 4); #else unsigned i; const uint32_t *ini = in; @@ -1302,7 +1301,7 @@ static void 
copy_to_little_endian_64(void *out, const void *in, const unsigned n) { #if !defined(WORDS_BIGENDIAN) - memcpy(out, in, n * 8); + onnx_memcpy(out, in, n * 8); #else unsigned i; const uint64_t *ini = in; @@ -1437,8 +1436,8 @@ repeated_field_pack(const ProtobufCFieldDescriptor *field, payload_len = payload_at - (out + header_len); actual_length_size = uint32_size(payload_len); if (length_size_min != actual_length_size) { - assert(actual_length_size == length_size_min + 1); - memmove(out + header_len + 1, out + header_len, + onnx_assert(actual_length_size == length_size_min + 1); + onnx_memmove(out + header_len + 1, out + header_len, payload_len); header_len++; } @@ -1463,7 +1462,7 @@ unknown_field_pack(const ProtobufCMessageUnknownField *field, uint8_t *out) { size_t rv = tag_pack(field->tag, out); out[0] |= field->wire_type; - memcpy(out + rv, field->data, field->len); + onnx_memcpy(out + rv, field->data, field->len); return rv + field->len; } @@ -1601,7 +1600,7 @@ required_field_pack_to_buffer(const ProtobufCFieldDescriptor *field, break; case PROTOBUF_C_TYPE_STRING: { const char *str = *(char *const *) member; - size_t sublen = str ? strlen(str) : 0; + size_t sublen = str ? 
onnx_strlen(str) : 0; scratch[0] |= PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED; rv += uint32_pack(sublen, scratch + rv); @@ -1889,7 +1888,6 @@ pack_buffer_packed_payload(const ProtobufCFieldDescriptor *field, for (i = 0; i < count; i++) { unsigned len = boolean_pack(((protobuf_c_boolean *) array)[i], scratch); buffer->append(buffer, len, scratch); - rv += len; } return count; default: @@ -1923,7 +1921,8 @@ repeated_field_pack_to_buffer(const ProtobufCFieldDescriptor *field, rv += uint32_pack(payload_len, scratch + rv); buffer->append(buffer, rv, scratch); tmp = pack_buffer_packed_payload(field, count, array, buffer); - assert(tmp == payload_len); + onnx_assert(tmp == payload_len); + (void)tmp; return rv + payload_len; } else { size_t siz; @@ -2206,9 +2205,9 @@ merge_messages(ProtobufCMessage *earlier_msg, if (!new_field) return FALSE; - memcpy(new_field, *p_earlier, + onnx_memcpy(new_field, *p_earlier, *n_earlier * el_size); - memcpy(new_field + + onnx_memcpy(new_field + *n_earlier * el_size, *p_latter, *n_latter * el_size); @@ -2323,7 +2322,7 @@ merge_messages(ProtobufCMessage *earlier_msg, if (need_to_merge) { size_t el_size = sizeof_elt_in_repeated_array(field->type); - memcpy(latter_elem, earlier_elem, el_size); + onnx_memcpy(latter_elem, earlier_elem, el_size); /* * Reset the element from the old message to 0 * to make sure earlier message deallocation @@ -2331,7 +2330,7 @@ merge_messages(ProtobufCMessage *earlier_msg, * message, earlier message will be freed after * this function is called anyway */ - memset(earlier_elem, 0, el_size); + onnx_memset(earlier_elem, 0, el_size); if (field->quantifier_offset != 0) { /* Set the has field or the case enum, @@ -2425,7 +2424,7 @@ static inline int32_t unzigzag32(uint32_t v) { // Note: Using unsigned types prevents undefined behavior - return (int32_t)((v >> 1) ^ (~(v & 1) + 1)); + return (int32_t)((v >> 1) ^ -(v & 1)); } static inline uint32_t @@ -2433,7 +2432,7 @@ parse_fixed_uint32(const uint8_t *data) { #if 
!defined(WORDS_BIGENDIAN) uint32_t t; - memcpy(&t, data, 4); + onnx_memcpy(&t, data, 4); return t; #else return data[0] | @@ -2467,7 +2466,7 @@ static inline int64_t unzigzag64(uint64_t v) { // Note: Using unsigned types prevents undefined behavior - return (int64_t)((v >> 1) ^ (~(v & 1) + 1)); + return (int64_t)((v >> 1) ^ -(v & 1)); } static inline uint64_t @@ -2475,7 +2474,7 @@ parse_fixed_uint64(const uint8_t *data) { #if !defined(WORDS_BIGENDIAN) uint64_t t; - memcpy(&t, data, 8); + onnx_memcpy(&t, data, 8); return t; #else return (uint64_t) parse_fixed_uint32(data) | @@ -2557,13 +2556,13 @@ parse_required_member(ScannedMember *scanned_member, if (maybe_clear && *pstr != NULL) { const char *def = scanned_member->field->default_value; - if (*pstr != NULL && *pstr != def) + if (*pstr != def) do_free(allocator, *pstr); } *pstr = do_alloc(allocator, len - pref_len + 1); if (*pstr == NULL) return FALSE; - memcpy(*pstr, data + pref_len, len - pref_len); + onnx_memcpy(*pstr, data + pref_len, len - pref_len); (*pstr)[len - pref_len] = 0; return TRUE; } @@ -2586,7 +2585,7 @@ parse_required_member(ScannedMember *scanned_member, bd->data = do_alloc(allocator, len - pref_len); if (bd->data == NULL) return FALSE; - memcpy(bd->data, data + pref_len, len - pref_len); + onnx_memcpy(bd->data, data + pref_len, len - pref_len); } else { bd->data = NULL; } @@ -2604,10 +2603,13 @@ parse_required_member(ScannedMember *scanned_member, return FALSE; def_mess = scanned_member->field->default_value; - subm = protobuf_c_message_unpack(scanned_member->field->descriptor, - allocator, - len - pref_len, - data + pref_len); + if (len >= pref_len) + subm = protobuf_c_message_unpack(scanned_member->field->descriptor, + allocator, + len - pref_len, + data + pref_len); + else + subm = NULL; if (maybe_clear && *pmessage != NULL && @@ -2679,7 +2681,7 @@ parse_oneof_member (ScannedMember *scanned_member, break; } - memset (member, 0, el_size); + onnx_memset (member, 0, el_size); } if 
(!parse_required_member (scanned_member, member, allocator, TRUE)) return FALSE; @@ -2864,7 +2866,7 @@ parse_packed_repeated_member(ScannedMember *scanned_member, #if !defined(WORDS_BIGENDIAN) no_unpacking_needed: - memcpy(array, at, count * siz); + onnx_memcpy(array, at, count * siz); *p_n += count; return TRUE; #endif @@ -2897,7 +2899,7 @@ parse_member(ScannedMember *scanned_member, ufield->data = do_alloc(allocator, scanned_member->len); if (ufield->data == NULL) return FALSE; - memcpy(ufield->data, scanned_member->data, ufield->len); + onnx_memcpy(ufield->data, scanned_member->data, ufield->len); return TRUE; } member = (char *) message + field->offset; @@ -2945,7 +2947,7 @@ message_init_generic(const ProtobufCMessageDescriptor *desc, { unsigned i; - memset(message, 0, desc->sizeof_message); + onnx_memset(message, 0, desc->sizeof_message); message->descriptor = desc; for (i = 0; i < desc->n_fields; i++) { if (desc->fields[i].default_value != NULL && @@ -2963,7 +2965,7 @@ message_init_generic(const ProtobufCMessageDescriptor *desc, case PROTOBUF_C_TYPE_FIXED32: case PROTOBUF_C_TYPE_FLOAT: case PROTOBUF_C_TYPE_ENUM: - memcpy(field, dv, 4); + onnx_memcpy(field, dv, 4); break; case PROTOBUF_C_TYPE_INT64: case PROTOBUF_C_TYPE_SINT64: @@ -2971,13 +2973,13 @@ message_init_generic(const ProtobufCMessageDescriptor *desc, case PROTOBUF_C_TYPE_UINT64: case PROTOBUF_C_TYPE_FIXED64: case PROTOBUF_C_TYPE_DOUBLE: - memcpy(field, dv, 8); + onnx_memcpy(field, dv, 8); break; case PROTOBUF_C_TYPE_BOOL: - memcpy(field, dv, sizeof(protobuf_c_boolean)); + onnx_memcpy(field, dv, sizeof(protobuf_c_boolean)); break; case PROTOBUF_C_TYPE_BYTES: - memcpy(field, dv, sizeof(ProtobufCBinaryData)); + onnx_memcpy(field, dv, sizeof(ProtobufCBinaryData)); break; case PROTOBUF_C_TYPE_STRING: @@ -3072,7 +3074,7 @@ protobuf_c_message_unpack(const ProtobufCMessageDescriptor *desc, } required_fields_bitmap_alloced = TRUE; } - memset(required_fields_bitmap, 0, required_fields_bitmap_len); + 
onnx_memset(required_fields_bitmap, 0, required_fields_bitmap_len); /* * Generated code always defines "message_init". However, we provide a @@ -3229,6 +3231,9 @@ protobuf_c_message_unpack(const ProtobufCMessageDescriptor *desc, /* allocate space for repeated fields, also check that all required fields have been set */ for (f = 0; f < desc->n_fields; f++) { const ProtobufCFieldDescriptor *field = desc->fields + f; + if (field == NULL) { + continue; + } if (field->label == PROTOBUF_C_LABEL_REPEATED) { size_t siz = sizeof_elt_in_repeated_array(field->type); @@ -3239,7 +3244,7 @@ protobuf_c_message_unpack(const ProtobufCMessageDescriptor *desc, unsigned n = *n_ptr; void *a; *n_ptr = 0; - assert(rv->descriptor != NULL); + onnx_assert(rv->descriptor != NULL); #define CLEAR_REMAINING_N_PTRS() \ for(f++;f < desc->n_fields; f++) \ { \ @@ -3513,7 +3518,7 @@ protobuf_c_service_invoke_internal(ProtobufCService *service, * likely invoking a newly added method on an old service. (Although * other memory corruption bugs can cause this assertion too.) 
*/ - assert(method_index < service->descriptor->n_methods); + onnx_assert(method_index < service->descriptor->n_methods); /* * Get the array of virtual methods (which are enumerated by the @@ -3538,7 +3543,7 @@ protobuf_c_service_generated_init(ProtobufCService *service, service->descriptor = descriptor; service->destroy = destroy; service->invoke = protobuf_c_service_invoke_internal; - memset(service + 1, 0, descriptor->n_methods * sizeof(GenericHandler)); + onnx_memset(service + 1, 0, descriptor->n_methods * sizeof(GenericHandler)); } void protobuf_c_service_destroy(ProtobufCService *service) @@ -3562,7 +3567,7 @@ protobuf_c_enum_descriptor_get_value_by_name(const ProtobufCEnumDescriptor *desc while (count > 1) { unsigned mid = start + count / 2; - int rv = strcmp(desc->values_by_name[mid].name, name); + int rv = onnx_strcmp(desc->values_by_name[mid].name, name); if (rv == 0) return desc->values + desc->values_by_name[mid].index; else if (rv < 0) { @@ -3573,7 +3578,7 @@ protobuf_c_enum_descriptor_get_value_by_name(const ProtobufCEnumDescriptor *desc } if (count == 0) return NULL; - if (strcmp(desc->values_by_name[start].name, name) == 0) + if (onnx_strcmp(desc->values_by_name[start].name, name) == 0) return desc->values + desc->values_by_name[start].index; return NULL; } @@ -3605,7 +3610,7 @@ protobuf_c_message_descriptor_get_field_by_name(const ProtobufCMessageDescriptor unsigned mid = start + count / 2; int rv; field = desc->fields + desc->fields_sorted_by_name[mid]; - rv = strcmp(field->name, name); + rv = onnx_strcmp(field->name, name); if (rv == 0) return field; else if (rv < 0) { @@ -3617,7 +3622,7 @@ protobuf_c_message_descriptor_get_field_by_name(const ProtobufCMessageDescriptor if (count == 0) return NULL; field = desc->fields + desc->fields_sorted_by_name[start]; - if (strcmp(field->name, name) == 0) + if (onnx_strcmp(field->name, name) == 0) return field; return NULL; } @@ -3648,7 +3653,7 @@ protobuf_c_service_descriptor_get_method_by_name(const 
ProtobufCServiceDescripto unsigned mid = start + count / 2; unsigned mid_index = desc->method_indices_by_name[mid]; const char *mid_name = desc->methods[mid_index].name; - int rv = strcmp(mid_name, name); + int rv = onnx_strcmp(mid_name, name); if (rv == 0) return desc->methods + desc->method_indices_by_name[mid]; @@ -3661,7 +3666,7 @@ protobuf_c_service_descriptor_get_method_by_name(const ProtobufCServiceDescripto } if (count == 0) return NULL; - if (strcmp(desc->methods[desc->method_indices_by_name[start]].name, name) == 0) + if (onnx_strcmp(desc->methods[desc->method_indices_by_name[start]].name, name) == 0) return desc->methods + desc->method_indices_by_name[start]; return NULL; } diff --git a/modules/fnxext/onnx_engine/src/protobuf-c.h b/modules/fnxext/onnx_engine/src/protobuf-c.h index b633722ed5e..45362ccf6f3 100644 --- a/modules/fnxext/onnx_engine/src/protobuf-c.h +++ b/modules/fnxext/onnx_engine/src/protobuf-c.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2018, Dave Benson and the protobuf-c authors. + * Copyright (c) 2008-2023, Dave Benson and the protobuf-c authors. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -196,10 +196,7 @@ size_t foo__bar__baz_bah__pack_to_buffer #ifndef PROTOBUF_C_H #define PROTOBUF_C_H -#include -#include -#include -#include +#include "onnxconf.h" #ifdef __cplusplus # define PROTOBUF_C__BEGIN_DECLS extern "C" { @@ -794,13 +791,13 @@ protobuf_c_version_number(void); * The version of the protobuf-c headers, represented as a string using the same * format as protobuf_c_version(). */ -#define PROTOBUF_C_VERSION "1.4.0" +#define PROTOBUF_C_VERSION "1.5.0" /** * The version of the protobuf-c headers, represented as an integer using the * same format as protobuf_c_version_number(). 
*/ -#define PROTOBUF_C_VERSION_NUMBER 1004000 +#define PROTOBUF_C_VERSION_NUMBER 1005000 /** * The minimum protoc-c version which works with the current version of the @@ -1065,7 +1062,7 @@ do { \ (simp_buf)->allocator, \ (simp_buf)->data); \ else \ - free((simp_buf)->data); \ + onnx_free((simp_buf)->data); \ } \ } while (0) From 134a43758fa539546ceb2cd665419fa031a5f3fd Mon Sep 17 00:00:00 2001 From: Shorin Sergey Date: Fri, 11 Apr 2025 19:03:02 +0300 Subject: [PATCH 02/11] [fnx] OnnxEngine - add default shape_params --- modules/fnxext/onnx_engine.cpp | 29 +++++++++++++++++++++++++-- modules/fnxext/onnx_engine.h | 2 +- modules/fnxext/onnx_engine/src/onnx.c | 2 ++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/modules/fnxext/onnx_engine.cpp b/modules/fnxext/onnx_engine.cpp index 05eb67bdee5..0312824c3bc 100644 --- a/modules/fnxext/onnx_engine.cpp +++ b/modules/fnxext/onnx_engine.cpp @@ -63,12 +63,37 @@ Array OnnxEngine::run(const Array& data) { } -Variant OnnxEngine::load_from_file(const String &file_path) { +Variant OnnxEngine::load_from_file(const String &file_path, const Dictionary ¶ms) { Vector onnx_data = FileAccess::get_file_as_array(file_path); + + List shape_keys; + params.get_key_list(&shape_keys); + + struct hmap_t *shape_params = hmap_alloc(0, NULL); + if (!shape_params) { + print_line(String("OnnxEngine - Cant alloc shape_params")); + return 0; + } + + // TODO + // for (List::Element *E = shape_keys.front(); E; E = E->next()) { + + // int(E->get()); + // hmap_add(shape_params, String(E->get()).ptr(), &width); + + // } + + + int64_t width = 128; + int64_t batch_size = 1; + hmap_add(shape_params, "width", &width); + hmap_add(shape_params, "batch_size", &batch_size); + - ctx = onnx_context_alloc(onnx_data.ptr(), onnx_data.size(), NULL, 0, 0); + ctx = onnx_context_alloc(onnx_data.ptr(), onnx_data.size(), NULL, 0, shape_params); if (ctx) { + print_line(String("OnnxEngine- ctx created")); const char *input_layer_name = _get_input_layer_name(); 
const char *output_layer_name = _get_output_layer_name(); input = onnx_tensor_search(ctx, input_layer_name); diff --git a/modules/fnxext/onnx_engine.h b/modules/fnxext/onnx_engine.h index e5c82af10a3..f35ccebb01a 100644 --- a/modules/fnxext/onnx_engine.h +++ b/modules/fnxext/onnx_engine.h @@ -18,7 +18,7 @@ class OnnxEngine: public Reference { public: void _init(); - Variant load_from_file(const String &file_path); + Variant load_from_file(const String &file_path, const Dictionary &params); Variant set_input_layer(const String &layer_name); Variant set_output_layer(const String &layer_name); diff --git a/modules/fnxext/onnx_engine/src/onnx.c b/modules/fnxext/onnx_engine/src/onnx.c index a8b035d7d41..9dc8ede748c 100644 --- a/modules/fnxext/onnx_engine/src/onnx.c +++ b/modules/fnxext/onnx_engine/src/onnx.c @@ -166,6 +166,8 @@ void onnx_context_free(struct onnx_context_t * ctx) onnx_free(ctx->rctx); if(ctx->r) onnx_free(ctx->r); + if(ctx->shape_params) + hmap_free(ctx->shape_params); if(ctx->map) hmap_free(ctx->map); if(ctx->model) From 7da90a2026d533e0eec3aca635567e4dce58cf67 Mon Sep 17 00:00:00 2001 From: Shorin Sergey Date: Fri, 11 Apr 2025 22:24:49 +0300 Subject: [PATCH 03/11] [fnx] OnnxEngine - fix bugs --- modules/fnxext/onnx_engine.cpp | 75 +++++++++++++++++++++++++--------- modules/fnxext/onnx_engine.h | 3 +- 2 files changed, 57 insertions(+), 21 deletions(-) diff --git a/modules/fnxext/onnx_engine.cpp b/modules/fnxext/onnx_engine.cpp index 0312824c3bc..8f39e982a29 100644 --- a/modules/fnxext/onnx_engine.cpp +++ b/modules/fnxext/onnx_engine.cpp @@ -13,7 +13,7 @@ void OnnxEngine::_bind_methods() { ClassDB::bind_method(D_METHOD("set_output_layer", "layer_name"), &OnnxEngine::set_output_layer); ClassDB::bind_method(D_METHOD("run", "input_data"), &OnnxEngine::run); ClassDB::bind_method(D_METHOD("print_layers"), &OnnxEngine::print_layers); - ClassDB::bind_method(D_METHOD("load_from_file", "p_path"), &OnnxEngine::load_from_file); + 
ClassDB::bind_method(D_METHOD("load_from_file", "p_path", "params", "input_layer_name", "output_layer_name"), &OnnxEngine::load_from_file, DEFVAL(Dictionary()), DEFVAL(""), DEFVAL("")); } @@ -63,7 +63,7 @@ Array OnnxEngine::run(const Array& data) { } -Variant OnnxEngine::load_from_file(const String &file_path, const Dictionary &params) { +Variant OnnxEngine::load_from_file(const String &file_path, const Dictionary &params, const String &input_layer_name, const String &output_layer_name) { Vector<uint8_t> onnx_data = FileAccess::get_file_as_array(file_path); List<Variant> shape_keys; @@ -75,29 +75,53 @@ Variant OnnxEngine::load_from_file(const String &file_path, const Dictionary &pa return 0; } - // TODO - // for (List<Variant>::Element *E = shape_keys.front(); E; E = E->next()) { + for (List<Variant>::Element *E = shape_keys.front(); E; E = E->next()) { + Variant key = E->get(); + String key_str = key.operator String(); + Variant value = params[key]; - // int(E->get()); - // hmap_add(shape_params, String(E->get()).ptr(), &width); - - // } - - - int64_t width = 128; - int64_t batch_size = 1; - hmap_add(shape_params, "width", &width); - hmap_add(shape_params, "batch_size", &batch_size); + if (value.get_type() == Variant::INT) { + int64_t* param_value = (int64_t*)malloc(sizeof(int64_t)); + *param_value = (int64_t)value; + hmap_add(shape_params, key_str.utf8().get_data(), param_value); + allocated_params.push_back(param_value); + } else if (value.get_type() == Variant::REAL) { + float* param_value = (float*)malloc(sizeof(float)); + *param_value = (float)value; + hmap_add(shape_params, key_str.utf8().get_data(), param_value); + allocated_params.push_back(param_value); + } + } + + // Default parameters if none provided + if (shape_keys.size() == 0) { + int64_t* width = (int64_t*)malloc(sizeof(int64_t)); + int64_t* batch_size = (int64_t*)malloc(sizeof(int64_t)); + *width = 128; + *batch_size = 1; + hmap_add(shape_params, "width", width); + hmap_add(shape_params, "batch_size", batch_size); + 
allocated_params.push_back(width); + allocated_params.push_back(batch_size); + } ctx = onnx_context_alloc(onnx_data.ptr(), onnx_data.size(), NULL, 0, shape_params); if (ctx) { print_line(String("OnnxEngine- ctx created")); - const char *input_layer_name = _get_input_layer_name(); - const char *output_layer_name = _get_output_layer_name(); - input = onnx_tensor_search(ctx, input_layer_name); - output = onnx_tensor_search(ctx, output_layer_name); + + // Use provided input/output layer names if specified, otherwise use default ones + + CharString utf8_input_layer_name = input_layer_name.utf8(); + CharString utf8_output_layer_name = output_layer_name.utf8(); + + const char *in_layer_name = input_layer_name.empty() ? _get_input_layer_name() : utf8_input_layer_name.get_data(); + const char *out_layer_name = output_layer_name.empty() ? _get_output_layer_name() : utf8_output_layer_name.get_data(); + + input = onnx_tensor_search(ctx, in_layer_name); + output = onnx_tensor_search(ctx, out_layer_name); + if (input && output) return Variant(true); else @@ -110,14 +134,16 @@ Variant OnnxEngine::load_from_file(const String &file_path, const Dictionary &pa const char *OnnxEngine::_get_input_layer_name() { if (!ctx->g->nlen) return NULL; - + if (!ctx->g->nodes[0].inputs) + return NULL; return ctx->g->nodes[0].inputs[0]->name; } const char *OnnxEngine::_get_output_layer_name() { if (!ctx->g->nlen) return NULL; - + if (!ctx->g->nodes[ctx->g->nlen - 1].outputs) + return NULL; return ctx->g->nodes[ctx->g->nlen - 1].outputs[0]->name; } @@ -134,7 +160,16 @@ OnnxEngine::OnnxEngine() { input = NULL; output = NULL; } + OnnxEngine::~OnnxEngine(){ + for (int i = 0; i < allocated_params.size(); i++) { + void* ptr = allocated_params[i]; + if (ptr) { + free(ptr); + } + } + allocated_params.clear(); + if (ctx != NULL) { onnx_context_free(ctx); } diff --git a/modules/fnxext/onnx_engine.h b/modules/fnxext/onnx_engine.h index f35ccebb01a..804b14e0db5 100644 --- a/modules/fnxext/onnx_engine.h +++ 
b/modules/fnxext/onnx_engine.h @@ -12,13 +12,14 @@ class OnnxEngine: public Reference { struct onnx_context_t* ctx; struct onnx_tensor_t* input; struct onnx_tensor_t* output; + Vector<void *> allocated_params; protected: static void _bind_methods(); public: void _init(); - Variant load_from_file(const String &file_path, const Dictionary &params); + Variant load_from_file(const String &file_path, const Dictionary &params = Dictionary(), const String &input_layer_name = "", const String &output_layer_name = ""); Variant set_input_layer(const String &layer_name); Variant set_output_layer(const String &layer_name); From f27f0e4f7f42dd3d8de44704ba09c28ad1616825 Mon Sep 17 00:00:00 2001 From: Shorin Sergey Date: Mon, 14 Apr 2025 15:27:57 +0300 Subject: [PATCH 04/11] [fnx] OnnxEngine - remove prints --- modules/fnxext/onnx_engine/src/onnx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/fnxext/onnx_engine/src/onnx.c b/modules/fnxext/onnx_engine/src/onnx.c index 9dc8ede748c..e8e42fe5164 100644 --- a/modules/fnxext/onnx_engine/src/onnx.c +++ b/modules/fnxext/onnx_engine/src/onnx.c @@ -2305,7 +2305,7 @@ void onnx_run(struct onnx_context_t * ctx) n = &ctx->g->nodes[i]; - onnx_printf("Node %s\r\n", n->proto->op_type); + // onnx_printf("Node %s\r\n", n->proto->op_type); if (have_inputs_changed(n) || !n->initialized) { if (n->exit) { @@ -2332,7 +2332,7 @@ void onnx_run(struct onnx_context_t * ctx) } else { onnx_printf("Not all inputs are ready for node %s\r\n", n->proto->op_type); } - onnx_tensor_dump(n->outputs[0], 1); + // onnx_tensor_dump(n->outputs[0], 1); } } } From 9507d42def5c200bc82f150c0198bd556ae63afa Mon Sep 17 00:00:00 2001 From: Yakov Borevich Date: Mon, 5 May 2025 19:33:04 +0200 Subject: [PATCH 05/11] try build headless --- modules/flash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/flash b/modules/flash index 0f1ea65f52a..4e0cfe6a6c9 160000 --- a/modules/flash +++ b/modules/flash @@ -1 +1 @@ -Subproject commit 
0f1ea65f52ac195e09916f555165809527a40998 +Subproject commit 4e0cfe6a6c9f2cceb1ac31459794b9a9f03fec3d From fdcef54bb68ad824af2f57cd27299c12617c0a75 Mon Sep 17 00:00:00 2001 From: Yakov Borevich Date: Mon, 5 May 2025 20:16:37 +0200 Subject: [PATCH 06/11] use ubuntu-22.04 --- .github/workflows/fnx.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/fnx.yaml b/.github/workflows/fnx.yaml index bf95c4d853b..89703991722 100644 --- a/.github/workflows/fnx.yaml +++ b/.github/workflows/fnx.yaml @@ -41,7 +41,7 @@ jobs: rsync -a -m -R --include '*/' --include '*.h' --include '*.inc' --exclude '*' --exclude '.git' --exclude 'bin' --exclude 'ios.headers' . ./ios.headers zip -X -r ios.headers.zip ios.headers mv ios.headers.zip bin/ - - os: ubuntu-20.04 + - os: ubuntu-22.04 token: android.template.debug build: | scons platform=android -j 8 target=debug android_arch=arm64v8 tools=no module_firebase_enabled=no module_bullet_enabled=no module_websocket_enabled=no game_center=no builtin_pcre2_with_jit=no builtin_libvpx=no module_webm_enabled=no @@ -58,7 +58,7 @@ jobs: zip -r android.native_libs.unstripped.debug.zip ./ cd - cp platform/android/java/lib/libs/debug/android.native_libs.unstripped.debug.zip bin - - os: ubuntu-20.04 + - os: ubuntu-22.04 token: android.template.release build: | scons platform=android -j 8 target=release_debug android_arch=arm64v8 tools=no module_firebase_enabled=no module_bullet_enabled=no module_websocket_enabled=no game_center=no builtin_pcre2_with_jit=no builtin_libvpx=no module_webm_enabled=no @@ -76,7 +76,7 @@ jobs: zip -r android.native_libs.unstripped.release.zip ./ cd - cp platform/android/java/lib/libs/release/android.native_libs.unstripped.release.zip bin - - os: ubuntu-20.04 + - os: ubuntu-22.04 token: linux.editor.headless build: | scons target=release_debug platform=server tools=yes module_firebase_enabled=no module_bullet_enabled=no module_websocket_enabled=no game_center=no module_mono_enabled=no 
mono_glue=no @@ -97,7 +97,7 @@ jobs: # softwareupdate --install-rosetta --agree-to-license - name: Make apt sources.list use the default Ubuntu repositories - if: ${{matrix.os == 'ubuntu-20.04'}} + if: ${{matrix.os == 'ubuntu-22.04'}} run: | sudo rm -f /etc/apt/sources.list.d/* sudo cp -f misc/ci/sources.list /etc/apt/sources.list From ed31c2f4dee168891bb93326f7dfd51c1fba990c Mon Sep 17 00:00:00 2001 From: Yakov Borevich Date: Mon, 5 May 2025 20:34:10 +0200 Subject: [PATCH 07/11] use ubuntu-22.04, update sources list --- .github/workflows/fnx.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/fnx.yaml b/.github/workflows/fnx.yaml index 89703991722..62cae042471 100644 --- a/.github/workflows/fnx.yaml +++ b/.github/workflows/fnx.yaml @@ -99,8 +99,6 @@ jobs: - name: Make apt sources.list use the default Ubuntu repositories if: ${{matrix.os == 'ubuntu-22.04'}} run: | - sudo rm -f /etc/apt/sources.list.d/* - sudo cp -f misc/ci/sources.list /etc/apt/sources.list sudo apt-get update - name: Set up Java 8 From ebf4fe4cee6fe97fa71f5857d4b6cfb75306a9f3 Mon Sep 17 00:00:00 2001 From: Yakov Borevich Date: Tue, 27 May 2025 15:04:02 +0200 Subject: [PATCH 08/11] import for server platform as well --- modules/flash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/flash b/modules/flash index 4e0cfe6a6c9..4426b4644b7 160000 --- a/modules/flash +++ b/modules/flash @@ -1 +1 @@ -Subproject commit 4e0cfe6a6c9f2cceb1ac31459794b9a9f03fec3d +Subproject commit 4426b4644b7033c9ac5ac08f33ffcfcad57323f9 From 3f60c79f3413157d23167f923a088ffcadbdfc11 Mon Sep 17 00:00:00 2001 From: Yakov Borevich Date: Tue, 27 May 2025 15:10:48 +0200 Subject: [PATCH 09/11] update importer version --- modules/flash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/flash b/modules/flash index 4426b4644b7..ba6523894ee 160000 --- a/modules/flash +++ b/modules/flash @@ -1 +1 @@ -Subproject commit 4426b4644b7033c9ac5ac08f33ffcfcad57323f9 
+Subproject commit ba6523894ee3c256b2f2526d1a6502858dd155a7 From 58301a6551544831f0ced3f0b782a84e1f3902ca Mon Sep 17 00:00:00 2001 From: Yakov Borevich Date: Tue, 27 May 2025 17:26:49 +0200 Subject: [PATCH 10/11] proper name files --- modules/flash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/flash b/modules/flash index ba6523894ee..d425398162e 160000 --- a/modules/flash +++ b/modules/flash @@ -1 +1 @@ -Subproject commit ba6523894ee3c256b2f2526d1a6502858dd155a7 +Subproject commit d425398162e937a78df7dbff0a8ab701fce073be From a028e30111b6b32a381fb660ee23e80b6c150389 Mon Sep 17 00:00:00 2001 From: Yakov Borevich Date: Mon, 16 Jun 2025 12:50:20 +0200 Subject: [PATCH 11/11] fix server textures --- modules/flash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/flash b/modules/flash index d425398162e..15cc1e526b3 160000 --- a/modules/flash +++ b/modules/flash @@ -1 +1 @@ -Subproject commit d425398162e937a78df7dbff0a8ab701fce073be +Subproject commit 15cc1e526b33f7762f5d719926c46e9610e8e36f