From c433bc873bd64c5ef58c4a6e10b66640d9e746af Mon Sep 17 00:00:00 2001
From: SaeHie Park <saehie.park@samsung.com>
Date: Fri, 12 Jul 2024 15:08:26 +0900
Subject: [PATCH] DRAFT CFe fuse Mul Add to FullyConnected

ongoing draft to fuse Mul Add to FullyConnected.

Signed-off-by: SaeHie Park <saehie.park@samsung.com>
---
 .../circle2circle-dredd-recipe-test/test.lst  |   6 +
 compiler/circle2circle/src/Circle2Circle.cpp  |  11 ++
 compiler/luci-pass-value-py-test/test.lst     |   8 +-
 .../luci-pass-value-py-test/test_luci_eval.py |   9 +-
 .../luci/pass/include/luci/CircleOptimizer.h  |   3 +
 .../luci/pass/include/luci/Pass/FoldMulPass.h |  38 +++++
 .../Pass/FuseAddToFullyConnectedBiasPass.h    |  37 ++++
 .../Pass/FuseMulToFullyConnectedWeightsPass.h |  37 ++++
 compiler/luci/pass/src/CircleOptimizer.cpp    |  15 ++
 compiler/luci/pass/src/FoldMulPass.cpp        | 127 ++++++++++++++
 compiler/luci/pass/src/FoldMulPass.test.cpp   | 133 +++++++++++++++
 .../src/FuseAddToFullyConnectedBiasPass.cpp   | 128 ++++++++++++++
 .../FuseAddToFullyConnectedBiasPass.test.cpp  | 160 ++++++++++++++++++
 .../FuseMulToFullyConnectedWeightsPass.cpp    | 123 ++++++++++++++
 ...useMulToFullyConnectedWeightsPass.test.cpp | 160 ++++++++++++++++++
 compiler/one-cmds/how-to-use-one-commands.txt |   3 +
 compiler/one-cmds/onelib/constant.py          |   8 +
 .../Net_Add_FullyConnected_000/test.recipe    |  67 ++++++++
 .../Net_Add_FullyConnected_000/test.rule      |   7 +
 .../Net_Add_FullyConnected_001/test.recipe    |  67 ++++++++
 .../Net_Add_FullyConnected_001/test.rule      |   7 +
 .../Net_Add_FullyConnected_002/test.recipe    |  57 +++++++
 .../Net_Add_FullyConnected_002/test.rule      |   7 +
 .../Net_Mul_FullyConnected_000/test.recipe    |  67 ++++++++
 .../Net_Mul_FullyConnected_000/test.rule      |   8 +
 .../Net_Mul_FullyConnected_001/test.recipe    |  67 ++++++++
 .../Net_Mul_FullyConnected_001/test.rule      |   9 +
 .../Net_Mul_FullyConnected_002/test.recipe    |  57 +++++++
 .../Net_Mul_FullyConnected_002/test.rule      |   8 +
 29 files changed, 1431 insertions(+), 3 deletions(-)
 create mode 100644 compiler/luci/pass/include/luci/Pass/FoldMulPass.h
 create mode 100644 compiler/luci/pass/include/luci/Pass/FuseAddToFullyConnectedBiasPass.h
 create mode 100644 compiler/luci/pass/include/luci/Pass/FuseMulToFullyConnectedWeightsPass.h
 create mode 100644 compiler/luci/pass/src/FoldMulPass.cpp
 create mode 100644 compiler/luci/pass/src/FoldMulPass.test.cpp
 create mode 100644 compiler/luci/pass/src/FuseAddToFullyConnectedBiasPass.cpp
 create mode 100644 compiler/luci/pass/src/FuseAddToFullyConnectedBiasPass.test.cpp
 create mode 100644 compiler/luci/pass/src/FuseMulToFullyConnectedWeightsPass.cpp
 create mode 100644 compiler/luci/pass/src/FuseMulToFullyConnectedWeightsPass.test.cpp
 create mode 100644 res/TensorFlowLiteRecipes/Net_Add_FullyConnected_000/test.recipe
 create mode 100644 res/TensorFlowLiteRecipes/Net_Add_FullyConnected_000/test.rule
 create mode 100644 res/TensorFlowLiteRecipes/Net_Add_FullyConnected_001/test.recipe
 create mode 100644 res/TensorFlowLiteRecipes/Net_Add_FullyConnected_001/test.rule
 create mode 100644 res/TensorFlowLiteRecipes/Net_Add_FullyConnected_002/test.recipe
 create mode 100644 res/TensorFlowLiteRecipes/Net_Add_FullyConnected_002/test.rule
 create mode 100644 res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_000/test.recipe
 create mode 100644 res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_000/test.rule
 create mode 100644 res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_001/test.recipe
 create mode 100644 res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_001/test.rule
 create mode 100644 res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_002/test.recipe
 create mode 100644 res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_002/test.rule
diff --git a/compiler/circle2circle-dredd-recipe-test/test.lst b/compiler/circle2circle-dredd-recipe-test/test.lst
index d0b3123e7c1..4bf6a80d65a 100644
--- a/compiler/circle2circle-dredd-recipe-test/test.lst
+++ b/compiler/circle2circle-dredd-recipe-test/test.lst
@@ -21,6 +21,9 @@ Add(MaxPoolWithArgmax_000 PASS resolve_customop_max_pool_with_argmax)
 Add(MaxPoolWithArgmax_001 PASS resolve_customop_max_pool_with_argmax)
 Add(MaxPoolWithArgmax_002 PASS resolve_customop_max_pool_with_argmax)
 Add(Net_Add_FloorMod_Gather_000 PASS remove_gather_guard)
+Add(Net_Add_FullyConnected_000 PASS fuse_add_to_fullyconnected_bias)
+Add(Net_Add_FullyConnected_001 PASS fuse_add_to_fullyconnected_bias)
+Add(Net_Add_FullyConnected_002 PASS fuse_add_to_fullyconnected_bias)
 Add(Net_BroadcastTo_AddV2_000 PASS resolve_customop_add)
 Add(Net_BroadcastTo_AddV2_001 PASS resolve_customop_add)
 Add(Net_BroadcastTo_AddV2_002 PASS resolve_customop_add)
@@ -61,6 +64,9 @@ Add(Net_Mul_Add_002 PASS remove_unnecessary_add)
 Add(Net_Mul_Add_003 PASS remove_unnecessary_add)
 Add(Net_Mul_Div_000 PASS fuse_mul_with_div)
 Add(Net_Mul_Div_001 PASS fuse_mul_with_div)
+Add(Net_Mul_FullyConnected_000 PASS fuse_mul_to_fullyconnected_weights fold_mul)
+Add(Net_Mul_FullyConnected_001 PASS fuse_mul_to_fullyconnected_weights fold_mul)
+Add(Net_Mul_FullyConnected_002 PASS fuse_mul_to_fullyconnected_weights fold_mul)
 Add(Net_Preactivation_BN_000 PASS fuse_preactivation_batchnorm)
 Add(Net_Reshape_Reshape_000 PASS remove_redundant_reshape)
 Add(Net_Shape_Add_000 PASS fold_shape)
diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp
index 5dd6ed52edb..fb4642c3bcf 100644
--- a/compiler/circle2circle/src/Circle2Circle.cpp
+++ b/compiler/circle2circle/src/Circle2Circle.cpp
@@ -81,6 +81,7 @@ int entry(int argc, char **argv)
   add_switch(arser, "--fold_fully_connected",
              "This will fold FullyConnected operator with constant inputs");
   add_switch(arser, "--fold_gather", "This will fold Gather operator");
+  add_switch(arser, "--fold_mul", "This will fold Mul operator");
   add_switch(arser, "--fold_reshape", "This will fold Reshape operator");
   add_switch(arser, "--fold_shape", "This will fold Shape operator");
   add_switch(arser, "--fold_sparse_to_dense", "This will fold SparseToDense operator");
@@ -105,6 +106,10 @@ int entry(int argc, char **argv)
   add_switch(arser, "--fuse_batchnorm_with_tconv",
              "This will fuse BatchNorm operators to Transposed Convolution operator");
   add_switch(arser, "--fuse_bcq", "This will fuse operators and apply Binary Coded Quantization");
+  add_switch(arser, "--fuse_add_to_fullyconnected_bias",
+             "This will fuse Add to the following FullyConnected bias");
+  add_switch(arser, "--fuse_mul_to_fullyconnected_weights",
+             "This will fuse Mul to the following FullyConnected weights");
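+  // e.g. a Mul in front of a FullyConnected can be absorbed into the weights
+  // and, together with --fold_mul, reduced back to a single FullyConnected:
+  //   $ circle2circle --fuse_mul_to_fullyconnected_weights --fold_mul in.circle out.circle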
   add_switch(arser, "--fuse_instnorm", "This will fuse operators to InstanceNorm operator");
   add_switch(arser, "--fuse_mean_with_mean",
              "This will fuse two Mean operations when they follow one by one. This will fold them "
@@ -275,6 +280,8 @@ int entry(int argc, char **argv)
     options->enable(Algorithms::FoldFullyConnected);
   if (arser.get<bool>("--fold_gather"))
     options->enable(Algorithms::FoldGather);
+  if (arser.get<bool>("--fold_mul"))
+    options->enable(Algorithms::FoldMul);
   if (arser.get<bool>("--fold_reshape"))
     options->enable(Algorithms::FoldReshape);
   if (arser.get<bool>("--fold_shape"))
@@ -303,6 +310,10 @@ int entry(int argc, char **argv)
     options->enable(Algorithms::FuseBatchNormWithDwConv);
   if (arser.get<bool>("--fuse_batchnorm_with_tconv"))
     options->enable(Algorithms::FuseBatchNormWithTConv);
+  if (arser.get<bool>("--fuse_add_to_fullyconnected_bias"))
+    options->enable(Algorithms::FuseAddToFullyConnectedBias);
+  if (arser.get<bool>("--fuse_mul_to_fullyconnected_weights"))
+    options->enable(Algorithms::FuseMulToFullyConnectedWeights);
   if (arser.get<bool>("--fuse_slice_with_tconv"))
     options->enable(Algorithms::FuseSliceWithTConv);
   if (arser.get<bool>("--fuse_bcq"))
diff --git a/compiler/luci-pass-value-py-test/test.lst b/compiler/luci-pass-value-py-test/test.lst
index 9e0ed5c6615..bf5c6aa5cb5 100644
--- a/compiler/luci-pass-value-py-test/test.lst
+++ b/compiler/luci-pass-value-py-test/test.lst
@@ -2,7 +2,7 @@
 # Format:
 #   eval(MODEL PASS)
 # MODEL: tflite model file name in build/compiler/common-artifacts folder.
-# PASS: Optimization Pass to test. Supports only one Pass for now.
+# PASS: Optimization Pass to test. Supports one or more Passes.
 #
 # eval(Net_Preactivation_BN_000 fuse_preactivation_batchnorm) : value diff exist
 
@@ -10,6 +10,9 @@ eval(FullyConnected_007 replace_non_const_fc_with_batch_matmul)
 eval(HardSwish_001 decompose_hardswish)
 eval(Net_Add_FloorMod_Gather_000 remove_gather_guard)
+eval(Net_Add_FullyConnected_000 fuse_add_to_fullyconnected_bias)
+eval(Net_Add_FullyConnected_001 fuse_add_to_fullyconnected_bias)
+eval(Net_Add_FullyConnected_002 fuse_add_to_fullyconnected_bias)
 eval(Net_Conv_Add_000 fuse_add_with_conv)
 eval(Net_Conv_Add_001 fuse_add_with_conv)
 # eval(Net_Conv_Add_002 fuse_add_with_conv) --> Conv2D w/o bias fails in tflite interpreter
@@ -40,6 +43,9 @@ eval(Net_Mul_Add_002 remove_unnecessary_add)
 eval(Net_Mul_Add_003 remove_unnecessary_add)
 eval(Net_Mul_Div_000 fuse_mul_with_div)
 eval(Net_Mul_Div_001 fuse_mul_with_div)
+eval(Net_Mul_FullyConnected_000 fuse_mul_to_fullyconnected_weights)
+eval(Net_Mul_FullyConnected_001 fuse_mul_to_fullyconnected_weights)
+eval(Net_Mul_FullyConnected_002 fuse_mul_to_fullyconnected_weights)
 eval(Net_Reshape_Mean_000 forward_reshape_to_unaryop)
 eval(Net_Reshape_Neg_000 forward_reshape_to_unaryop)
 eval(Net_Reshape_Reshape_000 remove_redundant_reshape)
diff --git a/compiler/luci-pass-value-py-test/test_luci_eval.py b/compiler/luci-pass-value-py-test/test_luci_eval.py
index 4cb59c177b7..91986f6f2de 100644
--- a/compiler/luci-pass-value-py-test/test_luci_eval.py
+++ b/compiler/luci-pass-value-py-test/test_luci_eval.py
@@ -95,8 +95,13 @@ def luci_eval_verify(test_name,
         assert np.allclose(
             luci_output_data, intp_output_data, rtol=rtolint, atol=atolint), err_msg
     elif output_details["dtype"] == np.float32:
-        assert np.allclose(
-            luci_output_data, intp_output_data, rtol=rtolf32, atol=atolf32), err_msg
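+        # on a mismatch, dump both outputs and their element-wise difference
+        # to ease numerical debugging of the passes under test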
+        diff_comp = np.allclose(
+            luci_output_data, intp_output_data, rtol=rtolf32, atol=atolf32)
+        if not diff_comp:
+            print("\r\ntflite:\r\n", intp_output_data, flush=True)
+            print("\r\ncircle:\r\n", luci_output_data, flush=True)
+            print("\r\nDiff:\r\n", intp_output_data - luci_output_data, flush=True)
+        assert diff_comp, err_msg
     elif output_details["dtype"] == np.int64:
         assert np.allclose(
             luci_output_data, intp_output_data, rtol=rtolint, atol=atolint), err_msg
diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h
index 01b43a72844..bdae7d57e41 100644
--- a/compiler/luci/pass/include/luci/CircleOptimizer.h
+++ b/compiler/luci/pass/include/luci/CircleOptimizer.h
@@ -34,12 +34,14 @@ class CircleOptimizer final
 {
   enum Algorithm
   {
+    FuseAddToFullyConnectedBias,
     FuseAddWithConv,
     FuseAddWithFullyConnected,
     FuseAddWithTConv,
     FuseBatchNormWithConv,
     FuseBatchNormWithDwConv,
     FuseBatchNormWithTConv,
+    FuseMulToFullyConnectedWeights,
     FuseSliceWithTConv,
     FuseBCQ,
     FuseHorizontalFullyConnected,
@@ -61,6 +63,7 @@ class CircleOptimizer final
     FoldFullyConnected,
     FoldDequantize,
     FoldGather,
+    FoldMul,
     FoldReshape,
     FoldShape,
     FoldSparseToDense,
diff --git a/compiler/luci/pass/include/luci/Pass/FoldMulPass.h b/compiler/luci/pass/include/luci/Pass/FoldMulPass.h
new file mode 100644
index 00000000000..69b661fbe02
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FoldMulPass.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_MUL_PASS_H__
+#define __LUCI_FOLD_MUL_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to fold Mul to a constant tensor
+ *
+ */
+struct FoldMulPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::FoldMulPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_MUL_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FuseAddToFullyConnectedBiasPass.h b/compiler/luci/pass/include/luci/Pass/FuseAddToFullyConnectedBiasPass.h
new file mode 100644
index 00000000000..9aef478456c
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FuseAddToFullyConnectedBiasPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_ADD_TO_FULLY_CONNECTED_BIAS_PASS_H__
+#define __LUCI_FUSE_ADD_TO_FULLY_CONNECTED_BIAS_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to fuse Add to the following FC bias
+ */
+struct FuseAddToFullyConnectedBiasPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::FuseAddToFullyConnectedBiasPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_ADD_TO_FULLY_CONNECTED_BIAS_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FuseMulToFullyConnectedWeightsPass.h b/compiler/luci/pass/include/luci/Pass/FuseMulToFullyConnectedWeightsPass.h
new file mode 100644
index 00000000000..583f21ef82c
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FuseMulToFullyConnectedWeightsPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_MUL_TO_FULLY_CONNECTED_WEIGHTS_PASS_H__
+#define __LUCI_FUSE_MUL_TO_FULLY_CONNECTED_WEIGHTS_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to fuse Mul into the following FullyConnected weights
+ */
+struct FuseMulToFullyConnectedWeightsPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::FuseMulToFullyConnectedWeightsPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_MUL_TO_FULLY_CONNECTED_WEIGHTS_PASS_H__
diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp
index 3c94311c8d5..9f650d9004e 100644
--- a/compiler/luci/pass/src/CircleOptimizer.cpp
+++ b/compiler/luci/pass/src/CircleOptimizer.cpp
@@ -26,6 +26,7 @@
 #include "luci/Pass/FoldDequantizePass.h"
 #include "luci/Pass/FoldFullyConnectedPass.h"
 #include "luci/Pass/FoldGatherPass.h"
+#include "luci/Pass/FoldMulPass.h"
 #include "luci/Pass/FoldReshapePass.h"
 #include "luci/Pass/FoldShapePass.h"
 #include "luci/Pass/FoldSparseToDensePass.h"
@@ -33,6 +34,7 @@
 #include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
 #include "luci/Pass/ForwardTransposeOpPass.h"
 #include "luci/Pass/FuseActivationFunctionPass.h"
+#include "luci/Pass/FuseAddToFullyConnectedBiasPass.h"
 #include "luci/Pass/FuseAddWithConvPass.h"
 #include "luci/Pass/FuseAddWithFullyConnectedPass.h"
 #include "luci/Pass/FuseAddWithTConvPass.h"
@@ -40,6 +42,7 @@
 #include "luci/Pass/FuseBatchNormWithDwConvPass.h"
 #include "luci/Pass/FuseBatchNormWithTConvPass.h"
 #include "luci/Pass/FuseBCQPass.h"
+#include "luci/Pass/FuseMulToFullyConnectedWeightsPass.h"
 #include "luci/Pass/FuseInstanceNormPass.h"
 #include "luci/Pass/FuseMeanWithMeanPass.h"
 #include "luci/Pass/FuseMulWithConvPass.h"
@@ -333,6 +336,14 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique());
   }
+  if (_options->query(Options::Algorithm::FuseAddToFullyConnectedBias))
+  {
+    phase.emplace_back(std::make_unique<luci::FuseAddToFullyConnectedBiasPass>());
+  }
+  if (_options->query(Options::Algorithm::FuseMulToFullyConnectedWeights))
+  {
+    phase.emplace_back(std::make_unique<luci::FuseMulToFullyConnectedWeightsPass>());
+  }
   if (_options->query(Options::Algorithm::FusePRelu))
   {
     phase.emplace_back(std::make_unique<luci::FusePReluPass>());
   }
@@ -381,6 +392,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique<luci::FoldGatherPass>());
   }
+  if (_options->query(Options::Algorithm::FoldMul))
+  {
+    phase.emplace_back(std::make_unique<luci::FoldMulPass>());
+  }
   if (_options->query(Options::Algorithm::FoldReshape))
   {
     phase.emplace_back(std::make_unique<luci::FoldReshapePass>());
   }
diff --git a/compiler/luci/pass/src/FoldMulPass.cpp b/compiler/luci/pass/src/FoldMulPass.cpp
new file mode 100644
index 00000000000..65112911e59
--- /dev/null
+++ b/compiler/luci/pass/src/FoldMulPass.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldMulPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#define CHECK_OR_FALSE(condition) \
+  if (not(condition))             \
+    return false;
+
+namespace
+{
+
+/**
+ * @return the higher-ranked of x and y, or nullptr if shapes are not compatible
+ */
+const luci::CircleConst *compatible_shape(const luci::CircleConst *x, const luci::CircleConst *y)
+{
+  if (x->rank() >= y->rank())
+  {
+    uint32_t d = x->rank() - y->rank();
+    for (uint32_t i = 0; i < y->rank(); i++)
+    {
+      // NOTE dim() has only '==' operator
+      if (!(x->dim(i + d) == y->dim(i)))
+        return nullptr;
+    }
+    return x;
+  }
+  else
+  {
+    uint32_t d = y->rank() - x->rank();
+    for (uint32_t i = 0; i < x->rank(); i++)
+    {
+      if (!(x->dim(i) == y->dim(i + d)))
+        return nullptr;
+    }
+    return y;
+  }
+}
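+
+// NOTE with shapes accepted by compatible_shape(), broadcasting reduces to
+//      modulo indexing over the flat buffers: e.g. x:[2,3] (size 6) and
+//      y:[3] (size 3) fold to out[i] = x[i] * y[i % 3], i.e. the smaller
+//      constant repeats over the leading dimensions of the larger one.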
+
+/**
+ * Fold Mul to const if both inputs are const
+ **/
+bool fold_mul(luci::CircleMul *mul)
+{
+  CHECK_OR_FALSE(mul);
+  CHECK_OR_FALSE(mul->dtype() == loco::DataType::FLOAT32);
+
+  // Check inputs are const and compatible
+  auto x = dynamic_cast<luci::CircleConst *>(mul->x());
+  auto y = dynamic_cast<luci::CircleConst *>(mul->y());
+  CHECK_OR_FALSE(x);
+  CHECK_OR_FALSE(y);
+  CHECK_OR_FALSE(x->dtype() == y->dtype());
+  const auto xy = compatible_shape(x, y);
+  CHECK_OR_FALSE(xy);
+
+  auto name_x = x->name();
+  auto name_y = y->name();
+  assert(name_x.length() > 0);
+  assert(name_y.length() > 0);
+  auto folded_const = mul->graph()->nodes()->create<luci::CircleConst>();
+  folded_const->dtype(xy->dtype());
+  folded_const->rank(xy->rank());
+  for (uint32_t i = 0; i < xy->rank(); i++)
+    folded_const->dim(i).set(xy->dim(i).value());
+
+  const auto size_x = x->size<loco::DataType::FLOAT32>();
+  const auto size_y = y->size<loco::DataType::FLOAT32>();
+  const auto size_xy = xy->size<loco::DataType::FLOAT32>();
+  folded_const->size<loco::DataType::FLOAT32>(size_xy);
+  for (uint32_t i = 0; i < size_xy; i++)
+  {
+    auto xv = x->at<loco::DataType::FLOAT32>(i % size_x);
+    auto yv = y->at<loco::DataType::FLOAT32>(i % size_y);
+    folded_const->at<loco::DataType::FLOAT32>(i) = xv * yv;
+  }
+
+  folded_const->shape_status(luci::ShapeStatus::VALID);
+  folded_const->name(name_x + "_" + name_y);
+
+  loco::replace(mul).with(folded_const);
+
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for Mul Op
+ **/
+bool FoldMulPass::run(loco::Graph *g)
+{
+  bool changed = false;
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    if (auto mul = dynamic_cast<luci::CircleMul *>(node))
+    {
+      if (fold_mul(mul))
+        changed = true;
+    }
+  }
+
+  return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldMulPass.test.cpp b/compiler/luci/pass/src/FoldMulPass.test.cpp
new file mode 100644
index 00000000000..0c6de971f40
--- /dev/null
+++ b/compiler/luci/pass/src/FoldMulPass.test.cpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldMulPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Graph has a Mul Op with constant inputs
+ *
+ *  BEFORE
+ *
+ *             [CircleConst] [CircleConst]
+ *                       |     |
+ *                     [CircleMul]
+ *                         |
+ *                    [CircleNode]
+ *
+ *  AFTER
+ *
+ *                 [CircleConst] [CircleConst]
+ *                           |     |
+ *  [CircleConst]          [CircleMul]
+ *       |
+ *  [CircleNode]
+ */
+template <loco::DataType T> class FoldMulTest : public luci::ConstantFoldingAddTestGraph
+{
+public:
+  FoldMulTest(std::initializer_list<uint32_t> shape) : luci::ConstantFoldingAddTestGraph(shape, T)
+  {
+    _mul = _g.nodes()->template create<luci::CircleMul>();
+    _x = _g.nodes()->template create<luci::CircleConst>();
+    _y = _g.nodes()->template create<luci::CircleConst>();
+
+    _mul->dtype(T);
+    _x->dtype(T);
+    _y->dtype(T);
+
+    _mul->shape(shape);
+    _x->shape(shape);
+    _y->shape(shape);
+
+    uint32_t num_elems = 1;
+    for (auto dim = shape.begin(); dim != shape.end(); dim++)
+      num_elems *= *dim;
+
+    _x->size<T>(num_elems);
+    _y->size<T>(num_elems);
+
+    for (uint32_t i = 0; i < num_elems; i++)
+    {
+      _x->at<T>(i) = i + 1;
+      _y->at<T>(i) = i + 1;
+    }
+
+    _mul->x(_x);
+    _mul->y(_y);
+    _mul->name("mul");
+    _x->name("x");
+    _y->name("y");
+  }
+
+  loco::Node *createFoldedPattern() override { return _mul; }
+
+  virtual ~FoldMulTest() = default;
+
+protected:
+  luci::CircleMul *_mul = nullptr;
+  luci::CircleConst *_x = nullptr;
+  luci::CircleConst *_y = nullptr;
+};
+
+class FoldF32MulTest : public FoldMulTest<loco::DataType::FLOAT32>, public ::testing::Test
+{
+public:
+  FoldF32MulTest() : FoldMulTest<loco::DataType::FLOAT32>({3}) {}
+
+  virtual void SetUp() { init(); }
+};
+
+} // namespace
+
+TEST_F(FoldF32MulTest, name)
+{
+  luci::FoldMulPass pass;
+  auto const name = pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FoldF32MulTest, fold_mul)
+{
+  luci::FoldMulPass pass;
+  while (pass.run(graph()))
+    ;
+
+  auto folded_const = getFoldedPattern();
+  EXPECT_NE(nullptr, folded_const);
+
+  // Check type, shape, values of folded const
+  EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+  EXPECT_EQ(1, folded_const->rank());
+  EXPECT_EQ(3, folded_const->dim(0).value());
+  EXPECT_EQ(1, folded_const->at<loco::DataType::FLOAT32>(0));
+  EXPECT_EQ(4, folded_const->at<loco::DataType::FLOAT32>(1));
+  EXPECT_EQ(9, folded_const->at<loco::DataType::FLOAT32>(2));
+}
+
+TEST_F(FoldF32MulTest, input_type_mismatch_NEG)
+{
+  _x->dtype(loco::DataType::U4);
+
+  luci::FoldMulPass pass;
+  EXPECT_FALSE(pass.run(graph()));
+}
diff --git a/compiler/luci/pass/src/FuseAddToFullyConnectedBiasPass.cpp b/compiler/luci/pass/src/FuseAddToFullyConnectedBiasPass.cpp
new file mode 100644
index 00000000000..d1e44b0a104
--- /dev/null
+++ b/compiler/luci/pass/src/FuseAddToFullyConnectedBiasPass.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseAddToFullyConnectedBiasPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include "helpers/NodeFiller.h"
+
+#define CHECK_OR_FALSE(condition) \
+  if (not(condition))             \
+    return false;
+
+namespace
+{
+
+/**
+ * Transform Add to following FullyConnected bias if possible
+ *
+ * BEFORE
+ *                  |
+ *     [CircleAdd] [CircleConst] [CircleConst]
+ *           |            |       |
+ *        [CircleFullyConnected] ----------+
+ *           |
+ *
+ * AFTER
+ *     |
+ *     |               [CircleConst] [CircleConst] [CircleConst]
+ *     |                     |        |              |
+ *     |       [CircleConst] [CircleFullyConnected] [CircleAdd]
+ *     |             |              |
+ *  [CircleFullyConnected] ------+
+ *     |
+ *
+ */
+bool fuse_add_to_fc_bias(luci::CircleFullyConnected *fc)
+{
+  CHECK_OR_FALSE(fc);
+
+  // check input is Add
+  auto add = dynamic_cast<luci::CircleAdd *>(fc->input());
+  CHECK_OR_FALSE(add);
+  // conditions of Add, FC: to expect constant folding, support only F32
+  CHECK_OR_FALSE(add->dtype() == loco::DataType::FLOAT32);
+  CHECK_OR_FALSE(add->fusedActivationFunction() == luci::FusedActFunc::NONE);
+  CHECK_OR_FALSE(fc->dtype() == loco::DataType::FLOAT32);
+  // support only constant weights
+  auto weights = dynamic_cast<luci::CircleConst *>(fc->weights());
+  CHECK_OR_FALSE(weights);
+  // bias can be constant or CircleOutputExclude
+  auto bias = dynamic_cast<luci::CircleNode *>(fc->bias());
+  CHECK_OR_FALSE(bias);
+
+  // Check that the addend of Add is constant
+  luci::CircleNode *add_input = nullptr;
+  luci::CircleConst *add_shift = nullptr;
+  CHECK_OR_FALSE(luci::fill(&add_input, &add_shift).with_commutative_args_of(add));
+  if (add_shift)
+  {
+    // support only 1D constant
+    CHECK_OR_FALSE(add_shift->rank() == 1);
+  }
+
+  auto graph = fc->graph();
+
+  auto fc_bias = graph->nodes()->create<luci::CircleFullyConnected>();
+  fc_bias->input(add_shift);
+  fc_bias->weights(weights);
+  fc_bias->bias(bias);
+  fc_bias->keep_num_dims(true);
+  fc_bias->fusedActivationFunction(luci::FusedActFunc::NONE);
+  fc_bias->name(fc->name() + "_" + add->name() + "_bias");
+  luci::add_origin(fc_bias,
+                   luci::composite_origin(
+                     {luci::get_origin(add), luci::get_origin(add_shift), luci::get_origin(bias)}));
+
+  auto fc_new = graph->nodes()->create<luci::CircleFullyConnected>();
+  fc_new->input(add_input);
+  fc_new->weights(weights);
+  fc_new->bias(fc_bias);
+  fc_new->weights_format(fc->weights_format());
+  fc_new->keep_num_dims(fc->keep_num_dims());
+  fc_new->fusedActivationFunction(fc->fusedActivationFunction());
+  fc_new->name(fc->name());
+  luci::add_origin(fc_new, luci::get_origin(fc));
+
+  replace(fc).with(fc_new);
+
+  return true;
+}
+
+} // namespace
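+
+// NOTE Why this is sound (FLOAT32, 1D shift 'a', weights W, bias b):
+//        FC(x + a; W, b) = (x + a) W^T + b = x W^T + (a W^T + b)
+//      The helper FC built above computes the new constant bias FC(a; W, b);
+//      later constant folding can reduce it to a single CircleConst.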
+
+namespace luci
+{
+
+bool FuseAddToFullyConnectedBiasPass::run(loco::Graph *g)
+{
+  bool changed = false;
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto fc = dynamic_cast<luci::CircleFullyConnected *>(node);
+    if (not fc)
+      continue;
+
+    if (fuse_add_to_fc_bias(fc))
+      changed = true;
+  }
+
+  return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseAddToFullyConnectedBiasPass.test.cpp b/compiler/luci/pass/src/FuseAddToFullyConnectedBiasPass.test.cpp
new file mode 100644
index 00000000000..bbcd7f9fc85
--- /dev/null
+++ b/compiler/luci/pass/src/FuseAddToFullyConnectedBiasPass.test.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseAddToFullyConnectedBiasPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+template <loco::DataType DT>
+class FuseAddToFullyConnectedBiasPassTestGraph : public TestIOGraph
+{
+public:
+  FuseAddToFullyConnectedBiasPassTestGraph() = default;
+
+  void init(void)
+  {
+    TestIOGraph::init({3, 4}, {3, 6});
+
+    _add = g()->nodes()->create<luci::CircleAdd>();
+    _addi = g()->nodes()->create<luci::CircleConst>();
+    _fc = g()->nodes()->create<luci::CircleFullyConnected>();
+    _fc_w = g()->nodes()->create<luci::CircleConst>();
+    _fc_b = g()->nodes()->create<luci::CircleConst>();
+
+    _add->name("add");
+    _addi->name("addi");
+    _fc->name("fc");
+    _fc_w->name("fc_w");
+    _fc_b->name("fc_b");
+
+    _add->dtype(DT);
+    _fc->dtype(DT);
+    _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+
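+    // the Add constant is 1D with length equal to the FC input depth (4),
+    // so the bias-computing FC built by the pass is well-formed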
+    _addi->rank(1);
+    _addi->dim(0) = 4;
+    _addi->dtype(DT);
+    _addi->size<DT>(4);
+    for (uint32_t i = 0; i < 4; ++i)
+    {
+      _addi->at<DT>(i) = 1.0f;
+    }
+
+    _fc_w->rank(2);
+    _fc_w->dim(0) = 6;
+    _fc_w->dim(1) = 4;
+    _fc_w->dtype(DT);
+    _fc_w->size<DT>(4 * 6);
+    for (uint32_t i = 0; i < 4 * 6; ++i)
+    {
+      _fc_w->at<DT>(i) = 1.0f;
+    }
+
+    _fc_b->rank(1);
+    _fc_b->dim(0) = 6;
+    _fc_b->dtype(DT);
+    _fc_b->size<DT>(6);
+    for (uint32_t i = 0; i < 6; ++i)
+    {
+      _fc_b->at<DT>(i) = 1.0f;
+    }
+
+    _add->x(input());
+    _add->y(_addi);
+    _fc->input(_add);
+    _fc->weights(_fc_w);
+    _fc->bias(_fc_b);
+
+    output()->from(_fc);
+  }
+
+  luci::CircleAdd *_add = nullptr;
+  luci::CircleFullyConnected *_fc = nullptr;
+  luci::CircleConst *_addi = nullptr;
+  luci::CircleConst *_fc_w = nullptr;
+  luci::CircleConst *_fc_b = nullptr;
+};
+
+class FuseAddToFullyConnectedBiasPassTest : public ::testing::Test
+{
+public:
+  FuseAddToFullyConnectedBiasPassTest() = default;
+
+protected:
+  FuseAddToFullyConnectedBiasPassTestGraph<loco::DataType::FLOAT32> _graph;
+  luci::FuseAddToFullyConnectedBiasPass _pass;
+};
+
+class FuseAddToFullyConnectedBiasPassS32Test : public ::testing::Test
+{
+public:
+  FuseAddToFullyConnectedBiasPassS32Test() = default;
+
+protected:
+  FuseAddToFullyConnectedBiasPassTestGraph<loco::DataType::S32> _graph;
+  luci::FuseAddToFullyConnectedBiasPass _pass;
+};
+
+} // namespace
+
+TEST_F(FuseAddToFullyConnectedBiasPassTest, name)
+{
+  auto const name = _pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FuseAddToFullyConnectedBiasPassTest, fuse_add_to_fc_bias)
+{
+  _graph.init();
+
+  EXPECT_TRUE(_pass.run(_graph.g()));
+}
+
+TEST_F(FuseAddToFullyConnectedBiasPassTest, add_fused_act_NEG)
+{
+  _graph.init();
+
+  _graph._add->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+  EXPECT_FALSE(_pass.run(_graph.g()));
+}
+
+TEST_F(FuseAddToFullyConnectedBiasPassTest, add_d2_NEG)
+{
+  _graph.init();
+
+  _graph._addi->rank(2);
+  _graph._addi->dim(0) = 1;
+  _graph._addi->dim(1) = 3;
+
+  EXPECT_FALSE(_pass.run(_graph.g()));
+}
+
+TEST_F(FuseAddToFullyConnectedBiasPassS32Test, dtype_s32_NEG)
+{
+  _graph.init();
+
+  EXPECT_FALSE(_pass.run(_graph.g()));
+}
diff --git a/compiler/luci/pass/src/FuseMulToFullyConnectedWeightsPass.cpp b/compiler/luci/pass/src/FuseMulToFullyConnectedWeightsPass.cpp
new file mode 100644
index 00000000000..816ba9e14ad
--- /dev/null
+++ b/compiler/luci/pass/src/FuseMulToFullyConnectedWeightsPass.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseMulToFullyConnectedWeightsPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include "helpers/NodeFiller.h"
+
+#define CHECK_OR_FALSE(condition) \
+  if (not(condition))             \
+    return false;
+
+namespace
+{
+
+/**
+ * Fuse Mul to following FullyConnected if possible
+ *
+ * BEFORE
+ *                  |
+ *     [CircleMul] [CircleConst] [CircleConst]
+ *           |            |       |
+ *        [CircleFullyConnected] ----------+
+ *           |
+ *
+ * AFTER
+ *     |
+ *     |               [CircleConst] [CircleConst] [CircleConst]
+ *     |                     |        |              |
+ *     |       [CircleConst] [CircleFullyConnected] [CircleMul]
+ *     |             |              |
+ *  [CircleFullyConnected] ------+
+ *     |
+ *
+ */
+bool fuse_fc_with_mul(luci::CircleFullyConnected *fc)
+{
+  CHECK_OR_FALSE(fc);
+
+  // check input is Mul
+  auto mul = dynamic_cast<luci::CircleMul *>(fc->input());
+  CHECK_OR_FALSE(mul);
+  // conditions of Mul, FC: to expect constant folding, support only F32
+  CHECK_OR_FALSE(mul->dtype() == loco::DataType::FLOAT32);
+  CHECK_OR_FALSE(mul->fusedActivationFunction() == luci::FusedActFunc::NONE);
+  CHECK_OR_FALSE(fc->dtype() == loco::DataType::FLOAT32);
+  // support only constant weights
+  auto weights = dynamic_cast<luci::CircleConst *>(fc->weights());
+  CHECK_OR_FALSE(weights);
+
+  // Check that the multiplier of Mul is constant
+  luci::CircleNode *mul_input = nullptr;
+  luci::CircleConst *mul_scale = nullptr;
+  CHECK_OR_FALSE(luci::fill(&mul_input, &mul_scale).with_commutative_args_of(mul));
+  if (mul_scale)
+  {
+    // support only 1D constant
+    CHECK_OR_FALSE(mul_scale->rank() == 1);
+  }
+
+  auto graph = fc->graph();
+
+  auto fc_weights = graph->nodes()->create<luci::CircleMul>();
+  fc_weights->x(weights);
+  fc_weights->y(mul_scale);
+  fc_weights->fusedActivationFunction(luci::FusedActFunc::NONE);
+  fc_weights->name(mul->name() + "_" + fc->name() + "_weight");
+  luci::add_origin(fc_weights,
+                   luci::composite_origin({luci::get_origin(mul), luci::get_origin(weights),
+                                           luci::get_origin(mul_scale)}));
+
+  auto fc_new = graph->nodes()->create<luci::CircleFullyConnected>();
+  fc_new->input(mul_input);
+  fc_new->weights(fc_weights);
+  fc_new->bias(fc->bias());
+  fc_new->weights_format(fc->weights_format());
+  fc_new->keep_num_dims(fc->keep_num_dims());
+  fc_new->fusedActivationFunction(fc->fusedActivationFunction());
+  fc_new->name(fc->name());
+  luci::add_origin(fc_new, luci::get_origin(fc));
+
+  replace(fc).with(fc_new);
+
+  return true;
+}
+
+} // namespace
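+
+// NOTE Why this is sound (FLOAT32, 1D scale 's' over the input depth):
+//        FC(x * s; W, b) = (x * s) W^T + b = x (W * s)^T + b
+//      where W * s multiplies input channel k of every weight row by s[k],
+//      so the Mul built above can later be constant-folded into the weights.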
+
+namespace luci
+{
+
+bool FuseMulToFullyConnectedWeightsPass::run(loco::Graph *g)
+{
+  bool changed = false;
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto fc = dynamic_cast<luci::CircleFullyConnected *>(node);
+    if (not fc)
+      continue;
+
+    if (fuse_fc_with_mul(fc))
+      changed = true;
+  }
+
+  return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseMulToFullyConnectedWeightsPass.test.cpp b/compiler/luci/pass/src/FuseMulToFullyConnectedWeightsPass.test.cpp
new file mode 100644
index 00000000000..d7c61e104a3
--- /dev/null
+++ b/compiler/luci/pass/src/FuseMulToFullyConnectedWeightsPass.test.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseMulToFullyConnectedWeightsPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+template <loco::DataType DT>
+class FuseMulToFullyConnectedWeightsPassTestGraph : public TestIOGraph
+{
+public:
+  FuseMulToFullyConnectedWeightsPassTestGraph() = default;
+
+  void init(void)
+  {
+    TestIOGraph::init({3, 4}, {3, 6});
+
+    _mul = g()->nodes()->create<luci::CircleMul>();
+    _muli = g()->nodes()->create<luci::CircleConst>();
+    _fc = g()->nodes()->create<luci::CircleFullyConnected>();
+    _fc_w = g()->nodes()->create<luci::CircleConst>();
+    _fc_b = g()->nodes()->create<luci::CircleConst>();
+
+    _mul->name("mul");
+    _muli->name("muli");
+    _fc->name("fc");
+    _fc_w->name("fc_w");
+    _fc_b->name("fc_b");
+
+    _mul->dtype(DT);
+    _fc->dtype(DT);
+    _mul->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+
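+    // the Mul constant is 1D with length equal to the FC input depth (4),
+    // so it broadcasts over the last axis of both the input and the weights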
+    _muli->rank(1);
+    _muli->dim(0) = 4;
+    _muli->dtype(DT);
+    _muli->size<DT>(4);
+    for (uint32_t i = 0; i < 4; ++i)
+    {
+      _muli->at<DT>(i) = 1.0f;
+    }
+
+    _fc_w->rank(2);
+    _fc_w->dim(0) = 6;
+    _fc_w->dim(1) = 4;
+    _fc_w->dtype(DT);
+    _fc_w->size<DT>(4 * 6);
+    for (uint32_t i = 0; i < 4 * 6; ++i)
+    {
+      _fc_w->at<DT>(i) = 1.0f;
+    }
+
+    _fc_b->rank(1);
+    _fc_b->dim(0) = 6;
+    _fc_b->dtype(DT);
+    _fc_b->size<DT>(6);
+    for (uint32_t i = 0; i < 6; ++i)
+    {
+      _fc_b->at<DT>(i) = 1.0f;
+    }
+
+    _mul->x(input());
+    _mul->y(_muli);
+    _fc->input(_mul);
+    _fc->weights(_fc_w);
+    _fc->bias(_fc_b);
+
+    output()->from(_fc);
+  }
+
+  luci::CircleMul *_mul = nullptr;
+  luci::CircleFullyConnected *_fc = nullptr;
+  luci::CircleConst *_muli = nullptr;
+  luci::CircleConst *_fc_w = nullptr;
+  luci::CircleConst *_fc_b = nullptr;
+};
+
+class FuseMulToFullyConnectedWeightsPassTest : public ::testing::Test
+{
+public:
+  FuseMulToFullyConnectedWeightsPassTest() = default;
+
+protected:
+  FuseMulToFullyConnectedWeightsPassTestGraph<loco::DataType::FLOAT32> _graph;
+  luci::FuseMulToFullyConnectedWeightsPass _pass;
+};
+
+class FuseMulToFullyConnectedWeightsPassS32Test : public ::testing::Test
+{
+public:
+  FuseMulToFullyConnectedWeightsPassS32Test() = default;
+
+protected:
+  FuseMulToFullyConnectedWeightsPassTestGraph<loco::DataType::S32> _graph;
+  luci::FuseMulToFullyConnectedWeightsPass _pass;
+};
+
+} // namespace
+
+TEST_F(FuseMulToFullyConnectedWeightsPassTest, name)
+{
+  auto const name = _pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FuseMulToFullyConnectedWeightsPassTest, fuse_mul_to_fc_weights)
+{
+  _graph.init();
+
+  EXPECT_TRUE(_pass.run(_graph.g()));
+}
+
+TEST_F(FuseMulToFullyConnectedWeightsPassTest, mul_fused_act_NEG)
+{
+  _graph.init();
+
+  _graph._mul->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+  EXPECT_FALSE(_pass.run(_graph.g()));
+}
+
+TEST_F(FuseMulToFullyConnectedWeightsPassTest, mul_d2_NEG)
+{
+  _graph.init();
+
+  _graph._muli->rank(2);
+  _graph._muli->dim(0) = 1;
+  _graph._muli->dim(1) = 3;
+
+  EXPECT_FALSE(_pass.run(_graph.g()));
+}
+
+TEST_F(FuseMulToFullyConnectedWeightsPassS32Test, dtype_s32_NEG)
+{
+  _graph.init();
+
+  EXPECT_FALSE(_pass.run(_graph.g()));
+}
diff --git a/compiler/one-cmds/how-to-use-one-commands.txt b/compiler/one-cmds/how-to-use-one-commands.txt
index 08e8a557375..fefbabf9a17 100644
--- a/compiler/one-cmds/how-to-use-one-commands.txt
+++ b/compiler/one-cmds/how-to-use-one-commands.txt
@@ -160,15 +160,18 @@ Current transformation options are
 - fold_dequantize : This removes Dequantize operation which can be folded
 - fold_dwconv : This folds Depthwise Convolution operation which can be folded
 - fold_gather : This removes Gather operation which can be folded
+- fold_mul : This removes Mul operation which can be folded
 - fold_shape : This removes Shape operation which can be folded
 - fold_sparse_to_dense : This removes SparseToDense operation which can be folded
 - forward_reshape_to_unaryop: This will move Reshape after UnaryOp for centain condition
+- fuse_add_to_fullyconnected_bias: This fuses Add operator into the bias of the following FullyConnected operator
 - fuse_add_with_conv: This fuses Add operator with the preceding Convolution operator if possible
 - fuse_add_with_fully_connected: This fuses Add operator with the preceding FullyConnected operator if possible
 - fuse_add_with_tconv: This fuses Add operator with the preceding TConv operator if possible
 - fuse_batchnorm_with_conv : This fuses BatchNorm operator to convolution operator
 - fuse_batchnorm_with_dwconv : This fuses BatchNorm operator to depthwise convolution operator
 - fuse_batchnorm_with_tconv : This fuses BatchNorm operator to transpose convolution operator
+- fuse_mul_to_fullyconnected_weights: This fuses Mul operator into the weights of the following FullyConnected operator
 - fuse_mul_with_conv: This fuses Mul with a preceding Convolution op if possible.
 - fuse_mul_with_div: This fuses Mul and Div op as Div.
 - fuse_slice_with_tconv: This fuses Slice with a preceding TConv if possible.
diff --git a/compiler/one-cmds/onelib/constant.py b/compiler/one-cmds/onelib/constant.py
index 42676d78159..8c5de1b646d 100644
--- a/compiler/one-cmds/onelib/constant.py
+++ b/compiler/one-cmds/onelib/constant.py
@@ -29,12 +29,14 @@ class CONSTANT:
         'fold_dwconv',
         'fold_fully_connected',
         'fold_gather',
+        'fold_mul',
         'fold_reshape',
         'fold_shape',
         'fold_sparse_to_dense',
         'fold_squeeze',
 
         # Operator fusion
+        'fuse_add_to_fullyconnected_bias',
         'fuse_add_with_conv',
         'fuse_add_with_tconv',
         'fuse_add_with_fully_connected',
@@ -42,6 +44,7 @@ class CONSTANT:
         'fuse_batchnorm_with_dwconv',
         'fuse_batchnorm_with_tconv',
         'fuse_activation_function',
+        'fuse_mul_to_fullyconnected_weights',
        'fuse_instnorm',
         'fuse_prelu',
         'fuse_gelu',
@@ -104,18 +107,23 @@ class CONSTANT:
         ('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'),
         ('fold_fully_connected', 'fold FullyConnected op with constant inputs'),
         ('fold_gather', 'fold Gather op'),
+        ('fold_mul', 'fold Mul op'),
         ('fold_reshape', 'fold Reshape op'),
         ('fold_shape', 'fold Shape op'),
         ('fold_sparse_to_dense', 'fold SparseToDense op'),
         ('fold_squeeze', 'fold Squeeze op'),
         ('forward_reshape_to_unaryop', 'Forward Reshape op'),
         ('forward_transpose_op', 'Forward Transpose op'),
+        ('fuse_add_to_fullyconnected_bias',
+         'fuse Add op to following FullyConnected op bias'),
         ('fuse_add_with_conv', 'fuse Add op to Convolution op'),
         ('fuse_add_with_tconv', 'fuse Add op to Transposed'),
         ('fuse_add_with_fully_connected', 'fuse Add op to FullyConnected op'),
         ('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'),
         ('fuse_batchnorm_with_dwconv', 'fuse BatchNorm op to Depthwise Convolution op'),
         ('fuse_batchnorm_with_tconv', 'fuse BatchNorm op to Transposed Convolution op'),
+        ('fuse_mul_to_fullyconnected_weights',
+         'fuse Mul op to following FullyConnected op weights'),
         ('fuse_slice_with_tconv', 'fuse Slice op to Transposed Convolution op'),
         ('fuse_bcq', 'apply Binary Coded Quantization'),
         ('fuse_preactivation_batchnorm',
diff --git a/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_000/test.recipe
new file mode 100644
index 00000000000..42d3882f480
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_000/test.recipe
@@ -0,0 +1,67 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 4 }
+}
+operand {
+  name: "add"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 4 }
+}
+operand {
+  name: "addc"
+  type: FLOAT32
+  shape { dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "fc"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 6 }
+}
+operand {
+  name: "fc_wgt"
+  type: FLOAT32
+  shape { dim: 6 dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "fc_bias"
+  type: FLOAT32
+  shape { dim: 6 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operation {
+  type: "Add"
+  input: "ifm"
+  input: "addc"
+  output: "add"
+  add_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "FullyConnected"
+  fullyconnected_options {
+    activation: NONE
+    keep_num_dims: true
+  }
+  input: "add"
+  input: "fc_wgt"
+  input: "fc_bias"
+  output: "fc"
+}
+input: "ifm"
+output: "fc"
diff --git a/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_000/test.rule b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_000/test.rule
new file mode 100644
index 00000000000..d90f632e7fb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_000/test.rule
@@ -0,0 +1,7 @@
+# To check if FC(Add(lhs, rhs), filter, bias) is converted to
+# FC(lhs, filter, FC(rhs, filter, bias))
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "NO_ADD" $(op_count ADD) '=' 0
+RULE "FC_EXIST" $(op_count FULLY_CONNECTED) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_001/test.recipe b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_001/test.recipe
new file mode 100644
index 00000000000..6610e9c3bc4
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_001/test.recipe
@@ -0,0 +1,67 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 4 }
+}
+operand {
+  name: "add"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 4 }
+}
+operand {
+  name: "addc"
+  type: FLOAT32
+  shape { dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "fc"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 6 }
+}
+operand {
+  name: "fc_wgt"
+  type: FLOAT32
+  shape { dim: 6 dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "fc_bias"
+  type: FLOAT32
+  shape { dim: 6 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operation {
+  type: "Add"
+  input: "ifm"
+  input: "addc"
+  output: "add"
+  add_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "FullyConnected"
+  fullyconnected_options {
+    activation: RELU
+    keep_num_dims: true
+  }
+  input: "add"
+  input: "fc_wgt"
+  input: "fc_bias"
+  output: "fc"
+}
+input: "ifm"
+output: "fc"
diff --git a/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_001/test.rule b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_001/test.rule
new file mode 100644
index 00000000000..d90f632e7fb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_001/test.rule
@@ -0,0 +1,7 @@
+# To check if FC(Add(lhs, rhs), filter, bias) is converted to
+# FC(lhs, filter, FC(rhs, filter, bias))
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "NO_ADD" $(op_count ADD) '=' 0
+RULE "FC_EXIST" $(op_count FULLY_CONNECTED) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_002/test.recipe b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_002/test.recipe
new file mode 100644
index 00000000000..7420981b0d9
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_002/test.recipe
@@ -0,0 +1,57 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 4 }
+}
+operand {
+  name: "add"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 4 }
+}
+operand {
+  name: "addc"
+  type: FLOAT32
+  shape { dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "fc"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 6 }
+}
+operand {
+  name: "fc_wgt"
+  type: FLOAT32
+  shape { dim: 6 dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operation {
+  type: "Add"
+  input: "ifm"
+  input: "addc"
+  output: "add"
+  add_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "FullyConnected"
+  fullyconnected_options {
+    activation: NONE
+    keep_num_dims: true
+  }
+  input: "add"
+  input: "fc_wgt"
+  input: ""
+  output: "fc"
+}
+input: "ifm"
+output: "fc"
diff --git a/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_002/test.rule b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_002/test.rule
new file mode 100644
index 00000000000..d90f632e7fb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_002/test.rule
@@ -0,0 +1,7 @@
+# To check if FC(Add(lhs, rhs), filter, bias) is converted to
+# FC(lhs, filter, FC(rhs, filter, bias))
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "NO_ADD" $(op_count ADD) '=' 0 +RULE "FC_EXIST" $(op_count FULLY_CONNECTED) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_000/test.recipe new file mode 100644 index 00000000000..bbb7761014f --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_000/test.recipe @@ -0,0 +1,67 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 3 dim: 1 dim: 4 } +} +operand { + name: "mul" + type: FLOAT32 + shape { dim: 3 dim: 1 dim: 4 } +} +operand { + name: "mulc" + type: FLOAT32 + shape { dim: 4 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "fc" + type: FLOAT32 + shape { dim: 3 dim: 1 dim: 6 } +} +operand { + name: "fc_wgt" + type: FLOAT32 + shape { dim: 6 dim: 4 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "fc_bias" + type: FLOAT32 + shape { dim: 6 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operation { + type: "Mul" + input: "ifm" + input: "mulc" + output: "mul" + mul_options { + activation: NONE + } +} +operation { + type: "FullyConnected" + fullyconnected_options { + activation: NONE + keep_num_dims: true + } + input: "mul" + input: "fc_wgt" + input: "fc_bias" + output: "fc" +} +input: "ifm" +output: "fc" diff --git a/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_000/test.rule b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_000/test.rule new file mode 100644 index 00000000000..53637060cec --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_000/test.rule @@ -0,0 +1,8 @@ +# To check if FC(Mul(lhs, rhs), filter, bias) is converted to +# FC(lhs, Mul(filter, rhs), bias) and then Mul is folded to +# FC(lhs, filter', bias) + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "NO_MUL" $(op_count MUL) '=' 0 +RULE "FC_EXIST" $(op_count FULLY_CONNECTED) '=' 1 diff --git a/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_001/test.recipe b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_001/test.recipe new file mode 100644 index 00000000000..f0f7efd5638 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_001/test.recipe @@ -0,0 +1,67 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 3 dim: 1 dim: 4 } +} +operand { + name: "mul" + type: FLOAT32 + shape { dim: 3 dim: 1 dim: 4 } +} +operand { + name: "mulc" + type: FLOAT32 + shape { dim: 4 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "fc" + type: FLOAT32 + shape { dim: 3 dim: 1 dim: 6 } +} +operand { + name: "fc_wgt" + type: FLOAT32 + shape { dim: 6 dim: 4 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "fc_bias" + type: FLOAT32 + shape { dim: 6 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operation { + type: "Mul" + input: "ifm" + input: "mulc" + output: "mul" + mul_options { + activation: NONE + } +} +operation { + type: "FullyConnected" + fullyconnected_options { + activation: RELU + keep_num_dims: true + } + input: "mul" + input: "fc_wgt" + input: "fc_bias" + output: "fc" +} +input: "ifm" +output: "fc" diff --git a/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_001/test.rule b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_001/test.rule new file mode 100644 index 00000000000..4abf05ccadf --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_001/test.rule @@ -0,0 +1,9 @@ +# To check if FC(Mul(lhs, rhs), filter, bias) is converted to +# FC(lhs, Mul(filter, rhs), bias) and then Mul is 
+# FC(lhs, filter', bias)
+
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "FC_EXIST" $(op_count FULLY_CONNECTED) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_002/test.recipe b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_002/test.recipe
new file mode 100644
index 00000000000..ad022cc4ba7
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_002/test.recipe
@@ -0,0 +1,57 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 4 }
+}
+operand {
+  name: "mul"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 4 }
+}
+operand {
+  name: "mulc"
+  type: FLOAT32
+  shape { dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "fc"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 6 }
+}
+operand {
+  name: "fc_wgt"
+  type: FLOAT32
+  shape { dim: 6 dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operation {
+  type: "Mul"
+  input: "ifm"
+  input: "mulc"
+  output: "mul"
+  mul_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "FullyConnected"
+  fullyconnected_options {
+    activation: RELU
+    keep_num_dims: true
+  }
+  input: "mul"
+  input: "fc_wgt"
+  input: ""
+  output: "fc"
+}
+input: "ifm"
+output: "fc"
diff --git a/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_002/test.rule b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_002/test.rule
new file mode 100644
index 00000000000..53637060cec
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_002/test.rule
@@ -0,0 +1,8 @@
+# To check if FC(Mul(lhs, rhs), filter, bias) is converted to
+# FC(lhs, Mul(filter, rhs), bias) and then Mul is folded to
+# FC(lhs, filter', bias)
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "FC_EXIST" $(op_count FULLY_CONNECTED) '=' 1