diff --git a/compiler/circle2circle-dredd-recipe-test/test.lst b/compiler/circle2circle-dredd-recipe-test/test.lst index 6f32a2966af..09220b8e81e 100644 --- a/compiler/circle2circle-dredd-recipe-test/test.lst +++ b/compiler/circle2circle-dredd-recipe-test/test.lst @@ -63,6 +63,7 @@ Add(Net_InstanceNorm_005 PASS fuse_instnorm) Add(Net_InstanceNorm_006 PASS fuse_instnorm) Add(Net_InstanceNorm_007 PASS fuse_instnorm) Add(Net_InstanceNorm_008 PASS fuse_instnorm) +Add(Net_LayerNorm_000 PASS fuse_layernorm_to_instnorm) Add(Net_Maximum_Minimum_000 PASS transform_min_max_to_relu6) Add(Net_Mul_Add_000 PASS remove_unnecessary_add) Add(Net_Mul_Add_001 PASS remove_unnecessary_add) diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp index 7112bcbcec4..9c307ffa5f3 100644 --- a/compiler/circle2circle/src/Circle2Circle.cpp +++ b/compiler/circle2circle/src/Circle2Circle.cpp @@ -109,6 +109,8 @@ int entry(int argc, char **argv) "This will fuse BatchNorm operators to Transposed Convolution operator"); add_switch(arser, "--fuse_bcq", "This will fuse operators and apply Binary Coded Quantization"); add_switch(arser, "--fuse_instnorm", "This will fuse operators to InstanceNorm operator"); + add_switch(arser, "--fuse_layernorm_to_instnorm", + "This will fuse decomposed LayerNorm operators to InstanceNorm operator"); add_switch(arser, "--fuse_mean_with_mean", "This will fuse two Mean operations when they follow one by one. 
This will fold them " "into one operation and merge reduction indices."); @@ -297,6 +299,7 @@ int entry(int argc, char **argv) option_str_to_enum["fuse_slice_with_tconv"] = Algorithms::FuseSliceWithTConv; option_str_to_enum["fuse_bcq"] = Algorithms::FuseBCQ; option_str_to_enum["fuse_instnorm"] = Algorithms::FuseInstanceNorm; + option_str_to_enum["fuse_layernorm_to_instnorm"] = Algorithms::FuseLayerNormToInstNorm; option_str_to_enum["fuse_mean_with_mean"] = Algorithms::FuseMeanWithMean; option_str_to_enum["fuse_mul_with_conv"] = Algorithms::FuseMulWithConv; option_str_to_enum["fuse_mul_with_div"] = Algorithms::FuseMulWithDiv; diff --git a/compiler/luci-pass-value-py-test/test.lst b/compiler/luci-pass-value-py-test/test.lst index 7812be16991..6bcfe5471f9 100644 --- a/compiler/luci-pass-value-py-test/test.lst +++ b/compiler/luci-pass-value-py-test/test.lst @@ -42,6 +42,7 @@ eval(Net_InstanceNorm_001 fuse_instnorm) eval(Net_InstanceNorm_002 fuse_instnorm) eval(Net_InstanceNorm_003 fuse_instnorm) eval(Net_InstanceNorm_008 fuse_instnorm) +eval(Net_LayerNorm_000 fuse_layernorm_to_instnorm) eval(Net_Mul_Add_000 remove_unnecessary_add) eval(Net_Mul_Add_001 remove_unnecessary_add) eval(Net_Mul_Add_002 remove_unnecessary_add) diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.h b/compiler/luci/lang/include/luci/IR/CircleNodes.h index 0cae7a8f205..3c4e378772d 100644 --- a/compiler/luci/lang/include/luci/IR/CircleNodes.h +++ b/compiler/luci/lang/include/luci/IR/CircleNodes.h @@ -146,6 +146,7 @@ // Virtual nodes #include "Nodes/CircleConst.h" #include "Nodes/CircleInput.h" +#include "Nodes/CircleLayerNorm.h" #include "Nodes/CircleOutput.h" #include "Nodes/CircleVariable.h" // Multi-output virtual nodes diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.lst b/compiler/luci/lang/include/luci/IR/CircleNodes.lst index 50ddfbcd7a9..c5c807ffe5e 100644 --- a/compiler/luci/lang/include/luci/IR/CircleNodes.lst +++ b/compiler/luci/lang/include/luci/IR/CircleNodes.lst @@ 
-144,6 +144,7 @@ CIRCLE_NODE(ROPE, CircleRoPE) // Virtual node(s) CIRCLE_VNODE(CIRCLECONST, CircleConst) CIRCLE_VNODE(CIRCLEINPUT, CircleInput) +CIRCLE_VNODE(CIRCLELAYERNORM, CircleLayerNorm) CIRCLE_VNODE(CIRCLEOUTPUT, CircleOutput) CIRCLE_VNODE(CIRCLEOUTPUTDUMMY, CircleOutputDummy) CIRCLE_VNODE(CIRCLEOUTPUTEXCLUDE, CircleOutputExclude) diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleLayerNorm.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleLayerNorm.h new file mode 100644 index 00000000000..ceea28d2aab --- /dev/null +++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleLayerNorm.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_IR_CIRCLE_LAYER_NORM_H__ +#define __LUCI_IR_CIRCLE_LAYER_NORM_H__ + +#include "luci/IR/CircleNodeDecl.h" +#include "luci/IR/CircleOpcode.h" + +#include "luci/IR/AttrFusedActFunc.h" +#include "luci/IR/CircleNodeMixins.h" + +namespace luci +{ + +/** + * @brief Virtual LayerNorm + */ +class CircleLayerNorm final + : public FixedArityNode<3, CircleNodeImpl> +{ +public: + loco::Node *input(void) const { return at(0)->node(); } + void input(loco::Node *node) { at(0)->node(node); } + + loco::Node *gamma(void) const { return at(1)->node(); } + void gamma(loco::Node *node) { at(1)->node(node); } + + loco::Node *beta(void) const { return at(2)->node(); } + void beta(loco::Node *node) { at(2)->node(node); } + +public: + float epsilon() const { return _epsilon; } + void epsilon(float epsilon) { _epsilon = epsilon; } + +private: + float _epsilon{1e-05}; +}; + +} // namespace luci + +#endif // __LUCI_IR_CIRCLE_LAYER_NORM_H__ diff --git a/compiler/luci/logex/include/luci/CircleNodeSummaryBuilders.h b/compiler/luci/logex/include/luci/CircleNodeSummaryBuilders.h index 6884ef2e36e..ff47cc2704f 100644 --- a/compiler/luci/logex/include/luci/CircleNodeSummaryBuilders.h +++ b/compiler/luci/logex/include/luci/CircleNodeSummaryBuilders.h @@ -870,6 +870,13 @@ class CircleWhileOutSummaryBuilder final : public CircleNodeSummaryBuilder std::vector get_input_names(const luci::CircleNode *); }; +class CircleLayerNormSummaryBuilder final : public CircleNodeSummaryBuilder +{ +private: + std::vector get_input_names(const luci::CircleNode *); + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); +}; + } // namespace luci #endif // __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDERS__ diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp index 1768013bb0f..3ef149bce94 100644 --- a/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp +++ b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp @@ 
-265,6 +265,7 @@ CircleNodeSummaryBuilder::create_builder(const luci::CircleNode *node) CIRCLE_NODE(CIRCLECUSTOMOUT, CircleCustomOutSummaryBuilder) CIRCLE_NODE(CIRCLEIFOUT, CircleIfOutSummaryBuilder) CIRCLE_NODE(CIRCLEINPUT, CircleInputSummaryBuilder) + CIRCLE_NODE(CIRCLELAYERNORM, CircleLayerNormSummaryBuilder) CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV4OUT, CircleNonMaxSuppressionV4OutSummaryBuilder) CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV5OUT, CircleNonMaxSuppressionV5OutSummaryBuilder) CIRCLE_NODE(CIRCLEOUTPUT, CircleOutputSummaryBuilder) diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp index 8297ad4cad0..116fddad8b3 100644 --- a/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp +++ b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp @@ -1220,6 +1220,18 @@ void CircleWhileSummaryBuilder::build_attributes(const luci::CircleNode *node, s.args().append("else_branch", std::to_string(circle_while->body_branch())); } +std::vector CircleLayerNormSummaryBuilder::get_input_names(const luci::CircleNode *) +{ + return {"input", "gamma", "beta"}; +} + +void CircleLayerNormSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto layernorm = loco::must_cast(node); + s.args().append("epsilon", std::to_string(layernorm->epsilon())); +} + std::vector CircleOutputSummaryBuilder::get_input_names(const luci::CircleNode *) { return {"from"}; diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h index ed7cbf611df..a703e2e845a 100644 --- a/compiler/luci/pass/include/luci/CircleOptimizer.h +++ b/compiler/luci/pass/include/luci/CircleOptimizer.h @@ -46,6 +46,7 @@ class CircleOptimizer final FuseBCQ, FuseHorizontalFullyConnected, FuseInstanceNorm, + FuseLayerNormToInstNorm, FuseMeanWithMean, FuseMulWithConv, FuseMulWithDiv, diff --git a/compiler/luci/pass/include/luci/Pass/FuseLayerNormToInstNormPass.h 
b/compiler/luci/pass/include/luci/Pass/FuseLayerNormToInstNormPass.h new file mode 100644 index 00000000000..88d22267ede --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/FuseLayerNormToInstNormPass.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_FUSE_LAYER_NORM_TO_INST_NORM_PASS_H__ +#define __LUCI_FUSE_LAYER_NORM_TO_INST_NORM_PASS_H__ + +#include + +namespace luci +{ + +/** + * @brief Class to fuse certain pattern of subgraph into CircleLayerNorm + * and then to CircleInstanceNorm with auxiliary nodes + * + * For detailed subgraph pattern to be fused, please check its implementation. 
+ */ +struct FuseLayerNormToInstNormPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::FuseLayerNormToInstNormPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_FUSE_LAYER_NORM_TO_INST_NORM_PASS_H__ diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp index ef6a2d86a4d..79abb11f58c 100644 --- a/compiler/luci/pass/src/CircleOptimizer.cpp +++ b/compiler/luci/pass/src/CircleOptimizer.cpp @@ -45,6 +45,7 @@ #include "luci/Pass/FuseBCQPass.h" #include "luci/Pass/FuseMulToFullyConnectedWeightsPass.h" #include "luci/Pass/FuseInstanceNormPass.h" +#include "luci/Pass/FuseLayerNormToInstNormPass.h" #include "luci/Pass/FuseMeanWithMeanPass.h" #include "luci/Pass/FuseMulWithConvPass.h" #include "luci/Pass/FuseMulWithDivPass.h" @@ -336,6 +337,7 @@ void CircleOptimizer::optimize(loco::Graph *g) const option_to_pass[Options::Algorithm::ResolveCustomOpMaxPoolWithArgmax] = &createPassInstance; option_to_pass[Options::Algorithm::ResolveCustomOpSplitV] = &createPassInstance; option_to_pass[Options::Algorithm::FuseInstanceNorm] = &createPassInstance; + option_to_pass[Options::Algorithm::FuseLayerNormToInstNorm] = &createPassInstance; option_to_pass[Options::Algorithm::FuseBatchNormWithConv] = &createPassInstance; option_to_pass[Options::Algorithm::FuseBatchNormWithDwConv] = &createPassInstance; option_to_pass[Options::Algorithm::FuseBatchNormWithTConv] = &createPassInstance; diff --git a/compiler/luci/pass/src/FuseLayerNormToInstNormPass.cpp b/compiler/luci/pass/src/FuseLayerNormToInstNormPass.cpp new file mode 100644 index 00000000000..c556f2f62f7 --- /dev/null +++ b/compiler/luci/pass/src/FuseLayerNormToInstNormPass.cpp @@ -0,0 +1,417 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/FuseLayerNormToInstNormPass.h" +#include "helpers/NodeFiller.h" + +#include + +#include +#include + +#include + +/** + * Below diagram shows decomposed LayerNorm pattern to fuse to LayerNorm + * + * input + * | + * +------+ + * | | + * | V + * | mean_in + * | | + * | V + * +---> sub + * | + * +------+ + * | | + * | V + * | mul + * | | + * | V + * | mean_mul + * | | + * | V + * | add_eps + * | | + * | V + * | rsqrt + * | | + * | V + * +-> mul_sub + * | + * V + * output + * + * Below diagram shows decomposed graph with InstanceNorm from LayerNorm + * + * input + * (N,L,D) + * | + * V + * transpose_in + * (N,D,L) + * | + * V + * reshape_in + * (N,1,D,L) + * | + * V + * instancenorm + * (N,1,D,L) + * | + * V + * reshape_out + * (N,D,L) + * | + * V + * transpose_out + * (N,L,D) + * | + * V + * output + */ + +namespace luci +{ + +namespace +{ + +class LayerNormPattern +{ +public: + LayerNormPattern(luci::CircleMul *candidate) { _mul_sub = candidate; } + ~LayerNormPattern() = default; + +public: + bool matched(void); + +public: + uint32_t _batch = 0; + uint32_t _length = 0; + uint32_t _dim = 0; + luci::CircleNode *_input = nullptr; + luci::CircleMean *_mean_in = nullptr; + luci::CircleSub *_sub = nullptr; + luci::CircleMul *_mul = nullptr; + luci::CircleMean *_mean_mul = nullptr; + luci::CircleAdd *_add_eps = nullptr; + luci::CircleRsqrt *_rsqrt = nullptr; + luci::CircleMul *_mul_sub = nullptr; + float _epsilon = 0.00001f; +}; + +#define CHECK_OR_FALSE(condition) \ + if (not(condition)) \ + return false; + +bool 
LayerNormPattern::matched(void) +{ + CHECK_OR_FALSE(_mul_sub != nullptr); + CHECK_OR_FALSE(_mul_sub->rank() == 3); + CHECK_OR_FALSE(_mul_sub->dtype() == loco::DataType::FLOAT32); + + CHECK_OR_FALSE(luci::fill(&_sub, &_rsqrt).with_commutative_args_of(_mul_sub)); + _add_eps = dynamic_cast(_rsqrt->x()); + CHECK_OR_FALSE(_add_eps != nullptr); + + luci::CircleConst *add_epsilon = nullptr; + CHECK_OR_FALSE(luci::fill(&_mean_mul, &add_epsilon).with_commutative_args_of(_add_eps)); + CHECK_OR_FALSE(_mean_mul->keep_dims()); + CHECK_OR_FALSE(add_epsilon->dtype() == loco::DataType::FLOAT32); + CHECK_OR_FALSE(add_epsilon->size() == 1); + + _mul = dynamic_cast(_mean_mul->input()); + CHECK_OR_FALSE(_mul != nullptr); + luci::CircleConst *mean_mul_indices = + dynamic_cast(_mean_mul->reduction_indices()); + CHECK_OR_FALSE(mean_mul_indices != nullptr); + // TODO check mean_mul_indices value + + luci::CircleSub *sub1 = nullptr; + luci::CircleSub *sub2 = nullptr; + CHECK_OR_FALSE(luci::fill(&sub1, &sub2).with_commutative_args_of(_mul)); + CHECK_OR_FALSE(sub1 == _sub); + CHECK_OR_FALSE(sub2 == _sub); + + _input = dynamic_cast(_sub->x()); + CHECK_OR_FALSE(_input != nullptr); + _mean_in = dynamic_cast(_sub->y()); + CHECK_OR_FALSE(_mean_in != nullptr); + CHECK_OR_FALSE(_mean_in->keep_dims()); + + luci::CircleNode *input = dynamic_cast(_mean_in->input()); + CHECK_OR_FALSE(input == _input); + luci::CircleConst *mean_in_indices = + dynamic_cast(_mean_in->reduction_indices()); + CHECK_OR_FALSE(mean_in_indices != nullptr); + // TODO check mean_in_indices value + + return true; +} + +} // namespace + +namespace +{ + +class FuseLayerNorm final +{ +public: + FuseLayerNorm(const LayerNormPattern *p) : _p(p) {} + +public: + void apply(void); + +private: + luci::CircleLayerNorm *create_layernorm(loco::Graph *g); + +private: + const LayerNormPattern *_p; +}; + +luci::CircleConst *make_const(loco::Graph *g, uint32_t dim, float value) +{ + auto const_one = g->nodes()->create(); + 
const_one->dtype(loco::DataType::FLOAT32); + const_one->rank(1); + const_one->size(dim); + for (uint32_t d = 0; d < dim; ++d) + const_one->at(d) = value; + return const_one; +} + +luci::CircleLayerNorm *FuseLayerNorm::create_layernorm(loco::Graph *g) +{ + assert(g); + + CircleConst *gamma = make_const(g, 1, 1.0f); + CircleConst *beta = make_const(g, 1, 0.0f); + + auto ln = g->nodes()->create(); + ln->input(_p->_input); + ln->gamma(gamma); + ln->beta(beta); + ln->epsilon(_p->_epsilon); + ln->name(_p->_mul_sub->name() + "_layernorm"); + + gamma->name(_p->_mul_sub->name() + "_layernorm/gamma"); + beta->name(_p->_mul_sub->name() + "_layernorm/beta"); + + return ln; +} + +void FuseLayerNorm::apply() +{ + auto g = _p->_mul_sub->graph(); + auto layernorm = create_layernorm(g); + + // set origin + std::vector> origin_vec{ + luci::get_origin(_p->_mean_in), luci::get_origin(_p->_sub), luci::get_origin(_p->_mul), + luci::get_origin(_p->_mean_mul), luci::get_origin(_p->_add_eps), luci::get_origin(_p->_rsqrt)}; + + luci::add_origin(layernorm, luci::composite_origin(origin_vec)); + + replace(_p->_mul_sub).with(layernorm); +} + +} // namespace + +namespace +{ + +class DecomposeToInstanceNorm final +{ +public: + DecomposeToInstanceNorm(CircleLayerNorm *node) : _layernorm(node) {} + +public: + void apply(void); + +private: + luci::CircleTranspose *create_subgraph(loco::Graph *g); + +private: + CircleLayerNorm *_layernorm; +}; + +luci::CircleConst *create_transpose_perm(loco::Graph *g, const std::initializer_list perm) +{ + auto const_perm = g->nodes()->create(); + const_perm->dtype(loco::DataType::S32); + const_perm->size(perm.size()); + const_perm->rank(1); + const_perm->dim(0) = perm.size(); + uint32_t i = 0; + for (auto p = perm.begin(); p != perm.end(); ++p, ++i) + const_perm->at(i) = *p; + const_perm->shape_status(luci::ShapeStatus::VALID); + return const_perm; +} + +void setNewShape(luci::CircleReshape *reshape, const std::initializer_list shape) +{ + 
reshape->newShape()->rank(shape.size()); + uint32_t i = 0; + for (auto s = shape.begin(); s != shape.end(); ++s, ++i) + { + reshape->newShape()->dim(i) = *s; + } +} + +// creates with +// Transpose [N, D, L] -> Reshape [N, 1, D, L] -> +// InstanceNorm [N, 1, D, L] -> +// Reshape [N, D, L] -> Transpose [N, L, D] +luci::CircleTranspose *DecomposeToInstanceNorm::create_subgraph(loco::Graph *g) +{ + auto input = loco::must_cast(_layernorm->input()); + + auto name = _layernorm->name(); + auto origin = luci::get_origin(_layernorm); + assert(_layernorm->rank() == 3); + auto dim_N = _layernorm->dim(0).value(); + auto dim_L = _layernorm->dim(1).value(); + auto dim_D = _layernorm->dim(2).value(); + + auto perm_in = create_transpose_perm(g, {0, 2, 1}); + perm_in->name(name + "/Transpose1/perm"); + luci::add_origin(perm_in, origin); + auto transpose_in = g->nodes()->create(); + transpose_in->a(input); + transpose_in->perm(perm_in); + transpose_in->name(name + "/Transpose1"); + luci::add_origin(transpose_in, origin); + + auto reshape_in_d = g->nodes()->create(); + reshape_in_d->name(name + "/Reshape1/dummy"); + reshape_in_d->dtype(loco::DataType::S32); + reshape_in_d->rank(0); + auto rehape_in = g->nodes()->create(); + rehape_in->tensor(transpose_in); + rehape_in->shape(reshape_in_d); + setNewShape(rehape_in, {dim_N, 1, dim_D, dim_L}); + rehape_in->name(name + "/Reshape1"); + luci::add_origin(rehape_in, origin); + + auto *instnorm = g->nodes()->create(); + instnorm->input(rehape_in); + instnorm->gamma((_layernorm->gamma())); + instnorm->beta((_layernorm->beta())); + instnorm->fusedActivationFunction(luci::FusedActFunc::NONE); + instnorm->epsilon(_layernorm->epsilon()); + instnorm->name(name + "/InstanceNorm"); + luci::add_origin(instnorm, origin); + + auto reshape_out_d = g->nodes()->create(); + reshape_out_d->name(name + "/Reshape2/dummy"); + reshape_out_d->dtype(loco::DataType::S32); + reshape_out_d->rank(0); + auto rehape_out = g->nodes()->create(); + 
rehape_out->tensor(instnorm); + rehape_out->shape(reshape_out_d); + setNewShape(rehape_out, {dim_N, dim_D, dim_L}); + rehape_out->name(name + "/Reshape2"); + luci::add_origin(rehape_out, origin); + + auto perm_out = create_transpose_perm(g, {0, 2, 1}); + perm_out->name(name + "/Transpose2/perm"); + luci::add_origin(perm_out, origin); + auto transpose_out = g->nodes()->create(); + transpose_out->a(rehape_out); + transpose_out->perm(perm_out); + transpose_out->name(name + "/Transpose2"); + luci::add_origin(transpose_out, origin); + + return transpose_out; +} + +void DecomposeToInstanceNorm::apply(void) +{ + auto g = _layernorm->graph(); + auto transpose = create_subgraph(g); + + replace(_layernorm).with(transpose); +} + +} // namespace + +namespace +{ + +bool fuse_layernorm(luci::CircleMul *mul) +{ + assert(mul); + + LayerNormPattern pattern(mul); + if (pattern.matched()) + { + FuseLayerNorm fuse(&pattern); + fuse.apply(); + return true; + } + return false; +} + +bool convert_to_instancenorm(luci::CircleLayerNorm *layernorm) +{ + CHECK_OR_FALSE(layernorm->rank() == 3); + CHECK_OR_FALSE(layernorm->dtype() == loco::DataType::FLOAT32); + + DecomposeToInstanceNorm decomp(layernorm); + decomp.apply(); + + return true; +} + +} // namespace + +bool FuseLayerNormToInstNormPass::run(loco::Graph *g) +{ + bool changed = false; + + // fuse certain sub-graph to CircleLayerNorm + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto mul = dynamic_cast(node); + if (mul != nullptr) + { + if (fuse_layernorm(mul)) + changed = true; + } + } + // if there is any conversion, return as changed so that shape-dtype is infered + if (changed) + return changed; + + // convert CircleLayerNorm to certain sub-graph with CircleInstanceNorm + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto layernorm = dynamic_cast(node); + if (layernorm != nullptr) + { + if (convert_to_instancenorm(layernorm)) + changed = true; + } + } + + return changed; +} + +} // namespace 
luci diff --git a/compiler/luci/service/src/CircleShapeInferenceRule.cpp b/compiler/luci/service/src/CircleShapeInferenceRule.cpp index 80229d4566f..29e2640cd94 100644 --- a/compiler/luci/service/src/CircleShapeInferenceRule.cpp +++ b/compiler/luci/service/src/CircleShapeInferenceRule.cpp @@ -1522,6 +1522,12 @@ loco::NodeShape infer_input(const luci::CircleInput *node) return loco::NodeShape{shape}; } +loco::NodeShape infer_layernorm(const luci::CircleLayerNorm *node) +{ + auto input_shape = luci::shape_get(node->input()).as(); + return loco::NodeShape{input_shape}; +} + loco::NodeShape infer_output(const luci::CircleOutput *node) { auto graph_outputs = node->graph()->outputs(); @@ -2216,6 +2222,8 @@ class ShapeInferenceAlgorithm final : public luci::CircleNodeVisitordtype(); } + loco::DataType visit(const luci::CircleLayerNorm *node) final + { + return luci::dtype_get(node->input()); + } + loco::DataType visit(const luci::CircleOutput *node) final { auto graph_outputs = node->graph()->outputs(); diff --git a/res/TensorFlowLiteRecipes/Net_LayerNorm_000/test.recipe b/res/TensorFlowLiteRecipes/Net_LayerNorm_000/test.recipe new file mode 100644 index 00000000000..4377e60aad0 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_LayerNorm_000/test.recipe @@ -0,0 +1,157 @@ +# +# Generated with tflchef-reverse +# refer https://github.com/Samsung/ONE/issues/14467 +# +operand { + name: "serving_default_input:0" + type: FLOAT32 + shape { + dim: 1 + dim: 16 + dim: 4 + } +} +operand { + name: "onnx_tf_prefix_/ln/Constant_1" + type: FLOAT32 + shape { + } + filler { + tag: "explicit" + arg: "1e-05" + } +} +operand { + name: "onnx_tf_prefix_/ln/ReduceMean/reduction_indices" + type: INT32 + shape { + } + filler { + tag: "explicit" + arg: "-1" + } +} +operand { + name: "onnx_tf_prefix_/ln/ReduceMean" + type: FLOAT32 + shape { + dim: 1 + dim: 16 + dim: 1 + } +} +operand { + name: "onnx_tf_prefix_/ln/Sub" + type: FLOAT32 + shape { + dim: 1 + dim: 16 + dim: 4 + } +} +operand { + name: 
"onnx_tf_prefix_/ln/Pow;onnx_tf_prefix_/ln/Constant" + type: FLOAT32 + shape { + dim: 1 + dim: 16 + dim: 4 + } +} +operand { + name: "onnx_tf_prefix_/ln/ReduceMean_1" + type: FLOAT32 + shape { + dim: 1 + dim: 16 + dim: 1 + } +} +operand { + name: "onnx_tf_prefix_/ln/Add" + type: FLOAT32 + shape { + dim: 1 + dim: 16 + dim: 1 + } +} +operand { + name: "onnx_tf_prefix_/ln/Div;onnx_tf_prefix_/ln/Sqrt" + type: FLOAT32 + shape { + dim: 1 + dim: 16 + dim: 1 + } +} +operand { + name: "PartitionedCall:0" + type: FLOAT32 + shape { + dim: 1 + dim: 16 + dim: 4 + } +} +operation { + type: "Mean" + input: "serving_default_input:0" + input: "onnx_tf_prefix_/ln/ReduceMean/reduction_indices" + output: "onnx_tf_prefix_/ln/ReduceMean" + mean_options { + keep_dims: true + } +} +operation { + type: "Sub" + input: "serving_default_input:0" + input: "onnx_tf_prefix_/ln/ReduceMean" + output: "onnx_tf_prefix_/ln/Sub" + sub_options { + activation: NONE + } +} +operation { + type: "Mul" + input: "onnx_tf_prefix_/ln/Sub" + input: "onnx_tf_prefix_/ln/Sub" + output: "onnx_tf_prefix_/ln/Pow;onnx_tf_prefix_/ln/Constant" + mul_options { + activation: NONE + } +} +operation { + type: "Mean" + input: "onnx_tf_prefix_/ln/Pow;onnx_tf_prefix_/ln/Constant" + input: "onnx_tf_prefix_/ln/ReduceMean/reduction_indices" + output: "onnx_tf_prefix_/ln/ReduceMean_1" + mean_options { + keep_dims: true + } +} +operation { + type: "Add" + input: "onnx_tf_prefix_/ln/ReduceMean_1" + input: "onnx_tf_prefix_/ln/Constant_1" + output: "onnx_tf_prefix_/ln/Add" + add_options { + activation: NONE + } +} +operation { + type: "Rsqrt" + input: "onnx_tf_prefix_/ln/Add" + output: "onnx_tf_prefix_/ln/Div;onnx_tf_prefix_/ln/Sqrt" +} +operation { + type: "Mul" + input: "onnx_tf_prefix_/ln/Sub" + input: "onnx_tf_prefix_/ln/Div;onnx_tf_prefix_/ln/Sqrt" + output: "PartitionedCall:0" + mul_options { + activation: NONE + } +} +input: "serving_default_input:0" +output: "PartitionedCall:0" diff --git 
a/res/TensorFlowLiteRecipes/Net_LayerNorm_000/test.rule b/res/TensorFlowLiteRecipes/Net_LayerNorm_000/test.rule new file mode 100644 index 00000000000..5cf14f93738 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_LayerNorm_000/test.rule @@ -0,0 +1,12 @@ +# To check if this network is converted to circle InstanceNorm op + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "INSTANCE_NORM_EXIST" $(op_count INSTANCE_NORM) '=' 1 +RULE "TRANSPOSE_EXIST" $(op_count TRANSPOSE) '=' 2 +RULE "RESHAPE_EXIST" $(op_count RESHAPE) '=' 2 +RULE "NO_MEAN" $(op_count MEAN) '=' 0 +RULE "NO_RSQRT" $(op_count RSQRT) '=' 0 +RULE "NO_ADD" $(op_count ADD) '=' 0 +RULE "NO_MUL" $(op_count MUL) '=' 0 +RULE "NO_SUB" $(op_count SUB) '=' 0