From c433bc873bd64c5ef58c4a6e10b66640d9e746af Mon Sep 17 00:00:00 2001
From: SaeHie Park <saehie.park@samsung.com>
Date: Fri, 12 Jul 2024 15:08:26 +0900
Subject: [PATCH] DRAFT CFe fuse Mul Add to FullyConnected

ongoing draft to fuse Mul Add to FullyConnected.

Signed-off-by: SaeHie Park <saehie.park@samsung.com>
---
 .../circle2circle-dredd-recipe-test/test.lst  |   6 +
 compiler/circle2circle/src/Circle2Circle.cpp  |  11 ++
 compiler/luci-pass-value-py-test/test.lst     |   8 +-
 .../luci-pass-value-py-test/test_luci_eval.py |   9 +-
 .../luci/pass/include/luci/CircleOptimizer.h  |   3 +
 .../luci/pass/include/luci/Pass/FoldMulPass.h |  38 +++++
 .../Pass/FuseAddToFullyConnectedBiasPass.h    |  37 ++++
 .../Pass/FuseMulToFullyConnectedWeightsPass.h |  37 ++++
 compiler/luci/pass/src/CircleOptimizer.cpp    |  15 ++
 compiler/luci/pass/src/FoldMulPass.cpp        | 127 ++++++++++++++
 compiler/luci/pass/src/FoldMulPass.test.cpp   | 133 +++++++++++++++
 .../src/FuseAddToFullyConnectedBiasPass.cpp   | 128 ++++++++++++++
 .../FuseAddToFullyConnectedBiasPass.test.cpp  | 160 ++++++++++++++++++
 .../FuseMulToFullyConnectedWeightsPass.cpp    | 123 ++++++++++++++
 ...useMulToFullyConnectedWeightsPass.test.cpp | 160 ++++++++++++++++++
 compiler/one-cmds/how-to-use-one-commands.txt |   3 +
 compiler/one-cmds/onelib/constant.py          |   8 +
 .../Net_Add_FullyConnected_000/test.recipe    |  67 ++++++++
 .../Net_Add_FullyConnected_000/test.rule      |   7 +
 .../Net_Add_FullyConnected_001/test.recipe    |  67 ++++++++
 .../Net_Add_FullyConnected_001/test.rule      |   7 +
 .../Net_Add_FullyConnected_002/test.recipe    |  57 +++++++
 .../Net_Add_FullyConnected_002/test.rule      |   7 +
 .../Net_Mul_FullyConnected_000/test.recipe    |  67 ++++++++
 .../Net_Mul_FullyConnected_000/test.rule      |   8 +
 .../Net_Mul_FullyConnected_001/test.recipe    |  67 ++++++++
 .../Net_Mul_FullyConnected_001/test.rule      |   9 +
 .../Net_Mul_FullyConnected_002/test.recipe    |  57 +++++++
 .../Net_Mul_FullyConnected_002/test.rule      |   8 +
 29 files changed, 1431 insertions(+), 3 deletions(-)
 create mode 100644 compiler/luci/pass/include/luci/Pass/FoldMulPass.h
 create mode 100644 compiler/luci/pass/include/luci/Pass/FuseAddToFullyConnectedBiasPass.h
 create mode 100644 compiler/luci/pass/include/luci/Pass/FuseMulToFullyConnectedWeightsPass.h
 create mode 100644 compiler/luci/pass/src/FoldMulPass.cpp
 create mode 100644 compiler/luci/pass/src/FoldMulPass.test.cpp
 create mode 100644 compiler/luci/pass/src/FuseAddToFullyConnectedBiasPass.cpp
 create mode 100644 compiler/luci/pass/src/FuseAddToFullyConnectedBiasPass.test.cpp
 create mode 100644 compiler/luci/pass/src/FuseMulToFullyConnectedWeightsPass.cpp
 create mode 100644 compiler/luci/pass/src/FuseMulToFullyConnectedWeightsPass.test.cpp
 create mode 100644 res/TensorFlowLiteRecipes/Net_Add_FullyConnected_000/test.recipe
 create mode 100644 res/TensorFlowLiteRecipes/Net_Add_FullyConnected_000/test.rule
 create mode 100644 res/TensorFlowLiteRecipes/Net_Add_FullyConnected_001/test.recipe
 create mode 100644 res/TensorFlowLiteRecipes/Net_Add_FullyConnected_001/test.rule
 create mode 100644 res/TensorFlowLiteRecipes/Net_Add_FullyConnected_002/test.recipe
 create mode 100644 res/TensorFlowLiteRecipes/Net_Add_FullyConnected_002/test.rule
 create mode 100644 res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_000/test.recipe
 create mode 100644 res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_000/test.rule
 create mode 100644 res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_001/test.recipe
 create mode 100644 res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_001/test.rule
 create mode 100644 res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_002/test.recipe
 create mode 100644 res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_002/test.rule
diff --git a/compiler/circle2circle-dredd-recipe-test/test.lst b/compiler/circle2circle-dredd-recipe-test/test.lst
index d0b3123e7c1..4bf6a80d65a 100644
--- a/compiler/circle2circle-dredd-recipe-test/test.lst
+++ b/compiler/circle2circle-dredd-recipe-test/test.lst
@@ -21,6 +21,9 @@ Add(MaxPoolWithArgmax_000 PASS resolve_customop_max_pool_with_argmax)
 Add(MaxPoolWithArgmax_001 PASS resolve_customop_max_pool_with_argmax)
 Add(MaxPoolWithArgmax_002 PASS resolve_customop_max_pool_with_argmax)
 Add(Net_Add_FloorMod_Gather_000 PASS remove_gather_guard)
+Add(Net_Add_FullyConnected_000 PASS fuse_add_to_fullyconnected_bias)
+Add(Net_Add_FullyConnected_001 PASS fuse_add_to_fullyconnected_bias)
+Add(Net_Add_FullyConnected_002 PASS fuse_add_to_fullyconnected_bias)
 Add(Net_BroadcastTo_AddV2_000 PASS resolve_customop_add)
 Add(Net_BroadcastTo_AddV2_001 PASS resolve_customop_add)
 Add(Net_BroadcastTo_AddV2_002 PASS resolve_customop_add)
@@ -61,6 +64,9 @@ Add(Net_Mul_Add_002 PASS remove_unnecessary_add)
 Add(Net_Mul_Add_003 PASS remove_unnecessary_add)
 Add(Net_Mul_Div_000 PASS fuse_mul_with_div)
 Add(Net_Mul_Div_001 PASS fuse_mul_with_div)
+Add(Net_Mul_FullyConnected_000 PASS fuse_mul_to_fullyconnected_weights fold_mul)
+Add(Net_Mul_FullyConnected_001 PASS fuse_mul_to_fullyconnected_weights fold_mul)
+Add(Net_Mul_FullyConnected_002 PASS fuse_mul_to_fullyconnected_weights fold_mul)
 Add(Net_Preactivation_BN_000 PASS fuse_preactivation_batchnorm)
 Add(Net_Reshape_Reshape_000 PASS remove_redundant_reshape)
 Add(Net_Shape_Add_000 PASS fold_shape)
diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp
index 5dd6ed52edb..fb4642c3bcf 100644
--- a/compiler/circle2circle/src/Circle2Circle.cpp
+++ b/compiler/circle2circle/src/Circle2Circle.cpp
@@ -81,6 +81,7 @@ int entry(int argc, char **argv)
   add_switch(arser, "--fold_fully_connected",
              "This will fold FullyConnected operator with constant inputs");
   add_switch(arser, "--fold_gather", "This will fold Gather operator");
+  add_switch(arser, "--fold_mul", "This will fold Mul operator");
   add_switch(arser, "--fold_reshape", "This will fold Reshape operator");
   add_switch(arser, "--fold_shape", "This will fold Shape operator");
   add_switch(arser, "--fold_sparse_to_dense", "This will fold SparseToDense operator");
@@ -105,6 +106,10 @@ int entry(int argc, char **argv)
   add_switch(arser, "--fuse_batchnorm_with_tconv",
              "This will fuse BatchNorm operators to Transposed Convolution operator");
   add_switch(arser, "--fuse_bcq", "This will fuse operators and apply Binary Coded Quantization");
+  add_switch(arser, "--fuse_add_to_fullyconnected_bias",
+             "This will fuse Add to the following FullyConnected bias");
+  add_switch(arser, "--fuse_mul_to_fullyconnected_weights",
+             "This will fuse Mul to the following FullyConnected weights");
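+  // e.g. a Mul in front of a FullyConnected can be absorbed into the weights
+  // and, together with --fold_mul, reduced back to a single FullyConnected:
+  //   $ circle2circle --fuse_mul_to_fullyconnected_weights --fold_mul in.circle out.circle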
   add_switch(arser, "--fuse_instnorm", "This will fuse operators to InstanceNorm operator");
   add_switch(arser, "--fuse_mean_with_mean",
              "This will fuse two Mean operations when they follow one by one. This will fold them "
@@ -275,6 +280,8 @@ int entry(int argc, char **argv)
     options->enable(Algorithms::FoldFullyConnected);
   if (arser.get<bool>("--fold_gather"))
     options->enable(Algorithms::FoldGather);
+  if (arser.get<bool>("--fold_mul"))
+    options->enable(Algorithms::FoldMul);
   if (arser.get<bool>("--fold_reshape"))
     options->enable(Algorithms::FoldReshape);
   if (arser.get<bool>("--fold_shape"))
@@ -303,6 +310,10 @@ int entry(int argc, char **argv)
     options->enable(Algorithms::FuseBatchNormWithDwConv);
   if (arser.get<bool>("--fuse_batchnorm_with_tconv"))
     options->enable(Algorithms::FuseBatchNormWithTConv);
+  if (arser.get<bool>("--fuse_add_to_fullyconnected_bias"))
+    options->enable(Algorithms::FuseAddToFullyConnectedBias);
+  if (arser.get<bool>("--fuse_mul_to_fullyconnected_weights"))
+    options->enable(Algorithms::FuseMulToFullyConnectedWeights);
   if (arser.get<bool>("--fuse_slice_with_tconv"))
     options->enable(Algorithms::FuseSliceWithTConv);
   if (arser.get<bool>("--fuse_bcq"))
diff --git a/compiler/luci-pass-value-py-test/test.lst b/compiler/luci-pass-value-py-test/test.lst
index 9e0ed5c6615..bf5c6aa5cb5 100644
--- a/compiler/luci-pass-value-py-test/test.lst
+++ b/compiler/luci-pass-value-py-test/test.lst
@@ -2,7 +2,7 @@
 # Format:
 #   eval(MODEL PASS)
 # MODEL: tflite model file name in build/compiler/common-artifacts folder.
-# PASS: Optimization Pass to test. Supports only one Pass for now.
+# PASS: Optimization Pass to test. Supports one or more Passes.
 #
 # eval(Net_Preactivation_BN_000 fuse_preactivation_batchnorm) : value diff exist
 
@@ -10,6 +10,9 @@ eval(FullyConnected_007 replace_non_const_fc_with_batch_matmul)
 eval(HardSwish_001 decompose_hardswish)
 eval(Net_Add_FloorMod_Gather_000 remove_gather_guard)
+eval(Net_Add_FullyConnected_000 fuse_add_to_fullyconnected_bias)
+eval(Net_Add_FullyConnected_001 fuse_add_to_fullyconnected_bias)
+eval(Net_Add_FullyConnected_002 fuse_add_to_fullyconnected_bias)
 eval(Net_Conv_Add_000 fuse_add_with_conv)
 eval(Net_Conv_Add_001 fuse_add_with_conv)
 # eval(Net_Conv_Add_002 fuse_add_with_conv) --> Conv2D w/o bias fails in tflite interpreter
@@ -40,6 +43,9 @@ eval(Net_Mul_Add_002 remove_unnecessary_add)
 eval(Net_Mul_Add_003 remove_unnecessary_add)
 eval(Net_Mul_Div_000 fuse_mul_with_div)
 eval(Net_Mul_Div_001 fuse_mul_with_div)
+eval(Net_Mul_FullyConnected_000 fuse_mul_to_fullyconnected_weights)
+eval(Net_Mul_FullyConnected_001 fuse_mul_to_fullyconnected_weights)
+eval(Net_Mul_FullyConnected_002 fuse_mul_to_fullyconnected_weights)
 eval(Net_Reshape_Mean_000 forward_reshape_to_unaryop)
 eval(Net_Reshape_Neg_000 forward_reshape_to_unaryop)
 eval(Net_Reshape_Reshape_000 remove_redundant_reshape)
diff --git a/compiler/luci-pass-value-py-test/test_luci_eval.py b/compiler/luci-pass-value-py-test/test_luci_eval.py
index 4cb59c177b7..91986f6f2de 100644
--- a/compiler/luci-pass-value-py-test/test_luci_eval.py
+++ b/compiler/luci-pass-value-py-test/test_luci_eval.py
@@ -95,8 +95,13 @@ def luci_eval_verify(test_name,
         assert np.allclose(
             luci_output_data, intp_output_data, rtol=rtolint, atol=atolint), err_msg
     elif output_details["dtype"] == np.float32:
-        assert np.allclose(
-            luci_output_data, intp_output_data, rtol=rtolf32, atol=atolf32), err_msg
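+        # on a mismatch, dump both outputs and their element-wise difference
+        # to ease numerical debugging of the passes under test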
+        diff_comp = np.allclose(
+            luci_output_data, intp_output_data, rtol=rtolf32, atol=atolf32)
+        if not diff_comp:
+            print("\r\ntflite:\r\n", intp_output_data, flush=True)
+            print("\r\ncircle:\r\n", luci_output_data, flush=True)
+            print("\r\nDiff:\r\n", intp_output_data - luci_output_data, flush=True)
+        assert diff_comp, err_msg
     elif output_details["dtype"] == np.int64:
         assert np.allclose(
             luci_output_data, intp_output_data, rtol=rtolint, atol=atolint), err_msg
diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h
index 01b43a72844..bdae7d57e41 100644
--- a/compiler/luci/pass/include/luci/CircleOptimizer.h
+++ b/compiler/luci/pass/include/luci/CircleOptimizer.h
@@ -34,12 +34,14 @@ class CircleOptimizer final
 {
   enum Algorithm
   {
+    FuseAddToFullyConnectedBias,
     FuseAddWithConv,
     FuseAddWithFullyConnected,
     FuseAddWithTConv,
     FuseBatchNormWithConv,
     FuseBatchNormWithDwConv,
     FuseBatchNormWithTConv,
+    FuseMulToFullyConnectedWeights,
     FuseSliceWithTConv,
     FuseBCQ,
     FuseHorizontalFullyConnected,
@@ -61,6 +63,7 @@ class CircleOptimizer final
     FoldFullyConnected,
     FoldDequantize,
     FoldGather,
+    FoldMul,
     FoldReshape,
     FoldShape,
     FoldSparseToDense,
diff --git a/compiler/luci/pass/include/luci/Pass/FoldMulPass.h b/compiler/luci/pass/include/luci/Pass/FoldMulPass.h
new file mode 100644
index 00000000000..69b661fbe02
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FoldMulPass.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_MUL_PASS_H__
+#define __LUCI_FOLD_MUL_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to fold Mul to a constant tensor
+ *
+ */
+struct FoldMulPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::FoldMulPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_MUL_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FuseAddToFullyConnectedBiasPass.h b/compiler/luci/pass/include/luci/Pass/FuseAddToFullyConnectedBiasPass.h
new file mode 100644
index 00000000000..9aef478456c
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FuseAddToFullyConnectedBiasPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_ADD_TO_FULLY_CONNECTED_BIAS_PASS_H__
+#define __LUCI_FUSE_ADD_TO_FULLY_CONNECTED_BIAS_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to fuse Add to the following FC bias
+ */
+struct FuseAddToFullyConnectedBiasPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::FuseAddToFullyConnectedBiasPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_ADD_TO_FULLY_CONNECTED_BIAS_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FuseMulToFullyConnectedWeightsPass.h b/compiler/luci/pass/include/luci/Pass/FuseMulToFullyConnectedWeightsPass.h
new file mode 100644
index 00000000000..583f21ef82c
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FuseMulToFullyConnectedWeightsPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_MUL_TO_FULLY_CONNECTED_WEIGHTS_PASS_H__
+#define __LUCI_FUSE_MUL_TO_FULLY_CONNECTED_WEIGHTS_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to fuse Mul into the following FullyConnected weights
+ */
+struct FuseMulToFullyConnectedWeightsPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::FuseMulToFullyConnectedWeightsPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_MUL_TO_FULLY_CONNECTED_WEIGHTS_PASS_H__
diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp
index 3c94311c8d5..9f650d9004e 100644
--- a/compiler/luci/pass/src/CircleOptimizer.cpp
+++ b/compiler/luci/pass/src/CircleOptimizer.cpp
@@ -26,6 +26,7 @@
 #include "luci/Pass/FoldDequantizePass.h"
 #include "luci/Pass/FoldFullyConnectedPass.h"
 #include "luci/Pass/FoldGatherPass.h"
+#include "luci/Pass/FoldMulPass.h"
 #include "luci/Pass/FoldReshapePass.h"
 #include "luci/Pass/FoldShapePass.h"
 #include "luci/Pass/FoldSparseToDensePass.h"
@@ -33,6 +34,7 @@
 #include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
 #include "luci/Pass/ForwardTransposeOpPass.h"
 #include "luci/Pass/FuseActivationFunctionPass.h"
+#include "luci/Pass/FuseAddToFullyConnectedBiasPass.h"
 #include "luci/Pass/FuseAddWithConvPass.h"
 #include "luci/Pass/FuseAddWithFullyConnectedPass.h"
 #include "luci/Pass/FuseAddWithTConvPass.h"
@@ -40,6 +42,7 @@
 #include "luci/Pass/FuseBatchNormWithDwConvPass.h"
 #include "luci/Pass/FuseBatchNormWithTConvPass.h"
 #include "luci/Pass/FuseBCQPass.h"
+#include "luci/Pass/FuseMulToFullyConnectedWeightsPass.h"
 #include "luci/Pass/FuseInstanceNormPass.h"
 #include "luci/Pass/FuseMeanWithMeanPass.h"
 #include "luci/Pass/FuseMulWithConvPass.h"
@@ -333,6 +336,14 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique());
   }
+  if (_options->query(Options::Algorithm::FuseAddToFullyConnectedBias))
+  {
+    phase.emplace_back(std::make_unique<luci::FuseAddToFullyConnectedBiasPass>());
+  }
+  if (_options->query(Options::Algorithm::FuseMulToFullyConnectedWeights))
+  {
+    phase.emplace_back(std::make_unique<luci::FuseMulToFullyConnectedWeightsPass>());
+  }
   if (_options->query(Options::Algorithm::FusePRelu))
   {
     phase.emplace_back(std::make_unique<luci::FusePReluPass>());
   }
@@ -381,6 +392,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique<luci::FoldGatherPass>());
   }
+  if (_options->query(Options::Algorithm::FoldMul))
+  {
+    phase.emplace_back(std::make_unique<luci::FoldMulPass>());
+  }
   if (_options->query(Options::Algorithm::FoldReshape))
   {
     phase.emplace_back(std::make_unique<luci::FoldReshapePass>());
   }
diff --git a/compiler/luci/pass/src/FoldMulPass.cpp b/compiler/luci/pass/src/FoldMulPass.cpp
new file mode 100644
index 00000000000..65112911e59
--- /dev/null
+++ b/compiler/luci/pass/src/FoldMulPass.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldMulPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#define CHECK_OR_FALSE(condition) \
+  if (not(condition))             \
+    return false;
+
+namespace
+{
+
+/**
+ * @return the higher-ranked of x and y, or nullptr if shapes are not compatible
+ */
+const luci::CircleConst *compatible_shape(const luci::CircleConst *x, const luci::CircleConst *y)
+{
+  if (x->rank() >= y->rank())
+  {
+    uint32_t d = x->rank() - y->rank();
+    for (uint32_t i = 0; i < y->rank(); i++)
+    {
+      // NOTE dim() has only '==' operator
+      if (!(x->dim(i + d) == y->dim(i)))
+        return nullptr;
+    }
+    return x;
+  }
+  else
+  {
+    uint32_t d = y->rank() - x->rank();
+    for (uint32_t i = 0; i < x->rank(); i++)
+    {
+      if (!(x->dim(i) == y->dim(i + d)))
+        return nullptr;
+    }
+    return y;
+  }
+}
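+
+// NOTE with shapes accepted by compatible_shape(), broadcasting reduces to
+//      modulo indexing over the flat buffers: e.g. x:[2,3] (size 6) and
+//      y:[3] (size 3) fold to out[i] = x[i] * y[i % 3], i.e. the smaller
+//      constant repeats over the leading dimensions of the larger one.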
+
+/**
+ * Fold Mul to const if both inputs are const
+ **/
+bool fold_mul(luci::CircleMul *mul)
+{
+  CHECK_OR_FALSE(mul);
+  CHECK_OR_FALSE(mul->dtype() == loco::DataType::FLOAT32);
+
+  // Check inputs are const and compatible
+  auto x = dynamic_cast<luci::CircleConst *>(mul->x());
+  auto y = dynamic_cast<luci::CircleConst *>(mul->y());
+  CHECK_OR_FALSE(x);
+  CHECK_OR_FALSE(y);
+  CHECK_OR_FALSE(x->dtype() == y->dtype());
+  const auto xy = compatible_shape(x, y);
+  CHECK_OR_FALSE(xy);
+
+  auto name_x = x->name();
+  auto name_y = y->name();
+  assert(name_x.length() > 0);
+  assert(name_y.length() > 0);
+  auto folded_const = mul->graph()->nodes()->create<luci::CircleConst>();
+  folded_const->dtype(xy->dtype());
+  folded_const->rank(xy->rank());
+  for (uint32_t i = 0; i < xy->rank(); i++)
+    folded_const->dim(i).set(xy->dim(i).value());
+
+  const auto size_x = x->size<loco::DataType::FLOAT32>();
+  const auto size_y = y->size<loco::DataType::FLOAT32>();
+  const auto size_xy = xy->size<loco::DataType::FLOAT32>();
+  folded_const->size<loco::DataType::FLOAT32>(size_xy);
+  for (uint32_t i = 0; i < size_xy; i++)
+  {
+    auto xv = x->at<loco::DataType::FLOAT32>(i % size_x);
+    auto yv = y->at<loco::DataType::FLOAT32>(i % size_y);
+    folded_const->at<loco::DataType::FLOAT32>(i) = xv * yv;
+  }
+
+  folded_const->shape_status(luci::ShapeStatus::VALID);
+  folded_const->name(name_x + "_" + name_y);
+
+  loco::replace(mul).with(folded_const);
+
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for Mul Op
+ **/
+bool FoldMulPass::run(loco::Graph *g)
+{
+  bool changed = false;
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    if (auto mul = dynamic_cast<luci::CircleMul *>(node))
+    {
+      if (fold_mul(mul))
+        changed = true;
+    }
+  }
+
+  return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldMulPass.test.cpp b/compiler/luci/pass/src/FoldMulPass.test.cpp
new file mode 100644
index 00000000000..0c6de971f40
--- /dev/null
+++ b/compiler/luci/pass/src/FoldMulPass.test.cpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldMulPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Graph has a Mul Op with constant inputs
+ *
+ *  BEFORE
+ *
+ *             [CircleConst] [CircleConst]
+ *                       |     |
+ *                     [CircleMul]
+ *                         |
+ *                    [CircleNode]
+ *
+ *  AFTER
+ *
+ *                 [CircleConst] [CircleConst]
+ *                           |     |
+ *  [CircleConst]          [CircleMul]
+ *       |
+ *  [CircleNode]
+ */
+template <loco::DataType T> class FoldMulTest : public luci::ConstantFoldingAddTestGraph
+{
+public:
+  FoldMulTest(std::initializer_list<uint32_t> shape) : luci::ConstantFoldingAddTestGraph(shape, T)
+  {
+    _mul = _g.nodes()->template create<luci::CircleMul>();
+    _x = _g.nodes()->template create<luci::CircleConst>();
+    _y = _g.nodes()->template create<luci::CircleConst>();
+
+    _mul->dtype(T);
+    _x->dtype(T);
+    _y->dtype(T);
+
+    _mul->shape(shape);
+    _x->shape(shape);
+    _y->shape(shape);
+
+    uint32_t num_elems = 1;
+    for (auto dim = shape.begin(); dim != shape.end(); dim++)
+      num_elems *= *dim;
+
+    _x->size<T>(num_elems);
+    _y->size<T>(num_elems);
+
+    for (uint32_t i = 0; i < num_elems; i++)
+    {
+      _x->at<T>(i) = i + 1;
+      _y->at<T>(i) = i + 1;
+    }
+
+    _mul->x(_x);
+    _mul->y(_y);
+    _mul->name("mul");
+    _x->name("x");
+    _y->name("y");
+  }
+
+  loco::Node *createFoldedPattern() override { return _mul; }
+
+  virtual ~FoldMulTest() = default;
+
+protected:
+  luci::CircleMul *_mul = nullptr;
+  luci::CircleConst *_x = nullptr;
+  luci::CircleConst *_y = nullptr;
+};
+
+class FoldF32MulTest : public FoldMulTest<loco::DataType::FLOAT32>, public ::testing::Test
+{
+public:
+  FoldF32MulTest() : FoldMulTest<loco::DataType::FLOAT32>({3}) {}
+
+  virtual void SetUp() { init(); }
+};
+
+} // namespace
+
+TEST_F(FoldF32MulTest, name)
+{
+  luci::FoldMulPass pass;
+  auto const name = pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FoldF32MulTest, fold_mul)
+{
+  luci::FoldMulPass pass;
+  while (pass.run(graph()))
+    ;
+
+  auto folded_const = getFoldedPattern();
+  EXPECT_NE(nullptr, folded_const);
+
+  // Check type, shape, values of folded const
+  EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+  EXPECT_EQ(1, folded_const->rank());
+  EXPECT_EQ(3, folded_const->dim(0).value());
+  EXPECT_EQ(1, folded_const->at<loco::DataType::FLOAT32>(0));
+  EXPECT_EQ(4, folded_const->at<loco::DataType::FLOAT32>(1));
+  EXPECT_EQ(9, folded_const->at<loco::DataType::FLOAT32>(2));
+}
+
+TEST_F(FoldF32MulTest, input_type_mismatch_NEG)
+{
+  _x->dtype(loco::DataType::U4);
+
+  luci::FoldMulPass pass;
+  EXPECT_FALSE(pass.run(graph()));
+}
diff --git a/compiler/luci/pass/src/FuseAddToFullyConnectedBiasPass.cpp b/compiler/luci/pass/src/FuseAddToFullyConnectedBiasPass.cpp
new file mode 100644
index 00000000000..d1e44b0a104
--- /dev/null
+++ b/compiler/luci/pass/src/FuseAddToFullyConnectedBiasPass.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseAddToFullyConnectedBiasPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include "helpers/NodeFiller.h"
+
+#define CHECK_OR_FALSE(condition) \
+  if (not(condition))             \
+    return false;
+
+namespace
+{
+
+/**
+ * Transform Add to following FullyConnected bias if possible
+ *
+ * BEFORE
+ *                  |
+ *     [CircleAdd] [CircleConst] [CircleConst]
+ *           |            |       |
+ *        [CircleFullyConnected] ----------+
+ *           |
+ *
+ * AFTER
+ *     |
+ *     |               [CircleConst] [CircleConst] [CircleConst]
+ *     |                     |        |              |
+ *     |       [CircleConst] [CircleFullyConnected] [CircleAdd]
+ *     |             |              |
+ *  [CircleFullyConnected] ------+
+ *     |
+ *
+ */
+bool fuse_add_to_fc_bias(luci::CircleFullyConnected *fc)
+{
+  CHECK_OR_FALSE(fc);
+
+  // check input is Add
+  auto add = dynamic_cast<luci::CircleAdd *>(fc->input());
+  CHECK_OR_FALSE(add);
+  // conditions of Add, FC: to expect constant folding, support only F32
+  CHECK_OR_FALSE(add->dtype() == loco::DataType::FLOAT32);
+  CHECK_OR_FALSE(add->fusedActivationFunction() == luci::FusedActFunc::NONE);
+  CHECK_OR_FALSE(fc->dtype() == loco::DataType::FLOAT32);
+  // support only constant weights
+  auto weights = dynamic_cast<luci::CircleConst *>(fc->weights());
+  CHECK_OR_FALSE(weights);
+  // bias can be constant or CircleOutputExclude
+  auto bias = dynamic_cast<luci::CircleNode *>(fc->bias());
+  CHECK_OR_FALSE(bias);
+
+  // Check that the addend of Add is constant
+  luci::CircleNode *add_input = nullptr;
+  luci::CircleConst *add_shift = nullptr;
+  CHECK_OR_FALSE(luci::fill(&add_input, &add_shift).with_commutative_args_of(add));
+  if (add_shift)
+  {
+    // support only 1D constant
+    CHECK_OR_FALSE(add_shift->rank() == 1);
+  }
+
+  auto graph = fc->graph();
+
+  auto fc_bias = graph->nodes()->create<luci::CircleFullyConnected>();
+  fc_bias->input(add_shift);
+  fc_bias->weights(weights);
+  fc_bias->bias(bias);
+  fc_bias->keep_num_dims(true);
+  fc_bias->fusedActivationFunction(luci::FusedActFunc::NONE);
+  fc_bias->name(fc->name() + "_" + add->name() + "_bias");
+  luci::add_origin(fc_bias,
+                   luci::composite_origin(
+                     {luci::get_origin(add), luci::get_origin(add_shift), luci::get_origin(bias)}));
+
+  auto fc_new = graph->nodes()->create<luci::CircleFullyConnected>();
+  fc_new->input(add_input);
+  fc_new->weights(weights);
+  fc_new->bias(fc_bias);
+  fc_new->weights_format(fc->weights_format());
+  fc_new->keep_num_dims(fc->keep_num_dims());
+  fc_new->fusedActivationFunction(fc->fusedActivationFunction());
+  fc_new->name(fc->name());
+  luci::add_origin(fc_new, luci::get_origin(fc));
+
+  replace(fc).with(fc_new);
+
+  return true;
+}
+
+} // namespace
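+
+// NOTE Why this is sound (FLOAT32, 1D shift 'a', weights W, bias b):
+//        FC(x + a; W, b) = (x + a) W^T + b = x W^T + (a W^T + b)
+//      The helper FC built above computes the new constant bias FC(a; W, b);
+//      later constant folding can reduce it to a single CircleConst.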
+
+namespace luci
+{
+
+bool FuseAddToFullyConnectedBiasPass::run(loco::Graph *g)
+{
+  bool changed = false;
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto fc = dynamic_cast<luci::CircleFullyConnected *>(node);
+    if (not fc)
+      continue;
+
+    if (fuse_add_to_fc_bias(fc))
+      changed = true;
+  }
+
+  return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseAddToFullyConnectedBiasPass.test.cpp b/compiler/luci/pass/src/FuseAddToFullyConnectedBiasPass.test.cpp
new file mode 100644
index 00000000000..bbcd7f9fc85
--- /dev/null
+++ b/compiler/luci/pass/src/FuseAddToFullyConnectedBiasPass.test.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseAddToFullyConnectedBiasPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+template <loco::DataType DT>
+class FuseAddToFullyConnectedBiasPassTestGraph : public TestIOGraph
+{
+public:
+  FuseAddToFullyConnectedBiasPassTestGraph() = default;
+
+  void init(void)
+  {
+    TestIOGraph::init({3, 4}, {3, 6});
+
+    _add = g()->nodes()->create<luci::CircleAdd>();
+    _addi = g()->nodes()->create<luci::CircleConst>();
+    _fc = g()->nodes()->create<luci::CircleFullyConnected>();
+    _fc_w = g()->nodes()->create<luci::CircleConst>();
+    _fc_b = g()->nodes()->create<luci::CircleConst>();
+
+    _add->name("add");
+    _addi->name("addi");
+    _fc->name("fc");
+    _fc_w->name("fc_w");
+    _fc_b->name("fc_b");
+
+    _add->dtype(DT);
+    _fc->dtype(DT);
+    _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+
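+    // the Add constant is 1D with length equal to the FC input depth (4),
+    // so the bias-computing FC built by the pass is well-formed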
+    _addi->rank(1);
+    _addi->dim(0) = 4;
+    _addi->dtype(DT);
+    _addi->size<DT>(4);
+    for (uint32_t i = 0; i < 4; ++i)
+    {
+      _addi->at<DT>(i) = 1.0f;
+    }
+
+    _fc_w->rank(2);
+    _fc_w->dim(0) = 6;
+    _fc_w->dim(1) = 4;
+    _fc_w->dtype(DT);
+    _fc_w->size<DT>(4 * 6);
+    for (uint32_t i = 0; i < 4 * 6; ++i)
+    {
+      _fc_w->at<DT>(i) = 1.0f;
+    }
+
+    _fc_b->rank(1);
+    _fc_b->dim(0) = 6;
+    _fc_b->dtype(DT);
+    _fc_b->size<DT>(6);
+    for (uint32_t i = 0; i < 6; ++i)
+    {
+      _fc_b->at<DT>(i) = 1.0f;
+    }
+
+    _add->x(input());
+    _add->y(_addi);
+    _fc->input(_add);
+    _fc->weights(_fc_w);
+    _fc->bias(_fc_b);
+
+    output()->from(_fc);
+  }
+
+  luci::CircleAdd *_add = nullptr;
+  luci::CircleFullyConnected *_fc = nullptr;
+  luci::CircleConst *_addi = nullptr;
+  luci::CircleConst *_fc_w = nullptr;
+  luci::CircleConst *_fc_b = nullptr;
+};
+
+class FuseAddToFullyConnectedBiasPassTest : public ::testing::Test
+{
+public:
+  FuseAddToFullyConnectedBiasPassTest() = default;
+
+protected:
+  FuseAddToFullyConnectedBiasPassTestGraph<loco::DataType::FLOAT32> _graph;
+  luci::FuseAddToFullyConnectedBiasPass _pass;
+};
+
+class FuseAddToFullyConnectedBiasPassS32Test : public ::testing::Test
+{
+public:
+  FuseAddToFullyConnectedBiasPassS32Test() = default;
+
+protected:
+  FuseAddToFullyConnectedBiasPassTestGraph<loco::DataType::S32> _graph;
+  luci::FuseAddToFullyConnectedBiasPass _pass;
+};
+
+} // namespace
+
+TEST_F(FuseAddToFullyConnectedBiasPassTest, name)
+{
+  auto const name = _pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FuseAddToFullyConnectedBiasPassTest, fuse_add_to_fc_bias)
+{
+  _graph.init();
+
+  EXPECT_TRUE(_pass.run(_graph.g()));
+}
+
+TEST_F(FuseAddToFullyConnectedBiasPassTest, add_fused_act_NEG)
+{
+  _graph.init();
+
+  _graph._add->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+  EXPECT_FALSE(_pass.run(_graph.g()));
+}
+
+TEST_F(FuseAddToFullyConnectedBiasPassTest, add_d2_NEG)
+{
+  _graph.init();
+
+  _graph._addi->rank(2);
+  _graph._addi->dim(0) = 1;
+  _graph._addi->dim(1) = 3;
+
+  EXPECT_FALSE(_pass.run(_graph.g()));
+}
+
+TEST_F(FuseAddToFullyConnectedBiasPassS32Test, dtype_s32_NEG)
+{
+  _graph.init();
+
+  EXPECT_FALSE(_pass.run(_graph.g()));
+}
diff --git a/compiler/luci/pass/src/FuseMulToFullyConnectedWeightsPass.cpp b/compiler/luci/pass/src/FuseMulToFullyConnectedWeightsPass.cpp
new file mode 100644
index 00000000000..816ba9e14ad
--- /dev/null
+++ b/compiler/luci/pass/src/FuseMulToFullyConnectedWeightsPass.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseMulToFullyConnectedWeightsPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include "helpers/NodeFiller.h"
+
+#define CHECK_OR_FALSE(condition) \
+  if (not(condition))             \
+    return false;
+
+namespace
+{
+
+/**
+ * Fuse Mul to following FullyConnected if possible
+ *
+ * BEFORE
+ *                  |
+ *     [CircleMul] [CircleConst] [CircleConst]
+ *           |            |       |
+ *        [CircleFullyConnected] ----------+
+ *           |
+ *
+ * AFTER
+ *     |
+ *     |               [CircleConst] [CircleConst] [CircleConst]
+ *     |                     |        |              |
+ *     |       [CircleConst] [CircleFullyConnected] [CircleMul]
+ *     |             |              |
+ *  [CircleFullyConnected] ------+
+ *     |
+ *
+ */
+bool fuse_fc_with_mul(luci::CircleFullyConnected *fc)
+{
+  CHECK_OR_FALSE(fc);
+
+  // check input is Mul
+  auto mul = dynamic_cast<luci::CircleMul *>(fc->input());
+  CHECK_OR_FALSE(mul);
+  // conditions of Mul, FC: to expect constant folding, support only F32
+  CHECK_OR_FALSE(mul->dtype() == loco::DataType::FLOAT32);
+  CHECK_OR_FALSE(mul->fusedActivationFunction() == luci::FusedActFunc::NONE);
+  CHECK_OR_FALSE(fc->dtype() == loco::DataType::FLOAT32);
+  // support only constant weights
+  auto weights = dynamic_cast<luci::CircleConst *>(fc->weights());
+  CHECK_OR_FALSE(weights);
+
+  // Check that the multiplier of Mul is constant
+  luci::CircleNode *mul_input = nullptr;
+  luci::CircleConst *mul_scale = nullptr;
+  CHECK_OR_FALSE(luci::fill(&mul_input, &mul_scale).with_commutative_args_of(mul));
+  if (mul_scale)
+  {
+    // support only 1D constant
+    CHECK_OR_FALSE(mul_scale->rank() == 1);
+  }
+
+  auto graph = fc->graph();
+
+  auto fc_weights = graph->nodes()->create<luci::CircleMul>();
+  fc_weights->x(weights);
+  fc_weights->y(mul_scale);
+  fc_weights->fusedActivationFunction(luci::FusedActFunc::NONE);
+  fc_weights->name(mul->name() + "_" + fc->name() + "_weight");
+  luci::add_origin(fc_weights,
+                   luci::composite_origin({luci::get_origin(mul), luci::get_origin(weights),
+                                           luci::get_origin(mul_scale)}));
+
+  auto fc_new = graph->nodes()->create<luci::CircleFullyConnected>();
+  fc_new->input(mul_input);
+  fc_new->weights(fc_weights);
+  fc_new->bias(fc->bias());
+  fc_new->weights_format(fc->weights_format());
+  fc_new->keep_num_dims(fc->keep_num_dims());
+  fc_new->fusedActivationFunction(fc->fusedActivationFunction());
+  fc_new->name(fc->name());
+  luci::add_origin(fc_new, luci::get_origin(fc));
+
+  replace(fc).with(fc_new);
+
+  return true;
+}
+
+} // namespace
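+
+// NOTE Why this is sound (FLOAT32, 1D scale 's' over the input depth):
+//        FC(x * s; W, b) = (x * s) W^T + b = x (W * s)^T + b
+//      where W * s multiplies input channel k of every weight row by s[k],
+//      so the Mul built above can later be constant-folded into the weights.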
+
+namespace luci
+{
+
+bool FuseMulToFullyConnectedWeightsPass::run(loco::Graph *g)
+{
+  bool changed = false;
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto fc = dynamic_cast<luci::CircleFullyConnected *>(node);
+    if (not fc)
+      continue;
+
+    if (fuse_fc_with_mul(fc))
+      changed = true;
+  }
+
+  return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseMulToFullyConnectedWeightsPass.test.cpp b/compiler/luci/pass/src/FuseMulToFullyConnectedWeightsPass.test.cpp
new file mode 100644
index 00000000000..d7c61e104a3
--- /dev/null
+++ b/compiler/luci/pass/src/FuseMulToFullyConnectedWeightsPass.test.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseMulToFullyConnectedWeightsPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+template <loco::DataType DT>
+class FuseMulToFullyConnectedWeightsPassTestGraph : public TestIOGraph
+{
+public:
+  FuseMulToFullyConnectedWeightsPassTestGraph() = default;
+
+  void init(void)
+  {
+    TestIOGraph::init({3, 4}, {3, 6});
+
+    _mul = g()->nodes()->create<luci::CircleMul>();
+    _muli = g()->nodes()->create<luci::CircleConst>();
+    _fc = g()->nodes()->create<luci::CircleFullyConnected>();
+    _fc_w = g()->nodes()->create<luci::CircleConst>();
+    _fc_b = g()->nodes()->create<luci::CircleConst>();
+
+    _mul->name("mul");
+    _muli->name("muli");
+    _fc->name("fc");
+    _fc_w->name("fc_w");
+    _fc_b->name("fc_b");
+
+    _mul->dtype(DT);
+    _fc->dtype(DT);
+    _mul->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+
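+    // the Mul constant is 1D with length equal to the FC input depth (4),
+    // so it broadcasts over the last axis of both the input and the weights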
+    _muli->rank(1);
+    _muli->dim(0) = 4;
+    _muli->dtype(DT);
+    _muli->size<DT>(4);
+    for (uint32_t i = 0; i < 4; ++i)
+    {
+      _muli->at<DT>(i) = 1.0f;
+    }
+
+    _fc_w->rank(2);
+    _fc_w->dim(0) = 6;
+    _fc_w->dim(1) = 4;
+    _fc_w->dtype(DT);
+    _fc_w->size<DT>(4 * 6);
+    for (uint32_t i = 0; i < 4 * 6; ++i)
+    {
+      _fc_w->at<DT>(i) = 1.0f;
+    }
+
+    _fc_b->rank(1);
+    _fc_b->dim(0) = 6;
+    _fc_b->dtype(DT);
+    _fc_b->size<DT>(6);
+    for (uint32_t i = 0; i < 6; ++i)
+    {
+      _fc_b->at<DT>(i) = 1.0f;
+    }
+
+    _mul->x(input());
+    _mul->y(_muli);
+    _fc->input(_mul);
+    _fc->weights(_fc_w);
+    _fc->bias(_fc_b);
+
+    output()->from(_fc);
+  }
+
+  luci::CircleMul *_mul = nullptr;
+  luci::CircleFullyConnected *_fc = nullptr;
+  luci::CircleConst *_muli = nullptr;
+  luci::CircleConst *_fc_w = nullptr;
+  luci::CircleConst *_fc_b = nullptr;
+};
+
+class FuseMulToFullyConnectedWeightsPassTest : public ::testing::Test
+{
+public:
+  FuseMulToFullyConnectedWeightsPassTest() = default;
+
+protected:
+  FuseMulToFullyConnectedWeightsPassTestGraph<loco::DataType::FLOAT32> _graph;
+  luci::FuseMulToFullyConnectedWeightsPass _pass;
+};
+
+class FuseMulToFullyConnectedWeightsPassS32Test : public ::testing::Test
+{
+public:
+  FuseMulToFullyConnectedWeightsPassS32Test() = default;
+
+protected:
+  FuseMulToFullyConnectedWeightsPassTestGraph<loco::DataType::S32> _graph;
+  luci::FuseMulToFullyConnectedWeightsPass _pass;
+};
+
+} // namespace
+
+TEST_F(FuseMulToFullyConnectedWeightsPassTest, name)
+{
+  auto const name = _pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FuseMulToFullyConnectedWeightsPassTest, fuse_mul_to_fc_weights)
+{
+  _graph.init();
+
+  EXPECT_TRUE(_pass.run(_graph.g()));
+}
+
+TEST_F(FuseMulToFullyConnectedWeightsPassTest, mul_fused_act_NEG)
+{
+  _graph.init();
+
+  _graph._mul->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+  EXPECT_FALSE(_pass.run(_graph.g()));
+}
+
+TEST_F(FuseMulToFullyConnectedWeightsPassTest, mul_d2_NEG)
+{
+  _graph.init();
+
+  _graph._muli->rank(2);
+  _graph._muli->dim(0) = 1;
+  _graph._muli->dim(1) = 3;
+
+  EXPECT_FALSE(_pass.run(_graph.g()));
+}
+
+TEST_F(FuseMulToFullyConnectedWeightsPassS32Test, dtype_s32_NEG)
+{
+  _graph.init();
+
+  EXPECT_FALSE(_pass.run(_graph.g()));
+}
diff --git a/compiler/one-cmds/how-to-use-one-commands.txt b/compiler/one-cmds/how-to-use-one-commands.txt
index 08e8a557375..fefbabf9a17 100644
--- a/compiler/one-cmds/how-to-use-one-commands.txt
+++ b/compiler/one-cmds/how-to-use-one-commands.txt
@@ -160,15 +160,18 @@ Current transformation options are
 - fold_dequantize : This removes Dequantize operation which can be folded
 - fold_dwconv : This folds Depthwise Convolution operation which can be folded
 - fold_gather : This removes Gather operation which can be folded
+- fold_mul : This removes Mul operation which can be folded
 - fold_shape : This removes Shape operation which can be folded
 - fold_sparse_to_dense : This removes SparseToDense operation which can be folded
 - forward_reshape_to_unaryop: This will move Reshape after UnaryOp for centain condition
+- fuse_add_to_fullyconnected_bias: This fuses Add operator into the bias of the following FullyConnected operator
 - fuse_add_with_conv: This fuses Add operator with the preceding Convolution operator if possible
 - fuse_add_with_fully_connected: This fuses Add operator with the preceding FullyConnected operator if possible
 - fuse_add_with_tconv: This fuses Add operator with the preceding TConv operator if possible
 - fuse_batchnorm_with_conv : This fuses BatchNorm operator to convolution operator
 - fuse_batchnorm_with_dwconv : This fuses BatchNorm operator to depthwise convolution operator
 - fuse_batchnorm_with_tconv : This fuses BatchNorm operator to transpose convolution operator
+- fuse_mul_to_fullyconnected_weights: This fuses Mul operator into the weights of the following FullyConnected operator
 - fuse_mul_with_conv: This fuses Mul with a preceding Convolution op if possible.
 - fuse_mul_with_div: This fuses Mul and Div op as Div.
 - fuse_slice_with_tconv: This fuses Slice with a preceding TConv if possible.
diff --git a/compiler/one-cmds/onelib/constant.py b/compiler/one-cmds/onelib/constant.py
index 42676d78159..8c5de1b646d 100644
--- a/compiler/one-cmds/onelib/constant.py
+++ b/compiler/one-cmds/onelib/constant.py
@@ -29,12 +29,14 @@ class CONSTANT:
         'fold_dwconv',
         'fold_fully_connected',
         'fold_gather',
+        'fold_mul',
         'fold_reshape',
         'fold_shape',
         'fold_sparse_to_dense',
         'fold_squeeze',
 
         # Operator fusion
+        'fuse_add_to_fullyconnected_bias',
         'fuse_add_with_conv',
         'fuse_add_with_tconv',
         'fuse_add_with_fully_connected',
@@ -42,6 +44,7 @@ class CONSTANT:
         'fuse_batchnorm_with_dwconv',
         'fuse_batchnorm_with_tconv',
         'fuse_activation_function',
+        'fuse_mul_to_fullyconnected_weights',
        'fuse_instnorm',
         'fuse_prelu',
         'fuse_gelu',
@@ -104,18 +107,23 @@ class CONSTANT:
         ('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'),
         ('fold_fully_connected', 'fold FullyConnected op with constant inputs'),
         ('fold_gather', 'fold Gather op'),
+        ('fold_mul', 'fold Mul op'),
         ('fold_reshape', 'fold Reshape op'),
         ('fold_shape', 'fold Shape op'),
         ('fold_sparse_to_dense', 'fold SparseToDense op'),
         ('fold_squeeze', 'fold Squeeze op'),
         ('forward_reshape_to_unaryop', 'Forward Reshape op'),
         ('forward_transpose_op', 'Forward Transpose op'),
+        ('fuse_add_to_fullyconnected_bias',
+         'fuse Add op to following FullyConnected op bias'),
         ('fuse_add_with_conv', 'fuse Add op to Convolution op'),
         ('fuse_add_with_tconv', 'fuse Add op to Transposed'),
         ('fuse_add_with_fully_connected', 'fuse Add op to FullyConnected op'),
         ('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'),
         ('fuse_batchnorm_with_dwconv', 'fuse BatchNorm op to Depthwise Convolution op'),
         ('fuse_batchnorm_with_tconv', 'fuse BatchNorm op to Transposed Convolution op'),
+        ('fuse_mul_to_fullyconnected_weights',
+         'fuse Mul op to following FullyConnected op weights'),
         ('fuse_slice_with_tconv', 'fuse Slice op to Transposed Convolution op'),
         ('fuse_bcq', 'apply Binary Coded Quantization'),
         ('fuse_preactivation_batchnorm',
diff --git a/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_000/test.recipe
new file mode 100644
index 00000000000..42d3882f480
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_000/test.recipe
@@ -0,0 +1,67 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 4 }
+}
+operand {
+  name: "add"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 4 }
+}
+operand {
+  name: "addc"
+  type: FLOAT32
+  shape { dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "fc"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 6 }
+}
+operand {
+  name: "fc_wgt"
+  type: FLOAT32
+  shape { dim: 6 dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "fc_bias"
+  type: FLOAT32
+  shape { dim: 6 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operation {
+  type: "Add"
+  input: "ifm"
+  input: "addc"
+  output: "add"
+  add_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "FullyConnected"
+  fullyconnected_options {
+    activation: NONE
+    keep_num_dims: true
+  }
+  input: "add"
+  input: "fc_wgt"
+  input: "fc_bias"
+  output: "fc"
+}
+input: "ifm"
+output: "fc"
diff --git a/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_000/test.rule b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_000/test.rule
new file mode 100644
index 00000000000..d90f632e7fb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_000/test.rule
@@ -0,0 +1,7 @@
+# To check if FC(Add(lhs, rhs), filter, bias) is converted to
+# FC(lhs, filter, FC(rhs, filter, bias))
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "NO_ADD" $(op_count ADD) '=' 0
+RULE "FC_EXIST" $(op_count FULLY_CONNECTED) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_001/test.recipe b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_001/test.recipe
new file mode 100644
index 00000000000..6610e9c3bc4
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_001/test.recipe
@@ -0,0 +1,67 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 4 }
+}
+operand {
+  name: "add"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 4 }
+}
+operand {
+  name: "addc"
+  type: FLOAT32
+  shape { dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "fc"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 6 }
+}
+operand {
+  name: "fc_wgt"
+  type: FLOAT32
+  shape { dim: 6 dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "fc_bias"
+  type: FLOAT32
+  shape { dim: 6 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operation {
+  type: "Add"
+  input: "ifm"
+  input: "addc"
+  output: "add"
+  add_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "FullyConnected"
+  fullyconnected_options {
+    activation: RELU
+    keep_num_dims: true
+  }
+  input: "add"
+  input: "fc_wgt"
+  input: "fc_bias"
+  output: "fc"
+}
+input: "ifm"
+output: "fc"
diff --git a/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_001/test.rule b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_001/test.rule
new file mode 100644
index 00000000000..d90f632e7fb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_001/test.rule
@@ -0,0 +1,7 @@
+# To check if FC(Add(lhs, rhs), filter, bias) is converted to
+# FC(lhs, filter, FC(rhs, filter, bias))
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "NO_ADD" $(op_count ADD) '=' 0
+RULE "FC_EXIST" $(op_count FULLY_CONNECTED) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_002/test.recipe b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_002/test.recipe
new file mode 100644
index 00000000000..7420981b0d9
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_002/test.recipe
@@ -0,0 +1,57 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 4 }
+}
+operand {
+  name: "add"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 4 }
+}
+operand {
+  name: "addc"
+  type: FLOAT32
+  shape { dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "fc"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 6 }
+}
+operand {
+  name: "fc_wgt"
+  type: FLOAT32
+  shape { dim: 6 dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operation {
+  type: "Add"
+  input: "ifm"
+  input: "addc"
+  output: "add"
+  add_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "FullyConnected"
+  fullyconnected_options {
+    activation: NONE
+    keep_num_dims: true
+  }
+  input: "add"
+  input: "fc_wgt"
+  input: ""
+  output: "fc"
+}
+input: "ifm"
+output: "fc"
diff --git a/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_002/test.rule b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_002/test.rule
new file mode 100644
index 00000000000..d90f632e7fb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Add_FullyConnected_002/test.rule
@@ -0,0 +1,7 @@
+# To check if FC(Add(lhs, rhs), filter, bias) is converted to
+# FC(lhs, filter, FC(rhs, filter, bias))
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "NO_ADD" $(op_count ADD) '=' 0 +RULE "FC_EXIST" $(op_count FULLY_CONNECTED) '=' 2 diff --git a/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_000/test.recipe new file mode 100644 index 00000000000..bbb7761014f --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_000/test.recipe @@ -0,0 +1,67 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 3 dim: 1 dim: 4 } +} +operand { + name: "mul" + type: FLOAT32 + shape { dim: 3 dim: 1 dim: 4 } +} +operand { + name: "mulc" + type: FLOAT32 + shape { dim: 4 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "fc" + type: FLOAT32 + shape { dim: 3 dim: 1 dim: 6 } +} +operand { + name: "fc_wgt" + type: FLOAT32 + shape { dim: 6 dim: 4 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "fc_bias" + type: FLOAT32 + shape { dim: 6 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operation { + type: "Mul" + input: "ifm" + input: "mulc" + output: "mul" + mul_options { + activation: NONE + } +} +operation { + type: "FullyConnected" + fullyconnected_options { + activation: NONE + keep_num_dims: true + } + input: "mul" + input: "fc_wgt" + input: "fc_bias" + output: "fc" +} +input: "ifm" +output: "fc" diff --git a/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_000/test.rule b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_000/test.rule new file mode 100644 index 00000000000..53637060cec --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_000/test.rule @@ -0,0 +1,8 @@ +# To check if FC(Mul(lhs, rhs), filter, bias) is converted to +# FC(lhs, Mul(filter, rhs), bias) and then Mul is folded to +# FC(lhs, filter', bias) + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "NO_MUL" $(op_count MUL) '=' 0 +RULE "FC_EXIST" $(op_count FULLY_CONNECTED) '=' 1 diff --git a/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_001/test.recipe b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_001/test.recipe new file mode 100644 index 00000000000..f0f7efd5638 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_001/test.recipe @@ -0,0 +1,67 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 3 dim: 1 dim: 4 } +} +operand { + name: "mul" + type: FLOAT32 + shape { dim: 3 dim: 1 dim: 4 } +} +operand { + name: "mulc" + type: FLOAT32 + shape { dim: 4 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "fc" + type: FLOAT32 + shape { dim: 3 dim: 1 dim: 6 } +} +operand { + name: "fc_wgt" + type: FLOAT32 + shape { dim: 6 dim: 4 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "fc_bias" + type: FLOAT32 + shape { dim: 6 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operation { + type: "Mul" + input: "ifm" + input: "mulc" + output: "mul" + mul_options { + activation: NONE + } +} +operation { + type: "FullyConnected" + fullyconnected_options { + activation: RELU + keep_num_dims: true + } + input: "mul" + input: "fc_wgt" + input: "fc_bias" + output: "fc" +} +input: "ifm" +output: "fc" diff --git a/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_001/test.rule b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_001/test.rule new file mode 100644 index 00000000000..4abf05ccadf --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_001/test.rule @@ -0,0 +1,9 @@ +# To check if FC(Mul(lhs, rhs), filter, bias) is converted to +# FC(lhs, Mul(filter, rhs), bias) and then Mul is 
+# FC(lhs, filter', bias)
+
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "FC_EXIST" $(op_count FULLY_CONNECTED) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_002/test.recipe b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_002/test.recipe
new file mode 100644
index 00000000000..ad022cc4ba7
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_002/test.recipe
@@ -0,0 +1,57 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 4 }
+}
+operand {
+  name: "mul"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 4 }
+}
+operand {
+  name: "mulc"
+  type: FLOAT32
+  shape { dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "fc"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 6 }
+}
+operand {
+  name: "fc_wgt"
+  type: FLOAT32
+  shape { dim: 6 dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operation {
+  type: "Mul"
+  input: "ifm"
+  input: "mulc"
+  output: "mul"
+  mul_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "FullyConnected"
+  fullyconnected_options {
+    activation: RELU
+    keep_num_dims: true
+  }
+  input: "mul"
+  input: "fc_wgt"
+  input: ""
+  output: "fc"
+}
+input: "ifm"
+output: "fc"
diff --git a/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_002/test.rule b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_002/test.rule
new file mode 100644
index 00000000000..53637060cec
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Mul_FullyConnected_002/test.rule
@@ -0,0 +1,8 @@
+# To check if FC(Mul(lhs, rhs), filter, bias) is converted to
+# FC(lhs, Mul(filter, rhs), bias) and then Mul is folded to
+# FC(lhs, filter', bias)
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "FC_EXIST" $(op_count FULLY_CONNECTED) '=' 1