DRAFT CFe fuse Mul Add to FullyConnected
ongoing draft to fuse Mul and Add into FullyConnected.

Signed-off-by: SaeHie Park <[email protected]>
seanshpark committed Jul 16, 2024
1 parent ab68724 commit d57d842
Showing 6 changed files with 42 additions and 3 deletions.
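
This commit only wires the two new passes into the CLI flags, docs, and test lists; the pass implementations appear to live elsewhere in luci (the Algorithms enum entries are already referenced). As a reading aid, here is a minimal NumPy sketch of the algebra the pass names imply, assuming the convention FullyConnected(x) = x @ W.T + b; all names and shapes below are illustrative, not the compiler's API:

import numpy as np

# Assumed convention: FullyConnected(x) = x @ W.T + b,
# with weights W: [out, in] and bias b: [out].
rng = np.random.default_rng(0)
x = rng.standard_normal((2, 8)).astype(np.float32)  # FC input
W = rng.standard_normal((4, 8)).astype(np.float32)  # FC weights
b = rng.standard_normal(4).astype(np.float32)       # FC bias
s = rng.standard_normal(8).astype(np.float32)       # constant Mul operand
a = rng.standard_normal(8).astype(np.float32)       # constant Add operand

def fc(x, W, b):
    return x @ W.T + b

# fuse_mul_to_fullyconnected_weights: a constant Mul feeding the FC input
# folds into the weights, scaling each input column of W by s.
assert np.allclose(fc(x * s, W, b), fc(x, W * s, b), atol=1e-5)

# fuse_add_to_fullyconnected_bias: a constant Add feeding the FC input
# folds into the bias through the weights: b' = b + W @ a.
assert np.allclose(fc(x + a, W, b), fc(x, W, b + W @ a), atol=1e-5)
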
6 changes: 6 additions & 0 deletions compiler/circle2circle-dredd-recipe-test/test.lst
@@ -21,6 +21,9 @@ Add(MaxPoolWithArgmax_000 PASS resolve_customop_max_pool_with_argmax)
Add(MaxPoolWithArgmax_001 PASS resolve_customop_max_pool_with_argmax)
Add(MaxPoolWithArgmax_002 PASS resolve_customop_max_pool_with_argmax)
Add(Net_Add_FloorMod_Gather_000 PASS remove_gather_guard)
Add(Net_Add_FullyConnected_000 PASS fuse_add_to_fullyconnected_bias)
Add(Net_Add_FullyConnected_001 PASS fuse_add_to_fullyconnected_bias)
Add(Net_Add_FullyConnected_002 PASS fuse_add_to_fullyconnected_bias)
Add(Net_BroadcastTo_AddV2_000 PASS resolve_customop_add)
Add(Net_BroadcastTo_AddV2_001 PASS resolve_customop_add)
Add(Net_BroadcastTo_AddV2_002 PASS resolve_customop_add)
@@ -61,6 +64,9 @@ Add(Net_Mul_Add_002 PASS remove_unnecessary_add)
Add(Net_Mul_Add_003 PASS remove_unnecessary_add)
Add(Net_Mul_Div_000 PASS fuse_mul_with_div)
Add(Net_Mul_Div_001 PASS fuse_mul_with_div)
Add(Net_Mul_FullyConnected_000 PASS fuse_mul_to_fullyconnected_weights fold_mul)
Add(Net_Mul_FullyConnected_001 PASS fuse_mul_to_fullyconnected_weights fold_mul)
Add(Net_Mul_FullyConnected_002 PASS fuse_mul_to_fullyconnected_weights fold_mul)
Add(Net_Preactivation_BN_000 PASS fuse_preactivation_batchnorm)
Add(Net_Reshape_Reshape_000 PASS remove_redundant_reshape)
Add(Net_Shape_Add_000 PASS fold_shape)
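
These dredd recipes chain fuse_mul_to_fullyconnected_weights with fold_mul, which suggests the fuse pass re-expresses the new weights as a constant Mul node that fold_mul then evaluates offline. A sketch of that assumed two-step pipeline (inferred from the recipe pairing, not from the pass sources):

import numpy as np

# Assumed pipeline: the fuse pass leaves the weights as a constant
# Mul(W, s) subgraph; fold_mul evaluates it so the circle file stores
# one precomputed weight tensor.
W = np.ones((4, 8), dtype=np.float32)  # original FC weights
s = np.full(8, 0.5, dtype=np.float32)  # scale from the fused Mul
W_folded = W * s                       # the tensor fold_mul materializes
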
11 changes: 11 additions & 0 deletions compiler/circle2circle/src/Circle2Circle.cpp
@@ -81,6 +81,7 @@ int entry(int argc, char **argv)
add_switch(arser, "--fold_fully_connected",
"This will fold FullyConnected operator with constant inputs");
add_switch(arser, "--fold_gather", "This will fold Gather operator");
add_switch(arser, "--fold_mul", "This will fold Mul operator");
add_switch(arser, "--fold_reshape", "This will fold Reshape operator");
add_switch(arser, "--fold_shape", "This will fold Shape operator");
add_switch(arser, "--fold_sparse_to_dense", "This will fold SparseToDense operator");
@@ -93,6 +94,8 @@ int entry(int argc, char **argv)
"This will fuse Activation function to a preceding operator");
add_switch(arser, "--fuse_horizontal_fc_layers",
"This will fuse horizontal FullyConnected layers");
add_switch(arser, "--fuse_add_to_fullyconnected_bias",
"This will fuse Add to following FullyConnected bias");
add_switch(arser, "--fuse_add_with_conv", "This will fuse Add operator to Convolution operator");
add_switch(arser, "--fuse_add_with_fully_connected",
"This will fuse Add operator to FullyConnected operator");
@@ -109,6 +112,8 @@ int entry(int argc, char **argv)
add_switch(arser, "--fuse_mean_with_mean",
"This will fuse two Mean operations when they follow one by one. This will fold them "
"into one operation and merge reduction indices.");
add_switch(arser, "--fuse_mul_to_fullyconnected_weights",
"This will fuse Mul to following FullyConnected weights");
add_switch(arser, "--fuse_mul_with_conv",
"This will fuse Mul operation with a preceding Conv if possible.");
add_switch(arser, "--fuse_mul_with_div",
@@ -275,6 +280,8 @@ int entry(int argc, char **argv)
    options->enable(Algorithms::FoldFullyConnected);
  if (arser.get<bool>("--fold_gather"))
    options->enable(Algorithms::FoldGather);
  if (arser.get<bool>("--fold_mul"))
    options->enable(Algorithms::FoldMul);
  if (arser.get<bool>("--fold_reshape"))
    options->enable(Algorithms::FoldReshape);
  if (arser.get<bool>("--fold_shape"))
@@ -293,6 +300,8 @@ int entry(int argc, char **argv)
    options->enable(Algorithms::FuseHorizontalFullyConnected);
  if (arser.get<bool>("--fuse_batchnorm_with_conv"))
    options->enable(Algorithms::FuseBatchNormWithConv);
  if (arser.get<bool>("--fuse_add_to_fullyconnected_bias"))
    options->enable(Algorithms::FuseAddToFullyConnectedBias);
  if (arser.get<bool>("--fuse_add_with_conv"))
    options->enable(Algorithms::FuseAddWithConv);
  if (arser.get<bool>("--fuse_add_with_fully_connected"))
@@ -303,6 +312,8 @@ int entry(int argc, char **argv)
    options->enable(Algorithms::FuseBatchNormWithDwConv);
  if (arser.get<bool>("--fuse_batchnorm_with_tconv"))
    options->enable(Algorithms::FuseBatchNormWithTConv);
  if (arser.get<bool>("--fuse_mul_to_fullyconnected_weights"))
    options->enable(Algorithms::FuseMulToFullyConnectedWeights);
  if (arser.get<bool>("--fuse_slice_with_tconv"))
    options->enable(Algorithms::FuseSliceWithTConv);
  if (arser.get<bool>("--fuse_bcq"))
8 changes: 7 additions & 1 deletion compiler/luci-pass-value-py-test/test.lst
@@ -2,14 +2,17 @@
# Format:
# eval(MODEL PASS)
# MODEL: tflite model file name in build/compiler/common-artifacts folder.
# PASS: Optimization Pass to test. Supports only one Pass for now.
# PASS: Optimization Pass to test. Supports one or more Passes.
#

# eval(Net_Preactivation_BN_000 fuse_preactivation_batchnorm) : value diff exist
# --> https://github.com/Samsung/ONE/issues/5782
eval(FullyConnected_007 replace_non_const_fc_with_batch_matmul)
eval(HardSwish_001 decompose_hardswish)
eval(Net_Add_FloorMod_Gather_000 remove_gather_guard)
eval(Net_Add_FullyConnected_000 fuse_add_to_fullyconnected_bias)
eval(Net_Add_FullyConnected_001 fuse_add_to_fullyconnected_bias)
eval(Net_Add_FullyConnected_002 fuse_add_to_fullyconnected_bias)
eval(Net_Conv_Add_000 fuse_add_with_conv)
eval(Net_Conv_Add_001 fuse_add_with_conv)
# eval(Net_Conv_Add_002 fuse_add_with_conv) --> Conv2D w/o bias fails in tflite interpreter
@@ -40,6 +43,9 @@ eval(Net_Mul_Add_002 remove_unnecessary_add)
eval(Net_Mul_Add_003 remove_unnecessary_add)
eval(Net_Mul_Div_000 fuse_mul_with_div)
eval(Net_Mul_Div_001 fuse_mul_with_div)
eval(Net_Mul_FullyConnected_000 fuse_mul_to_fullyconnected_weights)
eval(Net_Mul_FullyConnected_001 fuse_mul_to_fullyconnected_weights)
eval(Net_Mul_FullyConnected_002 fuse_mul_to_fullyconnected_weights)
eval(Net_Reshape_Mean_000 forward_reshape_to_unaryop)
eval(Net_Reshape_Neg_000 forward_reshape_to_unaryop)
eval(Net_Reshape_Reshape_000 remove_redundant_reshape)
9 changes: 7 additions & 2 deletions compiler/luci-pass-value-py-test/test_luci_eval.py
@@ -95,8 +95,13 @@ def luci_eval_verify(test_name,
        assert np.allclose(
            luci_output_data, intp_output_data, rtol=rtolint, atol=atolint), err_msg
    elif output_details["dtype"] == np.float32:
        assert np.allclose(
            luci_output_data, intp_output_data, rtol=rtolf32, atol=atolf32), err_msg
        diff_comp = np.allclose(
            luci_output_data, intp_output_data, rtol=rtolf32, atol=atolf32)
        if not diff_comp:
            # dump both outputs and their element-wise difference on mismatch
            print("\r\ntflite:\r\n", intp_output_data, flush=True)
            print("\r\ncircle:\r\n", luci_output_data, flush=True)
            print("\r\nDiff:\r\n", intp_output_data - luci_output_data, flush=True)
        assert diff_comp, err_msg
    elif output_details["dtype"] == np.int64:
        assert np.allclose(
            luci_output_data, intp_output_data, rtol=rtolint, atol=atolint), err_msg
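
The rewritten float32 branch keeps the same tolerance check and only adds diagnostics on failure. For reference, np.allclose(a, b, rtol, atol) passes when |a - b| <= atol + rtol * |b| holds elementwise; a standalone sketch of the pattern (values made up for illustration):

import numpy as np

# tflite vs. circle outputs that differ by more than the tolerance
tflite_out = np.array([1.0, 2.0, 3.0], dtype=np.float32)
circle_out = np.array([1.0, 2.0, 3.1], dtype=np.float32)

if not np.allclose(circle_out, tflite_out, rtol=1e-5, atol=1e-5):
    # printing both tensors and their difference shows whether a mismatch
    # is a real bug or merely a tolerance issue
    print("tflite:", tflite_out)
    print("circle:", circle_out)
    print("diff:", tflite_out - circle_out)
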
3 changes: 3 additions & 0 deletions compiler/one-cmds/how-to-use-one-commands.txt
@@ -160,15 +160,18 @@ Current transformation options are
- fold_dequantize : This removes Dequantize operation which can be folded
- fold_dwconv : This folds Depthwise Convolution operation which can be folded
- fold_gather : This removes Gather operation which can be folded
- fold_mul : This removes Mul operation which can be folded
- fold_shape : This removes Shape operation which can be folded
- fold_sparse_to_dense : This removes SparseToDense operation which can be folded
- forward_reshape_to_unaryop: This will move Reshape after UnaryOp for certain conditions
- fuse_add_to_fullyconnected_bias: This fuses Add operator into the bias of the following FullyConnected operator
- fuse_add_with_conv: This fuses Add operator with the preceding Convolution operator if possible
- fuse_add_with_fully_connected: This fuses Add operator with the preceding FullyConnected operator if possible
- fuse_add_with_tconv: This fuses Add operator with the preceding TConv operator if possible
- fuse_batchnorm_with_conv : This fuses BatchNorm operator to convolution operator
- fuse_batchnorm_with_dwconv : This fuses BatchNorm operator to depthwise convolution operator
- fuse_batchnorm_with_tconv : This fuses BatchNorm operator to transpose convolution operator
- fuse_mul_to_fullyconnected_weights : This fuses Mul operator into the weights of the following FullyConnected operator (the algebra for both new fuse options is spelled out after this list)
- fuse_mul_with_conv: This fuses Mul with a preceding Convolution op if possible.
- fuse_mul_with_div: This fuses Mul and Div op as Div.
- fuse_slice_with_tconv: This fuses Slice with a preceding TConv if possible.
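
Concretely, assuming the convention FullyConnected(x) = x @ W.T + b: fusing a preceding constant Mul(x, s) scales each input column of the weights (W' = W * s), while fusing a preceding constant Add(x, a) folds the offset into the bias through the weights (b' = b + W @ a); see the NumPy sketch near the top of this commit.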
8 changes: 8 additions & 0 deletions compiler/one-cmds/onelib/constant.py
@@ -29,19 +29,22 @@ class CONSTANT:
        'fold_dwconv',
        'fold_fully_connected',
        'fold_gather',
        'fold_mul',
        'fold_reshape',
        'fold_shape',
        'fold_sparse_to_dense',
        'fold_squeeze',

        # Operator fusion
        'fuse_add_to_fullyconnected_bias',
        'fuse_add_with_conv',
        'fuse_add_with_tconv',
        'fuse_add_with_fully_connected',
        'fuse_batchnorm_with_conv',
        'fuse_batchnorm_with_dwconv',
        'fuse_batchnorm_with_tconv',
        'fuse_activation_function',
        'fuse_mul_to_fullyconnected_weights',
        'fuse_instnorm',
        'fuse_prelu',
        'fuse_gelu',
@@ -104,18 +107,23 @@ class CONSTANT:
        ('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'),
        ('fold_fully_connected', 'fold FullyConnected op with constant inputs'),
        ('fold_gather', 'fold Gather op'),
        ('fold_mul', 'fold Mul op'),
        ('fold_reshape', 'fold Reshape op'),
        ('fold_shape', 'fold Shape op'),
        ('fold_sparse_to_dense', 'fold SparseToDense op'),
        ('fold_squeeze', 'fold Squeeze op'),
        ('forward_reshape_to_unaryop', 'Forward Reshape op'),
        ('forward_transpose_op', 'Forward Transpose op'),
        ('fuse_add_to_fullyconnected_bias',
         'fuse Add op to following FullyConnected op bias'),
        ('fuse_add_with_conv', 'fuse Add op to Convolution op'),
        ('fuse_add_with_tconv', 'fuse Add op to Transposed Convolution op'),
        ('fuse_add_with_fully_connected', 'fuse Add op to FullyConnected op'),
        ('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'),
        ('fuse_batchnorm_with_dwconv', 'fuse BatchNorm op to Depthwise Convolution op'),
        ('fuse_batchnorm_with_tconv', 'fuse BatchNorm op to Transposed Convolution op'),
        ('fuse_mul_to_fullyconnected_weights',
         'fuse Mul op to following FullyConnected op weights'),
        ('fuse_slice_with_tconv', 'fuse Slice op to Transposed Convolution op'),
        ('fuse_bcq', 'apply Binary Coded Quantization'),
        ('fuse_preactivation_batchnorm',
