diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..dc8f507 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +# Use an official PyTorch runtime as a parent image +FROM pytorch/pytorch + +# Set the working directory +WORKDIR /dnncanyon + +# Copy the current directory contents into the container +COPY ./ /dnncanyon + +# Install any needed packages specified in requirements.txt +RUN pip install -r ./requirements.txt + +# Run when the container launches +CMD ["python", "inference.py"] \ No newline at end of file diff --git a/data/.DS_Store b/data/.DS_Store new file mode 100644 index 0000000..26a53b5 Binary files /dev/null and b/data/.DS_Store differ diff --git a/data/images/.DS_Store b/data/images/.DS_Store new file mode 100644 index 0000000..09855cb Binary files /dev/null and b/data/images/.DS_Store differ diff --git a/data/images/dog.jpg b/data/images/dog.jpg new file mode 100644 index 0000000..12f0e0d Binary files /dev/null and b/data/images/dog.jpg differ diff --git a/dnn_models/.DS_Store b/dnn_models/.DS_Store new file mode 100644 index 0000000..d7e35d4 Binary files /dev/null and b/dnn_models/.DS_Store differ diff --git a/dnn_models/alexnet.py b/dnn_models/alexnet.py new file mode 100644 index 0000000..2a87dea --- /dev/null +++ b/dnn_models/alexnet.py @@ -0,0 +1,54 @@ +import torch +import torch.nn as nn + +PATH = "../models/alexnet-owt-4df8aa71.pth" + + +class AlexNet(nn.Module): + + def __init__(self, num_classes=1000): + super(AlexNet, self).__init__() + self.features = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + + nn.Conv2d(64, 192, kernel_size=5, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + + nn.Conv2d(192, 384, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(384, 256, kernel_size=3, padding=1), + + nn.ReLU(inplace=True), + nn.Conv2d(256, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + + nn.MaxPool2d(kernel_size=3, stride=2), + ) + self.avgpool = nn.AdaptiveAvgPool2d((6, 6)) + self.classifier = nn.Sequential( + nn.Dropout(), + nn.Linear(256 * 6 * 6, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(inplace=True), + nn.Linear(4096, num_classes), + ) + + def forward(self, x): + x = self.features(x) + x = self.avgpool(x) + x = torch.flatten(x, 1) + x = self.classifier(x) + return x + + +def alexnet(pretrained=False, progress=True, **kwargs): + model = AlexNet(**kwargs) + if pretrained: + model.load_state_dict(torch.load(PATH)) + + return model diff --git a/dnn_models/darknet_53.py b/dnn_models/darknet_53.py new file mode 100644 index 0000000..611fa1a --- /dev/null +++ b/dnn_models/darknet_53.py @@ -0,0 +1,117 @@ +import torch +from torch import nn +import time + +def conv_batch(in_num, out_num, kernel_size=3, padding=1, stride=1): + return nn.Sequential( + nn.Conv2d(in_num, out_num, kernel_size=kernel_size, stride=stride, padding=padding, bias=False), + nn.BatchNorm2d(out_num), + nn.LeakyReLU()) + + +# Residual block +class DarkResidualBlock(nn.Module): + def __init__(self, in_channels): + super(DarkResidualBlock, self).__init__() + + reduced_channels = int(in_channels/2) + + self.layer1 = conv_batch(in_channels, reduced_channels, kernel_size=1, padding=0) + self.layer2 = conv_batch(reduced_channels, in_channels) + + def forward(self, x): + residual = x + + out = self.layer1(x) + out = self.layer2(out) + out += residual + return out + + +class Darknet53(nn.Module): + def 
__init__(self, block, num_classes): + super(Darknet53, self).__init__() + + self.num_classes = num_classes + + self.conv1 = conv_batch(3, 32) + self.conv2 = conv_batch(32, 64, stride=2) + self.residual_block1 = self.make_layer(block, in_channels=64, num_blocks=1) + self.conv3 = conv_batch(64, 128, stride=2) + self.residual_block2 = self.make_layer(block, in_channels=128, num_blocks=2) + self.conv4 = conv_batch(128, 256, stride=2) + self.residual_block3 = self.make_layer(block, in_channels=256, num_blocks=8) + self.conv5 = conv_batch(256, 512, stride=2) + self.residual_block4 = self.make_layer(block, in_channels=512, num_blocks=8) + self.conv6 = conv_batch(512, 1024, stride=2) + self.residual_block5 = self.make_layer(block, in_channels=1024, num_blocks=4) + self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(1024, self.num_classes) + + def forward(self, x): + output_size = [] + start1 = time.time() + out = self.conv1(x) + end1 = time.time() + output_size.append(out.size()) + start2 = time.time() + out = self.conv2(out) + end2 = time.time() + output_size.append(out.size()) + start3 = time.time() + out = self.residual_block1(out) + end3 = time.time() + output_size.append(out.size()) + start4 = time.time() + out = self.conv3(out) + end4 = time.time() + output_size.append(out.size()) + start5 = time.time() + out = self.residual_block2(out) + end5 = time.time() + output_size.append(out.size()) + start6 = time.time() + out = self.conv4(out) + end6 = time.time() + output_size.append(out.size()) + start7 = time.time() + out = self.residual_block3(out) + end7 = time.time() + output_size.append(out.size()) + start8 = time.time() + out = self.conv5(out) + end8 = time.time() + output_size.append(out.size()) + start9 = time.time() + out = self.residual_block4(out) + end9 = time.time() + output_size.append(out.size()) + start10 = time.time() + out = self.conv6(out) + end10 = time.time() + output_size.append(out.size()) + start11 = time.time() + out = self.residual_block5(out) + end11 = time.time() + output_size.append(out.size()) + start12 = time.time() + out = self.global_avg_pool(out) + end12 = time.time() + output_size.append(out.size()) + out = out.view(-1, 1024) + start13 = time.time() + out = self.fc(out) + end13 = time.time() + output_size.append(out.size()) + proc_time = [end1-start1, end2-start2, end3-start3, end4-start4, end5-start5, end6-start6, end7-start7, end8-start8, end9-start9, end10-start10, end11-start11, end12-start12, end13-start13] + return out, proc_time, output_size + + def make_layer(self, block, in_channels, num_blocks): + layers = [] + for i in range(0, num_blocks): + layers.append(block(in_channels)) + return nn.Sequential(*layers) + + +def darknet53(num_classes): + return Darknet53(DarkResidualBlock, num_classes) diff --git a/dnn_models/inception_v3.py b/dnn_models/inception_v3.py new file mode 100644 index 0000000..64f753b --- /dev/null +++ b/dnn_models/inception_v3.py @@ -0,0 +1,440 @@ +from collections import namedtuple +import warnings +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.jit.annotations import Optional +from torch import Tensor + + +__all__ = ['Inception3', 'inception_v3', 'InceptionOutputs', '_InceptionOutputs'] + +PATH = "../models/inception_v3_google-1a9a5a14.pth" + +model_urls = { + # Inception v3 ported from TensorFlow + 'inception_v3_google': 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth', +} + +InceptionOutputs = namedtuple('InceptionOutputs', ['logits', 'aux_logits']) 
+InceptionOutputs.__annotations__ = {'logits': torch.Tensor, 'aux_logits': Optional[torch.Tensor]}
+
+# Script annotations failed with _GoogleNetOutputs = namedtuple ...
+# _InceptionOutputs set here for backwards compat
+_InceptionOutputs = InceptionOutputs
+
+
+def inception_v3(pretrained=False, progress=True, **kwargs):
+    r"""Inception v3 model architecture from
+    `"Rethinking the Inception Architecture for Computer Vision" `_.
+
+    .. note::
+        **Important**: In contrast to the other models the inception_v3 expects tensors with a size of
+        N x 3 x 299 x 299, so ensure your images are sized accordingly.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+        aux_logits (bool): If True, add an auxiliary branch that can improve training.
+            Default: *True*
+        transform_input (bool): If True, preprocesses the input according to the method with which it
+            was trained on ImageNet. Default: *False*
+    """
+    if pretrained:
+        if 'transform_input' not in kwargs:
+            kwargs['transform_input'] = True
+        if 'aux_logits' in kwargs:
+            original_aux_logits = kwargs['aux_logits']
+            kwargs['aux_logits'] = True
+        else:
+            original_aux_logits = True
+        kwargs['init_weights'] = False  # we are loading weights from a pretrained model
+        model = Inception3(**kwargs)
+        model.load_state_dict(torch.load(PATH))
+        if not original_aux_logits:
+            model.aux_logits = False
+            del model.AuxLogits
+        return model
+
+    return Inception3(**kwargs)
+
+
+class Inception3(nn.Module):
+
+    def __init__(self, num_classes=1000, aux_logits=True, transform_input=False,
+                 inception_blocks=None, init_weights=None):
+        super(Inception3, self).__init__()
+        if inception_blocks is None:
+            inception_blocks = [
+                BasicConv2d, InceptionA, InceptionB, InceptionC,
+                InceptionD, InceptionE, InceptionAux
+            ]
+        if init_weights is None:
+            warnings.warn('The default weight initialization of inception_v3 will be changed in future releases of '
+                          'torchvision. 
If you wish to keep the old behavior (which leads to long initialization times' + ' due to scipy/scipy#11299), please set init_weights=True.', FutureWarning) + init_weights = True + assert len(inception_blocks) == 7 + conv_block = inception_blocks[0] + inception_a = inception_blocks[1] + inception_b = inception_blocks[2] + inception_c = inception_blocks[3] + inception_d = inception_blocks[4] + inception_e = inception_blocks[5] + inception_aux = inception_blocks[6] + + self.aux_logits = aux_logits + self.transform_input = transform_input + self.Conv2d_1a_3x3 = conv_block(3, 32, kernel_size=3, stride=2) + self.Conv2d_2a_3x3 = conv_block(32, 32, kernel_size=3) + self.Conv2d_2b_3x3 = conv_block(32, 64, kernel_size=3, padding=1) + self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2) + self.Conv2d_3b_1x1 = conv_block(64, 80, kernel_size=1) + self.Conv2d_4a_3x3 = conv_block(80, 192, kernel_size=3) + self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2) + self.Mixed_5b = inception_a(192, pool_features=32) + self.Mixed_5c = inception_a(256, pool_features=64) + self.Mixed_5d = inception_a(288, pool_features=64) + self.Mixed_6a = inception_b(288) + self.Mixed_6b = inception_c(768, channels_7x7=128) + self.Mixed_6c = inception_c(768, channels_7x7=160) + self.Mixed_6d = inception_c(768, channels_7x7=160) + self.Mixed_6e = inception_c(768, channels_7x7=192) + if aux_logits: + self.AuxLogits = inception_aux(768, num_classes) + self.Mixed_7a = inception_d(768) + self.Mixed_7b = inception_e(1280) + self.Mixed_7c = inception_e(2048) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.dropout = nn.Dropout() + self.fc = nn.Linear(2048, num_classes) + if init_weights: + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): + import scipy.stats as stats + stddev = m.stddev if hasattr(m, 'stddev') else 0.1 + X = stats.truncnorm(-2, 2, scale=stddev) + values = torch.as_tensor(X.rvs(m.weight.numel()), dtype=m.weight.dtype) + values = values.view(m.weight.size()) + with torch.no_grad(): + m.weight.copy_(values) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def _transform_input(self, x): + if self.transform_input: + x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5 + x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5 + x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5 + x = torch.cat((x_ch0, x_ch1, x_ch2), 1) + return x + + def _forward(self, x): + # N x 3 x 299 x 299 + x = self.Conv2d_1a_3x3(x) + # N x 32 x 149 x 149 + x = self.Conv2d_2a_3x3(x) + # N x 32 x 147 x 147 + x = self.Conv2d_2b_3x3(x) + # N x 64 x 147 x 147 + x = self.maxpool1(x) + # N x 64 x 73 x 73 + x = self.Conv2d_3b_1x1(x) + # N x 80 x 73 x 73 + x = self.Conv2d_4a_3x3(x) + # N x 192 x 71 x 71 + x = self.maxpool2(x) + # N x 192 x 35 x 35 + x = self.Mixed_5b(x) + # N x 256 x 35 x 35 + x = self.Mixed_5c(x) + # N x 288 x 35 x 35 + x = self.Mixed_5d(x) + # N x 288 x 35 x 35 + x = self.Mixed_6a(x) + # N x 768 x 17 x 17 + x = self.Mixed_6b(x) + # N x 768 x 17 x 17 + x = self.Mixed_6c(x) + # N x 768 x 17 x 17 + x = self.Mixed_6d(x) + # N x 768 x 17 x 17 + x = self.Mixed_6e(x) + # N x 768 x 17 x 17 + aux_defined = self.training and self.aux_logits + if aux_defined: + aux = self.AuxLogits(x) + else: + aux = None + # N x 768 x 17 x 17 + x = self.Mixed_7a(x) + # N x 1280 x 8 x 8 + x = self.Mixed_7b(x) + # N x 2048 x 8 x 8 + x = self.Mixed_7c(x) + # N x 2048 x 8 x 8 + # Adaptive average pooling + x = 
self.avgpool(x) + # N x 2048 x 1 x 1 + x = self.dropout(x) + # N x 2048 x 1 x 1 + x = torch.flatten(x, 1) + # N x 2048 + x = self.fc(x) + # N x 1000 (num_classes) + return x, aux + + @torch.jit.unused + def eager_outputs(self, x, aux): + # type: (Tensor, Optional[Tensor]) -> InceptionOutputs + if self.training and self.aux_logits: + return InceptionOutputs(x, aux) + else: + return x + + def forward(self, x): + x = self._transform_input(x) + x, aux = self._forward(x) + aux_defined = self.training and self.aux_logits + if torch.jit.is_scripting(): + if not aux_defined: + warnings.warn("Scripted Inception3 always returns Inception3 Tuple") + return InceptionOutputs(x, aux) + else: + return self.eager_outputs(x, aux) + + +class InceptionA(nn.Module): + + def __init__(self, in_channels, pool_features, conv_block=None): + super(InceptionA, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch1x1 = conv_block(in_channels, 64, kernel_size=1) + + self.branch5x5_1 = conv_block(in_channels, 48, kernel_size=1) + self.branch5x5_2 = conv_block(48, 64, kernel_size=5, padding=2) + + self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1) + self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1) + self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, padding=1) + + self.branch_pool = conv_block(in_channels, pool_features, kernel_size=1) + + def _forward(self, x): + branch1x1 = self.branch1x1(x) + + branch5x5 = self.branch5x5_1(x) + branch5x5 = self.branch5x5_2(branch5x5) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionB(nn.Module): + + def __init__(self, in_channels, conv_block=None): + super(InceptionB, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch3x3 = conv_block(in_channels, 384, kernel_size=3, stride=2) + + self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1) + self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1) + self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, stride=2) + + def _forward(self, x): + branch3x3 = self.branch3x3(x) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) + + outputs = [branch3x3, branch3x3dbl, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionC(nn.Module): + + def __init__(self, in_channels, channels_7x7, conv_block=None): + super(InceptionC, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch1x1 = conv_block(in_channels, 192, kernel_size=1) + + c7 = channels_7x7 + self.branch7x7_1 = conv_block(in_channels, c7, kernel_size=1) + self.branch7x7_2 = conv_block(c7, c7, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7_3 = conv_block(c7, 192, kernel_size=(7, 1), padding=(3, 0)) + + self.branch7x7dbl_1 = conv_block(in_channels, c7, kernel_size=1) + self.branch7x7dbl_2 = conv_block(c7, c7, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7dbl_3 = conv_block(c7, c7, kernel_size=(1, 7), padding=(0, 3)) + 
self.branch7x7dbl_4 = conv_block(c7, c7, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7dbl_5 = conv_block(c7, 192, kernel_size=(1, 7), padding=(0, 3)) + + self.branch_pool = conv_block(in_channels, 192, kernel_size=1) + + def _forward(self, x): + branch1x1 = self.branch1x1(x) + + branch7x7 = self.branch7x7_1(x) + branch7x7 = self.branch7x7_2(branch7x7) + branch7x7 = self.branch7x7_3(branch7x7) + + branch7x7dbl = self.branch7x7dbl_1(x) + branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionD(nn.Module): + + def __init__(self, in_channels, conv_block=None): + super(InceptionD, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch3x3_1 = conv_block(in_channels, 192, kernel_size=1) + self.branch3x3_2 = conv_block(192, 320, kernel_size=3, stride=2) + + self.branch7x7x3_1 = conv_block(in_channels, 192, kernel_size=1) + self.branch7x7x3_2 = conv_block(192, 192, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7x3_3 = conv_block(192, 192, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7x3_4 = conv_block(192, 192, kernel_size=3, stride=2) + + def _forward(self, x): + branch3x3 = self.branch3x3_1(x) + branch3x3 = self.branch3x3_2(branch3x3) + + branch7x7x3 = self.branch7x7x3_1(x) + branch7x7x3 = self.branch7x7x3_2(branch7x7x3) + branch7x7x3 = self.branch7x7x3_3(branch7x7x3) + branch7x7x3 = self.branch7x7x3_4(branch7x7x3) + + branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) + outputs = [branch3x3, branch7x7x3, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionE(nn.Module): + + def __init__(self, in_channels, conv_block=None): + super(InceptionE, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch1x1 = conv_block(in_channels, 320, kernel_size=1) + + self.branch3x3_1 = conv_block(in_channels, 384, kernel_size=1) + self.branch3x3_2a = conv_block(384, 384, kernel_size=(1, 3), padding=(0, 1)) + self.branch3x3_2b = conv_block(384, 384, kernel_size=(3, 1), padding=(1, 0)) + + self.branch3x3dbl_1 = conv_block(in_channels, 448, kernel_size=1) + self.branch3x3dbl_2 = conv_block(448, 384, kernel_size=3, padding=1) + self.branch3x3dbl_3a = conv_block(384, 384, kernel_size=(1, 3), padding=(0, 1)) + self.branch3x3dbl_3b = conv_block(384, 384, kernel_size=(3, 1), padding=(1, 0)) + + self.branch_pool = conv_block(in_channels, 192, kernel_size=1) + + def _forward(self, x): + branch1x1 = self.branch1x1(x) + + branch3x3 = self.branch3x3_1(x) + branch3x3 = [ + self.branch3x3_2a(branch3x3), + self.branch3x3_2b(branch3x3), + ] + branch3x3 = torch.cat(branch3x3, 1) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = [ + self.branch3x3dbl_3a(branch3x3dbl), + self.branch3x3dbl_3b(branch3x3dbl), + ] + branch3x3dbl = torch.cat(branch3x3dbl, 1) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] + return outputs + + def forward(self, x): + 
outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionAux(nn.Module): + + def __init__(self, in_channels, num_classes, conv_block=None): + super(InceptionAux, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.conv0 = conv_block(in_channels, 128, kernel_size=1) + self.conv1 = conv_block(128, 768, kernel_size=5) + self.conv1.stddev = 0.01 + self.fc = nn.Linear(768, num_classes) + self.fc.stddev = 0.001 + + def forward(self, x): + # N x 768 x 17 x 17 + x = F.avg_pool2d(x, kernel_size=5, stride=3) + # N x 768 x 5 x 5 + x = self.conv0(x) + # N x 128 x 5 x 5 + x = self.conv1(x) + # N x 768 x 1 x 1 + # Adaptive average pooling + x = F.adaptive_avg_pool2d(x, (1, 1)) + # N x 768 x 1 x 1 + x = torch.flatten(x, 1) + # N x 768 + x = self.fc(x) + # N x 1000 + return x + + +class BasicConv2d(nn.Module): + + def __init__(self, in_channels, out_channels, **kwargs): + super(BasicConv2d, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs) + self.bn = nn.BatchNorm2d(out_channels, eps=0.001) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + return F.relu(x, inplace=True) \ No newline at end of file diff --git a/dnn_models/inceptionv4.py b/dnn_models/inceptionv4.py new file mode 100644 index 0000000..2ecd6ea --- /dev/null +++ b/dnn_models/inceptionv4.py @@ -0,0 +1,358 @@ +from __future__ import print_function, division, absolute_import +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.model_zoo as model_zoo +import os +import sys + +__all__ = ['InceptionV4', 'inceptionv4'] + +pretrained_settings = { + 'inceptionv4': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth', + 'input_space': 'RGB', + 'input_size': [3, 299, 299], + 'input_range': [0, 1], + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + 'num_classes': 1000 + }, + 'imagenet+background': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth', + 'input_space': 'RGB', + 'input_size': [3, 299, 299], + 'input_range': [0, 1], + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + 'num_classes': 1001 + } + } +} + + +class BasicConv2d(nn.Module): + + def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0): + super(BasicConv2d, self).__init__() + self.conv = nn.Conv2d(in_planes, out_planes, + kernel_size=kernel_size, stride=stride, + padding=padding, bias=False) # verify bias false + self.bn = nn.BatchNorm2d(out_planes, + eps=0.001, # value found in tensorflow + momentum=0.1, # default pytorch value + affine=True) + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + + +class Mixed_3a(nn.Module): + + def __init__(self): + super(Mixed_3a, self).__init__() + self.maxpool = nn.MaxPool2d(3, stride=2) + self.conv = BasicConv2d(64, 96, kernel_size=3, stride=2) + + def forward(self, x): + x0 = self.maxpool(x) + x1 = self.conv(x) + out = torch.cat((x0, x1), 1) + return out + + +class Mixed_4a(nn.Module): + + def __init__(self): + super(Mixed_4a, self).__init__() + + self.branch0 = nn.Sequential( + BasicConv2d(160, 64, kernel_size=1, stride=1), + BasicConv2d(64, 96, kernel_size=3, stride=1) + ) + + self.branch1 = nn.Sequential( + BasicConv2d(160, 64, kernel_size=1, stride=1), + BasicConv2d(64, 64, kernel_size=(1,7), stride=1, padding=(0,3)), + BasicConv2d(64, 64, kernel_size=(7,1), stride=1, padding=(3,0)), + BasicConv2d(64, 96, kernel_size=(3,3), stride=1) + ) 
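+        # branch0 (1x1 -> 3x3) and branch1 (1x1 -> 1x7 -> 7x1 -> 3x3) each end in 96 channels;
+        # forward() concatenates them into 192 channels for Mixed_5a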
+ + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + out = torch.cat((x0, x1), 1) + return out + + +class Mixed_5a(nn.Module): + + def __init__(self): + super(Mixed_5a, self).__init__() + self.conv = BasicConv2d(192, 192, kernel_size=3, stride=2) + self.maxpool = nn.MaxPool2d(3, stride=2) + + def forward(self, x): + x0 = self.conv(x) + x1 = self.maxpool(x) + out = torch.cat((x0, x1), 1) + return out + + +class Inception_A(nn.Module): + + def __init__(self): + super(Inception_A, self).__init__() + self.branch0 = BasicConv2d(384, 96, kernel_size=1, stride=1) + + self.branch1 = nn.Sequential( + BasicConv2d(384, 64, kernel_size=1, stride=1), + BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1) + ) + + self.branch2 = nn.Sequential( + BasicConv2d(384, 64, kernel_size=1, stride=1), + BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1), + BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1) + ) + + self.branch3 = nn.Sequential( + nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False), + BasicConv2d(384, 96, kernel_size=1, stride=1) + ) + + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + x2 = self.branch2(x) + x3 = self.branch3(x) + out = torch.cat((x0, x1, x2, x3), 1) + return out + + +class Reduction_A(nn.Module): + + def __init__(self): + super(Reduction_A, self).__init__() + self.branch0 = BasicConv2d(384, 384, kernel_size=3, stride=2) + + self.branch1 = nn.Sequential( + BasicConv2d(384, 192, kernel_size=1, stride=1), + BasicConv2d(192, 224, kernel_size=3, stride=1, padding=1), + BasicConv2d(224, 256, kernel_size=3, stride=2) + ) + + self.branch2 = nn.MaxPool2d(3, stride=2) + + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + x2 = self.branch2(x) + out = torch.cat((x0, x1, x2), 1) + return out + + +class Inception_B(nn.Module): + + def __init__(self): + super(Inception_B, self).__init__() + self.branch0 = BasicConv2d(1024, 384, kernel_size=1, stride=1) + + self.branch1 = nn.Sequential( + BasicConv2d(1024, 192, kernel_size=1, stride=1), + BasicConv2d(192, 224, kernel_size=(1,7), stride=1, padding=(0,3)), + BasicConv2d(224, 256, kernel_size=(7,1), stride=1, padding=(3,0)) + ) + + self.branch2 = nn.Sequential( + BasicConv2d(1024, 192, kernel_size=1, stride=1), + BasicConv2d(192, 192, kernel_size=(7,1), stride=1, padding=(3,0)), + BasicConv2d(192, 224, kernel_size=(1,7), stride=1, padding=(0,3)), + BasicConv2d(224, 224, kernel_size=(7,1), stride=1, padding=(3,0)), + BasicConv2d(224, 256, kernel_size=(1,7), stride=1, padding=(0,3)) + ) + + self.branch3 = nn.Sequential( + nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False), + BasicConv2d(1024, 128, kernel_size=1, stride=1) + ) + + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + x2 = self.branch2(x) + x3 = self.branch3(x) + out = torch.cat((x0, x1, x2, x3), 1) + return out + + +class Reduction_B(nn.Module): + + def __init__(self): + super(Reduction_B, self).__init__() + + self.branch0 = nn.Sequential( + BasicConv2d(1024, 192, kernel_size=1, stride=1), + BasicConv2d(192, 192, kernel_size=3, stride=2) + ) + + self.branch1 = nn.Sequential( + BasicConv2d(1024, 256, kernel_size=1, stride=1), + BasicConv2d(256, 256, kernel_size=(1,7), stride=1, padding=(0,3)), + BasicConv2d(256, 320, kernel_size=(7,1), stride=1, padding=(3,0)), + BasicConv2d(320, 320, kernel_size=3, stride=2) + ) + + self.branch2 = nn.MaxPool2d(3, stride=2) + + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + x2 = self.branch2(x) + out = torch.cat((x0, x1, 
x2), 1) + return out + + +class Inception_C(nn.Module): + + def __init__(self): + super(Inception_C, self).__init__() + + self.branch0 = BasicConv2d(1536, 256, kernel_size=1, stride=1) + + self.branch1_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1) + self.branch1_1a = BasicConv2d(384, 256, kernel_size=(1,3), stride=1, padding=(0,1)) + self.branch1_1b = BasicConv2d(384, 256, kernel_size=(3,1), stride=1, padding=(1,0)) + + self.branch2_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1) + self.branch2_1 = BasicConv2d(384, 448, kernel_size=(3,1), stride=1, padding=(1,0)) + self.branch2_2 = BasicConv2d(448, 512, kernel_size=(1,3), stride=1, padding=(0,1)) + self.branch2_3a = BasicConv2d(512, 256, kernel_size=(1,3), stride=1, padding=(0,1)) + self.branch2_3b = BasicConv2d(512, 256, kernel_size=(3,1), stride=1, padding=(1,0)) + + self.branch3 = nn.Sequential( + nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False), + BasicConv2d(1536, 256, kernel_size=1, stride=1) + ) + + def forward(self, x): + x0 = self.branch0(x) + + x1_0 = self.branch1_0(x) + x1_1a = self.branch1_1a(x1_0) + x1_1b = self.branch1_1b(x1_0) + x1 = torch.cat((x1_1a, x1_1b), 1) + + x2_0 = self.branch2_0(x) + x2_1 = self.branch2_1(x2_0) + x2_2 = self.branch2_2(x2_1) + x2_3a = self.branch2_3a(x2_2) + x2_3b = self.branch2_3b(x2_2) + x2 = torch.cat((x2_3a, x2_3b), 1) + + x3 = self.branch3(x) + + out = torch.cat((x0, x1, x2, x3), 1) + return out + + +class InceptionV4(nn.Module): + + def __init__(self, num_classes=1001): + super(InceptionV4, self).__init__() + # Special attributs + self.input_space = None + self.input_size = (299, 299, 3) + self.mean = None + self.std = None + # Modules + self.features = nn.Sequential( + BasicConv2d(3, 32, kernel_size=3, stride=2), + BasicConv2d(32, 32, kernel_size=3, stride=1), + BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1), + Mixed_3a(), + Mixed_4a(), + Mixed_5a(), + Inception_A(), + Inception_A(), + Inception_A(), + Inception_A(), + Reduction_A(), # Mixed_6a + Inception_B(), + Inception_B(), + Inception_B(), + Inception_B(), + Inception_B(), + Inception_B(), + Inception_B(), + Reduction_B(), # Mixed_7a + Inception_C(), + Inception_C(), + Inception_C() + ) + self.last_linear = nn.Linear(1536, num_classes) + + def logits(self, features): + #Allows image of any size to be processed + adaptiveAvgPoolWidth = features.shape[2] + x = F.avg_pool2d(features, kernel_size=adaptiveAvgPoolWidth) + x = x.view(x.size(0), -1) + x = self.last_linear(x) + return x + + def forward(self, input): + x = self.features(input) + x = self.logits(x) + return x + + +def inceptionv4(num_classes=1000, pretrained='imagenet'): + if pretrained: + settings = pretrained_settings['inceptionv4'][pretrained] + assert num_classes == settings['num_classes'], \ + "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) + + # both 'imagenet'&'imagenet+background' are loaded from same parameters + model = InceptionV4(num_classes=1001) + model.load_state_dict(model_zoo.load_url(settings['url'])) + + if pretrained == 'imagenet': + new_last_linear = nn.Linear(1536, 1000) + new_last_linear.weight.data = model.last_linear.weight.data[1:] + new_last_linear.bias.data = model.last_linear.bias.data[1:] + model.last_linear = new_last_linear + + model.input_space = settings['input_space'] + model.input_size = settings['input_size'] + model.input_range = settings['input_range'] + model.mean = settings['mean'] + model.std = settings['std'] + else: + model = InceptionV4(num_classes=num_classes) + return 
model + + +''' +TEST +Run this code with: +``` +cd $HOME/pretrained-models.pytorch +python -m pretrainedmodels.inceptionv4 +``` +''' +if __name__ == '__main__': + + assert inceptionv4(num_classes=10, pretrained=None) + print('success') + assert inceptionv4(num_classes=1000, pretrained='imagenet') + print('success') + assert inceptionv4(num_classes=1001, pretrained='imagenet+background') + print('success') + + # fail + assert inceptionv4(num_classes=1001, pretrained='imagenet') \ No newline at end of file diff --git a/dnn_models/karate_club_net.py b/dnn_models/karate_club_net.py new file mode 100644 index 0000000..9306664 --- /dev/null +++ b/dnn_models/karate_club_net.py @@ -0,0 +1,44 @@ +import numpy as np +import networkx as nx +import matplotlib.pyplot as plt +import torch.nn as nn +import dgl + + +def build_karate_club_graph(): + # All 78 edges are stored in two numpy arrays. One for source endpoints + # while the other for destination endpoints. + src = np.array([1, 2, 2, 3, 3, 3, 4, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 10, 10, + 10, 11, 12, 12, 13, 13, 13, 13, 16, 16, 17, 17, 19, 19, 21, 21, + 25, 25, 27, 27, 27, 28, 29, 29, 30, 30, 31, 31, 31, 31, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 33, 33]) + dst = np.array([0, 0, 1, 0, 1, 2, 0, 0, 0, 4, 5, 0, 1, 2, 3, 0, 2, 2, 0, 4, + 5, 0, 0, 3, 0, 1, 2, 3, 5, 6, 0, 1, 0, 1, 0, 1, 23, 24, 2, 23, + 24, 2, 23, 26, 1, 8, 0, 24, 25, 28, 2, 8, 14, 15, 18, 20, 22, 23, + 29, 30, 31, 8, 9, 13, 14, 15, 18, 19, 20, 22, 23, 26, 27, 28, 29, 30, + 31, 32]) + # Edges are directional in DGL; Make them bi-directional. + u = np.concatenate([src, dst]) + v = np.concatenate([dst, src]) + # Construct a DGLGraph + return dgl.graph((u, v)) + + +def assign_features(G): + embed = nn.Embedding(34, 5) + print(embed) + G.ndata['feat'] = embed.weight + + return G + + +if __name__ == "__main__": + G = build_karate_club_graph() + print("We have %d nodes." % G.number_of_nodes()) + print("We have %d egdes." 
% G.number_of_edges()) + nx_G = G.to_networkx().to_undirected() + pos = nx.kamada_kawai_layout(nx_G) + nx.draw(nx_G, pos, with_labels=True, node_color=[[.7, .8, .9]]) + plt.show() + diff --git a/dnn_models/mynet.py b/dnn_models/mynet.py new file mode 100644 index 0000000..920d64a --- /dev/null +++ b/dnn_models/mynet.py @@ -0,0 +1,34 @@ +import torch +import torch.nn as nn + +class MyNet(nn.Module): + + def __init__(self): + super(MyNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=8, stride=4, padding=2) + self.relu = nn.ReLU(inplace=True) + self.conv2 = nn.Conv2d(64, 192, kernel_size=4, stride=4, padding=2) + self.pool1 = nn.MaxPool2d(kernel_size=6, stride=3, padding=0) + + def forward(self, x): + x = self.conv1(x) + x = self.relu(x) + y1 = self.conv2(x) + y1 = self.relu(y1) + y2 = self.conv2(x) + y = y1 + y2 + # x = self.conv1(x) + # x = self.relu(x) + # x = self.conv2(x) + # x = self.pool1(x) + + return y + + +if __name__ == "__main__": + net = MyNet() + net.eval() + + input = torch.randn(1, 3, 224, 224) + output = net(input) + print(output) \ No newline at end of file diff --git a/dnn_models/resnet.py b/dnn_models/resnet.py new file mode 100644 index 0000000..b0fbd49 --- /dev/null +++ b/dnn_models/resnet.py @@ -0,0 +1,353 @@ +import torch +import torch.nn as nn + + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', + 'wide_resnet50_2', 'wide_resnet101_2'] + +PATH = "../models/resnet34-333f7ec4.pth" + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', + 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth', + 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth', + 'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth', + 'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=dilation, groups=groups, bias=False, dilation=dilation) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None): + super(BasicBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + if groups != 1 or base_width != 64: + raise ValueError('BasicBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError("Dilation > 1 not supported in BasicBlock") + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = 
self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + # while original implementation places the stride at the first 1x1 convolution(self.conv1) + # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + # This variant is also known as ResNet V1.5 and improves accuracy according to + # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. + + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None): + super(Bottleneck, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width / 64.)) * groups + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, + groups=1, width_per_group=64, replace_stride_with_dilation=None, + norm_layer=None): + super(ResNet, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self._norm_layer = norm_layer + + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + # each element in the tuple indicates if we should replace + # the 2x2 stride with a dilated convolution instead + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError("replace_stride_with_dilation should be None " + "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = norm_layer(self.inplanes) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2, + dilate=replace_stride_with_dilation[0]) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, + dilate=replace_stride_with_dilation[1]) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2, + dilate=replace_stride_with_dilation[2]) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + 
                nn.init.constant_(m.bias, 0)
+
+        # Zero-initialize the last BN in each residual branch,
+        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
+        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
+        if zero_init_residual:
+            for m in self.modules():
+                if isinstance(m, Bottleneck):
+                    nn.init.constant_(m.bn3.weight, 0)
+                elif isinstance(m, BasicBlock):
+                    nn.init.constant_(m.bn2.weight, 0)
+
+    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
+        norm_layer = self._norm_layer
+        downsample = None
+        previous_dilation = self.dilation
+        if dilate:
+            self.dilation *= stride
+            stride = 1
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                conv1x1(self.inplanes, planes * block.expansion, stride),
+                norm_layer(planes * block.expansion),
+            )
+
+        layers = []
+        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
+                            self.base_width, previous_dilation, norm_layer))
+        self.inplanes = planes * block.expansion
+        for _ in range(1, blocks):
+            layers.append(block(self.inplanes, planes, groups=self.groups,
+                                base_width=self.base_width, dilation=self.dilation,
+                                norm_layer=norm_layer))
+
+        return nn.Sequential(*layers)
+
+    def _forward_impl(self, x):
+        # See note [TorchScript super()]
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+
+        x = self.avgpool(x)
+        x = torch.flatten(x, 1)
+        x = self.fc(x)
+
+        return x
+
+    def forward(self, x):
+        return self._forward_impl(x)
+
+
+def _resnet(arch, block, layers, pretrained, progress, **kwargs):
+    model = ResNet(block, layers, **kwargs)
+    if pretrained:
+        model.load_state_dict(torch.load(PATH))
+    return model
+
+
+def resnet18(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-18 model from
+    `"Deep Residual Learning for Image Recognition" `_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
+                   **kwargs)
+
+
+def resnet34(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-34 model from
+    `"Deep Residual Learning for Image Recognition" `_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnet50(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-50 model from
+    `"Deep Residual Learning for Image Recognition" `_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnet101(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-101 model from
+    `"Deep Residual Learning for Image Recognition" `_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnet152(pretrained=False, progress=True, **kwargs):
+    
r"""ResNet-152 model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress, + **kwargs) + + +def resnext50_32x4d(pretrained=False, progress=True, **kwargs): + r"""ResNeXt-50 32x4d model from + `"Aggregated Residual Transformation for Deep Neural Networks" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 4 + return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3], + pretrained, progress, **kwargs) + + +def resnext101_32x8d(pretrained=False, progress=True, **kwargs): + r"""ResNeXt-101 32x8d model from + `"Aggregated Residual Transformation for Deep Neural Networks" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 8 + return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], + pretrained, progress, **kwargs) + + +def wide_resnet50_2(pretrained=False, progress=True, **kwargs): + r"""Wide ResNet-50-2 model from + `"Wide Residual Networks" `_ + + The model is the same as ResNet except for the bottleneck number of channels + which is twice larger in every block. The number of channels in outer 1x1 + convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 + channels, and in Wide ResNet-50-2 has 2048-1024-2048. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['width_per_group'] = 64 * 2 + return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3], + pretrained, progress, **kwargs) + + +def wide_resnet101_2(pretrained=False, progress=True, **kwargs): + r"""Wide ResNet-101-2 model from + `"Wide Residual Networks" `_ + + The model is the same as ResNet except for the bottleneck number of channels + which is twice larger in every block. The number of channels in outer 1x1 + convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 + channels, and in Wide ResNet-50-2 has 2048-1024-2048. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['width_per_group'] = 64 * 2 + return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3], + pretrained, progress, **kwargs) \ No newline at end of file diff --git a/dnn_models/simple_gcn.py b/dnn_models/simple_gcn.py new file mode 100644 index 0000000..1bfa94b --- /dev/null +++ b/dnn_models/simple_gcn.py @@ -0,0 +1,17 @@ +import torch +import torch.nn as nn +from dgl.nn.pytorch import GraphConv + + +class GCN(nn.Module): + def __init__(self, in_feats, hidden_size, num_classes): + super(GCN, self).__init__() + self.conv1 = GraphConv(in_feats, hidden_size) + self.conv2 = GraphConv(hidden_size, num_classes) + + def forward(self, g, inputs): + h = self.conv1(g, inputs) + h = torch.relu(h) + h = self.conv2(g, h) + + return h \ No newline at end of file diff --git a/dnn_split/.DS_Store b/dnn_split/.DS_Store new file mode 100644 index 0000000..db6d5fb Binary files /dev/null and b/dnn_split/.DS_Store differ diff --git a/dnn_split/comm_util.py b/dnn_split/comm_util.py new file mode 100644 index 0000000..63b85fe --- /dev/null +++ b/dnn_split/comm_util.py @@ -0,0 +1,130 @@ +import os +import queue +import socket +import struct +import pickle +import threading + + +def send_model(model_path): + host = "127.0.0.1" + port = 50000 + + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.setblocking(1) + s.connect((host, port)) + + if os.path.isfile(model_path): + model_head = struct.pack('128sl', os.path.basename(model_path).encode('utf-8'), os.stat(model_path).st_size) + s.sendall(model_head) + f = open(model_path, 'rb') + print("file opened") + raw = f.read() + s.sendall(raw) + f.close() + else: + print("Wrong path.") + s.close() + # s.sendall(model_name.encode('utf-8')) + # s.sendall(os.stat(model_path+model_name).st_size.to_bytes(length=8, byteorder='big')) + + +def recv_model(model_dir, host, port): + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.setblocking(0) + s.bind((host, port)) + s.listen() + print("listening to connection") + while True: + conn, addr = s.accept() + print("connected by ", addr) + model_info_size = struct.calcsize('128sl') + buf = conn.recv(model_info_size) + if buf: + model_name, model_size = struct.unpack('128sl', buf) + fn = model_name.decode('utf-8').strip('\00') + new_model_path = os.path.join(model_dir + fn) + if model_size == 0: + continue + print("model_name:", fn) + f = open(new_model_path, 'wb') + while True: + data = conn.recv(model_size) + if not data: + break + f.write(data) + f.close() + else: + continue + s.close() + # model_name = conn.recv(1024).decode('utf-8') + # model_size = int.from_bytes(conn.recv(8), byteorder='big') + + +def send_data(data, host, port): + # host = "127.0.0.1" + # port = 50001 + + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.setblocking(1) + s.connect((host, port)) + data_obj = pickle.dumps(data) + s.sendall(len(data_obj).to_bytes(length=8, byteorder='big')) + s.sendall(data_obj) + s.close() + # s.sendall(model_name.encode('utf-8')) + # s.sendall(os.stat(model_path+model_name).st_size.to_bytes(length=8, byteorder='big')) + + +def producer(conn, q): + size = int.from_bytes(conn.recv(8), byteorder='big') + data_obj = conn.recv(size) + data = pickle.loads(data_obj) + conn.close() + q.put(item=data, block=False, timeout=10) + print("I put it into the queue: ", list(q.queue)) + + +def recv_data(q): + host = "127.0.0.1" + port = 50001 + s = 
socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.setblocking(1) + s.bind((host, port)) + s.listen() + print("listening to connection") + while True: + conn, addr = s.accept() + print("connected by ", addr) + producer_thread = threading.Thread(target=producer, args=(conn, q)) + producer_thread.start() + + +def recv_data_once(): + host = "10.5.27.51" + port = 50002 + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.bind((host, port)) + s.listen() + conn, addr = s.accept() + print("connected by ", addr) + size = int.from_bytes(conn.recv(8), byteorder='big') + data_obj = conn.recv(size) + data = pickle.loads(data_obj) + return data + + +if __name__ == '__main__': + q = queue.Queue(1000) + recv_thread = threading.Thread(target=recv_data, args=(q)) + recv_thread.start() + while True: + try: + value = q.get(block=True, timeout=5) + print("the value is: ", value) + except queue.Empty: + print("empty queue") + # recv_model("../data/models/", "127.0.0.1", 50000) + + + diff --git a/dnn_split/ftp_util.py b/dnn_split/ftp_util.py new file mode 100644 index 0000000..64185e3 --- /dev/null +++ b/dnn_split/ftp_util.py @@ -0,0 +1,151 @@ +from dnn_split.model_util import * +import torch +import torch.nn as nn +import numpy as np +from dataclasses import dataclass + +@dataclass +class TileRegion: + """ + define the coordination of a feature map + (top_left_x, top_left_y) represents the top left coordination + (bottom_right_x, bottom_right_y) represents the bottom right coordination + """ + top_left_x: int + top_left_y: int + bottom_right_x: int + bottom_right_y: int + +@dataclass +class NetPara: + """ + define the net para of each layer + (stride, kernel_size, padding): filter para of each layer + type: convolution or pooling + input_width: width of the input feature maps of each layer + input_height: height of the input feature maps of each layer + """ + stride: int + kernel_size: int + padding: int + type: str + input_width: int + input_height: int + +@dataclass +class FtpPara: + """ + define the para for FTP algorithm + partitions_w: the number of slices divided from width + partitions_h: the number of slices divided from height + fused_layers: the number of layers that need to be partitioned by FTP algo + task_id: id for each partition + input_tiles: TileRegion info of each partition of the input feature maps + output_tiles: TileRegion info of each partition of the output feature maps + """ + partitions_w: int + partitions_h: int + fused_layers: int + task_id: int + input_tiles: TileRegion + output_tiles: TileRegion + +class ModelInterpreter(nn.Module): + """ Interpret the model layer by layer + Retrieve the parameters of each layer (convolution or pooling) for the DNN model, including: + 1. feature map size + 2. kernel size + 3. stride + 4. padding + 5. 
layer type: convolution or pooling
+    """
+    def __init__(self, model):
+        super(ModelInterpreter, self).__init__()
+        self.layers = get_all_layers(model)  # get each layer of the DNN model
+        self.x_train = nn.ModuleList(self.layers)
+
+    def forward(self, x):
+        x_size = []
+        x_kernel_size = []
+        x_stride = []
+        x_padding = []
+        x_type = []
+
+        for i in range(len(self.layers)):  # loop over all the layers
+            # forward layer by layer
+            x = self.layers[i](x)
+
+            # add flatten after AvgPool
+            if isinstance(self.layers[i], nn.AdaptiveAvgPool2d):
+                x = torch.flatten(x, 1)
+
+            # get kernel size of the current layer
+            if hasattr(self.layers[i], 'kernel_size'):
+                x_kernel_size.append(self.layers[i].kernel_size)
+            else:
+                continue
+
+            # get stride of the current layer
+            if hasattr(self.layers[i], 'stride'):
+                x_stride.append(self.layers[i].stride)
+            else:
+                continue
+
+            # get padding of the current layer
+            if hasattr(self.layers[i], 'padding'):
+                x_padding.append(self.layers[i].padding)
+            else:
+                continue
+
+            # get layer type of the current layer
+            if isinstance(self.layers[i], nn.Conv2d):
+                x_type.append("convolution")
+            elif isinstance(self.layers[i], (nn.MaxPool2d, nn.AvgPool2d)):
+                x_type.append("pooling")
+            else:
+                continue
+
+            # get output feature map size of all dimensions except the batch dimension (channel, width, height)
+            x_size.append(x.size()[1:])
+
+        return x_size, x_kernel_size, x_stride, x_padding, x_type
+
+
+def load_dnn_model(input_size, layer_size, layer_kernel_size, layer_stride, layer_padding, layer_type):
+    """ load dnn model and retrieve relevant parameters
+    Args:
+        input_size: the input image size (channel, height, width)
+        layer_size: output feature map size of each layer (convolution or pooling layer)
+        layer_kernel_size: kernel size para of each layer (convolution or pooling layer)
+        layer_stride: stride para of each layer (convolution or pooling layer)
+        layer_padding: padding para of each layer (convolution or pooling layer)
+        layer_type: type of each layer (convolution or pooling layer)
+
+    Returns:
+        net_para: necessary parameters of each layer for subsequent FTP calculation, including:
+            1. stride
+            2. kernel size
+            3. padding
+            4. type
+            5. input_width
+            6. 
output_width + """ + net_para = [[0] for _ in range(len(layer_size))] + input_width = [[0] for _ in range(len(layer_size))] + input_height = [[0] for _ in range(len(layer_size))] + for i in range(len(layer_size)): + # assign the relevant para of the input maps for each layer + if i == 0: + input_width[i] = input_size[2] + input_height[i] = input_size[1] + else: + input_width[i] = layer_size[i-1][2] + input_height[i] = layer_size[i-1][1] + + # calculate the net_para + if np.array(layer_stride[i]).size == 2: + net_para[i] = NetPara(layer_stride[i][0], layer_kernel_size[i][0], layer_padding[i][0], layer_type[i], input_width[i], input_height[i]) + else: + net_para[i] = NetPara(layer_stride[i], layer_kernel_size[i], layer_padding[i], layer_type[i], input_width[i], input_height[i]) + + return net_para diff --git a/dnn_split/fused_tile_patition.py b/dnn_split/fused_tile_patition.py new file mode 100644 index 0000000..6ef10b9 --- /dev/null +++ b/dnn_split/fused_tile_patition.py @@ -0,0 +1,116 @@ +from dnn_split.ftp_util import * +import numpy as np + +def grid(output_width, output_height, ftp_para, partition_w, partition_h): + """calculate the coordination of each partition for the bottom layer + Args: + output_width: width of the output feature map of the bottom layer + output_height: height of the output feature map of the bottom layer + ftp_para: initialized ftp para of the bottom layer updated + partition_w: the number of slices divided from width + partition_h: the number of slices divided from height + + Returns: + ftp_para: ftp para of the bottom layer updated for FTP algorithm + """ + w = output_width + h = output_height + stride_w = np.ceil(w/partition_w) + stride_h = np.ceil(h/partition_h) + start_h = 0 + end_h = stride_h + + for i in range(partition_h): + start_w = 0 + end_w = stride_w + if i != 0: + start_h = start_h + stride_h + end_h = end_h + stride_h + for j in range(partition_w): + task_id = ftp_para.task_id[i][j] + ftp_para.output_tiles[task_id][ftp_para.fused_layers-1].top_left_x = start_w + ftp_para.output_tiles[task_id][ftp_para.fused_layers-1].bottom_right_x = end_w + ftp_para.output_tiles[task_id][ftp_para.fused_layers-1].top_left_y = start_h + ftp_para.output_tiles[task_id][ftp_para.fused_layers-1].bottom_right_y = end_h + start_w = end_w + if j == partition_w - 1: + end_w = w + else: + end_w = end_w + stride_w + + return ftp_para + +def tranversal(net_para, output): + """calculate the coordination of the partitioned tile for current layer + Args: + net_para: net para of the current layer + output: TileRegion info of the output partitioned tile for current layer + + Returns: + input: TileRegion info of the input partitioned tile for current layer + """ + input = TileRegion(0, 0, 0, 0) + stride = net_para.stride + kernel_size = net_para.kernel_size + padding = net_para.padding + input_w = net_para.input_width + input_h = net_para.input_height + + # calculate the coordination of the input partitioned tiles for current layer + if net_para.type == "convolution" or net_para.type == "pooling": + input.top_left_x = output.top_left_x * stride + input.top_left_y = output.top_left_y * stride + input.bottom_right_x = (output.bottom_right_x - 1) * stride + kernel_size + input.bottom_right_y = (output.bottom_right_y - 1) * stride + kernel_size + + # update the coordination of the input partitioned tile considering different situations with padding effect + if input.bottom_right_x == input_w + 2 * padding and input.bottom_right_y == input_h + 2 * padding: # the partitioned tile locates 
at the bottom right corner of the feature map + input.top_left_x = max(0, input.top_left_x - padding) + input.top_left_y = max(0, input.top_left_y - padding) + input.bottom_right_x = input.bottom_right_x - 2 * padding + input.bottom_right_y = input.bottom_right_y - 2 * padding + elif input.bottom_right_x == input_w + 2 * padding: # the partitioned tile locates at the right side of the feature map + input.top_left_x = max(0, input.top_left_x - padding) + input.top_left_y = max(0, input.top_left_y - padding) + input.bottom_right_x = input.bottom_right_x - 2 * padding + input.bottom_right_y = input.bottom_right_y - padding + elif input.bottom_right_y == input_h + 2 * padding: # the partitioned tile locates at the down side of the feature map + input.top_left_x = max(0, input.top_left_x - padding) + input.top_left_y = max(0, input.top_left_y - padding) + input.bottom_right_x = input.bottom_right_x - padding + input.bottom_right_y = input.bottom_right_y - 2 * padding + else: + input.top_left_x = max(0, input.top_left_x - padding) + input.top_left_y = max(0, input.top_left_y - padding) + input.bottom_right_x = max(0, input.bottom_right_x - padding) + input.bottom_right_y = max(0, input.bottom_right_y - padding) + + return input + + +def perform_ftp(net_para, ftp_para, output_width, output_height): + """perform FTP algorithm + Args: + net_para: net para of the DNN model + ftp_para: initialized para for FTP algorithm + output_width: width of the output feature map of the bottom layer + output_height: height of the output feature map of the bottom layer + + Returns: + ftp_para: updated ftp para, which gives the coordination of each partitioned tile for each layer + """ + id = 0 + for i in range(ftp_para.partitions_h): + for j in range(ftp_para.partitions_w): + ftp_para.task_id[i][j] = id + id += 1 + + grid(output_width, output_height, ftp_para, ftp_para.partitions_w, ftp_para.partitions_h) + for i in range(ftp_para.partitions_h): + for j in range(ftp_para.partitions_w): + for l in range(ftp_para.fused_layers-1, -1, -1): + ftp_para.input_tiles[ftp_para.task_id[i][j]][l] = tranversal(net_para[l], ftp_para.output_tiles[ftp_para.task_id[i][j]][l]) #derive the coordination from the bottom layer + if l > 0: + ftp_para.output_tiles[ftp_para.task_id[i][j]][l-1] = ftp_para.input_tiles[ftp_para.task_id[i][j]][l] # assign the input tiles of current layer as the output tiles of the previous layer + + return ftp_para diff --git a/dnn_split/horizontal_partition.py b/dnn_split/horizontal_partition.py new file mode 100644 index 0000000..e32430d --- /dev/null +++ b/dnn_split/horizontal_partition.py @@ -0,0 +1,286 @@ +import itertools +import networkx as nx +import matplotlib.pyplot as plt +from collections import OrderedDict + + +# class impl: +# def __init__(self, device, edge, cloud): +# self.device = device +# self.edge = edge +# self.cloud = cloud +# +# class trans: +# def __init__(self, d2e, e2c,d2c): +# self.d2e = d2e +# self.e2c = e2c +# self.d2c = d2c + +# def build_graph(): +# G = nx.DiGraph() +# node_list = list(range(11)) +# G.add_nodes_from(node_list) +# for i in range(10): +# G.add_edge(i, i+1) +# +# return G + +# def build_alex_graph(): +# G = nx.DiGraph() +# node_list = list(range(11)) +# G.add_nodes_from(node_list) +# for i in range(10): +# G.add_edge(i, i+1) +# +# G.add_node('input') +# G.add_node('output') +# G.add_edge('input', 0) +# G.add_edge(10, 'output') +# G.nodes[0]['attr'] = impl(0.0387906074523925, 0.00161535739898681, 0.000158524800837039) +# G.nodes[1]['attr'] = 
impl(0.00837891101837158, 0.000992012023925781, 0.000104918397590518) +# G.nodes[2]['attr'] = impl(0.0532735347747802, 0.000886416435241699, 0.000335372802615165) +# G.nodes[3]['attr'] = impl(0.00678062438964843, 0.000534486770629882, 0.0000306591999717056) +# G.nodes[4]['attr'] = impl(0.025732421875, 0.00070655345916748, 0.000196169601380825) +# G.nodes[5]['attr'] = impl(0.0373493671417236, 0.000864291191101074, 0.000201350398361682) +# G.nodes[6]['attr'] = impl(0.0266769647598266, 0.00054333209991455, 0.000142950402200222) +# G.nodes[7]['attr'] = impl(0.00391316413879394, 0.000162029266357421, 0.0000364096000790596) +# G.nodes[8]['attr'] = impl(0.0905773401260376, 0.00514111518859863, 0.000916819202899932) +# G.nodes[9]['attr'] = impl(0.05386643409729, 0.00220365524291992, 0.000424079996347427) +# G.nodes[10]['attr'] = impl(0.0138338804244995, 0.000534629821777343, 0.00014015680104494) +# +# d2e = 64.95 +# e2c = 31.53 +# d2c = 29.78 +# +# output_size_01 = 5.908203125 +# G.edges[(0, 1)]['attr'] = trans(output_size_01 / d2e, output_size_01 / e2c, output_size_01 / d2c) +# output_size_12 = 1.423828125 +# G.edges[(1, 2)]['attr'] = trans(output_size_12 / d2e, output_size_12 / e2c, output_size_12 / d2c) +# output_size_23 = 4.271484375 +# G.edges[(2, 3)]['attr'] = trans(output_size_23 / d2e, output_size_23 / e2c, output_size_23 / d2c) +# output_size_34 = 0.990234375 +# G.edges[(3, 4)]['attr'] = trans(output_size_34 / d2e, output_size_34 / e2c, output_size_34 / d2c) +# output_size_45 = 1.98046875 +# G.edges[(4, 5)]['attr'] = trans(output_size_45 / d2e, output_size_45 / e2c, output_size_45 / d2c) +# output_size_56 = 1.3203125 +# G.edges[(5, 6)]['attr'] = trans(output_size_56 / d2e, output_size_56 / e2c, output_size_56 / d2c) +# output_size_67 = 1.3203125 +# G.edges[(6, 7)]['attr'] = trans(output_size_67 / d2e, output_size_67 / e2c, output_size_67 / d2c) +# output_size_78 = 0.28125 +# G.edges[(7, 8)]['attr'] = trans(output_size_78 / d2e, output_size_78 / e2c, output_size_78 / d2c) +# output_size_89 = 0.125 +# G.edges[(8, 9)]['attr'] = trans(output_size_89 / d2e, output_size_89 / e2c, output_size_89 / d2c) +# output_size_910 = 0.125 +# G.edges[(9, 10)]['attr'] = trans(output_size_910 / d2e, output_size_910 / e2c, output_size_910 / d2c) +# +# G.nodes['input']['attr'] = 'device' +# input_size = 4.59375 +# G.edges[('input', 0)]['attr'] = trans(input_size / d2e, input_size / e2c, input_size/d2c) +# G.edges[(10, 'output')]['attr'] = trans(10000, 10000, 10000) +# +# return G + +def longest_path(G): + nodes = list(nx.topological_sort(G)) + source = nodes[0] + + def helper(node): + if node == source: + return 0 + preds = list(G.predecessors(node)) + dist = max([helper(i) + 1 for i in preds]) + return dist + + path_dict = OrderedDict() + for node in nodes: + path_dict[node] = helper(node) + + return path_dict + + +def get_layer(G): + path_dict = longest_path(G) + max_len = path_dict[max(path_dict, key=path_dict.get)] + layer_dict = OrderedDict() + for layer in range(max_len + 1): + layer_item = [] + for k, v in path_dict.items(): + if v == layer: + layer_item.append(k) + layer_dict[layer] = layer_item + + return layer_dict + + + +def assign_nodes_to_layers(G, layer_dict): + nodes = list(nx.topological_sort(G)) + source = nodes[0] + + def get_subset_input_sibling(node, v): + subset = set() + siblings = [] + pred_list = list(G.predecessors(node)) + for i in range(1, len(pred_list) + 1): + data = itertools.combinations(pred_list, i) + subset.add(tuple(data)) + + for j in v: + if j != node: + if 
tuple(G.predecessors(j)) in subset: + siblings.append(j) + + return siblings + + + # k: layer index, v: list of nodes which belongs to layer k + for k, v in layer_dict.items(): + print("Start partition in layer ", k) + for node in v: + # if G.nodes[node].get('location') == 'None': + pred_list = list(G.predecessors(node)) + pred_location = [] + + for pred in pred_list: + pred_location.append(G.nodes[pred].get('location')) + + if 'cloud' in pred_location: + last_location = 'cloud' + elif 'edge' in pred_location: + last_location = 'edge' + else: + last_location = 'device' + + time_device = 0 + time_edge = 0 + time_cloud = 0 + print('the pred location list is', pred_location) + print('the last location is', last_location) + if last_location == 'device': + + # put node on device + print('pred is', pred) + print('node is ', node) + time_device = 0 + G.nodes[node].get('attr').device + # put node on edge + for pred in pred_list: + print('edge trans', G.edges[(pred, node)].get('attr').d2e) + time_edge = time_edge + G.edges[(pred, node)].get('attr').d2e + G.nodes[node].get('attr').edge + # put node on cloud + for pred in pred_list: + print('cloud trans', G.edges[(pred, node)].get('attr').d2c) + time_cloud = time_cloud + G.edges[(pred, node)].get('attr').d2c + G.nodes[node].get('attr').cloud + + time_list = list([time_device, time_edge, time_cloud]) + print(time_list) + time_min = min(time_list) + + if time_min == time_device: + node_location = 'device' + elif time_min == time_edge: + node_location = 'edge' + else: + node_location = 'cloud' + + elif last_location == 'edge': + # put node on edge + for pred in pred_list: + if G.nodes[pred].get('location') == 'device': + time_edge = time_edge + G.edges[(pred, node)].get('attr').d2e + G.nodes[node].get('attr').edge + time_cloud = time_cloud + G.edges[(pred, node)].get('attr').d2c + G.nodes[node].get('attr').cloud + else: + time_edge = time_edge + 0 + G.nodes[node].get('attr').edge + time_cloud = time_cloud + G.edges[(pred, node)].get('attr').e2c + G.nodes[node].get('attr').cloud + + time_list = list([time_edge, time_cloud]) + time_min = min(time_list) + + if time_min == time_edge: + node_location = 'edge' + else: + node_location = 'cloud' + else: + # for pred in pred_list: + # if G.nodes[pred].get('location') == 'device': + # time_cloud = time_cloud + G.edges[(pred, node)].get('attr').d2c + G.nodes[node].get('attr').cloud + # elif G.nodes[pred].get('location') == 'edge': + # time_cloud = time_cloud + G.edges[(pred, node)].get('attr').e2c + G.nodes[node].get('attr').cloud + # else: + # time_cloud = time_cloud + 0 + G.nodes[node].get('attr').cloud + node_location = 'cloud' + + G.nodes[node]['location'] = node_location + + # update subset siblings + location_dict = {'device':0, 'edge':1, 'cloud':2} + siblings = get_subset_input_sibling(node, v) + for sibling in siblings: + if G.nodes[sibling].get('location') == None: + G.nodes[sibling]['location'] = node_location + else: + if location_dict[G.nodes[sibling].get('location')] < location_dict[node_location]: + G.nodes[sibling]['location'] = node_location + + return G + + +if __name__ == '__main__': + None + # G = build_graph() + # + # layer_dict = get_layer(G) + # print(layer_dict) + # + # G.add_node('input') + # G.add_node('output') + # G.add_edge('input', 0) + # G.add_edge(10, 'output') + # G.nodes[0]['attr'] = impl(0.03941894, 0.00148439, 0.000245695993) + # G.nodes[1]['attr'] = impl(0.00977516, 0.00098157, 0.0000405439995) + # G.nodes[2]['attr'] = impl(0.05405164, 0.00077534, 0.000319615990) + # 
G.nodes[3]['attr'] = impl(0.00755548, 0.00052238, 0.0000353920013) + # G.nodes[4]['attr'] = impl(0.02662373, 0.00069451, 0.000292640001) + # G.nodes[5]['attr'] = impl(0.03826237, 0.00082541, 0.000202368006) + # G.nodes[6]['attr'] = impl(0.02726197, 0.00062299, 0.000149023995) + # G.nodes[7]['attr'] = impl(0.00411129, 0.00015831, 0.0000356799997) + # G.nodes[8]['attr'] = impl(0.09115124, 0.00550461, 0.000915455997) + # G.nodes[9]['attr'] = impl(0.05428672, 0.00237727, 0.000427552015) + # G.nodes[10]['attr'] = impl(0.01386261, 0.00059962, 0.000106495999) + # + # output_size_01 = 64 * 55 * 55 * 4 / (1024 * 1024) + # G.edges[(0, 1)]['attr'] = trans(output_size_01/10, output_size_01/8, output_size_01/7) + # output_size_12 = 64 * 27 * 27 * 4 / (1024 * 1024) + # G.edges[(1, 2)]['attr'] = trans(output_size_12/10, output_size_12/8, output_size_12/7) + # output_size_23 = 192 * 27 * 27 * 4 / (1024 * 1024) + # G.edges[(2, 3)]['attr'] = trans(output_size_23/10, output_size_23/8, output_size_23/7) + # output_size_34 = 192 * 13 * 13 * 4 / (1024 * 1024) + # G.edges[(3, 4)]['attr'] = trans(output_size_34/10, output_size_34/8, output_size_34/7) + # output_size_45 = 384 * 13 * 13 * 4 / (1024 * 1024) + # G.edges[(4, 5)]['attr'] = trans(output_size_45 / 10, output_size_45 /8, output_size_45/7) + # output_size_56 = 256 * 13 * 13 * 4 / (1024 * 1024) + # G.edges[(5, 6)]['attr'] = trans(output_size_56 / 10, output_size_56 /8, output_size_56/7) + # output_size_67 = 256 * 13 * 13 * 4 / (1024 * 1024) + # G.edges[(6, 7)]['attr'] = trans(output_size_67 / 10, output_size_67 / 8, output_size_67/7) + # output_size_78 = 9216 * 1 * 1 * 4 / (1024 * 1024) + # G.edges[(7, 8)]['attr'] = trans(output_size_78 / 10, output_size_78 /8, output_size_78/7) + # output_size_89 = 4096 * 1 * 1 * 4 / (1024 * 1024) + # G.edges[(8, 9)]['attr'] = trans(output_size_89 / 10, output_size_89 / 8, output_size_89/7) + # output_size_910 = 4096 * 1 * 1 * 4 / (1024 * 1024) + # G.edges[(9, 10)]['attr'] = trans(output_size_910 / 10, output_size_910 / 8, output_size_910/7) + # + # G.nodes['input']['attr'] = 'device' + # input_size = 3 * 224 * 224 * 4 / (1024 * 1024) + # G.edges[('input', 0)]['attr'] = trans(input_size / 6, input_size / 3, input_size) + # G.edges[(10, 'output')]['attr'] = trans(100, 100, 100) + # print(G.edges(data=True)) + # + # G = assign_nodes_to_layers(G, layer_dict) + # for node in G.nodes: + # print('Node %s is at %s' % (str(node), G.nodes[node].get('location'))) + # + # pos = nx.spring_layout(G) + # labels = nx.get_node_attributes(G, 'location') + # nx.draw_networkx_nodes(G, pos=pos) + # nx.draw_networkx_labels(G, pos=pos, labels=labels) + # nx.draw_networkx_edges(G, pos=pos, arrows=True) + # plt.show() \ No newline at end of file diff --git a/dnn_split/horizontal_partition_test.py b/dnn_split/horizontal_partition_test.py new file mode 100644 index 0000000..e3e0e5d --- /dev/null +++ b/dnn_split/horizontal_partition_test.py @@ -0,0 +1,321 @@ +import itertools +import networkx as nx +import matplotlib.pyplot as plt +from collections import OrderedDict +from dnn_split.horizontal_partition import * + +class impl: + def __init__(self, device, edge, cloud): + self.device = device + self.edge = edge + self.cloud = cloud + +class trans: + def __init__(self, d2e, e2c,d2c): + self.d2e = d2e + self.e2c = e2c + self.d2c = d2c + +def build_alex_graph(d2e, e2c, d2c): + G = nx.DiGraph() + node_list = list(range(11)) + G.add_nodes_from(node_list) + for i in range(10): + G.add_edge(i, i+1) + + layer_dict = get_layer(G) + + G.add_node('input') + 
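+    # Nodes 0-10 are AlexNet's compute layers in execution order; 'input' and
+    # 'output' are virtual endpoints. Each node 'attr' is an impl record with
+    # the per-layer execution time (seconds) on device, edge and cloud, and
+    # each edge 'attr' is a trans record with the transfer time of the
+    # intermediate feature map over the device-edge, edge-cloud and
+    # device-cloud links. The hard-coded output sizes below appear to be
+    # feature-map sizes in megabits (e.g. 5.908203125 = 64 * 55 * 55 * 4 bytes
+    # * 8 / 2**20 for the first conv output), so dividing by link rates given
+    # in Mbit/s yields transfer delays in seconds.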
G.add_node('output') + G.add_edge('input', 0) + G.add_edge(10, 'output') + G.nodes['input']['attr'] = impl(0, 0, 0) + G.nodes[0]['attr'] = impl(0.0387906074523925, 0.00161535739898681, 0.000158524800837039) + G.nodes[1]['attr'] = impl(0.00837891101837158, 0.000992012023925781, 0.000104918397590518) + G.nodes[2]['attr'] = impl(0.0532735347747802, 0.000886416435241699, 0.000335372802615165) + G.nodes[3]['attr'] = impl(0.00678062438964843, 0.000534486770629882, 0.0000306591999717056) + G.nodes[4]['attr'] = impl(0.025732421875, 0.00070655345916748, 0.000196169601380825) + G.nodes[5]['attr'] = impl(0.0373493671417236, 0.000864291191101074, 0.000201350398361682) + G.nodes[6]['attr'] = impl(0.0266769647598266, 0.00054333209991455, 0.000142950402200222) + G.nodes[7]['attr'] = impl(0.00391316413879394, 0.000162029266357421, 0.0000364096000790596) + G.nodes[8]['attr'] = impl(0.0905773401260376, 0.00514111518859863, 0.000916819202899932) + G.nodes[9]['attr'] = impl(0.05386643409729, 0.00220365524291992, 0.000424079996347427) + G.nodes[10]['attr'] = impl(0.0138338804244995, 0.000534629821777343, 0.00014015680104494) + G.nodes['output']['attr'] = impl(0, 0, 0) + + d2e = d2c + e2c = e2c + d2c = d2c + + output_size_01 = 5.908203125 + G.edges[(0, 1)]['attr'] = trans(output_size_01 / d2e, output_size_01 / e2c, output_size_01 / d2c) + output_size_12 = 1.423828125 + G.edges[(1, 2)]['attr'] = trans(output_size_12 / d2e, output_size_12 / e2c, output_size_12 / d2c) + output_size_23 = 4.271484375 + G.edges[(2, 3)]['attr'] = trans(output_size_23 / d2e, output_size_23 / e2c, output_size_23 / d2c) + output_size_34 = 0.990234375 + G.edges[(3, 4)]['attr'] = trans(output_size_34 / d2e, output_size_34 / e2c, output_size_34 / d2c) + output_size_45 = 1.98046875 + G.edges[(4, 5)]['attr'] = trans(output_size_45 / d2e, output_size_45 / e2c, output_size_45 / d2c) + output_size_56 = 1.3203125 + G.edges[(5, 6)]['attr'] = trans(output_size_56 / d2e, output_size_56 / e2c, output_size_56 / d2c) + output_size_67 = 1.3203125 + G.edges[(6, 7)]['attr'] = trans(output_size_67 / d2e, output_size_67 / e2c, output_size_67 / d2c) + output_size_78 = 0.28125 + G.edges[(7, 8)]['attr'] = trans(output_size_78 / d2e, output_size_78 / e2c, output_size_78 / d2c) + output_size_89 = 0.125 + G.edges[(8, 9)]['attr'] = trans(output_size_89 / d2e, output_size_89 / e2c, output_size_89 / d2c) + output_size_910 = 0.125 + G.edges[(9, 10)]['attr'] = trans(output_size_910 / d2e, output_size_910 / e2c, output_size_910 / d2c) + + G.nodes['input']['location'] = 'device' + input_size = 4.59375 + G.edges[('input', 0)]['attr'] = trans(input_size / d2e, input_size / e2c, input_size/d2c) + G.edges[(10, 'output')]['attr'] = trans(10000, 10000, 10000) + + return G, layer_dict + + +def build_vgg_graph(d2e, e2c, d2c): + G = nx.DiGraph() + node_list = list(range(21)) + G.add_nodes_from(node_list) + for i in range(20): + G.add_edge(i, i+1) + + layer_dict = get_layer(G) + + G.add_node('input') + G.add_node('output') + G.add_edge('input', 0) + G.add_edge(20, 'output') + G.nodes['input']['attr'] = impl(0, 0, 0) + G.nodes[0]['attr'] = impl(0.0609938383102417, 0.00338995456695556, 0.0000605106353759765) + G.nodes[1]['attr'] = impl(0.592135882377624, 0.00845353603363037, 0.0000553369522094726) + G.nodes[2]['attr'] = impl(0.0820929288864135, 0.00880284309387207, 0.0000228643417358398) + G.nodes[3]['attr'] = impl(0.226624870300292, 0.00373132228851318, 0.0000540971755981445) + G.nodes[4]['attr'] = impl(0.448662877082824, 0.00653448104858398, 0.0000515937805175781) + 
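+    # The 21 chain nodes presumably correspond to VGG-16's 13 convolution,
+    # 5 max-pooling and 3 fully-connected layers in order; the edge output
+    # sizes below follow the same megabit convention as the AlexNet graph
+    # (e.g. 98 = 64 * 224 * 224 * 4 bytes * 8 / 2**20 for the conv1_1 output).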
G.nodes[5]['attr'] = impl(0.0405383110046386, 0.00466926097869873, 0.0000332355499267578) + G.nodes[6]['attr'] = impl(0.176293754577636, 0.00315544605255126, 0.0000571727752685546) + G.nodes[7]['attr'] = impl(0.340849637985229, 0.00554869174957275, 0.0000574827194213867) + G.nodes[8]['attr'] = impl(0.334842944145202, 0.00576448440551757, 0.0000536680221557617) + G.nodes[9]['attr'] = impl(0.0198351860046386, 0.0023181676864624, 0.0000220775604248046) + G.nodes[10]['attr'] = impl(0.150825953483581, 0.0032393455505371, 0.0000629425048828125) + G.nodes[11]['attr'] = impl(0.295534491539001, 0.00640218257904052, 0.0000623226165771484) + G.nodes[12]['attr'] = impl(0.295758509635925, 0.00628831386566162, 0.0000530242919921875) + G.nodes[13]['attr'] = impl(0.0102149486541748, 0.00123481750488281, 0.0000325918197631835) + G.nodes[14]['attr'] = impl(0.0862767219543457, 0.00252645015716552, 0.0000523328781127929) + G.nodes[15]['attr'] = impl(0.0842627763748169, 0.00222053527832031, 0.0000505447387695312) + G.nodes[16]['attr'] = impl(0.0855239391326904, 0.00214335918426513, 0.0000607967376708984) + G.nodes[17]['attr'] = impl(0.00350527763366699, 0.000349688529968261, 0.0000220775604248046) + G.nodes[18]['attr'] = impl(0.330885338783264, 0.0138950824737548, 0.0000518321990966796) + G.nodes[19]['attr'] = impl(0.0538443565368652, 0.00220961570739746, 0.0000421762466430664) + G.nodes[20]['attr'] = impl(0.0138441324234008, 0.000549554824829101, 0.000037240982055664) + G.nodes['output']['attr'] = impl(0, 0, 0) + + d2e = d2e + e2c = e2c + d2c = d2c + + output_size_01 = 98 + G.edges[(0, 1)]['attr'] = trans(output_size_01 / d2e, output_size_01 / e2c, output_size_01 / d2c) + output_size_12 = 98 + G.edges[(1, 2)]['attr'] = trans(output_size_12 / d2e, output_size_12 / e2c, output_size_12 / d2c) + output_size_23 = 24.5 + G.edges[(2, 3)]['attr'] = trans(output_size_23 / d2e, output_size_23 / e2c, output_size_23 / d2c) + output_size_34 = 49 + G.edges[(3, 4)]['attr'] = trans(output_size_34 / d2e, output_size_34 / e2c, output_size_34 / d2c) + output_size_45 = 49 + G.edges[(4, 5)]['attr'] = trans(output_size_45 / d2e, output_size_45 / e2c, output_size_45 / d2c) + output_size_56 = 12.25 + G.edges[(5, 6)]['attr'] = trans(output_size_56 / d2e, output_size_56 / e2c, output_size_56 / d2c) + output_size_67 = 24.5 + G.edges[(6, 7)]['attr'] = trans(output_size_67 / d2e, output_size_67 / e2c, output_size_67 / d2c) + output_size_78 = 24.5 + G.edges[(7, 8)]['attr'] = trans(output_size_78 / d2e, output_size_78 / e2c, output_size_78 / d2c) + output_size_89 = 24.5 + G.edges[(8, 9)]['attr'] = trans(output_size_89 / d2e, output_size_89 / e2c, output_size_89 / d2c) + output_size_910 = 6.125 + G.edges[(9, 10)]['attr'] = trans(output_size_910 / d2e, output_size_910 / e2c, output_size_910 / d2c) + output_size_1011 = 12.25 + G.edges[(10, 11)]['attr'] = trans(output_size_1011 / d2e, output_size_1011 / e2c, output_size_1011 / d2c) + output_size_1112 = 12.25 + G.edges[(11, 12)]['attr'] = trans(output_size_1112 / d2e, output_size_1112 / e2c, output_size_1112 / d2c) + output_size_1213 = 12.25 + G.edges[(12, 13)]['attr'] = trans(output_size_1213 / d2e, output_size_1213 / e2c, output_size_1213 / d2c) + output_size_1314 = 3.0625 + G.edges[(13, 14)]['attr'] = trans(output_size_1314 / d2e, output_size_1314 / e2c, output_size_1314 / d2c) + output_size_1415 = 3.0625 + G.edges[(14, 15)]['attr'] = trans(output_size_1415 / d2e, output_size_1415 / e2c, output_size_1415 / d2c) + output_size_1516 = 3.0625 + G.edges[(15, 16)]['attr'] = trans(output_size_1516 
/ d2e, output_size_1516 / e2c, output_size_1516 / d2c) + output_size_1617 = 3.0625 + G.edges[(16, 17)]['attr'] = trans(output_size_1617 / d2e, output_size_1617 / e2c, output_size_1617 / d2c) + output_size_1718 = 0.765625 + G.edges[(17, 18)]['attr'] = trans(output_size_1718 / d2e, output_size_1718 / e2c, output_size_1718 / d2c) + output_size_1819 = 0.125 + G.edges[(18, 19)]['attr'] = trans(output_size_1819 / d2e, output_size_1819 / e2c, output_size_1819 / d2c) + output_size_1920 = 0.125 + G.edges[(19, 20)]['attr'] = trans(output_size_1920 / d2e, output_size_1920 / e2c, output_size_1920 / d2c) + + G.nodes['input']['location'] = 'device' + input_size = 4.59375 + G.edges[('input', 0)]['attr'] = trans(input_size / d2e, input_size / e2c, input_size/d2c) + G.edges[(20, 'output')]['attr'] = trans(10000, 10000, 10000) + + return G, layer_dict + + +def build_inception_graph(d2e, e2c, d2c): + G = nx.DiGraph() + node_list = list(range(24)) + G.add_nodes_from(node_list) + for i in range(23): + G.add_edge(i, i+1) + + layer_dict = get_layer(G) + + G.add_node('input') + G.add_node('output') + G.add_edge('input', 0) + G.add_edge(23, 'output') + G.nodes['input']['attr'] = impl(0, 0, 0) + G.nodes[0]['attr'] = impl(0.01985724, 0.000497532, 0.000126123) + G.nodes[1]['attr'] = impl(0.042263031, 0.000740719, 0.000103807) + G.nodes[2]['attr'] = impl(0.073670936, 0.001400018, 9.36E-05) + G.nodes[3]['attr'] = impl(0.078483748, 0.004977775, 0.000189853) + G.nodes[4]['attr'] = impl(0.206167006, 0.003538108, 0.002597237) + G.nodes[5]['attr'] = impl(0.067958188, 0.003599906, 0.000158358) + G.nodes[6]['attr'] = impl(0.124842119, 0.002338004, 0.005271482) + G.nodes[7]['attr'] = impl(0.138248348, 0.002236056, 0.003134561) + G.nodes[8]['attr'] = impl(0.13957026, 0.002309346, 0.006592107) + G.nodes[9]['attr'] = impl(0.138262939, 0.002335835, 0.003136873) + G.nodes[10]['attr'] = impl(0.177802181, 0.004854488, 0.003598857) + G.nodes[11]['attr'] = impl(0.247489667, 0.003648567, 0.009092951) + G.nodes[12]['attr'] = impl(0.273446012, 0.003607512, 0.009114122) + G.nodes[13]['attr'] = impl(0.267560506, 0.003571081, 0.008089685) + G.nodes[14]['attr'] = impl(0.218984056, 0.003588629, 0.001395845) + G.nodes[15]['attr'] = impl(0.300414777, 0.003548193, 0.001451564) + G.nodes[16]['attr'] = impl(0.320718908, 0.003593445, 0.001513076) + G.nodes[17]['attr'] = impl(0.237069464, 0.003683615, 0.001054978) + G.nodes[18]['attr'] = impl(0.108347154, 0.002796745, 0.000617337) + G.nodes[19]['attr'] = impl(0.087156343, 0.002951241, 0.001044345) + G.nodes[20]['attr'] = impl(0.085878754, 0.002769732, 0.003273726) + G.nodes[21]['attr'] = impl(0.083295107, 0.002769542, 0.005679941) + G.nodes[22]['attr'] = impl(0.003227234, 1.73E-05, 1.52E-05) + G.nodes[23]['attr'] = impl(0.038788509, 0.006604266, 0.006083632) + G.nodes['output']['attr'] = impl(0, 0, 0) + + d2e = d2e + e2c = e2c + d2c = d2c + + output_size_01 = 12.03222656 + G.edges[(0, 1)]['attr'] = trans(output_size_01 / d2e, output_size_01 / e2c, output_size_01 / d2c) + output_size_12 = 11.60253906 + G.edges[(1, 2)]['attr'] = trans(output_size_12 / d2e, output_size_12 / e2c, output_size_12 / d2c) + output_size_23 = 23.20507813 + G.edges[(2, 3)]['attr'] = trans(output_size_23 / d2e, output_size_23 / e2c, output_size_23 / d2c) + output_size_34 = 14.23828125 + G.edges[(3, 4)]['attr'] = trans(output_size_34 / d2e, output_size_34 / e2c, output_size_34 / d2c) + output_size_45 = 15.84375 + G.edges[(4, 5)]['attr'] = trans(output_size_45 / d2e, output_size_45 / e2c, output_size_45 / d2c) + output_size_56 = 
7.32421875 + G.edges[(5, 6)]['attr'] = trans(output_size_56 / d2e, output_size_56 / e2c, output_size_56 / d2c) + output_size_67 = 7.32421875 + G.edges[(6, 7)]['attr'] = trans(output_size_67 / d2e, output_size_67 / e2c, output_size_67 / d2c) + output_size_78 = 7.32421875 + G.edges[(7, 8)]['attr'] = trans(output_size_78 / d2e, output_size_78 / e2c, output_size_78 / d2c) + output_size_89 = 7.32421875 + G.edges[(8, 9)]['attr'] = trans(output_size_89 / d2e, output_size_89 / e2c, output_size_89 / d2c) + output_size_910 = 7.32421875 + G.edges[(9, 10)]['attr'] = trans(output_size_910 / d2e, output_size_910 / e2c, output_size_910 / d2c) + output_size_1011 = 4.5 + G.edges[(10, 11)]['attr'] = trans(output_size_1011 / d2e, output_size_1011 / e2c, output_size_1011 / d2c) + output_size_1112 = 4.5 + G.edges[(11, 12)]['attr'] = trans(output_size_1112 / d2e, output_size_1112 / e2c, output_size_1112 / d2c) + output_size_1213 = 4.5 + G.edges[(12, 13)]['attr'] = trans(output_size_1213 / d2e, output_size_1213 / e2c, output_size_1213 / d2c) + output_size_1314 = 4.5 + G.edges[(13, 14)]['attr'] = trans(output_size_1314 / d2e, output_size_1314 / e2c, output_size_1314 / d2c) + output_size_1415 = 4.5 + G.edges[(14, 15)]['attr'] = trans(output_size_1415 / d2e, output_size_1415 / e2c, output_size_1415 / d2c) + output_size_1516 = 4.5 + G.edges[(15, 16)]['attr'] = trans(output_size_1516 / d2e, output_size_1516 / e2c, output_size_1516 / d2c) + output_size_1617 = 4.5 + G.edges[(16, 17)]['attr'] = trans(output_size_1617 / d2e, output_size_1617 / e2c, output_size_1617 / d2c) + output_size_1718 = 4.5 + G.edges[(17, 18)]['attr'] = trans(output_size_1718 / d2e, output_size_1718 / e2c, output_size_1718 / d2c) + output_size_1819 = 1.171875 + G.edges[(18, 19)]['attr'] = trans(output_size_1819 / d2e, output_size_1819 / e2c, output_size_1819 / d2c) + output_size_1920 = 1.171875 + G.edges[(19, 20)]['attr'] = trans(output_size_1920 / d2e, output_size_1920 / e2c, output_size_1920 / d2c) + output_size_2021 = 1.171875 + G.edges[(20, 21)]['attr'] = trans(output_size_2021 / d2e, output_size_2021 / e2c, output_size_2021 / d2c) + output_size_2122 = 1.171875 + G.edges[(21, 22)]['attr'] = trans(output_size_2122 / d2e, output_size_2122 / e2c, output_size_2122 / d2c) + output_size_2223 = 0.046875 + G.edges[(22, 23)]['attr'] = trans(output_size_2223 / d2e, output_size_2223 / e2c, output_size_2223 / d2c) + + G.nodes['input']['location'] = 'device' + input_size = 4.59375 + G.edges[('input', 0)]['attr'] = trans(input_size / d2e, input_size / e2c, input_size/d2c) + G.edges[(23, 'output')]['attr'] = trans(10000, 10000, 10000) + + return G, layer_dict + +def calc_latency(G_assigned, d2e, e2c, d2c): + pred = 0 + latency = 0 + for node in G_assigned.nodes: + cur = G_assigned.nodes[node].get('location') + if cur != pred: + if pred == 'device' and cur == 'edge': + latency = latency + G_assigned.edges[(pred, node)].get('attr').d2e / d2e + if pred == 'edge' and cur == 'cloud': + latency = latency + G_assigned.edges[(pred, node)].get('attr').e2c / e2c + if pred == 'device' and cur == 'cloud': + latency = latency + G_assigned.edges[(pred, node)].get('attr').d2c / d2c + + if cur == 'device': + latency = latency + G_assigned.nodes[node].get('attr').device + if cur == 'edge': + latency = latency + G_assigned.nodes[node].get('attr').edge + if cur == 'cloud': + latency = latency + G_assigned.nodes[node].get('attr').cloud + + return latency + +if __name__ == '__main__': + d2e=64.95 + e2c=31.53 + d2c=29.78 + alex_G, alex_layer_dict = build_alex_graph(d2e, e2c, d2c) 
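+    # Illustrative sanity check of the size convention assumed in
+    # build_alex_graph (not required by the test itself): AlexNet's first conv
+    # output (64 x 55 x 55 float32 values) is 5.908203125 Mbit, so at
+    # d2e = 64.95 Mbit/s its device-to-edge transfer takes roughly 0.091 s.
+    # 'first_conv_mbit' is a local name introduced only for this check.
+    first_conv_mbit = 64 * 55 * 55 * 4 * 8 / (1024 * 1024)
+    assert abs(first_conv_mbit - 5.908203125) < 1e-9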
+ print(alex_layer_dict) + # print(Alex_G.nodes[0]['attr'].device) + # print(alex_G.nodes(data=True)) + Alex_G_assigned = assign_nodes_to_layers(alex_G, alex_layer_dict) + for node in Alex_G_assigned.nodes: + print('Node %s is at %s' % (str(node), Alex_G_assigned.nodes[node].get('location'))) + print('Latency is ', calc_latency(Alex_G_assigned, d2e, e2c, d2c)) + print('------------------------------------') + + vgg_G, vgg_layer_dict = build_vgg_graph(d2e, e2c, d2c) + print(vgg_layer_dict) + # print(Alex_G.nodes[0]['attr'].device) + # print(alex_G.nodes(data=True)) + VGG_G_assigned = assign_nodes_to_layers(vgg_G, vgg_layer_dict) + for node in VGG_G_assigned.nodes: + print('Node %s is at %s' % (str(node), VGG_G_assigned.nodes[node].get('location'))) + print('Latency is ', calc_latency(VGG_G_assigned, d2e, e2c, d2c)) + print('------------------------------------') + + inception_G, inception_layer_dict = build_inception_graph(d2e, e2c, d2c) + print(inception_layer_dict) + # print(Alex_G.nodes[0]['attr'].device) + # print(alex_G.nodes(data=True)) + inception_G_assigned = assign_nodes_to_layers(inception_G, inception_layer_dict) + for node in inception_G_assigned.nodes: + print('Node %s is at %s' % (str(node), inception_G_assigned.nodes[node].get('location'))) + print('Latency is ', calc_latency(inception_G_assigned, d2e, e2c, d2c)) + print('------------------------------------') \ No newline at end of file diff --git a/dnn_split/min_cut.py b/dnn_split/min_cut.py new file mode 100644 index 0000000..26c374e --- /dev/null +++ b/dnn_split/min_cut.py @@ -0,0 +1,100 @@ +class Graph: + + def __init__(self, graph): + self.graph = graph # residual graph + self.org_graph = [i[:] for i in graph] + self.ROW = len(graph) + self.COL = len(graph[0]) + + '''Returns true if there is a path from source 's' to sink 't' in + residual graph. Also fills parent[] to store the path ''' + + def BFS(self, s, t, parent): + + # Mark all the vertices as not visited + visited = [False] * (self.ROW) + + # Create a queue for BFS + queue = [] + + # Mark the source node as visited and enqueue it + queue.append(s) + visited[s] = True + + # Standard BFS Loop + while queue: + + # Dequeue a vertex from queue and print it + u = queue.pop(0) + + # Get all adjacent vertices of the dequeued vertex u + # If a adjacent has not been visited, then mark it + # visited and enqueue it + for ind, val in enumerate(self.graph[u]): + if visited[ind] == False and val > 0: + queue.append(ind) + visited[ind] = True + parent[ind] = u + + # If we reached sink in BFS starting from source, then return + # true, else false + return True if visited[t] else False + + # Returns the min-cut of the given graph + def minCut(self, source, sink): + + # This array is filled by BFS and to store path + parent = [-1] * (self.ROW) + + min_cost = 0 # There is no flow initially + + # Augment the flow while there is path from source to sink + while self.BFS(source, sink, parent): + + # Find minimum residual capacity of the edges along the + # path filled by BFS. Or we can say find the maximum flow + # through the path found. 
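+            # Standard Ford-Fulkerson / Edmonds-Karp augmentation step: take
+            # the bottleneck capacity along the BFS path, add it to the total
+            # flow, and update the forward/backward residual capacities along
+            # that path. Once no augmenting path remains, edges that are
+            # saturated in the residual graph but had positive capacity in the
+            # original graph cross the minimum cut; in this repo's partitioning
+            # context they are the candidate split points printed below.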
+ path_flow = float("Inf") + s = sink + while (s != source): + path_flow = min(path_flow, self.graph[parent[s]][s]) + s = parent[s] + + # Add path flow to overall flow + min_cost += path_flow + + # update residual capacities of the edges and reverse edges + # along the path + v = sink + while (v != source): + u = parent[v] + self.graph[u][v] -= path_flow + self.graph[v][u] += path_flow + v = parent[v] + + # print the edges which initially had weights + # but now have 0 weight + print("The cutting points are:") + for i in range(self.ROW): + for j in range(self.COL): + if self.graph[i][j] == 0 and self.org_graph[i][j] > 0: + print(str(i) + " - " + str(j)) + print("The minimum cost is:", min_cost) + # Create a graph given in the above diagram + +if __name__ == "__main__": + + graph = [[0, 6, 0, 3, 1, 6, 0], + [0, 0, 3, 0, 0, 0, 2], + [0, 0, 0, float("Inf"), float("Inf"), 0, 0], + [0, 0, 0, 0, 0, 10, 7], + [0, 0, 0, 0, 0, 12, 8], + [0, 0, 0, 0, 0, 0, 16], + [0, 0, 0, 0, 0, 0, 0]] + + g = Graph(graph) + + source = 0 + sink = 6 + + g.minCut(source, sink) \ No newline at end of file diff --git a/dnn_split/model_canyon.py b/dnn_split/model_canyon.py new file mode 100644 index 0000000..60db1f6 --- /dev/null +++ b/dnn_split/model_canyon.py @@ -0,0 +1,58 @@ +from dnn_split.model_util import * +import torch.nn.functional as F +import math + +MODEL_PATH = '../models/' + + +class ModelCanyon(nn.Module): + def __init__(self, model, start, end): + super(ModelCanyon, self).__init__() + layers = get_all_layers(model) + self.partialLayers = get_partial_layers(layers, start, end) + self.x_trains = nn.ModuleList(self.partialLayers) + + def forward(self, x): + for i in range(len(self.partialLayers)): + x = self.partialLayers[i](x) + if isinstance(self.partialLayers[i], nn.AdaptiveAvgPool2d): + x = torch.flatten(x, 1) + return x + +class ModelCanyonG(nn.Module): + def __init__(self, model, G): + super(ModelCanyonG, self).__init__() + self.layers = get_all_layers() + print(self.layers) + + def forward(self): + None + + +if __name__ == "__main__": + + # input = get_input() + # resnet34 = get_pretrained_resnet34() + # model_size = get_model_size(resnet34) + # print(model_size) + # model = ModelCanyon(model=resnet34, start=0, end=model_size-1) + # model.eval() + # output = model(input) + + startLayer = 0 + endLayer = 2 + pretrained_alexnet = get_pretrained_alexnet() + + path = MODEL_PATH + "partialmodel.pth" + updatedModel = ModelCanyon(model=pretrained_alexnet, start=startLayer, end=endLayer) + if isinstance(updatedModel.partialLayers[0], nn.Conv2d): + print(updatedModel.partialLayers[0]) + print(updatedModel.partialLayers) + torch.save(updatedModel, path) + + startLayer = 3 + endLayer = 20 + path2 = MODEL_PATH + "partialmodel2.pth" + updatedModel2 = ModelCanyon(model=pretrained_alexnet, start=startLayer, end=endLayer) + print(updatedModel2.partialLayers) + torch.save(updatedModel2, path2) diff --git a/dnn_split/model_ftp.py b/dnn_split/model_ftp.py new file mode 100644 index 0000000..5eedbbe --- /dev/null +++ b/dnn_split/model_ftp.py @@ -0,0 +1,163 @@ +from dnn_split.model_util import * +from dnn_split.ftp_util import * +import torch.nn.functional as F +import math + +MODEL_PATH = '../models/' + +class ModelFTP(nn.Module): + """ + calculate the inference result of the partitioned tile according to its coordinates and partial padding + """ + def __init__(self, model, start, end, coordinate, input_w, input_h): + """ + Args: + model: the DNN model + start: starting number of the layer that FTP begins from + end: 
ending number of the layer that FTP ends with + coordinate: coordinates of the partitioned tile of the start layer + input_w: width of the input feature map of the starting layer + input_h: height of the input feature map of the starting layer + """ + super(ModelFTP, self).__init__() + layers = get_all_layers(model) + self.partialLayers = get_partial_layers(layers, start, end) + self.x_trains = nn.ModuleList(self.partialLayers) + self.x1 = coordinate.top_left_x + self.y1 = coordinate.top_left_y + self.x2 = coordinate.bottom_right_x + self.y2 = coordinate.bottom_right_y + self.input_w = input_w + self.input_h = input_h + + def cal(self, x, padding): + """ + 1. update the top left coordinate of the partitioned tile according to padding info + 2. padding the partitioned tile according to its top left coordinate + """ + if len(padding) == 2: # conv layer + pad0 = padding[0] + pad1 = padding[1] + else: # pooling layer + pad0 = padding + pad1 = padding + + if self.x1 == 0: + self.x1 = 0 + x = F.pad(input=x, pad=[pad0, 0, 0, 0], mode='constant', value=0) + else: + self.x1 = self.x1 + pad0 + + if self.y1 == 0: + self.y1 = 0 + x = F.pad(input=x, pad=[0, 0, pad1, 0], mode='constant', value=0) + else: + self.y1 = self.y1 + pad1 + return x, self.x1, self.y1 + + def forward(self, x): + for i in range(len(self.partialLayers)): + if isinstance(self.partialLayers[i], nn.Conv2d): # conv layer + in_ch = self.partialLayers[i].in_channels + out_ch = self.partialLayers[i].out_channels + kernel = self.partialLayers[i].kernel_size + stride = self.partialLayers[i].stride + padding = self.partialLayers[i].padding + weight = self.partialLayers[i].weight + bias = self.partialLayers[i].bias + + if padding[0] != 0: + if self.x2 == self.input_w and self.y2 == self.input_h: # the bottom right coordinates locates at the bottom right corner + x, self.x1, self.y1 = self.cal(x, padding) + + self.x2 = self.x2 + 2 * padding[0] + self.y2 = self.y2 + 2 * padding[1] + x = F.pad(input=x, pad=[0, padding[0], 0, padding[1]], mode='constant', value=0) # padding the partitioned tile according to its right bottom coordinate + elif self.x2 == self.input_w: # the bottom right coordinates locates at the right side of the feature map + x, self.x1, self.y1 = self.cal(x, padding) + + self.x2 = self.x2 + 2 * padding[0] + self.y2 = self.y2 + padding[1] + x = F.pad(input=x, pad=[0, padding[0], 0, 0], mode='constant', value=0) # padding the partitioned tile according to its right bottom coordinate + elif self.y2 == self.input_h: # the bottom right coordinates locates at the down side of the feature map + x, self.x1, self.y1 = self.cal(x, padding) + + self.x2 = self.x2 + padding[0] + self.y2 = self.y2 + 2 * padding[1] + x = F.pad(input=x, pad=[0, 0, 0, padding[1]], mode='constant', value=0) # padding the partitioned tile according to its right bottom coordinate + else: # the bottom right coordinates locates at the middle of the feature map + x, self.x1, self.y1 = self.cal(x, padding) + + self.x2 = self.x2 + padding[0] + self.y2 = self.y2 + padding[1] + + # calculate the coordinate of next layer + self.x1 = math.floor(self.x1 / stride[0]) + self.y1 = math.floor(self.y1 / stride[1]) + + self.x2 = math.floor((self.x2 - kernel[0]) / stride[0] + 1) + self.y2 = math.floor((self.y2 - kernel[1]) / stride[1] + 1) + + # calculate the width and height of the input feature map of next layer + self.input_h = math.floor((self.input_h - kernel[1] + 2 * padding[1]) / stride[1] + 1) + self.input_w = math.floor((self.input_w - kernel[0] + 2 * padding[0]) / 
stride[0] + 1) + + # change padding of the current layer to 0 + self.partialLayers[i] = nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=kernel[0], + stride=stride[0], padding=0) + with torch.no_grad(): + self.partialLayers[i].weight = nn.Parameter(weight) + self.partialLayers[i].bias = nn.Parameter(bias) + + if isinstance(self.partialLayers[i], nn.MaxPool2d): # pooling layer + kernel = self.partialLayers[i].kernel_size + stride = self.partialLayers[i].stride + padding = self.partialLayers[i].padding + + if padding != 0: + if self.x2 == self.input_w and self.y2 == self.input_h: # the bottom right coordinates locates at the bottom right corner + x, self.x1, self.y1 = self.cal(x, padding) + + self.x2 = self.x2 + 2 * padding + self.y2 = self.y2 + 2 * padding + x = F.pad(input=x, pad=[0, padding, 0, padding], mode='constant', value=0) # padding the partitioned tile according to its right bottom coordinate + elif self.x2 == self.input_w: # the bottom right coordinates locates at the right side of the feature map + x, self.x1, self.y1 = self.cal(x, padding) + + self.x2 = self.x2 + 2 * padding + self.y2 = self.y2 + padding + x = F.pad(input=x, pad=[0, padding, 0, 0], mode='constant', value=0) # padding the partitioned tile according to its right bottom coordinate + elif self.y2 == self.input_h: # the bottom right coordinates locates at the down side of the feature map + x, self.x1, self.y1 = self.cal(x, padding) + + self.x2 = self.x2 + padding + self.y2 = self.y2 + 2 * padding + x = F.pad(input=x, pad=[0, 0, 0, padding], mode='constant', value=0) # padding the partitioned tile according to its right bottom coordinate + else: # the bottom right coordinates locates at the middle of the feature map + x, self.x1, self.y1 = self.cal(x, padding) + + self.x2 = self.x2 + padding + self.y2 = self.y2 + padding + + # calculate the coordinate of next layer + self.x1 = math.floor(self.x1 / stride) + self.y1 = math.floor(self.y1 / stride) + + self.x2 = math.floor((self.x2 - kernel) / stride + 1) + self.y2 = math.floor((self.y2 - kernel) / stride + 1) + + # calculate the width and height of the input feature map of next layer + self.input_h = math.floor((self.input_h - kernel + 2 * padding) / stride + 1) + self.input_w = math.floor((self.input_w - kernel + 2 * padding) / stride + 1) + + # change padding of the current layer to 0 + self.partialLayers[i] = nn.MaxPool2d(kernel_size=kernel, stride=stride, padding=0) + + print("The x after adding padding is", x.size()) + x = self.partialLayers[i](x) + print("The x after processing is", x.size()) + if isinstance(self.partialLayers[i], nn.AdaptiveAvgPool2d): + x = torch.flatten(x, 1) + + print("----------------------------------------------------------------") + return x \ No newline at end of file diff --git a/dnn_split/model_graph.py b/dnn_split/model_graph.py new file mode 100644 index 0000000..8ffc416 --- /dev/null +++ b/dnn_split/model_graph.py @@ -0,0 +1,222 @@ +import torch +import networkx as nx +import numpy as np +import matplotlib.pyplot as plt +from collections import OrderedDict +from graphviz import Digraph +from torch.autograd import Variable +import torchvision.models as models +import torch.nn as nn +from dnn_split.model_util import get_all_layers +from dnn_models.mynet import MyNet +from networkx.drawing.nx_pydot import graphviz_layout +import time + + +def make_graph(var, params): + param_map = {id(v): k for k, v in params.items()} + print(param_map) + id_counter = 0 + param_list = [] + + node_attr = dict(style='filled', + shape='box', 
+ align='left', + fontsize='12', + ranksep='0.1', + height='0.2') + + dot = Digraph(node_attr=node_attr, graph_attr=dict(size="12,12")) + seen = set() + + G = nx.DiGraph() + G_compute = nx.DiGraph() + + def size_to_str(size): + return '(' + (', ').join(['%d' % v for v in size]) + ')' + + output_nodes = (var.grad_fn,) if not isinstance(var, tuple) else tuple(v.grad_fn for v in var) + + def add_nodes(var): + nonlocal id_counter + nonlocal param_list + if var not in seen: + if torch.is_tensor(var): + dot.node(str(id(var)), size_to_str(var.size()), fillcolor='orange') + G_compute.add_node(str(id(var)), name=param_map.get(id(var)), attr=size_to_str(var.size())) + + elif hasattr(var, 'variable'): + u = var.variable + print("variable1 ", var) + node_name = '%s\n %s' % (param_map.get(id(u)), size_to_str(u.size())) + dot.node(str(id(var)), node_name, fillcolor='lightblue') + + G_compute.add_node(str(id(var)), name=param_map.get(id(u)), attr=size_to_str(u.size())) + param_list.append(str(id(var))) + else: + dot.node(str(id(var)), str(type(var).__name__)) + print(str(var)) + if str(type(var).__name__) != "TBackward" and str(type(var).__name__) != "ExpandBackward" and str(type(var).__name__) != "ViewBackward": + G.add_node(str(id(var)), id=id_counter, name=str(type(var).__name__)) + G_compute.add_node(str(id(var)), id=id_counter, name=str(type(var).__name__)) + id_counter = id_counter + 1 + + else: + G_compute.add_node(str(id(var)), name=str(type(var).__name__)) + + seen.add(var) + + + if hasattr(var, 'next_functions'): + # print(var.next_functions) + for u in var.next_functions: + if u[0] is not None: + dot.edge(str(id(u[0])), str(id(var))) + if str(type(u[0]).__name__) != "AccumulateGrad" and str(type(u[0]).__name__) != "TBackward" and str(type(u[0]).__name__) != "ExpandBackward": + G.add_edge(str(id(u[0])), str(id(var))) + G_compute.add_edge(str(id(u[0])), str(id(var))) + add_nodes(u[0]) + + if hasattr(var, 'saved_tensors'): + for t in var.saved_tensors: + dot.edge(str(id(t)), str(id(var))) + G_compute.add_edge(str(id(t)), str(id(var))) + add_nodes(t) + + add_nodes(var.grad_fn) + return dot, G, G_compute, param_list + + +def remove_empty_nodes(G): + node_removal = [] + for node in G.nodes(): + if G.nodes[node] == {}: + node_removal.append(node) + for node in node_removal: + parent = next(G.predecessors(node)) + G = nx.contracted_nodes(G, parent, node) + + return G + + +def make_summray(model): + summary = OrderedDict() + for layer in model.named_children(): + layer_name = layer[0] + layer_func = layer[1] + summary[layer_name] = layer_func + + return summary + + +def assign_func(G, G_compute, summary, param_dict): + # Traverse the graph + # roots = [n for n, d in G.in_degree() if d == 0] + # tree = nx.bfs_tree(G, source=roots[0], reverse=False) + # nodes = [roots[0]] + [v for u, v in tree.edges()] + nodes = list(nx.topological_sort(G)) + + for node in nodes: + node_name = G.nodes[node].get('name') + if node_name == 'MkldnnConvolutionBackward': + type = 'conv' + pred_id = list(G_compute.predecessors(node)) + print("pred ",pred_id) + for id in pred_id: + if id in param_dict: + type = G_compute.nodes[id].get('name').split(".")[0].split("'")[0] + print("type is", type) + break + func = summary.get(type) + G.nodes[node]['func'] = func + elif node_name == 'MaxPool2DWithIndicesBackward': + type = 'maxpool2d' + pred_id = list(G_compute.predecessors(node)) + print("pred ", pred_id) + for id in pred_id: + if id in param_dict: + type = G_compute.nodes[id].get('name').split(".")[0].split("'")[0] + print("type 
is", type) + break + func = summary.get(type) + G.nodes[node]['func'] = func + elif node_name == 'ReluBackward1': + type = 'relu' + func = summary.get(type) + G.nodes[node]['func'] = func + elif node_name == 'AddBackward0': + type = 'sum' + func = np.sum + G.nodes[node]['func'] = func + elif node_name == 'MulBackward0': + type = 'product' + func = np.prod + G.nodes[node]['func'] = func + elif node_name == 'DivBackward0': + type = 'division' + func = np.divide + G.nodes[node]['func'] = func + + return G + + +def make_forward(G, G_compute, x): + # Traverse the graph + roots = [n for n, d in G.in_degree() if d == 0] + tree = nx.bfs_tree(G, source=roots[0], reverse=False) + nodes = [roots[0]] + [v for u, v in tree.edges()] + + for node in nodes: + func = G.nodes[node].get('func') + if func != None: + pred_id = list(G.predecessors(node)) + if len(pred_id) == 0: + res = func(x) + G.nodes[node]['output'] = res + elif len(pred_id) == 1: + pred_res = G.nodes[pred_id[0]].get('output') + res = func(pred_res) + G.nodes[node]['output'] = res + else: + pred_res = [] + for id in pred_id: + pred_res.append(G.nodes[id].get('output')) + res = func(pred_res) + G.nodes[node]['output'] = res + + return G.nodes[nodes[len(nodes)-1]].get('output') + + +if __name__ == "__main__": + inputs = torch.randn(1, 3, 224, 224) + net = MyNet() + net.eval() + for i in range(10): + start = time.time() + res1 = net(inputs) + end = time.time() + print("time original is ", end - start) + print("original result is ", res1) + # print(get_all_layers(resnet18)) + y = net(Variable(inputs)) + dot, G, G_compute, param_list = make_graph(y, params=dict(net.named_parameters())) + dot.view(filename="mynet", directory="../models/") + G = remove_empty_nodes(G) + + summary = make_summray(net) + + G = assign_func(G, G_compute, summary, param_list) + print("Graph is ", G.nodes(data=True)) + for i in range(10): + start = time.time() + res2 = make_forward(G, G_compute, inputs) + end = time.time() + print("time static graph is ", end - start) + print("static graph result is ", res2) + + labels = nx.get_node_attributes(G, 'name') + pos = nx.spring_layout(G) + nx.draw_networkx_nodes(G, pos=pos) + nx.draw_networkx_labels(G, pos=pos, labels=labels) + nx.draw_networkx_edges(G, pos=pos, arrows=True) + plt.show() \ No newline at end of file diff --git a/dnn_split/model_infer_time.py b/dnn_split/model_infer_time.py new file mode 100644 index 0000000..d24ae96 --- /dev/null +++ b/dnn_split/model_infer_time.py @@ -0,0 +1,99 @@ +from dnn_split.model_util import * +import time +import torch +import torch.nn as nn + +PATH = "../model/alexnet-owt-4df8aa71.pth" + +class ModelInferTime(nn.Module): + + def __init__(self, model, start, end): + super(ModelInferTime, self).__init__() + layers = get_all_layers(model) + self.partialLayers = get_partial_layers(layers, start, end) + self.x_train = nn.ModuleList(self.partialLayers) + + def get_mul(self, arr): + mul = 1 + for i in arr: + mul = mul * i + return mul + + def forward(self, x): + infer_time = [] + x_size = [] + input_size = self.get_mul(x.size()[1:]) + for layer in self.partialLayers: + start_time = time.time() + x = layer(x) + end_time = time.time() + running_time = end_time - start_time + x_size.append(self.get_mul(x.size()[1:])) + # print("time cost of No.", i, "layer is: %.*f sec" %(9, running_time)) + if isinstance(layer, nn.AdaptiveAvgPool2d): + start_time = time.time() + x = torch.flatten(x, 1) + end_time = time.time() + flatten_running_time = end_time - start_time + running_time = running_time + 
flatten_running_time + + infer_time.append(running_time) + + x_size[-1] = input_size + return x_size, infer_time + + +class ModelInferTimeGPU(nn.Module): + + def __init__(self, model, start, end): + super(ModelInferTimeGPU, self).__init__() + layers = get_all_layers(model) + self.partialLayers = get_partial_layers(layers, start, end) + self.x_train = nn.ModuleList(self.partialLayers) + + def get_mul(self, arr): + mul = 1 + for i in arr: + mul = mul * i + return mul + + def forward(self, x): + infer_time = [] + x_size = [] + input_size = self.get_mul(x.size()[1:]) + start_time = torch.cuda.Event(enable_timing=True) + end_time = torch.cuda.Event(enable_timing=True) + + for layer in self.partialLayers: + start_time.record() + x = layer(x) + end_time.record() + torch.cuda.synchronize() + running_time = start_time.elapsed_time(end_time)/1000 + x_size.append(self.get_mul(x.size()[1:])) + # print("time cost of No.", i, "layer is: %.*f sec" %(9, running_time)) + if isinstance(layer, nn.AdaptiveAvgPool2d): + start_time.record() + x = torch.flatten(x, 1) + end_time.record() + torch.cuda.synchronize() + flatten_running_time = start_time.elapsed_time(end_time)/1000 + running_time = running_time + flatten_running_time + + infer_time.append(running_time) + + x_size[-1] = input_size + return x_size, infer_time + + +if __name__ == "__main__": + + input = get_input() + + resnet34 = get_pretrained_resnet34() + model_size = get_model_size(resnet34) + print(model_size) + model = ModelInferTime(model=resnet34, start=0, end=model_size-1) + model.eval() + x_size, output = model(input) + print(len(x_size)) \ No newline at end of file diff --git a/dnn_split/model_util.py b/dnn_split/model_util.py new file mode 100644 index 0000000..2f9df87 --- /dev/null +++ b/dnn_split/model_util.py @@ -0,0 +1,95 @@ +import torch +import torch.nn as nn +import torchvision.models as models +from PIL import Image +from torchvision import transforms + +MODEL_PATH = "../models/" +IMAGE_PATH = '../data/images/' + + +def get_alexnet(): + alexnet = models.alexnet(pretrained=False) + return alexnet + + +def get_pretrained_alexnet(): + pretrained_alexnet = models.alexnet(pretrained=False) + pretrained_alexnet.load_state_dict(torch.load(MODEL_PATH + 'alexnet-owt-4df8aa71.pth')) + + return pretrained_alexnet + + +def get_pretrained_vgg16(): + pretrained_vgg16 = models.vgg16(pretrained=False) + pretrained_vgg16.load_state_dict(torch.load(MODEL_PATH + 'alexnet-owt-4df8aa71.pth')) + + return pretrained_vgg16 + + + +def get_resnet34(): + resnet34 = models.resnet34(pretrained=False) + return resnet34 + + +def get_pretrained_resnet34(): + pretrained_resnet34 = models.resnet34(pretrained=False) + pretrained_resnet34.load_state_dict(torch.load(MODEL_PATH + "resnet34-333f7ec4.pth")) + return pretrained_resnet34 + + +def get_all_layers(model): + # submodel = nn.Sequential(*list(model.children())) + # layers = [module for module in model.modules() if type(module) != nn.Sequential] + # return layers[1:] + layers = [] + temp = [elem for elem in model.children()] + + for layer in temp: + if isinstance(layer, nn.Sequential): + for i in layer.children(): + layers.append(i) + else: + layers.append(layer) + + return layers + + +def get_partial_layers(layers, start, end): + partial_layers = layers[start: end + 1] + return partial_layers + + +def get_model_size(model): + layered_model = get_all_layers(model) + return len(layered_model) + + +def get_input(): + input_image = Image.open(IMAGE_PATH + 'dog.jpg') + preprocess = transforms.Compose([ + 
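+        # Standard ImageNet preprocessing: resize the shorter side to 256,
+        # center-crop to 224x224, convert to a [0, 1] float tensor, then
+        # normalize with the usual ImageNet per-channel mean and std.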
transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + input_tensor = preprocess(input_image) + input_batch = input_tensor.unsqueeze(0) + + return input_batch + + +if __name__ == "__main__": + model = get_alexnet() + print(get_model_size(model)) + layers = get_all_layers(model) + partial_layers = get_partial_layers(layers, 0, 20) + for i in range(len(partial_layers)): + print(partial_layers[i]) + + + + + + diff --git a/dnn_split/model_vis.py b/dnn_split/model_vis.py new file mode 100644 index 0000000..4512d77 --- /dev/null +++ b/dnn_split/model_vis.py @@ -0,0 +1,78 @@ +from graphviz import Digraph +import torch +from torch.autograd import Variable +from dnn_models.inceptionv4 import * +from torchvision import models +import networkx as nx +import matplotlib.pyplot as plt + + +def make_dot(var, params): + """ Produces Graphviz representation of PyTorch autograd graph + + Blue nodes are the Variables that require grad, orange are Tensors + saved for backward in torch.autograd.Function + + Args: + var: output Variable + params: dict of (name, Variable) to add names to node that + require grad (TODO: make optional) + """ + param_map = {id(v): k for k, v in params.items()} + print(param_map) + + node_attr = dict(style='filled', + shape='box', + align='left', + fontsize='12', + ranksep='0.1', + height='0.2') + + dot = Digraph(node_attr=node_attr, graph_attr=dict(size="12,12")) + seen = set() + G = nx.Graph() + + def size_to_str(size): + return '(' + (', ').join(['%d' % v for v in size]) + ')' + + def add_nodes(var): + if var not in seen: + if torch.is_tensor(var): + dot.node(str(id(var)), size_to_str(var.size()), fillcolor='orange') + elif hasattr(var, 'variable'): + u = var.variable + node_name = '%s\n %s' % (param_map.get(id(u)), size_to_str(u.size())) + dot.node(str(id(var)), node_name, fillcolor='lightblue') + else: + dot.node(str(id(var)), str(type(var).__name__)) + G.add_node(str(id(var)), name=str(type(var).__name__)) + print("just add node %s, the name is %s" % (str(id(var)), str(type(var).__name__))) + seen.add(var) + if hasattr(var, 'next_functions'): + for u in var.next_functions: + if u[0] is not None: + dot.edge(str(id(u[0])), str(id(var))) + if str(type(u[0]).__name__) != "AccumulateGrad": + G.add_edge(str(id(u[0])), str(id(var))) + print("add an edge from %s node to %s node" % (str(type(u[0]).__name__), str(type(var).__name__))) + add_nodes(u[0]) + if hasattr(var, 'saved_tensors'): + for t in var.saved_tensors: + dot.edge(str(id(t)), str(id(var))) + G.add_edge(str(id(t)), str(id(var))) + add_nodes(t) + + add_nodes(var.grad_fn) + return dot, G + + +inputs = torch.randn(1, 3, 224, 224) +inception = inceptionv4() +y = inception(Variable(inputs)) +# print(y) + +dot, G = make_dot(y, inception.state_dict()) +dot.view(filename="inceptionv4", directory="../models/vispdf/") +labels = nx.get_node_attributes(G, 'name') +nx.draw(G, labels=labels) +plt.show() \ No newline at end of file diff --git a/dnn_split/split_point.py b/dnn_split/split_point.py new file mode 100644 index 0000000..522336b --- /dev/null +++ b/dnn_split/split_point.py @@ -0,0 +1,27 @@ +import numpy as np +import joblib + + +def find_split(delay_edge, delay_cloud, delay_trans): + num = len(delay_cloud) + total_delay = np.zeros(num+1) + for i in range(num-1): + total_delay[i] = np.sum(delay_edge[0:i+1]) + np.sum(delay_cloud[i+1:num]) + delay_trans[i] + total_delay[num-1] = np.sum(delay_edge) + total_delay[num] = 
np.sum(delay_cloud) + delay_trans[num-1] + split_point = np.argmin(total_delay) + min_delay = np.min(total_delay) + return split_point, min_delay + + +def compute_delay_trans(data_size, bandwidth): + delay_trans = data_size/bandwidth + return delay_trans + + +def predict_delay_per_layer(layer_type, layer_conf_para, dev_info): + PATH = "../model/regression_model.m" + input = [layer_type, layer_conf_para, dev_info] + model = joblib.load(PATH) + delay_per_layer = model.predict(input) + return delay_per_layer diff --git a/inference.py b/inference.py new file mode 100644 index 0000000..8b26681 --- /dev/null +++ b/inference.py @@ -0,0 +1,40 @@ +import torch +from dnn_split.model_util import get_alexnet +from dnn_split.model_canyon import ModelCanyon +from PIL import Image +from torchvision import transforms + + +MODEL_PATH = './models/' +IMAGE_PATH = './data/images/' + + +def get_input(): + input_image = Image.open(IMAGE_PATH+'dog.jpg') + preprocess = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + input_tensor = preprocess(input_image) + input_batch = input_tensor.unsqueeze(0) + + return input_batch + +if __name__ == '__main__': + input = get_input() + path = MODEL_PATH+"partialmodel.pth" + alexnet = get_alexnet() + model = ModelCanyon(model=alexnet, start=0, end=2) + model = torch.load(path) + model.eval() + # print(model.partialLayers) + output = model(input) + + path2 = MODEL_PATH+"partialmodel2.pth" + model2 = ModelCanyon(model=alexnet, start=3, end=20) + model2 = torch.load(path2) + model2.eval() + output2 = model2(output) + print(output2) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b356961 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,27 @@ +certifi==2020.6.20 +chardet==3.0.4 +cycler==0.10.0 +decorator==4.4.2 +dgl-cu102==0.5.1 +future==0.18.2 +graphviz==0.14.1 +idna==2.10 +joblib==0.16.0 +kiwisolver==1.2.0 +matplotlib==3.3.1 +networkx==2.5 +numpy==1.19.1 +pandas==1.1.2 +Pillow==7.2.0 +pydot==1.4.1 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2020.1 +requests==2.24.0 +scipy==1.5.2 +six==1.15.0 +torch==1.6.0 +torchprof==1.1.1 +torchsummary==1.5.1 +torchvision==0.7.0 +urllib3==1.25.10 diff --git a/test/misc_test.py b/test/misc_test.py new file mode 100644 index 0000000..6176dcc --- /dev/null +++ b/test/misc_test.py @@ -0,0 +1,49 @@ +# from __future__ import print_function +# import torch +# import os +# import numpy as np +# from collections import OrderedDict +# import itertools +# +# pred_list = [0,1,2,3,4,5] +# data = set(itertools.combinations(pred_list, 2)) +# it = (0,1) +# print(type(it)) +# if it in data: +# print("yes") +# +# def addone(x): +# return x + 1 +# +# l = OrderedDict() +# l['a'] = addone +# y = l['a'](3) +# print(y) +# +# x = torch.rand(5, 3) +# print(x) +# +# path = '/home/User/Documents/file.txt' +# +# # Above specified path +# # will be splited into +# # (head, tail) pair as +# # ('/home/User/Documents', 'file.txt') +# +# # Get the base name +# # of the specified path +# basename = os.path.basename(path) +# +# # Print the basename name +# print(basename) +import torch +import torch.nn.functional as F + +# data = torch.ones(4, 4) +# # pad(left, right, top, bottom) +# new_data = F.pad(input=data, pad=[1, 0, 0, 0], mode='constant', value=0) +# new_new_data = F.pad(input=new_data, pad=[0, 0, 0, 1], mode='constant', value=0) +# print(new_data) +# 
print(new_new_data) +input = [1,2,3,4,5] +print(len(input[0:3])) \ No newline at end of file diff --git a/test/test_alexnet.py b/test/test_alexnet.py new file mode 100644 index 0000000..adfcb5b --- /dev/null +++ b/test/test_alexnet.py @@ -0,0 +1,13 @@ +from dnn_split.model_util import * +import torchvision.models as models + + + +if __name__ == '__main__': + alexnet = models.alexnet(pretrained=False) + alexnet.load_state_dict(torch.load("../models/alexnet-owt-4df8aa71.pth")) + alexnet_layers = get_all_layers(alexnet) + + for i in alexnet_layers: + print(i) + print("---------------") \ No newline at end of file diff --git a/test/test_alg.py b/test/test_alg.py new file mode 100644 index 0000000..1cf681a --- /dev/null +++ b/test/test_alg.py @@ -0,0 +1,197 @@ +import itertools +import networkx as nx +import matplotlib.pyplot as plt +from collections import OrderedDict +from networkx.drawing.nx_pydot import graphviz_layout + + +class impl: + def __init__(self, device, edge, cloud): + self.device = device + self.edge = edge + self.cloud = cloud + +class trans: + def __init__(self, d2e, e2c,d2c): + self.d2e = d2e + self.e2c = e2c + self.d2c = d2c + +def build_graph(): + G = nx.DiGraph() + node_list = list(range(8)) + G.add_nodes_from(node_list) + G.add_edges_from([(0,1), (0,2), (1,3), (2,4), (3,5), (3,6), (2,6), (5,7), (6,7), (4,7)]) + + return G + +def longest_path(G): + nodes = list(nx.topological_sort(G)) + source = nodes[0] + + def helper(node): + if node == source: + return 0 + preds = list(G.predecessors(node)) + dist = max([helper(i) + 1 for i in preds]) + return dist + + path_dict = OrderedDict() + for node in nodes: + path_dict[node] = helper(node) + + return path_dict + + +def get_layer(G): + path_dict = longest_path(G) + max_len = path_dict[max(path_dict, key=path_dict.get)] + layer_dict = OrderedDict() + for layer in range(max_len + 1): + layer_item = [] + for k, v in path_dict.items(): + if v == layer: + layer_item.append(k) + layer_dict[layer] = layer_item + + return layer_dict + + + +def assign_nodes_to_layers(G, layer_dict): + nodes = list(nx.topological_sort(G)) + source = nodes[0] + + def get_subset_input_sibling(node, v): + subset = set() + siblings = [] + pred_list = list(G.predecessors(node)) + for i in range(1, len(pred_list) + 1): + data = itertools.combinations(pred_list, i) + subset.add(tuple(data)) + + for j in v: + if j != node: + if tuple(G.predecessors(j)) in subset: + siblings.append(j) + + return siblings + + + # k: layer index, v: list of nodes which belongs to layer k + for k, v in layer_dict.items(): + print("Start partition in layer ", k) + for node in v: + # if G.nodes[node].get('location') == 'None': + pred_list = list(G.predecessors(node)) + pred_location = [] + + for pred in pred_list: + pred_location.append(G.nodes[pred].get('location')) + + if 'cloud' in pred_location: + last_location = 'cloud' + elif 'edge' in pred_location: + last_location = 'edge' + else: + last_location = 'device' + + time_device = 0 + time_edge = 0 + time_cloud = 0 + print('the pred location list is', pred_location) + print('the last location is', last_location) + if last_location == 'device': + + # put node on device + time_device = 0 + G.nodes[node].get('attr').device + # put node on edge + for pred in pred_list: + time_edge = time_edge + G.edges[(pred, node)].get('attr').d2e + G.nodes[node].get('attr').edge + # put node on cloud + for pred in pred_list: + time_cloud = time_cloud + G.edges[(pred, node)].get('attr').d2c + G.nodes[node].get('attr').edge + + time_list = 
list([time_device, time_edge, time_cloud]) + time_min = min(time_list) + + if time_min == time_device: + node_location = 'device' + elif time_min == time_edge: + node_location = 'edge' + else: + node_location = 'cloud' + + elif last_location == 'edge': + # put node on edge + for pred in pred_list: + if G.nodes[pred].get('location') == 'device': + time_edge = time_edge + G.edges[(pred, node)].get('attr').d2e + G.nodes[node].get('attr').edge + time_cloud = time_cloud + G.edges[(pred, node)].get('attr').d2c + G.nodes[node].get('attr').cloud + else: + time_edge = time_edge + 0 + G.nodes[node].get('attr').edge + time_cloud = time_cloud + G.edges[(pred, node)].get('attr').e2c + G.nodes[node].get('attr').cloud + + time_list = list([time_edge, time_cloud]) + time_min = min(time_list) + + if time_min == time_edge: + node_location = 'edge' + else: + node_location = 'cloud' + else: + # for pred in pred_list: + # if G.nodes[pred].get('location') == 'device': + # time_cloud = time_cloud + G.edges[(pred, node)].get('attr').d2c + G.nodes[node].get('attr').cloud + # elif G.nodes[pred].get('location') == 'edge': + # time_cloud = time_cloud + G.edges[(pred, node)].get('attr').e2c + G.nodes[node].get('attr').cloud + # else: + # time_cloud = time_cloud + 0 + G.nodes[node].get('attr').cloud + node_location = 'cloud' + + G.nodes[node]['location'] = node_location + + # update subset siblings + location_dict = {'device':0, 'edge':1, 'cloud':2} + siblings = get_subset_input_sibling(node, v) + for sibling in siblings: + if G.nodes[sibling].get('location') == None: + G.nodes[sibling]['location'] = node_location + else: + if location_dict[G.nodes[sibling].get('location')] < location_dict[node_location]: + G.nodes[sibling]['location'] = node_location + + return G + + +if __name__ == '__main__': + G = build_graph() + + layer_dict = get_layer(G) + print(layer_dict) + + G.add_node('input') + G.add_node('output') + G.add_edge('input', 0) + G.add_edge(7, 'output') + + for node in G.nodes: + G.nodes[node]['attr'] = impl(3,2,1) + for edge in G.edges: + G.edges[edge]['attr'] = trans(0.1, 0.2, 0.3) + + G.nodes['input']['location'] = 'device' + G.edges[('input', 0)]['attr'] = trans(4, 8, 12) + G.edges[(7, 'output')]['attr'] = trans(4, 8, 12) + print(G.edges(data=True)) + + G = assign_nodes_to_layers(G, layer_dict) + for node in G.nodes: + print('Node %s is at %s' % (str(node), G.nodes[node].get('location'))) + + pos = nx.spring_layout(G) + labels = nx.get_node_attributes(G, 'location') + nx.draw_networkx_nodes(G, pos=pos) + nx.draw_networkx_labels(G, pos=pos, labels=labels) + nx.draw_networkx_edges(G, pos=pos, arrows=True) + plt.show() \ No newline at end of file diff --git a/test/test_case.py b/test/test_case.py new file mode 100644 index 0000000..9c511f3 --- /dev/null +++ b/test/test_case.py @@ -0,0 +1,51 @@ +import torch +from dnn_split.model_canyon import ModelCanyon +from dnn_split.model_util import get_alexnet, get_pretrained_alexnet +from PIL import Image +from torchvision import transforms + +MODEL_PATH = '../data/models/' +IMAGE_PATH = '../data/images/' + + +def get_input(): + input_image = Image.open(IMAGE_PATH + 'dog.jpg') + preprocess = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + input_tensor = preprocess(input_image) + input_batch = input_tensor.unsqueeze(0) + + return input_batch + + +if __name__ == "__main__": + + # input = get_input() + # path = 
"../models/partialmodel.pth" + # alexnet = get_alexnet() + # model = ModelCanyon(model=alexnet, start=0, end=2) + # model = torch.load(path) + # model.eval() + # # print(model.partialLayers) + # output = model(input) + # + # path2 = "../models/partialmodel2.pth" + # model2 = ModelCanyon(model=alexnet, start=3, end=20) + # model2 = torch.load(path2) + # model2.eval() + # print(model2.partialLayers) + # output2 = model2(output) + # print(output2) + + input2 = get_input() + startLayer = 0 + endLayer = 2 + model3 = get_pretrained_alexnet() + model3.eval() + output3 = model3(input2) + # print("#####################################") + print(output3) \ No newline at end of file diff --git a/test/test_ftp.py b/test/test_ftp.py new file mode 100644 index 0000000..be054ca --- /dev/null +++ b/test/test_ftp.py @@ -0,0 +1,52 @@ +from dnn_split.fused_tile_patition import * +from dnn_split.ftp_util import * + +if __name__ == "__main__": + + # parameters for FTP configuration + partition_w = 2 + partition_h = 2 + partition = 4 + fused_layer = 8 + task_id = [[0, 1], [2, 3]] + input_tiles = [[0] * fused_layer for _ in range(partition)] + output_tiles = [[0] * fused_layer for _ in range(partition)] + + # initialization of each partitioned tile of the bottom layer + for i in range(partition): + input_tiles[i][fused_layer-1] = TileRegion(0, 5, 0, 5) + output_tiles[i][fused_layer-1] = TileRegion(0, 5, 0, 5) + + # get input + input = get_input() + input_size = input.size() + + # get DNN model + model = get_pretrained_alexnet() + + # interpret the model to get relevant parameters for FTP algo + model = ModelInterpreter(model=model) + x_size, x_kenerl_size, x_stride, x_padding, x_type = model(input) + output_width = x_size[-1][2] + output_height = x_size[-1][1] + + # perform FTP algo + net_para = load_dnn_model(input_size[1:], x_size, x_kenerl_size, x_stride, x_padding, x_type) + ftp_para = FtpPara(partition_w, partition_h, fused_layer, task_id, input_tiles, output_tiles) + ftp_para = perform_ftp(net_para, ftp_para, output_width, output_height) + + # print the coordinate of each partitioned tile for each layer + print("we partition each layer of the DNN model into ", partition, "parts:") + for i in range(partition_h): + for j in range(partition_w): + for l in range(fused_layer): + id = ftp_para.task_id[i][j] + print("input Layer", l + 1, " :", "coordination of the ", id + 1, "part: (", + ftp_para.input_tiles[id][l].top_left_x, ",", + ftp_para.input_tiles[id][l].top_left_y, "),(", ftp_para.input_tiles[id][l].bottom_right_x, ",", + ftp_para.input_tiles[id][l].bottom_right_y, ")") + print("output Layer", l + 1, " :", "coordination of the ", id + 1, "part: (", ftp_para.output_tiles[id][l].top_left_x, + ",", + ftp_para.output_tiles[id][l].top_left_y, "),(", ftp_para.output_tiles[id][l].bottom_right_x, ",", + ftp_para.output_tiles[id][l].bottom_right_y, ")") + print("----------------------------------------------------------------") diff --git a/test/test_ftp_bug.py b/test/test_ftp_bug.py new file mode 100644 index 0000000..e14166f --- /dev/null +++ b/test/test_ftp_bug.py @@ -0,0 +1,45 @@ +import torch.nn as nn +from PIL import Image + +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +MODEL_PATH = '../data/models/' +IMAGE_PATH = '../data/images/' +# +# +def get_input(): + input_image = Image.open(IMAGE_PATH + 'dog.jpg') + preprocess = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), + ]) + input_tensor = preprocess(input_image) + input_batch = input_tensor.unsqueeze(0) + + return input_batch +# +# +# if __name__ == "__main__": +# input = get_input() +# print(input) +# layer = nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2, bias=True) +# layer.eval() +# output = layer(input) +# print(output) + +import torch +import torch.nn as nn + +# # With square kernels and equal stride +# m = nn.Conv2d(16, 33, 3, stride=2) +# # non-square kernels and unequal stride and with padding +# m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2)) +# # non-square kernels and unequal stride and with padding and dilation +m = nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2, bias=True) +m.eval() +input = get_input() +output = m(input) +print(output) \ No newline at end of file diff --git a/test/test_imagenet.py b/test/test_imagenet.py new file mode 100644 index 0000000..240e710 --- /dev/null +++ b/test/test_imagenet.py @@ -0,0 +1,199 @@ +import threading +from dnn_split.ftp_util import * +from dnn_split.model_canyon import ModelCanyon, ModelFTP +# import pandas as pd +import numpy as np +import torch +# from torchsummary import summary +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + + +class myThread(threading.Thread): + def __init__(self, threadID, name, input, model): + super(myThread, self).__init__() + threading.Thread.__init__(self) + self.threadID = threadID + self.name = name + self.input = input + self.model = model + + def run(self): + # print("starting" + self.name) + self.result = perform_partial_forward(self.name, self.input, self.model) + # print("the size of the output feature map of " + self.name + " is:", self.result.size()) + # print("the output feature map of " + self.name + " is:", output) + # print("Exiting" + self.name) + + def get_result(self): + threading.Thread.join(self) + try: + return self.result + except Exception: + return None + +def perform_partial_forward(threadName, input, model): + # alexnet = get_pretrained_alexnet() + # model = ModelCanyon(model=alexnet, start=0, end=12) + model.eval() + output = model(input) + + return output + +def output_to_excel(excel_name,output): + with pd.ExcelWriter(excel_name) as writer: + for i in range(output.size()[0]): + for j in range(output.size()[1]): + data = pd.DataFrame(output[i, j, :, :].detach().numpy()) + #print(data) + data.to_excel(writer, index=False, header=True, startrow=i*(output.size()[2]+1), startcol=j*output.size()[2]) + +if __name__ == "__main__": + threadList = ["Thread_1", "Thread_2", "Thread_3", "Thread_4"] + threadID = 1 + threads = [] + inputList = [] + result = [] + label = [] + path = "../models/imagenet_classes.txt" + + data_dir = "../data/images/val" + + transforms = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + + with open(path) as f: + classes = [line.strip() for line in f.readlines()] + dataset = datasets.ImageFolder(root=data_dir, transform=transforms) + dataset_loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=4) + + model = get_pretrained_vgg16() + # model = get_pretrained_alexnet() + model.eval() + class_ori = [] + correct = 0 + total = 0 + with torch.no_grad(): + for data in dataset_loader: + inputs, labels = data + # print(labels) + inputs = inputs.view(1, 3, 224, 224) + outputs = model(inputs) + _, index = torch.max(outputs.data, 1) + 
percentage = torch.nn.functional.softmax(outputs, dim=1)[0] * 100 + # print("the original predicted class is:") + # print(classes[index[0]], percentage[index[0]].item()) + class_ori.append(classes[index[0]]) + total += labels.size(0) + correct += (index == labels).sum().item() + + print('Accuracy of the network on the test images: %d %% without FTP' % ( + 100 * correct / total)) + + # test FTP + # vgg16 = get_pretrained_vgg16() + # model1 = ModelCanyon(model=vgg16, start=0, end=29) + + # alexnet = get_pretrained_alexnet() + # # model1 = ModelFTP(model=alexnet, start=0, end=12, x1=0, y1=0, x2=127, y2=127) + # # model2 = ModelFTP(model=alexnet, start=0, end=12, x1=112, y1=0, x2=224, y2=127) + # # model3 = ModelFTP(model=alexnet, start=0, end=12, x1=0, y1=112, x2=127, y2=224) + # # model4 = ModelFTP(model=alexnet, start=0, end=12, x1=112, y1=112, x2=224, y2=224) + # model1 = ModelFTP(model=alexnet, start=0, end=12, x1=0, y1=0, x2=193, y2=193) + # model2 = ModelFTP(model=alexnet, start=0, end=12, x1=96, y1=0, x2=224, y2=193) + # model3 = ModelFTP(model=alexnet, start=0, end=12, x1=0, y1=96, x2=193, y2=224) + # model4 = ModelFTP(model=alexnet, start=0, end=12, x1=96, y1=96, x2=224, y2=224) + # model_list = [model1, model2, model3, model4] + + # alexnet = get_pretrained_alexnet() + vgg16 = get_pretrained_vgg16() + class_ftp = [] + correct2 = 0 + total2 = 0 + with torch.no_grad(): + for data in dataset_loader: + inputs, labels = data + # print(labels) + threadID = 1 + threads = [] + inputList = [] + result = [] + + inputs = inputs.view(1, 3, 224, 224) + # inputList.append(inputs[:, :, 0:127, 0:127]) + # inputList.append(inputs[:, :, 0:127, 112:224]) + # inputList.append(inputs[:, :, 112:224, 0:127]) + # inputList.append(inputs[:, :, 112:224, 112:224]) + # inputList.append(inputs[:, :, 0:193, 0:193]) + # inputList.append(inputs[:, :, 0:193, 30:224]) + # inputList.append(inputs[:, :, 30:224, 0:193]) + # inputList.append(inputs[:, :, 30:224, 30:224]) + inputList.append(inputs[:, :, 0:130, 0:130]) + inputList.append(inputs[:, :, 0:130, 94:224]) + inputList.append(inputs[:, :, 94:224, 0:130]) + inputList.append(inputs[:, :, 94:224, 94:224]) + + # model1 = ModelFTP(model=alexnet, start=0, end=12, x1=0, y1=0, x2=193, y2=193, input_w=inputs.size()[3], input_h=inputs.size()[2]) + # model2 = ModelFTP(model=alexnet, start=0, end=12, x1=30, y1=0, x2=224, y2=193, input_w=inputs.size()[3], input_h=inputs.size()[2]) + # model3 = ModelFTP(model=alexnet, start=0, end=12, x1=0, y1=30, x2=193, y2=224, input_w=inputs.size()[3], input_h=inputs.size()[2]) + # model4 = ModelFTP(model=alexnet, start=0, end=12, x1=30, y1=30, x2=224, y2=224, input_w=inputs.size()[3], input_h=inputs.size()[2]) + model1 = ModelFTP(model=vgg16, start=0, end=16, x1=0, y1=0, x2=130, y2=130, input_w=inputs.size()[3], + input_h=inputs.size()[2]) + model2 = ModelFTP(model=vgg16, start=0, end=16, x1=94, y1=0, x2=224, y2=130, input_w=inputs.size()[3], + input_h=inputs.size()[2]) + model3 = ModelFTP(model=vgg16, start=0, end=16, x1=0, y1=94, x2=130, y2=224, input_w=inputs.size()[3], + input_h=inputs.size()[2]) + model4 = ModelFTP(model=vgg16, start=0, end=16, x1=94, y1=94, x2=224, y2=224, input_w=inputs.size()[3], + input_h=inputs.size()[2]) + model_list = [model1, model2, model3, model4] + for i in range(len(threadList)): + thread = myThread(threadID, threadList[i], inputList[i], model_list[i]) + thread.start() + threads.append(thread) + threadID += 1 + result.append(thread.get_result()) + + for t in threads: + t.join() + + # print("Exiting Main 
Thread") + # result.detach().numpy() + a = np.concatenate((result[0].detach().numpy(), result[1].detach().numpy()), axis=3) + b = np.concatenate((result[2].detach().numpy(), result[3].detach().numpy()), axis=3) + c = np.concatenate((a, b), axis=2) + input2 = torch.from_numpy(c) + # print(input2.size()) + # + # modelx = ModelCanyon(model=alexnet, start=13, end=22) + modelx = ModelCanyon(model=vgg16, start=17, end=50) + modelx.eval() + output2 = modelx(input2) + # # summary(modelx, input_size=(256, 6, 6), batch_size=-1) + # summary(modelx, input_size=(512, 14, 14), batch_size=-1) + _, index2 = torch.max(output2, 1) + percentage2 = torch.nn.functional.softmax(output2, dim=1)[0] * 100 + _, indices2 = torch.sort(output2, descending=True) + [(classes[idx], percentage[idx].item()) for idx in indices2[0][:5]] + # print("the FTP predicted class is:") + # print(classes[index2[0]], percentage2[index2[0]].item()) + class_ftp.append(classes[index2[0]]) + total2 += labels.size(0) + correct2 += (index2 == labels).sum().item() + + print('Accuracy of the network on the test images: %d %% with FTP' % ( + 100 * correct2 / total2)) + + match_num = 0 + for i in range(len(class_ftp)): + # print("the ", i, "pic belongs to class", class_ori[i], "without ftp, and ", class_ftp[i], "with ftp") + if class_ftp[i] == class_ori[i]: + match_num += 1 + + print("on the 1000 test images, the matching number is:", match_num) + + # class_result = np.vstack((class_ori, class_ftp)) + # np.savetxt('result_imagenet.csv', class_result.T, delimiter=',', header='the inference result', fmt='%s') diff --git a/test/test_inception.py b/test/test_inception.py new file mode 100644 index 0000000..3f18f67 --- /dev/null +++ b/test/test_inception.py @@ -0,0 +1,11 @@ +from dnn_split.model_util import * +import torchvision.models as models + +if __name__ == '__main__': + inception_v3 = models.inception_v3(pretrained=False) + inception_v3.load_state_dict(torch.load("../models/inception_v3_google-1a9a5a14.pth")) + inception_layers = get_all_layers(inception_v3) + + for i in inception_layers: + print(i) + print("--------------------") \ No newline at end of file diff --git a/test/test_infer_edge.py b/test/test_infer_edge.py new file mode 100644 index 0000000..d375c15 --- /dev/null +++ b/test/test_infer_edge.py @@ -0,0 +1,38 @@ +from dnn_split.model_infer_time import * +from dnn_split.model_util import get_input, get_pretrained_resnet34 +from dnn_split.comm_util import send_data + + +if __name__ == "__main__": + input = get_input() + + resnet34 = get_pretrained_resnet34() + num_layers = get_model_size(resnet34) + + model = ModelInferTime(model=resnet34, start=0, end=num_layers-1) + model.eval() + + data_size, infer_edge = model(input) + send_data(infer_edge, "10.5.27.51", 50002) + print(sum(infer_edge)) + # send_data(infer_edge, "127.0.0.1", 50002) + + + + # infer_edge = recv_data_once() + # data_size = np.array(data_size) + # infer_cloud = np.array(infer_cloud) + # infer_edge = np.array(infer_edge) + # # output, infer_edge = model(input) + # # infer_cloud = np.array([0.011, 0.0, 0.0002, 0.0001, 0.0, 0.0001, 0.0005, 0.0, 0.0012, 0.0, 0.0005, 0.0, 0.0, 0.0003, 0.0, 0.0, 0.002, 0.0, 0.0, 0.001, 0.0, 0.0005]) + # # data_size = np.array([0.7744, 0.7744, 0.186624, 0.559872, 0.559872, 0.129792, 0.259584, 0.259584, 0.173056, 0.173056, 0.173056, 0.173056, 0.036864, 0.036864, 0.009216, 0.004096, 0.004096, 0.004096, 0.004096, 0.004096, 0.001]) + # bandwidth = 6 # 6MB/s + # delay_trans = compute_delay_trans(data_size, bandwidth) + # split_point, min_delay = 
find_split(infer_edge, infer_cloud, delay_trans) + # if split_point == num_layers - 1: + # print("edge side") + # elif split_point == num_layers: + # print("cloud side") + # else: + # print("we split xxx model at: ", split_point + 1, "layer to get the minimum inference delay of", min_delay) + diff --git a/test/test_infer_time.py b/test/test_infer_time.py new file mode 100644 index 0000000..c602e05 --- /dev/null +++ b/test/test_infer_time.py @@ -0,0 +1,50 @@ +import torch +import numpy as np +from dnn_split.model_infer_time import * +from dnn_split.split_point import * +from dnn_split.model_util import get_input, get_pretrained_resnet34 +from dnn_split.comm_util import recv_data_once + +if __name__ == "__main__": + device = torch.device('cuda') + input = get_input() + input = input.to(device) + print("input finished") + resnet34 = get_pretrained_resnet34() + num_layers = get_model_size(resnet34) + + model = ModelInferTimeGPU(model=resnet34, start=0, end=num_layers-1) + model.to(device) + model.eval() + + x_size, infer_cloud = model(input) + print(next(model.parameters()).is_cuda) + print("Listening to the edge side to receive inference time data ...") + infer_edge = recv_data_once() + print("The inference time for each layer on the cloud side is: ", infer_cloud) + print("The inference time for each layer on the edge side is: ", infer_edge) + data_size = (np.array(x_size)*4)/pow(10,6) + print(data_size) + # output, infer_edge = model(input) + # infer_cloud = np.array([0.011, 0.0, 0.0002, 0.0001, 0.0, 0.0001, 0.0005, 0.0, 0.0012, 0.0, 0.0005, 0.0, 0.0, 0.0003, 0.0, 0.0, 0.002, 0.0, 0.0, 0.001, 0.0, 0.0005]) + # data_size = np.array([0.7744, 0.7744, 0.186624, 0.559872, 0.559872, 0.129792, 0.259584, 0.259584, 0.173056, 0.173056, 0.173056, 0.173056, 0.036864, 0.036864, 0.009216, 0.004096, 0.004096, 0.004096, 0.004096, 0.004096, 0.001]) + bandwidth = 30 # 6MB/s + delay_trans = compute_delay_trans(data_size, bandwidth) + split_point, min_delay = find_split(infer_edge, infer_cloud, delay_trans) + if split_point == num_layers - 1: + print("edge side") + elif split_point == num_layers: + print("cloud side") + else: + print("we split resnet34 model at: ", split_point + 1, "layer to get the minimum inference delay of", min_delay) + + # test_resnet = get_pretrained_resnet34() + # test_resnet.to(device) + # test_resnet.eval() + # torch.cuda.synchronize() + # start_time = time.time() + # test_resnet(input) + # torch.cuda.synchronize() + # end_time = time.time() + # print("total running time:", end_time-start_time) + diff --git a/test/test_model_split.py b/test/test_model_split.py new file mode 100644 index 0000000..3737d30 --- /dev/null +++ b/test/test_model_split.py @@ -0,0 +1,12 @@ +from dnn_split.split_point import * + +if __name__ == "__main__": + + num_layer = 5 + delay_dev = np.array([4, 3, 5, 8, 6]) + delay_edge = np.array([2, 1, 1, 3, 2]) + data_size = np.array([300, 200, 100, 50]) + bandwidth = 50 + delay_trans = compute_delay_trans(data_size, bandwidth) + split_point, min_delay = find_split(delay_dev, delay_edge, delay_trans, num_layer) + print("we split xxx model at: ", split_point+1, "layer to get the minimum inference delay of", min_delay) \ No newline at end of file diff --git a/test/test_multiprocess.py b/test/test_multiprocess.py new file mode 100644 index 0000000..c704052 --- /dev/null +++ b/test/test_multiprocess.py @@ -0,0 +1,164 @@ +import threading +from dnn_split.ftp_util import * +from dnn_split.model_canyon import ModelCanyon +import pandas as pd +import numpy as np +import torch 
+from torchsummary import summary +from torchvision import datasets, transforms +from torch.utils.data import DataLoader +# import torch.multiprocessing as mp +# from torch.multiprocessing import Pool, Manager +import multiprocessing +from multiprocessing import Pool +import time + +# class myProcess(multiprocessing.Process): +# def __init__(self, func, args): +# multiprocessing.Process.__init__(self) +# self.func = func +# self.args = args +# super(myProcess, self).__init__() +# +# def run(self): +# # print("starting" + self.name) +# self.result = self.func(*self.args) +# # print("the size of the output feature map of " + self.name + " is:", self.result.size()) +# # print("the output feature map of " + self.name + " is:", output) +# # print("Exiting" + self.name) +# +# def get_result(self): +# multiprocessing.Process.join(self) +# try: +# return self.result +# except Exception: +# return None + +def perform_partial_forward(input, model): + # alexnet = get_pretrained_alexnet() + # model = ModelCanyon(model=alexnet, start=0, end=12) + model.eval() + output = model(input) + + return output + +if __name__ == "__main__": + processList = ["Process_1", "Process_2", "Process_3", "Process_4"] + label = [] + path = "../models/imagenet_classes.txt" + + data_dir = "../data/images/val" + + transforms = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + # dataset = datasets.ImageFolder(data_dir, transform=transforms) + with open(path) as f: + classes = [line.strip() for line in f.readlines()] + dataset = datasets.ImageFolder(root=data_dir, transform=transforms) + dataset_loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=4) + + # model = get_pretrained_vgg16() + # # model = get_pretrained_alexnet() + # model.eval() + # class_ori = [] + # correct = 0 + # total = 0 + # start_time = time.time() + # with torch.no_grad(): + # for data in dataset_loader: + # inputs, labels = data + # # print(labels) + # inputs = inputs.view(1, 3, 224, 224) + # outputs = model(inputs) + # # _, predicted = torch.max(outputs.data, 1) + # _, index = torch.max(outputs.data, 1) + # percentage = torch.nn.functional.softmax(outputs, dim=1)[0] * 100 + # # print("the original predicted class is:") + # # print(classes[index[0]], percentage[index[0]].item()) + # class_ori.append(classes[index[0]]) + # total += labels.size(0) + # correct += (index == labels).sum().item() + # end_time = time.time() + # print('Inference process cost:', end_time - start_time, "s") + # print('Accuracy of the network on the test images: %d %% without FTP' % ( + # 100 * correct / total)) + + # test FTP + vgg16 = get_pretrained_vgg16() + model1 = ModelCanyon(model=vgg16, start=0, end=29) + + # alexnet = get_pretrained_alexnet() + # model1 = ModelCanyon(model=alexnet, start=0, end=12) + + class_ftp = [] + correct2 = 0 + total2 = 0 + multiprocessing.freeze_support() + start_time = time.time() + with torch.no_grad(): + for data in dataset_loader: + inputs, labels = data + # print(labels) + # processID = 1 + # processNum = [] + inputList = [] + pool = multiprocessing.Pool() + result = [] + + inputs = inputs.view(1, 3, 224, 224) + inputList.append(inputs[:, :, 0:127, 0:127]) + inputList.append(inputs[:, :, 0:127, 112:224]) + inputList.append(inputs[:, :, 112:224, 0:127]) + inputList.append(inputs[:, :, 112:224, 112:224]) + # inputList.append(inputs[:, :, 0:127, 0:127]) + # inputList.append(inputs[:, :, 0:127, 
93:224]) + # inputList.append(inputs[:, :, 93:224, 0:127]) + # inputList.append(inputs[:, :, 93:224, 93:224]) + + for i in range(len(processList)): + # process = myProcess(func=perform_partial_forward, args=(inputList[i], model1, return_dict)) + return_result = pool.apply_async(perform_partial_forward, args=(inputList[i], model1)) + result.append(return_result) + + pool.close() + pool.join() + + a = np.concatenate((result[0].get().detach().numpy(), result[1].get().detach().numpy()), axis=3) + b = np.concatenate((result[2].get().detach().numpy(), result[3].get().detach().numpy()), axis=3) + c = np.concatenate((a, b), axis=2) + input2 = torch.from_numpy(c) + # + # model2 = ModelCanyon(model=alexnet, start=13, end=22) + model2 = ModelCanyon(model=vgg16, start=30, end=50) + model2.eval() + output2 = model2(input2) + # # summary(model2, input_size=(256, 6, 6), batch_size=-1) + # summary(model2, input_size=(512, 14, 14), batch_size=-1) + _, index2 = torch.max(output2, 1) + percentage2 = torch.nn.functional.softmax(output2, dim=1)[0] * 100 + # _, indices2 = torch.sort(output2, descending=True) + # [(classes[idx], percentage[idx].item()) for idx in indices2[0][:5]] + # print("the FTP predicted class is:") + # print(classes[index2[0]], percentage2[index2[0]].item()) + class_ftp.append(classes[index2[0]]) + total2 += labels.size(0) + correct2 += (index2 == labels).sum().item() + end_time = time.time() + print('Inference with FTP cost:', end_time-start_time, "s") + print('Accuracy of the network on the test images: %d %% with FTP' % ( + 100 * correct2 / total2)) + + # match_num = 0 + # for i in range(len(class_ftp)): + # # print("the ", i, "pic belongs to class", class_ori[i], "without ftp, and ", class_ftp[i], "with ftp") + # if class_ftp[i] == class_ori[i]: + # match_num += 1 + # + # print("on the 1000 test images, the matching number is:", match_num) + # + # class_result = np.vstack((class_ori, class_ftp)) + # np.savetxt('result_imagenet.csv', class_result.T, delimiter=',', header='the inference result', fmt='%s') diff --git a/test/test_resnet.py b/test/test_resnet.py new file mode 100644 index 0000000..61e8c11 --- /dev/null +++ b/test/test_resnet.py @@ -0,0 +1,31 @@ +from dnn_split.model_util import * +import torchvision.models as models + +if __name__ == '__main__': + resnet34 = models.resnet34(pretrained=False) + resnet34.load_state_dict(torch.load("../models/resnet34-333f7ec4.pth")) + resnet_layers = get_all_layers(resnet34) + + for i in resnet_layers: + print(i,"##") + + alexnet = models.alexnet(pretrained=False) + alexnet.load_state_dict(torch.load("../models/alexnet-owt-4df8aa71.pth")) + alexnet_layers = get_all_layers(alexnet) + + for j in alexnet_layers: + print(j) + + alexnet = models.alexnet(pretrained=False) + alexnet.load_state_dict(torch.load("../models/alexnet-owt-4df8aa71.pth")) + for alexnet_module in alexnet.modules(): + if (type(alexnet_module) == nn.Sequential): + print("type is: ", type(alexnet_module)) + print(alexnet_module) + + + def get_all_layers(model): + # submodel = nn.Sequential(*list(model.children())) + layers = [module for module in model.modules() if type(module) != nn.Sequential] + return layers[1:] + diff --git a/test/test_resnet_infer_time.py b/test/test_resnet_infer_time.py new file mode 100644 index 0000000..e2c9563 --- /dev/null +++ b/test/test_resnet_infer_time.py @@ -0,0 +1,25 @@ +from dnn_split.model_util import * +from dnn_models.darknet_53 import * +import time + + + +if __name__ == '__main__': + inputs = get_input() + print(inputs.size()) + device = 
torch.device("cpu") + inputs = inputs.to(device) + darknet = darknet53(5) + darknet.to(device) + + times = 100 + total = [0]*13 + for i in range(times): + res, proc_time, output_size = darknet(inputs) + total = [a + b for a, b in zip(total, proc_time)] + + for elem in total: + print(elem) + + for j in output_size: + print(j) \ No newline at end of file diff --git a/test/test_send_recv.py b/test/test_send_recv.py new file mode 100644 index 0000000..10de575 --- /dev/null +++ b/test/test_send_recv.py @@ -0,0 +1,8 @@ +from dnn_split.comm_util import send_model, send_data + +if __name__ == '__main__': + # send_model("../models/partialmodel.pth") + # send_model("../models/partialmodel2.pth") + data = list([0,0,0,0,0,0,0]) + print(data) + send_data(data) \ No newline at end of file diff --git a/test/test_summary.py b/test/test_summary.py new file mode 100644 index 0000000..90bdeec --- /dev/null +++ b/test/test_summary.py @@ -0,0 +1,10 @@ +from dnn_models.mynet import MyNet +import torch +from torchsummary import summary + +if __name__ == "__main__": + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # PyTorch v0.4.0 + model = MyNet().to(device) + + summary(model, (3, 28, 28)) \ No newline at end of file diff --git a/test/test_torchprof.py b/test/test_torchprof.py new file mode 100644 index 0000000..1ee039b --- /dev/null +++ b/test/test_torchprof.py @@ -0,0 +1,12 @@ +import torch +import torchvision +import torchprof +from dnn_models.mynet import MyNet + +model = MyNet() +x = torch.rand([1, 3, 224, 224]) + +with torchprof.Profile(model, use_cuda=False) as prof: + model(x) + +print(prof.display(show_events=False)) # equivalent to `print(prof)` and `print(prof.display())` \ No newline at end of file diff --git a/test/test_trace.py b/test/test_trace.py new file mode 100644 index 0000000..28610ca --- /dev/null +++ b/test/test_trace.py @@ -0,0 +1,102 @@ +from collections import namedtuple +from distutils.version import LooseVersion +from graphviz import Digraph +from dnn_split.model_util import * +import torch + +Node = namedtuple('Node', ('name', 'inputs', 'attr', 'op')) + + +def replace(name, scope): + return '/'.join([scope[name], name]) + + +def parse(graph): + scope = {} + for n in graph.nodes(): + inputs = [i.uniqueName() for i in n.inputs()] + for i in range(1, len(inputs)): + scope[inputs[i]] = n.scopeName() + + uname = next(n.outputs()).uniqueName() + assert n.scopeName() != '', '{} has empty scope name'.format(n) + scope[uname] = n.scopeName() + scope['0'] = 'input' + + nodes = [] + for n in graph.nodes(): + attrs = {k: n[k] for k in n.attributeNames()} + attrs = str(attrs).replace("'", ' ') + inputs = [replace(i.uniqueName(), scope) for i in n.inputs()] + uname = next(n.outputs()).uniqueName() + nodes.append(Node(**{'name': replace(uname, scope), + 'op': n.kind(), + 'inputs': inputs, + 'attr': attrs})) + + for n in graph.inputs(): + uname = n.uniqueName() + if uname not in scope.keys(): + scope[uname] = 'unused' + nodes.append(Node(**{'name': replace(uname, scope), + 'op': 'Parameter', + 'inputs': [], + 'attr': str(n.type())})) + + return nodes + + +def make_dot_from_trace(trace): + """ Produces graphs of torch.jit.trace outputs + Example: + >>> trace, = torch.jit.trace(model, args=(x,)) + >>> dot = make_dot_from_trace(trace) + """ + # from tensorboardX + if LooseVersion(torch.__version__) >= LooseVersion("0.4.1"): + torch.onnx._optimize_trace(trace, torch._C._onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK) + elif LooseVersion(torch.__version__) >= LooseVersion("0.4"): + 
torch.onnx._optimize_trace(trace, False) + else: + torch.onnx._optimize_trace(trace) + graph = trace.graph() + list_of_nodes = parse(graph) + + node_attr = dict(style='filled', + shape='box', + align='left', + fontsize='12', + ranksep='0.1', + height='0.2') + + dot = Digraph(node_attr=node_attr, graph_attr=dict(size="12,12")) + + for node in list_of_nodes: + dot.node(node.name, label=node.name.replace('/', '\n')) + if node.inputs: + for inp in node.inputs: + dot.edge(inp, node.name) + + resize_graph(dot) + + return dot + + +def resize_graph(dot, size_per_element=0.15, min_size=12): + """Resize the graph according to how much content it contains. + Modify the graph in place. + """ + # Get the approximate number of nodes and edges + num_rows = len(dot.body) + content_size = num_rows * size_per_element + size = max(min_size, content_size) + size_str = str(size) + "," + str(size) + dot.graph_attr.update(size=size_str) + +if __name__ == '__main__': + model = get_pretrained_alexnet() + model.eval() + inputs = torch.randn(1, 3, 224, 224) + trace = torch.jit.trace(model, inputs) + dot = make_dot_from_trace(trace) + diff --git a/test/test_visualize.py b/test/test_visualize.py new file mode 100644 index 0000000..d78658b --- /dev/null +++ b/test/test_visualize.py @@ -0,0 +1,78 @@ +from graphviz import Digraph +import torch +from torch.autograd import Variable +from dnn_models.darknet_53 import * +from torchvision import models +import networkx as nx +import matplotlib.pyplot as plt + + +def make_dot(var, params): + """ Produces Graphviz representation of PyTorch autograd graph + + Blue nodes are the Variables that require grad, orange are Tensors + saved for backward in torch.autograd.Function + + Args: + var: output Variable + params: dict of (name, Variable) to add names to node that + require grad (TODO: make optional) + """ + param_map = {id(v): k for k, v in params.items()} + print(param_map) + + node_attr = dict(style='filled', + shape='box', + align='left', + fontsize='12', + ranksep='0.1', + height='0.2') + + dot = Digraph(node_attr=node_attr, graph_attr=dict(size="12,12")) + seen = set() + G = nx.Graph() + + def size_to_str(size): + return '(' + (', ').join(['%d' % v for v in size]) + ')' + + def add_nodes(var): + if var not in seen: + if torch.is_tensor(var): + dot.node(str(id(var)), size_to_str(var.size()), fillcolor='orange') + elif hasattr(var, 'variable'): + u = var.variable + node_name = '%s\n %s' % (param_map.get(id(u)), size_to_str(u.size())) + dot.node(str(id(var)), node_name, fillcolor='lightblue') + else: + dot.node(str(id(var)), str(type(var).__name__)) + G.add_node(str(id(var)), name=str(type(var).__name__)) + print("just add node %s, the name is %s" % (str(id(var)), str(type(var).__name__))) + seen.add(var) + if hasattr(var, 'next_functions'): + for u in var.next_functions: + if u[0] is not None: + dot.edge(str(id(u[0])), str(id(var))) + if str(type(u[0]).__name__) != "AccumulateGrad": + G.add_edge(str(id(u[0])), str(id(var))) + print("add an edge from %s node to %s node" % (str(type(u[0]).__name__), str(type(var).__name__))) + add_nodes(u[0]) + if hasattr(var, 'saved_tensors'): + for t in var.saved_tensors: + dot.edge(str(id(t)), str(id(var))) + G.add_edge(str(id(t)), str(id(var))) + add_nodes(t) + + add_nodes(var.grad_fn) + return dot, G + + +inputs = torch.randn(1, 3, 224, 224) +vgg = models.vgg16() +y = vgg(Variable(inputs)) +# print(y) + +dot, G = make_dot(y, vgg.state_dict()) +dot.view(filename="vgg16", directory="../models/vispdf/") +labels = nx.get_node_attributes(G, 
'name') +nx.draw(G, labels=labels) +plt.show() \ No newline at end of file diff --git a/test/val_ftp.py b/test/val_ftp.py new file mode 100644 index 0000000..2265531 --- /dev/null +++ b/test/val_ftp.py @@ -0,0 +1,94 @@ +import threading +from dnn_split.ftp_util import * +from dnn_split.model_canyon import ModelCanyon +from dnn_split.model_ftp import ModelFTP + +MODEL_PATH = '../data/models/' +IMAGE_PATH = '../data/images/' + +class myThread(threading.Thread): + def __init__(self, threadID, name, input, model): + super(myThread, self).__init__() + threading.Thread.__init__(self) + self.threadID = threadID + self.name = name + self.input = input + self.model = model + + def perform_partial_forward(self): + self.model.eval() + output = self.model(self.input) + + return output + + def run(self): + self.result = self.perform_partial_forward() + + def get_result(self): + threading.Thread.join(self) + try: + return self.result + except Exception: + return None + +if __name__ == "__main__": + + input = get_input() + # multi-thread + threadList = ["Thread_1", "Thread_2", "Thread_3", "Thread_4"] + threadID = 1 + threads = [] + inputList = [] + result = [] + + # original inference without fused tile partition + alexnet0 = get_pretrained_alexnet() + model = ModelCanyon(alexnet0, 0, 12) + model.eval() + output = model(input) + + print("the output:") + print(output) + print("-------------------------------------------") + + # inference with fused tile partition + alexnet = get_pretrained_alexnet() + + # partition the input feature map into four parts + # the coordinate of each partitioned tile of the top layer + coordinate_1 = TileRegion(0, 0, 193, 193) + coordinate_2 = TileRegion(30, 0, 224, 193) + coordinate_3 = TileRegion(0, 30, 193, 224) + coordinate_4 = TileRegion(30, 30, 224, 224) + # each partitioned tile completes inference separately + model_1 = ModelFTP(model=alexnet, start=0, end=12, coordinate=coordinate_1, input_w=input.size()[3], + input_h=input.size()[2]) + model_2 = ModelFTP(model=alexnet, start=0, end=12, coordinate=coordinate_2, input_w=input.size()[3], + input_h=input.size()[2]) + model_3 = ModelFTP(model=alexnet, start=0, end=12, coordinate=coordinate_3, input_w=input.size()[3], + input_h=input.size()[2]) + model_4 = ModelFTP(model=alexnet, start=0, end=12, coordinate=coordinate_4, input_w=input.size()[3], + input_h=input.size()[2]) + model_list = [model_1, model_2, model_3, model_4] + + inputList.append(input[:, :, 0:193, 0:193]) + inputList.append(input[:, :, 0:193, 30:224]) + inputList.append(input[:, :, 30:224, 0:193]) + inputList.append(input[:, :, 30:224, 30:224]) + + for i in range(len(threadList)): + thread = myThread(threadID, threadList[i], inputList[i], model_list[i]) # use multi thread to compute in parallel + thread.start() + threads.append(thread) + threadID += 1 + result.append(thread.get_result()) + + for t in threads: + t.join() + + a = np.concatenate((result[0].detach().numpy(), result[1].detach().numpy()), axis=3) + b = np.concatenate((result[2].detach().numpy(), result[3].detach().numpy()), axis=3) + c = np.concatenate((a, b), axis=2) + output_1 = torch.from_numpy(c) + print(output_1) +
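
For reference, a minimal sketch of how the split-point selection defined earlier in dnn_split/split_point.py is meant to be driven; the per-layer delays, output sizes, and bandwidth below are illustrative placeholders, not measurements from this repository.

import numpy as np
from dnn_split.split_point import find_split, compute_delay_trans

# Illustrative per-layer inference delays in seconds (index i = layer i).
delay_edge = np.array([0.020, 0.015, 0.030, 0.025, 0.010])   # edge-device timings (assumed)
delay_cloud = np.array([0.004, 0.003, 0.006, 0.005, 0.002])  # cloud-GPU timings (assumed)

# Illustrative per-layer output sizes in MB and an assumed 6 MB/s uplink.
data_size = np.array([0.77, 0.19, 0.56, 0.13, 0.004])
bandwidth = 6.0
delay_trans = compute_delay_trans(data_size, bandwidth)  # element-wise transfer delay

# find_split enumerates every cut point (plus the all-edge and all-cloud cases)
# and returns the cut with the minimum total of edge + transfer + cloud time.
split_point, min_delay = find_split(delay_edge, delay_cloud, delay_trans)
print("split after layer", split_point + 1, "with estimated delay", min_delay)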
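
The overlapping tile coordinates in val_ftp.py (for example TileRegion(0, 0, 193, 193) next to TileRegion(30, 0, 224, 193)) come from back-projecting each output tile through the fused convolution and pooling layers: a layer with kernel k, stride s, and padding p computes output column o from input columns o*s - p through o*s - p + k - 1, so adjacent output tiles require overlapping input regions. A minimal per-layer sketch of that rule, written as generic arithmetic rather than the repository's ftp_util API:

def required_input_range(o1, o2, kernel, stride, padding, input_len):
    # Back-project an output column range [o1, o2] through one conv/pool layer,
    # clamping to the valid feature-map bounds.
    lo = max(o1 * stride - padding, 0)
    hi = min(o2 * stride - padding + kernel - 1, input_len - 1)
    return lo, hi

# Example: AlexNet's first conv (kernel 11, stride 4, padding 2) on a 224-wide input
# produces a 55-wide output; columns 0..26 of that output (roughly the left half)
# need input columns 0..112, which is why the left and right input tiles overlap.
print(required_input_range(0, 26, kernel=11, stride=4, padding=2, input_len=224))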