diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..dc8f507 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +# Use an official PyTorch runtime as a parent image +FROM pytorch/pytorch + +# Set the working directory +WORKDIR /dnncanyon + +# Copy the current directory contents into the container +COPY ./ /dnncanyon + +# Install any needed packages specified in requirements.txt +RUN pip install -r ./requirements.txt + +# Run when the container launches +CMD ["python", "inference.py"] \ No newline at end of file diff --git a/data/.DS_Store b/data/.DS_Store new file mode 100644 index 0000000..26a53b5 Binary files /dev/null and b/data/.DS_Store differ diff --git a/data/images/.DS_Store b/data/images/.DS_Store new file mode 100644 index 0000000..09855cb Binary files /dev/null and b/data/images/.DS_Store differ diff --git a/data/images/dog.jpg b/data/images/dog.jpg new file mode 100644 index 0000000..12f0e0d Binary files /dev/null and b/data/images/dog.jpg differ diff --git a/dnn_models/.DS_Store b/dnn_models/.DS_Store new file mode 100644 index 0000000..d7e35d4 Binary files /dev/null and b/dnn_models/.DS_Store differ diff --git a/dnn_models/alexnet.py b/dnn_models/alexnet.py new file mode 100644 index 0000000..2a87dea --- /dev/null +++ b/dnn_models/alexnet.py @@ -0,0 +1,54 @@ +import torch +import torch.nn as nn + +PATH = "../models/alexnet-owt-4df8aa71.pth" + + +class AlexNet(nn.Module): + + def __init__(self, num_classes=1000): + super(AlexNet, self).__init__() + self.features = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + + nn.Conv2d(64, 192, kernel_size=5, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + + nn.Conv2d(192, 384, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(384, 256, kernel_size=3, padding=1), + + nn.ReLU(inplace=True), + nn.Conv2d(256, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + + nn.MaxPool2d(kernel_size=3, stride=2), + ) + self.avgpool = nn.AdaptiveAvgPool2d((6, 6)) + self.classifier = nn.Sequential( + nn.Dropout(), + nn.Linear(256 * 6 * 6, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(inplace=True), + nn.Linear(4096, num_classes), + ) + + def forward(self, x): + x = self.features(x) + x = self.avgpool(x) + x = torch.flatten(x, 1) + x = self.classifier(x) + return x + + +def alexnet(pretrained=False, progress=True, **kwargs): + model = AlexNet(**kwargs) + if pretrained: + model.load_state_dict(torch.load(PATH)) + + return model diff --git a/dnn_models/darknet_53.py b/dnn_models/darknet_53.py new file mode 100644 index 0000000..611fa1a --- /dev/null +++ b/dnn_models/darknet_53.py @@ -0,0 +1,117 @@ +import torch +from torch import nn +import time + +def conv_batch(in_num, out_num, kernel_size=3, padding=1, stride=1): + return nn.Sequential( + nn.Conv2d(in_num, out_num, kernel_size=kernel_size, stride=stride, padding=padding, bias=False), + nn.BatchNorm2d(out_num), + nn.LeakyReLU()) + + +# Residual block +class DarkResidualBlock(nn.Module): + def __init__(self, in_channels): + super(DarkResidualBlock, self).__init__() + + reduced_channels = int(in_channels/2) + + self.layer1 = conv_batch(in_channels, reduced_channels, kernel_size=1, padding=0) + self.layer2 = conv_batch(reduced_channels, in_channels) + + def forward(self, x): + residual = x + + out = self.layer1(x) + out = self.layer2(out) + out += residual + return out + + +class Darknet53(nn.Module): + def 
__init__(self, block, num_classes): + super(Darknet53, self).__init__() + + self.num_classes = num_classes + + self.conv1 = conv_batch(3, 32) + self.conv2 = conv_batch(32, 64, stride=2) + self.residual_block1 = self.make_layer(block, in_channels=64, num_blocks=1) + self.conv3 = conv_batch(64, 128, stride=2) + self.residual_block2 = self.make_layer(block, in_channels=128, num_blocks=2) + self.conv4 = conv_batch(128, 256, stride=2) + self.residual_block3 = self.make_layer(block, in_channels=256, num_blocks=8) + self.conv5 = conv_batch(256, 512, stride=2) + self.residual_block4 = self.make_layer(block, in_channels=512, num_blocks=8) + self.conv6 = conv_batch(512, 1024, stride=2) + self.residual_block5 = self.make_layer(block, in_channels=1024, num_blocks=4) + self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(1024, self.num_classes) + + def forward(self, x): + output_size = [] + start1 = time.time() + out = self.conv1(x) + end1 = time.time() + output_size.append(out.size()) + start2 = time.time() + out = self.conv2(out) + end2 = time.time() + output_size.append(out.size()) + start3 = time.time() + out = self.residual_block1(out) + end3 = time.time() + output_size.append(out.size()) + start4 = time.time() + out = self.conv3(out) + end4 = time.time() + output_size.append(out.size()) + start5 = time.time() + out = self.residual_block2(out) + end5 = time.time() + output_size.append(out.size()) + start6 = time.time() + out = self.conv4(out) + end6 = time.time() + output_size.append(out.size()) + start7 = time.time() + out = self.residual_block3(out) + end7 = time.time() + output_size.append(out.size()) + start8 = time.time() + out = self.conv5(out) + end8 = time.time() + output_size.append(out.size()) + start9 = time.time() + out = self.residual_block4(out) + end9 = time.time() + output_size.append(out.size()) + start10 = time.time() + out = self.conv6(out) + end10 = time.time() + output_size.append(out.size()) + start11 = time.time() + out = self.residual_block5(out) + end11 = time.time() + output_size.append(out.size()) + start12 = time.time() + out = self.global_avg_pool(out) + end12 = time.time() + output_size.append(out.size()) + out = out.view(-1, 1024) + start13 = time.time() + out = self.fc(out) + end13 = time.time() + output_size.append(out.size()) + proc_time = [end1-start1, end2-start2, end3-start3, end4-start4, end5-start5, end6-start6, end7-start7, end8-start8, end9-start9, end10-start10, end11-start11, end12-start12, end13-start13] + return out, proc_time, output_size + + def make_layer(self, block, in_channels, num_blocks): + layers = [] + for i in range(0, num_blocks): + layers.append(block(in_channels)) + return nn.Sequential(*layers) + + +def darknet53(num_classes): + return Darknet53(DarkResidualBlock, num_classes) diff --git a/dnn_models/inception_v3.py b/dnn_models/inception_v3.py new file mode 100644 index 0000000..64f753b --- /dev/null +++ b/dnn_models/inception_v3.py @@ -0,0 +1,440 @@ +from collections import namedtuple +import warnings +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.jit.annotations import Optional +from torch import Tensor + + +__all__ = ['Inception3', 'inception_v3', 'InceptionOutputs', '_InceptionOutputs'] + +PATH = "../models/inception_v3_google-1a9a5a14.pth" + +model_urls = { + # Inception v3 ported from TensorFlow + 'inception_v3_google': 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth', +} + +InceptionOutputs = namedtuple('InceptionOutputs', ['logits', 'aux_logits']) 
+InceptionOutputs.__annotations__ = {'logits': torch.Tensor, 'aux_logits': Optional[torch.Tensor]}
+
+# Script annotations failed with _GoogleNetOutputs = namedtuple ...
+# _InceptionOutputs set here for backwards compat
+_InceptionOutputs = InceptionOutputs
+
+
+def inception_v3(pretrained=False, progress=True, **kwargs):
+    r"""Inception v3 model architecture from
+    `"Rethinking the Inception Architecture for Computer Vision" `_.
+
+    .. note::
+        **Important**: In contrast to the other models the inception_v3 expects tensors with a size of
+        N x 3 x 299 x 299, so ensure your images are sized accordingly.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+        aux_logits (bool): If True, add an auxiliary branch that can improve training.
+            Default: *True*
+        transform_input (bool): If True, preprocesses the input according to the method with which it
+            was trained on ImageNet. Default: *False*
+    """
+    if pretrained:
+        if 'transform_input' not in kwargs:
+            kwargs['transform_input'] = True
+        if 'aux_logits' in kwargs:
+            original_aux_logits = kwargs['aux_logits']
+            kwargs['aux_logits'] = True
+        else:
+            original_aux_logits = True
+        kwargs['init_weights'] = False  # we are loading weights from a pretrained model
+        model = Inception3(**kwargs)
+        model.load_state_dict(torch.load(PATH))
+        if not original_aux_logits:
+            model.aux_logits = False
+            del model.AuxLogits
+        return model
+
+    return Inception3(**kwargs)
+
+
+class Inception3(nn.Module):
+
+    def __init__(self, num_classes=1000, aux_logits=True, transform_input=False,
+                 inception_blocks=None, init_weights=None):
+        super(Inception3, self).__init__()
+        if inception_blocks is None:
+            inception_blocks = [
+                BasicConv2d, InceptionA, InceptionB, InceptionC,
+                InceptionD, InceptionE, InceptionAux
+            ]
+        if init_weights is None:
+            warnings.warn('The default weight initialization of inception_v3 will be changed in future releases of '
+                          'torchvision. 
If you wish to keep the old behavior (which leads to long initialization times' + ' due to scipy/scipy#11299), please set init_weights=True.', FutureWarning) + init_weights = True + assert len(inception_blocks) == 7 + conv_block = inception_blocks[0] + inception_a = inception_blocks[1] + inception_b = inception_blocks[2] + inception_c = inception_blocks[3] + inception_d = inception_blocks[4] + inception_e = inception_blocks[5] + inception_aux = inception_blocks[6] + + self.aux_logits = aux_logits + self.transform_input = transform_input + self.Conv2d_1a_3x3 = conv_block(3, 32, kernel_size=3, stride=2) + self.Conv2d_2a_3x3 = conv_block(32, 32, kernel_size=3) + self.Conv2d_2b_3x3 = conv_block(32, 64, kernel_size=3, padding=1) + self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2) + self.Conv2d_3b_1x1 = conv_block(64, 80, kernel_size=1) + self.Conv2d_4a_3x3 = conv_block(80, 192, kernel_size=3) + self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2) + self.Mixed_5b = inception_a(192, pool_features=32) + self.Mixed_5c = inception_a(256, pool_features=64) + self.Mixed_5d = inception_a(288, pool_features=64) + self.Mixed_6a = inception_b(288) + self.Mixed_6b = inception_c(768, channels_7x7=128) + self.Mixed_6c = inception_c(768, channels_7x7=160) + self.Mixed_6d = inception_c(768, channels_7x7=160) + self.Mixed_6e = inception_c(768, channels_7x7=192) + if aux_logits: + self.AuxLogits = inception_aux(768, num_classes) + self.Mixed_7a = inception_d(768) + self.Mixed_7b = inception_e(1280) + self.Mixed_7c = inception_e(2048) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.dropout = nn.Dropout() + self.fc = nn.Linear(2048, num_classes) + if init_weights: + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): + import scipy.stats as stats + stddev = m.stddev if hasattr(m, 'stddev') else 0.1 + X = stats.truncnorm(-2, 2, scale=stddev) + values = torch.as_tensor(X.rvs(m.weight.numel()), dtype=m.weight.dtype) + values = values.view(m.weight.size()) + with torch.no_grad(): + m.weight.copy_(values) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def _transform_input(self, x): + if self.transform_input: + x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5 + x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5 + x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5 + x = torch.cat((x_ch0, x_ch1, x_ch2), 1) + return x + + def _forward(self, x): + # N x 3 x 299 x 299 + x = self.Conv2d_1a_3x3(x) + # N x 32 x 149 x 149 + x = self.Conv2d_2a_3x3(x) + # N x 32 x 147 x 147 + x = self.Conv2d_2b_3x3(x) + # N x 64 x 147 x 147 + x = self.maxpool1(x) + # N x 64 x 73 x 73 + x = self.Conv2d_3b_1x1(x) + # N x 80 x 73 x 73 + x = self.Conv2d_4a_3x3(x) + # N x 192 x 71 x 71 + x = self.maxpool2(x) + # N x 192 x 35 x 35 + x = self.Mixed_5b(x) + # N x 256 x 35 x 35 + x = self.Mixed_5c(x) + # N x 288 x 35 x 35 + x = self.Mixed_5d(x) + # N x 288 x 35 x 35 + x = self.Mixed_6a(x) + # N x 768 x 17 x 17 + x = self.Mixed_6b(x) + # N x 768 x 17 x 17 + x = self.Mixed_6c(x) + # N x 768 x 17 x 17 + x = self.Mixed_6d(x) + # N x 768 x 17 x 17 + x = self.Mixed_6e(x) + # N x 768 x 17 x 17 + aux_defined = self.training and self.aux_logits + if aux_defined: + aux = self.AuxLogits(x) + else: + aux = None + # N x 768 x 17 x 17 + x = self.Mixed_7a(x) + # N x 1280 x 8 x 8 + x = self.Mixed_7b(x) + # N x 2048 x 8 x 8 + x = self.Mixed_7c(x) + # N x 2048 x 8 x 8 + # Adaptive average pooling + x = 
self.avgpool(x) + # N x 2048 x 1 x 1 + x = self.dropout(x) + # N x 2048 x 1 x 1 + x = torch.flatten(x, 1) + # N x 2048 + x = self.fc(x) + # N x 1000 (num_classes) + return x, aux + + @torch.jit.unused + def eager_outputs(self, x, aux): + # type: (Tensor, Optional[Tensor]) -> InceptionOutputs + if self.training and self.aux_logits: + return InceptionOutputs(x, aux) + else: + return x + + def forward(self, x): + x = self._transform_input(x) + x, aux = self._forward(x) + aux_defined = self.training and self.aux_logits + if torch.jit.is_scripting(): + if not aux_defined: + warnings.warn("Scripted Inception3 always returns Inception3 Tuple") + return InceptionOutputs(x, aux) + else: + return self.eager_outputs(x, aux) + + +class InceptionA(nn.Module): + + def __init__(self, in_channels, pool_features, conv_block=None): + super(InceptionA, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch1x1 = conv_block(in_channels, 64, kernel_size=1) + + self.branch5x5_1 = conv_block(in_channels, 48, kernel_size=1) + self.branch5x5_2 = conv_block(48, 64, kernel_size=5, padding=2) + + self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1) + self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1) + self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, padding=1) + + self.branch_pool = conv_block(in_channels, pool_features, kernel_size=1) + + def _forward(self, x): + branch1x1 = self.branch1x1(x) + + branch5x5 = self.branch5x5_1(x) + branch5x5 = self.branch5x5_2(branch5x5) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionB(nn.Module): + + def __init__(self, in_channels, conv_block=None): + super(InceptionB, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch3x3 = conv_block(in_channels, 384, kernel_size=3, stride=2) + + self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1) + self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1) + self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, stride=2) + + def _forward(self, x): + branch3x3 = self.branch3x3(x) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) + + outputs = [branch3x3, branch3x3dbl, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionC(nn.Module): + + def __init__(self, in_channels, channels_7x7, conv_block=None): + super(InceptionC, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch1x1 = conv_block(in_channels, 192, kernel_size=1) + + c7 = channels_7x7 + self.branch7x7_1 = conv_block(in_channels, c7, kernel_size=1) + self.branch7x7_2 = conv_block(c7, c7, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7_3 = conv_block(c7, 192, kernel_size=(7, 1), padding=(3, 0)) + + self.branch7x7dbl_1 = conv_block(in_channels, c7, kernel_size=1) + self.branch7x7dbl_2 = conv_block(c7, c7, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7dbl_3 = conv_block(c7, c7, kernel_size=(1, 7), padding=(0, 3)) + 
self.branch7x7dbl_4 = conv_block(c7, c7, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7dbl_5 = conv_block(c7, 192, kernel_size=(1, 7), padding=(0, 3)) + + self.branch_pool = conv_block(in_channels, 192, kernel_size=1) + + def _forward(self, x): + branch1x1 = self.branch1x1(x) + + branch7x7 = self.branch7x7_1(x) + branch7x7 = self.branch7x7_2(branch7x7) + branch7x7 = self.branch7x7_3(branch7x7) + + branch7x7dbl = self.branch7x7dbl_1(x) + branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionD(nn.Module): + + def __init__(self, in_channels, conv_block=None): + super(InceptionD, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch3x3_1 = conv_block(in_channels, 192, kernel_size=1) + self.branch3x3_2 = conv_block(192, 320, kernel_size=3, stride=2) + + self.branch7x7x3_1 = conv_block(in_channels, 192, kernel_size=1) + self.branch7x7x3_2 = conv_block(192, 192, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7x3_3 = conv_block(192, 192, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7x3_4 = conv_block(192, 192, kernel_size=3, stride=2) + + def _forward(self, x): + branch3x3 = self.branch3x3_1(x) + branch3x3 = self.branch3x3_2(branch3x3) + + branch7x7x3 = self.branch7x7x3_1(x) + branch7x7x3 = self.branch7x7x3_2(branch7x7x3) + branch7x7x3 = self.branch7x7x3_3(branch7x7x3) + branch7x7x3 = self.branch7x7x3_4(branch7x7x3) + + branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) + outputs = [branch3x3, branch7x7x3, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionE(nn.Module): + + def __init__(self, in_channels, conv_block=None): + super(InceptionE, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch1x1 = conv_block(in_channels, 320, kernel_size=1) + + self.branch3x3_1 = conv_block(in_channels, 384, kernel_size=1) + self.branch3x3_2a = conv_block(384, 384, kernel_size=(1, 3), padding=(0, 1)) + self.branch3x3_2b = conv_block(384, 384, kernel_size=(3, 1), padding=(1, 0)) + + self.branch3x3dbl_1 = conv_block(in_channels, 448, kernel_size=1) + self.branch3x3dbl_2 = conv_block(448, 384, kernel_size=3, padding=1) + self.branch3x3dbl_3a = conv_block(384, 384, kernel_size=(1, 3), padding=(0, 1)) + self.branch3x3dbl_3b = conv_block(384, 384, kernel_size=(3, 1), padding=(1, 0)) + + self.branch_pool = conv_block(in_channels, 192, kernel_size=1) + + def _forward(self, x): + branch1x1 = self.branch1x1(x) + + branch3x3 = self.branch3x3_1(x) + branch3x3 = [ + self.branch3x3_2a(branch3x3), + self.branch3x3_2b(branch3x3), + ] + branch3x3 = torch.cat(branch3x3, 1) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = [ + self.branch3x3dbl_3a(branch3x3dbl), + self.branch3x3dbl_3b(branch3x3dbl), + ] + branch3x3dbl = torch.cat(branch3x3dbl, 1) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] + return outputs + + def forward(self, x): + 
outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionAux(nn.Module): + + def __init__(self, in_channels, num_classes, conv_block=None): + super(InceptionAux, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.conv0 = conv_block(in_channels, 128, kernel_size=1) + self.conv1 = conv_block(128, 768, kernel_size=5) + self.conv1.stddev = 0.01 + self.fc = nn.Linear(768, num_classes) + self.fc.stddev = 0.001 + + def forward(self, x): + # N x 768 x 17 x 17 + x = F.avg_pool2d(x, kernel_size=5, stride=3) + # N x 768 x 5 x 5 + x = self.conv0(x) + # N x 128 x 5 x 5 + x = self.conv1(x) + # N x 768 x 1 x 1 + # Adaptive average pooling + x = F.adaptive_avg_pool2d(x, (1, 1)) + # N x 768 x 1 x 1 + x = torch.flatten(x, 1) + # N x 768 + x = self.fc(x) + # N x 1000 + return x + + +class BasicConv2d(nn.Module): + + def __init__(self, in_channels, out_channels, **kwargs): + super(BasicConv2d, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs) + self.bn = nn.BatchNorm2d(out_channels, eps=0.001) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + return F.relu(x, inplace=True) \ No newline at end of file diff --git a/dnn_models/inceptionv4.py b/dnn_models/inceptionv4.py new file mode 100644 index 0000000..2ecd6ea --- /dev/null +++ b/dnn_models/inceptionv4.py @@ -0,0 +1,358 @@ +from __future__ import print_function, division, absolute_import +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.model_zoo as model_zoo +import os +import sys + +__all__ = ['InceptionV4', 'inceptionv4'] + +pretrained_settings = { + 'inceptionv4': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth', + 'input_space': 'RGB', + 'input_size': [3, 299, 299], + 'input_range': [0, 1], + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + 'num_classes': 1000 + }, + 'imagenet+background': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth', + 'input_space': 'RGB', + 'input_size': [3, 299, 299], + 'input_range': [0, 1], + 'mean': [0.5, 0.5, 0.5], + 'std': [0.5, 0.5, 0.5], + 'num_classes': 1001 + } + } +} + + +class BasicConv2d(nn.Module): + + def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0): + super(BasicConv2d, self).__init__() + self.conv = nn.Conv2d(in_planes, out_planes, + kernel_size=kernel_size, stride=stride, + padding=padding, bias=False) # verify bias false + self.bn = nn.BatchNorm2d(out_planes, + eps=0.001, # value found in tensorflow + momentum=0.1, # default pytorch value + affine=True) + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + + +class Mixed_3a(nn.Module): + + def __init__(self): + super(Mixed_3a, self).__init__() + self.maxpool = nn.MaxPool2d(3, stride=2) + self.conv = BasicConv2d(64, 96, kernel_size=3, stride=2) + + def forward(self, x): + x0 = self.maxpool(x) + x1 = self.conv(x) + out = torch.cat((x0, x1), 1) + return out + + +class Mixed_4a(nn.Module): + + def __init__(self): + super(Mixed_4a, self).__init__() + + self.branch0 = nn.Sequential( + BasicConv2d(160, 64, kernel_size=1, stride=1), + BasicConv2d(64, 96, kernel_size=3, stride=1) + ) + + self.branch1 = nn.Sequential( + BasicConv2d(160, 64, kernel_size=1, stride=1), + BasicConv2d(64, 64, kernel_size=(1,7), stride=1, padding=(0,3)), + BasicConv2d(64, 64, kernel_size=(7,1), stride=1, padding=(3,0)), + BasicConv2d(64, 96, kernel_size=(3,3), stride=1) + ) 
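+        # branch0 (1x1 -> 3x3) and branch1 (1x1 -> 1x7 -> 7x1 -> 3x3) each end in 96 channels;
+        # forward() concatenates them into 192 channels for Mixed_5a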
+ + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + out = torch.cat((x0, x1), 1) + return out + + +class Mixed_5a(nn.Module): + + def __init__(self): + super(Mixed_5a, self).__init__() + self.conv = BasicConv2d(192, 192, kernel_size=3, stride=2) + self.maxpool = nn.MaxPool2d(3, stride=2) + + def forward(self, x): + x0 = self.conv(x) + x1 = self.maxpool(x) + out = torch.cat((x0, x1), 1) + return out + + +class Inception_A(nn.Module): + + def __init__(self): + super(Inception_A, self).__init__() + self.branch0 = BasicConv2d(384, 96, kernel_size=1, stride=1) + + self.branch1 = nn.Sequential( + BasicConv2d(384, 64, kernel_size=1, stride=1), + BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1) + ) + + self.branch2 = nn.Sequential( + BasicConv2d(384, 64, kernel_size=1, stride=1), + BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1), + BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1) + ) + + self.branch3 = nn.Sequential( + nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False), + BasicConv2d(384, 96, kernel_size=1, stride=1) + ) + + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + x2 = self.branch2(x) + x3 = self.branch3(x) + out = torch.cat((x0, x1, x2, x3), 1) + return out + + +class Reduction_A(nn.Module): + + def __init__(self): + super(Reduction_A, self).__init__() + self.branch0 = BasicConv2d(384, 384, kernel_size=3, stride=2) + + self.branch1 = nn.Sequential( + BasicConv2d(384, 192, kernel_size=1, stride=1), + BasicConv2d(192, 224, kernel_size=3, stride=1, padding=1), + BasicConv2d(224, 256, kernel_size=3, stride=2) + ) + + self.branch2 = nn.MaxPool2d(3, stride=2) + + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + x2 = self.branch2(x) + out = torch.cat((x0, x1, x2), 1) + return out + + +class Inception_B(nn.Module): + + def __init__(self): + super(Inception_B, self).__init__() + self.branch0 = BasicConv2d(1024, 384, kernel_size=1, stride=1) + + self.branch1 = nn.Sequential( + BasicConv2d(1024, 192, kernel_size=1, stride=1), + BasicConv2d(192, 224, kernel_size=(1,7), stride=1, padding=(0,3)), + BasicConv2d(224, 256, kernel_size=(7,1), stride=1, padding=(3,0)) + ) + + self.branch2 = nn.Sequential( + BasicConv2d(1024, 192, kernel_size=1, stride=1), + BasicConv2d(192, 192, kernel_size=(7,1), stride=1, padding=(3,0)), + BasicConv2d(192, 224, kernel_size=(1,7), stride=1, padding=(0,3)), + BasicConv2d(224, 224, kernel_size=(7,1), stride=1, padding=(3,0)), + BasicConv2d(224, 256, kernel_size=(1,7), stride=1, padding=(0,3)) + ) + + self.branch3 = nn.Sequential( + nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False), + BasicConv2d(1024, 128, kernel_size=1, stride=1) + ) + + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + x2 = self.branch2(x) + x3 = self.branch3(x) + out = torch.cat((x0, x1, x2, x3), 1) + return out + + +class Reduction_B(nn.Module): + + def __init__(self): + super(Reduction_B, self).__init__() + + self.branch0 = nn.Sequential( + BasicConv2d(1024, 192, kernel_size=1, stride=1), + BasicConv2d(192, 192, kernel_size=3, stride=2) + ) + + self.branch1 = nn.Sequential( + BasicConv2d(1024, 256, kernel_size=1, stride=1), + BasicConv2d(256, 256, kernel_size=(1,7), stride=1, padding=(0,3)), + BasicConv2d(256, 320, kernel_size=(7,1), stride=1, padding=(3,0)), + BasicConv2d(320, 320, kernel_size=3, stride=2) + ) + + self.branch2 = nn.MaxPool2d(3, stride=2) + + def forward(self, x): + x0 = self.branch0(x) + x1 = self.branch1(x) + x2 = self.branch2(x) + out = torch.cat((x0, x1, 
x2), 1) + return out + + +class Inception_C(nn.Module): + + def __init__(self): + super(Inception_C, self).__init__() + + self.branch0 = BasicConv2d(1536, 256, kernel_size=1, stride=1) + + self.branch1_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1) + self.branch1_1a = BasicConv2d(384, 256, kernel_size=(1,3), stride=1, padding=(0,1)) + self.branch1_1b = BasicConv2d(384, 256, kernel_size=(3,1), stride=1, padding=(1,0)) + + self.branch2_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1) + self.branch2_1 = BasicConv2d(384, 448, kernel_size=(3,1), stride=1, padding=(1,0)) + self.branch2_2 = BasicConv2d(448, 512, kernel_size=(1,3), stride=1, padding=(0,1)) + self.branch2_3a = BasicConv2d(512, 256, kernel_size=(1,3), stride=1, padding=(0,1)) + self.branch2_3b = BasicConv2d(512, 256, kernel_size=(3,1), stride=1, padding=(1,0)) + + self.branch3 = nn.Sequential( + nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False), + BasicConv2d(1536, 256, kernel_size=1, stride=1) + ) + + def forward(self, x): + x0 = self.branch0(x) + + x1_0 = self.branch1_0(x) + x1_1a = self.branch1_1a(x1_0) + x1_1b = self.branch1_1b(x1_0) + x1 = torch.cat((x1_1a, x1_1b), 1) + + x2_0 = self.branch2_0(x) + x2_1 = self.branch2_1(x2_0) + x2_2 = self.branch2_2(x2_1) + x2_3a = self.branch2_3a(x2_2) + x2_3b = self.branch2_3b(x2_2) + x2 = torch.cat((x2_3a, x2_3b), 1) + + x3 = self.branch3(x) + + out = torch.cat((x0, x1, x2, x3), 1) + return out + + +class InceptionV4(nn.Module): + + def __init__(self, num_classes=1001): + super(InceptionV4, self).__init__() + # Special attributs + self.input_space = None + self.input_size = (299, 299, 3) + self.mean = None + self.std = None + # Modules + self.features = nn.Sequential( + BasicConv2d(3, 32, kernel_size=3, stride=2), + BasicConv2d(32, 32, kernel_size=3, stride=1), + BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1), + Mixed_3a(), + Mixed_4a(), + Mixed_5a(), + Inception_A(), + Inception_A(), + Inception_A(), + Inception_A(), + Reduction_A(), # Mixed_6a + Inception_B(), + Inception_B(), + Inception_B(), + Inception_B(), + Inception_B(), + Inception_B(), + Inception_B(), + Reduction_B(), # Mixed_7a + Inception_C(), + Inception_C(), + Inception_C() + ) + self.last_linear = nn.Linear(1536, num_classes) + + def logits(self, features): + #Allows image of any size to be processed + adaptiveAvgPoolWidth = features.shape[2] + x = F.avg_pool2d(features, kernel_size=adaptiveAvgPoolWidth) + x = x.view(x.size(0), -1) + x = self.last_linear(x) + return x + + def forward(self, input): + x = self.features(input) + x = self.logits(x) + return x + + +def inceptionv4(num_classes=1000, pretrained='imagenet'): + if pretrained: + settings = pretrained_settings['inceptionv4'][pretrained] + assert num_classes == settings['num_classes'], \ + "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes) + + # both 'imagenet'&'imagenet+background' are loaded from same parameters + model = InceptionV4(num_classes=1001) + model.load_state_dict(model_zoo.load_url(settings['url'])) + + if pretrained == 'imagenet': + new_last_linear = nn.Linear(1536, 1000) + new_last_linear.weight.data = model.last_linear.weight.data[1:] + new_last_linear.bias.data = model.last_linear.bias.data[1:] + model.last_linear = new_last_linear + + model.input_space = settings['input_space'] + model.input_size = settings['input_size'] + model.input_range = settings['input_range'] + model.mean = settings['mean'] + model.std = settings['std'] + else: + model = InceptionV4(num_classes=num_classes) + return 
model + + +''' +TEST +Run this code with: +``` +cd $HOME/pretrained-models.pytorch +python -m pretrainedmodels.inceptionv4 +``` +''' +if __name__ == '__main__': + + assert inceptionv4(num_classes=10, pretrained=None) + print('success') + assert inceptionv4(num_classes=1000, pretrained='imagenet') + print('success') + assert inceptionv4(num_classes=1001, pretrained='imagenet+background') + print('success') + + # fail + assert inceptionv4(num_classes=1001, pretrained='imagenet') \ No newline at end of file diff --git a/dnn_models/karate_club_net.py b/dnn_models/karate_club_net.py new file mode 100644 index 0000000..9306664 --- /dev/null +++ b/dnn_models/karate_club_net.py @@ -0,0 +1,44 @@ +import numpy as np +import networkx as nx +import matplotlib.pyplot as plt +import torch.nn as nn +import dgl + + +def build_karate_club_graph(): + # All 78 edges are stored in two numpy arrays. One for source endpoints + # while the other for destination endpoints. + src = np.array([1, 2, 2, 3, 3, 3, 4, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 10, 10, + 10, 11, 12, 12, 13, 13, 13, 13, 16, 16, 17, 17, 19, 19, 21, 21, + 25, 25, 27, 27, 27, 28, 29, 29, 30, 30, 31, 31, 31, 31, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 33, 33]) + dst = np.array([0, 0, 1, 0, 1, 2, 0, 0, 0, 4, 5, 0, 1, 2, 3, 0, 2, 2, 0, 4, + 5, 0, 0, 3, 0, 1, 2, 3, 5, 6, 0, 1, 0, 1, 0, 1, 23, 24, 2, 23, + 24, 2, 23, 26, 1, 8, 0, 24, 25, 28, 2, 8, 14, 15, 18, 20, 22, 23, + 29, 30, 31, 8, 9, 13, 14, 15, 18, 19, 20, 22, 23, 26, 27, 28, 29, 30, + 31, 32]) + # Edges are directional in DGL; Make them bi-directional. + u = np.concatenate([src, dst]) + v = np.concatenate([dst, src]) + # Construct a DGLGraph + return dgl.graph((u, v)) + + +def assign_features(G): + embed = nn.Embedding(34, 5) + print(embed) + G.ndata['feat'] = embed.weight + + return G + + +if __name__ == "__main__": + G = build_karate_club_graph() + print("We have %d nodes." % G.number_of_nodes()) + print("We have %d egdes." 
% G.number_of_edges()) + nx_G = G.to_networkx().to_undirected() + pos = nx.kamada_kawai_layout(nx_G) + nx.draw(nx_G, pos, with_labels=True, node_color=[[.7, .8, .9]]) + plt.show() + diff --git a/dnn_models/mynet.py b/dnn_models/mynet.py new file mode 100644 index 0000000..920d64a --- /dev/null +++ b/dnn_models/mynet.py @@ -0,0 +1,34 @@ +import torch +import torch.nn as nn + +class MyNet(nn.Module): + + def __init__(self): + super(MyNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=8, stride=4, padding=2) + self.relu = nn.ReLU(inplace=True) + self.conv2 = nn.Conv2d(64, 192, kernel_size=4, stride=4, padding=2) + self.pool1 = nn.MaxPool2d(kernel_size=6, stride=3, padding=0) + + def forward(self, x): + x = self.conv1(x) + x = self.relu(x) + y1 = self.conv2(x) + y1 = self.relu(y1) + y2 = self.conv2(x) + y = y1 + y2 + # x = self.conv1(x) + # x = self.relu(x) + # x = self.conv2(x) + # x = self.pool1(x) + + return y + + +if __name__ == "__main__": + net = MyNet() + net.eval() + + input = torch.randn(1, 3, 224, 224) + output = net(input) + print(output) \ No newline at end of file diff --git a/dnn_models/resnet.py b/dnn_models/resnet.py new file mode 100644 index 0000000..b0fbd49 --- /dev/null +++ b/dnn_models/resnet.py @@ -0,0 +1,353 @@ +import torch +import torch.nn as nn + + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', + 'wide_resnet50_2', 'wide_resnet101_2'] + +PATH = "../models/resnet34-333f7ec4.pth" + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', + 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth', + 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth', + 'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth', + 'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=dilation, groups=groups, bias=False, dilation=dilation) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None): + super(BasicBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + if groups != 1 or base_width != 64: + raise ValueError('BasicBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError("Dilation > 1 not supported in BasicBlock") + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = 
self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + # while original implementation places the stride at the first 1x1 convolution(self.conv1) + # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + # This variant is also known as ResNet V1.5 and improves accuracy according to + # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. + + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None): + super(Bottleneck, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width / 64.)) * groups + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, + groups=1, width_per_group=64, replace_stride_with_dilation=None, + norm_layer=None): + super(ResNet, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self._norm_layer = norm_layer + + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + # each element in the tuple indicates if we should replace + # the 2x2 stride with a dilated convolution instead + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError("replace_stride_with_dilation should be None " + "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = norm_layer(self.inplanes) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2, + dilate=replace_stride_with_dilation[0]) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, + dilate=replace_stride_with_dilation[1]) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2, + dilate=replace_stride_with_dilation[2]) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + 
                nn.init.constant_(m.bias, 0)
+
+        # Zero-initialize the last BN in each residual branch,
+        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
+        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
+        if zero_init_residual:
+            for m in self.modules():
+                if isinstance(m, Bottleneck):
+                    nn.init.constant_(m.bn3.weight, 0)
+                elif isinstance(m, BasicBlock):
+                    nn.init.constant_(m.bn2.weight, 0)
+
+    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
+        norm_layer = self._norm_layer
+        downsample = None
+        previous_dilation = self.dilation
+        if dilate:
+            self.dilation *= stride
+            stride = 1
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                conv1x1(self.inplanes, planes * block.expansion, stride),
+                norm_layer(planes * block.expansion),
+            )
+
+        layers = []
+        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
+                            self.base_width, previous_dilation, norm_layer))
+        self.inplanes = planes * block.expansion
+        for _ in range(1, blocks):
+            layers.append(block(self.inplanes, planes, groups=self.groups,
+                                base_width=self.base_width, dilation=self.dilation,
+                                norm_layer=norm_layer))
+
+        return nn.Sequential(*layers)
+
+    def _forward_impl(self, x):
+        # See note [TorchScript super()]
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+
+        x = self.avgpool(x)
+        x = torch.flatten(x, 1)
+        x = self.fc(x)
+
+        return x
+
+    def forward(self, x):
+        return self._forward_impl(x)
+
+
+def _resnet(arch, block, layers, pretrained, progress, **kwargs):
+    model = ResNet(block, layers, **kwargs)
+    if pretrained:
+        model.load_state_dict(torch.load(PATH))
+    return model
+
+
+def resnet18(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-18 model from
+    `"Deep Residual Learning for Image Recognition" `_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
+                   **kwargs)
+
+
+def resnet34(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-34 model from
+    `"Deep Residual Learning for Image Recognition" `_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnet50(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-50 model from
+    `"Deep Residual Learning for Image Recognition" `_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnet101(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-101 model from
+    `"Deep Residual Learning for Image Recognition" `_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnet152(pretrained=False, progress=True, **kwargs):
+    
r"""ResNet-152 model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress, + **kwargs) + + +def resnext50_32x4d(pretrained=False, progress=True, **kwargs): + r"""ResNeXt-50 32x4d model from + `"Aggregated Residual Transformation for Deep Neural Networks" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 4 + return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3], + pretrained, progress, **kwargs) + + +def resnext101_32x8d(pretrained=False, progress=True, **kwargs): + r"""ResNeXt-101 32x8d model from + `"Aggregated Residual Transformation for Deep Neural Networks" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 8 + return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], + pretrained, progress, **kwargs) + + +def wide_resnet50_2(pretrained=False, progress=True, **kwargs): + r"""Wide ResNet-50-2 model from + `"Wide Residual Networks" `_ + + The model is the same as ResNet except for the bottleneck number of channels + which is twice larger in every block. The number of channels in outer 1x1 + convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 + channels, and in Wide ResNet-50-2 has 2048-1024-2048. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['width_per_group'] = 64 * 2 + return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3], + pretrained, progress, **kwargs) + + +def wide_resnet101_2(pretrained=False, progress=True, **kwargs): + r"""Wide ResNet-101-2 model from + `"Wide Residual Networks" `_ + + The model is the same as ResNet except for the bottleneck number of channels + which is twice larger in every block. The number of channels in outer 1x1 + convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 + channels, and in Wide ResNet-50-2 has 2048-1024-2048. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['width_per_group'] = 64 * 2 + return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3], + pretrained, progress, **kwargs) \ No newline at end of file diff --git a/dnn_models/simple_gcn.py b/dnn_models/simple_gcn.py new file mode 100644 index 0000000..1bfa94b --- /dev/null +++ b/dnn_models/simple_gcn.py @@ -0,0 +1,17 @@ +import torch +import torch.nn as nn +from dgl.nn.pytorch import GraphConv + + +class GCN(nn.Module): + def __init__(self, in_feats, hidden_size, num_classes): + super(GCN, self).__init__() + self.conv1 = GraphConv(in_feats, hidden_size) + self.conv2 = GraphConv(hidden_size, num_classes) + + def forward(self, g, inputs): + h = self.conv1(g, inputs) + h = torch.relu(h) + h = self.conv2(g, h) + + return h \ No newline at end of file diff --git a/dnn_split/.DS_Store b/dnn_split/.DS_Store new file mode 100644 index 0000000..db6d5fb Binary files /dev/null and b/dnn_split/.DS_Store differ diff --git a/dnn_split/comm_util.py b/dnn_split/comm_util.py new file mode 100644 index 0000000..63b85fe --- /dev/null +++ b/dnn_split/comm_util.py @@ -0,0 +1,130 @@ +import os +import queue +import socket +import struct +import pickle +import threading + + +def send_model(model_path): + host = "127.0.0.1" + port = 50000 + + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.setblocking(1) + s.connect((host, port)) + + if os.path.isfile(model_path): + model_head = struct.pack('128sl', os.path.basename(model_path).encode('utf-8'), os.stat(model_path).st_size) + s.sendall(model_head) + f = open(model_path, 'rb') + print("file opened") + raw = f.read() + s.sendall(raw) + f.close() + else: + print("Wrong path.") + s.close() + # s.sendall(model_name.encode('utf-8')) + # s.sendall(os.stat(model_path+model_name).st_size.to_bytes(length=8, byteorder='big')) + + +def recv_model(model_dir, host, port): + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.setblocking(0) + s.bind((host, port)) + s.listen() + print("listening to connection") + while True: + conn, addr = s.accept() + print("connected by ", addr) + model_info_size = struct.calcsize('128sl') + buf = conn.recv(model_info_size) + if buf: + model_name, model_size = struct.unpack('128sl', buf) + fn = model_name.decode('utf-8').strip('\00') + new_model_path = os.path.join(model_dir + fn) + if model_size == 0: + continue + print("model_name:", fn) + f = open(new_model_path, 'wb') + while True: + data = conn.recv(model_size) + if not data: + break + f.write(data) + f.close() + else: + continue + s.close() + # model_name = conn.recv(1024).decode('utf-8') + # model_size = int.from_bytes(conn.recv(8), byteorder='big') + + +def send_data(data, host, port): + # host = "127.0.0.1" + # port = 50001 + + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.setblocking(1) + s.connect((host, port)) + data_obj = pickle.dumps(data) + s.sendall(len(data_obj).to_bytes(length=8, byteorder='big')) + s.sendall(data_obj) + s.close() + # s.sendall(model_name.encode('utf-8')) + # s.sendall(os.stat(model_path+model_name).st_size.to_bytes(length=8, byteorder='big')) + + +def producer(conn, q): + size = int.from_bytes(conn.recv(8), byteorder='big') + data_obj = conn.recv(size) + data = pickle.loads(data_obj) + conn.close() + q.put(item=data, block=False, timeout=10) + print("I put it into the queue: ", list(q.queue)) + + +def recv_data(q): + host = "127.0.0.1" + port = 50001 + s = 
socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.setblocking(1) + s.bind((host, port)) + s.listen() + print("listening to connection") + while True: + conn, addr = s.accept() + print("connected by ", addr) + producer_thread = threading.Thread(target=producer, args=(conn, q)) + producer_thread.start() + + +def recv_data_once(): + host = "10.5.27.51" + port = 50002 + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.bind((host, port)) + s.listen() + conn, addr = s.accept() + print("connected by ", addr) + size = int.from_bytes(conn.recv(8), byteorder='big') + data_obj = conn.recv(size) + data = pickle.loads(data_obj) + return data + + +if __name__ == '__main__': + q = queue.Queue(1000) + recv_thread = threading.Thread(target=recv_data, args=(q)) + recv_thread.start() + while True: + try: + value = q.get(block=True, timeout=5) + print("the value is: ", value) + except queue.Empty: + print("empty queue") + # recv_model("../data/models/", "127.0.0.1", 50000) + + + diff --git a/dnn_split/ftp_util.py b/dnn_split/ftp_util.py new file mode 100644 index 0000000..64185e3 --- /dev/null +++ b/dnn_split/ftp_util.py @@ -0,0 +1,151 @@ +from dnn_split.model_util import * +import torch +import torch.nn as nn +import numpy as np +from dataclasses import dataclass + +@dataclass +class TileRegion: + """ + define the coordination of a feature map + (top_left_x, top_left_y) represents the top left coordination + (bottom_right_x, bottom_right_y) represents the bottom right coordination + """ + top_left_x: int + top_left_y: int + bottom_right_x: int + bottom_right_y: int + +@dataclass +class NetPara: + """ + define the net para of each layer + (stride, kernel_size, padding): filter para of each layer + type: convolution or pooling + input_width: width of the input feature maps of each layer + input_height: height of the input feature maps of each layer + """ + stride: int + kernel_size: int + padding: int + type: str + input_width: int + input_height: int + +@dataclass +class FtpPara: + """ + define the para for FTP algorithm + partitions_w: the number of slices divided from width + partitions_h: the number of slices divided from height + fused_layers: the number of layers that need to be partitioned by FTP algo + task_id: id for each partition + input_tiles: TileRegion info of each partition of the input feature maps + output_tiles: TileRegion info of each partition of the output feature maps + """ + partitions_w: int + partitions_h: int + fused_layers: int + task_id: int + input_tiles: TileRegion + output_tiles: TileRegion + +class ModelInterpreter(nn.Module): + """ Interpret the model layer by layer + Retrieve the parameters of each layer (convolution or pooling) for the DNN model, including: + 1. feature map size + 2. kernel size + 3. stride + 4. padding + 5. 
layer type: convolution or pooling
+    """
+    def __init__(self, model):
+        super(ModelInterpreter, self).__init__()
+        self.layers = get_all_layers(model)  # get each layer of the DNN model
+        self.x_train = nn.ModuleList(self.layers)
+
+    def forward(self, x):
+        x_size = []
+        x_kernel_size = []
+        x_stride = []
+        x_padding = []
+        x_type = []
+
+        for i in range(len(self.layers)):  # loop over all the layers
+            # forward layer by layer
+            x = self.layers[i](x)
+
+            # add flatten after AvgPool
+            if isinstance(self.layers[i], nn.AdaptiveAvgPool2d):
+                x = torch.flatten(x, 1)
+
+            # get kernel size of the current layer
+            if hasattr(self.layers[i], 'kernel_size'):
+                x_kernel_size.append(self.layers[i].kernel_size)
+            else:
+                continue
+
+            # get stride of the current layer
+            if hasattr(self.layers[i], 'stride'):
+                x_stride.append(self.layers[i].stride)
+            else:
+                continue
+
+            # get padding of the current layer
+            if hasattr(self.layers[i], 'padding'):
+                x_padding.append(self.layers[i].padding)
+            else:
+                continue
+
+            # get layer type of the current layer
+            if isinstance(self.layers[i], nn.Conv2d):
+                x_type.append("convolution")
+            elif isinstance(self.layers[i], (nn.MaxPool2d, nn.AvgPool2d)):
+                x_type.append("pooling")
+            else:
+                continue
+
+            # get output feature map size of all dimensions except the batch dimension (channel, width, height)
+            x_size.append(x.size()[1:])
+
+        return x_size, x_kernel_size, x_stride, x_padding, x_type
+
+
+def load_dnn_model(input_size, layer_size, layer_kernel_size, layer_stride, layer_padding, layer_type):
+    """ load dnn model and retrieve relevant parameters
+    Args:
+        input_size: the input image size (channel, height, width)
+        layer_size: output feature map size of each layer (convolution or pooling layer)
+        layer_kernel_size: kernel size para of each layer (convolution or pooling layer)
+        layer_stride: stride para of each layer (convolution or pooling layer)
+        layer_padding: padding para of each layer (convolution or pooling layer)
+        layer_type: type of each layer (convolution or pooling layer)
+
+    Returns:
+        net_para: necessary parameters of each layer for subsequent FTP calculation, including:
+            1. stride
+            2. kernel size
+            3. padding
+            4. type
+            5. input_width
+            6. 
output_width + """ + net_para = [[0] for _ in range(len(layer_size))] + input_width = [[0] for _ in range(len(layer_size))] + input_height = [[0] for _ in range(len(layer_size))] + for i in range(len(layer_size)): + # assign the relevant para of the input maps for each layer + if i == 0: + input_width[i] = input_size[2] + input_height[i] = input_size[1] + else: + input_width[i] = layer_size[i-1][2] + input_height[i] = layer_size[i-1][1] + + # calculate the net_para + if np.array(layer_stride[i]).size == 2: + net_para[i] = NetPara(layer_stride[i][0], layer_kernel_size[i][0], layer_padding[i][0], layer_type[i], input_width[i], input_height[i]) + else: + net_para[i] = NetPara(layer_stride[i], layer_kernel_size[i], layer_padding[i], layer_type[i], input_width[i], input_height[i]) + + return net_para diff --git a/dnn_split/fused_tile_patition.py b/dnn_split/fused_tile_patition.py new file mode 100644 index 0000000..6ef10b9 --- /dev/null +++ b/dnn_split/fused_tile_patition.py @@ -0,0 +1,116 @@ +from dnn_split.ftp_util import * +import numpy as np + +def grid(output_width, output_height, ftp_para, partition_w, partition_h): + """calculate the coordination of each partition for the bottom layer + Args: + output_width: width of the output feature map of the bottom layer + output_height: height of the output feature map of the bottom layer + ftp_para: initialized ftp para of the bottom layer updated + partition_w: the number of slices divided from width + partition_h: the number of slices divided from height + + Returns: + ftp_para: ftp para of the bottom layer updated for FTP algorithm + """ + w = output_width + h = output_height + stride_w = np.ceil(w/partition_w) + stride_h = np.ceil(h/partition_h) + start_h = 0 + end_h = stride_h + + for i in range(partition_h): + start_w = 0 + end_w = stride_w + if i != 0: + start_h = start_h + stride_h + end_h = end_h + stride_h + for j in range(partition_w): + task_id = ftp_para.task_id[i][j] + ftp_para.output_tiles[task_id][ftp_para.fused_layers-1].top_left_x = start_w + ftp_para.output_tiles[task_id][ftp_para.fused_layers-1].bottom_right_x = end_w + ftp_para.output_tiles[task_id][ftp_para.fused_layers-1].top_left_y = start_h + ftp_para.output_tiles[task_id][ftp_para.fused_layers-1].bottom_right_y = end_h + start_w = end_w + if j == partition_w - 1: + end_w = w + else: + end_w = end_w + stride_w + + return ftp_para + +def tranversal(net_para, output): + """calculate the coordination of the partitioned tile for current layer + Args: + net_para: net para of the current layer + output: TileRegion info of the output partitioned tile for current layer + + Returns: + input: TileRegion info of the input partitioned tile for current layer + """ + input = TileRegion(0, 0, 0, 0) + stride = net_para.stride + kernel_size = net_para.kernel_size + padding = net_para.padding + input_w = net_para.input_width + input_h = net_para.input_height + + # calculate the coordination of the input partitioned tiles for current layer + if net_para.type == "convolution" or net_para.type == "pooling": + input.top_left_x = output.top_left_x * stride + input.top_left_y = output.top_left_y * stride + input.bottom_right_x = (output.bottom_right_x - 1) * stride + kernel_size + input.bottom_right_y = (output.bottom_right_y - 1) * stride + kernel_size + + # update the coordination of the input partitioned tile considering different situations with padding effect + if input.bottom_right_x == input_w + 2 * padding and input.bottom_right_y == input_h + 2 * padding: # the partitioned tile locates 
at the bottom right corner of the feature map + input.top_left_x = max(0, input.top_left_x - padding) + input.top_left_y = max(0, input.top_left_y - padding) + input.bottom_right_x = input.bottom_right_x - 2 * padding + input.bottom_right_y = input.bottom_right_y - 2 * padding + elif input.bottom_right_x == input_w + 2 * padding: # the partitioned tile locates at the right side of the feature map + input.top_left_x = max(0, input.top_left_x - padding) + input.top_left_y = max(0, input.top_left_y - padding) + input.bottom_right_x = input.bottom_right_x - 2 * padding + input.bottom_right_y = input.bottom_right_y - padding + elif input.bottom_right_y == input_h + 2 * padding: # the partitioned tile locates at the down side of the feature map + input.top_left_x = max(0, input.top_left_x - padding) + input.top_left_y = max(0, input.top_left_y - padding) + input.bottom_right_x = input.bottom_right_x - padding + input.bottom_right_y = input.bottom_right_y - 2 * padding + else: + input.top_left_x = max(0, input.top_left_x - padding) + input.top_left_y = max(0, input.top_left_y - padding) + input.bottom_right_x = max(0, input.bottom_right_x - padding) + input.bottom_right_y = max(0, input.bottom_right_y - padding) + + return input + + +def perform_ftp(net_para, ftp_para, output_width, output_height): + """perform FTP algorithm + Args: + net_para: net para of the DNN model + ftp_para: initialized para for FTP algorithm + output_width: width of the output feature map of the bottom layer + output_height: height of the output feature map of the bottom layer + + Returns: + ftp_para: updated ftp para, which gives the coordination of each partitioned tile for each layer + """ + id = 0 + for i in range(ftp_para.partitions_h): + for j in range(ftp_para.partitions_w): + ftp_para.task_id[i][j] = id + id += 1 + + grid(output_width, output_height, ftp_para, ftp_para.partitions_w, ftp_para.partitions_h) + for i in range(ftp_para.partitions_h): + for j in range(ftp_para.partitions_w): + for l in range(ftp_para.fused_layers-1, -1, -1): + ftp_para.input_tiles[ftp_para.task_id[i][j]][l] = tranversal(net_para[l], ftp_para.output_tiles[ftp_para.task_id[i][j]][l]) #derive the coordination from the bottom layer + if l > 0: + ftp_para.output_tiles[ftp_para.task_id[i][j]][l-1] = ftp_para.input_tiles[ftp_para.task_id[i][j]][l] # assign the input tiles of current layer as the output tiles of the previous layer + + return ftp_para diff --git a/dnn_split/horizontal_partition.py b/dnn_split/horizontal_partition.py new file mode 100644 index 0000000..e32430d --- /dev/null +++ b/dnn_split/horizontal_partition.py @@ -0,0 +1,286 @@ +import itertools +import networkx as nx +import matplotlib.pyplot as plt +from collections import OrderedDict + + +# class impl: +# def __init__(self, device, edge, cloud): +# self.device = device +# self.edge = edge +# self.cloud = cloud +# +# class trans: +# def __init__(self, d2e, e2c,d2c): +# self.d2e = d2e +# self.e2c = e2c +# self.d2c = d2c + +# def build_graph(): +# G = nx.DiGraph() +# node_list = list(range(11)) +# G.add_nodes_from(node_list) +# for i in range(10): +# G.add_edge(i, i+1) +# +# return G + +# def build_alex_graph(): +# G = nx.DiGraph() +# node_list = list(range(11)) +# G.add_nodes_from(node_list) +# for i in range(10): +# G.add_edge(i, i+1) +# +# G.add_node('input') +# G.add_node('output') +# G.add_edge('input', 0) +# G.add_edge(10, 'output') +# G.nodes[0]['attr'] = impl(0.0387906074523925, 0.00161535739898681, 0.000158524800837039) +# G.nodes[1]['attr'] = 
impl(0.00837891101837158, 0.000992012023925781, 0.000104918397590518) +# G.nodes[2]['attr'] = impl(0.0532735347747802, 0.000886416435241699, 0.000335372802615165) +# G.nodes[3]['attr'] = impl(0.00678062438964843, 0.000534486770629882, 0.0000306591999717056) +# G.nodes[4]['attr'] = impl(0.025732421875, 0.00070655345916748, 0.000196169601380825) +# G.nodes[5]['attr'] = impl(0.0373493671417236, 0.000864291191101074, 0.000201350398361682) +# G.nodes[6]['attr'] = impl(0.0266769647598266, 0.00054333209991455, 0.000142950402200222) +# G.nodes[7]['attr'] = impl(0.00391316413879394, 0.000162029266357421, 0.0000364096000790596) +# G.nodes[8]['attr'] = impl(0.0905773401260376, 0.00514111518859863, 0.000916819202899932) +# G.nodes[9]['attr'] = impl(0.05386643409729, 0.00220365524291992, 0.000424079996347427) +# G.nodes[10]['attr'] = impl(0.0138338804244995, 0.000534629821777343, 0.00014015680104494) +# +# d2e = 64.95 +# e2c = 31.53 +# d2c = 29.78 +# +# output_size_01 = 5.908203125 +# G.edges[(0, 1)]['attr'] = trans(output_size_01 / d2e, output_size_01 / e2c, output_size_01 / d2c) +# output_size_12 = 1.423828125 +# G.edges[(1, 2)]['attr'] = trans(output_size_12 / d2e, output_size_12 / e2c, output_size_12 / d2c) +# output_size_23 = 4.271484375 +# G.edges[(2, 3)]['attr'] = trans(output_size_23 / d2e, output_size_23 / e2c, output_size_23 / d2c) +# output_size_34 = 0.990234375 +# G.edges[(3, 4)]['attr'] = trans(output_size_34 / d2e, output_size_34 / e2c, output_size_34 / d2c) +# output_size_45 = 1.98046875 +# G.edges[(4, 5)]['attr'] = trans(output_size_45 / d2e, output_size_45 / e2c, output_size_45 / d2c) +# output_size_56 = 1.3203125 +# G.edges[(5, 6)]['attr'] = trans(output_size_56 / d2e, output_size_56 / e2c, output_size_56 / d2c) +# output_size_67 = 1.3203125 +# G.edges[(6, 7)]['attr'] = trans(output_size_67 / d2e, output_size_67 / e2c, output_size_67 / d2c) +# output_size_78 = 0.28125 +# G.edges[(7, 8)]['attr'] = trans(output_size_78 / d2e, output_size_78 / e2c, output_size_78 / d2c) +# output_size_89 = 0.125 +# G.edges[(8, 9)]['attr'] = trans(output_size_89 / d2e, output_size_89 / e2c, output_size_89 / d2c) +# output_size_910 = 0.125 +# G.edges[(9, 10)]['attr'] = trans(output_size_910 / d2e, output_size_910 / e2c, output_size_910 / d2c) +# +# G.nodes['input']['attr'] = 'device' +# input_size = 4.59375 +# G.edges[('input', 0)]['attr'] = trans(input_size / d2e, input_size / e2c, input_size/d2c) +# G.edges[(10, 'output')]['attr'] = trans(10000, 10000, 10000) +# +# return G + +def longest_path(G): + nodes = list(nx.topological_sort(G)) + source = nodes[0] + + def helper(node): + if node == source: + return 0 + preds = list(G.predecessors(node)) + dist = max([helper(i) + 1 for i in preds]) + return dist + + path_dict = OrderedDict() + for node in nodes: + path_dict[node] = helper(node) + + return path_dict + + +def get_layer(G): + path_dict = longest_path(G) + max_len = path_dict[max(path_dict, key=path_dict.get)] + layer_dict = OrderedDict() + for layer in range(max_len + 1): + layer_item = [] + for k, v in path_dict.items(): + if v == layer: + layer_item.append(k) + layer_dict[layer] = layer_item + + return layer_dict + + + +def assign_nodes_to_layers(G, layer_dict): + nodes = list(nx.topological_sort(G)) + source = nodes[0] + + def get_subset_input_sibling(node, v): + subset = set() + siblings = [] + pred_list = list(G.predecessors(node)) + for i in range(1, len(pred_list) + 1): + data = itertools.combinations(pred_list, i) + subset.add(tuple(data)) + + for j in v: + if j != node: + if 
tuple(G.predecessors(j)) in subset: + siblings.append(j) + + return siblings + + + # k: layer index, v: list of nodes which belongs to layer k + for k, v in layer_dict.items(): + print("Start partition in layer ", k) + for node in v: + # if G.nodes[node].get('location') == 'None': + pred_list = list(G.predecessors(node)) + pred_location = [] + + for pred in pred_list: + pred_location.append(G.nodes[pred].get('location')) + + if 'cloud' in pred_location: + last_location = 'cloud' + elif 'edge' in pred_location: + last_location = 'edge' + else: + last_location = 'device' + + time_device = 0 + time_edge = 0 + time_cloud = 0 + print('the pred location list is', pred_location) + print('the last location is', last_location) + if last_location == 'device': + + # put node on device + print('pred is', pred) + print('node is ', node) + time_device = 0 + G.nodes[node].get('attr').device + # put node on edge + for pred in pred_list: + print('edge trans', G.edges[(pred, node)].get('attr').d2e) + time_edge = time_edge + G.edges[(pred, node)].get('attr').d2e + G.nodes[node].get('attr').edge + # put node on cloud + for pred in pred_list: + print('cloud trans', G.edges[(pred, node)].get('attr').d2c) + time_cloud = time_cloud + G.edges[(pred, node)].get('attr').d2c + G.nodes[node].get('attr').cloud + + time_list = list([time_device, time_edge, time_cloud]) + print(time_list) + time_min = min(time_list) + + if time_min == time_device: + node_location = 'device' + elif time_min == time_edge: + node_location = 'edge' + else: + node_location = 'cloud' + + elif last_location == 'edge': + # put node on edge + for pred in pred_list: + if G.nodes[pred].get('location') == 'device': + time_edge = time_edge + G.edges[(pred, node)].get('attr').d2e + G.nodes[node].get('attr').edge + time_cloud = time_cloud + G.edges[(pred, node)].get('attr').d2c + G.nodes[node].get('attr').cloud + else: + time_edge = time_edge + 0 + G.nodes[node].get('attr').edge + time_cloud = time_cloud + G.edges[(pred, node)].get('attr').e2c + G.nodes[node].get('attr').cloud + + time_list = list([time_edge, time_cloud]) + time_min = min(time_list) + + if time_min == time_edge: + node_location = 'edge' + else: + node_location = 'cloud' + else: + # for pred in pred_list: + # if G.nodes[pred].get('location') == 'device': + # time_cloud = time_cloud + G.edges[(pred, node)].get('attr').d2c + G.nodes[node].get('attr').cloud + # elif G.nodes[pred].get('location') == 'edge': + # time_cloud = time_cloud + G.edges[(pred, node)].get('attr').e2c + G.nodes[node].get('attr').cloud + # else: + # time_cloud = time_cloud + 0 + G.nodes[node].get('attr').cloud + node_location = 'cloud' + + G.nodes[node]['location'] = node_location + + # update subset siblings + location_dict = {'device':0, 'edge':1, 'cloud':2} + siblings = get_subset_input_sibling(node, v) + for sibling in siblings: + if G.nodes[sibling].get('location') == None: + G.nodes[sibling]['location'] = node_location + else: + if location_dict[G.nodes[sibling].get('location')] < location_dict[node_location]: + G.nodes[sibling]['location'] = node_location + + return G + + +if __name__ == '__main__': + None + # G = build_graph() + # + # layer_dict = get_layer(G) + # print(layer_dict) + # + # G.add_node('input') + # G.add_node('output') + # G.add_edge('input', 0) + # G.add_edge(10, 'output') + # G.nodes[0]['attr'] = impl(0.03941894, 0.00148439, 0.000245695993) + # G.nodes[1]['attr'] = impl(0.00977516, 0.00098157, 0.0000405439995) + # G.nodes[2]['attr'] = impl(0.05405164, 0.00077534, 0.000319615990) + # 
G.nodes[3]['attr'] = impl(0.00755548, 0.00052238, 0.0000353920013) + # G.nodes[4]['attr'] = impl(0.02662373, 0.00069451, 0.000292640001) + # G.nodes[5]['attr'] = impl(0.03826237, 0.00082541, 0.000202368006) + # G.nodes[6]['attr'] = impl(0.02726197, 0.00062299, 0.000149023995) + # G.nodes[7]['attr'] = impl(0.00411129, 0.00015831, 0.0000356799997) + # G.nodes[8]['attr'] = impl(0.09115124, 0.00550461, 0.000915455997) + # G.nodes[9]['attr'] = impl(0.05428672, 0.00237727, 0.000427552015) + # G.nodes[10]['attr'] = impl(0.01386261, 0.00059962, 0.000106495999) + # + # output_size_01 = 64 * 55 * 55 * 4 / (1024 * 1024) + # G.edges[(0, 1)]['attr'] = trans(output_size_01/10, output_size_01/8, output_size_01/7) + # output_size_12 = 64 * 27 * 27 * 4 / (1024 * 1024) + # G.edges[(1, 2)]['attr'] = trans(output_size_12/10, output_size_12/8, output_size_12/7) + # output_size_23 = 192 * 27 * 27 * 4 / (1024 * 1024) + # G.edges[(2, 3)]['attr'] = trans(output_size_23/10, output_size_23/8, output_size_23/7) + # output_size_34 = 192 * 13 * 13 * 4 / (1024 * 1024) + # G.edges[(3, 4)]['attr'] = trans(output_size_34/10, output_size_34/8, output_size_34/7) + # output_size_45 = 384 * 13 * 13 * 4 / (1024 * 1024) + # G.edges[(4, 5)]['attr'] = trans(output_size_45 / 10, output_size_45 /8, output_size_45/7) + # output_size_56 = 256 * 13 * 13 * 4 / (1024 * 1024) + # G.edges[(5, 6)]['attr'] = trans(output_size_56 / 10, output_size_56 /8, output_size_56/7) + # output_size_67 = 256 * 13 * 13 * 4 / (1024 * 1024) + # G.edges[(6, 7)]['attr'] = trans(output_size_67 / 10, output_size_67 / 8, output_size_67/7) + # output_size_78 = 9216 * 1 * 1 * 4 / (1024 * 1024) + # G.edges[(7, 8)]['attr'] = trans(output_size_78 / 10, output_size_78 /8, output_size_78/7) + # output_size_89 = 4096 * 1 * 1 * 4 / (1024 * 1024) + # G.edges[(8, 9)]['attr'] = trans(output_size_89 / 10, output_size_89 / 8, output_size_89/7) + # output_size_910 = 4096 * 1 * 1 * 4 / (1024 * 1024) + # G.edges[(9, 10)]['attr'] = trans(output_size_910 / 10, output_size_910 / 8, output_size_910/7) + # + # G.nodes['input']['attr'] = 'device' + # input_size = 3 * 224 * 224 * 4 / (1024 * 1024) + # G.edges[('input', 0)]['attr'] = trans(input_size / 6, input_size / 3, input_size) + # G.edges[(10, 'output')]['attr'] = trans(100, 100, 100) + # print(G.edges(data=True)) + # + # G = assign_nodes_to_layers(G, layer_dict) + # for node in G.nodes: + # print('Node %s is at %s' % (str(node), G.nodes[node].get('location'))) + # + # pos = nx.spring_layout(G) + # labels = nx.get_node_attributes(G, 'location') + # nx.draw_networkx_nodes(G, pos=pos) + # nx.draw_networkx_labels(G, pos=pos, labels=labels) + # nx.draw_networkx_edges(G, pos=pos, arrows=True) + # plt.show() \ No newline at end of file diff --git a/dnn_split/horizontal_partition_test.py b/dnn_split/horizontal_partition_test.py new file mode 100644 index 0000000..e3e0e5d --- /dev/null +++ b/dnn_split/horizontal_partition_test.py @@ -0,0 +1,321 @@ +import itertools +import networkx as nx +import matplotlib.pyplot as plt +from collections import OrderedDict +from dnn_split.horizontal_partition import * + +class impl: + def __init__(self, device, edge, cloud): + self.device = device + self.edge = edge + self.cloud = cloud + +class trans: + def __init__(self, d2e, e2c,d2c): + self.d2e = d2e + self.e2c = e2c + self.d2c = d2c + +def build_alex_graph(d2e, e2c, d2c): + G = nx.DiGraph() + node_list = list(range(11)) + G.add_nodes_from(node_list) + for i in range(10): + G.add_edge(i, i+1) + + layer_dict = get_layer(G) + + G.add_node('input') + 
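+    # Nodes 0-10 are AlexNet's compute layers in execution order; 'input' and
+    # 'output' are virtual endpoints. Each node 'attr' is an impl record with
+    # the per-layer execution time (seconds) on device, edge and cloud, and
+    # each edge 'attr' is a trans record with the transfer time of the
+    # intermediate feature map over the device-edge, edge-cloud and
+    # device-cloud links. The hard-coded output sizes below appear to be
+    # feature-map sizes in megabits (e.g. 5.908203125 = 64 * 55 * 55 * 4 bytes
+    # * 8 / 2**20 for the first conv output), so dividing by link rates given
+    # in Mbit/s yields transfer delays in seconds.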
G.add_node('output') + G.add_edge('input', 0) + G.add_edge(10, 'output') + G.nodes['input']['attr'] = impl(0, 0, 0) + G.nodes[0]['attr'] = impl(0.0387906074523925, 0.00161535739898681, 0.000158524800837039) + G.nodes[1]['attr'] = impl(0.00837891101837158, 0.000992012023925781, 0.000104918397590518) + G.nodes[2]['attr'] = impl(0.0532735347747802, 0.000886416435241699, 0.000335372802615165) + G.nodes[3]['attr'] = impl(0.00678062438964843, 0.000534486770629882, 0.0000306591999717056) + G.nodes[4]['attr'] = impl(0.025732421875, 0.00070655345916748, 0.000196169601380825) + G.nodes[5]['attr'] = impl(0.0373493671417236, 0.000864291191101074, 0.000201350398361682) + G.nodes[6]['attr'] = impl(0.0266769647598266, 0.00054333209991455, 0.000142950402200222) + G.nodes[7]['attr'] = impl(0.00391316413879394, 0.000162029266357421, 0.0000364096000790596) + G.nodes[8]['attr'] = impl(0.0905773401260376, 0.00514111518859863, 0.000916819202899932) + G.nodes[9]['attr'] = impl(0.05386643409729, 0.00220365524291992, 0.000424079996347427) + G.nodes[10]['attr'] = impl(0.0138338804244995, 0.000534629821777343, 0.00014015680104494) + G.nodes['output']['attr'] = impl(0, 0, 0) + + d2e = d2c + e2c = e2c + d2c = d2c + + output_size_01 = 5.908203125 + G.edges[(0, 1)]['attr'] = trans(output_size_01 / d2e, output_size_01 / e2c, output_size_01 / d2c) + output_size_12 = 1.423828125 + G.edges[(1, 2)]['attr'] = trans(output_size_12 / d2e, output_size_12 / e2c, output_size_12 / d2c) + output_size_23 = 4.271484375 + G.edges[(2, 3)]['attr'] = trans(output_size_23 / d2e, output_size_23 / e2c, output_size_23 / d2c) + output_size_34 = 0.990234375 + G.edges[(3, 4)]['attr'] = trans(output_size_34 / d2e, output_size_34 / e2c, output_size_34 / d2c) + output_size_45 = 1.98046875 + G.edges[(4, 5)]['attr'] = trans(output_size_45 / d2e, output_size_45 / e2c, output_size_45 / d2c) + output_size_56 = 1.3203125 + G.edges[(5, 6)]['attr'] = trans(output_size_56 / d2e, output_size_56 / e2c, output_size_56 / d2c) + output_size_67 = 1.3203125 + G.edges[(6, 7)]['attr'] = trans(output_size_67 / d2e, output_size_67 / e2c, output_size_67 / d2c) + output_size_78 = 0.28125 + G.edges[(7, 8)]['attr'] = trans(output_size_78 / d2e, output_size_78 / e2c, output_size_78 / d2c) + output_size_89 = 0.125 + G.edges[(8, 9)]['attr'] = trans(output_size_89 / d2e, output_size_89 / e2c, output_size_89 / d2c) + output_size_910 = 0.125 + G.edges[(9, 10)]['attr'] = trans(output_size_910 / d2e, output_size_910 / e2c, output_size_910 / d2c) + + G.nodes['input']['location'] = 'device' + input_size = 4.59375 + G.edges[('input', 0)]['attr'] = trans(input_size / d2e, input_size / e2c, input_size/d2c) + G.edges[(10, 'output')]['attr'] = trans(10000, 10000, 10000) + + return G, layer_dict + + +def build_vgg_graph(d2e, e2c, d2c): + G = nx.DiGraph() + node_list = list(range(21)) + G.add_nodes_from(node_list) + for i in range(20): + G.add_edge(i, i+1) + + layer_dict = get_layer(G) + + G.add_node('input') + G.add_node('output') + G.add_edge('input', 0) + G.add_edge(20, 'output') + G.nodes['input']['attr'] = impl(0, 0, 0) + G.nodes[0]['attr'] = impl(0.0609938383102417, 0.00338995456695556, 0.0000605106353759765) + G.nodes[1]['attr'] = impl(0.592135882377624, 0.00845353603363037, 0.0000553369522094726) + G.nodes[2]['attr'] = impl(0.0820929288864135, 0.00880284309387207, 0.0000228643417358398) + G.nodes[3]['attr'] = impl(0.226624870300292, 0.00373132228851318, 0.0000540971755981445) + G.nodes[4]['attr'] = impl(0.448662877082824, 0.00653448104858398, 0.0000515937805175781) + 
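+    # The 21 chain nodes presumably correspond to VGG-16's 13 convolution,
+    # 5 max-pooling and 3 fully-connected layers in order; the edge output
+    # sizes below follow the same megabit convention as the AlexNet graph
+    # (e.g. 98 = 64 * 224 * 224 * 4 bytes * 8 / 2**20 for the conv1_1 output).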
G.nodes[5]['attr'] = impl(0.0405383110046386, 0.00466926097869873, 0.0000332355499267578) + G.nodes[6]['attr'] = impl(0.176293754577636, 0.00315544605255126, 0.0000571727752685546) + G.nodes[7]['attr'] = impl(0.340849637985229, 0.00554869174957275, 0.0000574827194213867) + G.nodes[8]['attr'] = impl(0.334842944145202, 0.00576448440551757, 0.0000536680221557617) + G.nodes[9]['attr'] = impl(0.0198351860046386, 0.0023181676864624, 0.0000220775604248046) + G.nodes[10]['attr'] = impl(0.150825953483581, 0.0032393455505371, 0.0000629425048828125) + G.nodes[11]['attr'] = impl(0.295534491539001, 0.00640218257904052, 0.0000623226165771484) + G.nodes[12]['attr'] = impl(0.295758509635925, 0.00628831386566162, 0.0000530242919921875) + G.nodes[13]['attr'] = impl(0.0102149486541748, 0.00123481750488281, 0.0000325918197631835) + G.nodes[14]['attr'] = impl(0.0862767219543457, 0.00252645015716552, 0.0000523328781127929) + G.nodes[15]['attr'] = impl(0.0842627763748169, 0.00222053527832031, 0.0000505447387695312) + G.nodes[16]['attr'] = impl(0.0855239391326904, 0.00214335918426513, 0.0000607967376708984) + G.nodes[17]['attr'] = impl(0.00350527763366699, 0.000349688529968261, 0.0000220775604248046) + G.nodes[18]['attr'] = impl(0.330885338783264, 0.0138950824737548, 0.0000518321990966796) + G.nodes[19]['attr'] = impl(0.0538443565368652, 0.00220961570739746, 0.0000421762466430664) + G.nodes[20]['attr'] = impl(0.0138441324234008, 0.000549554824829101, 0.000037240982055664) + G.nodes['output']['attr'] = impl(0, 0, 0) + + d2e = d2e + e2c = e2c + d2c = d2c + + output_size_01 = 98 + G.edges[(0, 1)]['attr'] = trans(output_size_01 / d2e, output_size_01 / e2c, output_size_01 / d2c) + output_size_12 = 98 + G.edges[(1, 2)]['attr'] = trans(output_size_12 / d2e, output_size_12 / e2c, output_size_12 / d2c) + output_size_23 = 24.5 + G.edges[(2, 3)]['attr'] = trans(output_size_23 / d2e, output_size_23 / e2c, output_size_23 / d2c) + output_size_34 = 49 + G.edges[(3, 4)]['attr'] = trans(output_size_34 / d2e, output_size_34 / e2c, output_size_34 / d2c) + output_size_45 = 49 + G.edges[(4, 5)]['attr'] = trans(output_size_45 / d2e, output_size_45 / e2c, output_size_45 / d2c) + output_size_56 = 12.25 + G.edges[(5, 6)]['attr'] = trans(output_size_56 / d2e, output_size_56 / e2c, output_size_56 / d2c) + output_size_67 = 24.5 + G.edges[(6, 7)]['attr'] = trans(output_size_67 / d2e, output_size_67 / e2c, output_size_67 / d2c) + output_size_78 = 24.5 + G.edges[(7, 8)]['attr'] = trans(output_size_78 / d2e, output_size_78 / e2c, output_size_78 / d2c) + output_size_89 = 24.5 + G.edges[(8, 9)]['attr'] = trans(output_size_89 / d2e, output_size_89 / e2c, output_size_89 / d2c) + output_size_910 = 6.125 + G.edges[(9, 10)]['attr'] = trans(output_size_910 / d2e, output_size_910 / e2c, output_size_910 / d2c) + output_size_1011 = 12.25 + G.edges[(10, 11)]['attr'] = trans(output_size_1011 / d2e, output_size_1011 / e2c, output_size_1011 / d2c) + output_size_1112 = 12.25 + G.edges[(11, 12)]['attr'] = trans(output_size_1112 / d2e, output_size_1112 / e2c, output_size_1112 / d2c) + output_size_1213 = 12.25 + G.edges[(12, 13)]['attr'] = trans(output_size_1213 / d2e, output_size_1213 / e2c, output_size_1213 / d2c) + output_size_1314 = 3.0625 + G.edges[(13, 14)]['attr'] = trans(output_size_1314 / d2e, output_size_1314 / e2c, output_size_1314 / d2c) + output_size_1415 = 3.0625 + G.edges[(14, 15)]['attr'] = trans(output_size_1415 / d2e, output_size_1415 / e2c, output_size_1415 / d2c) + output_size_1516 = 3.0625 + G.edges[(15, 16)]['attr'] = trans(output_size_1516 
/ d2e, output_size_1516 / e2c, output_size_1516 / d2c) + output_size_1617 = 3.0625 + G.edges[(16, 17)]['attr'] = trans(output_size_1617 / d2e, output_size_1617 / e2c, output_size_1617 / d2c) + output_size_1718 = 0.765625 + G.edges[(17, 18)]['attr'] = trans(output_size_1718 / d2e, output_size_1718 / e2c, output_size_1718 / d2c) + output_size_1819 = 0.125 + G.edges[(18, 19)]['attr'] = trans(output_size_1819 / d2e, output_size_1819 / e2c, output_size_1819 / d2c) + output_size_1920 = 0.125 + G.edges[(19, 20)]['attr'] = trans(output_size_1920 / d2e, output_size_1920 / e2c, output_size_1920 / d2c) + + G.nodes['input']['location'] = 'device' + input_size = 4.59375 + G.edges[('input', 0)]['attr'] = trans(input_size / d2e, input_size / e2c, input_size/d2c) + G.edges[(20, 'output')]['attr'] = trans(10000, 10000, 10000) + + return G, layer_dict + + +def build_inception_graph(d2e, e2c, d2c): + G = nx.DiGraph() + node_list = list(range(24)) + G.add_nodes_from(node_list) + for i in range(23): + G.add_edge(i, i+1) + + layer_dict = get_layer(G) + + G.add_node('input') + G.add_node('output') + G.add_edge('input', 0) + G.add_edge(23, 'output') + G.nodes['input']['attr'] = impl(0, 0, 0) + G.nodes[0]['attr'] = impl(0.01985724, 0.000497532, 0.000126123) + G.nodes[1]['attr'] = impl(0.042263031, 0.000740719, 0.000103807) + G.nodes[2]['attr'] = impl(0.073670936, 0.001400018, 9.36E-05) + G.nodes[3]['attr'] = impl(0.078483748, 0.004977775, 0.000189853) + G.nodes[4]['attr'] = impl(0.206167006, 0.003538108, 0.002597237) + G.nodes[5]['attr'] = impl(0.067958188, 0.003599906, 0.000158358) + G.nodes[6]['attr'] = impl(0.124842119, 0.002338004, 0.005271482) + G.nodes[7]['attr'] = impl(0.138248348, 0.002236056, 0.003134561) + G.nodes[8]['attr'] = impl(0.13957026, 0.002309346, 0.006592107) + G.nodes[9]['attr'] = impl(0.138262939, 0.002335835, 0.003136873) + G.nodes[10]['attr'] = impl(0.177802181, 0.004854488, 0.003598857) + G.nodes[11]['attr'] = impl(0.247489667, 0.003648567, 0.009092951) + G.nodes[12]['attr'] = impl(0.273446012, 0.003607512, 0.009114122) + G.nodes[13]['attr'] = impl(0.267560506, 0.003571081, 0.008089685) + G.nodes[14]['attr'] = impl(0.218984056, 0.003588629, 0.001395845) + G.nodes[15]['attr'] = impl(0.300414777, 0.003548193, 0.001451564) + G.nodes[16]['attr'] = impl(0.320718908, 0.003593445, 0.001513076) + G.nodes[17]['attr'] = impl(0.237069464, 0.003683615, 0.001054978) + G.nodes[18]['attr'] = impl(0.108347154, 0.002796745, 0.000617337) + G.nodes[19]['attr'] = impl(0.087156343, 0.002951241, 0.001044345) + G.nodes[20]['attr'] = impl(0.085878754, 0.002769732, 0.003273726) + G.nodes[21]['attr'] = impl(0.083295107, 0.002769542, 0.005679941) + G.nodes[22]['attr'] = impl(0.003227234, 1.73E-05, 1.52E-05) + G.nodes[23]['attr'] = impl(0.038788509, 0.006604266, 0.006083632) + G.nodes['output']['attr'] = impl(0, 0, 0) + + d2e = d2e + e2c = e2c + d2c = d2c + + output_size_01 = 12.03222656 + G.edges[(0, 1)]['attr'] = trans(output_size_01 / d2e, output_size_01 / e2c, output_size_01 / d2c) + output_size_12 = 11.60253906 + G.edges[(1, 2)]['attr'] = trans(output_size_12 / d2e, output_size_12 / e2c, output_size_12 / d2c) + output_size_23 = 23.20507813 + G.edges[(2, 3)]['attr'] = trans(output_size_23 / d2e, output_size_23 / e2c, output_size_23 / d2c) + output_size_34 = 14.23828125 + G.edges[(3, 4)]['attr'] = trans(output_size_34 / d2e, output_size_34 / e2c, output_size_34 / d2c) + output_size_45 = 15.84375 + G.edges[(4, 5)]['attr'] = trans(output_size_45 / d2e, output_size_45 / e2c, output_size_45 / d2c) + output_size_56 = 
7.32421875 + G.edges[(5, 6)]['attr'] = trans(output_size_56 / d2e, output_size_56 / e2c, output_size_56 / d2c) + output_size_67 = 7.32421875 + G.edges[(6, 7)]['attr'] = trans(output_size_67 / d2e, output_size_67 / e2c, output_size_67 / d2c) + output_size_78 = 7.32421875 + G.edges[(7, 8)]['attr'] = trans(output_size_78 / d2e, output_size_78 / e2c, output_size_78 / d2c) + output_size_89 = 7.32421875 + G.edges[(8, 9)]['attr'] = trans(output_size_89 / d2e, output_size_89 / e2c, output_size_89 / d2c) + output_size_910 = 7.32421875 + G.edges[(9, 10)]['attr'] = trans(output_size_910 / d2e, output_size_910 / e2c, output_size_910 / d2c) + output_size_1011 = 4.5 + G.edges[(10, 11)]['attr'] = trans(output_size_1011 / d2e, output_size_1011 / e2c, output_size_1011 / d2c) + output_size_1112 = 4.5 + G.edges[(11, 12)]['attr'] = trans(output_size_1112 / d2e, output_size_1112 / e2c, output_size_1112 / d2c) + output_size_1213 = 4.5 + G.edges[(12, 13)]['attr'] = trans(output_size_1213 / d2e, output_size_1213 / e2c, output_size_1213 / d2c) + output_size_1314 = 4.5 + G.edges[(13, 14)]['attr'] = trans(output_size_1314 / d2e, output_size_1314 / e2c, output_size_1314 / d2c) + output_size_1415 = 4.5 + G.edges[(14, 15)]['attr'] = trans(output_size_1415 / d2e, output_size_1415 / e2c, output_size_1415 / d2c) + output_size_1516 = 4.5 + G.edges[(15, 16)]['attr'] = trans(output_size_1516 / d2e, output_size_1516 / e2c, output_size_1516 / d2c) + output_size_1617 = 4.5 + G.edges[(16, 17)]['attr'] = trans(output_size_1617 / d2e, output_size_1617 / e2c, output_size_1617 / d2c) + output_size_1718 = 4.5 + G.edges[(17, 18)]['attr'] = trans(output_size_1718 / d2e, output_size_1718 / e2c, output_size_1718 / d2c) + output_size_1819 = 1.171875 + G.edges[(18, 19)]['attr'] = trans(output_size_1819 / d2e, output_size_1819 / e2c, output_size_1819 / d2c) + output_size_1920 = 1.171875 + G.edges[(19, 20)]['attr'] = trans(output_size_1920 / d2e, output_size_1920 / e2c, output_size_1920 / d2c) + output_size_2021 = 1.171875 + G.edges[(20, 21)]['attr'] = trans(output_size_2021 / d2e, output_size_2021 / e2c, output_size_2021 / d2c) + output_size_2122 = 1.171875 + G.edges[(21, 22)]['attr'] = trans(output_size_2122 / d2e, output_size_2122 / e2c, output_size_2122 / d2c) + output_size_2223 = 0.046875 + G.edges[(22, 23)]['attr'] = trans(output_size_2223 / d2e, output_size_2223 / e2c, output_size_2223 / d2c) + + G.nodes['input']['location'] = 'device' + input_size = 4.59375 + G.edges[('input', 0)]['attr'] = trans(input_size / d2e, input_size / e2c, input_size/d2c) + G.edges[(23, 'output')]['attr'] = trans(10000, 10000, 10000) + + return G, layer_dict + +def calc_latency(G_assigned, d2e, e2c, d2c): + pred = 0 + latency = 0 + for node in G_assigned.nodes: + cur = G_assigned.nodes[node].get('location') + if cur != pred: + if pred == 'device' and cur == 'edge': + latency = latency + G_assigned.edges[(pred, node)].get('attr').d2e / d2e + if pred == 'edge' and cur == 'cloud': + latency = latency + G_assigned.edges[(pred, node)].get('attr').e2c / e2c + if pred == 'device' and cur == 'cloud': + latency = latency + G_assigned.edges[(pred, node)].get('attr').d2c / d2c + + if cur == 'device': + latency = latency + G_assigned.nodes[node].get('attr').device + if cur == 'edge': + latency = latency + G_assigned.nodes[node].get('attr').edge + if cur == 'cloud': + latency = latency + G_assigned.nodes[node].get('attr').cloud + + return latency + +if __name__ == '__main__': + d2e=64.95 + e2c=31.53 + d2c=29.78 + alex_G, alex_layer_dict = build_alex_graph(d2e, e2c, d2c) 
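+    # Illustrative sanity check of the size convention assumed in
+    # build_alex_graph (not required by the test itself): AlexNet's first conv
+    # output (64 x 55 x 55 float32 values) is 5.908203125 Mbit, so at
+    # d2e = 64.95 Mbit/s its device-to-edge transfer takes roughly 0.091 s.
+    # 'first_conv_mbit' is a local name introduced only for this check.
+    first_conv_mbit = 64 * 55 * 55 * 4 * 8 / (1024 * 1024)
+    assert abs(first_conv_mbit - 5.908203125) < 1e-9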
+ print(alex_layer_dict) + # print(Alex_G.nodes[0]['attr'].device) + # print(alex_G.nodes(data=True)) + Alex_G_assigned = assign_nodes_to_layers(alex_G, alex_layer_dict) + for node in Alex_G_assigned.nodes: + print('Node %s is at %s' % (str(node), Alex_G_assigned.nodes[node].get('location'))) + print('Latency is ', calc_latency(Alex_G_assigned, d2e, e2c, d2c)) + print('------------------------------------') + + vgg_G, vgg_layer_dict = build_vgg_graph(d2e, e2c, d2c) + print(vgg_layer_dict) + # print(Alex_G.nodes[0]['attr'].device) + # print(alex_G.nodes(data=True)) + VGG_G_assigned = assign_nodes_to_layers(vgg_G, vgg_layer_dict) + for node in VGG_G_assigned.nodes: + print('Node %s is at %s' % (str(node), VGG_G_assigned.nodes[node].get('location'))) + print('Latency is ', calc_latency(VGG_G_assigned, d2e, e2c, d2c)) + print('------------------------------------') + + inception_G, inception_layer_dict = build_inception_graph(d2e, e2c, d2c) + print(inception_layer_dict) + # print(Alex_G.nodes[0]['attr'].device) + # print(alex_G.nodes(data=True)) + inception_G_assigned = assign_nodes_to_layers(inception_G, inception_layer_dict) + for node in inception_G_assigned.nodes: + print('Node %s is at %s' % (str(node), inception_G_assigned.nodes[node].get('location'))) + print('Latency is ', calc_latency(inception_G_assigned, d2e, e2c, d2c)) + print('------------------------------------') \ No newline at end of file diff --git a/dnn_split/min_cut.py b/dnn_split/min_cut.py new file mode 100644 index 0000000..26c374e --- /dev/null +++ b/dnn_split/min_cut.py @@ -0,0 +1,100 @@ +class Graph: + + def __init__(self, graph): + self.graph = graph # residual graph + self.org_graph = [i[:] for i in graph] + self.ROW = len(graph) + self.COL = len(graph[0]) + + '''Returns true if there is a path from source 's' to sink 't' in + residual graph. Also fills parent[] to store the path ''' + + def BFS(self, s, t, parent): + + # Mark all the vertices as not visited + visited = [False] * (self.ROW) + + # Create a queue for BFS + queue = [] + + # Mark the source node as visited and enqueue it + queue.append(s) + visited[s] = True + + # Standard BFS Loop + while queue: + + # Dequeue a vertex from queue and print it + u = queue.pop(0) + + # Get all adjacent vertices of the dequeued vertex u + # If a adjacent has not been visited, then mark it + # visited and enqueue it + for ind, val in enumerate(self.graph[u]): + if visited[ind] == False and val > 0: + queue.append(ind) + visited[ind] = True + parent[ind] = u + + # If we reached sink in BFS starting from source, then return + # true, else false + return True if visited[t] else False + + # Returns the min-cut of the given graph + def minCut(self, source, sink): + + # This array is filled by BFS and to store path + parent = [-1] * (self.ROW) + + min_cost = 0 # There is no flow initially + + # Augment the flow while there is path from source to sink + while self.BFS(source, sink, parent): + + # Find minimum residual capacity of the edges along the + # path filled by BFS. Or we can say find the maximum flow + # through the path found. 
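+            # Standard Ford-Fulkerson / Edmonds-Karp augmentation step: take
+            # the bottleneck capacity along the BFS path, add it to the total
+            # flow, and update the forward/backward residual capacities along
+            # that path. Once no augmenting path remains, edges that are
+            # saturated in the residual graph but had positive capacity in the
+            # original graph cross the minimum cut; in this repo's partitioning
+            # context they are the candidate split points printed below.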
+ path_flow = float("Inf") + s = sink + while (s != source): + path_flow = min(path_flow, self.graph[parent[s]][s]) + s = parent[s] + + # Add path flow to overall flow + min_cost += path_flow + + # update residual capacities of the edges and reverse edges + # along the path + v = sink + while (v != source): + u = parent[v] + self.graph[u][v] -= path_flow + self.graph[v][u] += path_flow + v = parent[v] + + # print the edges which initially had weights + # but now have 0 weight + print("The cutting points are:") + for i in range(self.ROW): + for j in range(self.COL): + if self.graph[i][j] == 0 and self.org_graph[i][j] > 0: + print(str(i) + " - " + str(j)) + print("The minimum cost is:", min_cost) + # Create a graph given in the above diagram + +if __name__ == "__main__": + + graph = [[0, 6, 0, 3, 1, 6, 0], + [0, 0, 3, 0, 0, 0, 2], + [0, 0, 0, float("Inf"), float("Inf"), 0, 0], + [0, 0, 0, 0, 0, 10, 7], + [0, 0, 0, 0, 0, 12, 8], + [0, 0, 0, 0, 0, 0, 16], + [0, 0, 0, 0, 0, 0, 0]] + + g = Graph(graph) + + source = 0 + sink = 6 + + g.minCut(source, sink) \ No newline at end of file diff --git a/dnn_split/model_canyon.py b/dnn_split/model_canyon.py new file mode 100644 index 0000000..60db1f6 --- /dev/null +++ b/dnn_split/model_canyon.py @@ -0,0 +1,58 @@ +from dnn_split.model_util import * +import torch.nn.functional as F +import math + +MODEL_PATH = '../models/' + + +class ModelCanyon(nn.Module): + def __init__(self, model, start, end): + super(ModelCanyon, self).__init__() + layers = get_all_layers(model) + self.partialLayers = get_partial_layers(layers, start, end) + self.x_trains = nn.ModuleList(self.partialLayers) + + def forward(self, x): + for i in range(len(self.partialLayers)): + x = self.partialLayers[i](x) + if isinstance(self.partialLayers[i], nn.AdaptiveAvgPool2d): + x = torch.flatten(x, 1) + return x + +class ModelCanyonG(nn.Module): + def __init__(self, model, G): + super(ModelCanyonG, self).__init__() + self.layers = get_all_layers() + print(self.layers) + + def forward(self): + None + + +if __name__ == "__main__": + + # input = get_input() + # resnet34 = get_pretrained_resnet34() + # model_size = get_model_size(resnet34) + # print(model_size) + # model = ModelCanyon(model=resnet34, start=0, end=model_size-1) + # model.eval() + # output = model(input) + + startLayer = 0 + endLayer = 2 + pretrained_alexnet = get_pretrained_alexnet() + + path = MODEL_PATH + "partialmodel.pth" + updatedModel = ModelCanyon(model=pretrained_alexnet, start=startLayer, end=endLayer) + if isinstance(updatedModel.partialLayers[0], nn.Conv2d): + print(updatedModel.partialLayers[0]) + print(updatedModel.partialLayers) + torch.save(updatedModel, path) + + startLayer = 3 + endLayer = 20 + path2 = MODEL_PATH + "partialmodel2.pth" + updatedModel2 = ModelCanyon(model=pretrained_alexnet, start=startLayer, end=endLayer) + print(updatedModel2.partialLayers) + torch.save(updatedModel2, path2) diff --git a/dnn_split/model_ftp.py b/dnn_split/model_ftp.py new file mode 100644 index 0000000..5eedbbe --- /dev/null +++ b/dnn_split/model_ftp.py @@ -0,0 +1,163 @@ +from dnn_split.model_util import * +from dnn_split.ftp_util import * +import torch.nn.functional as F +import math + +MODEL_PATH = '../models/' + +class ModelFTP(nn.Module): + """ + calculate the inference result of the partitioned tile according to its coordinates and partial padding + """ + def __init__(self, model, start, end, coordinate, input_w, input_h): + """ + Args: + model: the DNN model + start: starting number of the layer that FTP begins from + end: 
ending number of the layer that FTP ends with + coordinate: coordinates of the partitioned tile of the start layer + input_w: width of the input feature map of the starting layer + input_h: height of the input feature map of the starting layer + """ + super(ModelFTP, self).__init__() + layers = get_all_layers(model) + self.partialLayers = get_partial_layers(layers, start, end) + self.x_trains = nn.ModuleList(self.partialLayers) + self.x1 = coordinate.top_left_x + self.y1 = coordinate.top_left_y + self.x2 = coordinate.bottom_right_x + self.y2 = coordinate.bottom_right_y + self.input_w = input_w + self.input_h = input_h + + def cal(self, x, padding): + """ + 1. update the top left coordinate of the partitioned tile according to padding info + 2. padding the partitioned tile according to its top left coordinate + """ + if len(padding) == 2: # conv layer + pad0 = padding[0] + pad1 = padding[1] + else: # pooling layer + pad0 = padding + pad1 = padding + + if self.x1 == 0: + self.x1 = 0 + x = F.pad(input=x, pad=[pad0, 0, 0, 0], mode='constant', value=0) + else: + self.x1 = self.x1 + pad0 + + if self.y1 == 0: + self.y1 = 0 + x = F.pad(input=x, pad=[0, 0, pad1, 0], mode='constant', value=0) + else: + self.y1 = self.y1 + pad1 + return x, self.x1, self.y1 + + def forward(self, x): + for i in range(len(self.partialLayers)): + if isinstance(self.partialLayers[i], nn.Conv2d): # conv layer + in_ch = self.partialLayers[i].in_channels + out_ch = self.partialLayers[i].out_channels + kernel = self.partialLayers[i].kernel_size + stride = self.partialLayers[i].stride + padding = self.partialLayers[i].padding + weight = self.partialLayers[i].weight + bias = self.partialLayers[i].bias + + if padding[0] != 0: + if self.x2 == self.input_w and self.y2 == self.input_h: # the bottom right coordinates locates at the bottom right corner + x, self.x1, self.y1 = self.cal(x, padding) + + self.x2 = self.x2 + 2 * padding[0] + self.y2 = self.y2 + 2 * padding[1] + x = F.pad(input=x, pad=[0, padding[0], 0, padding[1]], mode='constant', value=0) # padding the partitioned tile according to its right bottom coordinate + elif self.x2 == self.input_w: # the bottom right coordinates locates at the right side of the feature map + x, self.x1, self.y1 = self.cal(x, padding) + + self.x2 = self.x2 + 2 * padding[0] + self.y2 = self.y2 + padding[1] + x = F.pad(input=x, pad=[0, padding[0], 0, 0], mode='constant', value=0) # padding the partitioned tile according to its right bottom coordinate + elif self.y2 == self.input_h: # the bottom right coordinates locates at the down side of the feature map + x, self.x1, self.y1 = self.cal(x, padding) + + self.x2 = self.x2 + padding[0] + self.y2 = self.y2 + 2 * padding[1] + x = F.pad(input=x, pad=[0, 0, 0, padding[1]], mode='constant', value=0) # padding the partitioned tile according to its right bottom coordinate + else: # the bottom right coordinates locates at the middle of the feature map + x, self.x1, self.y1 = self.cal(x, padding) + + self.x2 = self.x2 + padding[0] + self.y2 = self.y2 + padding[1] + + # calculate the coordinate of next layer + self.x1 = math.floor(self.x1 / stride[0]) + self.y1 = math.floor(self.y1 / stride[1]) + + self.x2 = math.floor((self.x2 - kernel[0]) / stride[0] + 1) + self.y2 = math.floor((self.y2 - kernel[1]) / stride[1] + 1) + + # calculate the width and height of the input feature map of next layer + self.input_h = math.floor((self.input_h - kernel[1] + 2 * padding[1]) / stride[1] + 1) + self.input_w = math.floor((self.input_w - kernel[0] + 2 * padding[0]) / 
stride[0] + 1) + + # change padding of the current layer to 0 + self.partialLayers[i] = nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=kernel[0], + stride=stride[0], padding=0) + with torch.no_grad(): + self.partialLayers[i].weight = nn.Parameter(weight) + self.partialLayers[i].bias = nn.Parameter(bias) + + if isinstance(self.partialLayers[i], nn.MaxPool2d): # pooling layer + kernel = self.partialLayers[i].kernel_size + stride = self.partialLayers[i].stride + padding = self.partialLayers[i].padding + + if padding != 0: + if self.x2 == self.input_w and self.y2 == self.input_h: # the bottom right coordinates locates at the bottom right corner + x, self.x1, self.y1 = self.cal(x, padding) + + self.x2 = self.x2 + 2 * padding + self.y2 = self.y2 + 2 * padding + x = F.pad(input=x, pad=[0, padding, 0, padding], mode='constant', value=0) # padding the partitioned tile according to its right bottom coordinate + elif self.x2 == self.input_w: # the bottom right coordinates locates at the right side of the feature map + x, self.x1, self.y1 = self.cal(x, padding) + + self.x2 = self.x2 + 2 * padding + self.y2 = self.y2 + padding + x = F.pad(input=x, pad=[0, padding, 0, 0], mode='constant', value=0) # padding the partitioned tile according to its right bottom coordinate + elif self.y2 == self.input_h: # the bottom right coordinates locates at the down side of the feature map + x, self.x1, self.y1 = self.cal(x, padding) + + self.x2 = self.x2 + padding + self.y2 = self.y2 + 2 * padding + x = F.pad(input=x, pad=[0, 0, 0, padding], mode='constant', value=0) # padding the partitioned tile according to its right bottom coordinate + else: # the bottom right coordinates locates at the middle of the feature map + x, self.x1, self.y1 = self.cal(x, padding) + + self.x2 = self.x2 + padding + self.y2 = self.y2 + padding + + # calculate the coordinate of next layer + self.x1 = math.floor(self.x1 / stride) + self.y1 = math.floor(self.y1 / stride) + + self.x2 = math.floor((self.x2 - kernel) / stride + 1) + self.y2 = math.floor((self.y2 - kernel) / stride + 1) + + # calculate the width and height of the input feature map of next layer + self.input_h = math.floor((self.input_h - kernel + 2 * padding) / stride + 1) + self.input_w = math.floor((self.input_w - kernel + 2 * padding) / stride + 1) + + # change padding of the current layer to 0 + self.partialLayers[i] = nn.MaxPool2d(kernel_size=kernel, stride=stride, padding=0) + + print("The x after adding padding is", x.size()) + x = self.partialLayers[i](x) + print("The x after processing is", x.size()) + if isinstance(self.partialLayers[i], nn.AdaptiveAvgPool2d): + x = torch.flatten(x, 1) + + print("----------------------------------------------------------------") + return x \ No newline at end of file diff --git a/dnn_split/model_graph.py b/dnn_split/model_graph.py new file mode 100644 index 0000000..8ffc416 --- /dev/null +++ b/dnn_split/model_graph.py @@ -0,0 +1,222 @@ +import torch +import networkx as nx +import numpy as np +import matplotlib.pyplot as plt +from collections import OrderedDict +from graphviz import Digraph +from torch.autograd import Variable +import torchvision.models as models +import torch.nn as nn +from dnn_split.model_util import get_all_layers +from dnn_models.mynet import MyNet +from networkx.drawing.nx_pydot import graphviz_layout +import time + + +def make_graph(var, params): + param_map = {id(v): k for k, v in params.items()} + print(param_map) + id_counter = 0 + param_list = [] + + node_attr = dict(style='filled', + shape='box', 
+ align='left', + fontsize='12', + ranksep='0.1', + height='0.2') + + dot = Digraph(node_attr=node_attr, graph_attr=dict(size="12,12")) + seen = set() + + G = nx.DiGraph() + G_compute = nx.DiGraph() + + def size_to_str(size): + return '(' + (', ').join(['%d' % v for v in size]) + ')' + + output_nodes = (var.grad_fn,) if not isinstance(var, tuple) else tuple(v.grad_fn for v in var) + + def add_nodes(var): + nonlocal id_counter + nonlocal param_list + if var not in seen: + if torch.is_tensor(var): + dot.node(str(id(var)), size_to_str(var.size()), fillcolor='orange') + G_compute.add_node(str(id(var)), name=param_map.get(id(var)), attr=size_to_str(var.size())) + + elif hasattr(var, 'variable'): + u = var.variable + print("variable1 ", var) + node_name = '%s\n %s' % (param_map.get(id(u)), size_to_str(u.size())) + dot.node(str(id(var)), node_name, fillcolor='lightblue') + + G_compute.add_node(str(id(var)), name=param_map.get(id(u)), attr=size_to_str(u.size())) + param_list.append(str(id(var))) + else: + dot.node(str(id(var)), str(type(var).__name__)) + print(str(var)) + if str(type(var).__name__) != "TBackward" and str(type(var).__name__) != "ExpandBackward" and str(type(var).__name__) != "ViewBackward": + G.add_node(str(id(var)), id=id_counter, name=str(type(var).__name__)) + G_compute.add_node(str(id(var)), id=id_counter, name=str(type(var).__name__)) + id_counter = id_counter + 1 + + else: + G_compute.add_node(str(id(var)), name=str(type(var).__name__)) + + seen.add(var) + + + if hasattr(var, 'next_functions'): + # print(var.next_functions) + for u in var.next_functions: + if u[0] is not None: + dot.edge(str(id(u[0])), str(id(var))) + if str(type(u[0]).__name__) != "AccumulateGrad" and str(type(u[0]).__name__) != "TBackward" and str(type(u[0]).__name__) != "ExpandBackward": + G.add_edge(str(id(u[0])), str(id(var))) + G_compute.add_edge(str(id(u[0])), str(id(var))) + add_nodes(u[0]) + + if hasattr(var, 'saved_tensors'): + for t in var.saved_tensors: + dot.edge(str(id(t)), str(id(var))) + G_compute.add_edge(str(id(t)), str(id(var))) + add_nodes(t) + + add_nodes(var.grad_fn) + return dot, G, G_compute, param_list + + +def remove_empty_nodes(G): + node_removal = [] + for node in G.nodes(): + if G.nodes[node] == {}: + node_removal.append(node) + for node in node_removal: + parent = next(G.predecessors(node)) + G = nx.contracted_nodes(G, parent, node) + + return G + + +def make_summray(model): + summary = OrderedDict() + for layer in model.named_children(): + layer_name = layer[0] + layer_func = layer[1] + summary[layer_name] = layer_func + + return summary + + +def assign_func(G, G_compute, summary, param_dict): + # Traverse the graph + # roots = [n for n, d in G.in_degree() if d == 0] + # tree = nx.bfs_tree(G, source=roots[0], reverse=False) + # nodes = [roots[0]] + [v for u, v in tree.edges()] + nodes = list(nx.topological_sort(G)) + + for node in nodes: + node_name = G.nodes[node].get('name') + if node_name == 'MkldnnConvolutionBackward': + type = 'conv' + pred_id = list(G_compute.predecessors(node)) + print("pred ",pred_id) + for id in pred_id: + if id in param_dict: + type = G_compute.nodes[id].get('name').split(".")[0].split("'")[0] + print("type is", type) + break + func = summary.get(type) + G.nodes[node]['func'] = func + elif node_name == 'MaxPool2DWithIndicesBackward': + type = 'maxpool2d' + pred_id = list(G_compute.predecessors(node)) + print("pred ", pred_id) + for id in pred_id: + if id in param_dict: + type = G_compute.nodes[id].get('name').split(".")[0].split("'")[0] + print("type 
is", type) + break + func = summary.get(type) + G.nodes[node]['func'] = func + elif node_name == 'ReluBackward1': + type = 'relu' + func = summary.get(type) + G.nodes[node]['func'] = func + elif node_name == 'AddBackward0': + type = 'sum' + func = np.sum + G.nodes[node]['func'] = func + elif node_name == 'MulBackward0': + type = 'product' + func = np.prod + G.nodes[node]['func'] = func + elif node_name == 'DivBackward0': + type = 'division' + func = np.divide + G.nodes[node]['func'] = func + + return G + + +def make_forward(G, G_compute, x): + # Traverse the graph + roots = [n for n, d in G.in_degree() if d == 0] + tree = nx.bfs_tree(G, source=roots[0], reverse=False) + nodes = [roots[0]] + [v for u, v in tree.edges()] + + for node in nodes: + func = G.nodes[node].get('func') + if func != None: + pred_id = list(G.predecessors(node)) + if len(pred_id) == 0: + res = func(x) + G.nodes[node]['output'] = res + elif len(pred_id) == 1: + pred_res = G.nodes[pred_id[0]].get('output') + res = func(pred_res) + G.nodes[node]['output'] = res + else: + pred_res = [] + for id in pred_id: + pred_res.append(G.nodes[id].get('output')) + res = func(pred_res) + G.nodes[node]['output'] = res + + return G.nodes[nodes[len(nodes)-1]].get('output') + + +if __name__ == "__main__": + inputs = torch.randn(1, 3, 224, 224) + net = MyNet() + net.eval() + for i in range(10): + start = time.time() + res1 = net(inputs) + end = time.time() + print("time original is ", end - start) + print("original result is ", res1) + # print(get_all_layers(resnet18)) + y = net(Variable(inputs)) + dot, G, G_compute, param_list = make_graph(y, params=dict(net.named_parameters())) + dot.view(filename="mynet", directory="../models/") + G = remove_empty_nodes(G) + + summary = make_summray(net) + + G = assign_func(G, G_compute, summary, param_list) + print("Graph is ", G.nodes(data=True)) + for i in range(10): + start = time.time() + res2 = make_forward(G, G_compute, inputs) + end = time.time() + print("time static graph is ", end - start) + print("static graph result is ", res2) + + labels = nx.get_node_attributes(G, 'name') + pos = nx.spring_layout(G) + nx.draw_networkx_nodes(G, pos=pos) + nx.draw_networkx_labels(G, pos=pos, labels=labels) + nx.draw_networkx_edges(G, pos=pos, arrows=True) + plt.show() \ No newline at end of file diff --git a/dnn_split/model_infer_time.py b/dnn_split/model_infer_time.py new file mode 100644 index 0000000..d24ae96 --- /dev/null +++ b/dnn_split/model_infer_time.py @@ -0,0 +1,99 @@ +from dnn_split.model_util import * +import time +import torch +import torch.nn as nn + +PATH = "../model/alexnet-owt-4df8aa71.pth" + +class ModelInferTime(nn.Module): + + def __init__(self, model, start, end): + super(ModelInferTime, self).__init__() + layers = get_all_layers(model) + self.partialLayers = get_partial_layers(layers, start, end) + self.x_train = nn.ModuleList(self.partialLayers) + + def get_mul(self, arr): + mul = 1 + for i in arr: + mul = mul * i + return mul + + def forward(self, x): + infer_time = [] + x_size = [] + input_size = self.get_mul(x.size()[1:]) + for layer in self.partialLayers: + start_time = time.time() + x = layer(x) + end_time = time.time() + running_time = end_time - start_time + x_size.append(self.get_mul(x.size()[1:])) + # print("time cost of No.", i, "layer is: %.*f sec" %(9, running_time)) + if isinstance(layer, nn.AdaptiveAvgPool2d): + start_time = time.time() + x = torch.flatten(x, 1) + end_time = time.time() + flatten_running_time = end_time - start_time + running_time = running_time + 
flatten_running_time + + infer_time.append(running_time) + + x_size[-1] = input_size + return x_size, infer_time + + +class ModelInferTimeGPU(nn.Module): + + def __init__(self, model, start, end): + super(ModelInferTimeGPU, self).__init__() + layers = get_all_layers(model) + self.partialLayers = get_partial_layers(layers, start, end) + self.x_train = nn.ModuleList(self.partialLayers) + + def get_mul(self, arr): + mul = 1 + for i in arr: + mul = mul * i + return mul + + def forward(self, x): + infer_time = [] + x_size = [] + input_size = self.get_mul(x.size()[1:]) + start_time = torch.cuda.Event(enable_timing=True) + end_time = torch.cuda.Event(enable_timing=True) + + for layer in self.partialLayers: + start_time.record() + x = layer(x) + end_time.record() + torch.cuda.synchronize() + running_time = start_time.elapsed_time(end_time)/1000 + x_size.append(self.get_mul(x.size()[1:])) + # print("time cost of No.", i, "layer is: %.*f sec" %(9, running_time)) + if isinstance(layer, nn.AdaptiveAvgPool2d): + start_time.record() + x = torch.flatten(x, 1) + end_time.record() + torch.cuda.synchronize() + flatten_running_time = start_time.elapsed_time(end_time)/1000 + running_time = running_time + flatten_running_time + + infer_time.append(running_time) + + x_size[-1] = input_size + return x_size, infer_time + + +if __name__ == "__main__": + + input = get_input() + + resnet34 = get_pretrained_resnet34() + model_size = get_model_size(resnet34) + print(model_size) + model = ModelInferTime(model=resnet34, start=0, end=model_size-1) + model.eval() + x_size, output = model(input) + print(len(x_size)) \ No newline at end of file diff --git a/dnn_split/model_util.py b/dnn_split/model_util.py new file mode 100644 index 0000000..2f9df87 --- /dev/null +++ b/dnn_split/model_util.py @@ -0,0 +1,95 @@ +import torch +import torch.nn as nn +import torchvision.models as models +from PIL import Image +from torchvision import transforms + +MODEL_PATH = "../models/" +IMAGE_PATH = '../data/images/' + + +def get_alexnet(): + alexnet = models.alexnet(pretrained=False) + return alexnet + + +def get_pretrained_alexnet(): + pretrained_alexnet = models.alexnet(pretrained=False) + pretrained_alexnet.load_state_dict(torch.load(MODEL_PATH + 'alexnet-owt-4df8aa71.pth')) + + return pretrained_alexnet + + +def get_pretrained_vgg16(): + pretrained_vgg16 = models.vgg16(pretrained=False) + pretrained_vgg16.load_state_dict(torch.load(MODEL_PATH + 'alexnet-owt-4df8aa71.pth')) + + return pretrained_vgg16 + + + +def get_resnet34(): + resnet34 = models.resnet34(pretrained=False) + return resnet34 + + +def get_pretrained_resnet34(): + pretrained_resnet34 = models.resnet34(pretrained=False) + pretrained_resnet34.load_state_dict(torch.load(MODEL_PATH + "resnet34-333f7ec4.pth")) + return pretrained_resnet34 + + +def get_all_layers(model): + # submodel = nn.Sequential(*list(model.children())) + # layers = [module for module in model.modules() if type(module) != nn.Sequential] + # return layers[1:] + layers = [] + temp = [elem for elem in model.children()] + + for layer in temp: + if isinstance(layer, nn.Sequential): + for i in layer.children(): + layers.append(i) + else: + layers.append(layer) + + return layers + + +def get_partial_layers(layers, start, end): + partial_layers = layers[start: end + 1] + return partial_layers + + +def get_model_size(model): + layered_model = get_all_layers(model) + return len(layered_model) + + +def get_input(): + input_image = Image.open(IMAGE_PATH + 'dog.jpg') + preprocess = transforms.Compose([ + 
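+        # Standard ImageNet preprocessing: resize the shorter side to 256,
+        # center-crop to 224x224, convert to a [0, 1] float tensor, then
+        # normalize with the usual ImageNet per-channel mean and std.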
transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + input_tensor = preprocess(input_image) + input_batch = input_tensor.unsqueeze(0) + + return input_batch + + +if __name__ == "__main__": + model = get_alexnet() + print(get_model_size(model)) + layers = get_all_layers(model) + partial_layers = get_partial_layers(layers, 0, 20) + for i in range(len(partial_layers)): + print(partial_layers[i]) + + + + + + diff --git a/dnn_split/model_vis.py b/dnn_split/model_vis.py new file mode 100644 index 0000000..4512d77 --- /dev/null +++ b/dnn_split/model_vis.py @@ -0,0 +1,78 @@ +from graphviz import Digraph +import torch +from torch.autograd import Variable +from dnn_models.inceptionv4 import * +from torchvision import models +import networkx as nx +import matplotlib.pyplot as plt + + +def make_dot(var, params): + """ Produces Graphviz representation of PyTorch autograd graph + + Blue nodes are the Variables that require grad, orange are Tensors + saved for backward in torch.autograd.Function + + Args: + var: output Variable + params: dict of (name, Variable) to add names to node that + require grad (TODO: make optional) + """ + param_map = {id(v): k for k, v in params.items()} + print(param_map) + + node_attr = dict(style='filled', + shape='box', + align='left', + fontsize='12', + ranksep='0.1', + height='0.2') + + dot = Digraph(node_attr=node_attr, graph_attr=dict(size="12,12")) + seen = set() + G = nx.Graph() + + def size_to_str(size): + return '(' + (', ').join(['%d' % v for v in size]) + ')' + + def add_nodes(var): + if var not in seen: + if torch.is_tensor(var): + dot.node(str(id(var)), size_to_str(var.size()), fillcolor='orange') + elif hasattr(var, 'variable'): + u = var.variable + node_name = '%s\n %s' % (param_map.get(id(u)), size_to_str(u.size())) + dot.node(str(id(var)), node_name, fillcolor='lightblue') + else: + dot.node(str(id(var)), str(type(var).__name__)) + G.add_node(str(id(var)), name=str(type(var).__name__)) + print("just add node %s, the name is %s" % (str(id(var)), str(type(var).__name__))) + seen.add(var) + if hasattr(var, 'next_functions'): + for u in var.next_functions: + if u[0] is not None: + dot.edge(str(id(u[0])), str(id(var))) + if str(type(u[0]).__name__) != "AccumulateGrad": + G.add_edge(str(id(u[0])), str(id(var))) + print("add an edge from %s node to %s node" % (str(type(u[0]).__name__), str(type(var).__name__))) + add_nodes(u[0]) + if hasattr(var, 'saved_tensors'): + for t in var.saved_tensors: + dot.edge(str(id(t)), str(id(var))) + G.add_edge(str(id(t)), str(id(var))) + add_nodes(t) + + add_nodes(var.grad_fn) + return dot, G + + +inputs = torch.randn(1, 3, 224, 224) +inception = inceptionv4() +y = inception(Variable(inputs)) +# print(y) + +dot, G = make_dot(y, inception.state_dict()) +dot.view(filename="inceptionv4", directory="../models/vispdf/") +labels = nx.get_node_attributes(G, 'name') +nx.draw(G, labels=labels) +plt.show() \ No newline at end of file diff --git a/dnn_split/split_point.py b/dnn_split/split_point.py new file mode 100644 index 0000000..522336b --- /dev/null +++ b/dnn_split/split_point.py @@ -0,0 +1,27 @@ +import numpy as np +import joblib + + +def find_split(delay_edge, delay_cloud, delay_trans): + num = len(delay_cloud) + total_delay = np.zeros(num+1) + for i in range(num-1): + total_delay[i] = np.sum(delay_edge[0:i+1]) + np.sum(delay_cloud[i+1:num]) + delay_trans[i] + total_delay[num-1] = np.sum(delay_edge) + total_delay[num] = 
np.sum(delay_cloud) + delay_trans[num-1] + split_point = np.argmin(total_delay) + min_delay = np.min(total_delay) + return split_point, min_delay + + +def compute_delay_trans(data_size, bandwidth): + delay_trans = data_size/bandwidth + return delay_trans + + +def predict_delay_per_layer(layer_type, layer_conf_para, dev_info): + PATH = "../model/regression_model.m" + input = [layer_type, layer_conf_para, dev_info] + model = joblib.load(PATH) + delay_per_layer = model.predict(input) + return delay_per_layer diff --git a/inference.py b/inference.py new file mode 100644 index 0000000..8b26681 --- /dev/null +++ b/inference.py @@ -0,0 +1,40 @@ +import torch +from dnn_split.model_util import get_alexnet +from dnn_split.model_canyon import ModelCanyon +from PIL import Image +from torchvision import transforms + + +MODEL_PATH = './models/' +IMAGE_PATH = './data/images/' + + +def get_input(): + input_image = Image.open(IMAGE_PATH+'dog.jpg') + preprocess = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + input_tensor = preprocess(input_image) + input_batch = input_tensor.unsqueeze(0) + + return input_batch + +if __name__ == '__main__': + input = get_input() + path = MODEL_PATH+"partialmodel.pth" + alexnet = get_alexnet() + model = ModelCanyon(model=alexnet, start=0, end=2) + model = torch.load(path) + model.eval() + # print(model.partialLayers) + output = model(input) + + path2 = MODEL_PATH+"partialmodel2.pth" + model2 = ModelCanyon(model=alexnet, start=3, end=20) + model2 = torch.load(path2) + model2.eval() + output2 = model2(output) + print(output2) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b356961 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,27 @@ +certifi==2020.6.20 +chardet==3.0.4 +cycler==0.10.0 +decorator==4.4.2 +dgl-cu102==0.5.1 +future==0.18.2 +graphviz==0.14.1 +idna==2.10 +joblib==0.16.0 +kiwisolver==1.2.0 +matplotlib==3.3.1 +networkx==2.5 +numpy==1.19.1 +pandas==1.1.2 +Pillow==7.2.0 +pydot==1.4.1 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2020.1 +requests==2.24.0 +scipy==1.5.2 +six==1.15.0 +torch==1.6.0 +torchprof==1.1.1 +torchsummary==1.5.1 +torchvision==0.7.0 +urllib3==1.25.10 diff --git a/test/misc_test.py b/test/misc_test.py new file mode 100644 index 0000000..6176dcc --- /dev/null +++ b/test/misc_test.py @@ -0,0 +1,49 @@ +# from __future__ import print_function +# import torch +# import os +# import numpy as np +# from collections import OrderedDict +# import itertools +# +# pred_list = [0,1,2,3,4,5] +# data = set(itertools.combinations(pred_list, 2)) +# it = (0,1) +# print(type(it)) +# if it in data: +# print("yes") +# +# def addone(x): +# return x + 1 +# +# l = OrderedDict() +# l['a'] = addone +# y = l['a'](3) +# print(y) +# +# x = torch.rand(5, 3) +# print(x) +# +# path = '/home/User/Documents/file.txt' +# +# # Above specified path +# # will be splited into +# # (head, tail) pair as +# # ('/home/User/Documents', 'file.txt') +# +# # Get the base name +# # of the specified path +# basename = os.path.basename(path) +# +# # Print the basename name +# print(basename) +import torch +import torch.nn.functional as F + +# data = torch.ones(4, 4) +# # pad(left, right, top, bottom) +# new_data = F.pad(input=data, pad=[1, 0, 0, 0], mode='constant', value=0) +# new_new_data = F.pad(input=new_data, pad=[0, 0, 0, 1], mode='constant', value=0) +# print(new_data) +# 
print(new_new_data) +input = [1,2,3,4,5] +print(len(input[0:3])) \ No newline at end of file diff --git a/test/test_alexnet.py b/test/test_alexnet.py new file mode 100644 index 0000000..adfcb5b --- /dev/null +++ b/test/test_alexnet.py @@ -0,0 +1,13 @@ +from dnn_split.model_util import * +import torchvision.models as models + + + +if __name__ == '__main__': + alexnet = models.alexnet(pretrained=False) + alexnet.load_state_dict(torch.load("../models/alexnet-owt-4df8aa71.pth")) + alexnet_layers = get_all_layers(alexnet) + + for i in alexnet_layers: + print(i) + print("---------------") \ No newline at end of file diff --git a/test/test_alg.py b/test/test_alg.py new file mode 100644 index 0000000..1cf681a --- /dev/null +++ b/test/test_alg.py @@ -0,0 +1,197 @@ +import itertools +import networkx as nx +import matplotlib.pyplot as plt +from collections import OrderedDict +from networkx.drawing.nx_pydot import graphviz_layout + + +class impl: + def __init__(self, device, edge, cloud): + self.device = device + self.edge = edge + self.cloud = cloud + +class trans: + def __init__(self, d2e, e2c,d2c): + self.d2e = d2e + self.e2c = e2c + self.d2c = d2c + +def build_graph(): + G = nx.DiGraph() + node_list = list(range(8)) + G.add_nodes_from(node_list) + G.add_edges_from([(0,1), (0,2), (1,3), (2,4), (3,5), (3,6), (2,6), (5,7), (6,7), (4,7)]) + + return G + +def longest_path(G): + nodes = list(nx.topological_sort(G)) + source = nodes[0] + + def helper(node): + if node == source: + return 0 + preds = list(G.predecessors(node)) + dist = max([helper(i) + 1 for i in preds]) + return dist + + path_dict = OrderedDict() + for node in nodes: + path_dict[node] = helper(node) + + return path_dict + + +def get_layer(G): + path_dict = longest_path(G) + max_len = path_dict[max(path_dict, key=path_dict.get)] + layer_dict = OrderedDict() + for layer in range(max_len + 1): + layer_item = [] + for k, v in path_dict.items(): + if v == layer: + layer_item.append(k) + layer_dict[layer] = layer_item + + return layer_dict + + + +def assign_nodes_to_layers(G, layer_dict): + nodes = list(nx.topological_sort(G)) + source = nodes[0] + + def get_subset_input_sibling(node, v): + subset = set() + siblings = [] + pred_list = list(G.predecessors(node)) + for i in range(1, len(pred_list) + 1): + data = itertools.combinations(pred_list, i) + subset.add(tuple(data)) + + for j in v: + if j != node: + if tuple(G.predecessors(j)) in subset: + siblings.append(j) + + return siblings + + + # k: layer index, v: list of nodes which belongs to layer k + for k, v in layer_dict.items(): + print("Start partition in layer ", k) + for node in v: + # if G.nodes[node].get('location') == 'None': + pred_list = list(G.predecessors(node)) + pred_location = [] + + for pred in pred_list: + pred_location.append(G.nodes[pred].get('location')) + + if 'cloud' in pred_location: + last_location = 'cloud' + elif 'edge' in pred_location: + last_location = 'edge' + else: + last_location = 'device' + + time_device = 0 + time_edge = 0 + time_cloud = 0 + print('the pred location list is', pred_location) + print('the last location is', last_location) + if last_location == 'device': + + # put node on device + time_device = 0 + G.nodes[node].get('attr').device + # put node on edge + for pred in pred_list: + time_edge = time_edge + G.edges[(pred, node)].get('attr').d2e + G.nodes[node].get('attr').edge + # put node on cloud + for pred in pred_list: + time_cloud = time_cloud + G.edges[(pred, node)].get('attr').d2c + G.nodes[node].get('attr').edge + + time_list = 
list([time_device, time_edge, time_cloud]) + time_min = min(time_list) + + if time_min == time_device: + node_location = 'device' + elif time_min == time_edge: + node_location = 'edge' + else: + node_location = 'cloud' + + elif last_location == 'edge': + # put node on edge + for pred in pred_list: + if G.nodes[pred].get('location') == 'device': + time_edge = time_edge + G.edges[(pred, node)].get('attr').d2e + G.nodes[node].get('attr').edge + time_cloud = time_cloud + G.edges[(pred, node)].get('attr').d2c + G.nodes[node].get('attr').cloud + else: + time_edge = time_edge + 0 + G.nodes[node].get('attr').edge + time_cloud = time_cloud + G.edges[(pred, node)].get('attr').e2c + G.nodes[node].get('attr').cloud + + time_list = list([time_edge, time_cloud]) + time_min = min(time_list) + + if time_min == time_edge: + node_location = 'edge' + else: + node_location = 'cloud' + else: + # for pred in pred_list: + # if G.nodes[pred].get('location') == 'device': + # time_cloud = time_cloud + G.edges[(pred, node)].get('attr').d2c + G.nodes[node].get('attr').cloud + # elif G.nodes[pred].get('location') == 'edge': + # time_cloud = time_cloud + G.edges[(pred, node)].get('attr').e2c + G.nodes[node].get('attr').cloud + # else: + # time_cloud = time_cloud + 0 + G.nodes[node].get('attr').cloud + node_location = 'cloud' + + G.nodes[node]['location'] = node_location + + # update subset siblings + location_dict = {'device':0, 'edge':1, 'cloud':2} + siblings = get_subset_input_sibling(node, v) + for sibling in siblings: + if G.nodes[sibling].get('location') == None: + G.nodes[sibling]['location'] = node_location + else: + if location_dict[G.nodes[sibling].get('location')] < location_dict[node_location]: + G.nodes[sibling]['location'] = node_location + + return G + + +if __name__ == '__main__': + G = build_graph() + + layer_dict = get_layer(G) + print(layer_dict) + + G.add_node('input') + G.add_node('output') + G.add_edge('input', 0) + G.add_edge(7, 'output') + + for node in G.nodes: + G.nodes[node]['attr'] = impl(3,2,1) + for edge in G.edges: + G.edges[edge]['attr'] = trans(0.1, 0.2, 0.3) + + G.nodes['input']['location'] = 'device' + G.edges[('input', 0)]['attr'] = trans(4, 8, 12) + G.edges[(7, 'output')]['attr'] = trans(4, 8, 12) + print(G.edges(data=True)) + + G = assign_nodes_to_layers(G, layer_dict) + for node in G.nodes: + print('Node %s is at %s' % (str(node), G.nodes[node].get('location'))) + + pos = nx.spring_layout(G) + labels = nx.get_node_attributes(G, 'location') + nx.draw_networkx_nodes(G, pos=pos) + nx.draw_networkx_labels(G, pos=pos, labels=labels) + nx.draw_networkx_edges(G, pos=pos, arrows=True) + plt.show() \ No newline at end of file diff --git a/test/test_case.py b/test/test_case.py new file mode 100644 index 0000000..9c511f3 --- /dev/null +++ b/test/test_case.py @@ -0,0 +1,51 @@ +import torch +from dnn_split.model_canyon import ModelCanyon +from dnn_split.model_util import get_alexnet, get_pretrained_alexnet +from PIL import Image +from torchvision import transforms + +MODEL_PATH = '../data/models/' +IMAGE_PATH = '../data/images/' + + +def get_input(): + input_image = Image.open(IMAGE_PATH + 'dog.jpg') + preprocess = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + input_tensor = preprocess(input_image) + input_batch = input_tensor.unsqueeze(0) + + return input_batch + + +if __name__ == "__main__": + + # input = get_input() + # path = 
"../models/partialmodel.pth" + # alexnet = get_alexnet() + # model = ModelCanyon(model=alexnet, start=0, end=2) + # model = torch.load(path) + # model.eval() + # # print(model.partialLayers) + # output = model(input) + # + # path2 = "../models/partialmodel2.pth" + # model2 = ModelCanyon(model=alexnet, start=3, end=20) + # model2 = torch.load(path2) + # model2.eval() + # print(model2.partialLayers) + # output2 = model2(output) + # print(output2) + + input2 = get_input() + startLayer = 0 + endLayer = 2 + model3 = get_pretrained_alexnet() + model3.eval() + output3 = model3(input2) + # print("#####################################") + print(output3) \ No newline at end of file diff --git a/test/test_ftp.py b/test/test_ftp.py new file mode 100644 index 0000000..be054ca --- /dev/null +++ b/test/test_ftp.py @@ -0,0 +1,52 @@ +from dnn_split.fused_tile_patition import * +from dnn_split.ftp_util import * + +if __name__ == "__main__": + + # parameters for FTP configuration + partition_w = 2 + partition_h = 2 + partition = 4 + fused_layer = 8 + task_id = [[0, 1], [2, 3]] + input_tiles = [[0] * fused_layer for _ in range(partition)] + output_tiles = [[0] * fused_layer for _ in range(partition)] + + # initialization of each partitioned tile of the bottom layer + for i in range(partition): + input_tiles[i][fused_layer-1] = TileRegion(0, 5, 0, 5) + output_tiles[i][fused_layer-1] = TileRegion(0, 5, 0, 5) + + # get input + input = get_input() + input_size = input.size() + + # get DNN model + model = get_pretrained_alexnet() + + # interpret the model to get relevant parameters for FTP algo + model = ModelInterpreter(model=model) + x_size, x_kenerl_size, x_stride, x_padding, x_type = model(input) + output_width = x_size[-1][2] + output_height = x_size[-1][1] + + # perform FTP algo + net_para = load_dnn_model(input_size[1:], x_size, x_kenerl_size, x_stride, x_padding, x_type) + ftp_para = FtpPara(partition_w, partition_h, fused_layer, task_id, input_tiles, output_tiles) + ftp_para = perform_ftp(net_para, ftp_para, output_width, output_height) + + # print the coordinate of each partitioned tile for each layer + print("we partition each layer of the DNN model into ", partition, "parts:") + for i in range(partition_h): + for j in range(partition_w): + for l in range(fused_layer): + id = ftp_para.task_id[i][j] + print("input Layer", l + 1, " :", "coordination of the ", id + 1, "part: (", + ftp_para.input_tiles[id][l].top_left_x, ",", + ftp_para.input_tiles[id][l].top_left_y, "),(", ftp_para.input_tiles[id][l].bottom_right_x, ",", + ftp_para.input_tiles[id][l].bottom_right_y, ")") + print("output Layer", l + 1, " :", "coordination of the ", id + 1, "part: (", ftp_para.output_tiles[id][l].top_left_x, + ",", + ftp_para.output_tiles[id][l].top_left_y, "),(", ftp_para.output_tiles[id][l].bottom_right_x, ",", + ftp_para.output_tiles[id][l].bottom_right_y, ")") + print("----------------------------------------------------------------") diff --git a/test/test_ftp_bug.py b/test/test_ftp_bug.py new file mode 100644 index 0000000..e14166f --- /dev/null +++ b/test/test_ftp_bug.py @@ -0,0 +1,45 @@ +import torch.nn as nn +from PIL import Image + +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +MODEL_PATH = '../data/models/' +IMAGE_PATH = '../data/images/' +# +# +def get_input(): + input_image = Image.open(IMAGE_PATH + 'dog.jpg') + preprocess = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), + ]) + input_tensor = preprocess(input_image) + input_batch = input_tensor.unsqueeze(0) + + return input_batch +# +# +# if __name__ == "__main__": +# input = get_input() +# print(input) +# layer = nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2, bias=True) +# layer.eval() +# output = layer(input) +# print(output) + +import torch +import torch.nn as nn + +# # With square kernels and equal stride +# m = nn.Conv2d(16, 33, 3, stride=2) +# # non-square kernels and unequal stride and with padding +# m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2)) +# # non-square kernels and unequal stride and with padding and dilation +m = nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2, bias=True) +m.eval() +input = get_input() +output = m(input) +print(output) \ No newline at end of file diff --git a/test/test_imagenet.py b/test/test_imagenet.py new file mode 100644 index 0000000..240e710 --- /dev/null +++ b/test/test_imagenet.py @@ -0,0 +1,199 @@ +import threading +from dnn_split.ftp_util import * +from dnn_split.model_canyon import ModelCanyon, ModelFTP +# import pandas as pd +import numpy as np +import torch +# from torchsummary import summary +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + + +class myThread(threading.Thread): + def __init__(self, threadID, name, input, model): + super(myThread, self).__init__() + threading.Thread.__init__(self) + self.threadID = threadID + self.name = name + self.input = input + self.model = model + + def run(self): + # print("starting" + self.name) + self.result = perform_partial_forward(self.name, self.input, self.model) + # print("the size of the output feature map of " + self.name + " is:", self.result.size()) + # print("the output feature map of " + self.name + " is:", output) + # print("Exiting" + self.name) + + def get_result(self): + threading.Thread.join(self) + try: + return self.result + except Exception: + return None + +def perform_partial_forward(threadName, input, model): + # alexnet = get_pretrained_alexnet() + # model = ModelCanyon(model=alexnet, start=0, end=12) + model.eval() + output = model(input) + + return output + +def output_to_excel(excel_name,output): + with pd.ExcelWriter(excel_name) as writer: + for i in range(output.size()[0]): + for j in range(output.size()[1]): + data = pd.DataFrame(output[i, j, :, :].detach().numpy()) + #print(data) + data.to_excel(writer, index=False, header=True, startrow=i*(output.size()[2]+1), startcol=j*output.size()[2]) + +if __name__ == "__main__": + threadList = ["Thread_1", "Thread_2", "Thread_3", "Thread_4"] + threadID = 1 + threads = [] + inputList = [] + result = [] + label = [] + path = "../models/imagenet_classes.txt" + + data_dir = "../data/images/val" + + transforms = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + + with open(path) as f: + classes = [line.strip() for line in f.readlines()] + dataset = datasets.ImageFolder(root=data_dir, transform=transforms) + dataset_loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=4) + + model = get_pretrained_vgg16() + # model = get_pretrained_alexnet() + model.eval() + class_ori = [] + correct = 0 + total = 0 + with torch.no_grad(): + for data in dataset_loader: + inputs, labels = data + # print(labels) + inputs = inputs.view(1, 3, 224, 224) + outputs = model(inputs) + _, index = torch.max(outputs.data, 1) + 
percentage = torch.nn.functional.softmax(outputs, dim=1)[0] * 100 + # print("the original predicted class is:") + # print(classes[index[0]], percentage[index[0]].item()) + class_ori.append(classes[index[0]]) + total += labels.size(0) + correct += (index == labels).sum().item() + + print('Accuracy of the network on the test images: %d %% without FTP' % ( + 100 * correct / total)) + + # test FTP + # vgg16 = get_pretrained_vgg16() + # model1 = ModelCanyon(model=vgg16, start=0, end=29) + + # alexnet = get_pretrained_alexnet() + # # model1 = ModelFTP(model=alexnet, start=0, end=12, x1=0, y1=0, x2=127, y2=127) + # # model2 = ModelFTP(model=alexnet, start=0, end=12, x1=112, y1=0, x2=224, y2=127) + # # model3 = ModelFTP(model=alexnet, start=0, end=12, x1=0, y1=112, x2=127, y2=224) + # # model4 = ModelFTP(model=alexnet, start=0, end=12, x1=112, y1=112, x2=224, y2=224) + # model1 = ModelFTP(model=alexnet, start=0, end=12, x1=0, y1=0, x2=193, y2=193) + # model2 = ModelFTP(model=alexnet, start=0, end=12, x1=96, y1=0, x2=224, y2=193) + # model3 = ModelFTP(model=alexnet, start=0, end=12, x1=0, y1=96, x2=193, y2=224) + # model4 = ModelFTP(model=alexnet, start=0, end=12, x1=96, y1=96, x2=224, y2=224) + # model_list = [model1, model2, model3, model4] + + # alexnet = get_pretrained_alexnet() + vgg16 = get_pretrained_vgg16() + class_ftp = [] + correct2 = 0 + total2 = 0 + with torch.no_grad(): + for data in dataset_loader: + inputs, labels = data + # print(labels) + threadID = 1 + threads = [] + inputList = [] + result = [] + + inputs = inputs.view(1, 3, 224, 224) + # inputList.append(inputs[:, :, 0:127, 0:127]) + # inputList.append(inputs[:, :, 0:127, 112:224]) + # inputList.append(inputs[:, :, 112:224, 0:127]) + # inputList.append(inputs[:, :, 112:224, 112:224]) + # inputList.append(inputs[:, :, 0:193, 0:193]) + # inputList.append(inputs[:, :, 0:193, 30:224]) + # inputList.append(inputs[:, :, 30:224, 0:193]) + # inputList.append(inputs[:, :, 30:224, 30:224]) + inputList.append(inputs[:, :, 0:130, 0:130]) + inputList.append(inputs[:, :, 0:130, 94:224]) + inputList.append(inputs[:, :, 94:224, 0:130]) + inputList.append(inputs[:, :, 94:224, 94:224]) + + # model1 = ModelFTP(model=alexnet, start=0, end=12, x1=0, y1=0, x2=193, y2=193, input_w=inputs.size()[3], input_h=inputs.size()[2]) + # model2 = ModelFTP(model=alexnet, start=0, end=12, x1=30, y1=0, x2=224, y2=193, input_w=inputs.size()[3], input_h=inputs.size()[2]) + # model3 = ModelFTP(model=alexnet, start=0, end=12, x1=0, y1=30, x2=193, y2=224, input_w=inputs.size()[3], input_h=inputs.size()[2]) + # model4 = ModelFTP(model=alexnet, start=0, end=12, x1=30, y1=30, x2=224, y2=224, input_w=inputs.size()[3], input_h=inputs.size()[2]) + model1 = ModelFTP(model=vgg16, start=0, end=16, x1=0, y1=0, x2=130, y2=130, input_w=inputs.size()[3], + input_h=inputs.size()[2]) + model2 = ModelFTP(model=vgg16, start=0, end=16, x1=94, y1=0, x2=224, y2=130, input_w=inputs.size()[3], + input_h=inputs.size()[2]) + model3 = ModelFTP(model=vgg16, start=0, end=16, x1=0, y1=94, x2=130, y2=224, input_w=inputs.size()[3], + input_h=inputs.size()[2]) + model4 = ModelFTP(model=vgg16, start=0, end=16, x1=94, y1=94, x2=224, y2=224, input_w=inputs.size()[3], + input_h=inputs.size()[2]) + model_list = [model1, model2, model3, model4] + for i in range(len(threadList)): + thread = myThread(threadID, threadList[i], inputList[i], model_list[i]) + thread.start() + threads.append(thread) + threadID += 1 + result.append(thread.get_result()) + + for t in threads: + t.join() + + # print("Exiting Main 
Thread") + # result.detach().numpy() + a = np.concatenate((result[0].detach().numpy(), result[1].detach().numpy()), axis=3) + b = np.concatenate((result[2].detach().numpy(), result[3].detach().numpy()), axis=3) + c = np.concatenate((a, b), axis=2) + input2 = torch.from_numpy(c) + # print(input2.size()) + # + # modelx = ModelCanyon(model=alexnet, start=13, end=22) + modelx = ModelCanyon(model=vgg16, start=17, end=50) + modelx.eval() + output2 = modelx(input2) + # # summary(modelx, input_size=(256, 6, 6), batch_size=-1) + # summary(modelx, input_size=(512, 14, 14), batch_size=-1) + _, index2 = torch.max(output2, 1) + percentage2 = torch.nn.functional.softmax(output2, dim=1)[0] * 100 + _, indices2 = torch.sort(output2, descending=True) + [(classes[idx], percentage[idx].item()) for idx in indices2[0][:5]] + # print("the FTP predicted class is:") + # print(classes[index2[0]], percentage2[index2[0]].item()) + class_ftp.append(classes[index2[0]]) + total2 += labels.size(0) + correct2 += (index2 == labels).sum().item() + + print('Accuracy of the network on the test images: %d %% with FTP' % ( + 100 * correct2 / total2)) + + match_num = 0 + for i in range(len(class_ftp)): + # print("the ", i, "pic belongs to class", class_ori[i], "without ftp, and ", class_ftp[i], "with ftp") + if class_ftp[i] == class_ori[i]: + match_num += 1 + + print("on the 1000 test images, the matching number is:", match_num) + + # class_result = np.vstack((class_ori, class_ftp)) + # np.savetxt('result_imagenet.csv', class_result.T, delimiter=',', header='the inference result', fmt='%s') diff --git a/test/test_inception.py b/test/test_inception.py new file mode 100644 index 0000000..3f18f67 --- /dev/null +++ b/test/test_inception.py @@ -0,0 +1,11 @@ +from dnn_split.model_util import * +import torchvision.models as models + +if __name__ == '__main__': + inception_v3 = models.inception_v3(pretrained=False) + inception_v3.load_state_dict(torch.load("../models/inception_v3_google-1a9a5a14.pth")) + inception_layers = get_all_layers(inception_v3) + + for i in inception_layers: + print(i) + print("--------------------") \ No newline at end of file diff --git a/test/test_infer_edge.py b/test/test_infer_edge.py new file mode 100644 index 0000000..d375c15 --- /dev/null +++ b/test/test_infer_edge.py @@ -0,0 +1,38 @@ +from dnn_split.model_infer_time import * +from dnn_split.model_util import get_input, get_pretrained_resnet34 +from dnn_split.comm_util import send_data + + +if __name__ == "__main__": + input = get_input() + + resnet34 = get_pretrained_resnet34() + num_layers = get_model_size(resnet34) + + model = ModelInferTime(model=resnet34, start=0, end=num_layers-1) + model.eval() + + data_size, infer_edge = model(input) + send_data(infer_edge, "10.5.27.51", 50002) + print(sum(infer_edge)) + # send_data(infer_edge, "127.0.0.1", 50002) + + + + # infer_edge = recv_data_once() + # data_size = np.array(data_size) + # infer_cloud = np.array(infer_cloud) + # infer_edge = np.array(infer_edge) + # # output, infer_edge = model(input) + # # infer_cloud = np.array([0.011, 0.0, 0.0002, 0.0001, 0.0, 0.0001, 0.0005, 0.0, 0.0012, 0.0, 0.0005, 0.0, 0.0, 0.0003, 0.0, 0.0, 0.002, 0.0, 0.0, 0.001, 0.0, 0.0005]) + # # data_size = np.array([0.7744, 0.7744, 0.186624, 0.559872, 0.559872, 0.129792, 0.259584, 0.259584, 0.173056, 0.173056, 0.173056, 0.173056, 0.036864, 0.036864, 0.009216, 0.004096, 0.004096, 0.004096, 0.004096, 0.004096, 0.001]) + # bandwidth = 6 # 6MB/s + # delay_trans = compute_delay_trans(data_size, bandwidth) + # split_point, min_delay = 
find_split(infer_edge, infer_cloud, delay_trans) + # if split_point == num_layers - 1: + # print("edge side") + # elif split_point == num_layers: + # print("cloud side") + # else: + # print("we split xxx model at: ", split_point + 1, "layer to get the minimum inference delay of", min_delay) + diff --git a/test/test_infer_time.py b/test/test_infer_time.py new file mode 100644 index 0000000..c602e05 --- /dev/null +++ b/test/test_infer_time.py @@ -0,0 +1,50 @@ +import torch +import numpy as np +from dnn_split.model_infer_time import * +from dnn_split.split_point import * +from dnn_split.model_util import get_input, get_pretrained_resnet34 +from dnn_split.comm_util import recv_data_once + +if __name__ == "__main__": + device = torch.device('cuda') + input = get_input() + input = input.to(device) + print("input finished") + resnet34 = get_pretrained_resnet34() + num_layers = get_model_size(resnet34) + + model = ModelInferTimeGPU(model=resnet34, start=0, end=num_layers-1) + model.to(device) + model.eval() + + x_size, infer_cloud = model(input) + print(next(model.parameters()).is_cuda) + print("Listening to the edge side to receive inference time data ...") + infer_edge = recv_data_once() + print("The inference time for each layer on the cloud side is: ", infer_cloud) + print("The inference time for each layer on the edge side is: ", infer_edge) + data_size = (np.array(x_size)*4)/pow(10,6) + print(data_size) + # output, infer_edge = model(input) + # infer_cloud = np.array([0.011, 0.0, 0.0002, 0.0001, 0.0, 0.0001, 0.0005, 0.0, 0.0012, 0.0, 0.0005, 0.0, 0.0, 0.0003, 0.0, 0.0, 0.002, 0.0, 0.0, 0.001, 0.0, 0.0005]) + # data_size = np.array([0.7744, 0.7744, 0.186624, 0.559872, 0.559872, 0.129792, 0.259584, 0.259584, 0.173056, 0.173056, 0.173056, 0.173056, 0.036864, 0.036864, 0.009216, 0.004096, 0.004096, 0.004096, 0.004096, 0.004096, 0.001]) + bandwidth = 30 # 6MB/s + delay_trans = compute_delay_trans(data_size, bandwidth) + split_point, min_delay = find_split(infer_edge, infer_cloud, delay_trans) + if split_point == num_layers - 1: + print("edge side") + elif split_point == num_layers: + print("cloud side") + else: + print("we split resnet34 model at: ", split_point + 1, "layer to get the minimum inference delay of", min_delay) + + # test_resnet = get_pretrained_resnet34() + # test_resnet.to(device) + # test_resnet.eval() + # torch.cuda.synchronize() + # start_time = time.time() + # test_resnet(input) + # torch.cuda.synchronize() + # end_time = time.time() + # print("total running time:", end_time-start_time) + diff --git a/test/test_model_split.py b/test/test_model_split.py new file mode 100644 index 0000000..3737d30 --- /dev/null +++ b/test/test_model_split.py @@ -0,0 +1,12 @@ +from dnn_split.split_point import * + +if __name__ == "__main__": + + num_layer = 5 + delay_dev = np.array([4, 3, 5, 8, 6]) + delay_edge = np.array([2, 1, 1, 3, 2]) + data_size = np.array([300, 200, 100, 50]) + bandwidth = 50 + delay_trans = compute_delay_trans(data_size, bandwidth) + split_point, min_delay = find_split(delay_dev, delay_edge, delay_trans, num_layer) + print("we split xxx model at: ", split_point+1, "layer to get the minimum inference delay of", min_delay) \ No newline at end of file diff --git a/test/test_multiprocess.py b/test/test_multiprocess.py new file mode 100644 index 0000000..c704052 --- /dev/null +++ b/test/test_multiprocess.py @@ -0,0 +1,164 @@ +import threading +from dnn_split.ftp_util import * +from dnn_split.model_canyon import ModelCanyon +import pandas as pd +import numpy as np +import torch 
+from torchsummary import summary +from torchvision import datasets, transforms +from torch.utils.data import DataLoader +# import torch.multiprocessing as mp +# from torch.multiprocessing import Pool, Manager +import multiprocessing +from multiprocessing import Pool +import time + +# class myProcess(multiprocessing.Process): +# def __init__(self, func, args): +# multiprocessing.Process.__init__(self) +# self.func = func +# self.args = args +# super(myProcess, self).__init__() +# +# def run(self): +# # print("starting" + self.name) +# self.result = self.func(*self.args) +# # print("the size of the output feature map of " + self.name + " is:", self.result.size()) +# # print("the output feature map of " + self.name + " is:", output) +# # print("Exiting" + self.name) +# +# def get_result(self): +# multiprocessing.Process.join(self) +# try: +# return self.result +# except Exception: +# return None + +def perform_partial_forward(input, model): + # alexnet = get_pretrained_alexnet() + # model = ModelCanyon(model=alexnet, start=0, end=12) + model.eval() + output = model(input) + + return output + +if __name__ == "__main__": + processList = ["Process_1", "Process_2", "Process_3", "Process_4"] + label = [] + path = "../models/imagenet_classes.txt" + + data_dir = "../data/images/val" + + transforms = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + # dataset = datasets.ImageFolder(data_dir, transform=transforms) + with open(path) as f: + classes = [line.strip() for line in f.readlines()] + dataset = datasets.ImageFolder(root=data_dir, transform=transforms) + dataset_loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=4) + + # model = get_pretrained_vgg16() + # # model = get_pretrained_alexnet() + # model.eval() + # class_ori = [] + # correct = 0 + # total = 0 + # start_time = time.time() + # with torch.no_grad(): + # for data in dataset_loader: + # inputs, labels = data + # # print(labels) + # inputs = inputs.view(1, 3, 224, 224) + # outputs = model(inputs) + # # _, predicted = torch.max(outputs.data, 1) + # _, index = torch.max(outputs.data, 1) + # percentage = torch.nn.functional.softmax(outputs, dim=1)[0] * 100 + # # print("the original predicted class is:") + # # print(classes[index[0]], percentage[index[0]].item()) + # class_ori.append(classes[index[0]]) + # total += labels.size(0) + # correct += (index == labels).sum().item() + # end_time = time.time() + # print('Inference process cost:', end_time - start_time, "s") + # print('Accuracy of the network on the test images: %d %% without FTP' % ( + # 100 * correct / total)) + + # test FTP + vgg16 = get_pretrained_vgg16() + model1 = ModelCanyon(model=vgg16, start=0, end=29) + + # alexnet = get_pretrained_alexnet() + # model1 = ModelCanyon(model=alexnet, start=0, end=12) + + class_ftp = [] + correct2 = 0 + total2 = 0 + multiprocessing.freeze_support() + start_time = time.time() + with torch.no_grad(): + for data in dataset_loader: + inputs, labels = data + # print(labels) + # processID = 1 + # processNum = [] + inputList = [] + pool = multiprocessing.Pool() + result = [] + + inputs = inputs.view(1, 3, 224, 224) + inputList.append(inputs[:, :, 0:127, 0:127]) + inputList.append(inputs[:, :, 0:127, 112:224]) + inputList.append(inputs[:, :, 112:224, 0:127]) + inputList.append(inputs[:, :, 112:224, 112:224]) + # inputList.append(inputs[:, :, 0:127, 0:127]) + # inputList.append(inputs[:, :, 0:127, 
93:224]) + # inputList.append(inputs[:, :, 93:224, 0:127]) + # inputList.append(inputs[:, :, 93:224, 93:224]) + + for i in range(len(processList)): + # process = myProcess(func=perform_partial_forward, args=(inputList[i], model1, return_dict)) + return_result = pool.apply_async(perform_partial_forward, args=(inputList[i], model1)) + result.append(return_result) + + pool.close() + pool.join() + + a = np.concatenate((result[0].get().detach().numpy(), result[1].get().detach().numpy()), axis=3) + b = np.concatenate((result[2].get().detach().numpy(), result[3].get().detach().numpy()), axis=3) + c = np.concatenate((a, b), axis=2) + input2 = torch.from_numpy(c) + # + # model2 = ModelCanyon(model=alexnet, start=13, end=22) + model2 = ModelCanyon(model=vgg16, start=30, end=50) + model2.eval() + output2 = model2(input2) + # # summary(model2, input_size=(256, 6, 6), batch_size=-1) + # summary(model2, input_size=(512, 14, 14), batch_size=-1) + _, index2 = torch.max(output2, 1) + percentage2 = torch.nn.functional.softmax(output2, dim=1)[0] * 100 + # _, indices2 = torch.sort(output2, descending=True) + # [(classes[idx], percentage[idx].item()) for idx in indices2[0][:5]] + # print("the FTP predicted class is:") + # print(classes[index2[0]], percentage2[index2[0]].item()) + class_ftp.append(classes[index2[0]]) + total2 += labels.size(0) + correct2 += (index2 == labels).sum().item() + end_time = time.time() + print('Inference with FTP cost:', end_time-start_time, "s") + print('Accuracy of the network on the test images: %d %% with FTP' % ( + 100 * correct2 / total2)) + + # match_num = 0 + # for i in range(len(class_ftp)): + # # print("the ", i, "pic belongs to class", class_ori[i], "without ftp, and ", class_ftp[i], "with ftp") + # if class_ftp[i] == class_ori[i]: + # match_num += 1 + # + # print("on the 1000 test images, the matching number is:", match_num) + # + # class_result = np.vstack((class_ori, class_ftp)) + # np.savetxt('result_imagenet.csv', class_result.T, delimiter=',', header='the inference result', fmt='%s') diff --git a/test/test_resnet.py b/test/test_resnet.py new file mode 100644 index 0000000..61e8c11 --- /dev/null +++ b/test/test_resnet.py @@ -0,0 +1,31 @@ +from dnn_split.model_util import * +import torchvision.models as models + +if __name__ == '__main__': + resnet34 = models.resnet34(pretrained=False) + resnet34.load_state_dict(torch.load("../models/resnet34-333f7ec4.pth")) + resnet_layers = get_all_layers(resnet34) + + for i in resnet_layers: + print(i,"##") + + alexnet = models.alexnet(pretrained=False) + alexnet.load_state_dict(torch.load("../models/alexnet-owt-4df8aa71.pth")) + alexnet_layers = get_all_layers(alexnet) + + for j in alexnet_layers: + print(j) + + alexnet = models.alexnet(pretrained=False) + alexnet.load_state_dict(torch.load("../models/alexnet-owt-4df8aa71.pth")) + for alexnet_module in alexnet.modules(): + if (type(alexnet_module) == nn.Sequential): + print("type is: ", type(alexnet_module)) + print(alexnet_module) + + + def get_all_layers(model): + # submodel = nn.Sequential(*list(model.children())) + layers = [module for module in model.modules() if type(module) != nn.Sequential] + return layers[1:] + diff --git a/test/test_resnet_infer_time.py b/test/test_resnet_infer_time.py new file mode 100644 index 0000000..e2c9563 --- /dev/null +++ b/test/test_resnet_infer_time.py @@ -0,0 +1,25 @@ +from dnn_split.model_util import * +from dnn_models.darknet_53 import * +import time + + + +if __name__ == '__main__': + inputs = get_input() + print(inputs.size()) + device = 
torch.device("cpu") + inputs = inputs.to(device) + darknet = darknet53(5) + darknet.to(device) + + times = 100 + total = [0]*13 + for i in range(times): + res, proc_time, output_size = darknet(inputs) + total = [a + b for a, b in zip(total, proc_time)] + + for elem in total: + print(elem) + + for j in output_size: + print(j) \ No newline at end of file diff --git a/test/test_send_recv.py b/test/test_send_recv.py new file mode 100644 index 0000000..10de575 --- /dev/null +++ b/test/test_send_recv.py @@ -0,0 +1,8 @@ +from dnn_split.comm_util import send_model, send_data + +if __name__ == '__main__': + # send_model("../models/partialmodel.pth") + # send_model("../models/partialmodel2.pth") + data = list([0,0,0,0,0,0,0]) + print(data) + send_data(data) \ No newline at end of file diff --git a/test/test_summary.py b/test/test_summary.py new file mode 100644 index 0000000..90bdeec --- /dev/null +++ b/test/test_summary.py @@ -0,0 +1,10 @@ +from dnn_models.mynet import MyNet +import torch +from torchsummary import summary + +if __name__ == "__main__": + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # PyTorch v0.4.0 + model = MyNet().to(device) + + summary(model, (3, 28, 28)) \ No newline at end of file diff --git a/test/test_torchprof.py b/test/test_torchprof.py new file mode 100644 index 0000000..1ee039b --- /dev/null +++ b/test/test_torchprof.py @@ -0,0 +1,12 @@ +import torch +import torchvision +import torchprof +from dnn_models.mynet import MyNet + +model = MyNet() +x = torch.rand([1, 3, 224, 224]) + +with torchprof.Profile(model, use_cuda=False) as prof: + model(x) + +print(prof.display(show_events=False)) # equivalent to `print(prof)` and `print(prof.display())` \ No newline at end of file diff --git a/test/test_trace.py b/test/test_trace.py new file mode 100644 index 0000000..28610ca --- /dev/null +++ b/test/test_trace.py @@ -0,0 +1,102 @@ +from collections import namedtuple +from distutils.version import LooseVersion +from graphviz import Digraph +from dnn_split.model_util import * +import torch + +Node = namedtuple('Node', ('name', 'inputs', 'attr', 'op')) + + +def replace(name, scope): + return '/'.join([scope[name], name]) + + +def parse(graph): + scope = {} + for n in graph.nodes(): + inputs = [i.uniqueName() for i in n.inputs()] + for i in range(1, len(inputs)): + scope[inputs[i]] = n.scopeName() + + uname = next(n.outputs()).uniqueName() + assert n.scopeName() != '', '{} has empty scope name'.format(n) + scope[uname] = n.scopeName() + scope['0'] = 'input' + + nodes = [] + for n in graph.nodes(): + attrs = {k: n[k] for k in n.attributeNames()} + attrs = str(attrs).replace("'", ' ') + inputs = [replace(i.uniqueName(), scope) for i in n.inputs()] + uname = next(n.outputs()).uniqueName() + nodes.append(Node(**{'name': replace(uname, scope), + 'op': n.kind(), + 'inputs': inputs, + 'attr': attrs})) + + for n in graph.inputs(): + uname = n.uniqueName() + if uname not in scope.keys(): + scope[uname] = 'unused' + nodes.append(Node(**{'name': replace(uname, scope), + 'op': 'Parameter', + 'inputs': [], + 'attr': str(n.type())})) + + return nodes + + +def make_dot_from_trace(trace): + """ Produces graphs of torch.jit.trace outputs + Example: + >>> trace, = torch.jit.trace(model, args=(x,)) + >>> dot = make_dot_from_trace(trace) + """ + # from tensorboardX + if LooseVersion(torch.__version__) >= LooseVersion("0.4.1"): + torch.onnx._optimize_trace(trace, torch._C._onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK) + elif LooseVersion(torch.__version__) >= LooseVersion("0.4"): + 
torch.onnx._optimize_trace(trace, False) + else: + torch.onnx._optimize_trace(trace) + graph = trace.graph() + list_of_nodes = parse(graph) + + node_attr = dict(style='filled', + shape='box', + align='left', + fontsize='12', + ranksep='0.1', + height='0.2') + + dot = Digraph(node_attr=node_attr, graph_attr=dict(size="12,12")) + + for node in list_of_nodes: + dot.node(node.name, label=node.name.replace('/', '\n')) + if node.inputs: + for inp in node.inputs: + dot.edge(inp, node.name) + + resize_graph(dot) + + return dot + + +def resize_graph(dot, size_per_element=0.15, min_size=12): + """Resize the graph according to how much content it contains. + Modify the graph in place. + """ + # Get the approximate number of nodes and edges + num_rows = len(dot.body) + content_size = num_rows * size_per_element + size = max(min_size, content_size) + size_str = str(size) + "," + str(size) + dot.graph_attr.update(size=size_str) + +if __name__ == '__main__': + model = get_pretrained_alexnet() + model.eval() + inputs = torch.randn(1, 3, 224, 224) + trace = torch.jit.trace(model, inputs) + dot = make_dot_from_trace(trace) + diff --git a/test/test_visualize.py b/test/test_visualize.py new file mode 100644 index 0000000..d78658b --- /dev/null +++ b/test/test_visualize.py @@ -0,0 +1,78 @@ +from graphviz import Digraph +import torch +from torch.autograd import Variable +from dnn_models.darknet_53 import * +from torchvision import models +import networkx as nx +import matplotlib.pyplot as plt + + +def make_dot(var, params): + """ Produces Graphviz representation of PyTorch autograd graph + + Blue nodes are the Variables that require grad, orange are Tensors + saved for backward in torch.autograd.Function + + Args: + var: output Variable + params: dict of (name, Variable) to add names to node that + require grad (TODO: make optional) + """ + param_map = {id(v): k for k, v in params.items()} + print(param_map) + + node_attr = dict(style='filled', + shape='box', + align='left', + fontsize='12', + ranksep='0.1', + height='0.2') + + dot = Digraph(node_attr=node_attr, graph_attr=dict(size="12,12")) + seen = set() + G = nx.Graph() + + def size_to_str(size): + return '(' + (', ').join(['%d' % v for v in size]) + ')' + + def add_nodes(var): + if var not in seen: + if torch.is_tensor(var): + dot.node(str(id(var)), size_to_str(var.size()), fillcolor='orange') + elif hasattr(var, 'variable'): + u = var.variable + node_name = '%s\n %s' % (param_map.get(id(u)), size_to_str(u.size())) + dot.node(str(id(var)), node_name, fillcolor='lightblue') + else: + dot.node(str(id(var)), str(type(var).__name__)) + G.add_node(str(id(var)), name=str(type(var).__name__)) + print("just add node %s, the name is %s" % (str(id(var)), str(type(var).__name__))) + seen.add(var) + if hasattr(var, 'next_functions'): + for u in var.next_functions: + if u[0] is not None: + dot.edge(str(id(u[0])), str(id(var))) + if str(type(u[0]).__name__) != "AccumulateGrad": + G.add_edge(str(id(u[0])), str(id(var))) + print("add an edge from %s node to %s node" % (str(type(u[0]).__name__), str(type(var).__name__))) + add_nodes(u[0]) + if hasattr(var, 'saved_tensors'): + for t in var.saved_tensors: + dot.edge(str(id(t)), str(id(var))) + G.add_edge(str(id(t)), str(id(var))) + add_nodes(t) + + add_nodes(var.grad_fn) + return dot, G + + +inputs = torch.randn(1, 3, 224, 224) +vgg = models.vgg16() +y = vgg(Variable(inputs)) +# print(y) + +dot, G = make_dot(y, vgg.state_dict()) +dot.view(filename="vgg16", directory="../models/vispdf/") +labels = nx.get_node_attributes(G, 
'name') +nx.draw(G, labels=labels) +plt.show() \ No newline at end of file diff --git a/test/val_ftp.py b/test/val_ftp.py new file mode 100644 index 0000000..2265531 --- /dev/null +++ b/test/val_ftp.py @@ -0,0 +1,94 @@ +import threading +from dnn_split.ftp_util import * +from dnn_split.model_canyon import ModelCanyon +from dnn_split.model_ftp import ModelFTP + +MODEL_PATH = '../data/models/' +IMAGE_PATH = '../data/images/' + +class myThread(threading.Thread): + def __init__(self, threadID, name, input, model): + super(myThread, self).__init__() + threading.Thread.__init__(self) + self.threadID = threadID + self.name = name + self.input = input + self.model = model + + def perform_partial_forward(self): + self.model.eval() + output = self.model(self.input) + + return output + + def run(self): + self.result = self.perform_partial_forward() + + def get_result(self): + threading.Thread.join(self) + try: + return self.result + except Exception: + return None + +if __name__ == "__main__": + + input = get_input() + # multi-thread + threadList = ["Thread_1", "Thread_2", "Thread_3", "Thread_4"] + threadID = 1 + threads = [] + inputList = [] + result = [] + + # original inference without fused tile partition + alexnet0 = get_pretrained_alexnet() + model = ModelCanyon(alexnet0, 0, 12) + model.eval() + output = model(input) + + print("the output:") + print(output) + print("-------------------------------------------") + + # inference with fused tile partition + alexnet = get_pretrained_alexnet() + + # partition the input feature map into four parts + # the coordinate of each partitioned tile of the top layer + coordinate_1 = TileRegion(0, 0, 193, 193) + coordinate_2 = TileRegion(30, 0, 224, 193) + coordinate_3 = TileRegion(0, 30, 193, 224) + coordinate_4 = TileRegion(30, 30, 224, 224) + # each partitioned tile completes inference separately + model_1 = ModelFTP(model=alexnet, start=0, end=12, coordinate=coordinate_1, input_w=input.size()[3], + input_h=input.size()[2]) + model_2 = ModelFTP(model=alexnet, start=0, end=12, coordinate=coordinate_2, input_w=input.size()[3], + input_h=input.size()[2]) + model_3 = ModelFTP(model=alexnet, start=0, end=12, coordinate=coordinate_3, input_w=input.size()[3], + input_h=input.size()[2]) + model_4 = ModelFTP(model=alexnet, start=0, end=12, coordinate=coordinate_4, input_w=input.size()[3], + input_h=input.size()[2]) + model_list = [model_1, model_2, model_3, model_4] + + inputList.append(input[:, :, 0:193, 0:193]) + inputList.append(input[:, :, 0:193, 30:224]) + inputList.append(input[:, :, 30:224, 0:193]) + inputList.append(input[:, :, 30:224, 30:224]) + + for i in range(len(threadList)): + thread = myThread(threadID, threadList[i], inputList[i], model_list[i]) # use multi thread to compute in parallel + thread.start() + threads.append(thread) + threadID += 1 + result.append(thread.get_result()) + + for t in threads: + t.join() + + a = np.concatenate((result[0].detach().numpy(), result[1].detach().numpy()), axis=3) + b = np.concatenate((result[2].detach().numpy(), result[3].detach().numpy()), axis=3) + c = np.concatenate((a, b), axis=2) + output_1 = torch.from_numpy(c) + print(output_1) +
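
For reference, a minimal sketch of how the split-point selection defined earlier in dnn_split/split_point.py is meant to be driven; the per-layer delays, output sizes, and bandwidth below are illustrative placeholders, not measurements from this repository.

import numpy as np
from dnn_split.split_point import find_split, compute_delay_trans

# Illustrative per-layer inference delays in seconds (index i = layer i).
delay_edge = np.array([0.020, 0.015, 0.030, 0.025, 0.010])   # edge-device timings (assumed)
delay_cloud = np.array([0.004, 0.003, 0.006, 0.005, 0.002])  # cloud-GPU timings (assumed)

# Illustrative per-layer output sizes in MB and an assumed 6 MB/s uplink.
data_size = np.array([0.77, 0.19, 0.56, 0.13, 0.004])
bandwidth = 6.0
delay_trans = compute_delay_trans(data_size, bandwidth)  # element-wise transfer delay

# find_split enumerates every cut point (plus the all-edge and all-cloud cases)
# and returns the cut with the minimum total of edge + transfer + cloud time.
split_point, min_delay = find_split(delay_edge, delay_cloud, delay_trans)
print("split after layer", split_point + 1, "with estimated delay", min_delay)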
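
The overlapping tile coordinates in val_ftp.py (for example TileRegion(0, 0, 193, 193) next to TileRegion(30, 0, 224, 193)) come from back-projecting each output tile through the fused convolution and pooling layers: a layer with kernel k, stride s, and padding p computes output column o from input columns o*s - p through o*s - p + k - 1, so adjacent output tiles require overlapping input regions. A minimal per-layer sketch of that rule, written as generic arithmetic rather than the repository's ftp_util API:

def required_input_range(o1, o2, kernel, stride, padding, input_len):
    # Back-project an output column range [o1, o2] through one conv/pool layer,
    # clamping to the valid feature-map bounds.
    lo = max(o1 * stride - padding, 0)
    hi = min(o2 * stride - padding + kernel - 1, input_len - 1)
    return lo, hi

# Example: AlexNet's first conv (kernel 11, stride 4, padding 2) on a 224-wide input
# produces a 55-wide output; columns 0..26 of that output (roughly the left half)
# need input columns 0..112, which is why the left and right input tiles overlap.
print(required_input_range(0, 26, kernel=11, stride=4, padding=2, input_len=224))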