# hw5russellho_iou.py

# -*- coding: utf-8 -*-
"""hw5RussellHo_IoU.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1jnScFlOH4BnZxc8p6pyoGjd6nUULWdML
"""

# ! pip install pycocotools
# ! pip install torch==1.10.0+cu102 torchvision==0.11.0+cu102 torchaudio==0.10.0 -f https://download.pytorch.org/whl/torch_stable.html
# NOTE: the `! ...` shell escapes below only work inside a notebook cell; left
# uncommented they are a SyntaxError when this file is run as a Python script.
# Run them manually in a shell (without the leading `!`) if needed.
# ! pip3 install torch==1.12.0+cu116 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116
# ! pip install opencv-python
# ! pip install scikit-image
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
from zipfile import ZipFile
import numpy as np
import sys,os,os.path
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision                  
import torchvision.transforms as tvt
import torch.optim as optim          
from PIL import ImageFilter
import numbers
import re
import cv2
from pycocotools.coco import COCO
import math
import random
import copy
import matplotlib.pyplot as plt
import gzip
import pickle
import logging
import argparse
import io
import skimage
import matplotlib.pyplot as plt
from PIL import Image
import requests
# Print the installed PyTorch version so the run log records it.
print(torch.__version__)

# Switch the working directory to the local project folder. The dataset code
# below builds its image root from os.getcwd() and opens the annotation file
# through a relative path, so everything is resolved against this folder.
# NOTE(review): this absolute path is machine-specific; os.chdir raises
# FileNotFoundError when the folder does not exist.
local_filepath = "/Users/ho224/Desktop/hw5_RussellHo"
os.chdir(local_filepath)
# os.listdir()

# # from google.colab import drive to mount
# drive.mount('/content/drive')
# #Retrieve filepath to images
# image_filepath = str(os.getcwd()) + "/hw5_RussellHo"
# os.chdir(image_filepath)
# # os.getcwd()

"""The cell below Is the actual code for the custom dataloader"""

#Class for mydataset transformation
class mydataset(torch.utils.data.Dataset):
    """COCO subset with one dominant object per image.

    For every class name in ``args.class_list`` the constructor walks the COCO
    images of that category, keeps those whose largest annotation of the
    category has an ``area`` above ``MIN_OBJECT_AREA``, downloads any missing
    image to ``<cwd>/train2014/<class>/`` (resized to
    ``image_size`` x ``image_size``) and records one entry per kept image in
    ``self.image_info``.

    This is a map-style dataset (``__len__`` / ``__getitem__``), so it derives
    from ``torch.utils.data.Dataset``; wrap it in a ``DataLoader`` to batch it.

    Args:
        args: object exposing ``coco_json_path`` (annotation file),
            ``class_list`` (COCO category names; list position is the label)
            and ``transform`` (callable applied to the PIL image).
    """

    # Threshold compared against the COCO annotation ``area`` field.
    # NOTE(review): the original comment spoke of the *bounding box* exceeding
    # 40000 pixels, but the code has always compared ``ann['area']`` -- confirm
    # which of the two is intended.
    MIN_OBJECT_AREA = 40000

    def __init__(self, args):
        self.root_path = str(os.getcwd()) + "/train2014/"
        self.coco_json_path = args.coco_json_path
        self.class_list = args.class_list
        self.label_map = {cls: label for label, cls in enumerate(self.class_list)}
        self.transform = args.transform
        self.coco = COCO(self.coco_json_path)

        # Map COCO category ids to the class indices used as labels.
        categories = self.coco.loadCats(self.coco.getCatIds(catNms=self.class_list))
        name_to_id = {c['name']: c['id'] for c in categories}
        self.coco_labels_inverse = {
            name_to_id[name]: idx
            for idx, name in enumerate(self.class_list)
            if name in name_to_id
        }
        self.image_info = []
        self.image_size = 256

        self.mkdir()

    @staticmethod
    def _largest_annotation(anns, cat_id):
        """Return the annotation of ``cat_id`` with the largest positive area.

        Returns ``None`` when no annotation qualifies, so the caller can skip
        the image instead of indexing into an empty placeholder.
        """
        candidates = [
            a for a in anns
            if a['category_id'] == cat_id and a['area'] > 0.0
        ]
        return max(candidates, key=lambda a: a['area'], default=None)

    def _scaled_box(self, bbox, width, height):
        """Convert a COCO ``[x, y, w, h]`` box to normalised corner form.

        The corners are scaled to the resized image, rounded, clamped (lower
        bound 0 for all, upper bound ``image_size - 1`` for the max corner) and
        divided by ``image_size``.

        Returns:
            ``torch.FloatTensor`` ``[x_min, y_min, x_max, y_max]``.
        """
        x_min, y_min, box_w, box_h = bbox
        x_scale = self.image_size / width
        y_scale = self.image_size / height
        upper = self.image_size - 1

        resize_x_min = np.maximum(0, np.round(x_min * x_scale))
        resize_y_min = np.maximum(0, np.round(y_min * y_scale))
        resize_x_max = np.maximum(0, np.minimum(np.round((x_min + box_w) * x_scale), upper))
        resize_y_max = np.maximum(0, np.minimum(np.round((y_min + box_h) * y_scale), upper))

        corners = (resize_x_min, resize_y_min, resize_x_max, resize_y_max)
        return torch.FloatTensor([c / self.image_size for c in corners])

    def _download_resized(self, url, img_path):
        """Fetch ``url`` with scikit-image and save it resized as RGB."""
        # Import the submodules explicitly: a bare ``import skimage`` does not
        # guarantee that ``skimage.io`` / ``skimage.color`` are loaded.
        import skimage.color
        import skimage.io

        pixels = skimage.io.imread(url)
        if len(pixels.shape) == 2:
            # Grayscale download: replicate to three channels.
            pixels = skimage.color.gray2rgb(pixels)
        im = Image.fromarray(np.uint8(pixels))
        im = im.resize((self.image_size, self.image_size), Image.BOX)
        im.save(img_path)

    def mkdir(self):
        """Create the class folders, build ``image_info`` and fetch images.

        Raises:
            ValueError: if a name in ``class_list`` is not a COCO category.
        """
        os.makedirs(self.root_path, exist_ok=True)
        for cls in self.class_list:
            os.makedirs(os.path.join(self.root_path, cls), exist_ok=True)

        for cls in self.class_list:
            print(f"Altering {cls} of class list")
            # Pass a list: the category-name filter is meant to receive a
            # sequence of names, not a bare string.
            catIds = self.coco.getCatIds(catNms=[cls])
            if not catIds:
                raise ValueError(f"Unknown COCO category name: {cls!r}")
            label = self.label_map[cls]

            for img in self.coco.loadImgs(self.coco.getImgIds(catIds=catIds)):
                annIds = self.coco.getAnnIds(imgIds=img['id'], catIds=catIds)
                ann = self._largest_annotation(self.coco.loadAnns(annIds), catIds[0])
                if ann is None or ann['area'] <= self.MIN_OBJECT_AREA:
                    continue

                img_path = os.path.join(self.root_path, cls, img['file_name'])
                self.image_info.append({
                    'image_path': img_path,
                    'bbox': self._scaled_box(ann['bbox'], img['width'], img['height']),
                    'label': label,
                })

                # Only download images that are not already on disk.
                if not os.path.exists(img_path):
                    self._download_resized(img['coco_url'], img_path)

    #Accessing images through their URL
    def getimage(self, img, path):
        """Download ``img['coco_url']`` to ``path`` as a resized RGB image.

        Returns:
            ``True`` when the image was written, ``False`` when the HTTP
            request failed (network error, timeout or error status).
        """
        try:
            img_response = requests.get(img['coco_url'], timeout = 1)
            img_response.raise_for_status()
        except requests.RequestException:
            return False

        # Finish (and flush) the write before re-opening the file with PIL.
        with open(path, 'wb') as img_f:
            img_f.write(img_response.content)

        with Image.open(path) as im:
            im_resized = im.convert(mode = "RGB").resize(
                (self.image_size, self.image_size), Image.BOX
            )
        im_resized.save(path)
        return True

    def __len__(self):
        """Number of images kept across all classes."""
        return len(self.image_info)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            dict with ``'image'`` (transformed image cast to ``float64``),
            ``'bbox'`` (normalised ``[x_min, y_min, x_max, y_max]`` tensor) and
            ``'label'`` (class index).
        """
        record = self.image_info[idx]
        # Resize in memory only. Writing the image back on every access would
        # re-encode the file each epoch and is unsafe with several workers.
        with Image.open(record['image_path']) as raw:
            image = raw.convert("RGB").resize(
                (self.image_size, self.image_size), Image.BOX
            )
        image = self.transform(image).to(dtype=torch.float64)
        return {'image' : image,
                'bbox' : record['bbox'],
                'label' : record['label']}
    
#Namespace class for dictionary that would interact with mydataset
class Namespace():
    """Attribute bag: every keyword argument becomes an instance attribute."""

    def __init__(self, **kwargs):
        # Write straight into the instance dictionary so each key is readable
        # as ``obj.key``.
        vars(self).update(kwargs)

#Implementing dictionary for customized dataloader
def customized_dataloader():
    """Build the training ``DataLoader`` for the bus / cat / pizza subset.

    Images are converted to tensors and normalised with mean 0.5 and std 0.5
    per channel. The dataset length is printed, then the dataset is wrapped in
    a shuffling loader with batch size 5 and no worker processes.

    Returns:
        ``torch.utils.data.DataLoader`` over ``mydataset``.
    """
    split_name = 'train2014'
    normalise = tvt.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    config = Namespace(
        coco_json_path = 'annotations/instances_{}.json'.format(split_name),
        class_list = ["bus", "cat", "pizza"],
        transform = tvt.Compose([tvt.ToTensor(), normalise]),
    )
    dataset = mydataset(config)
    # Report how many images survived the filtering.
    print(len(dataset))
    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size = 5,
        num_workers = 0,
        shuffle=True,
    )

#Implement the skip block connection of resnet
class ResnetBlock(nn.Module):
    """Three-convolution residual block that ends in a 2x2 max-pool.

    The main path is conv3x3 -> conv3x3 -> conv1x1, each followed by the same
    ``BatchNorm2d`` instance (``self.bn`` is shared by all three stages) and a
    ReLU.

    Args:
        in_ch: number of input channels.
        out_ch: number of output channels.
        downsample: when true, the shortcut goes through a 1x1 convolution
            mapping ``in_ch`` to ``out_ch``; otherwise the input is used as is.
        skip_connections: when true, the shortcut is added, ReLU is applied and
            the result is max-pooled (kernel 2, stride 2). When false, the main
            path output is returned without addition or pooling.
    """

    def __init__(self, in_ch, out_ch, downsample=True, skip_connections=True):
        super().__init__()
        self.in_ch, self.out_ch = in_ch, out_ch
        self.downsample = downsample
        self.skip_connections = skip_connections

        # Layers are registered in this exact order so parameter names and
        # initialisation order stay stable.
        self.convo1 = nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1)
        self.convo2 = nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1)
        if downsample:
            # 1x1 projection used on the shortcut branch.
            self.downsampler = nn.Conv2d(in_ch, out_ch, kernel_size=1, padding=0)
        self.convo3 = nn.Conv2d(out_ch, out_ch, kernel_size=1, padding=0)
        self.bn = nn.BatchNorm2d(out_ch)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run the block on ``x`` and return the resulting feature map."""
        shortcut = self.downsampler(x) if self.downsample else x

        out = x
        for conv in (self.convo1, self.convo2, self.convo3):
            out = self.relu(self.bn(conv(out)))

        if not self.skip_connections:
            return out

        # Residual sum, activation, then halve the spatial resolution.
        return F.max_pool2d(self.relu(out + shortcut), 2, 2)

#Defining the class for CNN
class HW5Net(nn.Module):
    """Resnet-style encoder with a classification head and a box head.

    The backbone is a 3x3 convolution followed by three ``ResnetBlock``s, each
    of which halves the spatial resolution, so a 256x256 input yields a
    16-channel 32x32 feature map. Both heads consume that map flattened to
    ``16 * 32 * 32`` features.

    Args:
        input_nc: number of channels of the input image.
        output_nc: accepted for interface compatibility; not used.
        ngf: accepted for interface compatibility; not used.
        n_blocks: must be non-negative; otherwise unused.
    """
    def __init__(self, input_nc, output_nc, ngf = 8, n_blocks = 4):
        super(HW5Net, self).__init__()
        assert (n_blocks >= 0)
        self.image_dimension = 256

        # Backbone: feature extraction.
        self.model = nn.Sequential(
            nn.Conv2d(input_nc, 32, kernel_size = 3, padding = 1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            ResnetBlock(32, 64),
            nn.BatchNorm2d(64),
            ResnetBlock(64, 32),
            nn.BatchNorm2d(32),
            ResnetBlock(32, 16),
            nn.BatchNorm2d(16),
        )

        # Classification head: emits 8 logits.
        self.class_head = nn.Sequential(
            nn.Linear((16*32*32), 2048),
            nn.ReLU(),
            nn.Linear(2048, 512),
            nn.ReLU(),
            nn.Linear(512, 32),
            nn.ReLU(),
            nn.Linear(32, 8)
        )

        # Bounding-box regression head: emits 4 raw values.
        self.bbox_head = nn.Sequential(
            nn.Linear((16*32*32), 2048),
            nn.ReLU(),
            nn.Linear(2048, 512),
            nn.ReLU(),
            nn.Linear(512, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 4)
        )

    def forward(self, x):
        """Return ``(class_logits, boxes)`` for a batch of images.

        ``boxes`` has four columns: ``sigmoid`` of the first two raw outputs
        and ``exp`` of the last two, all divided by ``image_dimension``.

        Raises:
            RuntimeError: if the backbone output does not flatten to
                ``16 * 32 * 32`` features per sample (e.g. the input is not
                256x256).
        """
        # Flatten per sample. ``view(-1, n)`` would silently fold a wrong
        # spatial size into the batch dimension; ``flatten`` keeps the batch
        # axis intact so the linear layers reject a mismatched input.
        features = torch.flatten(self.model(x), start_dim=1)
        logits = self.class_head(features)
        raw_box = self.bbox_head(features)

        # Build the box out of place rather than overwriting slices of a
        # tensor that is part of the autograd graph.
        # NOTE(review): the sigmoid columns are divided by image_dimension as
        # well, which confines them to (0, 1/256) -- confirm this is intended.
        box = torch.cat(
            (torch.sigmoid(raw_box[:, :2]), torch.exp(raw_box[:, 2:])), dim=1
        )
        return logits, box / self.image_dimension

#Function for iou loss propagation

def complete_iou_loss(y_pred, y_true):
    """
    Compute an IoU-based regression loss between paired bounding boxes.

    Despite the name, the quantity computed is the *Generalized* IoU loss,
    ``mean(1 - GIoU)``. Row ``i`` of ``y_pred`` is compared with row ``i`` of
    ``y_true`` only; a prediction is never scored against another sample's
    target.

    Both inputs are interpreted as ``(x, y, w, h)`` and converted to
    ``(x1, y1, x2, y2)`` internally. The inputs are not modified.
    NOTE(review): the dataset in this file stores targets as normalised corners
    ``(x_min, y_min, x_max, y_max)`` -- confirm the targets passed here really
    are ``(x, y, w, h)``.

    Args:
        y_pred (tensor): predicted bounding boxes (N, 4)
        y_true (tensor): true bounding boxes (N, 4)
    Returns:
        tensor: scalar loss
    Raises:
        ValueError: if the two tensors do not have the same shape.
    """
    if y_pred.shape != y_true.shape:
        raise ValueError(
            f"y_pred and y_true must have the same shape, "
            f"got {tuple(y_pred.shape)} and {tuple(y_true.shape)}"
        )
    eps = 1e-7  # keeps the divisions finite for degenerate (zero-area) boxes

    # (x, y, w, h) -> (x1, y1, x2, y2), built out of place.
    pred = torch.cat((y_pred[:, :2], y_pred[:, :2] + y_pred[:, 2:]), dim=1)
    true = torch.cat((y_true[:, :2], y_true[:, :2] + y_true[:, 2:]), dim=1)

    area_pred = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])
    area_true = (true[:, 2] - true[:, 0]) * (true[:, 3] - true[:, 1])

    # Intersection of each (prediction, target) pair.
    inter_wh = (
        torch.min(pred[:, 2:], true[:, 2:]) - torch.max(pred[:, :2], true[:, :2])
    ).clamp(min=0)
    inter = inter_wh[:, 0] * inter_wh[:, 1]
    union = area_pred + area_true - inter

    # Smallest axis-aligned box enclosing both boxes of the pair.
    hull_wh = (
        torch.max(pred[:, 2:], true[:, 2:]) - torch.min(pred[:, :2], true[:, :2])
    ).clamp(min=0)
    hull = hull_wh[:, 0] * hull_wh[:, 1]

    giou = inter / (union + eps) - (hull - union) / (hull + eps)
    return (1 - giou).mean()

"""Script for Training"""

#Function for training
def training(net, data_loader, epochs=20, freq=100):
    """Train ``net`` on classification and box regression jointly.

    Each step minimises ``2 * cross_entropy + 0.2 * complete_iou_loss`` with
    SGD (lr 1e-3, momentum 0.9). The two weighted terms are summed and
    back-propagated in a single pass. Every ``freq`` batches the running
    averages of the weighted losses are printed and recorded.

    Args:
        net: model returning ``(class_logits, boxes)`` for an image batch.
        data_loader: iterable of dict batches with keys ``'image'``,
            ``'bbox'`` and ``'label'``.
        epochs: number of passes over ``data_loader``.
        freq: number of batches between two log records.

    Returns:
        ``(net, iterations, loss_graph, cls_loss_graph, bbox_loss_graph)``
        where ``iterations`` holds the running index of each log record and
        the three graphs hold the averaged total, classification and
        regression losses at each record.
    """
    # Use the first GPU when available, otherwise the CPU.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    net = net.to(device)
    criterion_1 = torch.nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(
        net.parameters(), lr = 1e-3, momentum = 0.9
        )
    loss_graph = []
    cls_loss_graph = []
    bbox_loss_graph = []

    # Index of the next log record (advances once per ``freq`` batches).
    iteration = 0
    iterations = []

    for epoch in range(epochs):
        # Running sums restart every epoch; a trailing partial window of fewer
        # than ``freq`` batches is therefore not recorded.
        running_cls_loss = 0.0
        running_bbox_loss = 0.0
        for count, data_batch in enumerate(data_loader):
            image = data_batch['image'].to(device, dtype = torch.float)
            bbox = data_batch['bbox'].to(device)
            label = data_batch['label'].to(device)

            optimizer.zero_grad()
            prediction_cls, prediction_bbox = net(image)
            cls_loss = criterion_1(prediction_cls, label) * 2
            bbox_loss = complete_iou_loss(prediction_bbox, bbox) * 0.2
            # One backward pass over the summed loss accumulates the same
            # gradients as two separate passes, without retaining the graph.
            (cls_loss + bbox_loss).backward()
            optimizer.step()

            running_cls_loss += cls_loss.item()
            running_bbox_loss += bbox_loss.item()

            if (count+1) % freq == 0:
                print(str(count+1) + " of " + str(epoch + 1) + " out of " + str(epochs))
                running_loss = running_cls_loss + running_bbox_loss
                print("\n[epoch:%d, batch:%5d] loss: %.3f  cls_loss: %.3f  reg_loss: %.3f" %(epoch + 1, count + 1, running_loss / float(freq), running_cls_loss / float(freq), running_bbox_loss / float(freq)))
                loss_graph.append((running_loss / float(freq)))
                cls_loss_graph.append((running_cls_loss / float(freq)))
                bbox_loss_graph.append((running_bbox_loss / float(freq)))
                running_cls_loss = 0.0
                running_bbox_loss = 0.0
                iterations.append(iteration)
                iteration += 1
    return net, iterations, loss_graph, cls_loss_graph, bbox_loss_graph

#Plotting the graph for losses
def plot_graph(record, cls_loss_graph, bbox_loss_graph, MSE = True):
    """Plot the three loss curves, save the figure and show it.

    Args:
        record: total training loss per log record.
        cls_loss_graph: classification loss per log record.
        bbox_loss_graph: regression loss per log record.
        MSE: accepted for interface compatibility; it does not affect the
            plot, which is always labelled and saved as the IoU-loss variant.

    The figure is written to ``loss_graph_IoU.jpg`` in the working directory.
    """
    plt.figure()
    plt.plot(record, label = "Training Loss")
    plt.plot(cls_loss_graph, label = "Classification Loss")
    plt.plot(bbox_loss_graph, label = "Regression Loss with IoU Loss")
    plt.title("Loss vs Iterations Graph")
    plt.xlabel("Iterations")
    plt.ylabel("Loss")
    # Each curve is drawn exactly once, before the legend is created, so the
    # colours in the legend match the curves on the axes.
    plt.legend()
    plt.savefig("loss_graph_IoU.jpg")
    plt.show()

"""Main Script Below"""

#Run the Script
if __name__ == '__main__':
    # Data pipeline and model.
    train_data_loader = customized_dataloader()
    net = HW5Net(3, 32)

    # The printed figure is the number of parameter tensors of the network.
    num_layers = len(list(net.parameters()))
    print("Number of layers: "+str(num_layers))

    # Train, then persist the learned weights next to the working directory.
    net, iterations, record, cls_loss_graph, bbox_loss_graph = training(
        net, train_data_loader
    )
    torch.save(net.state_dict(), os.getcwd() + "/iou_net")

    # Draw the three loss curves on one figure, save it and display it.
    plt.figure()
    for series, label in (
        (record, "Training Loss"),
        (cls_loss_graph, "Classification Loss"),
        (bbox_loss_graph, "Regression Loss with IoU Loss"),
    ):
        plt.plot(series, label = label)
    plt.title("Loss vs Iterations Graph")
    plt.xlabel("Iterations")
    plt.ylabel("Loss")
    plt.legend()
    plt.savefig("loss_graph_IoU.jpg")
    plt.show()