diff --git a/data/__init__.py b/data/__init__.py
index a67c1cf9f..66087a97a 100644
--- a/data/__init__.py
+++ b/data/__init__.py
@@ -1,5 +1,5 @@
 from .voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES, VOC_ROOT
-
+from .subt_artifact import SUBTDetection, SUBTAnnotationTransform, SUBT_CLASSES, SUBT_ROOT
 from .coco import COCODetection, COCOAnnotationTransform, COCO_CLASSES, COCO_ROOT, get_label_map
 from .config import *
 import torch
diff --git a/data/coco.py b/data/coco.py
index 765531761..7c99a5ddf 100644
--- a/data/coco.py
+++ b/data/coco.py
@@ -8,7 +8,9 @@
 import cv2
 import numpy as np
 
-COCO_ROOT = osp.join(HOME, 'data/coco/')
+# COCO_ROOT = osp.join(HOME, 'data/coco/')
+# COCO_ROOT = osp.join(HOME, '/content/ssd.pytorch/data/')
+COCO_ROOT = osp.join(HOME, 'data/')
 IMAGES = 'images'
 ANNOTATIONS = 'annotations'
 COCO_API = 'PythonAPI'
diff --git a/data/config.py b/data/config.py
index 8999622cc..254af667d 100644
--- a/data/config.py
+++ b/data/config.py
@@ -2,7 +2,8 @@
 import os.path
 
 # gets home dir cross platform
-HOME = os.path.expanduser("~")
+# HOME = os.path.expanduser("~")
+HOME = '/content/ssd.pytorch/'
 
 # for making bounding boxes pretty
 COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128),
@@ -40,3 +41,18 @@
     'clip': True,
     'name': 'COCO',
 }
+
+SubT = {
+    'num_classes': 6,
+    'lr_steps': (4000, 8000, 12000),
+    'max_iter': 7500,
+    'feature_maps': [38, 19, 10, 5, 3, 1],
+    'min_dim': 300,
+    'steps': [8, 16, 32, 64, 100, 300],
+    'min_sizes': [21, 45, 99, 153, 207, 261],
+    'max_sizes': [45, 99, 153, 207, 261, 315],
+    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
+    'variance': [0.1, 0.2],
+    'clip': True,
+    'name': 'SubT',
+}
diff --git a/data/scripts/VOC2007.sh b/data/scripts/VOC2007.sh
index 9d53c8e99..af9783650 100755
--- a/data/scripts/VOC2007.sh
+++ b/data/scripts/VOC2007.sh
@@ -7,9 +7,9 @@ start=`date +%s`
 if [ -z "$1" ]
   then
     # navigate to ~/data
-    echo "navigating to ~/data/ ..."
-    mkdir -p ~/data
-    cd ~/data/
+    echo "navigating to /content/ssd.pytorch/data ..."
+    # mkdir -p /content/data
+    cd /content/ssd.pytorch/data
   else
    # check if is valid directory
    if [ ! -d $1 ]; then
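
Aside on the config hunks above (commentary, not part of the patch): the new `SubT` entry mirrors the layout of the existing `voc` and `coco` dicts, so `PriorBox` and `train.py` can consume it unchanged. As written, the last two `lr_steps` (8000, 12000) lie beyond `max_iter` = 7500, so only the first learning-rate drop would ever fire in a full run. A minimal consistency check, using only the values shown in the hunk:

```python
# Illustrative sanity check only -- not part of the patch.
# The dict is copied from the data/config.py hunk above.
SubT = {
    'num_classes': 6,  # 5 artifact classes + 1 background
    'lr_steps': (4000, 8000, 12000),
    'max_iter': 7500,
    'feature_maps': [38, 19, 10, 5, 3, 1],
    'min_dim': 300,
    'steps': [8, 16, 32, 64, 100, 300],
    'min_sizes': [21, 45, 99, 153, 207, 261],
    'max_sizes': [45, 99, 153, 207, 261, 315],
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
    'variance': [0.1, 0.2],
    'clip': True,
    'name': 'SubT',
}

SUBT_CLASSES = ['missle', 'backpack', 'blueline', 'drill', 'can']

# Each per-layer list must line up with the six SSD300 feature maps, and
# num_classes must be the artifact count plus one for the background class.
num_layers = len(SubT['feature_maps'])
for key in ('steps', 'min_sizes', 'max_sizes', 'aspect_ratios'):
    assert len(SubT[key]) == num_layers, key
assert SubT['num_classes'] == len(SUBT_CLASSES) + 1
```
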
diff --git a/data/subt_artifact.py b/data/subt_artifact.py
new file mode 100644
index 000000000..b4b856428
--- /dev/null
+++ b/data/subt_artifact.py
@@ -0,0 +1,202 @@
+"""VOC Dataset Classes
+
+Original author: Francisco Massa
+https://github.com/fmassa/vision/blob/voc_dataset/torchvision/datasets/voc.py
+
+Updated by: Ellis Brown, Max deGroot
+"""
+from .config import HOME
+import os.path as osp
+import sys
+import torch
+import torch.utils.data as data
+if '/opt/ros/kinetic/lib/python2.7/dist-packages' in sys.path:
+    sys.path.remove('/opt/ros/kinetic/lib/python2.7/dist-packages')
+import cv2
+import cv2
+import numpy as np
+if sys.version_info[0] == 2:
+    import xml.etree.cElementTree as ET
+else:
+    import xml.etree.ElementTree as ET
+
+SUBT_CLASSES = [  # always index 0
+    'missle', 'backpack', 'blueline', 'drill', 'can']
+
+# SUBT_CLASSES = (  # always index 0
+#     'valve', '')
+
+# note: if you used our download scripts, this should be right
+SUBT_ROOT = osp.join(HOME, "data/subt_artifact/")
+
+
+class SUBTAnnotationTransform(object):
+    """Transforms a VOC annotation into a Tensor of bbox coords and label index
+    Initialized with a dictionary lookup of classnames to indexes
+
+    Arguments:
+        class_to_ind (dict, optional): dictionary lookup of classnames -> indexes
+            (default: alphabetic indexing of VOC's 20 classes)
+        keep_difficult (bool, optional): keep difficult instances or not
+            (default: False)
+        height (int): height
+        width (int): width
+    """
+
+    def __init__(self, class_to_ind=None, keep_difficult=False):
+        self.class_to_ind = class_to_ind or dict(
+            zip(SUBT_CLASSES, range(len(SUBT_CLASSES))))
+        self.keep_difficult = keep_difficult
+
+    def __call__(self, target, width, height):
+        """
+        Arguments:
+            target (annotation) : the target annotation to be made usable
+                will be an ET.Element
+        Returns:
+            a list containing lists of bounding boxes  [bbox coords, class name]
+        """
+        res = []
+        for obj in target.iter('object'):
+            # difficult = int(obj.find('difficult').text) == 1
+            # if not self.keep_difficult and difficult:
+            #     continue
+            name = obj.find('name').text.lower().strip()
+            if name not in self.class_to_ind:
+                continue
+            bbox = obj.find('bndbox')
+            if bbox is not None:
+                pts = ['xmin', 'ymin', 'xmax', 'ymax']
+                bndbox = []
+                for i, pt in enumerate(pts):
+                    cur_pt = int(bbox.find(pt).text) - 1
+                    # scale height or width
+                    cur_pt = cur_pt / width if i % 2 == 0 else cur_pt / height
+                    bndbox.append(cur_pt)
+                label_idx = self.class_to_ind[name]
+                bndbox.append(label_idx)
+                res += [bndbox]  # [xmin, ymin, xmax, ymax, label_ind]
+                # img_id = target.find('filename').text[:-4]
+            else:  # For LabelMe tool
+                polygons = obj.find('polygon')
+                x = []
+                y = []
+                bndbox = []
+                for polygon in polygons.iter('pt'):
+                    # scale height or width
+                    x.append(int(polygon.find('x').text) / width)
+                    y.append(int(polygon.find('y').text) / height)
+                bndbox.append(min(x))
+                bndbox.append(min(y))
+                bndbox.append(max(x))
+                bndbox.append(max(y))
+                label_idx = self.class_to_ind[name]
+                bndbox.append(label_idx)
+                res += [bndbox]  # [xmin, ymin, xmax, ymax, label_ind]
+
+        return res  # [[xmin, ymin, xmax, ymax, label_ind], ... ]
+
+
+class SUBTDetection(data.Dataset):
+    """VOC Detection Dataset Object
+
+    input is image, target is annotation
+
+    Arguments:
+        root (string): filepath to VOCdevkit folder.
+        image_set (string): imageset to use (eg. 'train', 'val', 'test')
+        transform (callable, optional): transformation to perform on the
+            input image
+        target_transform (callable, optional): transformation to perform on the
+            target `annotation`
+            (eg: take in caption string, return tensor of word indices)
+        dataset_name (string, optional): which dataset to load
+            (default: 'VOC2007')
+    """
+
+    def __init__(self, root,
+                 image_sets=['train', 'val'],
+                 transform=None, target_transform=SUBTAnnotationTransform(),
+                 dataset_name='SUBT'):
+        self.root = root
+        self.image_set = image_sets
+        self.transform = transform
+        self.target_transform = target_transform
+        self.name = dataset_name
+        self._annopath = osp.join('%s', 'Annotations', '%s.xml')
+        self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg')
+        self.ids = list()
+        for name in image_sets:
+            rootpath = osp.join(self.root)
+            for line in open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')):
+                self.ids.append((rootpath, line.strip()))
+
+    def __getitem__(self, index):
+        im, gt, h, w = self.pull_item(index)
+
+        return im, gt
+
+    def __len__(self):
+        return len(self.ids)
+
+    def pull_item(self, index):
+        img_id = self.ids[index]
+
+        target = ET.parse(self._annopath % img_id).getroot()
+        img = cv2.imread(self._imgpath % img_id)
+        height, width, channels = img.shape
+
+        if self.target_transform is not None:
+            target = self.target_transform(target, width, height)
+
+        if self.transform is not None:
+            target = np.array(target)
+            img, boxes, labels = self.transform(img, target[:, :4], target[:, 4])
+            # to rgb
+            img = img[:, :, (2, 1, 0)]
+            # img = img.transpose(2, 0, 1)
+            target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
+        return torch.from_numpy(img).permute(2, 0, 1), target, height, width
+
+    def pull_image(self, index):
+        '''Returns the original image object at index in PIL form
+
+        Note: not using self.__getitem__(), as any transformations passed in
+        could mess up this functionality.
+
+        Argument:
+            index (int): index of img to show
+        Return:
+            PIL img
+        '''
+        img_id = self.ids[index]
+        return cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)
+
+    def pull_anno(self, index):
+        '''Returns the original annotation of image at index
+
+        Note: not using self.__getitem__(), as any transformations passed in
+        could mess up this functionality.
+
+        Argument:
+            index (int): index of img to get annotation of
+        Return:
+            list:  [img_id, [(label, bbox coords),...]]
+                eg: ('001718', [('dog', (96, 13, 438, 332))])
+        '''
+        img_id = self.ids[index]
+        anno = ET.parse(self._annopath % img_id).getroot()
+        gt = self.target_transform(anno, 1, 1)
+        return img_id[1], gt
+
+    def pull_tensor(self, index):
+        '''Returns the original image at an index in tensor form
+
+        Note: not using self.__getitem__(), as any transformations passed in
+        could mess up this functionality.
+
+        Argument:
+            index (int): index of img to show
+        Return:
+            tensorized version of img, squeezed
+        '''
+        return torch.Tensor(self.pull_image(index)).unsqueeze_(0)
\ No newline at end of file
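
Aside (not part of the patch): SUBTDetection is a drop-in analogue of VOCDetection, so it slots into the existing training pipeline the same way. A rough usage sketch, assuming the data is laid out under SUBT_ROOT as ImageSets/Main/<split>.txt, Annotations/<id>.xml and JPEGImages/<id>.jpg, and reusing the SSDAugmentation and detection_collate helpers from the surrounding repository (as in the upstream train.py):

```python
# Hypothetical usage sketch -- not part of the patch.
import torch.utils.data as data

from data import SUBTDetection, SUBTAnnotationTransform, SUBT_ROOT, detection_collate
from utils.augmentations import SSDAugmentation  # same transform train.py uses

dataset = SUBTDetection(root=SUBT_ROOT,
                        image_sets=['train'],
                        transform=SSDAugmentation(300, (104, 117, 123)),
                        target_transform=SUBTAnnotationTransform())

loader = data.DataLoader(dataset, batch_size=8, shuffle=True,
                         collate_fn=detection_collate)

images, targets = next(iter(loader))
# images: (8, 3, 300, 300) float tensor; targets: list of (n_objects, 5) tensors,
# each row [xmin, ymin, xmax, ymax, label_idx] with coordinates normalised
# to [0, 1] by SUBTAnnotationTransform.
```
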
diff --git a/layers/box_utils.py b/layers/box_utils.py
index 84214947b..86f61c94e 100644
--- a/layers/box_utils.py
+++ b/layers/box_utils.py
@@ -212,6 +212,18 @@ def nms(boxes, scores, overlap=0.5, top_k=200):
         if idx.size(0) == 1:
             break
         idx = idx[:-1]  # remove kept element from view
+        ########################################################
+        idx = torch.autograd.Variable(idx, requires_grad=False)
+        idx = idx.data
+        x1 = torch.autograd.Variable(x1, requires_grad=False)
+        x1 = x1.data
+        y1 = torch.autograd.Variable(y1, requires_grad=False)
+        y1 = y1.data
+        x2 = torch.autograd.Variable(x2, requires_grad=False)
+        x2 = x2.data
+        y2 = torch.autograd.Variable(y2, requires_grad=False)
+        y2 = y2.data
+        ########################################################
         # load bboxes of next highest vals
         torch.index_select(x1, 0, idx, out=xx1)
         torch.index_select(y1, 0, idx, out=yy1)
@@ -231,6 +243,12 @@ def nms(boxes, scores, overlap=0.5, top_k=200):
         h = torch.clamp(h, min=0.0)
         inter = w*h
         # IoU = i / (area(a) + area(b) - i)
+        ################################################
+        area = torch.autograd.Variable(area, requires_grad=False)
+        area = area.data
+        idx = torch.autograd.Variable(idx, requires_grad=False)
+        idx = idx.data
+        ################################################
         rem_areas = torch.index_select(area, 0, idx)  # load remaining areas)
         union = (rem_areas - inter) + area[i]
         IoU = inter/union  # store result in iou
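
Aside (not part of the patch): the blocks added to nms() strip autograd history from the coordinate tensors so the subsequent index_select(..., out=...) calls operate on plain tensors under newer PyTorch; Variable(t, requires_grad=False) followed by .data is the pre-0.4 spelling of that. On PyTorch >= 0.4 a single detach() has the same effect, as in this standalone sketch (names mirror nms() but nothing here touches the repository code):

```python
# Standalone sketch of the detach() equivalent -- not part of the patch.
import torch

boxes = torch.rand(200, 4, requires_grad=True)
x1 = boxes[:, 0].detach()      # plays the role of Variable(x1, requires_grad=False).data
idx = torch.arange(50)         # indices of boxes still under consideration
xx1 = x1.new_empty(0)          # reusable output buffer, as in nms()
torch.index_select(x1, 0, idx, out=xx1)
assert xx1.shape == (50,)
```
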
diff --git a/layers/modules/multibox_loss.py b/layers/modules/multibox_loss.py
index fb49cf439..7d7f2f140 100644
--- a/layers/modules/multibox_loss.py
+++ b/layers/modules/multibox_loss.py
@@ -30,10 +30,25 @@ class MultiBoxLoss(nn.Module):
     See: https://arxiv.org/pdf/1512.02325.pdf for more details.
     """
 
-    def __init__(self, num_classes, overlap_thresh, prior_for_matching,
+    # def __init__(self, num_classes, overlap_thresh, prior_for_matching,
+    #              bkg_label, neg_mining, neg_pos, neg_overlap, encode_target,
+    #              use_gpu=True):
+    #     super(MultiBoxLoss, self).__init__()
+    #     self.use_gpu = use_gpu
+    #     self.num_classes = num_classes
+    #     self.threshold = overlap_thresh
+    #     self.background_label = bkg_label
+    #     self.encode_target = encode_target
+    #     self.use_prior_for_matching = prior_for_matching
+    #     self.do_neg_mining = neg_mining
+    #     self.negpos_ratio = neg_pos
+    #     self.neg_overlap = neg_overlap
+    #     self.variance = cfg['variance']
+    def __init__(self, batch_size, num_classes, overlap_thresh, prior_for_matching,
                  bkg_label, neg_mining, neg_pos, neg_overlap, encode_target,
                  use_gpu=True):
         super(MultiBoxLoss, self).__init__()
+        self.batch_size = batch_size
         self.use_gpu = use_gpu
         self.num_classes = num_classes
         self.threshold = overlap_thresh
@@ -94,6 +109,16 @@ def forward(self, predictions, targets):
         loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
 
         # Hard Negative Mining
+        # loss_c[pos] = 0  # filter out pos boxes for now
+        # loss_c = loss_c.view(num, -1)
+        # _, loss_idx = loss_c.sort(1, descending=True)
+        # _, idx_rank = loss_idx.sort(1)
+        # num_pos = pos.long().sum(1, keepdim=True)
+        # num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
+        # neg = idx_rank < num_neg.expand_as(idx_rank)
+        a = int(len(loss_c)/8732)
+        loss_c = loss_c.reshape((a, 8732))
+        # print(loss_c.shape, pos.shape)
         loss_c[pos] = 0  # filter out pos boxes for now
         loss_c = loss_c.view(num, -1)
         _, loss_idx = loss_c.sort(1, descending=True)
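
Aside (not part of the patch): in the hard-negative-mining hunk, loss_c leaves log_sum_exp(...) - gather(...) with shape (batch * num_priors, 1) while pos is (batch, num_priors), so loss_c[pos] fails on newer PyTorch until loss_c is reshaped. The patch hard-codes 8732, the SSD300 prior count; the same fix can be written in terms of pos so it still holds if the prior count ever changes, as in this sketch:

```python
# Standalone sketch of the shape fix without the hard-coded 8732 -- not part of the patch.
import torch

batch, num_priors = 4, 8732
loss_c = torch.rand(batch * num_priors, 1)   # stand-in for log_sum_exp(...) - gather(...)
pos = torch.rand(batch, num_priors) > 0.95   # stand-in for the positive-match mask

loss_c = loss_c.view(pos.size(0), -1)        # same effect as reshape((a, 8732))
loss_c[pos] = 0                              # boolean mask now lines up
_, loss_idx = loss_c.sort(1, descending=True)
_, idx_rank = loss_idx.sort(1)
```
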
""" - def __init__(self, num_classes, overlap_thresh, prior_for_matching, + # def __init__(self, num_classes, overlap_thresh, prior_for_matching, + # bkg_label, neg_mining, neg_pos, neg_overlap, encode_target, + # use_gpu=True): + # super(MultiBoxLoss, self).__init__() + # self.use_gpu = use_gpu + # self.num_classes = num_classes + # self.threshold = overlap_thresh + # self.background_label = bkg_label + # self.encode_target = encode_target + # self.use_prior_for_matching = prior_for_matching + # self.do_neg_mining = neg_mining + # self.negpos_ratio = neg_pos + # self.neg_overlap = neg_overlap + # self.variance = cfg['variance'] + def __init__(self, batch_size, num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap, encode_target, use_gpu=True): super(MultiBoxLoss, self).__init__() + self.batch_size = batch_size self.use_gpu = use_gpu self.num_classes = num_classes self.threshold = overlap_thresh @@ -94,6 +109,16 @@ def forward(self, predictions, targets): loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1)) # Hard Negative Mining + # loss_c[pos] = 0 # filter out pos boxes for now + # loss_c = loss_c.view(num, -1) + # _, loss_idx = loss_c.sort(1, descending=True) + # _, idx_rank = loss_idx.sort(1) + # num_pos = pos.long().sum(1, keepdim=True) + # num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) + # neg = idx_rank < num_neg.expand_as(idx_rank) + a = int(len(loss_c)/8732) + loss_c = loss_c.reshape((a, 8732)) + #print(loss_c.shape,pos.shape) loss_c[pos] = 0 # filter out pos boxes for now loss_c = loss_c.view(num, -1) _, loss_idx = loss_c.sort(1, descending=True) diff --git a/ssd.py b/ssd.py index 80a23d638..5245ca181 100644 --- a/ssd.py +++ b/ssd.py @@ -96,12 +96,13 @@ def forward(self, x): loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) if self.phase == "test": - output = self.detect( - loc.view(loc.size(0), -1, 4), # loc preds - self.softmax(conf.view(conf.size(0), -1, - self.num_classes)), # conf preds - self.priors.type(type(x.data)) # default boxes - ) + # output = self.detect( + # loc.view(loc.size(0), -1, 4), # loc preds + # self.softmax(conf.view(conf.size(0), -1, + # self.num_classes)), # conf preds + # self.priors.type(type(x.data)) # default boxes + # ) + output=self.detect.forward(loc.view(loc.size(0), -1, 4),self.softmax(conf.view(conf.size(0), -1,self.num_classes)),self.priors.type(type(x.data))) else: output = ( loc.view(loc.size(0), -1, 4),