update

panthersuper · Dec 4, 2017 · 2869bb3 · 2869bb3
1 parent a6e52b4
commit 2869bb3
Show file tree

Hide file tree

Showing 6 changed files with 3,922 additions and 53 deletions.
diff --git a/main.py b/main.py
@@ -7,20 +7,21 @@
 from model.reconNet import ReconNet
 import torch.optim as optim
 import torch.optim.lr_scheduler as s
+import torch.nn.functional as F
 
 
 # Dataset Parameters
 load_size =256
 fine_size = 224
-data_mean = np.asarray([0.3,0.3,0.3,0])
-batch_size = 120
+data_mean = np.asarray([0.485, 0.456, 0.406,0])  # first three entries equal the widely used ImageNet RGB means; 4th is 0 -- presumably an extra input channel, TODO confirm against DataLoaderDisk
+batch_size = 20  # also hard-wired into the label reshape in the training loop: labels.view(batch_size,32,1024)
 voxel_size = 256
 
 # Training Parameters
-learning_rate = 0.01
+learning_rate = 0.0001  # initial SGD learning rate; the StepLR scheduler created further down uses step_size=1, gamma=0.1
 training_epoches = 10
 step_display = 10
-step_save = 2
+step_save = 1  # NOTE(review): the checkpoint test further down is `epoch % step_save == 1`; n % 1 is always 0, so with step_save = 1 that test is never true
 path_save = 'recon0'
 start_from = ''#'./alexnet64/Epoch28'
 starting_num = 1
@@ -39,8 +40,8 @@
 
     }
 opt_data_val = {
-    'img_root': 'data/train_imgs/',   # MODIFY PATH ACCORDINGLY
-    'voxel_root': 'data/train_voxels/',   # MODIFY PATH ACCORDINGLY
+    'img_root': 'data/val_imgs/',   # MODIFY PATH ACCORDINGLY
+    'voxel_root': 'data/val_voxels/',   # MODIFY PATH ACCORDINGLY
     'load_size': load_size,
     'fine_size': fine_size,
     'voxel_size': voxel_size,
@@ -50,32 +51,60 @@
 
     }
 
+def evaluate_voxel_prediction(prediction,gt):
+  """  The prediction and gt are 3 dim voxels. Each voxel has values 1 or 0"""
+  # Returns intersection-over-union (IoU): number of cells set in both arrays / number of cells set in either.
+  intersection = np.sum(np.logical_and(prediction,gt))  # cells that are nonzero in both prediction and gt
+  union = np.sum(np.logical_or(prediction,gt))  # cells that are nonzero in at least one of them
+  IoU = intersection / union  # NOTE(review): if both inputs are all zero, union is 0 and this yields nan with a NumPy RuntimeWarning
+  # No thresholding happens here: any nonzero entry counts as occupied (np.logical_and / np.logical_or semantics).
+  return IoU
+
 def get_accuracy(loader, size, net):
-    top_1_correct = 0
-    top_5_correct = 0
+    sumup = 0  # running sum of per-sample IoU; the function returns the mean IoU over `size` single-sample batches
 
     for i in range(size):
         inputs, labels = loader.next_batch(1)
         inputs = np.swapaxes(inputs,1,3)
         inputs = np.swapaxes(inputs,2,3)
         inputs = torch.from_numpy(inputs).float().cuda()
-        labels = torch.from_numpy(labels).long().cuda()
 
         net.eval()
         outputs = net(Variable(inputs))
-        _, predicted = torch.max(outputs.data, 1)
-        top_1_correct += (predicted == labels).sum()
-        _, predicted = torch.topk(outputs.data, 5)
-        for i in range(5):
-            top_5_correct += (predicted[:,i] == labels).sum()
 
-    return 100 * top_1_correct / float(size), 100 * top_5_correct / float(size)
+        # NOTE(review): net.eval() above is never undone in this function; a caller that keeps training afterwards must call net.train() again.
+        # The two swapaxes calls above reorder axes (0,1,2,3) -> (0,3,1,2), i.e. the last axis becomes axis 1 (presumably NHWC -> NCHW; confirm with DataLoaderDisk).
+        outputs = outputs.cpu().data.numpy()  # copy the network output to host memory as a NumPy array
+        # print("pre1",outputs,np.shape(outputs))
+        # Each iteration pulls one sample with next_batch(1), so calling this function advances the loader it is given.
+        # print("pre1.5",np.max(outputs, axis=1))
+        # argmax over axis 1 picks the higher-scoring of the two classes per cell (ReconNet.forward in this commit returns (batch, 2, 32, 1024)).
+        outputs = np.argmax(outputs, axis=1)
+        # print("pre2",outputs,np.shape(outputs))
+        # Labels are reshaped to the same (1, 32, 1024) layout as the argmax result so the IoU compares cell by cell.
+        # outputs = np.reshape(outputs,[1, 32,32,32])
+        labels = np.reshape(labels,[1,32,1024]).astype(int)
+
+        # print(np.shape(outputs),np.shape(labels))
+
+        sumup += evaluate_voxel_prediction(outputs,labels)  # IoU of this one sample
+        # NOTE(review): size == 0 skips the loop and the return below then raises ZeroDivisionError.
+    return sumup/size
 
 def weights_init(m):  # per-module initializer, applied through net.apply(weights_init)
     classname = m.__class__.__name__
     if classname.find('Conv2') != -1:  # matches only class names containing 'Conv2' (e.g. Conv2d); Conv3d / ConvTranspose3d / ConvTranspose2d do not match
         nn.init.kaiming_uniform(m.weight.data)  # in-place Kaiming-uniform init of the weights; NOTE(review): newer PyTorch spells this kaiming_uniform_
 
+class CrossEntropyLoss2d(nn.Module):  # loss module: log-softmax over the input followed by nn.NLLLoss2d
+    def __init__(self, weight=None, size_average=False, ignore_index=-100):  # NOTE(review): weight and ignore_index are accepted but never used
+        super(CrossEntropyLoss2d, self).__init__()
+        self.nll_loss = nn.NLLLoss2d(size_average = size_average)  # only size_average is forwarded; NOTE(review): NLLLoss2d is deprecated in later PyTorch in favour of NLLLoss
+        # forward(inputs, targets): inputs are raw scores, targets are class indices -- presumably (N, C, H, W) and (N, H, W); confirm with caller.
+    def forward(self, inputs, targets):
+        return self.nll_loss(F.log_softmax(inputs), targets)  # NOTE(review): no explicit dim; for 4-D input the implicit choice is presumably dim 1 (class axis) -- pass dim=1 to be explicit
+
+
+
 loader_train = DataLoaderDisk(**opt_data_train)
 loader_val = DataLoaderDisk(**opt_data_val)
 
@@ -87,11 +116,12 @@ def weights_init(m):
     net.apply(weights_init)
 
 # criterion = nn.MSELoss().cuda()
-criterion = nn.NLLLoss().cuda()
+# criterion = nn.NLLLoss2d().cuda()
+criterion = CrossEntropyLoss2d(size_average=True).cuda()  # log-softmax + NLLLoss2d wrapper defined earlier in this file; size_average=True is forwarded to NLLLoss2d
 
 
 optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.8, weight_decay=0.005)   # NOTE(review): net.parameters() only covers registered submodules; layers constructed inside ReconNet.forward are not included
-scheduler = s.StepLR(optimizer, step_size=3, gamma=0.1)
+scheduler = s.StepLR(optimizer, step_size=1, gamma=0.1)  # step_size=1, gamma=0.1: lr shrinks tenfold per scheduler.step(); NOTE(review): confirm such a fast decay is intended
 
 running_loss = 0.0
 
@@ -103,7 +133,7 @@ def weights_init(m):
     scheduler.step()
     net.train()
 
-    for i in range(4000):  # loop over the dataset multiple times
+    for i in range(1400):  # loop over the dataset multiple times
         data = loader_train.next_batch(batch_size)
 
         # get the inputs
@@ -123,30 +153,39 @@ def weights_init(m):
         # zero the parameter gradients
         optimizer.zero_grad()
 
-        # forward + backward + optimize
-        output = net(inputs).float().contiguous() # places output
+        # forward + backward + optimize # 60*2*32*1024
+        output = net(inputs) # places output
 
-        output = output.view(batch_size*32*32*32,-1)
-        labels = labels.view(-1)
+        # output = F.log_softmax(output)
+        # output = output.view(batch_size,32,32*32,-1)
+        labels = labels.view(batch_size,32,1024)
 
         loss = criterion(output, labels)
 
+
         loss.backward()
         optimizer.step()
 
+        # print("IoU", get_accuracy(loader_train, 100, net))
+
         # print statistics
         running_loss += loss.data[0]
         if i % step_display == step_display - 1:    # print every 100 mini-batches
-            print('PLACES TRAINING Epoch: %d %d loss: %.10f' %
+            print('TRAINING Epoch: %d %d loss: %.10f' %
                   (epoch + starting_num, i + 1, running_loss/step_display))
             with open('./' + path_save + '/log.txt', 'a') as f:
-                f.write('PLACES TRAINING Epoch: %d %d loss: %.10f\n' %
+                f.write('TRAINING Epoch: %d %d loss: %.10f\n' %
                   (epoch + starting_num, i + 1, running_loss/step_display))
 
             running_loss = 0.0
 
+            acc = get_accuracy(loader_train, 100, net)
+            print("IoU: ", acc)
+            with open('./' + path_save + '/log.txt', 'a') as f:
+                f.write("IoU: "+ str(acc))
+
     if epoch % step_save == 1:
-       torch.save(net.state_dict(), './' + path_save + '/Epoch'+str(epoch+starting_num))
+        torch.save(net.state_dict(), './' + path_save + '/Epoch'+str(epoch+starting_num))
 
     # net.eval()
     # with open('./' + path_save + '/log.txt', 'a') as f:

diff --git a/model/__pycache__/reconNet.cpython-36.pyc b/model/__pycache__/reconNet.cpython-36.pyc
diff --git a/model/reconNet.py b/model/reconNet.py
@@ -5,6 +5,8 @@
 
 from alexnet import alex_net
 import torch.nn.functional as F
+from torch.autograd import Variable
+import torch
 
 class ReconNet(nn.Module):
 
@@ -32,8 +34,8 @@ def __init__(self):
             nn.BatchNorm1d(4096),
             nn.LeakyReLU(inplace=True),
             nn.Dropout(),
-            nn.Linear(4096, 4096),
-            nn.BatchNorm1d(4096),
+            nn.Linear(4096, 1024),
+            nn.BatchNorm1d(1024),
             nn.LeakyReLU(inplace=True), # latent vector, size:4096
         )
 
@@ -122,16 +124,19 @@ def __init__(self):
             # 4/4/4 de-conv3 -> 32*32*32
 
             nn.BatchNorm3d(64),
-            nn.LeakyReLU(inplace=True),
+            nn.ReLU(inplace=True),
 
+            nn.Conv3d(64,64,kernel_size=3, stride=1, padding=1),
             nn.ConvTranspose3d(64, 32, kernel_size=6, stride=2, padding=2), #8
             nn.BatchNorm3d(32, affine=True),
-            nn.LeakyReLU(inplace=True),
+            nn.ReLU(inplace=True),
 
+            nn.Conv3d(32,32,kernel_size=3, stride=1, padding=1),
             nn.ConvTranspose3d(32, 8, kernel_size=6, stride=2, padding=2), #16
             nn.BatchNorm3d(8, affine=True),
-            nn.LeakyReLU(inplace=True),
+            nn.ReLU(inplace=True),
 
+            nn.Conv3d(8,8,kernel_size=3, stride=1, padding=1),
             nn.ConvTranspose3d(8, 2, kernel_size=6, stride=2, padding=2), #32     60*2*32*32*32
             nn.Tanh(),
 
@@ -146,19 +151,50 @@ def __init__(self):
 
         # )
 
+        self.softmax = nn.Sequential(
+
+            nn.Softmax2d(),
+
+        )
+
+
+
     def forward(self, x):
         x = self.features(x)
+        batch_size = x.size(0)  # remembered for the final reshape of the output
         x = x.view(x.size(0), 256 * 6 * 6)
         x = self.latentV(x)  # latent vector, size:4096
-        x = x.view(x.size(0),64,4,4,4) # reshape to 4/4/4 cube with 64 channels
-        x = self.decoding(x) # convert to 3D voxel distribution
-
-        # x = x.view(x.size(0),1,32,32,32)
-        x = x.view(x.size(0),2,32768) # 60*2*(32*32*32)
-        x = F.log_softmax(x) # 60*2*(32*32*32)
-        x = x.permute(0,2,1) # 60*(32*32*32)*2
-        # return x
-        return x#.max(0)[1] #flat
+        # NOTE: the last Linear of latentV becomes 4096 -> 1024 in this commit, so the "size:4096" comment above is stale; 1024 = 128*2*2*2 matches the view below.
+        # x = x.view(x.size(0),64,4,4,4) # reshape to 4/4/4 cube with 64 channels
+        # Decoder: four (unpool x2 -> ConvTranspose3d k=3,p=1 -> leaky_relu) stages take 2^3 -> 4^3 -> 8^3 -> 16^3 -> 32^3, then a final 32 -> 2 channel layer.
+        x = x.view(x.size(0), 128, 2, 2, 2)
+        # NOTE(review): every unpool below gets an all-zero index tensor, so values are presumably all scattered to flat position 0 of each channel -- confirm this is intended.
+        x = F.max_unpool3d(x, Variable(torch.Tensor(x.size()).zero_().long().cuda()), kernel_size=2, stride=2)
+        deconv1 = nn.ConvTranspose3d(128, 128, 3, padding=1).cuda()  # NOTE(review): built inside forward -> freshly initialised on every call and not a registered submodule
+        x = deconv1(x)
+        x = F.leaky_relu(x)
+        # The same pattern repeats: each layer below is a new local object per call, so it is absent from net.parameters() and state_dict().
+        x = F.max_unpool3d(x, Variable(torch.Tensor(x.size()).zero_().long().cuda()), kernel_size=2, stride=2)
+        deconv1 = nn.ConvTranspose3d(128, 128, 3, padding=1).cuda()  # second stage; rebinds the local name deconv1
+        x = deconv1(x)
+        x = F.leaky_relu(x)
+
+        x = F.max_unpool3d(x, Variable(torch.Tensor(x.size()).zero_().long().cuda()), kernel_size=2, stride=2)
+        deconv2 = nn.ConvTranspose3d(128, 64, 3, padding=1).cuda()
+        x = deconv2(x)
+        x = F.leaky_relu(x)
+
+        x = F.max_unpool3d(x, Variable(torch.Tensor(x.size()).zero_().long().cuda()), kernel_size=2, stride=2)
+        deconv3 = nn.ConvTranspose3d(64, 32, 3, padding=1).cuda()
+        x = deconv3(x)
+        x = F.leaky_relu(x)
+
+        deconv4 = nn.ConvTranspose3d(32, 2, 3, padding=1).cuda()  # 2 output channels = the two classes later argmax-ed over axis 1 in get_accuracy (main.py)
+        x = deconv4(x)
+        # Raw scores are returned: no softmax here; main.py's CrossEntropyLoss2d applies log_softmax itself.
+        # x = self.decoding(x) # convert to 3D voxel distribution
+        x = x.view(batch_size,2,32,1024) # (batch, 2, 32, 1024): the 32x32x32 volume flattened to a 2-D map for the 2-D loss; the old "60" was a stale batch size
+        return x
 
 class UpsampleConv3Layer(nn.Module):
     """UpsampleConvLayer