Cleanup

snuspl · Jul 18, 2019 · 6e4d798
1 parent bebe474
commit 6e4d798
Show file tree

Hide file tree

Showing 5 changed files with 15 additions and 276 deletions.
diff --git a/data_util.py b/data_util.py
diff --git a/models/jsfusion/attention.py b/models/jsfusion/attention.py
@@ -4,45 +4,28 @@
 MIN_TIMESCALE=1.0
 MAX_TIMESCALE=1.0e4
 
-def add_timing_signal_nd(position, inv_timescales):
-  shape = [1, 40, 2048]
+def add_timing_signal_nd(num_frames, video_channels):
+  shape = [1, num_frames, video_channels]
   num_dims = len(shape) - 2
-
-  assert num_dims == 1
   channels = shape[-1]
-  assert channels % 2 == 0
   num_timescales = channels // (num_dims * 2)
-
-  log_timescale_increment = \
-      math.log(MAX_TIMESCALE / MIN_TIMESCALE) / (num_timescales - 1)
-
-
-#  inv_timescales = []
-#  for i in range(num_timescales):
-#    inv_timescales.append(MIN_TIMESCALE * math.exp(-float(i) * log_timescale_increment))
-#
-
-  # Python 2
-  # inv_timescales = map(lambda ts: MIN_TIMESCALE * math.exp(-float(ts) * log_timescale_increment),
-                       # range(num_timescales))
-
-
-  # for dim in xrange(num_dims):
+  log_timescale_increment = math.log(MAX_TIMESCALE / MIN_TIMESCALE) / (num_timescales - 1)
+  inv_timescales = []
+  for i in range(num_timescales):
+    inv_timescales.append(1.0 * math.exp(-float(i) * log_timescale_increment))
   dim = 0
   length = shape[dim + 1]
-  # position = torch.tensor(range(40), dtype=torch.float32)
+
+  position = torch.tensor(range(num_frames), dtype=torch.float32)
+  inv_timescales = torch.tensor(inv_timescales, dtype=torch.float32)
+
   position = torch.unsqueeze(position, dim=1)
 
-  #inv_timescales = torch.tensor(inv_timescales, dtype=torch.float32)
   inv_timescales = torch.unsqueeze(inv_timescales, dim=0)
 
   scaled_time = position.matmul(inv_timescales)
 
   signal = torch.cat([scaled_time.sin(), scaled_time.cos()], dim=1)
   signal = torch.unsqueeze(signal, 0)
 
-#  if torch.cuda.is_available():
-#    device = torch.device('cuda:%d' % j)
-#    signal = signal.to(device=device, non_blocking=True)
-
   return signal
diff --git a/models/jsfusion/module.py b/models/jsfusion/module.py
@@ -37,6 +37,7 @@ def forward(self, tensors):
 
 
 class MCModel(torch.nn.Module):
+
   def __init__(self, device, dropout_prob = 0.5, video_channels = 2048, num_frames = 40):
     super(MCModel, self).__init__()
 
@@ -45,25 +46,10 @@ def __init__(self, device, dropout_prob = 0.5, video_channels = 2048, num_frames
     self.num_frames = num_frames
     self.register_buffer('mask', torch.ones((self.num_frames), dtype=torch.float32))
     self.register_buffer('one', torch.tensor(1, dtype=torch.int32))
-
-    shape = [1, num_frames, video_channels]
-    num_dims = len(shape) - 2
-    channels = shape[-1]
-    num_timescales = channels // (num_dims * 2)
-    log_timescale_increment = math.log(1.0e4 / 1.0) / (num_timescales - 1)
-    inv_timescales = []
-    for i in range(num_timescales):
-      inv_timescales.append(1.0 * math.exp(-float(i) * log_timescale_increment))
-    dim = 0
-    length = shape[dim + 1]
-
-    self.position = torch.tensor(range(40), dtype=torch.float32)
-    self.inv_timescales = torch.tensor(inv_timescales, dtype=torch.float32)
-
-    self.register_buffer('signal', add_timing_signal_nd(self.position, self.inv_timescales))
+    self.register_buffer('signal', add_timing_signal_nd(self.num_frames, video_channels))
 
     self.dropout = torch.nn.Dropout(p=dropout_prob)
-    self.conv1 = torch.nn.Conv2d(video_channels, 2048, [3, 1], padding=(1, 0))
+    self.conv1 = torch.nn.Conv2d(2048, 2048, [3, 1], padding=(1, 0))
     self.relu1 = torch.nn.ReLU()
     self.bn1 = torch.nn.BatchNorm2d(2048, eps=0.001, momentum=0.001)
     self.conv2 = torch.nn.Conv2d(2048, 2048, [3, 1], padding=(1, 0))
@@ -137,7 +123,6 @@ def __init__(self, device, dropout_prob = 0.5, video_channels = 2048, num_frames
     self.final_bn4 = torch.nn.BatchNorm1d(1, eps=0.001, momentum=0.001)
 
     self.word2idx = hkl.load(os.path.join(os.environ['LSMDC_PATH'], 'hkls/common_word_to_index_py3.hkl'))
-    print('word2idx path', os.path.join(os.environ['LSMDC_PATH'], 'hkls/common_word_to_index_py3.hkl'))
 
   def video_embeddings(self, video, mask):
     # BxLxC
@@ -443,9 +428,9 @@ def final(self, fusion_next):
   def parse_sentences(self, word2idx, mc, max_length):
     import numpy as np
     def sentence_to_words(sentence):
-      import data_util
+      from models.jsfusion.data_util import clean_str
       try:
-        words = data_util.clean_str(sentence).split()
+        words = clean_str(sentence).split()
       except:
         print('[ERROR] sentence is broken: ' + sentence)
         sys.exit(1)

diff --git a/models/null/__init__.py b/models/null/__init__.py
diff --git a/models/null/model.py b/models/null/model.py