Added docs to common and updated server Dockerfile

Roboy · Sep 21, 2019 · 80ab6d1 · 80ab6d1
1 parent 3c2e3dd
commit 80ab6d1
Show file tree

Hide file tree

Showing 10 changed files with 311 additions and 38 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -15,7 +15,7 @@ resampy==0.2.1
 scikit-learn==0.21.2
 scipy==1.3.0
 six==1.12.0
-torch==1.1.0
+torch==1.2.0
 torchvision==0.3.0
 tqdm==4.32.1
 pyyaml==5.1

diff --git a/server/Dockerfile b/server/Dockerfile
@@ -19,7 +19,8 @@ RUN apt install ffmpeg \
                 libsndfile1 \
                 portaudio19-dev \
                 python3-pyaudio \
-                git -y
+                git \
+                vim -y
 
 RUN pip3 install certifi==2019.3.9 \
                  chardet==3.0.4 \
@@ -39,7 +40,7 @@ RUN pip3 install certifi==2019.3.9 \
                  catkin_pkg \
                  librosa \
                  ipdb
-RUN pip3 install torch
+RUN pip3 install torch==1.2.0
 
 COPY . .
 

diff --git a/server/config.yaml b/server/config.yaml
@@ -1,16 +1,16 @@
 models:
-  # - id: 'deepspeech2'
-  #   path: '../pretrained/deepspeech_final.pth'
-  #   decoder: 'greedy'
-  #   name: 'DeepSpeech2'
+   - id: 'deepspeech2'
+     path: '/work/pretrained_host/deepspeech_final.pth'
+     decoder: 'greedy'
+     name: 'DeepSpeech2'
 
   # - id: 'las1'
   #   path: '../pretrained/las_model_1.pt'
   #   decoder: 'greedy'
   #   name: 'LAS_1'
 
   - id: 'las5'
-    path: '../pretrained/las_model_5.pt'
+    path: '/work/pretrained_host/las_model_5.pt'
     decoder: 'greedy'
     name: 'LAS'
 

diff --git a/server/run.sh b/server/run.sh
@@ -1,2 +1,2 @@
 #!/usr/bin/env bash
-docker run -v ../pretrained:/work/pretrained_host  -p 5000:5000 -p 8888:8888 -it ros1_roboy /bin/bash
+docker run -v $1:/work/pretrained_host  -p 5000:5000 -p 8888:8888 -it sonosco_server /bin/bash
diff --git a/sonosco/common/audio_tools.py b/sonosco/common/audio_tools.py
@@ -3,33 +3,89 @@
 import librosa
 
 
-def get_duration(file_path):
-    return float(subprocess.check_output([f'soxi -D "{file_path.strip()}"'], shell=True))
+def get_duration(file_path: str) -> float:
+    """
+    Get the duration of an audio file in seconds using soxi (SoX's info tool).
+    Args:
+        file_path: path to the audio file; surrounding whitespace is stripped
 
+    Returns: duration in seconds
 
-def transcode_recording(source, destination, sample_rate):
+    """
+    return float(subprocess.check_output([f'soxi -D "{file_path.strip()}"'], shell=True))
+
+
+def transcode_recording(source: str, destination: str, sample_rate: int) -> None:
     subprocess.call([f"sox {source} -r {sample_rate} -b 16 -c 1 {destination}"], shell=True)
 
 
-def transcode_recordings_an4(raw_path, wav_path, sample_rate):
+def transcode_recordings_an4(raw_path: str, wav_path: str, sample_rate: int) -> None:
+    """
+    Convert a headerless raw recording to another audio file using sox.
+    Args:
+        raw_path: input file, read as raw 16-bit signed-integer big-endian mono
+        wav_path: output file written by sox
+        sample_rate: sample rate passed to sox for the raw input (-r)
+
+    """
     subprocess.call([f'sox -t raw -r {sample_rate} -b 16 -e signed-integer -B -c 1 \"{raw_path}\" \"{wav_path}\"'],
                     shell=True)
 
 
-def transcode_recordings_ted3(source, destination, start_time, end_time, sample_rate):
+def transcode_recordings_ted3(source: str, destination: str, start_time: int, end_time: int, sample_rate: int) -> None:
+    """
+    Cut a segment out of a recording and transcode it to 16-bit mono using sox.
+    Args:
+        source: path of the input recording
+        destination: path of the output file
+        start_time: trim start position handed to sox
+        end_time: trim end position; passed with a '=' prefix, i.e. absolute
+        sample_rate: output sample rate (-r)
+
+    Returns: None
+
+    """
     subprocess.call([f"sox {source}  -r {sample_rate} -b 16 -c 1 {destination} trim {start_time} ={end_time}"],
                     shell=True)
 
 
-def shift(audio, n_samples=1600):
+def shift(audio: np.ndarray, n_samples: int = 1600) -> np.ndarray:
+    """
+    Circularly shift the audio by n_samples using np.roll.
+    Args:
+        audio: array to shift
+        n_samples: number of positions to roll by
+
+    Returns: shifted audio; elements rolled past the end wrap around to the start
+
+    """
     return np.roll(audio, n_samples)
 
 
-def stretch(audio, rate=1):
+def stretch(audio: np.ndarray, rate: float = 1) -> np.ndarray:
+    """
+    Time-stretch the audio with librosa.effects.time_stretch.
+    Args:
+        audio: audio signal to stretch
+        rate: stretch factor passed to librosa (per librosa docs, >1 speeds up, <1 slows down)
+
+    Returns: stretched audio
+
+    """
     stretched_audio = librosa.effects.time_stretch(audio, rate)
     return stretched_audio
 
 
-def pitch_shift(audio, sample_rate=16000, n_steps=3.0):
+def pitch_shift(audio: np.ndarray, sample_rate: int = 16000, n_steps: float = 3.0) -> np.ndarray:
+    """
+    Shift the pitch of the audio with librosa.effects.pitch_shift.
+    Args:
+        audio: audio signal to shift
+        sample_rate: sampling rate of audio, passed to librosa as sr
+        n_steps: number of steps to shift by (semitones by librosa's default — confirm)
+
+    Returns: pitch-shifted audio
+
+    """
     stretched_audio = librosa.effects.pitch_shift(audio, sr=sample_rate, n_steps=n_steps)
     return stretched_audio
diff --git a/sonosco/common/click_extensions.py b/sonosco/common/click_extensions.py
diff --git a/sonosco/common/noise_makers.py b/sonosco/common/noise_makers.py
@@ -10,7 +10,15 @@ def __call__(self, audio):
         """Adds noise to the audio signal."""
         pass
 
-    def add_noise(self, audio):
+    def add_noise(self, audio: np.ndarray) -> np.ndarray:
+        """
+        Adds noise to the audio
+        Args:
+            audio:
+
+        Returns: audio with noise added
+
+        """
         return self(audio)
 
 
@@ -19,6 +27,13 @@ class GaussianNoiseMaker(NoiseMaker):
     def __init__(self, std=0.002):
         self.std = std
 
-    def __call__(self, audio):
+    def __call__(self, audio: np.ndarray) -> np.ndarray:
+        """
+        Adds noise to the audio
+        Args:
+            audio:
+
+        Returns: audio with noise added
+        """
         noise = np.random.randn(len(audio))
         return audio + self.std * noise
diff --git a/sonosco/common/path_utils.py b/sonosco/common/path_utils.py
@@ -4,16 +4,37 @@
 import codecs
 
 
-def try_create_directory(path: str):
+def try_create_directory(path: str) -> None:
+    """
+    tries to create a directory at given path
+    Args:
+        path:
+
+    """
     if not os.path.exists(path):
         os.makedirs(path)
 
 
-def try_download(destination: str, url: str):
+def try_download(destination: str, url: str) -> None:
+    """
+    Tries to download to destination from url
+    Args:
+        destination:
+        url:
+
+    """
     if not os.path.exists(destination):
         wget.download(url, destination)
 
 
-def parse_yaml(file_path: str):
+def parse_yaml(file_path: str) -> dict:
+    """
+    load yaml into memory
+    Args:
+        file_path:
+
+    Returns: dict with the yaml file content
+
+    """
     with codecs.open(file_path, "r", "utf-8") as file:
         return yaml.load(file, Loader=yaml.FullLoader)
diff --git a/sonosco/inference/deepspeech2_inference.py b/sonosco/inference/deepspeech2_inference.py
@@ -4,7 +4,7 @@
 from sonosco.inference.asr import SonoscoASR
 from sonosco.decoders import GreedyDecoder
 from sonosco.datasets.processor import AudioDataProcessor
-from sonosco.models.deepspeech2 import DeepSpeech2
+from sonosco.models.deepspeech2_old import DeepSpeech2
 from sonosco.common.global_settings import DEVICE