Skip to content

Commit

Permalink
Added docs to common and updated server Dockerfile
Browse files Browse the repository at this point in the history
  • Loading branch information
w.jurasz committed Sep 21, 2019
1 parent 3c2e3dd commit 80ab6d1
Show file tree
Hide file tree
Showing 10 changed files with 311 additions and 38 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ resampy==0.2.1
scikit-learn==0.21.2
scipy==1.3.0
six==1.12.0
torch==1.1.0
torch==1.2.0
torchvision==0.3.0
tqdm==4.32.1
pyyaml==5.1
Expand Down
5 changes: 3 additions & 2 deletions server/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ RUN apt install ffmpeg \
libsndfile1 \
portaudio19-dev \
python3-pyaudio \
git -y
git \
vim -y

RUN pip3 install certifi==2019.3.9 \
chardet==3.0.4 \
Expand All @@ -39,7 +40,7 @@ RUN pip3 install certifi==2019.3.9 \
catkin_pkg \
librosa \
ipdb
RUN pip3 install torch
RUN pip3 install torch==1.2.0

COPY . .

Expand Down
10 changes: 5 additions & 5 deletions server/config.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
models:
# - id: 'deepspeech2'
# path: '../pretrained/deepspeech_final.pth'
# decoder: 'greedy'
# name: 'DeepSpeech2'
- id: 'deepspeech2'
path: '/work/pretrained_host/deepspeech_final.pth'
decoder: 'greedy'
name: 'DeepSpeech2'

# - id: 'las1'
# path: '../pretrained/las_model_1.pt'
# decoder: 'greedy'
# name: 'LAS_1'

- id: 'las5'
path: '../pretrained/las_model_5.pt'
path: '/work/pretrained_host/las_model_5.pt'
decoder: 'greedy'
name: 'LAS'

Expand Down
2 changes: 1 addition & 1 deletion server/run.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#!/usr/bin/env bash
docker run -v ../pretrained:/work/pretrained_host -p 5000:5000 -p 8888:8888 -it ros1_roboy /bin/bash
docker run -v $1:/work/pretrained_host -p 5000:5000 -p 8888:8888 -it sonosco_server /bin/bash
72 changes: 64 additions & 8 deletions sonosco/common/audio_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,89 @@
import librosa


def get_duration(file_path):
return float(subprocess.check_output([f'soxi -D "{file_path.strip()}"'], shell=True))
def get_duration(file_path: str) -> float:
"""
gets duration of audio using sox
Args:
file_path:
Returns: duration
def transcode_recording(source, destination, sample_rate):
"""
return float(subprocess.check_output([f'sox -D "{file_path.strip()}"'], shell=True))


def transcode_recording(source: str, destination: str, sample_rate: int) -> None:
subprocess.call([f"sox {source} -r {sample_rate} -b 16 -c 1 {destination}"], shell=True)


def transcode_recordings_an4(raw_path, wav_path, sample_rate):
def transcode_recordings_an4(raw_path: str, wav_path: str, sample_rate: str) -> None:
"""
transcode recordings using sox
Args:
raw_path:
wav_path:
sample_rate:
"""
subprocess.call([f'sox -t raw -r {sample_rate} -b 16 -e signed-integer -B -c 1 \"{raw_path}\" \"{wav_path}\"'],
shell=True)


def transcode_recordings_ted3(source, destination, start_time, end_time, sample_rate):
def transcode_recordings_ted3(source: str, destination: str, start_time: int, end_time: int, sample_rate: int) -> None:
"""
transcode recordings using sox
Args:
source:
destination:
start_time:
end_time:
sample_rate:
Returns:
"""
subprocess.call([f"sox {source} -r {sample_rate} -b 16 -c 1 {destination} trim {start_time} ={end_time}"],
shell=True)


def shift(audio, n_samples=1600):
def shift(audio: np.ndarray, n_samples: int = 1600) -> np.ndarray:
"""
shift audio by n_samples
Args:
audio:
n_samples:
Returns: shifted audio
"""
return np.roll(audio, n_samples)


def stretch(audio, rate=1):
def stretch(audio: np.ndarray, rate: int = 1) -> np.ndarray:
"""
stretches the audio by rate
Args:
audio:
rate:
Returns: stretched audio
"""
stretched_audio = librosa.effects.time_stretch(audio, rate)
return stretched_audio


def pitch_shift(audio, sample_rate=16000, n_steps=3.0):
def pitch_shift(audio: np.ndarray, sample_rate: int = 16000, n_steps: float = 3.0):
"""
shifts pitch of the audio
Args:
audio:
sample_rate:
n_steps:
Returns: shifted audio
"""
stretched_audio = librosa.effects.pitch_shift(audio, sr=sample_rate, n_steps=n_steps)
return stretched_audio
15 changes: 0 additions & 15 deletions sonosco/common/click_extensions.py

This file was deleted.

19 changes: 17 additions & 2 deletions sonosco/common/noise_makers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,15 @@ def __call__(self, audio):
"""Adds noise to the audio signal."""
pass

def add_noise(self, audio):
def add_noise(self, audio: np.ndarray) -> np.ndarray:
"""
Adds noise to the audio
Args:
audio:
Returns: audio with noise added
"""
return self(audio)


Expand All @@ -19,6 +27,13 @@ class GaussianNoiseMaker(NoiseMaker):
def __init__(self, std=0.002):
self.std = std

def __call__(self, audio):
def __call__(self, audio: np.ndarray) -> np.ndarray:
"""
Adds noise to the audio
Args:
audio:
Returns: audio with noise added
"""
noise = np.random.randn(len(audio))
return audio + self.std * noise
27 changes: 24 additions & 3 deletions sonosco/common/path_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,37 @@
import codecs


def try_create_directory(path: str):
def try_create_directory(path: str) -> None:
"""
tries to create a directory at given path
Args:
path:
"""
if not os.path.exists(path):
os.makedirs(path)


def try_download(destination: str, url: str):
def try_download(destination: str, url: str) -> None:
"""
Tries to download to destination from url
Args:
destination:
url:
"""
if not os.path.exists(destination):
wget.download(url, destination)


def parse_yaml(file_path: str):
def parse_yaml(file_path: str) -> dict:
"""
load yaml into memory
Args:
file_path:
Returns: dict with the yaml file content
"""
with codecs.open(file_path, "r", "utf-8") as file:
return yaml.load(file, Loader=yaml.FullLoader)
2 changes: 1 addition & 1 deletion sonosco/inference/deepspeech2_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from sonosco.inference.asr import SonoscoASR
from sonosco.decoders import GreedyDecoder
from sonosco.datasets.processor import AudioDataProcessor
from sonosco.models.deepspeech2 import DeepSpeech2
from sonosco.models.deepspeech2_old import DeepSpeech2
from sonosco.common.global_settings import DEVICE


Expand Down
Loading

0 comments on commit 80ab6d1

Please sign in to comment.