Skip to content

Commit

Permalink
Merge pull request #493 from snakers4/adamnsandle
Browse files Browse the repository at this point in the history
Adamnsandle
  • Loading branch information
snakers4 authored Jul 9, 2024
2 parents 2b97f61 + 6de3660 commit 84768ce
Show file tree
Hide file tree
Showing 12 changed files with 136 additions and 42 deletions.
42 changes: 28 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,34 @@ https://user-images.githubusercontent.com/36505480/144874384-95f80f6d-a4f1-42cc-
</details>

<br/>

<h2 align="center">Fast start</h2>
<br/>

**Using pip**:
`pip install silero-vad`

```python3
from silero_vad import load_silero_vad, read_audio, get_speech_timestamps
model = load_silero_vad()
wav = read_audio('path_to_audio_file') # backend (sox, soundfile, or ffmpeg) required!
speech_timestamps = get_speech_timestamps(wav, model)
```

**Using torch.hub**:
```python3
import torch
torch.set_num_threads(1)

model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad', model='silero_vad')
(get_speech_timestamps, _, read_audio, _, _) = utils

wav = read_audio('path_to_audio_file') # backend (sox, soundfile, or ffmpeg) required!
speech_timestamps = get_speech_timestamps(wav, model)
```

<br/>

<h2 align="center">Key Features</h2>
<br/>

Expand Down Expand Up @@ -57,21 +85,7 @@ https://user-images.githubusercontent.com/36505480/144874384-95f80f6d-a4f1-42cc-
Published under a permissive license (MIT), Silero VAD has zero strings attached - no telemetry, no keys, no registration, no built-in expiration, and no vendor lock.

<br/>
<h2 align="center">Fast start</h2>
<br/>

```python3
import torch
torch.set_num_threads(1)

model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad', model='silero_vad')
(get_speech_timestamps, _, read_audio, _, _) = utils

wav = read_audio('path_to_audio_file')
speech_timestamps = get_speech_timestamps(wav, model)
```

<br/>
<h2 align="center">Typical Use Cases</h2>
<br/>

Expand Down
1 change: 0 additions & 1 deletion files/lang_dict_95.json

This file was deleted.

1 change: 0 additions & 1 deletion files/lang_group_dict_95.json

This file was deleted.

21 changes: 10 additions & 11 deletions hubconf.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
dependencies = ['torch', 'torchaudio']
import torch
import json
import os
from utils_vad import (init_jit_model,
get_speech_timestamps,
save_audio,
read_audio,
VADIterator,
collect_chunks,
drop_chunks,
Validator,
OnnxWrapper)
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
from silero_vad.utils_vad import (init_jit_model,
get_speech_timestamps,
save_audio,
read_audio,
VADIterator,
collect_chunks,
OnnxWrapper)


def versiontuple(v):
Expand All @@ -36,7 +35,7 @@ def silero_vad(onnx=False, force_onnx_cpu=False):
if versiontuple(installed_version) < versiontuple(supported_version):
raise Exception(f'Please install torch {supported_version} or greater ({installed_version} installed)')

model_dir = os.path.join(os.path.dirname(__file__), 'files')
model_dir = os.path.join(os.path.dirname(__file__), 'src', 'silero_vad', 'data')
if onnx:
model = OnnxWrapper(os.path.join(model_dir, 'silero_vad.onnx'), force_onnx_cpu)
else:
Expand Down
35 changes: 35 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "silero-vad"
version = "5.1"
authors = [
{name="Silero Team", email="[email protected]"},
]
description = "Voice Activity Detector (VAD) by Silero"
readme = "README.md"
requires-python = ">=3.8"
classifiers = [
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Intended Audience :: Science/Research",
"Intended Audience :: Developers",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering",
]
dependencies = [
"torch>=1.12.0",
"torchaudio>=0.12.0",
"onnxruntime>=1.18.0",
]

[project.urls]
Homepage = "https://github.com/snakers4/silero-vad"
Issues = "https://github.com/snakers4/silero-vad/issues"
30 changes: 20 additions & 10 deletions silero-vad.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,30 @@
},
"outputs": [],
"source": [
"USE_PIP = True # download model using pip package or torch.hub\n",
"USE_ONNX = False # change this to True if you want to test onnx model\n",
"if USE_ONNX:\n",
" !pip install -q onnxruntime\n",
"if USE_PIP:\n",
" !pip install -q silero-vad\n",
" from silero_vad import (load_silero_vad,\n",
" read_audio,\n",
" get_speech_timestamps,\n",
" save_audio,\n",
" VADIterator,\n",
" collect_chunks)\n",
" model = load_silero_vad(onnx=USE_ONNX)\n",
"else:\n",
" model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',\n",
" model='silero_vad',\n",
" force_reload=True,\n",
" onnx=USE_ONNX)\n",
"\n",
"model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',\n",
" model='silero_vad',\n",
" force_reload=True,\n",
" onnx=USE_ONNX)\n",
"\n",
"(get_speech_timestamps,\n",
" save_audio,\n",
" read_audio,\n",
" VADIterator,\n",
" collect_chunks) = utils"
" (get_speech_timestamps,\n",
" save_audio,\n",
" read_audio,\n",
" VADIterator,\n",
" collect_chunks) = utils"
]
},
{
Expand Down
12 changes: 12 additions & 0 deletions src/silero_vad/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Package entry point: exposes ``__version__`` (when it can be determined)
# and re-exports the model loader and the VAD helper utilities.
from importlib.metadata import version

try:
    __version__ = version(__name__)
except Exception:
    # Best-effort lookup: if the distribution metadata cannot be resolved,
    # ``__version__`` is simply left undefined and the import still succeeds.
    # ``Exception`` (instead of a bare ``except``) keeps KeyboardInterrupt and
    # SystemExit propagating.
    pass

from silero_vad.model import load_silero_vad
from silero_vad.utils_vad import (get_speech_timestamps,
                                  save_audio,
                                  read_audio,
                                  VADIterator,
                                  collect_chunks)
Empty file added src/silero_vad/data/__init__.py
Empty file.
Binary file not shown.
Binary file not shown.
25 changes: 25 additions & 0 deletions src/silero_vad/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from .utils_vad import init_jit_model, OnnxWrapper
import torch
torch.set_num_threads(1)

def load_silero_vad(onnx=False):
    """Load the Silero VAD model file shipped in the ``silero_vad.data`` package.

    Args:
        onnx: When True, load ``silero_vad.onnx`` through ``OnnxWrapper``
            (with ``force_onnx_cpu=True``); otherwise load ``silero_vad.jit``
            through ``init_jit_model``.

    Returns:
        The model object produced by ``OnnxWrapper`` or ``init_jit_model``.
    """
    model_name = 'silero_vad.onnx' if onnx else 'silero_vad.jit'
    package_path = "silero_vad.data"

    # Prefer the ``importlib_resources`` backport when it is installed and
    # fall back to the standard library otherwise. Only a failed import
    # triggers the fallback, so unrelated errors are no longer hidden.
    try:
        import importlib_resources as impresources
    except ImportError:
        from importlib import resources as impresources

    if hasattr(impresources, 'files'):
        model_file_path = str(impresources.files(package_path).joinpath(model_name))
    else:
        # Resource modules without ``files()`` only offer the ``path()``
        # context manager. Convert to ``str`` inside the context so every
        # branch hands the same type to the model loaders.
        with impresources.path(package_path, model_name) as resource_path:
            model_file_path = str(resource_path)

    if onnx:
        model = OnnxWrapper(model_file_path, force_onnx_cpu=True)
    else:
        model = init_jit_model(model_file_path)

    return model
11 changes: 6 additions & 5 deletions utils_vad.py → src/silero_vad/utils_vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,18 +132,19 @@ def __call__(self, inputs: torch.Tensor):

def read_audio(path: str,
sampling_rate: int = 16000):
list_backends = torchaudio.list_audio_backends()

assert len(list_backends) > 0, 'The list of available backends is empty, please install backend manually. \
\n Recommendations: \n \tSox (UNIX OS) \n \tSoundfile (Windows OS, UNIX OS) \n \tffmpeg (Windows OS, UNIX OS)'

sox_backends = set(['sox', 'sox_io'])
audio_backends = torchaudio.list_audio_backends()

if len(sox_backends.intersection(audio_backends)) > 0:
try:
effects = [
['channels', '1'],
['rate', str(sampling_rate)]
]

wav, sr = torchaudio.sox_effects.apply_effects_file(path, effects=effects)
else:
except:
wav, sr = torchaudio.load(path)

if wav.size(0) > 1:
Expand Down

0 comments on commit 84768ce

Please sign in to comment.