Skip to content

Commit

Permalink
Add simulate_rir_ism method for room impulse response simulation (pyt…
Browse files Browse the repository at this point in the history
…orch#2880)

Summary:
replicate of pytorch#2644

Pull Request resolved: pytorch#2880

Reviewed By: mthrok

Differential Revision: D41633911

Pulled By: nateanl

fbshipit-source-id: 73cf145d75c389e996aafe96571ab86dc21f86e5
  • Loading branch information
nateanl authored and facebook-github-bot committed Feb 14, 2023
1 parent 3f02b89 commit 8c5c9a9
Show file tree
Hide file tree
Showing 20 changed files with 705 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .circleci/unittest/linux/scripts/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ fi
(
set -x
conda install -y -c conda-forge ${NUMBA_DEV_CHANNEL} 'librosa>=0.8.0' parameterized 'requests>=2.20'
pip install kaldi-io SoundFile coverage pytest pytest-cov 'scipy==1.7.3' transformers expecttest unidecode inflect Pillow sentencepiece pytorch-lightning 'protobuf<4.21.0' demucs tinytag
pip install kaldi-io SoundFile coverage pytest pytest-cov 'scipy==1.7.3' transformers expecttest unidecode inflect Pillow sentencepiece pytorch-lightning 'protobuf<4.21.0' demucs tinytag pyroomacoustics
)
# Install fairseq
git clone https://github.com/pytorch/fairseq
Expand Down
3 changes: 2 additions & 1 deletion .circleci/unittest/windows/scripts/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ esac
unidecode \
'protobuf<4.21.0' \
demucs \
tinytag
tinytag \
pyroomacoustics
)
# Install fairseq
git clone https://github.com/pytorch/fairseq
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ endif()
# Options
option(BUILD_SOX "Build libsox statically" ON)
option(BUILD_KALDI "Build kaldi statically" ON)
option(BUILD_RIR "Enable RIR simulation" ON)
option(BUILD_RNNT "Enable RNN transducer" ON)
option(BUILD_CTC_DECODER "Build Flashlight CTC decoder" ON)
option(BUILD_TORCHAUDIO_PYTHON_EXTENSION "Build Python extension" OFF)
Expand Down
9 changes: 9 additions & 0 deletions docs/source/prototype.functional.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,12 @@ DSP
oscillator_bank
sinc_impulse_response
frequency_impulse_response

Room Impulse Response Simulation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autosummary::
:toctree: generated
:nosignatures:

simulate_rir_ism
24 changes: 24 additions & 0 deletions docs/source/refs.bib
Original file line number Diff line number Diff line change
Expand Up @@ -504,3 +504,27 @@ @inproceedings{valk2021voxlingua107
year={2021},
organization={IEEE}
}
@inproceedings{scheibler2018pyroomacoustics,
title={Pyroomacoustics: A python package for audio room simulation and array processing algorithms},
author={Scheibler, Robin and Bezzam, Eric and Dokmani{\'c}, Ivan},
booktitle={2018 IEEE international conference on acoustics, speech and signal processing (ICASSP)},
pages={351--355},
year={2018},
organization={IEEE}
}
@article{allen1979image,
title={Image method for efficiently simulating small-room acoustics},
author={Allen, Jont B and Berkley, David A},
journal={The Journal of the Acoustical Society of America},
volume={65},
number={4},
pages={943--950},
year={1979},
publisher={Acoustical Society of America}
}
@misc{wiki:Absorption_(acoustics),
author = "{Wikipedia contributors}",
title = "Absorption (acoustics) --- {W}ikipedia{,} The Free Encyclopedia",
url = "https://en.wikipedia.org/wiki/Absorption_(acoustics)",
note = "[Online]"
}
2 changes: 2 additions & 0 deletions test/torchaudio_unittest/common_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
skipIfNoMacOS,
skipIfNoModule,
skipIfNoQengine,
skipIfNoRIR,
skipIfNoSox,
skipIfPy310,
skipIfRocm,
Expand Down Expand Up @@ -47,6 +48,7 @@
"skipIfNoMacOS",
"skipIfNoModule",
"skipIfNoKaldi",
"skipIfNoRIR",
"skipIfNoSox",
"skipIfNoSoxBackend",
"skipIfRocm",
Expand Down
5 changes: 5 additions & 0 deletions test/torchaudio_unittest/common_utils/case_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,11 @@ def skipIfNoModule(module, display_name=None):
reason="Kaldi features are not available.",
key="NO_KALDI",
)
# Test decorator for cases that need the RIR features of the compiled extension.
# The condition is true when `torchaudio._extension._IS_RIR_AVAILABLE` is false.
# NOTE(review): `_skipIf` is defined outside this view; presumably it skips the decorated
# test when the condition holds and uses `key` for an opt-out setting -- confirm.
skipIfNoRIR = _skipIf(
not torchaudio._extension._IS_RIR_AVAILABLE,
reason="RIR features are not available.",
key="NO_RIR",
)
skipIfNoCtcDecoder = _skipIf(
not is_ctc_decoder_available(),
reason="CTC decoder not available.",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import torch
from torchaudio_unittest.common_utils import PytorchTestCase

from .functional_test_impl import Functional64OnlyTestImpl, FunctionalTestImpl
from .functional_test_impl import Functional64OnlyTestImpl, FunctionalCPUOnlyTestImpl, FunctionalTestImpl


class FunctionalFloat32CPUTest(FunctionalTestImpl, PytorchTestCase):
Expand All @@ -17,3 +17,13 @@ class FunctionalFloat64CPUTest(FunctionalTestImpl, PytorchTestCase):
class FunctionalFloat64OnlyCPUTest(Functional64OnlyTestImpl, PytorchTestCase):
dtype = torch.float64
device = torch.device("cpu")


# Binds the CPU-only functional tests (`FunctionalCPUOnlyTestImpl`) to float32 tensors on the CPU device.
class FunctionalCPUOnlyFloat32Test(FunctionalCPUOnlyTestImpl, PytorchTestCase):
dtype = torch.float32
device = torch.device("cpu")


# Binds the CPU-only functional tests (`FunctionalCPUOnlyTestImpl`) to float64 tensors on the CPU device.
class FunctionalCPUOnlyFloat64Test(FunctionalCPUOnlyTestImpl, PytorchTestCase):
dtype = torch.float64
device = torch.device("cpu")
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
from torchaudio._internal import module_utils as _mod_utils

if _mod_utils.is_module_available("pyroomacoustics"):
import pyroomacoustics as pra

import torch
import torchaudio.prototype.functional as F
from parameterized import param, parameterized
from torchaudio_unittest.common_utils import nested_params, TestBaseMixin
from torchaudio_unittest.common_utils import nested_params, skipIfNoModule, skipIfNoRIR, TestBaseMixin

from .dsp_utils import freq_ir as freq_ir_np, oscillator_bank as oscillator_bank_np, sinc_ir as sinc_ir_np

Expand Down Expand Up @@ -424,3 +429,83 @@ def _debug_plot():
except AssertionError:
_debug_plot()
raise


@skipIfNoModule("pyroomacoustics")
@skipIfNoRIR
class FunctionalCPUOnlyTestImpl(TestBaseMixin):
    """Check ``F.simulate_rir_ism`` against the RIRs computed by pyroomacoustics on CPU."""

    def _random_scene(self, channel):
        """Draw a random shoebox scene.

        ``torch.rand`` samples from ``[0, 1)``, so every room edge is at least 5 while all
        microphone and source coordinates stay below 5, i.e. inside the room.

        Args:
            channel (int): Number of microphones.

        Returns:
            (Tensor, Tensor, Tensor): ``room_dim`` with shape `(3,)`, ``source`` with shape `(3,)`
            and ``mic_array`` with shape `(channel, 3)`.
        """
        room_dim = torch.rand(3, dtype=self.dtype, device=self.device) + 5
        mic_array = torch.rand(channel, 3, dtype=self.dtype, device=self.device) + 1
        source = torch.rand(3, dtype=self.dtype, device=self.device) + 4
        return room_dim, source, mic_array

    def _pra_reference(self, room_dim, source, mic_array, max_order, materials):
        """Compute the reference RIRs with pyroomacoustics.

        Args:
            room_dim (Tensor): Room size, shape `(3,)`.
            source (Tensor): Source position, shape `(3,)`.
            mic_array (Tensor): Microphone positions, shape `(channel, 3)`.
            max_order (int): ``max_order`` forwarded to ``pra.ShoeBox``.
            materials: ``materials`` argument forwarded to ``pra.ShoeBox``.

        Returns:
            Tensor: Per-microphone RIRs, zero-padded at the end to the longest one,
            with shape `(channel, max_len)`.
        """
        room = pra.ShoeBox(
            room_dim.detach().numpy(),
            fs=16000,
            materials=materials,
            max_order=max_order,
            ray_tracing=False,
            air_absorption=False,
        )
        # mic_locs is a numpy array of dimension `(3, channel)`.
        mic_locs = mic_array.transpose(0, 1).double().detach().numpy()
        room.add_microphone_array(mic_locs)
        room.add_source(source.tolist())
        room.compute_rir()

        # Only one source is added, so index 0 selects its RIR for each microphone.
        rirs = [room.rir[i][0] for i in range(mic_array.shape[0])]
        max_len = max(rir.shape[0] for rir in rirs)
        expected = torch.zeros(len(rirs), max_len, dtype=self.dtype, device=self.device)
        for i, rir in enumerate(rirs):
            expected[i, : rir.shape[0]] = torch.from_numpy(rir)
        return expected

    @parameterized.expand([(1,), (4,)])
    def test_simulate_rir_ism_single_band(self, channel):
        """Test simulate_rir_ism function in the case where absorption coefficients are identical for all walls."""
        room_dim, source, mic_array = self._random_scene(channel)
        max_order = 3
        # absorption is a float, meaning the absorption coefficient is the same for every wall.
        absorption = 0.5
        # compute rir signal by torchaudio implementation
        actual = F.simulate_rir_ism(room_dim, source, mic_array, max_order, absorption)
        # compute rir signal by pyroomacoustics
        expected = self._pra_reference(room_dim, source, mic_array, max_order, pra.Material(absorption))

        self.assertEqual(expected, actual, atol=1e-3, rtol=1e-3)

    @parameterized.expand([(1,), (4,)])
    def test_simulate_rir_ism_multi_band(self, channel):
        """Test simulate_rir_ism in the case where absorption coefficients are different for all walls."""
        room_dim, source, mic_array = self._random_scene(channel)
        max_order = 3
        # absorption is a Tensor with dimensions `(7, 6)`: there are 6 walls, and each wall
        # has 7 absorption coefficients corresponding to 7 octave bands.
        absorption = torch.rand(7, 6, dtype=self.dtype, device=self.device)
        walls = ["west", "east", "south", "north", "floor", "ceiling"]
        center_freqs = [125.0, 250.0, 500.0, 1000.0, 2000.0, 4000.0, 8000.0]
        materials = {
            wall: pra.Material(
                {
                    # Column ``i`` of ``absorption`` holds the per-band coefficients of wall ``i``.
                    "coeffs": absorption[:, i].detach().numpy(),
                    "center_freqs": list(center_freqs),
                }
            )
            for i, wall in enumerate(walls)
        }
        expected = self._pra_reference(room_dim, source, mic_array, max_order, materials)
        actual = F.simulate_rir_ism(room_dim, source, mic_array, max_order, absorption)
        self.assertEqual(expected, actual, atol=1e-3, rtol=1e-3)
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import torch
from torchaudio_unittest.common_utils import PytorchTestCase

from .torchscript_consistency_test_impl import TorchScriptConsistencyTestImpl
from .torchscript_consistency_test_impl import TorchScriptConsistencyCPUOnlyTestImpl, TorchScriptConsistencyTestImpl


class TorchScriptConsistencyCPUFloat32Test(TorchScriptConsistencyTestImpl, PytorchTestCase):
Expand All @@ -12,3 +12,13 @@ class TorchScriptConsistencyCPUFloat32Test(TorchScriptConsistencyTestImpl, Pytor
class TorchScriptConsistencyCPUFloat64Test(TorchScriptConsistencyTestImpl, PytorchTestCase):
dtype = torch.float64
device = torch.device("cpu")


# Binds the CPU-only TorchScript consistency tests (`TorchScriptConsistencyCPUOnlyTestImpl`)
# to float32 tensors on the CPU device.
class TorchScriptConsistencyCPUOnlyFloat32Test(TorchScriptConsistencyCPUOnlyTestImpl, PytorchTestCase):
dtype = torch.float32
device = torch.device("cpu")


# Binds the CPU-only TorchScript consistency tests (`TorchScriptConsistencyCPUOnlyTestImpl`)
# to float64 tensors on the CPU device.
class TorchScriptConsistencyCPUOnlyFloat64Test(TorchScriptConsistencyCPUOnlyTestImpl, PytorchTestCase):
dtype = torch.float64
device = torch.device("cpu")
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

import torch
import torchaudio.prototype.functional as F
from torchaudio_unittest.common_utils import TestBaseMixin, torch_script
from parameterized import parameterized
from torchaudio_unittest.common_utils import skipIfNoRIR, TestBaseMixin, torch_script


class TorchScriptConsistencyTestImpl(TestBaseMixin):
Expand Down Expand Up @@ -62,3 +63,52 @@ def test_sinc_ir(self):
def test_freq_ir(self):
mags = torch.tensor([0, 0.5, 1.0], device=self.device, dtype=self.dtype)
self._assert_consistency(F.frequency_impulse_response, (mags,))


class TorchScriptConsistencyCPUOnlyTestImpl(TestBaseMixin):
    """Consistency checks between functions and their ``torch_script`` versions, run on CPU only."""

    def _assert_consistency(self, func, inputs, shape_only=False):
        """Assert that ``func`` and ``torch_script(func)`` produce equal results for ``inputs``.

        Args:
            func: Function under test.
            inputs: Positional arguments for ``func``. Tensors are moved to ``self.device`` and
                cast to ``self.dtype``; all other values are passed through unchanged.
            shape_only (bool, optional): If ``True``, compare only the shapes of the two outputs.
        """
        prepared = [
            arg.to(device=self.device, dtype=self.dtype) if torch.is_tensor(arg) else arg for arg in inputs
        ]
        ts_func = torch_script(func)

        # Re-seed before each call so both invocations start from the same RNG state.
        torch.random.manual_seed(40)
        output = func(*prepared)

        torch.random.manual_seed(40)
        ts_output = ts_func(*prepared)

        if shape_only:
            ts_output = ts_output.shape
            output = output.shape
        self.assertEqual(ts_output, output)

    # NOTE: ``skipIfNoRIR`` must sit *below* ``parameterized.expand``. ``expand`` generates the
    # actual ``test_*_N`` methods from the function it decorates, so a decorator stacked above
    # it is never applied to the generated tests.
    @parameterized.expand([(1,), (4,)])
    @skipIfNoRIR
    def test_simulate_rir_ism_single_band(self, channel):
        """Check ``simulate_rir_ism`` with a single float absorption value."""
        room_dim = torch.rand(3, dtype=self.dtype, device=self.device) + 5
        mic_array = torch.rand(channel, 3, dtype=self.dtype, device=self.device) + 1
        source = torch.rand(3, dtype=self.dtype, device=self.device) + 4
        max_order = 3
        absorption = 0.5
        center_frequency = torch.tensor([125, 250, 500, 1000, 2000, 4000, 8000], dtype=self.dtype, device=self.device)
        self._assert_consistency(
            F.simulate_rir_ism,
            (room_dim, source, mic_array, max_order, absorption, None, 81, center_frequency, 343.0, 16000.0),
        )

    @parameterized.expand([(1,), (4,)])
    @skipIfNoRIR
    def test_simulate_rir_ism_multi_band(self, channel):
        """Check ``simulate_rir_ism`` with a `(7, 6)` absorption Tensor."""
        room_dim = torch.rand(3, dtype=self.dtype, device=self.device) + 5
        mic_array = torch.rand(channel, 3, dtype=self.dtype, device=self.device) + 1
        source = torch.rand(3, dtype=self.dtype, device=self.device) + 4
        max_order = 3
        absorption = torch.rand(7, 6, dtype=self.dtype, device=self.device)
        center_frequency = torch.tensor([125, 250, 500, 1000, 2000, 4000, 8000], dtype=self.dtype, device=self.device)
        self._assert_consistency(
            F.simulate_rir_ism,
            (room_dim, source, mic_array, max_order, absorption, None, 81, center_frequency, 343.0, 16000.0),
        )
2 changes: 2 additions & 0 deletions tools/setup_helpers/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def _get_build(var, default=False):

_BUILD_SOX = False if platform.system() == "Windows" else _get_build("BUILD_SOX", True)
_BUILD_KALDI = False if platform.system() == "Windows" else _get_build("BUILD_KALDI", True)
_BUILD_RIR = _get_build("BUILD_RIR", True)
_BUILD_RNNT = _get_build("BUILD_RNNT", True)
_BUILD_CTC_DECODER = _get_build("BUILD_CTC_DECODER", True)
_USE_FFMPEG = _get_build("USE_FFMPEG", False)
Expand Down Expand Up @@ -116,6 +117,7 @@ def build_extension(self, ext):
f"-DPython_INCLUDE_DIR={distutils.sysconfig.get_python_inc()}",
f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}",
f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
f"-DBUILD_RIR:BOOL={'ON' if _BUILD_RIR else 'OFF'}",
f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}",
f"-DBUILD_CTC_DECODER:BOOL={'ON' if _BUILD_CTC_DECODER else 'OFF'}",
"-DBUILD_TORCHAUDIO_PYTHON_EXTENSION:BOOL=ON",
Expand Down
15 changes: 13 additions & 2 deletions torchaudio/_extension/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"_check_cuda_version",
"_IS_TORCHAUDIO_EXT_AVAILABLE",
"_IS_KALDI_AVAILABLE",
"_IS_RIR_AVAILABLE",
"_SOX_INITIALIZED",
"_FFMPEG_INITIALIZED",
]
Expand All @@ -33,16 +34,18 @@
# In case of an error, we do not catch the failure as it suggests there is something
# wrong with the installation.
_IS_TORCHAUDIO_EXT_AVAILABLE = is_module_available("torchaudio.lib._torchaudio")
# Kaldi features are implemented in _torchaudio extension, but it can be individually
# Kaldi and RIR features are implemented in _torchaudio extension, but they can be individually
# turned on/off at build time. Available means that _torchaudio is loaded properly, and
# Kaldi features are found there.
# Kaldi or RIR features are found there.
_IS_RIR_AVAILABLE = False
_IS_KALDI_AVAILABLE = False
if _IS_TORCHAUDIO_EXT_AVAILABLE:
_load_lib("libtorchaudio")

import torchaudio.lib._torchaudio # noqa

_check_cuda_version()
_IS_RIR_AVAILABLE = torchaudio.lib._torchaudio.is_rir_available()
_IS_KALDI_AVAILABLE = torchaudio.lib._torchaudio.is_kaldi_available()


Expand Down Expand Up @@ -88,3 +91,11 @@
)

fail_if_no_ffmpeg = no_op if _FFMPEG_INITIALIZED else _fail_since_no_ffmpeg

# Guard for functionality that depends on the RIR extension: `no_op` when
# `_IS_RIR_AVAILABLE` is true, otherwise the result of `fail_with_message(...)`.
# NOTE(review): `no_op` and `fail_with_message` are defined outside this view; presumably the
# latter yields a decorator whose wrapped function raises with the given message -- confirm.
fail_if_no_rir = (
no_op
if _IS_RIR_AVAILABLE
else fail_with_message(
"requires RIR extension, but TorchAudio is not compiled with it. Please build TorchAudio with RIR support."
)
)
5 changes: 5 additions & 0 deletions torchaudio/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ if(BUILD_RNNT)
endif()
endif()

# Optional RIR simulation support: when BUILD_RIR is ON, append rir.cpp to `sources`
# and INCLUDE_RIR to `compile_definitions`.
if(BUILD_RIR)
list(APPEND sources rir.cpp)
list(APPEND compile_definitions INCLUDE_RIR)
endif()

if(USE_CUDA)
list(
APPEND
Expand Down
1 change: 1 addition & 0 deletions torchaudio/csrc/pybind/pybind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ namespace {

PYBIND11_MODULE(_torchaudio, m) {
m.def("is_kaldi_available", &is_kaldi_available, "");
m.def("is_rir_available", &is_rir_available, "");
m.def("cuda_version", &cuda_version, "");
}

Expand Down
Loading

0 comments on commit 8c5c9a9

Please sign in to comment.