diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
index a5344c03..44434355 100644
--- a/.github/workflows/pr.yml
+++ b/.github/workflows/pr.yml
@@ -8,13 +8,13 @@ jobs:
       - uses: actions/checkout@v2
       # Run isort + black formatter
       - name: Python Code Formatter
-        uses: descriptinc/python-format-action@master
+        uses: descriptinc/python-format-action@main
   build:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.7, 3.8, 3.9]
+        python-version: ["3.9", "3.10", "3.12"]
     steps:
       - uses: actions/checkout@v2
diff --git a/audiotools/core/ffmpeg.py b/audiotools/core/ffmpeg.py
index baf27ccc..83f9cd19 100644
--- a/audiotools/core/ffmpeg.py
+++ b/audiotools/core/ffmpeg.py
@@ -176,7 +176,14 @@ def load_from_file_with_ffmpeg(cls, audio_path: str, quiet: bool = True, **kwarg
 
             ff = ffmpy.FFmpeg(
                 inputs={audio_path: None},
-                outputs={wav_file: None},
+                # For m4a inputs (and possibly others), the audio can carry timing
+                # information that doesn't match a constant sample rate. This
+                # aresample option forces ffmpeg to follow the timestamps in the
+                # source file instead of assuming a constant sample rate.
+                #
+                # This fixes an issue where an input m4a file could end up a
+                # different length than the output wav file.
+                outputs={wav_file: "-af aresample=async=1000"},
                 global_options=global_options,
             )
             ff.run()
diff --git a/setup.py b/setup.py
index 57453e4f..27c6edbc 100644
--- a/setup.py
+++ b/setup.py
@@ -35,7 +35,7 @@
     },
     install_requires=[
         "argbind",
-        "numpy",
+        "numpy<2",
        "soundfile",
         "pyloudnorm",
         "importlib-resources",
diff --git a/tests/core/test_whisper.py b/tests/core/test_whisper.py
index 0a8cd428..b8d8c193 100644
--- a/tests/core/test_whisper.py
+++ b/tests/core/test_whisper.py
@@ -26,8 +26,7 @@ def test_whisper_transcript():
 
     transcript = signal.get_whisper_transcript()
 
-    assert "<|startoftranscript|>" in transcript
-    assert "<|endoftext|>" in transcript
+    assert transcript.startswith("<|startoftranscript|>")
 
 
 def test_whisper_embeddings():
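
Note (outside the patch): below is a minimal sketch of how the aresample option added in the ffmpeg.py hunk flows through ffmpy into the final command line. It assumes ffmpy and an ffmpeg binary are installed; the file paths and the global options are hypothetical placeholders, not values taken from the repository.

import ffmpy

audio_path = "input.m4a"  # hypothetical source file
wav_file = "output.wav"   # hypothetical destination

ff = ffmpy.FFmpeg(
    inputs={audio_path: None},
    # aresample with async lets ffmpeg add or drop samples so the output
    # follows the timestamps stored in the source container instead of
    # assuming a perfectly constant sample rate.
    outputs={wav_file: "-af aresample=async=1000"},
    global_options="-y -loglevel error",  # assumed flags for a quiet run
)
print(ff.cmd)  # roughly: ffmpeg -y -loglevel error -i input.m4a -af aresample=async=1000 output.wav
ff.run()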