diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
index a5344c03..44434355 100644
--- a/.github/workflows/pr.yml
+++ b/.github/workflows/pr.yml
@@ -8,13 +8,13 @@ jobs:
       - uses: actions/checkout@v2
       # Run isort + black formatter
       - name: Python Code Formatter
-        uses: descriptinc/python-format-action@master
+        uses: descriptinc/python-format-action@main
   build:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.7, 3.8, 3.9]
+        python-version: ["3.9", "3.10", "3.12"]
     steps:
       - uses: actions/checkout@v2
diff --git a/audiotools/core/ffmpeg.py b/audiotools/core/ffmpeg.py
index baf27ccc..83f9cd19 100644
--- a/audiotools/core/ffmpeg.py
+++ b/audiotools/core/ffmpeg.py
@@ -176,7 +176,14 @@ def load_from_file_with_ffmpeg(cls, audio_path: str, quiet: bool = True, **kwarg
 
             ff = ffmpy.FFmpeg(
                 inputs={audio_path: None},
-                outputs={wav_file: None},
+                # For m4a inputs (and possibly others), the audio can carry timing
+                # information that doesn't match a constant sample rate. This
+                # aresample option forces ffmpeg to follow the timestamps in the
+                # source file instead of assuming a constant sample rate.
+                #
+                # This fixes an issue where an input m4a file could end up a
+                # different length than the output wav file.
+                outputs={wav_file: "-af aresample=async=1000"},
                 global_options=global_options,
             )
             ff.run()
diff --git a/setup.py b/setup.py
index 57453e4f..27c6edbc 100644
--- a/setup.py
+++ b/setup.py
@@ -35,7 +35,7 @@
     },
     install_requires=[
         "argbind",
-        "numpy",
+        "numpy<2",
        "soundfile",
         "pyloudnorm",
         "importlib-resources",
diff --git a/tests/core/test_whisper.py b/tests/core/test_whisper.py
index 0a8cd428..b8d8c193 100644
--- a/tests/core/test_whisper.py
+++ b/tests/core/test_whisper.py
@@ -26,8 +26,7 @@ def test_whisper_transcript():
 
     transcript = signal.get_whisper_transcript()
 
-    assert "<|startoftranscript|>" in transcript
-    assert "<|endoftext|>" in transcript
+    assert transcript.startswith("<|startoftranscript|>")
 
 
 def test_whisper_embeddings():
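
Note (outside the patch): below is a minimal sketch of how the aresample option added in the ffmpeg.py hunk flows through ffmpy into the final command line. It assumes ffmpy and an ffmpeg binary are installed; the file paths and the global options are hypothetical placeholders, not values taken from the repository.

import ffmpy

audio_path = "input.m4a"  # hypothetical source file
wav_file = "output.wav"   # hypothetical destination

ff = ffmpy.FFmpeg(
    inputs={audio_path: None},
    # aresample with async lets ffmpeg add or drop samples so the output
    # follows the timestamps stored in the source container instead of
    # assuming a perfectly constant sample rate.
    outputs={wav_file: "-af aresample=async=1000"},
    global_options="-y -loglevel error",  # assumed flags for a quiet run
)
print(ff.cmd)  # roughly: ffmpeg -y -loglevel error -i input.m4a -af aresample=async=1000 output.wav
ff.run()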