feat: Add functionality for extracting metadata and for plot a spectrogram (#28)
osl-incubator · Jul 9, 2024 · 5236e7d · 5236e7d
1 parent d7b2917
commit 5236e7d
Show file tree

Hide file tree

Showing 6 changed files with 1,081 additions and 917 deletions.
diff --git a/conda/dev.yaml b/conda/dev.yaml
@@ -5,6 +5,8 @@ channels:
 dependencies:
   - pygobject
   - python >=3.8.1,<3.12
+  - cairo 1.18
+  - pycairo 1.25.1
   - pip
   - poetry
   - nodejs # used by semantic-release

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -19,7 +19,7 @@ exclude = [
 
 [tool.poetry.dependencies]
 python = ">3.8.1,<3.12"
-pycairo = ">=1.24.0"
+pycairo = ">=1.24.0,<1.26"
 pygobject = ">=3.44.1"
 openai = ">=1"
 python-dotenv = ">=1.0.0"
@@ -35,6 +35,10 @@ pytubefix = ">=5.0"
 speechrecognition = ">=3.10"
 vosk = ">=0.3.45"
 google-cloud-speech = ">=2.24.1"
+ffmpeg-python = ">=0.2.0"
+matplotlib = "<=3.9"
+scipy = "<1.23"
+librosa = ">=0.10.1"
 
 [tool.poetry.group.dev.dependencies]
 pytest = ">=7.3.2"
@@ -120,5 +124,8 @@ module = [
   "pydub.generators",
   "pytubefix",
   "speech_recognition",
+  "ffmpeg",
+  "matplotlib.pyplot",
+  "matplotlib",
 ]
 ignore_missing_imports = true
diff --git a/src/artbox/cli.py b/src/artbox/cli.py
@@ -196,6 +196,31 @@ def sound_notes_to_audio(
     runner.notes_to_audio()
 
 
+@app_sound.command("spectrogram")
+def sound_spectrogram(
+    input_path: Annotated[
+        str,
+        typer.Option(
+            "--input-path", help="Specify the path of the input file"
+        ),
+    ] = "",
+    output_path: Annotated[
+        str,
+        typer.Option(
+            "--output-path", help="Specify the path to store the image file"
+        ),
+    ] = "",
+) -> None:
+    """Generate a spectrogram from an MP3 file and save it as an image."""
+    args_dict = {
+        "input-path": input_path,
+        "output-path": output_path,
+    }
+    # Sound is configured through a dict keyed by the CLI option names
+    runner = Sound(args_dict)
+    runner.spectrogram()
+
+
 @app_video.command("remove-audio")
 def video_remove_audio(
     input_path: Annotated[
@@ -247,6 +272,32 @@ def video_extract_audio(
     runner.extract_audio()
 
 
+@app_video.command("get-metadata")
+def video_get_metadata(
+    input_path: Annotated[
+        str,
+        typer.Option(
+            "--input-path", help="Specify the path of the input video file"
+        ),
+    ] = "",
+    output_path: Annotated[
+        str,
+        typer.Option(
+            "--output-path",
+            help="Specify the path to store the extracted metadata file",
+        ),
+    ] = "",
+) -> None:
+    """Extract the metadata from a video (mp4) and save it to a file."""
+    args_dict = {
+        "input-path": input_path,
+        "output-path": output_path,
+    }
+    # Video is configured through a dict keyed by the CLI option names
+    runner = Video(args_dict)
+    runner.get_metadata()
+
+
 @app_video.command("combine-video-and-audio")
 def video_combine_audio_and_video(
     video_path: Annotated[

diff --git a/src/artbox/sounds.py b/src/artbox/sounds.py
@@ -5,6 +5,9 @@
 from math import log2
 
 import aubio
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
 import noisereduce as nr
 import numpy as np
 
@@ -299,3 +302,28 @@ def extract_notes_from_mp3(self) -> list:
             json.dump(notes, f)
 
         return notes
+
+    def spectrogram(self) -> None:
+        """Generate a spectrogram from an MP3 file and save it as an image."""
+        mp3_file_path = str(self.input_path)
+        output_file_path = str(self.output_path)
+
+        # Load the audio samples together with their sample rate
+        y, sr = librosa.load(mp3_file_path)
+
+        # Mel power spectrogram, converted to dB relative to its maximum
+        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
+        S_dB = librosa.power_to_db(S, ref=np.max)
+
+        fig = plt.figure(figsize=(10, 4))
+        try:
+            librosa.display.specshow(
+                S_dB, sr=sr, x_axis="time", y_axis="mel", fmax=8000
+            )
+            plt.colorbar(format="%+2.0f dB")
+            plt.title("Mel-frequency spectrogram")
+            plt.tight_layout()
+            plt.savefig(output_file_path)
+        finally:
+            # Always release this figure, even if plotting or saving fails
+            plt.close(fig)
diff --git a/src/artbox/videos.py b/src/artbox/videos.py
@@ -6,6 +6,8 @@
 
 from abc import abstractmethod
 
+import ffmpeg
+
 from moviepy.editor import AudioFileClip, VideoFileClip
 from pytubefix import YouTube as PyYouTube
 
@@ -158,6 +160,60 @@ def extract_audio(self) -> None:
 
         print(f"Audio has been extracted. Output saved at '{output_path}'.")
 
+    def get_metadata(self) -> None:
+        """
+        Extract metadata from an MP4 file using ffmpeg's probe.
+
+        The collected metadata is written to ``self.output_path`` as the
+        text form of a dict and is also printed; nothing is returned.
+        If probing fails, the error is printed and no file is written.
+        """
+        file_path = str(self.input_path)
+
+        try:
+            probe = ffmpeg.probe(file_path)
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            return
+
+        general_metadata = probe.get("format", {})
+        detailed_metadata = {
+            "format": general_metadata.get("format_name"),
+            "duration": general_metadata.get("duration"),
+            "size": general_metadata.get("size"),
+            "bit_rate": general_metadata.get("bit_rate"),
+            "tags": general_metadata.get("tags", {}),
+            "streams": [],
+        }
+
+        for stream in probe.get("streams", []):
+            codec_type = stream.get("codec_type")
+            is_audio = codec_type == "audio"
+            stream_info = {
+                "index": stream.get("index"),
+                "type": codec_type,
+                "codec": stream.get("codec_name"),
+                "profile": stream.get("profile"),
+                "resolution": (
+                    f"{stream.get('width')}x{stream.get('height')}"
+                    if codec_type == "video"
+                    else None
+                ),
+                "bit_rate": stream.get("bit_rate"),
+                # Sample rate and channels are reported for audio streams only
+                "sample_rate": (
+                    stream.get("sample_rate") if is_audio else None
+                ),
+                "channels": stream.get("channels") if is_audio else None,
+                "tags": stream.get("tags", {}),
+            }
+            detailed_metadata["streams"].append(stream_info)
+
+        # Explicit UTF-8 so non-ASCII tag values do not depend on the locale
+        with open(self.output_path, "w", encoding="utf-8") as f:
+            f.write(str(detailed_metadata))
+        print(detailed_metadata)
+
     def remove_audio(self) -> None:
         """Remove the audio from an MP4 file."""
         # Load the video