From 4af0863af4f4332f50ba85226ded2f0eedde2bc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sat, 19 Oct 2024 00:50:32 +0100 Subject: [PATCH] Try to GET LRCLib lyrics before searching --- beetsplug/lyrics.py | 156 +++++++++++++++++++++++++----------- docs/changelog.rst | 11 +-- test/plugins/test_lyrics.py | 12 ++- 3 files changed, 123 insertions(+), 56 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index a938f53184..3a99cea9e4 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -25,8 +25,11 @@ import struct import unicodedata import warnings -from functools import partial -from typing import TYPE_CHECKING, ClassVar +from contextlib import suppress +from dataclasses import dataclass +from functools import cached_property, partial, total_ordering +from http import HTTPStatus +from typing import TYPE_CHECKING, ClassVar, Iterable, Iterator from urllib.parse import quote, urlencode import requests @@ -98,6 +101,10 @@ """ +class NotFoundError(requests.exceptions.HTTPError): + pass + + # Utilities. @@ -276,14 +283,80 @@ class LRCLibItem(TypedDict): trackName: str artistName: str albumName: str - duration: float + duration: float | None instrumental: bool plainLyrics: str syncedLyrics: str | None +@dataclass +@total_ordering +class LRCLyrics: + #: Percentage tolerance for max duration difference between lyrics and item. + DURATION_DIFF_TOLERANCE = 0.05 + + target_duration: float + duration: float + instrumental: bool + plain: str + synced: str | None + + def __le__(self, other: LRCLyrics) -> bool: + """Compare two lyrics items by their score.""" + return self.dist < other.dist + + @classmethod + def make(cls, candidate: LRCLibItem, target_duration: float) -> LRCLyrics: + return cls( + target_duration, + candidate["duration"] or 0.0, + candidate["instrumental"], + candidate["plainLyrics"], + candidate["syncedLyrics"], + ) + + @cached_property + def duration_dist(self) -> float: + """Return the absolute difference between lyrics and target duration.""" + return abs(self.duration - self.target_duration) + + @cached_property + def is_valid(self) -> bool: + """Return whether the lyrics item is valid. + Lyrics duration must be within the tolerance defined by + :attr:`DURATION_DIFF_TOLERANCE`. + """ + return ( + self.duration_dist + <= self.target_duration * self.DURATION_DIFF_TOLERANCE + ) + + @cached_property + def dist(self) -> tuple[float, bool]: + """Distance/score of the given lyrics item. + + Return a tuple with the following values: + 1. Absolute difference between lyrics and target duration + 2. Boolean telling whether synced lyrics are available. + + Best lyrics match is the one that has the closest duration to + ``target_duration`` and has synced lyrics available. + """ + return self.duration_dist, not self.synced + + def get_text(self, want_synced: bool) -> str: + if self.instrumental: + return INSTRUMENTAL_LYRICS + + return self.synced if want_synced and self.synced else self.plain + + class LRCLib(Backend): - base_url = "https://lrclib.net/api/search" + """Fetch lyrics from the LRCLib API.""" + + BASE_URL = "https://lrclib.net/api" + GET_URL = f"{BASE_URL}/get" + SEARCH_URL = f"{BASE_URL}/search" def warn(self, message: str, *args) -> None: """Log a warning message with the class name.""" @@ -294,69 +367,54 @@ def fetch_json(self, *args, **kwargs): kwargs.setdefault("timeout", 10) kwargs.setdefault("headers", {"User-Agent": USER_AGENT}) r = requests.get(*args, **kwargs) + if r.status_code == HTTPStatus.NOT_FOUND: + raise NotFoundError("HTTP Error: Not Found", response=r) r.raise_for_status() return r.json() - @staticmethod - def get_rank( - target_duration: float, item: LRCLibItem - ) -> tuple[float, bool]: - """Rank the given lyrics item. + def fetch_candidates( + self, artist: str, title: str, album: str + ) -> Iterator[list[LRCLibItem]]: + """Yield lyrics candidates for the given song data. - Return a tuple with the following values: - 1. Absolute difference between lyrics and target duration - 2. Boolean telling whether synced lyrics are available. + Firstly, attempt to GET lyrics directly, and then search the API if + lyrics are not found or the duration does not match. + + Return an iterator over lists of candidates. """ - return ( - abs(item["duration"] - target_duration), - not item["syncedLyrics"], - ) + base_params = {"artist_name": artist, "track_name": title} + get_params = base_params.copy() + if album: + get_params["album_name"] = album - @classmethod - def pick_lyrics( - cls, target_duration: float, data: list[LRCLibItem] - ) -> LRCLibItem: - """Return best matching lyrics item from the given list. + with suppress(NotFoundError): + yield [self.fetch_json(self.GET_URL, params=get_params)] - Best lyrics match is the one that has the closest duration to - ``target_duration`` and has synced lyrics available. + yield self.fetch_json(self.SEARCH_URL, params=base_params) - Note that the incoming list is guaranteed to be non-empty. - """ - return min(data, key=lambda item: cls.get_rank(target_duration, item)) + @classmethod + def pick_best_match(cls, lyrics: Iterable[LRCLyrics]) -> LRCLyrics | None: + """Return best matching lyrics item from the given list.""" + return min((li for li in lyrics if li.is_valid), default=None) def fetch( self, artist: str, title: str, album: str, length: int ) -> str | None: - """Fetch lyrics for the given artist, title, and album.""" - params: dict[str, str | int] = { - "artist_name": artist, - "track_name": title, - } - if album: - params["album_name"] = album - - if length: - params["duration"] = length - + """Fetch lyrics text for the given song data.""" + fetch = partial(self.fetch_candidates, artist, title, album) + make = partial(LRCLyrics.make, target_duration=length) + pick = self.pick_best_match try: - data = self.fetch_json(self.base_url, params=params) + return next( + filter(None, map(pick, (map(make, x) for x in fetch()))) + ).get_text(self.config["synced"]) + except StopIteration: + pass except requests.JSONDecodeError: self.warn("Could not decode response JSON data") except requests.RequestException as exc: self.warn("Request error: {}", exc) - else: - if data: - item = self.pick_lyrics(length, data) - - if item["instrumental"]: - return INSTRUMENTAL_LYRICS - - if self.config["synced"] and (synced := item["syncedLyrics"]): - return synced - - return item["plainLyrics"] return None diff --git a/docs/changelog.rst b/docs/changelog.rst index cdb7f48163..b2edfd6e6b 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -54,11 +54,12 @@ Bug fixes: * :doc:`plugins/lyrics`: Do not attempt to search for lyrics if either the artist or title is missing and ignore ``artist_sort`` value if it is empty. :bug:`2635` -* :doc:`plugins/lyrics`: Fix fetching lyrics from ``lrclib`` source. Instead of - attempting to fetch lyrics for a specific album, artist, title and duration - combination, the plugin now performs a search which yields many results. - Update the default ``sources`` configuration to prioritize ``lrclib`` over - other sources since it returns reliable results quicker than others. +* :doc:`plugins/lyrics`: Fix fetching lyrics from ``lrclib`` source. If we + cannot find lyrics for a specific album, artist, title combination, the + plugin now tries to search for the artist and title and picks the most + relevant result. Update the default ``sources`` configuration to prioritize + ``lrclib`` over other sources since it returns reliable results quicker than + others. :bug:`5102` For packagers: diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 8c2a133cff..fe5909e327 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -372,7 +372,8 @@ def request_kwargs(self, response_data): @pytest.fixture def fetch_lyrics(self, backend, requests_mock, request_kwargs): - requests_mock.get(backend.base_url, **request_kwargs) + requests_mock.get(backend.GET_URL, status_code=HTTPStatus.NOT_FOUND) + requests_mock.get(backend.SEARCH_URL, **request_kwargs) return partial(backend.fetch, "la", "la", "la", self.ITEM_DURATION) @@ -389,7 +390,14 @@ def test_synced_config_option(self, fetch_lyrics, expected_lyrics): [ pytest.param([], None, id="handle non-matching lyrics"), pytest.param( - [lyrics_match()], "synced", id="synced when available" + [lyrics_match()], + "synced", + id="synced when available", + ), + pytest.param( + [lyrics_match(duration=1)], + None, + id="none: duration too short", ), pytest.param( [lyrics_match(instrumental=True)],