Skip to content

Commit

Permalink
translations: make sure we do not re-translate
Browse files Browse the repository at this point in the history
  • Loading branch information
snejus committed Oct 27, 2024
1 parent 36f0b3c commit 5ec1cf8
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 10 deletions.
22 changes: 15 additions & 7 deletions beetsplug/lyrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,7 @@ def scrape(cls, html: str) -> str | None:
class Translator(RequestHandler):
TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate"
LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$")
remove_translations = partial(re.compile(r" / [^\n]+").sub, "")

_log: beets.logging.Logger
api_key: str
Expand Down Expand Up @@ -802,21 +803,33 @@ def append_translations(self, lines: Iterable[str]) -> list[str]:
def translate(self, new_lyrics: str, old_lyrics: str) -> str:
"""Translate the given lyrics to the target language.
Check old lyrics for existing translations and return them if their
original text matches the new lyrics. This is to avoid translating
the same lyrics multiple times.
If the lyrics are already in the target language or not in any of
the source languages (if configured), they are returned as is.
The footer with the source URL is preserved, if present.
"""
if (
" / " in old_lyrics
and self.remove_translations(old_lyrics) == new_lyrics
):
self.info("🔵 Translations already exist")
return old_lyrics

lyrics_language = langdetect.detect(new_lyrics)
if lyrics_language != self.to_lang and (
not self.from_langs or lyrics_language in self.from_langs
):
lyrics, *url = new_lyrics.split("\n\nSource: ")
with self.handle_request():
translated_lines = self.append_translations(lyrics.splitlines())
self.info("🟢 Translated lyrics to {}", self.to_lang.upper())
return "\n\nSource: ".join(["\n".join(translated_lines), *url])

return lyrics
return new_lyrics


@dataclass
Expand Down Expand Up @@ -1052,12 +1065,7 @@ def add_item_lyrics(self, item: Item, write: bool) -> None:
if lyrics := self.find_lyrics(item):
self.info("🟢 Found lyrics: {0}", item)
if translator := self.translator:
initial_lyrics = lyrics
if (lyrics := translator.translate(lyrics)) != initial_lyrics:
self.info(
"🟢 Added translation to {}",
self.config["translate_to"].get().upper(),
)
lyrics = translator.translate(lyrics, item.lyrics)
else:
self.info("🔴 Lyrics not found: {}", item)
lyrics = self.config["fallback"].get()
Expand Down
14 changes: 11 additions & 3 deletions test/plugins/test_lyrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ def callback(request, _):
requests_mock.post(lyrics.Translator.TRANSLATE_URL, json=callback)

@pytest.mark.parametrize(
"initial_lyrics, expected",
"new_lyrics, old_lyrics, expected",
[
pytest.param(
"""
Expand All @@ -569,6 +569,7 @@ def callback(request, _):
My body wouldn't let me hide it (Hide it)
No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold)
Ridin' through the thunder, lightnin'""",
"",
"""
[Refrain: Doja Cat] / [Refrain : Doja Cat]
Hard for me to let you go (Let you go, let you go) / Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)
Expand All @@ -584,6 +585,7 @@ def callback(request, _):
[00:01.00] Some more synced lyrics
Source: https://lrclib.net/api/123""",
"",
"""
[00:00.00] Some synced lyrics / Quelques paroles synchronisées
[00:00:50]
Expand All @@ -592,14 +594,20 @@ def callback(request, _):
Source: https://lrclib.net/api/123""", # noqa: E501
id="synced",
),
pytest.param(
"Some lyrics",
"Some lyrics / Some translation",
"Some lyrics / Some translation",
id="already translated",
),
],
)
def test_translate(self, initial_lyrics, expected):
def test_translate(self, new_lyrics, old_lyrics, expected):
plugin = lyrics.LyricsPlugin()
bing = lyrics.Translator(plugin._log, "123", ["en"], "fr")

assert bing.translate(
textwrap.dedent(initial_lyrics)
textwrap.dedent(new_lyrics), old_lyrics
) == textwrap.dedent(expected)


Expand Down

0 comments on commit 5ec1cf8

Please sign in to comment.