Skip to content

Commit

Permalink
Try to GET LRCLib lyrics before searching
Browse files Browse the repository at this point in the history
  • Loading branch information
snejus committed Oct 12, 2024
1 parent 6f2db6c commit 84fbf93
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 51 deletions.
148 changes: 104 additions & 44 deletions beetsplug/lyrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@
import unicodedata
import urllib
import warnings
from typing import TYPE_CHECKING
from contextlib import suppress
from dataclasses import dataclass
from functools import cached_property, partial, total_ordering
from http import HTTPStatus
from typing import TYPE_CHECKING, Iterable, Iterator

import requests
from typing_extensions import TypedDict
Expand Down Expand Up @@ -112,6 +116,10 @@
"""


class NotFoundError(requests.exceptions.HTTPError):
pass


# Utilities.


Expand Down Expand Up @@ -310,8 +318,74 @@ class LRCLibItem(TypedDict):
syncedLyrics: str | None


@dataclass
@total_ordering
class LRCLyrics:
#: Percentage tolerance for max duration difference between lyrics and item.
DURATION_DIFF_TOLERANCE = 0.05

target_duration: float
duration: float
instrumental: bool
plain: str
synced: str | None

def __le__(self, other: LRCLyrics) -> bool:
"""Compare two lyrics items by their score."""
return self.dist < other.dist

@classmethod
def make(cls, candidate: LRCLibItem, target_duration: float) -> LRCLyrics:
return cls(
target_duration,
candidate["duration"],
candidate["instrumental"],
candidate["plainLyrics"],
candidate["syncedLyrics"],
)

@cached_property
def duration_dist(self) -> float:
"""Return the absolute difference between lyrics and target duration."""
return abs(self.duration - self.target_duration)

@cached_property
def is_valid(self) -> bool:
"""Return whether the lyrics item is valid.
Lyrics duration must be within the tolerance defined by
:attr:`DURATION_DIFF_TOLERANCE`.
"""
return (
self.duration_dist
<= self.target_duration * self.DURATION_DIFF_TOLERANCE
)

@cached_property
def dist(self) -> tuple[float, bool]:
"""Distance/score of the given lyrics item.
Return a tuple with the following values:
1. Absolute difference between lyrics and target duration
2. Boolean telling whether synced lyrics are available.
Best lyrics match is the one that has the closest duration to
``target_duration`` and has synced lyrics available.
"""
return self.duration_dist, not self.synced

def get_text(self, want_synced: bool) -> str:
if self.instrumental:
return INSTRUMENTAL_LYRICS

return self.synced if want_synced and self.synced else self.plain


class LRCLib(Backend):
base_url = "https://lrclib.net/api/search"
"""Fetch lyrics from the LRCLib API."""

BASE_URL = "https://lrclib.net/api"
GET_URL = f"{BASE_URL}/get"
SEARCH_URL = f"{BASE_URL}/search"

def warn(self, message: str, *args) -> None:
"""Log a warning message with the class name."""
Expand All @@ -322,65 +396,51 @@ def fetch_json(self, *args, **kwargs):
kwargs.setdefault("timeout", 10)
kwargs.setdefault("headers", {"User-Agent": USER_AGENT})
r = requests.get(*args, **kwargs)
if r.status_code == HTTPStatus.NOT_FOUND:
raise NotFoundError("HTTP Error: Not Found", response=r)
r.raise_for_status()

return r.json()

@staticmethod
def get_rank(
target_duration: float, item: LRCLibItem
) -> tuple[float, bool]:
"""Rank the given lyrics item.
def fetch_candidates(
self, artist: str, title: str, album: str
) -> Iterator[list[LRCLibItem]]:
"""Yield lyrics candidates for the given song data.
Return a tuple with the following values:
1. Absolute difference between lyrics and target duration
2. Boolean telling whether synced lyrics are available.
"""
return (
abs(item["duration"] - target_duration),
not item["syncedLyrics"],
)
Firstly, attempt to GET lyrics directly, and then search the API if
lyrics are not found or the duration does not match.
@classmethod
def pick_lyrics(
cls, target_duration: float, data: list[LRCLibItem]
) -> LRCLibItem:
"""Return best matching lyrics item from the given list.
Return an iterator over lists of candidates.
"""
base_params = {"artist_name": artist, "track_name": title}
get_params = {**base_params, "album_name": album}
with suppress(NotFoundError):
yield [self.fetch_json(self.GET_URL, params=get_params)]

Best lyrics match is the one that has the closest duration to
``target_duration`` and has synced lyrics available.
yield self.fetch_json(self.SEARCH_URL, params=base_params)

Note that the incoming list is guaranteed to be non-empty.
"""
return min(data, key=lambda item: cls.get_rank(target_duration, item))
@classmethod
def pick_best_match(cls, lyrics: Iterable[LRCLyrics]) -> LRCLyrics | None:
"""Return best matching lyrics item from the given list."""
return min((li for li in lyrics if li.is_valid), default=None)

def fetch(
self, artist: str, title: str, album: str, length: float
) -> str | None:
"""Fetch lyrics for the given artist, title, and album."""
params = {
"artist_name": artist,
"track_name": title,
"album_name": album,
}

"""Fetch lyrics text for the given song data."""
fetch = partial(self.fetch_candidates, artist, title, album)
make = partial(LRCLyrics.make, target_duration=length)
pick = self.pick_best_match
try:
data = self.fetch_json(self.base_url, params=params)
return next(
filter(None, map(pick, (map(make, x) for x in fetch())))
).get_text(self.config["synced"])
except StopIteration:
pass
except requests.JSONDecodeError:
self.warn("Could not decode response JSON data")
except requests.RequestException as exc:
self.warn("Request error: {}", exc)
else:
if data:
item = self.pick_lyrics(length, data)

if item["instrumental"]:
return INSTRUMENTAL_LYRICS

if self.config["synced"] and (synced := item["syncedLyrics"]):
return synced

return item["plainLyrics"]

return None

Expand Down
11 changes: 6 additions & 5 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,12 @@ Bug fixes:
* :doc:`plugins/lyrics`: Do not attempt to search for lyrics if either the
artist or title is missing and ignore ``artist_sort`` value if it is empty.
:bug:`2635`
* :doc:`plugins/lyrics`: Fix fetching lyrics from ``lrclib`` source. Instead of
attempting to fetch lyrics for a specific album, artist, title and duration
combination, the plugin now performs a search which yields many results.
Update the default ``sources`` configuration to prioritize ``lrclib`` over
other sources since it returns reliable results quicker than others.
* :doc:`plugins/lyrics`: Fix fetching lyrics from ``lrclib`` source. If we
cannot find lyrics for a specific album, artist, title combination, the
plugin now tries to search for the artist and title and picks the most
relevant result. Update the default ``sources`` configuration to prioritize
``lrclib`` over other sources since it returns reliable results quicker than
others.
:bug:`5102`

For packagers:
Expand Down
6 changes: 4 additions & 2 deletions test/plugins/test_lyrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def test_backend_source(self, backend):
"""
title = "Lady Madonna"

res = backend.fetch("The Beatles", title, "", 0.0)
res = backend.fetch("The Beatles", title, "", 180.0)

assert res
assert PHRASE_BY_TITLE[title] in res.lower()
Expand Down Expand Up @@ -423,7 +423,8 @@ def request_kwargs(self, response_data):

@pytest.fixture
def fetch_lyrics(self, backend, requests_mock, request_kwargs):
requests_mock.get(backend.base_url, **request_kwargs)
requests_mock.get(backend.GET_URL, status_code=HTTPStatus.NOT_FOUND)
requests_mock.get(backend.SEARCH_URL, **request_kwargs)

return partial(backend.fetch, "la", "la", "la", self.ITEM_DURATION)

Expand All @@ -440,6 +441,7 @@ def test_synced_config_option(self, fetch_lyrics, expected_lyrics):
[
_p([], None, id="handle non-matching lyrics"),
_p([lyrics_match()], "synced", id="synced when available"),
_p([lyrics_match(duration=1)], None, id="none: duration too short"),
_p(
[lyrics_match(instrumental=True)],
"[Instrumental]",
Expand Down

0 comments on commit 84fbf93

Please sign in to comment.