Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,6 @@
# Maximum number of concurrent provider searches to prevent overwhelming APIs
SEARCH_CONCURRENCY_LIMIT = 5

# Maximum number of concurrent MusicBrainz ISRC lookups. MusicBrainz throttles all
# callers to a shared global budget, so a wide fan-out just queues behind it while
# bursting past the mirror's edge limit; cap it to stay within budget with headroom.
MB_ISRC_CONCURRENCY_LIMIT = 5

# Item counts and limits
# Target number of items to return in recommendation folders
TARGET_ITEM_COUNT = 10
Expand All @@ -50,7 +45,7 @@
SIMILAR_ITEMS_BUFFER = 12

# Number of similar tracks to resolve before filtering to target count. Larger than the
# artist buffer to absorb this row's extra attrition: stricter ISRC matching and the
# artist buffer to absorb this row's extra attrition: artist+title verification and the
# exclusion of tracks that resolve to the user's own library copy.
SIMILAR_TRACKS_BUFFER = 15

Expand Down
164 changes: 49 additions & 115 deletions music_assistant/providers/lastfm_recommendations/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from music_assistant.constants import MASS_LOGGER_NAME
from music_assistant.helpers.compare import compare_strings
from music_assistant.providers.lastfm_recommendations.constants import (
MB_ISRC_CONCURRENCY_LIMIT,
PROVIDER_SEARCH_LIMIT,
SEARCH_CONCURRENCY_LIMIT,
)
Expand All @@ -23,31 +22,44 @@
from music_assistant.controllers.media.albums import AlbumsController
from music_assistant.controllers.media.artists import ArtistsController
from music_assistant.controllers.media.tracks import TracksController
from music_assistant.providers.musicbrainz import MusicbrainzProvider

LOGGER = logging.getLogger(f"{MASS_LOGGER_NAME}.lastfm_recommendations")

# Limit concurrent provider searches to avoid overwhelming their APIs.
_SEARCH_SEMAPHORE = asyncio.Semaphore(SEARCH_CONCURRENCY_LIMIT)

# Cap concurrent MusicBrainz ISRC lookups so a single artist's top-track fan-out
# doesn't dump dozens of requests onto the shared MusicBrainz rate-limit budget at once.
_MB_ISRC_SEMAPHORE = asyncio.Semaphore(MB_ISRC_CONCURRENCY_LIMIT)


def _has_matching_external_ids(
item_mapping: ItemMapping, media_item: Artist | Album | Track
def _is_matching_result(
item_mapping: ItemMapping, result: Artist | Album | Track, artist_name: str | None
) -> bool:
"""
Return True if the ItemMapping shares any external IDs with the media item.
Return True if a search result matches the searched item by name (and artist, if known).

:param item_mapping: ItemMapping with external IDs from Last.fm.
:param media_item: Artist or Track to compare against.
:param item_mapping: ItemMapping that was searched for.
:param result: Search result to verify.
:param artist_name: Artist name to verify against the result's artists, if known.
"""
if not item_mapping.external_ids:
# Album/track search names are "Artist - Title" while results usually expose only the title.
searched_name = item_mapping.name
title_part = (
searched_name[len(artist_name) + 3 :]
if artist_name and searched_name.startswith(f"{artist_name} - ")
else searched_name
)
if not compare_strings(title_part, result.name, strict=False) and not compare_strings(
searched_name, result.name, strict=False
):
return False

return bool(item_mapping.external_ids & media_item.external_ids)
# Verify the artist too: a matching title from a different artist (cover or karaoke
# version) must not pass. Results without artist info can't contradict, accept those.
result_artists = getattr(result, "artists", None)
if artist_name and result_artists:
return any(
compare_strings(artist_name, result_artist.name, strict=False)
for result_artist in result_artists
)
return True


def _get_streaming_providers(
Expand Down Expand Up @@ -118,118 +130,61 @@ async def _search_providers_concurrent(
ctrl: ArtistsController | AlbumsController | TracksController,
item_mapping: ItemMapping,
providers: list[Any],
require_external_id_match: bool,
artist_name: str | None,
) -> Artist | Album | Track | None:
"""
Search multiple providers concurrently and return the best match.
Search multiple providers concurrently and return the first verified match.

:param ctrl: Controller for the media type.
:param item_mapping: ItemMapping to search for.
:param providers: List of providers to search.
:param require_external_id_match: If True, prefer matches on external IDs and reject
results whose external IDs contradict the ItemMapping.
:param artist_name: Artist name to verify candidate matches against, if known.
"""
tasks = [
asyncio.create_task(_search_provider(ctrl, item_mapping, provider))
for provider in providers
]

fallback_result = None

for task in asyncio.as_completed(tasks):
result = await task
if result is None:
continue

if not require_external_id_match:
names_match = compare_strings(item_mapping.name, result.name, strict=False)

# Album/track search names may be "Artist - Title" while results expose only "Title".
if not names_match and " - " in item_mapping.name:
title_part = item_mapping.name.split(" - ", 1)[1]
names_match = compare_strings(title_part, result.name, strict=False)

if names_match:
LOGGER.debug(
"Name match on %s: %s (searched: %s)",
result.provider,
result.name,
item_mapping.name,
)
for t in tasks:
if not t.done():
t.cancel()
return result

if _is_matching_result(item_mapping, result, artist_name):
LOGGER.debug(
"Rejecting %s from %s: name mismatch (searched: %s)",
result.name,
result.provider,
item_mapping.name,
)
if not fallback_result:
fallback_result = result
continue

if _has_matching_external_ids(item_mapping, result):
LOGGER.debug(
"External ID match on %s: %s",
"Match on %s: %s (searched: %s)",
result.provider,
result.name,
item_mapping.name,
)
for t in tasks:
if not t.done():
t.cancel()
return result
Comment thread
Copilot marked this conversation as resolved.

# A result that exposes external IDs of a matching type but none that match is a
# different item; only consider results without such IDs as a name-based fallback.
result_has_external_ids = any(
ext_id[0] in {ext_id_check[0] for ext_id_check in item_mapping.external_ids}
for ext_id in result.external_ids
LOGGER.debug(
"Rejecting %s from %s: name mismatch (searched: %s)",
result.name,
result.provider,
item_mapping.name,
)

if result_has_external_ids:
LOGGER.debug(
"Rejecting %s from %s: has external IDs but they don't match",
result.name,
result.provider,
)
elif not fallback_result:
names_match = compare_strings(item_mapping.name, result.name, strict=False)

if not names_match and " - " in item_mapping.name:
title_part = item_mapping.name.split(" - ", 1)[1]
names_match = compare_strings(title_part, result.name, strict=False)

if names_match:
LOGGER.debug(
"Saving %s from %s as fallback (no external IDs to verify)",
result.name,
result.provider,
)
fallback_result = result
else:
LOGGER.debug(
"Not saving %s from %s as fallback: name mismatch",
result.name,
result.provider,
)

if fallback_result:
LOGGER.debug("No external ID matches found, using fallback result")
return fallback_result
return None


async def _resolve_item(
item_mapping: ItemMapping, mass: MusicAssistant, provider_instance_to_skip: str
item_mapping: ItemMapping,
mass: MusicAssistant,
provider_instance_to_skip: str,
artist_name: str | None = None,
) -> Artist | Album | Track | None:
"""
Resolve an ItemMapping to a library or provider item.

:param item_mapping: ItemMapping with metadata and external IDs from Last.fm.
:param mass: MusicAssistant instance.
:param provider_instance_to_skip: Provider instance to skip (ourselves).
:param artist_name: Artist name to verify candidate matches against, if known.
"""
ctrl: ArtistsController | AlbumsController | TracksController
if item_mapping.media_type == MediaType.ARTIST:
Expand Down Expand Up @@ -257,15 +212,8 @@ async def _resolve_item(
LOGGER.debug("No streaming providers available for resolution")
return None

# Streaming providers only expose ISRCs for tracks; for artists/albums rely on name matching.
require_external_id_match = False
if item_mapping.media_type == MediaType.TRACK:
has_isrc = any(ext_id[0] == ExternalID.ISRC for ext_id in item_mapping.external_ids)
if has_isrc:
require_external_id_match = True

result = await _search_providers_concurrent(
ctrl, item_mapping, streaming_providers, require_external_id_match
ctrl, item_mapping, streaming_providers, artist_name
)
if result is None:
LOGGER.debug("Could not resolve %s: %s", item_mapping.media_type.value, item_mapping.name)
Expand Down Expand Up @@ -336,27 +284,9 @@ async def parse_track(
artist_name = artist_data.get("name", "Unknown Artist")

external_ids = set()

if mbid:
external_ids.add((ExternalID.MB_RECORDING, mbid))

# Streaming providers match tracks on ISRC, so enrich MBIDs with ISRCs where possible.
mb_provider = mass.get_provider("musicbrainz")
if mb_provider:
LOGGER.debug("Resolving MBID %s to ISRCs via MusicBrainz", mbid)
async with _MB_ISRC_SEMAPHORE:
isrcs = await cast("MusicbrainzProvider", mb_provider).get_isrcs_for_recording(mbid)
if isrcs:
LOGGER.debug("Found %d ISRCs for MBID %s: %s", len(isrcs), mbid, isrcs)
for isrc in isrcs:
external_ids.add((ExternalID.ISRC, isrc))
else:
LOGGER.debug("No ISRCs found for MBID %s", mbid)
else:
LOGGER.debug("MusicBrainz provider not available for ISRC lookup")
else:
LOGGER.debug("Track has no MBID, cannot resolve ISRCs")

item_mapping = ItemMapping(
media_type=MediaType.TRACK,
item_id="temp",
Expand All @@ -365,7 +295,9 @@ async def parse_track(
external_ids=external_ids,
)

return cast("Track | None", await _resolve_item(item_mapping, mass, provider_instance))
return cast(
"Track | None", await _resolve_item(item_mapping, mass, provider_instance, artist_name)
)


async def parse_album(
Expand Down Expand Up @@ -399,4 +331,6 @@ async def parse_album(
external_ids=external_ids,
)

return cast("Album | None", await _resolve_item(item_mapping, mass, provider_instance))
return cast(
"Album | None", await _resolve_item(item_mapping, mass, provider_instance, artist_name)
)
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ async def _get_genre_based_recommendations(self) -> AsyncIterator[Recommendation
)
if genre_artists_raw:
# Drop items already in the library using a cheap DB lookup, before the
# expensive MusicBrainz + provider resolution step.
# expensive provider resolution step.
non_library_artists_raw = [
artist_data
for artist_data in genre_artists_raw
Expand Down Expand Up @@ -801,7 +801,7 @@ async def _get_similar_tracks_from_seeds(self, seed_tracks: list[Track]) -> list
unique_similar.sort(key=lambda x: float(x.get("match", 0)), reverse=True)

# Resolve a small buffer beyond the target so resolution failures and owned-copy
# exclusion still leave enough to fill the row, while capping MusicBrainz ISRC lookups.
# exclusion still leave enough to fill the row, while capping provider searches.
top_tracks_data = unique_similar[:SIMILAR_TRACKS_BUFFER]

resolved_tracks = await asyncio.gather(
Expand Down