diff --git a/music_assistant/providers/lastfm_recommendations/constants.py b/music_assistant/providers/lastfm_recommendations/constants.py index fce568e69b..221067d7da 100644 --- a/music_assistant/providers/lastfm_recommendations/constants.py +++ b/music_assistant/providers/lastfm_recommendations/constants.py @@ -24,11 +24,6 @@ # Maximum number of concurrent provider searches to prevent overwhelming APIs SEARCH_CONCURRENCY_LIMIT = 5 -# Maximum number of concurrent MusicBrainz ISRC lookups. MusicBrainz throttles all -# callers to a shared global budget, so a wide fan-out just queues behind it while -# bursting past the mirror's edge limit; cap it to stay within budget with headroom. -MB_ISRC_CONCURRENCY_LIMIT = 5 - # Item counts and limits # Target number of items to return in recommendation folders TARGET_ITEM_COUNT = 10 @@ -50,7 +45,7 @@ SIMILAR_ITEMS_BUFFER = 12 # Number of similar tracks to resolve before filtering to target count. Larger than the -# artist buffer to absorb this row's extra attrition: stricter ISRC matching and the +# artist buffer to absorb this row's extra attrition: artist+title verification and the # exclusion of tracks that resolve to the user's own library copy. SIMILAR_TRACKS_BUFFER = 15 diff --git a/music_assistant/providers/lastfm_recommendations/parsers.py b/music_assistant/providers/lastfm_recommendations/parsers.py index 40771e0ccb..858dec80e2 100644 --- a/music_assistant/providers/lastfm_recommendations/parsers.py +++ b/music_assistant/providers/lastfm_recommendations/parsers.py @@ -13,7 +13,6 @@ from music_assistant.constants import MASS_LOGGER_NAME from music_assistant.helpers.compare import compare_strings from music_assistant.providers.lastfm_recommendations.constants import ( - MB_ISRC_CONCURRENCY_LIMIT, PROVIDER_SEARCH_LIMIT, SEARCH_CONCURRENCY_LIMIT, ) @@ -23,31 +22,44 @@ from music_assistant.controllers.media.albums import AlbumsController from music_assistant.controllers.media.artists import ArtistsController from music_assistant.controllers.media.tracks import TracksController - from music_assistant.providers.musicbrainz import MusicbrainzProvider LOGGER = logging.getLogger(f"{MASS_LOGGER_NAME}.lastfm_recommendations") # Limit concurrent provider searches to avoid overwhelming their APIs. _SEARCH_SEMAPHORE = asyncio.Semaphore(SEARCH_CONCURRENCY_LIMIT) -# Cap concurrent MusicBrainz ISRC lookups so a single artist's top-track fan-out -# doesn't dump dozens of requests onto the shared MusicBrainz rate-limit budget at once. -_MB_ISRC_SEMAPHORE = asyncio.Semaphore(MB_ISRC_CONCURRENCY_LIMIT) - -def _has_matching_external_ids( - item_mapping: ItemMapping, media_item: Artist | Album | Track +def _is_matching_result( + item_mapping: ItemMapping, result: Artist | Album | Track, artist_name: str | None ) -> bool: """ - Return True if the ItemMapping shares any external IDs with the media item. + Return True if a search result matches the searched item by name (and artist, if known). - :param item_mapping: ItemMapping with external IDs from Last.fm. - :param media_item: Artist or Track to compare against. + :param item_mapping: ItemMapping that was searched for. + :param result: Search result to verify. + :param artist_name: Artist name to verify against the result's artists, if known. """ - if not item_mapping.external_ids: + # Album/track search names are "Artist - Title" while results usually expose only the title. + searched_name = item_mapping.name + title_part = ( + searched_name[len(artist_name) + 3 :] + if artist_name and searched_name.startswith(f"{artist_name} - ") + else searched_name + ) + if not compare_strings(title_part, result.name, strict=False) and not compare_strings( + searched_name, result.name, strict=False + ): return False - return bool(item_mapping.external_ids & media_item.external_ids) + # Verify the artist too: a matching title from a different artist (cover or karaoke + # version) must not pass. Results without artist info can't contradict, accept those. + result_artists = getattr(result, "artists", None) + if artist_name and result_artists: + return any( + compare_strings(artist_name, result_artist.name, strict=False) + for result_artist in result_artists + ) + return True def _get_streaming_providers( @@ -118,111 +130,54 @@ async def _search_providers_concurrent( ctrl: ArtistsController | AlbumsController | TracksController, item_mapping: ItemMapping, providers: list[Any], - require_external_id_match: bool, + artist_name: str | None, ) -> Artist | Album | Track | None: """ - Search multiple providers concurrently and return the best match. + Search multiple providers concurrently and return the first verified match. :param ctrl: Controller for the media type. :param item_mapping: ItemMapping to search for. :param providers: List of providers to search. - :param require_external_id_match: If True, prefer matches on external IDs and reject - results whose external IDs contradict the ItemMapping. + :param artist_name: Artist name to verify candidate matches against, if known. """ tasks = [ asyncio.create_task(_search_provider(ctrl, item_mapping, provider)) for provider in providers ] - fallback_result = None - for task in asyncio.as_completed(tasks): result = await task if result is None: continue - if not require_external_id_match: - names_match = compare_strings(item_mapping.name, result.name, strict=False) - - # Album/track search names may be "Artist - Title" while results expose only "Title". - if not names_match and " - " in item_mapping.name: - title_part = item_mapping.name.split(" - ", 1)[1] - names_match = compare_strings(title_part, result.name, strict=False) - - if names_match: - LOGGER.debug( - "Name match on %s: %s (searched: %s)", - result.provider, - result.name, - item_mapping.name, - ) - for t in tasks: - if not t.done(): - t.cancel() - return result - + if _is_matching_result(item_mapping, result, artist_name): LOGGER.debug( - "Rejecting %s from %s: name mismatch (searched: %s)", - result.name, - result.provider, - item_mapping.name, - ) - if not fallback_result: - fallback_result = result - continue - - if _has_matching_external_ids(item_mapping, result): - LOGGER.debug( - "External ID match on %s: %s", + "Match on %s: %s (searched: %s)", result.provider, result.name, + item_mapping.name, ) for t in tasks: if not t.done(): t.cancel() + await asyncio.gather(*tasks, return_exceptions=True) return result - # A result that exposes external IDs of a matching type but none that match is a - # different item; only consider results without such IDs as a name-based fallback. - result_has_external_ids = any( - ext_id[0] in {ext_id_check[0] for ext_id_check in item_mapping.external_ids} - for ext_id in result.external_ids + LOGGER.debug( + "Rejecting %s from %s: name mismatch (searched: %s)", + result.name, + result.provider, + item_mapping.name, ) - if result_has_external_ids: - LOGGER.debug( - "Rejecting %s from %s: has external IDs but they don't match", - result.name, - result.provider, - ) - elif not fallback_result: - names_match = compare_strings(item_mapping.name, result.name, strict=False) - - if not names_match and " - " in item_mapping.name: - title_part = item_mapping.name.split(" - ", 1)[1] - names_match = compare_strings(title_part, result.name, strict=False) - - if names_match: - LOGGER.debug( - "Saving %s from %s as fallback (no external IDs to verify)", - result.name, - result.provider, - ) - fallback_result = result - else: - LOGGER.debug( - "Not saving %s from %s as fallback: name mismatch", - result.name, - result.provider, - ) - - if fallback_result: - LOGGER.debug("No external ID matches found, using fallback result") - return fallback_result + return None async def _resolve_item( - item_mapping: ItemMapping, mass: MusicAssistant, provider_instance_to_skip: str + item_mapping: ItemMapping, + mass: MusicAssistant, + provider_instance_to_skip: str, + artist_name: str | None = None, ) -> Artist | Album | Track | None: """ Resolve an ItemMapping to a library or provider item. @@ -230,6 +185,7 @@ async def _resolve_item( :param item_mapping: ItemMapping with metadata and external IDs from Last.fm. :param mass: MusicAssistant instance. :param provider_instance_to_skip: Provider instance to skip (ourselves). + :param artist_name: Artist name to verify candidate matches against, if known. """ ctrl: ArtistsController | AlbumsController | TracksController if item_mapping.media_type == MediaType.ARTIST: @@ -257,15 +213,8 @@ async def _resolve_item( LOGGER.debug("No streaming providers available for resolution") return None - # Streaming providers only expose ISRCs for tracks; for artists/albums rely on name matching. - require_external_id_match = False - if item_mapping.media_type == MediaType.TRACK: - has_isrc = any(ext_id[0] == ExternalID.ISRC for ext_id in item_mapping.external_ids) - if has_isrc: - require_external_id_match = True - result = await _search_providers_concurrent( - ctrl, item_mapping, streaming_providers, require_external_id_match + ctrl, item_mapping, streaming_providers, artist_name ) if result is None: LOGGER.debug("Could not resolve %s: %s", item_mapping.media_type.value, item_mapping.name) @@ -336,27 +285,9 @@ async def parse_track( artist_name = artist_data.get("name", "Unknown Artist") external_ids = set() - if mbid: external_ids.add((ExternalID.MB_RECORDING, mbid)) - # Streaming providers match tracks on ISRC, so enrich MBIDs with ISRCs where possible. - mb_provider = mass.get_provider("musicbrainz") - if mb_provider: - LOGGER.debug("Resolving MBID %s to ISRCs via MusicBrainz", mbid) - async with _MB_ISRC_SEMAPHORE: - isrcs = await cast("MusicbrainzProvider", mb_provider).get_isrcs_for_recording(mbid) - if isrcs: - LOGGER.debug("Found %d ISRCs for MBID %s: %s", len(isrcs), mbid, isrcs) - for isrc in isrcs: - external_ids.add((ExternalID.ISRC, isrc)) - else: - LOGGER.debug("No ISRCs found for MBID %s", mbid) - else: - LOGGER.debug("MusicBrainz provider not available for ISRC lookup") - else: - LOGGER.debug("Track has no MBID, cannot resolve ISRCs") - item_mapping = ItemMapping( media_type=MediaType.TRACK, item_id="temp", @@ -365,7 +296,9 @@ async def parse_track( external_ids=external_ids, ) - return cast("Track | None", await _resolve_item(item_mapping, mass, provider_instance)) + return cast( + "Track | None", await _resolve_item(item_mapping, mass, provider_instance, artist_name) + ) async def parse_album( @@ -399,4 +332,6 @@ async def parse_album( external_ids=external_ids, ) - return cast("Album | None", await _resolve_item(item_mapping, mass, provider_instance)) + return cast( + "Album | None", await _resolve_item(item_mapping, mass, provider_instance, artist_name) + ) diff --git a/music_assistant/providers/lastfm_recommendations/recommendations.py b/music_assistant/providers/lastfm_recommendations/recommendations.py index 4e68b66963..baf91dea61 100644 --- a/music_assistant/providers/lastfm_recommendations/recommendations.py +++ b/music_assistant/providers/lastfm_recommendations/recommendations.py @@ -499,7 +499,7 @@ async def _get_genre_based_recommendations(self) -> AsyncIterator[Recommendation ) if genre_artists_raw: # Drop items already in the library using a cheap DB lookup, before the - # expensive MusicBrainz + provider resolution step. + # expensive provider resolution step. non_library_artists_raw = [ artist_data for artist_data in genre_artists_raw @@ -801,7 +801,7 @@ async def _get_similar_tracks_from_seeds(self, seed_tracks: list[Track]) -> list unique_similar.sort(key=lambda x: float(x.get("match", 0)), reverse=True) # Resolve a small buffer beyond the target so resolution failures and owned-copy - # exclusion still leave enough to fill the row, while capping MusicBrainz ISRC lookups. + # exclusion still leave enough to fill the row, while capping provider searches. top_tracks_data = unique_similar[:SIMILAR_TRACKS_BUFFER] resolved_tracks = await asyncio.gather(