Skip to content

Commit dc5c483

Browse files
committed
Drop per-track MusicBrainz ISRC lookups from Last.fm recommendations
Every resolved track fired a MusicBrainz ISRC lookup, fanning out up to 25 MB calls per artist top-tracks request and tripping the MB mirror's edge rate limit on the playback path. Tracks now resolve via the existing name search on streaming providers, with the artist verified against the result so same-titled covers don't slip through. Library dedup is unaffected: it already uses the external IDs returned by the streaming provider's own result.
1 parent 35af34d commit dc5c483

3 files changed

Lines changed: 52 additions & 123 deletions

File tree

music_assistant/providers/lastfm_recommendations/constants.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,6 @@
2424
# Maximum number of concurrent provider searches to prevent overwhelming APIs
2525
SEARCH_CONCURRENCY_LIMIT = 5
2626

27-
# Maximum number of concurrent MusicBrainz ISRC lookups. MusicBrainz throttles all
28-
# callers to a shared global budget, so a wide fan-out just queues behind it while
29-
# bursting past the mirror's edge limit; cap it to stay within budget with headroom.
30-
MB_ISRC_CONCURRENCY_LIMIT = 5
31-
3227
# Item counts and limits
3328
# Target number of items to return in recommendation folders
3429
TARGET_ITEM_COUNT = 10
@@ -50,7 +45,7 @@
5045
SIMILAR_ITEMS_BUFFER = 12
5146

5247
# Number of similar tracks to resolve before filtering to target count. Larger than the
53-
# artist buffer to absorb this row's extra attrition: stricter ISRC matching and the
48+
# artist buffer to absorb this row's extra attrition: artist+title verification and the
5449
# exclusion of tracks that resolve to the user's own library copy.
5550
SIMILAR_TRACKS_BUFFER = 15
5651

music_assistant/providers/lastfm_recommendations/parsers.py

Lines changed: 49 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from music_assistant.constants import MASS_LOGGER_NAME
1414
from music_assistant.helpers.compare import compare_strings
1515
from music_assistant.providers.lastfm_recommendations.constants import (
16-
MB_ISRC_CONCURRENCY_LIMIT,
1716
PROVIDER_SEARCH_LIMIT,
1817
SEARCH_CONCURRENCY_LIMIT,
1918
)
@@ -23,31 +22,44 @@
2322
from music_assistant.controllers.media.albums import AlbumsController
2423
from music_assistant.controllers.media.artists import ArtistsController
2524
from music_assistant.controllers.media.tracks import TracksController
26-
from music_assistant.providers.musicbrainz import MusicbrainzProvider
2725

2826
LOGGER = logging.getLogger(f"{MASS_LOGGER_NAME}.lastfm_recommendations")
2927

3028
# Limit concurrent provider searches to avoid overwhelming their APIs.
3129
_SEARCH_SEMAPHORE = asyncio.Semaphore(SEARCH_CONCURRENCY_LIMIT)
3230

33-
# Cap concurrent MusicBrainz ISRC lookups so a single artist's top-track fan-out
34-
# doesn't dump dozens of requests onto the shared MusicBrainz rate-limit budget at once.
35-
_MB_ISRC_SEMAPHORE = asyncio.Semaphore(MB_ISRC_CONCURRENCY_LIMIT)
3631

37-
38-
def _has_matching_external_ids(
39-
item_mapping: ItemMapping, media_item: Artist | Album | Track
32+
def _is_matching_result(
33+
item_mapping: ItemMapping, result: Artist | Album | Track, artist_name: str | None
4034
) -> bool:
4135
"""
42-
Return True if the ItemMapping shares any external IDs with the media item.
36+
Return True if a search result matches the searched item by name (and artist, if known).
4337
44-
:param item_mapping: ItemMapping with external IDs from Last.fm.
45-
:param media_item: Artist or Track to compare against.
38+
:param item_mapping: ItemMapping that was searched for.
39+
:param result: Search result to verify.
40+
:param artist_name: Artist name to verify against the result's artists, if known.
4641
"""
47-
if not item_mapping.external_ids:
42+
# Album/track search names are "Artist - Title" while results usually expose only the title.
43+
searched_name = item_mapping.name
44+
title_part = (
45+
searched_name[len(artist_name) + 3 :]
46+
if artist_name and searched_name.startswith(f"{artist_name} - ")
47+
else searched_name
48+
)
49+
if not compare_strings(title_part, result.name, strict=False) and not compare_strings(
50+
searched_name, result.name, strict=False
51+
):
4852
return False
4953

50-
return bool(item_mapping.external_ids & media_item.external_ids)
54+
# Verify the artist too: a matching title from a different artist (cover or karaoke
55+
# version) must not pass. Results without artist info can't contradict, accept those.
56+
result_artists = getattr(result, "artists", None)
57+
if artist_name and result_artists:
58+
return any(
59+
compare_strings(artist_name, result_artist.name, strict=False)
60+
for result_artist in result_artists
61+
)
62+
return True
5163

5264

5365
def _get_streaming_providers(
@@ -118,118 +130,61 @@ async def _search_providers_concurrent(
118130
ctrl: ArtistsController | AlbumsController | TracksController,
119131
item_mapping: ItemMapping,
120132
providers: list[Any],
121-
require_external_id_match: bool,
133+
artist_name: str | None,
122134
) -> Artist | Album | Track | None:
123135
"""
124-
Search multiple providers concurrently and return the best match.
136+
Search multiple providers concurrently and return the first verified match.
125137
126138
:param ctrl: Controller for the media type.
127139
:param item_mapping: ItemMapping to search for.
128140
:param providers: List of providers to search.
129-
:param require_external_id_match: If True, prefer matches on external IDs and reject
130-
results whose external IDs contradict the ItemMapping.
141+
:param artist_name: Artist name to verify candidate matches against, if known.
131142
"""
132143
tasks = [
133144
asyncio.create_task(_search_provider(ctrl, item_mapping, provider))
134145
for provider in providers
135146
]
136147

137-
fallback_result = None
138-
139148
for task in asyncio.as_completed(tasks):
140149
result = await task
141150
if result is None:
142151
continue
143152

144-
if not require_external_id_match:
145-
names_match = compare_strings(item_mapping.name, result.name, strict=False)
146-
147-
# Album/track search names may be "Artist - Title" while results expose only "Title".
148-
if not names_match and " - " in item_mapping.name:
149-
title_part = item_mapping.name.split(" - ", 1)[1]
150-
names_match = compare_strings(title_part, result.name, strict=False)
151-
152-
if names_match:
153-
LOGGER.debug(
154-
"Name match on %s: %s (searched: %s)",
155-
result.provider,
156-
result.name,
157-
item_mapping.name,
158-
)
159-
for t in tasks:
160-
if not t.done():
161-
t.cancel()
162-
return result
163-
153+
if _is_matching_result(item_mapping, result, artist_name):
164154
LOGGER.debug(
165-
"Rejecting %s from %s: name mismatch (searched: %s)",
166-
result.name,
167-
result.provider,
168-
item_mapping.name,
169-
)
170-
if not fallback_result:
171-
fallback_result = result
172-
continue
173-
174-
if _has_matching_external_ids(item_mapping, result):
175-
LOGGER.debug(
176-
"External ID match on %s: %s",
155+
"Match on %s: %s (searched: %s)",
177156
result.provider,
178157
result.name,
158+
item_mapping.name,
179159
)
180160
for t in tasks:
181161
if not t.done():
182162
t.cancel()
183163
return result
184164

185-
# A result that exposes external IDs of a matching type but none that match is a
186-
# different item; only consider results without such IDs as a name-based fallback.
187-
result_has_external_ids = any(
188-
ext_id[0] in {ext_id_check[0] for ext_id_check in item_mapping.external_ids}
189-
for ext_id in result.external_ids
165+
LOGGER.debug(
166+
"Rejecting %s from %s: name mismatch (searched: %s)",
167+
result.name,
168+
result.provider,
169+
item_mapping.name,
190170
)
191171

192-
if result_has_external_ids:
193-
LOGGER.debug(
194-
"Rejecting %s from %s: has external IDs but they don't match",
195-
result.name,
196-
result.provider,
197-
)
198-
elif not fallback_result:
199-
names_match = compare_strings(item_mapping.name, result.name, strict=False)
200-
201-
if not names_match and " - " in item_mapping.name:
202-
title_part = item_mapping.name.split(" - ", 1)[1]
203-
names_match = compare_strings(title_part, result.name, strict=False)
204-
205-
if names_match:
206-
LOGGER.debug(
207-
"Saving %s from %s as fallback (no external IDs to verify)",
208-
result.name,
209-
result.provider,
210-
)
211-
fallback_result = result
212-
else:
213-
LOGGER.debug(
214-
"Not saving %s from %s as fallback: name mismatch",
215-
result.name,
216-
result.provider,
217-
)
218-
219-
if fallback_result:
220-
LOGGER.debug("No external ID matches found, using fallback result")
221-
return fallback_result
172+
return None
222173

223174

224175
async def _resolve_item(
225-
item_mapping: ItemMapping, mass: MusicAssistant, provider_instance_to_skip: str
176+
item_mapping: ItemMapping,
177+
mass: MusicAssistant,
178+
provider_instance_to_skip: str,
179+
artist_name: str | None = None,
226180
) -> Artist | Album | Track | None:
227181
"""
228182
Resolve an ItemMapping to a library or provider item.
229183
230184
:param item_mapping: ItemMapping with metadata and external IDs from Last.fm.
231185
:param mass: MusicAssistant instance.
232186
:param provider_instance_to_skip: Provider instance to skip (ourselves).
187+
:param artist_name: Artist name to verify candidate matches against, if known.
233188
"""
234189
ctrl: ArtistsController | AlbumsController | TracksController
235190
if item_mapping.media_type == MediaType.ARTIST:
@@ -257,15 +212,8 @@ async def _resolve_item(
257212
LOGGER.debug("No streaming providers available for resolution")
258213
return None
259214

260-
# Streaming providers only expose ISRCs for tracks; for artists/albums rely on name matching.
261-
require_external_id_match = False
262-
if item_mapping.media_type == MediaType.TRACK:
263-
has_isrc = any(ext_id[0] == ExternalID.ISRC for ext_id in item_mapping.external_ids)
264-
if has_isrc:
265-
require_external_id_match = True
266-
267215
result = await _search_providers_concurrent(
268-
ctrl, item_mapping, streaming_providers, require_external_id_match
216+
ctrl, item_mapping, streaming_providers, artist_name
269217
)
270218
if result is None:
271219
LOGGER.debug("Could not resolve %s: %s", item_mapping.media_type.value, item_mapping.name)
@@ -336,27 +284,9 @@ async def parse_track(
336284
artist_name = artist_data.get("name", "Unknown Artist")
337285

338286
external_ids = set()
339-
340287
if mbid:
341288
external_ids.add((ExternalID.MB_RECORDING, mbid))
342289

343-
# Streaming providers match tracks on ISRC, so enrich MBIDs with ISRCs where possible.
344-
mb_provider = mass.get_provider("musicbrainz")
345-
if mb_provider:
346-
LOGGER.debug("Resolving MBID %s to ISRCs via MusicBrainz", mbid)
347-
async with _MB_ISRC_SEMAPHORE:
348-
isrcs = await cast("MusicbrainzProvider", mb_provider).get_isrcs_for_recording(mbid)
349-
if isrcs:
350-
LOGGER.debug("Found %d ISRCs for MBID %s: %s", len(isrcs), mbid, isrcs)
351-
for isrc in isrcs:
352-
external_ids.add((ExternalID.ISRC, isrc))
353-
else:
354-
LOGGER.debug("No ISRCs found for MBID %s", mbid)
355-
else:
356-
LOGGER.debug("MusicBrainz provider not available for ISRC lookup")
357-
else:
358-
LOGGER.debug("Track has no MBID, cannot resolve ISRCs")
359-
360290
item_mapping = ItemMapping(
361291
media_type=MediaType.TRACK,
362292
item_id="temp",
@@ -365,7 +295,9 @@ async def parse_track(
365295
external_ids=external_ids,
366296
)
367297

368-
return cast("Track | None", await _resolve_item(item_mapping, mass, provider_instance))
298+
return cast(
299+
"Track | None", await _resolve_item(item_mapping, mass, provider_instance, artist_name)
300+
)
369301

370302

371303
async def parse_album(
@@ -399,4 +331,6 @@ async def parse_album(
399331
external_ids=external_ids,
400332
)
401333

402-
return cast("Album | None", await _resolve_item(item_mapping, mass, provider_instance))
334+
return cast(
335+
"Album | None", await _resolve_item(item_mapping, mass, provider_instance, artist_name)
336+
)

music_assistant/providers/lastfm_recommendations/recommendations.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ async def _get_genre_based_recommendations(self) -> AsyncIterator[Recommendation
499499
)
500500
if genre_artists_raw:
501501
# Drop items already in the library using a cheap DB lookup, before the
502-
# expensive MusicBrainz + provider resolution step.
502+
# expensive provider resolution step.
503503
non_library_artists_raw = [
504504
artist_data
505505
for artist_data in genre_artists_raw
@@ -801,7 +801,7 @@ async def _get_similar_tracks_from_seeds(self, seed_tracks: list[Track]) -> list
801801
unique_similar.sort(key=lambda x: float(x.get("match", 0)), reverse=True)
802802

803803
# Resolve a small buffer beyond the target so resolution failures and owned-copy
804-
# exclusion still leave enough to fill the row, while capping MusicBrainz ISRC lookups.
804+
# exclusion still leave enough to fill the row, while capping provider searches.
805805
top_tracks_data = unique_similar[:SIMILAR_TRACKS_BUFFER]
806806

807807
resolved_tracks = await asyncio.gather(

0 commit comments

Comments
 (0)