Skip to content

Commit e0f11cf

Browse files
OzGavmarcelveldtCopilot
authored
Drop per-track MusicBrainz ISRC lookups from Last.fm recommendations (#4190)
# What does this implement/fix? <!-- Quick description and explanation of changes. --> Every resolved track fired a MusicBrainz ISRC lookup tripping the MB mirror's edge rate limit on the playback path. Tracks now resolve via the existing name search on streaming providers, with the artist verified against the result so same-titled covers don't slip through. Library dedup is unaffected: it already uses the external IDs returned by the streaming provider's own result. **Related issue (if applicable):** - related issue <link to issue> ## Types of changes <!-- Tick exactly one box. CI (.github/workflows/pr-labels.yaml) derives the label from the ticked box and applies it automatically; the release-notes generator uses that same label to slot this change into the next release notes. --> - [X] Bugfix (non-breaking change which fixes an issue) — `bugfix` - [ ] New feature (non-breaking change which adds functionality) — `new-feature` - [ ] Enhancement to an existing feature — `enhancement` - [ ] New music/player/metadata/plugin provider — `new-provider` - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) — `breaking-change` - [ ] Refactor (no behaviour change) — `refactor` - [ ] Documentation only — `documentation` - [ ] Maintenance / chore — `maintenance` - [ ] CI / workflow change — `ci` - [ ] Dependencies bump — `dependencies` ## Checklist - [X] The code change is tested and works locally. - [X] `pre-commit run --all-files` passes. - [X] `pytest` passes, and tests have been added/updated under `tests/` where applicable. - [ ] For changes to shared models, the companion PR in `music-assistant/models` is linked. - [ ] For changes affecting the UI, the companion PR in `music-assistant/frontend` is linked. - [X] I have read and complied with the project's [AI Policy](https://github.com/music-assistant/.github/blob/main/AI_POLICY.md) for any AI-assisted contributions. - [ ] I have raised a PR against the documentation repository targeting the main or beta branch as appropriate. --------- Co-authored-by: Marcel van der Veldt <m.vanderveldt@outlook.com> Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
1 parent 1706209 commit e0f11cf

3 files changed

Lines changed: 53 additions & 123 deletions

File tree

music_assistant/providers/lastfm_recommendations/constants.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,6 @@
3131
# Maximum number of concurrent provider searches to prevent overwhelming APIs
3232
SEARCH_CONCURRENCY_LIMIT = 5
3333

34-
# Maximum number of concurrent MusicBrainz ISRC lookups. MusicBrainz throttles all
35-
# callers to a shared global budget, so a wide fan-out just queues behind it while
36-
# bursting past the mirror's edge limit; cap it to stay within budget with headroom.
37-
MB_ISRC_CONCURRENCY_LIMIT = 5
38-
3934
# Item counts and limits
4035
# Target number of items to return in recommendation folders
4136
TARGET_ITEM_COUNT = 10
@@ -57,7 +52,7 @@
5752
SIMILAR_ITEMS_BUFFER = 12
5853

5954
# Number of similar tracks to resolve before filtering to target count. Larger than the
60-
# artist buffer to absorb this row's extra attrition: stricter ISRC matching and the
55+
# artist buffer to absorb this row's extra attrition: artist+title verification and the
6156
# exclusion of tracks that resolve to the user's own library copy.
6257
SIMILAR_TRACKS_BUFFER = 15
6358

music_assistant/providers/lastfm_recommendations/parsers.py

Lines changed: 50 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from music_assistant.constants import MASS_LOGGER_NAME
1414
from music_assistant.helpers.compare import compare_strings
1515
from music_assistant.providers.lastfm_recommendations.constants import (
16-
MB_ISRC_CONCURRENCY_LIMIT,
1716
PROVIDER_SEARCH_LIMIT,
1817
SEARCH_CONCURRENCY_LIMIT,
1918
)
@@ -23,31 +22,44 @@
2322
from music_assistant.controllers.media.albums import AlbumsController
2423
from music_assistant.controllers.media.artists import ArtistsController
2524
from music_assistant.controllers.media.tracks import TracksController
26-
from music_assistant.providers.musicbrainz import MusicbrainzProvider
2725

2826
LOGGER = logging.getLogger(f"{MASS_LOGGER_NAME}.lastfm_recommendations")
2927

3028
# Limit concurrent provider searches to avoid overwhelming their APIs.
3129
_SEARCH_SEMAPHORE = asyncio.Semaphore(SEARCH_CONCURRENCY_LIMIT)
3230

33-
# Cap concurrent MusicBrainz ISRC lookups so a single artist's top-track fan-out
34-
# doesn't dump dozens of requests onto the shared MusicBrainz rate-limit budget at once.
35-
_MB_ISRC_SEMAPHORE = asyncio.Semaphore(MB_ISRC_CONCURRENCY_LIMIT)
3631

37-
38-
def _has_matching_external_ids(
39-
item_mapping: ItemMapping, media_item: Artist | Album | Track
32+
def _is_matching_result(
33+
item_mapping: ItemMapping, result: Artist | Album | Track, artist_name: str | None
4034
) -> bool:
4135
"""
42-
Return True if the ItemMapping shares any external IDs with the media item.
36+
Return True if a search result matches the searched item by name (and artist, if known).
4337
44-
:param item_mapping: ItemMapping with external IDs from Last.fm.
45-
:param media_item: Artist or Track to compare against.
38+
:param item_mapping: ItemMapping that was searched for.
39+
:param result: Search result to verify.
40+
:param artist_name: Artist name to verify against the result's artists, if known.
4641
"""
47-
if not item_mapping.external_ids:
42+
# Album/track search names are "Artist - Title" while results usually expose only the title.
43+
searched_name = item_mapping.name
44+
title_part = (
45+
searched_name[len(artist_name) + 3 :]
46+
if artist_name and searched_name.startswith(f"{artist_name} - ")
47+
else searched_name
48+
)
49+
if not compare_strings(title_part, result.name, strict=False) and not compare_strings(
50+
searched_name, result.name, strict=False
51+
):
4852
return False
4953

50-
return bool(item_mapping.external_ids & media_item.external_ids)
54+
# Verify the artist too: a matching title from a different artist (cover or karaoke
55+
# version) must not pass. Results without artist info can't contradict, accept those.
56+
result_artists = getattr(result, "artists", None)
57+
if artist_name and result_artists:
58+
return any(
59+
compare_strings(artist_name, result_artist.name, strict=False)
60+
for result_artist in result_artists
61+
)
62+
return True
5163

5264

5365
def _get_streaming_providers(
@@ -118,118 +130,62 @@ async def _search_providers_concurrent(
118130
ctrl: ArtistsController | AlbumsController | TracksController,
119131
item_mapping: ItemMapping,
120132
providers: list[Any],
121-
require_external_id_match: bool,
133+
artist_name: str | None,
122134
) -> Artist | Album | Track | None:
123135
"""
124-
Search multiple providers concurrently and return the best match.
136+
Search multiple providers concurrently and return the first verified match.
125137
126138
:param ctrl: Controller for the media type.
127139
:param item_mapping: ItemMapping to search for.
128140
:param providers: List of providers to search.
129-
:param require_external_id_match: If True, prefer matches on external IDs and reject
130-
results whose external IDs contradict the ItemMapping.
141+
:param artist_name: Artist name to verify candidate matches against, if known.
131142
"""
132143
tasks = [
133144
asyncio.create_task(_search_provider(ctrl, item_mapping, provider))
134145
for provider in providers
135146
]
136147

137-
fallback_result = None
138-
139148
for task in asyncio.as_completed(tasks):
140149
result = await task
141150
if result is None:
142151
continue
143152

144-
if not require_external_id_match:
145-
names_match = compare_strings(item_mapping.name, result.name, strict=False)
146-
147-
# Album/track search names may be "Artist - Title" while results expose only "Title".
148-
if not names_match and " - " in item_mapping.name:
149-
title_part = item_mapping.name.split(" - ", 1)[1]
150-
names_match = compare_strings(title_part, result.name, strict=False)
151-
152-
if names_match:
153-
LOGGER.debug(
154-
"Name match on %s: %s (searched: %s)",
155-
result.provider,
156-
result.name,
157-
item_mapping.name,
158-
)
159-
for t in tasks:
160-
if not t.done():
161-
t.cancel()
162-
return result
163-
153+
if _is_matching_result(item_mapping, result, artist_name):
164154
LOGGER.debug(
165-
"Rejecting %s from %s: name mismatch (searched: %s)",
166-
result.name,
167-
result.provider,
168-
item_mapping.name,
169-
)
170-
if not fallback_result:
171-
fallback_result = result
172-
continue
173-
174-
if _has_matching_external_ids(item_mapping, result):
175-
LOGGER.debug(
176-
"External ID match on %s: %s",
155+
"Match on %s: %s (searched: %s)",
177156
result.provider,
178157
result.name,
158+
item_mapping.name,
179159
)
180160
for t in tasks:
181161
if not t.done():
182162
t.cancel()
163+
await asyncio.gather(*tasks, return_exceptions=True)
183164
return result
184165

185-
# A result that exposes external IDs of a matching type but none that match is a
186-
# different item; only consider results without such IDs as a name-based fallback.
187-
result_has_external_ids = any(
188-
ext_id[0] in {ext_id_check[0] for ext_id_check in item_mapping.external_ids}
189-
for ext_id in result.external_ids
166+
LOGGER.debug(
167+
"Rejecting %s from %s: name mismatch (searched: %s)",
168+
result.name,
169+
result.provider,
170+
item_mapping.name,
190171
)
191172

192-
if result_has_external_ids:
193-
LOGGER.debug(
194-
"Rejecting %s from %s: has external IDs but they don't match",
195-
result.name,
196-
result.provider,
197-
)
198-
elif not fallback_result:
199-
names_match = compare_strings(item_mapping.name, result.name, strict=False)
200-
201-
if not names_match and " - " in item_mapping.name:
202-
title_part = item_mapping.name.split(" - ", 1)[1]
203-
names_match = compare_strings(title_part, result.name, strict=False)
204-
205-
if names_match:
206-
LOGGER.debug(
207-
"Saving %s from %s as fallback (no external IDs to verify)",
208-
result.name,
209-
result.provider,
210-
)
211-
fallback_result = result
212-
else:
213-
LOGGER.debug(
214-
"Not saving %s from %s as fallback: name mismatch",
215-
result.name,
216-
result.provider,
217-
)
218-
219-
if fallback_result:
220-
LOGGER.debug("No external ID matches found, using fallback result")
221-
return fallback_result
173+
return None
222174

223175

224176
async def _resolve_item(
225-
item_mapping: ItemMapping, mass: MusicAssistant, provider_instance_to_skip: str
177+
item_mapping: ItemMapping,
178+
mass: MusicAssistant,
179+
provider_instance_to_skip: str,
180+
artist_name: str | None = None,
226181
) -> Artist | Album | Track | None:
227182
"""
228183
Resolve an ItemMapping to a library or provider item.
229184
230185
:param item_mapping: ItemMapping with metadata and external IDs from Last.fm.
231186
:param mass: MusicAssistant instance.
232187
:param provider_instance_to_skip: Provider instance to skip (ourselves).
188+
:param artist_name: Artist name to verify candidate matches against, if known.
233189
"""
234190
ctrl: ArtistsController | AlbumsController | TracksController
235191
if item_mapping.media_type == MediaType.ARTIST:
@@ -257,15 +213,8 @@ async def _resolve_item(
257213
LOGGER.debug("No streaming providers available for resolution")
258214
return None
259215

260-
# Streaming providers only expose ISRCs for tracks; for artists/albums rely on name matching.
261-
require_external_id_match = False
262-
if item_mapping.media_type == MediaType.TRACK:
263-
has_isrc = any(ext_id[0] == ExternalID.ISRC for ext_id in item_mapping.external_ids)
264-
if has_isrc:
265-
require_external_id_match = True
266-
267216
result = await _search_providers_concurrent(
268-
ctrl, item_mapping, streaming_providers, require_external_id_match
217+
ctrl, item_mapping, streaming_providers, artist_name
269218
)
270219
if result is None:
271220
LOGGER.debug("Could not resolve %s: %s", item_mapping.media_type.value, item_mapping.name)
@@ -336,27 +285,9 @@ async def parse_track(
336285
artist_name = artist_data.get("name", "Unknown Artist")
337286

338287
external_ids = set()
339-
340288
if mbid:
341289
external_ids.add((ExternalID.MB_RECORDING, mbid))
342290

343-
# Streaming providers match tracks on ISRC, so enrich MBIDs with ISRCs where possible.
344-
mb_provider = mass.get_provider("musicbrainz")
345-
if mb_provider:
346-
LOGGER.debug("Resolving MBID %s to ISRCs via MusicBrainz", mbid)
347-
async with _MB_ISRC_SEMAPHORE:
348-
isrcs = await cast("MusicbrainzProvider", mb_provider).get_isrcs_for_recording(mbid)
349-
if isrcs:
350-
LOGGER.debug("Found %d ISRCs for MBID %s: %s", len(isrcs), mbid, isrcs)
351-
for isrc in isrcs:
352-
external_ids.add((ExternalID.ISRC, isrc))
353-
else:
354-
LOGGER.debug("No ISRCs found for MBID %s", mbid)
355-
else:
356-
LOGGER.debug("MusicBrainz provider not available for ISRC lookup")
357-
else:
358-
LOGGER.debug("Track has no MBID, cannot resolve ISRCs")
359-
360291
item_mapping = ItemMapping(
361292
media_type=MediaType.TRACK,
362293
item_id="temp",
@@ -365,7 +296,9 @@ async def parse_track(
365296
external_ids=external_ids,
366297
)
367298

368-
return cast("Track | None", await _resolve_item(item_mapping, mass, provider_instance))
299+
return cast(
300+
"Track | None", await _resolve_item(item_mapping, mass, provider_instance, artist_name)
301+
)
369302

370303

371304
async def parse_album(
@@ -399,4 +332,6 @@ async def parse_album(
399332
external_ids=external_ids,
400333
)
401334

402-
return cast("Album | None", await _resolve_item(item_mapping, mass, provider_instance))
335+
return cast(
336+
"Album | None", await _resolve_item(item_mapping, mass, provider_instance, artist_name)
337+
)

music_assistant/providers/lastfm_recommendations/recommendations.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ async def _get_genre_based_recommendations(self) -> AsyncIterator[Recommendation
502502
)
503503
if genre_artists_raw:
504504
# Drop items already in the library using a cheap DB lookup, before the
505-
# expensive MusicBrainz + provider resolution step.
505+
# expensive provider resolution step.
506506
non_library_artists_raw = [
507507
artist_data
508508
for artist_data in genre_artists_raw
@@ -871,7 +871,7 @@ async def _get_similar_tracks_from_seeds(self, seed_tracks: list[Track]) -> list
871871
unique_similar.sort(key=lambda x: float(x.get("match", 0)), reverse=True)
872872

873873
# Resolve a small buffer beyond the target so resolution failures and owned-copy
874-
# exclusion still leave enough to fill the row, while capping MusicBrainz ISRC lookups.
874+
# exclusion still leave enough to fill the row, while capping provider searches.
875875
top_tracks_data = unique_similar[:SIMILAR_TRACKS_BUFFER]
876876

877877
resolved_tracks = await asyncio.gather(

0 commit comments

Comments
 (0)