Fix high idle memory usage introduced in 2.9

marcelveldt · marcelveldt · commit 0fc3d876aeee · 2026-06-13T11:42:25.000+02:00
2.9 imported torch into the process at startup (via the always-on audio
analysis controller and the default-enabled Smart Fades provider), pushing
idle RAM from ~300-500MB to ~1GB and causing OOM on small hardware.

- Gate Smart Fades (>=6GB / 4 cores) and Sonic Analysis (>=8GB / 4 cores) on
  system resources, raising a non-retrying UnsupportedSystemError when unmet
- Stop loading torch at idle: lazy-import it in the audio analysis controller
  and Smart Fades provider, and configure torch thread caps on first analysis
- Auto-created default providers that do not meet requirements are removed and
  not retried, instead of lingering as a broken/retrying provider
- Lazy-load the Sonic Similarity text encoder on first query
- RAM-filter the buffer-size options (Balanced >=4GB, Maximum >=8GB)
diff --git a/music_assistant/constants.py b/music_assistant/constants.py
@@ -1107,7 +1107,9 @@ def create_sample_rates_config_entry(
     ("heos", True, lambda: True),
     ("wiim", True, lambda: True),
     ("party", False, lambda: True),
-    ("smart_fades", False, lambda: (os.cpu_count() or 1) > 1),
+    # smart_fades gates on system requirements (RAM/CPU) in its own setup(); an
+    # under-spec host has the auto-created config removed again at load time.
+    ("smart_fades", False, lambda: True),
     ("lastfm_recommendations", False, lambda: True),
 }
 
diff --git a/music_assistant/controllers/streams/audio.py b/music_assistant/controllers/streams/audio.py
@@ -67,7 +67,10 @@
     MASS_LOGGER_NAME,
     VERBOSE_LOG_LEVEL,
 )
-from music_assistant.controllers.streams.audio_analysis import LOUDNESS_ANALYSIS_DOMAIN
+from music_assistant.controllers.streams.audio_analysis import (
+    LOUDNESS_ANALYSIS_DOMAIN,
+    SMART_FADES_ANALYSIS_DOMAIN,
+)
 from music_assistant.controllers.streams.audio_buffer import AudioBuffer
 from music_assistant.controllers.streams.constants import (
     CACHE_CATEGORY_RESOLVED_RADIO_URL,
@@ -2054,10 +2057,11 @@ async def get_queue_flow_stream(
             if flow_player
             else []
         )
-        # smart crossfade requires a large buffer for beat analysis
-        if (
-            smart_fades_mode == SmartFadesMode.SMART_CROSSFADE
-            and self.mass.config.get_raw_core_config_value(
+        # smart crossfade needs the smart_fades analysis provider (for beat/key data) and a
+        # non-minimal buffer for beat analysis; fall back to standard crossfade otherwise.
+        if smart_fades_mode == SmartFadesMode.SMART_CROSSFADE and (
+            self.mass.get_provider(SMART_FADES_ANALYSIS_DOMAIN) is None
+            or self.mass.config.get_raw_core_config_value(
                 "streams", CONF_BUFFER_SIZE, CONF_BUFFER_SIZE_DEFAULT
             )
             == BufferSize.MINIMAL
diff --git a/music_assistant/controllers/streams/audio_analysis.py b/music_assistant/controllers/streams/audio_analysis.py
@@ -12,7 +12,6 @@
 from math import inf
 from typing import TYPE_CHECKING, Any
 
-import torch
 from music_assistant_models.audio_analysis import AudioAnalysisCoverage
 from music_assistant_models.background_task import TaskSchedule
 from music_assistant_models.enums import ContentType, MediaType, ProviderType, StreamType
@@ -137,10 +136,10 @@ def __init__(self, streams: StreamsController) -> None:
         self.logger = self.mass.logger.getChild("audio_analysis")
         self._active_sessions: dict[str, set[str]] = {}
         self._workers: dict[str, asyncio.Task[None]] = {}
+        self._thread_caps_configured = False
 
     def setup(self) -> None:
-        """Register the nightly background scan task and apply CPU caps."""
-        self._configure_thread_caps()
+        """Register the nightly background scan task."""
         utc_hour, utc_minute = local_clock_time_to_utc(0, 0)
         self.mass.tasks.register_scheduled_task(
             task_id=BACKGROUND_SCAN_TASK_ID,
@@ -163,8 +162,15 @@ async def close(self) -> None:
         if workers:
             await asyncio.gather(*workers, return_exceptions=True)
 
-    def _configure_thread_caps(self) -> None:
-        """Cap PyTorch threading so Audio Analysis inference stays around a quarter of cpu_count."""
+    def _ensure_thread_caps_configured(self) -> None:
+        """Cap PyTorch threading (once) so Audio Analysis inference stays around a quarter of cpu_count."""
+        # Import torch lazily and only when analysis actually starts, so an idle server
+        # with no audio analysis provider enabled never imports torch.
+        if self._thread_caps_configured:
+            return
+        self._thread_caps_configured = True
+        import torch  # noqa: PLC0415
+
         budget = self._aa_thread_budget()
         torch.set_num_threads(budget)
         with contextlib.suppress(RuntimeError):
@@ -955,6 +961,9 @@ async def _start_analysis_on_providers(
         providers: list[AudioAnalysisProvider],
     ) -> set[str]:
         """Call start_analysis on each provider, returning IDs of those that accepted."""
+        # Apply torch thread caps now that analysis is actually starting (lazy, once).
+        # This is the shared chokepoint for both the live and background-scan paths.
+        self._ensure_thread_caps_configured()
         provider_ids: set[str] = set()
         for provider in providers:
             try:
diff --git a/music_assistant/controllers/streams/constants.py b/music_assistant/controllers/streams/constants.py
@@ -41,10 +41,27 @@ class BufferSize(StrEnum):
 CONF_BUFFER_SIZE: Final[str] = "buffer_size"
 
 
+def get_available_buffer_sizes() -> list[BufferSize]:
+    """
+    Return the buffer-size presets allowed for this host's RAM.
+
+    Minimal is always available; Balanced requires >= 4GB and Maximum >= 8GB. When total
+    memory is unknown (0.0, e.g. Windows) all presets are offered (fail open).
+    """
+    if TOTAL_SYSTEM_MEMORY_GB == 0.0:
+        return [BufferSize.MINIMAL, BufferSize.BALANCED, BufferSize.MAXIMUM]
+    sizes = [BufferSize.MINIMAL]
+    if TOTAL_SYSTEM_MEMORY_GB >= 4.0:
+        sizes.append(BufferSize.BALANCED)
+    if TOTAL_SYSTEM_MEMORY_GB >= 8.0:
+        sizes.append(BufferSize.MAXIMUM)
+    return sizes
+
+
 def _get_default_buffer_size() -> str:
     if TOTAL_SYSTEM_MEMORY_GB >= 8.0:
         return BufferSize.MAXIMUM
-    if TOTAL_SYSTEM_MEMORY_GB > 4.0:
+    if TOTAL_SYSTEM_MEMORY_GB >= 4.0:
         return BufferSize.BALANCED
     return BufferSize.MINIMAL
 
diff --git a/music_assistant/controllers/streams/controller.py b/music_assistant/controllers/streams/controller.py
@@ -69,7 +69,7 @@
     CONF_BUFFER_SIZE_DEFAULT,
     CONF_SMART_FADES_LOG_LEVEL,
     DEFAULT_PORT,
-    BufferSize,
+    get_available_buffer_sizes,
 )
 from music_assistant.helpers.audio import (
     calculate_content_length,
@@ -189,10 +189,11 @@ async def get_config_entries(
                 "good balance for most systems.\n"
                 "- **Maximum**: Large buffer, "
                 "best performance for systems with plenty of memory.",
+                # Only offer presets the host's RAM can sustain (Balanced >= 4GB,
+                # Maximum >= 8GB); see get_available_buffer_sizes.
                 options=[
-                    ConfigValueOption("Minimal", BufferSize.MINIMAL.value),
-                    ConfigValueOption("Balanced", BufferSize.BALANCED.value),
-                    ConfigValueOption("Maximum", BufferSize.MAXIMUM.value),
+                    ConfigValueOption(size.value.title(), size.value)
+                    for size in get_available_buffer_sizes()
                 ],
                 required=False,
                 category="playback",
diff --git a/music_assistant/helpers/util.py b/music_assistant/helpers/util.py
@@ -130,11 +130,52 @@ def get_total_system_memory() -> float:
         return 0.0
 
 
+class UnsupportedSystemError(SetupFailedError):
+    """
+    Raised when the host does not meet a provider's minimum requirements.
+
+    Subclass of SetupFailedError so existing setup handling still applies, but it
+    marks a permanent condition (RAM, CPU cores, CPU capability) that will not
+    resolve at runtime, so the provider load must not be retried.
+    """
+
+
+def verify_system_meets_requirements(
+    *,
+    feature_name: str,
+    min_memory_gb: float = 0.0,
+    min_cpu_cores: int = 0,
+) -> None:
+    """
+    Verify the host meets the minimum CPU/RAM requirements for a heavy provider.
+
+    :param feature_name: Human-readable provider name used in the error message.
+    :param min_memory_gb: Minimum total system RAM in GB (0 disables the check).
+    :param min_cpu_cores: Minimum CPU core count (0 disables the check).
+    :raises UnsupportedSystemError: If the system does not meet the requirements.
+    """
+    cpu_cores = os.process_cpu_count() or os.cpu_count() or 1
+    if min_cpu_cores and cpu_cores < min_cpu_cores:
+        raise UnsupportedSystemError(
+            f"This system does not meet the minimal requirements for {feature_name}: "
+            f"at least {min_cpu_cores} CPU cores are required ({cpu_cores} detected)."
+        )
+    total_memory_gb = get_total_system_memory()
+    # get_total_system_memory() returns 0.0 when the platform cannot report memory
+    # (e.g. Windows); treat that as unknown and fail open rather than block setup.
+    if min_memory_gb and total_memory_gb and total_memory_gb < min_memory_gb:
+        raise UnsupportedSystemError(
+            f"This system does not meet the minimal requirements for {feature_name}: "
+            f"at least {min_memory_gb:.0f}GB of RAM is required "
+            f"({total_memory_gb:.1f}GB detected)."
+        )
+
+
 def verify_cpu_supports_ml_inference() -> None:
     """
     Verify the CPU can run on-device ML (torch) inference.
 
-    :raises SetupFailedError: If this is an x86 CPU without AVX2 support, which
+    :raises UnsupportedSystemError: If this is an x86 CPU without AVX2 support, which
         torch's FBGEMM quantized backend requires.
     """
     if platform.machine().lower() not in ("x86_64", "amd64", "i386", "i686", "x86"):
@@ -143,7 +184,7 @@ def verify_cpu_supports_ml_inference() -> None:
     import torch  # noqa: PLC0415
 
     if torch.backends.cpu.get_cpu_capability() in ("DEFAULT", "NO AVX"):
-        raise SetupFailedError(
+        raise UnsupportedSystemError(
             "On-device audio analysis requires a CPU with AVX2 support "
             "(Intel Haswell / AMD Zen or newer). This CPU does not support AVX2. "
             "If you are running in a virtual machine (e.g. Proxmox), changing the "
diff --git a/music_assistant/mass.py b/music_assistant/mass.py
@@ -49,6 +49,7 @@
 from music_assistant.helpers.images import get_icon_string
 from music_assistant.helpers.util import (
     TaskManager,
+    UnsupportedSystemError,
     get_package_version,
     is_hass_supervisor,
     load_provider_module,
@@ -713,6 +714,7 @@ async def load_provider(
         self,
         instance_id: str,
         allow_retry: bool = False,
+        remove_if_unsupported: bool = False,
     ) -> None:
         """Try to load a provider and catch errors."""
         try:
@@ -732,6 +734,27 @@ async def load_provider(
 
         try:
             await self.load_provider_config(prov_conf)
+        except UnsupportedSystemError as exc:
+            # The host does not meet this provider's hardware requirements. This is a
+            # permanent condition, so we never retry. For a provider that was just
+            # auto-set-up as a default, drop the config again so it does not linger as a
+            # broken provider (it stays marked done so it is not auto-created again).
+            if remove_if_unsupported:
+                LOGGER.info(
+                    "Not enabling default provider %s: %s",
+                    prov_conf.name or prov_conf.instance_id,
+                    exc,
+                )
+                await self.config.remove_provider_config(instance_id)
+                return
+            prov_conf.last_error = str(exc)
+            self.config.set(f"{CONF_PROVIDERS}/{instance_id}/last_error", str(exc))
+            LOGGER.warning(
+                "Provider(instance) %s can not run on this system: %s",
+                prov_conf.name or prov_conf.instance_id,
+                exc,
+            )
+            return
         except Exception as exc:
             # if loading failed, we store the error in the config object
             # so we can show something useful to the user
@@ -895,6 +918,7 @@ async def _load_providers(self) -> None:
         self.config.set_default(CONF_DEFAULT_PROVIDERS_SETUP, set())
         default_providers_setup = set(self.config.get(CONF_DEFAULT_PROVIDERS_SETUP))
         changes_made = False
+        newly_created_defaults: set[str] = set()
         for default_provider, require_mdns, precondition in DEFAULT_PROVIDERS:
             if default_provider in default_providers_setup:
                 # already processed/setup before, skip
@@ -915,6 +939,7 @@ async def _load_providers(self) -> None:
                     continue
             await self.config.create_builtin_provider_config(manifest.domain)
             changes_made = True
+            newly_created_defaults.add(manifest.domain)
             # TEMP: migration - to be removed after 2.8 release
             # enable all existing players of the default providers if they are not already enabled
             # due to the linked protocol feature we introduced
@@ -944,7 +969,15 @@ async def _load_providers(self) -> None:
             for prov_conf in other_configs:
                 # Use a task so we can load multiple providers at once.
                 # If a provider fails, that will not block the loading of other providers.
-                tg.create_task(self.load_provider(prov_conf.instance_id, allow_retry=True))
+                # For providers just auto-set-up as a default, drop the config again if the
+                # host does not meet their requirements (rather than retry a broken provider).
+                tg.create_task(
+                    self.load_provider(
+                        prov_conf.instance_id,
+                        allow_retry=True,
+                        remove_if_unsupported=prov_conf.domain in newly_created_defaults,
+                    )
+                )
 
     async def _load_provider(self, conf: ProviderConfig) -> None:
         """Load (or reload) a provider."""
diff --git a/music_assistant/providers/smart_fades/__init__.py b/music_assistant/providers/smart_fades/__init__.py
@@ -8,7 +8,7 @@
 from music_assistant_models.config_entries import ConfigEntry, ConfigValueType
 from music_assistant_models.enums import ConfigEntryType
 
-from .provider import SmartFadesProvider
+from music_assistant.helpers.util import verify_system_meets_requirements
 
 if TYPE_CHECKING:
     from music_assistant_models.config_entries import ProviderConfig
@@ -17,15 +17,28 @@
 
     from music_assistant.mass import MusicAssistant
 
+    from .provider import SmartFadesProvider
+
 SUPPORTED_FEATURES: set[ProviderFeature] = set()
 
+# Smart Fades runs on-device ML (torch) inference; gate it to capable hardware.
+MIN_RAM_GB = 6.0
+MIN_CPU_CORES = 4
+
 
 async def setup(
     mass: MusicAssistant,
     manifest: ProviderManifest,
     config: ProviderConfig,
 ) -> SmartFadesProvider:
     """Set up the Smart Fades provider."""
+    # Gate before importing the provider module so the heavy torch/beat_this stack is
+    # never imported on a host that does not meet the minimal requirements.
+    verify_system_meets_requirements(
+        feature_name="Smart Fades", min_memory_gb=MIN_RAM_GB, min_cpu_cores=MIN_CPU_CORES
+    )
+    from .provider import SmartFadesProvider  # noqa: PLC0415
+
     return SmartFadesProvider(mass, manifest, config, SUPPORTED_FEATURES)
 
 
diff --git a/music_assistant/providers/sonic_analysis/__init__.py b/music_assistant/providers/sonic_analysis/__init__.py
@@ -13,7 +13,10 @@
 from music_assistant_models.config_entries import ConfigEntry, ConfigValueOption
 from music_assistant_models.enums import ConfigEntryType, ContentType
 
-from music_assistant.helpers.util import verify_cpu_supports_ml_inference
+from music_assistant.helpers.util import (
+    verify_cpu_supports_ml_inference,
+    verify_system_meets_requirements,
+)
 from music_assistant.models.audio_analysis import AudioAnalysisData
 from music_assistant.models.audio_analysis_provider import (
     AnalysisSessionData,
@@ -66,6 +69,10 @@
 
 CONF_CLAP_SAMPLING: str = "clap_sampling"
 
+# Sonic Analysis runs on-device CLAP inference; gate it to capable hardware.
+MIN_RAM_GB: float = 8.0
+MIN_CPU_CORES: int = 4
+
 
 @dataclass
 class SonicSessionData(AnalysisSessionData):
@@ -320,6 +327,9 @@ async def handle_async_init(self) -> None:
         available=False, which the AudioAnalysisController already honors when
         scheduling work.
         """
+        verify_system_meets_requirements(
+            feature_name="Sonic Analysis", min_memory_gb=MIN_RAM_GB, min_cpu_cores=MIN_CPU_CORES
+        )
         verify_cpu_supports_ml_inference()
         (
             self._clap_model,
diff --git a/music_assistant/providers/sonic_similarity/provider.py b/music_assistant/providers/sonic_similarity/provider.py
@@ -113,6 +113,7 @@ def __init__(
         # CLAP text encoder — lazy: stays None until the first text_search call.
         self._text_encoder: Any = None
         self._text_encoder_lock = asyncio.Lock()
+        self._text_encoder_warm_started = False
         # Per-label last error from fire-and-forget rebuild tasks.
         self._last_rebuild_error: dict[str, str] = {}
         self._last_seen_row_count: int = 0
@@ -218,12 +219,9 @@ async def loaded_in_mass(self) -> None:
                     "sonic_similarity/text_search", self._handle_text_search
                 )
             )
-            # Warm in background so the timeout-less global SEARCH dispatcher never blocks
-            # on the ~500MB GPT2 download; search() short-circuits until the encoder is set.
-            self.mass.create_task(
-                self._get_text_encoder, task_id="sonic_similarity_text_encoder_warm"
-            )
-            self.logger.info("Text search ready (encoder warming in background)")
+            # The ~500MB GPT2 text encoder loads lazily on the first query (see search()
+            # and _handle_text_search), not at plugin start.
+            self.logger.info("Text search enabled (encoder loads on first query)")
 
         self.mass.tasks.register_scheduled_task(
             task_id=PERIODIC_REFRESH_TASK_ID,
@@ -819,9 +817,14 @@ async def search(
             return SearchResults()
         if self._clap_index is None or len(self._clap_index) == 0:
             return SearchResults()
-        # Only serve once the encoder is warm; never hold up the global search
-        # gather waiting on the lazy load (background-scheduled in loaded_in_mass).
+        # Never hold up the timeout-less global SEARCH gather on the ~500MB encoder load:
+        # kick off a one-time background warm on the first query, and short-circuit until ready.
         if self._text_encoder is None:
+            if not self._text_encoder_warm_started:
+                self._text_encoder_warm_started = True
+                self.mass.create_task(
+                    self._get_text_encoder, task_id="sonic_similarity_text_encoder_warm"
+                )
             return SearchResults()
         emb_np = await self._embed_text_query(search_query)
         if emb_np is None:
diff --git a/tests/controllers/streams/test_buffer_size.py b/tests/controllers/streams/test_buffer_size.py
diff --git a/tests/core/test_helpers.py b/tests/core/test_helpers.py