Skip to content

Commit 98d1980

Browse files
committed
Hoist weighted-distance weight vector out of the MMR loop
compute_weighted_distance built the per-dimension weight vector internally, so the vector was rebuilt on every call inside the O(n²) MMR loop. Split that work into build_dimension_weights + compute_weighted_distance_vec so that apply_mmr builds the vector once and reuses it, roughly halving the per-call cost. compute_weighted_distance keeps its signature and remains as a thin wrapper for the remaining callers.
1 parent eb268f7 commit 98d1980

3 files changed

Lines changed: 66 additions & 22 deletions

File tree

music_assistant/providers/sonic_similarity/similarity.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
import numpy as np
1313

14-
from .vectors import compute_weighted_distance
14+
from .vectors import build_dimension_weights, compute_weighted_distance_vec
1515

1616

1717
class Candidate(NamedTuple):
@@ -111,9 +111,11 @@ def apply_mmr(
111111
seed_arr = np.array(seed_vec, dtype=np.float64)
112112

113113
if weights is not None:
114+
# Build the per-dimension weight vector once; _similarity runs O(n²) times.
115+
dim_weights = build_dimension_weights(weights)
114116

115117
def _similarity(a: np.ndarray, b: np.ndarray) -> float:
116-
d = compute_weighted_distance(a, b, weights)
118+
d = compute_weighted_distance_vec(a, b, dim_weights)
117119
return 1.0 / (1.0 + d)
118120

119121
relevance: dict[str, float] = {

music_assistant/providers/sonic_similarity/vectors.py

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -174,26 +174,60 @@ def compute_group_distances(
174174
return result
175175

176176

177+
def build_dimension_weights(weights: dict[str, float]) -> np.ndarray:
    """
    Turn a per-group weight mapping into one weight per vector dimension.

    The result can be computed once and passed to many
    compute_weighted_distance_vec calls, which is what hot loops such as MMR
    re-ranking should do instead of expanding the mapping on every call.

    :param weights: Weight overrides keyed by FEATURE_GROUPS name. Any group that
        is missing from the mapping keeps the default weight of 1.0.
    :returns: Float64 array of length VECTOR_DIMENSIONS holding the weight of
        each dimension.
    """
    # Start from the neutral weight everywhere, then overwrite the slices of
    # the groups the caller explicitly overrode.
    expanded = np.full(VECTOR_DIMENSIONS, 1.0, dtype=np.float64)
    for name, (lo, hi) in FEATURE_GROUPS.items():
        if name not in weights:
            continue
        expanded[lo:hi] = weights[name]
    return expanded
193+
194+
177195
def compute_weighted_distance(
    sig_a: list[float] | np.ndarray,
    sig_b: list[float] | np.ndarray,
    weights: dict[str, float],
) -> float:
    """
    Return the group-weighted Euclidean distance between two feature vectors.

    Convenience wrapper: expands the per-group weights into a per-dimension
    vector and delegates to compute_weighted_distance_vec. Callers that compare
    many pairs with the same weights should build the vector once with
    build_dimension_weights and call compute_weighted_distance_vec directly.

    :param sig_a: First feature vector (list or numpy array).
    :param sig_b: Second feature vector (list or numpy array).
    :param weights: Per-group weight overrides keyed by FEATURE_GROUPS name.
    :returns: Weighted normalized distance as a float.
    """
    per_dimension = build_dimension_weights(weights)
    return compute_weighted_distance_vec(sig_a, sig_b, per_dimension)
209+
210+
211+
def compute_weighted_distance_vec(
212+
sig_a: list[float] | np.ndarray,
213+
sig_b: list[float] | np.ndarray,
214+
dim_weights: np.ndarray,
215+
) -> float:
216+
"""
217+
Compute weighted Euclidean distance from a precomputed per-dimension weight vector.
218+
219+
:param sig_a: First feature vector (list or numpy array).
220+
:param sig_b: Second feature vector (list or numpy array).
221+
:param dim_weights: Per-dimension weights as built by build_dimension_weights.
222+
:returns: Weighted normalized distance as a float.
223+
"""
224+
total_weighted_dims = float(dim_weights.sum())
225+
if total_weighted_dims == 0.0:
226+
return 0.0
189227
# np.asarray is a no-op when the caller already holds a float64 array (the
190228
# MMR hot path), avoiding the list round-trip the previous version forced.
191229
a = np.asarray(sig_a, dtype=np.float64)
192230
b = np.asarray(sig_b, dtype=np.float64)
193-
dim_weights = _expand_group_weights(weights)
194-
total_weighted_dims = float(dim_weights.sum())
195-
if total_weighted_dims == 0.0:
196-
return 0.0
197231
diff = a - b
198232
weighted_sq_sum = float(np.dot(dim_weights, diff * diff))
199233
return math.sqrt(weighted_sq_sum / total_weighted_dims)
@@ -222,18 +256,3 @@ def build_debug_breakdown(
222256
for k, v in compute_group_distances(seed_normalized, cand_normalized).items()
223257
},
224258
}
225-
226-
227-
def _expand_group_weights(weights: dict[str, float]) -> np.ndarray:
228-
"""
229-
Expand per-group weights into a per-dimension weight vector.
230-
231-
:param weights: Per-group weight overrides keyed by FEATURE_GROUPS name. Groups
232-
absent from the dict default to weight 1.0.
233-
:returns: Float64 weight array of length VECTOR_DIMENSIONS.
234-
"""
235-
dim_weights = np.ones(VECTOR_DIMENSIONS, dtype=np.float64)
236-
for group, (start, end) in FEATURE_GROUPS.items():
237-
if group in weights:
238-
dim_weights[start:end] = weights[group]
239-
return dim_weights

tests/providers/sonic_similarity/test_vector_assembly.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@
1212
FEATURE_GROUPS,
1313
VECTOR_DIMENSIONS,
1414
assemble_vector,
15+
build_dimension_weights,
1516
compute_corpus_stats,
1617
compute_weighted_distance,
18+
compute_weighted_distance_vec,
1719
encode_key_mode,
1820
normalize_features,
1921
)
@@ -454,3 +456,24 @@ def test_all_zero_weights_returns_zero(self) -> None:
454456
b = self._make_vector(1.0)
455457
all_zero = dict.fromkeys(FEATURE_GROUPS, 0.0)
456458
assert compute_weighted_distance(a, b, all_zero) == 0.0
459+
460+
def test_precomputed_weights_match_dict_path(self) -> None:
    """A prebuilt weight vector yields the same distance as the dict-based wrapper."""
    generator = np.random.default_rng(7)
    vec_a = generator.standard_normal(VECTOR_DIMENSIONS)
    vec_b = generator.standard_normal(VECTOR_DIMENSIONS)
    # Mix an amplified group with a zeroed-out one so both overrides are exercised.
    group_weights = {"rhythm": 2.0, "dynamics": 0.0}

    via_vector = compute_weighted_distance_vec(
        vec_a, vec_b, build_dimension_weights(group_weights)
    )
    via_dict = compute_weighted_distance(vec_a, vec_b, group_weights)

    assert via_vector == pytest.approx(via_dict)
470+
471+
def test_build_dimension_weights_expands_groups(self) -> None:
    """An overridden group gets its weight on every dimension; others stay at 1.0."""
    expanded = build_dimension_weights({"timbre": 3.0})
    assert len(expanded) == VECTOR_DIMENSIONS

    t_start, t_end = FEATURE_GROUPS["timbre"]
    assert expanded[t_start:t_end].tolist() == [3.0] * (t_end - t_start)

    # rhythm was not part of the override mapping, so it must keep the default.
    r_start, r_end = FEATURE_GROUPS["rhythm"]
    assert expanded[r_start:r_end].tolist() == [1.0] * (r_end - r_start)

0 commit comments

Comments
 (0)