Skip to content

Commit b6f45fe

Browse files
feat: implement TTL (#139)
* feat: implement TTL for cache entries with automatic eviction of expired entries * feat(tests): add unit tests for SemanticCache TTL and expiration features * issues resolved * fix: enhance cache expiration functionality to allow disabling TTL * fix: add test for loading no-TTL cache into TTL-enabled instance * fix: enhance SemanticCache TTL handling to reject non-finite values and types
1 parent bd4cda5 commit b6f45fe

2 files changed

Lines changed: 337 additions & 15 deletions

File tree

nexum_ai/optimizer.py

Lines changed: 137 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
Semantic cache and query optimizer using local embedding models
33
"""
44
import logging
5+
import math
56
import numpy as np
7+
import time
68
from typing import Optional, List, Dict, Any
79
import json
810
import os
@@ -61,6 +63,7 @@ def __init__(self, similarity_threshold: float = 0.95, cache_file: str = "semant
6163
self.cache: List[Dict] = []
6264
self.similarity_threshold = similarity_threshold
6365
self.model = None
66+
self.max_age_seconds: Optional[float] = None # None = no TTL
6467

6568
# Support environment variable for cache file path
6669
cache_file_env = os.environ.get('NEXUMDB_CACHE_FILE', cache_file)
@@ -115,25 +118,81 @@ def cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
115118

116119
return float(dot_product / (norm1 * norm2))
117120

121+
def _is_entry_expired(self, entry: Dict, now: Optional[float] = None) -> bool:
    """Check whether a cache entry has outlived the configured TTL.

    Args:
        entry: Cache entry dict. Its ``'timestamp'`` key, when present, is
            read as the creation time in Unix seconds.
        now: Current time as a Unix timestamp. If ``None``, ``time.time()``
            is called. Callers iterating over many entries should snapshot
            the time once and pass it in so every entry is judged against
            the same instant.

    Returns:
        True if the entry is older than ``self.max_age_seconds``, False
        otherwise. Always False when TTL is disabled, when the entry has
        no timestamp, or when the timestamp cannot be read as a number.
    """
    if self.max_age_seconds is None:
        return False

    timestamp = entry.get('timestamp')
    if timestamp is None:
        # Legacy entries without a timestamp are kept (not expired)
        return False

    try:
        created_at = float(timestamp)
    except (TypeError, ValueError, OverflowError):
        # A timestamp that is not a usable number (e.g. hand-edited or
        # corrupted persisted data) is treated like a missing one instead
        # of letting an arithmetic error abort the caller's whole scan.
        return False

    if now is None:
        now = time.time()
    return (now - created_at) > self.max_age_seconds
144+
145+
def _evict_expired(self) -> int:
146+
"""Remove all expired cache entries.
147+
148+
Returns:
149+
Number of entries removed.
150+
"""
151+
if self.max_age_seconds is None:
152+
return 0
153+
now = time.time()
154+
before = len(self.cache)
155+
self.cache = [e for e in self.cache if not self._is_entry_expired(e, now=now)]
156+
removed = before - len(self.cache)
157+
if removed > 0:
158+
logger.info(f"Evicted {removed} expired cache entries")
159+
return removed
160+
118161
def get(self, query: str) -> Optional[str]:
    """Look up a cached result for a semantically similar query.

    The cache is scanned in order. Entries past their TTL are ignored,
    and the first remaining entry whose cosine similarity with the query
    reaches ``self.similarity_threshold`` wins. If any expired entry was
    seen during the scan, an eviction pass runs before returning (on a
    hit as well as on a miss) so later lookups do not keep skipping the
    same stale entries.

    Args:
        query: Query text to vectorize and compare against the cache.

    Returns:
        The stored result of the matching entry, or ``None`` on a miss.
    """
    target = self.vectorize(query)
    snapshot = time.time()
    saw_stale = False
    match = None

    for candidate in self.cache:
        if self._is_entry_expired(candidate, now=snapshot):
            # Remember that cleanup is needed, but keep scanning.
            saw_stale = True
            continue
        score = self.cosine_similarity(target, candidate['vector'])
        if score >= self.similarity_threshold:
            logger.info(f"Cache hit! Similarity: {score:.4f}")
            match = candidate
            break

    # Single exit path: clean up once, then report hit or miss.
    if saw_stale:
        self._evict_expired()
    if match is None:
        return None
    return match['result']
129187

130188
def put(self, query: str, result: str) -> None:
131-
"""Store query and result in cache"""
189+
"""Store query and result in cache with a creation timestamp."""
132190
query_vec = self.vectorize(query)
133191
self.cache.append({
134192
'query': query,
135193
'vector': query_vec,
136-
'result': result
194+
'result': result,
195+
'timestamp': time.time()
137196
})
138197
logger.info(f"Cached query: {query[:50]}...")
139198

@@ -248,7 +307,8 @@ def save_cache_json(self, filepath: Optional[str] = None) -> None:
248307
'cache': self.cache,
249308
'similarity_threshold': self.similarity_threshold,
250309
'cache_size': len(self.cache),
251-
'format_version': '1.0'
310+
'format_version': '1.1',
311+
'max_age_seconds': self.max_age_seconds,
252312
}
253313

254314
with open(filepath, 'w') as f:
@@ -278,8 +338,17 @@ def load_cache_json(self, filepath: Optional[str] = None) -> None:
278338

279339
self.cache = data.get('cache', [])
280340
self.similarity_threshold = data.get('similarity_threshold', self.similarity_threshold)
341+
342+
# Always restore persisted TTL state so that loading
343+
# a no-TTL cache into a TTL-enabled instance correctly
344+
# clears the TTL rather than keeping the stale value.
345+
saved_max_age = data.get('max_age_seconds')
346+
self.max_age_seconds = float(saved_max_age) if saved_max_age is not None else None
281347

282348
logger.info(f"Semantic cache loaded from JSON: {filepath} ({len(self.cache)} entries)")
349+
350+
# Evict entries that became stale while the process was down
351+
self._evict_expired()
283352

284353
except Exception:
285354
logger.exception("Error loading cache from JSON")
@@ -288,14 +357,26 @@ def load_cache_json(self, filepath: Optional[str] = None) -> None:
288357
logger.debug(f"No JSON cache file found at {filepath}")
289358

290359
def get_cache_stats(self) -> Dict[str, Any]:
    """Summarise the in-memory cache and its backing file.

    Returns:
        A dict with ``total_entries``, ``similarity_threshold``,
        ``cache_file``, ``cache_exists`` and ``cache_size_bytes`` (0 when
        the file cannot be stat'ed). When a TTL is configured it also
        carries ``max_age_hours`` and ``expired_entries``, the number of
        entries currently past the TTL.
    """
    try:
        size_on_disk = self.cache_path.stat().st_size
    except OSError:
        # Missing or unreadable file: report an empty size, don't fail.
        size_on_disk = 0

    report: Dict[str, Any] = dict(
        total_entries=len(self.cache),
        similarity_threshold=self.similarity_threshold,
        cache_file=str(self.cache_path),
        cache_exists=self.cache_path.exists(),
        cache_size_bytes=size_on_disk,
    )
    if self.max_age_seconds is None:
        return report

    # One clock reading so every entry is judged against the same instant.
    snapshot = time.time()
    report['max_age_hours'] = self.max_age_seconds / 3600.0
    stale = [
        item for item in self.cache
        if self._is_entry_expired(item, now=snapshot)
    ]
    report['expired_entries'] = len(stale)
    return report
299380

300381
def explain_query(self, query: str) -> Dict[str, Any]:
301382
"""
@@ -345,8 +426,11 @@ def explain_query(self, query: str) -> Dict[str, Any]:
345426
best_match = None
346427
best_similarity = 0.0
347428

348-
# Analyze cache entries safely
429+
# Analyze cache entries safely (skip expired)
430+
now = time.time()
349431
for i, entry in enumerate(self.cache):
432+
if self._is_entry_expired(entry, now=now):
433+
continue
350434
try:
351435
similarity = self.cosine_similarity(query_vec, entry.get('vector', []))
352436
except Exception as e:
@@ -389,11 +473,49 @@ def explain_query(self, query: str) -> Dict[str, Any]:
389473
'top_matches': cache_analysis[:5] # Top 5 similar cached queries
390474
}
391475

392-
def set_cache_expiration(self, max_age_hours: int = 24) -> None:
393-
"""Remove cache entries older than specified hours (future enhancement)"""
394-
# This would require adding timestamps to cache entries
395-
# For now, just a placeholder for TTL functionality
396-
logger.info(f"Cache expiration set to {max_age_hours} hours (not yet implemented)")
476+
def set_cache_expiration(self, max_age_hours: Optional[float] = 24) -> int:
    """Set or disable TTL and immediately evict stale cache entries.

    The TTL is stored on the instance as ``self.max_age_seconds``; after
    a successful call an eviction pass is run right away.

    Pass ``None`` to disable TTL entirely (all entries are kept
    regardless of age).

    Args:
        max_age_hours: Maximum age of a cache entry in hours.
            Must be a positive finite number, or ``None`` to disable TTL.
            Booleans, strings, NaN, and infinite values are rejected, as
            are values so large that the TTL in seconds is not a finite
            float.

    Returns:
        Number of expired entries that were evicted (always 0 when
        disabling TTL).

    Raises:
        ValueError: If *max_age_hours* is not a positive finite number
            (and is not ``None``), or is a non-numeric type. The TTL
            already in effect is left untouched in that case.
    """
    if max_age_hours is None:
        self.max_age_seconds = None
        logger.info("Cache expiration disabled")
        return 0

    # bool is a subclass of int, so it has to be excluded explicitly.
    if isinstance(max_age_hours, bool) or not isinstance(max_age_hours, (int, float)):
        raise ValueError("max_age_hours must be a positive finite number or None")

    # Validate the value that is actually stored (seconds), not just the
    # input: a huge int cannot be converted to float at all, and a large
    # but finite float overflows to inf once multiplied by 3600.
    try:
        max_age_seconds = float(max_age_hours) * 3600.0
    except OverflowError:
        raise ValueError("max_age_hours must be a positive finite number") from None

    if not math.isfinite(max_age_seconds) or max_age_hours <= 0:
        raise ValueError("max_age_hours must be a positive finite number")

    self.max_age_seconds = max_age_seconds
    evicted = self._evict_expired()
    logger.info(
        "Cache expiration set to %.2f hours – evicted %d stale entries",
        max_age_hours,
        evicted,
    )
    return evicted
397519

398520
def optimize_cache(self, max_entries: int = 1000) -> None:
399521
"""Remove oldest entries if cache exceeds max size"""

0 commit comments

Comments
 (0)