22Semantic cache and query optimizer using local embedding models
33"""
44import logging
5+ import math
56import numpy as np
7+ import time
68from typing import Optional , List , Dict , Any
79import json
810import os
@@ -61,6 +63,7 @@ def __init__(self, similarity_threshold: float = 0.95, cache_file: str = "semant
6163 self .cache : List [Dict ] = []
6264 self .similarity_threshold = similarity_threshold
6365 self .model = None
66+ self .max_age_seconds : Optional [float ] = None # None = no TTL
6467
6568 # Support environment variable for cache file path
6669 cache_file_env = os .environ .get ('NEXUMDB_CACHE_FILE' , cache_file )
@@ -115,25 +118,81 @@ def cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
115118
116119 return float (dot_product / (norm1 * norm2 ))
117120
def _is_entry_expired(self, entry: Dict, now: Optional[float] = None) -> bool:
    """Report whether a cache entry has outlived the configured TTL.

    Args:
        entry: Cache entry dict. Its optional ``'timestamp'`` key is read
            as a Unix timestamp in seconds.
        now: Current time as a Unix timestamp. When ``None``,
            ``time.time()`` is called. Callers that scan many entries
            should take one snapshot and pass it in so every entry is
            judged against the same instant.

    Returns:
        True when the entry's age is strictly greater than
        ``self.max_age_seconds``. False when TTL is disabled
        (``max_age_seconds`` is ``None``) or when the entry carries no
        usable numeric timestamp.
    """
    ttl = self.max_age_seconds
    if ttl is None:
        return False

    created_at = entry.get('timestamp')
    # Entries can come from a JSON file on disk, so the timestamp may be
    # absent (legacy entries) or malformed. Either way the entry is kept:
    # raising here would abort every lookup that scans the cache.
    if not isinstance(created_at, (int, float)):
        return False

    if now is None:
        now = time.time()
    return (now - created_at) > ttl
144+
def _evict_expired(self) -> int:
    """Drop every cache entry whose TTL has elapsed.

    Returns:
        How many entries were removed; always 0 when TTL is disabled.
    """
    if self.max_age_seconds is None:
        return 0

    # One clock reading so all entries are judged against the same instant.
    snapshot = time.time()
    survivors = [
        entry for entry in self.cache
        if not self._is_entry_expired(entry, now=snapshot)
    ]
    removed = len(self.cache) - len(survivors)
    self.cache = survivors

    if removed > 0:
        logger.info(f"Evicted {removed} expired cache entries")
    return removed
160+
def get(self, query: str) -> Optional[str]:
    """Look up the result cached for a sufficiently similar query.

    The cache list is scanned in order and the first entry whose cosine
    similarity to *query* reaches ``self.similarity_threshold`` is used.
    Entries past their TTL are skipped; if any were seen during the scan
    they are evicted before this method returns, so later lookups do not
    keep paying for them.

    Args:
        query: Query text to vectorize and compare against cached entries.

    Returns:
        The ``'result'`` of the first matching entry, or ``None`` when no
        live entry is similar enough.
    """
    query_vec = self.vectorize(query)
    now = time.time()
    saw_expired = False
    match = None

    for candidate in self.cache:
        if self._is_entry_expired(candidate, now=now):
            # Remember that cleanup is due, but keep scanning.
            saw_expired = True
            continue
        score = self.cosine_similarity(query_vec, candidate['vector'])
        if score >= self.similarity_threshold:
            logger.info(f"Cache hit! Similarity: {score:.4f}")
            match = candidate
            break

    # Single cleanup point shared by the hit and miss paths.
    if saw_expired:
        self._evict_expired()

    if match is None:
        return None
    return match['result']
129187
def put(self, query: str, result: str) -> None:
    """Append a new cache entry for *query*, stamped with its creation time.

    Args:
        query: Query text; it is vectorized and stored alongside the result.
        result: Result to hand back on a later cache hit.
    """
    entry = {
        'query': query,
        'vector': self.vectorize(query),
        'result': result,
        # Creation time in Unix seconds; read back by the TTL checks.
        'timestamp': time.time(),
    }
    self.cache.append(entry)
    logger.info(f"Cached query: {query[:50]}...")
139198
@@ -248,7 +307,8 @@ def save_cache_json(self, filepath: Optional[str] = None) -> None:
248307 'cache' : self .cache ,
249308 'similarity_threshold' : self .similarity_threshold ,
250309 'cache_size' : len (self .cache ),
251- 'format_version' : '1.0'
310+ 'format_version' : '1.1' ,
311+ 'max_age_seconds' : self .max_age_seconds ,
252312 }
253313
254314 with open (filepath , 'w' ) as f :
@@ -278,8 +338,17 @@ def load_cache_json(self, filepath: Optional[str] = None) -> None:
278338
279339 self .cache = data .get ('cache' , [])
280340 self .similarity_threshold = data .get ('similarity_threshold' , self .similarity_threshold )
341+
342+ # Always restore persisted TTL state so that loading
343+ # a no-TTL cache into a TTL-enabled instance correctly
344+ # clears the TTL rather than keeping the stale value.
345+ saved_max_age = data .get ('max_age_seconds' )
346+ self .max_age_seconds = float (saved_max_age ) if saved_max_age is not None else None
281347
282348 logger .info (f"Semantic cache loaded from JSON: { filepath } ({ len (self .cache )} entries)" )
349+
350+ # Evict entries that became stale while the process was down
351+ self ._evict_expired ()
283352
284353 except Exception :
285354 logger .exception ("Error loading cache from JSON" )
@@ -288,14 +357,26 @@ def load_cache_json(self, filepath: Optional[str] = None) -> None:
288357 logger .debug (f"No JSON cache file found at { filepath } " )
289358
def get_cache_stats(self) -> Dict[str, Any]:
    """Summarise the in-memory cache and its backing file.

    Returns:
        A dict with ``total_entries``, ``similarity_threshold``,
        ``cache_file``, ``cache_exists`` and ``cache_size_bytes`` (0 when
        the file cannot be stat'ed). When a TTL is configured it also
        holds ``max_age_hours`` and ``expired_entries``, the number of
        entries currently past the TTL.
    """
    path = self.cache_path
    try:
        size_on_disk = path.stat().st_size
    except OSError:
        # The file could not be stat'ed: report an empty footprint.
        size_on_disk = 0

    stats: Dict[str, Any] = {
        'total_entries': len(self.cache),
        'similarity_threshold': self.similarity_threshold,
        'cache_file': str(path),
        'cache_exists': path.exists(),
        'cache_size_bytes': size_on_disk,
    }

    ttl = self.max_age_seconds
    if ttl is None:
        return stats

    # One clock reading so every entry is judged against the same instant.
    now = time.time()
    stats['max_age_hours'] = ttl / 3600.0
    expired_count = 0
    for entry in self.cache:
        if self._is_entry_expired(entry, now=now):
            expired_count += 1
    stats['expired_entries'] = expired_count
    return stats
299380
300381 def explain_query (self , query : str ) -> Dict [str , Any ]:
301382 """
@@ -345,8 +426,11 @@ def explain_query(self, query: str) -> Dict[str, Any]:
345426 best_match = None
346427 best_similarity = 0.0
347428
348- # Analyze cache entries safely
429+ # Analyze cache entries safely (skip expired)
430+ now = time .time ()
349431 for i , entry in enumerate (self .cache ):
432+ if self ._is_entry_expired (entry , now = now ):
433+ continue
350434 try :
351435 similarity = self .cosine_similarity (query_vec , entry .get ('vector' , []))
352436 except Exception as e :
@@ -389,11 +473,49 @@ def explain_query(self, query: str) -> Dict[str, Any]:
389473 'top_matches' : cache_analysis [:5 ] # Top 5 similar cached queries
390474 }
391475
def set_cache_expiration(self, max_age_hours: Optional[float] = 24) -> int:
    """Set or disable the TTL and immediately evict stale cache entries.

    The TTL is stored on ``self.max_age_seconds``; passing ``None``
    disables it so entries are kept regardless of age.

    Args:
        max_age_hours: Maximum age of a cache entry in hours. Must be a
            positive finite ``int`` or ``float``, or ``None`` to disable
            TTL. Booleans, strings, NaN, infinities and ints too large to
            represent as a float are rejected.

    Returns:
        Number of expired entries that were evicted (always 0 when
        disabling TTL).

    Raises:
        ValueError: If *max_age_hours* is neither ``None`` nor a positive
            finite number.
    """
    if max_age_hours is None:
        self.max_age_seconds = None
        logger.info("Cache expiration disabled")
        return 0

    # bool is a subclass of int, so it has to be rejected explicitly.
    if isinstance(max_age_hours, bool) or not isinstance(max_age_hours, (int, float)):
        raise ValueError("max_age_hours must be a positive finite number or None")

    try:
        hours = float(max_age_hours)
    except OverflowError:
        # An int too large for a float would otherwise surface as
        # OverflowError from the finiteness check below, not ValueError.
        raise ValueError("max_age_hours must be a positive finite number") from None

    if not math.isfinite(hours) or hours <= 0:
        raise ValueError("max_age_hours must be a positive finite number")

    self.max_age_seconds = hours * 3600.0
    evicted = self._evict_expired()
    logger.info(
        "Cache expiration set to %.2f hours – evicted %d stale entries",
        max_age_hours,
        evicted,
    )
    return evicted
397519
398520 def optimize_cache (self , max_entries : int = 1000 ) -> None :
399521 """Remove oldest entries if cache exceeds max size"""
0 commit comments