@@ -243,6 +243,54 @@ def get_cache_stats(self) -> Dict[str, Any]:
243243 'cache_size_bytes' : self .cache_path .stat ().st_size if self .cache_path .exists () else 0
244244 }
245245
def explain_query(self, query: str) -> Dict[str, Any]:
    """
    Score a query against every cache entry for the EXPLAIN command.

    The query is vectorized with ``self.vectorize`` and compared to each
    entry of ``self.cache`` with ``self.cosine_similarity``. No cached
    result is returned and ``self.cache`` is only read here.

    Args:
        query: Query text to analyze.

    Returns:
        Dict containing:
            - query: the query exactly as given
            - cache_entries_checked: number of entries in ``self.cache``
            - similarity_threshold: value of ``self.similarity_threshold``
            - best_match: the most similar cached query, clipped to 50
              characters plus '...'; None when the cache is empty
            - best_similarity: similarity of that entry rounded to 4
              places; 0.0 when the cache is empty
            - would_hit_cache: True only when some cached entry reaches
              the threshold (never True for an empty cache)
            - top_matches: up to 5 dicts (cached_query, similarity,
              would_hit), most similar first
    """
    max_display = 50

    def shorten(text: str) -> str:
        """Clip text to max_display characters, marking the cut with '...'."""
        if len(text) > max_display:
            return text[:max_display] + '...'
        return text

    query_vec = self.vectorize(query)
    threshold = self.similarity_threshold

    cache_analysis = []
    best_match = None
    # None (rather than 0.0) until an entry has been scored, so that entries
    # with zero or negative similarity are still tracked as the best match.
    best_similarity = None

    for entry in self.cache:
        similarity = self.cosine_similarity(query_vec, entry['vector'])
        cache_analysis.append({
            'cached_query': shorten(entry['query']),
            'similarity': round(similarity, 4),
            'would_hit': similarity >= threshold,
        })
        # Strict '>' keeps the earliest entry when similarities tie.
        if best_similarity is None or similarity > best_similarity:
            best_similarity = similarity
            best_match = entry['query']

    # Sort by similarity descending
    cache_analysis.sort(key=lambda item: item['similarity'], reverse=True)

    found = best_similarity is not None

    return {
        'query': query,
        'cache_entries_checked': len(self.cache),
        'similarity_threshold': threshold,
        'best_match': shorten(best_match) if found else None,
        'best_similarity': round(best_similarity, 4) if found else 0.0,
        # An empty cache can never produce a hit, whatever the threshold.
        'would_hit_cache': found and best_similarity >= threshold,
        'top_matches': cache_analysis[:5],  # Top 5 similar cached queries
    }
293+
246294 def set_cache_expiration (self , max_age_hours : int = 24 ) -> None :
247295 """Remove cache entries older than specified hours (future enhancement)"""
248296 # This would require adding timestamps to cache entries
@@ -310,6 +358,49 @@ def feed_metrics(self, query: str, latency_ms: float) -> None:
310358 next_state = "completed"
311359
312360 self .update (state , action , reward , next_state )
361+
def explain_action(self, query: str, available_actions: List[str]) -> Dict[str, Any]:
    """
    Explain what action would be taken without executing.

    Returns Q-values and the greedy action for the EXPLAIN command. The
    Q-table is only read here (via ``dict.get``), never updated.

    Args:
        query: SQL query string
        available_actions: List of possible actions; may be empty

    Returns:
        Dict containing:
            - state: state key string derived from the query length
            - q_values: Q-values rounded to 4 places for every action stored
              for the state plus every available action (0.0 when no value
              is stored)
            - best_action: available action with the highest Q-value,
              clipped to 20 characters; None when no action is available
            - epsilon: current exploration rate
            - would_explore: whether exploration is possible (epsilon > 0)
            - explanation: human-readable explanation of optimizer behavior
    """
    state = f"query_type_{len(query) // 10}"

    # Start from whatever is stored for this state, then make sure every
    # available action has a value so the report covers all of them.
    raw_values = dict(self.q_table.get(state, {}))
    for action in available_actions:
        raw_values.setdefault(action, 0.0)

    q_values = {action: round(value, 4) for action, value in raw_values.items()}

    if available_actions:
        # Compare unrounded values: rounding to 4 places could make two
        # distinct Q-values tie and report the wrong action.
        best_action = max(available_actions, key=lambda a: raw_values[a])
        # Defensive truncation for display (limit to 20 chars)
        best_action_display = best_action[:20]
    else:
        best_action_display = None

    return {
        'state': state,
        'q_values': q_values,
        'best_action': best_action_display,
        'epsilon': self.epsilon,
        'would_explore': self.epsilon > 0,
        'explanation': f'With ε={self.epsilon:.4f}, agent would explore {self.epsilon * 100:.1f}% of the time'
    }
313404
314405
315406def test_vectorization () -> Dict [str , Any ]:
@@ -324,6 +415,165 @@ def test_vectorization() -> Dict[str, Any]:
324415 }
325416
326417
def explain_query_plan(query: str, cache: Optional["SemanticCache"] = None,
                       optimizer: Optional["QueryOptimizer"] = None) -> Dict[str, Any]:
    """
    Generate a complete EXPLAIN plan for a query.

    Combines a keyword-based parse of the query text, the cache's
    ``explain_query`` report and the optimizer's ``explain_action`` report,
    then derives an execution strategy from the two.

    Args:
        query: SQL query string.
        cache: Cache to consult; a default ``SemanticCache()`` is created
            when omitted.
        optimizer: Optimizer to consult; a default ``QueryOptimizer()`` is
            created when omitted.

    Returns:
        Dict with keys 'query', 'query_length', 'parsing', 'cache_analysis',
        'rl_agent' and 'execution_strategy'.
    """
    # Local import: the module's import block is outside this block.
    import re

    query_upper = query.upper().strip()

    def has_keyword(pattern: str) -> bool:
        """Match a keyword pattern as whole words, not as a substring."""
        return re.search(rf'\b{pattern}\b', query_upper) is not None

    # 1. Query Parsing Analysis
    known_types = ('SELECT', 'INSERT', 'UPDATE', 'DELETE', 'CREATE')
    query_type = next(
        (kind for kind in known_types if query_upper.startswith(kind)),
        'UNKNOWN',
    )

    # NOTE: keywords inside string literals or quoted identifiers are still
    # counted; this is a textual heuristic, not a SQL parser.
    parsing = {
        'query_type': query_type,
        'query_length': len(query),
        'complexity_estimate': min(len(query) // 20, 10),
        'has_where_clause': has_keyword('WHERE'),
        'has_join': has_keyword('JOIN'),
        # Require the opening parenthesis so names such as ADMINS or
        # MAX_SIZE are not mistaken for aggregate calls.
        'has_aggregation': re.search(
            r'\b(?:COUNT|SUM|AVG|MAX|MIN)\s*\(', query_upper) is not None,
        'has_order_by': has_keyword(r'ORDER\s+BY'),
        'has_group_by': has_keyword(r'GROUP\s+BY'),
    }

    # 2. Cache Analysis
    if cache is None:
        cache = SemanticCache()
    cache_analysis = cache.explain_query(query)

    # 3. RL Agent Analysis
    if optimizer is None:
        optimizer = QueryOptimizer()
    available_actions = ['use_cache', 'bypass_cache', 'full_scan', 'index_scan']
    rl_agent = optimizer.explain_action(query, available_actions)

    # 4. Execution Strategy
    would_hit_cache = cache_analysis.get('would_hit_cache', False)
    best_action = rl_agent.get('best_action', 'full_scan')

    # Latency figures are fixed labels per strategy, not measurements.
    if would_hit_cache:
        strategy, estimated_latency = 'CACHE_HIT', '< 1ms'
    elif best_action == 'use_cache':
        strategy, estimated_latency = 'CACHE_MISS_THEN_STORE', '5-50ms'
    elif best_action == 'index_scan':
        strategy, estimated_latency = 'INDEX_SCAN', '1-10ms'
    else:
        strategy, estimated_latency = 'FULL_SCAN', '10-100ms'

    will_cache_result = query_type == 'SELECT' and not would_hit_cache

    # Keep the recommendation consistent with will_cache_result: only
    # promise caching when the result will actually be cached.
    if would_hit_cache:
        recommendation = 'Use cached result'
    elif will_cache_result:
        recommendation = 'Execute and cache'
    else:
        recommendation = 'Execute without caching'

    return {
        'query': query,
        'query_length': len(query),
        'parsing': parsing,
        'cache_analysis': cache_analysis,
        'rl_agent': rl_agent,
        'execution_strategy': {
            'strategy': strategy,
            'estimated_latency': estimated_latency,
            'will_cache_result': will_cache_result,
            'recommendation': recommendation,
        },
    }
496+
497+
498+ def format_explain_output (explain_result : Dict [str , Any ]) -> str :
499+ """Format EXPLAIN result as a readable table with defensive field width limits"""
500+
501+ def truncate (value : Any , max_len : int ) -> str :
502+ """Truncate value to max length for box alignment"""
503+ s = str (value )
504+ if len (s ) > max_len :
505+ return s [:max_len - 3 ] + "..."
506+ return s
507+
508+ lines = []
509+ lines .append ("=" * 70 )
510+ lines .append ("QUERY EXECUTION PLAN" )
511+ lines .append ("=" * 70 )
512+
513+ # Smart query truncation
514+ query = explain_result ['query' ]
515+ display_query = truncate (query , 60 )
516+
517+ lines .append (f"Query: { display_query } " )
518+ lines .append ("" )
519+
520+ # Parsing section
521+ lines .append ("┌─ PARSING ─────────────────────────────────────────────────────────┐" )
522+ p = explain_result ['parsing' ]
523+ query_type = truncate (p ['query_type' ], 15 )
524+ lines .append (f"│ Type: { query_type :<15} Complexity: { p ['complexity_estimate' ]} /10 │" )
525+ lines .append (f"│ WHERE: { str (p ['has_where_clause' ]):<8} JOIN: { str (p ['has_join' ]):<8} AGG: { str (p ['has_aggregation' ]):<8} │" )
526+ lines .append ("└───────────────────────────────────────────────────────────────────┘" )
527+ lines .append ("" )
528+
529+ # Cache section
530+ lines .append ("┌─ CACHE LOOKUP ────────────────────────────────────────────────────┐" )
531+ c = explain_result ['cache_analysis' ]
532+ # Defensive limits: cache_entries_checked capped at 99999 for display
533+ entries_checked = min (c ['cache_entries_checked' ], 99999 )
534+ lines .append (f"│ Entries checked: { entries_checked :<5} Threshold: { c ['similarity_threshold' ]:<6} │" )
535+ lines .append (f"│ Best similarity: { c ['best_similarity' ]:<6} Would hit: { str (c ['would_hit_cache' ]):<6} │" )
536+ if c ['top_matches' ]:
537+ lines .append ("│ Top matches: │" )
538+ for match in c ['top_matches' ][:3 ]:
539+ sim = match ['similarity' ]
540+ hit = "✓" if match ['would_hit' ] else "✗"
541+ # Smart truncation for cached queries (limit to 45 chars)
542+ cached_query = truncate (match ['cached_query' ], 45 )
543+ lines .append (f"│ { hit } { sim :.4f} - { cached_query :<45} │" )
544+ lines .append ("└───────────────────────────────────────────────────────────────────┘" )
545+ lines .append ("" )
546+
547+ # RL Agent section
548+ lines .append ("┌─ RL AGENT ────────────────────────────────────────────────────────┐" )
549+ r = explain_result ['rl_agent' ]
550+ # Defensive truncation for state (30 chars) and best_action (20 chars)
551+ state_display = truncate (r ['state' ], 30 )
552+ best_action_display = truncate (r ['best_action' ], 20 )
553+ lines .append (f"│ State: { state_display :<30} Epsilon: { r .get ('epsilon' , 0 ):<6} │" )
554+ lines .append (f"│ Best action: { best_action_display :<20} │" )
555+ lines .append ("│ Q-values: │" )
556+ for action , qval in r ['q_values' ].items ():
557+ # Truncate action names to 15 chars for alignment
558+ action_display = truncate (action , 15 )
559+ lines .append (f"│ { action_display :<15} : { qval :>8.4f} │" )
560+ lines .append ("└───────────────────────────────────────────────────────────────────┘" )
561+ lines .append ("" )
562+
563+ # Execution strategy
564+ lines .append ("┌─ EXECUTION STRATEGY ──────────────────────────────────────────────┐" )
565+ e = explain_result ['execution_strategy' ]
566+ # Defensive truncation for strategy (20 chars)
567+ strategy_display = truncate (e ['strategy' ], 20 )
568+ recommendation_display = truncate (e ['recommendation' ], 40 )
569+ lines .append (f"│ Strategy: { strategy_display :<20} Est. latency: { e ['estimated_latency' ]:<10} │" )
570+ lines .append (f"│ Will cache: { str (e ['will_cache_result' ]):<8} │" )
571+ lines .append (f"│ Recommendation: { recommendation_display :<40} │" )
572+ lines .append ("└───────────────────────────────────────────────────────────────────┘" )
573+
574+ return "\n " .join (lines )
575+
576+
327577def test_cache_persistence () -> Dict [str , Any ]:
328578 """Test semantic cache persistence functionality"""
329579 print ("\n " + "=" * 60 )
@@ -395,3 +645,18 @@ def test_cache_persistence() -> Dict[str, Any]:
395645 print ("\n Running persistence test..." )
396646 persistence_result = test_cache_persistence ()
397647 print (f"\n Persistence test result: { persistence_result } " )
648+
649+ # Test EXPLAIN functionality
650+ print ("\n " + "=" * 70 )
651+ print ("Testing EXPLAIN Query Plan" )
652+ print ("=" * 70 )
653+
654+ # Add some test data to cache first
655+ cache = SemanticCache ()
656+ cache .put ("SELECT * FROM users WHERE age > 25" , "User data result" )
657+ cache .put ("SELECT name FROM products WHERE price < 100" , "Product names" )
658+
659+ # Test explain
660+ test_query = "SELECT * FROM users WHERE age > 30"
661+ explain_result = explain_query_plan (test_query , cache )
662+ print (format_explain_output (explain_result ))
0 commit comments