Skip to content

Commit 7f19716

Browse files
authored
Merge pull request #56 from sshekhar563/feature/explain-query-plan
Add EXPLAIN query plan command with AI optimization insights
2 parents 0179452 + 763493e commit 7f19716

6 files changed

Lines changed: 512 additions & 4 deletions

File tree

nexum_ai/optimizer.py

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,54 @@ def get_cache_stats(self) -> Dict[str, Any]:
243243
'cache_size_bytes': self.cache_path.stat().st_size if self.cache_path.exists() else 0
244244
}
245245

246+
def explain_query(self, query: str) -> Dict[str, Any]:
    """
    Analyze a query against the cache without executing it.

    Every cached entry is scored with ``self.cosine_similarity`` against the
    vectorized query; nothing is added to or removed from the cache.

    Args:
        query: SQL query string to analyze.

    Returns:
        Dict containing:
            - query: the query as given
            - cache_entries_checked: number of entries in ``self.cache``
            - similarity_threshold: ``self.similarity_threshold``
            - best_match: highest-scoring cached query (elided to 50 chars),
              or None when no entry scored above 0.0
            - best_similarity: that score rounded to 4 places (0.0 if none)
            - would_hit_cache: True when at least one cached entry meets the
              threshold; always False for an empty cache
            - top_matches: up to 5 entries, highest similarity first
    """
    max_display = 50  # longest cached-query text shown before eliding

    def shorten(text):
        """Elide text longer than max_display characters with '...'."""
        if text and len(text) > max_display:
            return text[:max_display] + '...'
        return text

    query_vec = self.vectorize(query)

    cache_analysis = []
    best_match = None
    best_similarity = 0.0

    for entry in self.cache:
        similarity = self.cosine_similarity(query_vec, entry['vector'])
        cache_analysis.append({
            'cached_query': shorten(entry['query']),
            'similarity': round(similarity, 4),
            'would_hit': similarity >= self.similarity_threshold,
        })
        if similarity > best_similarity:
            best_similarity = similarity
            best_match = entry['query']

    # Sort by similarity descending (stable, so ties keep cache order)
    cache_analysis.sort(key=lambda item: item['similarity'], reverse=True)

    return {
        'query': query,
        'cache_entries_checked': len(self.cache),
        'similarity_threshold': self.similarity_threshold,
        'best_match': shorten(best_match),
        'best_similarity': round(best_similarity, 4),
        # Derived from the per-entry flags so the summary can never claim a
        # hit that no cached entry actually produces (e.g. an empty cache
        # combined with a threshold of 0).
        'would_hit_cache': any(item['would_hit'] for item in cache_analysis),
        'top_matches': cache_analysis[:5],  # Top 5 similar cached queries
    }
293+
246294
def set_cache_expiration(self, max_age_hours: int = 24) -> None:
247295
"""Remove cache entries older than specified hours (future enhancement)"""
248296
# This would require adding timestamps to cache entries
@@ -310,6 +358,49 @@ def feed_metrics(self, query: str, latency_ms: float) -> None:
310358
next_state = "completed"
311359

312360
self.update(state, action, reward, next_state)
361+
362+
def explain_action(self, query: str, available_actions: List[str]) -> Dict[str, Any]:
    """
    Explain what action would be taken without executing.

    Read-only analysis for the EXPLAIN command: the Q-table is only read,
    never updated, and no action is executed.

    Args:
        query: SQL query string
        available_actions: List of possible actions (may be empty)

    Returns:
        Dict containing:
            - state: state key string derived from the query length
            - q_values: rounded Q-values stored for the state, or 0.0 for
              every available action when the state is not in the Q-table
            - best_action: available action with the highest Q-value
              (limited to 20 chars), or None when no actions were given
            - epsilon: current exploration rate
            - would_explore: whether exploration is possible (epsilon > 0)
            - explanation: human-readable explanation of optimizer behavior
    """
    state = f"query_type_{len(query) // 10}"

    if state in self.q_table:
        q_values = {a: round(v, 4) for a, v in self.q_table[state].items()}
    else:
        q_values = {a: 0.0 for a in available_actions}

    # default=None keeps this diagnostic from raising ValueError when the
    # caller passes an empty action list.
    best_action = max(
        available_actions,
        key=lambda a: q_values.get(a, 0.0),
        default=None,
    )

    # Defensive truncation for display (limit to 20 chars)
    best_action_display = None if best_action is None else best_action[:20]

    return {
        'state': state,
        'q_values': q_values,
        'best_action': best_action_display,
        'epsilon': self.epsilon,
        'would_explore': self.epsilon > 0,
        'explanation': f'With ε={self.epsilon:.4f}, agent would explore {self.epsilon*100:.1f}% of the time'
    }
313404

314405

315406
def test_vectorization() -> Dict[str, Any]:
@@ -324,6 +415,165 @@ def test_vectorization() -> Dict[str, Any]:
324415
}
325416

326417

418+
def explain_query_plan(query: str, cache: Optional[SemanticCache] = None,
                       optimizer: Optional[QueryOptimizer] = None) -> Dict[str, Any]:
    """
    Generate a complete EXPLAIN plan for a query.

    Combines a lightweight keyword-based parse of the query text, the cache
    analysis from ``cache.explain_query`` and the action analysis from
    ``optimizer.explain_action`` into one report.

    Args:
        query: SQL query string to explain.
        cache: Cache to analyze against; a new ``SemanticCache()`` is
            created when None.
        optimizer: Optimizer to consult; a new ``QueryOptimizer()`` is
            created when None.

    Returns:
        Dict with keys ``query``, ``query_length``, ``parsing``,
        ``cache_analysis``, ``rl_agent`` and ``execution_strategy``.
    """
    # Function-scope import so the block does not depend on the module's
    # import list.
    import re

    # 1. Query Parsing Analysis
    query_upper = query.upper().strip()

    def has_keyword(pattern: str) -> bool:
        """Return True when the regex pattern occurs in the upper-cased query."""
        return re.search(pattern, query_upper) is not None

    # Match the leading statement keyword as a whole word.
    leading = re.match(r'(SELECT|INSERT|UPDATE|DELETE|CREATE)\b', query_upper)
    query_type = leading.group(1) if leading else 'UNKNOWN'

    # Word-boundary patterns: plain substring tests reported e.g. COUNT for
    # a table called ACCOUNTS, MIN for ADMIN, and missed "ORDER   BY".
    parsing = {
        'query_type': query_type,
        'query_length': len(query),
        'complexity_estimate': min(len(query) // 20, 10),
        'has_where_clause': has_keyword(r'\bWHERE\b'),
        'has_join': has_keyword(r'\bJOIN\b'),
        'has_aggregation': has_keyword(r'\b(?:COUNT|SUM|AVG|MAX|MIN)\s*\('),
        'has_order_by': has_keyword(r'\bORDER\s+BY\b'),
        'has_group_by': has_keyword(r'\bGROUP\s+BY\b'),
    }

    # 2. Cache Analysis
    if cache is None:
        cache = SemanticCache()
    cache_analysis = cache.explain_query(query)

    # 3. RL Agent Analysis
    if optimizer is None:
        optimizer = QueryOptimizer()

    available_actions = ['use_cache', 'bypass_cache', 'full_scan', 'index_scan']
    rl_agent = optimizer.explain_action(query, available_actions)

    # 4. Execution Strategy
    would_hit_cache = cache_analysis.get('would_hit_cache', False)
    best_action = rl_agent.get('best_action', 'full_scan')

    if would_hit_cache:
        strategy = 'CACHE_HIT'
        estimated_latency = '< 1ms'
    elif best_action == 'use_cache':
        strategy = 'CACHE_MISS_THEN_STORE'
        estimated_latency = '5-50ms'
    elif best_action == 'index_scan':
        strategy = 'INDEX_SCAN'
        estimated_latency = '1-10ms'
    else:
        strategy = 'FULL_SCAN'
        estimated_latency = '10-100ms'

    will_cache_result = query_type == 'SELECT' and not would_hit_cache

    # Keep the recommendation consistent with will_cache_result: only
    # SELECT results are cached, so do not advise caching anything else.
    if would_hit_cache:
        recommendation = 'Use cached result'
    elif will_cache_result:
        recommendation = 'Execute and cache'
    else:
        recommendation = 'Execute without caching'

    return {
        'query': query,
        'query_length': len(query),
        'parsing': parsing,
        'cache_analysis': cache_analysis,
        'rl_agent': rl_agent,
        'execution_strategy': {
            'strategy': strategy,
            'estimated_latency': estimated_latency,
            'will_cache_result': will_cache_result,
            'recommendation': recommendation,
        },
    }
496+
497+
498+
def format_explain_output(explain_result: Dict[str, Any]) -> str:
499+
"""Format EXPLAIN result as a readable table with defensive field width limits"""
500+
501+
def truncate(value: Any, max_len: int) -> str:
502+
"""Truncate value to max length for box alignment"""
503+
s = str(value)
504+
if len(s) > max_len:
505+
return s[:max_len - 3] + "..."
506+
return s
507+
508+
lines = []
509+
lines.append("=" * 70)
510+
lines.append("QUERY EXECUTION PLAN")
511+
lines.append("=" * 70)
512+
513+
# Smart query truncation
514+
query = explain_result['query']
515+
display_query = truncate(query, 60)
516+
517+
lines.append(f"Query: {display_query}")
518+
lines.append("")
519+
520+
# Parsing section
521+
lines.append("┌─ PARSING ─────────────────────────────────────────────────────────┐")
522+
p = explain_result['parsing']
523+
query_type = truncate(p['query_type'], 15)
524+
lines.append(f"│ Type: {query_type:<15} Complexity: {p['complexity_estimate']}/10 │")
525+
lines.append(f"│ WHERE: {str(p['has_where_clause']):<8} JOIN: {str(p['has_join']):<8} AGG: {str(p['has_aggregation']):<8} │")
526+
lines.append("└───────────────────────────────────────────────────────────────────┘")
527+
lines.append("")
528+
529+
# Cache section
530+
lines.append("┌─ CACHE LOOKUP ────────────────────────────────────────────────────┐")
531+
c = explain_result['cache_analysis']
532+
# Defensive limits: cache_entries_checked capped at 99999 for display
533+
entries_checked = min(c['cache_entries_checked'], 99999)
534+
lines.append(f"│ Entries checked: {entries_checked:<5} Threshold: {c['similarity_threshold']:<6} │")
535+
lines.append(f"│ Best similarity: {c['best_similarity']:<6} Would hit: {str(c['would_hit_cache']):<6} │")
536+
if c['top_matches']:
537+
lines.append("│ Top matches: │")
538+
for match in c['top_matches'][:3]:
539+
sim = match['similarity']
540+
hit = "✓" if match['would_hit'] else "✗"
541+
# Smart truncation for cached queries (limit to 45 chars)
542+
cached_query = truncate(match['cached_query'], 45)
543+
lines.append(f"│ {hit} {sim:.4f} - {cached_query:<45} │")
544+
lines.append("└───────────────────────────────────────────────────────────────────┘")
545+
lines.append("")
546+
547+
# RL Agent section
548+
lines.append("┌─ RL AGENT ────────────────────────────────────────────────────────┐")
549+
r = explain_result['rl_agent']
550+
# Defensive truncation for state (30 chars) and best_action (20 chars)
551+
state_display = truncate(r['state'], 30)
552+
best_action_display = truncate(r['best_action'], 20)
553+
lines.append(f"│ State: {state_display:<30} Epsilon: {r.get('epsilon', 0):<6} │")
554+
lines.append(f"│ Best action: {best_action_display:<20} │")
555+
lines.append("│ Q-values: │")
556+
for action, qval in r['q_values'].items():
557+
# Truncate action names to 15 chars for alignment
558+
action_display = truncate(action, 15)
559+
lines.append(f"│ {action_display:<15}: {qval:>8.4f} │")
560+
lines.append("└───────────────────────────────────────────────────────────────────┘")
561+
lines.append("")
562+
563+
# Execution strategy
564+
lines.append("┌─ EXECUTION STRATEGY ──────────────────────────────────────────────┐")
565+
e = explain_result['execution_strategy']
566+
# Defensive truncation for strategy (20 chars)
567+
strategy_display = truncate(e['strategy'], 20)
568+
recommendation_display = truncate(e['recommendation'], 40)
569+
lines.append(f"│ Strategy: {strategy_display:<20} Est. latency: {e['estimated_latency']:<10} │")
570+
lines.append(f"│ Will cache: {str(e['will_cache_result']):<8} │")
571+
lines.append(f"│ Recommendation: {recommendation_display:<40} │")
572+
lines.append("└───────────────────────────────────────────────────────────────────┘")
573+
574+
return "\n".join(lines)
575+
576+
327577
def test_cache_persistence() -> Dict[str, Any]:
328578
"""Test semantic cache persistence functionality"""
329579
print("\n" + "="*60)
@@ -395,3 +645,18 @@ def test_cache_persistence() -> Dict[str, Any]:
395645
print("\nRunning persistence test...")
396646
persistence_result = test_cache_persistence()
397647
print(f"\nPersistence test result: {persistence_result}")
648+
649+
# Test EXPLAIN functionality
650+
print("\n" + "="*70)
651+
print("Testing EXPLAIN Query Plan")
652+
print("="*70)
653+
654+
# Add some test data to cache first
655+
cache = SemanticCache()
656+
cache.put("SELECT * FROM users WHERE age > 25", "User data result")
657+
cache.put("SELECT name FROM products WHERE price < 100", "Product names")
658+
659+
# Test explain
660+
test_query = "SELECT * FROM users WHERE age > 30"
661+
explain_result = explain_query_plan(test_query, cache)
662+
print(format_explain_output(explain_result))

nexum_ai/rl_agent.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"""
55

66
import numpy as np
7-
from typing import Dict, Optional
7+
from typing import Dict, Optional, Any
88

99

1010
class QLearningAgent:
@@ -173,6 +173,65 @@ def get_stats(self) -> Dict[str, float]:
173173
'avg_reward': np.mean([h['reward'] for h in self.training_history[-100:]]) if self.training_history else 0.0
174174
}
175175

176+
def explain_action(self, query_length: int, cache_hit: bool, complexity: int) -> Dict[str, Any]:
    """
    Describe the action the agent would pick, without acting or learning.

    Read-only report for the EXPLAIN command: the Q-table is consulted but
    never modified, and no action is executed.

    Args:
        query_length: Length of SQL query
        cache_hit: Whether query hit cache
        complexity: Complexity score (0-10)

    Returns:
        Dict containing:
            - state: state key from ``self._get_state_key``
            - state_breakdown: query_length_bucket, cache_hit, complexity
            - q_values: rounded Q-values stored for the state, or 0.0 for
              every action in ``self.actions`` when the state is unknown
            - best_action: action with the highest Q-value (max 20 chars)
            - epsilon: exploration rate rounded to 4 places
            - would_explore: True when epsilon is above zero
            - predicted_action: same value as best_action (epsilon ignored)
            - explanation: human-readable summary of the exploration rate
            - agent_stats: total_states_learned, total_updates, episodes
    """
    state_key = self._get_state_key(query_length, cache_hit, complexity)

    # Unknown states report a neutral 0.0 for every action.
    if state_key not in self.q_table:
        scores = dict.fromkeys(self.actions, 0.0)
    else:
        scores = {
            name: round(value, 4)
            for name, value in self.q_table[state_key].items()
        }

    def score_of(action):
        """Q-value used for ranking; actions without an entry count as 0.0."""
        return scores.get(action, 0.0)

    greedy = max(self.actions, key=score_of)
    # Slicing leaves names of 20 chars or fewer unchanged.
    greedy_display = greedy[:20]

    breakdown = {
        'query_length_bucket': min(query_length // 10, 10),
        'cache_hit': cache_hit,
        'complexity': complexity
    }
    stats = {
        'total_states_learned': len(self.q_table),
        'total_updates': len(self.training_history),
        'episodes': self.episode_count
    }

    report = {
        'state': state_key,
        'state_breakdown': breakdown,
        'q_values': scores,
        'best_action': greedy_display,
        'epsilon': round(self.epsilon, 4),
        'would_explore': self.epsilon > 0,
        'predicted_action': greedy_display,  # Deterministic for explain
        'explanation': f'With ε={self.epsilon:.4f}, agent would explore {self.epsilon*100:.1f}% of the time',
        'agent_stats': stats,
    }
    return report
234+
176235
def save_state(self, filepath: Optional[str] = None) -> None:
177236
"""Save Q-table and agent state to file using joblib"""
178237
try:

0 commit comments

Comments
 (0)