Skip to content

Commit 679c126

Browse files
committed
Add knowledge Q&A bot for #ask-mitgcm channel
Add a knowledge handler that listens in #ask-mitgcm for natural language questions about MITgcm, ERA5, oceanography, and the codebase. Uses Claude (Sonnet) with a comprehensive system prompt derived from CLAUDE.md, plus WebSearch/WebFetch for live documentation lookups. Runs on the same bot instance as the simulation ops bot — no second token needed. Long answers auto-create threads to keep the channel clean. https://claude.ai/code/session_01WNamUYvvru6xmxpPLqqW4f
1 parent 1648687 commit 679c126

5 files changed

Lines changed: 242 additions & 2 deletions

File tree

docs/discord-setup.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ Create these channels in your Discord server:
4545
| `#alerts` | Failure alerts and critical warnings |
4646
| `#plots` | Surface field PNGs, convergence plots |
4747
| `#logs` | Verbose agent activity (optional) |
48+
| `#ask-mitgcm` | Knowledge Q&A — ask about MITgcm, ERA5, oceanography, or the codebase |
4849

4950
**Get your Guild (Server) ID:**
5051
- Enable Developer Mode in Discord (Settings → Advanced → Developer Mode)
@@ -176,6 +177,8 @@ journalctl -u spectre-agents -f
176177

177178
## Discord Commands Reference
178179

180+
### Slash commands (simulation ops)
181+
179182
| Command | Description |
180183
|---------|-------------|
181184
| `/run start` | Validate config, submit simulation, start monitoring |
@@ -194,6 +197,20 @@ journalctl -u spectre-agents -f
194197
| `/ensemble status` | Show ensemble convergence |
195198
| `/config [param]` | Show simulation configuration |
196199

200+
### Knowledge Q&A (`#ask-mitgcm`)
201+
202+
Just type a question in the `#ask-mitgcm` channel — no slash command needed.
203+
The bot answers using Claude with full context about:
204+
205+
- **MITgcm**: parameters, packages, Fortran source, debugging
206+
- **ERA5 / GLORYS**: variable definitions, units, accumulation conventions
207+
- **This simulation**: grid, forcing, namelists, workflows, known gotchas
208+
- **Oceanography**: North Atlantic circulation, air-sea fluxes, ensemble methods
209+
- **HPC / SLURM**: job scheduling, containers, parallel I/O
210+
211+
Answers longer than Discord's 2000-character message limit are posted in an automatically created thread to keep the channel clean.
212+
The bot can also search the web and read files in the repo for up-to-date answers.
213+
197214
## Agent Autonomy Levels
198215

199216
The system operates with **high autonomy**:

spectre_agents/config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class DiscordChannels:
2828
alerts: str = "alerts"
2929
plots: str = "plots"
3030
logs: str = "logs"
31+
knowledge: str = "ask-mitgcm"
3132

3233

3334
@dataclass
@@ -151,7 +152,7 @@ def load_config(config_path: str | Path | None = None) -> Config:
151152

152153
discord = raw.get("discord", {})
153154
channels = discord.get("channels", {})
154-
for attr in ("status", "decisions", "alerts", "plots", "logs"):
155+
for attr in ("status", "decisions", "alerts", "plots", "logs", "knowledge"):
155156
if attr in channels:
156157
setattr(cfg.discord_channels, attr, channels[attr])
157158

spectre_agents/discord_bot/bot.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from spectre_agents.discord_bot.commands import setup_commands
1818
from spectre_agents.discord_bot.embeds import decision_embed
19+
from spectre_agents.discord_bot.knowledge import setup_knowledge_handler
1920
from spectre_agents.discord_bot.views import DecisionView
2021

2122
if TYPE_CHECKING:
@@ -63,12 +64,17 @@ async def on_ready(self) -> None:
6364
# Start the decision queue processor
6465
self._decision_task = asyncio.create_task(self._process_decision_queue())
6566

67+
# Register the knowledge Q&A handler for #ask-mitgcm
68+
setup_knowledge_handler(self, self.config, self.ctx)
69+
logger.info("Knowledge bot listening in #%s", self.config.discord_channels.knowledge)
70+
6671
# Post startup message
6772
channel = await self.ctx.get_channel(self.config.discord_channels.status)
6873
if channel:
6974
await channel.send(
7075
"**SPECTRE Agent System** online.\n"
71-
"Use `/run start` to begin a simulation, `/run status` to check progress."
76+
"Use `/run start` to begin a simulation, `/run status` to check progress.\n"
77+
f"Ask questions in #**{self.config.discord_channels.knowledge}**."
7278
)
7379

7480
async def on_error(self, event_method: str, *args, **kwargs) -> None:
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
"""Knowledge bot: answers MITgcm, ERA5, oceanography, and codebase questions.
2+
3+
Listens in #ask-mitgcm for messages, runs a Claude agent with the full
4+
CLAUDE.md context + WebSearch/WebFetch, and replies in-channel or in a thread.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
import asyncio
10+
import logging
11+
from concurrent.futures import ThreadPoolExecutor
12+
from pathlib import Path
13+
from typing import TYPE_CHECKING
14+
15+
import discord
16+
17+
if TYPE_CHECKING:
18+
from spectre_agents.config import Config
19+
from spectre_agents.context import AgentContext
20+
21+
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Two-worker thread pool whose worker threads carry the "knowledge" name prefix.
# NOTE(review): nothing in the visible part of this module submits work to
# this pool — confirm it is still needed.
_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="knowledge")
24+
25+
# The knowledge agent's system prompt, combining CLAUDE.md domain knowledge
26+
# with instructions for being a helpful Q&A assistant.
27+
KNOWLEDGE_SYSTEM_PROMPT = """\
28+
You are the SPECTRE knowledge assistant — an expert on MITgcm ocean modeling, \
29+
ERA5/GLORYS reanalysis data, and the SPECTRE simulation system. You answer \
30+
questions from researchers and engineers working on North Atlantic ocean \
31+
simulations.
32+
33+
## Your expertise
34+
35+
- **MITgcm**: namelist parameters, packages (EXF, OBCS, DIAGNOSTICS, KPP, MNC), \
36+
Fortran source code, numerical methods, grid configuration, debugging
37+
- **ERA5 / Copernicus**: variable definitions, accumulation conventions, units, \
38+
CDS API, temporal/spatial resolution
39+
- **GLORYS v12**: ocean reanalysis fields, CMEMS access, variable naming
40+
- **Oceanography**: North Atlantic circulation, Gulf Stream dynamics, \
41+
air-sea fluxes, boundary conditions, ensemble methods
42+
- **HPC / SLURM**: job scheduling, container workflows (enroot/pyxis), \
43+
parallel I/O, memory management
44+
- **This codebase**: spectre_utils Python package, workflow scripts, \
45+
configuration files, bred vector ensembles
46+
47+
## SPECTRE simulation context
48+
49+
This project runs a realistic MITgcm simulation of the North Atlantic (26-54N):
50+
- Grid: Native NEMO curvilinear, 768 x 424 x 50 levels, MPI 8x8 = 64 ranks
51+
- Ocean data: GLORYS v12 daily fields (T, S, U, V, SSH) for IC and OBC
52+
- Atmospheric forcing: ERA5 3-hourly single-level fields via EXF package
53+
- Simulation period: 2002-07-01 to 2017-06-30
54+
- Key directory: simulations/glorysv12-curvilinear/
55+
56+
### Critical technical details
57+
58+
- **EXF latitude orientation**: ERA5 stores latitude north-to-south. MITgcm EXF \
59+
expects south-to-north (lat0=20.0, lat_inc=+0.25). The mk_exf_conditions.py \
60+
script flips the axis. Getting this wrong causes ~20C air-sea temperature error.
61+
62+
- **EXF range thresholds** (hardcoded in exf_check_range.F): \
63+
hflux: [-500, +1600] W/m2; ustress/vstress: +/-2.0 N/m2
64+
65+
- **Bulk formula**: ALLOW_BULK_LARGEYEAGER04 — Large & Yeager (2009) \
66+
stability-corrected with wind-speed-dependent drag coefficients.
67+
68+
- **MNC tile numbering**: mnc_*_0001/ contains PID 0, which writes tile t004.
69+
70+
- **ERA5 scale factors**: 3-hourly accumulations to W/m2 or m/s use \
71+
1/10800 = 9.2593e-5 (not 1/3600).
72+
73+
- **EXF does not support negative lat_inc** — exf_interp.F assumes \
74+
monotonically increasing latitude.
75+
76+
- **OBC period = 86400.0s (daily), EXF period = 10800.0s (3-hourly)**
77+
78+
- **MNC memory leak**: diag_mnc=.FALSE. with a post-processor converter \
79+
is the workaround for long runs.
80+
81+
## How to respond
82+
83+
- Be direct and technical. Lead with the answer, then explain.
84+
- Include MITgcm parameter names, file paths, and Fortran source references.
85+
- When uncertain, say so and suggest where to look (readthedocs, source code).
86+
- For questions about this specific simulation, reference the config and namelists.
87+
- Use code blocks for parameter examples, file snippets, and commands.
88+
- If a question requires web lookup (latest docs, specific source code), \
89+
use WebSearch/WebFetch to find the answer.
90+
"""
91+
92+
93+
async def _run_knowledge_query(config: Config, question: str, context_hint: str = "") -> str:
    """Run the knowledge agent and return its text response.

    Args:
        config: Loaded configuration. ``config.simulation_dir`` is used as the
            agent's working directory and ``config.agents.web_research.model``
            selects the model.
        question: The user's question, already stripped of bot mentions.
        context_hint: Optional extra context (e.g. the thread title). When
            non-empty it is prepended to the prompt ahead of the question.

    Returns:
        The agent's final result text. If the final result is empty, the text
        of the most recent assistant message that contained text is returned
        instead. If the agent raises, a user-facing apology string containing
        the exception message is returned. May be an empty string when the
        agent produced no text at all.
    """
    # Imported lazily so the SDK is only loaded when a query is actually run.
    from claude_agent_sdk import (
        AssistantMessage,
        ClaudeAgentOptions,
        ResultMessage,
        TextBlock,
        query,
    )

    prompt = f"{context_hint}\n\nQuestion: {question}" if context_hint else question

    final_text = ""
    last_assistant_text = ""
    try:
        async for message in query(
            prompt=prompt,
            options=ClaudeAgentOptions(
                cwd=str(config.simulation_dir),
                allowed_tools=["Read", "Glob", "Grep", "WebSearch", "WebFetch"],
                system_prompt=KNOWLEDGE_SYSTEM_PROMPT,
                model=config.agents.web_research.model,  # Sonnet for Q&A
                permission_mode="default",
                max_turns=10,
            ),
        ):
            if isinstance(message, ResultMessage):
                # An empty/None result must not erase text the assistant has
                # already produced, so it is tracked separately from the
                # assistant-message fallback below.
                final_text = message.result or ""
            elif isinstance(message, AssistantMessage):
                # Keep every text block of the message, not just the last one.
                texts = [
                    block.text
                    for block in message.content
                    if isinstance(block, TextBlock)
                ]
                if texts:
                    last_assistant_text = "\n".join(texts)
    except Exception as e:
        # Top-level boundary: log the traceback and surface a short message
        # to the Discord user instead of failing silently.
        logger.exception("Knowledge agent failed")
        return f"Sorry, I encountered an error: {e}"

    return final_text or last_assistant_text
125+
126+
127+
def setup_knowledge_handler(bot: discord.Client, config: Config, ctx: AgentContext) -> None:
    """Register the on_message handler for #ask-mitgcm Q&A.

    The handler answers every non-empty guild message posted in the knowledge
    channel (``config.discord_channels.knowledge``) or in a thread whose parent
    is that channel. Answers that fit in a single Discord message are sent as a
    reply; longer answers are split into chunks and posted in a thread (a new
    one is created from the question when the message is not already in one).

    Args:
        bot: The Discord client on which the ``on_message`` event is registered.
        config: Loaded configuration; supplies the knowledge channel name and
            is forwarded to the knowledge agent.
        ctx: Agent context. Not used by the handler itself.
    """

    channel_name = config.discord_channels.knowledge

    @bot.event
    async def on_message(message: discord.Message) -> None:
        # Ignore own messages
        if message.author == bot.user:
            return

        # Ignore DMs
        if not message.guild:
            return

        # Only respond in the knowledge channel or in threads under it.
        # A thread's own ``name`` is its title, so the parent channel has to be
        # checked instead; otherwise follow-up questions in answer threads
        # would never match. ``parent`` may be None, in which case the
        # message is ignored.
        channel = message.channel
        in_thread = isinstance(channel, discord.Thread)
        home_channel = channel.parent if in_thread else channel
        if getattr(home_channel, "name", None) != channel_name:
            return

        # Ignore messages with no text content
        content = message.content.strip()
        if not content:
            return

        # Strip bot mention if present (both <@id> and legacy <@!id> forms)
        if bot.user and bot.user.mentioned_in(message):
            content = (
                content.replace(f"<@{bot.user.id}>", "")
                .replace(f"<@!{bot.user.id}>", "")
                .strip()
            )

        # Nothing left once the mention is removed
        if not content:
            return

        logger.info("Knowledge query from %s: %s", message.author, content[:100])

        # Show typing indicator while processing
        async with channel.typing():
            # Give the agent the thread title as lightweight context
            context_hint = ""
            if in_thread:
                context_hint = f"(This question is in a thread titled: {channel.name})"

            result = await _run_knowledge_query(config, content, context_hint)

        if not result:
            result = "I wasn't able to find an answer. Could you rephrase or provide more context?"

        # Discord 2000 char limit — short answers go out as a single reply
        if len(result) <= 2000:
            await message.reply(result, mention_author=False)
            return

        # Long answers: stay in the current thread, or create one from the
        # question so the channel is not flooded with chunks.
        if in_thread:
            target = channel
        else:
            target = await message.create_thread(
                name=(content[:90] + "...") if len(content) > 90 else content,
                auto_archive_duration=60,
            )

        # Send in chunks
        for chunk in _split_message(result):
            await target.send(chunk)
191+
192+
193+
def _split_message(text: str, limit: int = 1900) -> list[str]:
194+
"""Split a long message into chunks, preferring line boundaries."""
195+
if len(text) <= limit:
196+
return [text]
197+
198+
chunks = []
199+
while text:
200+
if len(text) <= limit:
201+
chunks.append(text)
202+
break
203+
204+
# Try to split at a newline
205+
split_at = text.rfind("\n", 0, limit)
206+
if split_at == -1 or split_at < limit // 2:
207+
# Try space
208+
split_at = text.rfind(" ", 0, limit)
209+
if split_at == -1:
210+
split_at = limit
211+
212+
chunks.append(text[:split_at])
213+
text = text[split_at:].lstrip("\n")
214+
215+
return chunks

spectre_agents_config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ discord:
2020
alerts: alerts
2121
plots: plots
2222
logs: logs
23+
knowledge: ask-mitgcm
2324

2425
agents:
2526
orchestrator:

0 commit comments

Comments
 (0)