Skip to content

Commit fcc789d

Browse files
JihaoXin and claude committed
Overhaul pipeline: fix critical bugs, remove dead agents, restructure to 6-agent workflow
Critical fixes:
- Fix agent template path (website/templates → ark/templates) — root cause of empty papers
- run_agent() raises FileNotFoundError on missing prompt instead of silent ""
- _enforce_page_count: max 20 attempts + stall detection (was infinite loop)
- _kill_process_tree: use os.killpg() for proper process group cleanup
- _sync_db: add ARK root to sys.path (fix "No module named website")
- _load_action_plan: raise on corrupt YAML instead of silent empty plan
- Title generation: dedicated claude -p call with validation + retry + fallback
- Citation [NEEDS-CHECK] markers only in References, not body text

Agent restructure (9 → 6):
- Delete visualizer (never called, dead code)
- Delete meta_debugger (could diagnose but not act, 30min API cost)
- Merge old initializer + old researcher → new researcher agent
- Dev Phase result analysis moved from researcher to planner
- Remove dead run_literature_search (~100 lines)

Skills selection improved:
- Use project_context.md (verified) instead of just idea.md
- Distinguish IMPLEMENT vs MENTION to avoid false matches
- Allow selecting 0 skills, cap at 5

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 528d5c0 commit fcc789d

File tree

11 files changed

+247
-771
lines changed

11 files changed

+247
-771
lines changed

ark/agents.py

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -240,14 +240,11 @@ def _run(self):
240240
# during the Specialization step (Research Phase Step 3). The runtime injection of
241241
# project_context has been replaced by Template-Specialization architecture.
242242
AGENT_CONTEXT_PROFILES = {
243-
"initializer": {"memory": False, "deep_research": False, "prior_context": False, "context_files": True},
243+
"researcher": {"memory": False, "deep_research": False, "prior_context": False, "context_files": True},
244244
"reviewer": {"memory": True, "deep_research": False, "prior_context": False, "context_files": False},
245245
"planner": {"memory": True, "deep_research": False, "prior_context": True, "context_files": False},
246246
"writer": {"memory": False, "deep_research": True, "prior_context": True, "context_files": False},
247247
"experimenter": {"memory": False, "deep_research": True, "prior_context": False, "context_files": True},
248-
"researcher": {"memory": False, "deep_research": True, "prior_context": False, "context_files": True},
249-
"visualizer": {"memory": False, "deep_research": False, "prior_context": False, "context_files": False},
250-
"meta_debugger": {"memory": True, "deep_research": False, "prior_context": False, "context_files": True},
251248
"coder": {"memory": False, "deep_research": False, "prior_context": True, "context_files": False},
252249
}
253250

@@ -295,13 +292,28 @@ def _get_ark_model(self) -> str | None:
295292
return None
296293

297294
def _kill_process_tree(self, pid: int):
298-
"""Kill a process and all its descendants."""
295+
"""Kill a process and all its descendants (including the process itself).
296+
297+
Uses SIGKILL on the entire process group (since agents run with
298+
start_new_session=True, pid == pgid) to ensure no orphans survive.
299+
"""
300+
# First try to kill the entire process group
301+
try:
302+
os.killpg(pid, signal.SIGKILL)
303+
except (ProcessLookupError, PermissionError, OSError):
304+
pass
305+
# Also kill individual descendants in case pgid differs
299306
descendants = _get_descendant_pids(pid)
300-
for child_pid in reversed(descendants): # kill children first
307+
for child_pid in reversed(descendants):
301308
try:
302309
os.kill(child_pid, signal.SIGKILL)
303310
except (ProcessLookupError, PermissionError):
304311
pass
312+
# Kill the process itself
313+
try:
314+
os.kill(pid, signal.SIGKILL)
315+
except (ProcessLookupError, PermissionError):
316+
pass
305317

306318
def _cleanup_cli_state(self):
307319
"""Clean up Claude CLI state after abnormal termination (e.g. SIGHUP).
@@ -472,8 +484,7 @@ def run_agent(self, agent_type: str, task: str, timeout: int = 1800,
472484

473485
prompt_file = self.agents_dir / f"{agent_type}.prompt"
474486
if not prompt_file.exists():
475-
self.log(f"Agent prompt not found: {prompt_file}", "ERROR")
476-
return ""
487+
raise FileNotFoundError(f"Agent prompt not found: {prompt_file}")
477488

478489
base_prompt = prompt_file.read_text()
479490

@@ -695,14 +706,20 @@ def run_agent(self, agent_type: str, task: str, timeout: int = 1800,
695706
watchdog.stop()
696707
# Kill entire process tree (agent + all its children)
697708
self._kill_process_tree(process.pid)
698-
process.kill()
699709
timer.stop()
700710
self.log(f"Agent {agent_type} timed out ({timeout}s)", "WARN")
701-
# Capture whatever stdout/stderr is available so the empty-run
702-
# detection + downstream `stderr` references don't NameError.
711+
# Capture whatever stdout/stderr is available.
712+
# Close pipes first to avoid blocking on dead processes.
703713
try:
704-
stdout, stderr = process.communicate(timeout=10)
714+
stdout, stderr = process.communicate(timeout=5)
705715
except Exception:
716+
# If communicate still blocks, force-close pipes
717+
for pipe in (process.stdout, process.stderr):
718+
if pipe:
719+
try:
720+
pipe.close()
721+
except Exception:
722+
pass
706723
stdout, stderr = "", ""
707724
stdout = stdout or ""
708725
stderr = stderr or ""

ark/execution.py

Lines changed: 16 additions & 235 deletions
Original file line numberDiff line numberDiff line change
@@ -43,111 +43,6 @@ def _wait_for_slurm_jobs(self, max_wait_hours: float = 2) -> bool:
4343
"""Wait for experiment jobs (internal shortcut). Delegates to compute backend."""
4444
return self._compute_backend.wait_for_completion(max_wait_hours)
4545

46-
def _get_searched_lit_topics(self) -> set:
47-
"""Return set of literature topics already searched (from literature.yaml)."""
48-
try:
49-
if self.literature_file.exists():
50-
data = yaml.safe_load(self.literature_file.read_text()) or {}
51-
searches = data.get("searches", [])
52-
if isinstance(searches, list):
53-
return {s.get("topic", "").lower().strip()
54-
for s in searches if isinstance(s, dict) and s.get("topic")}
55-
except Exception:
56-
pass
57-
return set()
58-
59-
def run_literature_search(self, topics: list) -> str:
60-
"""Run API-first literature search on given topics.
61-
62-
1. Extract search queries from topics
63-
2. Search academic databases (DBLP/CrossRef/arXiv/S2)
64-
3. Have researcher agent select relevant papers from candidates
65-
4. Fetch official BibTeX and write to references.bib
66-
5. Update literature.yaml for writer reference
67-
"""
68-
from ark.citation import (
69-
search_papers, extract_search_queries, format_candidates_for_agent,
70-
parse_agent_selection, fetch_bibtex, append_papers_to_bib,
71-
update_literature_yaml,
72-
)
73-
74-
self.log_step(f"Literature search (API-first): {topics}", "progress")
75-
76-
bib_path = str(self.latex_dir / "references.bib")
77-
literature_path = str(self.literature_file)
78-
paper_title = self.config.get("title", self.project_name)
79-
research_idea = self.config.get("research_idea", "")
80-
81-
# Gather candidates from all topics
82-
all_candidates = []
83-
for topic in topics:
84-
topic_prompts = self.config.get("literature_search_prompts", {})
85-
description = topic_prompts.get(topic, topic)
86-
queries = extract_search_queries(topic, description)
87-
self.log_step(f" Searching: {queries[:3]}", "progress")
88-
for q in queries[:3]:
89-
results = search_papers(q, max_results=10)
90-
all_candidates.extend(results)
91-
92-
if not all_candidates:
93-
self.log_step("No papers found from academic databases", "warning")
94-
return ""
95-
96-
# Deduplicate
97-
seen = set()
98-
unique = []
99-
for p in all_candidates:
100-
key = p.doi or p.title.lower()[:60]
101-
if key not in seen:
102-
seen.add(key)
103-
unique.append(p)
104-
all_candidates = unique[:15]
105-
106-
self.log_step(f" Found {len(all_candidates)} candidate papers", "progress")
107-
108-
# Researcher agent selects relevant papers
109-
candidates_text = format_candidates_for_agent(all_candidates)
110-
selection_prompt = f"""
111-
## Paper Background
112-
Title: {paper_title}
113-
Research idea: {research_idea}
114-
115-
## Candidate Papers (from academic databases — all are real, verified papers)
116-
117-
{candidates_text}
118-
119-
## Your Task
120-
121-
Select the papers most relevant to our research from the list above.
122-
123-
Output format:
124-
SELECTED: 1, 5, 11
125-
[1] Reason: ... | Section: Related Work
126-
[5] Reason: ... | Section: Method
127-
[11] Reason: ... | Section: Experiments
128-
129-
Rules:
130-
- ONLY select from the numbered list above
131-
- Do NOT suggest any papers not in the list
132-
- For each selected paper, explain why it is relevant and where to cite it
133-
"""
134-
agent_output = self.run_agent("researcher", selection_prompt, timeout=900)
135-
136-
# Parse selection and write BibTeX
137-
selected = parse_agent_selection(agent_output, all_candidates)
138-
if not selected:
139-
self.log_step("Researcher selected no papers", "warning")
140-
return agent_output
141-
142-
self.log_step(f" Researcher selected {len(selected)} papers, fetching BibTeX...", "progress")
143-
added_keys = append_papers_to_bib(bib_path, selected)
144-
self.log_step(f" Added {len(added_keys)} citations to references.bib: {added_keys}", "success")
145-
146-
# Update literature.yaml
147-
update_literature_yaml(literature_path, selected, added_keys, agent_output)
148-
149-
return agent_output
150-
15146
def run_planner_cycle(self, review_output: str) -> bool:
15247
"""Planner-driven iteration cycle (planning + execution).
15348
@@ -708,13 +603,15 @@ def _enforce_page_count(self, context: str = "post-writing") -> bool:
708603

709604
# Loop until page count is in range.
710605
# Every 4 failed attempts, relax tolerance by 0.1 pages (both sides).
606+
# Hard limit of 20 attempts to prevent infinite loops.
607+
MAX_PAGE_ATTEMPTS = 20
711608
attempt = 0
712609
tolerance_relaxations = 0
713610
cur_min = min_pages
714611
cur_max = max_pages
715612
history = [] # list of {"before", "after", "action"} for feedback to LLM
716613

717-
while True:
614+
while attempt < MAX_PAGE_ATTEMPTS:
718615
in_range = cur_min <= page_count <= cur_max
719616
if in_range:
720617
if tolerance_relaxations > 0:
@@ -794,6 +691,19 @@ def _enforce_page_count(self, context: str = "post-writing") -> bool:
794691
self.log(f"[{context}] Could not determine page count after {action}", "WARN")
795692
return True
796693

694+
# Detect stalled progress: if page count didn't change at all, bail out
695+
# instead of looping fruitlessly.
696+
stall_count = sum(1 for h in history[-3:] if abs(h["after"] - h["before"]) < 0.01)
697+
if stall_count >= 3:
698+
self.log(f"[{context}] Page count stalled at {page_count:.1f} for 3 consecutive "
699+
f"attempts — aborting page enforcement", "ERROR")
700+
return False
701+
702+
# Exhausted max attempts
703+
self.log(f"[{context}] Page enforcement failed after {MAX_PAGE_ATTEMPTS} attempts "
704+
f"({page_count:.1f}/{venue_pages} pages)", "ERROR")
705+
return False
706+
797707
# LaTeX snippet that saves the current vertical position to the .aux file.
798708
# \pdfsavepos records the position at shipout; the deferred \write expands
799709
# \pdflastypos at that moment, giving the y-coordinate (in sp, from the
@@ -1425,135 +1335,6 @@ def _get_bottleneck(self) -> str:
14251335

14261336
return "Unknown"
14271337

1428-
# ==================== Meta-Debugger ====================
1429-
1430-
def run_meta_debugger(self, trigger_reason: str) -> str:
1431-
"""Run Meta-Debugger for system diagnosis and repair.
1432-
1433-
Returns:
1434-
"CONTINUE" | "CONTINUE_WITH_FIX" | "PAUSE"
1435-
"""
1436-
self.log(f"Triggering Meta-Debugger: {trigger_reason}", "META")
1437-
1438-
diagnosis_ctx = self.memory.get_diagnosis_context()
1439-
health_status, health_reasons = self.memory.get_health_status()
1440-
1441-
ctx_summary = f"""
1442-
## Trigger Reason
1443-
{trigger_reason}
1444-
1445-
## System Health Status
1446-
Status: **{health_status}**
1447-
{"Reasons: " + ", ".join(health_reasons) if health_reasons else "No anomalies"}
1448-
1449-
## Score Trend
1450-
- Current: {diagnosis_ctx['scores']['current']}/10
1451-
- Best: {diagnosis_ctx['scores']['best']}/10
1452-
- Trend: {diagnosis_ctx['scores']['trend']}
1453-
- Recent: {' -> '.join(f"{s:.1f}" for s in diagnosis_ctx['scores']['recent'][-5:])}
1454-
1455-
## Stagnation Status
1456-
- Stagnation count: {diagnosis_ctx['stagnation']['count']}
1457-
- Is stagnating: {diagnosis_ctx['stagnation']['is_stagnating']}
1458-
- Reason: {diagnosis_ctx['stagnation']['reason']}
1459-
1460-
## Issue Repetition
1461-
- High repeat (7+ times): {diagnosis_ctx['issues']['high_repeat']}
1462-
- Medium repeat (3+ times): {diagnosis_ctx['issues']['repeat_issues'][:5]}
1463-
1464-
## Experiment Idle Runs
1465-
- Idle run count: {diagnosis_ctx['experiment_empty_count']}
1466-
"""
1467-
1468-
diagnosis_output = self.run_agent("meta_debugger", f"""
1469-
{ctx_summary}
1470-
1471-
Please perform a complete system diagnosis:
1472-
1473-
1. **Read key state files**:
1474-
- auto_research/state/memory.yaml
1475-
- auto_research/state/action_plan.yaml
1476-
- auto_research/state/latest_review.md
1477-
1478-
2. **Analyze recent execution logs** (check auto_research/logs/ directory)
1479-
1480-
3. **Check execution consistency**:
1481-
- Run `git diff scripts/create_paper_figures.py` to check FIGURE_CODE tasks
1482-
- Run `git status` to check which files were modified
1483-
1484-
4. **Identify problem patterns**:
1485-
- Are there cases of "correct plan but failed execution"?
1486-
- Is the system stuck in a "method loop"?
1487-
- Are strategy escalation rules being violated?
1488-
1489-
5. **Generate diagnosis report** to auto_research/state/meta_diagnosis.md
1490-
1491-
6. **If state issues are found, fix them directly** (ONLY these file types):
1492-
- Reset erroneous accumulations in memory.yaml
1493-
- Fix malformed action_plan.yaml
1494-
- Edit agent prompt files (*.prompt) to improve instructions
1495-
1496-
**FORBIDDEN — do NOT modify**:
1497-
- Any Python source code (.py files)
1498-
- Any shell scripts (.sh files)
1499-
- Any configuration outside auto_research/state/
1500-
1501-
Modifying .py files risks breaking the pipeline. If you find a Python bug,
1502-
describe it in meta_diagnosis.md — a human will fix it.
1503-
1504-
**Important**: Diagnosis must find root causes, not just symptoms. Fixes must be specific, not just suggestions.
1505-
""", timeout=1800)
1506-
1507-
# Safety: revert any .py files the agent may have modified
1508-
self._revert_py_modifications()
1509-
1510-
diagnosis_file = self.state_dir / "meta_diagnosis.md"
1511-
if diagnosis_file.exists():
1512-
try:
1513-
diagnosis_file.read_text()
1514-
self.log("Meta-Debugger completed diagnosis, continuing iteration", "WARNING")
1515-
self.memory.load()
1516-
return "CONTINUE_WITH_FIX"
1517-
except Exception as e:
1518-
self.log(f"Failed to read diagnosis report: {e}", "ERROR")
1519-
1520-
return "CONTINUE"
1521-
1522-
def _revert_py_modifications(self):
1523-
"""Revert any .py file changes made by meta-debugger.
1524-
1525-
Meta-debugger should only modify state files (.yaml, .md, .prompt).
1526-
If it touched .py files, revert them with git checkout.
1527-
"""
1528-
try:
1529-
result = subprocess.run(
1530-
["git", "diff", "--name-only"],
1531-
capture_output=True, text=True, timeout=10,
1532-
cwd=self.code_dir,
1533-
)
1534-
if result.returncode != 0:
1535-
return
1536-
1537-
changed = result.stdout.strip().split("\n") if result.stdout.strip() else []
1538-
py_files = [f for f in changed if f.endswith(".py")]
1539-
if py_files:
1540-
self.log(f"Meta-debugger modified .py files (forbidden): {py_files}", "WARN")
1541-
subprocess.run(
1542-
["git", "checkout", "--"] + py_files,
1543-
capture_output=True, timeout=10,
1544-
cwd=self.code_dir,
1545-
)
1546-
self.log(f"Reverted {len(py_files)} .py file(s)", "WARN")
1547-
except Exception as e:
1548-
self.log(f"Failed to check/revert .py modifications: {e}", "WARN")
1549-
1550-
def check_and_trigger_meta_debug(self) -> str:
1551-
"""Check if Meta-Debugger should be triggered, and run it if so."""
1552-
should_trigger, reason = self.memory.should_trigger_meta_debug()
1553-
if should_trigger:
1554-
return self.run_meta_debugger(reason)
1555-
return "CONTINUE"
1556-
15571338
def self_repair(self, stagnation_reason: str) -> bool:
15581339
"""Self-repair: re-plan strategy when stagnating.
15591340

0 commit comments

Comments (0)