Skip to content

Commit 182a949

Browse files
JihaoXinclaude
andcommitted
Unify project state management: DB as source of truth for config/status
- Extended Project DB schema with config fields (model, code_dir, language, source), runtime status (phase, iteration, dev_iteration, score_history), cost tracking (total_cost_usd, total_input/output_tokens), checkpoint data, and process tracking (pid) - Made orchestrator DB-aware: _sync_db() updates project record after each step, checkpoint, score change, and cost report write - Unified CLI with webapp: ark new registers projects in webapp DB, ark run passes --db-path/--project-id to orchestrator - Updated webapp to read from DB first with YAML fallback for legacy projects. SSE stream now reads from DB instead of parsing YAML every 2 seconds - Added data migration (migrate_project_data) that populates new DB columns from existing YAML state files on webapp startup - YAML files retained for agent working state (findings, experiment_plan, research_state, action_plan, literature, memory) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 3e2097c commit 182a949

File tree

8 files changed

+525
-63
lines changed

8 files changed

+525
-63
lines changed

ark/cli.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1591,6 +1591,47 @@ def _finalize_project(name: str, project_dir: Path, config: dict,
15911591
"""Create project directory, write config, copy templates, set up workspace."""
15921592
code_dir = config["code_dir"]
15931593

1594+
# ── Register project in webapp DB ──────────────────────────
1595+
project_id = None
1596+
try:
1597+
from ark.webapp.db import (resolve_db_path, get_session,
1598+
get_or_create_user_by_email, create_project as db_create_project)
1599+
import getpass
1600+
db_path = resolve_db_path()
1601+
if db_path and Path(db_path).parent.exists():
1602+
with get_session(db_path) as session:
1603+
cli_email = f"{getpass.getuser()}@cli.local"
1604+
user, _ = get_or_create_user_by_email(session, cli_email)
1605+
import uuid
1606+
project_id = str(uuid.uuid4())
1607+
db_create_project(
1608+
session,
1609+
id=project_id,
1610+
user_id=user.id,
1611+
name=title or name,
1612+
title=title or "",
1613+
idea=config.get("research_idea", "") or config.get("idea", ""),
1614+
venue=venue_name,
1615+
venue_format=venue_format,
1616+
venue_pages=venue_pages,
1617+
max_iterations=config.get("max_iterations", 2),
1618+
max_dev_iterations=config.get("max_dev_iterations", 3),
1619+
mode=config.get("mode", "paper"),
1620+
model=config.get("model", "claude"),
1621+
model_variant=config.get("model_variant", ""),
1622+
code_dir=str(code_dir),
1623+
language=config.get("language", "en"),
1624+
source="cli",
1625+
status="queued",
1626+
telegram_token=config.get("telegram_bot_token", ""),
1627+
telegram_chat_id=config.get("telegram_chat_id", ""),
1628+
)
1629+
# Store project_id in config so cmd_run can pass it to orchestrator
1630+
config["_project_id"] = project_id
1631+
config["_db_path"] = db_path
1632+
except Exception as e:
1633+
print(f" {_c('Note:', Colors.DIM)} DB registration skipped: {e}")
1634+
15941635
# ── Create project directory ──────────────────────────────
15951636
project_dir.mkdir(parents=True, exist_ok=True)
15961637
agents_dir = project_dir / "agents"
@@ -1830,6 +1871,25 @@ def cmd_run(args):
18301871
print(f" Max days: {max_days}")
18311872
print(f" Log file: {log_file}")
18321873

1874+
# ── Resolve DB path and project ID for orchestrator ──
1875+
db_path = config.get("_db_path", "")
1876+
project_id = config.get("_project_id", "")
1877+
if not db_path:
1878+
try:
1879+
from ark.webapp.db import resolve_db_path
1880+
db_path = resolve_db_path()
1881+
except Exception:
1882+
pass
1883+
if not project_id and db_path and Path(db_path).exists():
1884+
try:
1885+
from ark.webapp.db import get_session, get_project_by_name
1886+
with get_session(db_path) as session:
1887+
p = get_project_by_name(session, name)
1888+
if p:
1889+
project_id = p.id
1890+
except Exception:
1891+
pass
1892+
18331893
# Launch orchestrator in background
18341894
cmd = [
18351895
sys.executable, "-m", "ark.orchestrator",
@@ -1839,6 +1899,10 @@ def cmd_run(args):
18391899
"--iterations", str(max_iterations),
18401900
"--max-days", str(max_days),
18411901
]
1902+
if db_path:
1903+
cmd.extend(["--db-path", db_path])
1904+
if project_id:
1905+
cmd.extend(["--project-id", project_id])
18421906

18431907
# Strip CLAUDECODE so orchestrator can call claude CLI freely
18441908
env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}

ark/orchestrator.py

Lines changed: 99 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class Orchestrator(AgentMixin, CompilerMixin, ExecutionMixin, PipelineMixin, Dev
4444

4545
def __init__(self, project: str, max_days: float = 3, max_iterations: int = 100,
4646
mode: str = "research", model: str = "claude", code_dir: str = None,
47-
project_dir: str = None):
47+
project_dir: str = None, db_path: str = None, project_id: str = None):
4848
global PROJECT_DIR
4949

5050
self.max_end_time = datetime.now() + timedelta(days=max_days)
@@ -54,6 +54,11 @@ def __init__(self, project: str, max_days: float = 3, max_iterations: int = 100,
5454
self.model = model
5555
self.project_name = project
5656

57+
# ── DB awareness ──
58+
self._db_path = db_path
59+
self._project_id = project_id
60+
self._db_sync_errors = 0 # count consecutive failures
61+
5762
# Human-readable display name (resolved after config loads)
5863
self._display_name = None
5964

@@ -98,7 +103,7 @@ def __init__(self, project: str, max_days: float = 3, max_iterations: int = 100,
98103
self.latex_dir = self.code_dir / self.config.get("latex_dir", "Latex")
99104
self.figures_dir = self.code_dir / self.config.get("figures_dir", "Latex/figures")
100105

101-
# State file paths
106+
# State file paths (agent working state — stays as YAML)
102107
self.state_file = self.state_dir / "research_state.yaml"
103108
self.findings_file = self.state_dir / "findings.yaml"
104109
self.paper_state_file = self.state_dir / "paper_state.yaml"
@@ -221,6 +226,24 @@ def display_name(self) -> str:
221226
self._display_name = title or name or self.project_name
222227
return self._display_name
223228

229+
# ========== DB Sync ==========
230+
231+
def _sync_db(self, **kwargs):
232+
"""Update project record in the webapp DB. Fail-soft: errors are logged, never raised."""
233+
if not self._db_path or not self._project_id:
234+
return
235+
try:
236+
from ark.webapp.db import get_session, get_project, update_project
237+
with get_session(self._db_path) as session:
238+
project = get_project(session, self._project_id)
239+
if project:
240+
update_project(session, project, **kwargs)
241+
self._db_sync_errors = 0
242+
except Exception as e:
243+
self._db_sync_errors += 1
244+
if self._db_sync_errors <= 3:
245+
self.log(f"DB sync failed ({self._db_sync_errors}): {e}", "WARN")
246+
224247
# ========== Deep Research (background) ==========
225248

226249
def _start_deep_research_background(self):
@@ -636,6 +659,7 @@ def set_language_pref(self, lang: str):
636659
data["language"] = lang
637660
with open(prefs_file, "w") as f:
638661
yaml.dump(data, f, default_flow_style=False)
662+
self._sync_db(language=lang)
639663
self.log(f"Language preference set to: {lang}", "INFO")
640664
except Exception as e:
641665
self.log(f"Failed to save language pref: {e}", "WARN")
@@ -947,6 +971,12 @@ def save_checkpoint(self):
947971
with open(self.checkpoint_file, "w") as f:
948972
yaml.dump(checkpoint, f, default_flow_style=False)
949973
self.log(f"Checkpoint saved: iteration={self.iteration}", "INFO")
974+
self._sync_db(
975+
checkpoint_data=json.dumps(checkpoint),
976+
iteration=self.iteration,
977+
total_input_tokens=self.total_input_tokens,
978+
total_output_tokens=self.total_output_tokens,
979+
)
950980

951981
def save_step_checkpoint(self, step_num: int, step_name: str):
952982
"""Save checkpoint after a step completes within a phase iteration."""
@@ -965,6 +995,7 @@ def save_step_checkpoint(self, step_num: int, step_name: str):
965995
}
966996
with open(self.checkpoint_file, "w") as f:
967997
yaml.dump(checkpoint, f, default_flow_style=False)
998+
self._sync_db(checkpoint_data=json.dumps(checkpoint))
968999

9691000
# Backward compat alias
9701001
save_phase_checkpoint = save_step_checkpoint
@@ -1206,6 +1237,25 @@ def save_paper_state(self, state: dict):
12061237
"""Save paper review state."""
12071238
with open(self.paper_state_file, "w") as f:
12081239
yaml.dump(state, f, default_flow_style=False, allow_unicode=True)
1240+
# Sync to DB
1241+
db_update = {
1242+
"score": float(state.get("current_score", 0)),
1243+
"iteration": self.iteration,
1244+
}
1245+
reviews = state.get("reviews", [])
1246+
if reviews:
1247+
db_update["score_history"] = json.dumps([
1248+
{"iteration": r.get("iteration", i + 1),
1249+
"score": float(r.get("score", 0)),
1250+
"timestamp": r.get("timestamp", "")}
1251+
for i, r in enumerate(reviews)
1252+
])
1253+
paper_status = state.get("status", "in_progress")
1254+
if paper_status in ("accepted", "accepted_pending_cleanup"):
1255+
db_update["phase"] = "accepted"
1256+
else:
1257+
db_update["phase"] = "review"
1258+
self._sync_db(**db_update)
12091259

12101260
def load_paper_requirements(self) -> dict:
12111261
"""Load paper requirements config."""
@@ -2046,8 +2096,19 @@ def main():
20462096
help="Override code directory (default: from project config)")
20472097
parser.add_argument("--project-dir", type=str, default=None,
20482098
help="Override project directory (default: ARK_ROOT/projects/<project>)")
2099+
parser.add_argument("--db-path", type=str, default=None,
2100+
help="Path to webapp SQLite DB for status sync")
2101+
parser.add_argument("--project-id", type=str, default=None,
2102+
help="Project UUID in the webapp DB")
20492103
args = parser.parse_args()
20502104

2105+
# Resolve DB path: explicit arg > env > webapp.env > default
2106+
db_path = args.db_path
2107+
project_id = args.project_id
2108+
if not db_path:
2109+
from ark.webapp.db import resolve_db_path
2110+
db_path = resolve_db_path()
2111+
20512112
# Load project config to resolve code_dir if not specified
20522113
project_dir = args.project_dir
20532114
config_file = (Path(project_dir) if project_dir else ARK_ROOT / "projects" / args.project) / "config.yaml"
@@ -2058,6 +2119,21 @@ def main():
20582119
cfg = _yaml.safe_load(f) or {}
20592120
code_dir = cfg.get("code_dir")
20602121

2122+
# Auto-resolve project_id from DB if not provided
2123+
if not project_id and db_path and Path(db_path).exists():
2124+
try:
2125+
from ark.webapp.db import get_session, get_project_by_name, get_project
2126+
with get_session(db_path) as session:
2127+
# Try looking up by project name or by project_dir matching id
2128+
p = get_project_by_name(session, args.project)
2129+
if not p:
2130+
# Maybe --project is actually a UUID
2131+
p = get_project(session, args.project)
2132+
if p:
2133+
project_id = p.id
2134+
except Exception:
2135+
pass
2136+
20612137
orchestrator = Orchestrator(
20622138
max_days=args.max_days,
20632139
max_iterations=args.iterations,
@@ -2066,8 +2142,28 @@ def main():
20662142
model=args.model,
20672143
code_dir=code_dir,
20682144
project_dir=project_dir,
2145+
db_path=db_path,
2146+
project_id=project_id,
20692147
)
2070-
orchestrator.run()
2148+
2149+
# Mark as running in DB
2150+
if db_path and project_id:
2151+
orchestrator._sync_db(status="running", pid=os.getpid())
2152+
2153+
try:
2154+
orchestrator.run()
2155+
# Mark completion in DB
2156+
if db_path and project_id:
2157+
paper_state = orchestrator.load_paper_state()
2158+
final_status = "done"
2159+
if paper_state.get("status") in ("accepted", "accepted_pending_cleanup"):
2160+
final_status = "done"
2161+
orchestrator._sync_db(status=final_status, pid=0)
2162+
except KeyboardInterrupt:
2163+
orchestrator._sync_db(status="stopped", pid=0)
2164+
except Exception:
2165+
orchestrator._sync_db(status="failed", pid=0)
2166+
raise
20712167

20722168

20732169
if __name__ == "__main__":

ark/pipeline.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,7 @@ def _run_research_phase(self):
808808
4.1 clone conda env (ark-base → .env/)
809809
4.2 bootstrap citations → references.bib
810810
"""
811+
self._sync_db(phase="research")
811812
self.log("", "RAW")
812813
self.log_section("Research Phase | Understanding Project & Building Foundation")
813814

@@ -1257,6 +1258,7 @@ def _update_title_from_context(self):
12571258
cfg["title"] = title
12581259
with open(config_file, "w") as f:
12591260
yaml.dump(cfg, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
1261+
self._sync_db(title=title, name=title)
12601262
self.log(f"Title updated: {title}", "INFO")
12611263

12621264
# ==================== Citation Bootstrapping ====================
@@ -1308,6 +1310,7 @@ def _generate_title_if_needed(self):
13081310
cfg = yaml.safe_load(config_path.read_text()) or {}
13091311
cfg["title"] = new_title
13101312
config_path.write_text(yaml.dump(cfg, default_flow_style=False, allow_unicode=True))
1313+
self._sync_db(title=new_title, name=new_title)
13111314

13121315
def _bootstrap_citations_from_deep_research(self):
13131316
"""Extract paper titles from Deep Research report via LLM, then fetch BibTeX via API.
@@ -1544,6 +1547,14 @@ def _save_dev_phase_state(self, state: dict):
15441547
dev_state_file = self.state_dir / "dev_phase_state.yaml"
15451548
with open(dev_state_file, "w") as f:
15461549
yaml.dump(state, f, default_flow_style=False, allow_unicode=True)
1550+
# Sync to DB
1551+
dev_status = state.get("status", "pending")
1552+
phase = "dev" if dev_status == "in_progress" else ("review" if dev_status in ("completed", "complete") else "")
1553+
self._sync_db(
1554+
dev_iteration=int(state.get("iteration", 0)),
1555+
dev_status=dev_status,
1556+
phase=phase,
1557+
)
15471558

15481559
def _run_dev_phase(self):
15491560
"""Run the Dev Phase: iterative experiments → initial paper draft.
@@ -2648,6 +2659,13 @@ def _write_cost_report(self):
26482659
except Exception:
26492660
pass
26502661
raise
2662+
# Sync cost totals to DB
2663+
self._sync_db(
2664+
total_cost_usd=round(total_cost_usd, 6),
2665+
total_input_tokens=total_input_tokens,
2666+
total_output_tokens=total_output_tokens,
2667+
total_agent_calls=total_calls,
2668+
)
26512669

26522670
def run(self):
26532671
"""Main loop."""

ark/webapp/app.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,15 @@ async def lifespan(app: FastAPI):
443443
pass
444444
# Now create engine + tables (ORM will see the migrated schema)
445445
get_engine(settings.db_path)
446+
447+
# Migrate existing project data: populate new DB columns from YAML state files
448+
from ark.webapp.db import migrate_project_data
449+
try:
450+
migrate_project_data(settings.db_path, str(settings.projects_root))
451+
logger.info("Project data migration completed.")
452+
except Exception as e:
453+
logger.warning(f"Project data migration failed (non-fatal): {e}")
454+
446455
logger.info(f"ARK Webapp starting. DB: {settings.db_path}")
447456
logger.info(f"Projects root: {settings.projects_root}")
448457

0 commit comments

Comments
 (0)