@@ -794,21 +794,27 @@ def _run_research_phase(self):
794794
795795 All sub-steps are idempotent — each checks if its output exists and skips if so.
796796
797+ Step 0: Setup
798+ Provision per-project conda env at <project_dir>/.env (clones ark-base).
799+ Idempotent: skipped if .env already exists.
800+
797801 Step 1: Analyze Proposal
798- initializer reads uploaded PDF → idea.md + deep research query
802+ initializer reads uploaded PDF / idea → idea.md (including a
803+ suggested title) + deep research query. Title is parsed and
804+ committed to config.yaml + DB immediately after this step so
805+ Deep Research and Telegram UX have a real title.
799806
800807 Step 2: Deep Research
801808 Gemini Deep Research API → deep_research.md → PDF sent to user via Telegram
802809
803810 Step 3: Specialization
804811 initializer reads idea.md + deep_research.md →
805- 3.1 determine title
806- 3.2 generate project_context.md (web-verified)
807- 3.3 specialize agent prompts (template + project knowledge → agents/ dir)
808- 3.4 select skills from library
812+ 3.1 generate project_context.md (web-verified)
813+ 3.2 specialize agent prompts (template + project knowledge → agents/ dir)
814+ 3.3 select skills from library
809815
810816 Step 4: Bootstrap
811- 4.1 clone conda env (ark-base → .env/)
817+ 4.1 install builtin skills
812818 4.2 bootstrap citations → references.bib
813819 """
814820 self ._sync_db (phase = "research" )
@@ -821,6 +827,27 @@ def _run_research_phase(self):
821827 parse_mode = "HTML" ,
822828 )
823829
830+ # ── Step 0: Setup (conda env provisioning) ──────────────────────
831+ self .log_step_header (0 , 4 , "Setup" )
832+ try :
833+ from ark .webapp .jobs import provision_project_env , project_env_ready
834+ if not project_env_ready (self .code_dir ):
835+ base_env = self .config .get ("base_conda_env" , "ark-base" )
836+ self .log_step (f"Provisioning conda environment (cloning { base_env } )..." , "progress" )
837+ success , msg = provision_project_env (self .code_dir , base_env )
838+ if success :
839+ self .log_step (f"Conda env ready: { msg } " , "success" )
840+ else :
841+ # Hard fail: the whole pipeline depends on this env for
842+ # experiments. Surface the error; caller will mark failed.
843+ self .log_step (f"Conda env provisioning failed: { msg } " , "error" )
844+ raise RuntimeError (f"Conda env provisioning failed: { msg } " )
845+ else :
846+ self .log_step ("Conda env already exists" , "success" )
847+ except ImportError as e :
848+ self .log (f"Conda env provisioning skipped (webapp.jobs unavailable): { e } " , "WARN" )
849+ self .log_step_header (0 , 4 , "Setup" , "end" )
850+
824851 # ── Step 1: Analyze Proposal ────────────────────────────────────
825852 idea_file = self .state_dir / "idea.md"
826853 dr_query = None # Will be set by initializer output
@@ -886,6 +913,10 @@ def _run_research_phase(self):
886913 else :
887914 self .log_step ("idea.md exists, skipping proposal analysis" , "info" )
888915
916+ # Commit the suggested title to config.yaml + DB now, before Deep
917+ # Research, so the query and Telegram notifications use a real title.
918+ self ._update_title_from_idea ()
919+
889920 # ── Step 2: Deep Research ───────────────────────────────────────
890921 dr_file = self .state_dir / "deep_research.md"
891922 if not dr_file .exists ():
@@ -969,20 +1000,14 @@ def _run_research_phase(self):
9691000
9701001Write `auto_research/state/project_context.md` with sections:
9711002## External Systems, ## Environment Setup, ## Experiment Guidance, ## Credentials & Access
972-
973- Also: if the current title "{ self .config .get ('title' , '' )} " is empty or a placeholder,
974- write a suggested title as the first line: `# Title: <suggested title>`
9751003""" , timeout = 600 )
9761004 self .log_step ("Project context generated" , "success" )
9771005
978- # 3.2: Update title from context if needed
979- self ._update_title_from_context ()
980-
981- # 3.3: Specialize agent prompts (code-driven, one call per agent)
1006+ # 3.2: Specialize agent prompts (code-driven, one call per agent)
9821007 self .log_step ("Specializing agent prompts..." , "progress" )
9831008 self ._specialize_agent_prompts (idea_content , dr_content )
9841009
985- # 3.4 : Select and install skills
1010+ # 3.3 : Select and install skills
9861011 self .log_step ("Selecting skills..." , "progress" )
9871012 skills_index = self ._load_skills_index ()
9881013 if skills_index and "No skills" not in skills_index :
@@ -1008,26 +1033,10 @@ def _run_research_phase(self):
10081033 # ── Step 4: Bootstrap ───────────────────────────────────────────
10091034 self .log_step_header (4 , 4 , "Bootstrap" )
10101035
1011- # 4.1: Conda environment
1012- try :
1013- from ark .webapp .jobs import provision_project_env , project_env_ready
1014- if not project_env_ready (self .code_dir ):
1015- base_env = self .config .get ("base_conda_env" , "ark-base" )
1016- self .log_step (f"Provisioning conda environment (cloning { base_env } )..." , "progress" )
1017- success , msg = provision_project_env (self .code_dir , base_env )
1018- if success :
1019- self .log_step (f"Conda env ready: { msg } " , "success" )
1020- else :
1021- self .log_step (f"Conda env provisioning failed: { msg } " , "warning" )
1022- else :
1023- self .log_step ("Conda env already exists" , "success" )
1024- except Exception as e :
1025- self .log (f"Conda env provisioning skipped: { e } " , "WARN" )
1026-
1027- # 4.2: Install builtin skills (auto-inherited by all projects)
1036+ # 4.1: Install builtin skills (auto-inherited by all projects)
10281037 self ._install_builtin_skills ()
10291038
1030- # 4.3 : Bootstrap citations
1039+ # 4.2 : Bootstrap citations
10311040 self ._bootstrap_citations_from_deep_research ()
10321041
10331042 self .log_step_header (4 , 4 , "Bootstrap" , "end" )
@@ -1249,83 +1258,82 @@ def _specialize_agent_prompts(self, idea_content: str, dr_content: str):
12491258
12501259 self .log_step (f"Specialized { specialized_count } /{ len (agent_focus )} agent prompts" , "success" )
12511260
1252- def _update_title_from_context (self ):
1253- """Extract title from project_context.md if it starts with '# Title:'."""
1254- ctx_file = self .state_dir / "project_context.md"
1255- if not ctx_file .exists ():
1256- return
1257-
1258- content = ctx_file .read_text ()
1259- if content .startswith ("# Title:" ):
1260- title = content .split ("\n " , 1 )[0 ].replace ("# Title:" , "" ).strip ()
1261- if title and not self .config .get ("title" ):
1262- self .config ["title" ] = title
1263- # Update config.yaml on disk
1264- config_file = Path (self .code_dir ).parent / "config.yaml"
1265- if not config_file .exists ():
1266- # Try project dir
1267- from ark .paths import get_ark_root
1268- config_file = get_ark_root () / "projects" / self .project_name / "config.yaml"
1269- if config_file .exists ():
1270- import yaml
1271- with open (config_file ) as f :
1272- cfg = yaml .safe_load (f ) or {}
1273- cfg ["title" ] = title
1274- with open (config_file , "w" ) as f :
1275- yaml .dump (cfg , f , default_flow_style = False , allow_unicode = True , sort_keys = False )
1276- self ._sync_db (title = title , name = title )
1277- self .log (f"Title updated: { title } " , "INFO" )
1261+ def _update_title_from_idea (self ):
1262+ """Extract the suggested title from idea.md and commit it.
12781263
1279- # ==================== Citation Bootstrapping ====================
1280-
1281- def _generate_title_if_needed (self ):
1282- """Auto-generate paper title from idea + venue + deep research if not provided."""
1283- title = self .config .get ("title" , "" )
1284- if title :
1285- return # User provided a title, keep it
1264+ The initializer (Step 1) writes a ``### Suggested Title`` section in
1265+ ``auto_research/state/idea.md``. If the current project title is empty
1266+ or a placeholder, parse that section, update ``self.config['title']``,
1267+ write back to ``config.yaml``, and sync the DB. This makes the real
1268+ title available to Deep Research and to Telegram UX before they run.
1269+ """
1270+ idea_file = self .state_dir / "idea.md"
1271+ if not idea_file .exists ():
1272+ return
12861273
1287- idea = self ._research_idea
1288- venue = self .config .get ("venue" , "" )
1289- if not idea :
1274+ current = (self .config .get ("title" ) or "" ).strip ()
1275+ # Treat empty, very short (< 4 chars), or UUID-like titles as placeholders we should overwrite.
1276+ is_placeholder = (
1277+ not current
1278+ or len (current ) < 4
1279+ or re .fullmatch (r"[0-9a-fA-F-]{30,}" , current ) is not None
1280+ )
1281+ if not is_placeholder :
12901282 return
12911283
1292- # Read deep research summary for context
1293- dr_file = self .state_dir / "deep_research.md"
1294- dr_summary = ""
1295- if dr_file .exists ():
1296- dr_summary = dr_file .read_text ()[:4000 ]
1284+ content = idea_file .read_text ()
1285+ # Find the Suggested Title section: a "### Suggested Title" header
1286+ # followed by one or more lines. Capture the whole section body (up to the
1287+ # next "##"-prefixed heading or end of file); the first usable line is picked out below.
1288+ m = re .search (r"^###\s+Suggested\s+Title\s*\n+(.+?)(?=^##|\Z)" ,
1289+ content , re .MULTILINE | re .DOTALL | re .IGNORECASE )
1290+ if not m :
1291+ return
1292+ block = m .group (1 ).strip ()
1293+ # Skip if the initializer said the current title is fine.
1294+ if "current title is appropriate" in block .lower ():
1295+ return
12971296
1298- self .log_step ("Generating paper title..." , "progress" )
1299- result = self .run_agent ("planner" , f"""Generate a concise, academic paper title for the following research.
1297+ # Take the first substantive line
1298+ new_title = ""
1299+ for line in block .splitlines ():
1300+ line = line .strip ().lstrip ("#" ).lstrip ("-" ).lstrip ("*" ).strip ()
1301+ line = line .strip ("\" '" )
1302+ # Skip obvious meta lines
1303+ if not line or line .lower ().startswith (("title:" , "suggested title" )):
1304+ # For "Title: X" form, keep the X part
1305+ if ":" in line :
1306+ after = line .split (":" , 1 )[1 ].strip ().strip ("\" '" )
1307+ if after and len (after ) > 4 :
1308+ new_title = after
1309+ break
1310+ continue
1311+ if len (line ) < 4 :
1312+ continue
1313+ new_title = line
1314+ break
13001315
1301- ## Research Idea
1302- { idea [: 4000 ] }
1316+ if not new_title :
1317+ return
13031318
1304- ## Target Venue
1305- { venue }
1306-
1307- ## Background Research (if available)
1308- { dr_summary }
1309-
1310- Output ONLY the title — one line, no quotes, no explanation. The title should be:
1311- - Concise (8-15 words)
1312- - Descriptive of the main contribution
1313- - In the style of { venue or 'a top-tier ML conference' } papers
1314- """ , timeout = 120 )
1315-
1316- if result and result .strip ():
1317- new_title = _parse_title_from_agent_output (result )
1318- if new_title and len (new_title ) > 10 :
1319- self .config ["title" ] = new_title
1320- self .log (f"Generated title: { new_title } " , "INFO" )
1321- # Update config.yaml on disk
1319+ self .config ["title" ] = new_title
1320+ # Write to config.yaml on disk
1321+ config_path = self .code_dir / "config.yaml"
1322+ if config_path .exists ():
1323+ try :
13221324 import yaml
1323- config_path = self .code_dir / "config.yaml"
1324- if config_path .exists ():
1325- cfg = yaml .safe_load (config_path .read_text ()) or {}
1326- cfg ["title" ] = new_title
1327- config_path .write_text (yaml .dump (cfg , default_flow_style = False , allow_unicode = True ))
1328- self ._sync_db (title = new_title , name = new_title )
1325+ cfg = yaml .safe_load (config_path .read_text ()) or {}
1326+ cfg ["title" ] = new_title
1327+ config_path .write_text (
1328+ yaml .dump (cfg , default_flow_style = False ,
1329+ allow_unicode = True , sort_keys = False )
1330+ )
1331+ except Exception as e :
1332+ self .log (f"Could not update config.yaml with new title: { e } " , "WARN" )
1333+ self ._sync_db (title = new_title , name = new_title )
1334+ self .log (f"Title updated from idea.md: { new_title } " , "INFO" )
1335+
1336+ # ==================== Citation Bootstrapping ====================
13291337
13301338 def _bootstrap_citations_from_deep_research (self ):
13311339 """Extract paper titles from Deep Research report via LLM, then fetch BibTeX via API.
0 commit comments