Skip to content

Commit 87fe329

Browse files
JihaoXin and claude committed
Fix figure+page pipeline: figures first, then write, clearpage guaranteed
Root cause: The writer didn't know which figures existed, so it created duplicates. Page enforcement and clearpage injection were also broken.

Figures-first principle (Dev phase):
- Remove background parallelization — correctness over speed
- Generate all figures (matplotlib + PaperBanana) BEFORE the writer starts
- Pass _list_available_figures() to the writer prompt with explicit instructions not to recreate AI concept figures

Writer coordination (Review loop):
- Both individual and batch writer prompts now include the figure list
- Writer is told which figures are AI concept (>150KB) vs matplotlib

clearpage fix:
- _ensure_clearpage_before_bibliography() rewritten with a simple string replace (the old regex logic was broken)
- Called before every compile in pre-delivery and dev-phase-delivery

Skip logic fix:
- _generate_nano_banana_figures now only skips files >100KB (real AI figs)
- Small placeholder files (<100KB) get regenerated by PaperBanana

Page enforcement:
- Dev phase: enforce after the initial draft compile
- Review loop: enforce before every Telegram PDF send

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 1f57e56 commit 87fe329

File tree

3 files changed

+80
-62
lines changed

3 files changed

+80
-62
lines changed

ark/compiler.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -620,10 +620,13 @@ def _generate_nano_banana_figures(self):
620620
placement = fig.get("placement", "full_width")
621621
output_path = self.figures_dir / f"{name}.png"
622622

623-
# Skip if file already exists and is non-empty
624-
if output_path.exists() and output_path.stat().st_size > 0:
625-
self.log(f" Skipping {name}: already exists", "INFO")
623+
# Skip if a real AI-generated figure already exists (>100KB)
624+
# Small files (<100KB) are likely writer-created placeholders — regenerate
625+
if output_path.exists() and output_path.stat().st_size > 100_000:
626+
self.log(f" Skipping {name}: AI figure already exists ({output_path.stat().st_size // 1024}KB)", "INFO")
626627
continue
628+
if output_path.exists() and output_path.stat().st_size > 0:
629+
self.log(f" Replacing small placeholder {name} ({output_path.stat().st_size // 1024}KB) with AI figure", "INFO")
627630

628631
# Determine aspect ratio and width based on agent's placement decision
629632
if columns == 1:

ark/execution.py

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -708,22 +708,18 @@ def _ensure_clearpage_before_bibliography(self):
708708
return
709709
try:
710710
content = main_tex.read_text()
711-
# Check if \clearpage already before \bibliography
712-
if r'\clearpage' not in content or \
713-
content.index(r'\bibliography') < content.rindex(r'\clearpage') if r'\clearpage' in content else True:
714-
# Need to check more carefully
715-
pass
716-
717-
# Find \bibliography and ensure \clearpage is right before it
718-
import re as _re
719-
pattern = r'(?<!\\clearpage\n)(\\bibliography\{)'
720-
if _re.search(pattern, content):
721-
new_content = _re.sub(pattern, r'\\clearpage\n\1', content)
722-
if new_content != content:
723-
main_tex.write_text(new_content)
724-
self.log("Injected \\clearpage before \\bibliography", "INFO")
725-
except Exception:
726-
pass # Non-critical, agent can handle it
711+
marker = r'\bibliography{'
712+
if marker not in content:
713+
return
714+
# Check if \clearpage already precedes \bibliography
715+
if '\\clearpage\n' + marker in content or '\\clearpage\n\n' + marker in content:
716+
return
717+
# Insert \clearpage before \bibliography
718+
content = content.replace(marker, '\\clearpage\n' + marker)
719+
main_tex.write_text(content)
720+
self.log("Injected \\clearpage before \\bibliography", "INFO")
721+
except Exception as e:
722+
self.log(f"Failed to inject \\clearpage: {e}", "WARN")
727723

728724
def _run_writing_phase(self, action_plan: dict, prior_context: str = ""):
729725
"""Execute writing phase for all writing tasks."""
@@ -953,9 +949,13 @@ def _run_writing_phase(self, action_plan: dict, prior_context: str = ""):
953949
literature_context = self._get_literature_context_for_task(task)
954950

955951
page_warning = self._get_page_constraint_warning()
952+
figure_list = self._list_available_figures()
956953
self.run_agent("writer", f"""
957954
You have only one task to complete. Please complete it carefully and thoroughly.
958955
{page_warning}
956+
## Available Figures (DO NOT recreate AI concept figures)
957+
{figure_list}
958+
959959
## Task: {task_id} - {task_title}
960960
{task_desc}
961961
{literature_context}
@@ -1002,9 +1002,13 @@ def _run_writing_phase(self, action_plan: dict, prior_context: str = ""):
10021002
""")
10031003

10041004
page_warning = self._get_page_constraint_warning()
1005+
figure_list = self._list_available_figures()
10051006
self.run_agent("writer", f"""
10061007
Please update the paper {latex_dir_name}/main.tex according to the following review revision tasks.
10071008
{page_warning}
1009+
## Available Figures (DO NOT recreate AI concept figures)
1010+
{figure_list}
1011+
10081012
## Revision Task List
10091013
10101014
{''.join(task_list)}

ark/pipeline.py

Lines changed: 54 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -446,9 +446,10 @@ def run_paper_iteration(self) -> bool:
446446

447447
# Recompile after writing phase to get the latest PDF
448448
self.log_step("Recompiling after improvements...", "progress")
449+
self._ensure_clearpage_before_bibliography()
449450
self.compile_latex()
450451

451-
# Hard page count enforcement — the ONLY place we enforce before delivery
452+
# Hard page count enforcement before delivery
452453
self._enforce_page_count(context="pre-delivery")
453454

454455
# Send iteration summary + PDF to Telegram
@@ -1128,36 +1129,39 @@ def _run_dev_phase(self):
11281129
self.log_section("✏️ Writing Initial Paper Draft")
11291130
self._send_dev_phase_telegram("writing", 0, 0)
11301131

1131-
# Create plotting script from experiment results (if not already present)
1132+
# ── FIGURES FIRST, THEN WRITE ──
1133+
# All figures must be ready before writer starts, so writer can reference them.
1134+
1135+
# Step A: Create plotting script from experiment results
11321136
self._create_plotting_script_if_needed()
11331137

1134-
# Generate matplotlib figures from results
1135-
self.log_step("Generating figures from experiment results...", "progress")
1138+
# Step B: Generate matplotlib figures
1139+
self.log_step("Generating statistical figures from experiment results...", "progress")
11361140
self.generate_figures()
11371141

1138-
# Start AI concept figure generation in background (slow, ~7min/fig)
1139-
# Writer can proceed in parallel — it writes text first, figures are \includegraphics refs
1140-
nano_banana_future = None
1142+
# Step C: Generate AI concept figures (sequential — must complete before writer)
11411143
if self.config.get("figure_generation") == "nano_banana":
1142-
from concurrent.futures import ThreadPoolExecutor
1143-
self._nano_banana_executor = ThreadPoolExecutor(max_workers=1)
1144-
self.log_step("Starting AI concept figure generation (background)...", "progress")
1145-
nano_banana_future = self._nano_banana_executor.submit(self._generate_nano_banana_figures)
1144+
self.log_step("Generating AI concept figures (PaperBanana)...", "progress")
1145+
self._generate_nano_banana_figures()
1146+
1147+
# Step D: List all available figures for the writer
1148+
figure_list = self._list_available_figures()
11461149

1147-
# Writer produces complete initial draft
1150+
# Step E: Writer writes paper with KNOWN figure filenames
11481151
paper_requirements = self.load_paper_requirements()
11491152
req_summary = yaml.dump(paper_requirements, allow_unicode=True) if paper_requirements else "No special requirements"
11501153
findings_summary = self._load_findings_summary()
11511154

1155+
venue_pages = self.config.get('venue_pages', 9)
1156+
latex_dir = self.config.get('latex_dir', 'paper')
1157+
figures_dir = self.config.get('figures_dir', 'paper/figures')
1158+
11521159
base_prompt = self.config.get("initial_paper_writing_prompt", "")
11531160
if base_prompt:
11541161
prompt = base_prompt.replace("{req_summary}", req_summary)
1155-
# Enhance with findings context
11561162
prompt += f"\n\n## Experiment Findings\n{findings_summary}"
1163+
prompt += f"\n\n## Available Figures (already generated)\n{figure_list}"
11571164
else:
1158-
venue_pages = self.config.get('venue_pages', 9)
1159-
latex_dir = self.config.get('latex_dir', 'paper')
1160-
figures_dir = self.config.get('figures_dir', 'paper/figures')
11611165
prompt = f"""Write a COMPLETE, SUBMISSION-READY research paper draft.
11621166
11631167
## Research Idea
@@ -1169,6 +1173,15 @@ def _run_dev_phase(self):
11691173
## Paper Requirements
11701174
{req_summary}
11711175
1176+
## Available Figures (already generated — DO NOT recreate these)
1177+
{figure_list}
1178+
1179+
**CRITICAL**: The figures above are already generated. Use \\includegraphics to include them.
1180+
- AI concept figures (marked as "AI concept") must NOT be recreated as TikZ or matplotlib.
1181+
- Statistical plots (marked as "matplotlib") are already generated from experiment data.
1182+
- Use the EXACT filenames listed above in your \\includegraphics commands.
1183+
- For multi-column templates, use \\begin{{figure*}} for wide concept figures, \\begin{{figure}} for single plots.
1184+
11721185
## MANDATORY — every item below is required, NO exceptions:
11731186
11741187
### 1. All sections must be fully written (zero placeholders)
@@ -1180,51 +1193,35 @@ def _run_dev_phase(self):
11801193
- Analysis/Discussion: explain WHY results are good/bad, failure cases
11811194
- Conclusion: 1 paragraph summary + 1 paragraph future work
11821195
1183-
### 2. Figures are REQUIRED (paper will fail without them)
1184-
- Minimum 2 figures in the body:
1185-
a) System/architecture overview (TikZ diagram OR simple block diagram in LaTeX)
1186-
b) Main results figure (bar chart or line plot from actual results data)
1187-
- Each figure needs: \\caption{{...}} and \\label{{fig:...}}
1188-
- Generate result figures using Python: save to {figures_dir}/ then \\includegraphics
1189-
1190-
### 3. Data integrity
1196+
### 2. Data integrity
11911197
- Every performance claim must use actual numbers from findings
11921198
- Include at least one \\begin{{table}} comparing against baselines
11931199
- No vague statements like "our method is better" — use exact percentages
11941200
1195-
### 4. Page target: {venue_pages} pages of body text
1196-
- Every section must be substantively written
1197-
- Related Work and Experiments sections should each be 1.5-2 pages
1198-
- Do NOT leave any section with only 1-2 sentences
1201+
### 3. Page target: {venue_pages} pages of body text
1202+
- The last page must be at least 90% filled
1203+
- Ensure `\\clearpage` before `\\bibliography{{...}}`
11991204
1200-
### 5. LaTeX mechanics
1205+
### 4. LaTeX mechanics
12011206
- Edit {latex_dir}/main.tex directly
12021207
- Verify compilation: cd {latex_dir} && pdflatex -interaction=nonstopmode main.tex
1203-
- All \\ref and \\cite must resolve (no undefined references)
1204-
- If figures don't exist yet, create simple placeholder TikZ diagrams
1208+
- All \\ref and \\cite must resolve
12051209
12061210
Produce the complete paper. Do not stop until all sections are written and it compiles.
12071211
"""
12081212

12091213
self.run_agent("writer", prompt, timeout=3600)
12101214

1211-
# Wait for background AI figure generation to complete (if running)
1212-
if nano_banana_future is not None:
1213-
self.log_step("Waiting for AI concept figures to complete...", "progress")
1214-
try:
1215-
nano_banana_future.result(timeout=1200) # 20 min max
1216-
self.log_step("AI concept figures ready", "success")
1217-
except Exception as e:
1218-
self.log(f"AI concept figure generation failed: {e}", "WARN")
1219-
finally:
1220-
self._nano_banana_executor.shutdown(wait=False)
1221-
1222-
# Compile initial draft (must succeed before moving to review)
1215+
# Step F: Inject \clearpage before \bibliography + enforce page count
1216+
self._ensure_clearpage_before_bibliography()
12231217
self.log_step("Compiling initial draft...", "progress")
12241218
draft_compiled = self._compile_until_success(
12251219
context=f"Dev Phase complete ({dev_state['iteration']} iterations)"
12261220
)
12271221

1222+
if draft_compiled:
1223+
self._enforce_page_count(context="dev-phase-delivery")
1224+
12281225
if draft_compiled and self.telegram.is_configured:
12291226
pdf_path = self.latex_dir / "main.pdf"
12301227
if pdf_path.exists():
@@ -1551,6 +1548,20 @@ def _create_plotting_script_if_needed(self):
15511548
else:
15521549
self.log(f"Coder agent did not create {script_rel}", "WARN")
15531550

1551+
def _list_available_figures(self, ai_min_kb: int = 150) -> str:
    """Return a bullet list of the figure files in ``self.figures_dir``.

    Each ``.png``/``.pdf`` file becomes one line of the form
    ``- <name> (<size>KB, <type>)``. The type label is inferred purely
    from file size: anything larger than ``ai_min_kb`` kilobytes is
    labelled as an AI concept diagram, everything else as a matplotlib
    plot. The result is meant to be pasted into a writer prompt.

    Args:
        ai_min_kb: Size in KB above which a file is labelled as an AI
            concept diagram. Defaults to 150, the original cut-off.

    Returns:
        The newline-joined listing, or ``"No figures generated yet."``
        when the directory is missing or holds no figure files.
    """
    figures_dir = self.figures_dir
    # is_dir() rather than exists(): a stray regular file at this path
    # would otherwise make iterdir() raise NotADirectoryError.
    if not figures_dir.is_dir():
        return "No figures generated yet."

    # NOTE(review): _generate_nano_banana_figures skips regeneration for
    # files larger than 100_000 bytes, while this label uses 150KB. A file
    # between the two is kept by the generator but labelled "matplotlib"
    # here — confirm whether the two thresholds should be unified.
    entries = []
    for path in sorted(figures_dir.iterdir()):
        # Case-insensitive so that e.g. "overview.PNG" is not dropped.
        if path.suffix.lower() not in (".png", ".pdf"):
            continue
        try:
            # Skip sub-directories and broken symlinks that merely look
            # like figures; the writer can only \includegraphics real files.
            if not path.is_file():
                continue
            size_kb = path.stat().st_size // 1024
        except OSError:
            # Entry vanished or is unreadable: omit it instead of failing
            # the whole listing (and with it the writer prompt).
            continue
        if size_kb > ai_min_kb:
            fig_type = "AI concept diagram — DO NOT recreate"
        else:
            fig_type = "matplotlib statistical plot"
        entries.append(f"- {path.name} ({size_kb}KB, {fig_type})")

    return "\n".join(entries) if entries else "No figures generated yet."
1564+
15541565
def _send_dev_phase_telegram(self, event: str, current: int, total: int):
15551566
"""Send dev phase notifications to Telegram."""
15561567
if not self.telegram.is_configured:

0 commit comments

Comments
 (0)