Skip to content

Commit 4655736

Browse files
JihaoXin and claude committed
Enforce page count at 3 checkpoints: too short → expand, too long → compress
For a venue with N pages, body text must be:
- Not over N pages (compress if exceeded)
- Last page at least 90% filled, i.e. >= N-0.1 pages (expand if too short)

Three checkpoints:
1. Dev phase: after initial draft writing
2. Execute: after each writing phase
3. Post-validate: after figure phase (figures may change layout)

Also:
- Programmatic \clearpage injection before \bibliography (ensures references always start on a new page, no agent needed)
- Unified _enforce_page_count() method replaces old compression-only check

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 918d895 commit 4655736

File tree

2 files changed

+117
-31
lines changed

2 files changed

+117
-31
lines changed

ark/execution.py

Lines changed: 112 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,116 @@ def _parse_overfull_pt(warning: str) -> float:
602602
m = re.search(r'([\d.]+)pt', warning)
603603
return float(m.group(1)) if m else 0.0
604604

605+
def _enforce_page_count(self, context: str = "post-writing") -> bool:
    """Check and enforce the body page count for the configured venue.

    For a 6-page venue:
    - Over 6.1 pages → one compression pass by the writer agent
    - Under 5.9 pages (last page < 90% filled) → one expansion pass
    - \\clearpage is injected before \\bibliography ahead of every compile,
      so the measured page count always reflects the file on disk

    Nothing is done when ``venue_pages`` is not configured, when the quota
    is exhausted, or when no body page count is available after compiling.

    Args:
        context: Where this check is called from (used as a log prefix).

    Returns:
        True if the page count is acceptable or the check was skipped (a
        paper that is still too short after expansion is only a warning).
        False if the paper is over the limit after the corrective pass,
        including when an expansion pass overshoots it.
    """
    venue_pages = self.config.get("venue_pages")
    if not venue_pages or self._quota_exhausted:
        return True

    def measure():
        # Inject first, then compile: the injection rewrites main.tex, so
        # measuring before it would report a stale page count.
        self._ensure_clearpage_before_bibliography()
        self.compile_latex()
        return getattr(self, '_body_page_count', 0)

    page_count = measure()
    if not page_count:
        return True

    min_pages = venue_pages - 1 + 0.9  # e.g., 6 pages → 5.9 minimum
    max_pages = venue_pages + 0.1      # e.g., 6 pages → 6.1 maximum
    latex_dir = self.config.get("latex_dir", "paper")

    self.log(f"[{context}] Page check: {page_count:.1f} body pages (target: {min_pages:.1f}–{max_pages:.1f})", "INFO")

    # Case 1: Over limit → compress
    if page_count > max_pages:
        self.log(f"[{context}] Over limit ({page_count:.1f} > {max_pages:.1f}), running compression...", "WARN")
        self.run_agent("writer", f"""## PAGE COMPRESSION — venue limit is {venue_pages} body pages

The paper body is currently {page_count:.1f} pages, exceeding the {venue_pages}-page limit.

Reduce to exactly {venue_pages} pages or just under. Strategies:
- Condense verbose paragraphs (aim for ~15% shorter text)
- Merge overlapping sentences in related work
- Move less essential subsections to \\appendix
- Reduce whitespace around figures/tables (use \\vspace{{-Xpt}})
- Do NOT remove key technical content or results

After changes:
1. Ensure `\\clearpage` before `\\bibliography` so references start on a new page
2. Compile: cd {latex_dir} && pdflatex -interaction=nonstopmode main.tex && pdflatex -interaction=nonstopmode main.tex
3. Body pages must be ≤ {venue_pages}
""", timeout=1800)
        page_count = measure()
        if page_count and page_count > max_pages:
            self.log(f"[{context}] Still over limit after compression: {page_count:.1f}", "ERROR")
            return False

    # Case 2: Under target → expand
    elif page_count < min_pages:
        self.log(f"[{context}] Under target ({page_count:.1f} < {min_pages:.1f}), running expansion...", "WARN")
        self.run_agent("writer", f"""## PAGE EXPANSION — paper is too short

The paper body is currently {page_count:.1f} pages. For a {venue_pages}-page venue, the last page (page {venue_pages}) must be at least 90% filled. Target: {min_pages:.1f}–{venue_pages:.0f} body pages.

Expand the paper by adding substantive content (NOT filler):
- Deepen the analysis/discussion section with more insights
- Add more related work comparisons and positioning
- Expand experimental methodology details (hyperparameters, setup)
- Add a limitations paragraph or future work discussion
- Expand figure captions with more context
- Do NOT add padding text, redundant restatements, or unnecessary whitespace

After changes:
1. Ensure `\\clearpage` before `\\bibliography` so references start on a new page
2. Compile: cd {latex_dir} && pdflatex -interaction=nonstopmode main.tex && pdflatex -interaction=nonstopmode main.tex
3. Body pages should be between {min_pages:.1f} and {venue_pages:.0f}
""", timeout=1800)
        page_count = measure()
        if page_count and page_count > max_pages:
            # The expansion pass can overshoot; an over-limit paper is a
            # hard failure, unlike one that is merely still a bit short.
            self.log(f"[{context}] Over limit after expansion: {page_count:.1f} > {max_pages:.1f}", "ERROR")
            return False
        if page_count and page_count < min_pages:
            self.log(f"[{context}] Still too short after expansion: {page_count:.1f}", "WARN")

    if page_count:
        self.log(f"[{context}] Final page count: {page_count:.1f}/{venue_pages} body pages", "INFO")
    return True
690+
691+
def _ensure_clearpage_before_bibliography(self):
    """Programmatically ensure \\clearpage appears before \\bibliography in main.tex.

    Every active ``\\bibliography{`` in ``self.latex_dir / "main.tex"`` that
    is not already preceded by ``\\clearpage`` (ignoring whitespace and blank
    lines in between) gets ``\\clearpage`` inserted on the line before it.
    Occurrences inside a ``%`` comment are left untouched. The file is only
    rewritten when something changed.

    Best-effort: a missing file is ignored and read/write errors are logged
    as warnings rather than raised.
    """
    import re as _re

    main_tex = self.latex_dir / "main.tex"
    if not main_tex.exists():
        return

    try:
        content = main_tex.read_text(encoding="utf-8")
    except (OSError, UnicodeError) as exc:
        self.log(f"Could not read {main_tex} for \\clearpage check: {exc}", "WARN")
        return

    clearpage = '\\clearpage'
    # An unescaped '%' (preceded by an even number of backslashes) starts a comment.
    comment_re = _re.compile(r'(?<!\\)(?:\\\\)*%')
    # Matches \bibliography{...} but not \bibliographystyle{...}.
    bib_re = _re.compile(r'\\bibliography\{')

    def is_commented(pos):
        """Return True if position ``pos`` lies after a comment marker on its line."""
        line_start = content.rfind('\n', 0, pos) + 1
        return comment_re.search(content, line_start, pos) is not None

    def inject(match):
        start = match.start()
        if is_commented(start):
            # Injecting here would push \bibliography onto a new, uncommented line.
            return match.group(0)
        head = content[:start].rstrip()
        if head.endswith(clearpage) and not is_commented(len(head) - len(clearpage)):
            return match.group(0)  # already preceded by an active \clearpage
        return clearpage + '\n' + match.group(0)

    new_content = bib_re.sub(inject, content)
    if new_content == content:
        return

    try:
        main_tex.write_text(new_content, encoding="utf-8")
    except OSError as exc:
        # Non-critical: the writer agent is also asked to add \clearpage.
        self.log(f"Could not inject \\clearpage into {main_tex}: {exc}", "WARN")
        return
    self.log("Injected \\clearpage before \\bibliography", "INFO")
714+
605715
def _run_writing_phase(self, action_plan: dict, prior_context: str = ""):
606716
"""Execute writing phase for all writing tasks."""
607717
issues = action_plan.get("issues", [])
@@ -915,37 +1025,8 @@ def _run_writing_phase(self, action_plan: dict, prior_context: str = ""):
9151025
self._save_action_plan(action_plan)
9161026
self.log_step("Writing phase completed", "success")
9171027

918-
# Post-writing page count check: if still over limit, one compression pass
919-
venue_pages = self.config.get("venue_pages")
920-
if venue_pages and not self._quota_exhausted:
921-
self.compile_latex()
922-
page_count = getattr(self, '_body_page_count', 0)
923-
if page_count and page_count > venue_pages + 0.10:
924-
self.log(f"Post-writing compression needed: {page_count:.1f} body pages (limit {venue_pages})", "WARN")
925-
self.run_agent("writer", f"""
926-
## PAGE COMPRESSION — venue limit is {venue_pages} body pages
927-
928-
The paper body currently exceeds the limit. Reduce it to {venue_pages} pages or fewer.
929-
Move less essential subsections to \\appendix, condense verbose text, merge overlapping sections.
930-
Do NOT add new content — only compress.
931-
932-
After each round of changes:
933-
1. Ensure `\\clearpage` appears immediately before `\\bibliography{{...}}` so References starts on a fresh page
934-
2. Compile with `pdflatex -interaction=nonstopmode main.tex` (run twice)
935-
3. Count body pages (before References section)
936-
4. If still over {venue_pages}, compress more
937-
5. Stop when body pages <= {venue_pages}
938-
""", timeout=1800)
939-
940-
if not self._quota_exhausted:
941-
self.compile_latex()
942-
final_count = getattr(self, '_body_page_count', 0)
943-
if final_count and final_count > venue_pages + 0.10:
944-
self.log(f"WARNING: Paper still at {final_count:.1f} body pages after compression (limit {venue_pages})", "ERROR")
945-
elif final_count:
946-
self.log(f"Page count OK: {final_count:.1f}/{venue_pages} body pages", "INFO")
947-
elif page_count:
948-
self.log(f"Page count OK: {page_count:.1f}/{venue_pages} body pages", "INFO")
1028+
# Post-writing page count check: compress if over, expand if too short
1029+
self._enforce_page_count(context="post-writing")
9491030

9501031
return True
9511032
else:

ark/pipeline.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,9 @@ def run_paper_iteration(self) -> bool:
406406
self.log_step_header(step_num, total_steps, "Validate", "end")
407407
self.save_step_checkpoint(step_num, "Validate")
408408

409+
# Final page count check after figure phase (figures may change layout)
410+
self._enforce_page_count(context="post-validate-final")
411+
409412
self.save_paper_state(paper_state)
410413
self._last_score = score
411414

@@ -1183,6 +1186,8 @@ def _run_dev_phase(self):
11831186
self.log_step("Compiling initial draft...", "progress")
11841187
if self.compile_latex():
11851188
self.log_step("Initial draft compiled successfully", "success")
1189+
# Page count check after initial writing
1190+
self._enforce_page_count(context="dev-phase-initial-draft")
11861191
# Send initial draft PDF via Telegram
11871192
if self.telegram.is_configured:
11881193
pdf_path = self.latex_dir / "main.pdf"

0 commit comments

Comments
 (0)