@@ -88,6 +88,97 @@ def _extract_latex_errors(self, log_path: Path) -> str:
8888
8989 return f"Found { len (blocks )} error(s) in main.log:\n \n " + "\n ---\n " .join (blocks )
9090
def _compile_until_success(self, context: str = "") -> bool:
    """Keep fixing and recompiling until LaTeX compiles successfully.

    Each attempt compiles first; on failure a repair strategy is applied
    and the next attempt re-checks the result.

    Strategy escalation:
      1. Attempts 1-3: writer fixes errors normally.
      2. Attempt 4: programmatic fix via ``_auto_fix_latex`` followed by
         an immediate recompile; if that still fails the writer gets the
         remaining errors.
      3. Attempts 5-7: writer with an aggressive "comment out broken
         parts" prompt.
      4. Attempts 8+: writer rewrites broken sections from scratch.

    After the last repair one more compile is run so that the final fix
    is actually verified instead of being discarded.

    Args:
        context: Accepted for caller compatibility; this method does not
            currently read it.

    Returns:
        True as soon as a compile succeeds, False if the paper still does
        not compile after ``MAX_ATTEMPTS`` repair rounds.
    """
    MAX_ATTEMPTS = 10
    last_errors = ""

    for attempt in range(1, MAX_ATTEMPTS + 1):
        success, errors = self.compile_latex_with_errors()
        if success:
            if attempt > 1:
                self.log_step(f"Compilation fixed on attempt {attempt}", "success")
            else:
                self.log_step("Initial draft compiled successfully", "success")
            return True

        self.log_step(f"Compile attempt {attempt} failed", "warning")

        if attempt <= 3:
            # Strategy 1: normal writer fix.
            self.run_agent(
                "writer",
                f"LaTeX compilation failed. Read paper/main.tex carefully, find and fix "
                f"the syntax errors below. Do NOT remove content — fix the LaTeX syntax.\n\n{errors}",
            )
        elif attempt == 4:
            # Strategy 2: programmatic cleanup, verified right away.
            self.log_step("Trying programmatic fixes...", "progress")
            self._auto_fix_latex()
            success, errors = self.compile_latex_with_errors()
            if success:
                self.log_step("Programmatic fix worked", "success")
                return True
            # Cleanup alone was not enough: hand the remaining errors to the writer.
            self.run_agent(
                "writer",
                f"After programmatic cleanup, LaTeX still fails. Fix these remaining errors:\n\n{errors}",
            )
        elif attempt <= 7:
            # Strategy 3: aggressive — comment out broken parts.
            # Only the first 200 characters are compared, so later noise in
            # the error text does not hide a repeated leading error.
            same_error = errors[:200] == last_errors[:200]
            self.run_agent(
                "writer",
                f"LaTeX has failed {attempt} times{' with the same error' if same_error else ''}. "
                f"Take aggressive action: COMMENT OUT the broken section entirely and replace "
                f"with a brief placeholder like '% TODO: fix this section'. "
                f"The paper MUST compile.\n\n{errors}",
            )
        else:
            # Strategy 4: nuclear — rewrite from scratch.
            self.run_agent(
                "writer",
                f"LaTeX has failed {attempt} times. Read the ENTIRE main.tex file, identify ALL "
                f"syntax errors, and rewrite any broken sections from scratch. Remove any "
                f"non-standard packages or commands that might cause issues. "
                f"Priority: the paper MUST compile, even if some content is lost.\n\n{errors}",
            )

        last_errors = errors

    # The loop compiles *before* each repair, so the repair made on the last
    # attempt has not been checked yet. Verify it before reporting failure.
    success, _ = self.compile_latex_with_errors()
    if success:
        self.log_step(
            f"Compilation fixed after final repair (attempt {MAX_ATTEMPTS})", "success"
        )
        return True

    self.log_step(f"Compilation failed after {MAX_ATTEMPTS} attempts", "error")
    return False
156+
def _auto_fix_latex(self):
    """Apply programmatic fixes for common LaTeX compilation issues.

    Currently one fix is implemented: bytes that are not valid UTF-8 are
    dropped from ``main.tex`` and from every ``*.bib`` file directly inside
    ``self.latex_dir``. Files that already decode cleanly are left
    untouched (they are not rewritten). Each rewritten file is reported
    through ``self.log`` at ``"INFO"`` level.

    Does nothing at all when ``main.tex`` does not exist.
    """
    main_tex = self.latex_dir / "main.tex"
    if not main_tex.exists():
        return

    # main.tex first, then the bibliography files; one code path for both.
    for path in (main_tex, *self.latex_dir.glob("*.bib")):
        raw = path.read_bytes()
        try:
            raw.decode("utf-8")
        except UnicodeDecodeError:
            # errors="ignore" silently drops the offending bytes; the
            # surrounding valid text is kept as-is.
            cleaned = raw.decode("utf-8", errors="ignore").encode("utf-8")
            path.write_bytes(cleaned)
            self.log(f"Auto-fix: stripped non-UTF8 bytes from {path.name}", "INFO")
181+
91182 def compile_latex (self ) -> bool :
92183 """Compile the LaTeX paper.
93184
@@ -107,13 +198,14 @@ def compile_latex(self) -> bool:
107198 result = subprocess .run (
108199 cmd ,
109200 capture_output = True ,
110- text = True ,
111201 timeout = 120 ,
112202 cwd = self .latex_dir ,
113203 )
204+ stderr = result .stderr .decode ("utf-8" , errors = "replace" )
205+ stdout = result .stdout .decode ("utf-8" , errors = "replace" )
114206 if result .returncode != 0 and "main.tex" in cmd :
115- self ._last_compile_stderr = result . stderr [:1000 ] or result . stdout [- 1000 :]
116- self .log (f"LaTeX compilation warning: { result . stderr [:500 ]} " )
207+ self ._last_compile_stderr = stderr [:1000 ] or stdout [- 1000 :]
208+ self .log (f"LaTeX compilation warning: { stderr [:500 ]} " )
117209
118210 pdf_path = self .latex_dir / "main.pdf"
119211 if pdf_path .exists () and pdf_path .stat ().st_size > 0 :
@@ -284,175 +376,73 @@ def _generate_figure_config(self) -> dict:
284376 return geo
285377
def _run_figure_phase(self):
    """Figure Phase: generate figures and ensure template-aware sizing.

    Steps:
      1. Generate figure_config.json (geometry).
      2. Load the figure manifest.
      3. If the matplotlib figure script exists: run it with protected
         figures backed up beforehand and restored afterwards, register
         new ``fig*`` outputs in the manifest as ``"matplotlib"``, then
         run programmatic overlap detection (3.1) and regenerate once if
         overlaps were reported.
      4. Generate AI concept figures when
         ``config["figure_generation"] == "nano_banana"``.
      5. Compile LaTeX.

    Figure *quality* issues are handled by the reviewer, not here.

    Overlap-detection failures are logged and treated as non-fatal. An
    exception from the first figure-script run propagates to the caller,
    but protected figures are restored before it does.
    """
    from ark.figure_manifest import (
        load_manifest, save_manifest, register_figure,
        backup_protected, restore_protected,
    )

    # Step 1: Generate geometry config
    geo = self._generate_figure_config()

    # Step 2: Load manifest (auto-migrates if missing)
    manifest = load_manifest(self.figures_dir)

    def regenerate_preserving_protected():
        # Run the figure script, always restoring protected figures —
        # even if the script raises — so a crash cannot leave
        # AI-generated files overwritten.
        backups = backup_protected(self.figures_dir, manifest)
        try:
            self.generate_figures()
        finally:
            restore_protected(self.figures_dir, backups, log_fn=self.log)

    # Step 3: Run figure generation script
    script_path = self.config.get("create_figures_script", "scripts/create_paper_figures.py")
    full_script = self.code_dir / script_path
    if full_script.exists():
        self.log_step("Running figure generation script...", "progress")
        regenerate_preserving_protected()

        # Register matplotlib outputs that the manifest does not know yet.
        # Entries that are already present are left untouched.
        for fig_file in self.figures_dir.glob("fig*"):
            if fig_file.suffix not in (".pdf", ".png", ".jpg"):
                continue
            if fig_file.name not in manifest.get("figures", {}):
                register_figure(manifest, fig_file.name, "matplotlib")
        save_manifest(self.figures_dir, manifest)

        # Step 3.1: Programmatic overlap detection and auto-fix
        try:
            from ark.figure_overlap import check_and_fix_figures
            overlap_report = check_and_fix_figures(
                full_script, self.figures_dir, geo, log_fn=self.log,
            )
            if overlap_report.get("summary", {}).get("with_overlaps", 0) > 0:
                self.log_step("Regenerating figures after overlap fixes...", "progress")
                regenerate_preserving_protected()
        except Exception as e:
            # Best-effort step: never let overlap tooling abort the phase.
            self.log(f"Overlap detection error (non-fatal): {e}", "WARN")
    else:
        self.log_step(f"No figure script at {script_path}, skipping generation", "info")

    # Step 4: Generate AI concept figures (Nano Banana)
    if self.config.get("figure_generation") == "nano_banana":
        self.log_step("Generating AI concept figures (Nano Banana)...", "progress")
        self._generate_nano_banana_figures()

    # Step 5: Compile LaTeX
    self.compile_latex()
457447
458448 def _should_skip_figure_phase (self ) -> bool :
@@ -640,6 +630,7 @@ def _generate_nano_banana_figures(self):
640630 self .log (f" Generating: { name } (placement={ placement } , { fig_width :.1f} in, ratio={ aspect_ratio } )..." , "INFO" )
641631
642632 # Try PaperBanana pipeline first (best quality)
633+ source = "paperbanana"
643634 ok = self ._try_paperbanana (
644635 name = name ,
645636 caption = caption ,
@@ -651,6 +642,7 @@ def _generate_nano_banana_figures(self):
651642
652643 # Fallback to our Nano Banana pipeline
653644 if not ok :
645+ source = "nano_banana"
654646 self .log (f" PaperBanana unavailable, falling back to Nano Banana..." , "INFO" )
655647 from ark .nano_banana import generate_figure_pipeline
656648 ok = generate_figure_pipeline (
@@ -668,6 +660,11 @@ def _generate_nano_banana_figures(self):
668660 if ok :
669661 generated += 1
670662 self .log (f" Generated: { output_path .name } " , "INFO" )
663+ # Register in manifest
664+ from ark .figure_manifest import load_manifest , save_manifest , register_figure
665+ manifest = load_manifest (self .figures_dir )
666+ register_figure (manifest , output_path .name , source )
667+ save_manifest (self .figures_dir , manifest )
671668 else :
672669 self .log (f" Failed: { name } " , "WARN" )
673670
0 commit comments