@@ -277,14 +277,76 @@ def _count_pdf_pages(self, pdf_path: Path) -> int:
277277 return 0
278278
def _count_body_pages(self, pdf_path: Path) -> float:
    """Count body pages (before References section).

    Primary method: read \\arkBodyEndY / \\arkPageH from the .aux file
    (injected by _ensure_clearpage_before_bibliography via \\pdfsavepos).
    This gives the exact y-position where body text ends, immune to
    page-number / header / footer interference.

    Fallback: PyMuPDF text-block analysis (less accurate).

    Args:
        pdf_path: Path of the compiled PDF; forwarded unchanged to both
            measurement strategies.

    Returns:
        A float: e.g. 5.8 means 5 full pages + last page 80% filled.
    """
    # Try the aux-based measurement first. Compare against None rather than
    # relying on truthiness so that a legitimate 0.0 result is not discarded.
    aux_count = self._count_body_pages_from_aux(pdf_path)
    if aux_count is not None:
        return aux_count

    # Fallback: PyMuPDF text-block analysis.
    return self._count_body_pages_from_pdf(pdf_path)
298+
299+ def _count_body_pages_from_aux (self , pdf_path : Path ) -> float | None :
300+ """Read body-end position from .aux file (written by \\ pdfsavepos).
301+
302+ Returns body page count as a float, or None if aux data unavailable.
303+ """
304+ aux_path = self .latex_dir / "main.aux"
305+ if not aux_path .exists ():
306+ return None
307+ try :
308+ aux_text = aux_path .read_text (errors = "replace" )
309+ import re
310+ m_y = re .search (r'\\gdef\\arkBodyEndY\{(\d+)\}' , aux_text )
311+ m_h = re .search (r'\\gdef\\arkPageH\{(\d+)\}' , aux_text )
312+ if not m_y or not m_h :
313+ return None
314+
315+ body_end_y_sp = int (m_y .group (1 )) # sp from page bottom
316+ page_height_sp = int (m_h .group (1 )) # total page height in sp
317+ if page_height_sp <= 0 :
318+ return None
319+
320+ # fill_ratio = fraction of page used (from top)
321+ fill_ratio = 1.0 - (body_end_y_sp / page_height_sp )
322+
323+ # Determine which page the body ends on by finding References page
324+ import fitz
325+ doc = fitz .open (str (pdf_path ))
326+ ref_page_idx = None
327+ for i in range (len (doc )):
328+ text = doc [i ].get_text ()
329+ if any (line .strip () == 'References' for line in text .split ('\n ' )):
330+ ref_page_idx = i
331+ break
332+ doc .close ()
333+
334+ if ref_page_idx is None :
335+ return None # can't determine without References marker
336+
337+ # Body ends on the page before References (since we inject
338+ # the marker right before \clearpage\bibliography)
339+ last_body_idx = max (ref_page_idx - 1 , 0 )
340+ result = last_body_idx + fill_ratio
341+ self .log (f"Body page count (aux): { result :.2f} "
342+ f"(page { last_body_idx + 1 } , { fill_ratio :.1%} filled)" , "DEBUG" )
343+ return result
344+ except Exception as e :
345+ self .log (f"Aux-based page count failed: { e } " , "DEBUG" )
346+ return None
347+
348+ def _count_body_pages_from_pdf (self , pdf_path : Path ) -> float :
349+ """Fallback: count body pages via PyMuPDF text-block analysis."""
288350 try :
289351 import fitz
290352 doc = fitz .open (str (pdf_path ))
@@ -307,7 +369,6 @@ def _count_body_pages(self, pdf_path: Path) -> float:
307369 # (if References has its own page via \clearpage)
308370 # OR the same page (if References starts mid-page)
309371 ref_page = doc [ref_page_idx ]
310- ref_page_text = ref_page .get_text ()
311372
312373 # Check if References is at the very top of its page (i.e., \clearpage was used)
313374 ref_y = 0
@@ -354,8 +415,6 @@ def _count_body_pages(self, pdf_path: Path) -> float:
354415 fill_ratio = right_last_y / page_height
355416 elif ref_at_top :
356417 # Single column, References on separate page: check last body page fill
357- # Filter out headers/footers: ignore blocks in top 8% and bottom 5% of page,
358- # and blocks shorter than 10 chars (page numbers, headers)
359418 body_blocks = [
360419 b for b in blocks
361420 if b [3 ] > page_height * 0.06 # below header
0 commit comments