@@ -608,7 +608,15 @@ def _strip_bibtex_fields(bib: str) -> str:
608608 if brace_depth <= 0 :
609609 current_field_kept = None # field complete
610610
611- return "\n " .join (result )
611+ cleaned = "\n " .join (result )
612+
613+ # Remove volume if it equals year (e.g. TMLR where volume=2025, year=2025)
614+ vol_match = re .search (r"volume\s*=\s*\{(\d{4})\}" , cleaned )
615+ year_match = re .search (r"year\s*=\s*\{(\d{4})\}" , cleaned )
616+ if vol_match and year_match and vol_match .group (1 ) == year_match .group (1 ):
617+ cleaned = re .sub (r"\s*volume\s*=\s*\{\d{4}\},?\n?" , "\n " , cleaned )
618+
619+ return cleaned
612620
613621
614622
@@ -944,10 +952,11 @@ def fix_bib(bib_path: str, results: list[VerificationResult]) -> None:
944952 content = content .replace (r .original_bibtex , tagged )
945953
946954 elif r .status == "NEEDS-CHECK" :
947- # Add [NEEDS-CHECK] comment if not already there
948- if "% [NEEDS-CHECK]" not in content .split (r .original_bibtex )[0 ].split ("\n " )[- 1 ]:
949- tagged = f"% [NEEDS-CHECK] { r .details } \n { r .original_bibtex } "
950- content = content .replace (r .original_bibtex , tagged )
955+ # Add note field inside the entry so it shows in PDF
956+ if "note" not in r .original_bibtex .lower ():
957+ modified_entry = r .original_bibtex .rstrip ().rstrip ("}" )
958+ modified_entry += r' note = {\textcolor{red}{[NEEDS-CHECK: citation not verified]}},' + "\n }"
959+ content = content .replace (r .original_bibtex , modified_entry )
951960
952961 bib_file .write_text (content )
953962
@@ -986,6 +995,9 @@ def append_papers_to_bib(bib_path: str, papers: list[Paper]) -> list[str]:
986995 if cite_key in existing_keys :
987996 continue
988997
998+ # Store BibTeX on the paper object for literature.yaml
999+ paper .bibtex = bib
1000+
9891001 source = paper .dblp_key and "dblp" or (paper .doi and "crossref" or paper .source )
9901002 tagged = f"% { _ARK_SOURCE_TAG } { source } ]\n { bib } "
9911003 new_entries .append (tagged )
@@ -1034,7 +1046,7 @@ def _write_needs_check_to_bib(bib_path: str, titles: list[str],
10341046 counter += 1
10351047
10361048 # Build entry with whatever info we have
1037- fields = [f" title = {{{{ { title } } }}}" ]
1049+ fields = [f" title = {{{ title } }}" ]
10381050 if author :
10391051 fields .append (f" author = {{{ author } et al.}}" )
10401052 if year :
@@ -1058,6 +1070,85 @@ def _write_needs_check_to_bib(bib_path: str, titles: list[str],
10581070 return added_keys
10591071
10601072
def regenerate_bib_from_literature(literature_path: str, bib_path: str) -> None:
    """Regenerate references.bib entirely from literature.yaml.

    This is the enforcement mechanism: literature.yaml is the single source
    of truth. Any entries the writer added to the bib outside of our system
    are discarded, because the bib file is overwritten with only the entries
    derived from literature.yaml. NEEDS-CHECK entries get a ``note`` field
    for PDF visibility.

    Args:
        literature_path: Path to literature.yaml. If the file does not exist,
            cannot be parsed, or does not contain a top-level mapping, the
            function returns without touching the bib file.
        bib_path: Path of the bib file to (over)write. Missing parent
            directories are created.

    Side effects:
        * Overwrites ``bib_path``.
        * When BibTeX had to be re-fetched for a reference that lacked a
          stored ``bibtex`` value, the fetched text is written back into
          literature.yaml (best effort) so the lookup is not repeated on
          the next run.
    """
    lit_file = Path(literature_path)
    if not lit_file.exists():
        return

    # Imported only once we know there is something to parse.
    import yaml

    try:
        data = yaml.safe_load(lit_file.read_text()) or {}
    except Exception:
        # Deliberate best effort: an unreadable or malformed literature.yaml
        # must not destroy the existing bib file, so leave everything as is.
        return
    if not isinstance(data, dict):
        # A top-level list/scalar is not a literature file we understand.
        return

    def _as_list(value):
        # A key that is present but null (or otherwise not a list) is
        # treated the same as a missing key.
        return value if isinstance(value, list) else []

    entries = []
    emitted_keys = set()  # str(bibtex_key) of every entry written so far
    cache_dirty = False

    # Collect all BibTeX from literature.yaml references
    for ref in _as_list(data.get("references")):
        if not isinstance(ref, dict):
            continue
        key = ref.get("bibtex_key", "")
        if not key or str(key) in emitted_keys:
            # Duplicate keys would produce a "repeated entry" error in BibTeX.
            continue

        bibtex = ref.get("bibtex")
        if not bibtex:
            # If bibtex is not stored, try to re-fetch it by title.
            title = ref.get("title", "")
            paper = _search_by_title(title) if title else None
            bibtex = fetch_bibtex(paper) if paper else None
            if bibtex:
                ref["bibtex"] = bibtex  # cache for next time
                cache_dirty = True
        if not bibtex:
            continue

        source = ref.get("source", "")
        tag = f"% [ARK:source={source}]" if source else "% [ARK:source=verified]"
        entries.append(f"{tag}\n{bibtex}")
        emitted_keys.add(str(key))

    # Persist re-fetched BibTeX so the cache comment above is actually true.
    if cache_dirty:
        try:
            # Serialise first: if dumping fails the file is never opened for
            # writing and therefore never truncated. The default (escaped,
            # ASCII-only) output cannot fail on encoding.
            dumped = yaml.safe_dump(data, default_flow_style=False, sort_keys=False)
            lit_file.write_text(dumped)
        except (OSError, TypeError, ValueError, yaml.YAMLError):
            # The cache is only an optimisation; regenerating the bib is the
            # real job, so carry on without it.
            pass

    # Add NEEDS-CHECK entries
    for nc in _as_list(data.get("needs_check")):
        if not isinstance(nc, dict):
            continue
        key = nc.get("bibtex_key", "")
        title = nc.get("title", "")
        author = nc.get("authors", "")
        year = nc.get("year", 0)
        if not key or not title or str(key) in emitted_keys:
            continue

        fields = [f"  title = {{{title}}}"]
        if author:
            fields.append(f"  author = {{{author} et al.}}")
        if year:
            fields.append(f"  year = {{{year}}}")
        fields.append(r"  note = {\textcolor{red}{[NEEDS-CHECK: citation not verified]}}")

        entry = (
            f"% [NEEDS-CHECK]\n"
            f"@misc{{{key},\n"
            + ",\n".join(fields) + ",\n"
            f"}}"
        )
        entries.append(entry)
        emitted_keys.add(str(key))

    # Write bib
    bib_file = Path(bib_path)
    bib_file.parent.mkdir(parents=True, exist_ok=True)
    bib_file.write_text("% ARK auto-managed references\n% Generated from literature.yaml — do not edit manually\n\n"
                        + "\n\n".join(entries) + "\n")
1151+
10611152# ═══════════════════════════════════════════════════════════
10621153# Query extraction from planner issues
10631154# ═══════════════════════════════════════════════════════════
@@ -1213,6 +1304,8 @@ def update_literature_yaml(literature_path: str, papers: list[Paper],
12131304 }
12141305 if paper .abstract :
12151306 entry ["abstract" ] = paper .abstract [:500 ]
1307+ if paper .bibtex :
1308+ entry ["bibtex" ] = paper .bibtex
12161309 # Mark importance based on Deep Research context
12171310 if ctx :
12181311 entry ["context" ] = ctx
@@ -1226,7 +1319,9 @@ def update_literature_yaml(literature_path: str, papers: list[Paper],
12261319
12271320def _write_needs_check_to_literature (literature_path : str , titles : list [str ],
12281321 cite_keys : list [str ] = None ,
1229- contexts : list [str ] = None ) -> None :
1322+ contexts : list [str ] = None ,
1323+ authors : list [str ] = None ,
1324+ years : list = None ) -> None :
12301325 """Append [NEEDS-CHECK] entries to literature.yaml for papers not found in any API."""
12311326 import yaml
12321327 from datetime import datetime
@@ -1256,6 +1351,12 @@ def _write_needs_check_to_literature(literature_path: str, titles: list[str],
12561351 }
12571352 if cite_keys and i < len (cite_keys ):
12581353 entry ["bibtex_key" ] = cite_keys [i ]
1354+ author = (authors [i ] if authors and i < len (authors ) else "" ) or ""
1355+ year = (years [i ] if years and i < len (years ) else 0 ) or 0
1356+ if author :
1357+ entry ["authors" ] = author
1358+ if year :
1359+ entry ["year" ] = year
12591360 ctx = (contexts [i ] if contexts and i < len (contexts ) else "" ) or ""
12601361 if ctx :
12611362 entry ["context" ] = ctx
@@ -1365,7 +1466,7 @@ def bootstrap_citations(
13651466 contexts = found_contexts )
13661467 if needs_check_titles :
13671468 _write_needs_check_to_literature (literature_path , needs_check_titles , needs_check_keys ,
1368- needs_check_contexts )
1469+ needs_check_contexts , needs_check_authors , needs_check_years )
13691470
13701471 return BootstrapResult (
13711472 found = found_papers ,
@@ -1446,18 +1547,36 @@ def _extract_keywords_from_title(title: str) -> str:
14461547# Mark [NEEDS-CHECK] citations in tex files
14471548# ═══════════════════════════════════════════════════════════
14481549
1449- def mark_needs_check_in_tex (bib_path : str , tex_dir : str ) -> int :
1550+ def mark_needs_check_in_tex (bib_path : str , tex_dir : str ,
1551+ literature_path : str = None ) -> int :
14501552 """Scan .tex files and add [NEEDS-CHECK] marker after any \\ cite of a needs-check entry.
14511553
1452- This runs after writer finishes, so it doesn't depend on writer compliance.
1554+ Reads NEEDS-CHECK keys from literature.yaml (single source of truth).
1555+ Falls back to parsing bib comments if literature.yaml not available.
14531556 Returns number of citations marked.
14541557 """
1455- # Find needs-check keys from bib
1456- entries = parse_bib ( bib_path )
1558+ import yaml
1559+
14571560 needs_check_keys = set ()
1458- for entry in entries :
1459- if "[NEEDS-CHECK]" in entry .get ("preceding_comments" , "" ):
1460- needs_check_keys .add (entry ["key" ])
1561+
1562+ # Primary: read from literature.yaml
1563+ if literature_path :
1564+ lit_file = Path (literature_path )
1565+ if lit_file .exists ():
1566+ try :
1567+ lit_data = yaml .safe_load (lit_file .read_text ()) or {}
1568+ for nc in lit_data .get ("needs_check" , []):
1569+ if isinstance (nc , dict ) and nc .get ("bibtex_key" ):
1570+ needs_check_keys .add (nc ["bibtex_key" ])
1571+ except Exception :
1572+ pass
1573+
1574+ # Fallback: parse bib comments
1575+ if not needs_check_keys :
1576+ entries = parse_bib (bib_path )
1577+ for entry in entries :
1578+ if "[NEEDS-CHECK]" in entry .get ("preceding_comments" , "" ):
1579+ needs_check_keys .add (entry ["key" ])
14611580
14621581 if not needs_check_keys :
14631582 return 0
@@ -1481,6 +1600,14 @@ def _add_marker(m, _marker=marker):
14811600 marked += count
14821601
14831602 if new_content != content :
1603+ # Ensure xcolor package is loaded for \textcolor to work
1604+ if r"\textcolor" in new_content and "xcolor" not in new_content :
1605+ new_content = re .sub (
1606+ r"(\\documentclass[^\n]*\n)" ,
1607+ r"\1\\usepackage{xcolor}\n" ,
1608+ new_content ,
1609+ count = 1 ,
1610+ )
14841611 tex_file .write_text (new_content )
14851612
14861613 return marked
0 commit comments