diff --git a/src/zotero_arxiv_daily/construct_email.py b/src/zotero_arxiv_daily/construct_email.py index 193f8ba71..60d3b604c 100644 --- a/src/zotero_arxiv_daily/construct_email.py +++ b/src/zotero_arxiv_daily/construct_email.py @@ -52,7 +52,7 @@ def get_empty_html(): """ return block_template -def get_block_html(title:str, authors:str, rate:str, tldr:str, pdf_url:str, affiliations:str=None): +def get_block_html(title:str, authors:str, rate:str, tldr:str, pdf_url:str, affiliations:str=None,published_date: str = None): block_template = """ @@ -67,6 +67,11 @@ def get_block_html(title:str, authors:str, rate:str, tldr:str, pdf_url:str, affi {affiliations} + + +
+ Date: {published_date} +
Relevance: {rate} @@ -85,7 +90,7 @@ def get_block_html(title:str, authors:str, rate:str, tldr:str, pdf_url:str, affi
""" - return block_template.format(title=title, authors=authors,rate=rate, tldr=tldr, pdf_url=pdf_url, affiliations=affiliations) + return block_template.format(title=title, authors=authors,rate=rate, tldr=tldr, pdf_url=pdf_url, affiliations=affiliations,published_date=published_date or "Unknown") def get_stars(score:float): full_star = '' @@ -125,7 +130,8 @@ def render_email(papers:list[Paper]) -> str: affiliations += ', ...' else: affiliations = 'Unknown Affiliation' - parts.append(get_block_html(p.title, authors, rate, p.tldr, p.pdf_url, affiliations)) + published_date = p.published_date or "Unknown" + parts.append(get_block_html(p.title, authors, rate, p.tldr, p.pdf_url, affiliations,published_date,)) content = '
' + '

'.join(parts) + '
' return framework.replace('__CONTENT__', content) diff --git a/src/zotero_arxiv_daily/protocol.py b/src/zotero_arxiv_daily/protocol.py index 143c03d37..91959e398 100644 --- a/src/zotero_arxiv_daily/protocol.py +++ b/src/zotero_arxiv_daily/protocol.py @@ -20,6 +20,7 @@ class Paper: tldr: Optional[str] = None affiliations: Optional[list[str]] = None score: Optional[float] = None + published_date: Optional[str] = None def _generate_tldr_with_llm(self, openai_client:OpenAI,llm_params:dict) -> str: lang = llm_params.get('language', 'English') @@ -108,4 +109,4 @@ class CorpusPaper: title: str abstract: str added_date: datetime - paths: list[str] \ No newline at end of file + paths: list[str] diff --git a/src/zotero_arxiv_daily/retriever/arxiv_retriever.py b/src/zotero_arxiv_daily/retriever/arxiv_retriever.py index b56d6f933..63679999f 100644 --- a/src/zotero_arxiv_daily/retriever/arxiv_retriever.py +++ b/src/zotero_arxiv_daily/retriever/arxiv_retriever.py @@ -146,6 +146,7 @@ def convert_to_paper(self, raw_paper: ArxivResult) -> Paper: authors = [a.name for a in raw_paper.authors] abstract = raw_paper.summary pdf_url = raw_paper.pdf_url + published_date = raw_paper.published.strftime("%Y-%m-%d") if raw_paper.published else None full_text = extract_text_from_tar(raw_paper) if full_text is None: full_text = extract_text_from_html(raw_paper) @@ -159,6 +160,7 @@ def convert_to_paper(self, raw_paper: ArxivResult) -> Paper: url=raw_paper.entry_id, pdf_url=pdf_url, full_text=full_text, + published_date=published_date, ) diff --git a/src/zotero_arxiv_daily/retriever/biorxiv_retriever.py b/src/zotero_arxiv_daily/retriever/biorxiv_retriever.py index 719158887..31deaa078 100644 --- a/src/zotero_arxiv_daily/retriever/biorxiv_retriever.py +++ b/src/zotero_arxiv_daily/retriever/biorxiv_retriever.py @@ -50,6 +50,7 @@ def convert_to_paper(self, raw_paper:dict[str, Any]) -> Paper | None: abstract = raw_paper['abstract'] pdf_url = f"https://www.{self.server}.org/content/{raw_paper['doi']}v{raw_paper['version']}.full.pdf" full_text = None # biorxiv forbids scraping its pdf + published_date = raw_paper.get("date") return Paper( source=self.name, title=title, @@ -57,5 +58,6 @@ def convert_to_paper(self, raw_paper:dict[str, Any]) -> Paper | None: abstract=abstract, url=pdf_url, pdf_url=pdf_url, - full_text=full_text - ) \ No newline at end of file + full_text=full_text, + published_date=published_date, + )