Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions src/zotero_arxiv_daily/construct_email.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def get_empty_html():
"""
return block_template

def get_block_html(title:str, authors:str, rate:str, tldr:str, pdf_url:str, affiliations:str=None):
def get_block_html(title:str, authors:str, rate:str, tldr:str, pdf_url:str, affiliations:str=None,published_date: str = None):
block_template = """
<table border="0" cellpadding="0" cellspacing="0" width="100%" style="font-family: Arial, sans-serif; border: 1px solid #ddd; border-radius: 8px; padding: 16px; background-color: #f9f9f9;">
<tr>
Expand All @@ -67,6 +67,11 @@ def get_block_html(title:str, authors:str, rate:str, tldr:str, pdf_url:str, affi
<i>{affiliations}</i>
</td>
</tr>
<tr>
<td style="font-size: 14px; color: #333; padding: 8px 0;">
<strong>Date:</strong> {published_date}
</td>
</tr>
<tr>
<td style="font-size: 14px; color: #333; padding: 8px 0;">
<strong>Relevance:</strong> {rate}
Expand All @@ -85,7 +90,7 @@ def get_block_html(title:str, authors:str, rate:str, tldr:str, pdf_url:str, affi
</tr>
</table>
"""
return block_template.format(title=title, authors=authors,rate=rate, tldr=tldr, pdf_url=pdf_url, affiliations=affiliations)
return block_template.format(title=title, authors=authors,rate=rate, tldr=tldr, pdf_url=pdf_url, affiliations=affiliations,published_date=published_date or "Unknown")

def get_stars(score:float):
full_star = '<span class="full-star">⭐</span>'
Expand Down Expand Up @@ -125,7 +130,8 @@ def render_email(papers:list[Paper]) -> str:
affiliations += ', ...'
else:
affiliations = 'Unknown Affiliation'
parts.append(get_block_html(p.title, authors, rate, p.tldr, p.pdf_url, affiliations))
published_date = p.published_date or "Unknown"
parts.append(get_block_html(p.title, authors, rate, p.tldr, p.pdf_url, affiliations,published_date,))

content = '<br>' + '</br><br>'.join(parts) + '</br>'
return framework.replace('__CONTENT__', content)
3 changes: 2 additions & 1 deletion src/zotero_arxiv_daily/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class Paper:
tldr: Optional[str] = None
affiliations: Optional[list[str]] = None
score: Optional[float] = None
published_date: Optional[str] = None

def _generate_tldr_with_llm(self, openai_client:OpenAI,llm_params:dict) -> str:
lang = llm_params.get('language', 'English')
Expand Down Expand Up @@ -108,4 +109,4 @@ class CorpusPaper:
title: str
abstract: str
added_date: datetime
paths: list[str]
paths: list[str]
2 changes: 2 additions & 0 deletions src/zotero_arxiv_daily/retriever/arxiv_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ def convert_to_paper(self, raw_paper: ArxivResult) -> Paper:
authors = [a.name for a in raw_paper.authors]
abstract = raw_paper.summary
pdf_url = raw_paper.pdf_url
published_date = raw_paper.published.strftime("%Y-%m-%d") if raw_paper.published else None
full_text = extract_text_from_tar(raw_paper)
if full_text is None:
full_text = extract_text_from_html(raw_paper)
Expand All @@ -159,6 +160,7 @@ def convert_to_paper(self, raw_paper: ArxivResult) -> Paper:
url=raw_paper.entry_id,
pdf_url=pdf_url,
full_text=full_text,
published_date=published_date,
)


Expand Down
6 changes: 4 additions & 2 deletions src/zotero_arxiv_daily/retriever/biorxiv_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,14 @@ def convert_to_paper(self, raw_paper:dict[str, Any]) -> Paper | None:
abstract = raw_paper['abstract']
pdf_url = f"https://www.{self.server}.org/content/{raw_paper['doi']}v{raw_paper['version']}.full.pdf"
full_text = None # biorxiv forbids scraping its pdf
published_date = raw_paper.get("date")
return Paper(
source=self.name,
title=title,
authors=authors,
abstract=abstract,
url=pdf_url,
pdf_url=pdf_url,
full_text=full_text
)
full_text=full_text,
published_date=published_date,
)