Skip to content

Commit 2e37aad

Browse files
authored
Merge pull request #250 from Maxine-1520/fix/arxiv-429-retry
fix: add retry and rate limiting for arXiv API 429 errors
2 parents: 54d71b9 + eef990c; commit: 2e37aad

1 file changed

Lines changed: 18 additions & 3 deletions

File tree

src/zotero_arxiv_daily/retriever/arxiv_retriever.py

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import multiprocessing
1010
import os
1111
from queue import Empty
12+
from time import sleep
1213
from typing import Any, Callable, TypeVar
1314
from loguru import logger
1415
import requests
@@ -132,11 +133,25 @@ def _retrieve_raw_papers(self) -> list[ArxivResult]:
132133

133134
# Get full information of each paper from arxiv api
134135
bar = tqdm(total=len(all_paper_ids))
136+
max_batch_retries = 5
137+
batch_retry_delay = 30
135138
for i in range(0, len(all_paper_ids), 20):
136139
search = arxiv.Search(id_list=all_paper_ids[i:i + 20])
137-
batch = list(client.results(search))
138-
bar.update(len(batch))
139-
raw_papers.extend(batch)
140+
for attempt in range(max_batch_retries):
141+
try:
142+
batch = list(client.results(search))
143+
bar.update(len(batch))
144+
raw_papers.extend(batch)
145+
break
146+
except arxiv.HTTPError as exc:
147+
if exc.status == 429 and attempt < max_batch_retries - 1:
148+
wait = batch_retry_delay * (attempt + 1)
149+
logger.warning(f"arXiv API 429 on batch {i // 20}, retry {attempt + 1}/{max_batch_retries} in {wait}s")
150+
sleep(wait)
151+
else:
152+
raise
153+
if i + 20 < len(all_paper_ids):
154+
sleep(3)
140155
bar.close()
141156

142157
return raw_papers

0 commit comments

Comments
 (0)