File tree Expand file tree Collapse file tree
src/zotero_arxiv_daily/retriever Expand file tree Collapse file tree Original file line number Diff line number Diff line change 99import multiprocessing
1010import os
1111from queue import Empty
12+ from time import sleep
1213from typing import Any , Callable , TypeVar
1314from loguru import logger
1415import requests
@@ -132,11 +133,25 @@ def _retrieve_raw_papers(self) -> list[ArxivResult]:
132133
133134 # Get full information of each paper from the arXiv API
134135 bar = tqdm (total = len (all_paper_ids ))
136+ max_batch_retries = 5
137+ batch_retry_delay = 30
135138 for i in range (0 , len (all_paper_ids ), 20 ):
136139 search = arxiv .Search (id_list = all_paper_ids [i :i + 20 ])
137- batch = list (client .results (search ))
138- bar .update (len (batch ))
139- raw_papers .extend (batch )
140+ for attempt in range (max_batch_retries ):
141+ try :
142+ batch = list (client .results (search ))
143+ bar .update (len (batch ))
144+ raw_papers .extend (batch )
145+ break
146+ except arxiv .HTTPError as exc :
147+ if exc .status == 429 and attempt < max_batch_retries - 1 :
148+ wait = batch_retry_delay * (attempt + 1 )
149+ logger .warning (f"arXiv API 429 on batch { i // 20 } , retry { attempt + 1 } /{ max_batch_retries } in { wait } s" )
150+ sleep (wait )
151+ else :
152+ raise
153+ if i + 20 < len (all_paper_ids ):
154+ sleep (3 )
140155 bar .close ()
141156
142157 return raw_papers
You can’t perform that action at this time.
0 commit comments