|
4 | 4 | from types import SimpleNamespace |
5 | 5 |
|
6 | 6 | import feedparser |
| 7 | +import pytest |
7 | 8 |
|
8 | 9 | from zotero_arxiv_daily.retriever.arxiv_retriever import ArxivRetriever, _run_with_hard_timeout |
9 | 10 | import zotero_arxiv_daily.retriever.arxiv_retriever as arxiv_retriever |
@@ -88,3 +89,59 @@ def test_run_with_hard_timeout_returns_none_on_failure(monkeypatch): |
88 | 89 | ) |
89 | 90 | assert result is None |
90 | 91 | assert "boom" in warnings[0] |
| 92 | + |
| 93 | + |
def test_retrieve_raw_papers_skips_batch_after_retryable_http_error(config, mock_feedparser, monkeypatch):
    """Check that a persistent HTTP 503 yields no papers and a skip warning.

    ``arxiv.Client`` is replaced with a stub whose ``results`` always raises a
    fake ``HTTPError`` carrying status 503. The test then expects
    ``_retrieve_raw_papers`` to return an empty list and to have logged a
    "Skipping batch 0 after 5 retries" warning.
    """
    from omegaconf import open_dict

    class FakeHTTPError(Exception):
        """Stand-in for ``arxiv.HTTPError`` exposing only ``status``."""

        def __init__(self, status):
            self.status = status

    class FakeClient:
        """Stand-in for ``arxiv.Client`` whose every query fails with 503."""

        def __init__(self, **_kwargs):
            pass

        def results(self, search):
            raise FakeHTTPError(503)

    def _no_sleep(_seconds):
        # Replaces the retriever's sleep so the test does not actually wait.
        return None

    # NOTE(review): debug mode presumably limits the run to a small query;
    # confirm against ArxivRetriever.
    with open_dict(config):
        config.executor.debug = True

    # Collect warning messages instead of sending them to the real logger.
    captured: list[str] = []
    monkeypatch.setattr(arxiv_retriever, "logger", SimpleNamespace(warning=captured.append))
    monkeypatch.setattr(arxiv_retriever, "sleep", _no_sleep)
    monkeypatch.setattr(arxiv_retriever.arxiv, "HTTPError", FakeHTTPError)
    monkeypatch.setattr(arxiv_retriever.arxiv, "Client", FakeClient)

    retriever = ArxivRetriever(config)
    papers = retriever._retrieve_raw_papers()
    assert papers == []

    expected = "Skipping batch 0 after 5 retries due to arXiv API 503"
    matching = [message for message in captured if expected in message]
    assert matching
| 121 | + |
| 122 | + |
def test_retrieve_raw_papers_raises_non_retryable_http_error(config, mock_feedparser, monkeypatch):
    """Check that an HTTP 400 from the arXiv client propagates to the caller.

    ``arxiv.Client`` is replaced with a stub whose ``results`` always raises a
    fake ``HTTPError`` carrying status 400. The test expects
    ``_retrieve_raw_papers`` to let that exception escape.
    """
    from omegaconf import open_dict

    class FakeHTTPError(Exception):
        """Stand-in for ``arxiv.HTTPError`` exposing only ``status``."""

        def __init__(self, status):
            self.status = status

    class FakeClient:
        """Stand-in for ``arxiv.Client`` whose every query fails with 400."""

        def __init__(self, **_kwargs):
            pass

        def results(self, search):
            raise FakeHTTPError(400)

    def _no_sleep(_seconds):
        # Replaces the retriever's sleep so the test does not actually wait.
        return None

    # NOTE(review): debug mode presumably limits the run to a small query;
    # confirm against ArxivRetriever.
    with open_dict(config):
        config.executor.debug = True

    monkeypatch.setattr(arxiv_retriever.arxiv, "Client", FakeClient)
    monkeypatch.setattr(arxiv_retriever.arxiv, "HTTPError", FakeHTTPError)
    monkeypatch.setattr(arxiv_retriever, "sleep", _no_sleep)

    retriever = ArxivRetriever(config)
    with pytest.raises(FakeHTTPError):
        retriever._retrieve_raw_papers()
0 commit comments