TideDra
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 1 addition & 19 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 1 addition & 19 deletions
diff --git a/‎CLAUDE.md‎
Lines changed: 81 additions & 0 deletions b/‎CLAUDE.md‎
Lines changed: 81 additions & 0 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 3 additions & 2 deletions b/‎pyproject.toml‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎tests/__init__.py‎ b/‎tests/__init__.py‎
diff --git a/‎tests/canned_responses.py‎
Lines changed: 231 additions & 0 deletions b/‎tests/canned_responses.py‎
Lines changed: 231 additions & 0 deletions
@@ -12,31 +12,13 @@ on:
 jobs:
   pytest:
     runs-on: ubuntu-latest
-    services:
-      mailhog:
-        image: mailhog/mailhog:latest
-        ports:
-          - 1025:1025  # SMTP
-      openai:
-        image: tidedra/mock_openai:latest
-        ports:
-          - 30000:30000
     steps:
       - name: Checkout
         uses: actions/checkout@v6
 
       - name: Setup uv
         uses: astral-sh/setup-uv@v7.1.4
 
-
       - name: Run Pytest
-        env:
-          ZOTERO_ID: "0"
-          ZOTERO_KEY: "AbCdEfGhIjKlMnOpQrStUvWx"
-          SENDER: "test@example.com"
-          RECEIVER: "test@example.com"
-          SENDER_PASSWORD: "test"
-          OPENAI_API_KEY: "sk-xxx"
-          OPENAI_API_BASE: "http://openai:30000/v1"
         run: |
-          uv run pytest -m ""
+          uv run pytest -m "" --cov=src/zotero_arxiv_daily --cov-report=term-missing
@@ -0,0 +1,81 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+Zotero-arXiv-Daily recommends new arXiv/bioRxiv/medRxiv papers based on a user's Zotero library. It computes embedding similarity between new papers and the user's existing library, generates TLDRs via LLM, and delivers results by email. Designed to run as a GitHub Actions workflow at zero cost.
+
+## Commands
+
+```bash
+# Run the application
+uv run src/zotero_arxiv_daily/main.py
+
+# Run tests (excludes slow tests by default)
+uv run pytest
+
+# Run all tests including slow ones
+uv run pytest -m ""
+
+# Run a single test
+uv run pytest tests/test_utils.py::TestGlobMatch -v
+
+# Install/sync dependencies
+uv sync
+```
+
+No linter or formatter is configured.
+
+## Architecture
+
+The app follows a linear pipeline orchestrated by `Executor` (`src/zotero_arxiv_daily/executor.py`):
+
+1. **Fetch Zotero corpus** — retrieves user's library papers via pyzotero API
+2. **Filter corpus** — applies `include_path` glob patterns to select relevant collections
+3. **Retrieve new papers** — fetches from configured sources (arXiv RSS, bioRxiv/medRxiv REST API)
+4. **Rerank** — scores candidates by weighted similarity to corpus (newer Zotero papers weighted higher)
+5. **Generate TLDRs + affiliations** — via OpenAI-compatible LLM API
+6. **Render + send email** — HTML email via SMTP
+
+### Plugin Systems
+
+**Retrievers** (`src/zotero_arxiv_daily/retriever/`): Register via `@register_retriever` decorator, discovered by `get_retriever_cls()`. Each retriever implements `_retrieve_raw_papers()` and `convert_to_paper()`.
+
+**Rerankers** (`src/zotero_arxiv_daily/reranker/`): Register via `@register_reranker` decorator, discovered by `get_reranker_cls()`. Two implementations: `local` (sentence-transformers) and `api` (OpenAI-compatible embeddings endpoint).
+
+### Configuration
+
+Uses Hydra + OmegaConf. Config is composed from `config/base.yaml` (defaults) + `config/custom.yaml` (user overrides). Environment variables are interpolated via `${oc.env:VAR_NAME,default}` syntax. Entry point uses `@hydra.main`.
+
+### Data Classes
+
+`Paper` and `CorpusPaper` are defined in `src/zotero_arxiv_daily/protocol.py`. `Paper` has LLM-powered methods (`generate_tldr`, `generate_affiliations`) that call the OpenAI API directly.
+
+## Testing
+
+Tests marked `@pytest.mark.slow` require heavy dependencies (e.g., sentence-transformers model download) and are skipped by default (`addopts = "-m 'not slow'"` in pyproject.toml); the CI workflow overrides that filter with `-m ""` so they run there. All other tests run with pure Python stubs (no Docker containers needed).
+
+```bash
+# Run tests (excludes slow tests)
+uv run pytest
+
+# Run all tests including slow ones
+uv run pytest -m ""
+
+# Run with coverage
+uv run pytest --cov=src/zotero_arxiv_daily --cov-report=term-missing
+```
+
+## gstack
+
+Use the `/browse` skill from gstack for all web browsing. Never use `mcp__claude-in-chrome__*` tools.
+
+Available skills: `/office-hours`, `/plan-ceo-review`, `/plan-eng-review`, `/plan-design-review`, `/design-consultation`, `/design-shotgun`, `/design-html`, `/review`, `/ship`, `/land-and-deploy`, `/canary`, `/benchmark`, `/browse`, `/connect-chrome`, `/qa`, `/qa-only`, `/design-review`, `/setup-browser-cookies`, `/setup-deploy`, `/retro`, `/investigate`, `/document-release`, `/codex`, `/cso`, `/autoplan`, `/plan-devex-review`, `/devex-review`, `/careful`, `/freeze`, `/guard`, `/unfreeze`, `/gstack-upgrade`, `/learn`.
+
+If gstack skills aren't working, run `cd .claude/skills/gstack && ./setup` to build the binary and register skills.
+
+## Git Workflow
+
+- PRs should target the `dev` branch, not `main`
+- Current development branch: `dev`
@@ -37,9 +37,9 @@ url = "https://download.pytorch.org/whl/cpu"
 explicit = true
 
 [tool.pytest.ini_options]
-addopts = "-m 'not ci'"
+addopts = "-m 'not slow'"
 markers = [
-    "ci: tests that only run in CI (require external services)",
+    "slow: tests that are slow (e.g. download models)",
 ]
 filterwarnings = [
     "ignore::DeprecationWarning:multiprocessing",
@@ -49,4 +49,5 @@ filterwarnings = [
 dev = [
     "ipykernel>=7.1.0",
     "pytest>=8.4.1",
+    "pytest-cov>=6.0",
 ]
@@ -0,0 +1,231 @@
+"""Shared stub factories for tests. No unittest.mock anywhere."""
+
+from datetime import datetime
+from types import SimpleNamespace
+
+from zotero_arxiv_daily.protocol import CorpusPaper, Paper
+
+
+# ---------------------------------------------------------------------------
+# OpenAI client stub
+# ---------------------------------------------------------------------------
+
+# Substring that _stub_chat_create searches for in the stringified messages
+# to recognise an affiliation-extraction request.
+_AFFILIATION_MARKER = "You are an assistant who perfectly extracts affiliations"
+# Canned reply content returned for affiliation requests.
+_AFFILIATION_RESPONSE = '["TsingHua University","Peking University"]'
+# Canned reply content returned for every other chat request.
+_TLDR_RESPONSE = "Hello! How can I assist you today?"
+
+
+def _make_chat_response(content: str) -> SimpleNamespace:
+    return SimpleNamespace(
+        choices=[
+            SimpleNamespace(
+                message=SimpleNamespace(content=content),
+                finish_reason="stop",
+                index=0,
+            )
+        ],
+        id="chatcmpl-stub",
+        created=1765197615,
+        model="gpt-4o-mini-2024-07-18",
+        object="chat.completion",
+    )
+
+
+def _stub_chat_create(**kwargs):
+    messages = kwargs.get("messages", [])
+    request_str = str(messages)
+    if _AFFILIATION_MARKER in request_str:
+        return _make_chat_response(_AFFILIATION_RESPONSE)
+    return _make_chat_response(_TLDR_RESPONSE)
+
+
+def _stub_embeddings_create(**kwargs):
+    inputs = kwargs.get("input", [])
+    n = len(inputs) if isinstance(inputs, list) else 1
+    return SimpleNamespace(
+        data=[SimpleNamespace(embedding=[0.1, 0.2, 0.3], index=i, object="embedding") for i in range(n)],
+        model="text-embedding-3-large",
+        object="list",
+    )
+
+
+def make_stub_openai_client():
+    """Return a SimpleNamespace that quacks like openai.OpenAI().
+
+    chat.completions.create() and embeddings.create() behave identically
+    to the Docker mock_openai server that CI previously relied on.
+    """
+    return SimpleNamespace(
+        chat=SimpleNamespace(
+            completions=SimpleNamespace(create=_stub_chat_create),
+        ),
+        embeddings=SimpleNamespace(create=_stub_embeddings_create),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Zotero client stub
+# ---------------------------------------------------------------------------
+
+# Default collection tree: "survey" (COL1) has no parent and "topic-a" (COL2)
+# names COL1 as its "parentCollection".
+_DEFAULT_COLLECTIONS = [
+    {
+        "key": "COL1",
+        "data": {"name": "survey", "parentCollection": False},
+    },
+    {
+        "key": "COL2",
+        "data": {"name": "topic-a", "parentCollection": "COL1"},
+    },
+]
+
+# Default library items: the first is filed under COL2, the second under COL1.
+_DEFAULT_ITEMS = [
+    {
+        "data": {
+            "title": "Stub Paper 1",
+            "abstractNote": "Abstract of stub paper 1.",
+            "dateAdded": "2026-01-15T10:00:00Z",
+            "collections": ["COL2"],
+        },
+    },
+    {
+        "data": {
+            "title": "Stub Paper 2",
+            "abstractNote": "Abstract of stub paper 2.",
+            "dateAdded": "2026-02-20T12:00:00Z",
+            "collections": ["COL1"],
+        },
+    },
+]
+
+
+def make_stub_zotero_client(collections=None, items=None):
+    """Return a SimpleNamespace that quacks like pyzotero.zotero.Zotero.
+
+    Supports the call patterns used by Executor.fetch_zotero_corpus():
+        zot.everything(zot.collections())
+        zot.everything(zot.items(itemType=...))
+    """
+    cols = collections if collections is not None else _DEFAULT_COLLECTIONS
+    itms = items if items is not None else _DEFAULT_ITEMS
+
+    def everything(generator):
+        return generator
+
+    def collections_fn():
+        return cols
+
+    def items_fn(**kwargs):
+        return itms
+
+    return SimpleNamespace(
+        everything=everything,
+        collections=collections_fn,
+        items=items_fn,
+    )
+
+
+# ---------------------------------------------------------------------------
+# SMTP stub
+# ---------------------------------------------------------------------------
+
+
+def make_stub_smtp(sent_emails: list):
+    """Return a class that records calls to sendmail().
+
+    Usage:
+        sent = []
+        monkeypatch.setattr(smtplib, "SMTP", make_stub_smtp(sent))
+        ...
+        assert len(sent) == 1
+        sender, recipients, body = sent[0]
+    """
+
+    class StubSMTP:
+        def __init__(self, *args, **kwargs):
+            pass
+
+        def starttls(self):
+            pass
+
+        def login(self, user, password):
+            pass
+
+        def sendmail(self, sender, recipients, msg):
+            sent_emails.append((sender, recipients, msg))
+
+        def quit(self):
+            pass
+
+    return StubSMTP
+
+
+# ---------------------------------------------------------------------------
+# Paper / CorpusPaper factories
+# ---------------------------------------------------------------------------
+
+
+def make_sample_paper(**overrides) -> Paper:
+    defaults = dict(
+        source="arxiv",
+        title="Sample Paper Title",
+        authors=["Author A", "Author B", "Author C"],
+        abstract="This paper explores a novel approach to widget engineering.",
+        url="https://arxiv.org/abs/2026.00001",
+        pdf_url="https://arxiv.org/pdf/2026.00001",
+        full_text="\\begin{document} Some text. \\end{document}",
+        tldr=None,
+        affiliations=None,
+        score=None,
+    )
+    defaults.update(overrides)
+    return Paper(**defaults)
+
+
+def make_sample_corpus(n: int = 3) -> list[CorpusPaper]:
+    return [
+        CorpusPaper(
+            title=f"Corpus Paper {i}",
+            abstract=f"Abstract for corpus paper {i}.",
+            added_date=datetime(2026, 1, 1 + i),
+            paths=[f"2026/survey/topic-{i}"],
+        )
+        for i in range(n)
+    ]
+
+
+# ---------------------------------------------------------------------------
+# bioRxiv canned API response
+# ---------------------------------------------------------------------------
+
+# Canned payload with a "messages" status list and a "collection" of three
+# preprint records: two dated 2026-03-02 and one dated 2026-03-01.
+# NOTE(review): the older record presumably exists to exercise date filtering
+# in the retriever tests — confirm against the tests that consume it.
+SAMPLE_BIORXIV_API_RESPONSE = {
+    "messages": [{"status": "ok"}],
+    "collection": [
+        {
+            "doi": "10.1101/2026.03.01.000001",
+            "title": "A biorxiv paper",
+            "authors": "Smith, J.; Doe, A.; Lee, K.",
+            "abstract": "We present a novel finding.",
+            "date": "2026-03-02",
+            "category": "bioinformatics",
+            "version": "1",
+        },
+        {
+            "doi": "10.1101/2026.03.01.000002",
+            "title": "Another biorxiv paper",
+            "authors": "Wang, L.; Chen, M.",
+            "abstract": "We replicate a key result.",
+            "date": "2026-03-02",
+            "category": "genomics",
+            "version": "1",
+        },
+        {
+            "doi": "10.1101/2026.03.01.000003",
+            "title": "Old biorxiv paper",
+            "authors": "Old, R.",
+            "abstract": "Yesterday's paper.",
+            "date": "2026-03-01",
+            "category": "bioinformatics",
+            "version": "1",
+        },
+    ],
+}