44import pytest
55from omegaconf import OmegaConf
66
7- from zotero_arxiv_daily .executor import Executor , normalize_include_path_patterns
7+ from zotero_arxiv_daily .executor import Executor , normalize_path_patterns
88from zotero_arxiv_daily .protocol import CorpusPaper
99
1010
def test_normalize_path_patterns_rejects_single_string_for_include_path():
    """A bare string passed as ``include_path`` must raise ``TypeError``.

    The config key is passed as exactly ``"include_path"`` (no stray
    whitespace), matching the other call sites in this module and the key
    named in the expected error message.
    """
    with pytest.raises(TypeError, match="config.zotero.include_path must be a list of glob patterns or null"):
        normalize_path_patterns("2026/survey/**", "include_path")
1414
1515
def test_normalize_path_patterns_accepts_list_config_for_include_path():
    """An OmegaConf list of include globs normalizes to the same patterns, in order."""
    expected_patterns = ["2026/survey/**", "2026/reading-group/**"]
    # Hand OmegaConf its own copy so the expectation stays an independent list.
    include_path = OmegaConf.create(list(expected_patterns))

    normalized = normalize_path_patterns(include_path, "include_path")

    assert normalized == expected_patterns
@@ -27,7 +27,8 @@ def test_filter_corpus_matches_any_path_against_any_pattern():
2727 executor .config = SimpleNamespace (
2828 zotero = SimpleNamespace (include_path = ["2026/survey/**" , "2026/reading-group/**" ])
2929 )
30- executor .include_path_patterns = normalize_include_path_patterns (executor .config .zotero .include_path )
30+ executor .include_path_patterns = normalize_path_patterns (executor .config .zotero .include_path , "include_path" )
31+ executor .ignore_path_patterns = None
3132
3233 corpus = [
3334 CorpusPaper (
@@ -53,3 +54,90 @@ def test_filter_corpus_matches_any_path_against_any_pattern():
5354 filtered = executor .filter_corpus (corpus )
5455
5556 assert [paper .title for paper in filtered ] == ["Survey Paper" , "Reading Group Paper" ]
57+
58+
def test_normalize_path_patterns_rejects_single_string_for_ignore_path():
    """A bare string passed as ``ignore_path`` must raise ``TypeError``."""
    expected_message = "config.zotero.ignore_path must be a list of glob patterns or null"

    with pytest.raises(TypeError, match=expected_message):
        normalize_path_patterns("archive/**", "ignore_path")
62+
63+
def test_normalize_path_patterns_accepts_list_config_for_ignore_path():
    """An OmegaConf list of ignore globs normalizes to the same patterns, in order."""
    expected_patterns = ["archive/**", "2025/**"]
    # Hand OmegaConf its own copy so the expectation stays an independent list.
    ignore_path = OmegaConf.create(list(expected_patterns))

    normalized = normalize_path_patterns(ignore_path, "ignore_path")

    assert normalized == expected_patterns
68+
69+
def test_normalize_path_patterns_accepts_empty_list():
    """An empty pattern list is accepted and comes back empty."""
    normalized = normalize_path_patterns([], "ignore_path")

    assert normalized == []
72+
73+
def test_filter_corpus_excludes_papers_matching_ignore_path():
    """With only ignore patterns set, papers under ignored paths are filtered out."""
    # Allocate the executor without running __init__; only the two pattern
    # attributes are assigned before filter_corpus is called.
    executor = Executor.__new__(Executor)
    executor.include_path_patterns = None
    executor.ignore_path_patterns = normalize_path_patterns(["archive/**", "2025/**"], "ignore_path")

    # (title, day of January 2026, single collection path)
    paper_specs = [
        ("Active Paper", 1, "2026/survey/topic-a"),
        ("Archived Paper", 2, "archive/misc"),
        ("Old Paper", 3, "2025/other/topic"),
    ]
    corpus = [
        CorpusPaper(
            title=title,
            abstract="",
            added_date=datetime(2026, 1, day),
            paths=[path],
        )
        for title, day, path in paper_specs
    ]

    surviving_titles = [paper.title for paper in executor.filter_corpus(corpus)]

    assert surviving_titles == ["Active Paper"]
103+
104+
def test_filter_corpus_ignore_path_takes_precedence_over_include_path():
    """A paper matched by both include and ignore patterns is dropped."""
    # Allocate the executor without running __init__; only the two pattern
    # attributes are assigned before filter_corpus is called.
    executor = Executor.__new__(Executor)
    executor.include_path_patterns = normalize_path_patterns(["2026/**"], "include_path")
    executor.ignore_path_patterns = normalize_path_patterns(["2026/ignore/**"], "ignore_path")

    # Both papers sit under "2026/", so both match the include pattern;
    # only the second also matches the ignore pattern.
    included_paper = CorpusPaper(
        title="Included Paper",
        abstract="",
        added_date=datetime(2026, 1, 1),
        paths=["2026/survey/topic-a"],
    )
    ignored_paper = CorpusPaper(
        title="Ignored Paper",
        abstract="",
        added_date=datetime(2026, 1, 2),
        paths=["2026/ignore/topic-b"],
    )
    corpus = [included_paper, ignored_paper]

    surviving_titles = [paper.title for paper in executor.filter_corpus(corpus)]

    assert surviving_titles == ["Included Paper"]
129+
130+
def test_filter_corpus_no_filters_returns_all():
    """With neither include nor ignore patterns, the corpus is returned unchanged."""
    # Allocate the executor without running __init__; only the two pattern
    # attributes are assigned before filter_corpus is called.
    executor = Executor.__new__(Executor)
    executor.include_path_patterns = None
    executor.ignore_path_patterns = None

    # (title, day of January 2026, single collection path)
    paper_specs = [
        ("Paper A", 1, "foo"),
        ("Paper B", 2, "bar"),
    ]
    corpus = [
        CorpusPaper(title=title, abstract="", added_date=datetime(2026, 1, day), paths=[path])
        for title, day, path in paper_specs
    ]

    assert executor.filter_corpus(corpus) == corpus
0 commit comments