|
1 | 1 | from galaxy.util.search import ( |
| 2 | + filter_terms, |
| 3 | + FilteredTerm, |
2 | 4 | parse_filters, |
3 | 5 | parse_filters_structured, |
| 6 | + RawTextTerm, |
4 | 7 | ) |
5 | 8 |
|
6 | 9 |
|
@@ -94,3 +97,55 @@ def test_parse_filters_structured(): |
94 | 97 | assert text_terms[1].quoted is True |
95 | 98 | assert text_terms[2].text == "foo" |
96 | 99 | assert text_terms[2].quoted is False |
| 100 | + |
| 101 | + |
def test_filter_terms_drops_short_raw_terms():
    """Check that unquoted raw terms below ``min_raw_term_length`` are removed."""
    query = "Copy of Genomic Assembly and analysis"
    result = filter_terms(
        parse_filters_structured(query, {}),
        min_raw_term_length=4,
        max_raw_terms=None,
    )
    # "of" (2) and "and" (3) are shorter than the 4-character minimum; the
    # remaining terms are expected back in their original order.
    surviving_texts = [term.text for term in result.terms]
    assert surviving_texts == ["Copy", "Genomic", "Assembly", "analysis"]
| 107 | + |
| 108 | + |
def test_filter_terms_preserves_quoted_raw_terms():
    """Check that quoted raw terms are kept even when shorter than the minimum."""
    result = filter_terms(
        parse_filters_structured("'ab' Copy 'de'", {}),
        min_raw_term_length=4,
        max_raw_terms=None,
    )
    # 'ab' and 'de' are only two characters long but were quoted in the query,
    # so they are expected alongside the unquoted four-character "Copy".
    expected = [("ab", True), ("Copy", False), ("de", True)]
    observed = [(term.text, term.quoted) for term in result.terms]
    assert observed == expected
| 117 | + |
| 118 | + |
def test_filter_terms_preserves_filtered_terms_of_any_length():
    """Check that ``key:value`` terms survive regardless of their text length."""
    known_filters = {"tag": "tag", "user": "user"}
    result = filter_terms(
        parse_filters_structured("tag:ab user:cd Copy", known_filters),
        min_raw_term_length=4,
        max_raw_terms=None,
    )
    observed = [(term.__class__.__name__, term.text) for term in result.terms]
    # The two filtered terms carry two-character text, below the raw-term
    # minimum of 4, yet must still be present; "Copy" is exactly 4 characters
    # long and must be present as well. Only membership is checked here, not
    # ordering or the absence of other terms.
    for expected in (
        ("FilteredTerm", "ab"),
        ("FilteredTerm", "cd"),
        ("RawTextTerm", "Copy"),
    ):
        assert expected in observed
| 128 | + |
| 129 | + |
def test_filter_terms_caps_raw_terms_only():
    """Check that ``max_raw_terms`` limits raw terms but leaves filtered terms alone."""
    query = "tag:foo Copy Genomic Assembly analysis shared user nedflanders extra1 extra2"
    result = filter_terms(
        parse_filters_structured(query, {"tag": "tag"}),
        min_raw_term_length=4,
        max_raw_terms=3,
    )

    def texts_of(term_type):
        # Texts of the surviving terms of one kind, in result order.
        return [term.text for term in result.terms if isinstance(term, term_type)]

    # Only the first three raw terms are expected back; the single
    # ``tag:foo`` filtered term is expected regardless of the cap.
    assert texts_of(RawTextTerm) == ["Copy", "Genomic", "Assembly"]
    assert texts_of(FilteredTerm) == ["foo"]
| 140 | + |
| 141 | + |
def test_filter_terms_defaults():
    """Check ``filter_terms`` when called without explicit length or cap arguments."""
    # Default behaviour: 4-char min length, 7-term cap on raw terms.
    query = "Copy of Genomic Assembly and analysis - RDH shared by user nedflanders"
    result = filter_terms(parse_filters_structured(query, {}))
    # "of", "and", "-", "RDH" and "by" are dropped for length. Exactly seven
    # terms remain, so the cap is reached but not exceeded by this input.
    expected = [
        "Copy",
        "Genomic",
        "Assembly",
        "analysis",
        "shared",
        "user",
        "nedflanders",
    ]
    assert [term.text for term in result.terms] == expected
0 commit comments