Skip to content

Commit 35c08df

Browse files
authored
Add option to skip dictionary compile step (#201)
* Add option to skip dictionary compile step (resolves #196) * Fix coverage * Ensure we skip only if the dict already exists
1 parent f6ae60b commit 35c08df

File tree

7 files changed

+137
-25
lines changed

7 files changed

+137
-25
lines changed

.github/workflows/build.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,12 @@ jobs:
9090
# Results
9191
- name: Upload Results
9292
if: success()
93-
uses: codecov/codecov-action@v3
93+
uses: codecov/codecov-action@v4
9494
with:
9595
file: ./coverage.xml
9696
flags: unittests
9797
name: ${{ matrix.platform }}-${{ matrix.tox-env }}
98+
token: ${{ secrets.CODECOV_TOKEN }} # required
9899
fail_ci_if_error: false
99100

100101
docs_and_lint:
@@ -110,15 +111,15 @@ jobs:
110111
- name: Set up Python
111112
uses: actions/setup-python@v5
112113
with:
113-
python-version: 3.x
114+
python-version: '3.13'
114115
- name: Install Aspell
115116
if: matrix.tox-env == 'documents'
116117
run: |
117118
sudo apt-get install aspell aspell-en
118119
- name: Install dependencies
119120
run: |
120121
python -m pip install --upgrade pip
121-
python -m pip install --upgrade build setuptools tox
122+
python -m pip install --upgrade build setuptools tox
122123
- name: ${{ matrix.tox-env }}
123124
run: |
124125
python -m tox -e ${{ matrix.tox-env }}

docs/src/markdown/about/changelog.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Changelog
22

3+
## 2.11
4+
5+
- **NEW**: Add new command line option `--skip-dict-compile`, which skips the dictionary compile step if the compiled
6+
dictionary already exists. In that case, changes to a custom dictionary are ignored until it is compiled again.
7+
38
## 2.10
49

510
- **NEW**: Allow specifying parallel processes to speed up spell checking. Number of jobs can be specified either

docs/src/markdown/index.md

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -53,26 +53,24 @@ If you want to manually install it, run `#!bash python setup.py build` and `#!ba
5353
## Command Line Usage
5454

5555
```
56-
usage: pyspelling [-h] [--version] [--verbose] [--name NAME | --group GROUP] [--binary BINARY] [--jobs JOBS] [--config CONFIG] [--source SOURCE] [--spellchecker SPELLCHECKER]
56+
usage: pyspelling [-h] [--version] [--verbose] [--name NAME | --group GROUP] [--binary BINARY] [--jobs JOBS] [--config CONFIG] [--source SOURCE] [--spellchecker SPELLCHECKER] [--skip-dict-compile]
5757
5858
Spell checking tool.
5959
6060
options:
6161
-h, --help show this help message and exit
6262
--version show program's version number and exit
6363
--verbose, -v Verbosity level.
64-
--name NAME, -n NAME Specific spelling task by name to run.
65-
--group GROUP, -g GROUP
66-
Specific spelling task group to run.
67-
--binary BINARY, -b BINARY
68-
Provide path to spell checker's binary.
69-
--jobs JOBS, -j JOBS Specify the number of spell checker processes to run in parallel.
70-
--config CONFIG, -c CONFIG
71-
Spelling config.
72-
--source SOURCE, -S SOURCE
73-
Specify override file pattern. Only applicable when specifying exactly one --name.
74-
--spellchecker SPELLCHECKER, -s SPELLCHECKER
75-
Choose between aspell and hunspell
64+
--name, -n NAME Specific spelling task by name to run.
65+
--group, -g GROUP Specific spelling task group to run.
66+
--binary, -b BINARY Provide path to spell checker's binary.
67+
--jobs, -j JOBS Specify the number of spell checker processes to run in parallel.
68+
--config, -c CONFIG Spelling config.
69+
--source, -S SOURCE Specify override file pattern. Only applicable when specifying exactly one --name.
70+
--spellchecker, -s SPELLCHECKER
71+
Choose between aspell and hunspell.
72+
--skip-dict-compile, -x
73+
Skip dictionary compilation if the compiled file already exists.
7674
```
7775

7876
PySpelling can be run with the command below (assuming your Python bin/script folder is in your path). By default it

pyspelling/__init__.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,7 @@ class SpellingTask:
578578
"O": glob.O
579579
}
580580

581-
def __init__(self, checker, config, binary='', verbose=0, jobs=0, debug=False):
581+
def __init__(self, checker, config, binary='', verbose=0, jobs=0, debug=False, skip_dict_compile=False):
582582
"""Initialize."""
583583

584584
if checker == "hunspell": # pragma: no cover
@@ -594,6 +594,7 @@ def __init__(self, checker, config, binary='', verbose=0, jobs=0, debug=False):
594594
self.binary = checker if not binary else binary
595595
self.debug = debug
596596
self.jobs = jobs
597+
self.skip_dict_compile = skip_dict_compile
597598

598599
def log(self, text, level):
599600
"""Log level."""
@@ -622,7 +623,13 @@ def walk_src(self, targets, flags, limit):
622623
def get_checker(self):
623624
"""Get a spell checker object."""
624625

625-
checker = self.spellchecker(self.config, self.binary, self.verbose, self.default_encoding, self.debug)
626+
checker = self.spellchecker(
627+
self.config,
628+
self.binary,
629+
self.verbose,
630+
self.default_encoding,
631+
self.debug
632+
)
626633
checker._build_pipeline(self.task)
627634
return checker
628635

@@ -654,7 +661,15 @@ def run_task(self, task, source_patterns=None):
654661
self.task = task
655662
self.default_encoding = self.task.get('default_encoding', '')
656663
self.options = self.spellchecker.get_options(self.task)
657-
self.personal_dict = self.spellchecker.setup_dictionary(self.task, self.binary, self.verbose)
664+
if not self.skip_dict_compile:
665+
self.personal_dict = self.spellchecker.setup_dictionary(self.task, self.binary, self.verbose)
666+
else:
667+
dictionary_options = self.task.get('dictionary', {})
668+
output = os.path.abspath(dictionary_options.get('output', os.path.abspath(self.spellchecker.DICTIONARY)))
669+
if os.path.exists(output):
670+
self.personal_dict = output
671+
else:
672+
self.personal_dict = self.spellchecker.setup_dictionary(self.task, self.binary, self.verbose)
658673
self.found_match = False
659674
glob_flags = self._to_flags(self.task.get('glob_flags', "N|B|G"))
660675
glob_limit = self.task.get('glob_pattern_limit', 1000)
@@ -696,7 +711,8 @@ def spellcheck(
696711
sources=None,
697712
verbose=0,
698713
debug=False,
699-
jobs=0
714+
jobs=0,
715+
skip_dict_compile=False
700716
):
701717
"""Spell check."""
702718

@@ -737,7 +753,7 @@ def spellcheck(
737753

738754
log('Using {} to spellcheck {}'.format(checker, task.get('name', '')), 1, verbose)
739755

740-
spelltask = SpellingTask(checker, config, binary, verbose, jobs, debug)
756+
spelltask = SpellingTask(checker, config, binary, verbose, jobs, debug, skip_dict_compile)
741757

742758
for result in spelltask.run_task(task, source_patterns=sources):
743759
log('Context: %s' % result.context, 2, verbose)

pyspelling/__main__.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,13 @@ def main():
3030
help="Specify override file pattern. Only applicable when specifying exactly one --name."
3131
)
3232
parser.add_argument(
33-
'--spellchecker', '-s', action='store', default='', help="Choose between aspell and hunspell"
33+
'--spellchecker', '-s', action='store', default='', help="Choose between aspell and hunspell."
34+
)
35+
parser.add_argument(
36+
'--skip-dict-compile',
37+
'-x',
38+
action='store_true',
39+
help="Skip dictionary compilation if the compiled file already exists."
3440
)
3541
args = parser.parse_args()
3642

@@ -44,6 +50,7 @@ def main():
4450
verbose=args.verbose,
4551
debug=args.debug,
4652
jobs=args.jobs,
53+
skip_dict_compile=args.skip_dict_compile
4754
)
4855

4956

@@ -58,6 +65,7 @@ def run(config, **kwargs):
5865
sources = kwargs.get('sources', [])
5966
debug = kwargs.get('debug', False)
6067
jobs = kwargs.get('jobs', 0)
68+
skip_dict_compile = kwargs.get('skip_dict_compile', False)
6169

6270
fail = False
6371
count = 0
@@ -71,6 +79,7 @@ def run(config, **kwargs):
7179
verbose=verbose,
7280
debug=debug,
7381
jobs=jobs,
82+
skip_dict_compile=skip_dict_compile
7483
):
7584
count += 1
7685
if results.error:

tests/test_config.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,3 +315,70 @@ def test_too_many_filter_names(self):
315315
self.mktemp('test.txt', '', 'utf-8')
316316
with self.assertRaises(ValueError):
317317
self.assert_spellcheck('.source.yml', [])
318+
319+
320+
class TestSkipDictionary(util.PluginTestCase):
321+
"""Test skipping the dictionary compile step."""
322+
323+
def test_skip(self):
324+
"""Test that the dictionary is still compiled when skipping is requested but no compiled dictionary exists."""
325+
326+
config = self.dedent(
327+
"""
328+
matrix:
329+
- name: no_pipeline
330+
default_encoding: utf-8
331+
sources:
332+
- '{temp}/**/*.txt'
333+
aspell:
334+
lang: en
335+
d: en_US
336+
hunspell:
337+
d: en_US
338+
pipeline: null
339+
dictionary:
340+
wordlists:
341+
- '{temp}/mydict.wl'
342+
output: '{temp}/mydict.dic'
343+
"""
344+
).format(temp=self.tempdir)
345+
self.mktemp('.skip_compile.yml', config, 'utf-8')
346+
347+
bad_words = ['helo', 'begn']
348+
good_words = ['yes', 'word']
349+
self.mktemp('mydict.wl', '\n'.join(bad_words), 'utf-8')
350+
self.mktemp('test.txt', '\n'.join(bad_words + good_words), 'utf-8')
351+
self.assert_spellcheck('.skip_compile.yml', [], skip_dict_compile=True)
352+
353+
def test_compile_once(self):
354+
"""Test that a previously compiled dictionary is reused when skipping is requested."""
355+
356+
config = self.dedent(
357+
"""
358+
matrix:
359+
- name: no_pipeline
360+
default_encoding: utf-8
361+
sources:
362+
- '{temp}/**/*.txt'
363+
aspell:
364+
lang: en
365+
d: en_US
366+
hunspell:
367+
d: en_US
368+
pipeline: null
369+
dictionary:
370+
wordlists:
371+
- '{temp}/mydict.wl'
372+
output: '{temp}/mydict.dic'
373+
"""
374+
).format(temp=self.tempdir)
375+
self.mktemp('.skip_compile.yml', config, 'utf-8')
376+
377+
bad_words = ['helo', 'begn']
378+
good_words = ['yes', 'word']
379+
self.mktemp('mydict.wl', '\n'.join(bad_words), 'utf-8')
380+
self.mktemp('test.txt', '\n'.join(bad_words + good_words), 'utf-8')
381+
# For this to work, we need to run on either Hunspell or Aspell, but not both, as otherwise the dictionary
382+
# will be overwritten with the format for the wrong spell checker.
383+
self.assert_spellcheck('.skip_compile.yml', [], skip_dict_compile=False, only_one=True)
384+
self.assert_spellcheck('.skip_compile.yml', [], skip_dict_compile=True, only_one=True)

tests/util.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,17 @@ def assert_context(self, config_file, expected, names=None, groups=None, sources
184184
context.append(results.context)
185185
self.assertEqual([(f'{self.tempdir}/{i}' if i else i) for i in expected[:]], context)
186186

187-
def assert_spellcheck(self, config_file, expected, names=None, groups=None, sources=None, verbose=4):
187+
def assert_spellcheck(
188+
self,
189+
config_file,
190+
expected,
191+
names=None,
192+
groups=None,
193+
sources=None,
194+
verbose=4,
195+
skip_dict_compile=False,
196+
only_one=False
197+
):
188198
"""Spell check."""
189199

190200
hunspell_location = which(HUNSPELL)
@@ -209,12 +219,15 @@ def assert_spellcheck(self, config_file, expected, names=None, groups=None, sour
209219
checker='hunspell',
210220
binary=hunspell_location,
211221
debug=True,
212-
verbose=verbose
222+
verbose=verbose,
223+
skip_dict_compile=skip_dict_compile
213224
):
214225
if results.error:
215226
print(results.error)
216227
words |= set(results.words)
217228
self.assertEqual(sorted(expected), sorted(words))
229+
if only_one:
230+
return
218231
if aspell_location:
219232
words = set()
220233
for results in spellcheck(
@@ -225,9 +238,12 @@ def assert_spellcheck(self, config_file, expected, names=None, groups=None, sour
225238
checker='aspell',
226239
binary=aspell_location,
227240
debug=True,
228-
verbose=verbose
241+
verbose=verbose,
242+
skip_dict_compile=skip_dict_compile
229243
):
230244
if results.error:
231245
print(results.error)
232246
words |= set(results.words)
233247
self.assertEqual(sorted(expected), sorted(words))
248+
if only_one:
249+
return

0 commit comments

Comments
 (0)