Skip to content

Commit 9a3158f

Browse files
aofei-liu and claude authored
refactor: replace multiprocessing.Pool with ThreadPoolExecutor in process_single_position (#396)
* refactor: replace multiprocessing.Pool with ThreadPoolExecutor in process_single_position The transform functions passed to process_single_position (numpy, scipy, PyTorch, ANTsPy) all release the GIL, making threads sufficient for parallelism. ThreadPoolExecutor avoids the overhead of spawn-based multiprocessing (re-importing libraries, serializing arguments) and plays better with Nextflow's per-task resource accounting. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: handle os.cpu_count() returning None and add threaded path test Guard against os.cpu_count() returning None in containerized environments by falling back to 1. Add test_process_single_position_threaded to exercise the ThreadPoolExecutor code path with num_processes=2. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * refactor: deduplicate test_process_single_position and threaded variant Extract shared logic into _run_process_single_position helper so the threaded test delegates instead of duplicating the entire test body. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * refactor: inline helper and parameterize num_processes via hypothesis Remove _run_process_single_position helper and merge the two test functions into a single test_process_single_position that samples num_processes from [1, 2]. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * refactor: rename num_processes to num_threads with deprecation shim Add num_threads parameter to process_single_position and deprecate num_processes. When num_processes is passed, a DeprecationWarning is emitted and the value is forwarded to num_threads if num_threads is smaller. Update tests to use num_threads. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9beb2a4 commit 9a3158f

2 files changed

Lines changed: 30 additions & 23 deletions

File tree

src/iohub/ngff/utils.py

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22

33
import inspect
44
import itertools
5-
import multiprocessing as mp
5+
import os
6+
import warnings
67
from collections import defaultdict
78
from collections.abc import Callable, Sequence
9+
from concurrent.futures import ThreadPoolExecutor
810
from functools import partial
911
from pathlib import Path
1012
from typing import Any, Literal
@@ -283,7 +285,8 @@ def process_single_position(
283285
output_channel_indices: list[slice] | list[list[int]] | None = None,
284286
input_time_indices: list[int] | None = None,
285287
output_time_indices: list[int] | None = None,
286-
num_processes: int = 1,
288+
num_processes: int | None = None,
289+
num_threads: int = 1,
287290
**kwargs,
288291
) -> None:
289292
"""
@@ -324,14 +327,26 @@ def process_single_position(
324327
Must match input_channel_indices if not empty.
325328
Defaults to None.
326329
num_processes : int, optional
327-
Number of simultaneous processes per position. Defaults to 1.
330+
Deprecated. Use ``num_threads`` instead. When set, its value is
331+
forwarded to ``num_threads``. If both are set to non-default values
332+
and differ, ``num_threads`` takes precedence. Defaults to None.
333+
num_threads : int, optional
334+
Number of simultaneous threads per position. Defaults to 1.
328335
kwargs : dict, optional
329336
Additional arguments to pass to the function.
330337
A dictionary with key "extra_metadata"
331338
can be passed to be stored at a FOV level,
332339
e.g.,
333340
kwargs={"extra_metadata": {"Temperature": 37.5, "CO2_level": 0.5}}.
334341
"""
342+
if num_processes is not None:
343+
warnings.warn(
344+
"num_processes is deprecated. Use num_threads instead.",
345+
DeprecationWarning,
346+
stacklevel=2,
347+
)
348+
if num_threads < num_processes:
349+
num_threads = num_processes
335350
click.echo(f"Function to be applied: \t{func}")
336351
click.echo(f"Input data path:\t{input_position_path}")
337352
click.echo(f"Output data path:\t{output_position_path}")
@@ -395,21 +410,19 @@ def process_single_position(
395410
output_position_path,
396411
**kwargs,
397412
)
398-
num_processes = min(num_processes, len(flat_iterable), mp.cpu_count())
399-
click.echo(f"\nStarting multiprocess pool with {num_processes} processes")
400-
if num_processes <= 1:
401-
# Run serially — Pool(1) with spawn unnecessarily forks a subprocess
413+
cpu_count = os.cpu_count() or 1
414+
num_workers = min(num_threads, len(flat_iterable), cpu_count)
415+
click.echo(f"\nStarting thread pool with {num_workers} workers")
416+
if num_workers <= 1:
402417
for args in flat_iterable:
403418
partial_apply_transform_to_czyx_and_save(*args)
404419
else:
405-
# NOTE: use spawn to work around tensorstore#61
406-
context = mp.get_context("spawn")
407-
with context.Pool(num_processes) as p:
408-
p.starmap(
409-
partial_apply_transform_to_czyx_and_save,
420+
with ThreadPoolExecutor(max_workers=num_workers) as executor:
421+
list(executor.map(
422+
lambda args: partial_apply_transform_to_czyx_and_save(*args),
410423
flat_iterable,
411-
)
412-
click.echo("Shut down multiprocess pool")
424+
))
425+
click.echo("Shut down thread pool")
413426

414427

415428
# -- Pure utility functions ------------------------------------------------

tests/ngff/test_ngff_utils.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -723,9 +723,10 @@ def test_match_indices_to_batches(indices, shard_size):
723723
@given(
724724
setup=process_single_position_setup(),
725725
constant=st.integers(min_value=1, max_value=3),
726+
num_threads=st.sampled_from([1, 2]),
726727
)
727728
@settings(max_examples=3, deadline=None)
728-
def test_process_single_position(setup, constant):
729+
def test_process_single_position(setup, constant, num_threads):
729730
(
730731
position_keys,
731732
channel_names,
@@ -739,7 +740,6 @@ def test_process_single_position(setup, constant):
739740
version,
740741
) = setup
741742

742-
# Use the enhanced context manager to get both input and output store paths
743743
with _temp_ome_zarr_stores(
744744
position_keys=position_keys,
745745
channel_names=channel_names,
@@ -750,16 +750,13 @@ def test_process_single_position(setup, constant):
750750
dtype=dtype,
751751
version=version,
752752
) as (input_store_path, output_store_path):
753-
# Populate Store with random data
754753
populate_store(input_store_path, position_keys, shape, dtype)
755754

756-
# Choose a single position to process (e.g., the first one)
757755
for position_key_tuple in position_keys:
758756
input_position_path = input_store_path / Path(*position_key_tuple)
759757
output_position_path = output_store_path / Path(*position_key_tuple)
760758
kwargs = {"constant": constant, "extra_metadata": {"temp": 10}}
761759

762-
# Apply the transformation using process_single_position
763760
process_single_position(
764761
func=dummy_transform,
765762
input_position_path=input_position_path,
@@ -768,18 +765,15 @@ def test_process_single_position(setup, constant):
768765
output_channel_indices=channel_indices,
769766
input_time_indices=time_indices,
770767
output_time_indices=time_indices,
768+
num_threads=num_threads,
771769
**kwargs,
772770
)
773771

774-
# Handle None for process_single_position_setup
775772
if time_indices is None:
776773
time_indices = list(range(shape[0]))
777774
if channel_indices is None:
778775
channel_indices = [[c] for c in range(shape[1])]
779776

780-
print("time_indices", time_indices)
781-
print("channel_indices", channel_indices)
782-
# Verify the transformation
783777
iterable = itertools.product(time_indices, channel_indices)
784778
for t_idx, chan_idx in iterable:
785779
verify_transformation(

0 commit comments

Comments (0)