mmschlk · mmschlk · May 14, 2025 · Mar 20, 2025 · Mar 20, 2025 · Mar 20, 2025
diff --git a/docs/source/notebooks/language_notebooks/language_model_game.ipynb b/docs/source/notebooks/language_notebooks/language_model_game.ipynb
diff --git a/docs/source/notebooks/vision_notebooks/vision_transformer.ipynb b/docs/source/notebooks/vision_notebooks/vision_transformer.ipynb
diff --git a/pyproject.toml b/pyproject.toml
@@ -15,10 +15,12 @@ dependencies = [
     "scikit-learn",
     "tqdm",
     "requests",
+    "sparse-transform",
+    "galois",
     # plotting
     "matplotlib",
     "networkx",
-    "colour"
+    "colour",
 ]
 authors = [
     {name = "Maximilian Muschalik", email = "Maximilian.Muschalik@lmu.de"},

diff --git a/shapiq/__init__.py b/shapiq/__init__.py
@@ -7,6 +7,7 @@
 # approximator classes
 from .approximator import (
     SHAPIQ,
+    SPEX,
     SVARM,
     SVARMIQ,
     InconsistentKernelSHAPIQ,
@@ -95,6 +96,7 @@
     "SVARMIQ",
     "kADDSHAP",
     "UnbiasedKernelSHAP",
+    "SPEX",
     # explainers
     "Explainer",
     "TabularExplainer",

diff --git a/shapiq/approximator/__init__.py b/shapiq/approximator/__init__.py
@@ -14,6 +14,7 @@
     RegressionFSII,
     kADDSHAP,
 )
+from .sparse import SPEX
 
 # contains all SV approximators
 SV_APPROXIMATORS: list[Approximator.__class__] = [
@@ -24,6 +25,7 @@
     PermutationSamplingSV,
     KernelSHAP,
     kADDSHAP,
+    SPEX,
 ]
 
 # contains all SI approximators
@@ -44,6 +46,7 @@
     InconsistentKernelSHAPIQ,
     SVARMIQ,
     SHAPIQ,
+    SPEX,
 ]
 
 # contains all approximators that can be used for STII
@@ -53,6 +56,7 @@
     InconsistentKernelSHAPIQ,
     SVARMIQ,
     SHAPIQ,
+    SPEX,
 ]
 
 # contains all approximators that can be used for FSII
@@ -62,11 +66,13 @@
     InconsistentKernelSHAPIQ,
     SVARMIQ,
     SHAPIQ,
+    SPEX,
 ]
 
 # contains all approximators that can be used for FBII
 FBII_APPROXIMATORS: list[Approximator.__class__] = [
     RegressionFBII,
+    SPEX,
 ]
 
 __all__ = [
@@ -84,6 +90,7 @@
     "SVARM",
     "SVARMIQ",
     "kADDSHAP",
+    "SPEX",
     "UnbiasedKernelSHAP",
     "SV_APPROXIMATORS",
     "SI_APPROXIMATORS",

diff --git a/shapiq/approximator/_base.py b/shapiq/approximator/_base.py
@@ -69,6 +69,7 @@ def __init__(
         pairing_trick: bool = False,
         sampling_weights: np.ndarray[float] | None = None,
         random_state: int | None = None,
+        initialize_dict: bool = True,
     ) -> None:
         # check if index can be approximated
         self.index: str = index
@@ -89,11 +90,16 @@ def __init__(
         self._grand_coalition_tuple = tuple(range(self.n))
         self._grand_coalition_array: np.ndarray = np.arange(self.n + 1, dtype=int)
         self.iteration_cost: int = 1  # default value, can be overwritten by subclasses
-        self._interaction_lookup = generate_interaction_lookup(
-            self.n,
-            self.min_order,
-            self.max_order,
-        )
+
+        # The interaction_lookup is not initialized in some cases due to performance reasons
+        if initialize_dict:
+            self._interaction_lookup = generate_interaction_lookup(
+                self.n,
+                self.min_order,
+                self.max_order,
+            )
+        else:
+            self._interaction_lookup = {}
 
         # set up random state and random number generators
         self._random_state: int | None = random_state

diff --git a/shapiq/approximator/sparse/__init__.py b/shapiq/approximator/sparse/__init__.py
@@ -0,0 +1,7 @@
+from ._base import Sparse
+from .spex import SPEX
+
+__all__ = [
+    "SPEX",
+    "Sparse",
+]
diff --git a/shapiq/approximator/sparse/_base.py b/shapiq/approximator/sparse/_base.py
@@ -0,0 +1,251 @@
+from __future__ import annotations
+
+import copy
+from collections.abc import Callable
+
+import numpy as np
+from sparse_transform.qsft.qsft import transform as sparse_fourier_transform
+from sparse_transform.qsft.signals.input_signal_subsampled import (
+    SubsampledSignal as SubsampledSignalFourier,
+)
+from sparse_transform.qsft.utils.general import fourier_to_mobius as fourier_to_moebius
+from sparse_transform.qsft.utils.query import get_bch_decoder
+
+from ...game_theory.indices import is_index_aggregated
+from ...game_theory.moebius_converter import MoebiusConverter
+from ...interaction_values import InteractionValues
+from .._base import Approximator
+
+
+class Sparse(Approximator):
+    """Approximator for interaction values using sparse transformation techniques.
+
+    This class implements a sparse approximation method for computing various interaction indices
+    using sparse Fourier transforms. It efficiently estimates interaction values with a limited
+    sample budget by leveraging sparsity in the Fourier domain.
+
+    Attributes:
+        transform_type (str): Type of transform used (currently only "fourier" is supported).
+        t (int): Error parameter for the sparse Fourier transform (starts at 5, lowered if needed).
+        query_args (dict): Parameters for querying the signal.
+        decoder_args (dict): Parameters for decoding the transform.
+
+    Args:
+        n (int): Number of players/features.
+        index (str): Type of interaction index to compute (e.g., "STII", "FBII", "FSII").
+        max_order (int, optional): Maximum interaction order to compute (``None`` means ``n``). Using it
+            is discouraged: the sparse approximation implicitly adapts the order to budget and function.
+        top_order (bool, optional): If True, only compute interactions of exactly max_order.
+            If False, compute interactions up to max_order. Defaults to False.
+        random_state (int, optional): Random seed for reproducibility. Defaults to None.
+        transform_type (str, optional): Type of transform to use. Currently only "fourier"
+            is supported. Defaults to "fourier".
+        decoder_type (str, optional): Type of decoder to use, either "soft" or "hard".
+            Defaults to "soft"; ``None`` selects "hard".
+
+    Raises:
+        ValueError: If transform_type is not "fourier" or if decoder_type is not "soft" or "hard".
+    """
+
+    def __init__(
+        self,
+        n: int,
+        index: str,
+        max_order: int | None = None,
+        top_order: bool = False,
+        random_state: int | None = None,
+        transform_type: str = "fourier",
+        decoder_type: str | None = "soft",
+    ) -> None:
+        if transform_type.lower() not in ["fourier"]:
+            msg = "transform_type must be 'fourier'"
+            raise ValueError(msg)
+        self.transform_type = transform_type.lower()
+        self.t = 5  # initial error parameter; may be lowered by _set_transform_budget
+        self.decoder_type = "hard" if decoder_type is None else decoder_type.lower()
+        if self.decoder_type not in ["soft", "hard"]:
+            msg = "decoder_type must be 'soft' or 'hard'"
+            raise ValueError(msg)
+        # The sampling parameters for the Fourier transform ("b" is added per call to approximate)
+        self.query_args = {
+            "query_method": "complex",
+            "num_subsample": 3,
+            "delays_method_source": "joint-coded",
+            "subsampling_method": "qsft",
+            "delays_method_channel": "identity-siso",
+            "num_repeat": 1,
+            "t": self.t,
+        }
+        self.decoder_args = {
+            "num_subsample": 3,
+            "num_repeat": 1,
+            "reconstruct_method_source": "coded",
+            "peeling_method": "multi-detect",
+            "reconstruct_method_channel": "identity-siso"
+            if self.decoder_type == "soft"
+            else "identity",
+            "regress": "lasso",
+            "res_energy_cutoff": 0.9,
+            "source_decoder": get_bch_decoder(n, self.t, self.decoder_type),
+        }
+        super().__init__(
+            n=n,
+            max_order=n if max_order is None else max_order,
+            index=index,
+            top_order=top_order,
+            random_state=random_state,
+            initialize_dict=False,  # Important for performance; the lookup is set after the transform
+        )
+
+    def approximate(
+        self,
+        budget: int,
+        game: Callable[[np.ndarray], np.ndarray],
+    ) -> InteractionValues:
+        """Approximates the interaction values using a sparse transform approach.
+
+        Args:
+            budget: The budget for the approximation.
+            game: The game function that returns the values for the coalitions.
+
+        Returns:
+            The approximated interaction values for the requested index.
+        """
+        # Find the maximum value of b that fits within the given sample budget and get the used budget
+        used_budget = self._set_transform_budget(budget)
+        signal = SubsampledSignalFourier(
+            func=lambda inputs: game(inputs.astype(bool)),
+            n=self.n,
+            q=2,
+            query_args=self.query_args,
+        )
+        # Extract the coefficients of the original transform
+        initial_transform = {
+            tuple(np.nonzero(key)[0]): np.real(value)
+            for key, value in sparse_fourier_transform(signal, **self.decoder_args).items()
+        }
+        # If we are using the fourier transform, we need to convert it to a Moebius transform
+        moebius_transform = fourier_to_moebius(initial_transform)
+        # Convert the Moebius transform to the desired index
+        result = self._process_moebius(moebius_transform=moebius_transform)
+        if self.top_order:  # keep only the interactions of exactly max_order
+            result = self._filter_order(result)
+        # The baseline is the empty-set Moebius coefficient; the lookup only maps to positions
+        output = InteractionValues(
+            values=result,
+            index=self.approximation_index,
+            min_order=self.min_order,
+            max_order=self.max_order,
+            n_players=self.n,
+            interaction_lookup=copy.deepcopy(self.interaction_lookup),
+            estimated=True,
+            estimation_budget=used_budget,
+            baseline_value=moebius_transform.get((), 0.0),
+        )
+        # Aggregated indices need an additional aggregation step
+        if is_index_aggregated(self.index):
+            output = self.aggregate_interaction_values(output)
+        return output
+
+    def _filter_order(self, result: np.ndarray) -> np.ndarray:
+        """Filters the interactions to keep only those of the maximum order.
+
+        This method is used when top_order=True to filter out all interactions that are not
+        of exactly the maximum order (self.max_order).
+
+        Args:
+            result: Array of interaction values, indexed by the positions in the interaction lookup.
+
+        Returns:
+            Filtered array containing only interaction values of the maximum order.
+            The method also updates the internal _interaction_lookup dictionary.
+        """
+        filtered_interactions = {}
+        filtered_results = []
+        # Read each value through its stored position instead of relying on the dict order
+        for key, pos in self.interaction_lookup.items():
+            if len(key) == self.max_order:
+                # the next free slot is the new position of this interaction
+                filtered_interactions[key] = len(filtered_results)
+                filtered_results.append(result[pos])
+        self._interaction_lookup = filtered_interactions
+        return np.array(filtered_results)
+
+    def _process_moebius(self, moebius_transform: dict[tuple, float]) -> np.ndarray:
+        """Processes the Moebius transform to extract the desired index.
+
+        Args:
+            moebius_transform: The Moebius transform to process (dict mapping tuples to float values).
+
+        Returns:
+            np.ndarray: The converted interaction values based on the specified index.
+            The function also updates the internal _interaction_lookup dictionary.
+        """
+        moebius_interactions = InteractionValues(
+            values=np.array(list(moebius_transform.values())),
+            index="Moebius",
+            min_order=self.min_order,
+            max_order=self.max_order,
+            n_players=self.n,
+            interaction_lookup={key: i for i, key in enumerate(moebius_transform)},
+            estimated=True,
+            baseline_value=moebius_transform.get((), 0.0),
+        )
+        autoconverter = MoebiusConverter(moebius_coefficients=moebius_interactions)
+        converted_interaction_values = autoconverter(index=self.index, order=self.max_order)
+        self._interaction_lookup = converted_interaction_values.interaction_lookup
+        return converted_interaction_values.values
+
+    def _set_transform_budget(self, budget: int) -> int:
+        """Sets the appropriate transform budget parameters based on the given sample budget.
+
+        This method calculates the maximum possible 'b' parameter (number of bits to subsample)
+        that fits within the provided budget, then configures the query and decoder arguments
+        accordingly. If 'b' is too small (<= 2), the error parameter 't' is lowered step by step
+        (but not below 2) until 'b' is sufficient; a lowered 't' is kept for subsequent calls.
+
+        The instance is only modified once a valid configuration has been found, so a failed call
+        leaves 't', the query arguments and the source decoder consistent with each other.
+
+        Args:
+            budget: The maximum number of samples allowed for the approximation.
+
+        Returns:
+            int: The number of samples that will be used for the selected 'b' and 't'.
+
+        Raises:
+            ValueError: If the budget is too low to compute the transform with acceptable parameters.
+        """
+        t = self.t
+        # Search on a copy so that an unsuccessful search does not touch the stored arguments
+        trial_args = dict(self.query_args)
+        b = SubsampledSignalFourier.get_b_for_sample_budget(
+            budget, self.n, t, 2, trial_args
+        )
+        # Lower the error parameter 't' step by step while 'b' is still too low
+        while b <= 2 and t > 2:
+            t -= 1
+            trial_args["t"] = t
+            b = SubsampledSignalFourier.get_b_for_sample_budget(
+                budget, self.n, t, 2, trial_args
+            )
+
+        # If 'b' is still too low, raise an error
+        if b <= 2:
+            msg = "Insufficient budget to compute the transform. Increase the budget or use a different approximator."
+            raise ValueError(msg)
+
+        used_budget = SubsampledSignalFourier.get_number_of_samples(
+            self.n, b, t, 2, trial_args
+        )
+        # Commit a lowered 't' together with a source decoder built for that 't'
+        if t != self.t:
+            self.t = t
+            self.query_args["t"] = t
+            self.decoder_args["source_decoder"] = get_bch_decoder(
+                self.n, t, self.decoder_type
+            )
+        # Store the final 'b' value
+        self.query_args["b"] = b
+        self.decoder_args["b"] = b
+        return used_budget