Add mzn-bench plot-cactus to create plots to compare solved instances

Dekker1 · Dekker1 · commit 23ef6485880f · 2026-02-27T09:46:13.000+11:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -19,4 +19,4 @@ mzn-bench = 'mzn_bench.cli:main'
 
 [project.optional-dependencies]
 scripts = ["pandas>=2.2.3", "pytest>=8.3.4", "tabulate>=0.9.0"]
-plotting = ["bokeh>=3.6.2"]
+plotting = ["bokeh>=3.6.2", "matplotlib>=3.10.1", "seaborn>=0.13.2"]
diff --git a/src/mzn_bench/analysis/plot.py b/src/mzn_bench/analysis/plot.py
@@ -9,6 +9,57 @@
 from bokeh.palettes import Palette, Spectral5
 from bokeh.plotting import figure, gridplot
 from bokeh.transform import factor_cmap
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+
+def plot_cactus(stats: pd.DataFrame):
+    """Draw a cactus plot of the solved instances of every configuration.
+
+    A run counts as solved when its status is OPTIMAL_SOLUTION, or when it is
+    SATISFIED and its method is "satisfy". Each configuration becomes one line
+    whose k-th point is the time of its k-th fastest solved run.
+
+    `stats` must provide the columns "configuration", "status", "method" and
+    "time". Returns the matplotlib figure; its axes stay empty (and without a
+    legend) when no run was solved.
+    """
+    solved = stats[
+        (stats["status"] == "OPTIMAL_SOLUTION")
+        | ((stats["status"] == "SATISFIED") & (stats["method"] == "satisfy"))
+    ]
+
+    frames = []
+    for conf in stats["configuration"].unique():
+        times = sorted(solved.loc[solved["configuration"] == conf, "time"])
+        if not times:
+            # Empty frames add nothing and trip pandas' concat deprecation
+            continue
+        # Row k (1-based) holds the time within which k instances were solved
+        t = pd.DataFrame({"time": times, "n_solved": range(1, 1 + len(times))})
+        t["configuration"] = conf
+        frames.append(t)
+
+    fig, ax = plt.subplots(figsize=(12, 5))
+    if frames:  # pd.concat([]) raises, and move_legend requires a legend
+        sns.lineplot(
+            ax=ax,
+            data=pd.concat(frames, ignore_index=True),
+            y="time",
+            x="n_solved",
+            hue="configuration",
+            style="configuration",
+            markers=True,
+            dashes=False,
+        )
+        sns.move_legend(ax, "upper left", bbox_to_anchor=(1.01, 1), borderaxespad=0)
+    ax.set(
+        title="Comparison of solved instances between different configurations",
+        ylabel="CPU time(seconds)",
+        xlabel="# of instances solved",
+    )
+    fig.tight_layout()
+    return fig
 
 
 def plot_all_instances(
diff --git a/src/mzn_bench/cli.py b/src/mzn_bench/cli.py
@@ -74,7 +74,7 @@ def collect_instances(shared_data: Optional[str], benchmarks_location: str):
     "--param",
     "-p",
     help="Additional solution parameters to add to each row of the CSV file",
-    default=["configuration"],
+    default=[],
     multiple=True,
 )
 @click.argument("dirs", nargs=-1, type=click.Path(exists=True, dir_okay=True))
@@ -102,9 +102,12 @@ def collect_objectives_(
     count = 0
     additional_params = list(additional_params)
     with Path(out_file).open(mode="w") as file:
+        labels = STANDARD_KEYS.copy()
+        labels.remove("status")  # No need to output SAT every time
+        labels = labels + ["run", "objective"] + additional_params
         writer = csv.DictWriter(
             file,
-            STANDARD_KEYS + ["run", "objective"] + additional_params,
+            labels,
             dialect="unix",
             extrasaction="ignore",
         )
@@ -419,5 +422,66 @@ def compare_configurations(
         exit(1)
 
 
+@main.command()
+@click.argument(
+    "objectives", metavar="objs_file", type=click.Path(exists=True, file_okay=True)
+)
+@click.argument(
+    "statistics", metavar="stats_file", type=click.Path(exists=True, file_okay=True)
+)
+@click.argument("out_file", type=click.Path(file_okay=True))
+def plot_all_instances(
+    objectives: str,
+    statistics: str,
+    out_file: str,
+):
+    """Plot all instances in a grid
+
+    OBJS_FILE is the CSV file containing aggregated solutions data
+    STATS_FILE is the CSV file containing aggregated statistics data
+    OUT_FILE is the file to write the plot to
+    """
+    try:
+        # Imported lazily: the plotting dependencies are optional extras
+        from .analysis.collect import read_csv
+        from .analysis.plot import plot_all_instances as fn
+        from bokeh.plotting import save
+
+        objs, stats = read_csv(objectives, statistics)
+        figure = fn(objs, stats)
+        save(figure, filename=out_file)
+    except ImportError:
+        click.echo(IMPORT_ERROR, err=True)
+        exit(1)
+
+
+@main.command()
+@click.argument(
+    "statistics", metavar="stats_file", type=click.Path(exists=True, file_okay=True)
+)
+@click.argument("out_file", type=click.Path(file_okay=True))
+def plot_cactus(
+    statistics: str,
+    out_file: str,
+):
+    """Plots all configurations in a cactus plot of solved instances
+
+    STATS_FILE is the CSV file containing aggregated statistics data
+    OUT_FILE is the file to write the plot to
+    """
+    try:
+        # Imported lazily: pandas and the plotting stack are optional extras
+        import pandas as pd
+        from .analysis.plot import plot_cactus as fn
+        stats = pd.read_csv(statistics)
+        if "data_file" in stats.columns:  # the cactus plot does not need it
+            stats["data_file"] = stats["data_file"].fillna("")
+        fig = fn(stats)
+        fig.savefig(out_file)
+    except ImportError:
+        click.echo(IMPORT_ERROR, err=True)
+        exit(1)
+
+
 if __name__ == "__main__":
     main()
diff --git a/src/mzn_bench/mzn_slurm.py b/src/mzn_bench/mzn_slurm.py
@@ -6,7 +6,7 @@
 import sys
 import time
 import traceback
-from dataclasses import asdict, dataclass, field, fields
+from dataclasses import dataclass, field, fields
 from datetime import timedelta
 from pathlib import Path
 from typing import Any, Dict, Iterable, List, NoReturn, Optional
diff --git a/uv.lock b/uv.lock