pytorch
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
Lines changed: 91 additions & 23 deletions
@@ -132,29 +132,97 @@ jobs:
           mkdir -p "$TEST_REPORTS_DIR"
           echo "$TEST_REPORTS_DIR"
 
-          # Do autotuning but do not record the results
-          python benchmarks/run.py \
-              --op $KERNEL_LIST \
-              --metrics speedup,accuracy \
-              --latency-measure-mode triton_do_bench \
-              --cudagraph \
-              --only triton,liger,torch_compile,helion \
-              --only-match-mode prefix-with-baseline \
-              --exit-on-exception
-
-          # Relax the GPU
-          sleep 5m
-
-          # Run again with cache and record results
-          python benchmarks/run.py \
-              --op $KERNEL_LIST \
-              --metrics speedup,accuracy \
-              --latency-measure-mode triton_do_bench \
-              --cudagraph \
-              --only triton,liger,torch_compile,helion \
-              --only-match-mode prefix-with-baseline \
-              --output "$TEST_REPORTS_DIR/helionbench.json" \
-              --exit-on-exception
+          # Run kernels one by one in a for loop. Each kernel gets an unrecorded
+          # autotuning pass, a pause, and then a cached pass whose results are
+          # written to $TEST_REPORTS_DIR/helionbench_<kernel>.json.
+          for kernel in "${SHARD_KERNELS[@]}"; do
+            echo "=========================================="
+            echo "Running benchmark for kernel: $kernel"
+            echo "=========================================="
+
+            # Get available implementations and baseline for this kernel.
+            # awk exits 0 even when no line matches; grep would exit 1 and, with
+            # errexit on (GitHub Actions' default for bash), abort the step
+            # before the skip branch below. index() matches the name literally.
+            KERNEL_INFO=$(python benchmarks/run.py --list-impls-for-benchmark-ci --op "$kernel" \
+              | awk -v prefix="$kernel:" 'index($0, prefix) == 1')
+            # Pull the space-free token that follows "impls=" and "baseline=" out of
+            # the matching line(s).
+            IMPLS=$(sed -n 's/.*impls=\([^ ]*\).*/\1/p' <<<"$KERNEL_INFO")
+            BASELINE=$(sed -n 's/.*baseline=\([^ ]*\).*/\1/p' <<<"$KERNEL_INFO")
+
+            if [[ -z "$IMPLS" ]]; then
+              echo "Warning: No implementations found for kernel $kernel, skipping..."
+              continue
+            fi
+
+            if [[ -z "$BASELINE" ]]; then
+              echo "Warning: No baseline found for kernel $kernel"
+            else
+              echo "Using baseline: $BASELINE"
+            fi
+            echo "Available implementations for $kernel: $IMPLS"
+
+            # Build the command once as an argv array instead of a string passed
+            # to eval, so each value reaches run.py as exactly one argument.
+            RUN_CMD=(
+              python benchmarks/run.py
+              --op "$kernel"
+              --metrics speedup,accuracy
+              --latency-measure-mode triton_do_bench
+              --cudagraph
+              --only "$IMPLS"
+              --only-match-mode prefix-with-baseline
+            )
+
+            # Add baseline if available
+            if [[ -n "$BASELINE" ]]; then
+              RUN_CMD+=(--baseline "$BASELINE")
+            fi
+
+            # Do autotuning but do not record the results
+            "${RUN_CMD[@]}" --exit-on-exception
+
+            # Relax the GPU
+            sleep 2m
+
+            # Run again with cache and record results
+            KERNEL_REPORT="$TEST_REPORTS_DIR/helionbench_${kernel}.json"
+            "${RUN_CMD[@]}" --output "$KERNEL_REPORT" --exit-on-exception
+
+            # Stop the whole step if the recorded pass left no usable report.
+            if [[ ! -s "$KERNEL_REPORT" ]]; then
+              echo "❌ helionbench_${kernel}.json is missing or empty"
+              exit 1
+            fi
+            echo "✅ Completed benchmark for kernel: $kernel"
+          done
+
+          # Merge all individual kernel results into a single file. The Python is
+          # indented with the rest of this YAML block scalar; the reports directory
+          # is passed as sys.argv[1] so the path is never pasted into Python source.
+          echo "Merging benchmark results..."
+          python -c "
+          import json
+          import sys
+          from pathlib import Path
+
+          reports_dir = Path(sys.argv[1])
+          result_files = sorted(reports_dir.glob('helionbench_*.json'))
+          if not result_files:
+              print('❌ No result files found')
+              sys.exit(1)
+
+          merged_results = []
+          for result_file in result_files:
+              kernel_results = json.loads(result_file.read_text())
+              if not isinstance(kernel_results, list):
+                  sys.exit(f'❌ {result_file} does not contain a JSON list')
+              merged_results.extend(kernel_results)
+
+          (reports_dir / 'helionbench.json').write_text(json.dumps(merged_results, indent=2))
+          print(f'✅ Merged {len(result_files)} kernel results into helionbench.json')
+          " "$TEST_REPORTS_DIR"
 
           if [[ ! -s "$TEST_REPORTS_DIR/helionbench.json" ]]; then
             echo "❌ helionbench.json is missing or empty"