misc: Add benchmark filters to manual perf workflow (#787)

happy-lx · web-flow · commit a6f4a1821338 · 2026-03-13T10:18:07.000+08:00
Add a specific_benchmarks workflow input and pass it to parallel_sim.sh.
Filter workloads in-memory with case-insensitive substring matching.
Keep default behavior unchanged when filter is empty.

Change-Id: I7686d05c28aca7234aa9357be4d5bdf05d103236
diff --git a/.github/workflows/gem5-perf-template.yml b/.github/workflows/gem5-perf-template.yml
@@ -16,6 +16,11 @@ on:
         required: true
         type: string
         description: "Benchmark type: gcc12-spec06-0.3c, gcc12-spec06-0.8c, gcc12-spec06-1.0c, gcc15-spec06-0.3c, gcc15-spec06-0.8c, gcc15-spec06-1.0c, spec17-1.0c, spec06-rvv-1.0c or spec06int-rvv-0.8c"
+      specific_benchmarks:
+        required: false
+        type: string
+        default: ""
+        description: "Comma-separated, case-insensitive benchmark filters (substring match). Leave empty to run all workloads in the checkpoint list."
       vector_type:
         required: false
         type: string
@@ -171,6 +176,7 @@ jobs:
             ${{ steps.config.outputs.checkpoint_list }} \
             ${{ steps.config.outputs.checkpoint_root_node}} \
             spec_all \
+            "${{ inputs.specific_benchmarks }}" \
             "${{ inputs.extra_args }}"
       - name: Setup gem5_data_proc environment
         run: |
@@ -244,6 +250,7 @@ jobs:
           echo "branch: ${GITHUB_REF#refs/heads/}" >> "$TARGET_DIR/metadata.txt"
           echo "run_number: $RUN_NUMBER" >> "$TARGET_DIR/metadata.txt"
           echo "benchmark_type: ${{ inputs.benchmark_type }}" >> "$TARGET_DIR/metadata.txt"
+          echo "specific_benchmarks: ${{ inputs.specific_benchmarks }}" >> "$TARGET_DIR/metadata.txt"
           echo "workflow_run_id: ${{ github.run_id }}" >> "$TARGET_DIR/metadata.txt"
           
           # Auto cleanup: keep only last 200 runs for this benchmark type
diff --git a/.github/workflows/manual-perf.yml b/.github/workflows/manual-perf.yml
@@ -27,6 +27,11 @@ on:
           - spec17-1.0c
           - spec06-rvv-1.0c
           - spec06int-rvv-0.8c
+      specific_benchmarks:
+        description: 'Specific benchmarks to run (comma-separated, e.g. "mcf,gcc"); leave empty to run all'
+        required: false
+        type: string
+        default: ''
       vector_type:
         description: 'Vector decode strategy (only for rvv benchmarks)'
         required: false
@@ -64,6 +69,7 @@ jobs:
     with:
       config_path: ${{ needs.setup.outputs.config_path }}
       benchmark_type: ${{ github.event.inputs.benchmark_type }}
+      specific_benchmarks: ${{ github.event.inputs.specific_benchmarks }}
       vector_type: ${{ github.event.inputs.vector_type }}
       pr_sha: ${{ needs.setup.outputs.pr_sha }}
       check_result: true
diff --git a/util/xs_scripts/parallel_sim.sh b/util/xs_scripts/parallel_sim.sh
@@ -3,14 +3,16 @@
 
 function print_help() {
     printf "Usage:
-    bash $0 <config_file_or_script> workload_list.lst checkpoint_top_dir task_tag [extra_gem5_args]
+    bash $0 <config_file_or_script> workload_list.lst checkpoint_top_dir task_tag [benchmark_filters] [extra_gem5_args]
 
 Arguments:
     config_file_or_script:  Config file (*.py) or wrapper script (*.sh).
                             If relative, it is resolved relative to the repo root (gem5_home).
     workload_list.lst:      List of workloads to run
     checkpoint_top_dir:     Root directory for checkpoints
     task_tag:               Tag for this experiment
+    benchmark_filters:      Optional comma-separated benchmark filters, case-insensitive.
+                            Empty means no filter (run all workloads in workload_list).
     extra_gem5_args:        Optional extra arguments for gem5 (only for .py mode)
 
 Examples:
@@ -20,8 +22,14 @@ Examples:
     # New mode (using .py config)
     bash $0 configs/example/idealkmhv3.py workload.lst /cpt/dir my_exp
 
+    # New mode with benchmark filters
+    bash $0 configs/example/idealkmhv3.py workload.lst /cpt/dir my_exp_subset \"mcf,gcc\"
+
     # New mode with extra args
-    bash $0 configs/example/idealkmhv3.py workload.lst /cpt/dir my_exp_nosc \"--disable-mgsc\"
+    bash $0 configs/example/idealkmhv3.py workload.lst /cpt/dir my_exp_nosc \"\" \"--disable-mgsc\"
+
+    # New mode with filters + extra args
+    bash $0 configs/example/idealkmhv3.py workload.lst /cpt/dir my_exp_nosc \"mcf,gcc\" \"--disable-mgsc\"
 \n"
     exit 1
 }
@@ -47,17 +55,28 @@ if [[ "$first_param" == *.sh ]]; then
     # Legacy mode: using wrapper script
     export use_legacy_mode=true
     export arch_script="$first_param"
+    export benchmark_filters="${5:-}"  # Optional 5th parameter in legacy mode
     echo "Legacy mode: using script $arch_script"
 else
     # New mode: using config file directly
     export use_legacy_mode=false
     export config_file="$first_param"
-    export extra_gem5_args="${5:-}"  # Optional 5th parameter
+    export benchmark_filters="${5:-}"  # Optional 5th parameter (new order)
+    export extra_gem5_args="${6:-}"    # Optional 6th parameter (new order)
+    # Backward compatibility: if only one optional arg and it looks like gem5 args,
+    # keep treating it as extra_gem5_args.
+    if [ "$#" -eq 5 ] && [[ "${5}" == -* ]]; then
+        export extra_gem5_args="${5}"
+        export benchmark_filters=""
+    fi
     echo "Config mode: using $config_file"
     if [ -n "$extra_gem5_args" ]; then
         echo "Extra gem5 args: $extra_gem5_args"
     fi
 fi
+if [ -n "${benchmark_filters//[[:space:],]/}" ]; then
+    echo "Benchmark filters: $benchmark_filters"
+fi
 
 # Note 1: workload list contains the workload name, checkpoint path, and parameters, looks like:
 #       astar_biglakes_122060000000 astar_biglakes_122060000000_0.244818/0/ 0 0 20 20
@@ -80,6 +99,49 @@ export full_work_dir=$ds/$tag # work dir wheter stats data stored
 mkdir -p $full_work_dir
 ln -sf $full_work_dir .  # optional, you can customize it yourself
 
+declare -a filtered_workloads=()
+
+# Select the lines of $workload_list that contain any of the comma-separated,
+# case-insensitive substrings in $benchmark_filters into filtered_workloads.
+# Empty filter: array stays empty. Unreadable list or no match: exit 1.
+function apply_benchmark_filter() {
+    if [ -z "${benchmark_filters//[[:space:],]/}" ]; then
+        echo "No benchmark filter provided, run all workloads."
+        return
+    fi
+    # Checked up front because awk's exit status is lost inside <( ... ).
+    if [ ! -r "$workload_list" ]; then
+        echo "Error: cannot read workload list '$workload_list'" >&2
+        exit 1
+    fi
+    mapfile -t filtered_workloads < <(awk -v filters="$benchmark_filters" '
+        BEGIN {
+            n = split(filters, raw_filters, ",")
+            valid = 0
+            for (i = 1; i <= n; i++) {
+                token = raw_filters[i]
+                gsub(/^[[:space:]]+|[[:space:]]+$/, "", token)
+                if (token != "") patterns[++valid] = tolower(token)
+            }
+        }
+        /^[[:space:]]*$/ { next }
+        {
+            lower_line = tolower($0)
+            for (i = 1; i <= valid; i++) {
+                if (index(lower_line, patterns[i]) > 0) { print $0; next }
+            }
+        }
+    ' "$workload_list")
+
+    local selected_count="${#filtered_workloads[@]}"
+    if [ "$selected_count" -eq 0 ]; then
+        echo "Error: benchmark_filters '$benchmark_filters' matched no workloads in '$workload_list'" >&2
+        exit 1
+    fi
+
+    echo "Applied benchmark filters: '$benchmark_filters' -> $selected_count workloads selected."
+}
+
 check() {
     if [ $1 -ne 0 ]; then
         echo FAIL
@@ -176,7 +238,12 @@ num_threads=${xsgem5_para_jobs:-63}
 function parallel_run() {
     # We use gnu parallel to control the parallelism.
     # If your server has 32 core and 64 SMT threads, we suggest to run with no more than 32 threads.
-    cat $workload_list | parallel -a - -j $num_threads arg_wrapper {}
+    if [ "${#filtered_workloads[@]}" -gt 0 ]; then  # non-empty only after apply_benchmark_filter matched lines
+        printf '%s\n' "${filtered_workloads[@]}" | parallel -a - -j $num_threads arg_wrapper {}
+    else  # array still empty: feed every line of the workload list
+        cat "$workload_list" | parallel -a - -j $num_threads arg_wrapper {}
+    fi
 }
 
+apply_benchmark_filter
 parallel_run