Skip to content

[Reland] Login to Docker from the workflows #24

[Reland] Login to Docker from the workflows

[Reland] Login to Docker from the workflows #24

Workflow file for this run

# Reusable benchmark workflow; invoked via workflow_call from other workflows.
name: Benchmark

Check failure on line 1 in .github/workflows/benchmark.yml

View workflow run for this annotation

GitHub Actions / .github/workflows/benchmark.yml

Invalid workflow file

(Line: 28, Col: 9): Unrecognized named-value: 'runner'. Located at position 11 within expression: !contains(runner, 'b200') || secrets.DOCKER_HUB_READONLY_TOKEN != ''
on:
  # Reusable workflow: every runtime knob is supplied by the caller as a
  # required string input.
  workflow_call:
    inputs:
      # Label of the runner to schedule the job on (e.g. a DGX B200 label).
      runner:
        required: true
        type: string
      # Python version for uv/venv, e.g. "3.12".
      python-version:
        required: true
        type: string
      # Container image the job runs in.
      image:
        required: true
        type: string
      # PyTorch nightly wheel channel suffix, e.g. "cu128".
      runtime-version:
        required: true
        type: string
      # Extra `docker run` options for the job container.
      container-options:
        required: true
        type: string
      # Short human-readable tag used in the job name and step conditions.
      alias:
        required: true
        type: string
jobs:
  benchmark:
    # Skip on DGX B200 runners when the Docker Hub read-only token is empty:
    # the authenticated container pull below would fail there.
    # Fix: bare `runner` is not a recognized named-value in job-level `if`
    # expressions (see the workflow-file validation error); the workflow_call
    # input must be read as `inputs.runner`.
    # NOTE(review): per GitHub's context-availability table, the `secrets`
    # context may not be usable in `jobs.<job_id>.if` — confirm; if it is
    # rejected, pass a caller-supplied boolean input (e.g. `has-docker-token`)
    # and test that instead.
    if: ${{ !contains(inputs.runner, 'b200') || secrets.DOCKER_HUB_READONLY_TOKEN != '' }}
    name: benchmark-${{ inputs.runtime-version }}-py${{ inputs.python-version }}-${{ inputs.alias }}
    strategy:
      fail-fast: false
      matrix:
        # The kernel list is split round-robin across 4 parallel shards.
        shard: [0, 1, 2, 3]
        num_shards: [4]
    container:
      image: ${{ inputs.image }}
      options: ${{ inputs.container-options }}
      credentials:
        username: pytorchbot
        # Fall back to '' so runners without the secret can still pull
        # public images anonymously.
        password: ${{ secrets.DOCKER_HUB_READONLY_TOKEN || '' }}
    runs-on: ${{ inputs.runner }}
    permissions:
      id-token: write  # required for the OIDC-based AWS credential step below
      contents: read
    defaults:
      run:
        shell: bash -l {0}
steps:
  - name: Check out code
    uses: actions/checkout@v4
  - name: Install uv
    uses: astral-sh/setup-uv@v6
    with:
      python-version: ${{ inputs.python-version }}
      enable-cache: true
  - name: Create virtual environment
    run: |
      uv venv --python ${{ inputs.python-version }}
  - name: Install PyTorch
    run: |
      source .venv/bin/activate
      uv pip install -U --pre torch --index-url https://download.pytorch.org/whl/nightly/${{ inputs.runtime-version }}
  - name: Install Triton
    # Builds Triton from source with clang-14; uninstalls any wheel-shipped
    # copy first so the from-source build wins.
    run: |
      set -x
      source .venv/bin/activate
      apt-get update
      apt-get install -y git
      apt-get install -y clang-14 clang++-14 zlib1g-dev
      export CC=clang-14
      export CXX=clang++-14
      mkdir -p /tmp/$USER
      cd /tmp/$USER
      uv pip uninstall triton pytorch-triton || true
      rm -rf triton/ || true
      # Fix: clone from the official github.com host. The previous URL
      # ('github.com') is an untrusted dynamic-DNS domain —
      # a supply-chain risk for code that gets compiled and installed here.
      git clone https://github.com/triton-lang/triton.git
      cd triton/
      uv pip install -r python/requirements.txt
      MAX_JOBS=$(nproc) TRITON_PARALLEL_LINK_JOBS=2 uv pip install .
      cd /tmp/$USER
      rm -rf triton/
      python -c "import triton; print(f'Triton version: {triton.__version__}')"
- name: Install Helion
  run: |
    source .venv/bin/activate
    uv pip install -r requirements.txt
    SETUPTOOLS_SCM_PRETEND_VERSION="0.0.0" uv pip install -e .'[dev]' --no-deps
    python -c "import helion; print(helion.__name__)"
- name: Install Benchmark Requirements
  run: |
    set -x
    source .venv/bin/activate
    uv pip install pip
    uv pip install quack-kernels --no-deps
    mkdir -p benchmarks/ && pushd benchmarks/
    # Fix: clone from the official github.com host. The previous URL
    # ('github.com') is an untrusted dynamic-DNS domain —
    # a supply-chain risk for code installed into the benchmark environment.
    git clone https://github.com/pytorch-labs/tritonbench/
    pushd tritonbench/
    git submodule update --init --recursive
    uv pip install -r requirements.txt
    python install.py --liger
    uv pip install -e . --no-deps
    popd
    popd
- name: Run Benchmark
  run: |
    source .venv/bin/activate
    KERNELS=("vector_add" "vector_exp" "sum" "layer_norm" "softmax" "rms_norm" "cross_entropy")
    NUMSHARDS=${{ matrix.num_shards }}
    SHARD=${{ matrix.shard }}
    # Round-robin sharding: shard S takes every kernel whose index i
    # satisfies i % NUMSHARDS == S.
    SHARD_KERNELS=()
    for ((i=0; i<${#KERNELS[@]}; i++)); do
      if [ $((i % NUMSHARDS)) -eq $SHARD ]; then
        SHARD_KERNELS+=("${KERNELS[i]}")
      fi
    done
    KERNEL_LIST=$(IFS=','; echo "${SHARD_KERNELS[*]}")
    echo "Running shard $SHARD of $NUMSHARDS with kernels: $KERNEL_LIST"
    TEST_REPORTS_DIR=$(pwd)/test/test-reports
    mkdir -p "$TEST_REPORTS_DIR"
    echo "$TEST_REPORTS_DIR"
    # Warm-up run: do autotuning but do not record the results.
    # Fix: the previous version left a trailing '\' after the last argument,
    # so the shell folded the following comment line into this command — and
    # would have folded 'sleep 5m' into it had the comment been removed.
    python benchmarks/run.py \
      --kernel "$KERNEL_LIST" \
      --metrics speedup,accuracy \
      --latency-measure-mode profiler
    # Relax the GPU
    sleep 5m
    # Run again with cache and record results
    python benchmarks/run.py \
      --kernel "$KERNEL_LIST" \
      --metrics speedup,accuracy \
      --latency-measure-mode profiler \
      --output "$TEST_REPORTS_DIR/helionbench.json"
    if [[ ! -s "$TEST_REPORTS_DIR/helionbench.json" ]]; then
      echo "❌ helionbench.json is missing or empty"
      exit 1
    fi
    cat "$TEST_REPORTS_DIR/helionbench.json"
- name: Authenticate with AWS
  # Only the b200 alias assumes the upload role via OIDC here; other runners
  # presumably carry credentials already — TODO confirm with runner setup.
  if: inputs.alias == 'b200'
  uses: aws-actions/configure-aws-credentials@v4
  with:
    role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
    # The max duration enforced by the server side
    role-duration-seconds: 18000
    aws-region: us-east-1
- name: Upload the benchmark results to OSS benchmark database for the dashboard
  # Runs for every alias (no `if`); uploads the helionbench.json produced by
  # the benchmark step above from test/test-reports.
  uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
  with:
    benchmark-results-dir: test/test-reports
    dry-run: false
    schema-version: v3
    github-token: ${{ secrets.GITHUB_TOKEN }}
    venv: ".venv/bin/activate"