[Reland] Login to Docker from the workflows #24

Workflow file for this run

.github/workflows/benchmark.yml at 6b9179f

	# Reusable workflow (invoked via `workflow_call`): installs the toolchain
	# inside a container, runs `benchmarks/run.py` in shards, and uploads the
	# resulting report.
	name: Benchmark
Check failure on line 1 in .github/workflows/benchmark.yml (View workflow run for this annotation). GitHub Actions / .github/workflows/benchmark.yml: Invalid workflow file — `(Line: 28, Col: 9): Unrecognized named-value: 'runner'. Located at position 11 within expression: !contains(runner, 'b200') || secrets.DOCKER_HUB_READONLY_TOKEN != ''`

	on:
	  workflow_call:
	    # Every input is mandatory and passed as a string by the calling workflow.
	    inputs:
	      # Runner label the benchmark job is scheduled on (`runs-on`).
	      runner:
	        type: string
	        required: true
	      # Python version handed to setup-uv and `uv venv`.
	      python-version:
	        type: string
	        required: true
	      # Container image the job steps run in.
	      image:
	        type: string
	        required: true
	      # Last path segment of the PyTorch nightly wheel index URL.
	      runtime-version:
	        type: string
	        required: true
	      # Extra options passed to the job container.
	      container-options:
	        type: string
	        required: true
	      # Short label used in the job name; also gates the AWS authentication step.
	      alias:
	        type: string
	        required: true

	jobs:
	  # Gate job: the `secrets` context is not available in a job-level `if`, so
	  # the presence of the Docker Hub token is exported as a job output that the
	  # benchmark job can read through the `needs` context.
	  # NOTE(review): assumes a GitHub-hosted `ubuntu-latest` runner is available
	  # to this repository - confirm, or point it at a self-hosted label.
	  docker_credentials:
	    name: check-docker-credentials
	    runs-on: ubuntu-latest
	    permissions: {}
	    outputs:
	      available: ${{ steps.check.outputs.available }}
	    steps:
	      - name: Check whether the Docker Hub token is configured
	        id: check
	        env:
	          DOCKER_HUB_READONLY_TOKEN: ${{ secrets.DOCKER_HUB_READONLY_TOKEN }}
	        run: |
	          # Only the presence of the secret is reported, never its value.
	          if [[ -n "$DOCKER_HUB_READONLY_TOKEN" ]]; then
	            echo "available=true" >> "$GITHUB_OUTPUT"
	          else
	            echo "available=false" >> "$GITHUB_OUTPUT"
	          fi

	  # Runs the benchmark kernels, split across the shard matrix, inside the
	  # requested container and uploads the recorded results.
	  benchmark:
	    needs: docker_credentials
	    # This won't work when the secret is empty on DGX B200 runners, so skip the
	    # job in that case. A job-level `if` can only read the github, needs, vars
	    # and inputs contexts: the runner label comes from `inputs.runner` (there
	    # is no `runner` context here) and the secret check comes from the gate job.
	    if: ${{ !contains(inputs.runner, 'b200') || needs.docker_credentials.outputs.available == 'true' }}
	    name: benchmark-${{ inputs.runtime-version }}-py${{ inputs.python-version }}-${{ inputs.alias }}

	    strategy:
	      fail-fast: false
	      matrix:
	        # `num_shards` must match the number of entries in `shard`.
	        shard: [0, 1, 2, 3]
	        num_shards: [4]

	    container:
	      image: ${{ inputs.image }}
	      options: ${{ inputs.container-options }}
	      credentials:
	        username: pytorchbot
	        password: ${{ secrets.DOCKER_HUB_READONLY_TOKEN || '' }}

	    runs-on: ${{ inputs.runner }}
	    permissions:
	      id-token: write
	      contents: read

	    defaults:
	      run:
	        shell: bash -l {0}

	    steps:
	      - name: Check out code
	        uses: actions/checkout@v4

	      - name: Install uv
	        uses: astral-sh/setup-uv@v6
	        with:
	          python-version: ${{ inputs.python-version }}
	          enable-cache: true

	      - name: Create virtual environment
	        run: |
	          uv venv --python ${{ inputs.python-version }}

	      - name: Install PyTorch
	        run: |
	          source .venv/bin/activate
	          uv pip install -U --pre torch --index-url https://download.pytorch.org/whl/nightly/${{ inputs.runtime-version }}

	      # Builds Triton from a fresh clone of the upstream repository with clang-14.
	      - name: Install Triton
	        run: |
	          set -x
	          source .venv/bin/activate
	          apt-get update
	          apt-get install -y git
	          apt-get install -y clang-14 clang++-14 zlib1g-dev
	          export CC=clang-14
	          export CXX=clang++-14
	          mkdir -p /tmp/$USER
	          cd /tmp/$USER
	          # Drop any previously installed Triton and stale checkout first.
	          uv pip uninstall triton pytorch-triton || true
	          rm -rf triton/ || true
	          git clone https://github.com/triton-lang/triton.git
	          cd triton/
	          uv pip install -r python/requirements.txt
	          MAX_JOBS=$(nproc) TRITON_PARALLEL_LINK_JOBS=2 uv pip install .
	          cd /tmp/$USER
	          rm -rf triton/
	          python -c "import triton; print(f'Triton version: {triton.__version__}')"

	      - name: Install Helion
	        run: |
	          source .venv/bin/activate
	          uv pip install -r requirements.txt
	          SETUPTOOLS_SCM_PRETEND_VERSION="0.0.0" uv pip install -e .'[dev]' --no-deps
	          python -c "import helion; print(helion.__name__)"

	      - name: Install Benchmark Requirements
	        run: |
	          set -x
	          source .venv/bin/activate
	          uv pip install pip
	          uv pip install quack-kernels --no-deps
	          mkdir -p benchmarks/ && pushd benchmarks/
	          git clone https://github.com/pytorch-labs/tritonbench/
	          pushd tritonbench/
	          git submodule update --init --recursive
	          uv pip install -r requirements.txt
	          python install.py --liger
	          uv pip install -e . --no-deps
	          popd
	          popd

	      - name: Run Benchmark
	        run: |
	          source .venv/bin/activate

	          KERNELS=("vector_add" "vector_exp" "sum" "layer_norm" "softmax" "rms_norm" "cross_entropy")
	          NUMSHARDS=${{ matrix.num_shards }}
	          SHARD=${{ matrix.shard }}

	          # Round-robin assignment: kernel i belongs to shard (i mod NUMSHARDS).
	          SHARD_KERNELS=()
	          for ((i=0; i<${#KERNELS[@]}; i++)); do
	            if [ $((i % NUMSHARDS)) -eq "$SHARD" ]; then
	              SHARD_KERNELS+=("${KERNELS[i]}")
	            fi
	          done

	          KERNEL_LIST=$(IFS=','; echo "${SHARD_KERNELS[*]}")
	          echo "Running shard $SHARD of $NUMSHARDS with kernels: $KERNEL_LIST"

	          TEST_REPORTS_DIR=$(pwd)/test/test-reports
	          mkdir -p "$TEST_REPORTS_DIR"
	          echo "$TEST_REPORTS_DIR"

	          # Do autotuning but do not record the results
	          python benchmarks/run.py \
	            --kernel "$KERNEL_LIST" \
	            --metrics speedup,accuracy \
	            --latency-measure-mode profiler

	          # Relax the GPU
	          sleep 5m

	          # Run again with cache and record results
	          python benchmarks/run.py \
	            --kernel "$KERNEL_LIST" \
	            --metrics speedup,accuracy \
	            --latency-measure-mode profiler \
	            --output "$TEST_REPORTS_DIR/helionbench.json"

	          # Fail the job when the report was not produced.
	          if [[ ! -s "$TEST_REPORTS_DIR/helionbench.json" ]]; then
	            echo "❌ helionbench.json is missing or empty"
	            exit 1
	          fi
	          cat "$TEST_REPORTS_DIR/helionbench.json"

	      - name: Authenticate with AWS
	        if: inputs.alias == 'b200'
	        uses: aws-actions/configure-aws-credentials@v4
	        with:
	          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
	          # The max duration enforced by the server side
	          role-duration-seconds: 18000
	          aws-region: us-east-1

	      - name: Upload the benchmark results to OSS benchmark database for the dashboard
	        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
	        with:
	          benchmark-results-dir: test/test-reports
	          dry-run: false
	          schema-version: v3
	          github-token: ${{ secrets.GITHUB_TOKEN }}
	          venv: ".venv/bin/activate"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[Reland] Login to Docker from the workflows #24

Workflow file

[Reland] Login to Docker from the workflows #24

Uh oh!

Workflow file for this run

GitHub Actions / .github/workflows/benchmark.yml