# Build Wheels (CUDA) #83
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Builds CUDA-enabled wheels for each requested OS / CUDA toolkit combination,
# smoke-tests that each wheel installs on the requested Python versions, and,
# when run from a tag ref, uploads the wheels to a GitHub release named
# <tag>-cu<major><minor>.
name: Build Wheels (CUDA)

on:
  workflow_dispatch:
    inputs:
      os:
        description: OS to build
        required: true
        default: all
        type: choice
        options:
          - all
          - ubuntu-22.04
          - windows-2022
      cuda:
        description: CUDA toolkit version to build
        required: true
        default: all
        type: choice
        options:
          - all
          - 12.1.1
          - 12.2.2
          - 12.3.2
          - 12.4.1
          - 12.5.1
      test_python:
        description: Python version to smoke-test
        required: true
        default: all
        type: choice
        options:
          - all
          - "3.9"
          - "3.10"
          - "3.11"
          - "3.12"
          - "3.13"

# Read-only by default; only publish_wheels is granted write access below.
permissions:
  contents: read

jobs:
  # Expands the dispatch inputs ("all" or a single value) into the JSON
  # matrices consumed by the build, test, and publish jobs.
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-22.04
    outputs:
      build_matrix: ${{ steps.set-matrix.outputs.build_matrix }}
      test_matrix: ${{ steps.set-matrix.outputs.test_matrix }}
      publish_matrix: ${{ steps.set-matrix.outputs.publish_matrix }}
    defaults:
      run:
        shell: pwsh
    steps:
      - name: Define Job Output
        id: set-matrix
        # Inputs are handed over through the environment instead of being
        # interpolated into the script text.
        env:
          OS_INPUT: ${{ inputs.os }}
          CUDA_INPUT: ${{ inputs.cuda }}
          TEST_PYTHON_INPUT: ${{ inputs.test_python }}
        run: |
          $osInput = $env:OS_INPUT
          $cudaInput = $env:CUDA_INPUT
          $testPythonInput = $env:TEST_PYTHON_INPUT

          # An empty value or 'all' selects every supported entry.
          if ([string]::IsNullOrWhiteSpace($osInput) -or $osInput -eq 'all') {
            $os = @('ubuntu-22.04', 'windows-2022')
          } else {
            $os = @($osInput)
          }

          if ([string]::IsNullOrWhiteSpace($cudaInput) -or $cudaInput -eq 'all') {
            $cuda = @("12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1")
          } else {
            $cuda = @($cudaInput)
          }

          if ([string]::IsNullOrWhiteSpace($testPythonInput) -or $testPythonInput -eq 'all') {
            $testPython = @("3.9", "3.10", "3.11", "3.12", "3.13")
          } else {
            $testPython = @($testPythonInput)
          }

          $buildMatrix = @{
            'os' = $os
            # wheel.py-api = "py3" makes the CUDA wheel interpreter-agnostic,
            # so one builder per toolkit version is sufficient.
            'pyver' = @("3.9")
            'cuda' = $cuda
            'releasetag' = @("basic")
          }

          $testMatrix = @{
            'os' = $os
            'cuda' = $cuda
            'pyver' = $testPython
          }

          $publishMatrix = @{
            'cuda' = $cuda
          }

          Write-Output ('build_matrix=' + (ConvertTo-Json $buildMatrix -Compress)) >> $env:GITHUB_OUTPUT
          Write-Output ('test_matrix=' + (ConvertTo-Json $testMatrix -Compress)) >> $env:GITHUB_OUTPUT
          Write-Output ('publish_matrix=' + (ConvertTo-Json $publishMatrix -Compress)) >> $env:GITHUB_OUTPUT

  # Builds one wheel per (os, cuda) pair using a conda-provided CUDA toolkit.
  build_wheels:
    name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix: ${{ fromJSON(needs.define_matrix.outputs.build_matrix) }}
    defaults:
      run:
        shell: pwsh
    env:
      CUDAVER: ${{ matrix.cuda }}
      AVXVER: ${{ matrix.releasetag }}
    steps:
      - name: Set up MSVC
        if: runner.os == 'Windows'
        uses: ilammy/msvc-dev-cmd@v1
        with:
          arch: x64

      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.pyver }}
          cache: 'pip'

      - name: Setup Mamba
        uses: conda-incubator/setup-miniconda@v3.1.0
        with:
          activate-environment: "llamacpp"
          python-version: ${{ matrix.pyver }}
          miniforge-version: latest
          add-pip-as-python-dependency: true
          auto-activate-base: false

      - name: Install Dependencies
        env:
          MAMBA_DOWNLOAD_FAILFAST: "0"
          MAMBA_NO_LOW_SPEED_LIMIT: "1"
        run: |
          $cudaVersion = $env:CUDAVER
          $cudaChannel = "nvidia/label/cuda-$cudaVersion"

          if ($IsLinux) {
            $nvccPackage = "${cudaChannel}::cuda-nvcc_linux-64"
            $cudaPackages = @("${cudaChannel}::cuda-toolkit=$cudaVersion", $nvccPackage)
          } elseif ($IsWindows) {
            $nvccPackage = "${cudaChannel}::cuda-nvcc_win-64"
            if ($cudaVersion -like '12.5.*') {
              # The Windows 12.5 toolkit meta-package pulls conda compiler
              # activation scripts that can overflow cmd.exe's line limit after
              # MSVC is already initialized. The build only needs nvcc, cudart,
              # and headers from the NVIDIA label.
              $cudaPackages = @($nvccPackage)
            } else {
              $cudaPackages = @("${cudaChannel}::cuda-toolkit=$cudaVersion", $nvccPackage)
            }
          } else {
            throw 'Unsupported CUDA wheel build platform'
          }

          # Keep nvcc, cudart, and headers on the same NVIDIA label so the
          # detected toolkit version matches the published wheel tag.
          $mambaArgs = @(
            'install',
            '-y',
            '--channel-priority',
            'flexible',
            '--override-channels',
            '-c',
            $cudaChannel
          ) + $cudaPackages + @(
            "${cudaChannel}::cuda-cudart",
            "${cudaChannel}::cuda-cudart-dev"
          )

          & mamba @mambaArgs
          if ($LASTEXITCODE -ne 0) {
            exit $LASTEXITCODE
          }

          python -m pip install build wheel ninja
          if ($LASTEXITCODE -ne 0) {
            exit $LASTEXITCODE
          }

      - name: Show CUDA diagnostics
        run: |
          mamba list | Select-String 'cuda|nvidia|cudatoolkit'
          if ($IsWindows) {
            Get-ChildItem -Path $env:CONDA_PREFIX -Recurse -Filter nvcc.exe | Select-Object -First 10 -ExpandProperty FullName
          } else {
            Get-ChildItem -Path $env:CONDA_PREFIX -Recurse -Filter nvcc | Select-Object -First 10 -ExpandProperty FullName
          }

      - name: Build Wheel
        run: |
          $pathSeparator = if ($IsWindows) { ';' } else { ':' }

          # Locate the CUDA root inside the conda environment.
          if ($IsWindows) {
            $cudaRoot = Join-Path $env:CONDA_PREFIX 'Library'
          } elseif (Test-Path (Join-Path $env:CONDA_PREFIX 'targets/x86_64-linux/include/cuda_runtime.h')) {
            $cudaRoot = Join-Path $env:CONDA_PREFIX 'targets/x86_64-linux'
          } else {
            $cudaRoot = $env:CONDA_PREFIX
          }

          $env:CUDA_PATH = $cudaRoot
          $env:CUDA_HOME = $cudaRoot
          $env:CUDAToolkit_ROOT = $cudaRoot
          $env:CUDA_TOOLKIT_ROOT_DIR = $cudaRoot

          $cudaHostCompilerArg = ''
          # CMake arguments use forward slashes even on Windows.
          $cudaRootCmake = $cudaRoot.Replace('\', '/')
          $env:CMAKE_ARGS = "-DCUDAToolkit_ROOT=$cudaRootCmake -DCUDA_TOOLKIT_ROOT_DIR=$cudaRootCmake"

          if ($IsLinux) {
            # Use GCC 12 as the host compiler when the runner provides it.
            if (Test-Path '/usr/bin/g++-12') {
              $env:CC = '/usr/bin/gcc-12'
              $env:CXX = '/usr/bin/g++-12'
              $env:CUDAHOSTCXX = '/usr/bin/g++-12'
              $cudaHostCompilerArg = " -DCMAKE_CUDA_HOST_COMPILER=$env:CUDAHOSTCXX"
            }
            # Re-set CMAKE_ARGS so the optional host-compiler flag is appended.
            $env:CMAKE_ARGS = "-DCUDAToolkit_ROOT=$cudaRoot -DCUDA_TOOLKIT_ROOT_DIR=$cudaRoot$cudaHostCompilerArg"
            $env:CPATH = "$cudaRoot/include$pathSeparator$env:CPATH"
            $env:CPLUS_INCLUDE_PATH = "$cudaRoot/include$pathSeparator$env:CPLUS_INCLUDE_PATH"
            $env:LIBRARY_PATH = "$cudaRoot/lib$pathSeparator$env:CONDA_PREFIX/lib$pathSeparator$env:LIBRARY_PATH"
            $env:LD_LIBRARY_PATH = "$cudaRoot/lib$pathSeparator$env:CONDA_PREFIX/lib$pathSeparator$env:LD_LIBRARY_PATH"
          } elseif ($IsWindows) {
            $ninjaPath = ((Get-Command ninja -ErrorAction Stop).Source).Replace('\', '/')
            $env:CMAKE_GENERATOR = 'Ninja'
            $env:CMAKE_MAKE_PROGRAM = $ninjaPath
            $env:PATH = "$(Join-Path $cudaRoot 'bin')$pathSeparator$env:PATH"
          }

          # Candidate nvcc locations, in preference order.
          if ($IsWindows) {
            $nvccCandidates = @(
              (Join-Path $cudaRoot 'bin\nvcc.exe'),
              (Join-Path $env:CONDA_PREFIX 'Library\bin\nvcc.exe'),
              (Join-Path $env:CONDA_PREFIX 'bin\nvcc.exe')
            )
          } else {
            $nvccCandidates = @(
              (Join-Path $env:CONDA_PREFIX 'bin/nvcc'),
              (Join-Path $env:CONDA_PREFIX 'targets/x86_64-linux/bin/nvcc')
            )
          }

          $nvccPath = $nvccCandidates | Where-Object { Test-Path $_ } | Select-Object -First 1
          if (-not $nvccPath) {
            throw 'Failed to find nvcc in the conda environment'
          }

          $env:CUDACXX = $nvccPath
          $env:PATH = "$(Split-Path $nvccPath)$pathSeparator$env:PATH"

          if ($IsWindows) {
            $nvccPathCmake = $nvccPath.Replace('\', '/')
            $env:CUDACXX = $nvccPathCmake
            $env:CMAKE_ARGS = "-DCMAKE_CUDA_COMPILER=$nvccPathCmake -DCMAKE_MAKE_PROGRAM=$env:CMAKE_MAKE_PROGRAM $env:CMAKE_ARGS"
          }

          # Check for a match before indexing into it; otherwise a missing
          # "release X.Y" line fails with a null-index error instead of the
          # message below.
          $releaseMatch = (& $nvccPath --version) | Select-String 'release ([0-9]+\.[0-9]+)' | Select-Object -First 1
          if (-not $releaseMatch) {
            throw 'Failed to detect the installed CUDA toolkit version'
          }
          $nvccVersion = $releaseMatch.Matches[0].Groups[1].Value
          if (-not $nvccVersion) {
            throw 'Failed to detect the installed CUDA toolkit version'
          }
          $cudaTagVersion = $nvccVersion.Replace('.','')

          # The publish job derives its release tag from the requested matrix
          # version, so surface any drift from the toolkit actually installed.
          $requestedParts = "$env:CUDAVER".Split('.')
          $requestedTag = "$($requestedParts[0])$($requestedParts[1])"
          if ($cudaTagVersion -ne $requestedTag) {
            Write-Output "::warning::Detected nvcc release $nvccVersion does not match the requested CUDA version $env:CUDAVER"
          }

          $env:VERBOSE = '1'
          # Build real cubins for the supported GPUs, including sm_70, and keep
          # one forward-compatible PTX target instead of embedding PTX for every
          # SM. This keeps the wheel under GitHub's 2 GiB release-asset limit.
          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=70-real;75-real;80-real;86-real;89-real;90-real;90-virtual -DCMAKE_CUDA_FLAGS=--allow-unsupported-compiler $env:CMAKE_ARGS"
          $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'

          python -m build --wheel
          if ($LASTEXITCODE -ne 0) {
            exit $LASTEXITCODE
          }

          # Record the detected toolkit version for any later step in this job.
          # GITHUB_ENV does not carry over to other jobs.
          Write-Output "CUDA_VERSION=$cudaTagVersion" >> $env:GITHUB_ENV

      - uses: actions/upload-artifact@v4
        with:
          name: cuda-wheel-${{ matrix.os }}-${{ matrix.cuda }}
          path: dist/*.whl
          # A build that produced no wheel should fail here, not in a later job.
          if-no-files-found: error

  # Installs each built wheel on every requested Python version.
  test_wheels:
    name: Test Wheel ${{ matrix.os }} ${{ matrix.cuda }} Python ${{ matrix.pyver }}
    needs: [define_matrix, build_wheels]
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix: ${{ fromJSON(needs.define_matrix.outputs.test_matrix) }}
    defaults:
      run:
        shell: pwsh
    steps:
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.pyver }}

      - uses: actions/download-artifact@v4
        with:
          name: cuda-wheel-${{ matrix.os }}-${{ matrix.cuda }}
          path: dist

      - name: Test wheel installation
        run: |
          $wheel = (Get-ChildItem dist -Filter '*.whl' | Select-Object -First 1).FullName
          if (-not $wheel) {
            throw 'No wheel found in the downloaded artifact'
          }

          # Check every native command: only the last exit code reaches the
          # runner, so an earlier failure (e.g. pip check) would otherwise be
          # hidden by a later success.
          python -m pip install --upgrade pip
          if ($LASTEXITCODE -ne 0) {
            exit $LASTEXITCODE
          }

          python -m pip install $wheel
          if ($LASTEXITCODE -ne 0) {
            exit $LASTEXITCODE
          }

          python -m pip check
          if ($LASTEXITCODE -ne 0) {
            exit $LASTEXITCODE
          }

          # Avoid importing llama_cpp here: GitHub-hosted runners do not provide
          # NVIDIA drivers, so native CUDA library loading can fail independently
          # of Python-version wheel compatibility.
          python -c "from importlib.metadata import version; print(version('llama_cpp_python'))"
          if ($LASTEXITCODE -ne 0) {
            exit $LASTEXITCODE
          }

  # Uploads the tested wheels to a per-toolkit release; runs only for tag refs.
  publish_wheels:
    name: Publish Wheels CUDA ${{ matrix.cuda }}
    needs: [define_matrix, test_wheels]
    if: startsWith(github.ref, 'refs/tags/')
    runs-on: ubuntu-22.04
    # Creating or updating a release requires write access to repository contents.
    permissions:
      contents: write
    strategy:
      fail-fast: false
      matrix: ${{ fromJSON(needs.define_matrix.outputs.publish_matrix) }}
    defaults:
      run:
        shell: pwsh
    steps:
      - uses: actions/download-artifact@v4
        with:
          pattern: cuda-wheel-*-${{ matrix.cuda }}
          path: dist
          merge-multiple: true

      - name: Set CUDA release tag
        env:
          MATRIX_CUDA: ${{ matrix.cuda }}
        run: |
          # "12.4.1" -> "124": the tag keeps only the major and minor parts.
          $versionParts = "$env:MATRIX_CUDA".Split('.')
          $cudaVersion = "$($versionParts[0])$($versionParts[1])"
          Write-Output "CUDA_VERSION=$cudaVersion" >> $env:GITHUB_ENV

      - uses: softprops/action-gh-release@v2
        with:
          files: dist/*.whl
          # Do not publish a release when no wheel matched.
          fail_on_unmatched_files: true
          # Set tag_name to <tag>-cu<cuda_version>
          tag_name: ${{ github.ref_name }}-cu${{ env.CUDA_VERSION }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}