2121 run : |
2222 $matrix = @{
2323 'os' = @('ubuntu-22.04') #, 'windows-2022')
24- 'pyver' = @("3.9", "3.10", "3.11", "3.12")
25- 'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1") #, "12.5.1", "12.6.1")
24+ 'pyver' = @("3.12")
25+ 'cuda' = @("12.4.1") #, "12.5.1", "12.6.1")
2626 'releasetag' = @("basic")
2727 }
2828
@@ -153,9 +153,9 @@ jobs:
153153 }
154154 $cudaTagVersion = $nvccVersion.Replace('.','')
155155 $env:VERBOSE = '1'
156- # Keep a portable SM set, including sm_70, instead of CMake's `all`,
157- # which now pulls in future targets the hosted-runner toolchains cannot assemble.
158- $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=70;75;80;86;89;90 -DCMAKE_CUDA_FLAGS=--allow-unsupported-compiler $env:CMAKE_ARGS"
156+ # Build real cubins for the supported GPUs and keep a single PTX target
157+ # for forward compatibility instead of embedding PTX for every SM.
158+ $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=70-real;75-real;80-real;86-real;89-real;90-real;90-virtual -DCMAKE_CUDA_FLAGS=--allow-unsupported-compiler $env:CMAKE_ARGS"
159159 # if ($env:AVXVER -eq 'AVX') {
160160 $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
161161 # }
@@ -166,6 +166,24 @@ jobs:
166166 # $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX=off -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
167167 # }
168168 python -m build --wheel
169+ Write-Output 'Wheel size before repack:'
170+ Get-ChildItem dist/*.whl | ForEach-Object {
171+ Write-Output ("{0}`t{1:N2} MiB" -f $_.Name, ($_.Length / 1MB))
172+ }
173+ $wheelDir = Join-Path $env:RUNNER_TEMP 'cuda-wheel-unpacked'
174+ if (Test-Path $wheelDir) {
175+ Remove-Item -Recurse -Force $wheelDir
176+ }
177+ python -m wheel unpack (Get-ChildItem dist/*.whl | Select-Object -First 1).FullName -d $wheelDir
178+ Get-ChildItem $wheelDir -Recurse -File | Where-Object { $_.Name -match '\.so(\..*)?$' } | ForEach-Object {
179+ & strip --strip-unneeded $_.FullName
180+ }
181+ Remove-Item dist/*.whl
182+ python -m wheel pack (Get-ChildItem $wheelDir | Select-Object -First 1).FullName -d dist
183+ Write-Output 'Wheel size after repack:'
184+ Get-ChildItem dist/*.whl | ForEach-Object {
185+ Write-Output ("{0}`t{1:N2} MiB" -f $_.Name, ($_.Length / 1MB))
186+ }
169187 # Publish tags that reflect the actual installed toolkit version.
170188 Write-Output "CUDA_VERSION=$cudaTagVersion" >> $env:GITHUB_ENV
171189
0 commit comments