File tree Expand file tree Collapse file tree 2 files changed +11
-0
lines changed
tests/integration_tests/fixtures Expand file tree Collapse file tree 2 files changed +11
-0
lines changed Original file line number Diff line number Diff line change 3333 python-version : ' 3.12'
3434 - name : Update pip
3535 run : python -m pip install -U pip
36+ - name : Install CUDA toolkit
37+ run : |
38+ # flashinfer (used by vLLM 0.13.0) requires nvcc for JIT compilation
39+ # Add NVIDIA's RHEL 9 CUDA repository (used for Amazon Linux / RHEL runners), then install the 12.8 toolkit
40+ sudo dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
41+ sudo dnf install -y cuda-toolkit-12-8
42+ echo "CUDA_HOME=/usr/local/cuda-12.8" >> "$GITHUB_ENV"
43+ echo "/usr/local/cuda-12.8/bin" >> "$GITHUB_PATH"
3644 - name : Install torchforge
3745 run : pip install uv && uv pip install . && uv pip install '.[dev]'
3846 - name : Run weight sync integration test
Original file line number Diff line number Diff line change @@ -17,6 +17,9 @@ generator:
1717 tensor_parallel_size : 4
1818 pipeline_parallel_size : 1
1919 enforce_eager : ${not:${compile}}
20+ # Reduce memory usage for vLLM 0.13.0 warmup on T4 GPUs
21+ max_num_seqs : 128
22+ gpu_memory_utilization : 0.85
2023 sampling_params :
2124 n : ${group_size}
2225 max_tokens : ${max_res_tokens}
You can’t perform that action at this time.
0 commit comments