Skip to content
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
5891465
Add build job for rocm
pnunna93 Jun 19, 2024
d03a680
Add rocm build script
pnunna93 Jun 19, 2024
ec9000f
Copy shared obj file into output_dir
pnunna93 Jun 20, 2024
9b8c1da
upload build artifacts and enable wheels build
pnunna93 Jun 20, 2024
1413c5f
Remove cuda build temporarily
pnunna93 Jun 20, 2024
578b2f4
Merge pull request #38 from ROCm/enable_rocm_build_ci
pnunna93 Jun 21, 2024
fd655b0
Add ROCm version to .so filename
pnunna93 Jul 29, 2024
6b77f4c
Add rocm_version to whls build
pnunna93 Jul 29, 2024
78324b3
Revert "Remove cuda build temporarily"
pnunna93 Jul 29, 2024
c146b8b
Add rocm_version env var
pnunna93 Jul 29, 2024
953a383
Merge remote-tracking branch 'upstream/multi-backend-refactor' into e…
pnunna93 Jul 29, 2024
d6c3df4
Remove thrust header files
pnunna93 Jul 30, 2024
7e9a65c
Print node info
pnunna93 Jul 30, 2024
cdb209a
print cuda node info
pnunna93 Jul 30, 2024
77e1499
Revert "print cuda node info"
pnunna93 Jul 30, 2024
7c91909
Revert "Print node info"
pnunna93 Jul 30, 2024
b78b340
Add rocm arch to compile command
pnunna93 Jul 30, 2024
a62b9d4
Rename .so files to rocm
pnunna93 Jul 30, 2024
9059bff
Update default gpu arch
pnunna93 Jul 30, 2024
c5a406a
Skip cpu based igemmlt int tests on ROCm
pnunna93 Jul 30, 2024
9cbb5e1
Update Documentation
pnunna93 Jul 30, 2024
3580624
Update upstream repo name
pnunna93 Jul 30, 2024
3bde1b7
Update docs
pnunna93 Jul 30, 2024
b123125
Merge pull request #39 from ROCm/enable_rocm_whls
pnunna93 Jul 31, 2024
db1df72
Update string format
pnunna93 Jul 31, 2024
e498b4d
Remove pre-release option for torch install
pnunna93 Jul 31, 2024
7d2e027
Update pytorch install path
pnunna93 Aug 1, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions .github/scripts/build-rocm.sh
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
#!/bin/bash
declare build_arch
declare build_os
declare rocm_version

set -xeuo pipefail
bnb_rocm_arch="gfx90a;gfx942;gfx1100"
if [ "${build_os:0:6}" == ubuntu ]; then
image=rocm/dev-ubuntu-22.04:6.1-complete
image=rocm/dev-ubuntu-22.04:${rocm_version}-complete
echo "Using image $image"
docker run --rm --platform "linux/$build_arch" -i \
-w /src -v "$PWD:/src" "$image" sh -c \
"apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \
&& cmake -DCOMPUTE_BACKEND=hip . \
&& cmake -DCOMPUTE_BACKEND=hip -DBNB_ROCM_ARCH=\"${bnb_rocm_arch}\" . \
&& cmake --build ."
fi

#output_dir="output/${build_os}/${build_arch}"
#mkdir -p "${output_dir}"
#(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}")
output_dir="output/${build_os}/${build_arch}"
mkdir -p "${output_dir}"
(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}")
10 changes: 10 additions & 0 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ jobs:
matrix:
os: [ubuntu-latest]
arch: [x86_64]
rocm_version:
["6.1.2"]
runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
steps:
- uses: actions/checkout@v4
Expand All @@ -123,10 +125,18 @@ jobs:
env:
build_os: ${{ matrix.os }}
build_arch: ${{ matrix.arch }}
rocm_version: ${{ matrix.rocm_version }}
- name: Upload build artifact
uses: actions/upload-artifact@v4
with:
name: shared_library_rocm_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.rocm_version }}
path: output/*
retention-days: 7
build-wheels:
needs:
- build-shared-libs
- build-shared-libs-cuda
- build-shared-libs-rocm
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
Expand Down
6 changes: 4 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ elseif(BUILD_HIP)
set(CMAKE_HIP_ARCHITECTURES ${BNB_ROCM_ARCH})
else()
if (NOT AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES)
set(CMAKE_HIP_ARCHITECTURES "gfx908;gfx90a;gfx940;gfx941;gfx942")
set(CMAKE_HIP_ARCHITECTURES "gfx90a;gfx942;gfx1100")
elseif (AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES)
set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS})
endif()
Expand All @@ -194,12 +194,14 @@ elseif(BUILD_HIP)

list(APPEND SRC_FILES ${HIP_FILES})

string(APPEND BNB_OUTPUT_NAME "_hip")
string(APPEND BNB_OUTPUT_NAME "_rocm")

# get hip version
execute_process(COMMAND hipconfig --version OUTPUT_VARIABLE HIP_CONFIG_VERSION)
string(REGEX MATCH "[0-9]+\\.[0-9]+" HIP_VERSION "${HIP_CONFIG_VERSION}")
string(REPLACE "." "" HIP_VERSION_SHORT "${HIP_VERSION}")

string(APPEND BNB_OUTPUT_NAME "${HIP_VERSION_SHORT}")
if(NO_CUBLASLT OR HIP_VERSION VERSION_LESS "6.1")
string(APPEND BNB_OUTPUT_NAME "_nohipblaslt")
endif()
Expand Down
6 changes: 4 additions & 2 deletions bitsandbytes/cextension.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ def get_cuda_bnb_library_path(cuda_specs: CUDASpecs) -> Path:
"""
if torch.version.hip:
if BNB_HIP_VERSION < 601:
return PACKAGE_DIR / f"libbitsandbytes_hip_nohipblaslt{DYNAMIC_LIBRARY_SUFFIX}"
return PACKAGE_DIR / f"libbitsandbytes_rocm{BNB_HIP_VERSION_SHORT}_nohipblaslt{DYNAMIC_LIBRARY_SUFFIX}"
else:
return PACKAGE_DIR / f"libbitsandbytes_hip{DYNAMIC_LIBRARY_SUFFIX}"
return PACKAGE_DIR / f"libbitsandbytes_rocm{BNB_HIP_VERSION_SHORT}{DYNAMIC_LIBRARY_SUFFIX}"
library_name = f"libbitsandbytes_cuda{cuda_specs.cuda_version_string}"
if not cuda_specs.has_cublaslt:
# if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt
Expand Down Expand Up @@ -119,8 +119,10 @@ def get_native_library() -> BNBNativeLibrary:
if torch.version.hip:
hip_major, hip_minor = map(int, torch.version.hip.split(".")[0:2])
HIP_ENVIRONMENT, BNB_HIP_VERSION = True, hip_major * 100 + hip_minor
BNB_HIP_VERSION_SHORT = f"{hip_major}{hip_minor}"
else:
HIP_ENVIRONMENT, BNB_HIP_VERSION = False, 0
BNB_HIP_VERSION_SHORT = ""
lib = get_native_library()
except Exception as e:
lib = None
Expand Down
2 changes: 0 additions & 2 deletions csrc/kernels.hip
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
#include <hipcub/hipcub.hpp>
#include <hip/hip_math_constants.h>

#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
//#include <mma.h>


Expand Down
6 changes: 0 additions & 6 deletions csrc/ops_hip.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,6 @@
#include <vector>
#include <functional>

/*
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
*/


#define CUDA_CHECK_RETURN(value) { \
hipError_t _m_cudaStat = value; \
if (_m_cudaStat != hipSuccess) { \
Expand Down
20 changes: 15 additions & 5 deletions docs/source/installation.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -146,15 +146,25 @@ Please follow these steps to install bitsandbytes with device-specific backend s
bitsandbytes is fully supported from ROCm 6.1 onwards (currently in alpha release).

> [!TIP]
> If you already installed ROCm and PyTorch, skip Docker steps below and please check that the torch version matches your ROCm install. To install torch for a specific ROCm version, please refer to step 3 of wheels install in [Installing PyTorch for ROCm](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/3rd-party/pytorch-install.html#using-wheels-package) guide.
> If you would like to install ROCm and PyTorch on bare metal, skip Docker steps and refer to our official guides at [ROCm installation overview](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/install-overview.html#rocm-install-overview) and [Installing PyTorch for ROCm](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/3rd-party/pytorch-install.html#using-wheels-package) (Step 3 of wheels build for quick installation). Please make sure to get PyTorch wheel for the installed ROCm version.

```bash
# Create a docker container with latest pytorch. It comes with ROCm and pytorch preinstalled
docker pull rocm/pytorch:latest
docker run -it --device=/dev/kfd --device=/dev/dri --group-add video rocm/pytorch:latest
# Create a docker container with latest ROCm image, which includes ROCm libraries
docker pull rocm/dev-ubuntu-22.04:6.1.2-complete
docker run -it --device=/dev/kfd --device=/dev/dri --group-add video rocm/dev-ubuntu-22.04:6.1.2-complete
apt-get update && apt-get install -y git && cd home

# Install pytorch compatible with above ROCm version
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/rocm6.1/
Comment thread
matthewdouglas marked this conversation as resolved.
Outdated

# Install bitsandbytes from PyPI
# (This is supported on Ubuntu 22.04, Python 3.10, ROCm 6.1.0/6.1.1/6.1.2 and gpu arch - gfx90a, gfx942, gfx1100
# Please install from source if your configuration doesn't match with these)
pip install bitsandbytes

# Install bitsandbytes from source
# Clone bitsandbytes repo, ROCm backend is currently enabled on multi-backend-refactor branch
git clone --depth 1 -b multi-backend-refactor https://github.com/TimDettmers/bitsandbytes.git && cd bitsandbytes/
git clone --depth 1 -b multi-backend-refactor https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bitsandbytes/

# Install dependencies
pip install -r requirements-dev.txt
Expand Down
3 changes: 3 additions & 0 deletions tests/test_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,9 @@ def test_nvidia_transform(dim1, dim2, dim3, dims, dtype, orderA, orderOut, trans
@pytest.mark.parametrize("ldb", (0,), ids=id_formatter("ldb"))
@pytest.mark.parametrize("device", ("cuda", "cpu"), ids=id_formatter("device"))
def test_igemmlt_int(dim1, dim2, dim3, dim4, dims, ldb, device):
if HIP_ENVIRONMENT and device == "cpu":
pytest.skip("this test is not supported on ROCm yet")

for i in range(k):
if dims == 2:
A = torch.randint(-128, 127, size=(dim1, dim3), device=device).to(torch.int8)
Expand Down