@@ -390,8 +390,7 @@ bitsandbytes.optim.optimizer.Optimizer8bit(params, defaults, optim_bits=32, is_p
 bitsandbytes.optim.optimizer.Optimizer2State(
     optimizer_name, params, lr=1e-3, betas=(0.9, 0.999),
     eps=1e-8, weight_decay=0.0, optim_bits=32, args=None,
-    min_8bit_size=4096, percentile_clipping=100,
-    block_wise=True, max_unorm=0.0, skip_zeros=False,
+    min_8bit_size=4096, max_unorm=0.0, skip_zeros=False,
     is_paged=False, alpha=0.0, t_alpha=None, t_beta3=None,
 )
 ```
@@ -405,8 +404,7 @@ bitsandbytes.optim.optimizer.Optimizer2State(
 bitsandbytes.optim.optimizer.Optimizer1State(
     optimizer_name, params, lr=1e-3, betas=(0.9, 0.0),
     eps=1e-8, weight_decay=0.0, optim_bits=32, args=None,
-    min_8bit_size=4096, percentile_clipping=100,
-    block_wise=True, max_unorm=0.0, skip_zeros=False,
+    min_8bit_size=4096, max_unorm=0.0, skip_zeros=False,
     is_paged=False,
 )
 ```
@@ -532,8 +530,6 @@ All bnb optimizers share these parameters beyond the standard PyTorch ones:
 | -----------| ------| ---------| -------------|
 | `optim_bits` | `int` | 32 | 32 for full precision state, 8 for quantized state |
 | `min_8bit_size` | `int` | 4096 | Parameters smaller than this use 32-bit state even in 8-bit mode |
-| `percentile_clipping` | `int` | 100 | Gradient clipping at a percentile. 100 = disabled |
-| `block_wise` | `bool` | `True` | Block-wise quantization of optimizer states (vs global) |
 | `max_unorm` | `float` | 0.0 | Maximum update norm relative to weight norm. 0 = disabled |
 | `skip_zeros` | `bool` | `False` | Skip zero gradients in sparse models |
 | `is_paged` | `bool` | `False` | Use CUDA managed memory for state offloading |
@@ -864,57 +860,7 @@ F.batched_igemm(
 Batched int8 matrix multiplication.
 **Stability:** Stable (internal).
 
-### 4.9 Sparse Operations
-
-#### `COOSparseTensor`
-
-```python
-class F.COOSparseTensor:
-    def __init__(self, rows, cols, nnz, rowidx, colidx, values): ...
-```
-
-**Stability:** Legacy — used internally for sparse decomposition.
-
-#### `CSRSparseTensor` / `CSCSparseTensor`
-
-Similar sparse tensor containers.
-**Stability:** Legacy.
-
-#### `coo_zeros`
-
-```python
-F.coo_zeros(rows, cols, nnz, device, dtype=torch.half) -> COOSparseTensor
-```
-
-#### `coo2csr` / `coo2csc`
-
-```python
-F.coo2csr(cooA: COOSparseTensor) -> CSRSparseTensor
-F.coo2csc(cooA: COOSparseTensor) -> CSCSparseTensor
-```
-
-#### `spmm_coo`
-
-```python
-F.spmm_coo(
-    cooA: COOSparseTensor, B: torch.Tensor,
-    out: Optional[torch.Tensor] = None,
-) -> torch.Tensor
-```
-
-Sparse matrix-dense matrix multiply using cusparse.
-**Stability:** Legacy.
-
-#### `spmm_coo_very_sparse`
-
-```python
-F.spmm_coo_very_sparse(cooA, B, dequant_stats=None, out=None) -> torch.Tensor
-```
-
-Optimized for very sparse matrices with custom kernel.
-**Stability:** Legacy.
-
-### 4.10 Paged Memory
+### 4.9 Paged Memory
 
 #### `get_paged`
 
@@ -934,7 +880,7 @@ F.prefetch_tensor(A: torch.Tensor, to_cpu: bool = False) -> None
 Prefetch a paged tensor to GPU or CPU.
 **Stability:** Stable (internal).
 
-### 4.11 CPU-Specific Functions
+### 4.10 CPU-Specific Functions
 
 #### `_convert_weight_packed_for_cpu`
 
@@ -967,7 +913,7 @@ F.has_avx512bf16() -> bool
 Detects AVX512BF16 CPU support.
 **Stability:** Internal but may be useful externally.
 
-### 4.12 Utility Functions
+### 4.11 Utility Functions
 
 #### `is_on_gpu`
 
@@ -987,7 +933,7 @@ F.get_ptr(A: Optional[Tensor]) -> Optional[ct.c_void_p]
 Gets the data pointer of a tensor for ctypes calls.
 **Stability:** Internal.
 
-### 4.13 Singleton Managers
+### 4.12 Singleton Managers
 
 #### `GlobalPageManager`
 
@@ -1007,15 +953,6 @@ F.CUBLAS_Context.get_instance() -> CUBLAS_Context
 Manages cuBLAS context handles per device.
 **Stability:** Internal.
 
-#### `Cusparse_Context`
-
-```python
-F.Cusparse_Context.get_instance() -> Cusparse_Context
-```
-
-Manages cusparse context handle.
-**Stability:** Internal.
-
 ---
 
 ## 5. Autograd Functions
@@ -1238,7 +1175,7 @@ bitsandbytes.utils.replace_linear(
 | Class | Description |
 | -------| -------------|
 | `BNBNativeLibrary` | Base wrapper for the ctypes-loaded native library |
-| `CudaBNBNativeLibrary` | CUDA-specific subclass (sets up context/cusparse/managed ptr) |
+| `CudaBNBNativeLibrary` | CUDA-specific subclass (sets up context/managed ptr) |
 | `ErrorHandlerMockBNBNativeLibrary` | Fallback mock that defers error messages to call time |
 
 ### Module-level symbols
@@ -1313,7 +1250,6 @@ removed in a future release.
 | `quantize_no_absmax` | `functional` | `quantize_blockwise` |
 | `dequantize_no_absmax` | `functional` | `dequantize_blockwise` |
 | `optimizer_update_8bit` | `functional` | `optimizer_update_8bit_blockwise` |
-| `percentile_clipping` | `functional` | N/A (still used internally by non-blockwise path) |
 
 ---
 
@@ -1401,11 +1337,9 @@ A PR that changes any of these symbols MUST consider downstream impact:
 
 - `bitsandbytes.cextension.*` (native library loading)
 - `bitsandbytes.functional.get_ptr`, `is_on_gpu`, `_get_tensor_stream`
-- `bitsandbytes.functional.GlobalPageManager`, `CUBLAS_Context`, `Cusparse_Context`
+- `bitsandbytes.functional.GlobalPageManager`, `CUBLAS_Context`
 - `bitsandbytes.functional._convert_weight_packed_for_cpu*`
 - `bitsandbytes.functional.check_matmul`, `elementwise_func`, `fill`, `_mul`
-- `bitsandbytes.functional.spmm_coo`, `spmm_coo_very_sparse`
-- `bitsandbytes.functional.COOSparseTensor`, `CSRSparseTensor`, `CSCSparseTensor`
 - `bitsandbytes.utils.pack_dict_to_tensor`, `unpack_tensor_to_dict`
 - `bitsandbytes.utils.execute_and_return`, `sync_gpu`
 - `bitsandbytes.optim.optimizer.MockArgs`
0 commit comments