@@ -1,9 +1,9 @@
+import inspect
 import math
 
 import einops
 import pytest
 import torch
-import inspect
 from torch import nn
 
 import bitsandbytes as bnb
@@ -620,7 +620,8 @@ def test_fp8linear():
 @pytest.mark.parametrize("embedding_dim", [64, 65])
 @pytest.mark.parametrize("input_shape", [(10,), (10, 10), (10, 10, 10)], ids=str)
 @pytest.mark.parametrize(
-    "embedding_class,quant_storage", [
+    "embedding_class,quant_storage",
+    [
         (bnb.nn.Embedding8bit, None),
         (bnb.nn.EmbeddingFP4, torch.uint8),
         (bnb.nn.EmbeddingFP4, torch.float32),
@@ -632,9 +633,9 @@ def test_fp8linear():
 def test_embedding_lossless(embedding_class, input_shape, embedding_dim, quant_storage):
     num_embeddings = 128
 
-    src_weight = (
-        (torch.randn((num_embeddings, embedding_dim), dtype=torch.float32) > 0).to(torch.float32) * 2 - 1
-    )  # Embeddings filled with {-1, 1} values. It should compress losslessly
+    src_weight = (torch.randn((num_embeddings, embedding_dim), dtype=torch.float32) > 0).to(
+        torch.float32
+    ) * 2 - 1  # Embeddings filled with {-1, 1} values. It should compress losslessly
 
     emb_base = nn.Embedding(
         num_embeddings=num_embeddings,
@@ -652,7 +653,7 @@ def test_embedding_lossless(embedding_class, input_shape, embedding_dim, quant_s
     emb_base.cuda()
     e.cuda()
 
-    input_tokens = torch.randint(low=0, high=num_embeddings, size=input_shape, device='cuda')
+    input_tokens = torch.randint(low=0, high=num_embeddings, size=input_shape, device="cuda")
 
     torch.testing.assert_close(
         actual=e(input_tokens),
@@ -663,7 +664,8 @@ def test_embedding_lossless(embedding_class, input_shape, embedding_dim, quant_s
 @pytest.mark.parametrize("embedding_dim", [64, 65])
 @pytest.mark.parametrize("input_shape", [(10,), (10, 10), (10, 10, 10)], ids=str)
 @pytest.mark.parametrize(
-    "embedding_class,quant_storage", [
+    "embedding_class,quant_storage",
+    [
         (bnb.nn.Embedding8bit, None),
         (bnb.nn.EmbeddingFP4, torch.uint8),
         (bnb.nn.EmbeddingFP4, torch.float32),
@@ -695,7 +697,7 @@ def test_embedding_error(embedding_class, input_shape, embedding_dim, quant_stor
     emb_base.cuda()
     e.cuda()
 
-    input_tokens = torch.randint(low=0, high=num_embeddings, size=input_shape, device='cuda')
+    input_tokens = torch.randint(low=0, high=num_embeddings, size=input_shape, device="cuda")
 
     torch.testing.assert_close(
         actual=e(input_tokens),
@@ -740,7 +742,7 @@ def test_4bit_embedding_warnings():
     with pytest.warns(UserWarning, match=r"inference."):
         net = bnb.nn.Embedding4bit(num_embeddings=num_embeddings, embedding_dim=default_block_size + 1)
         net.cuda()
-        inp = torch.randint(low=0, high=num_embeddings, size=(1,), device='cuda')
+        inp = torch.randint(low=0, high=num_embeddings, size=(1,), device="cuda")
         net(inp)
 
 
@@ -752,9 +754,9 @@ def test_4bit_embedding_weight_fsdp_fix():
 
     module.cuda()
 
-    setattr(module.weight, "quant_state", None)
+    module.weight.quant_state = None
 
-    input_tokens = torch.randint(low=0, high=num_embeddings, size=(1,), device='cuda')
+    input_tokens = torch.randint(low=0, high=num_embeddings, size=(1,), device="cuda")
 
     module(input_tokens)
 
@@ -769,9 +771,9 @@ def test_4bit_linear_weight_fsdp_fix():
 
     module.cuda()
 
-    setattr(module.weight, "quant_state", None)
+    module.weight.quant_state = None
 
-    input_tensor = torch.randn((1, inp_size), device='cuda')
+    input_tensor = torch.randn((1, inp_size), device="cuda")
 
     module(input_tensor)
 