
Commit e2dc832

Remove dead quant_state.dtype mutation in matmul_4bit CPU path (#1917)
The mutation `quant_state.dtype = A.dtype` is unnecessary: MatMul4Bit.forward already casts via `.to(A.dtype)`, and gemv_4bit doesn't read state.dtype. Removing it eliminates the Dynamo graph break on CPU under activation checkpointing, so the regression test no longer needs a CPU skip.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 1869cd8 commit e2dc832
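
For context, here is a minimal sketch of the failure mode the commit message describes. This is my own construction, not code from this repo: `State` stands in for bitsandbytes' `QuantState` (a plain Python object), and the claim that the mutation breaks the graph only under checkpointing is hedged, since Dynamo ordinarily tolerates attribute mutation but higher-order ops such as the checkpoint wrapper cannot capture side effects on outer state.

```python
import torch
from torch.utils.checkpoint import checkpoint


class State:
    """Stand-in for QuantState: an ordinary mutable Python object."""
    dtype = torch.float32


state = State()


def inner(x):
    state.dtype = x.dtype  # side effect on an object outside the region
    return x * 2.0


def fn(x):
    # torch.compile traces checkpointed regions as higher-order ops,
    # which cannot replay mutations of outer Python state.
    return checkpoint(inner, x, use_reentrant=False)


report = torch._dynamo.explain(fn)(torch.ones(4))
print(report.graph_break_count)  # expect >= 1 while the mutation is present
```

Deleting the assignment from `inner` should bring the break count to zero on recent PyTorch versions, which is the effect the commit claims for the real code.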

2 files changed: 0 additions & 6 deletions

bitsandbytes/autograd/_functions.py (0 additions & 3 deletions)
```diff
@@ -382,10 +382,7 @@ def matmul_4bit(
     bias: Optional[torch.Tensor] = None,
 ):
     assert quant_state is not None
-    # Change dtype to input dtype on CPU
     if A.device.type == "cpu":
-        quant_state.dtype = A.dtype
-
         if getattr(quant_state, "packing_format_for_cpu", False):
             out = F.gemv_4bit(A, B, out, state=quant_state)
             if bias is not None:
```
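
The deletion is safe because the cast happens at the point of use, not in the state object. Below is a sketch of the fallback path the commit message cites, paraphrased rather than quoted from MatMul4Bit.forward: `F.dequantize_4bit` is the library's public API, the `.to(A.dtype)` cast is what the message names, and the surrounding function is illustrative.

```python
import torch
import bitsandbytes.functional as F


def forward_sketch(A, B_packed, quant_state, bias=None):
    # Dequantize to quant_state.dtype, then cast at the point of use.
    # This .to(A.dtype) is the cast the commit message cites: it makes a
    # quant_state.dtype that disagrees with A.dtype harmless downstream.
    W = F.dequantize_4bit(B_packed, quant_state)
    return torch.nn.functional.linear(A, W.to(A.dtype).t(), bias)
```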

tests/test_linear4bit.py (0 additions & 3 deletions)
```diff
@@ -453,9 +453,6 @@ def test_linear4bit_torch_compile_activation_checkpointing(device, quant_type, c
         pytest.skip("This configuration is not supported on HPU.")
     if device == "cuda" and platform.system() == "Windows":
         pytest.skip("Triton is not officially supported on Windows")
-    if device == "cpu":
-        pytest.skip("matmul_4bit mutates quant_state.dtype on CPU, causing a separate graph break (#1917)")
-
     dim = 256
     batch_size = 16
     compute_dtype = torch.bfloat16
```
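
With the skip removed, the test's CPU configuration runs again. The following is a hedged sketch of what that configuration plausibly exercises; `dim`, `batch_size`, and `compute_dtype` come from the visible diff, while the `quant_type`, the checkpoint wrapper, and the assumption of a bitsandbytes build with CPU 4-bit support are mine, not the test's verbatim body.

```python
import torch
import bitsandbytes as bnb
from torch.utils.checkpoint import checkpoint

dim, batch_size = 256, 16           # values taken from the visible diff
compute_dtype = torch.bfloat16

# Linear4bit quantizes its weight when moved to the target device.
linear = bnb.nn.Linear4bit(
    dim, dim, compute_dtype=compute_dtype, quant_type="nf4"
).to("cpu")


def fwd(x):
    return checkpoint(linear, x, use_reentrant=False)


# fullgraph=True turns any remaining graph break into a hard error,
# which is what made the removed mutation show up in this test.
compiled = torch.compile(fwd, fullgraph=True)
out = compiled(torch.randn(batch_size, dim, dtype=compute_dtype, requires_grad=True))
```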
