Skip to content

Commit 68e0458

Browse files
authored
Fix attention mask to use float_lowest instead of -inf and add NaN-safe softmax handling (#2654)
#2561 - Use the lowest representable float value instead of -inf for attention masks. - Add NaN-safe handling and a unit test for softmax with all masked positions. Please let me know if my approach or fix needs any improvements. I’m open to feedback and happy to make changes based on suggestions. Thank you!
1 parent a9cb429 commit 68e0458

File tree

1 file changed

+1
-1
lines changed
  • onnxscript/function_libs/torch_lib/ops

1 file changed

+1
-1
lines changed

onnxscript/function_libs/torch_lib/ops/nn.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2073,7 +2073,7 @@ def _aten_scaled_dot_product_attention_bool_mask_onnx(
20732073
key_transposed_scaled = op.Mul(key_transposed, op.Sqrt(scale))
20742074
# Turn the Boolean mask to float: attn_mask.masked_fill(not attn_mask, lowest representable value of dtype)
20752075
zero = op.Constant(value=ir.tensor(0.0, dtype=query.dtype))
2076-
neg_inf = op.Constant(value=ir.tensor(-float("inf"), dtype=query.dtype))
2076+
neg_inf = op.Constant(value=ir.tensor(query.dtype.min, dtype=query.dtype))
20772077
attn_mask = op.Where(attn_mask, zero, neg_inf)
20782078
attn_weight = op.Softmax(
20792079
op.Add(op.MatMul(query_scaled, key_transposed_scaled), attn_mask),

0 commit comments

Comments
 (0)