Skip to content

Commit d852bde

Browse files
committed
[Pallas] Add expected-failure test for non-power-of-2 RDIM size
RDIM_SIZE is rounded to next power of 2, but Pallas block refs use the exact dimension size, causing shape mismatches (e.g., 1000 vs 1024). Adds test_reduce_non_pow2 as expectedFailure to document the bug.
1 parent f2b111e commit d852bde

File tree

1 file changed

+29
-0
lines changed

1 file changed

+29
-0
lines changed

test/test_pallas.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from helion._testing import code_and_output
1212
from helion._testing import onlyBackends
1313
from helion._testing import skipUnlessPallas
14+
from helion._testing import xfailIfPallas
1415
import helion.language as hl
1516

1617

@@ -210,6 +211,23 @@ def pallas_attention(
210211
return out.view(q_in.size())
211212

212213

214+
@helion.kernel(backend="pallas", static_shapes=True)
def pallas_reduce_non_pow2(x: torch.Tensor) -> torch.Tensor:
    """Softmax over a non-power-of-2 reduction dim.

    Uses amax + exp + sum which forces explicit index/mask generation,
    exercising the RDIM_SIZE code path.

    Args:
        x: 2-D input tensor; the last dimension is the reduction dim
           (intended to be non-power-of-2, e.g. 1000, to trigger the bug).

    Returns:
        Tensor of the same shape as ``x`` with softmax applied along dim -1.
    """
    # Only the row count is tiled; the reduction dim is consumed whole per row.
    n, _m = x.size()
    out = torch.empty_like(x)
    for tile_n in hl.tile(n):
        row = x[tile_n, :]
        # Numerically stable softmax: subtract the per-row max before exp.
        max_val = torch.amax(row, dim=-1, keepdim=True)
        exp_val = torch.exp(row - max_val)
        out[tile_n, :] = exp_val / torch.sum(exp_val, dim=-1, keepdim=True)
    return out
229+
230+
213231
@onlyBackends(["triton", "pallas"])
214232
@skipUnlessPallas("JAX/Pallas TPU not available")
215233
class TestPallas(TestCase):
@@ -503,5 +521,16 @@ def test_attention_emit_pipeline_non_divisible(self) -> None:
503521
torch.testing.assert_close(result, ref, rtol=1e-2, atol=1e-2)
504522

505523

524+
@xfailIfPallas("RDIM_SIZE rounded to next power of 2 causes shape mismatch")
def test_reduce_non_pow2(self) -> None:
    """Reduction over non-power-of-2 dim should use exact size, not rounded."""
    # 1000 is deliberately not a power of 2 (would round up to 1024).
    inp = torch.randn(128, 1000, device=DEVICE, dtype=torch.float32)
    expected = torch.nn.functional.softmax(inp, dim=-1)
    _code, result = code_and_output(
        pallas_reduce_non_pow2, (inp,), block_size=128
    )
    torch.testing.assert_close(result, expected, rtol=1e-4, atol=1e-4)
533+
534+
506535
if __name__ == "__main__":
507536
unittest.main()

0 commit comments

Comments
 (0)