Fix tosa.cast float-to-int rounding to use truncation (RTZ) (#2340)

umangyadav · web-flow · commit 1714bad70833 · 2026-04-28T20:16:02.000-04:00
Upstream tosa-to-linalg lowers tosa.cast float-to-int by inserting
math.roundeven before arith.fptosi to implement TOSA's
round-to-nearest-even semantics. ONNX and PyTorch instead define
float-to-int cast as truncation (round-towards-zero), which is what
arith.fptosi already does natively.

Since rocMLIR primarily serves ONNX/MIGraphX workloads, restore RTZ
semantics without modifying upstream LLVM:

- The migraphx.convert lowering in MIGraphXToTosa tags float-to-int
  tosa.cast ops with the FusedLoc metadata "rocmlir.rtz_cast".
  Other casts (int-to-float, int-to-int, float-to-float, unsigned)
  are left untouched to avoid affecting unrelated lowerings or
  stripping legitimate rounding that upstream may insert in the future.
- New conversion pass fix-tosa-cast-rounding strips math.roundeven
  inside linalg.generic when (a) it (or its parent generic) carries
  the RTZ tag and (b) it exclusively feeds the recognized cast chain
  (clamp / saturation merge ending at arith.fptosi). Quantization
  casts (which need RNE) are untouched because they are not tagged.
- The pass is wired into the bufferize pipeline immediately after
  tosa-to-linalg.

Tests:
- Lit tests cover both the float-clamp and i32-saturation matching
  paths, plus negatives for untagged roundeven, roundeven outside
  linalg.generic, quantization, float-to-float convert, and roundeven
  with extra users.
- A CANARY RUN line guards against silent upstream regressions in
  tosa-to-linalg's choice to emit math.roundeven.
- New CPU e2e test verifies actual numerical RTZ behaviour
  (3.5 -> 3, -3.5 -> -3, etc.).
diff --git a/mlir/include/mlir/Conversion/FixTosaCastRounding/FixTosaCastRounding.h b/mlir/include/mlir/Conversion/FixTosaCastRounding/FixTosaCastRounding.h
@@ -0,0 +1,32 @@
+//===- FixTosaCastRounding.h - Fix tosa.cast rounding -----------*- C++ -*-===//
+//
+// Part of the rocMLIR Project, under the Apache License v2.0 with LLVM
+// Exceptions. See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Copyright (c) 2026 Advanced Micro Devices Inc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_CONVERSION_FIXTOSACASTROUNDING_FIXTOSACASTROUNDING_H
+#define MLIR_CONVERSION_FIXTOSACASTROUNDING_FIXTOSACASTROUNDING_H
+
+#include "mlir/Pass/Pass.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace mlir {
+
+#define GEN_PASS_DECL_FIXTOSACASTROUNDINGPASS
+#include "mlir/Conversion/RocMLIRPasses.h.inc"
+
+namespace rock {
+/// FusedLoc metadata tag used to mark tosa.cast ops that want RTZ rounding.
+/// Casts from migraphx.convert carry this tag; casts from quantization do not.
+/// Read by the fix-tosa-cast-rounding pass to decide whether to strip the
+/// math.roundeven that upstream tosa-to-linalg inserts before arith.fptosi.
+///
+/// Declared `inline` so that every translation unit including this header
+/// refers to the same entity instead of getting its own internal-linkage copy.
+inline constexpr llvm::StringLiteral kRtzCastLocTag("rocmlir.rtz_cast");
+} // namespace rock
+
+} // namespace mlir
+
+#endif // MLIR_CONVERSION_FIXTOSACASTROUNDING_FIXTOSACASTROUNDING_H
diff --git a/mlir/include/mlir/Conversion/RocMLIRPasses.h b/mlir/include/mlir/Conversion/RocMLIRPasses.h
@@ -10,6 +10,7 @@
 #define MLIR_CONVERSION_ROCMLIRPASSES_H
 
 #include "mlir/Conversion/EmulateFp8ExtTrunc/EmulateFp8ExtTrunc.h"
+#include "mlir/Conversion/FixTosaCastRounding/FixTosaCastRounding.h"
 #include "mlir/Conversion/LinalgToRock/LinalgToRock.h"
 #include "mlir/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.h"
 #include "mlir/Conversion/MIGraphXToTosa/MIGraphXToTosa.h"
diff --git a/mlir/include/mlir/Conversion/RocMLIRPasses.td b/mlir/include/mlir/Conversion/RocMLIRPasses.td
@@ -33,6 +33,40 @@ def ConvertRockToGPUPass : Pass<"convert-rock-to-gpu", "ModuleOp"> {
   ];
 }
 
+//===----------------------------------------------------------------------===//
+// FixTosaCastRoundingPass
+//===----------------------------------------------------------------------===//
+
+def FixTosaCastRoundingPass
+    : Pass<"fix-tosa-cast-rounding", "::mlir::func::FuncOp"> {
+  let summary = "Change tosa.cast float-to-int from round-to-nearest-even to "
+                "round-towards-zero";
+  let description = [{
+    The upstream tosa-to-linalg pass inserts math.roundeven before arith.fptosi
+    when lowering tosa.cast, implementing TOSA's round-to-nearest-even
+    semantics. This pass removes those math.roundeven ops so that
+    arith.fptosi's native truncation (round towards zero) is used instead,
+    matching ONNX and PyTorch cast semantics. Note that this intentionally
+    diverges from the TOSA spec; rocMLIR primarily serves ONNX/MIGraphX
+    workloads where RTZ is the expected behavior.
+
+    Only math.roundeven ops that belong to an RTZ-tagged cast lowering are
+    removed; this preserves round-to-nearest-even for quantization casts.
+    The tag is the FusedLoc metadata exposed as `mlir::rock::kRtzCastLocTag`
+    and is set by the migraphx.convert lowering in migraphx-to-tosa. Because
+    upstream tosa-to-linalg often rewrites the inner roundeven's location,
+    the tag may end up on the math.roundeven itself, on the parent
+    linalg.generic's location, or on one of the generic region's *output*
+    block-argument locations; the pass checks all three. Input block-arg
+    locations are deliberately ignored to avoid false positives when a
+    downstream generic merely consumes a previously-tagged cast result.
+    To stay safe the pass also bails on multi-output generics and on i1
+    outputs.
+  }];
+  let dependentDialects = ["func::FuncDialect", "linalg::LinalgDialect",
+                           "math::MathDialect"];
+}
+
 //===----------------------------------------------------------------------===//
 // RocmlirCustomTosaDecomposePass
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_subdirectory(EmulateFp8ExtTrunc)
+add_subdirectory(FixTosaCastRounding)
 add_subdirectory(MIGraphXToTosa)
 add_subdirectory(RocmlirCustomTosaDecompose)
 add_subdirectory(RocmlirCustomTosaToLinalg)
diff --git a/mlir/lib/Conversion/FixTosaCastRounding/CMakeLists.txt b/mlir/lib/Conversion/FixTosaCastRounding/CMakeLists.txt
@@ -0,0 +1,18 @@
+add_rocmlir_conversion_library(RocmlirFixTosaCastRounding
+  FixTosaCastRounding.cpp
+
+  DEPENDS
+  RocMLIRConversionPassIncGen
+)
+
+target_link_libraries(RocmlirFixTosaCastRounding
+  PUBLIC
+  MLIRIR
+  MLIRPass
+  MLIRSupport
+  MLIRArithDialect
+  MLIRFuncDialect
+  MLIRLinalgDialect
+  MLIRMathDialect
+  MLIRTransformUtils
+)
diff --git a/mlir/lib/Conversion/FixTosaCastRounding/FixTosaCastRounding.cpp b/mlir/lib/Conversion/FixTosaCastRounding/FixTosaCastRounding.cpp
@@ -0,0 +1,172 @@
+//===- FixTosaCastRounding.cpp - Fix tosa.cast rounding -------------------===//
+//
+// Part of the rocMLIR Project, under the Apache License v2.0 with LLVM
+// Exceptions. See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Copyright (c) 2026 Advanced Micro Devices Inc.
+//
+//===----------------------------------------------------------------------===//
+//
+// The upstream tosa-to-linalg pass inserts math.roundeven before arith.fptosi
+// when lowering tosa.cast from float to integer. This implements TOSA's
+// "round to nearest, ties to even" semantics.
+//
+// However, ONNX and PyTorch define float-to-int cast as truncation (round
+// towards zero), which is what arith.fptosi already does natively. Since
+// rocMLIR primarily serves ONNX/MIGraphX workloads, this pass removes the
+// math.roundeven ops to restore RTZ semantics without modifying the upstream
+// LLVM code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/FixTosaCastRounding/FixTosaCastRounding.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/Math/IR/Math.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/TypeUtilities.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace mlir {
+#define GEN_PASS_DEF_FIXTOSACASTROUNDINGPASS
+#include "mlir/Conversion/RocMLIRPasses.h.inc"
+} // namespace mlir
+
+using namespace mlir;
+
+namespace {
+
+/// Decides whether the value produced by `op` is consumed *only* by the
+/// float-to-int cast chain that upstream tosa-to-linalg emits, and that the
+/// chain actually contains an `arith.fptosi`.
+///
+/// The walk starts at the results of `op` and follows users transitively:
+///   - `arith.fptosi` marks the chain as valid; its result is not followed.
+///   - `linalg.yield` is accepted as a terminal and not followed.
+///   - `arith.minimumf` / `arith.maximumf` (float clamp) and `arith.cmpf` /
+///     `arith.select` (integer saturation merge used for the i32 case) are
+///     pass-through ops whose results are followed in turn.
+///   - Any other user makes the whole query fail.
+///
+/// Dropping the `math.roundeven` is safe for the saturation comparison too:
+/// at the i32 saturation boundary (|2^31|) every f32 value is already an
+/// integer (f32 ULP is >= 1 above 2^23), so the rounding is a no-op there
+/// and the `arith.cmpf` outcome does not change.
+///
+/// The check is deliberately strict. A single unrecognized user anywhere in
+/// the chain yields `false`, even if a sibling user does reach an
+/// `arith.fptosi`, so that an unrelated consumer relying on RNE semantics is
+/// never affected.
+///
+/// \returns true iff every transitive user is recognized and at least one
+///          `arith.fptosi` was encountered.
+static bool reachesFPToSI(math::RoundEvenOp op) {
+  bool sawFPToSI = false;
+  SmallVector<Value, 8> pending(op->getResults());
+  while (!pending.empty()) {
+    Value current = pending.pop_back_val();
+    for (Operation *consumer : current.getUsers()) {
+      if (isa<arith::FPToSIOp>(consumer)) {
+        // The conversion we are looking for; nothing further to follow.
+        sawFPToSI = true;
+      } else if (isa<linalg::YieldOp>(consumer)) {
+        // Terminal of the generic body; accepted as-is.
+      } else if (isa<arith::MinimumFOp, arith::MaximumFOp, arith::CmpFOp,
+                     arith::SelectOp>(consumer)) {
+        // Clamp / saturation-merge ops: keep following their results.
+        for (Value produced : consumer->getResults())
+          pending.push_back(produced);
+      } else {
+        // Unknown consumer: refuse to touch the rounding.
+        return false;
+      }
+    }
+  }
+  return sawFPToSI;
+}
+
+/// Returns true when `loc` is a `FusedLoc` whose metadata is a `StringAttr`
+/// equal to `rock::kRtzCastLocTag`. Only the metadata of `loc` itself is
+/// inspected; locations nested inside it are not searched.
+static bool hasRtzCastLocTag(Location loc) {
+  auto fusedLoc = dyn_cast<FusedLoc>(loc);
+  if (!fusedLoc)
+    return false;
+  auto tag = dyn_cast_or_null<StringAttr>(fusedLoc.getMetadata());
+  if (!tag)
+    return false;
+  return tag.getValue() == rock::kRtzCastLocTag;
+}
+
+/// Reports whether `op` belongs to the lowering of an RTZ-tagged
+/// `migraphx.convert`.
+///
+/// The tag is attached to the `tosa.cast` and ends up on the enclosing
+/// `linalg.generic`'s location and on its output region block argument (the
+/// one carved out from the `tensor.empty()` the cast writes into). Upstream
+/// `tosa-to-linalg` may give the inner `math.roundeven` a different
+/// `Location`, so the op's own location is only one of the places checked.
+///
+/// Checked, in order: the location of `op`, the location of `generic`, then
+/// the location of each output block argument of `generic`'s region.
+///
+/// Input block arguments are intentionally skipped: they inherit the
+/// location of the incoming SSA value, so a value produced by an earlier
+/// tagged cast would carry the tag forward and cause an unrelated
+/// `math.roundeven` in a downstream generic to be stripped by mistake.
+static bool isRtzTaggedCastLowering(math::RoundEvenOp op,
+                                    linalg::GenericOp generic) {
+  if (hasRtzCastLocTag(op->getLoc()))
+    return true;
+  if (hasRtzCastLocTag(generic.getLoc()))
+    return true;
+  for (BlockArgument outArg : generic.getRegionOutputArgs())
+    if (hasRtzCastLocTag(outArg.getLoc()))
+      return true;
+  return false;
+}
+
+/// Rewrite pattern that forwards the operand of a `math.roundeven` to its
+/// users when the op sits inside a `linalg.generic` produced from an
+/// RTZ-tagged cast and feeds nothing but the recognized cast chain.
+struct RemoveRoundEvenBeforeFPToSI
+    : public OpRewritePattern<math::RoundEvenOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(math::RoundEvenOp op,
+                                PatternRewriter &rewriter) const override {
+    // Only rounding that lives inside a linalg.generic is a candidate.
+    auto enclosingGeneric = op->getParentOfType<linalg::GenericOp>();
+    if (!enclosingGeneric)
+      return failure();
+
+    // An RTZ-tagged cast lowering stems from one tosa.cast, hence the
+    // generic has exactly one integer output. getOutputs() is used instead
+    // of getResultTypes() so the check also holds in buffer semantics, where
+    // the generic has no SSA results.
+    //
+    // Multi-output generics (e.g. after linalg fusion) are rejected so that
+    // a math.roundeven also feeding a sibling result -- such as an i1
+    // yielded straight from arith.cmpf, which would silently flip without
+    // the rounding -- is never stripped.
+    ValueRange genericOuts = enclosingGeneric.getOutputs();
+    if (genericOuts.size() != 1)
+      return failure();
+
+    // i1 outputs are rejected as well: ONNX/PyTorch float-to-bool means
+    // "non-zero", not truncation, so dropping roundeven would be unsafe even
+    // if upstream tosa-to-linalg ever emitted it for an i1 cast.
+    // MIGraphXToTosa does not tag float-to-i1 casts today; this is
+    // defense-in-depth.
+    Type elemTy = getElementTypeOrSelf(genericOuts[0].getType());
+    const bool isNonBoolInteger =
+        isa<IntegerType>(elemTy) && !elemTy.isInteger(1);
+    if (!isNonBoolInteger)
+      return failure();
+
+    // Require the RTZ tag first, then the exclusive cast-chain shape.
+    if (!isRtzTaggedCastLowering(op, enclosingGeneric) || !reachesFPToSI(op))
+      return failure();
+
+    // Bypass the rounding: users now consume the unrounded operand.
+    rewriter.replaceOp(op, op.getOperand());
+    return success();
+  }
+};
+
+/// Pass driver: greedily applies `RemoveRoundEvenBeforeFPToSI` to the
+/// operation the pass runs on and signals pass failure if the greedy driver
+/// reports failure.
+struct FixTosaCastRoundingPass
+    : public impl::FixTosaCastRoundingPassBase<FixTosaCastRoundingPass> {
+  using FixTosaCastRoundingPassBase::FixTosaCastRoundingPassBase;
+
+  void runOnOperation() override {
+    MLIRContext *ctx = &getContext();
+    RewritePatternSet patterns(ctx);
+    patterns.add<RemoveRoundEvenBeforeFPToSI>(ctx);
+    LogicalResult status =
+        applyPatternsGreedily(getOperation(), std::move(patterns));
+    if (failed(status))
+      signalPassFailure();
+  }
+};
+
+} // namespace
diff --git a/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp b/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "mlir/Conversion/MIGraphXToTosa/MIGraphXToTosa.h"
+#include "mlir/Conversion/FixTosaCastRounding/FixTosaCastRounding.h"
 #include "mlir/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
@@ -1313,9 +1314,28 @@ ConvertConverter::matchAndRewrite(migraphx::ConvertOp op, OpAdaptor adaptor,
         ROCK_CUSTOMOP_UNSIGNED_CAST, ROCK_CUSTOMOP_DOMAIN_NAME, "",
         adaptor.getInA());
   } else {
-    rewriter.replaceOpWithNewOp<tosa::CastOp>(
-        op, getTypeConverter()->convertType(op.getResult().getType()),
+    // Tag float-to-int casts with RTZ metadata so that fix-tosa-cast-rounding
+    // can distinguish them (want truncation) from quantization casts (want
+    // RNE). Other casts (int-to-float, int-to-int, float-to-float) don't go
+    // through math.roundeven today; tagging them serves no purpose and would
+    // risk stripping legitimate rounding if upstream tosa-to-linalg ever
+    // inserts it (e.g. for narrowing float-to-float casts).
+    //
+    // Float-to-bool is excluded explicitly: ONNX/PyTorch bool cast semantics
+    // is "non-zero" (not truncation), and upstream tosa-to-linalg lowers it
+    // via arith.cmpf une rather than roundeven+fptosi. Tagging it would be
+    // misleading and unsafe if upstream ever changes that lowering.
+    Location castLoc = op.getLoc();
+    if (isa<FloatType>(inputType) && isa<IntegerType>(outputType) &&
+        !outputType.isInteger(1))
+      castLoc =
+          FusedLoc::get(op.getContext(), {op.getLoc()},
+                        StringAttr::get(op.getContext(), rock::kRtzCastLocTag));
+    auto castOp = tosa::CastOp::create(
+        rewriter, castLoc,
+        getTypeConverter()->convertType(op.getResult().getType()),
         adaptor.getInA());
+    rewriter.replaceOp(op, castOp);
   }
   return success();
 }
diff --git a/mlir/lib/Dialect/Rock/Pipelines/CMakeLists.txt b/mlir/lib/Dialect/Rock/Pipelines/CMakeLists.txt
@@ -20,6 +20,7 @@ add_rocmlir_dialect_library(MLIRRockPipeline
   MLIRRockTransforms
   MLIRRockUtility
   MLIRUBToLLVM
+  RocmlirFixTosaCastRounding
   RocmlirCustomTosaDecompose
   RocmlirCustomTosaToLinalg
   RocmlirEmulateFp8ExtTrunc
diff --git a/mlir/lib/Dialect/Rock/Pipelines/Pipelines.cpp b/mlir/lib/Dialect/Rock/Pipelines/Pipelines.cpp
@@ -97,6 +97,10 @@ void rock::buildBufferizePipeline(OpPassManager &pm,
                               /*validationOptions=*/std::nullopt,
                               /*attachTargetOptions=*/tosaOptions);
 
+  // Strip math.roundeven inserted by tosa-to-linalg for RTZ-tagged casts.
+  auto &castFixPm = pm.nest<func::FuncOp>();
+  castFixPm.addPass(createFixTosaCastRoundingPass());
+
   // convert named linalg operations into linalg generic
   LinalgMorphOpsPassOptions morphOptions;
   morphOptions.namedToCategory = false;
diff --git a/mlir/test/Conversion/FixTosaCastRounding/fix-tosa-cast-rounding.mlir b/mlir/test/Conversion/FixTosaCastRounding/fix-tosa-cast-rounding.mlir
diff --git a/mlir/test/fusion/pr-e2e/tosa-cast-rtz.cpu.mlir b/mlir/test/fusion/pr-e2e/tosa-cast-rtz.cpu.mlir
diff --git a/mlir/test/rocmlir-driver/pipelines.mlir b/mlir/test/rocmlir-driver/pipelines.mlir
diff --git a/mlir/tools/rocmlir-lib/librockcompiler_deps.cmake b/mlir/tools/rocmlir-lib/librockcompiler_deps.cmake

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`add_subdirectory(EmulateFp8ExtTrunc)`
	`2`	`+add_subdirectory(FixTosaCastRounding)`
`2`	`3`	`add_subdirectory(MIGraphXToTosa)`
`3`	`4`	`add_subdirectory(RocmlirCustomTosaDecompose)`
`4`	`5`	`add_subdirectory(RocmlirCustomTosaToLinalg)`