Draft

Commits

70 commits
c4a7ee3
add migraphx.attention op
umangyadav Mar 23, 2026
4ea302d
add migraphx.attention op using migraphx ops in presoftmaxbody
umangyadav Mar 24, 2026
1f3a188
add attention decompose pass
umangyadav Mar 24, 2026
20f12ba
add E2E lowering for migraphx.attention
umangyadav Mar 25, 2026
8e690ce
Formatting
umangyadav Mar 25, 2026
48438f1
add all features
umangyadav Mar 27, 2026
a1f23e3
add formatting
umangyadav Mar 27, 2026
58112c1
add return type for migraphx.yield
umangyadav Mar 27, 2026
7fb4556
formatting
umangyadav Mar 27, 2026
e138616
add some more tests
umangyadav Mar 27, 2026
f1c4b40
Review cycle
umangyadav Mar 27, 2026
d56a7cd
Use rock.kernel consistently in MIGraphX attention lowering.
umangyadav Apr 15, 2026
55084bb
add a fix to lib deps
umangyadav Apr 15, 2026
4d1cf9a
Some fixes
umangyadav Apr 30, 2026
72e7d93
[NFC] Ignore local plans/, scratch/, and notes/ directories
umangyadav Apr 30, 2026
f0fc503
Require explicit head broadcast for migraphx.attention seqLen/prefixO…
umangyadav Apr 30, 2026
cb4a701
Require slidingWindowSize attribute when sliding_window feature is set
umangyadav Apr 30, 2026
2b3adba
Support i8 Q/K via dequantize-in-body for migraphx.attention
umangyadav Apr 30, 2026
2c9f3f1
Require Q rank >= 4 for migraphx.attention GQA
umangyadav Apr 30, 2026
1ca017f
Validate preSoftmaxBody arg/yield types in migraphx.attention verifier
umangyadav May 1, 2026
dc0ddb3
Require softmaxType when softmax input doesn't match V's element type
umangyadav May 1, 2026
66ac8af
Add migraphx.attention E2E coverage mirroring pr-e2e/attention/padded*
umangyadav May 1, 2026
f2dc1a6
Clamp host sliding-window lower bound and tighten kvcache mask docs
umangyadav May 1, 2026
49efd39
Extend preSoftmaxBody scalar lowering and tighten verifier allowlist
umangyadav May 1, 2026
a648693
[NFC] Share migraphx.attention contracts via AttentionUtils.h
umangyadav May 1, 2026
aef2fac
[NFC] Simplify migraphx.attention verifier and host stride helpers
umangyadav May 1, 2026
82d9b82
Assert dispatcher / verifier allowlist parity and exercise sliding-wi…
umangyadav May 1, 2026
7b88f9f
Tighten migraphx.attention feature interactions
umangyadav May 1, 2026
514f9c9
[NFC] Clean up unused MIXR-to-tensor helper and host LSE reshape
umangyadav May 1, 2026
9ad89f5
Fix migraphx.attention splitKV lowering/decompose alignment
umangyadav May 1, 2026
1986544
Tighten migraphx.attention verifier across operand/attr edge cases
umangyadav May 1, 2026
b7f79c4
Document actual cause of splitkv-scale loose threshold (rock kernel bug)
umangyadav May 1, 2026
68fc74b
Refine splitkv-scale loose-threshold comment with grid-size detail
umangyadav May 1, 2026
c1127c2
Pinpoint splitkv-scale rock-side bug to postProcessFirstGemm read
umangyadav May 1, 2026
5eaeab9
Fix splitKV body-input read in postProcessFirstGemm
umangyadav May 2, 2026
049a068
Add splitKV+body+otherIns regression test for gridwise attention
umangyadav May 2, 2026
9b09acb
[NFC] Polish migraphx.attention helper construction
umangyadav May 2, 2026
be5408e
[NFC] Factor out shared shape and broadcast helpers in attention veri…
umangyadav May 2, 2026
d852889
Tighten migraphx.attention contract and widen host second GEMM
umangyadav May 2, 2026
276086b
Make C API attention helper produce verifier-clean IR
umangyadav May 2, 2026
42410d3
Fix migraphx.attention i8 mask decompose and reject result-type mismatch
umangyadav May 3, 2026
13fa5b2
Tighten migraphx.attention GQA to dim-1-on-rank-4 only
umangyadav May 3, 2026
af545f6
[NFC] Fix attention file copyright year and orphan TODO
umangyadav May 3, 2026
384aac7
Tighten migraphx.attention where-op body check to skip cond only
umangyadav May 3, 2026
71671d2
Convert splitKV invertTransforms assert to op.emitError
umangyadav May 3, 2026
2a641fb
Add splitKV+kvcache coverage for the rock.attention path
umangyadav May 3, 2026
3a70f7c
Anchor MIGraphXAttentionToRockPass to func::FuncOp
umangyadav May 3, 2026
7323bea
Harden rocmlirMIGraphXAttentionCreate input contract
umangyadav May 3, 2026
e63b7ec
Tighten loose thresholds in two attention E2E tests
umangyadav May 3, 2026
2bc70e5
[NFC] Declare rock/arith/tensor as MIGraphXToTosaPass dependentDialects
umangyadav May 3, 2026
2276637
Enforce rocmlirMIGraphXAttentionCreate input contract in release builds
umangyadav May 3, 2026
cfe5a69
Cover migraphx.ceil and migraphx.floor in attention scalar lowering test
umangyadav May 3, 2026
94e9f8a
Reject null preSoftmaxBody in rocmlirMIGraphXAttentionCreate
umangyadav May 3, 2026
e9eee32
Add CAPI negative test for rocmlirMIGraphXAttentionCreate contract
umangyadav May 3, 2026
b101d7a
Tighten migraphx.attention verifier across remaining edge cases
umangyadav May 3, 2026
b11c0cb
[NFC] Drop dead LSE convert in attention host decompose
umangyadav May 3, 2026
a1ecb96
Reject null location in rocmlirMIGraphXAttentionCreate
umangyadav May 3, 2026
3d321be
Document f16-softmax precision floor in three loose-threshold tests
umangyadav May 3, 2026
499b99b
Add i8 + splitKV + kvcache cross-product attention E2E test
umangyadav May 3, 2026
cdd6eb4
[NFC] clang-format MIGraphXToTosaPass dependentDialects list
umangyadav May 3, 2026
da07229
Tighten attention contracts: GQA assert, body arity, CAPI orphan chec…
umangyadav May 3, 2026
4c080a2
[NFC] Simplify diagnostic + tighten softmax_type widen/narrow CHECK l…
umangyadav May 3, 2026
66cc49c
Skip always-true bounds compares in pad/embed validity check
umangyadav May 3, 2026
d480606
[NFC] Repair clang-format-mangled headers in new attention files
umangyadav May 3, 2026
03fb918
Document attention pass polarity contract + add end-to-end polarity test
umangyadav May 3, 2026
dbbf99c
Pin remaining CAPI reject paths + clarify preSoftmaxHasSplitKVTransfo…
umangyadav May 4, 2026
3e9be5b
[NFC] Add developer doc for migraphx.attention preSoftmaxBody contract
umangyadav May 4, 2026
e149290
Tighten migraphx.attention verifier: yield float, reject rank > 4
umangyadav May 4, 2026
eabf4ed
[NFC] Address minor review nits in attention lowering and CAPI test
umangyadav May 4, 2026
103c5b9
[NFC] Clarify migraphx.attention quantization contract + add doc TOC
umangyadav May 4, 2026
7 changes: 7 additions & 0 deletions .gitignore
@@ -27,6 +27,13 @@
# Nested build directory
/build*

# Local plan/scratch notes (per workspace-hygiene rule). Match anywhere in
# the tree (no leading slash) so notes nested under e.g. mlir/plans/ also
# stay untracked.
plans/
scratch/
notes/

#==============================================================================#
# Explicit files to ignore (only matches one).
#==============================================================================#
2,028 changes: 2,028 additions & 0 deletions mlir/docs/MIGraphX/attention.md

Large diffs are not rendered by default.

61 changes: 61 additions & 0 deletions mlir/include/mlir-c/Dialect/MIGraphX.h
@@ -30,6 +30,10 @@ extern "C" {
// - mlirGetKernelAttrs() returns uint32_t[3] {block_size, grid_size,
// cluster_size} instead of uint32_t[2] {block_size, grid_size}.
// - Removed: mlirGetKernelInfo(), mlirMIGraphXAddApplicabilityPipeline().
// - Added: rocmlirMIGraphXAttentionCreate() for building migraphx.attention
// ops with variadic inputs, optional LSE, softmaxType, preSoftmaxBody,
// feature flags (kvcache, causal, prefix_offset, sliding_window, splitkv),
// currentSeqLen, prefixOffset, splitKV, and slidingWindowSize.
#define MLIR_MIGRAPHX_DIALECT_API_VERSION 5

typedef struct MlirMIGraphXBackendOptions {
@@ -38,6 +42,13 @@ typedef struct MlirMIGraphXBackendOptions {
int optLevel;
} MlirMIGraphXBackendOptions;

#define MLIR_MIGRAPHX_ATTENTION_NONE 0
#define MLIR_MIGRAPHX_ATTENTION_KVCACHE (1 << 0)
#define MLIR_MIGRAPHX_ATTENTION_CAUSAL (1 << 1)
#define MLIR_MIGRAPHX_ATTENTION_PREFIX_OFFSET (1 << 2)
#define MLIR_MIGRAPHX_ATTENTION_SLIDING_WINDOW (1 << 3)
#define MLIR_MIGRAPHX_ATTENTION_SPLITKV (1 << 4)

MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(MIGraphX, migraphx);

// Types
@@ -73,6 +84,56 @@ MLIR_CAPI_EXPORTED void mlirMIGraphXAddHighLevelPipeline(MlirPassManager pm);
MLIR_CAPI_EXPORTED bool
mlirMIGraphXAddBackendPipeline(MlirPassManager pm,
const MlirMIGraphXBackendOptions *opts);

// Op creation helpers

/// Creates a `migraphx.attention` operation.
///
/// \p queries, \p keys, \p values are the required Q, K, V operands.
/// \p preSoftmaxElemWiseInputs is an array of \p numPreSoftmaxInputs additional
/// operands for element-wise fusion before softmax (can be NULL if 0).
/// \p resultType is the MIXRShaped type of the attention result (required).
/// \p lseType is the MIXRShaped type of the optional log-sum-exp output; pass
/// a null type (via mlirTypeIsNull) to omit.
/// \p softmaxType is the optional element type for softmax computation; pass
/// a null type to omit.
/// \p preSoftmaxBody is a caller-created region for pre-softmax element-wise
/// ops. Pass an empty region (mlirRegionCreate()) for a no-op body.
/// Ownership of the region transfers to the created operation.
/// \p features is the bitwise-OR of MLIR_MIGRAPHX_ATTENTION_* flags (0 = none).
/// \p currentSeqLen is required when kvcache is set; pass a null value to omit.
/// \p prefixOffset is required when prefix_offset is set; pass null to omit.
/// \p splitKV is the number of KV splits (0 or 1 = omit attribute).
/// \p slidingWindowSize is the window size (0 = omit attribute).
///
/// Contract violations are rejected with a stderr diagnostic and a null
/// MlirOperation return (check via mlirOperationIsNull). The same contract
/// is enforced in both debug and release builds. Specifically, the function
/// returns a null op (and writes a "rocmlirMIGraphXAttentionCreate: ..."
/// line to stderr) if \p location is null, if any of \p queries, \p keys,
/// \p values is null, if \p numPreSoftmaxInputs is negative or
/// \p preSoftmaxElemWiseInputs is NULL when the count is positive, if
/// \p splitKV or \p slidingWindowSize is negative, if \p resultType is
/// null, or if \p preSoftmaxBody is null (use mlirRegionCreate() for the
/// no-body case rather than a default-initialized struct).
///
/// The feature/attribute and feature/operand pairings from the op verifier
/// are also enforced here so the diagnostic happens before any IR is
/// constructed: \p splitKV > 1 requires MLIR_MIGRAPHX_ATTENTION_SPLITKV in
/// \p features, \p slidingWindowSize > 0 requires
/// MLIR_MIGRAPHX_ATTENTION_SLIDING_WINDOW, a non-null \p currentSeqLen
/// requires MLIR_MIGRAPHX_ATTENTION_KVCACHE, and a non-null
/// \p prefixOffset requires MLIR_MIGRAPHX_ATTENTION_PREFIX_OFFSET. All
/// other invariants (operand element types, shape compatibility, the
/// missing-operand-required-by-feature direction, etc.) are still left to
/// the AttentionOp verifier.
MLIR_CAPI_EXPORTED MlirOperation rocmlirMIGraphXAttentionCreate(
MlirLocation location, MlirValue queries, MlirValue keys, MlirValue values,
intptr_t numPreSoftmaxInputs, const MlirValue *preSoftmaxElemWiseInputs,
MlirType resultType, MlirType lseType, MlirType softmaxType,
MlirRegion preSoftmaxBody, uint32_t features, MlirValue currentSeqLen,
MlirValue prefixOffset, int32_t splitKV, int32_t slidingWindowSize);

#ifdef __cplusplus
}
#endif
27 changes: 27 additions & 0 deletions mlir/include/mlir/Conversion/MIGraphXAttentionToRock/MIGraphXAttentionToRock.h
@@ -0,0 +1,27 @@
//===-- MIGraphXAttentionToRock.h -------------------------------*- C++ -*-===//
//
// Part of the rocMLIR Project, under the Apache License v2.0 with LLVM
// Exceptions. See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Copyright (c) 2026 Advanced Micro Devices
//
// Pass declaration for lowering migraphx.attention to rock.attention.
// See MIGraphXAttentionToRock.cpp for the polarity contract with the
// host-side AttentionDecompose pattern.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_CONVERSION_MIGRAPHXATTENTIONTOROCK_H
#define MLIR_CONVERSION_MIGRAPHXATTENTIONTOROCK_H

#include "mlir/Pass/Pass.h"

namespace mlir {

#define GEN_PASS_DECL_MIGRAPHXATTENTIONTOROCKPASS
#include "mlir/Conversion/RocMLIRPasses.h.inc"

} // namespace mlir

#endif // MLIR_CONVERSION_MIGRAPHXATTENTIONTOROCK_H
1 change: 1 addition & 0 deletions mlir/include/mlir/Conversion/RocMLIRPasses.h
@@ -12,6 +12,7 @@
#include "mlir/Conversion/EmulateFp8ExtTrunc/EmulateFp8ExtTrunc.h"
#include "mlir/Conversion/FixTosaCastRounding/FixTosaCastRounding.h"
#include "mlir/Conversion/LinalgToRock/LinalgToRock.h"
#include "mlir/Conversion/MIGraphXAttentionToRock/MIGraphXAttentionToRock.h"
#include "mlir/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.h"
#include "mlir/Conversion/MIGraphXToTosa/MIGraphXToTosa.h"
#include "mlir/Conversion/Passes.h"
43 changes: 39 additions & 4 deletions mlir/include/mlir/Conversion/RocMLIRPasses.td
@@ -124,10 +124,23 @@ def MIGraphXToTosaPass : Pass<"migraphx-to-tosa", "::mlir::func::FuncOp"> {
Pass that converts MIGraphX operations to TOSA operations.
}];

let dependentDialects = [
"func::FuncDialect",
"tosa::TosaDialect",
"mhal::MHALDialect",
// The `arith`, `rock`, and `tensor` dialects appear in the conversion
// target's legality rules (addLegalDialect / addDynamicallyLegalDialect /
// markOpRecursivelyLegal<rock::AttentionOp>) so that rock.attention and
// any pre-existing arith/tensor ops survive the partial conversion. They
// need to be loaded by the time the pass runs even when the IR being
// converted doesn't yet contain ops in those dialects (e.g. an off-tree
// tool that schedules just this pass before MIGraphXAttentionToRock has
// produced the rock.attention).
//
// The dialects nested inside rock.attention's region (linalg, math,
// memref, bufferization, ...) are intentionally NOT listed here: this
// pass only marks them recursively legal, never references their types
// or creates ops in them, and ops inside an in-flight rock.attention
// imply their dialect was already loaded at parse time.
let dependentDialects = ["arith::ArithDialect", "func::FuncDialect",
"mhal::MHALDialect", "rock::RockDialect",
"tensor::TensorDialect", "tosa::TosaDialect",
];
}

@@ -193,4 +206,26 @@ def LinalgToRockPass : Pass<"linalg-to-rock", "::mlir::func::FuncOp"> {
let dependentDialects = ["rock::RockDialect",
"bufferization::BufferizationDialect"];
}
//===----------------------------------------------------------------------===//
// MIGraphXAttentionToRock
//===----------------------------------------------------------------------===//
def MIGraphXAttentionToRockPass
: Pass<"migraphx-attention-to-rock", "::mlir::func::FuncOp"> {
let summary = "Lower migraphx.attention to rock.attention";
let description = [{
Pass that converts migraphx.attention operations directly to
rock.attention operations for GPU compilation.

Anchored to func::FuncOp for consistency with the other migraphx-side
conversion pass (MIGraphXToTosaPass) and because the pipeline already
schedules it inside a func-nested pass manager.
}];
let dependentDialects = ["arith::ArithDialect", "linalg::LinalgDialect",
"math::MathDialect", "rock::RockDialect",
"bufferization::BufferizationDialect",
"memref::MemRefDialect", "tensor::TensorDialect",
"migraphx::MIGraphXDialect",
];
}

#endif // ROCMLIR_CONVERSION_PASSES
75 changes: 75 additions & 0 deletions mlir/include/mlir/Dialect/MIGraphX/IR/AttentionUtils.h
@@ -0,0 +1,75 @@
//===- AttentionUtils.h - Shared rules for migraphx.attention ---*- C++ -*-===//
//
// Part of the rocMLIR Project, under the Apache License v2.0 with LLVM
// Exceptions. See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Copyright (c) 2026 Advanced Micro Devices
//
//===----------------------------------------------------------------------===//
//
// Small inline helpers that encode contracts shared by several pieces of
// migraphx.attention's lowering chain. Keeping them here means the
// verifier, the host AttentionDecompose, the GPU MIGraphXAttentionToRock
// lowering, and rocmlir-gen all derive the same answers from the same code.
//
// Anything that's only used in one place, or that requires
// path-specific inputs (e.g. expectedQKShape, which the verifier
// computes from pre-splitKV operands while the host decompose computes
// from post-splitKV-reshaped types), should stay local to that pass.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_MIGRAPHX_IR_ATTENTIONUTILS_H_
#define MLIR_MIGRAPHX_IR_ATTENTIONUTILS_H_

#include "mlir/Dialect/MIGraphX/IR/MIGraphX.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Operation.h"

namespace mlir {
namespace migraphx {

/// The element type that the first GEMM (Q*K) of a migraphx.attention
/// produces, given Q's element type. For float Q the QK output stays in
/// Q's type; for integer Q the first GEMM is a quantized matmul whose
/// output is i32 (the body is then expected to dequantize that i32 to a
/// float type). Used by AttentionOp::verify, MIGraphXTransform's host
/// AttentionDecompose, and rocmlir-gen so all three derive the same QK
/// type for the same Q.
inline Type computeAttentionQKElemType(Type qElemType, MLIRContext *ctx) {
if (isa<FloatType>(qElemType))
return qElemType;
return IntegerType::get(ctx, 32);
}

/// Returns true if `op` is in the closed set of migraphx ops that
/// MIGraphXAttentionToRock::lowerMIGraphXElementwiseToScalar can lower to
/// a scalar arith / math equivalent inside a linalg.generic body. The
/// AttentionOp verifier consults this so the verifier never accepts a
/// preSoftmaxBody that the lowering would later reject; the lowering
/// itself uses the same membership rule (encoded as a dispatch table) to
/// decide what to emit.
///
/// IMPORTANT: this list and
/// MIGraphXAttentionToRock::lowerMIGraphXElementwiseToScalar must stay in
/// lock-step. Adding a new body op is a one-line change in two coupled
/// places (this function plus the lowering's dispatch table). The
/// AttentionToRockPattern body-builder asserts at runtime that any op in
/// this allowlist is also handled by the dispatcher, so divergence trips
/// the assertion (debug builds) or surfaces as a structured
/// "unsupported migraphx op in preSoftmaxBody" error (release builds).
inline bool isAllowedInPreSoftmaxBody(Operation &op) {
return isa<migraphx::AddOp, migraphx::SubOp, migraphx::MulOp, migraphx::DivOp,
migraphx::PowOp, migraphx::NegOp, migraphx::AbsOp,
migraphx::CeilOp, migraphx::FloorOp, migraphx::ExpOp,
migraphx::LogOp, migraphx::SqrtOp, migraphx::TanhOp,
migraphx::ErfOp, migraphx::RecipOp, migraphx::ReluOp,
migraphx::SigmoidOp, migraphx::WhereOp, migraphx::ConvertOp,
migraphx::DeQuantizeLinearOp>(op);
}

} // namespace migraphx
} // namespace mlir

#endif // MLIR_MIGRAPHX_IR_ATTENTIONUTILS_H_
11 changes: 11 additions & 0 deletions mlir/include/mlir/Dialect/MIGraphX/IR/MIGraphX.h
@@ -35,6 +35,17 @@ namespace migraphx {} // end namespace migraphx

#include "mlir/Dialect/MIGraphX/IR/MIGraphXEnums.h.inc"

namespace mlir {
namespace migraphx {
inline bool hasAttentionFeature(std::optional<AttentionFeatures> features,
AttentionFeatures flag) {
if (!features)
return false;
return bitEnumContainsAll(*features, flag);
}
} // namespace migraphx
} // namespace mlir

#define GET_OP_CLASSES
#include "mlir/Dialect/MIGraphX/IR/MIGraphX.h.inc"
