Skip to content

[RISCV] Consider known leading zeros in narrowIndex for gather/scatter.#203970

Open
topperc wants to merge 2 commits into
llvm:mainfrom
topperc:pr/narrowIndex
Open

[RISCV] Consider known leading zeros in narrowIndex for gather/scatter.#203970
topperc wants to merge 2 commits into
llvm:mainfrom
topperc:pr/narrowIndex

Conversation

@topperc

@topperc topperc commented Jun 15, 2026

Copy link
Copy Markdown
Contributor

If we there are enough leading zeros for the shift amount, then
we can do the shift in the narrow type.

topperc added 2 commits June 15, 2026 11:27
If we there are enough leading zeros for the shift amount, then
we can do the shift in the narrow type.
@llvmorg-github-actions

Copy link
Copy Markdown

@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Changes

If we there are enough leading zeros for the shift amount, then
we can do the shift in the narrow type.


Full diff: https://github.com/llvm/llvm-project/pull/203970.diff

7 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+6)
  • (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+143)
  • (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll (+26)
  • (modified) llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll (+14)
  • (modified) llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll (+15)
  • (modified) llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll (+24)
  • (modified) llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll (+22)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4a937fe2c6462..c7010c95bfc07 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -17753,9 +17753,15 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D
   SDValue Src = N0.getOperand(0);
   EVT SrcVT = Src.getValueType();
   unsigned SrcElen = SrcVT.getScalarSizeInBits();
+
+  // Consider any leading zeros in the source.
+  SrcElen -= DAG.computeKnownBits(Src).countMinLeadingZeros();
+
   unsigned ShAmtV = ShAmt.getZExtValue();
   unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
   NewElen = std::max(NewElen, 8U);
+  // Make sure the new elen is at least as large as the original elen.
+  NewElen = std::max<unsigned>(NewElen, SrcVT.getScalarSizeInBits());
 
   // Skip if NewElen is not narrower than the original extended type.
   if (NewElen >= N0.getValueType().getScalarSizeInBits())
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index b0aed1a65592b..e8a117ac53377 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -15440,6 +15440,149 @@ define <7 x i8> @mgather_baseidx_v7i8(ptr %base, <7 x i8> %idxs, <7 x i1> %m, <7
   ret <7 x i8> %v
 }
 
+define <8 x i16> @mgather_baseidx_and_v8i16_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
+; RV32-LABEL: mgather_baseidx_and_v8i16_v8i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vluxei16.v v9, (a0), v8, v0.t
+; RV32-NEXT:    vmv.v.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64V-LABEL: mgather_baseidx_and_v8i16_v8i16:
+; RV64V:       # %bb.0:
+; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; RV64V-NEXT:    vadd.vv v8, v8, v8
+; RV64V-NEXT:    vluxei16.v v9, (a0), v8, v0.t
+; RV64V-NEXT:    vmv.v.v v8, v9
+; RV64V-NEXT:    ret
+;
+; RV64ZVE32F-LABEL: mgather_baseidx_and_v8i16_v8i16:
+; RV64ZVE32F:       # %bb.0:
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
+; RV64ZVE32F-NEXT:    lui a2, 8
+; RV64ZVE32F-NEXT:    addi a2, a2, -1
+; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-NEXT:    vand.vx v8, v8, a2
+; RV64ZVE32F-NEXT:    andi a2, a1, 1
+; RV64ZVE32F-NEXT:    beqz a2, .LBB133_2
+; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
+; RV64ZVE32F-NEXT:    slli a2, a2, 49
+; RV64ZVE32F-NEXT:    srli a2, a2, 48
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lh a2, 0(a2)
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
+; RV64ZVE32F-NEXT:  .LBB133_2: # %else
+; RV64ZVE32F-NEXT:    andi a2, a1, 2
+; RV64ZVE32F-NEXT:    beqz a2, .LBB133_4
+; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
+; RV64ZVE32F-NEXT:    slli a2, a2, 49
+; RV64ZVE32F-NEXT:    srli a2, a2, 48
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lh a2, 0(a2)
+; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
+; RV64ZVE32F-NEXT:  .LBB133_4: # %else2
+; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT:    andi a2, a1, 4
+; RV64ZVE32F-NEXT:    beqz a2, .LBB133_6
+; RV64ZVE32F-NEXT:  # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
+; RV64ZVE32F-NEXT:    slli a2, a2, 49
+; RV64ZVE32F-NEXT:    srli a2, a2, 48
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lh a2, 0(a2)
+; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
+; RV64ZVE32F-NEXT:  .LBB133_6: # %else5
+; RV64ZVE32F-NEXT:    andi a2, a1, 8
+; RV64ZVE32F-NEXT:    beqz a2, .LBB133_8
+; RV64ZVE32F-NEXT:  # %bb.7: # %cond.load7
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
+; RV64ZVE32F-NEXT:    slli a2, a2, 49
+; RV64ZVE32F-NEXT:    srli a2, a2, 48
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lh a2, 0(a2)
+; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 3
+; RV64ZVE32F-NEXT:  .LBB133_8: # %else8
+; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT:    andi a2, a1, 16
+; RV64ZVE32F-NEXT:    beqz a2, .LBB133_10
+; RV64ZVE32F-NEXT:  # %bb.9: # %cond.load10
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
+; RV64ZVE32F-NEXT:    slli a2, a2, 49
+; RV64ZVE32F-NEXT:    srli a2, a2, 48
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lh a2, 0(a2)
+; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 4
+; RV64ZVE32F-NEXT:  .LBB133_10: # %else11
+; RV64ZVE32F-NEXT:    andi a2, a1, 32
+; RV64ZVE32F-NEXT:    beqz a2, .LBB133_12
+; RV64ZVE32F-NEXT:  # %bb.11: # %cond.load13
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
+; RV64ZVE32F-NEXT:    slli a2, a2, 49
+; RV64ZVE32F-NEXT:    srli a2, a2, 48
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lh a2, 0(a2)
+; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 5
+; RV64ZVE32F-NEXT:  .LBB133_12: # %else14
+; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT:    andi a2, a1, 64
+; RV64ZVE32F-NEXT:    beqz a2, .LBB133_14
+; RV64ZVE32F-NEXT:  # %bb.13: # %cond.load16
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
+; RV64ZVE32F-NEXT:    slli a2, a2, 49
+; RV64ZVE32F-NEXT:    srli a2, a2, 48
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lh a2, 0(a2)
+; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
+; RV64ZVE32F-NEXT:  .LBB133_14: # %else17
+; RV64ZVE32F-NEXT:    andi a1, a1, -128
+; RV64ZVE32F-NEXT:    beqz a1, .LBB133_16
+; RV64ZVE32F-NEXT:  # %bb.15: # %cond.load19
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
+; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
+; RV64ZVE32F-NEXT:    slli a1, a1, 49
+; RV64ZVE32F-NEXT:    srli a1, a1, 48
+; RV64ZVE32F-NEXT:    add a0, a0, a1
+; RV64ZVE32F-NEXT:    lh a0, 0(a0)
+; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
+; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 7
+; RV64ZVE32F-NEXT:  .LBB133_16: # %else20
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT:    vmv1r.v v8, v9
+; RV64ZVE32F-NEXT:    ret
+  %eidxs = and <8 x i16> %idxs, splat (i16 32767)
+  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
+  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
+  ret <8 x i16> %v
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; RV32V-ZVFH: {{.*}}
 ; RV32V-ZVFHMIN: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
index 58b482199af02..db2dae784b213 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
@@ -2617,3 +2617,29 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x
   %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
   ret <32 x double> %v
 }
+
+define <8 x i32> @vpgather_baseidx_and_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_and_v8i16_v8i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a2, 255
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT:    vand.vx v8, v8, a2
+; RV32-NEXT:    vsll.vi v10, v8, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_and_v8i16_v8i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a2, 255
+; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vsll.vi v10, v8, 2
+; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
+; RV64-NEXT:    ret
+  %eidxs = and <8 x i16> %idxs, splat (i16 255)
+  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %eidxs
+  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
index 3dc6115314e32..9896bc800569e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
@@ -2540,3 +2540,17 @@ define <4 x i32> @diagonal_i32(ptr %base, <4 x i32> %vecidx) {
   ret <4 x i32> %res
 }
 
+define <vscale x 8 x i32> @mgather_baseidx_and_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
+; CHECK-LABEL: mgather_baseidx_and_nxv8i16_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vluxei16.v v12, (a0), v8, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
+  %eidxs = and <vscale x 8 x i16> %idxs, splat (i16 u0x3fff)
+  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %eidxs
+  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
+  ret <vscale x 8 x i32> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
index 906b7fb6d4e6b..14cc210347a8d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
@@ -2014,3 +2014,18 @@ define void @mscatter_baseidx_zext_nxv1i1_nxv1i8(<vscale x 1 x i8> %val, ptr %ba
   call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m)
   ret void
 }
+
+define void @mscatter_baseidx_and_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
+; CHECK-LABEL: mscatter_baseidx_and_nxv8i16_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 255
+; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vand.vx v10, v10, a1
+; CHECK-NEXT:    vadd.vv v10, v10, v10
+; CHECK-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
+; CHECK-NEXT:    ret
+  %eidxs = and <vscale x 8 x i16> %idxs, splat (i16 255)
+  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
+  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
index 8063b62b32f91..0856b9a42f826 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -2586,3 +2586,27 @@ define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base
   %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x double> %v
 }
+
+define <vscale x 8 x i16> @vpgather_baseidx_and_nxv8i16_nxv8i16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_and_nxv8i16_nxv8i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a2, 255
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vand.vx v8, v8, a2
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vluxei16.v v8, (a0), v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_and_nxv8i16_nxv8i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a2, 255
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vluxei16.v v8, (a0), v8, v0.t
+; RV64-NEXT:    ret
+  %eidxs = and <vscale x 8 x i16> %idxs, splat (i16 255)
+  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
+  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
index 1fa0159795b70..7daf16e963027 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
@@ -2441,3 +2441,25 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
   call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
   ret void
 }
+
+define void @vpscatter_baseidx_and_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_and_nxv8i8_nxv8i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV32-NEXT:    vadd.vv v10, v10, v10
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsoxei8.v v8, (a0), v10, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_baseidx_and_nxv8i8_nxv8i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV64-NEXT:    vadd.vv v10, v10, v10
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsoxei8.v v8, (a0), v10, v0.t
+; RV64-NEXT:    ret
+  %eidxs = and <vscale x 8 x i8> %idxs, splat (i8 127)
+  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %eidxs
+  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
+  ret void
+}

@lukel97

lukel97 commented Jun 16, 2026

Copy link
Copy Markdown
Contributor

/test-suite

@lukel97 lukel97 left a comment

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@wangpc-pp

Copy link
Copy Markdown
Contributor

/test-suite

@lukel97 How can we check the result of this new pipeline? I think I can't find it in Github's Checks.

@wangpc-pp wangpc-pp left a comment

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants