llvm · topperc · Jun 16, 2026 · Jun 15, 2026 · Jun 15, 2026
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -17753,9 +17753,15 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D
   SDValue Src = N0.getOperand(0);
   EVT SrcVT = Src.getValueType();
   unsigned SrcElen = SrcVT.getScalarSizeInBits();
+
+  // Consider any leading zeros in the source.
+  SrcElen -= DAG.computeKnownBits(Src).countMinLeadingZeros();
+
   unsigned ShAmtV = ShAmt.getZExtValue();
   unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
   NewElen = std::max(NewElen, 8U);
+  // Make sure the new elen is at least as large as the original elen.
+  NewElen = std::max<unsigned>(NewElen, SrcVT.getScalarSizeInBits());
 
   // Skip if NewElen is not narrower than the original extended type.
   if (NewElen >= N0.getValueType().getScalarSizeInBits())

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -15440,6 +15440,149 @@ define <7 x i8> @mgather_baseidx_v7i8(ptr %base, <7 x i8> %idxs, <7 x i1> %m, <7
   ret <7 x i8> %v
 }
 
+define <8 x i16> @mgather_baseidx_and_v8i16_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
+; RV32-LABEL: mgather_baseidx_and_v8i16_v8i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vluxei16.v v9, (a0), v8, v0.t
+; RV32-NEXT:    vmv.v.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64V-LABEL: mgather_baseidx_and_v8i16_v8i16:
+; RV64V:       # %bb.0:
+; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; RV64V-NEXT:    vadd.vv v8, v8, v8
+; RV64V-NEXT:    vluxei16.v v9, (a0), v8, v0.t
+; RV64V-NEXT:    vmv.v.v v8, v9
+; RV64V-NEXT:    ret
+;
+; RV64ZVE32F-LABEL: mgather_baseidx_and_v8i16_v8i16:
+; RV64ZVE32F:       # %bb.0:
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
+; RV64ZVE32F-NEXT:    lui a2, 8
+; RV64ZVE32F-NEXT:    addi a2, a2, -1
+; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-NEXT:    vand.vx v8, v8, a2
+; RV64ZVE32F-NEXT:    andi a2, a1, 1
+; RV64ZVE32F-NEXT:    beqz a2, .LBB133_2
+; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
+; RV64ZVE32F-NEXT:    slli a2, a2, 49
+; RV64ZVE32F-NEXT:    srli a2, a2, 48
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lh a2, 0(a2)
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
+; RV64ZVE32F-NEXT:  .LBB133_2: # %else
+; RV64ZVE32F-NEXT:    andi a2, a1, 2
+; RV64ZVE32F-NEXT:    beqz a2, .LBB133_4
+; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
+; RV64ZVE32F-NEXT:    slli a2, a2, 49
+; RV64ZVE32F-NEXT:    srli a2, a2, 48
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lh a2, 0(a2)
+; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 1
+; RV64ZVE32F-NEXT:  .LBB133_4: # %else2
+; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT:    andi a2, a1, 4
+; RV64ZVE32F-NEXT:    beqz a2, .LBB133_6
+; RV64ZVE32F-NEXT:  # %bb.5: # %cond.load4
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
+; RV64ZVE32F-NEXT:    slli a2, a2, 49
+; RV64ZVE32F-NEXT:    srli a2, a2, 48
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lh a2, 0(a2)
+; RV64ZVE32F-NEXT:    vmv.s.x v11, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vslideup.vi v9, v11, 2
+; RV64ZVE32F-NEXT:  .LBB133_6: # %else5
+; RV64ZVE32F-NEXT:    andi a2, a1, 8
+; RV64ZVE32F-NEXT:    beqz a2, .LBB133_8
+; RV64ZVE32F-NEXT:  # %bb.7: # %cond.load7
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
+; RV64ZVE32F-NEXT:    slli a2, a2, 49
+; RV64ZVE32F-NEXT:    srli a2, a2, 48
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lh a2, 0(a2)
+; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 3
+; RV64ZVE32F-NEXT:  .LBB133_8: # %else8
+; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 4
+; RV64ZVE32F-NEXT:    andi a2, a1, 16
+; RV64ZVE32F-NEXT:    beqz a2, .LBB133_10
+; RV64ZVE32F-NEXT:  # %bb.9: # %cond.load10
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
+; RV64ZVE32F-NEXT:    slli a2, a2, 49
+; RV64ZVE32F-NEXT:    srli a2, a2, 48
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lh a2, 0(a2)
+; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 4
+; RV64ZVE32F-NEXT:  .LBB133_10: # %else11
+; RV64ZVE32F-NEXT:    andi a2, a1, 32
+; RV64ZVE32F-NEXT:    beqz a2, .LBB133_12
+; RV64ZVE32F-NEXT:  # %bb.11: # %cond.load13
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
+; RV64ZVE32F-NEXT:    slli a2, a2, 49
+; RV64ZVE32F-NEXT:    srli a2, a2, 48
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lh a2, 0(a2)
+; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 5
+; RV64ZVE32F-NEXT:  .LBB133_12: # %else14
+; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
+; RV64ZVE32F-NEXT:    andi a2, a1, 64
+; RV64ZVE32F-NEXT:    beqz a2, .LBB133_14
+; RV64ZVE32F-NEXT:  # %bb.13: # %cond.load16
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
+; RV64ZVE32F-NEXT:    slli a2, a2, 49
+; RV64ZVE32F-NEXT:    srli a2, a2, 48
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lh a2, 0(a2)
+; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
+; RV64ZVE32F-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV64ZVE32F-NEXT:    vslideup.vi v9, v10, 6
+; RV64ZVE32F-NEXT:  .LBB133_14: # %else17
+; RV64ZVE32F-NEXT:    andi a1, a1, -128
+; RV64ZVE32F-NEXT:    beqz a1, .LBB133_16
+; RV64ZVE32F-NEXT:  # %bb.15: # %cond.load19
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
+; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
+; RV64ZVE32F-NEXT:    slli a1, a1, 49
+; RV64ZVE32F-NEXT:    srli a1, a1, 48
+; RV64ZVE32F-NEXT:    add a0, a0, a1
+; RV64ZVE32F-NEXT:    lh a0, 0(a0)
+; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
+; RV64ZVE32F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-NEXT:    vslideup.vi v9, v8, 7
+; RV64ZVE32F-NEXT:  .LBB133_16: # %else20
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-NEXT:    vmv1r.v v8, v9
+; RV64ZVE32F-NEXT:    ret
+  %eidxs = and <8 x i16> %idxs, splat (i16 32767) ; mask 0x7fff clears bit 15, so every index has at least one known leading zero
+  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs ; i16 elements, so each index is scaled by 2 bytes (15 significant bits + 1 shift bit = 16 bits)
+  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru) ; expected vector output above doubles the index at e16 and gathers with vluxei16.v, with no vand
+  ret <8 x i16> %v
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; RV32V-ZVFH: {{.*}}
 ; RV32V-ZVFHMIN: {{.*}}

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
@@ -2617,3 +2617,29 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x
   %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
   ret <32 x double> %v
 }
+
+define <8 x i32> @vpgather_baseidx_and_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_and_v8i16_v8i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a2, 255
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT:    vand.vx v8, v8, a2
+; RV32-NEXT:    vsll.vi v10, v8, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_and_v8i16_v8i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a2, 255
+; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vsll.vi v10, v8, 2
+; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
+; RV64-NEXT:    ret
+  %eidxs = and <8 x i16> %idxs, splat (i16 255) ; keeps only the low 8 bits, so the upper 8 bits of each i16 index are known zero
+  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %eidxs ; i32 elements, so each index is scaled by 4 bytes (8 significant bits + 2 shift bits round up to 16)
+  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl) ; expected output above masks and shifts at e16, then gathers i32 data with vluxei16.v
+  ret <8 x i32> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
@@ -2540,3 +2540,17 @@ define <4 x i32> @diagonal_i32(ptr %base, <4 x i32> %vecidx) {
   ret <4 x i32> %res
 }
 
+define <vscale x 8 x i32> @mgather_baseidx_and_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
+; CHECK-LABEL: mgather_baseidx_and_nxv8i16_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vluxei16.v v12, (a0), v8, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
+  %eidxs = and <vscale x 8 x i16> %idxs, splat (i16 u0x3fff) ; mask clears bits 15 and 14, leaving two known leading zeros per index
+  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %eidxs ; i32 elements, so each index is scaled by 4 bytes (14 significant bits + 2 shift bits = 16 bits)
+  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) ; expected output above shifts at e16 with no vand and gathers with vluxei16.v
+  ret <vscale x 8 x i32> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
@@ -2014,3 +2014,18 @@ define void @mscatter_baseidx_zext_nxv1i1_nxv1i8(<vscale x 1 x i8> %val, ptr %ba
   call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m)
   ret void
 }
+
+define void @mscatter_baseidx_and_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
+; CHECK-LABEL: mscatter_baseidx_and_nxv8i16_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 255
+; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vand.vx v10, v10, a1
+; CHECK-NEXT:    vadd.vv v10, v10, v10
+; CHECK-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
+; CHECK-NEXT:    ret
+  %eidxs = and <vscale x 8 x i16> %idxs, splat (i16 255) ; keeps only the low 8 bits, so the upper 8 bits of each i16 index are known zero
+  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs ; i16 elements, so each index is scaled by 2 bytes (8 significant bits + 1 shift bit round up to 16)
+  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m) ; expected output above masks and doubles the index at e16, then scatters with vsoxei16.v
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -2586,3 +2586,27 @@ define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base
   %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x double> %v
 }
+
+define <vscale x 8 x i16> @vpgather_baseidx_and_nxv8i16_nxv8i16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_and_nxv8i16_nxv8i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a2, 255
+; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT:    vand.vx v8, v8, a2
+; RV32-NEXT:    vadd.vv v8, v8, v8
+; RV32-NEXT:    vluxei16.v v8, (a0), v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_and_nxv8i16_nxv8i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a2, 255
+; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vadd.vv v8, v8, v8
+; RV64-NEXT:    vluxei16.v v8, (a0), v8, v0.t
+; RV64-NEXT:    ret
+  %eidxs = and <vscale x 8 x i16> %idxs, splat (i16 255) ; keeps only the low 8 bits, so the upper 8 bits of each i16 index are known zero
+  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs ; i16 elements, so each index is scaled by 2 bytes (8 significant bits + 1 shift bit round up to 16)
+  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl) ; expected output above masks and doubles the index at e16, then gathers with vluxei16.v
+  ret <vscale x 8 x i16> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
@@ -2441,3 +2441,25 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
   call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
   ret void
 }
+
+define void @vpscatter_baseidx_and_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_and_nxv8i8_nxv8i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV32-NEXT:    vadd.vv v10, v10, v10
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT:    vsoxei8.v v8, (a0), v10, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_baseidx_and_nxv8i8_nxv8i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; RV64-NEXT:    vadd.vv v10, v10, v10
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT:    vsoxei8.v v8, (a0), v10, v0.t
+; RV64-NEXT:    ret
+  %eidxs = and <vscale x 8 x i8> %idxs, splat (i8 127) ; mask 0x7f clears bit 7, so every i8 index has at least one known leading zero
+  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %eidxs ; i16 elements, so each index is scaled by 2 bytes (7 significant bits + 1 shift bit = 8 bits)
+  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl) ; expected output above doubles the index at e8 with no vand and scatters with vsoxei8.v
+  ret void
+}