Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17753,9 +17753,15 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D
SDValue Src = N0.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned SrcElen = SrcVT.getScalarSizeInBits();

// Consider any leading zeros in the source.
SrcElen -= DAG.computeKnownBits(Src).countMinLeadingZeros();

unsigned ShAmtV = ShAmt.getZExtValue();
unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
NewElen = std::max(NewElen, 8U);
// Make sure the new elen is at least as large as the original elen.
NewElen = std::max<unsigned>(NewElen, SrcVT.getScalarSizeInBits());

// Skip if NewElen is not narrower than the original extended type.
if (NewElen >= N0.getValueType().getScalarSizeInBits())
Expand Down
143 changes: 143 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15440,6 +15440,149 @@ define <7 x i8> @mgather_baseidx_v7i8(ptr %base, <7 x i8> %idxs, <7 x i1> %m, <7
ret <7 x i8> %v
}

; Masked gather of i16 elements whose <8 x i16> indices are first ANDed with
; 32767 (0x7fff), so the top bit of every index is zero before the GEP scales
; it by the 2-byte element size.
; The RV32 and RV64V assertions below expect the index to stay 16 bits wide
; (vluxei16.v), to be doubled with vadd.vv, and no separate vand to be emitted.
; The RV64ZVE32F assertions expect the mask to be applied with vand.vx and the
; gather to be expanded into one conditional scalar lh per mask bit.
define <8 x i16> @mgather_baseidx_and_v8i16_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_and_v8i16_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vluxei16.v v9, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_and_v8i16_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64V-NEXT: vadd.vv v8, v8, v8
; RV64V-NEXT: vluxei16.v v9, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_and_v8i16_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: lui a2, 8
; RV64ZVE32F-NEXT: addi a2, a2, -1
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vand.vx v8, v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB133_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 49
; RV64ZVE32F-NEXT: srli a2, a2, 48
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB133_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB133_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 49
; RV64ZVE32F-NEXT: srli a2, a2, 48
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB133_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB133_6
; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 49
; RV64ZVE32F-NEXT: srli a2, a2, 48
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: .LBB133_6: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB133_8
; RV64ZVE32F-NEXT: # %bb.7: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 49
; RV64ZVE32F-NEXT: srli a2, a2, 48
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3
; RV64ZVE32F-NEXT: .LBB133_8: # %else8
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB133_10
; RV64ZVE32F-NEXT: # %bb.9: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 49
; RV64ZVE32F-NEXT: srli a2, a2, 48
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 4
; RV64ZVE32F-NEXT: .LBB133_10: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB133_12
; RV64ZVE32F-NEXT: # %bb.11: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 49
; RV64ZVE32F-NEXT: srli a2, a2, 48
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 5
; RV64ZVE32F-NEXT: .LBB133_12: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB133_14
; RV64ZVE32F-NEXT: # %bb.13: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 49
; RV64ZVE32F-NEXT: srli a2, a2, 48
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: .LBB133_14: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB133_16
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 49
; RV64ZVE32F-NEXT: srli a1, a1, 48
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: .LBB133_16: # %else20
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; 32767 == 0x7fff, so the AND clears bit 15 of each index lane.
%eidxs = and <8 x i16> %idxs, splat (i16 32767)
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
ret <8 x i16> %v
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32V-ZVFH: {{.*}}
; RV32V-ZVFHMIN: {{.*}}
Expand Down
26 changes: 26 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2617,3 +2617,29 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x
%v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
ret <32 x double> %v
}

; VP gather of i32 elements whose <8 x i16> indices are ANDed with 255, so only
; the low 8 bits of each index can be set before the GEP scales it by the
; 4-byte element size.
; The assertions below expect the mask to be kept (vand.vx with 255), the index
; to be shifted left by 2 at e16, and the load to use 16-bit indices
; (vluxei16.v) under the caller-supplied EVL in a1.
define <8 x i32> @vpgather_baseidx_and_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_and_v8i16_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 255
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vand.vx v8, v8, a2
; RV32-NEXT: vsll.vi v10, v8, 2
; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_and_v8i16_v8i32:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 255
; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vsll.vi v10, v8, 2
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t
; RV64-NEXT: ret
; 255 == 0xff, so the AND leaves the upper 8 bits of each i16 index zero.
%eidxs = and <8 x i16> %idxs, splat (i16 255)
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %eidxs
%v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
ret <8 x i32> %v
}
14 changes: 14 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2540,3 +2540,17 @@ define <4 x i32> @diagonal_i32(ptr %base, <4 x i32> %vecidx) {
ret <4 x i32> %res
}

; Scalable masked gather of i32 elements whose i16 indices are ANDed with
; 0x3fff, so the top two bits of each index are zero before the GEP scales it
; by the 4-byte element size.
; The assertions below expect the index to stay 16 bits wide (vluxei16.v), to
; be shifted left by 2 at e16, and no separate vand to be emitted.
define <vscale x 8 x i32> @mgather_baseidx_and_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; CHECK-LABEL: mgather_baseidx_and_nxv8i16_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT: vsll.vi v8, v8, 2
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vluxei16.v v12, (a0), v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
; 0x3fff clears bits 15 and 14 of each index lane.
%eidxs = and <vscale x 8 x i16> %idxs, splat (i16 u0x3fff)
%ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %eidxs
%v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
ret <vscale x 8 x i32> %v
}
15 changes: 15 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2014,3 +2014,18 @@ define void @mscatter_baseidx_zext_nxv1i1_nxv1i8(<vscale x 1 x i8> %val, ptr %ba
call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m)
ret void
}

; Scalable masked scatter of i16 elements whose i16 indices are ANDed with 255,
; so only the low 8 bits of each index can be set before the GEP scales it by
; the 2-byte element size.
; The assertions below expect the mask to be kept (vand.vx with 255), the index
; to be doubled with vadd.vv at e16, and the store to use 16-bit indices
; (vsoxei16.v).
define void @mscatter_baseidx_and_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_and_nxv8i16_nxv8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 255
; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
; CHECK-NEXT: vand.vx v10, v10, a1
; CHECK-NEXT: vadd.vv v10, v10, v10
; CHECK-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; CHECK-NEXT: ret
; 255 == 0xff, so the AND leaves the upper 8 bits of each i16 index zero.
%eidxs = and <vscale x 8 x i16> %idxs, splat (i16 255)
%ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
ret void
}
24 changes: 24 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2586,3 +2586,27 @@ define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base
%v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}

; Scalable VP gather of i16 elements whose i16 indices are ANDed with 255, so
; only the low 8 bits of each index can be set before the GEP scales it by the
; 2-byte element size.
; The assertions below expect the mask to be kept (vand.vx with 255), the index
; to be doubled with vadd.vv at e16, and the load to use 16-bit indices
; (vluxei16.v) under the caller-supplied EVL in a1.
define <vscale x 8 x i16> @vpgather_baseidx_and_nxv8i16_nxv8i16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_and_nxv8i16_nxv8i16:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 255
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vand.vx v8, v8, a2
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vluxei16.v v8, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_and_nxv8i16_nxv8i16:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 255
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vadd.vv v8, v8, v8
; RV64-NEXT: vluxei16.v v8, (a0), v8, v0.t
; RV64-NEXT: ret
; 255 == 0xff, so the AND leaves the upper 8 bits of each i16 index zero.
%eidxs = and <vscale x 8 x i16> %idxs, splat (i16 255)
%ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
%v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x i16> %v
}
22 changes: 22 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2441,3 +2441,25 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret void
}

; Scalable VP scatter of i16 elements whose i8 indices are ANDed with 127, so
; the top bit of each index is zero before the GEP scales it by the 2-byte
; element size.
; The assertions below expect the index to stay 8 bits wide (vsoxei8.v), to be
; doubled with vadd.vv at e8, and no separate vand to be emitted.
define void @vpscatter_baseidx_and_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_and_nxv8i8_nxv8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vsoxei8.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_and_nxv8i8_nxv8i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT: vadd.vv v10, v10, v10
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vsoxei8.v v8, (a0), v10, v0.t
; RV64-NEXT: ret
; 127 == 0x7f, so the AND clears bit 7 of each i8 index lane.
%eidxs = and <vscale x 8 x i8> %idxs, splat (i8 127)
%ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %eidxs
call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}