[RISCV] Add lowering for @llvm.experimental.vector.compress #113291
Merged
Conversation
This intrinsic was introduced by llvm#92289 and currently we just expand it for RISC-V. This patch adds custom lowering for this intrinsic and simply maps it to the `vcompress` instruction. Fixes llvm#113242.
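For illustration, here is one of the tests added by this patch (a fixed-length `<4 x float>` compress) together with the code it now selects under `+v`; both are taken verbatim from the diff below:

```llvm
define <4 x float> @vector_compress_v4f32(<4 x float> %v, <4 x i1> %mask) {
  %ret = call <4 x float> @llvm.experimental.vector.compress.v4f32(<4 x float> %v, <4 x i1> %mask, <4 x float> undef)
  ret <4 x float> %ret
}
```

```asm
vsetivli zero, 4, e32, m1, ta, ma   # VL=4, SEW=32, LMUL=1, tail/mask agnostic
vcompress.vm v9, v8, v0             # pack the elements of v8 selected by mask v0 into v9
vmv.v.v v8, v9                      # move the packed result into the return register
ret
```

With a non-undef passthru operand, the tests below show the same `vcompress.vm` emitted under a tail-undisturbed (`tu`) policy instead, so the elements past the packed ones are taken from the passthru.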
Member
@llvm/pr-subscribers-backend-risc-v

Author: Pengcheng Wang (wangpc-pp)

Changes

This intrinsic was introduced by #92289 and currently we just expand it for RISC-V. This patch adds custom lowering for this intrinsic and simply maps it to the `vcompress` instruction. Fixes #113242.

Patch is 67.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113291.diff

5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3588ef46cadce1..eddb2fbbd709c2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -930,6 +930,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
VT, Custom);
}
}
+
+ setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
}
for (MVT VT : VecTupleVTs) {
@@ -1051,6 +1053,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
VT, Custom);
+
+ setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
};
// Sets common extload/truncstore actions on RVV floating-point vector
@@ -1306,6 +1310,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
{ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
Custom);
}
+
+ setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
}
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -1434,6 +1440,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
VT, Custom);
+
+ setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
}
// Custom-legalize bitcasts from fixed-length vectors to scalar types.
@@ -7082,6 +7090,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::MSTORE:
case ISD::VP_STORE:
return lowerMaskedStore(Op, DAG);
+ case ISD::VECTOR_COMPRESS:
+ return lowerVectorCompress(Op, DAG);
case ISD::SELECT_CC: {
// This occurs because we custom legalize SETGT and SETUGT for setcc. That
// causes LegalizeDAG to think we need to custom legalize select_cc. Expand
@@ -11225,6 +11235,36 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}
+SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Val = Op.getOperand(0);
+ SDValue Mask = Op.getOperand(1);
+ SDValue Passthru = Op.getOperand(2);
+
+ MVT VT = Val.getSimpleValueType();
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
+ Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
+ }
+
+ SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+ SDValue Res =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
+ DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
+ Passthru, Val, Mask, VL);
+
+ if (VT.isFixedLengthVector())
+ Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
+
+ return Res;
+}
+
SDValue
RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index c3749447955330..9191d9a9469b60 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -955,6 +955,7 @@ class RISCVTargetLowering : public TargetLowering {
SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVectorCompress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
SelectionDAG &DAG) const;
SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll
new file mode 100644
index 00000000000000..8f1ff7ed4a11e2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll
@@ -0,0 +1,255 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s
+
+define <1 x half> @vector_compress_v1f16(<1 x half> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <1 x half> @llvm.experimental.vector.compress.v1f16(<1 x half> %v, <1 x i1> %mask, <1 x half> undef)
+ ret <1 x half> %ret
+}
+
+define <1 x half> @vector_compress_v1f16_passthru(<1 x half> %passthru, <1 x half> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f16_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <1 x half> @llvm.experimental.vector.compress.v1f16(<1 x half> %v, <1 x i1> %mask, <1 x half> %passthru)
+ ret <1 x half> %ret
+}
+
+define <2 x half> @vector_compress_v2f16(<2 x half> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <2 x half> @llvm.experimental.vector.compress.v2f16(<2 x half> %v, <2 x i1> %mask, <2 x half> undef)
+ ret <2 x half> %ret
+}
+
+define <2 x half> @vector_compress_v2f16_passthru(<2 x half> %passthru, <2 x half> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f16_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <2 x half> @llvm.experimental.vector.compress.v2f16(<2 x half> %v, <2 x i1> %mask, <2 x half> %passthru)
+ ret <2 x half> %ret
+}
+
+define <4 x half> @vector_compress_v4f16(<4 x half> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <4 x half> @llvm.experimental.vector.compress.v4f16(<4 x half> %v, <4 x i1> %mask, <4 x half> undef)
+ ret <4 x half> %ret
+}
+
+define <4 x half> @vector_compress_v4f16_passthru(<4 x half> %passthru, <4 x half> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f16_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <4 x half> @llvm.experimental.vector.compress.v4f16(<4 x half> %v, <4 x i1> %mask, <4 x half> %passthru)
+ ret <4 x half> %ret
+}
+
+define <8 x half> @vector_compress_v8f16(<8 x half> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <8 x half> @llvm.experimental.vector.compress.v8f16(<8 x half> %v, <8 x i1> %mask, <8 x half> undef)
+ ret <8 x half> %ret
+}
+
+define <8 x half> @vector_compress_v8f16_passthru(<8 x half> %passthru, <8 x half> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f16_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <8 x half> @llvm.experimental.vector.compress.v8f16(<8 x half> %v, <8 x i1> %mask, <8 x half> %passthru)
+ ret <8 x half> %ret
+}
+
+define <1 x float> @vector_compress_v1f32(<1 x float> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <1 x float> @llvm.experimental.vector.compress.v1f32(<1 x float> %v, <1 x i1> %mask, <1 x float> undef)
+ ret <1 x float> %ret
+}
+
+define <1 x float> @vector_compress_v1f32_passthru(<1 x float> %passthru, <1 x float> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f32_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <1 x float> @llvm.experimental.vector.compress.v1f32(<1 x float> %v, <1 x i1> %mask, <1 x float> %passthru)
+ ret <1 x float> %ret
+}
+
+define <2 x float> @vector_compress_v2f32(<2 x float> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <2 x float> @llvm.experimental.vector.compress.v2f32(<2 x float> %v, <2 x i1> %mask, <2 x float> undef)
+ ret <2 x float> %ret
+}
+
+define <2 x float> @vector_compress_v2f32_passthru(<2 x float> %passthru, <2 x float> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f32_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <2 x float> @llvm.experimental.vector.compress.v2f32(<2 x float> %v, <2 x i1> %mask, <2 x float> %passthru)
+ ret <2 x float> %ret
+}
+
+define <4 x float> @vector_compress_v4f32(<4 x float> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <4 x float> @llvm.experimental.vector.compress.v4f32(<4 x float> %v, <4 x i1> %mask, <4 x float> undef)
+ ret <4 x float> %ret
+}
+
+define <4 x float> @vector_compress_v4f32_passthru(<4 x float> %passthru, <4 x float> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f32_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <4 x float> @llvm.experimental.vector.compress.v4f32(<4 x float> %v, <4 x i1> %mask, <4 x float> %passthru)
+ ret <4 x float> %ret
+}
+
+define <8 x float> @vector_compress_v8f32(<8 x float> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vcompress.vm v10, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %ret = call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> %v, <8 x i1> %mask, <8 x float> undef)
+ ret <8 x float> %ret
+}
+
+define <8 x float> @vector_compress_v8f32_passthru(<8 x float> %passthru, <8 x float> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f32_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v10, v0
+; CHECK-NEXT: ret
+ %ret = call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> %v, <8 x i1> %mask, <8 x float> %passthru)
+ ret <8 x float> %ret
+}
+
+define <1 x double> @vector_compress_v1f64(<1 x double> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <1 x double> @llvm.experimental.vector.compress.v1f64(<1 x double> %v, <1 x i1> %mask, <1 x double> undef)
+ ret <1 x double> %ret
+}
+
+define <1 x double> @vector_compress_v1f64_passthru(<1 x double> %passthru, <1 x double> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f64_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <1 x double> @llvm.experimental.vector.compress.v1f64(<1 x double> %v, <1 x i1> %mask, <1 x double> %passthru)
+ ret <1 x double> %ret
+}
+
+define <2 x double> @vector_compress_v2f64(<2 x double> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <2 x double> @llvm.experimental.vector.compress.v2f64(<2 x double> %v, <2 x i1> %mask, <2 x double> undef)
+ ret <2 x double> %ret
+}
+
+define <2 x double> @vector_compress_v2f64_passthru(<2 x double> %passthru, <2 x double> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f64_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <2 x double> @llvm.experimental.vector.compress.v2f64(<2 x double> %v, <2 x i1> %mask, <2 x double> %passthru)
+ ret <2 x double> %ret
+}
+
+define <4 x double> @vector_compress_v4f64(<4 x double> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vcompress.vm v10, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %ret = call <4 x double> @llvm.experimental.vector.compress.v4f64(<4 x double> %v, <4 x i1> %mask, <4 x double> undef)
+ ret <4 x double> %ret
+}
+
+define <4 x double> @vector_compress_v4f64_passthru(<4 x double> %passthru, <4 x double> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f64_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v10, v0
+; CHECK-NEXT: ret
+ %ret = call <4 x double> @llvm.experimental.vector.compress.v4f64(<4 x double> %v, <4 x i1> %mask, <4 x double> %passthru)
+ ret <4 x double> %ret
+}
+
+define <8 x double> @vector_compress_v8f64(<8 x double> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vcompress.vm v12, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %ret = call <8 x double> @llvm.experimental.vector.compress.v8f64(<8 x double> %v, <8 x i1> %mask, <8 x double> undef)
+ ret <8 x double> %ret
+}
+
+define <8 x double> @vector_compress_v8f64_passthru(<8 x double> %passthru, <8 x double> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f64_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v12, v0
+; CHECK-NEXT: ret
+ %ret = call <8 x double> @llvm.experimental.vector.compress.v8f64(<8 x double> %v, <8 x i1> %mask, <8 x double> %passthru)
+ ret <8 x double> %ret
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-int.ll
new file mode 100644
index 00000000000000..3952dc31838a2d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-int.ll
@@ -0,0 +1,339 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
+
+define <1 x i8> @vector_compress_v1i8(<1 x i8> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <1 x i8> @llvm.experimental.vector.compress.v1i8(<1 x i8> %v, <1 x i1> %mask, <1 x i8> undef)
+ ret <1 x i8> %ret
+}
+
+define <1 x i8> @vector_compress_v1i8_passthru(<1 x i8> %passthru, <1 x i8> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1i8_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <1 x i8> @llvm.experimental.vector.compress.v1i8(<1 x i8> %v, <1 x i1> %mask, <1 x i8> %passthru)
+ ret <1 x i8> %ret
+}
+
+define <2 x i8> @vector_compress_v2i8(<2 x i8> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <2 x i8> @llvm.experimental.vector.compress.v2i8(<2 x i8> %v, <2 x i1> %mask, <2 x i8> undef)
+ ret <2 x i8> %ret
+}
+
+define <2 x i8> @vector_compress_v2i8_passthru(<2 x i8> %passthru, <2 x i8> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2i8_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <2 x i8> @llvm.experimental.vector.compress.v2i8(<2 x i8> %v, <2 x i1> %mask, <2 x i8> %passthru)
+ ret <2 x i8> %ret
+}
+
+define <4 x i8> @vector_compress_v4i8(<4 x i8> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <4 x i8> @llvm.experimental.vector.compress.v4i8(<4 x i8> %v, <4 x i1> %mask, <4 x i8> undef)
+ ret <4 x i8> %ret
+}
+
+define <4 x i8> @vector_compress_v4i8_passthru(<4 x i8> %passthru, <4 x i8> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4i8_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <4 x i8> @llvm.experimental.vector.compress.v4i8(<4 x i8> %v, <4 x i1> %mask, <4 x i8> %passthru)
+ ret <4 x i8> %ret
+}
+
+define <8 x i8> @vector_compress_v8i8(<8 x i8> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <8 x i8> @llvm.experimental.vector.compress.v8i8(<8 x i8> %v, <8 x i1> %mask, <8 x i8> undef)
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vector_compress_v8i8_passthru(<8 x i8> %passthru, <8 x i8> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8i8_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <8 x i8> @llvm.experimental.vector.compress.v8i8(<8 x i8> %v, <8 x i1> %mask, <8 x i8> %passthru)
+ ret <8 x i8> %ret
+}
+
+define <1 x i16> @vector_compress_v1i16(<1 x i16> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <1 x i16> @llvm.experimental.vector.compress.v1i16(<1 x i16> %v, <1 x i1> %mask, <1 x i16> undef)
+ ret <1 x i16> %ret
+}
+
+define <1 x i16> @vector_compress_v1i16_passthru(<1 x i16> %passthru, <1 x i16> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1i16_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <1 x i16> @llvm.experimental.vector.compress.v1i16(<1 x i16> %v, <1 x i1> %mask, <1 x i16> %passthru)
+ ret <1 x i16> %ret
+}
+
+define <2 x i16> @vector_compress_v2i16(<2 x i16> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2i16:
+; CHE...
[truncated]
lukel97 added a commit to lukel97/llvm-project that referenced this pull request on Oct 26, 2024:

This is a follow up to llvm#113291 and handles f16/bf16 with zvfhmin and zvfbfmin.
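As a sketch of what that follow-up targets (a hypothetical test in the same style as this PR's, not taken from this patch), the intrinsic on a bf16 vector would look like:

```llvm
; Hypothetical example: same pattern as the tests in this PR, with bf16 elements.
define <4 x bfloat> @vector_compress_v4bf16(<4 x bfloat> %v, <4 x i1> %mask) {
  %ret = call <4 x bfloat> @llvm.experimental.vector.compress.v4bf16(<4 x bfloat> %v, <4 x i1> %mask, <4 x bfloat> undef)
  ret <4 x bfloat> %ret
}
```

Since `vcompress` only moves elements and never computes on their values, the same `vcompress.vm` sequence should work for f16/bf16 once those vector types are marked custom under zvfhmin/zvfbfmin.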
NoumanAmir657 pushed a commit to NoumanAmir657/llvm-project that referenced this pull request on Nov 4, 2024:

…llvm#113770) This is a follow up to llvm#113291 and handles f16/bf16 with zvfhmin and zvfbfmin.