[VPlan] Introduce VPGEPInstruction #193510
Conversation
|
@llvm/pr-subscribers-vectorizers Author: Ramkumar Ramachandra (artagnon) Changes: Currently, WidenGEP, Replicate, and VPInstruction recipes can all hold an Instruction::GetElementPtr. Introduce a first-class "scalar GEP" VPInstructionWithType, similar to the existing "scalar cast", with the additional benefit of being able to query the source element type of the GEP directly without going to the underlying value. Planned follow-ups include unifying WidenGEP and Replicate GEPs with this VPInstruction GEP. Full diff: https://github.com/llvm/llvm-project/pull/193510.diff 8 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index f06b959700687..fd859fcefe7df 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -364,6 +364,14 @@ class VPBuilder {
new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL));
}
+ VPInstruction *createScalarGEP(Type *SourceElementTy, ArrayRef<VPValue *> Ops,
+ Type *ResultTy, DebugLoc DL,
+ const VPIRFlags &Flags,
+ const VPIRMetadata &Metadata = {}) {
+ return tryInsertInstruction(new VPInstructionWithType(
+ SourceElementTy, Ops, ResultTy, Flags, Metadata, DL));
+ }
+
VPValue *createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
DebugLoc DL) {
if (ResultTy == SrcTy)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 934cca006e91c..005cae1c94552 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -556,6 +556,9 @@ class LLVM_ABI_FOR_TEST VPRecipeBase
/// Return true if the recipe is a scalar cast.
bool isScalarCast() const;
+ /// Return true if the recipe is a scalar getelementptr.
+ bool isScalarGEP() const;
+
/// Set the recipe's debug location to \p NewDL.
void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
@@ -1517,6 +1520,10 @@ class VPInstructionWithType : public VPInstruction {
/// Scalar result type produced by the recipe.
Type *ResultTy;
+ /// The source element type, which is present when the recipe has a
+ /// getelementptr opcode.
+ Type *SourceElementTy = nullptr;
+
public:
VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
Type *ResultTy, const VPIRFlags &Flags = {},
@@ -1526,10 +1533,20 @@ class VPInstructionWithType : public VPInstruction {
: VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
ResultTy(ResultTy) {}
+ /// Constructor for GEPs.
+ VPInstructionWithType(Type *SourceElementTy, ArrayRef<VPValue *> Operands,
+ Type *ResultTy, const VPIRFlags &Flags = {},
+ const VPIRMetadata &Metadata = {},
+ DebugLoc DL = DebugLoc::getUnknown(),
+ const Twine &Name = "")
+ : VPInstruction(Instruction::GetElementPtr, Operands, Flags, Metadata, DL,
+ Name),
+ ResultTy(ResultTy), SourceElementTy(SourceElementTy) {}
+
static inline bool classof(const VPRecipeBase *R) {
// VPInstructionWithType are VPInstructions with specific opcodes requiring
// type information.
- if (R->isScalarCast())
+ if (R->isScalarCast() || R->isScalarGEP())
return true;
auto *VPI = dyn_cast<VPInstruction>(R);
if (!VPI)
@@ -1567,6 +1584,11 @@ class VPInstructionWithType : public VPInstruction {
}
Type *getResultType() const { return ResultTy; }
+ Type *getSourceElementType() const {
+ assert(isScalarGEP() &&
+ "Source element type requested for non-getelementptr");
+ return SourceElementTy;
+ }
protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 9b052047dcc8b..6cbe36b44af58 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -267,6 +267,12 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
CI->getType(), CI->getDebugLoc(),
VPIRFlags(*CI), MD);
NewR->setUnderlyingValue(CI);
+ } else if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ NewR = VPIRBuilder.createScalarGEP(
+ GEP->getSourceElementType(), VPOperands,
+ GEP->getPointerOperandType(), GEP->getDebugLoc(), VPIRFlags(*GEP),
+ MD);
+ NewR->setUnderlyingValue(GEP);
} else if (auto *LI = dyn_cast<LoadInst>(Inst)) {
NewR = VPIRBuilder.createScalarLoad(LI->getType(), VPOperands[0],
LI->getDebugLoc(), MD);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 25fe37124b017..573fa419b4bf1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -317,6 +317,11 @@ bool VPRecipeBase::isScalarCast() const {
return VPI && Instruction::isCast(VPI->getOpcode());
}
+bool VPRecipeBase::isScalarGEP() const {
+ auto *VPI = dyn_cast<VPInstruction>(this);
+ return VPI && VPI->getOpcode() == Instruction::GetElementPtr;
+}
+
void VPIRFlags::intersectFlags(const VPIRFlags &Other) {
assert(OpType == Other.OpType && "OpType must match");
switch (OpType) {
@@ -1291,7 +1296,7 @@ bool VPInstruction::isSingleScalar() const {
case VPInstruction::VScale:
return true;
default:
- return isScalarCast();
+ return isScalarCast() || isScalarGEP();
}
}
@@ -1583,6 +1588,16 @@ void VPInstructionWithType::execute(VPTransformState &State) {
State.set(this, Cast, VPLane(0));
return;
}
+ if (isScalarGEP()) {
+ Value *Ptr = State.get(getOperand(0), true);
+ auto IdxList =
+ to_vector(map_range(drop_begin(operands()),
+ [&](VPValue *Op) { return State.get(Op, true); }));
+ Value *GEP = State.Builder.CreateGEP(ResultTy, Ptr, IdxList, "",
+ getGEPNoWrapFlags());
+ State.set(this, GEP, true);
+ return;
+ }
switch (getOpcode()) {
case VPInstruction::StepVector: {
Value *StepVector =
@@ -1623,6 +1638,12 @@ void VPInstructionWithType::printRecipe(raw_ostream &O, const Twine &Indent,
O << "load ";
printOperands(O, SlotTracker);
break;
+ case Instruction::GetElementPtr:
+ O << "getelementptr";
+ printFlags(O);
+ O << *ResultTy << " ";
+ printOperands(O, SlotTracker);
+ break;
default:
assert(Instruction::isCast(getOpcode()) && "unhandled opcode");
O << Instruction::getOpcodeName(getOpcode()) << " ";
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 2234b2c5d56a3..39187c613f620 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1227,6 +1227,27 @@ getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R) {
.Default([](auto *) { return std::nullopt; });
}
+/// If recipe \p R will lower to a GEP with a non-i8 source element type,
+/// return that source element type.
+static Type *getGEPSourceElementType(const VPSingleDefRecipe *R) {
+ // All VPInstructions that lower to GEPs must have the i8 source element
+ // type (as they are PtrAdds), so we omit it.
+ return TypeSwitch<const VPSingleDefRecipe *, Type *>(R)
+ .Case([](const VPReplicateRecipe *I) -> Type * {
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I->getUnderlyingValue()))
+ return GEP->getSourceElementType();
+ return nullptr;
+ })
+ .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
+ [](auto *I) { return I->getSourceElementType(); })
+ .Case<VPInstructionWithType>([](auto *I) {
+ return I->getOpcode() == Instruction::GetElementPtr
+ ? I->getSourceElementType()
+ : nullptr;
+ })
+ .Default([](auto *) { return nullptr; });
+}
+
/// Try to fold \p R using InstSimplifyFolder. Will succeed and return a
/// non-nullptr VPValue for a handled opcode or intrinsic ID if corresponding \p
/// Operands are foldable live-ins.
@@ -1279,8 +1300,7 @@ static VPIRValue *tryToFoldLiveIns(VPSingleDefRecipe &R,
Ops[1]);
case Instruction::GetElementPtr: {
auto &RFlags = cast<VPRecipeWithIRFlags>(R);
- auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
- return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
+ return Folder.FoldGEP(getGEPSourceElementType(&RFlags), Ops[0],
drop_begin(Ops), RFlags.getGEPNoWrapFlags());
}
case VPInstruction::PtrAdd:
@@ -2577,22 +2597,6 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
return Def == getEmptyKey() || Def == getTombstoneKey();
}
- /// If recipe \p R will lower to a GEP with a non-i8 source element type,
- /// return that source element type.
- static Type *getGEPSourceElementType(const VPSingleDefRecipe *R) {
- // All VPInstructions that lower to GEPs must have the i8 source element
- // type (as they are PtrAdds), so we omit it.
- return TypeSwitch<const VPSingleDefRecipe *, Type *>(R)
- .Case([](const VPReplicateRecipe *I) -> Type * {
- if (auto *GEP = dyn_cast<GetElementPtrInst>(I->getUnderlyingValue()))
- return GEP->getSourceElementType();
- return nullptr;
- })
- .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
- [](auto *I) { return I->getSourceElementType(); })
- .Default([](auto *) { return nullptr; });
- }
-
/// Returns true if recipe \p Def can be safely handed for CSE.
static bool canHandle(const VPSingleDefRecipe *Def) {
// We can extend the list of handled recipes in the future,
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll
index 2857fcd246cef..da5789714b852 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll
@@ -8,7 +8,7 @@ define void @diamond_phi(ptr %a) {
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<[[VP0:%[0-9]+]]>
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%a>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%a>, ir<%iv>
; CHECK-NEXT: EMIT ir<%c0> = icmp sle ir<%iv>, ir<0>
; CHECK-NEXT: Successor(s): bb2
; CHECK-EMPTY:
@@ -73,7 +73,7 @@ define void @mask_reuse(ptr %a) {
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<[[VP0:%[0-9]+]]>
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%a>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%a>, ir<%iv>
; CHECK-NEXT: EMIT ir<%c0> = icmp sle ir<%iv>, ir<0>
; CHECK-NEXT: EMIT ir<%add0> = add ir<%iv>, ir<0>
; CHECK-NEXT: Successor(s): bb1
@@ -159,7 +159,7 @@ define void @optimized_mask(ptr %a) {
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<[[VP0:%[0-9]+]]>
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%a>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%a>, ir<%iv>
; CHECK-NEXT: EMIT ir<%c0> = icmp sle ir<%iv>, ir<0>
; CHECK-NEXT: Successor(s): bb6
; CHECK-EMPTY:
@@ -285,7 +285,7 @@ define void @switch(ptr %a) {
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<[[VP0:%[0-9]+]]>
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%a>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%a>, ir<%iv>
; CHECK-NEXT: EMIT ir<%c0> = icmp sle ir<%iv>, ir<0>
; CHECK-NEXT: EMIT ir<%add0> = add ir<%iv>, ir<0>
; CHECK-NEXT: Successor(s): bb2
@@ -420,7 +420,7 @@ define void @diamond_phi2(ptr %a, i1 %c1, i1 %c2) {
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = logical-and ir<%c0>, ir<%c1>
; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = or vp<[[VP5]]>, vp<[[VP6]]>
; CHECK-NEXT: BLEND ir<%phi> = ir<%add2>/vp<[[VP5]]> ir<%add1>/vp<[[VP6]]>
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%a>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%a>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%phi>, ir<%gep>, vp<[[VP7]]>
; CHECK-NEXT: Successor(s): bb5
; CHECK-EMPTY:
@@ -521,7 +521,7 @@ define void @blend_masks(ptr noalias %p, i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4)
; CHECK-NEXT: EMIT vp<[[VP15:%[0-9]+]]> = logical-and vp<[[VP9]]>, ir<%c4>
; CHECK-NEXT: EMIT vp<[[VP16:%[0-9]+]]> = or vp<[[VP15]]>, vp<[[VP14]]>
; CHECK-NEXT: BLEND ir<%phi> = ir<1>/vp<[[VP15]]> ir<0>/vp<[[VP14]]>
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%p>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%phi>, ir<%gep>, vp<[[VP16]]>
; CHECK-NEXT: Successor(s): bb8
; CHECK-EMPTY:
@@ -607,7 +607,7 @@ define void @blend_masks_triangle_phi(ptr noalias %p, i1 %c0, i1 %c1) {
; CHECK-NEXT: bb3:
; CHECK-NEXT: EMIT vp<[[VP8:%[0-9]+]]> = logical-and ir<%c0>, ir<%c1>
; CHECK-NEXT: BLEND ir<%phi> = ir<1>/vp<[[VP7]]> ir<0>/vp<[[VP8]]>
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%p>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%phi>, ir<%gep>
; CHECK-NEXT: EMIT ir<%iv.next> = add ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.next>, ir<128>
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/tail-folding.ll b/llvm/test/Transforms/LoopVectorize/VPlan/tail-folding.ll
index d02fd486ec609..741bfd4d90822 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/tail-folding.ll
@@ -27,7 +27,7 @@ define i32 @live_out(ptr noalias %p, i32 %n) {
; CHECK-NEXT: Successor(s): vector.body.split, vector.latch
; CHECK-EMPTY:
; CHECK-NEXT: vector.body.split:
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%p>, ir<%iv>
; CHECK-NEXT: EMIT-SCALAR ir<%x> = load ir<%gep>
; CHECK-NEXT: EMIT ir<%y> = add ir<%x>, ir<1>
; CHECK-NEXT: EMIT store ir<%y>, ir<%gep>
@@ -118,7 +118,7 @@ define i32 @conditional_live_out(ptr noalias %p, i32 %n, i1 %c) {
; CHECK-NEXT: Successor(s): if, latch
; CHECK-EMPTY:
; CHECK-NEXT: if:
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%p>, ir<%iv>
; CHECK-NEXT: EMIT-SCALAR ir<%x> = load ir<%gep>
; CHECK-NEXT: EMIT ir<%y> = add ir<%x>, ir<1>
; CHECK-NEXT: EMIT store ir<%y>, ir<%gep>
@@ -284,7 +284,7 @@ define i32 @reduction(ptr noalias %p, i32 %n) {
; CHECK-NEXT: Successor(s): vector.body.split, vector.latch
; CHECK-EMPTY:
; CHECK-NEXT: vector.body.split:
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%p>, ir<%iv>
; CHECK-NEXT: EMIT-SCALAR ir<%x> = load ir<%gep>
; CHECK-NEXT: EMIT ir<%rdx.next> = add ir<%rdx>, ir<%x>
; CHECK-NEXT: EMIT ir<%iv.next> = add ir<%iv>, ir<1>
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-printing-outer-loop.ll
index 6eff6d1df3523..a27d0d9ecd227 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-printing-outer-loop.ll
@@ -15,14 +15,14 @@ define void @foo(i64 %n) {
; CHECK-EMPTY:
; CHECK-NEXT: outer.header:
; CHECK-NEXT: EMIT-SCALAR ir<%outer.iv> = phi [ ir<%outer.iv.next>, outer.latch ], [ ir<0>, ir-bb<entry> ]
-; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr inbounds ir<@arr2>, ir<0>, ir<%outer.iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep.1> = getelementptr inbounds ptr ir<@arr2>, ir<0>, ir<%outer.iv>
; CHECK-NEXT: EMIT store ir<%outer.iv>, ir<%gep.1>
; CHECK-NEXT: EMIT ir<%add> = add nsw ir<%outer.iv>, ir<%n>
; CHECK-NEXT: Successor(s): inner
; CHECK-EMPTY:
; CHECK-NEXT: inner:
; CHECK-NEXT: EMIT-SCALAR ir<%inner.iv> = phi [ ir<%inner.iv.next>, inner ], [ ir<0>, outer.header ]
-; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr inbounds ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep.2> = getelementptr inbounds ptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
; CHECK-NEXT: EMIT ir<%inner.iv.next> = add nuw nsw ir<%inner.iv>, ir<1>
; CHECK-NEXT: EMIT ir<%inner.ec> = icmp eq ir<%inner.iv.next>, ir<8>
|
|
@llvm/pr-subscribers-llvm-transforms Author: Ramkumar Ramachandra (artagnon) Changes: Currently, WidenGEP, Replicate, and VPInstruction recipes can all hold an Instruction::GetElementPtr. Introduce a first-class "scalar GEP" VPInstructionWithType, similar to the existing "scalar cast", with the additional benefit of being able to query the source element type of the GEP directly without going to the underlying value. Planned follow-ups include unifying WidenGEP and Replicate GEPs with this VPInstruction GEP. Full diff: https://github.com/llvm/llvm-project/pull/193510.diff 8 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index f06b959700687..fd859fcefe7df 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -364,6 +364,14 @@ class VPBuilder {
new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL));
}
+ VPInstruction *createScalarGEP(Type *SourceElementTy, ArrayRef<VPValue *> Ops,
+ Type *ResultTy, DebugLoc DL,
+ const VPIRFlags &Flags,
+ const VPIRMetadata &Metadata = {}) {
+ return tryInsertInstruction(new VPInstructionWithType(
+ SourceElementTy, Ops, ResultTy, Flags, Metadata, DL));
+ }
+
VPValue *createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
DebugLoc DL) {
if (ResultTy == SrcTy)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 934cca006e91c..005cae1c94552 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -556,6 +556,9 @@ class LLVM_ABI_FOR_TEST VPRecipeBase
/// Return true if the recipe is a scalar cast.
bool isScalarCast() const;
+ /// Return true if the recipe is a scalar getelementptr.
+ bool isScalarGEP() const;
+
/// Set the recipe's debug location to \p NewDL.
void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
@@ -1517,6 +1520,10 @@ class VPInstructionWithType : public VPInstruction {
/// Scalar result type produced by the recipe.
Type *ResultTy;
+ /// The source element type, which is present when the recipe has a
+ /// getelementptr opcode.
+ Type *SourceElementTy = nullptr;
+
public:
VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
Type *ResultTy, const VPIRFlags &Flags = {},
@@ -1526,10 +1533,20 @@ class VPInstructionWithType : public VPInstruction {
: VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
ResultTy(ResultTy) {}
+ /// Constructor for GEPs.
+ VPInstructionWithType(Type *SourceElementTy, ArrayRef<VPValue *> Operands,
+ Type *ResultTy, const VPIRFlags &Flags = {},
+ const VPIRMetadata &Metadata = {},
+ DebugLoc DL = DebugLoc::getUnknown(),
+ const Twine &Name = "")
+ : VPInstruction(Instruction::GetElementPtr, Operands, Flags, Metadata, DL,
+ Name),
+ ResultTy(ResultTy), SourceElementTy(SourceElementTy) {}
+
static inline bool classof(const VPRecipeBase *R) {
// VPInstructionWithType are VPInstructions with specific opcodes requiring
// type information.
- if (R->isScalarCast())
+ if (R->isScalarCast() || R->isScalarGEP())
return true;
auto *VPI = dyn_cast<VPInstruction>(R);
if (!VPI)
@@ -1567,6 +1584,11 @@ class VPInstructionWithType : public VPInstruction {
}
Type *getResultType() const { return ResultTy; }
+ Type *getSourceElementType() const {
+ assert(isScalarGEP() &&
+ "Source element type requested for non-getelementptr");
+ return SourceElementTy;
+ }
protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 9b052047dcc8b..6cbe36b44af58 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -267,6 +267,12 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
CI->getType(), CI->getDebugLoc(),
VPIRFlags(*CI), MD);
NewR->setUnderlyingValue(CI);
+ } else if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ NewR = VPIRBuilder.createScalarGEP(
+ GEP->getSourceElementType(), VPOperands,
+ GEP->getPointerOperandType(), GEP->getDebugLoc(), VPIRFlags(*GEP),
+ MD);
+ NewR->setUnderlyingValue(GEP);
} else if (auto *LI = dyn_cast<LoadInst>(Inst)) {
NewR = VPIRBuilder.createScalarLoad(LI->getType(), VPOperands[0],
LI->getDebugLoc(), MD);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 25fe37124b017..573fa419b4bf1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -317,6 +317,11 @@ bool VPRecipeBase::isScalarCast() const {
return VPI && Instruction::isCast(VPI->getOpcode());
}
+bool VPRecipeBase::isScalarGEP() const {
+ auto *VPI = dyn_cast<VPInstruction>(this);
+ return VPI && VPI->getOpcode() == Instruction::GetElementPtr;
+}
+
void VPIRFlags::intersectFlags(const VPIRFlags &Other) {
assert(OpType == Other.OpType && "OpType must match");
switch (OpType) {
@@ -1291,7 +1296,7 @@ bool VPInstruction::isSingleScalar() const {
case VPInstruction::VScale:
return true;
default:
- return isScalarCast();
+ return isScalarCast() || isScalarGEP();
}
}
@@ -1583,6 +1588,16 @@ void VPInstructionWithType::execute(VPTransformState &State) {
State.set(this, Cast, VPLane(0));
return;
}
+ if (isScalarGEP()) {
+ Value *Ptr = State.get(getOperand(0), true);
+ auto IdxList =
+ to_vector(map_range(drop_begin(operands()),
+ [&](VPValue *Op) { return State.get(Op, true); }));
+ Value *GEP = State.Builder.CreateGEP(ResultTy, Ptr, IdxList, "",
+ getGEPNoWrapFlags());
+ State.set(this, GEP, true);
+ return;
+ }
switch (getOpcode()) {
case VPInstruction::StepVector: {
Value *StepVector =
@@ -1623,6 +1638,12 @@ void VPInstructionWithType::printRecipe(raw_ostream &O, const Twine &Indent,
O << "load ";
printOperands(O, SlotTracker);
break;
+ case Instruction::GetElementPtr:
+ O << "getelementptr";
+ printFlags(O);
+ O << *ResultTy << " ";
+ printOperands(O, SlotTracker);
+ break;
default:
assert(Instruction::isCast(getOpcode()) && "unhandled opcode");
O << Instruction::getOpcodeName(getOpcode()) << " ";
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 2234b2c5d56a3..39187c613f620 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1227,6 +1227,27 @@ getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R) {
.Default([](auto *) { return std::nullopt; });
}
+/// If recipe \p R will lower to a GEP with a non-i8 source element type,
+/// return that source element type.
+static Type *getGEPSourceElementType(const VPSingleDefRecipe *R) {
+ // All VPInstructions that lower to GEPs must have the i8 source element
+ // type (as they are PtrAdds), so we omit it.
+ return TypeSwitch<const VPSingleDefRecipe *, Type *>(R)
+ .Case([](const VPReplicateRecipe *I) -> Type * {
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I->getUnderlyingValue()))
+ return GEP->getSourceElementType();
+ return nullptr;
+ })
+ .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
+ [](auto *I) { return I->getSourceElementType(); })
+ .Case<VPInstructionWithType>([](auto *I) {
+ return I->getOpcode() == Instruction::GetElementPtr
+ ? I->getSourceElementType()
+ : nullptr;
+ })
+ .Default([](auto *) { return nullptr; });
+}
+
/// Try to fold \p R using InstSimplifyFolder. Will succeed and return a
/// non-nullptr VPValue for a handled opcode or intrinsic ID if corresponding \p
/// Operands are foldable live-ins.
@@ -1279,8 +1300,7 @@ static VPIRValue *tryToFoldLiveIns(VPSingleDefRecipe &R,
Ops[1]);
case Instruction::GetElementPtr: {
auto &RFlags = cast<VPRecipeWithIRFlags>(R);
- auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
- return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
+ return Folder.FoldGEP(getGEPSourceElementType(&RFlags), Ops[0],
drop_begin(Ops), RFlags.getGEPNoWrapFlags());
}
case VPInstruction::PtrAdd:
@@ -2577,22 +2597,6 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
return Def == getEmptyKey() || Def == getTombstoneKey();
}
- /// If recipe \p R will lower to a GEP with a non-i8 source element type,
- /// return that source element type.
- static Type *getGEPSourceElementType(const VPSingleDefRecipe *R) {
- // All VPInstructions that lower to GEPs must have the i8 source element
- // type (as they are PtrAdds), so we omit it.
- return TypeSwitch<const VPSingleDefRecipe *, Type *>(R)
- .Case([](const VPReplicateRecipe *I) -> Type * {
- if (auto *GEP = dyn_cast<GetElementPtrInst>(I->getUnderlyingValue()))
- return GEP->getSourceElementType();
- return nullptr;
- })
- .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
- [](auto *I) { return I->getSourceElementType(); })
- .Default([](auto *) { return nullptr; });
- }
-
/// Returns true if recipe \p Def can be safely handed for CSE.
static bool canHandle(const VPSingleDefRecipe *Def) {
// We can extend the list of handled recipes in the future,
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll
index 2857fcd246cef..da5789714b852 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll
@@ -8,7 +8,7 @@ define void @diamond_phi(ptr %a) {
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<[[VP0:%[0-9]+]]>
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%a>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%a>, ir<%iv>
; CHECK-NEXT: EMIT ir<%c0> = icmp sle ir<%iv>, ir<0>
; CHECK-NEXT: Successor(s): bb2
; CHECK-EMPTY:
@@ -73,7 +73,7 @@ define void @mask_reuse(ptr %a) {
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<[[VP0:%[0-9]+]]>
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%a>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%a>, ir<%iv>
; CHECK-NEXT: EMIT ir<%c0> = icmp sle ir<%iv>, ir<0>
; CHECK-NEXT: EMIT ir<%add0> = add ir<%iv>, ir<0>
; CHECK-NEXT: Successor(s): bb1
@@ -159,7 +159,7 @@ define void @optimized_mask(ptr %a) {
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<[[VP0:%[0-9]+]]>
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%a>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%a>, ir<%iv>
; CHECK-NEXT: EMIT ir<%c0> = icmp sle ir<%iv>, ir<0>
; CHECK-NEXT: Successor(s): bb6
; CHECK-EMPTY:
@@ -285,7 +285,7 @@ define void @switch(ptr %a) {
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<[[VP0:%[0-9]+]]>
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%a>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%a>, ir<%iv>
; CHECK-NEXT: EMIT ir<%c0> = icmp sle ir<%iv>, ir<0>
; CHECK-NEXT: EMIT ir<%add0> = add ir<%iv>, ir<0>
; CHECK-NEXT: Successor(s): bb2
@@ -420,7 +420,7 @@ define void @diamond_phi2(ptr %a, i1 %c1, i1 %c2) {
; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = logical-and ir<%c0>, ir<%c1>
; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = or vp<[[VP5]]>, vp<[[VP6]]>
; CHECK-NEXT: BLEND ir<%phi> = ir<%add2>/vp<[[VP5]]> ir<%add1>/vp<[[VP6]]>
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%a>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%a>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%phi>, ir<%gep>, vp<[[VP7]]>
; CHECK-NEXT: Successor(s): bb5
; CHECK-EMPTY:
@@ -521,7 +521,7 @@ define void @blend_masks(ptr noalias %p, i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4)
; CHECK-NEXT: EMIT vp<[[VP15:%[0-9]+]]> = logical-and vp<[[VP9]]>, ir<%c4>
; CHECK-NEXT: EMIT vp<[[VP16:%[0-9]+]]> = or vp<[[VP15]]>, vp<[[VP14]]>
; CHECK-NEXT: BLEND ir<%phi> = ir<1>/vp<[[VP15]]> ir<0>/vp<[[VP14]]>
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%p>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%phi>, ir<%gep>, vp<[[VP16]]>
; CHECK-NEXT: Successor(s): bb8
; CHECK-EMPTY:
@@ -607,7 +607,7 @@ define void @blend_masks_triangle_phi(ptr noalias %p, i1 %c0, i1 %c1) {
; CHECK-NEXT: bb3:
; CHECK-NEXT: EMIT vp<[[VP8:%[0-9]+]]> = logical-and ir<%c0>, ir<%c1>
; CHECK-NEXT: BLEND ir<%phi> = ir<1>/vp<[[VP7]]> ir<0>/vp<[[VP8]]>
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%p>, ir<%iv>
; CHECK-NEXT: EMIT store ir<%phi>, ir<%gep>
; CHECK-NEXT: EMIT ir<%iv.next> = add ir<%iv>, ir<1>
; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.next>, ir<128>
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/tail-folding.ll b/llvm/test/Transforms/LoopVectorize/VPlan/tail-folding.ll
index d02fd486ec609..741bfd4d90822 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/tail-folding.ll
@@ -27,7 +27,7 @@ define i32 @live_out(ptr noalias %p, i32 %n) {
; CHECK-NEXT: Successor(s): vector.body.split, vector.latch
; CHECK-EMPTY:
; CHECK-NEXT: vector.body.split:
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%p>, ir<%iv>
; CHECK-NEXT: EMIT-SCALAR ir<%x> = load ir<%gep>
; CHECK-NEXT: EMIT ir<%y> = add ir<%x>, ir<1>
; CHECK-NEXT: EMIT store ir<%y>, ir<%gep>
@@ -118,7 +118,7 @@ define i32 @conditional_live_out(ptr noalias %p, i32 %n, i1 %c) {
; CHECK-NEXT: Successor(s): if, latch
; CHECK-EMPTY:
; CHECK-NEXT: if:
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%p>, ir<%iv>
; CHECK-NEXT: EMIT-SCALAR ir<%x> = load ir<%gep>
; CHECK-NEXT: EMIT ir<%y> = add ir<%x>, ir<1>
; CHECK-NEXT: EMIT store ir<%y>, ir<%gep>
@@ -284,7 +284,7 @@ define i32 @reduction(ptr noalias %p, i32 %n) {
; CHECK-NEXT: Successor(s): vector.body.split, vector.latch
; CHECK-EMPTY:
; CHECK-NEXT: vector.body.split:
-; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%p>, ir<%iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep> = getelementptr ptr ir<%p>, ir<%iv>
; CHECK-NEXT: EMIT-SCALAR ir<%x> = load ir<%gep>
; CHECK-NEXT: EMIT ir<%rdx.next> = add ir<%rdx>, ir<%x>
; CHECK-NEXT: EMIT ir<%iv.next> = add ir<%iv>, ir<1>
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-printing-outer-loop.ll
index 6eff6d1df3523..a27d0d9ecd227 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-printing-outer-loop.ll
@@ -15,14 +15,14 @@ define void @foo(i64 %n) {
; CHECK-EMPTY:
; CHECK-NEXT: outer.header:
; CHECK-NEXT: EMIT-SCALAR ir<%outer.iv> = phi [ ir<%outer.iv.next>, outer.latch ], [ ir<0>, ir-bb<entry> ]
-; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr inbounds ir<@arr2>, ir<0>, ir<%outer.iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep.1> = getelementptr inbounds ptr ir<@arr2>, ir<0>, ir<%outer.iv>
; CHECK-NEXT: EMIT store ir<%outer.iv>, ir<%gep.1>
; CHECK-NEXT: EMIT ir<%add> = add nsw ir<%outer.iv>, ir<%n>
; CHECK-NEXT: Successor(s): inner
; CHECK-EMPTY:
; CHECK-NEXT: inner:
; CHECK-NEXT: EMIT-SCALAR ir<%inner.iv> = phi [ ir<%inner.iv.next>, inner ], [ ir<0>, outer.header ]
-; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr inbounds ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
+; CHECK-NEXT: EMIT-SCALAR ir<%gep.2> = getelementptr inbounds ptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
; CHECK-NEXT: EMIT ir<%inner.iv.next> = add nuw nsw ir<%inner.iv>, ir<1>
; CHECK-NEXT: EMIT ir<%inner.ec> = icmp eq ir<%inner.iv.next>, ir<8>
|
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed. |
fcbb92c to
b79ff4a
Compare
|
Gentle ping. |
b79ff4a to
a072b3d
Compare
205ebeb to
bbb6169
Compare
f3d1b67 to
a585365
Compare
|
Gentle ping. |
48f5b9f to
9e0e2a2
Compare
|
Gentle ping. |
Currently, WidenGEP, Replicate, and VPInstruction recipes can all hold an Instruction::GetElementPtr. Introduce a first-class "scalar GEP" VPInstructionWithType, similar to the existing "scalar cast", with the additional benefit of being able to query the source element type of the GEP directly without going to the underlying value. Planned follow-ups include unifying WidenGEP and Replicate GEPs with this VPInstruction GEP.
Introduce a first-class VPGEPInstruction, similar to the existing "scalar cast", with the additional benefit of being able to query the source element type of the GEP directly without going to the underlying value. Planned follow-ups include unifying WidenGEP and Replicate GEPs with this new VPGEPInstruction.