[VPlan] Split VPWidenMemoryInstructionRecipe (NFCI).#87411
Conversation
This patch introduces a new VPWidenMemoryRecipe abstract base class and distinct sub-classes to model loads and stores. This is a first step in an effort to simplify and modularize code generation for widened loads and stores and to enable adding further, more specialized memory recipes. Note that this adjusts the order of the operands for VPWidenStoreRecipe to match the order of operands of stores in IR and other recipes (like VPReplicateRecipe).
| VPValue *getAddr() const { | ||
| return getOperand(0); // Address is the 1st, mandatory operand. | ||
| } | ||
| virtual VPValue *getAddr() const = 0; |
There was a problem hiding this comment.
Do you really need to make it virtual? I think you can just remove it from the base class
There was a problem hiding this comment.
There are callers that need to get the address of any WidenMemoryRecipe (e.g. VPlanTransforms::dropPoisonGeneratingRecipes), kept virtual for now.
There was a problem hiding this comment.
Better to use static isa/dyn_cast sequences where possible instead of virtual functions
There was a problem hiding this comment.
Fair enough, replaced with an implementation using switch and recipe ID.
| Instruction &getIngredient() const { return Ingredient; } | ||
| }; | ||
|
|
||
| struct VPWidenLoadRecipe : public VPWidenMemoryRecipe, public VPValue { |
| Instruction &getIngredient() const { return Ingredient; } | ||
| }; | ||
|
|
||
| struct VPWidenStoreRecipe : public VPWidenMemoryRecipe { |
|
@llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) Changes: This patch introduces a new VPWidenMemoryRecipe abstract base class and distinct sub-classes to model loads and stores. This is a first step in an effort to simplify and modularize code generation for widened loads and stores and to enable adding further, more specialized memory recipes. Note that this adjusts the order of the operands for VPWidenStoreRecipe to match the order of operands of stores in IR and other recipes (like VPReplicateRecipe). Patch is 56.35 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/87411.diff — 22 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 49bacb5ae6cc4e..d6a3365743355f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8095,7 +8095,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) {
BlockMaskCache[BB] = BlockMask;
}
-VPWidenMemoryInstructionRecipe *
+VPWidenMemoryRecipe *
VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
VFRange &Range) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
@@ -8140,12 +8140,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
Ptr = VectorPtr;
}
if (LoadInst *Load = dyn_cast<LoadInst>(I))
- return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive,
- Reverse, I->getDebugLoc());
+ return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
+ I->getDebugLoc());
StoreInst *Store = cast<StoreInst>(I);
- return new VPWidenMemoryInstructionRecipe(
- *Store, Ptr, Operands[0], Mask, Consecutive, Reverse, I->getDebugLoc());
+ return new VPWidenStoreRecipe(*Store, Operands[0], Ptr, Mask, Consecutive,
+ Reverse, I->getDebugLoc());
}
/// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
@@ -8780,13 +8780,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// for this VPlan, replace the Recipes widening its memory instructions with a
// single VPInterleaveRecipe at its insertion point.
for (const auto *IG : InterleaveGroups) {
- auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
- RecipeBuilder.getRecipe(IG->getInsertPos()));
+ auto *Recipe =
+ cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getInsertPos()));
SmallVector<VPValue *, 4> StoredValues;
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) {
- auto *StoreR =
- cast<VPWidenMemoryInstructionRecipe>(RecipeBuilder.getRecipe(SI));
+ auto *StoreR = cast<VPWidenStoreRecipe>(RecipeBuilder.getRecipe(SI));
StoredValues.push_back(StoreR->getStoredValue());
}
@@ -9464,22 +9463,15 @@ static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
return Call;
}
-void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
- VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
-
+void VPWidenLoadRecipe::execute(VPTransformState &State) {
// Attempt to issue a wide load.
- LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);
- StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);
-
- assert((LI || SI) && "Invalid Load/Store instruction");
- assert((!SI || StoredValue) && "No stored value provided for widened store");
- assert((!LI || !StoredValue) && "Stored value provided for widened load");
+ LoadInst *LI = cast<LoadInst>(&Ingredient);
Type *ScalarDataTy = getLoadStoreType(&Ingredient);
auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
const Align Alignment = getLoadStoreAlignment(&Ingredient);
- bool CreateGatherScatter = !isConsecutive();
+ bool CreateGather = !isConsecutive();
auto &Builder = State.Builder;
InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
@@ -9495,56 +9487,6 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
}
}
- // Handle Stores:
- if (SI) {
- State.setDebugLocFrom(getDebugLoc());
-
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Instruction *NewSI = nullptr;
- Value *StoredVal = State.get(StoredValue, Part);
- // TODO: split this into several classes for better design.
- if (State.EVL) {
- assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
- "explicit vector length.");
- assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
- VPInstruction::ExplicitVectorLength &&
- "EVL must be VPInstruction::ExplicitVectorLength.");
- Value *EVL = State.get(State.EVL, VPIteration(0, 0));
- // If EVL is not nullptr, then EVL must be a valid value set during plan
- // creation, possibly default value = whole vector register length. EVL
- // is created only if TTI prefers predicated vectorization, thus if EVL
- // is not nullptr it also implies preference for predicated
- // vectorization.
- // FIXME: Support reverse store after vp_reverse is added.
- Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
- NewSI = lowerStoreUsingVectorIntrinsics(
- Builder, State.get(getAddr(), Part, !CreateGatherScatter),
- StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment);
- } else if (CreateGatherScatter) {
- Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
- Value *VectorGep = State.get(getAddr(), Part);
- NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
- MaskPart);
- } else {
- if (isReverse()) {
- // If we store to reverse consecutive memory locations, then we need
- // to reverse the order of elements in the stored value.
- StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
- // We don't want to update the value in the map as it might be used in
- // another expression. So don't call resetVectorValue(StoredVal).
- }
- auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
- if (isMaskRequired)
- NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
- BlockInMaskParts[Part]);
- else
- NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
- }
- State.addMetadata(NewSI, SI);
- }
- return;
- }
-
// Handle loads.
assert(LI && "Must have a load instruction");
State.setDebugLocFrom(getDebugLoc());
@@ -9566,9 +9508,9 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// FIXME: Support reverse loading after vp_reverse is added.
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
NewLI = lowerLoadUsingVectorIntrinsics(
- Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter),
- CreateGatherScatter, MaskPart, EVL, Alignment);
- } else if (CreateGatherScatter) {
+ Builder, DataTy, State.get(getAddr(), Part, !CreateGather),
+ CreateGather, MaskPart, EVL, Alignment);
+ } else if (CreateGather) {
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Value *VectorGep = State.get(getAddr(), Part);
NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
@@ -9590,7 +9532,75 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
}
- State.set(getVPSingleValue(), NewLI, Part);
+ State.set(this, NewLI, Part);
+ }
+}
+
+void VPWidenStoreRecipe::execute(VPTransformState &State) {
+ VPValue *StoredValue = getStoredValue();
+
+ const Align Alignment = getLoadStoreAlignment(&Ingredient);
+ bool CreateScatter = !isConsecutive();
+
+ StoreInst *SI = cast<StoreInst>(&Ingredient);
+ auto &Builder = State.Builder;
+ InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
+ bool isMaskRequired = getMask();
+ if (isMaskRequired) {
+ // Mask reversal is only needed for non-all-one (null) masks, as reverse of
+ // a null all-one mask is a null mask.
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *Mask = State.get(getMask(), Part);
+ if (isReverse())
+ Mask = Builder.CreateVectorReverse(Mask, "reverse");
+ BlockInMaskParts[Part] = Mask;
+ }
+ }
+
+ State.setDebugLocFrom(getDebugLoc());
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Instruction *NewSI = nullptr;
+ Value *StoredVal = State.get(StoredValue, Part);
+ // TODO: split this into several classes for better design.
+ if (State.EVL) {
+ assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+ "explicit vector length.");
+ assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
+ VPInstruction::ExplicitVectorLength &&
+ "EVL must be VPInstruction::ExplicitVectorLength.");
+ Value *EVL = State.get(State.EVL, VPIteration(0, 0));
+ // If EVL is not nullptr, then EVL must be a valid value set during plan
+ // creation, possibly default value = whole vector register length. EVL
+ // is created only if TTI prefers predicated vectorization, thus if EVL
+ // is not nullptr it also implies preference for predicated
+ // vectorization.
+ // FIXME: Support reverse store after vp_reverse is added.
+ Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+ NewSI = lowerStoreUsingVectorIntrinsics(
+ Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal,
+ CreateScatter, MaskPart, EVL, Alignment);
+ } else if (CreateScatter) {
+ Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+ Value *VectorGep = State.get(getAddr(), Part);
+ NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
+ MaskPart);
+ } else {
+ if (isReverse()) {
+ // If we store to reverse consecutive memory locations, then we need
+ // to reverse the order of elements in the stored value.
+ StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
+ // We don't want to update the value in the map as it might be used in
+ // another expression. So don't call resetVectorValue(StoredVal).
+ }
+ auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
+ if (isMaskRequired)
+ NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
+ BlockInMaskParts[Part]);
+ else
+ NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
+ }
+ State.addMetadata(NewSI, SI);
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 605b47fa0a46b8..b4c7ab02f928f0 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -69,9 +69,9 @@ class VPRecipeBuilder {
/// Check if the load or store instruction \p I should widened for \p
/// Range.Start and potentially masked. Such instructions are handled by a
/// recipe that takes an additional VPInstruction for the mask.
- VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
- ArrayRef<VPValue *> Operands,
- VFRange &Range);
+ VPWidenMemoryRecipe *tryToWidenMemory(Instruction *I,
+ ArrayRef<VPValue *> Operands,
+ VFRange &Range);
/// Check if an induction recipe should be constructed for \p Phi. If so build
/// and return it. If not, return null.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 77577b516ae274..3a0800bbb3d45c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -875,7 +875,8 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
return true;
case VPRecipeBase::VPInterleaveSC:
case VPRecipeBase::VPBranchOnMaskSC:
- case VPRecipeBase::VPWidenMemoryInstructionSC:
+ case VPRecipeBase::VPWidenLoadSC:
+ case VPRecipeBase::VPWidenStoreSC:
// TODO: Widened stores don't define a value, but widened loads do. Split
// the recipes to be able to make widened loads VPSingleDefRecipes.
return false;
@@ -2279,7 +2280,8 @@ class VPPredInstPHIRecipe : public VPSingleDefRecipe {
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
-class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
+class VPWidenMemoryRecipe : public VPRecipeBase {
+protected:
Instruction &Ingredient;
// Whether the loaded-from / stored-to addresses are consecutive.
@@ -2294,47 +2296,40 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
addOperand(Mask);
}
- bool isMasked() const {
- return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
- }
-
public:
- VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
- bool Consecutive, bool Reverse, DebugLoc DL)
- : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}, DL),
- Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) {
+ VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
+ std::initializer_list<VPValue *> Operands,
+ bool Consecutive, bool Reverse, DebugLoc DL)
+ : VPRecipeBase(SC, Operands, DL), Ingredient(I), Consecutive(Consecutive),
+ Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
- new VPValue(this, &Load);
- setMask(Mask);
}
- VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
- VPValue *StoredValue, VPValue *Mask,
- bool Consecutive, bool Reverse, DebugLoc DL)
- : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue},
- DL),
- Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
- assert((Consecutive || !Reverse) && "Reverse implies consecutive");
- setMask(Mask);
- }
+ VPRecipeBase *clone() override = 0;
- VPRecipeBase *clone() override {
- if (isStore())
- return new VPWidenMemoryInstructionRecipe(
- cast<StoreInst>(Ingredient), getAddr(), getStoredValue(), getMask(),
- Consecutive, Reverse, getDebugLoc());
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenLoadSC;
+ }
- return new VPWidenMemoryInstructionRecipe(cast<LoadInst>(Ingredient),
- getAddr(), getMask(), Consecutive,
- Reverse, getDebugLoc());
+ static inline bool classof(const VPUser *U) {
+ auto *R = dyn_cast<VPRecipeBase>(U);
+ return R && classof(R);
}
- VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)
+ /// Returns true if the recipe is masked.
+ virtual bool isMasked() const = 0;
/// Return the address accessed by this recipe.
- VPValue *getAddr() const {
- return getOperand(0); // Address is the 1st, mandatory operand.
- }
+ virtual VPValue *getAddr() const = 0;
+
+
+ // Return whether the loaded-from / stored-to addresses are consecutive.
+ bool isConsecutive() const { return Consecutive; }
+
+ // Return whether the consecutive loaded/stored addresses are in reverse
+ // order.
+ bool isReverse() const { return Reverse; }
/// Return the mask used by this recipe. Note that a full mask is represented
/// by a nullptr.
@@ -2343,21 +2338,37 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
}
- /// Returns true if this recipe is a store.
- bool isStore() const { return isa<StoreInst>(Ingredient); }
+ /// Generate the wide load/store.
+ void execute(VPTransformState &State) override = 0;
+
+ Instruction &getIngredient() const { return Ingredient; }
+};
- /// Return the address accessed by this recipe.
- VPValue *getStoredValue() const {
- assert(isStore() && "Stored value only available for store instructions");
- return getOperand(1); // Stored value is the 2nd, mandatory operand.
+struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
+ VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
+ bool Consecutive, bool Reverse, DebugLoc DL)
+ : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
+ Reverse, DL),
+ VPValue(this, &Load) {
+ assert((Consecutive || !Reverse) && "Reverse implies consecutive");
+ setMask(Mask);
}
- // Return whether the loaded-from / stored-to addresses are consecutive.
- bool isConsecutive() const { return Consecutive; }
+ VPRecipeBase *clone() override {
+ return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
+ getMask(), Consecutive, Reverse,
+ getDebugLoc());
+ }
- // Return whether the consecutive loaded/stored addresses are in reverse
- // order.
- bool isReverse() const { return Reverse; }
+ VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
+
+ /// Returns true if the recipe is masked.
+ bool isMasked() const override { return getNumOperands() == 2; }
+
+ /// Return the address accessed by this recipe.
+ VPValue *getAddr() const override {
+ return getOperand(0); // Address is the 1st, mandatory operand.
+ }
/// Generate the wide load/store.
void execute(VPTransformState &State) override;
@@ -2376,13 +2387,56 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
// Widened, consecutive memory operations only demand the first lane of
// their address, unless the same operand is also stored. That latter can
// happen with opaque pointers.
- return Op == getAddr() && isConsecutive() &&
- (!isStore() || Op != getStoredValue());
+ return Op == getAddr() && isConsecutive();
}
-
- Instruction &getIngredient() const { return Ingredient; }
};
+struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
+ VPWidenStoreRecipe(StoreInst &Store, VPValue *StoredVal, VPValue *Addr,
+ VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
+ : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {StoredVal, Addr},
+ Consecutive, Reverse, DL) {
+ assert((Consecutive || !Reverse) && "Reverse implies consecutive");
+ setMask(Mask);
+ }
+
+ VPRecipeBase *clone() override {
+ return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getStoredValue(),
+ getAddr(), getMask(), Consecutive, Reverse,
+ getDebugLoc());
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
+
+ /// Returns true if the recipe is masked.
+ bool isMasked() const override { return getNumOperands() == 3; }
+
+ /// Return the address accessed by this recipe.
+ VPValue *getAddr() const override { return getOperand(1); }
+
+ /// Return the value stored by this recipe.
+ VPValue *getStoredValue() const { return getOperand(0); }
+
+ /// Generate the wide load/store.
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+
+ // Widened, consecutive memory operations only demand the first lane of
+ // their address, unless the same operand is also stored. That latter can
+ // happen with opaque pointers.
+ return Op == getAddr() && isConsecutive() && Op != getStoredValue();
+ }
+};
/// Recipe to expand a SCEV expression.
class VPExpandSCEVRecipe : public VPSingleDefRecipe {
const SCEV *Expr;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index c8ae2ee5a30fe5..130fb04f586e75 100644
--- a/llvm/lib/T...
[truncated]
|
|
@llvm/pr-subscribers-backend-risc-v Author: Florian Hahn (fhahn) Changes: This patch introduces a new VPWidenMemoryRecipe abstract base class and distinct sub-classes to model loads and stores. This is a first step in an effort to simplify and modularize code generation for widened loads and stores and to enable adding further, more specialized memory recipes. Note that this adjusts the order of the operands for VPWidenStoreRecipe to match the order of operands of stores in IR and other recipes (like VPReplicateRecipe). Patch is 56.35 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/87411.diff — 22 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 49bacb5ae6cc4e..d6a3365743355f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8095,7 +8095,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) {
BlockMaskCache[BB] = BlockMask;
}
-VPWidenMemoryInstructionRecipe *
+VPWidenMemoryRecipe *
VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
VFRange &Range) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
@@ -8140,12 +8140,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
Ptr = VectorPtr;
}
if (LoadInst *Load = dyn_cast<LoadInst>(I))
- return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive,
- Reverse, I->getDebugLoc());
+ return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
+ I->getDebugLoc());
StoreInst *Store = cast<StoreInst>(I);
- return new VPWidenMemoryInstructionRecipe(
- *Store, Ptr, Operands[0], Mask, Consecutive, Reverse, I->getDebugLoc());
+ return new VPWidenStoreRecipe(*Store, Operands[0], Ptr, Mask, Consecutive,
+ Reverse, I->getDebugLoc());
}
/// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
@@ -8780,13 +8780,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// for this VPlan, replace the Recipes widening its memory instructions with a
// single VPInterleaveRecipe at its insertion point.
for (const auto *IG : InterleaveGroups) {
- auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
- RecipeBuilder.getRecipe(IG->getInsertPos()));
+ auto *Recipe =
+ cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getInsertPos()));
SmallVector<VPValue *, 4> StoredValues;
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) {
- auto *StoreR =
- cast<VPWidenMemoryInstructionRecipe>(RecipeBuilder.getRecipe(SI));
+ auto *StoreR = cast<VPWidenStoreRecipe>(RecipeBuilder.getRecipe(SI));
StoredValues.push_back(StoreR->getStoredValue());
}
@@ -9464,22 +9463,15 @@ static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
return Call;
}
-void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
- VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
-
+void VPWidenLoadRecipe::execute(VPTransformState &State) {
// Attempt to issue a wide load.
- LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);
- StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);
-
- assert((LI || SI) && "Invalid Load/Store instruction");
- assert((!SI || StoredValue) && "No stored value provided for widened store");
- assert((!LI || !StoredValue) && "Stored value provided for widened load");
+ LoadInst *LI = cast<LoadInst>(&Ingredient);
Type *ScalarDataTy = getLoadStoreType(&Ingredient);
auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
const Align Alignment = getLoadStoreAlignment(&Ingredient);
- bool CreateGatherScatter = !isConsecutive();
+ bool CreateGather = !isConsecutive();
auto &Builder = State.Builder;
InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
@@ -9495,56 +9487,6 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
}
}
- // Handle Stores:
- if (SI) {
- State.setDebugLocFrom(getDebugLoc());
-
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Instruction *NewSI = nullptr;
- Value *StoredVal = State.get(StoredValue, Part);
- // TODO: split this into several classes for better design.
- if (State.EVL) {
- assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
- "explicit vector length.");
- assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
- VPInstruction::ExplicitVectorLength &&
- "EVL must be VPInstruction::ExplicitVectorLength.");
- Value *EVL = State.get(State.EVL, VPIteration(0, 0));
- // If EVL is not nullptr, then EVL must be a valid value set during plan
- // creation, possibly default value = whole vector register length. EVL
- // is created only if TTI prefers predicated vectorization, thus if EVL
- // is not nullptr it also implies preference for predicated
- // vectorization.
- // FIXME: Support reverse store after vp_reverse is added.
- Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
- NewSI = lowerStoreUsingVectorIntrinsics(
- Builder, State.get(getAddr(), Part, !CreateGatherScatter),
- StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment);
- } else if (CreateGatherScatter) {
- Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
- Value *VectorGep = State.get(getAddr(), Part);
- NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
- MaskPart);
- } else {
- if (isReverse()) {
- // If we store to reverse consecutive memory locations, then we need
- // to reverse the order of elements in the stored value.
- StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
- // We don't want to update the value in the map as it might be used in
- // another expression. So don't call resetVectorValue(StoredVal).
- }
- auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
- if (isMaskRequired)
- NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
- BlockInMaskParts[Part]);
- else
- NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
- }
- State.addMetadata(NewSI, SI);
- }
- return;
- }
-
// Handle loads.
assert(LI && "Must have a load instruction");
State.setDebugLocFrom(getDebugLoc());
@@ -9566,9 +9508,9 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// FIXME: Support reverse loading after vp_reverse is added.
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
NewLI = lowerLoadUsingVectorIntrinsics(
- Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter),
- CreateGatherScatter, MaskPart, EVL, Alignment);
- } else if (CreateGatherScatter) {
+ Builder, DataTy, State.get(getAddr(), Part, !CreateGather),
+ CreateGather, MaskPart, EVL, Alignment);
+ } else if (CreateGather) {
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Value *VectorGep = State.get(getAddr(), Part);
NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
@@ -9590,7 +9532,75 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
}
- State.set(getVPSingleValue(), NewLI, Part);
+ State.set(this, NewLI, Part);
+ }
+}
+
+void VPWidenStoreRecipe::execute(VPTransformState &State) {
+ VPValue *StoredValue = getStoredValue();
+
+ const Align Alignment = getLoadStoreAlignment(&Ingredient);
+ bool CreateScatter = !isConsecutive();
+
+ StoreInst *SI = cast<StoreInst>(&Ingredient);
+ auto &Builder = State.Builder;
+ InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
+ bool isMaskRequired = getMask();
+ if (isMaskRequired) {
+ // Mask reversal is only needed for non-all-one (null) masks, as reverse of
+ // a null all-one mask is a null mask.
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *Mask = State.get(getMask(), Part);
+ if (isReverse())
+ Mask = Builder.CreateVectorReverse(Mask, "reverse");
+ BlockInMaskParts[Part] = Mask;
+ }
+ }
+
+ State.setDebugLocFrom(getDebugLoc());
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Instruction *NewSI = nullptr;
+ Value *StoredVal = State.get(StoredValue, Part);
+ // TODO: split this into several classes for better design.
+ if (State.EVL) {
+ assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+ "explicit vector length.");
+ assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
+ VPInstruction::ExplicitVectorLength &&
+ "EVL must be VPInstruction::ExplicitVectorLength.");
+ Value *EVL = State.get(State.EVL, VPIteration(0, 0));
+ // If EVL is not nullptr, then EVL must be a valid value set during plan
+ // creation, possibly default value = whole vector register length. EVL
+ // is created only if TTI prefers predicated vectorization, thus if EVL
+ // is not nullptr it also implies preference for predicated
+ // vectorization.
+ // FIXME: Support reverse store after vp_reverse is added.
+ Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+ NewSI = lowerStoreUsingVectorIntrinsics(
+ Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal,
+ CreateScatter, MaskPart, EVL, Alignment);
+ } else if (CreateScatter) {
+ Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+ Value *VectorGep = State.get(getAddr(), Part);
+ NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
+ MaskPart);
+ } else {
+ if (isReverse()) {
+ // If we store to reverse consecutive memory locations, then we need
+ // to reverse the order of elements in the stored value.
+ StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
+ // We don't want to update the value in the map as it might be used in
+ // another expression. So don't call resetVectorValue(StoredVal).
+ }
+ auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
+ if (isMaskRequired)
+ NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
+ BlockInMaskParts[Part]);
+ else
+ NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
+ }
+ State.addMetadata(NewSI, SI);
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 605b47fa0a46b8..b4c7ab02f928f0 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -69,9 +69,9 @@ class VPRecipeBuilder {
/// Check if the load or store instruction \p I should widened for \p
/// Range.Start and potentially masked. Such instructions are handled by a
/// recipe that takes an additional VPInstruction for the mask.
- VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
- ArrayRef<VPValue *> Operands,
- VFRange &Range);
+ VPWidenMemoryRecipe *tryToWidenMemory(Instruction *I,
+ ArrayRef<VPValue *> Operands,
+ VFRange &Range);
/// Check if an induction recipe should be constructed for \p Phi. If so build
/// and return it. If not, return null.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 77577b516ae274..3a0800bbb3d45c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -875,7 +875,8 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
return true;
case VPRecipeBase::VPInterleaveSC:
case VPRecipeBase::VPBranchOnMaskSC:
- case VPRecipeBase::VPWidenMemoryInstructionSC:
+ case VPRecipeBase::VPWidenLoadSC:
+ case VPRecipeBase::VPWidenStoreSC:
// TODO: Widened stores don't define a value, but widened loads do. Split
// the recipes to be able to make widened loads VPSingleDefRecipes.
return false;
@@ -2279,7 +2280,8 @@ class VPPredInstPHIRecipe : public VPSingleDefRecipe {
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
-class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
+class VPWidenMemoryRecipe : public VPRecipeBase {
+protected:
Instruction &Ingredient;
// Whether the loaded-from / stored-to addresses are consecutive.
@@ -2294,47 +2296,40 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
addOperand(Mask);
}
- bool isMasked() const {
- return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
- }
-
public:
- VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
- bool Consecutive, bool Reverse, DebugLoc DL)
- : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}, DL),
- Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) {
+ VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
+ std::initializer_list<VPValue *> Operands,
+ bool Consecutive, bool Reverse, DebugLoc DL)
+ : VPRecipeBase(SC, Operands, DL), Ingredient(I), Consecutive(Consecutive),
+ Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
- new VPValue(this, &Load);
- setMask(Mask);
}
- VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
- VPValue *StoredValue, VPValue *Mask,
- bool Consecutive, bool Reverse, DebugLoc DL)
- : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue},
- DL),
- Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
- assert((Consecutive || !Reverse) && "Reverse implies consecutive");
- setMask(Mask);
- }
+ VPRecipeBase *clone() override = 0;
- VPRecipeBase *clone() override {
- if (isStore())
- return new VPWidenMemoryInstructionRecipe(
- cast<StoreInst>(Ingredient), getAddr(), getStoredValue(), getMask(),
- Consecutive, Reverse, getDebugLoc());
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenLoadSC;
+ }
- return new VPWidenMemoryInstructionRecipe(cast<LoadInst>(Ingredient),
- getAddr(), getMask(), Consecutive,
- Reverse, getDebugLoc());
+ static inline bool classof(const VPUser *U) {
+ auto *R = dyn_cast<VPRecipeBase>(U);
+ return R && classof(R);
}
- VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)
+ /// Returns true if the recipe is masked.
+ virtual bool isMasked() const = 0;
/// Return the address accessed by this recipe.
- VPValue *getAddr() const {
- return getOperand(0); // Address is the 1st, mandatory operand.
- }
+ virtual VPValue *getAddr() const = 0;
+
+
+ // Return whether the loaded-from / stored-to addresses are consecutive.
+ bool isConsecutive() const { return Consecutive; }
+
+ // Return whether the consecutive loaded/stored addresses are in reverse
+ // order.
+ bool isReverse() const { return Reverse; }
/// Return the mask used by this recipe. Note that a full mask is represented
/// by a nullptr.
@@ -2343,21 +2338,37 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
}
- /// Returns true if this recipe is a store.
- bool isStore() const { return isa<StoreInst>(Ingredient); }
+ /// Generate the wide load/store.
+ void execute(VPTransformState &State) override = 0;
+
+ Instruction &getIngredient() const { return Ingredient; }
+};
- /// Return the address accessed by this recipe.
- VPValue *getStoredValue() const {
- assert(isStore() && "Stored value only available for store instructions");
- return getOperand(1); // Stored value is the 2nd, mandatory operand.
+struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
+ VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
+ bool Consecutive, bool Reverse, DebugLoc DL)
+ : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
+ Reverse, DL),
+ VPValue(this, &Load) {
+ assert((Consecutive || !Reverse) && "Reverse implies consecutive");
+ setMask(Mask);
}
- // Return whether the loaded-from / stored-to addresses are consecutive.
- bool isConsecutive() const { return Consecutive; }
+ VPRecipeBase *clone() override {
+ return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
+ getMask(), Consecutive, Reverse,
+ getDebugLoc());
+ }
- // Return whether the consecutive loaded/stored addresses are in reverse
- // order.
- bool isReverse() const { return Reverse; }
+ VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
+
+ /// Returns true if the recipe is masked.
+ bool isMasked() const override { return getNumOperands() == 2; }
+
+ /// Return the address accessed by this recipe.
+ VPValue *getAddr() const override {
+ return getOperand(0); // Address is the 1st, mandatory operand.
+ }
/// Generate the wide load/store.
void execute(VPTransformState &State) override;
@@ -2376,13 +2387,56 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
// Widened, consecutive memory operations only demand the first lane of
// their address, unless the same operand is also stored. That latter can
// happen with opaque pointers.
- return Op == getAddr() && isConsecutive() &&
- (!isStore() || Op != getStoredValue());
+ return Op == getAddr() && isConsecutive();
}
-
- Instruction &getIngredient() const { return Ingredient; }
};
+struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
+ VPWidenStoreRecipe(StoreInst &Store, VPValue *StoredVal, VPValue *Addr,
+ VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
+ : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {StoredVal, Addr},
+ Consecutive, Reverse, DL) {
+ assert((Consecutive || !Reverse) && "Reverse implies consecutive");
+ setMask(Mask);
+ }
+
+ VPRecipeBase *clone() override {
+ return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getStoredValue(),
+ getAddr(), getMask(), Consecutive, Reverse,
+ getDebugLoc());
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
+
+ /// Returns true if the recipe is masked.
+ bool isMasked() const override { return getNumOperands() == 3; }
+
+ /// Return the address accessed by this recipe.
+ VPValue *getAddr() const override { return getOperand(1); }
+
+ /// Return the value stored by this recipe.
+ VPValue *getStoredValue() const { return getOperand(0); }
+
+ /// Generate a wide store or scatter.
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+
+ // Widened, consecutive memory operations only demand the first lane of
+ // their address, unless the same operand is also stored. The latter can
+ // happen with opaque pointers.
+ return Op == getAddr() && isConsecutive() && Op != getStoredValue();
+ }
+};
/// Recipe to expand a SCEV expression.
class VPExpandSCEVRecipe : public VPSingleDefRecipe {
const SCEV *Expr;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index c8ae2ee5a30fe5..130fb04f586e75 100644
--- a/llvm/lib/T...
[truncated]
|
| VPValue *getAddr() const { | ||
| return getOperand(0); // Address is the 1st, mandatory operand. | ||
| } | ||
| virtual VPValue *getAddr() const = 0; |
There was a problem hiding this comment.
There are callers that need to get the address of any WidenMemoryRecipe (e.g. VPlanTransforms::dropPoisonGeneratingRecipes), kept virtual for now.
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
| assert((LI || SI) && "Invalid Load/Store instruction"); | ||
| assert((!SI || StoredValue) && "No stored value provided for widened store"); | ||
| assert((!LI || !StoredValue) && "Stored value provided for widened load"); | ||
| LoadInst *LI = cast<LoadInst>(&Ingredient); |
There was a problem hiding this comment.
| LoadInst *LI = cast<LoadInst>(&Ingredient); | |
| auto *LI = cast<LoadInst>(&Ingredient); |
| auto *DataTy = VectorType::get(ScalarDataTy, State.VF); | ||
| const Align Alignment = getLoadStoreAlignment(&Ingredient); | ||
| bool CreateGatherScatter = !isConsecutive(); | ||
| bool CreateGather = !isConsecutive(); |
There was a problem hiding this comment.
| bool CreateGather = !isConsecutive(); | |
| bool IsConsecutive = isConsecutive(); |
There was a problem hiding this comment.
Kept as is for now, as CreateGather seems slightly more descriptive w.r.t. how it is used
| Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter), | ||
| CreateGatherScatter, MaskPart, EVL, Alignment); | ||
| } else if (CreateGatherScatter) { | ||
| Builder, DataTy, State.get(getAddr(), Part, !CreateGather), |
There was a problem hiding this comment.
| Builder, DataTy, State.get(getAddr(), Part, !CreateGather), | |
| Builder, DataTy, State.get(getAddr(), Part, IsConsecutive), |
There was a problem hiding this comment.
Kept as is for now, as CreateGather seems slightly more descriptive w.r.t. how it is used
| } else if (CreateGatherScatter) { | ||
| Builder, DataTy, State.get(getAddr(), Part, !CreateGather), | ||
| CreateGather, MaskPart, EVL, Alignment); | ||
| } else if (CreateGather) { |
There was a problem hiding this comment.
| } else if (CreateGather) { | |
| } else if (!IsConsecutive) { |
There was a problem hiding this comment.
Kept as is for now, as CreateGather seems slightly more descriptive w.r.t. how it is used
| const Align Alignment = getLoadStoreAlignment(&Ingredient); | ||
| bool CreateScatter = !isConsecutive(); | ||
|
|
||
| StoreInst *SI = cast<StoreInst>(&Ingredient); |
There was a problem hiding this comment.
| StoreInst *SI = cast<StoreInst>(&Ingredient); | |
| auto *SI = cast<StoreInst>(&Ingredient); |
| // FIXME: Support reverse store after vp_reverse is added. | ||
| Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr; | ||
| NewSI = lowerStoreUsingVectorIntrinsics( | ||
| Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal, |
There was a problem hiding this comment.
| Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal, | |
| Builder, State.get(getAddr(), Part, IsConsecutive), StoredVal, |
There was a problem hiding this comment.
Kept as is for now, as CreateScatter seems slightly more descriptive w.r.t. how it is used
| NewSI = lowerStoreUsingVectorIntrinsics( | ||
| Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal, | ||
| CreateScatter, MaskPart, EVL, Alignment); | ||
| } else if (CreateScatter) { |
There was a problem hiding this comment.
| } else if (CreateScatter) { | |
| } else if (!IsConsecutive) { |
There was a problem hiding this comment.
Kept as is for now, as CreateScatter seems slightly more descriptive w.r.t. how it is used
| return getNumOperands() == 2; | ||
| case VPDef::VPWidenStoreSC: | ||
| return getNumOperands() == 3; |
There was a problem hiding this comment.
| return getNumOperands() == 2; | |
| case VPDef::VPWidenStoreSC: | |
| return getNumOperands() == 3; | |
| cast<VPWidenLoadRecipe>(this)->isMasked(); | |
| case VPDef::VPWidenStoreSC: | |
| cast<VPWidenStoreRecipe>(this)->isMasked(); |
There was a problem hiding this comment.
Is it worth duplicating isMasked in the subclasses if we dispatch manually here? Having the checks here directly seems slightly more compact. Same for getAddr
There was a problem hiding this comment.
Is it needed at all to keep it in a base class? Maybe just use the recipe classes explicitly rather than rely on the base class? It exposes implementation details in the base class, which is not very good.
There was a problem hiding this comment.
Updated to use IsMasked to track if it is masked, keeping things simpler for the initial version, thanks!
| case VPDef::VPWidenLoadSC: | ||
| return getOperand(0); | ||
| case VPDef::VPWidenStoreSC: | ||
| return getOperand(1); |
There was a problem hiding this comment.
| case VPDef::VPWidenLoadSC: | |
| return getOperand(0); | |
| case VPDef::VPWidenStoreSC: | |
| return getOperand(1); | |
| case VPDef::VPWidenLoadSC: | |
| return cast<VPWidenLoadRecipe>(this)->getAddr(); | |
| case VPDef::VPWidenStoreSC: | |
| return cast<VPWidenStoreRecipe>(this)->getAddr(); |
There was a problem hiding this comment.
see comment above for isMasked
| /// Returns true if this recipe is a store. | ||
| bool isStore() const { return isa<StoreInst>(Ingredient); } | ||
| /// Generate the wide load/store. | ||
| void execute(VPTransformState &State) override = 0; |
There was a problem hiding this comment.
Do you really need to make it pure virtual, or is it enough to have an execute function in each implementation and just make this one llvm_unreachable?
There was a problem hiding this comment.
Replaced with llvm_unreachable, thanks!
fhahn
left a comment
There was a problem hiding this comment.
Addressed latest comments, thanks!
| assert((LI || SI) && "Invalid Load/Store instruction"); | ||
| assert((!SI || StoredValue) && "No stored value provided for widened store"); | ||
| assert((!LI || !StoredValue) && "Stored value provided for widened load"); | ||
| LoadInst *LI = cast<LoadInst>(&Ingredient); |
| auto *DataTy = VectorType::get(ScalarDataTy, State.VF); | ||
| const Align Alignment = getLoadStoreAlignment(&Ingredient); | ||
| bool CreateGatherScatter = !isConsecutive(); | ||
| bool CreateGather = !isConsecutive(); |
There was a problem hiding this comment.
Kept as is for now, as CreateGather seems slightly more descriptive w.r.t. how it is used
| Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter), | ||
| CreateGatherScatter, MaskPart, EVL, Alignment); | ||
| } else if (CreateGatherScatter) { | ||
| Builder, DataTy, State.get(getAddr(), Part, !CreateGather), |
There was a problem hiding this comment.
Kept as is for now, as CreateGather seems slightly more descriptive w.r.t. how it is used
| } else if (CreateGatherScatter) { | ||
| Builder, DataTy, State.get(getAddr(), Part, !CreateGather), | ||
| CreateGather, MaskPart, EVL, Alignment); | ||
| } else if (CreateGather) { |
There was a problem hiding this comment.
Kept as is for now, as CreateGather seems slightly more descriptive w.r.t. how it is used
| VPValue *StoredValue = getStoredValue(); | ||
|
|
||
| const Align Alignment = getLoadStoreAlignment(&Ingredient); | ||
| bool CreateScatter = !isConsecutive(); |
There was a problem hiding this comment.
Kept as is for now, as CreateScatter seems slightly more descriptive w.r.t. how it is used
| /// Returns true if this recipe is a store. | ||
| bool isStore() const { return isa<StoreInst>(Ingredient); } | ||
| /// Generate the wide load/store. | ||
| void execute(VPTransformState &State) override = 0; |
There was a problem hiding this comment.
Replaced with llvm_unreachable, thanks!
| return getNumOperands() == 2; | ||
| case VPDef::VPWidenStoreSC: | ||
| return getNumOperands() == 3; |
There was a problem hiding this comment.
Is it worth duplicating isMasked in the subclasses if we dispatch manually here? Having the checks here directly seems slightly more compact. Same for getAddr
| // FIXME: Support reverse store after vp_reverse is added. | ||
| Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr; | ||
| NewSI = lowerStoreUsingVectorIntrinsics( | ||
| Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal, |
There was a problem hiding this comment.
Kept as is for now, as CreateScatter seems slightly more descriptive w.r.t. how it is used
| NewSI = lowerStoreUsingVectorIntrinsics( | ||
| Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal, | ||
| CreateScatter, MaskPart, EVL, Alignment); | ||
| } else if (CreateScatter) { |
There was a problem hiding this comment.
Kept as is for now, as CreateScatter seems slightly more descriptive w.r.t. how it is used
| case VPDef::VPWidenLoadSC: | ||
| return getOperand(0); | ||
| case VPDef::VPWidenStoreSC: | ||
| return getOperand(1); |
There was a problem hiding this comment.
see comment above for isMasked
| bool isMasked() const { | ||
| switch (getVPDefID()) { | ||
| case VPDef::VPWidenLoadSC: | ||
| return getNumOperands() == 2; | ||
| case VPDef::VPWidenStoreSC: | ||
| return getNumOperands() == 3; |
There was a problem hiding this comment.
I think it can be fixed this way:
template <typename T>
class Base {
...
bool isMasked() const {
return cast<typename T>(this)->isMasked();
}
...
}
class Derived1 : public Base<Derived1> {
...
bool isMasked() const {return ..;}
}
class Derived2 : public Base<Derived2> {
...
bool isMasked() const {return ..;}
}
ayalz
left a comment
There was a problem hiding this comment.
Good step forward, thanks for following-up on this!
| // Handle loads. | ||
| assert(LI && "Must have a load instruction"); |
There was a problem hiding this comment.
| // Handle loads. | |
| assert(LI && "Must have a load instruction"); |
Only loads are handled, and LI is asserted to be non-null by the non-dynamic cast.
| InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF); | ||
| bool isMaskRequired = getMask(); | ||
| if (isMaskRequired) { | ||
| bool IsMaskRequired = getMask(); |
There was a problem hiding this comment.
Now that loads and stores are handled separately, it makes sense for each to get its mask while taking care of each part, instead of preparing BlockInMaskParts, and do so once for all EVL/gather/consecutive cases. I.e.,
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *NewLI;
Value *Mask = nullptr;
if (VPValue *VPMask = getMask()) {
Mask = State.get(VPMask, Part);
if (isReverse())
Mask = Builder.CreateVectorReverse(Mask, "reverse");
}
// TODO: split this into several classes for better design.
if (State.EVL) {
...
}
| case VPRecipeBase::VPBranchOnMaskSC: | ||
| case VPRecipeBase::VPWidenMemoryInstructionSC: | ||
| case VPRecipeBase::VPWidenLoadSC: | ||
| case VPRecipeBase::VPWidenStoreSC: |
There was a problem hiding this comment.
Hmm, the TODO below suggests that Loads should (also) be considered single def. Should VPWidenLoadRecipe inherit from both VPWidenMemoryRecipe and VPSingleDefRecipe? (Deserves a separate patch, but worth thinking when introducing the class hierarchy here.)
There was a problem hiding this comment.
Yes, unfortunately it will require some extra work, as at the moment both VPWidenMemoryRecipe and VPSingleDefRecipe inherit from VPRecipeBase so that each can manage operands.
There was a problem hiding this comment.
Another alternative may be to also consider VPWidenStoreRecipe as a Single Def recipe, with a singleton "void" Def that has no uses. Akin to LLVM. I.e., VPSingle[OrNo]DefRecipe.
| /// provided. | ||
| class VPWidenMemoryInstructionRecipe : public VPRecipeBase { | ||
| /// A common base class for widening memory operations. An optional mask can be | ||
| /// provided the last operand. |
There was a problem hiding this comment.
| /// provided the last operand. | |
| /// provided as the last operand. |
| VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL) | ||
| : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {StoredVal, Addr}, | ||
| Consecutive, Reverse, DL) { | ||
| assert((Consecutive || !Reverse) && "Reverse implies consecutive"); |
There was a problem hiding this comment.
nit: it suffices to assert that reverse implies consecutive in the WidenMemory base class, where both flags are held.
There was a problem hiding this comment.
Removed here, thanks!
| : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive, | ||
| Reverse, DL), | ||
| VPValue(this, &Load) { | ||
| assert((Consecutive || !Reverse) && "Reverse implies consecutive"); |
There was a problem hiding this comment.
nit: it suffices to assert that reverse implies consecutive once, in the WidenMemory base class, where both flags are held.
There was a problem hiding this comment.
Removed here, thanks!
| } | ||
|
|
||
| /// Return the address accessed by this recipe. | ||
| VPValue *getAddr() const { |
There was a problem hiding this comment.
Note that this adjusts the order of the operands for VPWidenStoreRecipe to match the order of operands of stores in IR and other recipes (like VPReplicateRecipe).
Note that the current order, even if distinct from IR and other recipes, would help simplify this base recipe, responsible for elements common to stores/loads/scatters/gathers, by holding the address as the first operand (and mask as last) for all, supporting its simple retrieval:
VPValue *getAddr() const {
return getOperand(0); // Address is the 1st, mandatory operand.
}
In any case, it may be good to swap the order in a follow-up patch.
There was a problem hiding this comment.
Updated to keep the address as first operand for now, to keep the patch simpler initially, thanks!
|
|
||
| VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC) | ||
| /// Returns true if the recipe is masked. | ||
| bool isMasked() const { |
There was a problem hiding this comment.
Another option is to have VPWidenMemoryRecipe maintain an IsMasked indicator instead of counting operands (the latter may be done by assert/validation).
There was a problem hiding this comment.
Updated for now, to keep initial version simpler, thanks!
ayalz
left a comment
There was a problem hiding this comment.
LGTM, thanks! Please wait a day or so if @alexey-bataev has further comments.
Added minor nits.
Commit message worth updating: last paragraph regarding operand reordering, and slight typo in first paragraph.
| StoreInst *Store = cast<StoreInst>(I); | ||
| return new VPWidenMemoryInstructionRecipe( | ||
| *Store, Ptr, Operands[0], Mask, Consecutive, Reverse, I->getDebugLoc()); | ||
| return new VPWidenStoreRecipe(*Store, Operands[0], Ptr, Mask, Consecutive, |
There was a problem hiding this comment.
nit: worth retaining the parameters in their order as operands? I.e., Ptr as first operand, before stored value.
| return; | ||
| } | ||
|
|
||
| // Handle loads. |
| void VPWidenLoadRecipe::execute(VPTransformState &State) { | ||
| // Attempt to issue a wide load. |
There was a problem hiding this comment.
| void VPWidenLoadRecipe::execute(VPTransformState &State) { | |
| // Attempt to issue a wide load. | |
| void VPWidenLoadRecipe::execute(VPTransformState &State) { |
nit: redundant?
| Mask = Builder.CreateVectorReverse(Mask, "reverse"); | ||
| } | ||
|
|
||
| Value *StoredVal = State.get(StoredValue, Part); |
There was a problem hiding this comment.
| Value *StoredVal = State.get(StoredValue, Part); | |
| Value *StoredVal = State.get(StoredValue, Part); | |
| if (isReverse()) { | |
| // If we store to reverse consecutive memory locations, then we need | |
| // to reverse the order of elements in the stored value. | |
| StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse"); | |
| // We don't want to update the value in the map as it might be used in | |
| // another expression. So don't call resetVectorValue(StoredVal). | |
| } |
| if (isReverse()) { | ||
| // If we store to reverse consecutive memory locations, then we need | ||
| // to reverse the order of elements in the stored value. | ||
| StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse"); | ||
| // We don't want to update the value in the map as it might be used in | ||
| // another expression. So don't call resetVectorValue(StoredVal). | ||
| } |
There was a problem hiding this comment.
| if (isReverse()) { | |
| // If we store to reverse consecutive memory locations, then we need | |
| // to reverse the order of elements in the stored value. | |
| StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse"); | |
| // We don't want to update the value in the map as it might be used in | |
| // another expression. So don't call resetVectorValue(StoredVal). | |
| } |
nit: better fix StoredVal above, when set. Can assert that reverse implies !State.EVL. The assert that reverse implies !CreateScatter == isConsecutive is already there.
| /// Return the value stored by this recipe. | ||
| VPValue *getStoredValue() const { return getOperand(1); } | ||
|
|
||
| /// Generate the wide load/store. |
There was a problem hiding this comment.
| /// Generate the wide load/store. | |
| /// Generate a wide store or scatter. |
| case VPWidenGEPSC: | ||
| case VPWidenIntOrFpInductionSC: | ||
| case VPWidenPHISC: | ||
| case VPWidenLoadSC: |
There was a problem hiding this comment.
| case VPWidenPHISC: | |
| case VPWidenLoadSC: | |
| case VPWidenLoadSC: | |
| case VPWidenPHISC: |
nit: retain lex order.
| case VPScalarIVStepsSC: | ||
| case VPPredInstPHISC: | ||
| case VPWidenStoreSC: |
There was a problem hiding this comment.
| case VPScalarIVStepsSC: | |
| case VPPredInstPHISC: | |
| case VPWidenStoreSC: | |
| case VPPredInstPHISC: | |
| case VPScalarIVStepsSC: | |
| case VPWidenStoreSC: |
nit: while we're here, can fix lex order.
| void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent, | ||
| VPSlotTracker &SlotTracker) const { | ||
| O << Indent << "WIDEN "; | ||
| printAsOperand(O, SlotTracker); |
There was a problem hiding this comment.
Can this call to printAsOperand() work w/o getVPSingleValue(), given that VPWidenLoadRecipe inherits from RecipeBase rather than SingleDefRecipe?
There was a problem hiding this comment.
VPWidenLoadRecipe inherits directly from VPValue.
| if (isa<VPWidenLoadRecipe>(&R)) { | ||
| continue; | ||
| } |
There was a problem hiding this comment.
| if (isa<VPWidenLoadRecipe>(&R)) { | |
| continue; | |
| } | |
| if (isa<VPWidenLoadRecipe>(&R)) | |
| continue; |
This patch introduces a new VPWidenMemoryRecipe base class and distinct sub-classes to model loads and stores.
This is a first step in an effort to simplify and modularize code generation for widened loads and stores and enable adding further more specialized memory recipes.