[VPlan] Create header phis once, after constructing VPlan0 (NFC).#168291
[VPlan] Create header phis once, after constructing VPlan0 (NFC).#168291
Conversation
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: Florian Hahn (fhahn) Changes: Together with #168289 & #166099 we can construct header phis once up front, after creating VPlan0, as the induction/reduction/first-order-recurrence classification applies across all VFs. #168289 & #166099 are included in the PR and should be reviewed first. Patch is 41.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168291.diff 13 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cbfbc29360b0b..68386cefe39a8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1423,6 +1423,11 @@ class LoopVectorizationCostModel {
return InLoopReductions.contains(Phi);
}
+ /// Returns the set of in-loop reduction PHIs.
+ const SmallPtrSetImpl<PHINode *> &getInLoopReductions() const {
+ return InLoopReductions;
+ }
+
/// Returns true if the predicated reduction select should be used to set the
/// incoming value for the reduction phi.
bool usePredicatedReductionSelect() const {
@@ -7626,58 +7631,6 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
VPIRMetadata(*Store, LVer), VPI->getDebugLoc());
}
-/// Creates a VPWidenIntOrFpInductionRecipe for \p PhiR. If needed, it will
-/// also insert a recipe to expand the step for the induction recipe.
-static VPWidenIntOrFpInductionRecipe *
-createWidenInductionRecipes(VPInstruction *PhiR,
- const InductionDescriptor &IndDesc, VPlan &Plan,
- ScalarEvolution &SE, Loop &OrigLoop) {
- assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
- "step must be loop invariant");
-
- VPValue *Start = PhiR->getOperand(0);
- assert(Plan.getLiveIn(IndDesc.getStartValue()) == Start &&
- "Start VPValue must match IndDesc's start value");
-
- VPValue *Step =
- vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
-
- // Update wide induction increments to use the same step as the corresponding
- // wide induction. This enables detecting induction increments directly in
- // VPlan and removes redundant splats.
- using namespace llvm::VPlanPatternMatch;
- if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue())))
- PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step);
-
- PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
- return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
- IndDesc, PhiR->getDebugLoc());
-}
-
-VPHeaderPHIRecipe *
-VPRecipeBuilder::tryToOptimizeInductionPHI(VPInstruction *VPI, VFRange &Range) {
- auto *Phi = cast<PHINode>(VPI->getUnderlyingInstr());
-
- // Check if this is an integer or fp induction. If so, build the recipe that
- // produces its scalar and vector values.
- if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi))
- return createWidenInductionRecipes(VPI, *II, Plan, *PSE.getSE(), *OrigLoop);
-
- // Check if this is pointer induction. If so, build the recipe for it.
- if (auto *II = Legal->getPointerInductionDescriptor(Phi)) {
- VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep());
- return new VPWidenPointerInductionRecipe(
- Phi, VPI->getOperand(0), Step, &Plan.getVFxUF(), *II,
- LoopVectorizationPlanner::getDecisionAndClampRange(
- [&](ElementCount VF) {
- return CM.isScalarAfterVectorization(Phi, VF);
- },
- Range),
- VPI->getDebugLoc());
- }
- return nullptr;
-}
-
VPWidenIntOrFpInductionRecipe *
VPRecipeBuilder::tryToOptimizeInductionTruncate(VPInstruction *VPI,
VFRange &Range) {
@@ -8154,45 +8107,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
// First, check for specific widening recipes that deal with inductions, Phi
// nodes, calls and memory operations.
VPRecipeBase *Recipe;
- if (auto *PhiR = dyn_cast<VPPhi>(R)) {
- VPBasicBlock *Parent = PhiR->getParent();
- [[maybe_unused]] VPRegionBlock *LoopRegionOf =
- Parent->getEnclosingLoopRegion();
- assert(LoopRegionOf && LoopRegionOf->getEntry() == Parent &&
- "Non-header phis should have been handled during predication");
- auto *Phi = cast<PHINode>(R->getUnderlyingInstr());
- assert(R->getNumOperands() == 2 && "Must have 2 operands for header phis");
- if ((Recipe = tryToOptimizeInductionPHI(PhiR, Range)))
- return Recipe;
-
- VPHeaderPHIRecipe *PhiRecipe = nullptr;
- assert((Legal->isReductionVariable(Phi) ||
- Legal->isFixedOrderRecurrence(Phi)) &&
- "can only widen reductions and fixed-order recurrences here");
- VPValue *StartV = R->getOperand(0);
- if (Legal->isReductionVariable(Phi)) {
- const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi);
- assert(RdxDesc.getRecurrenceStartValue() ==
- Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
-
- // If the PHI is used by a partial reduction, set the scale factor.
- unsigned ScaleFactor =
- getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
- PhiRecipe = new VPReductionPHIRecipe(
- Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi),
- CM.useOrderedReductions(RdxDesc), ScaleFactor);
- } else {
- // TODO: Currently fixed-order recurrences are modeled as chains of
- // first-order recurrences. If there are no users of the intermediate
- // recurrences in the chain, the fixed order recurrence should be modeled
- // directly, enabling more efficient codegen.
- PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
- }
- // Add backedge value.
- PhiRecipe->addOperand(R->getOperand(1));
- return PhiRecipe;
- }
- assert(!R->isPhi() && "only VPPhi nodes expected at this point");
+ assert(!R->isPhi() && "phis must be handled earlier");
auto *VPI = cast<VPInstruction>(R);
Instruction *Instr = R->getUnderlyingInstr();
@@ -8249,6 +8164,9 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction,
if (isa<VPReductionPHIRecipe>(BinOp) || isa<VPPartialReductionRecipe>(BinOp))
std::swap(BinOp, Accumulator);
+ if (auto *RedPhiR = dyn_cast<VPReductionPHIRecipe>(Accumulator))
+ RedPhiR->setVFScaleFactor(ScaleFactor);
+
assert(ScaleFactor ==
vputils::getVFScaleFactor(Accumulator->getDefiningRecipe()) &&
"all accumulators in chain must have same scale factor");
@@ -8295,6 +8213,12 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
OrigLoop, *LI, Legal->getWidestInductionType(),
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE);
+ // Create recipes for header phis.
+ VPlanTransforms::createHeaderPhiRecipes(
+ *VPlan0, *PSE.getSE(), *OrigLoop, Legal->getInductionVars(),
+ Legal->getReductionVars(), Legal->getFixedOrderRecurrences(),
+ CM.getInLoopReductions(), Hints.allowReordering());
+
auto MaxVFTimes2 = MaxVF * 2;
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
VFRange SubRange = {VF, MaxVFTimes2};
@@ -8415,25 +8339,18 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Mapping from VPValues in the initial plan to their widened VPValues. Needed
// temporarily to update created block masks.
DenseMap<VPValue *, VPValue *> Old2New;
+
+ // Now process all other blocks and instructions.
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
// Convert input VPInstructions to widened recipes.
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- auto *SingleDef = cast<VPSingleDefRecipe>(&R);
- auto *UnderlyingValue = SingleDef->getUnderlyingValue();
- // Skip recipes that do not need transforming, including canonical IV,
- // wide canonical IV and VPInstructions without underlying values. The
- // latter are added above for masking.
- // FIXME: Migrate code relying on the underlying instruction from VPlan0
- // to construct recipes below to not use the underlying instruction.
- if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
- &R) ||
- (isa<VPInstruction>(&R) && !UnderlyingValue))
+ auto *SingleDef = dyn_cast<VPInstruction>(&R);
+ if (!SingleDef || !SingleDef->getUnderlyingValue())
continue;
- assert(isa<VPInstruction>(&R) && UnderlyingValue && "unsupported recipe");
// TODO: Gradually replace uses of underlying instruction by analyses on
// VPlan.
- Instruction *Instr = cast<Instruction>(UnderlyingValue);
+ Instruction *Instr = cast<Instruction>(SingleDef->getUnderlyingValue());
Builder.setInsertPoint(SingleDef);
// The stores with invariant address inside the loop will be deleted, and
@@ -8759,9 +8676,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
RecurrenceDescriptor RdxDesc = Legal->getRecurrenceDescriptor(
cast<PHINode>(PhiR->getUnderlyingInstr()));
// Non-FP RdxDescs will have all fast math flags set, so clear them.
- FastMathFlags FMFs = isa<FPMathOperator>(CurrentLinkI)
- ? RdxDesc.getFastMathFlags()
- : FastMathFlags();
+ FastMathFlags FMFs =
+ RecurrenceDescriptor::isFloatingPointRecurrenceKind(Kind)
+ ? RdxDesc.getFastMathFlags()
+ : FastMathFlags();
auto *RedRecipe = new VPReductionRecipe(
Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
PhiR->isOrdered(), CurrentLinkI->getDebugLoc());
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index a7000aff06379..367b42d72633d 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -98,11 +98,6 @@ class VPRecipeBuilder {
/// recipe that takes an additional VPInstruction for the mask.
VPWidenMemoryRecipe *tryToWidenMemory(VPInstruction *VPI, VFRange &Range);
- /// Check if an induction recipe should be constructed for \p VPI. If so build
- /// and return it. If not, return null.
- VPHeaderPHIRecipe *tryToOptimizeInductionPHI(VPInstruction *VPI,
- VFRange &Range);
-
/// Optimize the special case where the operand of \p VPI is a constant
/// integer induction variable.
VPWidenIntOrFpInductionRecipe *
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index ea88eaa42d945..b8522e7305ca0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1084,7 +1084,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
OpcodeTy Opcode;
/// An optional name that can be used for the generated IR instruction.
- const std::string Name;
+ std::string Name;
/// Returns true if we can generate a scalar for the first lane only if
/// needed.
@@ -1183,6 +1183,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
/// Returns the symbolic name assigned to the VPInstruction.
StringRef getName() const { return Name; }
+
+ void setName(StringRef NewName) { Name = NewName.str(); }
};
/// A specialization of VPInstruction augmenting it with a dedicated result
@@ -2211,19 +2213,15 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
};
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
- bool IsScalarAfterVectorization;
-
public:
/// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
/// Start and the number of elements unrolled \p NumUnrolledElems, typically
/// VF*UF.
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
VPValue *NumUnrolledElems,
- const InductionDescriptor &IndDesc,
- bool IsScalarAfterVectorization, DebugLoc DL)
+ const InductionDescriptor &IndDesc, DebugLoc DL)
: VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
- Step, IndDesc, DL),
- IsScalarAfterVectorization(IsScalarAfterVectorization) {
+ Step, IndDesc, DL) {
addOperand(NumUnrolledElems);
}
@@ -2232,8 +2230,7 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
VPWidenPointerInductionRecipe *clone() override {
return new VPWidenPointerInductionRecipe(
cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
- getOperand(2), getInductionDescriptor(), IsScalarAfterVectorization,
- getDebugLoc());
+ getOperand(2), getInductionDescriptor(), getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
@@ -2309,8 +2306,10 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
VPFirstOrderRecurrencePHIRecipe *clone() override {
- return new VPFirstOrderRecurrencePHIRecipe(
+ auto *R = new VPFirstOrderRecurrencePHIRecipe(
cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
+ R->addOperand(getOperand(1));
+ return R;
}
void execute(VPTransformState &State) override;
@@ -2379,6 +2378,8 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// Get the factor that the VF of this recipe's output should be scaled by.
unsigned getVFScaleFactor() const { return VFScaleFactor; }
+ void setVFScaleFactor(unsigned ScaleFactor) { VFScaleFactor = ScaleFactor; }
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 4ffd5577d31a4..d517271b868f0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -533,6 +533,15 @@ static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
Plan.getEntry()->swapSuccessors();
createExtractsForLiveOuts(Plan, MiddleVPBB);
+
+ VPBuilder ScalarPHBuilder(ScalarPH);
+ for (const auto &[PhiR, ScalarPhiR] : zip_equal(
+ drop_begin(HeaderVPBB->phis()), Plan.getScalarHeader()->phis())) {
+ auto *VectorPhiR = cast<VPPhi>(&PhiR);
+ auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
+ {VectorPhiR, VectorPhiR->getOperand(0)}, VectorPhiR->getDebugLoc());
+ cast<VPIRPhi>(&ScalarPhiR)->addOperand(ResumePhiR);
+ }
}
std::unique_ptr<VPlan>
@@ -544,6 +553,93 @@ VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
return VPlan0;
}
+/// Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe
+/// for \p Phi based on \p IndDesc.
+static VPHeaderPHIRecipe *
+createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR,
+ const InductionDescriptor &IndDesc, VPlan &Plan,
+ ScalarEvolution &SE, Loop &OrigLoop) {
+ assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
+ "step must be loop invariant");
+
+ VPValue *Start = PhiR->getOperand(0);
+ assert(Plan.getLiveIn(IndDesc.getStartValue()) == Start &&
+ "Start VPValue must match IndDesc's start value");
+ VPValue *Step =
+ vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
+
+ if (IndDesc.getKind() == InductionDescriptor::IK_PtrInduction)
+ return new VPWidenPointerInductionRecipe(Phi, Start, Step, &Plan.getVFxUF(),
+ IndDesc, PhiR->getDebugLoc());
+
+ // Update wide induction increments to use the same step as the corresponding
+ // wide induction. This enables detecting induction increments directly in
+ // VPlan and removes redundant splats.
+ using namespace llvm::VPlanPatternMatch;
+ if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue())))
+ PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step);
+
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
+ IndDesc, PhiR->getDebugLoc());
+}
+
+void VPlanTransforms::createHeaderPhiRecipes(
+ VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop,
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
+ const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
+ const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
+ const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
+
+ VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
+ Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
+
+ for (VPRecipeBase &R : make_early_inc_range(*HeaderVPBB)) {
+ if (isa<VPCanonicalIVPHIRecipe>(&R))
+ continue;
+ auto *PhiR = dyn_cast<VPPhi>(&R);
+ if (!PhiR)
+ break;
+
+ // TODO: Gradually replace uses of underlying instruction by analyses on
+ // VPlan.
+ auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
+ assert(PhiR->getNumOperands() == 2 &&
+ "Must have 2 operands for header phis");
+
+ VPHeaderPHIRecipe *HeaderPhiR = nullptr;
+ auto InductionIt = Inductions.find(Phi);
+ if (InductionIt != Inductions.end()) {
+ HeaderPhiR = createWidenInductionRecipe(Phi, PhiR, InductionIt->second,
+ Plan, SE, OrigLoop);
+ } else {
+ VPValue *Start = PhiR->getOperand(0);
+ auto ReductionIt = Reductions.find(Phi);
+ if (ReductionIt != Reductions.end()) {
+ const RecurrenceDescriptor &RdxDesc = ReductionIt->second;
+ assert(RdxDesc.getRecurrenceStartValue() ==
+ Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
+
+ bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
+ HeaderPhiR = new VPReductionPHIRecipe(
+ Phi, RdxDesc.getRecurrenceKind(), *Start,
+ InLoopReductions.contains(Phi), UseOrderedReductions);
+ } else {
+ assert(FixedOrderRecurrences.contains(Phi) &&
+ "can only widen reductions and fixed-order recurrences here");
+ // TODO: Currently fixed-order recurrences are modeled as chains of
+ // first-order recurrences. If there are no users of the intermediate
+ // recurrences in the chain, the fixed order recurrence should be
+ // modeled directly, enabling more efficient codegen.
+ HeaderPhiR = new VPFirstOrderRecurrencePHIRecipe(Phi, *Start);
+ }
+ HeaderPhiR->addOperand(PhiR->getOperand(1));
+ }
+ HeaderPhiR->insertBefore(PhiR);
+ PhiR->replaceAllUsesWith(HeaderPhiR);
+ PhiR->eraseFromParent();
+ }
+}
+
void VPlanTransforms::handleEarlyExits(VPlan &Plan,
bool HasUncountableEarlyExit) {
auto *MiddleVPBB = cast<VPBasicBlock>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index aa85bd435ee9e..5071941eb1413 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -4310,7 +4310,7 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
- return IsScalarAfterVectorization &&
+ return vputils::onlyScalarValuesUsed(this) &&
(!IsScalable || vputils::onlyFirstLaneUsed(this));
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d05c22e3aeb61..b80c43661c53c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4459,9 +4459,10 @@ void VPlanTransforms::addBranchWeightToMiddleTerminator(
/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
/// the end value of the induction.
-static VPInstruction *addResumePhiRecipeForInduction(
- VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
- VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) {
+static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
+ VPBuilder &VectorPHBuilder,
+ VPTypeAnalysis &TypeInfo,
+ ...
[truncated]
|
aecd404 to
2c266d7
Compare
…NFC) This patch splits off VPReductionRecipe creation for in-loop reductions to a separate transform from adjustInLoopReductions, which has been renamed. The new transform has been updated to work directly on VPInstructions, and gets applied after header phis have been processed, once on VPlan0. Builds on top of llvm#168291 and llvm#166099 which should be reviewed first.
2c266d7 to
a086412
Compare
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
a086412 to
8e26db2
Compare
|
ping |
7cc7e6f to
aa55a07
Compare
|
ping |
aa55a07 to
761f70c
Compare
| @@ -8206,56 +8160,12 @@ bool VPRecipeBuilder::getScaledReductions( | |||
|
|
|||
| VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, | |||
There was a problem hiding this comment.
Worth being more specific, as in
| VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, | |
| VPRecipeBase *VPRecipeBuilder::tryToCreateWidenNonPhiRecipe(VPSingleDefRecipe *R, |
?
| // First, check for specific widening recipes that deal with optimizing | ||
| // truncates, calls and memory operations. | ||
| assert(!R->isPhi() && "phis must be handled earlier"); |
There was a problem hiding this comment.
| // First, check for specific widening recipes that deal with optimizing | |
| // truncates, calls and memory operations. | |
| assert(!R->isPhi() && "phis must be handled earlier"); | |
| assert(!R->isPhi() && "phis must be handled elsewhere"); | |
| // First, check for specific widening recipes that deal with optimizing | |
| // truncates, calls and memory operations. |
or does the code below rely on already having handled phis earlier?
| if (auto *RedPhiR = dyn_cast<VPReductionPHIRecipe>(Accumulator)) | ||
| RedPhiR->setVFScaleFactor(ScaleFactor); |
There was a problem hiding this comment.
ReductionPHIRecipe currently gets its Style/ScaleFactor set when constructed:
// If the PHI is used by a partial reduction, set the scale factor.
bool UseInLoopReduction = CM.isInLoopReduction(Phi);
bool UseOrderedReductions = CM.useOrderedReductions(RdxDesc);
unsigned ScaleFactor =
getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
PhiRecipe = new VPReductionPHIRecipe(
Phi, RdxDesc.getRecurrenceKind(), *StartV,
getReductionStyle(UseInLoopReduction, UseOrderedReductions,
ScaleFactor),
RdxDesc.hasUsesOutsideReductionChain());
Setting ScaleFactor here is needed now as noted below, but could this change be applied independently?
There was a problem hiding this comment.
Yep it could be done separately, but only needed as part of this change
There was a problem hiding this comment.
Reasonable to move the setting of RedPhiR's ScaleFactor here in tryToCreatePartialReduction() from the construction of reduction phi recipes in tryToCreateWidenRecipe(), before this refactoring patch introduces VPlanTransforms::createHeaderPhiRecipes()?
There was a problem hiding this comment.
yes, will land separately beforehand, thanks!
| // Create recipes for header phis. | ||
| VPlanTransforms::createHeaderPhiRecipes( | ||
| *VPlan0, *PSE.getSE(), *OrigLoop, Legal->getInductionVars(), | ||
| Legal->getReductionVars(), Legal->getFixedOrderRecurrences(), | ||
| CM.getInLoopReductions(), Hints.allowReordering()); | ||
|
|
There was a problem hiding this comment.
Should createHeaderPhiRecipes() be part of buildVPlan0()? This would admittedly require passing the former's parameters to the latter.
There was a problem hiding this comment.
I am not sure; I don't think it would be good to pass more legacy information to buildVPlan0. Keeping it separate also means the initial VPlan0 still reflects the scalar loop directly, without any widening, which should make it easier to compute the scalar cost based on VPlan0.
There was a problem hiding this comment.
OK. Note that classifying the recurrence type of each header phi as being either fixed-order or else loop-based, and further classifying the latter as either induction (invariant/constant increment add recurrence) or else reduction (of various types), can be done independent of widening considerations and decisions. Analogous to recording Legal properties/descriptors independent of CM decisions.
| // Skip recipes that do not need transforming, including canonical IV, | ||
| // wide canonical IV and VPInstructions without underlying values. The | ||
| // latter are added above for masking. | ||
| // FIXME: Migrate code relying on the underlying instruction from VPlan0 | ||
| // to construct recipes below to not use the underlying instruction. |
There was a problem hiding this comment.
Worth retaining/updating these comments?
There was a problem hiding this comment.
Yep, moved to the TODO below, thanks
| VPValue *BackedgeValue = PhiR->getOperand(1); | ||
| DebugLoc DL = PhiR->getDebugLoc(); | ||
|
|
||
| VPHeaderPHIRecipe *HeaderPhiR = nullptr; |
There was a problem hiding this comment.
Introduce a lambda and call it as in VPHeaderPHIRecipe *HeaderPhiR = createHeaderPhiRecipe()?
| HeaderPhiR = new VPFirstOrderRecurrencePHIRecipe(Phi, *Start); | ||
| HeaderPhiR->addOperand(BackedgeValue); |
There was a problem hiding this comment.
Feed BackedgeValue in addition to the constructor of VPFirstOrderRecurrencePHIRecipe?
Or feed it only PhiR - indicating the conversion from VPPhi to FOR Phi?
There was a problem hiding this comment.
Updated to take the backedge value, thanks
| } else { | ||
| assert(FixedOrderRecurrences.contains(Phi) && |
There was a problem hiding this comment.
nit: perhaps start with FOR as it seems the simplest case.
| HeaderPhiR = new VPReductionPHIRecipe( | ||
| Phi, RdxDesc.getRecurrenceKind(), *Start, | ||
| getReductionStyle(InLoopReductions.contains(Phi), | ||
| UseOrderedReductions, 1), |
There was a problem hiding this comment.
This deviates from the current construction of VPReductionPHIRecipes which passes
unsigned ScaleFactor = getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
instead of 1, which requires resetting ScaleFactor later?
There was a problem hiding this comment.
Yep, by default all reductions start out as unscaled, to be optimized later if possible
| void VPlanTransforms::createHeaderPhiRecipes( | ||
| VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop, | ||
| const MapVector<PHINode *, InductionDescriptor> &Inductions, | ||
| const MapVector<PHINode *, RecurrenceDescriptor> &Reductions, | ||
| const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences, | ||
| const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) { |
There was a problem hiding this comment.
Follow-up thought: VPPhi's may first be classified into FOR and loop(non-fixed-ordered)-recurrences; then classifying the latter into Inductions and Reductions. Finally deciding how to best handle the latter - which may be VF/cost-dependent: in-loop or not, partial?
There was a problem hiding this comment.
Yes, those are great candidates to be moved to VPlan analysis.
761f70c to
41f4576
Compare
Pass backedge values directly to VPFirstOrderRecurrencePHIRecipe and VPReductionPHIRecipe, as they must be provided and available. Split off from #168291.
41f4576 to
dec335d
Compare
…phis (NFC). Pass backedge values directly to VPFirstOrderRecurrencePHIRecipe and VPReductionPHIRecipe, as they must be provided and available. Split off from llvm/llvm-project#168291.
Pass backedge values directly to VPFirstOrderRecurrencePHIRecipe and VPReductionPHIRecipe, as they must be provided and available. Split off from llvm#168291.
ayalz
left a comment
There was a problem hiding this comment.
LGTM, thanks! Adding some final comments.
| assert(Reductions.contains(Phi) && | ||
| "can only widen reductions and fixed-order recurrences here"); |
There was a problem hiding this comment.
| assert(Reductions.contains(Phi) && | |
| "can only widen reductions and fixed-order recurrences here"); | |
| assert(Reductions.contains(Phi) && "only reductions are expected now"); |
| if (auto *RedPhiR = dyn_cast<VPReductionPHIRecipe>(Accumulator)) | ||
| RedPhiR->setVFScaleFactor(ScaleFactor); |
There was a problem hiding this comment.
Reasonable to move the setting of RedPhiR's ScaleFactor here in tryToCreatePartialReduction() from the construction of reduction phi recipes in tryToCreateWidenRecipe(), before this refactoring patch introduces VPlanTransforms::createHeaderPhiRecipes()?
| // Create recipes for header phis. | ||
| VPlanTransforms::createHeaderPhiRecipes( | ||
| *VPlan0, *PSE.getSE(), *OrigLoop, Legal->getInductionVars(), | ||
| Legal->getReductionVars(), Legal->getFixedOrderRecurrences(), | ||
| CM.getInLoopReductions(), Hints.allowReordering()); | ||
|
|
There was a problem hiding this comment.
OK. Note that classifying the recurrence type of each header phi as being either fixed-order or else loop-based, and further classifying the latter as either induction (invariant/constant increment add recurrence) or else reduction (of various types), can be done independent of widening considerations and decisions. Analogous to recording Legal properties/descriptors independent of CM decisions.
| VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, Builder, | ||
| BlockMaskCache); |
There was a problem hiding this comment.
RecipeBuilder(tryToOptimizeInductionPHI()) no longer calls createWidenInductionRecipes() which requires PSE(SE), so the latter can be dropped.
| if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>( | ||
| &R) || | ||
| (isa<VPInstruction>(&R) && !UnderlyingValue)) | ||
| auto *VPI = dyn_cast<VPInstruction>(&R); |
There was a problem hiding this comment.
| auto *VPI = dyn_cast<VPInstruction>(&R); | |
| auto *VPI = dyn_cast<VPInstruction>(&R); | |
| // Skip recipes that do not need transforming, including non-VPInstructions | |
| // (such as ...) and VPInstructions without underlying values. The | |
| // latter are added above for masking. |
| if (IndDesc.getKind() == InductionDescriptor::IK_PtrInduction) | ||
| return new VPWidenPointerInductionRecipe(Phi, Start, Step, &Plan.getVFxUF(), | ||
| IndDesc, DL); | ||
|
|
There was a problem hiding this comment.
Worth asserting getKind() is now Int or Fp (not NoInduction)?
| auto *PhiR = dyn_cast<VPPhi>(&R); | ||
| if (!PhiR) | ||
| break; |
There was a problem hiding this comment.
Can this be
| auto *PhiR = dyn_cast<VPPhi>(&R); | |
| if (!PhiR) | |
| break; | |
| auto *PhiR = cast<VPPhi>(&R); |
given the iteration over phis()?
| return new VPReductionPHIRecipe( | ||
| Phi, RdxDesc.getRecurrenceKind(), *Start, *BackedgeValue, | ||
| getReductionStyle(InLoopReductions.contains(Phi), UseOrderedReductions, | ||
| 1), |
There was a problem hiding this comment.
| return new VPReductionPHIRecipe( | |
| Phi, RdxDesc.getRecurrenceKind(), *Start, *BackedgeValue, | |
| getReductionStyle(InLoopReductions.contains(Phi), UseOrderedReductions, | |
| 1), | |
| unsigned ScaleFactor = 1; // Will be updated later to >1 if reduction is partial. | |
| return new VPReductionPHIRecipe( | |
| Phi, RdxDesc.getRecurrenceKind(), *Start, *BackedgeValue, | |
| getReductionStyle(InLoopReductions.contains(Phi), UseOrderedReductions, | |
| ScaleFactor), |
?
Split off unrelated change from approved #168291 to land separately as suggested.
… (NFCI). Split off unrelated change from approved llvm/llvm-project#168291 to land separately as suggested.
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/37170 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/141/builds/13701 Here is the relevant piece of the build log for the reference |
… (NFC). (#168291) Together with llvm/llvm-project#168289 & llvm/llvm-project#166099 we can construct header phis once up front, after creating VPlan0, as the induction/reduction/first-order-recurrence classification applies across all VFs. Depends on llvm/llvm-project#168289 & llvm/llvm-project#166099 PR: llvm/llvm-project#168291
| (isa<VPInstruction>(&R) && !UnderlyingValue)) | ||
| auto *VPI = dyn_cast<VPInstruction>(&R); | ||
| // Skip recipes that do not need transforming, including | ||
| // non-VPInstructions (such as ...) and VPInstructions without underlying |
There was a problem hiding this comment.
The ... should be replaced by concrete examples...
There was a problem hiding this comment.
We now only need to process non-phi recipes, and there is only a small list of recipes that do not need transforming. These can be skipped explicitly, which should be cleaner: d777b1a
…NFC) This patch splits off VPReductionRecipe creation for in-loop reductions to a separate transform from adjustInLoopReductions, which has been renamed. The new transform has been updated to work directly on VPInstructions, and gets applied after header phis have been processed, once on VPlan0. Builds on top of llvm#168291 and llvm#166099 which should be reviewed first.
…NFC) This patch splits off VPReductionRecipe creation for in-loop reductions to a separate transform from adjustInLoopReductions, which has been renamed. The new transform has been updated to work directly on VPInstructions, and gets applied after header phis have been processed, once on VPlan0. Builds on top of llvm#168291 and llvm#166099 which should be reviewed first.
Pass backedge values directly to VPFirstOrderRecurrencePHIRecipe and VPReductionPHIRecipe, as they must be provided and available. Split off from llvm#168291.
Split off unrelated change from approved llvm#168291 to land separately as suggested.
…vm#168291) Together with llvm#168289 & llvm#166099 we can construct header phis once up front, after creating VPlan0, as the induction/reduction/first-order-recurrence classification applies across all VFs. Depends on llvm#168289 & llvm#166099 PR: llvm#168291
Pass backedge values directly to VPFirstOrderRecurrencePHIRecipe and VPReductionPHIRecipe, as they must be provided and available. Split off from llvm#168291.
Split off unrelated change from approved llvm#168291 to land separately as suggested.
…vm#168291) Together with llvm#168289 & llvm#166099 we can construct header phis once up front, after creating VPlan0, as the induction/reduction/first-order-recurrence classification applies across all VFs. Depends on llvm#168289 & llvm#166099 PR: llvm#168291
…NFC) (#168784) This patch splits off VPReductionRecipe creation for in-loop reductions to a separate transform from adjustInLoopReductions, which has been renamed. The new transform has been updated to work directly on VPInstructions, and gets applied after header phis have been processed, once on VPlan0. Builds on top of #168291 and #166099 which should be reviewed first. PR: #168784
…eductions (NFC) (#168784) This patch splits off VPReductionRecipe creation for in-loop reductions to a separate transform from adjustInLoopReductions, which has been renamed. The new transform has been updated to work directly on VPInstructions, and gets applied after header phis have been processed, once on VPlan0. Builds on top of llvm/llvm-project#168291 and llvm/llvm-project#166099 which should be reviewed first. PR: llvm/llvm-project#168784
No phi recipes are being transformed in the main loop any longer, so skip phi recipes. This also makes it possible to state explicitly which recipes need skipping. Those are recipes that have already been transformed. Follow-up to post-commit comment in #168291.
No phi recipes are being transformed in the main loop any longer, so skip phi recipes. This also makes it possible to state explicitly which recipes need skipping. Those are recipes that have already been transformed. Follow-up to post-commit comment in llvm/llvm-project#168291.
…NFC) (llvm#168784) This patch splits off VPReductionRecipe creation for in-loop reductions to a separate transform from adjustInLoopReductions, which has been renamed. The new transform has been updated to work directly on VPInstructions, and gets applied after header phis have been processed, once on VPlan0. Builds on top of llvm#168291 and llvm#166099 which should be reviewed first. PR: llvm#168784
No phi recipes are being transformed in the main loop any longer, so skip phi recipes. This also makes it possible to state explicitly which recipes need skipping. Those are recipes that have already been transformed. Follow-up to post-commit comment in llvm#168291.
…NFC) (#168784) This patch splits off VPReductionRecipe creation for in-loop reductions to a separate transform from adjustInLoopReductions, which has been renamed. The new transform has been updated to work directly on VPInstructions, and gets applied after header phis have been processed, once on VPlan0. Builds on top of llvm/llvm-project#168291 and llvm/llvm-project#166099 which should be reviewed first. PR: llvm/llvm-project#168784
No phi recipes are being transformed in the main loop any longer, so skip phi recipes. This also makes it possible to state explicitly which recipes need skipping. Those are recipes that have already been transformed. Follow-up to post-commit comment in llvm/llvm-project#168291.
…NFC) (#168784) This patch splits off VPReductionRecipe creation for in-loop reductions to a separate transform from adjustInLoopReductions, which has been renamed. The new transform has been updated to work directly on VPInstructions, and gets applied after header phis have been processed, once on VPlan0. Builds on top of llvm/llvm-project#168291 and llvm/llvm-project#166099 which should be reviewed first. PR: llvm/llvm-project#168784 (cherry picked from commit c2a8739)
No phi recipes are being transformed in the main loop any longer, so skip phi recipes. This also makes it possible to state explicitly which recipes need skipping. Those are recipes that have already been transformed. Follow-up to post-commit comment in llvm/llvm-project#168291. (cherry picked from commit d777b1a)
Together with #168289 & #166099 we can construct header phis once up front, after creating VPlan0, as the induction/reduction/first-order-recurrence classification applies across all VFs.
#168289 & #166099 are included in the PR and should be reviewed first.