diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 78163b5fe35d5..f1bb40a592def 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7845,7 +7845,7 @@ static SmallVector preparePlanForEpilogueVectorLoop( [](const VPUser *U) { return isa(U) || isa(U) || - cast(U)->isScalarCast() || + cast(U)->isScalarCast() || cast(U)->getOpcode() == Instruction::Add; }) && diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 453b5eec9fe5e..fe503c1601c6b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -542,6 +542,14 @@ void VPBasicBlock::executeRecipes(VPTransformState *State, BasicBlock *BB) { for (VPRecipeBase &Recipe : Recipes) { State->setDebugLocFrom(Recipe.getDebugLoc()); + assert(Recipe.WideningInfo && "WideInfo needs to be set"); + assert( + (!Recipe.couldReplicatePerPart() || + (!Recipe.couldProducWideResult() && !Recipe.isAgnostic())) && + "WideInfo cannot simultaneously be ReplicatePart and Wide or Agnostic"); + assert((!Recipe.isAgnostic() || (Recipe.couldProduceNarrowResult() ^ + Recipe.couldProducWideResult())) && + "Agnostic WideInfo cannot be Narrow and Wide simultaneously"); Recipe.execute(*State); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 4a5420185224b..3880946f07180 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -409,6 +409,9 @@ class LLVM_ABI_FOR_TEST VPRecipeBase /// Subclass identifier (for isa/dyn_cast). const unsigned char SubclassID; + /// Four bits of widening information, which takes values in VPWideningTy. + unsigned char WideningInfo : 4; + /// Each VPRecipe belongs to a single VPBasicBlock. 
VPBasicBlock *Parent = nullptr; @@ -466,9 +469,30 @@ class LLVM_ABI_FOR_TEST VPRecipeBase VPLastPHISC = VPReductionPHISC, }; - VPRecipeBase(const unsigned char SC, ArrayRef Operands, + /// An enumeration for keeping track of the widening status of the recipe. + /// The recipe necessarily produces a scalar value if only the Narrow bit is + /// set, a wide value if only the Wide bit is set, and scalar values for each + /// unroll part if only the ReplicatePart bit is set. The Narrow bit can be + /// set on Wide and ReplicatePart recipes, which indicates that the recipe + /// could be considered narrow if profitable. For instructions not producing + /// values, like an assume or store, the bits talk about the inherent widening + /// status of the recipe. Finally, there is a class of instructions that + /// necessarily take vector operands and produce a scalar result, like + /// (Insert|Extract)Element, or necessarily take a single scalar operand and + /// produce a vector, like Broadcast: there is no widening decision to make on + /// this class, and it is marked with the Agnostic bit. + using VPWideningTy = enum { + Narrow = 1 << 0, + Wide = 1 << 1, + ReplicatePart = 1 << 2, + Agnostic = 1 << 3 + }; + + VPRecipeBase(const unsigned char SC, unsigned char WideInfo, + ArrayRef Operands, DebugLoc DL = DebugLoc::getUnknown()) - : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {} + : VPDef(), VPUser(Operands), SubclassID(SC), WideningInfo(WideInfo), + DL(DL) {} ~VPRecipeBase() override = default; @@ -552,8 +576,14 @@ class LLVM_ABI_FOR_TEST VPRecipeBase /// Returns the debug location of the recipe. DebugLoc getDebugLoc() const { return DL; } - /// Return true if the recipe is a scalar cast. - bool isScalarCast() const; + /// Methods to query WideningInfo. 
+ bool isAgnostic() const { return WideningInfo & Agnostic; } + bool producesNarrowResult() const { + return !(WideningInfo & (Wide | ReplicatePart)); + } + bool couldProduceNarrowResult() const { return WideningInfo & Narrow; } + bool couldProducWideResult() const { return WideningInfo & Wide; } + bool couldReplicatePerPart() const { return WideningInfo & ReplicatePart; } /// Set the recipe's debug location to \p NewDL. void setDebugLoc(DebugLoc NewDL) { DL = NewDL; } @@ -568,6 +598,13 @@ class LLVM_ABI_FOR_TEST VPRecipeBase #endif protected: + /// Methods to set WideningInfo. + void markNarrow() { WideningInfo = Narrow; } + void markPossiblyNarrow() { WideningInfo |= Narrow; } + void markVectorToScalar() { WideningInfo = (Narrow | Agnostic); } + void markScalarToVector() { WideningInfo = (Wide | Agnostic); } + void markReplicatePart() { WideningInfo = ReplicatePart; } + /// Compute the cost of this recipe either using a recipe's specialized /// implementation or using the legacy cost model and the underlying /// instructions. @@ -604,13 +641,15 @@ class LLVM_ABI_FOR_TEST VPRecipeBase /// Note that VPRecipeBase must be inherited from before VPValue. 
class VPSingleDefRecipe : public VPRecipeBase, public VPRecipeValue { public: - VPSingleDefRecipe(const unsigned char SC, ArrayRef Operands, + VPSingleDefRecipe(const unsigned char SC, unsigned char WideInfo, + ArrayRef Operands, DebugLoc DL = DebugLoc::getUnknown()) - : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this) {} + : VPRecipeBase(SC, WideInfo, Operands, DL), VPRecipeValue(this) {} - VPSingleDefRecipe(const unsigned char SC, ArrayRef Operands, - Value *UV, DebugLoc DL = DebugLoc::getUnknown()) - : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this, UV) {} + VPSingleDefRecipe(const unsigned char SC, unsigned char WideInfo, + ArrayRef Operands, Value *UV, + DebugLoc DL = DebugLoc::getUnknown()) + : VPRecipeBase(SC, WideInfo, Operands, DL), VPRecipeValue(this, UV) {} static inline bool classof(const VPRecipeBase *R) { switch (R->getVPRecipeID()) { @@ -1107,10 +1146,10 @@ static_assert(sizeof(VPIRFlags) <= 3, "VPIRFlags should not grow"); /// A pure-virtual common base class for recipes defining a single VPValue and /// using IR flags. struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags { - VPRecipeWithIRFlags(const unsigned char SC, ArrayRef Operands, - const VPIRFlags &Flags, + VPRecipeWithIRFlags(const unsigned char SC, unsigned char WideInfo, + ArrayRef Operands, const VPIRFlags &Flags, DebugLoc DL = DebugLoc::getUnknown()) - : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {} + : VPSingleDefRecipe(SC, WideInfo, Operands, DL), VPIRFlags(Flags) {} static inline bool classof(const VPRecipeBase *R) { return R->getVPRecipeID() == VPRecipeBase::VPBlendSC || @@ -1379,6 +1418,10 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, return Opcode == Instruction::PHI || Opcode == Instruction::GetElementPtr; } + /// Returns true if this VPInstruction produces a scalar value from a vector, + /// e.g. by performing a reduction or extracting a lane. 
+ bool isVectorToScalar() const; + public: VPInstruction(unsigned Opcode, ArrayRef Operands, const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {}, @@ -1481,13 +1524,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, /// Returns true if the recipe only uses the first part of operand \p Op. bool usesFirstPartOnly(const VPValue *Op) const override; - /// Returns true if this VPInstruction produces a scalar value from a vector, - /// e.g. by performing a reduction or extracting a lane. - bool isVectorToScalar() const; - - /// Returns true if this VPInstruction's operands are single scalars and the - /// result is also a single scalar. - bool isSingleScalar() const; + /// Returns true if this VPInstruction is a scalar cast. + bool isScalarCast() const { return Instruction::isCast(getOpcode()); } /// Returns the symbolic name assigned to the VPInstruction. StringRef getName() const { return Name; } @@ -1496,6 +1534,10 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, void setName(StringRef NewName) { Name = NewName.str(); } protected: + /// Returns true if this VPInstruction's operands are single scalars and the + /// result is also a single scalar. + bool isSingleScalar() const; + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the VPInstruction to \p O. void printRecipe(raw_ostream &O, const Twine &Indent, @@ -1524,8 +1566,6 @@ class VPInstructionWithType : public VPInstruction { static inline bool classof(const VPRecipeBase *R) { // VPInstructionWithType are VPInstructions with specific opcodes requiring // type information. 
- if (R->isScalarCast()) - return true; auto *VPI = dyn_cast(R); if (!VPI) return false; @@ -1536,7 +1576,7 @@ class VPInstructionWithType : public VPInstruction { case Instruction::Load: return true; default: - return false; + return VPI->isScalarCast(); } } @@ -1684,7 +1724,8 @@ class VPIRInstruction : public VPRecipeBase { /// VPIRInstruction::create() should be used to create VPIRInstructions, as /// subclasses may need to be created, e.g. VPIRPhi. VPIRInstruction(Instruction &I) - : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {} + : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, VPRecipeBase::Narrow, {}), + I(I) {} public: ~VPIRInstruction() override = default; @@ -1710,12 +1751,6 @@ class VPIRInstruction : public VPRecipeBase { Instruction &getInstruction() const { return I; } - bool usesScalars(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - return true; - } - bool usesFirstPartOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); @@ -1779,17 +1814,11 @@ class LLVM_ABI_FOR_TEST VPWidenRecipe : public VPRecipeWithIRFlags, public: VPWidenRecipe(Instruction &I, ArrayRef Operands, const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {}, - DebugLoc DL = {}) - : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL), - VPIRMetadata(Metadata), Opcode(I.getOpcode()) { - setUnderlyingValue(&I); - } + DebugLoc DL = {}); VPWidenRecipe(unsigned Opcode, ArrayRef Operands, const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {}, - DebugLoc DL = {}) - : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL), - VPIRMetadata(Metadata), Opcode(Opcode) {} + DebugLoc DL = {}); ~VPWidenRecipe() override = default; @@ -1841,7 +1870,8 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { CastInst *CI = nullptr, const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {}, 
DebugLoc DL = DebugLoc::getUnknown()) - : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, Flags, DL), + : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, VPRecipeBase::Wide, Op, + Flags, DL), VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) { assert(flagsValidForOpcode(Opcode) && "Set flags not supported for the provided opcode"); @@ -1903,8 +1933,8 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {}, DebugLoc DL = DebugLoc::getUnknown()) - : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments, - Flags, DL), + : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, + VPRecipeBase::Wide, CallArguments, Flags, DL), VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty), MayReadFromMemory(CI.mayReadFromMemory()), MayWriteToMemory(CI.mayWriteToMemory()), @@ -1917,8 +1947,8 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {}, DebugLoc DL = DebugLoc::getUnknown()) - : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments, - Flags, DL), + : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, + VPRecipeBase::Wide, CallArguments, Flags, DL), VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) { LLVMContext &Ctx = Ty->getContext(); @@ -1992,8 +2022,8 @@ class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags, ArrayRef CallArguments, const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {}, DebugLoc DL = {}) - : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments, Flags, - DL), + : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, VPRecipeBase::Wide, + CallArguments, Flags, DL), VPIRMetadata(Metadata), Variant(Variant) { setUnderlyingValue(UV); assert( @@ -2044,7 +2074,8 @@ class VPHistogramRecipe : public VPRecipeBase { public: VPHistogramRecipe(unsigned Opcode, ArrayRef Operands, 
DebugLoc DL = DebugLoc::getUnknown()) - : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL), + : VPRecipeBase(VPRecipeBase::VPHistogramSC, VPRecipeBase::Wide, Operands, + DL), Opcode(Opcode) {} ~VPHistogramRecipe() override = default; @@ -2094,7 +2125,9 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags { VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef Operands, const VPIRFlags &Flags = {}, DebugLoc DL = DebugLoc::getUnknown()) - : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands, Flags, DL), + : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, + VPRecipeBase::Wide | VPRecipeBase::Narrow, Operands, + Flags, DL), SourceElementTy(GEP->getSourceElementType()) { setUnderlyingValue(GEP); SmallVector> Metadata; @@ -2153,8 +2186,8 @@ class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags { public: VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL) - : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF}, - GEPFlags, DL), + : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, + VPRecipeBase::Narrow, {Ptr, VF}, GEPFlags, DL), SourceElementTy(SourceElementTy), Stride(Stride) { assert(Stride < 0 && "Stride must be negative"); } @@ -2222,7 +2255,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags { public: VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL) - : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC, Ptr, GEPFlags, DL), + : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC, + VPRecipeBase::Narrow, Ptr, GEPFlags, DL), SourceElementTy(SourceElementTy) {} VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC) @@ -2296,7 +2330,8 @@ class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe, protected: VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL = DebugLoc::getUnknown()) - : VPSingleDefRecipe(VPRecipeID, Start, 
UnderlyingInstr, DL) {} + : VPSingleDefRecipe(VPRecipeID, VPRecipeBase::Wide, Start, + UnderlyingInstr, DL) {} const VPRecipeBase *getAsRecipe() const override { return this; } @@ -2585,7 +2620,8 @@ class LLVM_ABI_FOR_TEST VPWidenPHIRecipe : public VPSingleDefRecipe, /// debug location \p DL and \p Name. VPWidenPHIRecipe(ArrayRef IncomingValues, DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") - : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, IncomingValues, DL), + : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, VPRecipeBase::Wide, + IncomingValues, DL), Name(Name.str()) {} VPWidenPHIRecipe *clone() override { @@ -2780,7 +2816,9 @@ class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPRecipeWithIRFlags { /// all other incoming values are merged into it. VPBlendRecipe(PHINode *Phi, ArrayRef Operands, const VPIRFlags &Flags, DebugLoc DL) - : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands, Flags, DL) { + : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, + VPRecipeBase::Wide | VPRecipeBase::Narrow, Operands, + Flags, DL) { assert(Operands.size() >= 2 && "Expected at least two operands!"); setUnderlyingValue(Phi); } @@ -2861,8 +2899,8 @@ class LLVM_ABI_FOR_TEST VPInterleaveBase : public VPRecipeBase, ArrayRef Operands, ArrayRef StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL) - : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG), - NeedsMaskForGaps(NeedsMaskForGaps) { + : VPRecipeBase(SC, VPRecipeBase::Wide, Operands, DL), VPIRMetadata(MD), + IG(IG), NeedsMaskForGaps(NeedsMaskForGaps) { // TODO: extend the masked interleaved-group support to reversed access. 
assert((!Mask || !IG->isReverse()) && "Reversed masked interleave-group not supported."); @@ -3046,13 +3084,15 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags { FastMathFlags FMFs, Instruction *I, ArrayRef Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL) - : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind), - Style(Style) { + : VPRecipeWithIRFlags(SC, VPRecipeBase::Wide, Operands, FMFs, DL), + RdxKind(RdxKind), Style(Style) { if (CondOp) { IsConditional = true; addOperand(CondOp); } setUnderlyingValue(I); + if (!isPartialReduction()) + markVectorToScalar(); } public: @@ -3188,9 +3228,6 @@ class LLVM_ABI_FOR_TEST VPReductionEVLRecipe : public VPReductionRecipe { /// a single scalar, only one copy will be generated. class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { - /// Indicator if only a single replica per lane is needed. - bool IsSingleScalar; - /// Indicator if the replicas are also predicated. bool IsPredicated; @@ -3198,20 +3235,13 @@ class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags, VPReplicateRecipe(Instruction *I, ArrayRef Operands, bool IsSingleScalar, VPValue *Mask = nullptr, const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {}, - DebugLoc DL = DebugLoc::getUnknown()) - : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands, Flags, DL), - VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar), - IsPredicated(Mask) { - setUnderlyingValue(I); - if (Mask) - addOperand(Mask); - } + DebugLoc DL = DebugLoc::getUnknown()); ~VPReplicateRecipe() override = default; VPReplicateRecipe *clone() override { auto *Copy = new VPReplicateRecipe( - getUnderlyingInstr(), operands(), IsSingleScalar, + getUnderlyingInstr(), operands(), producesNarrowResult(), isPredicated() ? 
getMask() : nullptr, *this, *this, getDebugLoc()); Copy->transferFlags(*this); return Copy; @@ -3228,22 +3258,13 @@ class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags, InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override; - bool isSingleScalar() const { return IsSingleScalar; } - bool isPredicated() const { return IsPredicated; } /// Returns true if the recipe only uses the first lane of operand \p Op. bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); - return isSingleScalar(); - } - - /// Returns true if the recipe uses scalars of operand \p Op. - bool usesScalars(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - return true; + return producesNarrowResult(); } /// Return the mask of a predicated VPReplicateRecipe. @@ -3266,7 +3287,8 @@ class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags, class LLVM_ABI_FOR_TEST VPBranchOnMaskRecipe : public VPRecipeBase { public: VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL) - : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {} + : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, VPRecipeBase::Narrow, + {BlockInMask}, DL) {} VPBranchOnMaskRecipe *clone() override { return new VPBranchOnMaskRecipe(getOperand(0), getDebugLoc()); @@ -3290,13 +3312,6 @@ class LLVM_ABI_FOR_TEST VPBranchOnMaskRecipe : public VPRecipeBase { printOperands(O, SlotTracker); } #endif - - /// Returns true if the recipe uses scalars of operand \p Op. - bool usesScalars(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - return true; - } }; /// A recipe to combine multiple recipes into a single 'expression' recipe, @@ -3434,10 +3449,10 @@ class VPExpressionRecipe : public VPSingleDefRecipe { /// effects. 
bool mayHaveSideEffects() const; - /// Returns true if the result of this VPExpressionRecipe is a single-scalar. - bool isSingleScalar() const; - protected: + /// Returns true if this recipe produces a scalar result. + bool isVectorToScalar() const; + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. void printRecipe(raw_ostream &O, const Twine &Indent, @@ -3455,7 +3470,9 @@ class LLVM_ABI_FOR_TEST VPPredInstPHIRecipe : public VPSingleDefRecipe { /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi /// nodes after merging back from a Branch-on-Mask. VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL) - : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV, DL) {} + : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, + VPRecipeBase::Wide | VPRecipeBase::Narrow, PredV, + DL) {} ~VPPredInstPHIRecipe() override = default; VPPredInstPHIRecipe *clone() override { @@ -3475,13 +3492,6 @@ class LLVM_ABI_FOR_TEST VPPredInstPHIRecipe : public VPSingleDefRecipe { return 0; } - /// Returns true if the recipe uses scalars of operand \p Op. - bool usesScalars(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - return true; - } - protected: #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. 
@@ -3518,7 +3528,8 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase, std::initializer_list Operands, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL) - : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I), + : VPRecipeBase(SC, VPRecipeBase::Wide, Operands, DL), + VPIRMetadata(Metadata), Ingredient(I), Alignment(getLoadStoreAlignment(&I)), Consecutive(Consecutive) {} public: @@ -3748,7 +3759,9 @@ class VPExpandSCEVRecipe : public VPSingleDefRecipe { public: VPExpandSCEVRecipe(const SCEV *Expr) - : VPSingleDefRecipe(VPRecipeBase::VPExpandSCEVSC, {}), Expr(Expr) {} + : VPSingleDefRecipe(VPRecipeBase::VPExpandSCEVSC, VPRecipeBase::Narrow, + {}), + Expr(Expr) {} ~VPExpandSCEVRecipe() override = default; @@ -3857,7 +3870,8 @@ class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe, public VPUnrollPartAccessor<1> { public: VPWidenCanonicalIVRecipe(VPRegionValue *CanonicalIV) - : VPSingleDefRecipe(VPRecipeBase::VPWidenCanonicalIVSC, {CanonicalIV}) {} + : VPSingleDefRecipe(VPRecipeBase::VPWidenCanonicalIVSC, + VPRecipeBase::Wide, {CanonicalIV}) {} ~VPWidenCanonicalIVRecipe() override = default; @@ -3913,7 +3927,8 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step) - : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step}), + : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, VPRecipeBase::Narrow, + {Start, IV, Step}), Kind(Kind), FPBinOp(FPBinOp) {} ~VPDerivedIVRecipe() override = default; @@ -3973,8 +3988,9 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags { VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL) - : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF}, - FMFs, DL), + : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, + 
VPRecipeBase::ReplicatePart, {IV, Step, VF}, FMFs, + DL), InductionOpcode(Opcode) {} VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 2225dfa310c6c..e214aef2fd1d6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -47,6 +47,14 @@ using VectorParts = SmallVector; #define LV_NAME "loop-vectorize" #define DEBUG_TYPE LV_NAME +bool VPUser::usesScalars(const VPValue *Op) const { + assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); + const VPRecipeBase *R = cast(this); + return (R->producesNarrowResult() && !R->isAgnostic()) || + (!isa(R) && R->couldReplicatePerPart()) || + usesFirstLaneOnly(Op); +} + bool VPRecipeBase::mayWriteToMemory() const { switch (getVPRecipeID()) { case VPExpressionSC: @@ -312,11 +320,6 @@ bool VPRecipeBase::isPhi() const { isa(this); } -bool VPRecipeBase::isScalarCast() const { - auto *VPI = dyn_cast(this); - return VPI && Instruction::isCast(VPI->getOpcode()); -} - void VPIRFlags::intersectFlags(const VPIRFlags &Other) { assert(OpType == Other.OpType && "OpType must match"); switch (OpType) { @@ -423,11 +426,42 @@ template class VPUnrollPartAccessor<2>; template class VPUnrollPartAccessor<3>; } +/// Returns true if \p Opcode preserves uniformity, i.e., if all operands are +/// uniform, the result will also be uniform. 
+static bool possiblyNarrowOpcode(unsigned Opcode) { + if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode)) + return true; + switch (Opcode) { + case Instruction::Freeze: + case Instruction::GetElementPtr: + case Instruction::ICmp: + case Instruction::FCmp: + case Instruction::Select: + case VPInstruction::Not: + case VPInstruction::MaskedCond: + case VPInstruction::PtrAdd: + return true; + default: + return false; + } +} + VPInstruction::VPInstruction(unsigned Opcode, ArrayRef Operands, const VPIRFlags &Flags, const VPIRMetadata &MD, DebugLoc DL, const Twine &Name) - : VPRecipeWithIRFlags(VPRecipeBase::VPInstructionSC, Operands, Flags, DL), + : VPRecipeWithIRFlags(VPRecipeBase::VPInstructionSC, VPRecipeBase::Wide, + Operands, Flags, DL), VPIRMetadata(MD), Opcode(Opcode), Name(Name.str()) { + if (isSingleScalar()) + markNarrow(); + else if (isVectorToScalar()) + markVectorToScalar(); + else if (Opcode == VPInstruction::Broadcast) + markScalarToVector(); + else if (is_contained({VPInstruction::Unpack, VPInstruction::PtrAdd}, Opcode)) + markReplicatePart(); + if (possiblyNarrowOpcode(Opcode)) + markPossiblyNarrow(); assert(flagsValidForOpcode(getOpcode()) && "Set flags not supported for the provided opcode"); assert(hasRequiredFlagsForOpcode(getOpcode()) && @@ -527,7 +561,8 @@ unsigned VPInstruction::getNumOperandsForOpcode() const { } bool VPInstruction::doesGeneratePerAllLanes() const { - return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this); + return Opcode == VPInstruction::Unpack || + (Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this)); } bool VPInstruction::canGenerateScalarForFirstLane() const { @@ -2324,6 +2359,27 @@ void VPIRFlags::printFlags(raw_ostream &O) const { } #endif +VPWidenRecipe::VPWidenRecipe(Instruction &I, ArrayRef Operands, + const VPIRFlags &Flags, + const VPIRMetadata &Metadata, DebugLoc DL) + : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, VPRecipeBase::Wide, Operands, + Flags, DL), + 
VPIRMetadata(Metadata), Opcode(I.getOpcode()) { + setUnderlyingValue(&I); + if (possiblyNarrowOpcode(Opcode)) + markPossiblyNarrow(); +} + +VPWidenRecipe::VPWidenRecipe(unsigned Opcode, ArrayRef Operands, + const VPIRFlags &Flags, + const VPIRMetadata &Metadata, DebugLoc DL) + : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, VPRecipeBase::Wide, Operands, + Flags, DL), + VPIRMetadata(Metadata), Opcode(Opcode) { + if (possiblyNarrowOpcode(Opcode)) + markPossiblyNarrow(); +} + void VPWidenRecipe::execute(VPTransformState &State) { auto &Builder = State.Builder; switch (Opcode) { @@ -2952,7 +3008,8 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF, VPExpressionRecipe::VPExpressionRecipe( ExpressionTypes ExpressionType, ArrayRef ExpressionRecipes) - : VPSingleDefRecipe(VPRecipeBase::VPExpressionSC, {}, {}), + : VPSingleDefRecipe(VPRecipeBase::VPExpressionSC, VPRecipeBase::Wide, {}, + {}), ExpressionRecipes(ExpressionRecipes), ExpressionType(ExpressionType) { assert(!ExpressionRecipes.empty() && "Nothing to combine?"); assert( @@ -3005,6 +3062,9 @@ VPExpressionRecipe::VPExpressionRecipe( for (auto *R : ExpressionRecipes) for (auto const &[LiveIn, Tmp] : zip(operands(), LiveInPlaceholders)) R->replaceUsesOfWith(LiveIn, Tmp); + + if (isVectorToScalar()) + markVectorToScalar(); } void VPExpressionRecipe::decompose() { @@ -3098,9 +3158,7 @@ bool VPExpressionRecipe::mayHaveSideEffects() const { return false; } -bool VPExpressionRecipe::isSingleScalar() const { - // Cannot use vputils::isSingleScalar(), because all external operands - // of the expression will be live-ins while bundled. 
+bool VPExpressionRecipe::isVectorToScalar() const { auto *RR = dyn_cast(ExpressionRecipes.back()); return RR && !RR->isPartialReduction(); } @@ -3303,10 +3361,28 @@ static void scalarizeInstruction(const Instruction *Instr, "are defined outside the vectorized region."); } +VPReplicateRecipe::VPReplicateRecipe(Instruction *I, + ArrayRef Operands, + bool IsSingleScalar, VPValue *Mask, + const VPIRFlags &Flags, + VPIRMetadata Metadata, DebugLoc DL) + : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, + IsSingleScalar ? VPRecipeBase::Narrow + : VPRecipeBase::ReplicatePart, + Operands, Flags, DL), + VPIRMetadata(Metadata), IsPredicated(Mask) { + setUnderlyingValue(I); + if (Mask) + addOperand(Mask); + if (possiblyNarrowOpcode(I->getOpcode())) + markPossiblyNarrow(); +} + void VPReplicateRecipe::execute(VPTransformState &State) { assert(!State.Lane && "replicate regions must be dissolved before ::execute"); - assert(IsSingleScalar && "VPReplicateRecipes outside replicate regions " - "must have already been unrolled"); + assert(producesNarrowResult() && + "VPReplicateRecipes outside replicate regions " + "must have already been unrolled"); Instruction *UI = getUnderlyingInstr(); scalarizeInstruction(UI, this, VPLane(0), State); } @@ -3397,7 +3473,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, // transform, avoid computing their cost multiple times for now. 
Ctx.SkipCostComputation.insert(UI); - if (VF.isScalable() && !isSingleScalar()) + if (VF.isScalable() && !producesNarrowResult()) return InstructionCost::getInvalid(); switch (UI->getOpcode()) { @@ -3443,7 +3519,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, Type *ResultTy = Ctx.Types.inferScalarType(this); InstructionCost ScalarCallCost = Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind); - if (isSingleScalar()) { + if (producesNarrowResult()) { if (CalledFn->isIntrinsic()) ScalarCallCost = std::min( ScalarCallCost, @@ -3473,14 +3549,14 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, case Instruction::FCmp: return getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1), Ctx) * - (isSingleScalar() ? 1 : VF.getFixedValue()); + (producesNarrowResult() ? 1 : VF.getFixedValue()); case Instruction::SDiv: case Instruction::UDiv: case Instruction::SRem: case Instruction::URem: { InstructionCost ScalarCost = getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1), Ctx); - if (isSingleScalar()) + if (producesNarrowResult()) return ScalarCost; // If any of the operands is from a different replicate region and has its @@ -3555,13 +3631,14 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, return UniformCost; } - Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF); + Type *PtrTy = + producesNarrowResult() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF); InstructionCost ScalarCost = ScalarMemOpCost + Ctx.TTI.getAddressComputationCost( PtrTy, UsedByLoadStoreAddress ? nullptr : Ctx.PSE.getSE(), PtrSCEV, Ctx.CostKind); - if (isSingleScalar()) + if (producesNarrowResult()) return ScalarCost; SmallVector OpsToScalarize; @@ -3624,7 +3701,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, case Instruction::AddrSpaceCast: { return getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1), Ctx) * - (isSingleScalar() ? 
1 : VF.getFixedValue()); + (producesNarrowResult() ? 1 : VF.getFixedValue()); } case Instruction::ExtractValue: case Instruction::InsertValue: @@ -3637,7 +3714,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPReplicateRecipe::printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << Indent << (IsSingleScalar ? "CLONE " : "REPLICATE "); + O << Indent << (producesNarrowResult() ? "CLONE " : "REPLICATE "); if (!getUnderlyingInstr()->getType()->isVoidTy()) { printAsOperand(O, SlotTracker); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 262f4798b3d63..0b30b75eb03e4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -310,7 +310,7 @@ static bool sinkScalarOperands(VPlan &Plan) { return; if (auto *RepR = dyn_cast(Candidate)) - if (!ScalarVFOnly && RepR->isSingleScalar()) + if (!ScalarVFOnly && RepR->producesNarrowResult()) return; WorkList.insert({SinkTo, Candidate}); @@ -520,8 +520,8 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe, // mask but in the replicate region. auto *RecipeWithoutMask = new VPReplicateRecipe( PredRecipe->getUnderlyingInstr(), drop_end(PredRecipe->operands()), - PredRecipe->isSingleScalar(), nullptr /*Mask*/, *PredRecipe, *PredRecipe, - PredRecipe->getDebugLoc()); + PredRecipe->producesNarrowResult(), nullptr /*Mask*/, *PredRecipe, + *PredRecipe, PredRecipe->getDebugLoc()); auto *Pred = Plan.createVPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask); auto *Exiting = Plan.createVPBasicBlock(Twine(RegionName) + ".continue"); @@ -908,7 +908,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { // Skip recipes that shouldn't be narrowed. 
if (!Def || !isa(Def) || Def->getNumUsers() == 0 || !Def->getUnderlyingValue() || - (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))) + (RepR && (RepR->producesNarrowResult() || RepR->isPredicated()))) continue; // Skip recipes that may have other lanes than their first used. @@ -1859,7 +1859,7 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { if (!isa(&R)) continue; auto *RepR = dyn_cast(&R); - if (RepR && (RepR->isSingleScalar() || RepR->isPredicated())) + if (RepR && (RepR->producesNarrowResult() || RepR->isPredicated())) continue; auto *RepOrWidenR = cast(&R); @@ -1911,7 +1911,7 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { auto *IRV = dyn_cast(Op); bool LiveInNeedsBroadcast = IRV && !isa(IRV->getValue()); auto *OpR = dyn_cast(Op); - return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar()); + return LiveInNeedsBroadcast || (OpR && OpR->producesNarrowResult()); })) continue; @@ -2523,7 +2523,7 @@ static void licm(VPlan &Plan) { // replicates to single-scalar replicates. // TODO: When unrolling, replicateByVF doesn't handle sunk // non-single-scalar replicates correctly. - if (!RepR->isSingleScalar()) + if (!RepR->producesNarrowResult()) continue; } @@ -4600,7 +4600,7 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) { for (VPRecipeBase &R : *VPBB) { // Only handle single-scalar replicated loads with invariant addresses. if (auto *RepR = dyn_cast(&R)) { - if (RepR->isPredicated() || !RepR->isSingleScalar() || + if (RepR->isPredicated() || !RepR->producesNarrowResult() || RepR->getOpcode() != Instruction::Load) continue; @@ -4742,18 +4742,18 @@ void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, // Find the load with minimum alignment to use. 
auto *LoadWithMinAlign = findRecipeWithMinAlign(Group); - bool IsSingleScalar = EarliestLoad->isSingleScalar(); + bool ProducesNarrowResult = EarliestLoad->producesNarrowResult(); assert(all_of(Group, - [IsSingleScalar](VPReplicateRecipe *R) { - return R->isSingleScalar() == IsSingleScalar; + [ProducesNarrowResult](VPReplicateRecipe *R) { + return R->producesNarrowResult() == ProducesNarrowResult; }) && - "all members in group must agree on IsSingleScalar"); + "all members in group must agree on narrowing"); // Create an unpredicated version of the earliest load with common // metadata. auto *UnpredicatedLoad = new VPReplicateRecipe( LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)}, - IsSingleScalar, /*Mask=*/nullptr, *EarliestLoad, CommonMetadata); + ProducesNarrowResult, /*Mask=*/nullptr, *EarliestLoad, CommonMetadata); UnpredicatedLoad->insertBefore(EarliestLoad); @@ -4810,10 +4810,10 @@ void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, VPValue *SelectedValue = Group[0]->getOperand(0); VPBuilder Builder(InsertBB, LastStore->getIterator()); - bool IsSingleScalar = Group[0]->isSingleScalar(); + bool ProducesNarrowResult = Group[0]->producesNarrowResult(); for (unsigned I = 1; I < Group.size(); ++I) { - assert(IsSingleScalar == Group[I]->isSingleScalar() && - "all members in group must agree on IsSingleScalar"); + assert(ProducesNarrowResult == Group[I]->producesNarrowResult() && + "all members in group must agree on narrowing"); VPValue *Mask = Group[I]->getMask(); VPValue *Value = Group[I]->getOperand(0); SelectedValue = Builder.createSelect(Mask, Value, SelectedValue, @@ -4826,7 +4826,7 @@ void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, // Create unconditional store with selected value and common metadata.
auto *UnpredicatedStore = new VPReplicateRecipe( StoreWithMinAlign->getUnderlyingInstr(), - {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar, + {SelectedValue, LastStore->getOperand(1)}, ProducesNarrowResult, /*Mask=*/nullptr, *LastStore, CommonMetadata); UnpredicatedStore->insertBefore(*InsertBB, LastStore->getIterator()); @@ -4901,18 +4901,15 @@ void VPlanTransforms::materializePacksAndUnpacks(VPlan &Plan) { for (VPBasicBlock *VPBB : concat(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { - if (!isa(&R)) + if (!R.couldReplicatePerPart()) continue; auto *DefR = cast(&R); auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) { VPRegionBlock *ParentRegion = cast(U)->getRegion(); return !U->usesScalars(DefR) || ParentRegion != LoopRegion; }; - if ((isa(DefR) && - cast(DefR)->isSingleScalar()) || - (isa(DefR) && - (vputils::onlyFirstLaneUsed(DefR) || - !cast(DefR)->doesGeneratePerAllLanes())) || + if ((isa(DefR) && + !cast(DefR)->doesGeneratePerAllLanes()) || none_of(DefR->users(), UsesVectorOrInsideReplicateRegion)) continue; @@ -5236,7 +5233,7 @@ static bool isAlreadyNarrow(VPValue *VPV) { if (isa(VPV)) return true; auto *RepR = dyn_cast(VPV); - return RepR && RepR->isSingleScalar(); + return RepR && RepR->producesNarrowResult(); } // Convert a wide recipe defining a VPValue \p V feeding an interleave group to @@ -5272,7 +5269,8 @@ narrowInterleaveGroupOp(VPValue *V, SmallPtrSetImpl &NarrowedOps) { } if (auto *RepR = dyn_cast(R)) { - assert(RepR->isSingleScalar() && RepR->getOpcode() == Instruction::Load && + assert(RepR->producesNarrowResult() && + RepR->getOpcode() == Instruction::Load && "must be a single scalar load"); NarrowedOps.insert(RepR); return RepR; diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index f1b9efae08377..006ed3d57a2b2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++
b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -940,12 +940,9 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { SmallVector ToRemove; for (VPBasicBlock *VPBB : VPBBsToUnroll) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { - if (!isa(&R) || - (isa(&R) && - cast(&R)->isSingleScalar()) || + if (!R.couldReplicatePerPart() || (isa(&R) && - !cast(&R)->doesGeneratePerAllLanes() && - cast(&R)->getOpcode() != VPInstruction::Unpack)) + !cast(&R)->doesGeneratePerAllLanes())) continue; auto *DefR = cast(&R); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index a60b490a69ce6..cd67dd591b147 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -338,62 +338,25 @@ bool vputils::isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE, match(Addr, m_scev_AffineAddRec(m_SCEV(), m_SCEV())); } -/// Returns true if \p Opcode preserves uniformity, i.e., if all operands are -/// uniform, the result will also be uniform. -static bool preservesUniformity(unsigned Opcode) { - if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode)) - return true; - switch (Opcode) { - case Instruction::Freeze: - case Instruction::GetElementPtr: - case Instruction::ICmp: - case Instruction::FCmp: - case Instruction::Select: - case VPInstruction::Not: - case VPInstruction::Broadcast: - case VPInstruction::MaskedCond: - case VPInstruction::PtrAdd: - return true; - default: - return false; - } -} - bool vputils::isSingleScalar(const VPValue *VPV) { // Live-in, symbolic and region-values represent single-scalar values. if (isa(VPV)) return true; - if (auto *Rep = dyn_cast(VPV)) {
+ const VPRecipeBase *R = VPV->getDefiningRecipe(); + if (auto *Rep = dyn_cast(R)) { const VPRegionBlock *RegionOfR = Rep->getRegion(); // Don't consider recipes in replicate regions as uniform yet; their first // lane cannot be accessed when executing the replicate region for other // lanes. if (RegionOfR && RegionOfR->isReplicator()) return false; - return Rep->isSingleScalar() || (preservesUniformity(Rep->getOpcode()) && - all_of(Rep->operands(), isSingleScalar)); - } - if (isa(VPV)) - return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar); - if (auto *WidenR = dyn_cast(VPV)) { - return preservesUniformity(WidenR->getOpcode()) && - all_of(WidenR->operands(), isSingleScalar); } - if (auto *VPI = dyn_cast(VPV)) - return VPI->isSingleScalar() || VPI->isVectorToScalar() || - (preservesUniformity(VPI->getOpcode()) && - all_of(VPI->operands(), isSingleScalar)); - if (auto *RR = dyn_cast(VPV)) - return !RR->isPartialReduction(); - if (isa( - VPV)) - return true; - if (auto *Expr = dyn_cast(VPV)) - return Expr->isSingleScalar(); - - // VPExpandSCEVRecipes must be placed in the entry and are always uniform. - return isa(VPV); + + // Consult the WideningInfo only after the replicate-region bail-out above. + if (R->producesNarrowResult() || R->isAgnostic()) + return true; + return R->couldProduceNarrowResult() && all_of(R->operands(), isSingleScalar); } bool vputils::isUniformAcrossVFsAndUFs(const VPValue *V) { @@ -419,19 +382,15 @@ bool vputils::isUniformAcrossVFsAndUFs(const VPValue *V) { // Be conservative about side-effects, except for the // known-side-effecting assumes and stores, which we know will be // uniform.
- return R->isSingleScalar() && + return R->producesNarrowResult() && (!R->mayHaveSideEffects() || isa(R->getUnderlyingInstr())) && all_of(R->operands(), isUniformAcrossVFsAndUFs); }) - .Case([](const VPWidenRecipe *R) { - return preservesUniformity(R->getOpcode()) && + .Case([](const auto *R) { + return (R->producesNarrowResult() || R->isAgnostic()) && all_of(R->operands(), isUniformAcrossVFsAndUFs); }) - .Case([](const VPInstruction *VPI) { - return preservesUniformity(VPI->getOpcode()) && - all_of(VPI->operands(), isUniformAcrossVFsAndUFs); - }) .Case([](const VPWidenCastRecipe *R) { // A cast is uniform according to its operand. return isUniformAcrossVFsAndUFs(R->getOperand(0)); diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 8f9806adf774e..9ad05bddf1c1e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -399,13 +399,8 @@ class VPUser { return const_operand_range(op_begin(), op_end()); } - /// Returns true if the VPUser uses scalars of operand \p Op. Conservatively - /// returns if only first (scalar) lane is used, as default. - virtual bool usesScalars(const VPValue *Op) const { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - return usesFirstLaneOnly(Op); - } + /// Returns true if the VPUser uses scalars of operand \p Op. + bool usesScalars(const VPValue *Op) const; /// Returns true if the VPUser only uses the first lane of operand \p Op. /// Conservatively returns false. 
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index a1ddda7eda969..dd9c1332d54e2 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -1697,7 +1697,8 @@ TEST_F(VPRecipeTest, CastVPReductionEVLRecipeToVPUser) { } // namespace struct VPDoubleValueDef : public VPRecipeBase { - VPDoubleValueDef(ArrayRef Operands) : VPRecipeBase(99, Operands) { + VPDoubleValueDef(ArrayRef Operands) + : VPRecipeBase(99, VPRecipeBase::Wide, Operands) { new VPRecipeValue(this); new VPRecipeValue(this); }