diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index cdf9e8b9dede0..76f57165322a8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -622,7 +622,8 @@ unsigned VPInstruction::getNumOperandsForOpcode() const { } bool VPInstruction::doesGeneratePerAllLanes() const { - return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this); + return Opcode == VPInstruction::Unpack || + (Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this)); } bool VPInstruction::canGenerateScalarForFirstLane() const { diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 65ad0feaa71c9..5ee3de315697b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -945,8 +945,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { (isa(&R) && cast(&R)->isSingleScalar()) || (isa(&R) && - !cast(&R)->doesGeneratePerAllLanes() && - cast(&R)->getOpcode() != VPInstruction::Unpack)) + !cast(&R)->doesGeneratePerAllLanes())) continue; auto *DefR = cast(&R); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 9750cba584f82..863234a44ece8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -365,11 +365,43 @@ bool vputils::isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE, match(Addr, m_scev_AffineAddRec(m_SCEV(), m_SCEV())); } -/// Returns true if \p Opcode preserves uniformity, i.e., if all operands are -/// uniform, the result will also be uniform. -static bool preservesUniformity(unsigned Opcode) { +/// A class keeping track of widening information of various recipes. 
+/// A recipe necessarily produces a single scalar value if only the SingleScalar +/// bit is set, a wide value if only the Wide bit is set, and scalar values for +/// all lanes if only the Replicate bit is set. The SingleScalar bit can be set on +/// Wide or Replicate recipes, which indicates that the recipe could be narrowed +/// to single-scalar if legal and profitable. For instructions not producing +/// values, like an assume or store, the bits talk about the inherent widening +/// of the recipe. Finally, there is a class of instructions that necessarily +/// take vector operands and produce a scalar result, like +/// (Insert|Extract)Element, or necessarily take scalar values and produce a +/// vector, like Build(Struct)Vector: there is no widening decision to make +/// on this class, and it is marked with the Agnostic bit. Broadcast is a +/// special-case of a scalar-to-vector which consumes a single-scalar and +/// produces a vector. +class VPWideningInfo { + unsigned char Info : 4; + +public: + using VPWideningTy = enum { + SingleScalar = 1 << 0, + Wide = 1 << 1, + Replicate = 1 << 2, + Agnostic = 1 << 3 + }; + + VPWideningInfo(unsigned char Info) : Info(Info) {} + operator unsigned char() const { return Info; } + bool producesSingleScalarResult() const { + return !(Info & (Wide | Replicate)); + } + bool couldProduceSingleScalarResult() const { return Info & SingleScalar; } +}; + +static VPWideningInfo getNarrowableWideningInfo(unsigned Opcode, + VPWideningInfo WideOrRep) { if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode)) - return true; + return WideOrRep | VPWideningInfo::SingleScalar; switch (Opcode) { case Instruction::Freeze: case Instruction::GetElementPtr: @@ -377,20 +409,103 @@ static bool preservesUniformity(unsigned Opcode) { case Instruction::FCmp: case Instruction::Select: case VPInstruction::Not: - case VPInstruction::Broadcast: case VPInstruction::MaskedCond: case VPInstruction::PtrAdd: - return true; + return WideOrRep | 
VPWideningInfo::SingleScalar; default: - return false; + return WideOrRep; } } -bool vputils::isSingleScalar(const VPValue *VPV) { - // Live-in, symbolic and region-values represent single-scalar values. - if (isa(VPV)) - return true; +static VPWideningInfo getWideningInfo(const VPRecipeBase &R) { + switch (R.getVPRecipeID()) { + case VPRecipeBase::VPVectorPointerSC: + case VPRecipeBase::VPVectorEndPointerSC: + case VPRecipeBase::VPDerivedIVSC: + case VPRecipeBase::VPExpandSCEVSC: + case VPRecipeBase::VPIRInstructionSC: + case VPRecipeBase::VPBranchOnMaskSC: + return VPWideningInfo::SingleScalar; + case VPRecipeBase::VPScalarIVStepsSC: + return VPWideningInfo::Replicate; + case VPRecipeBase::VPWidenCastSC: + case VPRecipeBase::VPWidenGEPSC: + case VPRecipeBase::VPPredInstPHISC: + case VPRecipeBase::VPBlendSC: + return VPWideningInfo::Wide | VPWideningInfo::SingleScalar; + case VPRecipeBase::VPInstructionSC: { + auto *VPI = cast(&R); + if (VPI->isVectorToScalar()) + return VPWideningInfo::SingleScalar | VPWideningInfo::Agnostic; + // Broadcast is a single-scalar to vector. + if (VPI->getOpcode() == VPInstruction::Broadcast) + return VPWideningInfo::Wide | VPWideningInfo::SingleScalar | + VPWideningInfo::Agnostic; + // Build(Struct)Vector take multiple scalars and produce a vector. + if (is_contained( + {VPInstruction::BuildStructVector, VPInstruction::BuildVector}, + VPI->getOpcode())) + return VPWideningInfo::Wide | VPWideningInfo::Agnostic; + if (VPI->isSingleScalar()) + return VPWideningInfo::SingleScalar; + if (VPI->doesGeneratePerAllLanes()) + return VPWideningInfo::Replicate; + return getNarrowableWideningInfo(VPI->getOpcode(), VPWideningInfo::Wide); + } + case VPRecipeBase::VPExpressionSC: { + auto *Expr = cast(&R); + return Expr->isVectorToScalar() + ? 
(VPWideningInfo::SingleScalar | VPWideningInfo::Agnostic) + : VPWideningInfo::Wide; + } + case VPRecipeBase::VPReductionSC: + case VPRecipeBase::VPReductionEVLSC: { + auto *Red = cast(&R); + return Red->isPartialReduction() + ? VPWideningInfo::Wide + : (VPWideningInfo::SingleScalar | VPWideningInfo::Agnostic); + } + case VPRecipeBase::VPReplicateSC: { + auto *Rep = cast(&R); + if (Rep->isSingleScalar()) + return VPWideningInfo::SingleScalar; + return getNarrowableWideningInfo(Rep->getOpcode(), + VPWideningInfo::Replicate); + } + case VPRecipeBase::VPWidenSC: { + auto *Wide = cast(&R); + return getNarrowableWideningInfo(Wide->getOpcode(), VPWideningInfo::Wide); + } + case VPRecipeBase::VPWidenCanonicalIVSC: + case VPRecipeBase::VPWidenPHISC: + case VPRecipeBase::VPWidenCallSC: + case VPRecipeBase::VPWidenIntrinsicSC: + case VPRecipeBase::VPWidenMemIntrinsicSC: + case VPRecipeBase::VPWidenLoadSC: + case VPRecipeBase::VPWidenLoadEVLSC: + case VPRecipeBase::VPWidenStoreSC: + case VPRecipeBase::VPWidenStoreEVLSC: + case VPRecipeBase::VPInterleaveSC: + case VPRecipeBase::VPInterleaveEVLSC: + case VPRecipeBase::VPHistogramSC: + case VPRecipeBase::VPCurrentIterationPHISC: + case VPRecipeBase::VPActiveLaneMaskPHISC: + case VPRecipeBase::VPFirstOrderRecurrencePHISC: + case VPRecipeBase::VPWidenIntOrFpInductionSC: + case VPRecipeBase::VPWidenPointerInductionSC: + case VPRecipeBase::VPReductionPHISC: + return VPWideningInfo::Wide; + } + llvm_unreachable("Fell off end of switch: unknown recipe class"); +} + +static VPWideningInfo getWideningInfo(const VPValue *VPV) { + if (!VPV->hasDefiningRecipe()) + return VPWideningInfo::SingleScalar; + return getWideningInfo(*VPV->getDefiningRecipe()); +} +bool vputils::isSingleScalar(const VPValue *VPV) { if (auto *Rep = dyn_cast(VPV)) { const VPRegionBlock *RegionOfR = Rep->getRegion(); // Don't consider recipes in replicate regions as uniform yet; their first @@ -398,29 +513,13 @@ bool vputils::isSingleScalar(const VPValue *VPV) { // 
lanes. if (RegionOfR && RegionOfR->isReplicator()) return false; - return Rep->isSingleScalar() || (preservesUniformity(Rep->getOpcode()) && - all_of(Rep->operands(), isSingleScalar)); - } - if (isa(VPV)) - return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar); - if (auto *WidenR = dyn_cast(VPV)) { - return preservesUniformity(WidenR->getOpcode()) && - all_of(WidenR->operands(), isSingleScalar); } - if (auto *VPI = dyn_cast(VPV)) - return VPI->isSingleScalar() || VPI->isVectorToScalar() || - (preservesUniformity(VPI->getOpcode()) && - all_of(VPI->operands(), isSingleScalar)); - if (auto *RR = dyn_cast(VPV)) - return !RR->isPartialReduction(); - if (isa( - VPV)) - return true; - if (auto *Expr = dyn_cast(VPV)) - return Expr->isVectorToScalar(); - - // VPExpandSCEVRecipes must be placed in the entry and are always uniform. - return isa(VPV); + // FIXME: Marking WidenCast as a single-scalar leads to regressions. + VPWideningInfo Info = getWideningInfo(VPV); + return Info.producesSingleScalarResult() || + (!isa(VPV) && + Info.couldProduceSingleScalarResult() && + all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar)); } bool vputils::isUniformAcrossVFsAndUFs(const VPValue *V) { @@ -428,50 +527,38 @@ bool vputils::isUniformAcrossVFsAndUFs(const VPValue *V) { if (isa(V)) return true; - const VPRecipeBase *R = V->getDefiningRecipe(); - const VPBasicBlock *VPBB = R ? R->getParent() : nullptr; - const VPlan *Plan = VPBB ? VPBB->getPlan() : nullptr; - if (VPBB) { - if ((VPBB == Plan->getVectorPreheader() || VPBB == Plan->getEntry())) { - if (match(V->getDefiningRecipe(), + // Bail out on VPPhi, as we can end up in infinite cycles. 
+ if (isa(V)) + return false; + + if (const VPRecipeBase *R = V->getDefiningRecipe()) { + const VPBasicBlock *VPBB = R->getParent(); + const VPlan *Plan = VPBB->getPlan(); + if (VPBB == Plan->getVectorPreheader() || VPBB == Plan->getEntry()) { + if (match(R, m_VPInstruction())) return false; return all_of(R->operands(), isUniformAcrossVFsAndUFs); } + if (auto *RepR = dyn_cast(R)) { + // Be conservative about side-effects, except for the + // known-side-effecting assumes and stores, which we know will be + // uniform. + return RepR->isSingleScalar() && + (!RepR->mayHaveSideEffects() || + isa(RepR->getUnderlyingInstr())) && + all_of(RepR->operands(), isUniformAcrossVFsAndUFs); + } } - return TypeSwitch(R) - .Case([](const VPDerivedIVRecipe *R) { return true; }) - .Case([](const VPReplicateRecipe *R) { - // Be conservative about side-effects, except for the - // known-side-effecting assumes and stores, which we know will be - // uniform. - return R->isSingleScalar() && - (!R->mayHaveSideEffects() || - isa(R->getUnderlyingInstr())) && - all_of(R->operands(), isUniformAcrossVFsAndUFs); - }) - .Case([](const VPWidenRecipe *R) { - return preservesUniformity(R->getOpcode()) && - all_of(R->operands(), isUniformAcrossVFsAndUFs); - }) - .Case([](const VPPhi *) { - // Bail out on VPPhi, as we can end up in infinite cycles. - return false; - }) - .Case([](const VPInstruction *VPI) { - return (VPI->isSingleScalar() || VPI->isVectorToScalar() || - preservesUniformity(VPI->getOpcode())) && - all_of(VPI->operands(), isUniformAcrossVFsAndUFs); - }) - .Case([](const VPWidenCastRecipe *R) { - // A cast is uniform according to its operand. - return isUniformAcrossVFsAndUFs(R->getOperand(0)); - }) - .Default([](const VPRecipeBase *) { // A value is considered non-uniform - // unless proven otherwise. - return false; - }); + // TODO: Match more recipes. 
+ if (!isa( + V)) + return false; + + VPWideningInfo Info = getWideningInfo(V); + return Info.couldProduceSingleScalarResult() && + all_of(V->getDefiningRecipe()->operands(), isUniformAcrossVFsAndUFs); } VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) { diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index b4ba5f8c50ae4..211460a2af037 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -1859,40 +1859,52 @@ using VPUtilsTest = VPlanTestBase; TEST_F(VPUtilsTest, IsUniformAcrossVFsAndUFsForSingleScalarOpcodes) { VPlan &Plan = getPlan(); + VPBasicBlock *Header = Plan.createVPBasicBlock("vector.header"); + VPBasicBlock *Latch = Plan.createVPBasicBlock("vector.latch"); + VPValue *UF = &Plan.getUF(); + Type *IVTy = UF->getScalarType(); + VPRegionBlock *LoopRegion = Plan.createLoopRegion( + IVTy, DebugLoc::getUnknown(), "vector.loop", Header, Latch); + VPBlockUtils::connectBlocks(Header, Latch); + VPBlockUtils::connectBlocks(Plan.getEntry(), LoopRegion); + VPBlockUtils::connectBlocks(LoopRegion, Plan.getScalarHeader()); // isSingleScalar opcode without operands. - std::unique_ptr VScale(new VPInstructionWithType( - VPInstruction::VScale, {}, IntegerType::get(C, 32))); - EXPECT_TRUE(vputils::isUniformAcrossVFsAndUFs(VScale.get())); + auto *VScale = new VPInstructionWithType(VPInstruction::VScale, {}, + IntegerType::get(C, 32)); + Latch->appendRecipe(VScale); + EXPECT_TRUE(vputils::isUniformAcrossVFsAndUFs(VScale)); // isSingleScalar opcode with a uniform operand. 
- std::unique_ptr EVL( - new VPInstruction(VPInstruction::ExplicitVectorLength, {&Plan.getVF()})); - EXPECT_TRUE(vputils::isUniformAcrossVFsAndUFs(EVL.get())); + auto *EVL = + new VPInstruction(VPInstruction::ExplicitVectorLength, &Plan.getVF()); + Latch->appendRecipe(EVL); + EXPECT_TRUE(vputils::isUniformAcrossVFsAndUFs(EVL)); // isVectorToScalar opcode with a uniform operand. - std::unique_ptr FirstActiveLane( - new VPInstructionWithType(VPInstruction::FirstActiveLane, {&Plan.getVF()}, - IntegerType::get(C, 32))); - EXPECT_TRUE(vputils::isUniformAcrossVFsAndUFs(FirstActiveLane.get())); + auto *FirstActiveLane = new VPInstructionWithType( + VPInstruction::FirstActiveLane, &Plan.getVF(), IntegerType::get(C, 32)); + Latch->appendRecipe(FirstActiveLane); + EXPECT_TRUE(vputils::isUniformAcrossVFsAndUFs(FirstActiveLane)); // StepVector produces a distinct value per lane and is non-uniform; use it // as the non-single-scalar operand in the negative cases below. - std::unique_ptr StepVector(new VPInstructionWithType( - VPInstruction::StepVector, {}, IntegerType::get(C, 32))); - EXPECT_FALSE(vputils::isUniformAcrossVFsAndUFs(StepVector.get())); + auto *StepVector = new VPInstructionWithType(VPInstruction::StepVector, {}, + IntegerType::get(C, 32)); + Latch->appendRecipe(StepVector); + EXPECT_FALSE(vputils::isUniformAcrossVFsAndUFs(StepVector)); // isSingleScalar opcode with a non-single-scalar operand. - std::unique_ptr EVLNonUniform(new VPInstruction( - VPInstruction::ExplicitVectorLength, {StepVector.get()})); - EXPECT_FALSE(vputils::isUniformAcrossVFsAndUFs(EVLNonUniform.get())); + auto *EVLNonUniform = + new VPInstruction(VPInstruction::ExplicitVectorLength, StepVector); + Latch->appendRecipe(EVLNonUniform); + EXPECT_FALSE(vputils::isUniformAcrossVFsAndUFs(EVLNonUniform)); // isVectorToScalar opcode with a non-single-scalar operand. 
- std::unique_ptr FirstActiveLaneNonUniform( - new VPInstructionWithType(VPInstruction::FirstActiveLane, - {StepVector.get()}, IntegerType::get(C, 32))); - EXPECT_FALSE( - vputils::isUniformAcrossVFsAndUFs(FirstActiveLaneNonUniform.get())); + auto *FirstActiveLaneNonUniform = new VPInstructionWithType( + VPInstruction::FirstActiveLane, StepVector, IntegerType::get(C, 32)); + Latch->appendRecipe(FirstActiveLaneNonUniform); + EXPECT_FALSE(vputils::isUniformAcrossVFsAndUFs(FirstActiveLaneNonUniform)); } #if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)