diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0debab4a2a0ee..f68d3ad2a74fb 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7371,21 +7371,39 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( vputils::findRecipe(BackedgeVal, IsaPred)); if (!EpiRedHeaderPhi) { match(BackedgeVal, - VPlanPatternMatch::m_Select(VPlanPatternMatch::m_VPValue(), - VPlanPatternMatch::m_VPValue(BackedgeVal), - VPlanPatternMatch::m_VPValue())); - EpiRedHeaderPhi = cast( + m_Select(m_VPValue(), m_VPValue(BackedgeVal), m_VPValue())); + EpiRedHeaderPhi = cast_if_present( vputils::findRecipe(BackedgeVal, IsaPred)); } + // Look through Broadcast or ReductionStartVector to get the underlying + // start value. + auto GetStartValue = [](VPValue *V) -> Value * { + VPValue *Start; + if (match(V, m_VPInstruction( + m_VPValue(Start), m_VPValue(), m_VPValue())) || + match(V, m_Broadcast(m_VPValue(Start)))) + return Start->getUnderlyingValue(); + return V->getUnderlyingValue(); + }; + Value *MainResumeValue; - if (auto *VPI = dyn_cast(EpiRedHeaderPhi->getStartValue())) { - assert((VPI->getOpcode() == VPInstruction::Broadcast || - VPI->getOpcode() == VPInstruction::ReductionStartVector) && - "unexpected start recipe"); - MainResumeValue = VPI->getOperand(0)->getUnderlyingValue(); - } else - MainResumeValue = EpiRedHeaderPhi->getStartValue()->getUnderlyingValue(); + if (EpiRedHeaderPhi) { + MainResumeValue = GetStartValue(EpiRedHeaderPhi->getStartValue()); + } else { + // The epilogue vector loop was dissolved (single-iteration). The + // reduction header phi was replaced by its start value. Look for a + // Broadcast or ReductionStartVector in BackedgeVal or its operands. + Value *FromOperand = nullptr; + if (auto *BackedgeR = BackedgeVal->getDefiningRecipe()) { + auto *It = find_if(BackedgeR->operands(), [&](VPValue *Op) { + return GetStartValue(Op) != Op->getUnderlyingValue(); + }); + if (It != BackedgeR->op_end()) + FromOperand = GetStartValue(*It); + } + MainResumeValue = FromOperand ? FromOperand : GetStartValue(BackedgeVal); + } if (EpiRedResult->getOpcode() == VPInstruction::ComputeAnyOfResult) { [[maybe_unused]] Value *StartV = EpiRedResult->getOperand(0)->getLiveInIRValue(); @@ -7467,6 +7485,8 @@ DenseMap LoopVectorizationPlanner::executePlan( VPlanTransforms::expandBranchOnTwoConds(BestVPlan); // Convert loops with variable-length stepping after regions are dissolved. VPlanTransforms::convertToVariableLengthStep(BestVPlan); + // Remove dead edges for single-iteration loops with BranchOnCond(true). + VPlanTransforms::removeBranchOnConst(BestVPlan); VPlanTransforms::materializeBackedgeTakenCount(BestVPlan, VectorPH); VPlanTransforms::materializeVectorTripCount( BestVPlan, VectorPH, CM.foldTailByMasking(), @@ -7474,6 +7494,7 @@ DenseMap LoopVectorizationPlanner::executePlan( VPlanTransforms::materializeFactors(BestVPlan, VectorPH, BestVF); VPlanTransforms::cse(BestVPlan); VPlanTransforms::simplifyRecipes(BestVPlan); + VPlanTransforms::simplifyKnownEVL(BestVPlan, BestVF, PSE); // 0. Generate SCEV-dependent code in the entry, including TripCount, before // making any changes to the CFG. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index eb1b51c40cec4..4343c13d795b5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -128,6 +128,7 @@ bool VPRecipeBase::mayReadFromMemory() const { return cast(this)->mayReadFromMemory(); case VPBranchOnMaskSC: case VPDerivedIVSC: + case VPCurrentIterationPHISC: case VPFirstOrderRecurrencePHISC: case VPReductionPHISC: case VPPredInstPHISC: @@ -165,6 +166,7 @@ bool VPRecipeBase::mayHaveSideEffects() const { return cast(this)->mayHaveSideEffects(); case VPActiveLaneMaskPHISC: case VPDerivedIVSC: + case VPCurrentIterationPHISC: case VPFirstOrderRecurrencePHISC: case VPReductionPHISC: case VPPredInstPHISC: diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 4c393f2f6f05a..6ca0e1864409a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1469,12 +1469,6 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) { return; } - if (auto *Phi = dyn_cast(Def)) { - if (Phi->getOperand(0) == Phi->getOperand(1)) - Phi->replaceAllUsesWith(Phi->getOperand(0)); - return; - } - // Simplify MaskedCond with no block mask to its single operand. if (match(Def, m_VPInstruction()) && !cast(Def)->isMasked()) @@ -1523,9 +1517,15 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) { return; } - if (isa(Def)) { - if (Def->getNumOperands() == 1) + if (isa(Def)) { + if (Def->getNumOperands() == 1) { Def->replaceAllUsesWith(Def->getOperand(0)); + return; + } + if (auto *Phi = dyn_cast(Def)) { + if (all_equal(Phi->incoming_values())) + Phi->replaceAllUsesWith(Phi->getOperand(0)); + } return; } @@ -2097,72 +2097,16 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF, return false; } - // The vector loop region only executes once. If possible, completely remove - // the region, otherwise replace the terminator controlling the latch with - // (BranchOnCond true). - // TODO: VPWidenIntOrFpInductionRecipe is only partially supported; add - // support for other non-canonical widen induction recipes (e.g., - // VPWidenPointerInductionRecipe). - // TODO: fold branch-on-constant after dissolving region. - auto *Header = cast(VectorRegion->getEntry()); - if (all_of(Header->phis(), [](VPRecipeBase &Phi) { - if (auto *R = dyn_cast(&Phi)) - return R->isCanonical(); - return isa(&Phi); - })) { - for (VPRecipeBase &HeaderR : make_early_inc_range(Header->phis())) { - if (auto *R = dyn_cast(&HeaderR)) { - VPBuilder Builder(Plan.getVectorPreheader()); - VPValue *StepV = Builder.createNaryOp(VPInstruction::StepVector, {}, - R->getScalarType()); - HeaderR.getVPSingleValue()->replaceAllUsesWith(StepV); - HeaderR.eraseFromParent(); - continue; - } - auto *Phi = cast(&HeaderR); - HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0)); - HeaderR.eraseFromParent(); - } - - VPBlockBase *Preheader = VectorRegion->getSinglePredecessor(); - SmallVector Exits = to_vector(VectorRegion->getSuccessors()); - VPBlockUtils::disconnectBlocks(Preheader, VectorRegion); - for (VPBlockBase *Exit : Exits) - VPBlockUtils::disconnectBlocks(VectorRegion, Exit); - - for (VPBlockBase *B : vp_depth_first_shallow(VectorRegion->getEntry())) - B->setParent(nullptr); - - VPBlockUtils::connectBlocks(Preheader, Header); - - for (VPBlockBase *Exit : Exits) - VPBlockUtils::connectBlocks(ExitingVPBB, Exit); - - // Replace terminating branch-on-two-conds with branch-on-cond to early - // exit. - if (Exits.size() != 1) { - assert(match(Term, m_BranchOnTwoConds()) && Exits.size() == 2 && - "BranchOnTwoConds needs 2 remaining exits"); - VPBuilder(Term).createNaryOp(VPInstruction::BranchOnCond, - Term->getOperand(0)); - } - VPlanTransforms::simplifyRecipes(Plan); - } else { - // The vector region contains header phis for which we cannot remove the - // loop region yet. - - // For BranchOnTwoConds, set the latch exit condition to true directly. - if (match(Term, m_BranchOnTwoConds())) { - Term->setOperand(1, Plan.getTrue()); - return true; - } - - auto *BOC = new VPInstruction(VPInstruction::BranchOnCond, {Plan.getTrue()}, - {}, {}, Term->getDebugLoc()); - ExitingVPBB->appendRecipe(BOC); + // The vector loop region only executes once. Convert terminator of the + // exiting block to exit in the first iteration. + if (match(Term, m_BranchOnTwoConds())) { + Term->setOperand(1, Plan.getTrue()); + return true; } + auto *BOC = new VPInstruction(VPInstruction::BranchOnCond, Plan.getTrue(), {}, + {}, Term->getDebugLoc()); + ExitingVPBB->appendRecipe(BOC); Term->eraseFromParent(); return true; @@ -2170,8 +2114,8 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF, /// From the definition of llvm.experimental.get.vector.length, /// VPInstruction::ExplicitVectorLength(%AVL) = %AVL when %AVL <= VF. -static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF, - PredicatedScalarEvolution &PSE) { +bool VPlanTransforms::simplifyKnownEVL(VPlan &Plan, ElementCount VF, + PredicatedScalarEvolution &PSE) { for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_deep(Plan.getEntry()))) { for (VPRecipeBase &R : *VPBB) { @@ -2206,7 +2150,6 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, bool MadeChange = tryToReplaceALMWithWideALM(Plan, BestVF, BestUF); MadeChange |= simplifyBranchConditionForVFAndUF(Plan, BestVF, BestUF, PSE); MadeChange |= optimizeVectorInductionWidthForTCAndVFUF(Plan, BestVF, BestUF); - MadeChange |= simplifyKnownEVL(Plan, BestVF, PSE); if (MadeChange) { Plan.setVF(BestVF); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 0dce486cb1c2c..b516adb83d832 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -239,6 +239,11 @@ struct VPlanTransforms { unsigned BestUF, PredicatedScalarEvolution &PSE); + /// Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL + /// is known to be <= VF, replacing them with the AVL directly. + static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF, + PredicatedScalarEvolution &PSE); + /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe /// optimizations, dead recipe removal, replicate region optimizations and /// block merging. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index e054c916de6e0..d54e8582676d6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -8,28 +8,17 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP8]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw [[VEC_IND]], splat (i64 3) +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw [[TMP8]], splat (i64 3) ; CHECK-NEXT: [[TMP11:%.*]] = lshr [[BROADCAST_SPLAT]], [[TMP10]] ; CHECK-NEXT: [[TMP14:%.*]] = trunc [[TMP11]] to -; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0( [[TMP14]], ptr align 1 [[NEXT_GEP]], [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8) -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0( [[TMP14]], ptr align 1 [[DST]], [[ACTIVE_LANE_MASK_ENTRY]]) +; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: @@ -68,28 +57,17 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SHR]] to i64 ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP8]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw [[VEC_IND]], splat (i64 3) +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw [[TMP8]], splat (i64 3) ; CHECK-NEXT: [[TMP11:%.*]] = lshr [[BROADCAST_SPLAT]], [[TMP10]] ; CHECK-NEXT: [[TMP14:%.*]] = trunc [[TMP11]] to -; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0( [[TMP14]], ptr align 1 [[NEXT_GEP]], [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]]) -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0( [[TMP14]], ptr align 1 [[DST]], [[ACTIVE_LANE_MASK_ENTRY]]) +; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: @@ -124,9 +102,3 @@ for.body: ; preds = %for.body.preheader, for.cond.cleanup: ; preds = %for.body ret void } -;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} -;. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index e1eb86e2e3a57..c066e0b41013d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -12,11 +12,9 @@ define double @test_reduction_costs() { ; COMMON: [[VECTOR_PH]]: ; COMMON-NEXT: br label %[[VECTOR_BODY:.*]] ; COMMON: [[VECTOR_BODY]]: -; COMMON-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ] -; COMMON-NEXT: [[VEC_PHI1:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ] -; COMMON-NEXT: [[TMP0]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> splat (double 3.000000e+00)) -; COMMON-NEXT: [[TMP1]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI1]], <2 x double> splat (double 9.000000e+00)) -; COMMON-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; COMMON-NEXT: [[TMP0:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> splat (double 3.000000e+00)) +; COMMON-NEXT: [[TMP1:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> splat (double 9.000000e+00)) +; COMMON-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; COMMON: [[MIDDLE_BLOCK]]: ; COMMON-NEXT: br label %[[EXIT:.*]] ; COMMON: [[EXIT]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index c340cfc9ad6cc..6acda0d4b3294 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -464,13 +464,9 @@ define i32 @tc4(ptr noundef readonly captures(none) %tmp) vscale_range(1,16) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[TMP3]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> zeroinitializer, [[WIDE_LOAD]] +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]]) ; CHECK-NEXT: br label %[[EXIT:.*]] @@ -509,7 +505,7 @@ define i32 @tc4_from_profile(ptr noundef readonly captures(none) %tmp, i64 %N) v ; CHECK-NEXT: [[ADD]] = add i32 [[SUM_0179]], [[TMP0]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]], !prof [[PROF9:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]], !prof [[PROF8:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[ADD_LCSSA]] @@ -546,8 +542,7 @@ exit: ; preds = %for.body ; CHECK-VS1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} ; CHECK-VS1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} ; CHECK-VS1: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} -; CHECK-VS1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} -; CHECK-VS1: [[PROF9]] = !{!"branch_weights", i32 10, i32 30} +; CHECK-VS1: [[PROF8]] = !{!"branch_weights", i32 10, i32 30} ;. ; CHECK-VS2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK-VS2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} @@ -557,6 +552,5 @@ exit: ; preds = %for.body ; CHECK-VS2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} ; CHECK-VS2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} ; CHECK-VS2: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} -; CHECK-VS2: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} -; CHECK-VS2: [[PROF9]] = !{!"branch_weights", i32 10, i32 30} +; CHECK-VS2: [[PROF8]] = !{!"branch_weights", i32 10, i32 30} ;. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll b/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll index c4d06254a0d30..4b25bec39e51b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll @@ -51,17 +51,17 @@ define i32 @add_reduction_select_operand_constant_but_non_uniform() { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP2]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP1]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_PHI]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[VEC_PHI1]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 64 ; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[VEC_PHI1]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK-NEXT: ret i32 [[TMP1]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index 135d9d526954e..1805243cbbca0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -196,165 +196,124 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT5]], <16 x i8> poison, <16 x i32> zeroinitializer ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] ; DEFAULT: [[VECTOR_BODY]]: -; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE35:.*]] ] -; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <16 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE35]] ] -; DEFAULT-NEXT: [[VEC_IND1:%.*]] = phi <16 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT36:%.*]], %[[PRED_STORE_CONTINUE35]] ] -; DEFAULT-NEXT: [[TMP0:%.*]] = icmp ule <16 x i8> [[VEC_IND]], splat (i8 14) -; DEFAULT-NEXT: [[TMP1:%.*]] = mul <16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND1]] -; DEFAULT-NEXT: [[TMP2:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 1) -; DEFAULT-NEXT: [[TMP3:%.*]] = mul <16 x i8> [[TMP2]], [[BROADCAST_SPLAT4]] +; DEFAULT-NEXT: [[TMP1:%.*]] = mul <16 x i8> [[BROADCAST_SPLAT]], +; DEFAULT-NEXT: [[TMP3:%.*]] = mul <16 x i8> , [[BROADCAST_SPLAT4]] ; DEFAULT-NEXT: [[TMP4:%.*]] = add <16 x i8> [[TMP3]], [[TMP1]] -; DEFAULT-NEXT: [[TMP5:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 2) -; DEFAULT-NEXT: [[TMP6:%.*]] = mul <16 x i8> [[TMP5]], [[BROADCAST_SPLAT6]] +; DEFAULT-NEXT: [[TMP6:%.*]] = mul <16 x i8> , [[BROADCAST_SPLAT6]] ; DEFAULT-NEXT: [[TMP7:%.*]] = add <16 x i8> [[TMP4]], [[TMP6]] -; DEFAULT-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0 -; DEFAULT-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; DEFAULT: [[PRED_STORE_IF]]: -; DEFAULT-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 -; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP9]] +; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0 ; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <16 x i8> [[TMP7]], i32 0 ; DEFAULT-NEXT: store i8 [[TMP11]], ptr [[TMP10]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]] ; DEFAULT: [[PRED_STORE_CONTINUE]]: -; DEFAULT-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1 -; DEFAULT-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] -; DEFAULT: [[PRED_STORE_IF6]]: -; DEFAULT-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 1 -; DEFAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP13]] +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] +; DEFAULT: [[PRED_STORE_IF5]]: +; DEFAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 1 ; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <16 x i8> [[TMP7]], i32 1 ; DEFAULT-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE7]] -; DEFAULT: [[PRED_STORE_CONTINUE7]]: -; DEFAULT-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2 -; DEFAULT-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] -; DEFAULT: [[PRED_STORE_IF8]]: -; DEFAULT-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2 -; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP17]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE6]] +; DEFAULT: [[PRED_STORE_CONTINUE6]]: +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] +; DEFAULT: [[PRED_STORE_IF7]]: +; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 2 ; DEFAULT-NEXT: [[TMP19:%.*]] = extractelement <16 x i8> [[TMP7]], i32 2 ; DEFAULT-NEXT: store i8 [[TMP19]], ptr [[TMP18]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE9]] -; DEFAULT: [[PRED_STORE_CONTINUE9]]: -; DEFAULT-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3 -; DEFAULT-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11:.*]] -; DEFAULT: [[PRED_STORE_IF10]]: -; DEFAULT-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3 -; DEFAULT-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP21]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE8]] +; DEFAULT: [[PRED_STORE_CONTINUE8]]: +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] +; DEFAULT: [[PRED_STORE_IF9]]: +; DEFAULT-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 3 ; DEFAULT-NEXT: [[TMP23:%.*]] = extractelement <16 x i8> [[TMP7]], i32 3 ; DEFAULT-NEXT: store i8 [[TMP23]], ptr [[TMP22]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE11]] -; DEFAULT: [[PRED_STORE_CONTINUE11]]: -; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4 -; DEFAULT-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]] -; DEFAULT: [[PRED_STORE_IF12]]: -; DEFAULT-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], 4 -; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP25]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE10]] +; DEFAULT: [[PRED_STORE_CONTINUE10]]: +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] +; DEFAULT: [[PRED_STORE_IF11]]: +; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 4 ; DEFAULT-NEXT: [[TMP27:%.*]] = extractelement <16 x i8> [[TMP7]], i32 4 ; DEFAULT-NEXT: store i8 [[TMP27]], ptr [[TMP26]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE13]] -; DEFAULT: [[PRED_STORE_CONTINUE13]]: -; DEFAULT-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5 -; DEFAULT-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] -; DEFAULT: [[PRED_STORE_IF14]]: -; DEFAULT-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 5 -; DEFAULT-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP29]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE12]] +; DEFAULT: [[PRED_STORE_CONTINUE12]]: +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]] +; DEFAULT: [[PRED_STORE_IF13]]: +; DEFAULT-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 5 ; DEFAULT-NEXT: [[TMP31:%.*]] = extractelement <16 x i8> [[TMP7]], i32 5 ; DEFAULT-NEXT: store i8 [[TMP31]], ptr [[TMP30]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE15]] -; DEFAULT: [[PRED_STORE_CONTINUE15]]: -; DEFAULT-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6 -; DEFAULT-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]] -; DEFAULT: [[PRED_STORE_IF16]]: -; DEFAULT-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], 6 -; DEFAULT-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP33]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE14]] +; DEFAULT: [[PRED_STORE_CONTINUE14]]: +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]] +; DEFAULT: [[PRED_STORE_IF15]]: +; DEFAULT-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 6 ; DEFAULT-NEXT: [[TMP35:%.*]] = extractelement <16 x i8> [[TMP7]], i32 6 ; DEFAULT-NEXT: store i8 [[TMP35]], ptr [[TMP34]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE17]] -; DEFAULT: [[PRED_STORE_CONTINUE17]]: -; DEFAULT-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7 -; DEFAULT-NEXT: br i1 [[TMP36]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]] -; DEFAULT: [[PRED_STORE_IF18]]: -; DEFAULT-NEXT: [[TMP37:%.*]] = add i64 [[INDEX]], 7 -; DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP37]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE16]] +; DEFAULT: [[PRED_STORE_CONTINUE16]]: +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]] +; DEFAULT: [[PRED_STORE_IF17]]: +; DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 7 ; DEFAULT-NEXT: [[TMP39:%.*]] = extractelement <16 x i8> [[TMP7]], i32 7 ; DEFAULT-NEXT: store i8 [[TMP39]], ptr [[TMP38]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE19]] -; DEFAULT: [[PRED_STORE_CONTINUE19]]: -; DEFAULT-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8 -; DEFAULT-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]] -; DEFAULT: [[PRED_STORE_IF20]]: -; DEFAULT-NEXT: [[TMP41:%.*]] = add i64 [[INDEX]], 8 -; DEFAULT-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP41]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE18]] +; DEFAULT: [[PRED_STORE_CONTINUE18]]: +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]] +; DEFAULT: [[PRED_STORE_IF19]]: +; DEFAULT-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 ; DEFAULT-NEXT: [[TMP43:%.*]] = extractelement <16 x i8> [[TMP7]], i32 8 ; DEFAULT-NEXT: store i8 [[TMP43]], ptr [[TMP42]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE21]] -; DEFAULT: [[PRED_STORE_CONTINUE21]]: -; DEFAULT-NEXT: [[TMP44:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9 -; DEFAULT-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]] -; DEFAULT: [[PRED_STORE_IF22]]: -; DEFAULT-NEXT: [[TMP45:%.*]] = add i64 [[INDEX]], 9 -; DEFAULT-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP45]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE20]] +; DEFAULT: [[PRED_STORE_CONTINUE20]]: +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] +; DEFAULT: [[PRED_STORE_IF21]]: +; DEFAULT-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 9 ; DEFAULT-NEXT: [[TMP47:%.*]] = extractelement <16 x i8> [[TMP7]], i32 9 ; DEFAULT-NEXT: store i8 [[TMP47]], ptr [[TMP46]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE23]] -; DEFAULT: [[PRED_STORE_CONTINUE23]]: -; DEFAULT-NEXT: [[TMP48:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10 -; DEFAULT-NEXT: br i1 [[TMP48]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]] -; DEFAULT: [[PRED_STORE_IF24]]: -; DEFAULT-NEXT: [[TMP49:%.*]] = add i64 [[INDEX]], 10 -; DEFAULT-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP49]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE22]] +; DEFAULT: [[PRED_STORE_CONTINUE22]]: +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] +; DEFAULT: [[PRED_STORE_IF23]]: +; DEFAULT-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 10 ; DEFAULT-NEXT: [[TMP51:%.*]] = extractelement <16 x i8> [[TMP7]], i32 10 ; DEFAULT-NEXT: store i8 [[TMP51]], ptr [[TMP50]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE25]] -; DEFAULT: [[PRED_STORE_CONTINUE25]]: -; DEFAULT-NEXT: [[TMP52:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11 -; DEFAULT-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27:.*]] -; DEFAULT: [[PRED_STORE_IF26]]: -; DEFAULT-NEXT: [[TMP53:%.*]] = add i64 [[INDEX]], 11 -; DEFAULT-NEXT: [[TMP54:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP53]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE24]] +; DEFAULT: [[PRED_STORE_CONTINUE24]]: +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]] +; DEFAULT: [[PRED_STORE_IF25]]: +; DEFAULT-NEXT: [[TMP54:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 11 ; DEFAULT-NEXT: [[TMP55:%.*]] = extractelement <16 x i8> [[TMP7]], i32 11 ; DEFAULT-NEXT: store i8 [[TMP55]], ptr [[TMP54]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE27]] -; DEFAULT: [[PRED_STORE_CONTINUE27]]: -; DEFAULT-NEXT: [[TMP56:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12 -; DEFAULT-NEXT: br i1 [[TMP56]], label %[[PRED_STORE_IF28:.*]], label %[[PRED_STORE_CONTINUE29:.*]] -; DEFAULT: [[PRED_STORE_IF28]]: -; DEFAULT-NEXT: [[TMP57:%.*]] = add i64 [[INDEX]], 12 -; DEFAULT-NEXT: [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP57]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE26]] +; DEFAULT: [[PRED_STORE_CONTINUE26]]: +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]] +; DEFAULT: [[PRED_STORE_IF27]]: +; DEFAULT-NEXT: [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12 ; DEFAULT-NEXT: [[TMP59:%.*]] = extractelement <16 x i8> [[TMP7]], i32 12 ; DEFAULT-NEXT: store i8 [[TMP59]], ptr [[TMP58]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE29]] -; DEFAULT: [[PRED_STORE_CONTINUE29]]: -; DEFAULT-NEXT: [[TMP60:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13 -; DEFAULT-NEXT: br i1 [[TMP60]], label %[[PRED_STORE_IF30:.*]], label %[[PRED_STORE_CONTINUE31:.*]] -; DEFAULT: [[PRED_STORE_IF30]]: -; DEFAULT-NEXT: [[TMP61:%.*]] = add i64 [[INDEX]], 13 -; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP61]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE28]] +; DEFAULT: [[PRED_STORE_CONTINUE28]]: +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]] +; DEFAULT: [[PRED_STORE_IF29]]: +; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 13 ; DEFAULT-NEXT: [[TMP63:%.*]] = extractelement <16 x i8> [[TMP7]], i32 13 ; DEFAULT-NEXT: store i8 [[TMP63]], ptr [[TMP62]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE31]] -; DEFAULT: [[PRED_STORE_CONTINUE31]]: -; DEFAULT-NEXT: [[TMP64:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14 -; DEFAULT-NEXT: br i1 [[TMP64]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]] -; DEFAULT: [[PRED_STORE_IF32]]: -; DEFAULT-NEXT: [[TMP65:%.*]] = add i64 [[INDEX]], 14 -; DEFAULT-NEXT: [[TMP66:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP65]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE30]] +; DEFAULT: [[PRED_STORE_CONTINUE30]]: +; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF31:.*]], label %[[PRED_STORE_CONTINUE32:.*]] +; DEFAULT: [[PRED_STORE_IF31]]: +; DEFAULT-NEXT: [[TMP66:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 14 ; DEFAULT-NEXT: [[TMP67:%.*]] = extractelement <16 x i8> [[TMP7]], i32 14 ; DEFAULT-NEXT: store i8 [[TMP67]], ptr [[TMP66]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE33]] -; DEFAULT: [[PRED_STORE_CONTINUE33]]: -; DEFAULT-NEXT: [[TMP68:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15 -; DEFAULT-NEXT: br i1 [[TMP68]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35]] -; DEFAULT: [[PRED_STORE_IF34]]: -; DEFAULT-NEXT: [[TMP69:%.*]] = add i64 [[INDEX]], 15 -; DEFAULT-NEXT: [[TMP70:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP69]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE32]] +; DEFAULT: [[PRED_STORE_CONTINUE32]]: +; DEFAULT-NEXT: br i1 false, label %[[PRED_STORE_IF33:.*]], label %[[PRED_STORE_CONTINUE34:.*]] +; DEFAULT: [[PRED_STORE_IF33]]: +; DEFAULT-NEXT: [[TMP70:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 15 ; DEFAULT-NEXT: [[TMP71:%.*]] = extractelement <16 x i8> [[TMP7]], i32 15 ; DEFAULT-NEXT: store i8 [[TMP71]], ptr [[TMP70]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]] -; DEFAULT: [[PRED_STORE_CONTINUE35]]: -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16) -; DEFAULT-NEXT: [[VEC_IND_NEXT36]] = add <16 x i8> [[VEC_IND1]], splat (i8 16) -; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE34]] +; DEFAULT: [[PRED_STORE_CONTINUE34]]: +; DEFAULT-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[FOR_COND_CLEANUP]]: @@ -474,7 +433,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; DEFAULT-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; DEFAULT-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[FOR_COND_CLEANUP]]: @@ -630,7 +589,7 @@ define void @dont_vectorize_with_minsize() { ; DEFAULT-NEXT: store <8 x i16> [[TMP15]], ptr [[TMP11]], align 2 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; DEFAULT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 -; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[FOR_COND_CLEANUP]]: @@ -746,7 +705,7 @@ define void @vectorization_forced_minsize_reduce_width() { ; DEFAULT-NEXT: store <8 x i16> [[TMP15]], ptr [[TMP11]], align 2 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; DEFAULT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 -; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[FOR_COND_CLEANUP]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll index b439353444409..fa4d90181f744 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll @@ -588,11 +588,7 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src. ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi double [ 3.000000e+00, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[IV]] +; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 0 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <6 x double>, ptr [[GEP_0]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> @@ -602,11 +598,11 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src. ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[STRIDED_VEC2]], splat (double 3.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[IV]] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[GEP_SRC]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], [[WIDE_LOAD]] -; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[IV]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [[T_2]], ptr [[SRC_2]], i64 [[TMP1]] +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [[T_2]], ptr [[SRC_2]], i64 1 ; CHECK-NEXT: [[GEP_72:%.*]] = getelementptr i8, ptr [[GEP_SRC_2]], i64 72 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP11]], i64 72 ; CHECK-NEXT: [[L_P_2:%.*]] = load ptr, ptr [[GEP_72]], align 8 @@ -616,9 +612,8 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src. ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x double> poison, double [[LV]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[TMP17]], i32 1 ; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[TMP9]], [[TMP19]] -; CHECK-NEXT: [[TMP21]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> [[TMP20]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: [[TMP21:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double 3.000000e+00, <2 x double> [[TMP20]]) +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[EXIT]]: @@ -779,7 +774,7 @@ define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) { ; CHECK-NEXT: [[TMP66]] = or <16 x i32> [[TMP65]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], 48 -; CHECK-NEXT: br i1 [[TMP67]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP67]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP68:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP66]]) ; CHECK-NEXT: br label %[[SCALAR_PH:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll index fceab6f823d5a..1da60ddd6997a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll @@ -552,11 +552,7 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src. ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi double [ 3.000000e+00, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[IV]] +; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 0 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <6 x double>, ptr [[GEP_0]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> @@ -566,11 +562,11 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src. ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[STRIDED_VEC2]], splat (double 3.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[IV]] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[GEP_SRC]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], [[WIDE_LOAD]] -; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[IV]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [[T_2]], ptr [[SRC_2]], i64 [[TMP1]] +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [[T_2]], ptr [[SRC_2]], i64 1 ; CHECK-NEXT: [[GEP_72:%.*]] = getelementptr i8, ptr [[GEP_SRC_2]], i64 72 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP11]], i64 72 ; CHECK-NEXT: [[L_P_2:%.*]] = load ptr, ptr [[GEP_72]], align 8 @@ -580,9 +576,8 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src. ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x double> poison, double [[LV]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[TMP17]], i32 1 ; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[TMP9]], [[TMP19]] -; CHECK-NEXT: [[TMP21]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> [[TMP20]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: [[TMP21:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double 3.000000e+00, <2 x double> [[TMP20]]) +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[EXIT]]: @@ -743,7 +738,7 @@ define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) { ; CHECK-NEXT: [[TMP66]] = or <16 x i32> [[TMP65]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], 48 -; CHECK-NEXT: br i1 [[TMP67]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP67]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP68:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP66]]) ; CHECK-NEXT: br label %[[SCALAR_PH:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll index 78b54efe7e682..0af9795cfdf62 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll @@ -285,13 +285,9 @@ define i8 @mul_non_pow_2_low_trip_count(ptr noalias %a) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i8> [ , [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 -; CHECK-NEXT: [[TMP1]] = mul <8 x i8> [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0:%.*]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> [[TMP1]]) ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] @@ -300,12 +296,12 @@ define i8 @mul_non_pow_2_low_trip_count(ptr noalias %a) { ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 8, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[TMP3]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[IV]] ; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[GEP]], align 1 ; CHECK-NEXT: [[MUL]] = mul i8 [[TMP5]], [[RDX]] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 10 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi i8 [ [[MUL]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i8 [[MUL_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll index f1dda3d5a2f91..a80d5dacff583 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll @@ -41,13 +41,9 @@ define i32 @test_remove_iv(i32 %start) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ [[TMP2]], %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[AVL:%.*]] = phi i32 [ 6, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 4, i1 true) -; CHECK-NEXT: [[TMP4:%.*]] = xor [[VEC_PHI]], splat (i32 3) -; CHECK-NEXT: [[TMP5]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP4]], [[VEC_PHI]], i32 [[TMP3]]) -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP3]] -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = xor [[TMP2]], splat (i32 3) +; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP3]], [[TMP2]], i32 6) +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP5]]) ; CHECK-NEXT: br label %[[EXIT:.*]] @@ -71,8 +67,3 @@ exit: attributes #0 = { vscale_range(2,2) } -;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index f81f2a32318d4..473c5ea790452 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -606,19 +606,19 @@ define i64 @live_in_known_1_via_scev() { ; CHECK-NEXT: [[P:%.*]] = phi i32 [ 1, %[[ENTRY]] ] ; CHECK-NEXT: [[N:%.*]] = add nuw nsw i32 [[SEL]], 6 ; CHECK-NEXT: [[P_EXT:%.*]] = zext nneg i32 [[P]] to i64 -; CHECK-NEXT: br label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_PHI]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_BODY]] ], [ [[INDEX_NEXT:%.*]], %[[EXIT]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ , %[[VECTOR_BODY]] ], [ [[VEC_PHI]], %[[EXIT]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 -; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[EXIT]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> [[VEC_PHI]]) -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[EXIT]]: +; CHECK-NEXT: br label %[[EXIT1:.*]] +; CHECK: [[EXIT1]]: ; CHECK-NEXT: ret i64 [[TMP3]] ; entry: @@ -660,9 +660,8 @@ define i64 @cost_loop_invariant_recipes(i1 %x, i64 %y) { ; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[BROADCAST_SPLAT]], [[TMP1]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 1), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3]] = mul <2 x i64> [[TMP2]], [[VEC_PHI]] -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[TMP2]], splat (i64 1) +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> [[TMP3]]) ; CHECK-NEXT: br label %[[EXIT:.*]] @@ -700,18 +699,15 @@ define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = and <16 x i32> [[VEC_PHI]], splat (i32 1) -; CHECK-NEXT: [[TMP1:%.*]] = or <16 x i32> [[TMP0]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i32> [[TMP1]] to <16 x i1> -; CHECK-NEXT: [[TMP3]] = zext <16 x i1> [[TMP2]] to <16 x i32> -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = or <16 x i32> zeroinitializer, [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc <16 x i32> [[TMP0]] to <16 x i1> +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP2]]) -; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: ret i32 [[TMP3]] ; entry: %conv = zext i1 %cmp to i32 @@ -781,7 +777,7 @@ define i32 @g(i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i32> [[TMP16]], [[TMP15]] ; CHECK-NEXT: [[BIN_RDX5:%.*]] = or <4 x i32> [[TMP17]], [[BIN_RDX]] @@ -815,7 +811,7 @@ define i32 @g(i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i32 [[INDEX9]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT11]] = add <4 x i32> [[VEC_IND10]], splat (i32 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT15]], [[N_VEC8]] -; CHECK-NEXT: br i1 [[TMP26]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP26]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP25]]) ; CHECK-NEXT: [[CMP_N18:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC8]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index 78363e13595cb..64461271ec5ff 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -2454,44 +2454,22 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP112:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP113:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 5 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 10 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 15 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 20 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 25 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 30 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 35 -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 40 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 45 -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 50 -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 55 -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 60 -; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 65 -; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 70 -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 75 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 0 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 5 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 10 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 15 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 20 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 25 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 30 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 35 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 40 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 45 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 50 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 55 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 60 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 65 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 70 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 75 ; CHECK-NEXT: [[TMP32:%.*]] = load i1, ptr [[TMP16]], align 1 ; CHECK-NEXT: [[TMP33:%.*]] = load i1, ptr [[TMP17]], align 1 ; CHECK-NEXT: [[TMP34:%.*]] = load i1, ptr [[TMP18]], align 1 @@ -2524,22 +2502,22 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 0 +; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 5 +; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 10 +; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 15 +; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 20 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 25 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 30 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 35 +; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 40 +; CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 45 +; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 50 +; CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 55 +; CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 60 +; CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 65 +; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 70 +; CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 75 ; CHECK-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP64]], align 4 ; CHECK-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP65]], align 4 ; CHECK-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP66]], align 4 @@ -2576,16 +2554,12 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[TMP95]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP103]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI6:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP111]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP112]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] -; CHECK-NEXT: [[TMP113]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI4]] -; CHECK-NEXT: [[TMP114]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI5]] -; CHECK-NEXT: [[TMP115]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI6]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: [[TMP112:%.*]] = add <4 x i32> zeroinitializer, [[PREDPHI]] +; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP113]], [[TMP112]] -; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP114]], [[BIN_RDX]] -; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <4 x i32> [[TMP115]], [[BIN_RDX7]] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[PREDPHI4]], [[TMP112]] +; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <4 x i32> [[PREDPHI5]], [[BIN_RDX]] +; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <4 x i32> [[PREDPHI6]], [[BIN_RDX4]] ; CHECK-NEXT: [[TMP117:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX8]]) ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -2605,7 +2579,7 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] ; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] ; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 100 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: loop_exit: ; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ] ; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] @@ -2772,7 +2746,7 @@ define i32 @test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP115]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP116:%.*]] = icmp eq i64 [[INDEX_NEXT]], 48 -; CHECK-NEXT: br i1 [[TMP116]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP116]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP113]], [[TMP112]] ; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP114]], [[BIN_RDX]] @@ -2796,7 +2770,7 @@ define i32 @test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] ; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] ; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 100 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: loop_exit: ; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ] ; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] @@ -2980,7 +2954,7 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr ; CHECK-NEXT: [[TMP131]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP132:%.*]] = icmp eq i64 [[INDEX_NEXT]], 144 -; CHECK-NEXT: br i1 [[TMP132]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP132]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP129]], [[TMP128]] ; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP130]], [[BIN_RDX]] @@ -3004,7 +2978,7 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr ; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] ; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] ; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 300 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: loop_exit: ; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ] ; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll index 76f30decf81e7..4a3356889c36e 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll @@ -777,10 +777,9 @@ define i16 @test_no_op_or_reduction_single_vector_iteration(i64 %N) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[CLAMPED]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[VEC_PHI]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[VEC_PHI]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[CLAMPED]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: @@ -794,23 +793,22 @@ define i16 @test_no_op_or_reduction_single_vector_iteration(i64 %N) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> zeroinitializer, i16 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i16> [ [[TMP1]], %[[VEC_EPILOG_PH]] ], [ [[VEC_PHI4]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK-NEXT: br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[VEC_PHI4]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP1]]) ; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[CLAMPED]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N5]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i16 [ [[TMP2]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP0]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX5:%.*]] = phi i16 [ [[TMP2]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP0]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i16 [ [[BC_MERGE_RDX6]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i16 [ [[BC_MERGE_RDX5]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[RED_NEXT]] = or i16 [[RED]], 0 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[CLAMPED]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP27:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i16 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[TMP2]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i16 [[RED_NEXT_LCSSA]] @@ -830,3 +828,183 @@ loop: exit: ret i16 %red.next } + +; Test or-reduction with an induction-derived operand and a small bounded trip +; count. The main vector loop is dissolved (trip count <= VF), and the epilogue +; vector loop's reduction body remains with a ReductionStartVector operand. +define i16 @test_or_reduction_with_induction_single_vector_iteration(i64 %N) { +; CHECK-LABEL: define i16 @test_or_reduction_with_induction_single_vector_iteration( +; CHECK-SAME: i64 [[N:%.*]]) { +; CHECK-NEXT: [[ITER_CHECK:.*]]: +; CHECK-NEXT: [[CLAMPED:%.*]] = call i64 @llvm.umin.i64(i64 [[N]], i64 4) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[CLAMPED]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[CLAMPED]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[CLAMPED]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[CLAMPED]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> ) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[CLAMPED]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; CHECK: [[VEC_EPILOG_ITER_CHECK]]: +; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]] +; CHECK: [[VEC_EPILOG_PH]]: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP0]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[TMP1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[CLAMPED]], 4 +; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[CLAMPED]], [[N_MOD_VF2]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[N_VEC3]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> zeroinitializer, i16 [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[BC_RESUME_VAL]] to i16 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP4]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i16> [[BROADCAST_SPLAT]], +; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: +; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i16> [[TMP3]], [[INDUCTION]] +; CHECK-NEXT: br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]] +; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP5]]) +; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[CLAMPED]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N4]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] +; CHECK: [[VEC_EPILOG_SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i32 [ [[TMP2]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i16 [ [[TMP6]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL5]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[SHIFT:%.*]] = phi i32 [ [[BC_RESUME_VAL6]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SHIFT_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i16 [ [[BC_MERGE_RDX7]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHIFT]] to i16 +; CHECK-NEXT: [[RED_NEXT]] = or i16 [[RED]], [[TRUNC]] +; CHECK-NEXT: [[SHIFT_NEXT]] = add i32 [[SHIFT]], 1 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[CLAMPED]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i16 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[TMP6]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i16 [[RED_NEXT_LCSSA]] +; +entry: + %clamped = call i64 @llvm.umin.i32(i64 %N, i64 4) + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %shift = phi i32 [ 0, %entry ], [ %shift.next, %loop ] + %red = phi i16 [ 0, %entry ], [ %red.next, %loop ] + %trunc = trunc i32 %shift to i16 + %red.next = or i16 %red, %trunc + %shift.next = add i32 %shift, 1 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %clamped + br i1 %ec, label %exit, label %loop + +exit: + ret i16 %red.next +} + +; Test AnyOf reduction in epilogue with a small trip count that results in +; the epilogue vector loop being dissolved (single iteration). +define i32 @anyof_reduction_in_dissolved_epilogue(i32 %val, i1 %c) { +; CHECK-LABEL: define i32 @anyof_reduction_in_dissolved_epilogue( +; CHECK-SAME: i32 [[VAL:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[START:%.*]] = sext i1 [[C]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = zext i1 [[C]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[START]] +; CHECK-NEXT: br label %[[ITER_CHECK:.*]] +; CHECK: [[OUTER_HEADER_LOOPEXIT:.*]]: +; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL:%.*]], %[[LOOP:.*]] ], [ [[RDX_SELECT:%.*]], %[[MIDDLE_BLOCK:.*]] ], [ [[RDX_SELECT7:%.*]], %[[VEC_EPILOG_MIDDLE_BLOCK:.*]] ] +; CHECK-NEXT: br label %[[ITER_CHECK]] +; CHECK: [[ITER_CHECK]]: +; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SEL_LCSSA]], %[[OUTER_HEADER_LOOPEXIT]] ] +; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[VAL]], 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP2]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: br label %[[MIDDLE_BLOCK]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BROADCAST_SPLAT]]) +; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]] +; CHECK-NEXT: [[RDX_SELECT]] = select i1 [[TMP4]], i32 [[OUTER_IV]], i32 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[OUTER_HEADER_LOOPEXIT]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; CHECK: [[VEC_EPILOG_ITER_CHECK]]: +; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[START]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_MOD_VF]], 4 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]] +; CHECK: [[VEC_EPILOG_PH]]: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[BC_MERGE_RDX]], 0 +; CHECK-NEXT: [[N_MOD_VF1:%.*]] = urem i32 [[TMP1]], 4 +; CHECK-NEXT: [[N_VEC2:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF1]] +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[START]], [[N_VEC2]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i32 [[VAL]], 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT5]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i1> poison, i1 [[TMP5]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT3]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: +; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i1> [[BROADCAST_SPLAT4]], [[BROADCAST_SPLAT6]] +; CHECK-NEXT: br label %[[VEC_EPILOG_MIDDLE_BLOCK]] +; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]]) +; CHECK-NEXT: [[TMP10:%.*]] = freeze i1 [[TMP9]] +; CHECK-NEXT: [[RDX_SELECT7]] = select i1 [[TMP10]], i32 [[OUTER_IV]], i32 0 +; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC2]] +; CHECK-NEXT: br i1 [[CMP_N8]], label %[[OUTER_HEADER_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] +; CHECK: [[VEC_EPILOG_SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP6]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX9:%.*]] = phi i32 [ [[RDX_SELECT7]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: br label %[[LOOP]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX9]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL]], %[[LOOP]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[VAL]], 0 +; CHECK-NEXT: [[SEL]] = select i1 [[CMP]], i32 [[RDX]], i32 [[OUTER_IV]] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[TC:%.*]] = zext i1 [[C]] to i32 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[TC]] +; CHECK-NEXT: br i1 [[EC]], label %[[OUTER_HEADER_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP29:![0-9]+]] +; +entry: + %start = sext i1 %c to i32 + br label %outer.header + +outer.header: + %outer.iv = phi i32 [ 0, %entry ], [ %sel, %loop ] + br label %loop + +loop: + %iv = phi i32 [ %start, %outer.header ], [ %iv.next, %loop ] + %rdx = phi i32 [ 0, %outer.header ], [ %sel, %loop ] + %cmp = icmp eq i32 %val, 0 + %sel = select i1 %cmp, i32 %rdx, i32 %outer.iv + %iv.next = add i32 %iv, 1 + %tc = zext i1 %c to i32 + %ec = icmp eq i32 %iv.next, %tc + br i1 %ec, label %outer.header, label %loop + + uselistorder i32 %sel, { 1, 0 } +} diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll index dd3ad4d01b465..b1027a8540c57 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll @@ -194,182 +194,129 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; IC4VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer ; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]] ; IC4VF4: [[VECTOR_BODY]]: -; IC4VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44:.*]] ] -; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44]] ] -; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP108:%.*]], %[[PRED_LOAD_CONTINUE44]] ] -; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP109:%.*]], %[[PRED_LOAD_CONTINUE44]] ] -; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP110:%.*]], %[[PRED_LOAD_CONTINUE44]] ] -; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP111:%.*]], %[[PRED_LOAD_CONTINUE44]] ] -; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 -4) -; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 -4) -; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i16> [[STEP_ADD_2]], splat (i16 -4) -; IC4VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 -; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]] -; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0 -; IC4VF4-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer -; IC4VF4-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], -; IC4VF4-NEXT: [[VEC_IV8:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], -; IC4VF4-NEXT: [[VEC_IV11:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], -; IC4VF4-NEXT: [[VEC_IV14:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], -; IC4VF4-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 11) -; IC4VF4-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV8]], splat (i32 11) -; IC4VF4-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV11]], splat (i32 11) -; IC4VF4-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV14]], splat (i32 11) -; IC4VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 -; IC4VF4-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; IC4VF4: [[PRED_LOAD_IF]]: -; IC4VF4-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 0 -; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP5]] +; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 12 ; IC4VF4-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1 ; IC4VF4-NEXT: [[TMP8:%.*]] = insertelement <4 x i16> poison, i16 [[TMP7]], i32 0 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE]] ; IC4VF4: [[PRED_LOAD_CONTINUE]]: ; IC4VF4-NEXT: [[TMP9:%.*]] = phi <4 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ] -; IC4VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 -; IC4VF4-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]] ; IC4VF4: [[PRED_LOAD_IF15]]: -; IC4VF4-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], -1 -; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP11]] +; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 11 ; IC4VF4-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 1 ; IC4VF4-NEXT: [[TMP14:%.*]] = insertelement <4 x i16> [[TMP9]], i16 [[TMP13]], i32 1 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE16]] ; IC4VF4: [[PRED_LOAD_CONTINUE16]]: ; IC4VF4-NEXT: [[TMP15:%.*]] = phi <4 x i16> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF15]] ] -; IC4VF4-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 -; IC4VF4-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]] ; IC4VF4: [[PRED_LOAD_IF17]]: -; IC4VF4-NEXT: [[TMP17:%.*]] = add i16 [[OFFSET_IDX]], -2 -; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP17]] +; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 10 ; IC4VF4-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP18]], align 1 ; IC4VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP19]], i32 2 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE18]] ; IC4VF4: [[PRED_LOAD_CONTINUE18]]: ; IC4VF4-NEXT: [[TMP21:%.*]] = phi <4 x i16> [ [[TMP15]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP20]], %[[PRED_LOAD_IF17]] ] -; IC4VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 -; IC4VF4-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]] ; IC4VF4: [[PRED_LOAD_IF19]]: -; IC4VF4-NEXT: [[TMP23:%.*]] = add i16 [[OFFSET_IDX]], -3 -; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP23]] +; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 9 ; IC4VF4-NEXT: [[TMP25:%.*]] = load i16, ptr [[TMP24]], align 1 ; IC4VF4-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> [[TMP21]], i16 [[TMP25]], i32 3 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE20]] ; IC4VF4: [[PRED_LOAD_CONTINUE20]]: ; IC4VF4-NEXT: [[TMP27:%.*]] = phi <4 x i16> [ [[TMP21]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP26]], %[[PRED_LOAD_IF19]] ] -; IC4VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; IC4VF4-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]] ; IC4VF4: [[PRED_LOAD_IF21]]: -; IC4VF4-NEXT: [[TMP29:%.*]] = add i16 [[OFFSET_IDX]], -4 -; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP29]] +; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 8 ; IC4VF4-NEXT: [[TMP31:%.*]] = load i16, ptr [[TMP30]], align 1 ; IC4VF4-NEXT: [[TMP32:%.*]] = insertelement <4 x i16> poison, i16 [[TMP31]], i32 0 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE22]] ; IC4VF4: [[PRED_LOAD_CONTINUE22]]: ; IC4VF4-NEXT: [[TMP33:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP32]], %[[PRED_LOAD_IF21]] ] -; IC4VF4-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 -; IC4VF4-NEXT: br i1 [[TMP34]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]] ; IC4VF4: [[PRED_LOAD_IF23]]: -; IC4VF4-NEXT: [[TMP35:%.*]] = add i16 [[OFFSET_IDX]], -5 -; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP35]] +; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 7 ; IC4VF4-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP36]], align 1 ; IC4VF4-NEXT: [[TMP38:%.*]] = insertelement <4 x i16> [[TMP33]], i16 [[TMP37]], i32 1 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE24]] ; IC4VF4: [[PRED_LOAD_CONTINUE24]]: ; IC4VF4-NEXT: [[TMP39:%.*]] = phi <4 x i16> [ [[TMP33]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP38]], %[[PRED_LOAD_IF23]] ] -; IC4VF4-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 -; IC4VF4-NEXT: br i1 [[TMP40]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]] ; IC4VF4: [[PRED_LOAD_IF25]]: -; IC4VF4-NEXT: [[TMP41:%.*]] = add i16 [[OFFSET_IDX]], -6 -; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP41]] +; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 6 ; IC4VF4-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP42]], align 1 ; IC4VF4-NEXT: [[TMP44:%.*]] = insertelement <4 x i16> [[TMP39]], i16 [[TMP43]], i32 2 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE26]] ; IC4VF4: [[PRED_LOAD_CONTINUE26]]: ; IC4VF4-NEXT: [[TMP45:%.*]] = phi <4 x i16> [ [[TMP39]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP44]], %[[PRED_LOAD_IF25]] ] -; IC4VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 -; IC4VF4-NEXT: br i1 [[TMP46]], label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]] ; IC4VF4: [[PRED_LOAD_IF27]]: -; IC4VF4-NEXT: [[TMP47:%.*]] = add i16 [[OFFSET_IDX]], -7 -; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP47]] +; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 5 ; IC4VF4-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP48]], align 1 ; IC4VF4-NEXT: [[TMP50:%.*]] = insertelement <4 x i16> [[TMP45]], i16 [[TMP49]], i32 3 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE28]] ; IC4VF4: [[PRED_LOAD_CONTINUE28]]: ; IC4VF4-NEXT: [[TMP51:%.*]] = phi <4 x i16> [ [[TMP45]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP50]], %[[PRED_LOAD_IF27]] ] -; IC4VF4-NEXT: [[TMP52:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 -; IC4VF4-NEXT: br i1 [[TMP52]], label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]] ; IC4VF4: [[PRED_LOAD_IF29]]: -; IC4VF4-NEXT: [[TMP53:%.*]] = add i16 [[OFFSET_IDX]], -8 -; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP53]] +; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 4 ; IC4VF4-NEXT: [[TMP55:%.*]] = load i16, ptr [[TMP54]], align 1 ; IC4VF4-NEXT: [[TMP56:%.*]] = insertelement <4 x i16> poison, i16 [[TMP55]], i32 0 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE30]] ; IC4VF4: [[PRED_LOAD_CONTINUE30]]: ; IC4VF4-NEXT: [[TMP57:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP56]], %[[PRED_LOAD_IF29]] ] -; IC4VF4-NEXT: [[TMP58:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 -; IC4VF4-NEXT: br i1 [[TMP58]], label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]] ; IC4VF4: [[PRED_LOAD_IF31]]: -; IC4VF4-NEXT: [[TMP59:%.*]] = add i16 [[OFFSET_IDX]], -9 -; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP59]] +; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 3 ; IC4VF4-NEXT: [[TMP61:%.*]] = load i16, ptr [[TMP60]], align 1 ; IC4VF4-NEXT: [[TMP62:%.*]] = insertelement <4 x i16> [[TMP57]], i16 [[TMP61]], i32 1 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE32]] ; IC4VF4: [[PRED_LOAD_CONTINUE32]]: ; IC4VF4-NEXT: [[TMP63:%.*]] = phi <4 x i16> [ [[TMP57]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP62]], %[[PRED_LOAD_IF31]] ] -; IC4VF4-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 -; IC4VF4-NEXT: br i1 [[TMP64]], label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]] ; IC4VF4: [[PRED_LOAD_IF33]]: -; IC4VF4-NEXT: [[TMP65:%.*]] = add i16 [[OFFSET_IDX]], -10 -; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP65]] +; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 2 ; IC4VF4-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP66]], align 1 ; IC4VF4-NEXT: [[TMP68:%.*]] = insertelement <4 x i16> [[TMP63]], i16 [[TMP67]], i32 2 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE34]] ; IC4VF4: [[PRED_LOAD_CONTINUE34]]: ; IC4VF4-NEXT: [[TMP69:%.*]] = phi <4 x i16> [ [[TMP63]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP68]], %[[PRED_LOAD_IF33]] ] -; IC4VF4-NEXT: [[TMP70:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 -; IC4VF4-NEXT: br i1 [[TMP70]], label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]] ; IC4VF4: [[PRED_LOAD_IF35]]: -; IC4VF4-NEXT: [[TMP71:%.*]] = add i16 [[OFFSET_IDX]], -11 -; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP71]] +; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 1 ; IC4VF4-NEXT: [[TMP73:%.*]] = load i16, ptr [[TMP72]], align 1 ; IC4VF4-NEXT: [[TMP74:%.*]] = insertelement <4 x i16> [[TMP69]], i16 [[TMP73]], i32 3 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE36]] ; IC4VF4: [[PRED_LOAD_CONTINUE36]]: ; IC4VF4-NEXT: [[TMP75:%.*]] = phi <4 x i16> [ [[TMP69]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP74]], %[[PRED_LOAD_IF35]] ] -; IC4VF4-NEXT: [[TMP76:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; IC4VF4-NEXT: br i1 [[TMP76]], label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]] +; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]] ; IC4VF4: [[PRED_LOAD_IF37]]: -; IC4VF4-NEXT: [[TMP77:%.*]] = add i16 [[OFFSET_IDX]], -12 -; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP77]] +; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 0 ; IC4VF4-NEXT: [[TMP79:%.*]] = load i16, ptr [[TMP78]], align 1 ; IC4VF4-NEXT: [[TMP80:%.*]] = insertelement <4 x i16> poison, i16 [[TMP79]], i32 0 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE38]] ; IC4VF4: [[PRED_LOAD_CONTINUE38]]: ; IC4VF4-NEXT: [[TMP81:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP80]], %[[PRED_LOAD_IF37]] ] -; IC4VF4-NEXT: [[TMP82:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; IC4VF4-NEXT: br i1 [[TMP82]], label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]] +; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]] ; IC4VF4: [[PRED_LOAD_IF39]]: -; IC4VF4-NEXT: [[TMP83:%.*]] = add i16 [[OFFSET_IDX]], -13 -; IC4VF4-NEXT: [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP83]] +; IC4VF4-NEXT: [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -1 ; IC4VF4-NEXT: [[TMP85:%.*]] = load i16, ptr [[TMP84]], align 1 ; IC4VF4-NEXT: [[TMP86:%.*]] = insertelement <4 x i16> [[TMP81]], i16 [[TMP85]], i32 1 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE40]] ; IC4VF4: [[PRED_LOAD_CONTINUE40]]: ; IC4VF4-NEXT: [[TMP87:%.*]] = phi <4 x i16> [ [[TMP81]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP86]], %[[PRED_LOAD_IF39]] ] -; IC4VF4-NEXT: [[TMP88:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; IC4VF4-NEXT: br i1 [[TMP88]], label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]] +; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]] ; IC4VF4: [[PRED_LOAD_IF41]]: -; IC4VF4-NEXT: [[TMP89:%.*]] = add i16 [[OFFSET_IDX]], -14 -; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP89]] +; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -2 ; IC4VF4-NEXT: [[TMP91:%.*]] = load i16, ptr [[TMP90]], align 1 ; IC4VF4-NEXT: [[TMP92:%.*]] = insertelement <4 x i16> [[TMP87]], i16 [[TMP91]], i32 2 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE42]] ; IC4VF4: [[PRED_LOAD_CONTINUE42]]: ; IC4VF4-NEXT: [[TMP93:%.*]] = phi <4 x i16> [ [[TMP87]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP92]], %[[PRED_LOAD_IF41]] ] -; IC4VF4-NEXT: [[TMP94:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; IC4VF4-NEXT: br i1 [[TMP94]], label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44]] +; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44:.*]] ; IC4VF4: [[PRED_LOAD_IF43]]: -; IC4VF4-NEXT: [[TMP95:%.*]] = add i16 [[OFFSET_IDX]], -15 -; IC4VF4-NEXT: [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP95]] +; IC4VF4-NEXT: [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -3 ; IC4VF4-NEXT: [[TMP97:%.*]] = load i16, ptr [[TMP96]], align 1 ; IC4VF4-NEXT: [[TMP98:%.*]] = insertelement <4 x i16> [[TMP93]], i16 [[TMP97]], i32 3 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE44]] @@ -379,22 +326,16 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; IC4VF4-NEXT: [[TMP101:%.*]] = icmp ugt <4 x i16> [[TMP51]], [[BROADCAST_SPLAT]] ; IC4VF4-NEXT: [[TMP102:%.*]] = icmp ugt <4 x i16> [[TMP75]], [[BROADCAST_SPLAT]] ; IC4VF4-NEXT: [[TMP103:%.*]] = icmp ugt <4 x i16> [[TMP99]], [[BROADCAST_SPLAT]] -; IC4VF4-NEXT: [[TMP104:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1) -; IC4VF4-NEXT: [[TMP105:%.*]] = add nsw <4 x i16> [[STEP_ADD]], splat (i16 -1) -; IC4VF4-NEXT: [[TMP106:%.*]] = add nsw <4 x i16> [[STEP_ADD_2]], splat (i16 -1) -; IC4VF4-NEXT: [[TMP107:%.*]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -1) -; IC4VF4-NEXT: [[TMP108]] = select <4 x i1> [[TMP100]], <4 x i16> [[TMP104]], <4 x i16> [[VEC_PHI]] -; IC4VF4-NEXT: [[TMP109]] = select <4 x i1> [[TMP101]], <4 x i16> [[TMP105]], <4 x i16> [[VEC_PHI1]] -; IC4VF4-NEXT: [[TMP110]] = select <4 x i1> [[TMP102]], <4 x i16> [[TMP106]], <4 x i16> [[VEC_PHI2]] -; IC4VF4-NEXT: [[TMP111]] = select <4 x i1> [[TMP103]], <4 x i16> [[TMP107]], <4 x i16> [[VEC_PHI3]] -; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 -; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -4) -; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; IC4VF4-NEXT: [[TMP76:%.*]] = select <4 x i1> [[TMP100]], <4 x i16> , <4 x i16> splat (i16 32767) +; IC4VF4-NEXT: [[TMP77:%.*]] = select <4 x i1> [[TMP101]], <4 x i16> , <4 x i16> splat (i16 32767) +; IC4VF4-NEXT: [[TMP70:%.*]] = select <4 x i1> [[TMP102]], <4 x i16> , <4 x i16> splat (i16 32767) +; IC4VF4-NEXT: [[TMP71:%.*]] = select <4 x i1> [[TMP103]], <4 x i16> , <4 x i16> splat (i16 32767) +; IC4VF4-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; IC4VF4: [[MIDDLE_BLOCK]]: -; IC4VF4-NEXT: [[TMP112:%.*]] = select <4 x i1> [[TMP0]], <4 x i16> [[TMP108]], <4 x i16> [[VEC_PHI]] -; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]] -; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]] -; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]] +; IC4VF4-NEXT: [[TMP112:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP76]], <4 x i16> splat (i16 32767) +; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP77]], <4 x i16> splat (i16 32767) +; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP70]], <4 x i16> splat (i16 32767) +; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> zeroinitializer, <4 x i16> [[TMP71]], <4 x i16> splat (i16 32767) ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]]) ; IC4VF4-NEXT: [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]]) ; IC4VF4-NEXT: [[RDX_MINMAX46:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX45]], <4 x i16> [[TMP115]]) @@ -519,182 +460,129 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC4VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x half> [[BROADCAST_SPLATINSERT]], <4 x half> poison, <4 x i32> zeroinitializer ; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]] ; IC4VF4: [[VECTOR_BODY]]: -; IC4VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44:.*]] ] -; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44]] ] -; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP108:%.*]], %[[PRED_LOAD_CONTINUE44]] ] -; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP109:%.*]], %[[PRED_LOAD_CONTINUE44]] ] -; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP110:%.*]], %[[PRED_LOAD_CONTINUE44]] ] -; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP111:%.*]], %[[PRED_LOAD_CONTINUE44]] ] -; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 -4) -; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 -4) -; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i16> [[STEP_ADD_2]], splat (i16 -4) -; IC4VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 -; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]] -; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0 -; IC4VF4-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer -; IC4VF4-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], -; IC4VF4-NEXT: [[VEC_IV8:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], -; IC4VF4-NEXT: [[VEC_IV11:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], -; IC4VF4-NEXT: [[VEC_IV14:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], -; IC4VF4-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 11) -; IC4VF4-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV8]], splat (i32 11) -; IC4VF4-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV11]], splat (i32 11) -; IC4VF4-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV14]], splat (i32 11) -; IC4VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 -; IC4VF4-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; IC4VF4: [[PRED_LOAD_IF]]: -; IC4VF4-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 0 -; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP5]] +; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 12 ; IC4VF4-NEXT: [[TMP7:%.*]] = load half, ptr [[TMP6]], align 1 ; IC4VF4-NEXT: [[TMP8:%.*]] = insertelement <4 x half> poison, half [[TMP7]], i32 0 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE]] ; IC4VF4: [[PRED_LOAD_CONTINUE]]: ; IC4VF4-NEXT: [[TMP9:%.*]] = phi <4 x half> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ] -; IC4VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 -; IC4VF4-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]] ; IC4VF4: [[PRED_LOAD_IF15]]: -; IC4VF4-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], -1 -; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP11]] +; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 11 ; IC4VF4-NEXT: [[TMP13:%.*]] = load half, ptr [[TMP12]], align 1 ; IC4VF4-NEXT: [[TMP14:%.*]] = insertelement <4 x half> [[TMP9]], half [[TMP13]], i32 1 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE16]] ; IC4VF4: [[PRED_LOAD_CONTINUE16]]: ; IC4VF4-NEXT: [[TMP15:%.*]] = phi <4 x half> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF15]] ] -; IC4VF4-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 -; IC4VF4-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]] ; IC4VF4: [[PRED_LOAD_IF17]]: -; IC4VF4-NEXT: [[TMP17:%.*]] = add i16 [[OFFSET_IDX]], -2 -; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP17]] +; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 10 ; IC4VF4-NEXT: [[TMP19:%.*]] = load half, ptr [[TMP18]], align 1 ; IC4VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x half> [[TMP15]], half [[TMP19]], i32 2 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE18]] ; IC4VF4: [[PRED_LOAD_CONTINUE18]]: ; IC4VF4-NEXT: [[TMP21:%.*]] = phi <4 x half> [ [[TMP15]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP20]], %[[PRED_LOAD_IF17]] ] -; IC4VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 -; IC4VF4-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]] ; IC4VF4: [[PRED_LOAD_IF19]]: -; IC4VF4-NEXT: [[TMP23:%.*]] = add i16 [[OFFSET_IDX]], -3 -; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP23]] +; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 9 ; IC4VF4-NEXT: [[TMP25:%.*]] = load half, ptr [[TMP24]], align 1 ; IC4VF4-NEXT: [[TMP26:%.*]] = insertelement <4 x half> [[TMP21]], half [[TMP25]], i32 3 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE20]] ; IC4VF4: [[PRED_LOAD_CONTINUE20]]: ; IC4VF4-NEXT: [[TMP27:%.*]] = phi <4 x half> [ [[TMP21]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP26]], %[[PRED_LOAD_IF19]] ] -; IC4VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; IC4VF4-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]] ; IC4VF4: [[PRED_LOAD_IF21]]: -; IC4VF4-NEXT: [[TMP29:%.*]] = add i16 [[OFFSET_IDX]], -4 -; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP29]] +; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 8 ; IC4VF4-NEXT: [[TMP31:%.*]] = load half, ptr [[TMP30]], align 1 ; IC4VF4-NEXT: [[TMP32:%.*]] = insertelement <4 x half> poison, half [[TMP31]], i32 0 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE22]] ; IC4VF4: [[PRED_LOAD_CONTINUE22]]: ; IC4VF4-NEXT: [[TMP33:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP32]], %[[PRED_LOAD_IF21]] ] -; IC4VF4-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 -; IC4VF4-NEXT: br i1 [[TMP34]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]] ; IC4VF4: [[PRED_LOAD_IF23]]: -; IC4VF4-NEXT: [[TMP35:%.*]] = add i16 [[OFFSET_IDX]], -5 -; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP35]] +; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 7 ; IC4VF4-NEXT: [[TMP37:%.*]] = load half, ptr [[TMP36]], align 1 ; IC4VF4-NEXT: [[TMP38:%.*]] = insertelement <4 x half> [[TMP33]], half [[TMP37]], i32 1 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE24]] ; IC4VF4: [[PRED_LOAD_CONTINUE24]]: ; IC4VF4-NEXT: [[TMP39:%.*]] = phi <4 x half> [ [[TMP33]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP38]], %[[PRED_LOAD_IF23]] ] -; IC4VF4-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 -; IC4VF4-NEXT: br i1 [[TMP40]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]] ; IC4VF4: [[PRED_LOAD_IF25]]: -; IC4VF4-NEXT: [[TMP41:%.*]] = add i16 [[OFFSET_IDX]], -6 -; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP41]] +; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 6 ; IC4VF4-NEXT: [[TMP43:%.*]] = load half, ptr [[TMP42]], align 1 ; IC4VF4-NEXT: [[TMP44:%.*]] = insertelement <4 x half> [[TMP39]], half [[TMP43]], i32 2 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE26]] ; IC4VF4: [[PRED_LOAD_CONTINUE26]]: ; IC4VF4-NEXT: [[TMP45:%.*]] = phi <4 x half> [ [[TMP39]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP44]], %[[PRED_LOAD_IF25]] ] -; IC4VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 -; IC4VF4-NEXT: br i1 [[TMP46]], label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]] ; IC4VF4: [[PRED_LOAD_IF27]]: -; IC4VF4-NEXT: [[TMP47:%.*]] = add i16 [[OFFSET_IDX]], -7 -; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP47]] +; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 5 ; IC4VF4-NEXT: [[TMP49:%.*]] = load half, ptr [[TMP48]], align 1 ; IC4VF4-NEXT: [[TMP50:%.*]] = insertelement <4 x half> [[TMP45]], half [[TMP49]], i32 3 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE28]] ; IC4VF4: [[PRED_LOAD_CONTINUE28]]: ; IC4VF4-NEXT: [[TMP51:%.*]] = phi <4 x half> [ [[TMP45]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP50]], %[[PRED_LOAD_IF27]] ] -; IC4VF4-NEXT: [[TMP52:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 -; IC4VF4-NEXT: br i1 [[TMP52]], label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]] ; IC4VF4: [[PRED_LOAD_IF29]]: -; IC4VF4-NEXT: [[TMP53:%.*]] = add i16 [[OFFSET_IDX]], -8 -; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP53]] +; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 4 ; IC4VF4-NEXT: [[TMP55:%.*]] = load half, ptr [[TMP54]], align 1 ; IC4VF4-NEXT: [[TMP56:%.*]] = insertelement <4 x half> poison, half [[TMP55]], i32 0 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE30]] ; IC4VF4: [[PRED_LOAD_CONTINUE30]]: ; IC4VF4-NEXT: [[TMP57:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP56]], %[[PRED_LOAD_IF29]] ] -; IC4VF4-NEXT: [[TMP58:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 -; IC4VF4-NEXT: br i1 [[TMP58]], label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]] ; IC4VF4: [[PRED_LOAD_IF31]]: -; IC4VF4-NEXT: [[TMP59:%.*]] = add i16 [[OFFSET_IDX]], -9 -; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP59]] +; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 3 ; IC4VF4-NEXT: [[TMP61:%.*]] = load half, ptr [[TMP60]], align 1 ; IC4VF4-NEXT: [[TMP62:%.*]] = insertelement <4 x half> [[TMP57]], half [[TMP61]], i32 1 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE32]] ; IC4VF4: [[PRED_LOAD_CONTINUE32]]: ; IC4VF4-NEXT: [[TMP63:%.*]] = phi <4 x half> [ [[TMP57]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP62]], %[[PRED_LOAD_IF31]] ] -; IC4VF4-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 -; IC4VF4-NEXT: br i1 [[TMP64]], label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]] ; IC4VF4: [[PRED_LOAD_IF33]]: -; IC4VF4-NEXT: [[TMP65:%.*]] = add i16 [[OFFSET_IDX]], -10 -; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP65]] +; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 2 ; IC4VF4-NEXT: [[TMP67:%.*]] = load half, ptr [[TMP66]], align 1 ; IC4VF4-NEXT: [[TMP68:%.*]] = insertelement <4 x half> [[TMP63]], half [[TMP67]], i32 2 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE34]] ; IC4VF4: [[PRED_LOAD_CONTINUE34]]: ; IC4VF4-NEXT: [[TMP69:%.*]] = phi <4 x half> [ [[TMP63]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP68]], %[[PRED_LOAD_IF33]] ] -; IC4VF4-NEXT: [[TMP70:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 -; IC4VF4-NEXT: br i1 [[TMP70]], label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]] +; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]] ; IC4VF4: [[PRED_LOAD_IF35]]: -; IC4VF4-NEXT: [[TMP71:%.*]] = add i16 [[OFFSET_IDX]], -11 -; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP71]] +; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 1 ; IC4VF4-NEXT: [[TMP73:%.*]] = load half, ptr [[TMP72]], align 1 ; IC4VF4-NEXT: [[TMP74:%.*]] = insertelement <4 x half> [[TMP69]], half [[TMP73]], i32 3 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE36]] ; IC4VF4: [[PRED_LOAD_CONTINUE36]]: ; IC4VF4-NEXT: [[TMP75:%.*]] = phi <4 x half> [ [[TMP69]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP74]], %[[PRED_LOAD_IF35]] ] -; IC4VF4-NEXT: [[TMP76:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; IC4VF4-NEXT: br i1 [[TMP76]], label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]] +; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]] ; IC4VF4: [[PRED_LOAD_IF37]]: -; IC4VF4-NEXT: [[TMP77:%.*]] = add i16 [[OFFSET_IDX]], -12 -; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP77]] +; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 0 ; IC4VF4-NEXT: [[TMP79:%.*]] = load half, ptr [[TMP78]], align 1 ; IC4VF4-NEXT: [[TMP80:%.*]] = insertelement <4 x half> poison, half [[TMP79]], i32 0 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE38]] ; IC4VF4: [[PRED_LOAD_CONTINUE38]]: ; IC4VF4-NEXT: [[TMP81:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP80]], %[[PRED_LOAD_IF37]] ] -; IC4VF4-NEXT: [[TMP82:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; IC4VF4-NEXT: br i1 [[TMP82]], label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]] +; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]] ; IC4VF4: [[PRED_LOAD_IF39]]: -; IC4VF4-NEXT: [[TMP83:%.*]] = add i16 [[OFFSET_IDX]], -13 -; IC4VF4-NEXT: [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP83]] +; IC4VF4-NEXT: [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -1 ; IC4VF4-NEXT: [[TMP85:%.*]] = load half, ptr [[TMP84]], align 1 ; IC4VF4-NEXT: [[TMP86:%.*]] = insertelement <4 x half> [[TMP81]], half [[TMP85]], i32 1 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE40]] ; IC4VF4: [[PRED_LOAD_CONTINUE40]]: ; IC4VF4-NEXT: [[TMP87:%.*]] = phi <4 x half> [ [[TMP81]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP86]], %[[PRED_LOAD_IF39]] ] -; IC4VF4-NEXT: [[TMP88:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; IC4VF4-NEXT: br i1 [[TMP88]], label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]] +; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]] ; IC4VF4: [[PRED_LOAD_IF41]]: -; IC4VF4-NEXT: [[TMP89:%.*]] = add i16 [[OFFSET_IDX]], -14 -; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP89]] +; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -2 ; IC4VF4-NEXT: [[TMP91:%.*]] = load half, ptr [[TMP90]], align 1 ; IC4VF4-NEXT: [[TMP92:%.*]] = insertelement <4 x half> [[TMP87]], half [[TMP91]], i32 2 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE42]] ; IC4VF4: [[PRED_LOAD_CONTINUE42]]: ; IC4VF4-NEXT: [[TMP93:%.*]] = phi <4 x half> [ [[TMP87]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP92]], %[[PRED_LOAD_IF41]] ] -; IC4VF4-NEXT: [[TMP94:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; IC4VF4-NEXT: br i1 [[TMP94]], label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44]] +; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44:.*]] ; IC4VF4: [[PRED_LOAD_IF43]]: -; IC4VF4-NEXT: [[TMP95:%.*]] = add i16 [[OFFSET_IDX]], -15 -; IC4VF4-NEXT: [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP95]] +; IC4VF4-NEXT: [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -3 ; IC4VF4-NEXT: [[TMP97:%.*]] = load half, ptr [[TMP96]], align 1 ; IC4VF4-NEXT: [[TMP98:%.*]] = insertelement <4 x half> [[TMP93]], half [[TMP97]], i32 3 ; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE44]] @@ -704,22 +592,16 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC4VF4-NEXT: [[TMP101:%.*]] = fcmp ugt <4 x half> [[TMP51]], [[BROADCAST_SPLAT]] ; IC4VF4-NEXT: [[TMP102:%.*]] = fcmp ugt <4 x half> [[TMP75]], [[BROADCAST_SPLAT]] ; IC4VF4-NEXT: [[TMP103:%.*]] = fcmp ugt <4 x half> [[TMP99]], [[BROADCAST_SPLAT]] -; IC4VF4-NEXT: [[TMP104:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1) -; IC4VF4-NEXT: [[TMP105:%.*]] = add nsw <4 x i16> [[STEP_ADD]], splat (i16 -1) -; IC4VF4-NEXT: [[TMP106:%.*]] = add nsw <4 x i16> [[STEP_ADD_2]], splat (i16 -1) -; IC4VF4-NEXT: [[TMP107:%.*]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -1) -; IC4VF4-NEXT: [[TMP108]] = select <4 x i1> [[TMP100]], <4 x i16> [[TMP104]], <4 x i16> [[VEC_PHI]] -; IC4VF4-NEXT: [[TMP109]] = select <4 x i1> [[TMP101]], <4 x i16> [[TMP105]], <4 x i16> [[VEC_PHI1]] -; IC4VF4-NEXT: [[TMP110]] = select <4 x i1> [[TMP102]], <4 x i16> [[TMP106]], <4 x i16> [[VEC_PHI2]] -; IC4VF4-NEXT: [[TMP111]] = select <4 x i1> [[TMP103]], <4 x i16> [[TMP107]], <4 x i16> [[VEC_PHI3]] -; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 -; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -4) -; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IC4VF4-NEXT: [[TMP76:%.*]] = select <4 x i1> [[TMP100]], <4 x i16> , <4 x i16> splat (i16 32767) +; IC4VF4-NEXT: [[TMP77:%.*]] = select <4 x i1> [[TMP101]], <4 x i16> , <4 x i16> splat (i16 32767) +; IC4VF4-NEXT: [[TMP70:%.*]] = select <4 x i1> [[TMP102]], <4 x i16> , <4 x i16> splat (i16 32767) +; IC4VF4-NEXT: [[TMP71:%.*]] = select <4 x i1> [[TMP103]], <4 x i16> , <4 x i16> splat (i16 32767) +; IC4VF4-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; IC4VF4: [[MIDDLE_BLOCK]]: -; IC4VF4-NEXT: [[TMP112:%.*]] = select <4 x i1> [[TMP0]], <4 x i16> [[TMP108]], <4 x i16> [[VEC_PHI]] -; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]] -; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]] -; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]] +; IC4VF4-NEXT: [[TMP112:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP76]], <4 x i16> splat (i16 32767) +; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP77]], <4 x i16> splat (i16 32767) +; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP70]], <4 x i16> splat (i16 32767) +; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> zeroinitializer, <4 x i16> [[TMP71]], <4 x i16> splat (i16 32767) ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]]) ; IC4VF4-NEXT: [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]]) ; IC4VF4-NEXT: [[RDX_MINMAX46:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX45]], <4 x i16> [[TMP115]]) @@ -873,7 +755,7 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[STEP_ADD_3]], splat (i64 -4) ; IC4VF4-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808 -; IC4VF4-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; IC4VF4-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IC4VF4: [[MIDDLE_BLOCK]]: ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP13]], <4 x i64> [[TMP14]]) ; IC4VF4-NEXT: [[RDX_MINMAX10:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP15]]) @@ -1102,7 +984,7 @@ define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(ptr %a, ptr % ; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -4) ; IC4VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], -16 -; IC4VF4-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IC4VF4-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IC4VF4: [[MIDDLE_BLOCK]]: ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP27]], <4 x i64> [[TMP28]]) ; IC4VF4-NEXT: [[RDX_MINMAX18:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP29]]) @@ -1124,7 +1006,7 @@ define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(ptr %a, ptr % ; IC4VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i8 [[LD_A]], [[LD_B]] ; IC4VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]] ; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0 -; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; IC4VF4: [[EXIT]]: ; IC4VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ] ; IC4VF4-NEXT: ret i64 [[COND_LCSSA]] @@ -1363,7 +1245,7 @@ define i64 @select_decreasing_induction_icmp_non_const_start(ptr %a, ptr %b, i64 ; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[STEP_ADD_3]], splat (i64 -4) ; IC4VF4-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IC4VF4-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; IC4VF4-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IC4VF4: [[MIDDLE_BLOCK]]: ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[TMP34]], <4 x i64> [[TMP35]]) ; IC4VF4-NEXT: [[RDX_MINMAX23:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP36]]) @@ -1392,7 +1274,7 @@ define i64 @select_decreasing_induction_icmp_non_const_start(ptr %a, ptr %b, i64 ; IC4VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]] ; IC4VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]] ; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1 -; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP9:![0-9]+]] +; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; IC4VF4: [[EXIT]]: ; IC4VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; IC4VF4-NEXT: ret i64 [[COND_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll index 5e88072517b37..b5b1879ec6c25 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll @@ -17,58 +17,45 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ] -; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10) -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 -; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]] +; CHECK-NEXT: br i1 false, label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]] ; CHECK: [[PRED_SDIV_IF]]: ; CHECK-NEXT: [[TMP2:%.*]] = sdiv i16 24316, [[OFF]] ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i32 0 ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE]] ; CHECK: [[PRED_SDIV_CONTINUE]]: ; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP3]], %[[PRED_SDIV_IF]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 -; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_SDIV_IF1:.*]], label %[[PRED_SDIV_CONTINUE2:.*]] +; CHECK-NEXT: br i1 true, label %[[PRED_SDIV_IF1:.*]], label %[[PRED_SDIV_CONTINUE2:.*]] ; CHECK: [[PRED_SDIV_IF1]]: ; CHECK-NEXT: [[TMP18:%.*]] = sdiv i16 24316, [[OFF]] ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i16> [[TMP4]], i16 [[TMP18]], i32 1 ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE2]] ; CHECK: [[PRED_SDIV_CONTINUE2]]: ; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ [[TMP4]], %[[PRED_SDIV_CONTINUE]] ], [ [[TMP7]], %[[PRED_SDIV_IF1]] ] -; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i16> [[VEC_IND]], splat (i16 16383) ; CHECK-NEXT: [[TMP22:%.*]] = shl <2 x i16> [[TMP8]], splat (i16 14) -; CHECK-NEXT: [[TMP23:%.*]] = sub <2 x i16> [[TMP21]], [[TMP22]] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 -; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK-NEXT: [[TMP23:%.*]] = sub <2 x i16> , [[TMP22]] +; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i16> [[TMP23]], i32 0 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP13]] ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP24]], align 1 -; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 9 ; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP11]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 -; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4]] +; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[EXIT:.*]] ; CHECK: [[PRED_STORE_IF3]]: ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i16> [[TMP23]], i32 1 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP25]] ; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP20]], align 1 -; CHECK-NEXT: [[TMP16:%.*]] = add i16 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 10 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 1 -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]] -; CHECK: [[PRED_STORE_CONTINUE4]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: br label %[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br label %[[EXIT1:.*]] +; CHECK: [[EXIT1]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -111,40 +98,30 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) { ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] -; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10) -; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> [[VEC_IND]], [[TMP1]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 -; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> , [[TMP1]] +; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP14]], align 1 -; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 9 ; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 -; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] +; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[EXIT:.*]] ; CHECK: [[PRED_STORE_IF1]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i16> [[TMP3]], i32 1 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP15]] ; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP16]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 10 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 1 -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] -; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: br label %[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br label %[[EXIT1:.*]] +; CHECK: [[EXIT1]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -183,38 +160,28 @@ define void @ptr_doesnt_depend_on_poison_or_ub(ptr noalias %dst, i16 noundef %of ; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], [[OFF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] -; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10) -; CHECK-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = add i16 9, [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr @src, i16 [[TMP3]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 -; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 9 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0 ; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 -; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] +; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[EXIT:.*]] ; CHECK: [[PRED_STORE_IF1]]: -; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 10 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 1 -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] -; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: br label %[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br label %[[EXIT1:.*]] +; CHECK: [[EXIT1]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -252,45 +219,35 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] -; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]] ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @src, align 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10) ; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i16> splat (i16 1), [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> [[TMP2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i16> [[VEC_IND]], [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 -; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i16> , [[TMP3]] +; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP6]] ; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP15]], align 1 -; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 9 ; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 -; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] +; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[EXIT:.*]] ; CHECK: [[PRED_STORE_IF1]]: ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP16]] ; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP17]], align 1 -; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 10 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 1 -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] -; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: br label %[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br label %[[EXIT1:.*]] +; CHECK: [[EXIT1]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -329,41 +286,31 @@ define void @ptr_depends_on_noundef_load(ptr noalias %dst) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] -; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]] -; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @src, align 1, !noundef [[META10:![0-9]+]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10) +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @src, align 1, !noundef [[META0:![0-9]+]] ; CHECK-NEXT: [[TMP2:%.*]] = sub i16 1, [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = add i16 [[TMP2]], [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = add i16 9, [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr @src, i16 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP5]], align 1 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 -; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK-NEXT: br i1 false, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 9 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0 ; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 -; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] +; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[EXIT:.*]] ; CHECK: [[PRED_STORE_IF1]]: -; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 10 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 1 -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] -; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: br label %[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br label %[[EXIT1:.*]] +; CHECK: [[EXIT1]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header diff --git a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll index c9cc8060ff498..7119728d493d3 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll @@ -48,57 +48,43 @@ define i32 @chained_smax(i32 %x, ptr %src) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 1) -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[VEC_PHI]]) -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 -; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[BROADCAST_SPLAT]], <4 x i32> zeroinitializer) +; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 0 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] ; CHECK: [[PRED_LOAD_CONTINUE]]: ; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 -; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] +; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] ; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 1 ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP11]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 -; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] +; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] ; CHECK: [[PRED_LOAD_IF3]]: -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 2 ; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP17]], i32 2 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]] ; CHECK: [[PRED_LOAD_CONTINUE4]]: ; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], %[[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 -; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]] +; CHECK-NEXT: br i1 false, label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] ; CHECK: [[PRED_LOAD_IF5]]: -; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 3 ; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP23]], i32 3 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] ; CHECK: [[PRED_LOAD_CONTINUE6]]: -; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i32> [ [[TMP19]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], %[[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP26]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP25]], <4 x i32> [[TMP1]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ [[TMP19]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], %[[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP21:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP20]], <4 x i32> [[TMP1]]) +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP26]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> , <4 x i32> [[TMP21]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP27]]) ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[EXIT]]: @@ -141,7 +127,7 @@ define void @smax_with_invariant_store_user(ptr noalias %src, ptr %dst, i64 %n) ; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]]) ; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4 @@ -160,7 +146,7 @@ define void @smax_with_invariant_store_user(ptr noalias %src, ptr %dst, i64 %n) ; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -201,7 +187,7 @@ define void @smax_with_multiple_invariant_store_user_same_addr(ptr noalias %src, ; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]]) ; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4 @@ -221,7 +207,7 @@ define void @smax_with_multiple_invariant_store_user_same_addr(ptr noalias %src, ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -301,7 +287,7 @@ define void @smax_with_multiple_invariant_store_user_same_addr3(ptr noalias %src ; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]]) ; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4 @@ -321,7 +307,7 @@ define void @smax_with_multiple_invariant_store_user_same_addr3(ptr noalias %src ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -478,7 +464,7 @@ define i32 @test_predicated_smin(ptr %src) { ; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[PREDPHI]]) ; CHECK-NEXT: br label %[[EXIT:.*]] @@ -545,7 +531,7 @@ define i32 @smax_reduction_multiple_incoming(ptr %src, i32 %n, i1 %cond) { ; CHECK-NEXT: [[TMP5]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] @@ -562,7 +548,7 @@ define i32 @smax_reduction_multiple_incoming(ptr %src, i32 %n, i1 %cond) { ; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]]) ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi i32 [ [[MAX_NEXT]], %[[LOOP_HEADER]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[MAX_NEXT_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll index 97d57a0cf83a0..3b7a0d348fbfb 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll @@ -26,6 +26,8 @@ define noundef i32 @f(i32 noundef %g) { ; VF4IC2-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; VF4IC2-NEXT: br i1 [[TMP7]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; VF4IC2: [[MIDDLE_BLOCK]]: +; VF4IC2-NEXT: br label %[[MIDDLE_BLOCK1:.*]] +; VF4IC2: [[MIDDLE_BLOCK1]]: ; VF4IC2-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 ; VF4IC2-NEXT: br label %[[RETURN:.*]] ; VF4IC2: [[VECTOR_EARLY_EXIT]]: @@ -38,7 +40,7 @@ define noundef i32 @f(i32 noundef %g) { ; VF4IC2-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 ; VF4IC2-NEXT: br label %[[RETURN]] ; VF4IC2: [[RETURN]]: -; VF4IC2-NEXT: [[RES:%.*]] = phi i32 [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ [[TMP15]], %[[VECTOR_EARLY_EXIT]] ] +; VF4IC2-NEXT: [[RES:%.*]] = phi i32 [ [[TMP8]], %[[MIDDLE_BLOCK1]] ], [ [[TMP15]], %[[VECTOR_EARLY_EXIT]] ] ; VF4IC2-NEXT: ret i32 [[RES]] ; ; VF8IC1-LABEL: define noundef i32 @f( @@ -56,6 +58,8 @@ define noundef i32 @f(i32 noundef %g) { ; VF8IC1-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP8]]) ; VF8IC1-NEXT: br i1 [[TMP3]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; VF8IC1: [[MIDDLE_BLOCK]]: +; VF8IC1-NEXT: br label %[[MIDDLE_BLOCK1:.*]] +; VF8IC1: [[MIDDLE_BLOCK1]]: ; VF8IC1-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7 ; VF8IC1-NEXT: br label %[[RETURN:.*]] ; VF8IC1: [[VECTOR_EARLY_EXIT]]: @@ -63,7 +67,7 @@ define noundef i32 @f(i32 noundef %g) { ; VF8IC1-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; VF8IC1-NEXT: br label %[[RETURN]] ; VF8IC1: [[RETURN]]: -; VF8IC1-NEXT: [[RES:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[TMP6]], %[[VECTOR_EARLY_EXIT]] ] +; VF8IC1-NEXT: [[RES:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK1]] ], [ [[TMP6]], %[[VECTOR_EARLY_EXIT]] ] ; VF8IC1-NEXT: ret i32 [[RES]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll index b93215035cebf..1857b5797a5db 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll @@ -53,10 +53,12 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; VF8UF2: [[MIDDLE_BLOCK]]: ; VF8UF2-NEXT: br label %[[EXIT:.*]] -; VF8UF2: [[VECTOR_EARLY_EXIT]]: -; VF8UF2-NEXT: br label %[[EXIT]] ; VF8UF2: [[EXIT]]: -; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_BLOCK]] ] +; VF8UF2-NEXT: br label %[[EXIT1:.*]] +; VF8UF2: [[VECTOR_EARLY_EXIT]]: +; VF8UF2-NEXT: br label %[[EXIT1]] +; VF8UF2: [[EXIT1]]: +; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[EXIT]] ] ; VF8UF2-NEXT: ret i8 [[RES]] ; ; VF16UF1-LABEL: define i8 @test_early_exit_max_tc_less_than_16( @@ -73,10 +75,12 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF16UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; VF16UF1: [[MIDDLE_BLOCK]]: ; VF16UF1-NEXT: br label %[[EXIT:.*]] -; VF16UF1: [[VECTOR_EARLY_EXIT]]: -; VF16UF1-NEXT: br label %[[EXIT]] ; VF16UF1: [[EXIT]]: -; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_BLOCK]] ] +; VF16UF1-NEXT: br label %[[EXIT1:.*]] +; VF16UF1: [[VECTOR_EARLY_EXIT]]: +; VF16UF1-NEXT: br label %[[EXIT1]] +; VF16UF1: [[EXIT1]]: +; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[EXIT]] ] ; VF16UF1-NEXT: ret i8 [[RES]] ; entry: @@ -147,6 +151,8 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; VF8UF2: [[MIDDLE_BLOCK]]: ; VF8UF2-NEXT: br label %[[EXIT:.*]] +; VF8UF2: [[EXIT]]: +; VF8UF2-NEXT: br label %[[EXIT1:.*]] ; VF8UF2: [[VECTOR_EARLY_EXIT]]: ; VF8UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 false) ; VF8UF2-NEXT: [[TMP7:%.*]] = add i64 8, [[TMP5]] @@ -154,9 +160,9 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF2-NEXT: [[TMP9:%.*]] = add i64 0, [[TMP8]] ; VF8UF2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP8]], 8 ; VF8UF2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 [[TMP7]] -; VF8UF2-NEXT: br label %[[EXIT]] -; VF8UF2: [[EXIT]]: -; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[TMP11]], %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_BLOCK]] ] +; VF8UF2-NEXT: br label %[[EXIT1]] +; VF8UF2: [[EXIT1]]: +; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[TMP11]], %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[EXIT]] ] ; VF8UF2-NEXT: ret i64 [[RES]] ; ; VF16UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside( @@ -173,11 +179,13 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF16UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; VF16UF1: [[MIDDLE_BLOCK]]: ; VF16UF1-NEXT: br label %[[EXIT:.*]] +; VF16UF1: [[EXIT]]: +; VF16UF1-NEXT: br label %[[EXIT1:.*]] ; VF16UF1: [[VECTOR_EARLY_EXIT]]: ; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 false) -; VF16UF1-NEXT: br label %[[EXIT]] -; VF16UF1: [[EXIT]]: -; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[FIRST_ACTIVE_LANE]], %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[MIDDLE_BLOCK]] ] +; VF16UF1-NEXT: br label %[[EXIT1]] +; VF16UF1: [[EXIT1]]: +; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[FIRST_ACTIVE_LANE]], %[[VECTOR_EARLY_EXIT]] ], [ 1, %[[EXIT]] ] ; VF16UF1-NEXT: ret i64 [[RES]] ; entry: @@ -258,22 +266,24 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn ; VF8UF2-NEXT: br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; VF8UF2: [[MIDDLE_BLOCK]]: ; VF8UF2-NEXT: br label %[[SCALAR_PH:.*]] -; VF8UF2: [[VECTOR_EARLY_EXIT]]: -; VF8UF2-NEXT: br label %[[EXIT:.*]] ; VF8UF2: [[SCALAR_PH]]: +; VF8UF2-NEXT: br label %[[EXIT:.*]] +; VF8UF2: [[VECTOR_EARLY_EXIT]]: ; VF8UF2-NEXT: br label %[[LOOP_HEADER:.*]] -; VF8UF2: [[LOOP_HEADER]]: -; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; VF8UF2: [[EXIT]]: +; VF8UF2-NEXT: br label %[[LOOP_HEADER1:.*]] +; VF8UF2: [[LOOP_HEADER1]]: +; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[EXIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] ; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1 ; VF8UF2-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 -; VF8UF2-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]] +; VF8UF2-NEXT: br i1 [[C]], label %[[LOOP_HEADER]], label %[[LOOP_LATCH]] ; VF8UF2: [[LOOP_LATCH]]: ; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17 -; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]] -; VF8UF2: [[EXIT]]: -; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] +; VF8UF2-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[LOOP_HEADER1]], !llvm.loop [[LOOP0:![0-9]+]] +; VF8UF2: [[LOOP_HEADER]]: +; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER1]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] ; VF8UF2-NEXT: ret i8 [[RES]] ; ; VF16UF1-LABEL: define i8 @test_early_exit_max_vector_tc_eq_16( @@ -290,22 +300,24 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn ; VF16UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; VF16UF1: [[MIDDLE_BLOCK]]: ; VF16UF1-NEXT: br label %[[SCALAR_PH:.*]] -; VF16UF1: [[VECTOR_EARLY_EXIT]]: -; VF16UF1-NEXT: br label %[[EXIT:.*]] ; VF16UF1: [[SCALAR_PH]]: +; VF16UF1-NEXT: br label %[[EXIT:.*]] +; VF16UF1: [[VECTOR_EARLY_EXIT]]: ; VF16UF1-NEXT: br label %[[LOOP_HEADER:.*]] -; VF16UF1: [[LOOP_HEADER]]: -; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; VF16UF1: [[EXIT]]: +; VF16UF1-NEXT: br label %[[LOOP_HEADER1:.*]] +; VF16UF1: [[LOOP_HEADER1]]: +; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[EXIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] ; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1 ; VF16UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 -; VF16UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]] +; VF16UF1-NEXT: br i1 [[C]], label %[[LOOP_HEADER]], label %[[LOOP_LATCH]] ; VF16UF1: [[LOOP_LATCH]]: ; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17 -; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]] -; VF16UF1: [[EXIT]]: -; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] +; VF16UF1-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[LOOP_HEADER1]], !llvm.loop [[LOOP0:![0-9]+]] +; VF16UF1: [[LOOP_HEADER]]: +; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER1]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] ; VF16UF1-NEXT: ret i8 [[RES]] ; entry: @@ -368,28 +380,22 @@ define i1 @test_early_exit_max_tc_less_than_16_non_canonical_iv(ptr dereferencea ; VF8UF2: [[VECTOR_PH]]: ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ] -; VF8UF2-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY_INTERIM]] ] -; VF8UF2-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8) -; VF8UF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]] -; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]] +; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 2 ; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 ; VF8UF2-NEXT: [[TMP4:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer ; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer -; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; VF8UF2-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]] ; VF8UF2-NEXT: [[TMP7:%.*]] = freeze <8 x i1> [[TMP5]] ; VF8UF2-NEXT: [[TMP8:%.*]] = or <8 x i1> [[TMP6]], [[TMP7]] ; VF8UF2-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP8]]) -; VF8UF2-NEXT: [[VEC_IND_NEXT]] = add nsw <8 x i64> [[STEP_ADD]], splat (i64 8) -; VF8UF2-NEXT: br i1 [[TMP9]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]] +; VF8UF2-NEXT: br i1 [[TMP9]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM:.*]] ; VF8UF2: [[VECTOR_BODY_INTERIM]]: -; VF8UF2-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF8UF2: [[MIDDLE_BLOCK]]: ; VF8UF2-NEXT: [[TMP10:%.*]] = zext <8 x i8> [[WIDE_LOAD1]] to <8 x i64> -; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i64> [[TMP10]], [[STEP_ADD]] +; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i64> [[TMP10]], ; VF8UF2-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7 ; VF8UF2-NEXT: br label %[[EXIT:.*]] ; VF8UF2: [[VECTOR_EARLY_EXIT]]: @@ -405,22 +411,17 @@ define i1 @test_early_exit_max_tc_less_than_16_non_canonical_iv(ptr dereferencea ; VF16UF1: [[VECTOR_PH]]: ; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]] ; VF16UF1: [[VECTOR_BODY]]: -; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ] -; VF16UF1-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY_INTERIM]] ] -; VF16UF1-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]] -; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]] +; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 2 ; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer -; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; VF16UF1-NEXT: [[TMP4:%.*]] = freeze <16 x i1> [[TMP3]] ; VF16UF1-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP4]]) -; VF16UF1-NEXT: [[VEC_IND_NEXT]] = add nsw <16 x i64> [[VEC_IND]], splat (i64 16) -; VF16UF1-NEXT: br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]] +; VF16UF1-NEXT: br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM:.*]] ; VF16UF1: [[VECTOR_BODY_INTERIM]]: -; VF16UF1-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF16UF1: [[MIDDLE_BLOCK]]: ; VF16UF1-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i64> -; VF16UF1-NEXT: [[TMP2:%.*]] = icmp eq <16 x i64> [[TMP6]], [[VEC_IND]] +; VF16UF1-NEXT: [[TMP2:%.*]] = icmp eq <16 x i64> [[TMP6]], ; VF16UF1-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP2]], i32 15 ; VF16UF1-NEXT: br label %[[EXIT:.*]] ; VF16UF1: [[VECTOR_EARLY_EXIT]]: diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll index 5da6fc3179043..02846aba50f72 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll @@ -1211,6 +1211,272 @@ loop: exit: ret void } + +; Test that a first-order recurrence with a single vector iteration (where the +; vector loop backedge is removed) does not crash. +define void @first_order_recurrence_single_vector_iteration(ptr noalias %pkt, ptr noalias %dst) { +; VF8UF1-LABEL: define void @first_order_recurrence_single_vector_iteration( +; VF8UF1-SAME: ptr noalias [[PKT:%.*]], ptr noalias [[DST:%.*]]) { +; VF8UF1-NEXT: [[ENTRY:.*:]] +; VF8UF1-NEXT: br label %[[VECTOR_PH:.*]] +; VF8UF1: [[VECTOR_PH]]: +; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]] +; VF8UF1: [[VECTOR_BODY]]: +; VF8UF1-NEXT: [[TMP0:%.*]] = load i8, ptr [[PKT]], align 1 +; VF8UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i64 0 +; VF8UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer +; VF8UF1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> zeroinitializer, <8 x i8> [[BROADCAST_SPLAT]], <8 x i32> +; VF8UF1-NEXT: [[TMP2:%.*]] = extractelement <8 x i8> [[TMP1]], i32 7 +; VF8UF1-NEXT: store i8 [[TMP2]], ptr [[DST]], align 1 +; VF8UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]] +; VF8UF1: [[MIDDLE_BLOCK]]: +; VF8UF1-NEXT: br label %[[EXIT:.*]] +; VF8UF1: [[EXIT]]: +; VF8UF1-NEXT: ret void +; +; VF8UF2-LABEL: define void @first_order_recurrence_single_vector_iteration( +; VF8UF2-SAME: ptr noalias [[PKT:%.*]], ptr noalias [[DST:%.*]]) { +; VF8UF2-NEXT: [[ENTRY:.*:]] +; VF8UF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF8UF2: [[VECTOR_PH]]: +; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF8UF2: [[VECTOR_BODY]]: +; VF8UF2-NEXT: [[TMP0:%.*]] = load i8, ptr [[PKT]], align 1 +; VF8UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i64 0 +; VF8UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer +; VF8UF2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> zeroinitializer, <8 x i8> [[BROADCAST_SPLAT]], <8 x i32> +; VF8UF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLAT]], <8 x i8> [[BROADCAST_SPLAT]], <8 x i32> +; VF8UF2-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; VF8UF2: [[PRED_STORE_IF]]: +; VF8UF2-NEXT: [[TMP3:%.*]] = extractelement <8 x i8> [[TMP1]], i32 0 +; VF8UF2-NEXT: store i8 [[TMP3]], ptr [[DST]], align 1 +; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE]] +; VF8UF2: [[PRED_STORE_CONTINUE]]: +; VF8UF2-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] +; VF8UF2: [[PRED_STORE_IF1]]: +; VF8UF2-NEXT: [[TMP4:%.*]] = extractelement <8 x i8> [[TMP1]], i32 1 +; VF8UF2-NEXT: store i8 [[TMP4]], ptr [[DST]], align 1 +; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE2]] +; VF8UF2: [[PRED_STORE_CONTINUE2]]: +; VF8UF2-NEXT: br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] +; VF8UF2: [[PRED_STORE_IF3]]: +; VF8UF2-NEXT: [[TMP5:%.*]] = extractelement <8 x i8> [[TMP1]], i32 2 +; VF8UF2-NEXT: store i8 [[TMP5]], ptr [[DST]], align 1 +; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE4]] +; VF8UF2: [[PRED_STORE_CONTINUE4]]: +; VF8UF2-NEXT: br i1 true, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] +; VF8UF2: [[PRED_STORE_IF5]]: +; VF8UF2-NEXT: [[TMP6:%.*]] = extractelement <8 x i8> [[TMP1]], i32 3 +; VF8UF2-NEXT: store i8 [[TMP6]], ptr [[DST]], align 1 +; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE6]] +; VF8UF2: [[PRED_STORE_CONTINUE6]]: +; VF8UF2-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] +; VF8UF2: [[PRED_STORE_IF7]]: +; VF8UF2-NEXT: [[TMP7:%.*]] = extractelement <8 x i8> [[TMP1]], i32 4 +; VF8UF2-NEXT: store i8 [[TMP7]], ptr [[DST]], align 1 +; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE8]] +; VF8UF2: [[PRED_STORE_CONTINUE8]]: +; VF8UF2-NEXT: br i1 true, label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] +; VF8UF2: [[PRED_STORE_IF9]]: +; VF8UF2-NEXT: [[TMP8:%.*]] = extractelement <8 x i8> [[TMP1]], i32 5 +; VF8UF2-NEXT: store i8 [[TMP8]], ptr [[DST]], align 1 +; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE10]] +; VF8UF2: [[PRED_STORE_CONTINUE10]]: +; VF8UF2-NEXT: br i1 true, label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] +; VF8UF2: [[PRED_STORE_IF11]]: +; VF8UF2-NEXT: [[TMP9:%.*]] = extractelement <8 x i8> [[TMP1]], i32 6 +; VF8UF2-NEXT: store i8 [[TMP9]], ptr [[DST]], align 1 +; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE12]] +; VF8UF2: [[PRED_STORE_CONTINUE12]]: +; VF8UF2-NEXT: br i1 true, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]] +; VF8UF2: [[PRED_STORE_IF13]]: +; VF8UF2-NEXT: [[TMP10:%.*]] = extractelement <8 x i8> [[TMP1]], i32 7 +; VF8UF2-NEXT: store i8 [[TMP10]], ptr [[DST]], align 1 +; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE14]] +; VF8UF2: [[PRED_STORE_CONTINUE14]]: +; VF8UF2-NEXT: br i1 false, label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]] +; VF8UF2: [[PRED_STORE_IF15]]: +; VF8UF2-NEXT: [[TMP11:%.*]] = extractelement <8 x i8> [[TMP2]], i32 0 +; VF8UF2-NEXT: store i8 [[TMP11]], ptr [[DST]], align 1 +; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE16]] +; VF8UF2: [[PRED_STORE_CONTINUE16]]: +; VF8UF2-NEXT: br i1 false, label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]] +; VF8UF2: [[PRED_STORE_IF17]]: +; VF8UF2-NEXT: [[TMP12:%.*]] = extractelement <8 x i8> [[TMP2]], i32 1 +; VF8UF2-NEXT: store i8 [[TMP12]], ptr [[DST]], align 1 +; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE18]] +; VF8UF2: [[PRED_STORE_CONTINUE18]]: +; VF8UF2-NEXT: br i1 false, label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]] +; VF8UF2: [[PRED_STORE_IF19]]: +; VF8UF2-NEXT: [[TMP13:%.*]] = extractelement <8 x i8> [[TMP2]], i32 2 +; VF8UF2-NEXT: store i8 [[TMP13]], ptr [[DST]], align 1 +; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE20]] +; VF8UF2: [[PRED_STORE_CONTINUE20]]: +; VF8UF2-NEXT: br i1 false, label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] +; VF8UF2: [[PRED_STORE_IF21]]: +; VF8UF2-NEXT: [[TMP14:%.*]] = extractelement <8 x i8> [[TMP2]], i32 3 +; VF8UF2-NEXT: store i8 [[TMP14]], ptr [[DST]], align 1 +; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE22]] +; VF8UF2: [[PRED_STORE_CONTINUE22]]: +; VF8UF2-NEXT: br i1 false, label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] +; VF8UF2: [[PRED_STORE_IF23]]: +; VF8UF2-NEXT: [[TMP15:%.*]] = extractelement <8 x i8> [[TMP2]], i32 4 +; VF8UF2-NEXT: store i8 [[TMP15]], ptr [[DST]], align 1 +; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE24]] +; VF8UF2: [[PRED_STORE_CONTINUE24]]: +; VF8UF2-NEXT: br i1 false, label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]] +; VF8UF2: [[PRED_STORE_IF25]]: +; VF8UF2-NEXT: [[TMP16:%.*]] = extractelement <8 x i8> [[TMP2]], i32 5 +; VF8UF2-NEXT: store i8 [[TMP16]], ptr [[DST]], align 1 +; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE26]] +; VF8UF2: [[PRED_STORE_CONTINUE26]]: +; VF8UF2-NEXT: br i1 false, label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]] +; VF8UF2: [[PRED_STORE_IF27]]: +; VF8UF2-NEXT: [[TMP17:%.*]] = extractelement <8 x i8> [[TMP2]], i32 6 +; VF8UF2-NEXT: store i8 [[TMP17]], ptr [[DST]], align 1 +; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE28]] +; VF8UF2: [[PRED_STORE_CONTINUE28]]: +; VF8UF2-NEXT: br i1 false, label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]] +; VF8UF2: [[PRED_STORE_IF29]]: +; VF8UF2-NEXT: [[TMP18:%.*]] = extractelement <8 x i8> [[TMP2]], i32 7 +; VF8UF2-NEXT: store i8 [[TMP18]], ptr [[DST]], align 1 +; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE30]] +; VF8UF2: [[PRED_STORE_CONTINUE30]]: +; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]] +; VF8UF2: [[MIDDLE_BLOCK]]: +; VF8UF2-NEXT: br label %[[EXIT:.*]] +; VF8UF2: [[EXIT]]: +; VF8UF2-NEXT: ret void +; +; VF16UF1-LABEL: define void @first_order_recurrence_single_vector_iteration( +; VF16UF1-SAME: ptr noalias [[PKT:%.*]], ptr noalias [[DST:%.*]]) { +; VF16UF1-NEXT: [[ENTRY:.*:]] +; VF16UF1-NEXT: br label %[[VECTOR_PH:.*]] +; VF16UF1: [[VECTOR_PH]]: +; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]] +; VF16UF1: [[VECTOR_BODY]]: +; VF16UF1-NEXT: [[TMP0:%.*]] = load i8, ptr [[PKT]], align 1 +; VF16UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i64 0 +; VF16UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer +; VF16UF1-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> zeroinitializer, <16 x i8> [[BROADCAST_SPLAT]], <16 x i32> +; VF16UF1-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; VF16UF1: [[PRED_STORE_IF]]: +; VF16UF1-NEXT: [[TMP2:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0 +; VF16UF1-NEXT: store i8 [[TMP2]], ptr [[DST]], align 1 +; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE]] +; VF16UF1: [[PRED_STORE_CONTINUE]]: +; VF16UF1-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] +; VF16UF1: [[PRED_STORE_IF1]]: +; VF16UF1-NEXT: [[TMP3:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1 +; VF16UF1-NEXT: store i8 [[TMP3]], ptr [[DST]], align 1 +; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE2]] +; VF16UF1: [[PRED_STORE_CONTINUE2]]: +; VF16UF1-NEXT: br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] +; VF16UF1: [[PRED_STORE_IF3]]: +; VF16UF1-NEXT: [[TMP4:%.*]] = extractelement <16 x i8> [[TMP1]], i32 2 +; VF16UF1-NEXT: store i8 [[TMP4]], ptr [[DST]], align 1 +; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE4]] +; VF16UF1: [[PRED_STORE_CONTINUE4]]: +; VF16UF1-NEXT: br i1 true, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] +; VF16UF1: [[PRED_STORE_IF5]]: +; VF16UF1-NEXT: [[TMP5:%.*]] = extractelement <16 x i8> [[TMP1]], i32 3 +; VF16UF1-NEXT: store i8 [[TMP5]], ptr [[DST]], align 1 +; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE6]] +; VF16UF1: [[PRED_STORE_CONTINUE6]]: +; VF16UF1-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] +; VF16UF1: [[PRED_STORE_IF7]]: +; VF16UF1-NEXT: [[TMP6:%.*]] = extractelement <16 x i8> [[TMP1]], i32 4 +; VF16UF1-NEXT: store i8 [[TMP6]], ptr [[DST]], align 1 +; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE8]] +; VF16UF1: [[PRED_STORE_CONTINUE8]]: +; VF16UF1-NEXT: br i1 true, label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] +; VF16UF1: [[PRED_STORE_IF9]]: +; VF16UF1-NEXT: [[TMP7:%.*]] = extractelement <16 x i8> [[TMP1]], i32 5 +; VF16UF1-NEXT: store i8 [[TMP7]], ptr [[DST]], align 1 +; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE10]] +; VF16UF1: [[PRED_STORE_CONTINUE10]]: +; VF16UF1-NEXT: br i1 true, label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] +; VF16UF1: [[PRED_STORE_IF11]]: +; VF16UF1-NEXT: [[TMP8:%.*]] = extractelement <16 x i8> [[TMP1]], i32 6 +; VF16UF1-NEXT: store i8 [[TMP8]], ptr [[DST]], align 1 +; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE12]] +; VF16UF1: [[PRED_STORE_CONTINUE12]]: +; VF16UF1-NEXT: br i1 true, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]] +; VF16UF1: [[PRED_STORE_IF13]]: +; VF16UF1-NEXT: [[TMP9:%.*]] = extractelement <16 x i8> [[TMP1]], i32 7 +; VF16UF1-NEXT: store i8 [[TMP9]], ptr [[DST]], align 1 +; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]] +; VF16UF1: [[PRED_STORE_CONTINUE14]]: +; VF16UF1-NEXT: br i1 false, label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]] +; VF16UF1: [[PRED_STORE_IF15]]: +; VF16UF1-NEXT: [[TMP10:%.*]] = extractelement <16 x i8> [[TMP1]], i32 8 +; VF16UF1-NEXT: store i8 [[TMP10]], ptr [[DST]], align 1 +; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE16]] +; VF16UF1: [[PRED_STORE_CONTINUE16]]: +; VF16UF1-NEXT: br i1 false, label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]] +; VF16UF1: [[PRED_STORE_IF17]]: +; VF16UF1-NEXT: [[TMP11:%.*]] = extractelement <16 x i8> [[TMP1]], i32 9 +; VF16UF1-NEXT: store i8 [[TMP11]], ptr [[DST]], align 1 +; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE18]] +; VF16UF1: [[PRED_STORE_CONTINUE18]]: +; VF16UF1-NEXT: br i1 false, label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]] +; VF16UF1: [[PRED_STORE_IF19]]: +; VF16UF1-NEXT: [[TMP12:%.*]] = extractelement <16 x i8> [[TMP1]], i32 10 +; VF16UF1-NEXT: store i8 [[TMP12]], ptr [[DST]], align 1 +; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE20]] +; VF16UF1: [[PRED_STORE_CONTINUE20]]: +; VF16UF1-NEXT: br i1 false, label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] +; VF16UF1: [[PRED_STORE_IF21]]: +; VF16UF1-NEXT: [[TMP13:%.*]] = extractelement <16 x i8> [[TMP1]], i32 11 +; VF16UF1-NEXT: store i8 [[TMP13]], ptr [[DST]], align 1 +; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE22]] +; VF16UF1: [[PRED_STORE_CONTINUE22]]: +; VF16UF1-NEXT: br i1 false, label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] +; VF16UF1: [[PRED_STORE_IF23]]: +; VF16UF1-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[TMP1]], i32 12 +; VF16UF1-NEXT: store i8 [[TMP14]], ptr [[DST]], align 1 +; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE24]] +; VF16UF1: [[PRED_STORE_CONTINUE24]]: +; VF16UF1-NEXT: br i1 false, label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]] +; VF16UF1: [[PRED_STORE_IF25]]: +; VF16UF1-NEXT: [[TMP15:%.*]] = extractelement <16 x i8> [[TMP1]], i32 13 +; VF16UF1-NEXT: store i8 [[TMP15]], ptr [[DST]], align 1 +; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE26]] +; VF16UF1: [[PRED_STORE_CONTINUE26]]: +; VF16UF1-NEXT: br i1 false, label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]] +; VF16UF1: [[PRED_STORE_IF27]]: +; VF16UF1-NEXT: [[TMP16:%.*]] = extractelement <16 x i8> [[TMP1]], i32 14 +; VF16UF1-NEXT: store i8 [[TMP16]], ptr [[DST]], align 1 +; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE28]] +; VF16UF1: [[PRED_STORE_CONTINUE28]]: +; VF16UF1-NEXT: br i1 false, label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]] +; VF16UF1: [[PRED_STORE_IF29]]: +; VF16UF1-NEXT: [[TMP17:%.*]] = extractelement <16 x i8> [[TMP1]], i32 15 +; VF16UF1-NEXT: store i8 [[TMP17]], ptr [[DST]], align 1 +; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE30]] +; VF16UF1: [[PRED_STORE_CONTINUE30]]: +; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]] +; VF16UF1: [[MIDDLE_BLOCK]]: +; VF16UF1-NEXT: br label %[[EXIT:.*]] +; VF16UF1: [[EXIT]]: +; VF16UF1-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %recur = phi i8 [ 0, %entry ], [ %load, %loop ] + %load = load i8, ptr %pkt, align 1 + store i8 %recur, ptr %dst, align 1 + %iv.next = add i64 %iv, 1 + %cmp = icmp eq i64 %iv, 7 + br i1 %cmp, label %exit, label %loop + +exit: + ret void +} +;. !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ;. ; VF8UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; VF8UF1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}