diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index dbbb5e75adc66..3eda4f66d7a4b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -470,6 +470,23 @@ class VPBuilder { Opcode, Op, ResultTy, nullptr, VPIRFlags::getDefaultFlags(Opcode))); } + /// Create a single-scalar recipe with \p Opcode and \p Operands without + /// inserting it. + static VPSingleDefRecipe *createSingleScalarOp(unsigned Opcode, + ArrayRef Operands, + VPValue *Mask, + const VPIRFlags &Flags, + const VPIRMetadata &Metadata, + DebugLoc DL, Instruction *UV) { + if (Instruction::isCast(Opcode)) { + assert(!Mask && "Cast cannot be predicated"); + return new VPInstructionWithType(Opcode, Operands, UV->getType(), Flags, + Metadata, DL, UV->getName(), UV); + } + return new VPReplicateRecipe(UV, Operands, /*IsSingleScalar=*/true, Mask, + Flags, Metadata, DL); + } + VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7ad454d4a1797..9b763f0b07def 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6365,7 +6365,7 @@ bool VPRecipeBuilder::replaceWithFinalIfReductionStore( return false; } -VPReplicateRecipe *VPRecipeBuilder::handleReplication(VPInstruction *VPI, +VPSingleDefRecipe *VPRecipeBuilder::handleReplication(VPInstruction *VPI, VFRange &Range) { auto *I = VPI->getUnderlyingInstr(); bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange( @@ -6423,9 +6423,14 @@ VPReplicateRecipe *VPRecipeBuilder::handleReplication(VPInstruction *VPI, assert((Range.Start.isScalar() || !IsUniform || !IsPredicated || (Range.Start.isScalable() && isa(I))) && "Should not predicate a uniform recipe"); - auto *Recipe = - new VPReplicateRecipe(I, VPI->operandsWithoutMask(), IsUniform, - BlockInMask, *VPI, *VPI, VPI->getDebugLoc()); + if (IsUniform) { + return VPBuilder::createSingleScalarOp( + VPI->getOpcode(), VPI->operandsWithoutMask(), BlockInMask, *VPI, *VPI, + VPI->getDebugLoc(), I); + } + auto *Recipe = new VPReplicateRecipe(I, VPI->operandsWithoutMask(), + /*IsSingleScalar=*/false, BlockInMask, + *VPI, *VPI, VPI->getDebugLoc()); return Recipe; } @@ -6726,7 +6731,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VPlanPtr Plan, if (isa(&R)) + VPVectorEndPointerRecipe, VPHistogramRecipe>(&R) || + (isa(R) && + Instruction::isCast(cast(R).getOpcode()) && + vputils::onlyFirstLaneUsed(R.getVPSingleValue()))) continue; auto *VPI = cast(&R); if (!VPI->getUnderlyingValue()) diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index f2777f99b407a..8296d058b71c3 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -78,10 +78,10 @@ class VPRecipeBuilder { bool replaceWithFinalIfReductionStore(VPInstruction *VPI, VPBuilder &FinalRedStoresBuilder); - /// Build a VPReplicationRecipe for \p VPI. If it is predicated, add the mask - /// as last operand. Range.End may be decreased to ensure same recipe behavior - /// from \p Range.Start to \p Range.End. - VPReplicateRecipe *handleReplication(VPInstruction *VPI, VFRange &Range); + /// Build a replicating or single-scalar recipe for \p VPI. If it is + /// predicated, add the mask as last operand. Range.End may be decreased to + /// ensure same recipe behavior from \p Range.Start to \p Range.End. + VPSingleDefRecipe *handleReplication(VPInstruction *VPI, VFRange &Range); }; } // end namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 71080705a1c6e..c00d199b5e4c4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1541,8 +1541,10 @@ class VPInstructionWithType : public VPInstruction { Type *ResultTy, const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {}, DebugLoc DL = DebugLoc::getUnknown(), - const Twine &Name = "") - : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name, ResultTy) {} + const Twine &Name = "", Value *UV = nullptr) + : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name, ResultTy) { + setUnderlyingValue(UV); + } static inline bool classof(const VPRecipeBase *R) { // VPInstructionWithType are VPInstructions with specific opcodes requiring @@ -1580,13 +1582,17 @@ class VPInstructionWithType : public VPInstruction { /// Return the cost of this VPInstruction. InstructionCost computeCost(ElementCount VF, - VPCostContext &Ctx) const override { - // TODO: Compute accurate cost after retiring the legacy cost model. - return 0; - } + VPCostContext &Ctx) const override; Type *getResultType() const { return getScalarType(); } + /// Cast recipes always use scalars of their operand. + bool usesScalars(const VPValue *Op) const override { + if (Instruction::isCast(getOpcode())) + return true; + return VPInstruction::usesScalars(Op); + } + protected: #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. @@ -3417,6 +3423,8 @@ class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags, computeScalarType(I, Operands), Flags, DL), VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar), IsPredicated(Mask) { + assert((!IsSingleScalar || !I->isCast()) && + "single-scalar casts should use VPInstructionWithType"); setUnderlyingValue(I); if (Mask) addOperand(Mask); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 19cf8e9034792..f718635691e50 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1820,6 +1820,17 @@ void VPInstructionWithType::execute(VPTransformState &State) { } } +InstructionCost VPInstructionWithType::computeCost(ElementCount VF, + VPCostContext &Ctx) const { + // TODO: Compute cost for VPInstructions without underlying values. + if (!getUnderlyingValue()) + return 0; + assert(Instruction::isCast(getOpcode()) && + "only casts have underlying values currently"); + return getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1), + Ctx); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPInstructionWithType::printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index bd292e22135a9..ddaeb7f60f881 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -367,10 +367,11 @@ static bool sinkScalarOperands(VPlan &Plan) { dyn_cast(SinkCandidate)) { // TODO: Handle converting to uniform recipes as separate transform, // then cloning should be sufficient here. - Instruction *I = SinkCandidate->getUnderlyingInstr(); - Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true, - nullptr /*Mask*/, *SinkCandidateRepR, - *SinkCandidateRepR); + Clone = VPBuilder::createSingleScalarOp( + SinkCandidateRepR->getOpcode(), SinkCandidate->operands(), + /*Mask=*/nullptr, *SinkCandidateRepR, *SinkCandidateRepR, + SinkCandidate->getDebugLoc(), + SinkCandidate->getUnderlyingInstr()); // TODO: add ".cloned" suffix to name of Clone's VPValue. } else { Clone = SinkCandidate->clone(); @@ -903,9 +904,10 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { m_Binary(m_VPValue(), m_VPValue()))) continue; - auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(), - Def->operands(), /*IsUniform*/ true, - /*Mask*/ nullptr, /*Flags*/ *Def); + auto *Clone = VPBuilder::createSingleScalarOp( + Def->getUnderlyingInstr()->getOpcode(), Def->operands(), + /*Mask=*/nullptr, *Def, {}, DebugLoc::getUnknown(), + Def->getUnderlyingInstr()); Clone->insertAfter(Def); Def->replaceAllUsesWith(Clone); } @@ -1965,9 +1967,10 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { })) continue; - auto *Clone = new VPReplicateRecipe( - RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(), - true /*IsSingleScalar*/, nullptr, *RepOrWidenR); + auto *Clone = VPBuilder::createSingleScalarOp( + getOpcodeOrIntrinsicID(RepOrWidenR)->second, RepOrWidenR->operands(), + /*Mask=*/nullptr, *RepOrWidenR, {}, DebugLoc::getUnknown(), + RepOrWidenR->getUnderlyingInstr()); Clone->insertBefore(RepOrWidenR); RepOrWidenR->replaceAllUsesWith(Clone); if (isDeadRecipe(*RepOrWidenR)) @@ -6961,9 +6964,9 @@ void VPlanTransforms::makeScalarizationDecisions(VPlan &Plan, VFRange &Range) { if (!vputils::onlyFirstLaneUsed(VPI)) continue; - auto *Recipe = new VPReplicateRecipe( - I, VPI->operandsWithoutMask(), /*IsSingleScalar=*/true, - /*Mask=*/nullptr, *VPI, *VPI, VPI->getDebugLoc()); + auto *Recipe = VPBuilder::createSingleScalarOp( + VPI->getOpcode(), VPI->operandsWithoutMask(), /*Mask=*/nullptr, *VPI, + *VPI, VPI->getDebugLoc(), I); Recipe->insertBefore(VPI); VPI->replaceAllUsesWith(Recipe); VPI->eraseFromParent(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index bb49059e731d7..4fffcb51374fc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -645,9 +645,9 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, // TODO: have cloning of replicate recipes also provide the desired result // coupled with setting its operands to NewOps (deriving IsSingleScalar and // Mask from the operands?) - New = new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps, - /*IsSingleScalar=*/true, /*Mask=*/nullptr, - *RepR, *RepR, RepR->getDebugLoc()); + New = VPBuilder::createSingleScalarOp( + RepR->getOpcode(), NewOps, /*Mask=*/nullptr, *RepR, *RepR, + RepR->getDebugLoc(), RepR->getUnderlyingInstr()); } else { New = DefR->clone(); for (const auto &[Idx, Op] : enumerate(NewOps)) { @@ -706,9 +706,9 @@ static void convertRecipesInRegionBlocksToSingleScalar(VPlan &Plan, Type *IdxTy, } if (auto *RepR = dyn_cast(&OldR)) { - auto *NewR = new VPReplicateRecipe( - RepR->getUnderlyingInstr(), RepR->operands(), - /* IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR, *RepR, OldDL); + auto *NewR = VPBuilder::createSingleScalarOp( + RepR->getOpcode(), to_vector(RepR->operands()), /*Mask=*/nullptr, + *RepR, *RepR, OldDL, RepR->getUnderlyingInstr()); NewR->insertBefore(RepR); RepR->replaceAllUsesWith(NewR); RepR->eraseFromParent(); diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/single-scalar-cast.ll b/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/single-scalar-cast.ll index 26043f73178f3..7743c7bcbe058 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/single-scalar-cast.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/single-scalar-cast.ll @@ -23,7 +23,7 @@ define i16 @narrow_iv_cast_to_single_scalar(ptr %p, ptr %q) { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: vp<[[VP4:%[0-9]+]]> = SCALAR-STEPS vp<[[VP3]]>, ir<1>, vp<[[VP0]]> ; CHECK-NEXT: CLONE ir<%shr> = lshr vp<[[VP4]]>, ir<1> -; CHECK-NEXT: CLONE ir<%shr.ext> = zext ir<%shr> +; CHECK-NEXT: EMIT-SCALAR ir<%shr.ext> = zext ir<%shr> to i64 ; CHECK-NEXT: CLONE ir<%gep.p> = getelementptr ir<%p>, ir<%shr.ext> ; CHECK-NEXT: CLONE ir<%l> = load ir<%gep.p> ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> @@ -76,7 +76,7 @@ define void @sink_replicate_region_with_cast(ptr %dst, i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: vector.body: ; CHECK-NEXT: vp<[[VP5:%[0-9]+]]> = SCALAR-STEPS vp<[[VP4]]>, ir<1>, vp<[[VP0]]> -; CHECK-NEXT: CLONE ir<%conv> = zext vp<[[VP5]]> +; CHECK-NEXT: EMIT-SCALAR ir<%conv> = zext vp<[[VP5]]> to i64 ; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%dst>, ir<%conv> ; CHECK-NEXT: vp<[[VP6:%[0-9]+]]> = vector-pointer ir<%gep>, ir<1> ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VP6]]> diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/sve2-histcnt-vplan.ll b/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/sve2-histcnt-vplan.ll index 046ffd3f45f47..03bec83d6ab4a 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/sve2-histcnt-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/sve2-histcnt-vplan.ll @@ -29,7 +29,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: [[STEPS:vp.*]] = SCALAR-STEPS [[IV]], ir<1>, [[VF]] ; CHECK-NEXT: CLONE [[GEP_IDX:.*]] = getelementptr inbounds ir<%indices>, [[STEPS]] ; CHECK-NEXT: CLONE [[IDX:.*]] = load [[GEP_IDX]] -; CHECK-NEXT: CLONE [[EXT_IDX:.*]] = zext [[IDX]] +; CHECK-NEXT: EMIT-SCALAR [[EXT_IDX:.*]] = zext [[IDX]] ; CHECK-NEXT: CLONE [[GEP_BUCKET:.*]] = getelementptr inbounds ir<%buckets>, [[EXT_IDX]] ; CHECK-NEXT: CLONE [[HISTVAL:.*]] = load [[GEP_BUCKET]] ; CHECK-NEXT: CLONE [[UPDATE:.*]] = add nsw [[HISTVAL]], ir<1> diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-riscv-vector-reverse.ll index 32765c53efdc8..acdcfafed372e 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-riscv-vector-reverse.ll @@ -31,7 +31,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[EVL_PHI]]> * ir<-1> ; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<-1>, vp<[[EVL]]> ; CHECK-NEXT: CLONE ir<[[IDX:%.+]]> = add nsw vp<[[SCALAR_STEPS]]>, ir<-1> -; CHECK-NEXT: CLONE ir<[[IDX_PROM:%.+]]> = zext ir<[[IDX]]> +; CHECK-NEXT: EMIT-SCALAR ir<[[IDX_PROM:%.+]]> = zext ir<[[IDX]]> to i64 ; CHECK-NEXT: CLONE ir<[[ARRAY_IDX_B:%.+]]> = getelementptr inbounds ir<[[B:%.+]]>, ir<[[IDX_PROM]]> ; CHECK-NEXT: vp<[[VEC_END_PTR_B:%.+]]> = vector-end-pointer ir<[[ARRAY_IDX_B]]>, vp<[[EVL]]> ; CHECK-NEXT: WIDEN ir<[[LOAD_B:%.+]]> = vp.load vp<[[VEC_END_PTR_B]]>, vp<[[EVL]]> diff --git a/llvm/test/Transforms/LoopVectorize/as_cast.ll b/llvm/test/Transforms/LoopVectorize/as_cast.ll index 31ed496de0ccf..7d93501da4411 100644 --- a/llvm/test/Transforms/LoopVectorize/as_cast.ll +++ b/llvm/test/Transforms/LoopVectorize/as_cast.ll @@ -7,6 +7,7 @@ define void @loop_invariant_as_cast(ptr addrspace(1) %in) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[IN]] to ptr ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] @@ -16,7 +17,6 @@ define void @loop_invariant_as_cast(ptr addrspace(1) %in) { ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[IN]] to ptr ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP3]] ; CHECK-NEXT: store i64 [[TMP3]], ptr [[TMP5]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -24,8 +24,7 @@ define void @loop_invariant_as_cast(ptr addrspace(1) %in) { ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_IF1]]: ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[IN]] to ptr -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP6]] ; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: @@ -65,19 +64,19 @@ define void @loop_varying_as_cast(ptr addrspace(1) %in) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[TMP0]], 6 -; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[IN]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[IN]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[TMP4]] to ptr +; CHECK-NEXT: [[TMP8:%.*]] = addrspacecast ptr addrspace(1) [[TMP7]] to ptr +; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: store i64 [[TMP3]], ptr [[TMP5]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_IF1]]: -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[IN]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = addrspacecast ptr addrspace(1) [[TMP7]] to ptr ; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: diff --git a/llvm/test/Transforms/LoopVectorize/cast-costs.ll b/llvm/test/Transforms/LoopVectorize/cast-costs.ll index 4020807988635..8613e52ed9e7d 100644 --- a/llvm/test/Transforms/LoopVectorize/cast-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/cast-costs.ll @@ -81,7 +81,7 @@ exit: define void @replicate_zext(ptr %A, i32 %n) { ; CHECK-LABEL: 'replicate_zext' -; CHECK: Cost of 1 for VF vscale x 1: CLONE ir<%iv.ext> = zext vp<[[VP4:%[0-9]+]]> +; CHECK: Cost of 1 for VF vscale x 1: EMIT-SCALAR ir<%iv.ext> = zext vp<[[VP4:%[0-9]+]]> to i64 ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll index 1f719673eb217..d497a887130b0 100644 --- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll @@ -276,16 +276,16 @@ define void @cast_induction_tail_folding(ptr %A) { ; IC2-NEXT: [[INDEX1:%.*]] = add i32 [[INDEX]], 1 ; IC2-NEXT: [[TMP2:%.*]] = icmp ule i32 [[INDEX]], 2 ; IC2-NEXT: [[TMP3:%.*]] = icmp ule i32 [[INDEX1]], 2 +; IC2-NEXT: [[TMP4:%.*]] = sext i32 [[INDEX]] to i64 +; IC2-NEXT: [[TMP6:%.*]] = sext i32 [[INDEX1]] to i64 ; IC2-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; IC2: [[PRED_STORE_IF]]: -; IC2-NEXT: [[TMP4:%.*]] = sext i32 [[INDEX]] to i64 ; IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]] ; IC2-NEXT: store i32 [[INDEX]], ptr [[TMP5]], align 4 ; IC2-NEXT: br label %[[PRED_STORE_CONTINUE]] ; IC2: [[PRED_STORE_CONTINUE]]: ; IC2-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; IC2: [[PRED_STORE_IF1]]: -; IC2-NEXT: [[TMP6:%.*]] = sext i32 [[INDEX1]] to i64 ; IC2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] ; IC2-NEXT: store i32 [[INDEX1]], ptr [[TMP7]], align 4 ; IC2-NEXT: br label %[[PRED_STORE_CONTINUE2]] diff --git a/llvm/test/Transforms/LoopVectorize/preserve-inbounds-gep-with-pointer-casts.ll b/llvm/test/Transforms/LoopVectorize/preserve-inbounds-gep-with-pointer-casts.ll index a9bb1c8f7f7be..daaf641a1c802 100644 --- a/llvm/test/Transforms/LoopVectorize/preserve-inbounds-gep-with-pointer-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/preserve-inbounds-gep-with-pointer-casts.ll @@ -49,9 +49,8 @@ define void @bitcast_in_ptr_chain(ptr %src, ptr noalias %dst) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr [[TMP0]] to ptr -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i64 4 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP1]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP0]], align 8 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 4