diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 3eda4f66d7a4b..dbbb5e75adc66 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -470,23 +470,6 @@ class VPBuilder { Opcode, Op, ResultTy, nullptr, VPIRFlags::getDefaultFlags(Opcode))); } - /// Create a single-scalar recipe with \p Opcode and \p Operands without - /// inserting it. - static VPSingleDefRecipe *createSingleScalarOp(unsigned Opcode, - ArrayRef Operands, - VPValue *Mask, - const VPIRFlags &Flags, - const VPIRMetadata &Metadata, - DebugLoc DL, Instruction *UV) { - if (Instruction::isCast(Opcode)) { - assert(!Mask && "Cast cannot be predicated"); - return new VPInstructionWithType(Opcode, Operands, UV->getType(), Flags, - Metadata, DL, UV->getName(), UV); - } - return new VPReplicateRecipe(UV, Operands, /*IsSingleScalar=*/true, Mask, - Flags, Metadata, DL); - } - VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 338bd12f07920..302d99a39ed34 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6365,7 +6365,7 @@ bool VPRecipeBuilder::replaceWithFinalIfReductionStore( return false; } -VPSingleDefRecipe *VPRecipeBuilder::handleReplication(VPInstruction *VPI, +VPReplicateRecipe *VPRecipeBuilder::handleReplication(VPInstruction *VPI, VFRange &Range) { auto *I = VPI->getUnderlyingInstr(); bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange( @@ -6423,14 +6423,9 @@ VPSingleDefRecipe *VPRecipeBuilder::handleReplication(VPInstruction *VPI, assert((Range.Start.isScalar() || !IsUniform || !IsPredicated || (Range.Start.isScalable() && isa(I))) && "Should not predicate a uniform recipe"); - if (IsUniform) { - return VPBuilder::createSingleScalarOp( - VPI->getOpcode(), VPI->operandsWithoutMask(), BlockInMask, *VPI, *VPI, - VPI->getDebugLoc(), I); - } - auto *Recipe = new VPReplicateRecipe(I, VPI->operandsWithoutMask(), - /*IsSingleScalar=*/false, BlockInMask, - *VPI, *VPI, VPI->getDebugLoc()); + auto *Recipe = + new VPReplicateRecipe(I, VPI->operandsWithoutMask(), IsUniform, + BlockInMask, *VPI, *VPI, VPI->getDebugLoc()); return Recipe; } @@ -6733,10 +6728,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VPlanPtr Plan, if (isa(&R) || - (isa(R) && - Instruction::isCast(cast(R).getOpcode()) && - vputils::onlyFirstLaneUsed(R.getVPSingleValue()))) + VPVectorEndPointerRecipe, VPHistogramRecipe>(&R)) continue; auto *VPI = cast(&R); if (!VPI->getUnderlyingValue()) diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index 8296d058b71c3..f2777f99b407a 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -78,10 +78,10 @@ class VPRecipeBuilder { bool replaceWithFinalIfReductionStore(VPInstruction *VPI, VPBuilder &FinalRedStoresBuilder); - /// Build a replicating or single-scalar recipe for \p VPI. If it is - /// predicated, add the mask as last operand. Range.End may be decreased to - /// ensure same recipe behavior from \p Range.Start to \p Range.End. - VPSingleDefRecipe *handleReplication(VPInstruction *VPI, VFRange &Range); + /// Build a VPReplicationRecipe for \p VPI. If it is predicated, add the mask + /// as last operand. Range.End may be decreased to ensure same recipe behavior + /// from \p Range.Start to \p Range.End. + VPReplicateRecipe *handleReplication(VPInstruction *VPI, VFRange &Range); }; } // end namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 92f2c51640a5c..83475129cdebf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1542,10 +1542,8 @@ class VPInstructionWithType : public VPInstruction { Type *ResultTy, const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {}, DebugLoc DL = DebugLoc::getUnknown(), - const Twine &Name = "", Value *UV = nullptr) - : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name, ResultTy) { - setUnderlyingValue(UV); - } + const Twine &Name = "") + : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name, ResultTy) {} static inline bool classof(const VPRecipeBase *R) { // VPInstructionWithType are VPInstructions with specific opcodes requiring @@ -1583,17 +1581,13 @@ class VPInstructionWithType : public VPInstruction { /// Return the cost of this VPInstruction. InstructionCost computeCost(ElementCount VF, - VPCostContext &Ctx) const override; + VPCostContext &Ctx) const override { + // TODO: Compute accurate cost after retiring the legacy cost model. + return 0; + } Type *getResultType() const { return getScalarType(); } - /// Cast recipes always use scalars of their operand. - bool usesScalars(const VPValue *Op) const override { - if (Instruction::isCast(getOpcode())) - return true; - return VPInstruction::usesScalars(Op); - } - protected: #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. @@ -3424,8 +3418,6 @@ class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags, computeScalarType(I, Operands), Flags, DL), VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar), IsPredicated(Mask) { - assert((!IsSingleScalar || !I->isCast()) && - "single-scalar casts should use VPInstructionWithType"); setUnderlyingValue(I); if (Mask) addOperand(Mask); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 05f145d105b4f..d13c1f10ad284 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1821,17 +1821,6 @@ void VPInstructionWithType::execute(VPTransformState &State) { } } -InstructionCost VPInstructionWithType::computeCost(ElementCount VF, - VPCostContext &Ctx) const { - // TODO: Compute cost for VPInstructions without underlying values. - if (!getUnderlyingValue()) - return 0; - assert(Instruction::isCast(getOpcode()) && - "only casts have underlying values currently"); - return getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1), - Ctx); -} - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPInstructionWithType::printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index f1c59ba43bfa2..0fa411450ea92 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -367,11 +367,10 @@ static bool sinkScalarOperands(VPlan &Plan) { dyn_cast(SinkCandidate)) { // TODO: Handle converting to uniform recipes as separate transform, // then cloning should be sufficient here. - Clone = VPBuilder::createSingleScalarOp( - SinkCandidateRepR->getOpcode(), SinkCandidate->operands(), - /*Mask=*/nullptr, *SinkCandidateRepR, *SinkCandidateRepR, - SinkCandidate->getDebugLoc(), - SinkCandidate->getUnderlyingInstr()); + Instruction *I = SinkCandidate->getUnderlyingInstr(); + Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true, + nullptr /*Mask*/, *SinkCandidateRepR, + *SinkCandidateRepR); // TODO: add ".cloned" suffix to name of Clone's VPValue. } else { Clone = SinkCandidate->clone(); @@ -904,10 +903,9 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { m_Binary(m_VPValue(), m_VPValue()))) continue; - auto *Clone = VPBuilder::createSingleScalarOp( - Def->getUnderlyingInstr()->getOpcode(), Def->operands(), - /*Mask=*/nullptr, *Def, {}, DebugLoc::getUnknown(), - Def->getUnderlyingInstr()); + auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(), + Def->operands(), /*IsUniform*/ true, + /*Mask*/ nullptr, /*Flags*/ *Def); Clone->insertAfter(Def); Def->replaceAllUsesWith(Clone); } @@ -1967,10 +1965,9 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { })) continue; - auto *Clone = VPBuilder::createSingleScalarOp( - getOpcodeOrIntrinsicID(RepOrWidenR)->second, RepOrWidenR->operands(), - /*Mask=*/nullptr, *RepOrWidenR, {}, DebugLoc::getUnknown(), - RepOrWidenR->getUnderlyingInstr()); + auto *Clone = new VPReplicateRecipe( + RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(), + true /*IsSingleScalar*/, nullptr, *RepOrWidenR); Clone->insertBefore(RepOrWidenR); RepOrWidenR->replaceAllUsesWith(Clone); if (isDeadRecipe(*RepOrWidenR)) @@ -6964,9 +6961,9 @@ void VPlanTransforms::makeScalarizationDecisions(VPlan &Plan, VFRange &Range) { if (!vputils::onlyFirstLaneUsed(VPI)) continue; - auto *Recipe = VPBuilder::createSingleScalarOp( - VPI->getOpcode(), VPI->operandsWithoutMask(), /*Mask=*/nullptr, *VPI, - *VPI, VPI->getDebugLoc(), I); + auto *Recipe = new VPReplicateRecipe( + I, VPI->operandsWithoutMask(), /*IsSingleScalar=*/true, + /*Mask=*/nullptr, *VPI, *VPI, VPI->getDebugLoc()); Recipe->insertBefore(VPI); VPI->replaceAllUsesWith(Recipe); VPI->eraseFromParent(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 5276aeba2fba0..ed3068e1e9197 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -645,9 +645,9 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, // TODO: have cloning of replicate recipes also provide the desired result // coupled with setting its operands to NewOps (deriving IsSingleScalar and // Mask from the operands?) - New = VPBuilder::createSingleScalarOp( - RepR->getOpcode(), NewOps, /*Mask=*/nullptr, *RepR, *RepR, - RepR->getDebugLoc(), RepR->getUnderlyingInstr()); + New = new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps, + /*IsSingleScalar=*/true, /*Mask=*/nullptr, + *RepR, *RepR, RepR->getDebugLoc()); } else { New = DefR->clone(); for (const auto &[Idx, Op] : enumerate(NewOps)) { @@ -706,9 +706,9 @@ static void convertRecipesInRegionBlocksToSingleScalar(VPlan &Plan, Type *IdxTy, } if (auto *RepR = dyn_cast(&OldR)) { - auto *NewR = VPBuilder::createSingleScalarOp( - RepR->getOpcode(), to_vector(RepR->operands()), /*Mask=*/nullptr, - *RepR, *RepR, OldDL, RepR->getUnderlyingInstr()); + auto *NewR = new VPReplicateRecipe( + RepR->getUnderlyingInstr(), RepR->operands(), + /* IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR, *RepR, OldDL); NewR->insertBefore(RepR); RepR->replaceAllUsesWith(NewR); RepR->eraseFromParent(); diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/single-scalar-cast.ll b/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/single-scalar-cast.ll index 7743c7bcbe058..26043f73178f3 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/single-scalar-cast.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/single-scalar-cast.ll @@ -23,7 +23,7 @@ define i16 @narrow_iv_cast_to_single_scalar(ptr %p, ptr %q) { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: vp<[[VP4:%[0-9]+]]> = SCALAR-STEPS vp<[[VP3]]>, ir<1>, vp<[[VP0]]> ; CHECK-NEXT: CLONE ir<%shr> = lshr vp<[[VP4]]>, ir<1> -; CHECK-NEXT: EMIT-SCALAR ir<%shr.ext> = zext ir<%shr> to i64 +; CHECK-NEXT: CLONE ir<%shr.ext> = zext ir<%shr> ; CHECK-NEXT: CLONE ir<%gep.p> = getelementptr ir<%p>, ir<%shr.ext> ; CHECK-NEXT: CLONE ir<%l> = load ir<%gep.p> ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]> @@ -76,7 +76,7 @@ define void @sink_replicate_region_with_cast(ptr %dst, i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: vector.body: ; CHECK-NEXT: vp<[[VP5:%[0-9]+]]> = SCALAR-STEPS vp<[[VP4]]>, ir<1>, vp<[[VP0]]> -; CHECK-NEXT: EMIT-SCALAR ir<%conv> = zext vp<[[VP5]]> to i64 +; CHECK-NEXT: CLONE ir<%conv> = zext vp<[[VP5]]> ; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%dst>, ir<%conv> ; CHECK-NEXT: vp<[[VP6:%[0-9]+]]> = vector-pointer ir<%gep>, ir<1> ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VP6]]> diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/sve2-histcnt-vplan.ll b/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/sve2-histcnt-vplan.ll index 03bec83d6ab4a..046ffd3f45f47 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/sve2-histcnt-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/sve2-histcnt-vplan.ll @@ -29,7 +29,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: [[STEPS:vp.*]] = SCALAR-STEPS [[IV]], ir<1>, [[VF]] ; CHECK-NEXT: CLONE [[GEP_IDX:.*]] = getelementptr inbounds ir<%indices>, [[STEPS]] ; CHECK-NEXT: CLONE [[IDX:.*]] = load [[GEP_IDX]] -; CHECK-NEXT: EMIT-SCALAR [[EXT_IDX:.*]] = zext [[IDX]] +; CHECK-NEXT: CLONE [[EXT_IDX:.*]] = zext [[IDX]] ; CHECK-NEXT: CLONE [[GEP_BUCKET:.*]] = getelementptr inbounds ir<%buckets>, [[EXT_IDX]] ; CHECK-NEXT: CLONE [[HISTVAL:.*]] = load [[GEP_BUCKET]] ; CHECK-NEXT: CLONE [[UPDATE:.*]] = add nsw [[HISTVAL]], ir<1> diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-riscv-vector-reverse.ll index acdcfafed372e..32765c53efdc8 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-riscv-vector-reverse.ll @@ -31,7 +31,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[EVL_PHI]]> * ir<-1> ; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<-1>, vp<[[EVL]]> ; CHECK-NEXT: CLONE ir<[[IDX:%.+]]> = add nsw vp<[[SCALAR_STEPS]]>, ir<-1> -; CHECK-NEXT: EMIT-SCALAR ir<[[IDX_PROM:%.+]]> = zext ir<[[IDX]]> to i64 +; CHECK-NEXT: CLONE ir<[[IDX_PROM:%.+]]> = zext ir<[[IDX]]> ; CHECK-NEXT: CLONE ir<[[ARRAY_IDX_B:%.+]]> = getelementptr inbounds ir<[[B:%.+]]>, ir<[[IDX_PROM]]> ; CHECK-NEXT: vp<[[VEC_END_PTR_B:%.+]]> = vector-end-pointer ir<[[ARRAY_IDX_B]]>, vp<[[EVL]]> ; CHECK-NEXT: WIDEN ir<[[LOAD_B:%.+]]> = vp.load vp<[[VEC_END_PTR_B]]>, vp<[[EVL]]> diff --git a/llvm/test/Transforms/LoopVectorize/as_cast.ll b/llvm/test/Transforms/LoopVectorize/as_cast.ll index 7d93501da4411..31ed496de0ccf 100644 --- a/llvm/test/Transforms/LoopVectorize/as_cast.ll +++ b/llvm/test/Transforms/LoopVectorize/as_cast.ll @@ -7,7 +7,6 @@ define void @loop_invariant_as_cast(ptr addrspace(1) %in) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[IN]] to ptr ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] @@ -17,6 +16,7 @@ define void @loop_invariant_as_cast(ptr addrspace(1) %in) { ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[IN]] to ptr ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP3]] ; CHECK-NEXT: store i64 [[TMP3]], ptr [[TMP5]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -24,7 +24,8 @@ define void @loop_invariant_as_cast(ptr addrspace(1) %in) { ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_IF1]]: ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[IN]] to ptr +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP6]] ; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: @@ -64,19 +65,19 @@ define void @loop_varying_as_cast(ptr addrspace(1) %in) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[TMP0]], 6 +; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[IN]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[IN]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[TMP4]] to ptr -; CHECK-NEXT: [[TMP8:%.*]] = addrspacecast ptr addrspace(1) [[TMP7]] to ptr -; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: store i64 [[TMP3]], ptr [[TMP5]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_IF1]]: +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[IN]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = addrspacecast ptr addrspace(1) [[TMP7]] to ptr ; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: diff --git a/llvm/test/Transforms/LoopVectorize/cast-costs.ll b/llvm/test/Transforms/LoopVectorize/cast-costs.ll index 8613e52ed9e7d..4020807988635 100644 --- a/llvm/test/Transforms/LoopVectorize/cast-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/cast-costs.ll @@ -81,7 +81,7 @@ exit: define void @replicate_zext(ptr %A, i32 %n) { ; CHECK-LABEL: 'replicate_zext' -; CHECK: Cost of 1 for VF vscale x 1: EMIT-SCALAR ir<%iv.ext> = zext vp<[[VP4:%[0-9]+]]> to i64 +; CHECK: Cost of 1 for VF vscale x 1: CLONE ir<%iv.ext> = zext vp<[[VP4:%[0-9]+]]> ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll index d497a887130b0..1f719673eb217 100644 --- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll @@ -276,16 +276,16 @@ define void @cast_induction_tail_folding(ptr %A) { ; IC2-NEXT: [[INDEX1:%.*]] = add i32 [[INDEX]], 1 ; IC2-NEXT: [[TMP2:%.*]] = icmp ule i32 [[INDEX]], 2 ; IC2-NEXT: [[TMP3:%.*]] = icmp ule i32 [[INDEX1]], 2 -; IC2-NEXT: [[TMP4:%.*]] = sext i32 [[INDEX]] to i64 -; IC2-NEXT: [[TMP6:%.*]] = sext i32 [[INDEX1]] to i64 ; IC2-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; IC2: [[PRED_STORE_IF]]: +; IC2-NEXT: [[TMP4:%.*]] = sext i32 [[INDEX]] to i64 ; IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]] ; IC2-NEXT: store i32 [[INDEX]], ptr [[TMP5]], align 4 ; IC2-NEXT: br label %[[PRED_STORE_CONTINUE]] ; IC2: [[PRED_STORE_CONTINUE]]: ; IC2-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; IC2: [[PRED_STORE_IF1]]: +; IC2-NEXT: [[TMP6:%.*]] = sext i32 [[INDEX1]] to i64 ; IC2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] ; IC2-NEXT: store i32 [[INDEX1]], ptr [[TMP7]], align 4 ; IC2-NEXT: br label %[[PRED_STORE_CONTINUE2]] diff --git a/llvm/test/Transforms/LoopVectorize/preserve-inbounds-gep-with-pointer-casts.ll b/llvm/test/Transforms/LoopVectorize/preserve-inbounds-gep-with-pointer-casts.ll index daaf641a1c802..a9bb1c8f7f7be 100644 --- a/llvm/test/Transforms/LoopVectorize/preserve-inbounds-gep-with-pointer-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/preserve-inbounds-gep-with-pointer-casts.ll @@ -49,8 +49,9 @@ define void @bitcast_in_ptr_chain(ptr %src, ptr noalias %dst) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 4 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP0]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr [[TMP0]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i64 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 4