diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 5b94897f4342f..5f1db9c54b291 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -1093,6 +1093,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (!RdxResult) { RdxResult = PartialReductions.front(); IRBuilder Builder(ExitBlock, ExitBlock->getFirstNonPHIIt()); + Builder.setFastMathFlags(Reductions.begin()->second.getFastMathFlags()); RecurKind RK = Reductions.begin()->second.getRecurrenceKind(); for (Instruction *RdxPart : drop_begin(PartialReductions)) { RdxResult = Builder.CreateBinOp( @@ -1255,14 +1256,15 @@ llvm::canParallelizeReductionWhenUnrolling(PHINode &Phi, Loop *L, return std::nullopt; RecurKind RK = RdxDesc.getRecurrenceKind(); // Skip unsupported reductions. - // TODO: Handle additional reductions, including FP and min-max - // reductions. - if (!RecurrenceDescriptor::isIntegerRecurrenceKind(RK) || - RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) || + // TODO: Handle additional reductions, including min-max reductions. + if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) || RecurrenceDescriptor::isFindIVRecurrenceKind(RK) || RecurrenceDescriptor::isMinMaxRecurrenceKind(RK)) return std::nullopt; + if (RdxDesc.hasExactFPMath()) + return std::nullopt; + if (RdxDesc.IntermediateStore) return std::nullopt; diff --git a/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll b/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll index 220a4a29a3041..e94a368d3ded0 100644 --- a/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll +++ b/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll @@ -319,27 +319,33 @@ define float @test_fadd_with_ressaoc(ptr %src, i64 %n, float %start) { ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_1:%.*]] = phi float [ -0.000000e+00, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_2:%.*]] = phi float [ -0.000000e+00, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_3:%.*]] = phi float [ -0.000000e+00, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 1 -; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd float [[RDX]], [[L]] +; CHECK-NEXT: [[RDX_NEXT]] = fadd reassoc float [[RDX]], [[L]] ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT]] ; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 1 -; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = fadd float [[RDX_NEXT]], [[L_1]] +; CHECK-NEXT: [[RDX_NEXT_1]] = fadd reassoc float [[RDX_1]], [[L_1]] ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_1]] ; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 1 -; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = fadd float [[RDX_NEXT_1]], [[L_2]] +; CHECK-NEXT: [[RDX_NEXT_2]] = fadd reassoc float [[RDX_2]], [[L_2]] ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 ; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_2]] ; CHECK-NEXT: [[L_24:%.*]] = load float, ptr [[GEP_SRC_24]], align 1 -; CHECK-NEXT: [[RDX_NEXT_3]] = fadd float [[RDX_NEXT_2]], [[L_24]] +; CHECK-NEXT: [[RDX_NEXT_3]] = fadd reassoc float [[RDX_3]], [[L_24]] ; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 ; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi float [ [[RDX_NEXT_3]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_NEXT_LCSSA1:%.*]] = phi float [ [[RDX_NEXT_3]], %[[LOOP]] ] +; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc float [[RDX_NEXT_1]], [[RDX_NEXT]] +; CHECK-NEXT: [[BIN_RDX1:%.*]] = fadd reassoc float [[RDX_NEXT_2]], [[BIN_RDX]] +; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = fadd reassoc float [[RDX_NEXT_3]], [[BIN_RDX1]] ; CHECK-NEXT: ret float [[RDX_NEXT_LCSSA]] ; entry: @@ -351,7 +357,7 @@ loop: %iv.next = add i64 %iv, 1 %gep.src = getelementptr float, ptr %src, i64 %iv %l = load float, ptr %gep.src, align 1 - %rdx.next = fadd float %rdx, %l + %rdx.next = fadd reassoc float %rdx, %l %ec = icmp ne i64 %iv.next, 1000 br i1 %ec, label %loop, label %exit diff --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll index fb1f2fcf5c190..840cc6c507c3d 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll @@ -287,6 +287,202 @@ exit: ret <4 x i32> %res } +define float @test_fadd_reduction(ptr %a, i64 %n) { +; CHECK-LABEL: define float @test_fadd_reduction( +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1 +; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]] +; CHECK: [[ENTRY_NEW]]: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_1:%.*]] = phi float [ -0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_A]], align 16 +; CHECK-NEXT: [[RDX_NEXT]] = fadd reassoc float [[RDX]], [[TMP2]] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP_A_1]], align 16 +; CHECK-NEXT: [[RDX_NEXT_1]] = fadd reassoc float [[RDX_1]], [[TMP3]] +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2 +; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] +; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[EXIT_UNR_LCSSA]]: +; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_UNR:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc float [[RDX_NEXT_1]], [[RDX_NEXT]] +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]] +; CHECK: [[LOOP_EPIL_PREHEADER]]: +; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]]) +; CHECK-NEXT: br label %[[LOOP_EPIL:.*]] +; CHECK: [[LOOP_EPIL]]: +; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]] +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_A_EPIL]], align 16 +; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = fadd reassoc float [[RDX_EPIL_INIT]], [[TMP4]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi float [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ] +; CHECK-NEXT: ret float [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi float [ 0.0, %entry ], [ %rdx.next, %loop ] + %gep.a = getelementptr inbounds nuw float, ptr %a, i64 %iv + %1 = load float, ptr %gep.a, align 16 + %rdx.next = fadd reassoc float %rdx, %1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop, !llvm.loop !0 + +exit: + %res = phi float [ %rdx.next, %loop ] + ret float %res +} + +define float @test_fadd_no_reassoc(ptr %a, i64 %n) { +; CHECK-LABEL: define float @test_fadd_no_reassoc( +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1 +; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]] +; CHECK: [[ENTRY_NEW]]: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_A]], align 16 +; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd float [[RDX]], [[TMP2]] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP_A_1]], align 16 +; CHECK-NEXT: [[RDX_NEXT_1]] = fadd float [[RDX_NEXT]], [[TMP3]] +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2 +; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] +; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[EXIT_UNR_LCSSA]]: +; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_UNR:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]] +; CHECK: [[LOOP_EPIL_PREHEADER]]: +; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_UNR]], %[[EXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]]) +; CHECK-NEXT: br label %[[LOOP_EPIL:.*]] +; CHECK: [[LOOP_EPIL]]: +; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]] +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_A_EPIL]], align 16 +; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = fadd float [[RDX_EPIL_INIT]], [[TMP4]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi float [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ] +; CHECK-NEXT: ret float [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi float [ 0.0, %entry ], [ %rdx.next, %loop ] + %gep.a = getelementptr inbounds nuw float, ptr %a, i64 %iv + %1 = load float, ptr %gep.a, align 16 + %rdx.next = fadd float %rdx, %1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop, !llvm.loop !0 + +exit: + %res = phi float [ %rdx.next, %loop ] + ret float %res +} + +define float @test_fadd_other_fastmath(ptr %a, i64 %n) { +; CHECK-LABEL: define float @test_fadd_other_fastmath( +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1 +; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]] +; CHECK: [[ENTRY_NEW]]: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_A]], align 16 +; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd contract float [[RDX]], [[TMP2]] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP_A_1]], align 16 +; CHECK-NEXT: [[RDX_NEXT_1]] = fadd contract float [[RDX_NEXT]], [[TMP3]] +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2 +; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] +; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[EXIT_UNR_LCSSA]]: +; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_UNR:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]] +; CHECK: [[LOOP_EPIL_PREHEADER]]: +; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_UNR]], %[[EXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]]) +; CHECK-NEXT: br label %[[LOOP_EPIL:.*]] +; CHECK: [[LOOP_EPIL]]: +; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]] +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_A_EPIL]], align 16 +; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = fadd contract float [[RDX_EPIL_INIT]], [[TMP4]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi float [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ] +; CHECK-NEXT: ret float [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi float [ 0.0, %entry ], [ %rdx.next, %loop ] + %gep.a = getelementptr inbounds nuw float, ptr %a, i64 %iv + %1 = load float, ptr %gep.a, align 16 + %rdx.next = fadd contract float %rdx, %1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop, !llvm.loop !0 + +exit: + %res = phi float [ %rdx.next, %loop ] + ret float %res +} !0 = distinct !{!0, !1} !1 = !{!"llvm.loop.unroll.count", i32 2} @@ -301,4 +497,7 @@ exit: ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]} ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]]} ;.