[Analysis] Ignore convergence tokens in dead branches in CodeMetrics#193590
[Analysis] Ignore convergence tokens in dead branches in CodeMetrics#193590inbelic wants to merge 7 commits into
CodeMetrics#193590Conversation
CodeMetrics
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: Finn Plummer (inbelic) ChangesIn Assisted by: Claude Opus 4.6 Full diff: https://github.com/llvm/llvm-project/pull/193590.diff 2 Files Affected:
diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp
index ea67b526423bf..21f5ff7319ceb 100644
--- a/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/llvm/lib/Analysis/CodeMetrics.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstructionCost.h"
@@ -118,7 +119,12 @@ static bool extendsConvergenceOutsideLoop(const Instruction &I, const Loop *L) {
if (!isa<ConvergenceControlInst>(I))
return false;
for (const auto *U : I.users()) {
- if (!L->contains(cast<Instruction>(U)))
+ const auto *UserInst = cast<Instruction>(U);
+ // Ignore users in dead branches, identified by blocks terminated with
+ // unreachable.
+ if (isa<UnreachableInst>(UserInst->getParent()->getTerminator()))
+ continue;
+ if (!L->contains(UserInst))
return true;
}
return false;
diff --git a/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll b/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll
index 5dc613e733f00..6836aea1ff61f 100644
--- a/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll
+++ b/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll
@@ -555,6 +555,64 @@ exit:
ret i32 0
}
+; A convergence token defined in the loop is used outside the loop, but only
+; in a dead branch (block terminated by unreachable). This should not prevent
+; unrolling.
+define i32 @extended_loop_dead_branch(i32 %n) {
+; CHECK-LABEL: @extended_loop_dead_branch(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[N:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], -1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 1
+; CHECK-NEXT: br i1 [[TMP2]], label [[L3_EPIL_PREHEADER:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK: entry.new:
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[TMP0]], [[XTRAITER]]
+; CHECK-NEXT: br label [[L3:%.*]], !llvm.loop [[LOOP4]]
+; CHECK: l3:
+; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[INC_1:%.*]], [[L3]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[L3]] ]
+; CHECK-NEXT: [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT: [[TOK_LOOP_1:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP_1]]) ]
+; CHECK-NEXT: [[INC_1]] = add nsw i32 [[X_0]], 2
+; CHECK-NEXT: [[NITER_NEXT_1]] = add i32 [[NITER]], 2
+; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i32 [[NITER_NEXT_1]], [[UNROLL_ITER]]
+; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA:%.*]], label [[L3]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: exit.unr-lcssa:
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[L3_EPIL_PREHEADER]], label [[EXIT:%.*]]
+; CHECK: l3.epil.preheader:
+; CHECK-NEXT: [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]])
+; CHECK-NEXT: br label [[L3_EPIL:%.*]]
+; CHECK: l3.epil:
+; CHECK-NEXT: [[TOK_LOOP_EPIL:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT: call void @f() [ "convergencectrl"(token [[TOK_LOOP_EPIL]]) ]
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %l3, !llvm.loop !1
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+ %tok.loop = call token @llvm.experimental.convergence.anchor()
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+ ret i32 0
+
+dead:
+ call void @f() [ "convergencectrl"(token %tok.loop) ]
+ unreachable
+}
+
declare token @llvm.experimental.convergence.anchor()
declare token @llvm.experimental.convergence.loop()
|
In `extendsConvergenceOutsideLoop`, skip users whose parent block is terminated by unreachable, as these represent dead branches. This prevents convergence control tokens in dead code from incorrectly marking a loop as having extended convergence, which could block loop unrolling.
ccfea43 to
edcb29d
Compare
| } | ||
|
|
||
| /// Check if a block was previously marked dead by setting the terminator to | ||
| /// `unreachable` and the condiational branch to the block is statically |
There was a problem hiding this comment.
also perhaps a more accurate description:
"... and no predecessors exist with statically evaluatable terminators that branch to this block"
| for (const BasicBlock *Pred : predecessors(BB)) { | ||
| auto *CondBr = dyn_cast<CondBrInst>(Pred->getTerminator()); | ||
| if (!CondBr) | ||
| return false; |
There was a problem hiding this comment.
Are there other types of control flow instructions, other than condBrInst, that we care about that could switch control flow to this block?
There was a problem hiding this comment.
The other kind would be an UncondBrInst, which if that is a predessor then this block isn't dead and we should return false
edit: (or as Deric pointed out offline a switch instruction)
| /// Check if a block was previously marked dead by setting the terminator to | ||
| /// `unreachable` and their is a statically evaluated conditional branch to NOT | ||
| /// branch to the block. This is done for instance within the unroll pass, | ||
| /// between unrolling inner/outer loops. |
There was a problem hiding this comment.
| /// Check if a block was previously marked dead by setting the terminator to | |
| /// `unreachable` and their is a statically evaluated conditional branch to NOT | |
| /// branch to the block. This is done for instance within the unroll pass, | |
| /// between unrolling inner/outer loops. | |
| // Check if a block was previously marked dead by setting the terminator to | |
| // `unreachable` and there are no branches that may branch to the block. We | |
| // can only ensure a predecessor doesn't branch to this block if it is statically | |
| // known not to. This is done for instance within the unroll pass, between | |
| // unrolling inner/outer loops. |
(removed the extra comment line as static functions shouldn't generate a doc)
| } | ||
|
|
||
| /// Check if a block was previously marked dead by setting the terminator to | ||
| /// `unreachable` and their is a statically evaluated conditional branch to NOT |
There was a problem hiding this comment.
nit: This comment could be slightly improved in accuracy and grammar
…g DirectX" (#194452) The initial landing surfaced 3 somewhat orthogonal issues related to loop unrolling. These are addressed: [here](#193592), [here](#193593) and [here](#193590). These caused these [tests](https://github.com/llvm/llvm-project/actions/runs/24577221310/job/71865579618#step:8:87913) to fail in the offload test suite. We can verify that these are now passing as expected (fixing any of the 3 issues would resolve this and allow us to reland) Some additional tests were added since the revert that are now accounted for and updated in the reland fixes commit. This relands #188792
…en targeting DirectX" (#194452) The initial landing surfaced 3 somewhat orthogonal issues related to loop unrolling. These are addressed: [here](llvm/llvm-project#193592), [here](llvm/llvm-project#193593) and [here](llvm/llvm-project#193590). These caused these [tests](https://github.com/llvm/llvm-project/actions/runs/24577221310/job/71865579618#step:8:87913) to fail in the offload test suite. We can verify that these are now passing as expected (fixing any of the 3 issues would resolve this and allow us to reland) Some additional tests were added since the revert that are now accounted for and updated in the reland fixes commit. This relands llvm/llvm-project#188792
…en targeting DirectX" (#194452) The initial landing surfaced 3 somewhat orthogonal issues related to loop unrolling. These are addressed: [here](llvm/llvm-project#193592), [here](llvm/llvm-project#193593) and [here](llvm/llvm-project#193590). These caused these [tests](https://github.com/llvm/llvm-project/actions/runs/24577221310/job/71865579618#step:8:87913) to fail in the offload test suite. We can verify that these are now passing as expected (fixing any of the 3 issues would resolve this and allow us to reland) Some additional tests were added since the revert that are now accounted for and updated in the reland fixes commit. This relands llvm/llvm-project#188792
In
extendsConvergenceOutsideLoop, skip users whose parent block is terminated by unreachable and which are statically known to not branch to anymore, as these represent dead branches. This prevents convergence control tokens in dead code from incorrectly marking a loop as having extended convergence, which could block loop unrolling.This was discovered when #188792 caused the following failure: https://github.com/llvm/llvm-project/actions/runs/24577221310/job/71865579618. When emitting convergence control tokens, the inner loop successfully unrolls and marks the old body branch as unreachable, it then tries to unroll the outer loop, however the
convergence.looptoken in the outer loop header sees that a convergent op (in this casedx.resource.getpointer) in the dead branch is using it and marks it asConvergence::Extended, preventing a loop unroll from occurring. In this case it should still be possible to unroll.Assisted by: Claude Opus 4.6