AMDGPU: Fix runtime unrolling when cascaded GEPs present (#147700)
Merged
macurtis-amd merged 3 commits intollvm:mainfrom Jul 10, 2025
Merged
AMDGPU: Fix runtime unrolling when cascaded GEPs present#147700macurtis-amd merged 3 commits intollvm:mainfrom
macurtis-amd merged 3 commits intollvm:mainfrom
Conversation
Member
|
@llvm/pr-subscribers-llvm-transforms Author: None (macurtis-amd) Changes: Cascaded GEPs (i.e. a GEP of a GEP) are not handled when determining whether it is OK to runtime-unroll loops. This change simply uses `getUnderlyingObject` to look through cascaded GEPs. Full diff: https://github.com/llvm/llvm-project/pull/147700.diff (2 files affected):
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index f693580929518..24f4df2aff9d1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -216,10 +216,13 @@ void AMDGPUTTIImpl::getUnrollingPreferences(
// a variable, most likely we will be unable to combine it.
// Do not unroll too deep inner loops for local memory to give a chance
// to unroll an outer loop for a more important reason.
- if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2 ||
- (!isa<GlobalVariable>(GEP->getPointerOperand()) &&
- !isa<Argument>(GEP->getPointerOperand())))
+ if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2)
continue;
+
+ const Value *V = getUnderlyingObject(GEP->getPointerOperand());
+ if (!isa<GlobalVariable>(V) && !isa<Argument>(V))
+ continue;
+
LLVM_DEBUG(dbgs() << "Allow unroll runtime for loop:\n"
<< *L << " due to LDS use.\n");
UP.Runtime = UnrollRuntimeLocal;
diff --git a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll
new file mode 100644
index 0000000000000..9414a1e24e542
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll
@@ -0,0 +1,44 @@
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -passes=loop-unroll -S %s | FileCheck %s
+
+%struct.wombat = type { %struct.zot, i32, [16 x i32], [16 x i32], i32, i32, [16 x i32], i32 }
+%struct.zot = type { i32, i32, [1024 x i32] }
+
+@global = external addrspace(3) global %struct.wombat
+
+; Ensure that a cascaded GEP for local address space does not inhibit unrolling
+;
+; CHECK-LABEL: @unroll_when_cascaded_gep
+; CHECK: bb:
+; CHECK: br {{.*}}, label %bb2.unr-lcssa, label %bb.new
+; CHECK: bb.new:
+; CHECK: %unroll_iter =
+; CHECK: br label %bb1
+; CHECK: bb1:
+; CHECK: br {{.*}}, label %bb2.unr-lcssa.loopexit, label %bb1
+; CHECK: bb2.unr-lcssa.loopexit:
+; CHECK: br label %bb2.unr-lcssa
+; CHECK: bb2.unr-lcssa:
+; CHECK: br {{.*}}, label %bb1.epil.preheader, label %bb2
+; CHECK: bb1.epil.preheader:
+; CHECK: br label %bb1.epil
+; CHECK: bb1.epil:
+; CHECK: br {{.*}}, label %bb1.epil, label %bb2.epilog-lcssa
+; CHECK: bb2.epilog-lcssa:
+; CHECK: br label %bb2
+; CHECK: bb2:
+; CHECK: ret void
+define amdgpu_kernel void @unroll_when_cascaded_gep(i32 %arg) {
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ %phi = phi i32 [ 0, %bb ], [ %add, %bb1 ]
+ %getelementptr = getelementptr [1024 x i32], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @global, i32 8), i32 0, i32 0
+ %add = add i32 %phi, 1
+ %icmp = icmp eq i32 %phi, %arg
+ br i1 %icmp, label %bb2, label %bb1
+
+bb2: ; preds = %bb1
+ ret void
+}
+
|
Member
|
@llvm/pr-subscribers-backend-amdgpu Author: None (macurtis-amd) Changes: Cascaded GEPs (i.e. a GEP of a GEP) are not handled when determining whether it is OK to runtime-unroll loops. This change simply uses `getUnderlyingObject` to look through cascaded GEPs. Full diff: https://github.com/llvm/llvm-project/pull/147700.diff (2 files affected):
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index f693580929518..24f4df2aff9d1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -216,10 +216,13 @@ void AMDGPUTTIImpl::getUnrollingPreferences(
// a variable, most likely we will be unable to combine it.
// Do not unroll too deep inner loops for local memory to give a chance
// to unroll an outer loop for a more important reason.
- if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2 ||
- (!isa<GlobalVariable>(GEP->getPointerOperand()) &&
- !isa<Argument>(GEP->getPointerOperand())))
+ if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2)
continue;
+
+ const Value *V = getUnderlyingObject(GEP->getPointerOperand());
+ if (!isa<GlobalVariable>(V) && !isa<Argument>(V))
+ continue;
+
LLVM_DEBUG(dbgs() << "Allow unroll runtime for loop:\n"
<< *L << " due to LDS use.\n");
UP.Runtime = UnrollRuntimeLocal;
diff --git a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll
new file mode 100644
index 0000000000000..9414a1e24e542
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll
@@ -0,0 +1,44 @@
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -passes=loop-unroll -S %s | FileCheck %s
+
+%struct.wombat = type { %struct.zot, i32, [16 x i32], [16 x i32], i32, i32, [16 x i32], i32 }
+%struct.zot = type { i32, i32, [1024 x i32] }
+
+@global = external addrspace(3) global %struct.wombat
+
+; Ensure that a cascaded GEP for local address space does not inhibit unrolling
+;
+; CHECK-LABEL: @unroll_when_cascaded_gep
+; CHECK: bb:
+; CHECK: br {{.*}}, label %bb2.unr-lcssa, label %bb.new
+; CHECK: bb.new:
+; CHECK: %unroll_iter =
+; CHECK: br label %bb1
+; CHECK: bb1:
+; CHECK: br {{.*}}, label %bb2.unr-lcssa.loopexit, label %bb1
+; CHECK: bb2.unr-lcssa.loopexit:
+; CHECK: br label %bb2.unr-lcssa
+; CHECK: bb2.unr-lcssa:
+; CHECK: br {{.*}}, label %bb1.epil.preheader, label %bb2
+; CHECK: bb1.epil.preheader:
+; CHECK: br label %bb1.epil
+; CHECK: bb1.epil:
+; CHECK: br {{.*}}, label %bb1.epil, label %bb2.epilog-lcssa
+; CHECK: bb2.epilog-lcssa:
+; CHECK: br label %bb2
+; CHECK: bb2:
+; CHECK: ret void
+define amdgpu_kernel void @unroll_when_cascaded_gep(i32 %arg) {
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ %phi = phi i32 [ 0, %bb ], [ %add, %bb1 ]
+ %getelementptr = getelementptr [1024 x i32], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @global, i32 8), i32 0, i32 0
+ %add = add i32 %phi, 1
+ %icmp = icmp eq i32 %phi, %arg
+ br i1 %icmp, label %bb2, label %bb1
+
+bb2: ; preds = %bb1
+ ret void
+}
+
|
JonChesterfield
approved these changes
Jul 9, 2025
Collaborator
JonChesterfield
left a comment
There was a problem hiding this comment.
Yep, that's consistent. Thanks very much for running it to ground!
shiltian
reviewed
Jul 9, 2025
shiltian
approved these changes
Jul 9, 2025
searlmc1
pushed a commit
to ROCm/llvm-project
that referenced
this pull request
Jul 17, 2025
Cascaded GEPs (i.e. a GEP of a GEP) are not handled when determining whether it is OK to runtime-unroll loops. This change simply uses `getUnderlyingObject` to look through cascaded GEPs.
searlmc1
pushed a commit
to ROCm/llvm-project
that referenced
this pull request
Jul 17, 2025
searlmc1
pushed a commit
to ROCm/llvm-project
that referenced
this pull request
Jul 17, 2025
searlmc1
pushed a commit
to ROCm/llvm-project
that referenced
this pull request
Jul 17, 2025
This was referenced Sep 19, 2025
ronlieb
pushed a commit
to ROCm/llvm-project
that referenced
this pull request
Feb 4, 2026
…m#147700) (#1349) This reverts commit cff4a00. Related to ROCm/rocm-systems#2865
JeniferC99
added a commit
to ROCm/llvm-project
that referenced
this pull request
Feb 26, 2026
chiranjeevipattigidi
added a commit
to ROCm/llvm-project
that referenced
this pull request
Mar 11, 2026
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Cascaded GEPs (i.e. a GEP of a GEP) are not handled when determining whether it is OK to runtime-unroll loops.
This change simply uses
`getUnderlyingObject` to look through cascaded GEPs.