Revert "AMDGPU: Fix runtime unrolling when cascaded GEPs present (#14… by ronlieb · Pull Request #183641 · llvm/llvm-project

ronlieb · 2026-02-26T23:30:51Z

…7700)"

slows down llama.cpp

This reverts commit cff4a00.

…m#147700)" slows down llama.cpp This reverts commit cff4a00.

llvmbot · 2026-02-26T23:31:33Z

@llvm/pr-subscribers-backend-amdgpu

Author: theRonShark (ronlieb)

Changes

…7700)"

slows down llama.cpp

This reverts commit cff4a00.

Full diff: https://github.com/llvm/llvm-project/pull/183641.diff

2 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+3-6)
(removed) llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll (-64)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 6eca575cd180f..d746ce65a6288 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -217,13 +217,10 @@ void AMDGPUTTIImpl::getUnrollingPreferences(
         // a variable, most likely we will be unable to combine it.
         // Do not unroll too deep inner loops for local memory to give a chance
         // to unroll an outer loop for a more important reason.
-        if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2)
+        if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2 ||
+            (!isa<GlobalVariable>(GEP->getPointerOperand()) &&
+             !isa<Argument>(GEP->getPointerOperand())))
           continue;
-
-        const Value *V = getUnderlyingObject(GEP->getPointerOperand());
-        if (!isa<GlobalVariable>(V) && !isa<Argument>(V))
-          continue;
-
         LLVM_DEBUG(dbgs() << "Allow unroll runtime for loop:\n"
                           << *L << " due to LDS use.\n");
         UP.Runtime = UnrollRuntimeLocal;
diff --git a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll
deleted file mode 100644
index adf1e2117a0b5..0000000000000
--- a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll
+++ /dev/null
@@ -1,64 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=loop-unroll -S %s -o - | FileCheck %s
-
-%struct.wombat = type { %struct.zot, i32, [16 x i32], [16 x i32], i32, i32, [16 x i32], i32 }
-%struct.zot = type { i32, i32, [1024 x i32] }
-
-@global = external addrspace(3) global %struct.wombat
-
-; Ensure that a cascaded GEP for local address space does not inhibit unrolling
-;
-define amdgpu_kernel void @unroll_when_cascaded_gep(i32 %arg) {
-; CHECK-LABEL: @unroll_when_cascaded_gep(
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[ARG:%.*]], 1
-; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[TMP0]], 7
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[ARG]], 7
-; CHECK-NEXT:    br i1 [[TMP1]], label [[BB1_EPIL_PREHEADER:%.*]], label [[BB_NEW:%.*]]
-; CHECK:       bb.new:
-; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[TMP0]], [[XTRAITER]]
-; CHECK-NEXT:    br label [[BB1:%.*]]
-; CHECK:       bb1:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, [[BB_NEW]] ], [ [[ADD_7:%.*]], [[BB1]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[BB_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[BB1]] ]
-; CHECK-NEXT:    [[ADD_7]] = add i32 [[PHI]], 8
-; CHECK-NEXT:    [[NITER_NEXT_7]] = add i32 [[NITER]], 8
-; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i32 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[BB2_UNR_LCSSA:%.*]], label [[BB1]]
-; CHECK:       bb2.unr-lcssa:
-; CHECK-NEXT:    [[PHI_UNR:%.*]] = phi i32 [ [[ADD_7]], [[BB1]] ]
-; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[BB1_EPIL_PREHEADER]], label [[BB2:%.*]]
-; CHECK:       bb1.epil.preheader:
-; CHECK-NEXT:    [[PHI_EPIL_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[PHI_UNR]], [[BB2_UNR_LCSSA]] ]
-; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
-; CHECK-NEXT:    br label [[BB1_EPIL:%.*]]
-; CHECK:       bb1.epil:
-; CHECK-NEXT:    [[PHI_EPIL:%.*]] = phi i32 [ [[PHI_EPIL_INIT]], [[BB1_EPIL_PREHEADER]] ], [ [[ADD_EPIL:%.*]], [[BB1_EPIL]] ]
-; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i32 [ 0, [[BB1_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[BB1_EPIL]] ]
-; CHECK-NEXT:    [[GETELEMENTPTR_EPIL:%.*]] = getelementptr [1024 x i32], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @global, i32 8), i32 0, i32 0
-; CHECK-NEXT:    [[ADD_EPIL]] = add i32 [[PHI_EPIL]], 1
-; CHECK-NEXT:    [[ICMP_EPIL:%.*]] = icmp eq i32 [[PHI_EPIL]], [[ARG]]
-; CHECK-NEXT:    [[EPIL_ITER_NEXT]] = add i32 [[EPIL_ITER]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_NEXT]], [[XTRAITER]]
-; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[BB1_EPIL]], label [[BB2_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK:       bb2.epilog-lcssa:
-; CHECK-NEXT:    br label [[BB2]]
-; CHECK:       bb2:
-; CHECK-NEXT:    ret void
-;
-bb:
-  br label %bb1
-
-bb1:                                              ; preds = %bb1, %bb
-  %phi = phi i32 [ 0, %bb ], [ %add, %bb1 ]
-  %getelementptr = getelementptr [1024 x i32], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @global, i32 8), i32 0, i32 0
-  %add = add i32 %phi, 1
-  %icmp = icmp eq i32 %phi, %arg
-  br i1 %icmp, label %bb2, label %bb1
-
-bb2:                                              ; preds = %bb1
-  ret void
-}
-

llvmbot · 2026-02-26T23:31:34Z

@llvm/pr-subscribers-llvm-transforms

Author: theRonShark (ronlieb)

Changes

…7700)"

slows down llama.cpp

This reverts commit cff4a00.

Full diff: https://github.com/llvm/llvm-project/pull/183641.diff

2 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+3-6)
(removed) llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll (-64)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 6eca575cd180f..d746ce65a6288 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -217,13 +217,10 @@ void AMDGPUTTIImpl::getUnrollingPreferences(
         // a variable, most likely we will be unable to combine it.
         // Do not unroll too deep inner loops for local memory to give a chance
         // to unroll an outer loop for a more important reason.
-        if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2)
+        if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2 ||
+            (!isa<GlobalVariable>(GEP->getPointerOperand()) &&
+             !isa<Argument>(GEP->getPointerOperand())))
           continue;
-
-        const Value *V = getUnderlyingObject(GEP->getPointerOperand());
-        if (!isa<GlobalVariable>(V) && !isa<Argument>(V))
-          continue;
-
         LLVM_DEBUG(dbgs() << "Allow unroll runtime for loop:\n"
                           << *L << " due to LDS use.\n");
         UP.Runtime = UnrollRuntimeLocal;
diff --git a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll
deleted file mode 100644
index adf1e2117a0b5..0000000000000
--- a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll
+++ /dev/null
@@ -1,64 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=loop-unroll -S %s -o - | FileCheck %s
-
-%struct.wombat = type { %struct.zot, i32, [16 x i32], [16 x i32], i32, i32, [16 x i32], i32 }
-%struct.zot = type { i32, i32, [1024 x i32] }
-
-@global = external addrspace(3) global %struct.wombat
-
-; Ensure that a cascaded GEP for local address space does not inhibit unrolling
-;
-define amdgpu_kernel void @unroll_when_cascaded_gep(i32 %arg) {
-; CHECK-LABEL: @unroll_when_cascaded_gep(
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[ARG:%.*]], 1
-; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[TMP0]], 7
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[ARG]], 7
-; CHECK-NEXT:    br i1 [[TMP1]], label [[BB1_EPIL_PREHEADER:%.*]], label [[BB_NEW:%.*]]
-; CHECK:       bb.new:
-; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[TMP0]], [[XTRAITER]]
-; CHECK-NEXT:    br label [[BB1:%.*]]
-; CHECK:       bb1:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, [[BB_NEW]] ], [ [[ADD_7:%.*]], [[BB1]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[BB_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[BB1]] ]
-; CHECK-NEXT:    [[ADD_7]] = add i32 [[PHI]], 8
-; CHECK-NEXT:    [[NITER_NEXT_7]] = add i32 [[NITER]], 8
-; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i32 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[BB2_UNR_LCSSA:%.*]], label [[BB1]]
-; CHECK:       bb2.unr-lcssa:
-; CHECK-NEXT:    [[PHI_UNR:%.*]] = phi i32 [ [[ADD_7]], [[BB1]] ]
-; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[BB1_EPIL_PREHEADER]], label [[BB2:%.*]]
-; CHECK:       bb1.epil.preheader:
-; CHECK-NEXT:    [[PHI_EPIL_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[PHI_UNR]], [[BB2_UNR_LCSSA]] ]
-; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
-; CHECK-NEXT:    br label [[BB1_EPIL:%.*]]
-; CHECK:       bb1.epil:
-; CHECK-NEXT:    [[PHI_EPIL:%.*]] = phi i32 [ [[PHI_EPIL_INIT]], [[BB1_EPIL_PREHEADER]] ], [ [[ADD_EPIL:%.*]], [[BB1_EPIL]] ]
-; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i32 [ 0, [[BB1_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[BB1_EPIL]] ]
-; CHECK-NEXT:    [[GETELEMENTPTR_EPIL:%.*]] = getelementptr [1024 x i32], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @global, i32 8), i32 0, i32 0
-; CHECK-NEXT:    [[ADD_EPIL]] = add i32 [[PHI_EPIL]], 1
-; CHECK-NEXT:    [[ICMP_EPIL:%.*]] = icmp eq i32 [[PHI_EPIL]], [[ARG]]
-; CHECK-NEXT:    [[EPIL_ITER_NEXT]] = add i32 [[EPIL_ITER]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_NEXT]], [[XTRAITER]]
-; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[BB1_EPIL]], label [[BB2_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK:       bb2.epilog-lcssa:
-; CHECK-NEXT:    br label [[BB2]]
-; CHECK:       bb2:
-; CHECK-NEXT:    ret void
-;
-bb:
-  br label %bb1
-
-bb1:                                              ; preds = %bb1, %bb
-  %phi = phi i32 [ 0, %bb ], [ %add, %bb1 ]
-  %getelementptr = getelementptr [1024 x i32], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @global, i32 8), i32 0, i32 0
-  %add = add i32 %phi, 1
-  %icmp = icmp eq i32 %phi, %arg
-  br i1 %icmp, label %bb2, label %bb1
-
-bb2:                                              ; preds = %bb1
-  ret void
-}
-

shiltian · 2026-02-27T00:37:17Z

I'm not sure it's a good idea to revert an upstream PR that is almost a year old.

ronlieb · 2026-02-27T00:41:24Z

We are getting a lot of recent performance complaints about it from the ML world.

llvm-ci · 2026-02-27T03:15:38Z

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-gcc-ubuntu running on sie-linux-worker3 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/32234

Here is the relevant piece of the build log for reference:

Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'XRay-x86_64-linux :: TestCases/Posix/fdr-mode.cpp' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/./bin/clang  --driver-mode=g++ -fxray-instrument  -m64   -g -std=c++11 /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp -o /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/xray/X86_64LinuxConfig/TestCases/Posix/Output/fdr-mode.cpp.tmp
# executed command: /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/./bin/clang --driver-mode=g++ -fxray-instrument -m64 -g -std=c++11 /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp -o /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/xray/X86_64LinuxConfig/TestCases/Posix/Output/fdr-mode.cpp.tmp
# note: command had no output on stdout or stderr
# RUN: at line 2
rm -f fdr-logging-test-*
# executed command: rm -f 'fdr-logging-test-*'
# note: command had no output on stdout or stderr
# RUN: at line 3
rm -f fdr-unwrite-test-*
# executed command: rm -f 'fdr-unwrite-test-*'
# note: command had no output on stdout or stderr
# RUN: at line 4
env XRAY_OPTIONS="patch_premain=false xray_logfile_base=fdr-logging-test-      xray_mode=xray-fdr verbosity=1"  env XRAY_FDR_OPTIONS="func_duration_threshold_us=0"       /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/xray/X86_64LinuxConfig/TestCases/Posix/Output/fdr-mode.cpp.tmp 2>&1 | FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp
# executed command: env 'XRAY_OPTIONS=patch_premain=false xray_logfile_base=fdr-logging-test-      xray_mode=xray-fdr verbosity=1' env XRAY_FDR_OPTIONS=func_duration_threshold_us=0 /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/xray/X86_64LinuxConfig/TestCases/Posix/Output/fdr-mode.cpp.tmp
# note: command had no output on stdout or stderr
# executed command: FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp
# note: command had no output on stdout or stderr
# RUN: at line 8
env XRAY_OPTIONS="patch_premain=false      xray_logfile_base=fdr-unwrite-test- xray_mode=xray-fdr      verbosity=1"  env XRAY_FDR_OPTIONS="func_duration_threshold_us=5000"       /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/xray/X86_64LinuxConfig/TestCases/Posix/Output/fdr-mode.cpp.tmp 2>&1 | FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp
# executed command: env 'XRAY_OPTIONS=patch_premain=false      xray_logfile_base=fdr-unwrite-test- xray_mode=xray-fdr      verbosity=1' env XRAY_FDR_OPTIONS=func_duration_threshold_us=5000 /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/xray/X86_64LinuxConfig/TestCases/Posix/Output/fdr-mode.cpp.tmp
# note: command had no output on stdout or stderr
# executed command: FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp
# note: command had no output on stdout or stderr
# RUN: at line 13
ls fdr-logging-test-* | head -1 | tr -d '\n' > /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/xray/X86_64LinuxConfig/TestCases/Posix/Output/fdr-mode.cpp.tmp.log
# executed command: ls 'fdr-logging-test-*'
# note: command had no output on stdout or stderr
# executed command: head -1
# note: command had no output on stdout or stderr
# executed command: tr -d '\n'
# note: command had no output on stdout or stderr
# RUN: at line 14
/home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/./bin/llvm-xray convert --symbolize --output-format=yaml -instr_map=/home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/xray/X86_64LinuxConfig/TestCases/Posix/Output/fdr-mode.cpp.tmp      "/home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/xray/X86_64LinuxConfig/TestCases/Posix/fdr-logging-test-fdr-mode.cpp.tmp.exZg0L"      | FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp --check-prefix=TRACE
# executed command: /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/./bin/llvm-xray convert --symbolize --output-format=yaml -instr_map=/home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/xray/X86_64LinuxConfig/TestCases/Posix/Output/fdr-mode.cpp.tmp '%{readfile:/home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/xray/X86_64LinuxConfig/TestCases/Posix/Output/fdr-mode.cpp.tmp.log}'
# note: command had no output on stdout or stderr
# executed command: FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/xray/TestCases/Posix/fdr-mode.cpp --check-prefix=TRACE
# note: command had no output on stdout or stderr
# RUN: at line 17
ls fdr-unwrite-test-* | head -1 | tr -d '\n' > /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/xray/X86_64LinuxConfig/TestCases/Posix/Output/fdr-mode.cpp.tmp.log
# executed command: ls 'fdr-unwrite-test-*'
# note: command had no output on stdout or stderr
# executed command: head -1
# note: command had no output on stdout or stderr
# executed command: tr -d '\n'
...

…m#14… (llvm#183641) …7700)" slows down llama.cpp This reverts commit cff4a00.

Revert "AMDGPU: Fix runtime unrolling when cascaded GEPs present (llv…

47fe257

…m#147700)" slows down llama.cpp This reverts commit cff4a00.

ronlieb requested review from bcahoon, kzhuravl and macurtis-amd February 26, 2026 23:30

llvmbot added backend:AMDGPU llvm:transforms labels Feb 26, 2026

bcahoon approved these changes Feb 26, 2026

View reviewed changes

ronlieb merged commit 8f9c926 into llvm:main Feb 27, 2026
13 checks passed

sujianIBM pushed a commit to sujianIBM/llvm-project that referenced this pull request Mar 5, 2026

Revert "AMDGPU: Fix runtime unrolling when cascaded GEPs present (llv…

1c59c4b

…m#14… (llvm#183641) …7700)" slows down llama.cpp This reverts commit cff4a00.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Revert "AMDGPU: Fix runtime unrolling when cascaded GEPs present (#14…#183641

Revert "AMDGPU: Fix runtime unrolling when cascaded GEPs present (#14…#183641
ronlieb merged 1 commit into llvm:main from
ronlieb:ronlieb/revertUnrollRuntime

ronlieb commented Feb 26, 2026

Uh oh!

llvmbot commented Feb 26, 2026

Uh oh!

llvmbot commented Feb 26, 2026

Uh oh!

shiltian commented Feb 27, 2026

Uh oh!

ronlieb commented Feb 27, 2026

Uh oh!

Uh oh!

llvm-ci commented Feb 27, 2026

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

Conversation

ronlieb commented Feb 26, 2026

Uh oh!

llvmbot commented Feb 26, 2026

Uh oh!

llvmbot commented Feb 26, 2026

Uh oh!

shiltian commented Feb 27, 2026

Uh oh!

ronlieb commented Feb 27, 2026

Uh oh!

Uh oh!

llvm-ci commented Feb 27, 2026

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants