[AArch64] Convert CSEL(X, 1) into CSINC(X, XZR) in early-ifcvt #162993
Merged
Il-Capitano merged 3 commits into llvm:main on Oct 20, 2025
Merged
[AArch64] Convert CSEL(X, 1) into CSINC(X, XZR) in early-ifcvt #162993 — Il-Capitano merged 3 commits into llvm:main from
CSEL(X, 1) into CSINC(X, XZR) in early-ifcvt #162993 — Il-Capitano merged 3 commits into llvm:main from
Conversation
Member
|
@llvm/pr-subscribers-backend-aarch64 Author: Csanád Hajdú (Il-Capitano) Changes: Early if conversion can create instruction sequences such as `mov x1, #1; csel x0, x1, x2, eq`, which could be simplified into `csinc x0, x2, xzr, ne` instead. One notable example that generates code like this is `cmpxchg weak`. This is fixed by handling an immediate value of 1 as `add(wzr, 1)` so that the addition can be folded into CSEL by using CSINC instead. Full diff: https://github.com/llvm/llvm-project/pull/162993.diff — 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index b8761d971a67d..578fd321b4873 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -708,8 +708,32 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
const MachineInstr *DefMI = MRI.getVRegDef(VReg);
unsigned Opc = 0;
- unsigned SrcOpNum = 0;
+ unsigned SrcReg = 0;
switch (DefMI->getOpcode()) {
+ case AArch64::SUBREG_TO_REG:
+ // Check for the following way to define an 64-bit immediate:
+ // %0:gpr32 = MOVi32imm 1
+ // %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32
+ if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 0)
+ return 0;
+ if (!DefMI->getOperand(2).isReg())
+ return 0;
+ if (!DefMI->getOperand(3).isImm() ||
+ DefMI->getOperand(3).getImm() != AArch64::sub_32)
+ return 0;
+ DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg());
+ if (DefMI->getOpcode() != AArch64::MOVi32imm)
+ return 0;
+ // fall-through to MOVi32imm case.
+ [[fallthrough]];
+ case AArch64::MOVi32imm:
+ case AArch64::MOVi64imm:
+ if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
+ return 0;
+ SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+ Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
+ break;
+
case AArch64::ADDSXri:
case AArch64::ADDSWri:
// if NZCV is used, do not fold.
@@ -724,7 +748,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
DefMI->getOperand(3).getImm() != 0)
return 0;
- SrcOpNum = 1;
+ SrcReg = DefMI->getOperand(1).getReg();
Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
break;
@@ -734,7 +758,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
return 0;
- SrcOpNum = 2;
+ SrcReg = DefMI->getOperand(2).getReg();
Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
break;
}
@@ -753,17 +777,17 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
return 0;
- SrcOpNum = 2;
+ SrcReg = DefMI->getOperand(2).getReg();
Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
break;
}
default:
return 0;
}
- assert(Opc && SrcOpNum && "Missing parameters");
+ assert(Opc && SrcReg && "Missing parameters");
if (NewVReg)
- *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
+ *NewVReg = SrcReg;
return Opc;
}
@@ -976,6 +1000,14 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
// Fold the operation. Leave any dead instructions for DCE to clean up.
if (FoldedOpc) {
+ // NewVReg might be XZR/WZR. In that case create a COPY into a virtual
+ // register.
+ if (!Register::isVirtualRegister(NewVReg)) {
+ unsigned ZeroReg = NewVReg;
+ NewVReg = MRI.createVirtualRegister(RC);
+ BuildMI(MBB, I, DL, get(TargetOpcode::COPY), NewVReg).addReg(ZeroReg);
+ }
+
FalseReg = NewVReg;
Opc = FoldedOpc;
// The extends the live range of NewVReg.
diff --git a/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll b/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll
index 97a7741bcde75..849323f0fedf3 100644
--- a/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll
@@ -421,3 +421,83 @@ for.body51: ; preds = %is_sbox.exit155
unreachable
}
declare fastcc void @get_switch_type(i32, i32, i16 signext, i16 signext, ptr nocapture) nounwind ssp
+
+; CHECK-LABEL: fold_imm1_csinc_32:
+; CHECK: cmp w0, w1
+; CHECK-NEXT: csinc w0, w2, wzr, ge
+; CHECK-NEXT: ret
+define i32 @fold_imm1_csinc_32(i32 %x, i32 %y, i32 %n) nounwind ssp {
+entry:
+ %cmp = icmp slt i32 %x, %y
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ br label %exit
+
+if.else:
+ br label %exit
+
+exit:
+ %result = phi i32 [ 1, %if.then ], [ %n, %if.else ]
+ ret i32 %result
+}
+
+; CHECK-LABEL: fold_imm1_csinc_64:
+; CHECK: cmp x0, x1
+; CHECK-NEXT: csinc x0, x2, xzr, ge
+; CHECK-NEXT: ret
+define i64 @fold_imm1_csinc_64(i64 %x, i64 %y, i64 %n) nounwind ssp {
+entry:
+ %cmp = icmp slt i64 %x, %y
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ br label %exit
+
+if.else:
+ br label %exit
+
+exit:
+ %result = phi i64 [ 1, %if.then ], [ %n, %if.else ]
+ ret i64 %result
+}
+
+; CHECK-LABEL: fold_imm1_cset_32:
+; CHECK: cmp w0, w1
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+define i32 @fold_imm1_cset_32(i32 %x, i32 %y) nounwind ssp {
+entry:
+ %cmp = icmp slt i32 %x, %y
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ br label %exit
+
+if.else:
+ br label %exit
+
+exit:
+ %result = phi i32 [ 1, %if.then ], [ 0, %if.else ]
+ ret i32 %result
+}
+
+; CHECK-LABEL: fold_imm1_cset_64:
+; CHECK: cmp x0, x1
+; CHECK-NEXT: cset x0, lt
+; CHECK-NEXT: ret
+define i64 @fold_imm1_cset_64(i64 %x, i64 %y) nounwind ssp {
+entry:
+ %cmp = icmp slt i64 %x, %y
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ br label %exit
+
+if.else:
+ br label %exit
+
+exit:
+ %result = phi i64 [ 1, %if.then ], [ 0, %if.else ]
+ ret i64 %result
+}
diff --git a/llvm/test/CodeGen/AArch64/peephole-csel.ll b/llvm/test/CodeGen/AArch64/peephole-csel.ll
index 868b9f1f2f6ac..b085258059b7e 100644
--- a/llvm/test/CodeGen/AArch64/peephole-csel.ll
+++ b/llvm/test/CodeGen/AArch64/peephole-csel.ll
@@ -5,10 +5,9 @@ define void @peephole_csel(ptr %dst, i1 %0, i1 %cmp) {
; CHECK-LABEL: peephole_csel:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: tst w2, #0x1
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: mov x9, xzr
+; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: tst w1, #0x1
-; CHECK-NEXT: csel x8, x8, x9, eq
+; CHECK-NEXT: csinc x8, x8, xzr, ne
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: ret
entry:
|
Contributor
|
Thanks for adding me but I don't know this area well enough to review. Hopefully @davemgreen or @nikic will have a chance to take a look. |
rj-jesus
reviewed
Oct 17, 2025
paulwalker-arm
approved these changes
Oct 17, 2025
Early if conversion can create instruction sequences such as ``` mov x1, #1 csel x0, x1, x2, eq ``` which could be simplified into the following instead ``` csinc x0, x2, xzr, ne ``` One notable example that generates code like this is `cmpxchg weak`. This is fixed by handling an immediate value of 1 as `add(wzr, 1)` so that the addition can be folded into CSEL by using CSINC instead.
* Remove fallthrough * Rename NewVReg -> NewReg
* NewVReg -> NewReg in comments. * Use WZR/XZR directly in the folded instruction.
382ac01 to
51f0dc6
Compare
Contributor
Author
|
The previous CI failures seem to be unrelated to my change, so I've rebased it. Once CI is all green I'll merge it. |
Lukacma
pushed a commit
to Lukacma/llvm-project
that referenced
this pull request
Oct 29, 2025
…lvm#162993) Early if conversion can create instruction sequences such as ``` mov x1, #1 csel x0, x1, x2, eq ``` which could be simplified into the following instead ``` csinc x0, x2, xzr, ne ``` One notable example that generates code like this is `cmpxchg weak`. This is fixed by handling an immediate value of 1 as `add(wzr, 1)` so that the addition can be folded into CSEL by using CSINC instead.
aokblast
pushed a commit
to aokblast/llvm-project
that referenced
this pull request
Oct 30, 2025
…llvm#162993) Early if conversion can create instruction sequences such as ``` mov x1, #1 csel x0, x1, x2, eq ``` which could be simplified into the following instead ``` csinc x0, x2, xzr, ne ``` One notable example that generates code like this is `cmpxchg weak`. This is fixed by handling an immediate value of 1 as `add(wzr, 1)` so that the addition can be folded into CSEL by using CSINC instead.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Early if conversion can create instruction sequences such as `mov x1, #1; csel x0, x1, x2, eq`,
which could be simplified into `csinc x0, x2, xzr, ne` instead.
One notable example that generates code like this is
`cmpxchg weak`. This is fixed by handling an immediate value of 1 as
`add(wzr, 1)` so that the addition can be folded into CSEL by using CSINC instead.