[InstCombine] Extend foldICmpAddConstant to disjoint or.#75899
[InstCombine] Extend foldICmpAddConstant to disjoint or.#75899
foldICmpAddConstant to disjoint or.#75899Conversation
|
@llvm/pr-subscribers-llvm-transforms Author: Mikhail Gudim (mgudim) ChangesFull diff: https://github.com/llvm/llvm-project/pull/75899.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 289976718e52f3..1cbcec3f21e8b2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2918,17 +2918,19 @@ static Value *createLogicFromTable(const std::bitset<4> &Table, Value *Op0,
}
/// Fold icmp (add X, Y), C.
-Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp,
- BinaryOperator *Add,
+Instruction *InstCombinerImpl::foldICmpAddLikeConstant(ICmpInst &Cmp,
+ BinaryOperator *AddLike,
const APInt &C) {
- Value *Y = Add->getOperand(1);
- Value *X = Add->getOperand(0);
+ Value *X = nullptr;
+ Value *Y = nullptr;
+ if (!match(AddLike, m_AddLike(m_Value(X), m_Value(Y))))
+ return nullptr;
Value *Op0, *Op1;
Instruction *Ext0, *Ext1;
const CmpInst::Predicate Pred = Cmp.getPredicate();
- if (match(Add,
- m_Add(m_CombineAnd(m_Instruction(Ext0), m_ZExtOrSExt(m_Value(Op0))),
+ if (match(AddLike,
+ m_AddLike(m_CombineAnd(m_Instruction(Ext0), m_ZExtOrSExt(m_Value(Op0))),
m_CombineAnd(m_Instruction(Ext1),
m_ZExtOrSExt(m_Value(Op1))))) &&
Op0->getType()->isIntOrIntVectorTy(1) &&
@@ -2949,7 +2951,7 @@ Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp,
Table[2] = ComputeTable(true, false);
Table[3] = ComputeTable(true, true);
if (auto *Cond =
- createLogicFromTable(Table, Op0, Op1, Builder, Add->hasOneUse()))
+ createLogicFromTable(Table, Op0, Op1, Builder, AddLike->hasOneUse()))
return replaceInstUsesWith(Cmp, Cond);
}
const APInt *C2;
@@ -2957,14 +2959,15 @@ Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp,
return nullptr;
// Fold icmp pred (add X, C2), C.
- Type *Ty = Add->getType();
+ Type *Ty = AddLike->getType();
// If the add does not wrap, we can always adjust the compare by subtracting
// the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE
// are canonicalized to SGT/SLT/UGT/ULT.
- if ((Add->hasNoSignedWrap() &&
+ if (AddLike->getOpcode() == Instruction::Or ||
+ (AddLike->hasNoSignedWrap() &&
(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) ||
- (Add->hasNoUnsignedWrap() &&
+ (AddLike->hasNoUnsignedWrap() &&
(Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT))) {
bool Overflow;
APInt NewC =
@@ -3021,7 +3024,7 @@ Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp,
return new ICmpInst(ICmpInst::ICMP_ULE, X, ConstantInt::get(Ty, C));
}
- if (!Add->hasOneUse())
+ if (!AddLike->hasOneUse())
return nullptr;
// X+C <u C2 -> (X & -C2) == C
@@ -3674,6 +3677,9 @@ InstCombinerImpl::foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp,
Instruction *InstCombinerImpl::foldICmpBinOpWithConstant(ICmpInst &Cmp,
BinaryOperator *BO,
const APInt &C) {
+ if (Instruction *I = foldICmpAddLikeConstant(Cmp, BO, C))
+ return I;
+
switch (BO->getOpcode()) {
case Instruction::Xor:
if (Instruction *I = foldICmpXorConstant(Cmp, BO, C))
@@ -3716,10 +3722,6 @@ Instruction *InstCombinerImpl::foldICmpBinOpWithConstant(ICmpInst &Cmp,
if (Instruction *I = foldICmpSubConstant(Cmp, BO, C))
return I;
break;
- case Instruction::Add:
- if (Instruction *I = foldICmpAddConstant(Cmp, BO, C))
- return I;
- break;
default:
break;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index f86db698ef8f12..756da48a1800e0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -671,7 +671,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
const APInt &C);
Instruction *foldICmpSubConstant(ICmpInst &Cmp, BinaryOperator *Sub,
const APInt &C);
- Instruction *foldICmpAddConstant(ICmpInst &Cmp, BinaryOperator *Add,
+ Instruction *foldICmpAddLikeConstant(ICmpInst &Cmp, BinaryOperator *AddLike,
const APInt &C);
Instruction *foldICmpAndConstConst(ICmpInst &Cmp, BinaryOperator *And,
const APInt &C1);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 20bf00344b144b..f5a91f5a84f012 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1550,7 +1550,7 @@ tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp,
if (C0->getType() != Sel.getType())
return nullptr;
- // ULT with 'add' of a constant is canonical. See foldICmpAddConstant().
+ // ULT with 'add' of a constant is canonical. See foldICmpAddLikeConstant().
// FIXME: Are there more magic icmp predicate+constant pairs we must avoid?
// Or should we just abandon this transform entirely?
if (Pred == CmpInst::ICMP_ULT && match(X, m_Add(m_Value(), m_Constant())))
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index 1c7bb36f0d34c0..33d09bfad856a7 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -4912,3 +4912,13 @@ define i1 @or_positive_sgt_zero_multi_use(i8 %a) {
%cmp = icmp sgt i8 %b, 0
ret i1 %cmp
}
+
+define i1 @icmp_disjoint_or(i32 %x) {
+; CHECK-LABEL: @icmp_disjoint_or(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[X:%.*]], 35
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %or_ = or disjoint i32 %x, 6
+ %C = icmp sgt i32 %or_, 41
+ ret i1 %C
+}
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
ac71355 to
f601b61
Compare
|
@mgudim Could you please have a look at dtcxzyw/llvm-opt-benchmark#10? |
@dtcxzyw Sure, I'll take a look. But I didn't understand what file exactly the regressions are at. Can you paste the IR before and after the transformation. Preferably the smallest function which shows the regression. |
One example: I will post a reduced test case later :) |
Sorry, I cannot provide a reduced test since it is hard to do semantic-preserving reduction. |
No worries. I'll take a look, but sometime later. I'm in the middle of something else right now. |
|
@dtcxzyw Is the diff shown for output of the optimizer before / after my optimization. If so, where is the original source of the benchmark? Basically, I am asking for steps to reproduce the regression. |
|
|
OK, I see what's going on. First, note that 9223372036854775792 has 0 in position 63, bits [0, 3] are zero and all other bits are 1. Below is the simplified test case: Before my change, the code looks like this when we come to In other words, the After my change: The compare instruction is now changed to There are a couple of ways to fix this but I am not sure which one is the best. I was thinking we could add some capabilities to @nikic What do you think? |
I don't really get what your change does in that example. Why is it changing the comparison operand from 0 to -15? Where is the or + icmp pattern in this sample? |
In the "simplified testcase" we have this: The |
|
I see, thanks. A possible way to fix this would be to try to take into account already known bits when adding known bits from conditions, i.e. (It looks like the patch needs a rebase.) |
f601b61 to
5c0606e
Compare
5c0606e to
1ad7ff9
Compare
|
I'll try to put in a fix for that degradation in the near future @dtcxzyw |
You can file an issue to track the work. |
|
ping |
Added more tests.
ebeceb0 to
6b7e41c
Compare
|
I have created an MR to fix the degradation: #79405 |
|
@nikic @dtcxzyw @goldsteinn |
|
@nikic @dtcxzyw @goldsteinn |
|
@nikic @dtcxzyw @goldsteinn |
| @@ -3721,10 +3728,6 @@ Instruction *InstCombinerImpl::foldICmpBinOpWithConstant(ICmpInst &Cmp, | |||
| if (Instruction *I = foldICmpSubConstant(Cmp, BO, C)) | |||
| return I; | |||
| break; | |||
There was a problem hiding this comment.
| break; | |
| break; | |
| case Instruction::Or: | |
| if (Instruction *I = foldICmpOrConstant(Cmp, BO, C)) | |
| return I; | |
| [[fallthrough]]; |
I prefer the switch+fallthrough approach.
| %or_ = or disjoint i32 %x, 5 | ||
| %C = icmp ne i32 %or_, 5 | ||
| ret i1 %C | ||
| } |
There was a problem hiding this comment.
Please also add some negative tests.
|
Reverse ping :) |
I am busy working on other things now, but I plan to come back to this. |
No description provided.