[InstCombine] Extend foldICmpBinOp to add-like or. #71396
Conversation
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
f4346fc to
c73a702
Compare
There was a problem hiding this comment.
I have seen checks for an `or` that is actually an `add` in other places. Should we make this into a centralized utility function?
There was a problem hiding this comment.
Great, looks like this comes up in other places too: #72583
I'll wait for that PR to land, so I can apply it here.
3133fd3 to
d52843a
Compare
|
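@llvm/pr-subscribers-llvm-transforms Author: Mikhail Gudim (mgudim) Changes: InstCombine canonicalizes `add` to `or` when possible, but this makes some optimizations applicable to `add` to be missed because they don't realize that the `or` is equivalent to `add`. In this patch we generalize `foldICmpBinOp` to handle such cases. Full diff: https://github.com/llvm/llvm-project/pull/71396.diff 2 Files Affected: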
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 289976718e52f3..0b99260bb635a9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4624,31 +4624,37 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
}
bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
- if (BO0 && isa<OverflowingBinaryOperator>(BO0))
- NoOp0WrapProblem =
- ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
- if (BO1 && isa<OverflowingBinaryOperator>(BO1))
- NoOp1WrapProblem =
- ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
-
+ bool Op0HasNUW = false, Op1HasNUW = false;
+ bool Op0HasNSW = false, Op1HasNSW = false;
// Analyze the case when either Op0 or Op1 is an add instruction.
// Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
- if (BO0 && BO0->getOpcode() == Instruction::Add) {
- A = BO0->getOperand(0);
- B = BO0->getOperand(1);
+ auto hasNoWrapProblem = [](const BinaryOperator &BO, CmpInst::Predicate Pred,
+ bool &HasNSW, bool &HasNUW) -> bool {
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ HasNUW = BO.hasNoUnsignedWrap();
+ HasNSW = BO.hasNoSignedWrap();
+ return ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && HasNUW) ||
+ (CmpInst::isSigned(Pred) && HasNSW);
+ } else if (BO.getOpcode() == Instruction::Or) {
+ HasNUW = true;
+ HasNSW = true;
+ return true;
+ } else {
+ return false;
+ }
+ };
+
+ if (BO0) {
+ match(BO0, m_AddLike(m_Value(A), m_Value(B)));
+ NoOp0WrapProblem = hasNoWrapProblem(*BO0, Pred, Op0HasNSW, Op0HasNUW);
}
- if (BO1 && BO1->getOpcode() == Instruction::Add) {
- C = BO1->getOperand(0);
- D = BO1->getOperand(1);
+ if (BO1) {
+ match(BO1, m_AddLike(m_Value(C), m_Value(D)));
+ NoOp1WrapProblem = hasNoWrapProblem(*BO1, Pred, Op1HasNSW, Op1HasNUW);
}
- // icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow.
- // icmp (A+B), B -> icmp A, 0 for equalities or if there is no overflow.
if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
return new ICmpInst(Pred, A == Op1 ? B : A,
Constant::getNullValue(Op1->getType()));
@@ -4764,17 +4770,15 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
APInt AP2Abs = AP2->abs();
if (AP1Abs.uge(AP2Abs)) {
APInt Diff = *AP1 - *AP2;
- bool HasNUW = BO0->hasNoUnsignedWrap() && Diff.ule(*AP1);
- bool HasNSW = BO0->hasNoSignedWrap();
Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
- Value *NewAdd = Builder.CreateAdd(A, C3, "", HasNUW, HasNSW);
+ Value *NewAdd = Builder.CreateAdd(
+ A, C3, "", Op0HasNUW && Diff.ule(*AP1), Op0HasNSW);
return new ICmpInst(Pred, NewAdd, C);
} else {
APInt Diff = *AP2 - *AP1;
- bool HasNUW = BO1->hasNoUnsignedWrap() && Diff.ule(*AP2);
- bool HasNSW = BO1->hasNoSignedWrap();
Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
- Value *NewAdd = Builder.CreateAdd(C, C3, "", HasNUW, HasNSW);
+ Value *NewAdd = Builder.CreateAdd(
+ C, C3, "", Op1HasNUW && Diff.ule(*AP1), Op1HasNSW);
return new ICmpInst(Pred, A, NewAdd);
}
}
@@ -4868,16 +4872,14 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
// if Z != 0 and nsw(X * Z) and nsw(Y * Z)
// X * Z eq/ne Y * Z -> X eq/ne Y
- if (NonZero && BO0 && BO1 && BO0->hasNoSignedWrap() &&
- BO1->hasNoSignedWrap())
+ if (NonZero && BO0 && BO1 && Op0HasNSW && Op1HasNSW)
return new ICmpInst(Pred, X, Y);
} else
NonZero = isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
// If Z != 0 and nuw(X * Z) and nuw(Y * Z)
// X * Z u{lt/le/gt/ge}/eq/ne Y * Z -> X u{lt/le/gt/ge}/eq/ne Y
- if (NonZero && BO0 && BO1 && BO0->hasNoUnsignedWrap() &&
- BO1->hasNoUnsignedWrap())
+ if (NonZero && BO0 && BO1 && Op0HasNUW && Op1HasNUW)
return new ICmpInst(Pred, X, Y);
}
}
@@ -4976,8 +4978,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
case Instruction::Shl: {
- bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
- bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
+ bool NUW = Op0HasNUW && Op1HasNUW;
+ bool NSW = Op0HasNSW && Op1HasNSW;
if (!NUW && !NSW)
break;
if (!NSW && I.isSigned())
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index 1c7bb36f0d34c0..cc1bcffa136066 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -3862,10 +3862,9 @@ define <8 x i1> @bitreverse_vec_ne(<8 x i16> %x, <8 x i16> %y) {
define i1 @knownbits1(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits1(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], 1
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A2]], [[B2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A1]], [[TMP1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 5
@@ -3879,10 +3878,9 @@ define i1 @knownbits1(i8 %a, i8 %b) {
define i1 @knownbits2(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits2(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], 1
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A2]], [[B2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A1]], [[TMP1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 5
@@ -3896,10 +3894,9 @@ define i1 @knownbits2(i8 %a, i8 %b) {
define i1 @knownbits3(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits3(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], 1
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[B2]], [[A2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[TMP1]], [[A1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 5
@@ -3913,10 +3910,9 @@ define i1 @knownbits3(i8 %a, i8 %b) {
define <2 x i1> @knownbits4(<2 x i8> %a, <2 x i8> %b) {
; CHECK-LABEL: @knownbits4(
; CHECK-NEXT: [[A1:%.*]] = and <2 x i8> [[A:%.*]], <i8 1, i8 1>
-; CHECK-NEXT: [[A2:%.*]] = or disjoint <2 x i8> [[A1]], <i8 4, i8 4>
; CHECK-NEXT: [[B1:%.*]] = and <2 x i8> [[B:%.*]], <i8 2, i8 2>
-; CHECK-NEXT: [[B2:%.*]] = or disjoint <2 x i8> [[B1]], <i8 5, i8 5>
-; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[B2]], [[A2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], <i8 1, i8 1>
+; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[TMP1]], [[A1]]
; CHECK-NEXT: ret <2 x i1> [[C]]
;
%a1 = and <2 x i8> %a, <i8 5, i8 5>
@@ -3932,10 +3928,9 @@ define <2 x i1> @knownbits4(<2 x i8> %a, <2 x i8> %b) {
define i1 @knownbits5(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits5(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], -127
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A2]], [[B2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A1]], [[TMP1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 133
@@ -3949,10 +3944,9 @@ define i1 @knownbits5(i8 %a, i8 %b) {
define i1 @knownbits6(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits6(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], -127
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A2]], [[B2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A1]], [[TMP1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 133
@@ -3966,10 +3960,9 @@ define i1 @knownbits6(i8 %a, i8 %b) {
define <2 x i1> @knownbits7(<2 x i8> %a, <2 x i8> %b) {
; CHECK-LABEL: @knownbits7(
; CHECK-NEXT: [[A1:%.*]] = and <2 x i8> [[A:%.*]], <i8 -127, i8 -127>
-; CHECK-NEXT: [[A2:%.*]] = or disjoint <2 x i8> [[A1]], <i8 4, i8 4>
; CHECK-NEXT: [[B1:%.*]] = and <2 x i8> [[B:%.*]], <i8 2, i8 2>
-; CHECK-NEXT: [[B2:%.*]] = or disjoint <2 x i8> [[B1]], <i8 5, i8 5>
-; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[B2]], [[A2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], <i8 1, i8 1>
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[TMP1]], [[A1]]
; CHECK-NEXT: ret <2 x i1> [[C]]
;
%a1 = and <2 x i8> %a, <i8 133, i8 133>
@@ -3983,10 +3976,9 @@ define <2 x i1> @knownbits7(<2 x i8> %a, <2 x i8> %b) {
define i1 @knownbits8(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits8(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], -127
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[B2]], [[A2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[TMP1]], [[A1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 133
|
7eaae44 to
275f781
Compare
There was a problem hiding this comment.
This comment should be below the lambda (above the `Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;` line).
InstCombine canonicalizes `add` to `or` when possible, but this makes some optimizations applicable to `add` to be missed because they don't realize that the `or` is equivalent to `add`. In this patch we generalize `foldICmpBinOp` to handle such cases.
275f781 to
4dee840
Compare
This patch tries to fold minmax intrinsic by using `computeConstantRangeIncludingKnownBits`. Fixes regression in [_karatsuba_rec:cpython/Modules/_decimal/libmpdec/mpdecimal.c](https://github.com/python/cpython/blob/c31943af16f885c8cf5d5a690c25c366afdb2862/Modules/_decimal/libmpdec/mpdecimal.c#L5460-L5462), which was introduced by #71396. See also dtcxzyw/llvm-opt-benchmark#16 (comment). Alive2 for splat vectors with undef: https://alive2.llvm.org/ce/z/J8hKWd
InstCombine canonicalizes `add` to `or` when possible, but this makes some optimizations applicable to `add` to be missed because they don't realize that the `or` is equivalent to `add`.
In this patch we generalize `foldICmpBinOp` to handle such cases.