[InstCombine] Extend `foldICmpBinOp` to `add`-like `or`. by mgudim · Pull Request #71396 · llvm/llvm-project

mgudim · 2023-11-06T14:02:13Z

InstCombine canonicalizes add to or when possible, but this causes some optimizations that apply to add to be missed, because they do not recognize that the or is equivalent to an add.

In this patch we generalize foldICmpBinOp to handle such cases.

github-actions · 2023-11-06T14:04:22Z

✅ With the latest revision this PR passed the C/C++ code formatter.

mgudim · 2023-11-06T14:15:32Z

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

I have seen checks for an or that is actually an add in other places. Should we make this into a centralized utility function?

Great, looks like this comes up in other places too: #72583
I'll wait for that PR to land, so I can apply it here.

Also see similar PR: #75899

llvmbot · 2023-12-19T03:16:13Z

@llvm/pr-subscribers-llvm-transforms

Author: Mikhail Gudim (mgudim)

Changes

InstCombine canonicalizes add to or when possible, but this causes some optimizations that apply to add to be missed, because they do not recognize that the or is equivalent to an add.

In this patch we generalize foldICmpBinOp to handle such cases.

Full diff: https://github.com/llvm/llvm-project/pull/71396.diff

2 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+33-31)
(modified) llvm/test/Transforms/InstCombine/icmp.ll (+16-24)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 289976718e52f3..0b99260bb635a9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4624,31 +4624,37 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
   }
 
   bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
-  if (BO0 && isa<OverflowingBinaryOperator>(BO0))
-    NoOp0WrapProblem =
-        ICmpInst::isEquality(Pred) ||
-        (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
-        (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
-  if (BO1 && isa<OverflowingBinaryOperator>(BO1))
-    NoOp1WrapProblem =
-        ICmpInst::isEquality(Pred) ||
-        (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
-        (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
-
+  bool Op0HasNUW = false, Op1HasNUW = false;
+  bool Op0HasNSW = false, Op1HasNSW = false;
   // Analyze the case when either Op0 or Op1 is an add instruction.
   // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
   Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
-  if (BO0 && BO0->getOpcode() == Instruction::Add) {
-    A = BO0->getOperand(0);
-    B = BO0->getOperand(1);
+  auto hasNoWrapProblem = [](const BinaryOperator &BO, CmpInst::Predicate Pred,
+                             bool &HasNSW, bool &HasNUW) -> bool {
+    if (isa<OverflowingBinaryOperator>(BO)) {
+      HasNUW = BO.hasNoUnsignedWrap();
+      HasNSW = BO.hasNoSignedWrap();
+      return ICmpInst::isEquality(Pred) ||
+             (CmpInst::isUnsigned(Pred) && HasNUW) ||
+             (CmpInst::isSigned(Pred) && HasNSW);
+    } else if (BO.getOpcode() == Instruction::Or) {
+      HasNUW = true;
+      HasNSW = true;
+      return true;
+    } else {
+      return false;
+    }
+  };
+
+  if (BO0) {
+    match(BO0, m_AddLike(m_Value(A), m_Value(B)));
+    NoOp0WrapProblem = hasNoWrapProblem(*BO0, Pred, Op0HasNSW, Op0HasNUW);
   }
-  if (BO1 && BO1->getOpcode() == Instruction::Add) {
-    C = BO1->getOperand(0);
-    D = BO1->getOperand(1);
+  if (BO1) {
+    match(BO1, m_AddLike(m_Value(C), m_Value(D)));
+    NoOp1WrapProblem = hasNoWrapProblem(*BO1, Pred, Op1HasNSW, Op1HasNUW);
   }
 
-  // icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow.
-  // icmp (A+B), B -> icmp A, 0 for equalities or if there is no overflow.
   if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
     return new ICmpInst(Pred, A == Op1 ? B : A,
                         Constant::getNullValue(Op1->getType()));
@@ -4764,17 +4770,15 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
       APInt AP2Abs = AP2->abs();
       if (AP1Abs.uge(AP2Abs)) {
         APInt Diff = *AP1 - *AP2;
-        bool HasNUW = BO0->hasNoUnsignedWrap() && Diff.ule(*AP1);
-        bool HasNSW = BO0->hasNoSignedWrap();
         Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
-        Value *NewAdd = Builder.CreateAdd(A, C3, "", HasNUW, HasNSW);
+        Value *NewAdd = Builder.CreateAdd(
+            A, C3, "", Op0HasNUW && Diff.ule(*AP1), Op0HasNSW);
         return new ICmpInst(Pred, NewAdd, C);
       } else {
         APInt Diff = *AP2 - *AP1;
-        bool HasNUW = BO1->hasNoUnsignedWrap() && Diff.ule(*AP2);
-        bool HasNSW = BO1->hasNoSignedWrap();
         Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
-        Value *NewAdd = Builder.CreateAdd(C, C3, "", HasNUW, HasNSW);
+        Value *NewAdd = Builder.CreateAdd(
+            C, C3, "", Op1HasNUW && Diff.ule(*AP1), Op1HasNSW);
         return new ICmpInst(Pred, A, NewAdd);
       }
     }
@@ -4868,16 +4872,14 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
                   isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
         // if Z != 0 and nsw(X * Z) and nsw(Y * Z)
         //    X * Z eq/ne Y * Z -> X eq/ne Y
-        if (NonZero && BO0 && BO1 && BO0->hasNoSignedWrap() &&
-            BO1->hasNoSignedWrap())
+        if (NonZero && BO0 && BO1 && Op0HasNSW && Op1HasNSW)
           return new ICmpInst(Pred, X, Y);
       } else
         NonZero = isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
 
       // If Z != 0 and nuw(X * Z) and nuw(Y * Z)
       //    X * Z u{lt/le/gt/ge}/eq/ne Y * Z -> X u{lt/le/gt/ge}/eq/ne Y
-      if (NonZero && BO0 && BO1 && BO0->hasNoUnsignedWrap() &&
-          BO1->hasNoUnsignedWrap())
+      if (NonZero && BO0 && BO1 && Op0HasNUW && Op1HasNUW)
         return new ICmpInst(Pred, X, Y);
     }
   }
@@ -4976,8 +4978,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
       return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
 
     case Instruction::Shl: {
-      bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
-      bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
+      bool NUW = Op0HasNUW && Op1HasNUW;
+      bool NSW = Op0HasNSW && Op1HasNSW;
       if (!NUW && !NSW)
         break;
       if (!NSW && I.isSigned())
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index 1c7bb36f0d34c0..cc1bcffa136066 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -3862,10 +3862,9 @@ define <8 x i1> @bitreverse_vec_ne(<8 x i16> %x, <8 x i16> %y) {
 define i1 @knownbits1(i8 %a, i8 %b) {
 ; CHECK-LABEL: @knownbits1(
 ; CHECK-NEXT:    [[A1:%.*]] = and i8 [[A:%.*]], 1
-; CHECK-NEXT:    [[A2:%.*]] = or disjoint i8 [[A1]], 4
 ; CHECK-NEXT:    [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT:    [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT:    [[C:%.*]] = icmp eq i8 [[A2]], [[B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i8 [[A1]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %a1 = and i8 %a, 5
@@ -3879,10 +3878,9 @@ define i1 @knownbits1(i8 %a, i8 %b) {
 define i1 @knownbits2(i8 %a, i8 %b) {
 ; CHECK-LABEL: @knownbits2(
 ; CHECK-NEXT:    [[A1:%.*]] = and i8 [[A:%.*]], 1
-; CHECK-NEXT:    [[A2:%.*]] = or disjoint i8 [[A1]], 4
 ; CHECK-NEXT:    [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT:    [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT:    [[C:%.*]] = icmp ne i8 [[A2]], [[B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i8 [[A1]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %a1 = and i8 %a, 5
@@ -3896,10 +3894,9 @@ define i1 @knownbits2(i8 %a, i8 %b) {
 define i1 @knownbits3(i8 %a, i8 %b) {
 ; CHECK-LABEL: @knownbits3(
 ; CHECK-NEXT:    [[A1:%.*]] = and i8 [[A:%.*]], 1
-; CHECK-NEXT:    [[A2:%.*]] = or disjoint i8 [[A1]], 4
 ; CHECK-NEXT:    [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT:    [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT:    [[C:%.*]] = icmp eq i8 [[B2]], [[A2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i8 [[TMP1]], [[A1]]
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %a1 = and i8 %a, 5
@@ -3913,10 +3910,9 @@ define i1 @knownbits3(i8 %a, i8 %b) {
 define <2 x i1> @knownbits4(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @knownbits4(
 ; CHECK-NEXT:    [[A1:%.*]] = and <2 x i8> [[A:%.*]], <i8 1, i8 1>
-; CHECK-NEXT:    [[A2:%.*]] = or disjoint <2 x i8> [[A1]], <i8 4, i8 4>
 ; CHECK-NEXT:    [[B1:%.*]] = and <2 x i8> [[B:%.*]], <i8 2, i8 2>
-; CHECK-NEXT:    [[B2:%.*]] = or disjoint <2 x i8> [[B1]], <i8 5, i8 5>
-; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i8> [[B2]], [[A2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], <i8 1, i8 1>
+; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i8> [[TMP1]], [[A1]]
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %a1 = and <2 x i8> %a, <i8 5, i8 5>
@@ -3932,10 +3928,9 @@ define <2 x i1> @knownbits4(<2 x i8> %a, <2 x i8> %b) {
 define i1 @knownbits5(i8 %a, i8 %b) {
 ; CHECK-LABEL: @knownbits5(
 ; CHECK-NEXT:    [[A1:%.*]] = and i8 [[A:%.*]], -127
-; CHECK-NEXT:    [[A2:%.*]] = or disjoint i8 [[A1]], 4
 ; CHECK-NEXT:    [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT:    [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT:    [[C:%.*]] = icmp eq i8 [[A2]], [[B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i8 [[A1]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %a1 = and i8 %a, 133
@@ -3949,10 +3944,9 @@ define i1 @knownbits5(i8 %a, i8 %b) {
 define i1 @knownbits6(i8 %a, i8 %b) {
 ; CHECK-LABEL: @knownbits6(
 ; CHECK-NEXT:    [[A1:%.*]] = and i8 [[A:%.*]], -127
-; CHECK-NEXT:    [[A2:%.*]] = or disjoint i8 [[A1]], 4
 ; CHECK-NEXT:    [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT:    [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT:    [[C:%.*]] = icmp ne i8 [[A2]], [[B2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i8 [[A1]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %a1 = and i8 %a, 133
@@ -3966,10 +3960,9 @@ define i1 @knownbits6(i8 %a, i8 %b) {
 define <2 x i1> @knownbits7(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @knownbits7(
 ; CHECK-NEXT:    [[A1:%.*]] = and <2 x i8> [[A:%.*]], <i8 -127, i8 -127>
-; CHECK-NEXT:    [[A2:%.*]] = or disjoint <2 x i8> [[A1]], <i8 4, i8 4>
 ; CHECK-NEXT:    [[B1:%.*]] = and <2 x i8> [[B:%.*]], <i8 2, i8 2>
-; CHECK-NEXT:    [[B2:%.*]] = or disjoint <2 x i8> [[B1]], <i8 5, i8 5>
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i8> [[B2]], [[A2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], <i8 1, i8 1>
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i8> [[TMP1]], [[A1]]
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %a1 = and <2 x i8> %a, <i8 133, i8 133>
@@ -3983,10 +3976,9 @@ define <2 x i1> @knownbits7(<2 x i8> %a, <2 x i8> %b) {
 define i1 @knownbits8(i8 %a, i8 %b) {
 ; CHECK-LABEL: @knownbits8(
 ; CHECK-NEXT:    [[A1:%.*]] = and i8 [[A:%.*]], -127
-; CHECK-NEXT:    [[A2:%.*]] = or disjoint i8 [[A1]], 4
 ; CHECK-NEXT:    [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT:    [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT:    [[C:%.*]] = icmp ne i8 [[B2]], [[A2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i8 [[TMP1]], [[A1]]
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %a1 = and i8 %a, 133

dtcxzyw · 2023-12-19T03:39:04Z

Alive2: https://alive2.llvm.org/ce/z/B_7XNs

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

llvm/test/Transforms/InstCombine/icmp.ll

nikic

LGTM

nikic · 2023-12-20T10:12:32Z

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

This comment should be below the lambda (above the Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; line).

InstCombine canonicalizes `add` to `or` when possible, but this causes some optimizations that apply to `add` to be missed, because they do not recognize that the `or` is equivalent to an `add`. In this patch we generalize `foldICmpBinOp` to handle such cases.

Revert "[InstCombine] Extend foldICmpBinOp to add-like or. (llvm#71396)" This reverts commit 8773c9b.

This patch tries to fold minmax intrinsic by using `computeConstantRangeIncludingKnownBits`. Fixes regression in [_karatsuba_rec:cpython/Modules/_decimal/libmpdec/mpdecimal.c](https://github.com/python/cpython/blob/c31943af16f885c8cf5d5a690c25c366afdb2862/Modules/_decimal/libmpdec/mpdecimal.c#L5460-L5462), which was introduced by #71396. See also dtcxzyw/llvm-opt-benchmark#16 (comment). Alive2 for splat vectors with undef: https://alive2.llvm.org/ce/z/J8hKWd

mgudim force-pushed the add-like-or-in-icmp branch from f4346fc to c73a702 Compare November 6, 2023 14:12

mgudim commented Nov 6, 2023

View reviewed changes

mgudim force-pushed the add-like-or-in-icmp branch 7 times, most recently from 3133fd3 to d52843a Compare December 18, 2023 21:25

mgudim marked this pull request as ready for review December 19, 2023 03:15

mgudim requested a review from nikic as a code owner December 19, 2023 03:15

mgudim requested review from topperc and removed request for nikic December 19, 2023 03:15

llvmbot added the llvm:transforms label Dec 19, 2023

dtcxzyw requested review from goldsteinn and nikic December 19, 2023 03:19

nikic reviewed Dec 19, 2023

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp Outdated Show resolved Hide resolved

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp Outdated Show resolved Hide resolved

llvm/test/Transforms/InstCombine/icmp.ll Outdated Show resolved Hide resolved

mgudim force-pushed the add-like-or-in-icmp branch from 7eaae44 to 275f781 Compare December 20, 2023 04:37

nikic approved these changes Dec 20, 2023

View reviewed changes

mgudim added 6 commits December 20, 2023 14:28

Rerun CI

99d841c

updated tests.

d01ea90

Fixed a typo

e955d12

Deleted unnecessary test

077ccf0

Addressed review comments

4dee840

mgudim force-pushed the add-like-or-in-icmp branch from 275f781 to 4dee840 Compare December 20, 2023 19:28

mgudim merged commit 8773c9b into llvm:main Dec 20, 2023

mgudim added a commit to mgudim/llvm-project that referenced this pull request Dec 21, 2023

Revert "[InstCombine] Extend foldICmpBinOp to add-like or. (llv…

56ea579

…m#71396)" This reverts commit 8773c9b.

This was referenced Dec 22, 2023

[InstSimplify] Improve simplifyICmpWithConstant by using KnownBits info #76221

Closed

[InstCombine] Fold minmax intrinsic using KnownBits information #76242

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[InstCombine] Extend `foldICmpBinOp` to `add`-like `or`. #71396

[InstCombine] Extend `foldICmpBinOp` to `add`-like `or`. #71396
mgudim merged 6 commits into llvm:main from
mgudim:add-like-or-in-icmp

mgudim commented Nov 6, 2023

Uh oh!

github-actions bot commented Nov 6, 2023 •

edited

Loading

Uh oh!

mgudim Nov 6, 2023

Uh oh!

mgudim Nov 20, 2023

Uh oh!

mgudim Dec 19, 2023

Uh oh!

llvmbot commented Dec 19, 2023

Uh oh!

dtcxzyw commented Dec 19, 2023

Uh oh!

Uh oh!

Uh oh!

Uh oh!

nikic left a comment

Uh oh!

nikic Dec 20, 2023

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

Conversation

mgudim commented Nov 6, 2023

Uh oh!

github-actions bot commented Nov 6, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

mgudim Nov 6, 2023

Choose a reason for hiding this comment

Uh oh!

mgudim Nov 20, 2023

Choose a reason for hiding this comment

Uh oh!

mgudim Dec 19, 2023

Choose a reason for hiding this comment

Uh oh!

llvmbot commented Dec 19, 2023

Uh oh!

dtcxzyw commented Dec 19, 2023

Uh oh!

Uh oh!

Uh oh!

Uh oh!

nikic left a comment

Choose a reason for hiding this comment

Uh oh!

nikic Dec 20, 2023

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

github-actions bot commented Nov 6, 2023 •

edited

Loading