InstCombine: Handle exp/exp2/exp10 in SimplifyDemandedFPClass (#173432)
InstCombine: Handle exp/exp2/exp10 in SimplifyDemandedFPClass (#173432)
Conversation
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-support Author: Matt Arsenault (arsenm). Changes: InstCombine: Handle exp/exp2/exp10 in SimplifyDemandedFPClass. I'm working on optimizing out the tail sequences in the [remainder of sentence lost in extraction]. Related to #64870. Make KnownFPClass::exp not side-effecting. Patch is 25.27 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/173432.diff. 5 Files Affected:
diff --git a/llvm/include/llvm/Support/KnownFPClass.h b/llvm/include/llvm/Support/KnownFPClass.h
index 7fe6197cb84aa..3f55ae5e08f9e 100644
--- a/llvm/include/llvm/Support/KnownFPClass.h
+++ b/llvm/include/llvm/Support/KnownFPClass.h
@@ -161,6 +161,9 @@ struct KnownFPClass {
canonicalize(const KnownFPClass &Src,
DenormalMode DenormMode = DenormalMode::getDynamic());
+ /// Report known values for exp, exp2 and exp10.
+ LLVM_ABI static KnownFPClass exp(const KnownFPClass &Src);
+
/// Return true if the sign bit must be 0, ignoring the sign of nans.
bool signBitIsZeroOrNaN() const { return isKnownNever(fcNegative); }
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 9a3d11eaa38c8..cb99ec0194d42 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5345,38 +5345,15 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
case Intrinsic::exp2:
case Intrinsic::exp10:
case Intrinsic::amdgcn_exp2: {
- Known.knownNot(fcNegative);
-
- Type *EltTy = II->getType()->getScalarType();
- if (IID == Intrinsic::amdgcn_exp2 && EltTy->isFloatTy())
- Known.knownNot(fcSubnormal);
-
- if ((InterestedClasses & fcNan) == fcNone)
- break;
-
KnownFPClass KnownSrc;
computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
KnownSrc, Q, Depth + 1);
- if (KnownSrc.isKnownNeverNaN()) {
- Known.knownNot(fcNan);
- Known.signBitMustBeZero();
- }
- if (KnownSrc.cannotBeOrderedLessThanZero()) {
- // If the source is positive, and cannot be ~0, this cannot underflow.
- Known.knownNot(fcPosZero);
+ Known = KnownFPClass::exp(KnownSrc);
- // Cannot introduce new denormal values.
- if (KnownSrc.isKnownNever(fcPosSubnormal))
- Known.knownNot(fcPosSubnormal);
- }
-
- if (KnownSrc.cannotBeOrderedGreaterThanZero()) {
- // If the source is negative, and cannot be infinity, this cannot
- // overflow to infinity.
- if (KnownSrc.isKnownNeverPosInfinity())
- Known.knownNot(fcPosInf);
- }
+ Type *EltTy = II->getType()->getScalarType();
+ if (IID == Intrinsic::amdgcn_exp2 && EltTy->isFloatTy())
+ Known.knownNot(fcSubnormal);
break;
}
diff --git a/llvm/lib/Support/KnownFPClass.cpp b/llvm/lib/Support/KnownFPClass.cpp
index 556a3b165d80d..1676196d073b4 100644
--- a/llvm/lib/Support/KnownFPClass.cpp
+++ b/llvm/lib/Support/KnownFPClass.cpp
@@ -137,6 +137,34 @@ KnownFPClass KnownFPClass::canonicalize(const KnownFPClass &KnownSrc,
return Known;
}
+KnownFPClass KnownFPClass::exp(const KnownFPClass &KnownSrc) {
+ KnownFPClass Known;
+ Known.knownNot(fcNegative);
+
+ if (KnownSrc.isKnownNeverNaN()) {
+ Known.knownNot(fcNan);
+ Known.signBitMustBeZero();
+ }
+
+ if (KnownSrc.cannotBeOrderedLessThanZero()) {
+ // If the source is positive, and cannot be ~0, this cannot underflow.
+ Known.knownNot(fcPosZero);
+
+ // Cannot introduce new denormal values.
+ if (KnownSrc.isKnownNever(fcPosSubnormal))
+ Known.knownNot(fcPosSubnormal);
+ }
+
+ if (KnownSrc.cannotBeOrderedGreaterThanZero()) {
+ // If the source is negative, and cannot be infinity, this cannot
+ // overflow to infinity.
+ if (KnownSrc.isKnownNeverPosInfinity())
+ Known.knownNot(fcPosInf);
+ }
+
+ return Known;
+}
+
void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src,
DenormalMode Mode) {
propagateDenormal(Src, Mode);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 579cbd07fbc0f..7fa7ecad33bb1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -2106,6 +2106,98 @@ Value *InstCombinerImpl::SimplifyDemandedUseFPClass(Value *V,
Known.copysign(KnownSign);
break;
}
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::exp10: {
+ if ((DemandedMask &
+ (fcPosZero | fcPosSubnormal | fcPosNormal | fcPosInf)) == fcNone) {
+ // Only returns positive values or nans.
+ if ((DemandedMask & fcNan) == fcNone)
+ return PoisonValue::get(VTy);
+
+ // Only need nan propagation.
+ // Note: Dropping snan quieting.
+ return CI->getArgOperand(0);
+ }
+
+ FPClassTest SrcDemandedMask = DemandedMask & fcNan;
+
+ if (DemandedMask & fcZero) {
+ // exp(-infinity) = 0
+ SrcDemandedMask |= fcNegInf;
+
+ // exp(-largest_normal) = 0
+ //
+ // Negative numbers of sufficiently large magnitude underflow to 0. No
+ // subnormal input has a 0 result.
+ SrcDemandedMask |= fcNegNormal;
+ }
+
+ if (DemandedMask & fcPosSubnormal) {
+ // Negative normal inputs of sufficiently large magnitude underflow into the
+ // subnormal range. No subnormal input has a subnormal result.
+ SrcDemandedMask |= fcNegNormal;
+ }
+
+ if (DemandedMask & fcPosNormal) {
+ // exp(0) = 1
+ // exp(+/- smallest_normal) = 1
+ // exp(+/- largest_denormal) = 1
+ // exp(+/- smallest_denormal) = 1
+ SrcDemandedMask |= fcPosNormal | fcSubnormal | fcZero;
+ }
+
+ // exp(inf), exp(largest_normal) = inf
+ if (DemandedMask & fcPosInf)
+ SrcDemandedMask |= fcPosInf | fcPosNormal;
+
+ KnownFPClass KnownSrc;
+
+ // TODO: This could really make use of KnownFPClass of specific value
+ // range, (i.e., close enough to 1)
+ if (SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownSrc, Depth + 1))
+ return I;
+
+ /// Propagate nnan-ness to simplify edge case checks.
+ if ((DemandedMask & fcNan) == fcNone)
+ KnownSrc.knownNot(fcNan);
+
+ // exp(+/-0) = 1
+ if (KnownSrc.isKnownAlways(fcZero))
+ return ConstantFP::get(VTy, 1.0);
+
+ // exp(0 | nan) => x == 0.0 ? 1.0 : x
+ if (KnownSrc.isKnownAlways(fcZero | fcNan)) {
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(CI);
+
+ // fadd +/-0, 1.0 => 1.0
+ // fadd nan, 1.0 => nan
+ return Builder.CreateFAdd(CI->getArgOperand(0),
+ ConstantFP::get(VTy, 1.0));
+ }
+
+ if (KnownSrc.isKnownAlways(fcInf | fcNan)) {
+ // exp(-inf) = 0
+ // exp(+inf) = +inf
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(CI);
+
+ // Note: Dropping canonicalize / quiet of signaling nan.
+ Value *X = CI->getArgOperand(0);
+ Value *IsPosInfOrNan =
+ Builder.CreateFCmpUEQ(X, ConstantFP::getInfinity(VTy));
+ return Builder.CreateSelect(IsPosInfOrNan, X, ConstantFP::getZero(VTy));
+ }
+
+ // Only perform nan propagation.
+ // Note: Dropping canonicalize / quiet of signaling nan.
+ if (KnownSrc.isKnownAlways(fcNan))
+ return CI->getArgOperand(0);
+
+ Known = KnownFPClass::exp(KnownSrc);
+ break;
+ }
case Intrinsic::canonicalize: {
Type *EltTy = VTy->getScalarType();
diff --git a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass-exp.ll b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass-exp.ll
index 7706d2de68f16..8311243e45580 100644
--- a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass-exp.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass-exp.ll
@@ -54,8 +54,7 @@ define nofpclass(nan) float @ret_nofpclass_nan__exp2_select_maybe_inf_or_not_nan
define nofpclass(nan) float @ret_nofpclass_nan__exp2_select_maybe_inf_or_nan(i1 %cond, float %maybe.nan, float nofpclass(inf zero sub norm) %only.nan) {
; CHECK-LABEL: define nofpclass(nan) float @ret_nofpclass_nan__exp2_select_maybe_inf_or_nan(
; CHECK-SAME: i1 [[COND:%.*]], float [[MAYBE_NAN:%.*]], float nofpclass(inf zero sub norm) [[ONLY_NAN:%.*]]) {
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[MAYBE_NAN]], float [[ONLY_NAN]]
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[SELECT]])
+; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MAYBE_NAN]])
; CHECK-NEXT: ret float [[EXP]]
;
%select = select i1 %cond, float %maybe.nan, float %only.nan
@@ -67,8 +66,7 @@ define nofpclass(nan) float @ret_nofpclass_nan__exp2_select_maybe_inf_or_nan(i1
define nofpclass(pinf zero psub pnorm) float @ret_nofpclass_no_positives__exp2(float %x) {
; CHECK-LABEL: define nofpclass(pinf zero psub pnorm) float @ret_nofpclass_no_positives__exp2(
; CHECK-SAME: float [[X:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[X]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float [[X]]
;
%exp = call float @llvm.exp2.f32(float %x)
ret float %exp
@@ -88,8 +86,7 @@ define nofpclass(nan pinf zero psub pnorm) float @ret_nofpclass_no_positives_no_
define nofpclass(pzero pinf psub pnorm) float @ret_nofpclass_no_positives_except_neg0__exp2(float %x) {
; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_nofpclass_no_positives_except_neg0__exp2(
; CHECK-SAME: float [[X:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[X]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float [[X]]
;
%exp = call float @llvm.exp2.f32(float %x)
ret float %exp
@@ -131,8 +128,7 @@ define nofpclass(pinf psub pnorm) float @ret_nofpclass_no_positives_except_0__ex
define nofpclass(nan) float @handle_exp(i1 %cond, float %maybe.nan, float nofpclass(inf zero sub norm) %only.nan) {
; CHECK-LABEL: define nofpclass(nan) float @handle_exp(
; CHECK-SAME: i1 [[COND:%.*]], float [[MAYBE_NAN:%.*]], float nofpclass(inf zero sub norm) [[ONLY_NAN:%.*]]) {
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[MAYBE_NAN]], float [[ONLY_NAN]]
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp.f32(float [[SELECT]])
+; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp.f32(float [[MAYBE_NAN]])
; CHECK-NEXT: ret float [[EXP]]
;
%select = select i1 %cond, float %maybe.nan, float %only.nan
@@ -144,8 +140,7 @@ define nofpclass(nan) float @handle_exp(i1 %cond, float %maybe.nan, float nofpcl
define nofpclass(nan) float @handle_exp10(i1 %cond, float %maybe.nan, float nofpclass(inf zero sub norm) %only.nan) {
; CHECK-LABEL: define nofpclass(nan) float @handle_exp10(
; CHECK-SAME: i1 [[COND:%.*]], float [[MAYBE_NAN:%.*]], float nofpclass(inf zero sub norm) [[ONLY_NAN:%.*]]) {
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[MAYBE_NAN]], float [[ONLY_NAN]]
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp10.f32(float [[SELECT]])
+; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp10.f32(float [[MAYBE_NAN]])
; CHECK-NEXT: ret float [[EXP]]
;
%select = select i1 %cond, float %maybe.nan, float %only.nan
@@ -158,7 +153,7 @@ define nofpclass(nan) float @handle_exp10(i1 %cond, float %maybe.nan, float nofp
define nofpclass(inf norm nan) float @ret_nofpclass_only_subzero__exp2_select_unknown_or_not_norm(i1 %cond, float %unknown, float nofpclass(norm) %not.norm) {
; CHECK-LABEL: define nofpclass(nan inf norm) float @ret_nofpclass_only_subzero__exp2_select_unknown_or_not_norm(
; CHECK-SAME: i1 [[COND:%.*]], float [[UNKNOWN:%.*]], float nofpclass(norm) [[NOT_NORM:%.*]]) {
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[UNKNOWN]], float [[NOT_NORM]]
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[UNKNOWN]], float 0xFFF0000000000000
; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[SELECT]])
; CHECK-NEXT: ret float [[EXP]]
;
@@ -170,8 +165,7 @@ define nofpclass(inf norm nan) float @ret_nofpclass_only_subzero__exp2_select_un
define nofpclass(inf norm nan zero) float @ret_nofpclass_only_sub__exp2_select_unknown_or_not_norm(i1 %cond, float %unknown, float nofpclass(norm) %not.norm) {
; CHECK-LABEL: define nofpclass(nan inf zero norm) float @ret_nofpclass_only_sub__exp2_select_unknown_or_not_norm(
; CHECK-SAME: i1 [[COND:%.*]], float [[UNKNOWN:%.*]], float nofpclass(norm) [[NOT_NORM:%.*]]) {
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[UNKNOWN]], float [[NOT_NORM]]
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[SELECT]])
+; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[UNKNOWN]])
; CHECK-NEXT: ret float [[EXP]]
;
%select = select i1 %cond, float %unknown, float %not.norm
@@ -195,8 +189,7 @@ define nofpclass(inf norm nan sub) float @ret_nofpclass_only_zero__exp2_select_u
define nofpclass(ninf norm zero sub) float @pinf_result_implies_pnorm_source(float nofpclass(pinf nan) %maybe.pnorm) {
; CHECK-LABEL: define nofpclass(ninf zero sub norm) float @pinf_result_implies_pnorm_source(
; CHECK-SAME: float nofpclass(nan pinf) [[MAYBE_PNORM:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MAYBE_PNORM]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float 0x7FF0000000000000
;
%exp = call float @llvm.exp2.f32(float %maybe.pnorm)
ret float %exp
@@ -215,8 +208,7 @@ define nofpclass(ninf norm zero sub) float @pinf_result_implies_pnorm_source_nan
define nofpclass(pinf norm zero sub) float @ninf_result_implies_poison(float nofpclass(ninf nan) %maybe.nnorm) {
; CHECK-LABEL: define nofpclass(pinf zero sub norm) float @ninf_result_implies_poison(
; CHECK-SAME: float nofpclass(nan ninf) [[MAYBE_NNORM:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MAYBE_NNORM]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float poison
;
%exp = call float @llvm.exp2.f32(float %maybe.nnorm)
ret float %exp
@@ -275,8 +267,7 @@ define nofpclass(inf norm nan zero) float @sub_result_implies_nnorm_source_valid
define nofpclass(inf norm nan zero) float @sub_result_implies_nsub_source_valid(float nofpclass(norm psub nan) %maybe.nsub) {
; CHECK-LABEL: define nofpclass(nan inf zero norm) float @sub_result_implies_nsub_source_valid(
; CHECK-SAME: float nofpclass(nan psub norm) [[MAYBE_NSUB:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MAYBE_NSUB]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float poison
;
%exp = call float @llvm.exp2.f32(float %maybe.nsub)
ret float %exp
@@ -323,8 +314,7 @@ define nofpclass(inf nnorm nan zero) float @pnorm_result_implies_possible_0_sour
define nofpclass(inf nnorm nan zero sub) float @pnorm_result_implies_possible_0_source_no_inf(float nofpclass(inf norm sub) %maybe.zero.or.nan) {
; CHECK-LABEL: define nofpclass(nan inf zero sub nnorm) float @pnorm_result_implies_possible_0_source_no_inf(
; CHECK-SAME: float nofpclass(inf sub norm) [[MAYBE_ZERO_OR_NAN:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MAYBE_ZERO_OR_NAN]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float 1.000000e+00
;
%exp = call float @llvm.exp2.f32(float %maybe.zero.or.nan)
ret float %exp
@@ -344,8 +334,7 @@ define nofpclass(inf nnorm nan zero sub) float @pnorm_result_implies_possible_su
define nofpclass(pzero) float @source_is_known_zero(float nofpclass(nan inf norm sub) %must.be.zero) {
; CHECK-LABEL: define nofpclass(pzero) float @source_is_known_zero(
; CHECK-SAME: float nofpclass(nan inf sub norm) [[MUST_BE_ZERO:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MUST_BE_ZERO]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float 1.000000e+00
;
%exp = call float @llvm.exp2.f32(float %must.be.zero)
ret float %exp
@@ -354,8 +343,7 @@ define nofpclass(pzero) float @source_is_known_zero(float nofpclass(nan inf norm
define nofpclass(pzero) <2 x float> @source_is_known_zero_vec(<2 x float> nofpclass(nan inf norm sub) %must.be.zero) {
; CHECK-LABEL: define nofpclass(pzero) <2 x float> @source_is_known_zero_vec(
; CHECK-SAME: <2 x float> nofpclass(nan inf sub norm) [[MUST_BE_ZERO:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[MUST_BE_ZERO]])
-; CHECK-NEXT: ret <2 x float> [[EXP]]
+; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00)
;
%exp = call <2 x float> @llvm.exp2.v2f32(<2 x float> %must.be.zero)
ret <2 x float> %exp
@@ -364,8 +352,7 @@ define nofpclass(pzero) <2 x float> @source_is_known_zero_vec(<2 x float> nofpcl
define nofpclass(pzero) float @source_is_known_pzero(float nofpclass(nan inf norm sub nzero) %must.be.pzero) {
; CHECK-LABEL: define nofpclass(pzero) float @source_is_known_pzero(
; CHECK-SAME: float nofpclass(nan inf nzero sub norm) [[MUST_BE_PZERO:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MUST_BE_PZERO]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float 1.000000e+00
;
%exp = call float @llvm.exp2.f32(float %must.be.pzero)
ret float %exp
@@ -374,8 +361,7 @@ define nofpclass(pzero) float @source_is_known_pzero(float nofpclass(nan inf nor
define nofpclass(pzero) float @source_is_known_nzero(float nofpclass(nan inf norm sub pzero) %must.be.nzero) {
; CHECK-LABEL: define nofpclass(pzero) float @source_is_known_nzero(
; CHECK-SAME: float nofpclass(nan inf pzero sub norm) [[MUST_BE_NZERO:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MUST_BE_NZERO]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float 1.000000e+00
;
%exp = call float @llvm.exp2.f32(float %must.be.nzero)
ret float %exp
@@ -384,7 +370,8 @@ define nofpclass(pzero) float @source_is_known_nzero(float nofpclass(nan inf nor
define nofpclass(nzero) float @source_is_known_inf(float nofpclass(nan norm sub zero) %must.be.inf) {
; CHECK-LABEL: define nofpclass(nzero) float @source_is_known_inf(
; CHECK-SAME: float nofpclass(nan zero sub norm) [[MUST_BE_INF:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MUST_BE_INF]])
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq float [[MUST_BE_INF]], 0x7FF0000000000000
+; CHECK-NEXT: [[EXP:%.*]] = select i1 [[TMP1]], float [[MUST_BE_INF]], float 0.000000e+00
; CHECK-NEXT: ret float [[EXP]]
;
%exp = call float @llvm.exp2.f32(float %must.be.inf)
@@ -394,7 +381,8 @@ define nofpclass(nzero) float @source_is_known_inf(float nofpclass(nan norm sub
define nofpclass(nzero) <2 x float> @source_is_known_inf_vec(<2 x float> nofpclass(nan norm sub zero) %must.be.inf) {
; CHECK-LABEL: define nofpclass(nzero) <2 x float> @source_is_known_inf_vec(
; CHECK-SAME: <2 x float> nofpclass(nan zero sub norm) [[MUST_BE_INF:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[MUST_BE_INF]])
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq <2 x float> [[MUST_BE_INF]], splat (float 0x7FF0000000000000)
+; CHECK-NEXT: [[EXP:%.*]] = select <2 x i1> [[TMP1]], <2 x float> [[MUST_BE_INF]], <2 x float> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[EXP]]
;
%exp = call <2 x float> @llvm.exp2.v2f32(<2 x float> %must.be.inf)
@@ -404,8 +392,7 @@ define nofpclass(nzero) <2 x float> @source_is_known_inf_vec(<2 x float> nofpcla
define nofpclass(nzero) float @source_is_known_pinf(float nofpclass(ninf nan norm sub zero) %must.be.pinf) {
; CHECK-LABEL: define nofpclass(nzero) float @source_is_known_pinf(
; CHECK-SAME: float nofpclass(nan ninf zero sub norm) [[MUST_BE_PINF:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MUST_BE_PINF]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float 0x7FF0000000000000
;
%exp = call float @llvm.exp2.f32(float %must.be.pinf)
ret float %exp
@@ -414,8 +401,7 @@ define nofpclass(nzero) float @source_is_known_pinf(float nofpclass(ninf nan nor
define nofpclass(nzero) float @source_is_known_ninf(float nofpclass(pinf nan norm sub zero) %must.be.ninf) {
; CHECK-LABEL: define nofpclass(nzero) float @source_is_known_ninf(
; CHECK-SAME: float nofpclass(nan pinf zero sub norm) [[MUST_BE_NINF:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MUST_BE_NINF]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float 0.000000e+00
;
%exp = call float @llvm.exp2.f32(float %must.be.ninf)
ret float %exp
@@ -424,8 +410,8 @@ define nofpclass(nzero) float @source_is_known_ninf(float nofpclass(pinf nan nor
define nofpclass(nzero) floa...
[truncated]
|
|
@llvm/pr-subscribers-llvm-analysis Author: Matt Arsenault (arsenm). Changes: InstCombine: Handle exp/exp2/exp10 in SimplifyDemandedFPClass. I'm working on optimizing out the tail sequences in the [remainder of sentence lost in extraction]. Related to #64870. Make KnownFPClass::exp not side-effecting. Patch is 25.27 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/173432.diff. 5 Files Affected:
diff --git a/llvm/include/llvm/Support/KnownFPClass.h b/llvm/include/llvm/Support/KnownFPClass.h
index 7fe6197cb84aa..3f55ae5e08f9e 100644
--- a/llvm/include/llvm/Support/KnownFPClass.h
+++ b/llvm/include/llvm/Support/KnownFPClass.h
@@ -161,6 +161,9 @@ struct KnownFPClass {
canonicalize(const KnownFPClass &Src,
DenormalMode DenormMode = DenormalMode::getDynamic());
+ /// Report known values for exp, exp2 and exp10.
+ LLVM_ABI static KnownFPClass exp(const KnownFPClass &Src);
+
/// Return true if the sign bit must be 0, ignoring the sign of nans.
bool signBitIsZeroOrNaN() const { return isKnownNever(fcNegative); }
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 9a3d11eaa38c8..cb99ec0194d42 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5345,38 +5345,15 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
case Intrinsic::exp2:
case Intrinsic::exp10:
case Intrinsic::amdgcn_exp2: {
- Known.knownNot(fcNegative);
-
- Type *EltTy = II->getType()->getScalarType();
- if (IID == Intrinsic::amdgcn_exp2 && EltTy->isFloatTy())
- Known.knownNot(fcSubnormal);
-
- if ((InterestedClasses & fcNan) == fcNone)
- break;
-
KnownFPClass KnownSrc;
computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
KnownSrc, Q, Depth + 1);
- if (KnownSrc.isKnownNeverNaN()) {
- Known.knownNot(fcNan);
- Known.signBitMustBeZero();
- }
- if (KnownSrc.cannotBeOrderedLessThanZero()) {
- // If the source is positive, and cannot be ~0, this cannot underflow.
- Known.knownNot(fcPosZero);
+ Known = KnownFPClass::exp(KnownSrc);
- // Cannot introduce new denormal values.
- if (KnownSrc.isKnownNever(fcPosSubnormal))
- Known.knownNot(fcPosSubnormal);
- }
-
- if (KnownSrc.cannotBeOrderedGreaterThanZero()) {
- // If the source is negative, and cannot be infinity, this cannot
- // overflow to infinity.
- if (KnownSrc.isKnownNeverPosInfinity())
- Known.knownNot(fcPosInf);
- }
+ Type *EltTy = II->getType()->getScalarType();
+ if (IID == Intrinsic::amdgcn_exp2 && EltTy->isFloatTy())
+ Known.knownNot(fcSubnormal);
break;
}
diff --git a/llvm/lib/Support/KnownFPClass.cpp b/llvm/lib/Support/KnownFPClass.cpp
index 556a3b165d80d..1676196d073b4 100644
--- a/llvm/lib/Support/KnownFPClass.cpp
+++ b/llvm/lib/Support/KnownFPClass.cpp
@@ -137,6 +137,34 @@ KnownFPClass KnownFPClass::canonicalize(const KnownFPClass &KnownSrc,
return Known;
}
+KnownFPClass KnownFPClass::exp(const KnownFPClass &KnownSrc) {
+ KnownFPClass Known;
+ Known.knownNot(fcNegative);
+
+ if (KnownSrc.isKnownNeverNaN()) {
+ Known.knownNot(fcNan);
+ Known.signBitMustBeZero();
+ }
+
+ if (KnownSrc.cannotBeOrderedLessThanZero()) {
+ // If the source is positive, and cannot be ~0, this cannot underflow.
+ Known.knownNot(fcPosZero);
+
+ // Cannot introduce new denormal values.
+ if (KnownSrc.isKnownNever(fcPosSubnormal))
+ Known.knownNot(fcPosSubnormal);
+ }
+
+ if (KnownSrc.cannotBeOrderedGreaterThanZero()) {
+ // If the source is negative, and cannot be infinity, this cannot
+ // overflow to infinity.
+ if (KnownSrc.isKnownNeverPosInfinity())
+ Known.knownNot(fcPosInf);
+ }
+
+ return Known;
+}
+
void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src,
DenormalMode Mode) {
propagateDenormal(Src, Mode);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 579cbd07fbc0f..7fa7ecad33bb1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -2106,6 +2106,98 @@ Value *InstCombinerImpl::SimplifyDemandedUseFPClass(Value *V,
Known.copysign(KnownSign);
break;
}
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::exp10: {
+ if ((DemandedMask &
+ (fcPosZero | fcPosSubnormal | fcPosNormal | fcPosInf)) == fcNone) {
+ // Only returns positive values or nans.
+ if ((DemandedMask & fcNan) == fcNone)
+ return PoisonValue::get(VTy);
+
+ // Only need nan propagation.
+ // Note: Dropping snan quieting.
+ return CI->getArgOperand(0);
+ }
+
+ FPClassTest SrcDemandedMask = DemandedMask & fcNan;
+
+ if (DemandedMask & fcZero) {
+ // exp(-infinity) = 0
+ SrcDemandedMask |= fcNegInf;
+
+ // exp(-largest_normal) = 0
+ //
+ // Negative numbers of sufficiently large magnitude underflow to 0. No
+ // subnormal input has a 0 result.
+ SrcDemandedMask |= fcNegNormal;
+ }
+
+ if (DemandedMask & fcPosSubnormal) {
+ // Negative normal inputs of sufficiently large magnitude underflow into the
+ // subnormal range. No subnormal input has a subnormal result.
+ SrcDemandedMask |= fcNegNormal;
+ }
+
+ if (DemandedMask & fcPosNormal) {
+ // exp(0) = 1
+ // exp(+/- smallest_normal) = 1
+ // exp(+/- largest_denormal) = 1
+ // exp(+/- smallest_denormal) = 1
+ SrcDemandedMask |= fcPosNormal | fcSubnormal | fcZero;
+ }
+
+ // exp(inf), exp(largest_normal) = inf
+ if (DemandedMask & fcPosInf)
+ SrcDemandedMask |= fcPosInf | fcPosNormal;
+
+ KnownFPClass KnownSrc;
+
+ // TODO: This could really make use of KnownFPClass of specific value
+ // range, (i.e., close enough to 1)
+ if (SimplifyDemandedFPClass(I, 0, SrcDemandedMask, KnownSrc, Depth + 1))
+ return I;
+
+ /// Propagate nnan-ness to simplify edge case checks.
+ if ((DemandedMask & fcNan) == fcNone)
+ KnownSrc.knownNot(fcNan);
+
+ // exp(+/-0) = 1
+ if (KnownSrc.isKnownAlways(fcZero))
+ return ConstantFP::get(VTy, 1.0);
+
+ // exp(0 | nan) => x == 0.0 ? 1.0 : x
+ if (KnownSrc.isKnownAlways(fcZero | fcNan)) {
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(CI);
+
+ // fadd +/-0, 1.0 => 1.0
+ // fadd nan, 1.0 => nan
+ return Builder.CreateFAdd(CI->getArgOperand(0),
+ ConstantFP::get(VTy, 1.0));
+ }
+
+ if (KnownSrc.isKnownAlways(fcInf | fcNan)) {
+ // exp(-inf) = 0
+ // exp(+inf) = +inf
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(CI);
+
+ // Note: Dropping canonicalize / quiet of signaling nan.
+ Value *X = CI->getArgOperand(0);
+ Value *IsPosInfOrNan =
+ Builder.CreateFCmpUEQ(X, ConstantFP::getInfinity(VTy));
+ return Builder.CreateSelect(IsPosInfOrNan, X, ConstantFP::getZero(VTy));
+ }
+
+ // Only perform nan propagation.
+ // Note: Dropping canonicalize / quiet of signaling nan.
+ if (KnownSrc.isKnownAlways(fcNan))
+ return CI->getArgOperand(0);
+
+ Known = KnownFPClass::exp(KnownSrc);
+ break;
+ }
case Intrinsic::canonicalize: {
Type *EltTy = VTy->getScalarType();
diff --git a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass-exp.ll b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass-exp.ll
index 7706d2de68f16..8311243e45580 100644
--- a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass-exp.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass-exp.ll
@@ -54,8 +54,7 @@ define nofpclass(nan) float @ret_nofpclass_nan__exp2_select_maybe_inf_or_not_nan
define nofpclass(nan) float @ret_nofpclass_nan__exp2_select_maybe_inf_or_nan(i1 %cond, float %maybe.nan, float nofpclass(inf zero sub norm) %only.nan) {
; CHECK-LABEL: define nofpclass(nan) float @ret_nofpclass_nan__exp2_select_maybe_inf_or_nan(
; CHECK-SAME: i1 [[COND:%.*]], float [[MAYBE_NAN:%.*]], float nofpclass(inf zero sub norm) [[ONLY_NAN:%.*]]) {
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[MAYBE_NAN]], float [[ONLY_NAN]]
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[SELECT]])
+; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MAYBE_NAN]])
; CHECK-NEXT: ret float [[EXP]]
;
%select = select i1 %cond, float %maybe.nan, float %only.nan
@@ -67,8 +66,7 @@ define nofpclass(nan) float @ret_nofpclass_nan__exp2_select_maybe_inf_or_nan(i1
define nofpclass(pinf zero psub pnorm) float @ret_nofpclass_no_positives__exp2(float %x) {
; CHECK-LABEL: define nofpclass(pinf zero psub pnorm) float @ret_nofpclass_no_positives__exp2(
; CHECK-SAME: float [[X:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[X]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float [[X]]
;
%exp = call float @llvm.exp2.f32(float %x)
ret float %exp
@@ -88,8 +86,7 @@ define nofpclass(nan pinf zero psub pnorm) float @ret_nofpclass_no_positives_no_
define nofpclass(pzero pinf psub pnorm) float @ret_nofpclass_no_positives_except_neg0__exp2(float %x) {
; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_nofpclass_no_positives_except_neg0__exp2(
; CHECK-SAME: float [[X:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[X]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float [[X]]
;
%exp = call float @llvm.exp2.f32(float %x)
ret float %exp
@@ -131,8 +128,7 @@ define nofpclass(pinf psub pnorm) float @ret_nofpclass_no_positives_except_0__ex
define nofpclass(nan) float @handle_exp(i1 %cond, float %maybe.nan, float nofpclass(inf zero sub norm) %only.nan) {
; CHECK-LABEL: define nofpclass(nan) float @handle_exp(
; CHECK-SAME: i1 [[COND:%.*]], float [[MAYBE_NAN:%.*]], float nofpclass(inf zero sub norm) [[ONLY_NAN:%.*]]) {
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[MAYBE_NAN]], float [[ONLY_NAN]]
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp.f32(float [[SELECT]])
+; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp.f32(float [[MAYBE_NAN]])
; CHECK-NEXT: ret float [[EXP]]
;
%select = select i1 %cond, float %maybe.nan, float %only.nan
@@ -144,8 +140,7 @@ define nofpclass(nan) float @handle_exp(i1 %cond, float %maybe.nan, float nofpcl
define nofpclass(nan) float @handle_exp10(i1 %cond, float %maybe.nan, float nofpclass(inf zero sub norm) %only.nan) {
; CHECK-LABEL: define nofpclass(nan) float @handle_exp10(
; CHECK-SAME: i1 [[COND:%.*]], float [[MAYBE_NAN:%.*]], float nofpclass(inf zero sub norm) [[ONLY_NAN:%.*]]) {
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[MAYBE_NAN]], float [[ONLY_NAN]]
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp10.f32(float [[SELECT]])
+; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp10.f32(float [[MAYBE_NAN]])
; CHECK-NEXT: ret float [[EXP]]
;
%select = select i1 %cond, float %maybe.nan, float %only.nan
@@ -158,7 +153,7 @@ define nofpclass(nan) float @handle_exp10(i1 %cond, float %maybe.nan, float nofp
define nofpclass(inf norm nan) float @ret_nofpclass_only_subzero__exp2_select_unknown_or_not_norm(i1 %cond, float %unknown, float nofpclass(norm) %not.norm) {
; CHECK-LABEL: define nofpclass(nan inf norm) float @ret_nofpclass_only_subzero__exp2_select_unknown_or_not_norm(
; CHECK-SAME: i1 [[COND:%.*]], float [[UNKNOWN:%.*]], float nofpclass(norm) [[NOT_NORM:%.*]]) {
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[UNKNOWN]], float [[NOT_NORM]]
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[UNKNOWN]], float 0xFFF0000000000000
; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[SELECT]])
; CHECK-NEXT: ret float [[EXP]]
;
@@ -170,8 +165,7 @@ define nofpclass(inf norm nan) float @ret_nofpclass_only_subzero__exp2_select_un
define nofpclass(inf norm nan zero) float @ret_nofpclass_only_sub__exp2_select_unknown_or_not_norm(i1 %cond, float %unknown, float nofpclass(norm) %not.norm) {
; CHECK-LABEL: define nofpclass(nan inf zero norm) float @ret_nofpclass_only_sub__exp2_select_unknown_or_not_norm(
; CHECK-SAME: i1 [[COND:%.*]], float [[UNKNOWN:%.*]], float nofpclass(norm) [[NOT_NORM:%.*]]) {
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[UNKNOWN]], float [[NOT_NORM]]
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[SELECT]])
+; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[UNKNOWN]])
; CHECK-NEXT: ret float [[EXP]]
;
%select = select i1 %cond, float %unknown, float %not.norm
@@ -195,8 +189,7 @@ define nofpclass(inf norm nan sub) float @ret_nofpclass_only_zero__exp2_select_u
define nofpclass(ninf norm zero sub) float @pinf_result_implies_pnorm_source(float nofpclass(pinf nan) %maybe.pnorm) {
; CHECK-LABEL: define nofpclass(ninf zero sub norm) float @pinf_result_implies_pnorm_source(
; CHECK-SAME: float nofpclass(nan pinf) [[MAYBE_PNORM:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MAYBE_PNORM]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float 0x7FF0000000000000
;
%exp = call float @llvm.exp2.f32(float %maybe.pnorm)
ret float %exp
@@ -215,8 +208,7 @@ define nofpclass(ninf norm zero sub) float @pinf_result_implies_pnorm_source_nan
define nofpclass(pinf norm zero sub) float @ninf_result_implies_poison(float nofpclass(ninf nan) %maybe.nnorm) {
; CHECK-LABEL: define nofpclass(pinf zero sub norm) float @ninf_result_implies_poison(
; CHECK-SAME: float nofpclass(nan ninf) [[MAYBE_NNORM:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MAYBE_NNORM]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float poison
;
%exp = call float @llvm.exp2.f32(float %maybe.nnorm)
ret float %exp
@@ -275,8 +267,7 @@ define nofpclass(inf norm nan zero) float @sub_result_implies_nnorm_source_valid
define nofpclass(inf norm nan zero) float @sub_result_implies_nsub_source_valid(float nofpclass(norm psub nan) %maybe.nsub) {
; CHECK-LABEL: define nofpclass(nan inf zero norm) float @sub_result_implies_nsub_source_valid(
; CHECK-SAME: float nofpclass(nan psub norm) [[MAYBE_NSUB:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MAYBE_NSUB]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float poison
;
%exp = call float @llvm.exp2.f32(float %maybe.nsub)
ret float %exp
@@ -323,8 +314,7 @@ define nofpclass(inf nnorm nan zero) float @pnorm_result_implies_possible_0_sour
define nofpclass(inf nnorm nan zero sub) float @pnorm_result_implies_possible_0_source_no_inf(float nofpclass(inf norm sub) %maybe.zero.or.nan) {
; CHECK-LABEL: define nofpclass(nan inf zero sub nnorm) float @pnorm_result_implies_possible_0_source_no_inf(
; CHECK-SAME: float nofpclass(inf sub norm) [[MAYBE_ZERO_OR_NAN:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MAYBE_ZERO_OR_NAN]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float 1.000000e+00
;
%exp = call float @llvm.exp2.f32(float %maybe.zero.or.nan)
ret float %exp
@@ -344,8 +334,7 @@ define nofpclass(inf nnorm nan zero sub) float @pnorm_result_implies_possible_su
define nofpclass(pzero) float @source_is_known_zero(float nofpclass(nan inf norm sub) %must.be.zero) {
; CHECK-LABEL: define nofpclass(pzero) float @source_is_known_zero(
; CHECK-SAME: float nofpclass(nan inf sub norm) [[MUST_BE_ZERO:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MUST_BE_ZERO]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float 1.000000e+00
;
%exp = call float @llvm.exp2.f32(float %must.be.zero)
ret float %exp
@@ -354,8 +343,7 @@ define nofpclass(pzero) float @source_is_known_zero(float nofpclass(nan inf norm
define nofpclass(pzero) <2 x float> @source_is_known_zero_vec(<2 x float> nofpclass(nan inf norm sub) %must.be.zero) {
; CHECK-LABEL: define nofpclass(pzero) <2 x float> @source_is_known_zero_vec(
; CHECK-SAME: <2 x float> nofpclass(nan inf sub norm) [[MUST_BE_ZERO:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[MUST_BE_ZERO]])
-; CHECK-NEXT: ret <2 x float> [[EXP]]
+; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00)
;
%exp = call <2 x float> @llvm.exp2.v2f32(<2 x float> %must.be.zero)
ret <2 x float> %exp
@@ -364,8 +352,7 @@ define nofpclass(pzero) <2 x float> @source_is_known_zero_vec(<2 x float> nofpcl
define nofpclass(pzero) float @source_is_known_pzero(float nofpclass(nan inf norm sub nzero) %must.be.pzero) {
; CHECK-LABEL: define nofpclass(pzero) float @source_is_known_pzero(
; CHECK-SAME: float nofpclass(nan inf nzero sub norm) [[MUST_BE_PZERO:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MUST_BE_PZERO]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float 1.000000e+00
;
%exp = call float @llvm.exp2.f32(float %must.be.pzero)
ret float %exp
@@ -374,8 +361,7 @@ define nofpclass(pzero) float @source_is_known_pzero(float nofpclass(nan inf nor
define nofpclass(pzero) float @source_is_known_nzero(float nofpclass(nan inf norm sub pzero) %must.be.nzero) {
; CHECK-LABEL: define nofpclass(pzero) float @source_is_known_nzero(
; CHECK-SAME: float nofpclass(nan inf pzero sub norm) [[MUST_BE_NZERO:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MUST_BE_NZERO]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float 1.000000e+00
;
%exp = call float @llvm.exp2.f32(float %must.be.nzero)
ret float %exp
@@ -384,7 +370,8 @@ define nofpclass(pzero) float @source_is_known_nzero(float nofpclass(nan inf nor
define nofpclass(nzero) float @source_is_known_inf(float nofpclass(nan norm sub zero) %must.be.inf) {
; CHECK-LABEL: define nofpclass(nzero) float @source_is_known_inf(
; CHECK-SAME: float nofpclass(nan zero sub norm) [[MUST_BE_INF:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MUST_BE_INF]])
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq float [[MUST_BE_INF]], 0x7FF0000000000000
+; CHECK-NEXT: [[EXP:%.*]] = select i1 [[TMP1]], float [[MUST_BE_INF]], float 0.000000e+00
; CHECK-NEXT: ret float [[EXP]]
;
%exp = call float @llvm.exp2.f32(float %must.be.inf)
@@ -394,7 +381,8 @@ define nofpclass(nzero) float @source_is_known_inf(float nofpclass(nan norm sub
define nofpclass(nzero) <2 x float> @source_is_known_inf_vec(<2 x float> nofpclass(nan norm sub zero) %must.be.inf) {
; CHECK-LABEL: define nofpclass(nzero) <2 x float> @source_is_known_inf_vec(
; CHECK-SAME: <2 x float> nofpclass(nan zero sub norm) [[MUST_BE_INF:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[MUST_BE_INF]])
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq <2 x float> [[MUST_BE_INF]], splat (float 0x7FF0000000000000)
+; CHECK-NEXT: [[EXP:%.*]] = select <2 x i1> [[TMP1]], <2 x float> [[MUST_BE_INF]], <2 x float> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[EXP]]
;
%exp = call <2 x float> @llvm.exp2.v2f32(<2 x float> %must.be.inf)
@@ -404,8 +392,7 @@ define nofpclass(nzero) <2 x float> @source_is_known_inf_vec(<2 x float> nofpcla
define nofpclass(nzero) float @source_is_known_pinf(float nofpclass(ninf nan norm sub zero) %must.be.pinf) {
; CHECK-LABEL: define nofpclass(nzero) float @source_is_known_pinf(
; CHECK-SAME: float nofpclass(nan ninf zero sub norm) [[MUST_BE_PINF:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MUST_BE_PINF]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float 0x7FF0000000000000
;
%exp = call float @llvm.exp2.f32(float %must.be.pinf)
ret float %exp
@@ -414,8 +401,7 @@ define nofpclass(nzero) float @source_is_known_pinf(float nofpclass(ninf nan nor
define nofpclass(nzero) float @source_is_known_ninf(float nofpclass(pinf nan norm sub zero) %must.be.ninf) {
; CHECK-LABEL: define nofpclass(nzero) float @source_is_known_ninf(
; CHECK-SAME: float nofpclass(nan pinf zero sub norm) [[MUST_BE_NINF:%.*]]) {
-; CHECK-NEXT: [[EXP:%.*]] = call float @llvm.exp2.f32(float [[MUST_BE_NINF]])
-; CHECK-NEXT: ret float [[EXP]]
+; CHECK-NEXT: ret float 0.000000e+00
;
%exp = call float @llvm.exp2.f32(float %must.be.ninf)
ret float %exp
@@ -424,8 +410,8 @@ define nofpclass(nzero) float @source_is_known_ninf(float nofpclass(pinf nan nor
define nofpclass(nzero) floa...
[truncated]
|
🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed. |
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
5ae6964 to
c585322
Compare
5bc2f96 to
80a4be8
Compare
c585322 to
54006df
Compare
llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
Outdated
Show resolved
Hide resolved
54006df to
6fdbe25
Compare
80a4be8 to
0854e1a
Compare
0854e1a to
373b0b5
Compare
6fdbe25 to
9e86920
Compare
I'm working on optimizing out the tail sequences in the implementations of the 4 different flavors of pow. These include chains of selects on the various edge cases. Related to #64870
9e86920 to
6e161f1
Compare
// exp(+/- smallest_normal) = 1
// exp(+/- largest_denormal) = 1
// exp(+/- smallest_denormal) = 1
SrcDemandedMask |= fcPosNormal | fcSubnormal | fcZero;
There was a problem hiding this comment.
Suggested change:
- SrcDemandedMask |= fcPosNormal | fcSubnormal | fcZero;
+ SrcDemandedMask |= fcNormal | fcSubnormal | fcZero;
E.g. -1 is fcNegNormal, exp(-1) is 0.37 which is fcPosNormal.
// exp(-inf) = 0
// exp(+inf) = +inf
There was a problem hiding this comment.
Could you implement this as max(x, 0)? Or is it too hard to get the right NaN handling?
There was a problem hiding this comment.
That would have to be maximum, not maxnum, to propagate NaN, which is going to codegen worse most of the time.
…73432) I'm working on optimizing out the tail sequences in the implementations of the 4 different flavors of pow. These include chains of selects on the various edge cases. Related to llvm#64870

I'm working on optimizing out the tail sequences in the
implementations of the 4 different flavors of pow. These
include chains of selects on the various edge cases.
Related to #64870