[BOLT][AArch64] support inline-small-functions for AArch64 #120187
Merged
[BOLT][AArch64] support inline-small-functions for AArch64 #120187
inline-small-functions for AArch64#120187Conversation
inline-small-functions for AArch64inline-small-functions for AArch64
Member
|
@llvm/pr-subscribers-bolt Author: Nicholas (liusy58) Changes: Add some functions in AArch64MCPlusBuilder.cpp to support inline for AArch64. Full diff: https://github.com/llvm/llvm-project/pull/120187.diff 3 Files Affected:
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
index f004a8eeea185b..1793f4ff1f1480 100644
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -310,13 +310,13 @@ Inliner::inlineCall(BinaryBasicBlock &CallerBB,
if (MIB.isPseudo(Inst))
continue;
- MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86());
+ MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86() || BC.isAArch64());
// Fix branch target. Strictly speaking, we don't have to do this as
// targets of direct branches will be fixed later and don't matter
// in the CFG state. However, disassembly may look misleading, and
// hence we do the fixing.
- if (MIB.isBranch(Inst)) {
+ if (MIB.isBranch(Inst) && !MIB.isTailCall(Inst)) {
assert(!MIB.isIndirectBranch(Inst) &&
"unexpected indirect branch in callee");
const BinaryBasicBlock *TargetBB =
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 7e08e5c81d26ff..0722b8ae0cb2c9 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -133,6 +133,34 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
public:
using MCPlusBuilder::MCPlusBuilder;
+ MCPhysReg getStackPointer() const override { return AArch64::SP; }
+
+ bool isPush(const MCInst &Inst) const override { return false; }
+
+ bool isPop(const MCInst &Inst) const override { return false; }
+
+ void createCall(MCInst &Inst, const MCSymbol *Target,
+ MCContext *Ctx) override {
+ createDirectCall(Inst, Target, Ctx, false);
+ }
+
+ bool convertTailCallToCall(MCInst &Inst) override {
+ int NewOpcode;
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case AArch64::B:
+ NewOpcode = AArch64::BL;
+ break;
+ case AArch64::BR:
+ NewOpcode = AArch64::BLR;
+ break;
+ }
+ Inst.setOpcode(NewOpcode);
+ removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
+ return true;
+ }
+
bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
CompFuncTy Comp) const override {
const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
diff --git a/bolt/test/AArch64/inline-test.s b/bolt/test/AArch64/inline-test.s
new file mode 100644
index 00000000000000..ec33f735163899
--- /dev/null
+++ b/bolt/test/AArch64/inline-test.s
@@ -0,0 +1,57 @@
+# This test checks that inline is properly handled by BOLT on aarch64.
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-linux-gnu %s -o %t.o
+# RUN: %clang --target=aarch64-unknown-linux %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt --inline-small-functions --print-inline --print-only=_Z3barP1A -debug-only=bolt-inliner %t.exe -o %t.bolt | FileCheck %s
+
+# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 4 bytes.
+# CHECK: Binary Function "_Z3barP1A" after inlining {
+# CHECK-NOT: bl _Z3fooP1A
+# CHECK: ldr x8, [x0]
+# CHECK-NEXT: ldr w0, [x8]
+
+ .text
+ .globl _Z3fooP1A // -- Begin function _Z3fooP1A
+ .p2align 2
+ .type _Z3fooP1A,@function
+_Z3fooP1A: // @_Z3fooP1A
+ .cfi_startproc
+ ldr x8, [x0]
+ ldr w0, [x8]
+ ret
+.Lfunc_end0:
+ .size _Z3fooP1A, .Lfunc_end0-_Z3fooP1A
+ .cfi_endproc
+ .globl _Z3barP1A // -- Begin function _Z3barP1A
+ .p2align 2
+ .type _Z3barP1A,@function
+_Z3barP1A: // @_Z3barP1A
+ .cfi_startproc
+ stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+ .cfi_def_cfa_offset 16
+ mov x29, sp
+ .cfi_def_cfa w29, 16
+ .cfi_offset w30, -8
+ .cfi_offset w29, -16
+ bl _Z3fooP1A
+ mul w0, w0, w0
+ .cfi_def_cfa wsp, 16
+ ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+ .cfi_def_cfa_offset 0
+ .cfi_restore w30
+ .cfi_restore w29
+ ret
+.Lfunc_end1:
+ .size _Z3barP1A, .Lfunc_end1-_Z3barP1A
+ .cfi_endproc
+ .globl main // -- Begin function main
+ .p2align 2
+ .type main,@function
+main: // @main
+ .cfi_startproc
+ mov w0, wzr
+ ret
+.Lfunc_end2:
+ .size main, .Lfunc_end2-main
+ .cfi_endproc
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
\ No newline at end of file
|
efce59a to
d8acd28
Compare
aaupov
reviewed
Dec 17, 2024
Contributor
aaupov
left a comment
There was a problem hiding this comment.
Looks good overall, but please address comments/suggestions
| # RUN: llvm-bolt --inline-small-functions --print-inline --print-only=_Z3barP1A \ | ||
| # RUN: %t.exe -o %t.bolt | FileCheck %s | ||
|
|
||
| # CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 4 bytes. |
Contributor
There was a problem hiding this comment.
Why does it report "inlined 0 calls" when inlining is actually in effect?
Contributor
There was a problem hiding this comment.
"inlined 0 calls" here due to lack of fdata file
Comment on lines
+23
to
+24
| .Lfunc_end0: | ||
| .size _Z3fooP1A, .Lfunc_end0-_Z3fooP1A |
Contributor
There was a problem hiding this comment.
Suggested change
| .Lfunc_end0: | |
| .size _Z3fooP1A, .Lfunc_end0-_Z3fooP1A | |
| .size _Z3fooP1A, .-_Z3fooP1A |
Comment on lines
+47
to
+48
| .section ".note.GNU-stack","",@progbits | ||
| .addrsig No newline at end of file |
Contributor
There was a problem hiding this comment.
Suggested change
| .section ".note.GNU-stack","",@progbits | |
| .addrsig |
yota9
reviewed
Dec 17, 2024
| @@ -0,0 +1,48 @@ | |||
| ## This test checks that inline is properly handled by BOLT on aarch64. | |||
|
|
|||
| # REQUIRES: system-linux, asserts | |||
d8acd28 to
766cd23
Compare
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
3b6eabf to
3bb6893
Compare
yavtuk
reviewed
Dec 18, 2024
yavtuk
reviewed
Dec 18, 2024
yavtuk
reviewed
Dec 26, 2024
3bb6893 to
aa2bb22
Compare
yavtuk
approved these changes
Jan 16, 2025
liusy58
pushed a commit
that referenced
this pull request
Jan 20, 2025
This functionality is needed for inliner pass and also for correct dyno stats. Needed for [PR](#120187)
github-actions bot
pushed a commit
to arm/arm-toolchain
that referenced
this pull request
Jan 20, 2025
This functionality is needed for inliner pass and also for correct dyno stats. Needed for [PR](llvm/llvm-project#120187)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Add some functions in AArch64MCPlusBuilder.cpp to support inline for AArch64.