Skip to content

[bolt][aarch64] Change indirect call instrumentation snippet#180229

Merged
yavtuk merged 1 commit intollvm:mainfrom
yavtuk:fix/indirect-call-instr-snippet
Feb 16, 2026
Merged

[bolt][aarch64] Change indirect call instrumentation snippet#180229
yavtuk merged 1 commit intollvm:mainfrom
yavtuk:fix/indirect-call-instr-snippet

Conversation

@yavtuk
Copy link
Contributor

@yavtuk yavtuk commented Feb 6, 2026

Indirect call instrumentation snippet uses x16 register in exit handler to go to destination target

__bolt_instr_ind_call_handler_func:
        msr  nzcv, x1
        ldp  x0, x1, [sp], #16
        ldr  x16, [sp], #16
        ldp  x0, x1, [sp], #16
        br   x16	<-----

This patch adds the instrumentation snippet by calling instrumentation runtime library through indirect call instruction and adding the wrapper to store/load target value and the register for original indirect instruction.

Example:
mov x16, foo

infirectCall:
        adrp x8, Label
        add  x8, x8, #:lo12:Label
        blr x8

Before:

Instrumented indirect call:
        stp     x0, x1, [sp, #-16]!
        mov     x0, x8
        movk    x1, #0x0, lsl #48
        movk    x1, #0x0, lsl #32
        movk    x1, #0x0, lsl #16
        movk    x1, #0x0
        stp     x0, x1, [sp, #-16]!
        adrp    x0, __bolt_instr_ind_call_handler_func
        add     x0, x0, #:lo12:__bolt_instr_ind_call_handler_func
        blr     x0

__bolt_instr_ind_call_handler:  (exit snippet)
        msr     nzcv, x1
        ldp     x0, x1, [sp], #16
        ldr     x16, [sp], #16
        ldp     x0, x1, [sp], #16
        br      x16    <- overwrites the original value in X16

__bolt_instr_ind_call_handler_func:  (entry snippet)
        stp     x0, x1, [sp, #-16]!
        mrs     x1, nzcv
        adrp    x0, __bolt_instr_ind_call_handler
        add     x0, x0, x0, #:lo12:__bolt_instr_ind_call_handler
        ldr     x0, [x0]
        cmp     x0, #0x0
        b.eq    __bolt_instr_ind_call_handler
        str     x30, [sp, #-16]!
        blr     x0     <--- runtime lib store/load all regs
        ldr     x30, [sp], #16
        b       __bolt_instr_ind_call_handler

After:

        mov     x16, foo
infirectCall:
        adrp    x8, Label
        add     x8, x8, #:lo12:Label
        blr     x8

Instrumented indirect call:
        stp     x0, x30, [sp, #-16]!
        mov     x0, callsiteid
        stp    x8, x0, [sp, #-16]!
        adrp    x8, __bolt_instr_ind_call_handler_func
        add     x8, x8, #:lo12:__bolt_instr_ind_call_handler_func
        blr     x8       <--- call trampoline instr lib
        ldr     x8, [sp], #16
        ldp     x0, x30, [sp], #16
        blr     x8       <--- original indirect call instruction

// don't touch regs besides x0, x1
__bolt_instr_ind_call_handler:  (exit snippet)
        ret     <---- return to original function with indirect call

__bolt_instr_ind_call_handler_func: (entry snippet)
        adrp    x0, __bolt_instr_ind_call_handler
        add     x0, x0, #:lo12:__bolt_instr_ind_call_handler
        ldr     x0, [x0]
        cmp     x0, #0x0
        b.eq    __bolt_instr_ind_call_handler
        str     x30, [sp, #-16]!
        blr     x0     <--- runtime lib store/load all regs
        ldr     x30, [sp], #16
        b       __bolt_instr_ind_call_handler

@llvmbot
Copy link
Member

llvmbot commented Feb 6, 2026

@llvm/pr-subscribers-backend-risc-v

@llvm/pr-subscribers-bolt

Author: Alexey Moksyakov (yavtuk)

Changes

Indirect call instrumentation snippet uses x16 register in exit handler to go to destination target

__bolt_instr_ind_call_handler_func:
        msr  nzcv, x1
        ldp  x0, x1, [sp], #<!-- -->16
        ldr  x16, [sp], #<!-- -->16
        ldp  x0, x1, [sp], #<!-- -->16
        br   x16	&lt;-----

This patch adds the instrumentation snippet by calling instrumentation runtime library through indirect call instruction and adding the wrapper to store/load target value and the register for original indirect instruction.

Example:
mov x16, foo

infirectCall:
        adrp x8, Label
        add  x8, x8, #:lo12:Label
        blr x8

Before:

Instrumented indirect call:
        stp     x0, x1, [sp, #-16]!
        mov     x0, x8
        movk    x1, #<!-- -->0x0, lsl #<!-- -->48
        movk    x1, #<!-- -->0x0, lsl #<!-- -->32
        movk    x1, #<!-- -->0x0, lsl #<!-- -->16
        movk    x1, #<!-- -->0x0
        stp     x0, x1, [sp, #-16]!
        adrp    x0, __bolt_instr_ind_call_handler_func
        add     x0, x0, #:lo12:__bolt_instr_ind_call_handler_func
        blr     x0

__bolt_instr_ind_call_handler:  (exit snippet)
        msr     nzcv, x1
        ldp     x0, x1, [sp], #<!-- -->16
        ldr     x16, [sp], #<!-- -->16
        ldp     x0, x1, [sp], #<!-- -->16
        br      x16    &lt;- overwrites the original value in X16

__bolt_instr_ind_call_handler_func:  (entry snippet)
        stp     x0, x1, [sp, #-16]!
        mrs     x1, nzcv
        adrp    x0, __bolt_instr_ind_call_handler
        add     x0, x0, x0, #:lo12:__bolt_instr_ind_call_handler
        ldr     x0, [x0]
        cmp     x0, #<!-- -->0x0
        b.eq    __bolt_instr_ind_call_handler
        str     x30, [sp, #-16]!
        blr     x0     &lt;--- runtime lib store/load all regs
        ldr     x30, [sp], #<!-- -->16
        b       __bolt_instr_ind_call_handler

After:

        mov     x16, foo
infirectCall:
        adrp    x8, Label
        add     x8, x8, #:lo12:Label
        blr     x8

Instrumented indirect call:
        stp     x0, x1, [sp, #-16]!
        mov     x0, x8
        movk    x1, #<!-- -->0x0, lsl #<!-- -->48
        movk    x1, #<!-- -->0x0, lsl #<!-- -->32
        movk    x1, #<!-- -->0x0, lsl #<!-- -->16
        movk    x1, #<!-- -->0x0
        stp     x0, x1, [sp, #-16]!
        str     x30, [sp, #-16]!
        adrp    x8, __bolt_instr_ind_call_handler_func
        add     x8, x8, #:lo12:__bolt_instr_ind_call_handler_func
        blr     x8       &lt;--- call trampoline instr lib
        ldr     x30, [sp], #<!-- -->16
        ldp     x0, x1, [sp], #<!-- -->16
        mov     x8, x0   &lt;---- restore original target
        ldp     x0, x1, [sp], #<!-- -->16
        blr     x8       &lt;--- original indirect call instruction

// don't touch regs besides x0, x1
__bolt_instr_ind_call_handler:  (exit snippet)
        ret     &lt;---- return to original function with indirect call

__bolt_instr_ind_call_handler_func: (entry snippet)
        adrp    x0, __bolt_instr_ind_call_handler
        add     x0, x0, #:lo12:__bolt_instr_ind_call_handler
        ldr     x0, [x0]
        cmp     x0, #<!-- -->0x0
        b.eq    __bolt_instr_ind_call_handler
        str     x30, [sp, #-16]!
        blr     x0     &lt;--- runtime lib store/load all regs
        ldr     x30, [sp], #<!-- -->16
        b       __bolt_instr_ind_call_handler

Full diff: https://github.com/llvm/llvm-project/pull/180229.diff

6 Files Affected:

  • (modified) bolt/include/bolt/Core/MCPlusBuilder.h (+5)
  • (modified) bolt/lib/Passes/Instrumentation.cpp (+6-3)
  • (modified) bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp (+95-55)
  • (modified) bolt/runtime/instr.cpp (+6)
  • (modified) bolt/runtime/sys_aarch64.h (+4-2)
  • (modified) bolt/test/runtime/AArch64/instrumentation-ind-call.c (+57-1)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index e571e91d85135..595e6d66da90e 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -545,6 +545,11 @@ class MCPlusBuilder {
     llvm_unreachable("not implemented");
   }
 
+  virtual void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
+                                  MCContext *Ctx) {
+    llvm_unreachable("not implemented");
+  }
+
   virtual MCPhysReg getX86R11() const { llvm_unreachable("not implemented"); }
 
   virtual unsigned getShortBranchOpcode(unsigned Opcode) const {
diff --git a/bolt/lib/Passes/Instrumentation.cpp b/bolt/lib/Passes/Instrumentation.cpp
index 418643713c54e..7d62f67aafef3 100644
--- a/bolt/lib/Passes/Instrumentation.cpp
+++ b/bolt/lib/Passes/Instrumentation.cpp
@@ -311,9 +311,12 @@ void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB,
                  : IndCallHandlerExitBBFunction->getSymbol(),
       IndCallSiteID, &*BC.Ctx);
 
-  Iter = BB.eraseInstruction(Iter);
-  Iter = insertInstructions(CounterInstrs, BB, Iter);
-  --Iter;
+  if (!BC.isAArch64()) {
+    Iter = BB.eraseInstruction(Iter);
+    Iter = insertInstructions(CounterInstrs, BB, Iter);
+    --Iter;
+  } else
+    Iter = insertInstructions(CounterInstrs, BB, Iter);
 }
 
 bool Instrumentation::instrumentOneTarget(
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index aa5cf3c671cdc..6e63a64cebfaf 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -48,14 +48,14 @@ static cl::opt<bool> NoLSEAtomics(
 
 namespace {
 
-static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
+[[maybe_unused]] static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
   Inst.setOpcode(AArch64::MRS);
   Inst.clear();
   Inst.addOperand(MCOperand::createReg(RegName));
   Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
 }
 
-static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
+[[maybe_unused]] static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
   Inst.setOpcode(AArch64::MSR);
   Inst.clear();
   Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
@@ -2178,6 +2178,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       convertJmpToTailCall(Inst);
   }
 
+  void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
+                          MCContext *Ctx) override {
+    Inst.setOpcode(AArch64::B);
+    Inst.clear();
+    Inst.addOperand(MCOperand::createExpr(getTargetExprFor(
+        Inst, MCSymbolRefExpr::create(Target, *Ctx), *Ctx, 0)));
+  }
+
   bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
                      const MCSymbol *&TBB, const MCSymbol *&FBB,
                      MCInst *&CondBranch,
@@ -2535,21 +2543,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
   }
 
   InstructionListType createInstrumentedIndCallHandlerExitBB() const override {
-    InstructionListType Insts(5);
     // Code sequence for instrumented indirect call handler:
-    //   msr  nzcv, x1
-    //   ldp  x0, x1, [sp], #16
-    //   ldr  x16, [sp], #16
-    //   ldp  x0, x1, [sp], #16
-    //   br   x16
-    setSystemFlag(Insts[0], AArch64::X1);
-    createPopRegisters(Insts[1], AArch64::X0, AArch64::X1);
-    // Here we load address of the next function which should be called in the
-    // original binary to X16 register. Writing to X16 is permitted without
-    // needing to restore.
-    loadReg(Insts[2], AArch64::X16, AArch64::SP);
-    createPopRegisters(Insts[3], AArch64::X0, AArch64::X1);
-    createIndirectBranch(Insts[4], AArch64::X16, 0);
+    //   ret
+
+    InstructionListType Insts;
+
+    Insts.emplace_back();
+    createReturn(Insts.back());
+
     return Insts;
   }
 
@@ -2625,39 +2626,68 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
                                                      MCSymbol *HandlerFuncAddr,
                                                      int CallSiteID,
                                                      MCContext *Ctx) override {
-    InstructionListType Insts;
     // Code sequence used to enter indirect call instrumentation helper:
-    //   stp x0, x1, [sp, #-16]! createPushRegisters
-    //   mov target x0  convertIndirectCallToLoad -> orr x0 target xzr
+    //   stp x0, x1, [sp, #-16]! createPushRegisters  (1)
+    //   mov target, x0  convertIndirectCallToLoad -> orr x0 target xzr
     //   mov x1 CallSiteID createLoadImmediate ->
     //   movk    x1, #0x0, lsl #48
     //   movk    x1, #0x0, lsl #32
     //   movk    x1, #0x0, lsl #16
     //   movk    x1, #0x0
-    //   stp x0, x1, [sp, #-16]!
-    //   bl *HandlerFuncAddr createIndirectCall ->
+    //   stp x0, x1, [sp, #-16]!    (2)
+    //   str x30, [sp, #-16]!    (3)
     //   adr x0 *HandlerFuncAddr -> adrp + add
-    //   blr x0
+    //   blr x0   (__bolt_instr_ind_call_handler_func)
+    //   ldr x30, [sp], #16   (3)
+    //   ldp x0, x1, [sp], #16   (2)
+    //   mov x0, target ; move target address to used register
+    //   ldp x0, x1, [sp], #16   (1)
+
+    InstructionListType Insts;
     Insts.emplace_back();
-    createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
+    createPushRegisters(Insts.back(), getIntArgRegister(0),
+                        getIntArgRegister(1));
     Insts.emplace_back(CallInst);
-    convertIndirectCallToLoad(Insts.back(), AArch64::X0);
+    convertIndirectCallToLoad(Insts.back(), getIntArgRegister(0));
     InstructionListType LoadImm =
         createLoadImmediate(getIntArgRegister(1), CallSiteID);
     Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end());
     Insts.emplace_back();
-    createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
+    createPushRegisters(Insts.back(), getIntArgRegister(0),
+                        getIntArgRegister(1));
+    Insts.emplace_back();
+    storeReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
     Insts.resize(Insts.size() + 2);
-    InstructionListType Addr =
-        materializeAddress(HandlerFuncAddr, Ctx, AArch64::X0);
+    InstructionListType Addr = materializeAddress(
+        HandlerFuncAddr, Ctx, CallInst.getOperand(0).getReg());
     assert(Addr.size() == 2 && "Invalid Addr size");
     std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
+
+    Insts.emplace_back();
+    createIndirectCallInst(Insts.back(), false,
+                           CallInst.getOperand(0).getReg());
+
+    Insts.emplace_back();
+    loadReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
+
     Insts.emplace_back();
-    createIndirectCallInst(Insts.back(), isTailCall(CallInst), AArch64::X0);
+    createPopRegisters(Insts.back(), getIntArgRegister(0),
+                       getIntArgRegister(1));
 
-    // Carry over metadata including tail call marker if present.
-    stripAnnotations(Insts.back());
-    moveAnnotations(std::move(CallInst), Insts.back());
+    // move x0 to indirect call register
+    Insts.emplace_back();
+    Insts.back().setOpcode(AArch64::ORRXrs);
+    Insts.back().insert(Insts.back().begin(),
+                        MCOperand::createReg(CallInst.getOperand(0).getReg()));
+    Insts.back().insert(Insts.back().begin() + 1,
+                        MCOperand::createReg(AArch64::XZR));
+    Insts.back().insert(Insts.back().begin() + 2,
+                        MCOperand::createReg(getIntArgRegister(0)));
+    Insts.back().insert(Insts.back().begin() + 3, MCOperand::createImm(0));
+
+    Insts.emplace_back();
+    createPopRegisters(Insts.back(), getIntArgRegister(0),
+                       getIntArgRegister(1));
 
     return Insts;
   }
@@ -2666,12 +2696,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
   createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline,
                                           const MCSymbol *IndCallHandler,
                                           MCContext *Ctx) override {
-    // Code sequence used to check whether InstrTampoline was initialized
+    // Code sequence used to check whether InstrTrampoline was initialized
     // and call it if so, returns via IndCallHandler
-    //   stp     x0, x1, [sp, #-16]!
-    //   mrs     x1, nzcv
-    //   adr     x0, InstrTrampoline -> adrp + add
-    //   ldr     x0, [x0]
+    //   adrp    x0, InstrTrampoline
+    //   ldr     x0, [x0, #lo12:InstrTrampoline]
     //   subs    x0, x0, #0x0
     //   b.eq    IndCallHandler
     //   str     x30, [sp, #-16]!
@@ -2679,30 +2707,42 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     //   ldr     x30, [sp], #16
     //   b       IndCallHandler
     InstructionListType Insts;
+
+    // load handler address
+    MCInst InstAdrp;
+    InstAdrp.setOpcode(AArch64::ADRP);
+    InstAdrp.addOperand(MCOperand::createReg(getIntArgRegister(0)));
+    InstAdrp.addOperand(MCOperand::createImm(0));
+    setOperandToSymbolRef(InstAdrp, /* OpNum */ 1, InstrTrampoline,
+                          /* Addend */ 0, Ctx, ELF::R_AARCH64_ADR_GOT_PAGE);
+    Insts.emplace_back(InstAdrp);
+
+    MCInst InstLoad;
+    InstLoad.setOpcode(AArch64::LDRXui);
+    InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
+    InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
+    InstLoad.addOperand(MCOperand::createImm(0));
+    setOperandToSymbolRef(InstLoad, /* OpNum */ 2, InstrTrampoline,
+                          /* Addend */ 0, Ctx, ELF::R_AARCH64_LD64_GOT_LO12_NC);
+    Insts.emplace_back(InstLoad);
+
+    InstructionListType CmpJmp =
+        createCmpJE(getIntArgRegister(0), 0, IndCallHandler, Ctx);
+    Insts.insert(Insts.end(), CmpJmp.begin(), CmpJmp.end());
+
     Insts.emplace_back();
-    createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
-    Insts.emplace_back();
-    getSystemFlag(Insts.back(), getIntArgRegister(1));
-    Insts.emplace_back();
-    Insts.emplace_back();
-    InstructionListType Addr =
-        materializeAddress(InstrTrampoline, Ctx, AArch64::X0);
-    std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
-    assert(Addr.size() == 2 && "Invalid Addr size");
-    Insts.emplace_back();
-    loadReg(Insts.back(), AArch64::X0, AArch64::X0);
-    InstructionListType cmpJmp =
-        createCmpJE(AArch64::X0, 0, IndCallHandler, Ctx);
-    Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end());
-    Insts.emplace_back();
-    storeReg(Insts.back(), AArch64::LR, AArch64::SP);
+    storeReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
+
     Insts.emplace_back();
     Insts.back().setOpcode(AArch64::BLR);
-    Insts.back().addOperand(MCOperand::createReg(AArch64::X0));
+    Insts.back().addOperand(MCOperand::createReg(getIntArgRegister(0)));
+
     Insts.emplace_back();
-    loadReg(Insts.back(), AArch64::LR, AArch64::SP);
+    loadReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
+
     Insts.emplace_back();
-    createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true);
+    createDirectBranch(Insts.back(), IndCallHandler, Ctx);
+
     return Insts;
   }
 
diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp
index c0b8fc3d807c9..68cb36dd3fae4 100644
--- a/bolt/runtime/instr.cpp
+++ b/bolt/runtime/instr.cpp
@@ -1694,6 +1694,9 @@ instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
 extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
 {
 #if defined(__aarch64__)
+  // the target address is placed on stack
+  // the identifier of the indirect call site is placed in X1 register
+
   // clang-format off
   __asm__ __volatile__(SAVE_ALL
                        "ldp x0, x1, [sp, #288]\n"
@@ -1731,6 +1734,9 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
 extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
 {
 #if defined(__aarch64__)
+  // the target address is placed on stack
+  // the identifier of the indirect call site is placed in X1 register
+
   // clang-format off
   __asm__ __volatile__(SAVE_ALL
                        "ldp x0, x1, [sp, #288]\n"
diff --git a/bolt/runtime/sys_aarch64.h b/bolt/runtime/sys_aarch64.h
index b1d04f9d558e0..9cb8e022f58df 100644
--- a/bolt/runtime/sys_aarch64.h
+++ b/bolt/runtime/sys_aarch64.h
@@ -18,10 +18,12 @@
   "stp x24, x25, [sp, #-16]!\n"                                                \
   "stp x26, x27, [sp, #-16]!\n"                                                \
   "stp x28, x29, [sp, #-16]!\n"                                                \
-  "str x30, [sp,#-16]!\n"
+  "mrs x29, nzcv\n"                                                            \
+  "stp x29, x30, [sp, #-16]!\n"
 // Mirrors SAVE_ALL
 #define RESTORE_ALL                                                            \
-  "ldr x30, [sp], #16\n"                                                       \
+  "ldp x29, x30, [sp], #16\n"                                                  \
+  "msr nzcv, x29\n"                                                            \
   "ldp x28, x29, [sp], #16\n"                                                  \
   "ldp x26, x27, [sp], #16\n"                                                  \
   "ldp x24, x25, [sp], #16\n"                                                  \
diff --git a/bolt/test/runtime/AArch64/instrumentation-ind-call.c b/bolt/test/runtime/AArch64/instrumentation-ind-call.c
index f9056da333b4e..6ea9400f24d10 100644
--- a/bolt/test/runtime/AArch64/instrumentation-ind-call.c
+++ b/bolt/test/runtime/AArch64/instrumentation-ind-call.c
@@ -15,9 +15,65 @@ int main() {
 REQUIRES: system-linux,bolt-runtime
 
 RUN: %clang %cflags %s -o %t.exe -Wl,-q -no-pie -fpie
+RUN: llvm-objdump --disassemble-symbols=main %t.exe \
+RUN:   | FileCheck %s --check-prefix=CHECKINDIRECTREG
+
+CHECKINDIRECTREG: mov w0, #0xa
+CHECKINDIRECTREG-NEXT: mov w1, #0x14
+CHECKINDIRECTREG-NEXT: blr x8
 
 RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t.fdata \
-RUN:   -o %t.instrumented
+RUN:   -o %t.instrumented \
+RUN:   | FileCheck %s --check-prefix=CHECK-INSTR-LOG
+
+CHECK-INSTR-LOG: BOLT-INSTRUMENTER: Number of indirect call site descriptors: 1
+
+RUN: llvm-objdump --disassemble-symbols=main %t.instrumented \
+RUN:   | FileCheck %s --check-prefix=CHECK-INSTR-INDIRECTREG
+
+RUN: llvm-objdump --disassemble-symbols=__bolt_instr_ind_call_handler \
+RUN:   %t.instrumented | FileCheck %s --check-prefix=CHECK-INSTR-INDIR-CALL
+RUN: llvm-objdump --disassemble-symbols=__bolt_instr_ind_call_handler_func \
+RUN:   %t.instrumented | FileCheck %s --check-prefix=CHECK-INSTR-INDIR-CALL-FUNC
+
+CHECK-INSTR-INDIRECTREG: mov w0, #0xa
+CHECK-INSTR-INDIRECTREG-NEXT: mov w1, #0x14
+// store current values
+CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x1, {{.*}}
+// store the indirect target address in x0
+CHECK-INSTR-INDIRECTREG-NEXT: mov x0, x8
+// load callsite id into x1
+CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: str x30, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: adrp x8, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: add x8, {{.*}}
+// call instrumentation library handler function
+CHECK-INSTR-INDIRECTREG-NEXT: blr x8
+// restore registers saved before
+CHECK-INSTR-INDIRECTREG-NEXT: ldr x30, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: ldp x0, x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: mov x8, x0
+CHECK-INSTR-INDIRECTREG-NEXT: ldp x0, x1, {{.*}}
+// original indirect call instruction
+CHECK-INSTR-INDIRECTREG-NEXT: blr x8
+
+
+CHECK-INSTR-INDIR-CALL: __bolt_instr_ind_call_handler>:
+CHECK-INSTR-INDIR-CALL-NEXT: ret
+
+CHECK-INSTR-INDIR-CALL-FUNC: __bolt_instr_ind_call_handler_func>:
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: adrp x0
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: ldr x0
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: cmp x0, #0x0
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: b.eq{{.*}}__bolt_instr_ind_call_handler
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: str x30
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: blr x0
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: ldr x30
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: b{{.*}}__bolt_instr_ind_call_handler
 
 # Instrumented program needs to finish returning zero
 RUN: %t.instrumented | FileCheck %s -check-prefix=CHECK-OUTPUT

@yavtuk
Copy link
Contributor Author

yavtuk commented Feb 6, 2026

@yozhu hello, patch the same as I merge it before. Change only push/pop to stack x0, x1 & x30. Also handler gets value from stack with 288 offset. I think the error was with 282 offset. Which is not aligned 16 bytes, all tests are passed. I am going to do more verification next week.

@yozhu
Copy link
Contributor

yozhu commented Feb 7, 2026

@yavtuk Thanks Alexey! I'm going to run some internal testing on this patch next week and will let you know how it goes.

@yavtuk
Copy link
Contributor Author

yavtuk commented Feb 7, 2026

@yavtuk Thanks Alexey! I'm going to run some internal testing on this patch next week and will let you know how it goes.

Thanks a lot

@yavtuk yavtuk force-pushed the fix/indirect-call-instr-snippet branch from da22745 to a954a63 Compare February 10, 2026 13:24
@yavtuk yavtuk changed the title WIP: [bolt][aarch64] Change indirect call instrumentation snippet [bolt][aarch64] Change indirect call instrumentation snippet Feb 10, 2026
@github-actions
Copy link

github-actions bot commented Feb 11, 2026

✅ With the latest revision this PR passed the C/C++ code formatter.

@yavtuk
Copy link
Contributor Author

yavtuk commented Feb 11, 2026

@yozhu @paschalis-mpeis Can you verify the current changes on your sites?
I've reduced the instrumentation indirect snippet as much as possible

@yavtuk yavtuk force-pushed the fix/indirect-call-instr-snippet branch from d271ac4 to c9b9024 Compare February 12, 2026 12:40
@yavtuk yavtuk temporarily deployed to main-branch-only February 12, 2026 12:40 — with GitHub Actions Inactive
@yavtuk
Copy link
Contributor Author

yavtuk commented Feb 12, 2026

@yozhu re-check comments and snippet, what we have

stp     CallSiteIDReg, x30, [sp, -16]!  
movz/k  CallSiteIDReg, 1 <- depends on call site id value (from 1 to 4 instructions)
stp     BlrReg, CallSiteIDReg, [sp, -16]! ; push address and id for lib
adrp    BlrReg, __bolt_instr_ind_call_handler_func
add     BlrReg, BlrReg, #:lo12:__bolt_instr_ind_call_handler_func
blr     BlrReg
ldr     BlrReg, [sp], 16
ldp     CallSiteIDReg, x30, [sp], 16
blr     BlrReg 

@yozhu
Copy link
Contributor

yozhu commented Feb 12, 2026

@yozhu re-check comments and snippet, what we have

stp     CallSiteIDReg, x30, [sp, -16]!  
movz/k  CallSiteIDReg, 1 <- depends on call site id value (from 1 to 4 instructions)
stp     BlrReg, CallSiteIDReg, [sp, -16]! ; push address and id for lib
adrp    BlrReg, __bolt_instr_ind_call_handler_func
add     BlrReg, BlrReg, #:lo12:__bolt_instr_ind_call_handler_func
blr     BlrReg
ldr     BlrReg, [sp], 16
ldp     CallSiteIDReg, x30, [sp], 16
blr     BlrReg 

The new sequence and comment look correct to me. I'll start internal testing.

@yavtuk yavtuk force-pushed the fix/indirect-call-instr-snippet branch from c9b9024 to 185962b Compare February 13, 2026 10:52
@yavtuk yavtuk temporarily deployed to main-branch-only February 13, 2026 10:52 — with GitHub Actions Inactive
Indirect call instrumentation snippet uses x16 register in exit
handler to go to destination target

    __bolt_instr_ind_call_handler_func:
            msr  nzcv, x1
            ldp  x0, x1, [sp], llvm#16
            ldr  x16, [sp], llvm#16
            ldp  x0, x1, [sp], llvm#16
            br   x16	<-----

This patch adds the instrumentation snippet by calling instrumentation
runtime library through indirect call instruction and adding the wrapper
to store/load target value and the register for original indirect instruction.

Example:
            mov x16, foo

    infirectCall:
            adrp x8, Label
            add  x8, x8, #:lo12:Label
            blr x8

Before:

    Instrumented indirect call:
            stp     x0, x1, [sp, #-16]!
            mov     x0, x8
            movk    x1, #0x0, lsl llvm#48
            movk    x1, #0x0, lsl llvm#32
            movk    x1, #0x0, lsl llvm#16
            movk    x1, #0x0
            stp     x0, x1, [sp, #-16]!
            adrp    x0, __bolt_instr_ind_call_handler_func
            add     x0, x0, #:lo12:__bolt_instr_ind_call_handler_func
            blr     x0

    __bolt_instr_ind_call_handler:  (exit snippet)
            msr     nzcv, x1
            ldp     x0, x1, [sp], llvm#16
            ldr     x16, [sp], llvm#16
            ldp     x0, x1, [sp], llvm#16
            br      x16    <- overwrites the original value in X16

    __bolt_instr_ind_call_handler_func:  (entry snippet)
            stp     x0, x1, [sp, #-16]!
            mrs     x1, nzcv
            adrp    x0, __bolt_instr_ind_call_handler
            add     x0, x0, x0, #:lo12:__bolt_instr_ind_call_handler
            ldr     x0, [x0]
            cmp     x0, #0x0
            b.eq    __bolt_instr_ind_call_handler
            str     x30, [sp, #-16]!
            blr     x0     <--- runtime lib store/load all regs
            ldr     x30, [sp], llvm#16
            b       __bolt_instr_ind_call_handler

_________________________________________________________________________

After:

            mov     x16, foo
    infirectCall:
            adrp    x8, Label
            add     x8, x8, #:lo12:Label
            blr     x8

    Instrumented indirect call:
            stp     x0, x30, [sp, #-16]!
            movz/k  x0, 1
            stp     x8, x0, [sp, #-16]! ; push address and id for lib
            adrp    x8, __bolt_instr_ind_call_handler_func
            add     x8, x8, #:lo12:__bolt_instr_ind_call_handler_func
            blr     x8       <--- call trampoline instr lib
            ldr     x8, [sp], llvm#16 ; restore target address
            ldp     x0, x30, [sp], llvm#16
            blr     x8       <--- original indirect call instruction

    __bolt_instr_ind_call_handler:  (exit snippet)
            ret     <---- return to original function with indirect call

    __bolt_instr_ind_call_handler_func: (entry snippet)
            adrp    x0, __bolt_instr_ind_call_handler
            add     x0, x0, #:lo12:__bolt_instr_ind_call_handler
            ldr     x0, [x0]
            cmp     x0, #0x0
            b.eq    __bolt_instr_ind_call_handler
            str     x30, [sp, #-16]!
            blr     x0     <--- runtime lib store/load all regs
            ldr     x30, [sp], llvm#16
            b       __bolt_instr_ind_call_handler
@yavtuk yavtuk force-pushed the fix/indirect-call-instr-snippet branch from 185962b to be87959 Compare February 13, 2026 11:00
@yavtuk yavtuk temporarily deployed to main-branch-only February 13, 2026 11:00 — with GitHub Actions Inactive
@yavtuk
Copy link
Contributor Author

yavtuk commented Feb 13, 2026

@yozhu extended the test, added verification for blr x0 case.

Copy link
Contributor

@yozhu yozhu left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, thanks!

I also ran some internal testing based off from this patch and all looked good.

@yavtuk
Copy link
Contributor Author

yavtuk commented Feb 16, 2026

LGTM, thanks!

I also ran some internal testing based off from this patch and all looked good.

Thank you for the additional check.

@yavtuk yavtuk merged commit 12b561a into llvm:main Feb 16, 2026
10 checks passed
@yavtuk yavtuk deleted the fix/indirect-call-instr-snippet branch February 16, 2026 14:59
manasij7479 pushed a commit to manasij7479/llvm-project that referenced this pull request Feb 18, 2026
…0229)

Indirect call instrumentation snippet uses x16 register in exit handler
to go to destination target

    __bolt_instr_ind_call_handler_func:
            msr  nzcv, x1
            ldp  x0, x1, [sp], llvm#16
            ldr  x16, [sp], llvm#16
            ldp  x0, x1, [sp], llvm#16
            br   x16	<-----

This patch adds the instrumentation snippet by calling instrumentation
runtime library through indirect call instruction and adding the wrapper
to store/load target value and the register for original indirect
instruction.

Example:
            mov x16, foo

    infirectCall:
            adrp x8, Label
            add  x8, x8, #:lo12:Label
            blr x8

Before:

    Instrumented indirect call:
            stp     x0, x1, [sp, #-16]!
            mov     x0, x8
            movk    x1, #0x0, lsl llvm#48
            movk    x1, #0x0, lsl llvm#32
            movk    x1, #0x0, lsl llvm#16
            movk    x1, #0x0
            stp     x0, x1, [sp, #-16]!
            adrp    x0, __bolt_instr_ind_call_handler_func
            add     x0, x0, #:lo12:__bolt_instr_ind_call_handler_func
            blr     x0

    __bolt_instr_ind_call_handler:  (exit snippet)
            msr     nzcv, x1
            ldp     x0, x1, [sp], llvm#16
            ldr     x16, [sp], llvm#16
            ldp     x0, x1, [sp], llvm#16
            br      x16    <- overwrites the original value in X16

    __bolt_instr_ind_call_handler_func:  (entry snippet)
            stp     x0, x1, [sp, #-16]!
            mrs     x1, nzcv
            adrp    x0, __bolt_instr_ind_call_handler
            add     x0, x0, x0, #:lo12:__bolt_instr_ind_call_handler
            ldr     x0, [x0]
            cmp     x0, #0x0
            b.eq    __bolt_instr_ind_call_handler
            str     x30, [sp, #-16]!
            blr     x0     <--- runtime lib store/load all regs
            ldr     x30, [sp], llvm#16
            b       __bolt_instr_ind_call_handler


_________________________________________________________________________

After:

            mov     x16, foo
    infirectCall:
            adrp    x8, Label
            add     x8, x8, #:lo12:Label
            blr     x8

    Instrumented indirect call:
            stp     x0, x30, [sp, #-16]!
            mov     x0, callsiteid
            stp    x8, x0, [sp, #-16]!
            adrp    x8, __bolt_instr_ind_call_handler_func
            add     x8, x8, #:lo12:__bolt_instr_ind_call_handler_func
            blr     x8       <--- call trampoline instr lib
            ldr     x8, [sp], llvm#16
            ldp     x0, x30, [sp], llvm#16
            blr     x8       <--- original indirect call instruction

    // don't touch regs besides x0, x1
    __bolt_instr_ind_call_handler:  (exit snippet)
            ret     <---- return to original function with indirect call

    __bolt_instr_ind_call_handler_func: (entry snippet)
            adrp    x0, __bolt_instr_ind_call_handler
            add     x0, x0, #:lo12:__bolt_instr_ind_call_handler
            ldr     x0, [x0]
            cmp     x0, #0x0
            b.eq    __bolt_instr_ind_call_handler
            str     x30, [sp, #-16]!
            blr     x0     <--- runtime lib store/load all regs
            ldr     x30, [sp], llvm#16
            b       __bolt_instr_ind_call_handler
HendrikHuebner pushed a commit to HendrikHuebner/llvm-project that referenced this pull request Mar 10, 2026
…0229)

Indirect call instrumentation snippet uses x16 register in exit handler
to go to destination target

    __bolt_instr_ind_call_handler_func:
            msr  nzcv, x1
            ldp  x0, x1, [sp], llvm#16
            ldr  x16, [sp], llvm#16
            ldp  x0, x1, [sp], llvm#16
            br   x16	<-----

This patch adds the instrumentation snippet by calling instrumentation
runtime library through indirect call instruction and adding the wrapper
to store/load target value and the register for original indirect
instruction.

Example:
            mov x16, foo

    infirectCall:
            adrp x8, Label
            add  x8, x8, #:lo12:Label
            blr x8

Before:

    Instrumented indirect call:
            stp     x0, x1, [sp, #-16]!
            mov     x0, x8
            movk    x1, #0x0, lsl llvm#48
            movk    x1, #0x0, lsl llvm#32
            movk    x1, #0x0, lsl llvm#16
            movk    x1, #0x0
            stp     x0, x1, [sp, #-16]!
            adrp    x0, __bolt_instr_ind_call_handler_func
            add     x0, x0, #:lo12:__bolt_instr_ind_call_handler_func
            blr     x0

    __bolt_instr_ind_call_handler:  (exit snippet)
            msr     nzcv, x1
            ldp     x0, x1, [sp], llvm#16
            ldr     x16, [sp], llvm#16
            ldp     x0, x1, [sp], llvm#16
            br      x16    <- overwrites the original value in X16

    __bolt_instr_ind_call_handler_func:  (entry snippet)
            stp     x0, x1, [sp, #-16]!
            mrs     x1, nzcv
            adrp    x0, __bolt_instr_ind_call_handler
            add     x0, x0, x0, #:lo12:__bolt_instr_ind_call_handler
            ldr     x0, [x0]
            cmp     x0, #0x0
            b.eq    __bolt_instr_ind_call_handler
            str     x30, [sp, #-16]!
            blr     x0     <--- runtime lib store/load all regs
            ldr     x30, [sp], llvm#16
            b       __bolt_instr_ind_call_handler


_________________________________________________________________________

After:

            mov     x16, foo
    infirectCall:
            adrp    x8, Label
            add     x8, x8, #:lo12:Label
            blr     x8

    Instrumented indirect call:
            stp     x0, x30, [sp, #-16]!
            mov     x0, callsiteid
            stp    x8, x0, [sp, #-16]!
            adrp    x8, __bolt_instr_ind_call_handler_func
            add     x8, x8, #:lo12:__bolt_instr_ind_call_handler_func
            blr     x8       <--- call trampoline instr lib
            ldr     x8, [sp], llvm#16
            ldp     x0, x30, [sp], llvm#16
            blr     x8       <--- original indirect call instruction

    // don't touch regs besides x0, x1
    __bolt_instr_ind_call_handler:  (exit snippet)
            ret     <---- return to original function with indirect call

    __bolt_instr_ind_call_handler_func: (entry snippet)
            adrp    x0, __bolt_instr_ind_call_handler
            add     x0, x0, #:lo12:__bolt_instr_ind_call_handler
            ldr     x0, [x0]
            cmp     x0, #0x0
            b.eq    __bolt_instr_ind_call_handler
            str     x30, [sp, #-16]!
            blr     x0     <--- runtime lib store/load all regs
            ldr     x30, [sp], llvm#16
            b       __bolt_instr_ind_call_handler
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants