From ae38b8e09a972eb30b52ee919ab35d882ebe6acc Mon Sep 17 00:00:00 2001 From: xiongzhou4 Date: Wed, 12 Jun 2024 17:12:36 +0800 Subject: [PATCH 01/11] [AArch64] Add AArch64 support for inline. --- bolt/include/bolt/Core/MCPlusBuilder.h | 5 +-- bolt/lib/Passes/Inliner.cpp | 31 +++++++++++++++++++ .../Target/AArch64/AArch64MCPlusBuilder.cpp | 10 ++++++ bolt/test/AArch64/Inputs/inline-foo.c | 5 +++ bolt/test/AArch64/Inputs/inline-main.c | 5 +++ bolt/test/AArch64/Inputs/inlined.cpp | 23 ++++++++++++++ bolt/test/AArch64/Inputs/inlinee.cpp | 3 ++ bolt/test/AArch64/Inputs/jmp_opt.cpp | 7 +++++ bolt/test/AArch64/Inputs/jmp_opt2.cpp | 3 ++ bolt/test/AArch64/Inputs/jmp_opt3.cpp | 3 ++ bolt/test/AArch64/inline-debug-info.test | 20 ++++++++++++ bolt/test/AArch64/inlined-function-mixed.test | 11 +++++++ bolt/test/AArch64/jmp-optimization.test | 14 +++++++++ 13 files changed, 136 insertions(+), 4 deletions(-) create mode 100644 bolt/test/AArch64/Inputs/inline-foo.c create mode 100644 bolt/test/AArch64/Inputs/inline-main.c create mode 100644 bolt/test/AArch64/Inputs/inlined.cpp create mode 100644 bolt/test/AArch64/Inputs/inlinee.cpp create mode 100644 bolt/test/AArch64/Inputs/jmp_opt.cpp create mode 100644 bolt/test/AArch64/Inputs/jmp_opt2.cpp create mode 100644 bolt/test/AArch64/Inputs/jmp_opt3.cpp create mode 100644 bolt/test/AArch64/inline-debug-info.test create mode 100644 bolt/test/AArch64/inlined-function-mixed.test create mode 100644 bolt/test/AArch64/jmp-optimization.test diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index ba2b7f3b5a19..095915c5cbbd 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -575,10 +575,7 @@ public: return 0; } - virtual bool isPush(const MCInst &Inst) const { - llvm_unreachable("not implemented"); - return false; - } + virtual bool isPush(const MCInst &Inst) const { return false; } /// Return the width, in bytes, of the memory access performed by \p 
Inst, if /// this is a push instruction. Return zero otherwise. diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp index 8dcb8934f2d2..e88dc442d217 100644 --- a/bolt/lib/Passes/Inliner.cpp +++ b/bolt/lib/Passes/Inliner.cpp @@ -465,6 +465,37 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) { << ". Size change: " << SizeAfterInlining << " bytes.\n"); +// Skip situations where some A64 instructions can't be inlined: +// # Indirect branch, e.g., BR. +// # Branch instructions but used to make a function call. + if (BC.isAArch64()) { + auto &MIB = *BC.MIB; + bool skip = false; + for (const BinaryBasicBlock &BB : *TargetFunction) { + for (MCInst Inst : BB) { + if (MIB.isPseudo(Inst)) + continue; + + MIB.stripAnnotations(Inst, false); + + if (MIB.isBranch(Inst)) { + const BinaryBasicBlock *TargetBB = + TargetFunction->getBasicBlockForLabel(MIB.getTargetSymbol(Inst)); + if (MIB.isIndirectBranch(Inst) || !TargetBB) { + skip = true; + break; + } + } + } + if (skip) + break; + } + if (skip) { + ++InstIt; + continue; + } + } + std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction); DidInlining = true; diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index 642de6c3c618..8ce16d63d52e 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -132,6 +132,8 @@ public: const MCRegisterInfo *RegInfo) : MCPlusBuilder(Analysis, Info, RegInfo) {} + MCPhysReg getStackPointer() const override { return AArch64::SP; } + bool equals(const MCTargetExpr &A, const MCTargetExpr &B, CompFuncTy Comp) const override { const auto &AArch64ExprA = cast(A); @@ -1000,6 +1002,14 @@ public: int getUncondBranchEncodingSize() const override { return 28; } + bool createCall(MCInst &Inst, const MCSymbol *Target, + MCContext *Ctx) override { + Inst.setOpcode(AArch64::BL); + Inst.addOperand(MCOperand::createExpr( + 
MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx))); + return true; + } + InstructionListType createCmpJE(MCPhysReg RegNo, int64_t Imm, const MCSymbol *Target, MCContext *Ctx) const override { diff --git a/bolt/test/AArch64/Inputs/inline-foo.c b/bolt/test/AArch64/Inputs/inline-foo.c new file mode 100644 index 000000000000..1307c13f22ab --- /dev/null +++ b/bolt/test/AArch64/Inputs/inline-foo.c @@ -0,0 +1,5 @@ +#include "stub.h" + +void foo() { + puts("Hello world!\n"); +} diff --git a/bolt/test/AArch64/Inputs/inline-main.c b/bolt/test/AArch64/Inputs/inline-main.c new file mode 100644 index 000000000000..7853d2b63ce6 --- /dev/null +++ b/bolt/test/AArch64/Inputs/inline-main.c @@ -0,0 +1,5 @@ +extern void foo(); +int main() { + foo(); + return 0; +} diff --git a/bolt/test/AArch64/Inputs/inlined.cpp b/bolt/test/AArch64/Inputs/inlined.cpp new file mode 100644 index 000000000000..a6ff9e262a4b --- /dev/null +++ b/bolt/test/AArch64/Inputs/inlined.cpp @@ -0,0 +1,23 @@ +extern "C" int printf(const char*, ...); +extern const char* question(); + +inline int answer() __attribute__((always_inline)); +inline int answer() { return 42; } + +int main(int argc, char *argv[]) { + int ans; + if (argc == 1) { + ans = 0; + } else { + ans = argc; + } + printf("%s\n", question()); + for (int i = 0; i < 10; ++i) { + int x = answer(); + int y = answer(); + ans += x - y; + } + // padding to make sure question() is inlineable + asm("nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;"); + return ans; +} diff --git a/bolt/test/AArch64/Inputs/inlinee.cpp b/bolt/test/AArch64/Inputs/inlinee.cpp new file mode 100644 index 000000000000..edb7ab145798 --- /dev/null +++ b/bolt/test/AArch64/Inputs/inlinee.cpp @@ -0,0 +1,3 @@ +const char* question() { + return "What do you get if you multiply six by nine?"; +} diff --git a/bolt/test/AArch64/Inputs/jmp_opt.cpp b/bolt/test/AArch64/Inputs/jmp_opt.cpp new file mode 100644 index 000000000000..cd6d53c3567c --- /dev/null +++ 
b/bolt/test/AArch64/Inputs/jmp_opt.cpp @@ -0,0 +1,7 @@ +int g(); + +int main() { + int x = g(); + int y = x*x; + return y; +} diff --git a/bolt/test/AArch64/Inputs/jmp_opt2.cpp b/bolt/test/AArch64/Inputs/jmp_opt2.cpp new file mode 100644 index 000000000000..80b853d632ae --- /dev/null +++ b/bolt/test/AArch64/Inputs/jmp_opt2.cpp @@ -0,0 +1,3 @@ +int f() { + return 0; +} diff --git a/bolt/test/AArch64/Inputs/jmp_opt3.cpp b/bolt/test/AArch64/Inputs/jmp_opt3.cpp new file mode 100644 index 000000000000..7fb55116369a --- /dev/null +++ b/bolt/test/AArch64/Inputs/jmp_opt3.cpp @@ -0,0 +1,3 @@ +int f(); + +int g() { return f(); } diff --git a/bolt/test/AArch64/inline-debug-info.test b/bolt/test/AArch64/inline-debug-info.test new file mode 100644 index 000000000000..e20e5e31ed9d --- /dev/null +++ b/bolt/test/AArch64/inline-debug-info.test @@ -0,0 +1,20 @@ +## Check that BOLT correctly prints and updates debug info for inlined +## functions. + +# REQUIRES: system-linux + +# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \ +# RUN: -I%p/../Inputs -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \ +# RUN: --print-only=main --print-after-lowering --force-inline=foo \ +# RUN: -o %t.bolt \ +# RUN: | FileCheck %s + +## The call to puts() should come from inline-foo.c: +# CHECK: bl {{.*}} # debug line {{.*}}inline-foo.c:4:3 + +# RUN: llvm-objdump --disassemble-symbols=main -d --line-numbers %t.bolt \ +# RUN: | FileCheck %s -check-prefix=CHECK-OBJDUMP + +## Dump of main() should include debug info from inline-foo.c after inlining: +# CHECK-OBJDUMP: inline-foo.c:4 diff --git a/bolt/test/AArch64/inlined-function-mixed.test b/bolt/test/AArch64/inlined-function-mixed.test new file mode 100644 index 000000000000..5a87bdde9535 --- /dev/null +++ b/bolt/test/AArch64/inlined-function-mixed.test @@ -0,0 +1,11 @@ +# Make sure inlining from a unit with debug info into unit without +# debug info does not cause a crash. 
+ +RUN: %clangxx %cxxflags %S/Inputs/inlined.cpp -c -o %T/inlined.o +RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g +RUN: %clangxx %cxxflags %T/inlined.o %T/inlinee.o -o %t + +RUN: llvm-bolt %t -o %t.bolt --update-debug-sections --reorder-blocks=reverse \ +RUN: --inline-small-functions --force-inline=main | FileCheck %s + +CHECK-NOT: BOLT: 0 out of {{.*}} functions were overwritten diff --git a/bolt/test/AArch64/jmp-optimization.test b/bolt/test/AArch64/jmp-optimization.test new file mode 100644 index 000000000000..92f4b9a14f0f --- /dev/null +++ b/bolt/test/AArch64/jmp-optimization.test @@ -0,0 +1,14 @@ +# Tests the optimization of functions that just do a tail call in the beginning. + +# This test has commands that rely on shell capabilities that won't execute +# correctly on Windows e.g. unsupported parameter expansion +REQUIRES: shell + +RUN: %clang %cflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t +RUN: llvm-bolt -inline-small-functions %t -o %t.bolt +RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s + +CHECK:
: +CHECK-NOT: bl +CHECK: mov w0, wzr +CHECK: ret -- Gitee From 71cb525635dc9387ebff1074fb8ac54044b7484a Mon Sep 17 00:00:00 2001 From: rfwang07 Date: Fri, 21 Jun 2024 11:16:44 +0800 Subject: [PATCH 02/11] [Bolt] Solving pie support issue --- bolt/lib/Core/BinaryContext.cpp | 25 +++++++++++++++++++---- bolt/test/perf2bolt/Inputs/perf_test.c | 26 ++++++++++++++++++++++++ bolt/test/perf2bolt/Inputs/perf_test.lds | 13 ++++++++++++ bolt/test/perf2bolt/lit.local.cfg | 4 ++++ bolt/test/perf2bolt/perf_test.test | 17 ++++++++++++++++ bolt/unittests/Core/BinaryContext.cpp | 21 +++++++++++++++++++ 6 files changed, 102 insertions(+), 4 deletions(-) create mode 100644 bolt/test/perf2bolt/Inputs/perf_test.c create mode 100644 bolt/test/perf2bolt/Inputs/perf_test.lds create mode 100644 bolt/test/perf2bolt/lit.local.cfg create mode 100644 bolt/test/perf2bolt/perf_test.test diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 1a8fe1bc9900..c876be20826c 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1901,10 +1901,27 @@ BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, // Find a segment with a matching file offset. for (auto &KV : SegmentMapInfo) { const SegmentInfo &SegInfo = KV.second; - if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) { - // Use segment's aligned memory offset to calculate the base address. - const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment); - return MMapAddress - MemOffset; + // FileOffset comes from the perf event and is equal to + // alignDown(SegInfo.FileOffset, pagesize). Because the pagesize may + // differ from SegInfo.Alignment, align both FileOffset and + // SegInfo.FileOffset down to SegInfo.Alignment first, and only then + // compare them for equality. 
+ if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == + alignDown(FileOffset, SegInfo.Alignment)) { + // The function's offset from the base address in the VAS is aligned to + // the pagesize, not to SegInfo.Alignment. The pagesize is not in perf events. + // However, the ELF specification says that SegInfo.FileOffset should equal + // to SegInfo.Address, modulo the pagesize. + // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf + + // So alignDown(SegInfo.Address, pagesize) can be calculated by: + // alignDown(SegInfo.Address, pagesize) + // = SegInfo.Address - (SegInfo.Address % pagesize) + // = SegInfo.Address - (SegInfo.FileOffset % pagesize) + // = SegInfo.Address - SegInfo.FileOffset + + // alignDown(SegInfo.FileOffset, pagesize) + // = SegInfo.Address - SegInfo.FileOffset + FileOffset + return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset); } } diff --git a/bolt/test/perf2bolt/Inputs/perf_test.c b/bolt/test/perf2bolt/Inputs/perf_test.c new file mode 100644 index 000000000000..ff5ecf7a8752 --- /dev/null +++ b/bolt/test/perf2bolt/Inputs/perf_test.c @@ -0,0 +1,26 @@ +#include +#include +#include + +int add(int a, int b) { return a + b; } +int minus(int a, int b) { return a - b; } +int multiple(int a, int b) { return a * b; } +int divide(int a, int b) { + if (b == 0) + return 0; + return a / b; +} + +int main() { + int a = 16; + int b = 8; + + for (int i = 1; i < 100000; i++) { + add(a, b); + minus(a, b); + multiple(a, b); + divide(a, b); + } + + return 0; +} diff --git a/bolt/test/perf2bolt/Inputs/perf_test.lds b/bolt/test/perf2bolt/Inputs/perf_test.lds new file mode 100644 index 000000000000..9cb4ebbf1e9f --- /dev/null +++ b/bolt/test/perf2bolt/Inputs/perf_test.lds @@ -0,0 +1,13 @@ +SECTIONS { + . = SIZEOF_HEADERS; + .interp : { *(.interp) } + .note.gnu.build-id : { *(.note.gnu.build-id) } + . = 0x212e8; + .dynsym : { *(.dynsym) } + . = 0x31860; + .text : { *(.text*) } + . = 0x41c20; + .fini_array : { *(.fini_array) } + . 
= 0x54e18; + .data : { *(.data) } +} diff --git a/bolt/test/perf2bolt/lit.local.cfg b/bolt/test/perf2bolt/lit.local.cfg new file mode 100644 index 000000000000..87a96ec342b0 --- /dev/null +++ b/bolt/test/perf2bolt/lit.local.cfg @@ -0,0 +1,4 @@ +import shutil + +if shutil.which("perf") is not None: + config.available_features.add("perf") diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test new file mode 100644 index 000000000000..fe6e015abaa1 --- /dev/null +++ b/bolt/test/perf2bolt/perf_test.test @@ -0,0 +1,17 @@ +# Check perf2bolt binary function which was compiled with pie + +REQUIRES: system-linux, perf + +RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -Wl,--script=%S/Inputs/perf_test.lds -o %t +RUN: perf record -e cycles:u -o %t2 -- %t +RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s + +CHECK-NOT: PERF2BOLT-ERROR +CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection. + +RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4 +RUN: perf record -e cycles:u -o %t5 -- %t4 +RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id 2>&1 | FileCheck %s --check-prefix=CHECK-NO-PIE + +CHECK-NO-PIE-NOT: PERF2BOLT-ERROR +CHECK-NO-PIE-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection. diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp index bac264141251..5a80cb4a2838 100644 --- a/bolt/unittests/Core/BinaryContext.cpp +++ b/bolt/unittests/Core/BinaryContext.cpp @@ -83,3 +83,24 @@ TEST_P(BinaryContextTester, BaseAddress) { BaseAddress = BC->getBaseAddressForMapping(0x7f13f5556000, 0x137a000); ASSERT_FALSE(BaseAddress.has_value()); } + +TEST_P(BinaryContextTester, BaseAddress2) { + // Check that the base address calculation is correct for a binary whose + // segment alignment in the ELF file differs from the pagesize. 
+ // The segment layout is as follows: + BC->SegmentMapInfo[0] = SegmentInfo{0, 0x2177c, 0, 0x2177c, 0x10000}; + BC->SegmentMapInfo[0x31860] = + SegmentInfo{0x31860, 0x370, 0x21860, 0x370, 0x10000}; + BC->SegmentMapInfo[0x41c20] = + SegmentInfo{0x41c20, 0x1f8, 0x21c20, 0x1f8, 0x10000}; + BC->SegmentMapInfo[0x54e18] = + SegmentInfo{0x54e18, 0x51, 0x24e18, 0x51, 0x10000}; + + std::optional BaseAddress = + BC->getBaseAddressForMapping(0xaaaaea444000, 0x21000); + ASSERT_TRUE(BaseAddress.has_value()); + ASSERT_EQ(*BaseAddress, 0xaaaaea413000ULL); + + BaseAddress = BC->getBaseAddressForMapping(0xaaaaea444000, 0x11000); + ASSERT_FALSE(BaseAddress.has_value()); +} -- Gitee From 711159a174b839a52a7a58e753a48f5327e7d528 Mon Sep 17 00:00:00 2001 From: rfwang07 Date: Fri, 21 Jun 2024 11:23:42 +0800 Subject: [PATCH 03/11] [BOLT][AArch64] Don't change layout in PatchEntries --- bolt/lib/Passes/PatchEntries.cpp | 11 ++++++++ bolt/test/AArch64/patch-entries.s | 36 ++++++++++++++++++++++++ bolt/unittests/Core/BinaryContext.cpp | 40 +++++++++++++++++++++++++++ 3 files changed, 87 insertions(+) create mode 100644 bolt/test/AArch64/patch-entries.s diff --git a/bolt/lib/Passes/PatchEntries.cpp b/bolt/lib/Passes/PatchEntries.cpp index 02a044d8b2f6..ee7512d89962 100644 --- a/bolt/lib/Passes/PatchEntries.cpp +++ b/bolt/lib/Passes/PatchEntries.cpp @@ -98,6 +98,17 @@ void PatchEntries::runOnFunctions(BinaryContext &BC) { }); if (!Success) { + // We can't change output layout for AArch64 due to LongJmp pass + if (BC.isAArch64()) { + if (opts::ForcePatch) { + errs() << "BOLT-ERROR: unable to patch entries in " << Function + << "\n"; + exit(1); + } + + continue; + } + // If the original function entries cannot be patched, then we cannot // safely emit new function body. 
errs() << "BOLT-WARNING: failed to patch entries in " << Function diff --git a/bolt/test/AArch64/patch-entries.s b/bolt/test/AArch64/patch-entries.s new file mode 100644 index 000000000000..cf6f72a0b80d --- /dev/null +++ b/bolt/test/AArch64/patch-entries.s @@ -0,0 +1,36 @@ +# This test checks patch entries functionality + +# REQUIRES: system-linux + +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \ +# RUN: %s -o %t.o +# RUN: %clang %cflags -pie %t.o -o %t.exe -nostdlib -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt --use-old-text=0 --lite=0 --skip-funcs=_start +# RUN: llvm-objdump -dz %t.bolt | FileCheck %s + +# CHECK: : +# CHECK-NEXT: adrp x16, 0x[[#%x,ADRP:]] +# CHECK-NEXT: add x16, x16, #0x[[#%x,ADD:]] +# CHECK-NEXT: br x16 + +# CHECK: [[#ADRP + ADD]] : +# CHECK-NEXT: [[#ADRP + ADD]]: {{.*}} ret + +.text +.balign 4 +.global pathedEntries +.type pathedEntries, %function +pathedEntries: + .rept 32 + nop + .endr + ret +.size pathedEntries, .-pathedEntries + +.global _start +.type _start, %function +_start: + bl pathedEntries + .inst 0xdeadbeef + ret +.size _start, .-_start diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp index 5a80cb4a2838..7ac1c1435759 100644 --- a/bolt/unittests/Core/BinaryContext.cpp +++ b/bolt/unittests/Core/BinaryContext.cpp @@ -62,6 +62,46 @@ INSTANTIATE_TEST_SUITE_P(X86, BinaryContextTester, INSTANTIATE_TEST_SUITE_P(AArch64, BinaryContextTester, ::testing::Values(Triple::aarch64)); +TEST_P(BinaryContextTester, FlushPendingRelocCALL26) { + if (GetParam() != Triple::aarch64) + GTEST_SKIP(); + + // This test checks that encodeValueAArch64 used by flushPendingRelocations + // returns correctly encoded values for CALL26 relocation for both backward + // and forward branches. 
+ // + // The offsets layout is: + // 4: func1 + // 8: bl func1 + // 12: bl func2 + // 16: func2 + + char Data[20] = {}; + BinarySection &BS = BC->registerOrUpdateSection( + ".text", ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC, + (uint8_t *)Data, sizeof(Data), 4); + MCSymbol *RelSymbol1 = BC->getOrCreateGlobalSymbol(4, "Func1"); + ASSERT_TRUE(RelSymbol1); + BS.addRelocation(8, RelSymbol1, ELF::R_AARCH64_CALL26, 0, 0, true); + MCSymbol *RelSymbol2 = BC->getOrCreateGlobalSymbol(16, "Func2"); + ASSERT_TRUE(RelSymbol2); + BS.addRelocation(12, RelSymbol2, ELF::R_AARCH64_CALL26, 0, 0, true); + + std::error_code EC; + SmallVector Vect(sizeof(Data)); + raw_svector_ostream OS(Vect); + + BS.flushPendingRelocations(OS, [&](const MCSymbol *S) { + return S == RelSymbol1 ? 4 : S == RelSymbol2 ? 16 : 0; + }); + + const uint8_t Func1Call[4] = {255, 255, 255, 151}; + const uint8_t Func2Call[4] = {1, 0, 0, 148}; + + EXPECT_FALSE(memcmp(Func1Call, &Vect[8], 4)) << "Wrong backward call value\n"; + EXPECT_FALSE(memcmp(Func2Call, &Vect[12], 4)) << "Wrong forward call value\n"; +} + #endif TEST_P(BinaryContextTester, BaseAddress) { -- Gitee From ef2bcc60dae093399c98bd30c4ef14bfa70d05b6 Mon Sep 17 00:00:00 2001 From: rfwang07 Date: Thu, 25 Jul 2024 14:45:53 +0800 Subject: [PATCH 04/11] Add CFG block count correction optimization. 
--- bolt/include/bolt/Core/BinaryBasicBlock.h | 59 +- .../bolt/Core/BinaryBasicBlockFeature.h | 268 ++++++++ bolt/include/bolt/Passes/FeatureMiner.h | 176 ++++++ bolt/include/bolt/Passes/StaticBranchInfo.h | 108 ++++ bolt/include/bolt/Profile/DataReader.h | 93 ++- bolt/lib/Core/BinaryBasicBlockFeature.cpp | 21 + bolt/lib/Core/CMakeLists.txt | 1 + bolt/lib/Passes/CMakeLists.txt | 2 + bolt/lib/Passes/FeatureMiner.cpp | 572 ++++++++++++++++++ bolt/lib/Passes/StaticBranchInfo.cpp | 143 +++++ bolt/lib/Profile/DataReader.cpp | 120 +++- bolt/lib/Rewrite/RewriteInstance.cpp | 6 + 12 files changed, 1557 insertions(+), 12 deletions(-) create mode 100644 bolt/include/bolt/Core/BinaryBasicBlockFeature.h create mode 100644 bolt/include/bolt/Passes/FeatureMiner.h create mode 100644 bolt/include/bolt/Passes/StaticBranchInfo.h create mode 100644 bolt/lib/Core/BinaryBasicBlockFeature.cpp create mode 100644 bolt/lib/Passes/FeatureMiner.cpp create mode 100644 bolt/lib/Passes/StaticBranchInfo.cpp diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h index bc95e2c4de3a..1e55a40ab64b 100644 --- a/bolt/include/bolt/Core/BinaryBasicBlock.h +++ b/bolt/include/bolt/Core/BinaryBasicBlock.h @@ -15,6 +15,7 @@ #ifndef BOLT_CORE_BINARY_BASIC_BLOCK_H #define BOLT_CORE_BINARY_BASIC_BLOCK_H +#include "bolt/Core/BinaryBasicBlockFeature.h" #include "bolt/Core/FunctionLayout.h" #include "bolt/Core/MCPlus.h" #include "llvm/ADT/GraphTraits.h" @@ -25,6 +26,7 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include namespace llvm { class MCCodeEmitter; @@ -137,6 +139,12 @@ private: /// Last computed hash value. 
mutable uint64_t Hash{0}; + std::set ChildrenSet; + + std::set ParentSet; + + BinaryBasicBlockFeature BlockFeatures; + private: BinaryBasicBlock() = delete; BinaryBasicBlock(const BinaryBasicBlock &) = delete; @@ -375,11 +383,14 @@ public: /// If the basic block ends with a conditional branch (possibly followed by /// an unconditional branch) and thus has 2 successors, return a successor /// corresponding to a jump condition which could be true or false. - /// Return nullptr if the basic block does not have a conditional jump. + /// If the basic block has exactly one successor, return that successor + /// regardless of \p Condition. Return nullptr otherwise. BinaryBasicBlock *getConditionalSuccessor(bool Condition) { - if (succ_size() != 2) - return nullptr; - return Successors[Condition == true ? 0 : 1]; + if (succ_size() == 2) + return Successors[Condition ? 0 : 1]; + if (succ_size() == 1) + return Successors[0]; + return nullptr; } const BinaryBasicBlock *getConditionalSuccessor(bool Condition) const { @@ -400,6 +411,13 @@ public: return const_cast(this)->getFallthrough(); } + /// Return branch info corresponding to the first (or only) successor. + const BinaryBranchInfo &getOnlyBranchInfo() const { + assert(!BranchInfo.empty() && + "could only be called for blocks with at least 1 successor"); + return BranchInfo[0]; + } + /// Return branch info corresponding to a taken branch. const BinaryBranchInfo &getTakenBranchInfo() const { assert(BranchInfo.size() == 2 && @@ -808,6 +826,36 @@ public: OutputAddressRange.second = Address; } + /// Sets features of this BB. + void setFeatures(BinaryBasicBlockFeature BBF) { + BlockFeatures = BBF; + } + + /// Gets a copy of the numeric features of this BB. + BinaryBasicBlockFeature getFeatures() const { + return BlockFeatures; + } + + /// Gets the set of children of this BB (read-only view, no copy). + const std::set &getChildrenSet() const { + return ChildrenSet; + } + + /// Gets the set of parents of this BB (read-only view, no copy). + const std::set &getParentSet() const { + return ParentSet; + } + + /// Inserts children sets of this BB. 
+ void insertChildrenSet(BinaryBasicBlock *Node) { + ChildrenSet.insert(Node); + } + + /// Inserts \p Node into the parent set of this BB. + void insertParentSet(BinaryBasicBlock *Node) { + ParentSet.insert(Node); + } + /// Gets the memory address range of this BB in the input binary. std::pair getInputAddressRange() const { return InputRange; @@ -967,7 +1015,8 @@ private: #if defined(LLVM_ON_UNIX) /// Keep the size of the BinaryBasicBlock within a reasonable size class /// (jemalloc bucket) on Linux -static_assert(sizeof(BinaryBasicBlock) <= 256); +/// The size threshold is expanded from 256 to 2048 to accommodate the extra BB features. +static_assert(sizeof(BinaryBasicBlock) <= 2048, ""); #endif bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS); diff --git a/bolt/include/bolt/Core/BinaryBasicBlockFeature.h b/bolt/include/bolt/Core/BinaryBasicBlockFeature.h new file mode 100644 index 000000000000..2b4809b1a20b --- /dev/null +++ b/bolt/include/bolt/Core/BinaryBasicBlockFeature.h @@ -0,0 +1,268 @@ +//===- bolt/Core/BinaryBasicBlockFeature.h - Low-level basic block -----*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Features of BinaryBasicBlock +// +//===----------------------------------------------------------------------===// + +#ifndef BOLT_CORE_BINARY_BASIC_BLOCK_FEATURE_H +#define BOLT_CORE_BINARY_BASIC_BLOCK_FEATURE_H + +#include "bolt/Core/FunctionLayout.h" +#include "bolt/Core/MCPlus.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +namespace llvm { + +namespace bolt { + +class BinaryBasicBlockFeature { + +public: + int32_t Opcode; + + int16_t Direction; + + int32_t CmpOpcode; + + int16_t LoopHeader; + + int16_t ProcedureType; + + int64_t Count; + + int64_t FallthroughCount; + + int64_t TotalLoops; + + int64_t LoopDepth; + + int64_t LoopNumBlocks; + + int64_t LocalExitingBlock; + + int64_t LocalLatchBlock; + + int64_t LocalLoopHeader; + + int64_t Call; + + int64_t DeltaTaken; + + int64_t NumLoads; + + int64_t NumCalls; + + int64_t OperandRAType; + + int64_t OperandRBType; + + int64_t BasicBlockSize; + + int64_t NumBasicBlocks; + + int64_t HasIndirectCalls; + + std::vector EndOpcode_vec; + + std::vector LoopHeader_vec; + + std::vector Backedge_vec; + + std::vector Exit_vec; + + std::vector Call_vec; + + std::vector BasicBlockSize_vec; + + std::vector InferenceFeatures; + + uint64_t FuncExec; + + int32_t ParentChildNum; + + int32_t ParentCount; + + int32_t ChildParentNum; + + int32_t ChildCount; + +public: + void setOpcode(const int32_t &BlockOpcode) { Opcode = BlockOpcode; } + + void setDirection(const int16_t &BlockDirection) { + Direction = BlockDirection; + } + + void setCmpOpcode(const int32_t &BlockCmpOpcode) { + CmpOpcode = BlockCmpOpcode; + } + + void setLoopHeader(const int16_t &BlockLoopHeader) { + LoopHeader = BlockLoopHeader; + } + 
+ void setProcedureType(const int16_t &BlockProcedureType) { + ProcedureType = BlockProcedureType; + } + + void setCount(const int64_t &BlockCount) { Count = BlockCount; } + + void setFallthroughCount(const int64_t &BlockFallthroughCount) { + FallthroughCount = BlockFallthroughCount; + } + + void setTotalLoops(const int64_t &BlockTotalLoops) { + TotalLoops = BlockTotalLoops; + } + + void setLoopDepth(const int64_t &BlockLoopDepth) { + LoopDepth = BlockLoopDepth; + } + + void setLoopNumBlocks(const int64_t &BlockLoopNumBlocks) { + LoopNumBlocks = BlockLoopNumBlocks; + } + + void setLocalExitingBlock(const int64_t &BlockLocalExitingBlock) { + LocalExitingBlock = BlockLocalExitingBlock; + } + + void setLocalLatchBlock(const int64_t &BlockLocalLatchBlock) { + LocalLatchBlock = BlockLocalLatchBlock; + } + + void setLocalLoopHeader(const int64_t &BlockLocalLoopHeader) { + LocalLoopHeader = BlockLocalLoopHeader; + } + + void setDeltaTaken(const int64_t &BlockDeltaTaken) { + DeltaTaken = BlockDeltaTaken; + } + + void setNumLoads(const int64_t &BlockNumLoads) { NumLoads = BlockNumLoads; } + + void setNumCalls(const int64_t &BlockNumCalls) { NumCalls = BlockNumCalls; } + + void setOperandRAType(const int64_t &BlockOperandRAType) { + OperandRAType = BlockOperandRAType; + } + + void setOperandRBType(const int64_t &BlockOperandRBType) { + OperandRBType = BlockOperandRBType; + } + + void setBasicBlockSize(const int64_t &BlockBasicBlockSize) { + BasicBlockSize = BlockBasicBlockSize; + } + + void setNumBasicBlocks(const int64_t &BlockNumBasicBlocks) { + NumBasicBlocks = BlockNumBasicBlocks; + } + + void setHasIndirectCalls(const int64_t &BlockHasIndirectCalls) { + HasIndirectCalls = BlockHasIndirectCalls; + } + + void setEndOpcodeVec(const int32_t &EndOpcode) { + EndOpcode_vec.push_back(EndOpcode); + } + + void setLoopHeaderVec(const int16_t &LoopHeader) { + LoopHeader_vec.push_back(LoopHeader); + } + + void setBackedgeVec(const int16_t &Backedge) { + 
Backedge_vec.push_back(Backedge); + } + + void setExitVec(const int16_t &Exit) { Exit_vec.push_back(Exit); } + + void setCallVec(const int16_t &Call) { Call_vec.push_back(Call); } + + void setBasicBlockSizeVec(const int64_t &BasicBlockSize) { + BasicBlockSize_vec.push_back(BasicBlockSize); + } + + void setFunExec(const uint64_t &BlockFuncExec) { FuncExec = BlockFuncExec; } + + void setParentChildNum(const int32_t &BlockParentChildNum) { + ParentChildNum = BlockParentChildNum; + } + + void setParentCount(const int32_t &BlockParentCount) { + ParentCount = BlockParentCount; + } + + void setChildParentNum(const int32_t &BlockChildParentNum) { + ChildParentNum = BlockChildParentNum; + } + + void setChildCount(const int32_t &BlockChildCount) { + ChildCount = BlockChildCount; + } + + void setInferenceFeatures() { + + if (Count == -1 || FallthroughCount == -1) { + return; + } + if (ParentChildNum == -1 && ParentCount == -1 && ChildParentNum == -1 && + ChildCount == -1) { + return; + } + + InferenceFeatures.push_back(static_cast(Direction)); + InferenceFeatures.push_back(static_cast(LoopHeader)); + InferenceFeatures.push_back(static_cast(ProcedureType)); + InferenceFeatures.push_back(static_cast(OperandRAType)); + InferenceFeatures.push_back(static_cast(OperandRBType)); + InferenceFeatures.push_back(static_cast(LoopHeader_vec[0])); + InferenceFeatures.push_back(static_cast(Backedge_vec[0])); + InferenceFeatures.push_back(static_cast(Exit_vec[0])); + InferenceFeatures.push_back(static_cast(LoopHeader_vec[1])); + InferenceFeatures.push_back(static_cast(Call_vec[0])); + InferenceFeatures.push_back(static_cast(LocalExitingBlock)); + InferenceFeatures.push_back(static_cast(HasIndirectCalls)); + InferenceFeatures.push_back(static_cast(LocalLatchBlock)); + InferenceFeatures.push_back(static_cast(LocalLoopHeader)); + InferenceFeatures.push_back(static_cast(Opcode)); + InferenceFeatures.push_back(static_cast(CmpOpcode)); + InferenceFeatures.push_back(static_cast(EndOpcode_vec[0])); 
+ InferenceFeatures.push_back(static_cast(EndOpcode_vec[1])); + InferenceFeatures.push_back(static_cast(FuncExec)); + InferenceFeatures.push_back(static_cast(NumBasicBlocks)); + InferenceFeatures.push_back(static_cast(BasicBlockSize)); + InferenceFeatures.push_back(static_cast(BasicBlockSize_vec[0])); + InferenceFeatures.push_back(static_cast(BasicBlockSize_vec[1])); + InferenceFeatures.push_back(static_cast(LoopNumBlocks)); + InferenceFeatures.push_back(static_cast(NumLoads)); + InferenceFeatures.push_back(static_cast(NumCalls)); + InferenceFeatures.push_back(static_cast(TotalLoops)); + InferenceFeatures.push_back(static_cast(DeltaTaken)); + InferenceFeatures.push_back(static_cast(LoopDepth)); + InferenceFeatures.push_back(static_cast(ParentChildNum)); + InferenceFeatures.push_back(static_cast(ParentCount)); + InferenceFeatures.push_back(static_cast(ChildParentNum)); + InferenceFeatures.push_back(static_cast(ChildCount)); + } + + std::vector getInferenceFeatures() { return InferenceFeatures; } +}; +} // namespace bolt +} // namespace llvm + +#endif \ No newline at end of file diff --git a/bolt/include/bolt/Passes/FeatureMiner.h b/bolt/include/bolt/Passes/FeatureMiner.h new file mode 100644 index 000000000000..6170aa62dc1c --- /dev/null +++ b/bolt/include/bolt/Passes/FeatureMiner.h @@ -0,0 +1,176 @@ +//===--- Passes/FeatureMiner.h +//---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// A very simple feature extractor based on Calder's paper +// Evidence-based static branch prediction using machine learning +// https://dl.acm.org/doi/10.1145/239912.239923 +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ +#define LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ + +#include "bolt/Core/BinaryData.h" +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Core/BinaryLoop.h" +#include "bolt/Passes/BinaryPasses.h" +#include "bolt/Passes/DominatorAnalysis.h" +#include "bolt/Passes/StaticBranchInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include + +namespace llvm { +namespace bolt { + +class FeatureMiner : public BinaryFunctionPass { +private: + std::unique_ptr SBI; + /// BasicBlockInfo - This structure holds feature information about the target + /// BasicBlock of either the taken or the fallthrough paths of a given branch. 
+  struct BasicBlockInfo {
+    std::optional BranchDominates; // 1 - dominates, 0 - does not dominate
+    std::optional
+        BranchPostdominates;  // 1 - postdominates, 0 - does not PD
+    std::optional LoopHeader; // 1 - loop header, 0 - not a loop header
+    std::optional Backedge;   // 1 - loop back, 0 - not a loop back
+    std::optional Exit;       // 1 - loop exit, 0 - not a loop exit
+    std::optional Call;       // 1 - program call, 0 - not a program call
+    std::optional NumCalls;
+    std::optional NumLoads;
+    std::optional NumStores;
+    std::optional EndOpcode; // 0 = NOTHING
+    std::string EndOpcodeStr = "UNDEF";
+    std::optional BasicBlockSize;
+    std::string FromFunName = "UNDEF";
+    uint32_t FromBb = 0; // zero rather than indeterminate when default-built
+    std::string ToFunName = "UNDEF";
+    uint32_t ToBb = 0; // zero rather than indeterminate when default-built
+
+    std::optional NumCallsExit;
+    std::optional NumCallsInvoke;
+    std::optional NumIndirectCalls;
+    std::optional NumTailCalls;
+  };
+
+  typedef std::unique_ptr BBIPtr;
+
+  /// BranchFeaturesInfo - This structure holds feature information about each
+  /// two-way branch from the program.
+ struct BranchFeaturesInfo { + std::string OpcodeStr = "UNDEF"; + std::string CmpOpcodeStr = "UNDEF"; + bool Simple = 0; + + std::optional Opcode; + std::optional CmpOpcode; + std::optional Count; + std::optional MissPredicted; + std::optional FallthroughCount; + std::optional FallthroughMissPredicted; + BBIPtr TrueSuccessor = std::make_unique(); + BBIPtr FalseSuccessor = std::make_unique(); + std::optional ProcedureType; // 1 - Leaf, 0 - NonLeaf, 2 - CallSelf + std::optional LoopHeader; // 1 — loop header, 0 - not a loop header + std::optional Direction; // 1 - Forward Branch, 0 - Backward Branch + + std::optional NumOuterLoops; + std::optional TotalLoops; + std::optional MaximumLoopDepth; + std::optional LoopDepth; + std::optional LoopNumExitEdges; + std::optional LoopNumExitBlocks; + std::optional LoopNumExitingBlocks; + std::optional LoopNumLatches; + std::optional LoopNumBlocks; + std::optional LoopNumBackEdges; + std::optional NumLoads; + std::optional NumStores; + + std::optional LocalExitingBlock; + std::optional LocalLatchBlock; + std::optional LocalLoopHeader; + std::optional Call; + + std::optional NumCalls; + std::optional NumCallsExit; + std::optional NumCallsInvoke; + std::optional NumIndirectCalls; + std::optional NumTailCalls; + std::optional NumSelfCalls; + + std::optional NumBasicBlocks; + + std::optional DeltaTaken; + + std::optional OperandRAType; + std::optional OperandRBType; + + std::optional BasicBlockSize; + + std::optional BranchOffset; + }; + + typedef std::unique_ptr BFIPtr; + + std::vector BranchesInfoSet; + + /// getProcedureType - Determines which category the function falls into: + /// Leaf, Non-leaf or Calls-self. + int8_t getProcedureType(BinaryFunction &Function, BinaryContext &BC); + + /// addSuccessorInfo - Discovers feature information for the target successor + /// basic block, and inserts it into the static branch info container. 
+  void addSuccessorInfo(BFIPtr const &BFI, BinaryFunction &Function,
+                        BinaryContext &BC, BinaryBasicBlock &BB, bool SuccType);
+
+  /// extractFeatures - Extracts the feature information for each two-way branch
+  /// from the program.
+  void extractFeatures(BinaryFunction &Function, BinaryContext &BC);
+
+  /// generateInstFeatures - Records the compare that feeds a branch in \p BFI.
+  /// If the instruction at position \p Index of \p BB is a compare, its opcode
+  /// and operand kinds are stored; otherwise the closest earlier compare in
+  /// \p BB supplies the opcode only. CmpOpcode is 0 when none is found.
+  void generateInstFeatures(BinaryContext &BC, BinaryBasicBlock &BB,
+                            BFIPtr const &BFI, int Index);
+  /// generateSuccessorFeatures - Appends the LoopHeader, Backedge, Exit, Call,
+  /// EndOpcode and BasicBlockSize of \p Successor to the matching vectors of
+  /// \p BBF, using -1 for any value that is not set.
+  void generateSuccessorFeatures(BBIPtr &Successor,
+                                 BinaryBasicBlockFeature *BBF);
+
+  /// dumpFeatures - Dumps the feature information about each two-way branch
+  /// from the program.
+  /// NOTE(review): FeatureMiner.cpp in this patch does not appear to define
+  /// this function; confirm the declaration is still needed.
+  void dumpFeatures(raw_ostream &Printer, uint64_t FunctionAddress,
+                    uint64_t FunctionFrequency);
+
+  /// dumpProfileData - Dumps a limited version of the input profile data
+  /// that contains only profile for conditional branches, unconditional
+  /// branches and terminators that aren't branches.
+ void dumpProfileData(BinaryFunction &Function, raw_ostream &Printer); + +public: + explicit FeatureMiner(const cl::opt &PrintPass) + : BinaryFunctionPass(PrintPass) {} + + std::ofstream trainPrinter; + + const char *getName() const override { return "feature-miner"; } + + void runOnFunctions(BinaryContext &BC) override; + void inferenceFeatures(BinaryFunction &Function); + void generateProfileFeatures(BinaryBasicBlock *BB, + BinaryBasicBlockFeature *BBF); +}; + +} // namespace bolt +} // namespace llvm + +#endif /* LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ */ diff --git a/bolt/include/bolt/Passes/StaticBranchInfo.h b/bolt/include/bolt/Passes/StaticBranchInfo.h new file mode 100644 index 000000000000..8de8df79359a --- /dev/null +++ b/bolt/include/bolt/Passes/StaticBranchInfo.h @@ -0,0 +1,108 @@ +//===------ Passes/StaticBranchInfo.h -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an auxiliary class to the feature miner, static branch probability +// and frequency passes. This class is responsible for finding loop info (loop +// back edges, loop exit edges and loop headers) of a function. It also finds +// basic block info (if a block contains store and call instructions) and if a +// basic block contains a call to the exit. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_
+#define LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_
+
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/BinaryLoop.h"
+#include "llvm/MC/MCSymbol.h"
+#include
+
+namespace llvm {
+namespace bolt {
+
+class StaticBranchInfo {
+
+public:
+  /// An edge indicates that a control flow may go from a basic block (source)
+  /// to another one (destination), and this pair of basic blocks will be used
+  /// to index maps and retrieve content of sets.
+  typedef std::pair Edge;
+
+private:
+  /// Holds the loop headers of a given function.
+  DenseSet LoopHeaders;
+
+  /// Holds the loop backedges of a given function.
+  DenseSet BackEdges;
+
+  /// Holds the loop exit edges of a given function.
+  DenseSet ExitEdges;
+
+  /// Holds the basic blocks of a given function
+  /// that contain at least one call instruction.
+  DenseSet CallSet;
+
+  /// Holds the basic blocks of a given function
+  /// that contain at least one store instruction.
+  DenseSet StoreSet;
+
+  /// Load and store counters. They start at zero so the getters never return
+  /// an indeterminate value.
+  /// NOTE(review): no member function visible in this patch assigns them;
+  /// confirm where they are meant to be updated.
+  unsigned NumLoads = 0;
+  unsigned NumStores = 0;
+
+public:
+  /// getNumLoads - Returns the load counter.
+  unsigned getNumLoads() const { return NumLoads; }
+
+  /// getNumStores - Returns the store counter.
+  unsigned getNumStores() const { return NumStores; }
+
+  /// findLoopEdgesInfo - Finds all loop back edges, loop exit edges
+  /// and loop headers within the function.
+  void findLoopEdgesInfo(const BinaryLoopInfo &LoopsInfo);
+
+  /// findBasicBlockInfo - Finds all call and store instructions within
+  /// the basic blocks of a given function.
+  void findBasicBlockInfo(const BinaryFunction &Function, BinaryContext &BC);
+
+  /// isBackEdge - Checks if the edge is a loop back edge.
+  bool isBackEdge(const Edge &CFGEdge) const;
+
+  /// isBackEdge - Checks if the edge from \p SrcBB to \p DstBB, identified by
+  /// the labels of the two blocks, is a loop back edge.
+  bool isBackEdge(const BinaryBasicBlock *SrcBB,
+                  const BinaryBasicBlock *DstBB) const;
+
+  /// isExitEdge - Checks if the edge is a loop exit edge.
+ bool isExitEdge(const BinaryLoop::Edge &CFGEdge) const; + + /// isExitEdge - Checks if the edge is a loop exit edge. + bool isExitEdge(const BinaryBasicBlock *SrcBB, + const BinaryBasicBlock *DstBB) const; + + /// isLoopHeader - Checks if the basic block is a loop header. + bool isLoopHeader(const BinaryBasicBlock *BB) const; + + /// hasCallInst - Checks if the basic block has a call instruction. + bool hasCallInst(const BinaryBasicBlock *BB) const; + + /// hasStoreInst - Checks if the basic block has a store instruction. + bool hasStoreInst(const BinaryBasicBlock *BB) const; + + /// countBackEdges - Compute the number of BB's successor that are back edges. + unsigned countBackEdges(BinaryBasicBlock *BB) const; + + /// countExitEdges - Compute the number of BB's successor that are exit edges. + unsigned countExitEdges(BinaryBasicBlock *BB) const; + + /// clear - Cleans up all the content from the data structs used. + void clear(); +}; + +} // namespace bolt +} // namespace llvm + +#endif /* LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ */ diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h index 916b4f7e218a..bf732d47c13a 100644 --- a/bolt/include/bolt/Profile/DataReader.h +++ b/bolt/include/bolt/Profile/DataReader.h @@ -22,6 +22,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include @@ -44,6 +45,15 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LBREntry &LBR) { return OS; } +extern "C" { +typedef void *(*CreateONNXRunnerFunc)(const char *); +typedef void (*DeleteONNXRunnerFunc)(void *); +typedef std::vector (*RunONNXModelFunc)(void *, + const std::vector &, + const std::vector &, + const std::vector &, int); +} + struct Location { bool IsSymbol; StringRef Name; @@ -263,7 +273,8 @@ struct FuncSampleData { class DataReader : public ProfileReaderBase { public: explicit DataReader(StringRef Filename) - : ProfileReaderBase(Filename), Diag(errs()) {} + 
: ProfileReaderBase(Filename), Diag(errs()), onnxRunner(nullptr),
+        libHandle(nullptr), handleOnnxRuntime(nullptr) {}
 
   StringRef getReaderName() const override { return "branch profile reader"; }
 
@@ -282,7 +293,111 @@ public:
   /// Return all event names used to collect this profile
   StringSet<> getEventNames() const override { return EventNames; }
 
+  /// Destroy the ONNX runner, if one was created, and unload the libraries
+  /// opened by initializeONNXRunner().
+  ~DataReader() {
+    // The runner is released by a function that lives in libONNXRunner.so,
+    // so this has to happen before that library is closed.
+    if (onnxRunner && libHandle) {
+      DeleteONNXRunnerFunc deleteONNXRunner =
+          (DeleteONNXRunnerFunc)dlsym(libHandle, "deleteONNXRunner");
+      if (deleteONNXRunner)
+        deleteONNXRunner(onnxRunner);
+    }
+    // Close each handle on its own so that a partly completed
+    // initialization does not leave a library loaded.
+    if (libHandle)
+      dlclose(libHandle);
+    if (handleOnnxRuntime)
+      dlclose(handleOnnxRuntime);
+  }
+
+  /// Load libonnxruntime.so and libONNXRunner.so and create the runner for the
+  /// model at \p modelPath. Does nothing if a runner or library handle already
+  /// exists. Prints an error and exits with status 1 if a library or the
+  /// createONNXRunner entry point cannot be loaded.
+  void initializeONNXRunner(const std::string &modelPath) {
+    if (!onnxRunner && !libHandle && !handleOnnxRuntime) {
+      handleOnnxRuntime =
+          dlopen("libonnxruntime.so", RTLD_LAZY | RTLD_GLOBAL);
+      if (handleOnnxRuntime == nullptr) {
+        outs() << "error: llvm-bolt failed during loading onnxruntime.so.\n";
+        exit(1);
+      }
+      libHandle = dlopen("libONNXRunner.so", RTLD_LAZY);
+      if (libHandle == nullptr) {
+        outs() << "error: llvm-bolt failed during loading libONNXRunner.so.\n";
+        exit(1);
+      }
+      CreateONNXRunnerFunc createONNXRunner =
+          (CreateONNXRunnerFunc)dlsym(libHandle, "createONNXRunner");
+      // dlsym() returns null when the symbol is missing; calling through it
+      // would crash instead of reporting the problem.
+      if (createONNXRunner == nullptr) {
+        outs() << "error: llvm-bolt failed to resolve createONNXRunner.\n";
+        exit(1);
+      }
+      onnxRunner = createONNXRunner(modelPath.c_str());
+    }
+  }
+
+  /// Run the model on one set of BB features and return its first prediction.
+  /// Returns -1.0 when no runner has been initialized. Prints an error and
+  /// exits with status 1 if runONNXModel cannot be resolved or the model
+  /// returns no prediction.
+  float ONNXInference(const std::vector &input_string,
+                      const std::vector &input_int64,
+                      const std::vector &input_float, int batch_size = 1) {
+    if (onnxRunner && libHandle) {
+      RunONNXModelFunc runONNXModel =
+          (RunONNXModelFunc)dlsym(libHandle, "runONNXModel");
+      if (runONNXModel == nullptr) {
+        outs() << "error: llvm-bolt failed to resolve runONNXModel.\n";
+        exit(1);
+      }
+      std::vector model_preds = runONNXModel(
+          onnxRunner, input_string, input_int64, input_float, batch_size);
+      if (model_preds.empty()) {
+        outs() << "error: llvm-bolt model prediction result cannot be empty.\n";
+        exit(1);
+      }
+      return model_preds[0];
+    }
+    return -1.0f;
+  }
+
+  /// Set the annotating threshold for the model prediction.
+  void setThreshold(float annotate_threshold) {
+    threshold = annotate_threshold;
+  }
+
 protected:
+  /// The onnxruntime model pointer read from the input model path.
+  void *onnxRunner;
+
+  /// The library handle of the ai4compiler framework.
+  void *libHandle;
+
+  /// The library handle of the onnxruntime.
+  void *handleOnnxRuntime;
+
+  /// The annotating threshold for the model prediction. Zero until
+  /// setThreshold() is called.
+  float threshold = 0.0f;
+
+  /// Return the annotating threshold for the model prediction.
+  float getThreshold() const { return threshold; }
+
+  /// The counting value of the total modified BB-count number.
+  uint64_t modified_BB_total = 0;
+
+  /// Add the number of BB counts modified within one function to the running
+  /// total.
+  void addModifiedBBTotal(const uint64_t &value) { modified_BB_total += value; }
+
+  /// Return the counting value of the total modified BB-count number.
+  uint64_t getModifiedBBTotal() const { return modified_BB_total; }
+
   /// Read profile information available for the function.
void readProfile(BinaryFunction &BF); diff --git a/bolt/lib/Core/BinaryBasicBlockFeature.cpp b/bolt/lib/Core/BinaryBasicBlockFeature.cpp new file mode 100644 index 000000000000..e1a2a3dd8112 --- /dev/null +++ b/bolt/lib/Core/BinaryBasicBlockFeature.cpp @@ -0,0 +1,21 @@ +//===- bolt/Core/BinaryBasicBlockFeature.cpp - Low-level basic block +//-------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the BinaryBasicBlock class. +// +//===----------------------------------------------------------------------===// + +#include "bolt/Core/BinaryBasicBlock.h" +#include "bolt/Core/BinaryBasicBlockFeature.h" + +#define DEBUG_TYPE "bolt" + +namespace llvm { +namespace bolt {} // namespace bolt +} // namespace llvm \ No newline at end of file diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt index f45ce15afaac..8dfc7b320616 100644 --- a/bolt/lib/Core/CMakeLists.txt +++ b/bolt/lib/Core/CMakeLists.txt @@ -13,6 +13,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_library(LLVMBOLTCore AddressMap.cpp BinaryBasicBlock.cpp + BinaryBasicBlockFeature.cpp BinaryContext.cpp BinaryData.cpp BinaryEmitter.cpp diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt index 80fce073d0f7..8af424c0a1cb 100644 --- a/bolt/lib/Passes/CMakeLists.txt +++ b/bolt/lib/Passes/CMakeLists.txt @@ -13,6 +13,7 @@ add_llvm_library(LLVMBOLTPasses DataflowInfoManager.cpp FrameAnalysis.cpp FrameOptimizer.cpp + FeatureMiner.cpp FixRelaxationPass.cpp FixRISCVCallsPass.cpp HFSort.cpp @@ -41,6 +42,7 @@ add_llvm_library(LLVMBOLTPasses StackAvailableExpressions.cpp StackPointerTracking.cpp StackReachingUses.cpp + StaticBranchInfo.cpp StokeInfo.cpp TailDuplication.cpp ThreeWayBranch.cpp diff --git 
a/bolt/lib/Passes/FeatureMiner.cpp b/bolt/lib/Passes/FeatureMiner.cpp new file mode 100644 index 000000000000..2559019877d4 --- /dev/null +++ b/bolt/lib/Passes/FeatureMiner.cpp @@ -0,0 +1,572 @@ +//===--- Passes/FeatureMiner.cpp ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// A very simple feature extractor based on Calder's paper +// Evidence-based static branch prediction using machine learning +// https://dl.acm.org/doi/10.1145/239912.239923 +//===----------------------------------------------------------------------===// + +#include "bolt/Passes/DataflowInfoManager.h" +#include "bolt/Passes/FeatureMiner.h" +#include "bolt/Passes/StaticBranchInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" + +#undef DEBUG_TYPE +#define DEBUG_TYPE "bolt-feature-miner" + +using namespace llvm; +using namespace bolt; + +namespace opts { +extern cl::opt BlockCorrection; + +} // namespace opts + +namespace llvm { +namespace bolt { + +class BinaryFunction; + +int8_t FeatureMiner::getProcedureType(BinaryFunction &Function, + BinaryContext &BC) { + int8_t ProcedureType = 1; + for (auto &BB : Function) { + for (auto &Inst : BB) { + if (BC.MIB->isCall(Inst)) { + ProcedureType = 0; // non-leaf type + if (const auto *CalleeSymbol = BC.MIB->getTargetSymbol(Inst)) { + const auto *Callee = BC.getFunctionForSymbol(CalleeSymbol); + if (Callee && + Callee->getFunctionNumber() == Function.getFunctionNumber()) { + return 2; // call self type + } + } + } + } + } + return ProcedureType; // leaf type +} + +void FeatureMiner::addSuccessorInfo(BFIPtr const &BFI, BinaryFunction &Function, + BinaryContext &BC, BinaryBasicBlock &BB, + bool SuccType) { + + BinaryBasicBlock *Successor = BB.getConditionalSuccessor(SuccType); + 
+  if (!Successor)
+    return;
+
+  // NOTE(review): this counts the calls in BB, the block that ends with the
+  // branch, not in Successor. The comment further down says the Call flag
+  // describes the successor block; as written, both successors of BB get the
+  // same value. Confirm which block was intended before changing it -- the
+  // model may have been trained on the current behaviour.
+  unsigned NumCalls{0};
+
+  for (auto &Inst : BB) {
+    if (BC.MIB->isCall(Inst)) {
+      ++NumCalls;
+    }
+  }
+
+  BBIPtr SuccBBInfo = std::make_unique();
+
+  // Check if the successor basic block is a loop header and store it.
+  SuccBBInfo->LoopHeader = SBI->isLoopHeader(Successor);
+
+  SuccBBInfo->BasicBlockSize = Successor->size();
+
+  // Check if the edge getting to the successor basic block is a loop
+  // exit edge and store it.
+  SuccBBInfo->Exit = SBI->isExitEdge(&BB, Successor);
+
+  // Check if the edge getting to the successor basic block is a loop
+  // back edge and store it.
+  SuccBBInfo->Backedge = SBI->isBackEdge(&BB, Successor);
+
+  MCInst *SuccInst = Successor->getTerminatorBefore(nullptr);
+
+  // Store information about the branch type ending the successor basic block
+  SuccBBInfo->EndOpcode = (SuccInst && BC.MIA->isBranch(*SuccInst))
+                              ? SuccInst->getOpcode()
+                              : 0; // 0 = NOTHING
+
+  // Check if the successor basic block contains
+  // a procedure call and store it.
+  SuccBBInfo->Call = (NumCalls > 0) ? 1  // Contains a call instruction
+                                    : 0; // Does not contain a call instruction
+
+  uint32_t Offset = BB.getEndOffset();
+
+  if (SuccType) {
+    BFI->TrueSuccessor = std::move(SuccBBInfo);
+    // Check if the taken branch is a forward
+    // or a backwards branch and store it
+    BFI->Direction = (Function.isForwardBranch(&BB, Successor) == true)
+                         ?
1 // Forward branch + : 0; // Backwards branch + + auto OnlyBranchInfo = BB.getOnlyBranchInfo(); + BFI->Count = OnlyBranchInfo.Count; + + if (Offset) { + uint32_t TargetOffset = Successor->getInputOffset(); + uint32_t BranchOffset = Offset; + if (BranchOffset != UINT32_MAX && TargetOffset != UINT32_MAX) { + int64_t Delta = static_cast(TargetOffset) - + static_cast(BranchOffset); + BFI->DeltaTaken = std::abs(Delta); + } + } + } else { + if (BB.succ_size() == 2) { + auto FallthroughBranchInfo = BB.getFallthroughBranchInfo(); + BFI->FallthroughCount = FallthroughBranchInfo.Count; + } else { + auto OnlyBranchInfo = BB.getOnlyBranchInfo(); + BFI->FallthroughCount = OnlyBranchInfo.Count; + } + BFI->FalseSuccessor = std::move(SuccBBInfo); + } +} + +void FeatureMiner::extractFeatures(BinaryFunction &Function, + BinaryContext &BC) { + int8_t ProcedureType = getProcedureType(Function, BC); + auto Info = DataflowInfoManager(Function, nullptr, nullptr); + const BinaryLoopInfo &LoopsInfo = Function.getLoopInfo(); + + bool Simple = Function.isSimple(); + const auto &Order = Function.dfs(); + std::string Function_name = Function.getPrintName(); + + for (auto *BBA : Order) { + + auto &BB = *BBA; + + BinaryBasicBlockFeature BBF = BB.getFeatures(); + + unsigned TotalLoops{0}; + unsigned LoopDepth{0}; + unsigned LoopNumBlocks{0}; + + bool LocalExitingBlock{false}; + bool LocalLatchBlock{false}; + bool LocalLoopHeader{false}; + + generateProfileFeatures(&BB, &BBF); + + BinaryLoop *Loop = LoopsInfo.getLoopFor(&BB); + if (Loop) { + SmallVector ExitingBlocks; + Loop->getExitingBlocks(ExitingBlocks); + + SmallVector ExitBlocks; + Loop->getExitBlocks(ExitBlocks); + + SmallVector ExitEdges; + Loop->getExitEdges(ExitEdges); + + SmallVector Latches; + Loop->getLoopLatches(Latches); + + TotalLoops = LoopsInfo.TotalLoops; + LoopDepth = Loop->getLoopDepth(); + LoopNumBlocks = Loop->getNumBlocks(); + LocalExitingBlock = Loop->isLoopExiting(&BB); + LocalLatchBlock = Loop->isLoopLatch(&BB); + 
LocalLoopHeader = ((Loop->getHeader() == (&BB)) ? 1 : 0); + } + + unsigned NumLoads{0}; + unsigned NumCalls{0}; + unsigned NumIndirectCalls{0}; + + for (auto &Inst : BB) { + if (BC.MIB->mayLoad(Inst)) { + ++NumLoads; + } else if (BC.MIB->isCall(Inst)) { + ++NumCalls; + if (BC.MIB->isIndirectCall(Inst)) + ++NumIndirectCalls; + } + } + + int Index = -2; + bool LoopHeader = SBI->isLoopHeader(&BB); + + BFIPtr BFI = std::make_unique(); + + BFI->TotalLoops = TotalLoops; + BFI->LoopDepth = LoopDepth; + BFI->LoopNumBlocks = LoopNumBlocks; + BFI->LocalExitingBlock = LocalExitingBlock; + BFI->LocalLatchBlock = LocalLatchBlock; + BFI->LocalLoopHeader = LocalLoopHeader; + BFI->NumCalls = NumCalls; + BFI->BasicBlockSize = BB.size(); + BFI->NumBasicBlocks = Function.size(); + + BFI->NumLoads = NumLoads; + BFI->NumIndirectCalls = NumIndirectCalls; + BFI->LoopHeader = LoopHeader; + BFI->ProcedureType = ProcedureType; + + // Adding taken successor info. + addSuccessorInfo(BFI, Function, BC, BB, true); + // Adding fall through successor info. 
+    addSuccessorInfo(BFI, Function, BC, BB, false);
+
+    MCInst ConditionalInst;
+    bool hasConditionalBranch = false;
+    MCInst UnconditionalInst;
+    bool hasUnconditionalBranch = false;
+
+    for (auto &Inst : BB) {
+      ++Index;
+      if (!BC.MIA->isConditionalBranch(Inst) &&
+          !BC.MIA->isUnconditionalBranch(Inst))
+        continue;
+
+      generateInstFeatures(BC, BB, BFI, Index);
+
+      if (BC.MIA->isConditionalBranch(Inst)) {
+        ConditionalInst = Inst;
+        hasConditionalBranch = true;
+      }
+
+      if (BC.MIA->isUnconditionalBranch(Inst)) {
+        UnconditionalInst = Inst;
+        hasUnconditionalBranch = true;
+      }
+    }
+
+    if (hasConditionalBranch) {
+      BFI->Opcode = ConditionalInst.getOpcode();
+    } else if (hasUnconditionalBranch) {
+      BFI->Opcode = UnconditionalInst.getOpcode();
+    } else {
+      // The block has no branch, so use its last non-pseudo instruction.
+      // getLastNonPseudoInstr() can return null (a block with no such
+      // instruction); Opcode is then left unset instead of dereferencing
+      // null, and the has_value() check below reports it as -1.
+      if (const MCInst *Inst = BB.getLastNonPseudoInstr())
+        BFI->Opcode = Inst->getOpcode();
+      generateInstFeatures(BC, BB, BFI, Index);
+    }
+
+    auto &FalseSuccessor = BFI->FalseSuccessor;
+    auto &TrueSuccessor = BFI->TrueSuccessor;
+
+    int16_t ProcedureType = (BFI->ProcedureType.has_value())
+                                ? static_cast(*(BFI->ProcedureType))
+                                : -1;
+
+    int64_t Count =
+        (BFI->Count.has_value()) ? static_cast(*(BFI->Count)) : -1;
+
+    int64_t FallthroughCount =
+        (BFI->FallthroughCount.has_value())
+            ? static_cast(*(BFI->FallthroughCount))
+            : -1;
+
+    int16_t LoopHeaderValid = (BFI->LoopHeader.has_value())
+                                  ? static_cast(*(BFI->LoopHeader))
+                                  : -1;
+
+    int64_t TotalLoopsValid = (BFI->TotalLoops.has_value())
+                                  ? static_cast(*(BFI->TotalLoops))
+                                  : -1;
+    int64_t LoopDepthValid = (BFI->LoopDepth.has_value())
+                                 ? static_cast(*(BFI->LoopDepth))
+                                 : -1;
+    int64_t LoopNumBlocksValid =
+        (BFI->LoopNumBlocks.has_value())
+            ? static_cast(*(BFI->LoopNumBlocks))
+            : -1;
+    int64_t LocalExitingBlockValid =
+        (BFI->LocalExitingBlock.has_value())
+            ? static_cast(*(BFI->LocalExitingBlock))
+            : -1;
+
+    int64_t LocalLatchBlockValid =
+        (BFI->LocalLatchBlock.has_value())
+            ?
static_cast(*(BFI->LocalLatchBlock)) + : -1; + + int64_t LocalLoopHeaderValid = + (BFI->LocalLoopHeader.has_value()) + ? static_cast(*(BFI->LocalLoopHeader)) + : -1; + + int32_t CmpOpcode = (BFI->CmpOpcode.has_value()) + ? static_cast(*(BFI->CmpOpcode)) + : -1; + + int64_t OperandRAType = (BFI->OperandRAType.has_value()) + ? static_cast(*(BFI->OperandRAType)) + : 10; + + int64_t OperandRBType = (BFI->OperandRBType.has_value()) + ? static_cast(*(BFI->OperandRBType)) + : 10; + int16_t Direction = (BFI->Direction.has_value()) + ? static_cast(*(BFI->Direction)) + : -1; + + int64_t DeltaTaken = (BFI->DeltaTaken.has_value()) + ? static_cast(*(BFI->DeltaTaken)) + : -1; + + int64_t NumLoadsValid = (BFI->NumLoads.has_value()) + ? static_cast(*(BFI->NumLoads)) + : -1; + + int64_t BasicBlockSize = (BFI->BasicBlockSize.has_value()) + ? static_cast(*(BFI->BasicBlockSize)) + : -1; + + int64_t NumBasicBlocks = (BFI->NumBasicBlocks.has_value()) + ? static_cast(*(BFI->NumBasicBlocks)) + : -1; + + int64_t NumCallsValid = (BFI->NumCalls.has_value()) + ? static_cast(*(BFI->NumCalls)) + : -1; + + int64_t NumIndirectCallsValid = + (BFI->NumIndirectCalls.has_value()) + ? static_cast(*(BFI->NumIndirectCalls)) + : -1; + + int64_t HasIndirectCalls = (NumIndirectCallsValid > 0) ? 1 : 0; + + int32_t Opcode = + (BFI->Opcode.has_value()) ? static_cast(*(BFI->Opcode)) : -1; + + uint64_t fun_exec = Function.getExecutionCount(); + fun_exec = (fun_exec != UINT64_MAX) ? 
fun_exec : 0; + + BBF.setDirection(Direction); + BBF.setDeltaTaken(DeltaTaken); + BBF.setOpcode(Opcode); + BBF.setCmpOpcode(CmpOpcode); + BBF.setOperandRAType(OperandRAType); + BBF.setOperandRBType(OperandRBType); + BBF.setFunExec(fun_exec); + BBF.setTotalLoops(TotalLoopsValid); + BBF.setLoopDepth(LoopDepthValid); + BBF.setLoopNumBlocks(LoopNumBlocksValid); + BBF.setLocalExitingBlock(LocalExitingBlockValid); + BBF.setLocalLatchBlock(LocalLatchBlockValid); + BBF.setLocalLoopHeader(LocalLoopHeaderValid); + BBF.setNumCalls(NumCallsValid); + BBF.setBasicBlockSize(BasicBlockSize); + BBF.setNumBasicBlocks(NumBasicBlocks); + BBF.setNumLoads(NumLoadsValid); + BBF.setHasIndirectCalls(HasIndirectCalls); + BBF.setLoopHeader(LoopHeaderValid); + BBF.setProcedureType(ProcedureType); + BBF.setCount(Count); + BBF.setFallthroughCount(FallthroughCount); + + generateSuccessorFeatures(TrueSuccessor, &BBF); + generateSuccessorFeatures(FalseSuccessor, &BBF); + + FalseSuccessor.reset(); + TrueSuccessor.reset(); + + BBF.setInferenceFeatures(); + BB.setFeatures(BBF); + + BFI.reset(); + } +} + +void FeatureMiner::generateInstFeatures(BinaryContext &BC, BinaryBasicBlock &BB, + BFIPtr const &BFI, int Index) { + + // Holds the branch opcode info. + + BFI->CmpOpcode = 0; + if (Index > -1) { + auto Cmp = BB.begin() + Index; + if (BC.MII->get((*Cmp).getOpcode()).isCompare()) { + // Holding the branch comparison opcode info. 
+ BFI->CmpOpcode = (*Cmp).getOpcode(); + auto getOperandType = [&](const MCOperand &Operand) -> int32_t { + if (Operand.isReg()) + return 0; + else if (Operand.isImm()) + return 1; + else if (Operand.isSFPImm()) + return 2; + else if (Operand.isExpr()) + return 3; + else + return -1; + }; + + const auto InstInfo = BC.MII->get((*Cmp).getOpcode()); + unsigned NumDefs = InstInfo.getNumDefs(); + int32_t NumPrimeOperands = MCPlus::getNumPrimeOperands(*Cmp) - NumDefs; + switch (NumPrimeOperands) { + case 6: { + int32_t RBType = getOperandType((*Cmp).getOperand(NumDefs)); + int32_t RAType = getOperandType((*Cmp).getOperand(NumDefs + 1)); + + if (RBType == 0 && RAType == 0) { + BFI->OperandRBType = RBType; + BFI->OperandRAType = RAType; + } else if (RBType == 0 && (RAType == 1 || RAType == 2)) { + RAType = getOperandType((*Cmp).getOperand(NumPrimeOperands - 1)); + + if (RAType != 1 && RAType != 2) { + RAType = -1; + } + + BFI->OperandRBType = RBType; + BFI->OperandRAType = RAType; + } else { + BFI->OperandRAType = -1; + BFI->OperandRBType = -1; + } + break; + } + case 2: + BFI->OperandRBType = getOperandType((*Cmp).getOperand(NumDefs)); + BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs + 1)); + break; + case 3: + BFI->OperandRBType = getOperandType((*Cmp).getOperand(NumDefs)); + BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs + 2)); + break; + case 1: + BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs)); + break; + default: + BFI->OperandRAType = -1; + BFI->OperandRBType = -1; + break; + } + + } else { + Index -= 1; + for (int Idx = Index; Idx > -1; Idx--) { + auto Cmp = BB.begin() + Idx; + if (BC.MII->get((*Cmp).getOpcode()).isCompare()) { + // Holding the branch comparison opcode info. + BFI->CmpOpcode = (*Cmp).getOpcode(); + break; + } + } + } + } +} + +void FeatureMiner::generateSuccessorFeatures(BBIPtr &Successor, + BinaryBasicBlockFeature *BBF) { + + int16_t LoopHeader = (Successor->LoopHeader.has_value()) + ? 
static_cast(*(Successor->LoopHeader)) + : -1; + + int16_t Backedge = (Successor->Backedge.has_value()) + ? static_cast(*(Successor->Backedge)) + : -1; + + int16_t Exit = (Successor->Exit.has_value()) + ? static_cast(*(Successor->Exit)) + : -1; + + int16_t Call = (Successor->Call.has_value()) + ? static_cast(*(Successor->Call)) + : -1; + + int32_t EndOpcode = (Successor->EndOpcode.has_value()) + ? static_cast(*(Successor->EndOpcode)) + : -1; + + int64_t BasicBlockSize = + (Successor->BasicBlockSize.has_value()) + ? static_cast(*(Successor->BasicBlockSize)) + : -1; + + BBF->setEndOpcodeVec(EndOpcode); + BBF->setLoopHeaderVec(LoopHeader); + BBF->setBackedgeVec(Backedge); + BBF->setExitVec(Exit); + BBF->setCallVec(Call); + BBF->setBasicBlockSizeVec(BasicBlockSize); +} + +void FeatureMiner::runOnFunctions(BinaryContext &BC) {} + +void FeatureMiner::inferenceFeatures(BinaryFunction &Function) { + + SBI = std::make_unique(); + + if (Function.empty()) + return; + + if (!Function.isLoopFree()) { + const BinaryLoopInfo &LoopsInfo = Function.getLoopInfo(); + SBI->findLoopEdgesInfo(LoopsInfo); + } + + BinaryContext &BC = Function.getBinaryContext(); + extractFeatures(Function, BC); + + SBI->clear(); +} + +void FeatureMiner::generateProfileFeatures(BinaryBasicBlock *BB, + BinaryBasicBlockFeature *BBF) { + int32_t parentChildNum, parentCount, childParentNum, childCount; + + if (BB->getParentSet().size() == 0) { + parentChildNum = -1; + parentCount = -1; + } else { + parentChildNum = std::numeric_limits::max(); + parentCount = 0; + for (BinaryBasicBlock *parent : BB->getParentSet()) { + if (parent->getChildrenSet().size() < parentChildNum) { + parentChildNum = parent->getChildrenSet().size(); + parentCount = parent->getExecutionCount(); + } else if (parent->getChildrenSet().size() == parentChildNum && + parent->getExecutionCount() > parentCount) { + parentCount = parent->getExecutionCount(); + } + } + } + + if (BB->getChildrenSet().size() == 0) { + childParentNum = -1; + 
childCount = -1;
+  } else {
+    childParentNum = std::numeric_limits::max();
+    childCount = 0;
+    // Among the children, take the one with the fewest parents; ties are
+    // broken in favour of the larger execution count.
+    // NOTE(review): childCount is the int32_t declared at the top of this
+    // function. If getExecutionCount() returns a wider type, the assignments
+    // below narrow it -- confirm large counts cannot flip the `> 0` test.
+    for (BinaryBasicBlock *child : BB->getChildrenSet()) {
+      if (child->getParentSet().size() < childParentNum) {
+        childParentNum = child->getParentSet().size();
+        childCount = child->getExecutionCount();
+      } else if (child->getParentSet().size() == childParentNum &&
+                 child->getExecutionCount() > childCount) {
+        childCount = child->getExecutionCount();
+      }
+    }
+  }
+
+  // Only whether a count is positive is passed on, so setParentCount() and
+  // setChildCount() receive 0 or 1 here, never the -1 assigned above for a
+  // block without parents or children.
+  int64_t parentCountCatch = parentCount > 0 ? 1 : 0;
+  int64_t childCountCatch = childCount > 0 ? 1 : 0;
+
+  BBF->setParentChildNum(parentChildNum);
+  BBF->setParentCount(parentCountCatch);
+  BBF->setChildParentNum(childParentNum);
+  BBF->setChildCount(childCountCatch);
+}
+
+} // namespace bolt
+} // namespace llvm
\ No newline at end of file
diff --git a/bolt/lib/Passes/StaticBranchInfo.cpp b/bolt/lib/Passes/StaticBranchInfo.cpp
new file mode 100644
index 000000000000..c9cac59bb4a4
--- /dev/null
+++ b/bolt/lib/Passes/StaticBranchInfo.cpp
@@ -0,0 +1,143 @@
+//===------ Passes/StaticBranchInfo.cpp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an auxiliary class to the feature miner, static branch probability
+// and frequency passes. This class is responsible for finding loop info (loop
+// back edges, loop exit edges and loop headers) of a function. It also finds
+// basic block info (if a block contains store and call instructions) and if a
+// basic block contains a call to the exit.
+// +//===----------------------------------------------------------------------===// + +#include "bolt/Core/BinaryBasicBlock.h" +#include "bolt/Core/BinaryLoop.h" +#include "bolt/Passes/StaticBranchInfo.h" + +namespace llvm { +namespace bolt { + +void StaticBranchInfo::findLoopEdgesInfo(const BinaryLoopInfo &LoopsInfo) { + // Traverse discovered loops + std::stack Loops; + for (BinaryLoop *BL : LoopsInfo) + Loops.push(BL); + + while (!Loops.empty()) { + BinaryLoop *Loop = Loops.top(); + Loops.pop(); + BinaryBasicBlock *LoopHeader = Loop->getHeader(); + LoopHeaders.insert(LoopHeader); + + // Add nested loops in the stack. + for (BinaryLoop::iterator I = Loop->begin(), E = Loop->end(); I != E; ++I) { + Loops.push(*I); + } + + SmallVector Latches; + Loop->getLoopLatches(Latches); + + // Find back edges. + for (BinaryBasicBlock *Latch : Latches) { + for (BinaryBasicBlock *Succ : Latch->successors()) { + if (Succ == LoopHeader) { + Edge CFGEdge = std::make_pair(Latch->getLabel(), Succ->getLabel()); + BackEdges.insert(CFGEdge); + } + } + } + + // Find exit edges. 
+ SmallVector AuxExitEdges; + Loop->getExitEdges(AuxExitEdges); + for (BinaryLoop::Edge &Exit : AuxExitEdges) { + ExitEdges.insert(Exit); + } + } +} + +void StaticBranchInfo::findBasicBlockInfo(const BinaryFunction &Function, + BinaryContext &BC) { + for (auto &BB : Function) { + for (auto &Inst : BB) { + if (BC.MIB->isCall(Inst)) + CallSet.insert(&BB); + else if (BC.MIB->mayStore(Inst)) + StoreSet.insert(&BB); + } + } +} + +bool StaticBranchInfo::isBackEdge(const Edge &CFGEdge) const { + return BackEdges.count(CFGEdge); +} + +bool StaticBranchInfo::isBackEdge(const BinaryBasicBlock *SrcBB, + const BinaryBasicBlock *DstBB) const { + const Edge CFGEdge = std::make_pair(SrcBB->getLabel(), DstBB->getLabel()); + return isBackEdge(CFGEdge); +} + +bool StaticBranchInfo::isExitEdge(const BinaryLoop::Edge &CFGEdge) const { + return ExitEdges.count(CFGEdge); +} + +bool StaticBranchInfo::isExitEdge(const BinaryBasicBlock *SrcBB, + const BinaryBasicBlock *DstBB) const { + const BinaryLoop::Edge CFGEdge = + std::make_pair(const_cast(SrcBB), + const_cast(DstBB)); + return isExitEdge(CFGEdge); +} + +bool StaticBranchInfo::isLoopHeader(const BinaryBasicBlock *BB) const { + return LoopHeaders.count(BB); +} + +bool StaticBranchInfo::hasCallInst(const BinaryBasicBlock *BB) const { + return CallSet.count(BB); +} + +bool StaticBranchInfo::hasStoreInst(const BinaryBasicBlock *BB) const { + return StoreSet.count(BB); +} + +unsigned StaticBranchInfo::countBackEdges(BinaryBasicBlock *BB) const { + unsigned CountEdges = 0; + + for (BinaryBasicBlock *SuccBB : BB->successors()) { + const Edge CFGEdge = std::make_pair(BB->getLabel(), SuccBB->getLabel()); + if (BackEdges.count(CFGEdge)) + ++CountEdges; + } + + return CountEdges; +} + +unsigned StaticBranchInfo::countExitEdges(BinaryBasicBlock *BB) const { + unsigned CountEdges = 0; + + for (BinaryBasicBlock *SuccBB : BB->successors()) { + const BinaryLoop::Edge CFGEdge = std::make_pair(BB, SuccBB); + if (ExitEdges.count(CFGEdge)) + 
++CountEdges; + } + + return CountEdges; +} + +void StaticBranchInfo::clear() { + LoopHeaders.clear(); + BackEdges.clear(); + ExitEdges.clear(); + CallSet.clear(); + StoreSet.clear(); +} + +} // namespace bolt +} // namespace llvm diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp index dcc7578041fa..b5f0558e884b 100644 --- a/bolt/lib/Profile/DataReader.cpp +++ b/bolt/lib/Profile/DataReader.cpp @@ -12,13 +12,16 @@ //===----------------------------------------------------------------------===// #include "bolt/Profile/DataReader.h" +#include "bolt/Passes/FeatureMiner.h" #include "bolt/Core/BinaryFunction.h" #include "bolt/Passes/MCF.h" #include "bolt/Utils/Utils.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Errc.h" +#include #include +#include #undef DEBUG_TYPE #define DEBUG_TYPE "bolt-prof" @@ -26,15 +29,23 @@ using namespace llvm; namespace opts { - +extern cl::opt BlockCorrection; extern cl::OptionCategory BoltCategory; extern llvm::cl::opt Verbosity; -static cl::opt -DumpData("dump-data", - cl::desc("dump parsed bolt data for debugging"), - cl::Hidden, - cl::cat(BoltCategory)); +static cl::opt InputModelFilename("model-path", + cl::desc(""), + cl::Optional, + cl::cat(BoltCategory)); + +static cl::opt AnnotateThreshold( + "annotate-threshold", + cl::desc(""), + cl::init(0.85f), cl::Optional, cl::cat(BoltCategory)); + +static cl::opt DumpData("dump-data", + cl::desc("dump parsed bolt data for debugging"), + cl::Hidden, cl::cat(BoltCategory)); } // namespace opts @@ -311,6 +322,17 @@ Error DataReader::readProfilePreCFG(BinaryContext &BC) { } Error DataReader::readProfile(BinaryContext &BC) { + + if (opts::BlockCorrection) { + if (opts::InputModelFilename.empty()) { + outs() << "error: llvm-bolt expected -model-path= option.\n"; + exit(1); + } else { + DataReader::initializeONNXRunner(opts::InputModelFilename); + DataReader::setThreshold(opts::AnnotateThreshold); + } + } + for (auto &BFI : 
BC.getBinaryFunctions()) { BinaryFunction &Function = BFI.second; readProfile(Function); @@ -324,6 +346,12 @@ Error DataReader::readProfile(BinaryContext &BC) { } BC.setNumUnusedProfiledObjects(NumUnused); + if (opts::BlockCorrection) { + uint64_t modified_total = DataReader::getModifiedBBTotal(); + outs() << "BOLT-INFO: total modified CFG BB count number is " + << modified_total << ".\n"; + } + return Error::success(); } @@ -555,6 +583,75 @@ float DataReader::evaluateProfileData(BinaryFunction &BF, return MatchRatio; } +void generateChildrenParentCount(BinaryBasicBlock *BB) { + typedef GraphTraits GraphT; + + for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB), + E = GraphT::child_end(BB); + CI != E; ++CI) { + typename GraphT::NodeRef Child = *CI; + BB->insertChildrenSet(Child); + Child->insertParentSet(BB); + } +} + +void generateChildrenParentCount(BinaryFunction &BF) { + for (BinaryBasicBlock &BB : BF) { + generateChildrenParentCount(&BB); + } +} + +uint64_t estimateBBCount(DataReader *dataReaderRef, BinaryBasicBlock *BB, + float threshold) { + uint64_t modified = 0; + if (BB->getExecutionCount() != 0) { + return modified; + } + + std::vector input_string; + std::vector input_int64; + std::vector input_float; + + BinaryBasicBlockFeature BBF = BB->getFeatures(); + input_int64 = BBF.getInferenceFeatures(); + + if (input_int64.empty()) { + return 0; + } + + float model_pred = + dataReaderRef->ONNXInference(input_string, input_int64, input_float); + if (model_pred >= threshold) { + uint64_t min_neighbor_count = std::numeric_limits::max(); + for (BinaryBasicBlock *parent : BB->getParentSet()) { + if (parent->getExecutionCount() > 0 && + parent->getExecutionCount() < min_neighbor_count) + min_neighbor_count = parent->getExecutionCount(); + } + for (BinaryBasicBlock *child : BB->getChildrenSet()) { + if (child->getExecutionCount() > 0 && + child->getExecutionCount() < min_neighbor_count) + min_neighbor_count = child->getExecutionCount(); + } + if 
(min_neighbor_count != std::numeric_limits::max()) { + BB->setExecutionCount(min_neighbor_count); + modified = 1; + } + } + return modified; +} + +uint64_t estimateBBCount(DataReader *dataReaderRef, BinaryFunction &BF, + float threshold) { + uint64_t modified_total_func = 0; + const auto &Order = BF.dfs(); + for (auto *BBA : Order) { + auto &BB = *BBA; + modified_total_func += estimateBBCount(dataReaderRef, &BB, threshold); + } + return modified_total_func; +} + void DataReader::readSampleData(BinaryFunction &BF) { FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames()); if (!SampleDataOrErr) @@ -600,6 +697,17 @@ void DataReader::readSampleData(BinaryFunction &BF) { BF.ExecutionCount = TotalEntryCount; + if (opts::BlockCorrection) { + generateChildrenParentCount(BF); + std::unique_ptr FM = + std::make_unique(opts::BlockCorrection); + FM->inferenceFeatures(BF); + + float threshold = DataReader::getThreshold(); + uint64_t modified_total_func = estimateBBCount(this, BF, threshold); + DataReader::addModifiedBBTotal(modified_total_func); + } + estimateEdgeCounts(BF); } diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 7063b243b52d..305fb889d75f 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -107,6 +107,12 @@ cl::opt DumpDotAll( "enable '-print-loops' for color-coded blocks"), cl::Hidden, cl::cat(BoltCategory)); +cl::opt BlockCorrection( + "block-correction", + cl::desc("capture features useful for ML model to inference the count on the binary basic block" + " and correct them on CFG."), + cl::ZeroOrMore, cl::cat(BoltOptCategory)); + static cl::list ForceFunctionNames("funcs", cl::CommaSeparated, -- Gitee From 8c805f5ef7ea69fd74817329c937cc0ba2ef6f4b Mon Sep 17 00:00:00 2001 From: sinan Date: Wed, 7 Aug 2024 18:02:42 +0800 Subject: [PATCH 05/11] [BOLT] Skip PLT search for zero-value weak reference symbols (#69136) Take a common weak reference pattern for example ``` 
__attribute__((weak)) void undef_weak_fun(); if (&undef_weak_fun) undef_weak_fun(); ``` In this case, an undefined weak symbol `undef_weak_fun` has an address of zero, and Bolt incorrectly changes the relocation for the corresponding symbol to symbol@PLT, leading to incorrect runtime behavior. --- bolt/lib/Rewrite/RewriteInstance.cpp | 11 +++++- .../AArch64/update-weak-reference-symbol.s | 34 +++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 bolt/test/AArch64/update-weak-reference-symbol.s diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 305fb889d75f..e8d302e3f44d 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -2065,6 +2065,14 @@ bool RewriteInstance::analyzeRelocation( if (!Relocation::isSupported(RType)) return false; + auto IsWeakReference = [](const SymbolRef &Symbol) { + Expected SymFlagsOrErr = Symbol.getFlags(); + if (!SymFlagsOrErr) + return false; + return (*SymFlagsOrErr & SymbolRef::SF_Undefined) && + (*SymFlagsOrErr & SymbolRef::SF_Weak); + }; + const bool IsAArch64 = BC->isAArch64(); const size_t RelSize = Relocation::getSizeForType(RType); @@ -2096,7 +2104,8 @@ bool RewriteInstance::analyzeRelocation( // Section symbols are marked as ST_Debug. IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug); // Check for PLT entry registered with symbol name - if (!SymbolAddress && (IsAArch64 || BC->isRISCV())) { + if (!SymbolAddress && !IsWeakReference(Symbol) && + (IsAArch64 || BC->isRISCV())) { const BinaryData *BD = BC->getPLTBinaryDataByName(SymbolName); SymbolAddress = BD ? 
BD->getAddress() : 0; } diff --git a/bolt/test/AArch64/update-weak-reference-symbol.s b/bolt/test/AArch64/update-weak-reference-symbol.s new file mode 100644 index 000000000000..600a06b8b6d8 --- /dev/null +++ b/bolt/test/AArch64/update-weak-reference-symbol.s @@ -0,0 +1,34 @@ +// This test checks whether BOLT can correctly handle relocations against weak symbols. + +// RUN: %clang %cflags -Wl,-z,notext -shared -Wl,-q %s -o %t.so +// RUN: llvm-bolt %t.so -o %t.so.bolt +// RUN: llvm-nm -n %t.so.bolt > %t.out.txt +// RUN: llvm-objdump -dj .rodata %t.so.bolt >> %t.out.txt +// RUN: FileCheck %s --input-file=%t.out.txt + +# CHECK: w func_1 +# CHECK: {{0+}}[[#%x,ADDR:]] W func_2 + +# CHECK: {{.*}} <.rodata>: +# CHECK-NEXT: {{.*}} .word 0x00000000 +# CHECK-NEXT: {{.*}} .word 0x00000000 +# CHECK-NEXT: {{.*}} .word 0x{{[0]+}}[[#ADDR]] +# CHECK-NEXT: {{.*}} .word 0x00000000 + + .text + .weak func_2 + .weak func_1 + .global wow + .type wow, %function +wow: + bl func_1 + bl func_2 + ret + .type func_2, %function +func_2: + ret + .section .rodata +.LC0: + .xword func_1 +.LC1: + .xword func_2 -- Gitee From acce2c3ff7d23693197532b0ef38631c0fc45678 Mon Sep 17 00:00:00 2001 From: liyancheng <412998149@qq.com> Date: Tue, 10 Sep 2024 15:09:51 +0800 Subject: [PATCH 06/11] [merge-fdata] Support processing no_lbr profile file --- bolt/tools/merge-fdata/merge-fdata.cpp | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/bolt/tools/merge-fdata/merge-fdata.cpp b/bolt/tools/merge-fdata/merge-fdata.cpp index 757f0536616b..6ef10e1be5d8 100644 --- a/bolt/tools/merge-fdata/merge-fdata.cpp +++ b/bolt/tools/merge-fdata/merge-fdata.cpp @@ -261,6 +261,7 @@ bool isYAML(const StringRef Filename) { void mergeLegacyProfiles(const SmallVectorImpl &Filenames) { errs() << "Using legacy profile format.\n"; std::optional BoltedCollection; + std::optional NoLBRMode; std::mutex BoltedCollectionMutex; typedef StringMap ProfileTy; @@ -294,6 +295,22 @@ void 
mergeLegacyProfiles(const SmallVectorImpl &Filenames) { BoltedCollection = false; } + // Check if the string "no_lbr" is in the first line + if (Buf.startswith("no_lbr")) { + if (!NoLBRMode.value_or(true)) + report_error( + Filename, + "cannot mix profile collected with lbr and non-lbr info"); + NoLBRMode = true; + Buf = Buf.drop_front(Buf.find_first_of("\n")); + } else { + if (NoLBRMode.value_or(false)) + report_error( + Filename, + "cannot mix profile collected with lbr and non-lbr info"); + NoLBRMode = false; + } + Profile = &Profiles[tid]; } @@ -329,7 +346,9 @@ void mergeLegacyProfiles(const SmallVectorImpl &Filenames) { MergedProfile.insert_or_assign(Key, Count); } - if (BoltedCollection) + if (NoLBRMode) + output() << "no_lbr cycles:u:\n"; + else if (BoltedCollection) output() << "boltedcollection\n"; for (const auto &[Key, Value] : MergedProfile) output() << Key << " " << Value << "\n"; -- Gitee From a54187d02e11a436b46ccc893a1fee0c57ee6812 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=89=9F=E6=96=87=E9=BE=99?= Date: Mon, 18 Nov 2024 02:13:25 +0000 Subject: [PATCH 07/11] [AArch64] Add hybrid guess approach for edge weight estimation --- bolt/lib/Passes/MCF.cpp | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/bolt/lib/Passes/MCF.cpp b/bolt/lib/Passes/MCF.cpp index c3898d2dce98..a6455bbebc75 100644 --- a/bolt/lib/Passes/MCF.cpp +++ b/bolt/lib/Passes/MCF.cpp @@ -36,6 +36,11 @@ static cl::opt IterativeGuess( cl::desc("in non-LBR mode, guess edge counts using iterative technique"), cl::Hidden, cl::cat(BoltOptCategory)); +static cl::opt HybridGuess( + "hybrid-guess", + cl::desc("in non-LBR mode, guess edge counts using hybird estimation technique"), + cl::Hidden, cl::cat(BoltOptCategory)); + static cl::opt UseRArcs( "mcf-use-rarcs", cl::desc("in MCF, consider the possibility of cancelling flow to balance " @@ -350,6 +355,27 @@ void guessEdgeByIterativeApproach(BinaryFunction &BF) { } } +void 
guessEdgeByHybridApproach(BinaryFunction &BF, + EdgeWeightMap &PredEdgeWeights, + EdgeWeightMap &SuccEdgeWeights) { + for (BinaryBasicBlock &BB : BF) { + for (BinaryBasicBlock *Pred : BB.predecessors()) { + double RelativeExecSucc = SuccEdgeWeights[std::make_pair(Pred, &BB)]; + double RelativeExec = PredEdgeWeights[std::make_pair(Pred, &BB)]; + RelativeExec *= BB.getExecutionCount(); + RelativeExecSucc *= Pred->getExecutionCount(); + BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB); + if ((static_cast(RelativeExec) != 0) && (static_cast(RelativeExecSucc) != 0)) { + BI.Count = (static_cast(RelativeExec) + RelativeExecSucc) / 2; + } else if (static_cast(RelativeExec) != 0) { + BI.Count = static_cast(RelativeExec); + } else if (static_cast(RelativeExecSucc) != 0) { + BI.Count = static_cast(RelativeExecSucc); + } + } + } +} + /// Associate each basic block with the BinaryLoop object corresponding to the /// innermost loop containing this block. DenseMap @@ -454,11 +480,14 @@ void estimateEdgeCounts(BinaryFunction &BF) { equalizeBBCounts(Info, BF); LLVM_DEBUG(BF.print(dbgs(), "after equalize BB counts")); } - if (opts::IterativeGuess) + if (opts::IterativeGuess) { guessEdgeByIterativeApproach(BF); - else + } else if (opts::HybridGuess) { + guessEdgeByHybridApproach(BF, PredEdgeWeights, SuccEdgeWeights); + } else { guessEdgeByRelHotness(BF, /*UseSuccs=*/false, PredEdgeWeights, SuccEdgeWeights); + } recalculateBBCounts(BF, /*AllEdges=*/false); } -- Gitee From 09c0d1cf884d228e2bd64fa85b2a2939f53a72f4 Mon Sep 17 00:00:00 2001 From: rfwang07 Date: Tue, 19 Nov 2024 09:48:40 +0800 Subject: [PATCH 08/11] support D-FOT addrs data parsing for optimized binary --- bolt/include/bolt/Profile/DataAggregator.h | 31 ++++++++ bolt/lib/Profile/DataAggregator.cpp | 86 +++++++++++++++++++++- 2 files changed, 113 insertions(+), 4 deletions(-) diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index cc237a6e642b..d352f1bf91fd 
100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -102,6 +102,12 @@ private: Type EntryType; }; + /// Used for parsing specific libkperf input files. + struct LibkperfDataEntry { + uint64_t Addr; + uint64_t Count; + }; + struct Trace { uint64_t From; uint64_t To; @@ -300,6 +306,9 @@ private: /// Parse pre-aggregated LBR samples created by an external tool ErrorOr parseAggregatedLBREntry(); + /// Parse libkperf samples created by D-FOT + ErrorOr parseLibkperfDataEntry(); + /// Parse either buildid:offset or just offset, representing a location in the /// binary. Used exclusevely for pre-aggregated LBR samples. ErrorOr parseLocationOrOffset(); @@ -417,10 +426,32 @@ private: /// B 4b196f 4b19e0 2 0 void parsePreAggregated(); + /// Coordinate reading and parsing of libkperf file + /// The regular perf2bolt aggregation job is to read perf output directly. + /// But in the oeaware framework, sampling is done by libkperf. + /// For data collected by sampling the BOLT-optimized binary, + /// oeaware can export addrs and counts. + /// In perf2bolt, with the help of the BAT section, + /// this data is converted to profile that is usable for the original binary. + /// + /// File format syntax: + /// - first line: + /// - the other lines: + /// + /// Example: + /// cycles + /// 40f544 1 + /// 40f750 2 + /// 40f810 53 + void parseLibkperfFile(); + /// Parse the full output of pre-aggregated LBR samples generated by /// an external tool. std::error_code parsePreAggregatedLBRSamples(); + /// Parse the libkperf samples + std::error_code parseLibkperfSamples(); + /// Process parsed pre-aggregated data. 
void processPreAggregated(); diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index cbc079afbb7e..1d862a9aa098 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -90,6 +90,11 @@ cl::opt ReadPreAggregated( "pa", cl::desc("skip perf and read data from a pre-aggregated file format"), cl::cat(AggregatorCategory)); +cl::opt ReadLibkperfFile( + "libkperf", cl::desc("skip perf and read data from a libkperf file format, " + "only for continuous optimizing with BAT"), + cl::cat(AggregatorCategory)); + static cl::opt TimeAggregator("time-aggr", cl::desc("time BOLT aggregator"), @@ -162,8 +167,8 @@ void DataAggregator::findPerfExecutable() { void DataAggregator::start() { outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n"; - // Don't launch perf for pre-aggregated files - if (opts::ReadPreAggregated) + // Don't launch perf for pre-aggregated files and libkperf files + if (opts::ReadPreAggregated || opts::ReadLibkperfFile) return; findPerfExecutable(); @@ -205,7 +210,7 @@ void DataAggregator::start() { } void DataAggregator::abort() { - if (opts::ReadPreAggregated) + if (opts::ReadPreAggregated || opts::ReadLibkperfFile) return; std::string Error; @@ -325,6 +330,8 @@ void DataAggregator::processFileBuildID(StringRef FileBuildID) { bool DataAggregator::checkPerfDataMagic(StringRef FileName) { if (opts::ReadPreAggregated) return true; + if (opts::ReadLibkperfFile) + return true; Expected FD = sys::fs::openNativeFileForRead(FileName); if (!FD) { @@ -371,6 +378,27 @@ void DataAggregator::parsePreAggregated() { } } +void DataAggregator::parseLibkperfFile() { + std::string Error; + + ErrorOr> MB = + MemoryBuffer::getFileOrSTDIN(Filename); + if (std::error_code EC = MB.getError()) { + errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": " + << EC.message() << "\n"; + exit(1); + } + + FileBuf = std::move(*MB); + ParsingBuf = FileBuf->getBuffer(); + Col = 0; + Line = 0; + 
if (parseLibkperfSamples()) { + errs() << "PERF2BOLT: failed to parse libkperf samples\n"; + exit(1); + } +} + std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) { outs() << "PERF2BOLT: writing data for autofdo tools...\n"; NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName, @@ -514,6 +542,11 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { return Error::success(); } + if (opts::ReadLibkperfFile) { + parseLibkperfFile(); + return Error::success(); + } + if (std::optional FileBuildID = BC.getFileBuildID()) { outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; processFileBuildID(*FileBuildID); @@ -620,7 +653,7 @@ bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) { void DataAggregator::processProfile(BinaryContext &BC) { if (opts::ReadPreAggregated) processPreAggregated(); - else if (opts::BasicAggregation) + else if (opts::BasicAggregation || opts::ReadLibkperfFile) processBasicEvents(); else processBranchEvents(); @@ -1218,6 +1251,28 @@ ErrorOr DataAggregator::parseLocationOrOffset() { return Location(true, BuildID.get(), Offset.get()); } +ErrorOr +DataAggregator::parseLibkperfDataEntry() { + // + while (checkAndConsumeFS()) { + } + ErrorOr Addr = parseHexField(FieldSeparator); + if (std::error_code EC = Addr.getError()) + return EC; + while (checkAndConsumeFS()) { + } + ErrorOr Count = parseNumberField(FieldSeparator, true); + if (std::error_code EC = Count.getError()) + return EC; + + if (!checkAndConsumeNewLine()) { + reportError("expected end of line"); + return make_error_code(llvm::errc::io_error); + } + + return LibkperfDataEntry{Addr.get(), Count.get()}; +} + ErrorOr DataAggregator::parseAggregatedLBREntry() { while (checkAndConsumeFS()) { @@ -1721,6 +1776,29 @@ void DataAggregator::processMemEvents() { } } +std::error_code DataAggregator::parseLibkperfSamples() { + outs() << "PERF2BOLT: parsing libkperf data...\n"; + NamedRegionTimer 
T("parseLibkperfData", "Parsing libkperf data", + TimerGroupName, TimerGroupDesc, opts::TimeAggregator); + bool FirstLine = true; + while (hasData()) { + if (FirstLine) { + ErrorOr Event = parseString('\n'); + if (std::error_code EC = Event.getError()) + return EC; + EventNames.insert(Event.get()); + FirstLine = false; + } + ErrorOr KperfEntry = parseLibkperfDataEntry(); + if (std::error_code EC = KperfEntry.getError()) + return EC; + + BasicSamples[KperfEntry->Addr] += KperfEntry->Count; + } + + return std::error_code(); +} + std::error_code DataAggregator::parsePreAggregatedLBRSamples() { outs() << "PERF2BOLT: parsing pre-aggregated profile...\n"; NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events", -- Gitee From f7f4d2bcc619d40f40647aa3eb7c776d76e4904a Mon Sep 17 00:00:00 2001 From: rfwang07 Date: Tue, 22 Apr 2025 19:33:52 +0800 Subject: [PATCH 09/11] Add Om for Kunpeng Opts --- bolt/include/bolt/Passes/BinaryPasses.h | 8 +++ bolt/include/bolt/Passes/SplitFunctions.h | 19 +++++++ bolt/lib/Core/BinaryFunctionProfile.cpp | 6 +-- bolt/lib/Passes/BinaryPasses.cpp | 10 +--- bolt/lib/Passes/IndirectCallPromotion.cpp | 2 +- bolt/lib/Passes/Inliner.cpp | 2 +- bolt/lib/Passes/MCF.cpp | 2 +- bolt/lib/Passes/ReorderFunctions.cpp | 2 +- bolt/lib/Passes/SplitFunctions.cpp | 23 +-------- bolt/lib/Passes/VeneerElimination.cpp | 2 +- bolt/lib/Rewrite/BinaryPassManager.cpp | 6 +-- bolt/lib/Rewrite/RewriteInstance.cpp | 2 +- bolt/tools/driver/llvm-bolt.cpp | 61 +++++++++++++++++++++++ 13 files changed, 104 insertions(+), 41 deletions(-) diff --git a/bolt/include/bolt/Passes/BinaryPasses.h b/bolt/include/bolt/Passes/BinaryPasses.h index dace07e903e7..5a2fe3b01425 100644 --- a/bolt/include/bolt/Passes/BinaryPasses.h +++ b/bolt/include/bolt/Passes/BinaryPasses.h @@ -23,6 +23,14 @@ #include #include +namespace opts { +enum SctcModes : char { + SctcAlways, + SctcPreserveDirection, + SctcHeuristic +}; +} + namespace llvm { namespace bolt { diff --git 
a/bolt/include/bolt/Passes/SplitFunctions.h b/bolt/include/bolt/Passes/SplitFunctions.h index 4058f3317dfb..a8c3a1463a7e 100644 --- a/bolt/include/bolt/Passes/SplitFunctions.h +++ b/bolt/include/bolt/Passes/SplitFunctions.h @@ -15,6 +15,25 @@ #include "llvm/Support/CommandLine.h" #include +using namespace llvm; + +class DeprecatedSplitFunctionOptionParser : public cl::parser { +public: + explicit DeprecatedSplitFunctionOptionParser(cl::Option &O) + : cl::parser(O) {} + + bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) { + if (Arg == "2" || Arg == "3") { + Value = true; + errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" " + "for option -{1} is deprecated\n", + Arg, ArgName); + return false; + } + return cl::parser::parse(O, ArgName, Arg, Value); + } +}; + namespace llvm { namespace bolt { diff --git a/bolt/lib/Core/BinaryFunctionProfile.cpp b/bolt/lib/Core/BinaryFunctionProfile.cpp index 0d705cd82f5d..c062f8d1ec91 100644 --- a/bolt/lib/Core/BinaryFunctionProfile.cpp +++ b/bolt/lib/Core/BinaryFunctionProfile.cpp @@ -44,17 +44,17 @@ static cl::alias ICPAlias("icp", extern cl::opt JumpTables; -static cl::opt FixFuncCounts( +cl::opt FixFuncCounts( "fix-func-counts", cl::desc("adjust function counts based on basic blocks execution count"), cl::Hidden, cl::cat(BoltOptCategory)); -static cl::opt FixBlockCounts( +cl::opt FixBlockCounts( "fix-block-counts", cl::desc("adjust block counts based on outgoing branch counts"), cl::init(true), cl::Hidden, cl::cat(BoltOptCategory)); -static cl::opt +cl::opt InferFallThroughs("infer-fall-throughs", cl::desc("infer execution count for fall-through blocks"), cl::Hidden, cl::cat(BoltOptCategory)); diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index ac80753a37bf..7a6ca64dea45 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -93,7 +93,7 @@ static cl::opt MinBranchClusters( "branches"), cl::Hidden, 
cl::cat(BoltOptCategory)); -static cl::list Peepholes( +cl::list Peepholes( "peepholes", cl::CommaSeparated, cl::desc("enable peephole optimizations"), cl::value_desc("opt1,opt2,opt3,..."), cl::values(clEnumValN(Peepholes::PEEP_NONE, "none", "disable peepholes"), @@ -181,13 +181,7 @@ static cl::opt cl::desc("print the list of functions with stale profile"), cl::Hidden, cl::cat(BoltOptCategory)); -enum SctcModes : char { - SctcAlways, - SctcPreserveDirection, - SctcHeuristic -}; - -static cl::opt +cl::opt SctcMode("sctc-mode", cl::desc("mode for simplify conditional tail calls"), cl::init(SctcAlways), diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp index 89727233ec78..e150fbacfa28 100644 --- a/bolt/lib/Passes/IndirectCallPromotion.cpp +++ b/bolt/lib/Passes/IndirectCallPromotion.cpp @@ -72,7 +72,7 @@ static cl::alias ICPMispredictThresholdAlias( cl::desc("alias for --indirect-call-promotion-mispredict-threshold"), cl::aliasopt(ICPMispredictThreshold)); -static cl::opt ICPUseMispredicts( +cl::opt ICPUseMispredicts( "indirect-call-promotion-use-mispredicts", cl::desc("use misprediction frequency for determining whether or not ICP " "should be applied at a callsite. 
The " diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp index e88dc442d217..bb07ec1e11de 100644 --- a/bolt/lib/Passes/Inliner.cpp +++ b/bolt/lib/Passes/Inliner.cpp @@ -50,7 +50,7 @@ ForceInlineFunctions("force-inline", cl::Hidden, cl::cat(BoltOptCategory)); -static cl::opt InlineAll("inline-all", cl::desc("inline all functions"), +cl::opt InlineAll("inline-all", cl::desc("inline all functions"), cl::cat(BoltOptCategory)); static cl::opt InlineIgnoreLeafCFI( diff --git a/bolt/lib/Passes/MCF.cpp b/bolt/lib/Passes/MCF.cpp index a6455bbebc75..bd98286cd1dc 100644 --- a/bolt/lib/Passes/MCF.cpp +++ b/bolt/lib/Passes/MCF.cpp @@ -31,7 +31,7 @@ extern cl::OptionCategory BoltOptCategory; extern cl::opt TimeOpts; -static cl::opt IterativeGuess( +cl::opt IterativeGuess( "iterative-guess", cl::desc("in non-LBR mode, guess edge counts using iterative technique"), cl::Hidden, cl::cat(BoltOptCategory)); diff --git a/bolt/lib/Passes/ReorderFunctions.cpp b/bolt/lib/Passes/ReorderFunctions.cpp index 70f87ac40c3c..da489297a3a7 100644 --- a/bolt/lib/Passes/ReorderFunctions.cpp +++ b/bolt/lib/Passes/ReorderFunctions.cpp @@ -91,7 +91,7 @@ static cl::opt CgIgnoreRecursiveCalls( cl::desc("ignore recursive calls when constructing the call graph"), cl::init(true), cl::cat(BoltOptCategory)); -static cl::opt CgUseSplitHotSize( +cl::opt CgUseSplitHotSize( "cg-use-split-hot-size", cl::desc("use hot/cold data on basic blocks to determine hot sizes for " "call graph functions"), diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp index 34973cecdf49..e934b75e707b 100644 --- a/bolt/lib/Passes/SplitFunctions.cpp +++ b/bolt/lib/Passes/SplitFunctions.cpp @@ -34,25 +34,6 @@ using namespace llvm; using namespace bolt; -namespace { -class DeprecatedSplitFunctionOptionParser : public cl::parser { -public: - explicit DeprecatedSplitFunctionOptionParser(cl::Option &O) - : cl::parser(O) {} - - bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool 
&Value) { - if (Arg == "2" || Arg == "3") { - Value = true; - errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" " - "for option -{1} is deprecated\n", - Arg, ArgName); - return false; - } - return cl::parser::parse(O, ArgName, Arg, Value); - } -}; -} // namespace - namespace opts { extern cl::OptionCategory BoltOptCategory; @@ -61,7 +42,7 @@ extern cl::opt SplitEH; extern cl::opt ExecutionCountThreshold; extern cl::opt RandomSeed; -static cl::opt AggressiveSplitting( +cl::opt AggressiveSplitting( "split-all-cold", cl::desc("outline as many cold basic blocks as possible"), cl::cat(BoltOptCategory)); @@ -74,7 +55,7 @@ static cl::opt SplitAlignThreshold( cl::Hidden, cl::cat(BoltOptCategory)); -static cl::opt +cl::opt SplitFunctions("split-functions", cl::desc("split functions into fragments"), cl::cat(BoltOptCategory)); diff --git a/bolt/lib/Passes/VeneerElimination.cpp b/bolt/lib/Passes/VeneerElimination.cpp index eadbfc17fb97..611d02787f55 100644 --- a/bolt/lib/Passes/VeneerElimination.cpp +++ b/bolt/lib/Passes/VeneerElimination.cpp @@ -20,7 +20,7 @@ namespace opts { extern cl::OptionCategory BoltOptCategory; -static llvm::cl::opt +llvm::cl::opt EliminateVeneers("elim-link-veneers", cl::desc("run veneer elimination pass"), cl::init(true), cl::Hidden, cl::cat(BoltOptCategory)); diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 5aab26322537..485cdaceec88 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -58,7 +58,7 @@ DynoStatsAll("dyno-stats-all", cl::desc("print dyno stats after each stage"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); -static cl::opt +cl::opt EliminateUnreachable("eliminate-unreachable", cl::desc("eliminate unreachable code"), cl::init(true), cl::cat(BoltOptCategory)); @@ -212,12 +212,12 @@ static cl::opt RegReAssign( "reassign registers so as to avoid using REX prefixes in hot code"), cl::cat(BoltOptCategory)); -static 
cl::opt SimplifyConditionalTailCalls( +cl::opt SimplifyConditionalTailCalls( "simplify-conditional-tail-calls", cl::desc("simplify conditional tail calls by removing unnecessary jumps"), cl::init(true), cl::cat(BoltOptCategory)); -static cl::opt SimplifyRODataLoads( +cl::opt SimplifyRODataLoads( "simplify-rodata-loads", cl::desc("simplify loads from read-only sections by replacing the memory " "operand with the constant found in the corresponding section"), diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index e8d302e3f44d..e39519798b53 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -235,7 +235,7 @@ static cl::opt DWPPathName("dwp", cl::Hidden, cl::init(""), cl::cat(BoltCategory)); -static cl::opt +cl::opt UseGnuStack("use-gnu-stack", cl::desc("use GNU_STACK program header for new segment (workaround for " "issues with strip/objcopy)"), diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp index 5a3af6a44b52..8cba4d28b279 100644 --- a/bolt/tools/driver/llvm-bolt.cpp +++ b/bolt/tools/driver/llvm-bolt.cpp @@ -26,6 +26,10 @@ #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" #include "llvm/Support/TargetSelect.h" +#include "bolt/Passes/BinaryPasses.h" +#include "bolt/Passes/ReorderFunctions.h" +#include "bolt/Passes/SplitFunctions.h" +#include "bolt/Passes/TailDuplication.h" #define DEBUG_TYPE "bolt" @@ -35,6 +39,28 @@ using namespace bolt; namespace opts { +extern cl::opt ReorderBlocks; +extern cl::opt ReorderFunctions; +extern cl::opt SplitFunctions; +extern cl::opt AggressiveSplitting; +extern cl::opt ICF; +extern cl::opt UseGnuStack; +extern cl::opt InlineAll; +extern cl::opt InferFallThroughs; +extern cl::opt SimplifyConditionalTailCalls; +extern cl::opt SimplifyRODataLoads; +extern cl::opt ICPUseMispredicts; +extern cl::opt EliminateVeneers; +extern cl::opt EliminateUnreachable; +extern cl::opt FixBlockCounts; +extern 
cl::opt FixFuncCounts; +extern cl::opt SctcMode; +extern cl::opt AlignBlocks; +extern cl::opt CgUseSplitHotSize; +extern cl::opt TailDuplicationMode; +extern cl::opt IterativeGuess; +extern cl::opt AssumeABI; + static cl::OptionCategory *BoltCategories[] = {&BoltCategory, &BoltOptCategory, &BoltRelocCategory, @@ -69,6 +95,12 @@ InputDataFilename2("data2", cl::Optional, cl::cat(BoltCategory)); +static cl::opt +Om("Om", + cl::desc("Kunpeng optimization"), + cl::ZeroOrMore, + cl::cat(BoltOptCategory)); + static cl::opt InputFilename2( cl::Positional, @@ -152,6 +184,34 @@ void boltDiffMode(int argc, char **argv) { opts::DiffOnly = true; } +void handleOptionOm() { + if (!opts::Om) { + return; + } + + opts::ReorderBlocks = ReorderBasicBlocks::LT_OPTIMIZE_EXT_TSP; // -reorder-blocks=ext-tsp + opts::ReorderFunctions = ReorderFunctions::RT_HFSORT_PLUS; // -reorder-functions=hfsort+ + opts::SplitFunctions = true; // -split-functions + opts::AggressiveSplitting = true; // -split-all-cold + opts::ICF = true; // -icf=1 + opts::UseGnuStack = true; // -use-gnu-stack + opts::InlineAll = true; // --inline-all + opts::InferFallThroughs = true; // --infer-fall-throughs + opts::SimplifyConditionalTailCalls = true; // --simplify-conditional-tail-calls + opts::SimplifyRODataLoads = true; // --simplify-rodata-loads + opts::ICPUseMispredicts = true; // --indirect-call-promotion-use-mispredicts + opts::EliminateVeneers = true; // --elim-link-veneers + opts::EliminateUnreachable = true; // --eliminate-unreachable + opts::FixBlockCounts = true; // --fix-block-counts + opts::FixFuncCounts = true; // --fix-func-counts + opts::SctcMode = opts::SctcModes::SctcPreserveDirection; // --sctc-mode=preserve + opts::AlignBlocks = true; // --align-blocks + opts::CgUseSplitHotSize = true; // --cg-use-split-hot-size + opts::TailDuplicationMode = TailDuplication::TD_AGGRESSIVE; // --tail-duplication=aggressive + opts::IterativeGuess = true; // --iterative-guess + opts::AssumeABI = true; // --assume-abi +} 
+ void boltMode(int argc, char **argv) { cl::HideUnrelatedOptions(ArrayRef(opts::BoltCategories)); // Register the target printer for --version. @@ -160,6 +220,7 @@ void boltMode(int argc, char **argv) { cl::ParseCommandLineOptions(argc, argv, "BOLT - Binary Optimization and Layout Tool\n"); + handleOptionOm(); if (opts::OutputFilename.empty()) { errs() << ToolName << ": expected -o= option.\n"; -- Gitee From 6daa888d8dd0ef7323f53312fe38b9b3b364b7e3 Mon Sep 17 00:00:00 2001 From: rfwang07 Date: Thu, 22 May 2025 15:22:45 +0800 Subject: [PATCH 10/11] add -strict to -Om --- bolt/tools/driver/llvm-bolt.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp index 8cba4d28b279..60f1ad2bf278 100644 --- a/bolt/tools/driver/llvm-bolt.cpp +++ b/bolt/tools/driver/llvm-bolt.cpp @@ -60,6 +60,7 @@ extern cl::opt CgUseSplitHotSize; extern cl::opt TailDuplicationMode; extern cl::opt IterativeGuess; extern cl::opt AssumeABI; +extern cl::opt StrictMode; static cl::OptionCategory *BoltCategories[] = {&BoltCategory, &BoltOptCategory, @@ -210,6 +211,7 @@ void handleOptionOm() { opts::TailDuplicationMode = TailDuplication::TD_AGGRESSIVE; // --tail-duplication=aggressive opts::IterativeGuess = true; // --iterative-guess opts::AssumeABI = true; // --assume-abi + opts::StrictMode = true; // --strict } void boltMode(int argc, char **argv) { -- Gitee From ad7cc9dd90dde652c8f244c79ea42a89ea19f264 Mon Sep 17 00:00:00 2001 From: eastb233 Date: Wed, 10 Dec 2025 17:08:24 +0800 Subject: [PATCH 11/11] [Bolt] Move BlockCorrection option to CommandLineOpts.cpp --- bolt/include/bolt/Utils/CommandLineOpts.h | 2 ++ bolt/lib/Rewrite/RewriteInstance.cpp | 6 ------ bolt/lib/Utils/CommandLineOpts.cpp | 6 ++++++ 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index 7b654f19f6d4..a0c7a02ec3af 100644 --- 
a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -74,6 +74,8 @@ extern llvm::cl::opt UpdateDebugSections; // dbgs() for output within DEBUG(). extern llvm::cl::opt Verbosity; +extern llvm::cl::opt BlockCorrection; + /// Return true if we should process all functions in the binary. bool processAllFunctions(); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index e39519798b53..7356431eaf6b 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -107,12 +107,6 @@ cl::opt DumpDotAll( "enable '-print-loops' for color-coded blocks"), cl::Hidden, cl::cat(BoltCategory)); -cl::opt BlockCorrection( - "block-correction", - cl::desc("capture features useful for ML model to inference the count on the binary basic block" - " and correct them on CFG."), - cl::ZeroOrMore, cl::cat(BoltOptCategory)); - static cl::list ForceFunctionNames("funcs", cl::CommaSeparated, diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index 19680fa945db..e55273f7fea1 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -191,6 +191,12 @@ cl::opt cl::init(0), cl::ZeroOrMore, cl::cat(BoltCategory), cl::sub(cl::SubCommand::getAll())); +cl::opt BlockCorrection( + "block-correction", + cl::desc("capture features useful for ML model to inference the count on the binary basic block" + " and correct them on CFG."), + cl::ZeroOrMore, cl::cat(BoltOptCategory)); + bool processAllFunctions() { if (opts::AggregateOnly) return false; -- Gitee