//===- bolt/Passes/IndirectCallPromotion.h ----------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // The indirect call promotion (ICP) optimization pass. // //===----------------------------------------------------------------------===// #ifndef BOLT_PASSES_INDIRECT_CALL_PROMOTION_H #define BOLT_PASSES_INDIRECT_CALL_PROMOTION_H #include "bolt/Passes/BinaryPasses.h" namespace llvm { namespace bolt { /// Optimize indirect calls. /// The indirect call promotion pass visits each indirect call and /// examines a branch profile for each. If the most frequent targets /// from that callsite exceed the specified threshold (default 90%), /// the call is promoted. Otherwise, it is ignored. By default, /// only one target is considered at each callsite. /// /// When a candidate callsite is processed, we modify the callsite /// to test for the most common call targets before calling through /// the original generic call mechanism. /// /// The CFG and layout are modified by ICP. /// /// A few new command line options have been added: /// -indirect-call-promotion=[none,call,jump-tables,all] /// -indirect-call-promotion-threshold=<percentage> /// -indirect-call-promotion-mispredict-threshold=<percentage> /// -indirect-call-promotion-topn=<int> /// /// The threshold is the minimum frequency of a call target needed /// before ICP is triggered. /// /// The mispredict threshold is used to disable the optimization at /// any callsite where the branch predictor does a good enough job /// that ICP wouldn't help regardless of the frequency of the most /// common target. /// /// The topn option controls the number of targets to consider for /// each callsite, e.g. 
ICP is triggered if topn=2 and the total /// frequency of the top two call targets exceeds the threshold. /// /// The minimize code size option controls whether or not the hot /// calls are to registers (callq %r10) or to function addresses /// (callq $foo). /// /// Example of ICP: /// /// C++ code: /// /// int B_count = 0; /// int C_count = 0; /// /// struct A { virtual void foo() = 0; }; /// struct B : public A { virtual void foo() { ++B_count; }; }; /// struct C : public A { virtual void foo() { ++C_count; }; }; /// /// A* a = ... /// a->foo(); /// ... /// /// original assembly: /// /// B0: 49 8b 07 mov (%r15),%rax /// 4c 89 ff mov %r15,%rdi /// ff 10 callq *(%rax) /// 41 83 e6 01 and $0x1,%r14d /// 4d 89 e6 mov %r12,%r14 /// 4c 0f 44 f5 cmove %rbp,%r14 /// 4c 89 f7 mov %r14,%rdi /// ... /// /// after ICP: /// /// B0: 49 8b 07 mov (%r15),%rax /// 4c 89 ff mov %r15,%rdi /// 48 81 38 e0 0b 40 00 cmpq $B::foo,(%rax) /// 75 29 jne B3 /// B1: e8 45 03 00 00 callq $B::foo /// B2: 41 83 e6 01 and $0x1,%r14d /// 4d 89 e6 mov %r12,%r14 /// 4c 0f 44 f5 cmove %rbp,%r14 /// 4c 89 f7 mov %r14,%rdi /// ... /// /// B3: ff 10 callq *(%rax) /// eb d6 jmp B2 /// class IndirectCallPromotion : public BinaryFunctionPass { … }; } // namespace bolt } // namespace llvm #endif