llvm/bolt/include/bolt/Passes/IndirectCallPromotion.h

//===- bolt/Passes/IndirectCallPromotion.h ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The indirect call promotion (ICP) optimization pass.
//
//===----------------------------------------------------------------------===//

#ifndef BOLT_PASSES_INDIRECT_CALL_PROMOTION_H
#define BOLT_PASSES_INDIRECT_CALL_PROMOTION_H

#include "bolt/Passes/BinaryPasses.h"

namespace llvm {
namespace bolt {

/// Optimize indirect calls.
/// The indirect call promotion pass visits each indirect call and
/// examines a branch profile for each. If the most frequent targets
/// from that callsite exceed the specified threshold (default 90%),
/// the call is promoted. Otherwise, it is ignored. By default,
/// only one target is considered at each callsite.
///
/// When a candidate callsite is processed, we modify the callsite
/// to test for the most common call targets before calling through
/// the original generic call mechanism.
///
/// The CFG and layout are modified by ICP.
///
/// A few new command line options have been added:
///   -indirect-call-promotion=[none,call,jump-tables,all]
///   -indirect-call-promotion-threshold=<percentage>
///   -indirect-call-promotion-mispredict-threshold=<percentage>
///   -indirect-call-promotion-topn=<int>
///
/// The threshold is the minimum frequency of a call target needed
/// before ICP is triggered.
///
/// The mispredict threshold is used to disable the optimization at
/// any callsite where the branch predictor does a good enough job
/// that ICP wouldn't help regardless of the frequency of the most
/// common target.
///
/// The topn option controls the number of targets to consider for
/// each callsite, e.g. ICP is triggered if topn=2 and the total
/// frequency of the top two call targets exceeds the threshold.
///
/// The minimize code size option controls whether or not the hot
/// calls are to registers (callq %r10) or to function addresses
/// (callq $foo).
///
/// Example of ICP:
///
/// C++ code:
///
///   int B_count = 0;
///   int C_count = 0;
///
///   struct A { virtual void foo() = 0; };
///   struct B : public A { virtual void foo() { ++B_count; }; };
///   struct C : public A { virtual void foo() { ++C_count; }; };
///
///   A* a = ...
///   a->foo();
///   ...
///
/// original assembly:
///
///   B0: 49 8b 07             mov    (%r15),%rax
///       4c 89 ff             mov    %r15,%rdi
///       ff 10                callq  *(%rax)
///       41 83 e6 01          and    $0x1,%r14d
///       4d 89 e6             mov    %r12,%r14
///       4c 0f 44 f5          cmove  %rbp,%r14
///       4c 89 f7             mov    %r14,%rdi
///       ...
///
/// after ICP:
///
///   B0: 49 8b 07             mov    (%r15),%rax
///       4c 89 ff             mov    %r15,%rdi
///       48 81 38 e0 0b 40 00 cmpq   $B::foo,(%rax)
///       75 29                jne    B3
///   B1: e8 45 03 00 00       callq  $B::foo
///   B2: 41 83 e6 01          and    $0x1,%r14d
///       4d 89 e6             mov    %r12,%r14
///       4c 0f 44 f5          cmove  %rbp,%r14
///       4c 89 f7             mov    %r14,%rdi
///       ...
///
///   B3: ff 10                callq  *(%rax)
///       eb d6                jmp    B2
///
class IndirectCallPromotion : public BinaryFunctionPass {
  // Declares no members of its own; everything available on this type comes
  // from the public base class BinaryFunctionPass.
};

} // namespace bolt
} // namespace llvm

#endif