llvm/bolt/include/bolt/Profile/BoltAddressTranslation.h

//===- bolt/Profile/BoltAddressTranslation.h --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef BOLT_PROFILE_BOLTADDRESSTRANSLATION_H
#define BOLT_PROFILE_BOLTADDRESSTRANSLATION_H

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataExtractor.h"
#include <cstdint>
#include <map>
#include <optional>
#include <system_error>
#include <unordered_map>

namespace llvm {
class MCSymbol;
class raw_ostream;

namespace object {
class ELFObjectFileBase;
} // namespace object

namespace bolt {
class BinaryBasicBlock;
class BinaryContext;
class BinaryFunction;

/// The map of output addresses to input ones to be used when translating
/// samples collected in a binary that was already processed by BOLT. We do not
/// support reoptimizing a binary already processed by BOLT, but we do support
/// collecting samples in a binary processed by BOLT. We then translate samples
/// back to addresses from the input (original) binary, one that can be
/// optimized. The goal is to avoid special deployments of non-bolted binaries
/// just for the purposes of data collection.
///
/// The in-memory representation of the map is as follows. Each function has its
/// own map. A function is identified by its output address. This is the key to
/// retrieve a translation map. The translation map is a collection of ordered
/// keys identifying the start of a region (relative to the function start) in
/// the output address space (addresses in the binary processed by BOLT).
///
/// A translation then happens when perf2bolt needs to convert sample addresses
/// in the output address space back to input addresses, valid to run BOLT in
/// the original input binary. To convert, perf2bolt first needs to fetch the
/// translation map for a sample recorded in a given function. It then finds
/// the largest key that is still smaller or equal than the recorded address.
/// It then converts this address to use the value of this key.
///
///   Example translation Map for function foo
///      KEY                             VALUE                    BB?
///    Output offset1 (first BB)         Original input offset1   Y
///    ...
///    Output offsetN (last branch)      Original input offsetN   N
///
/// The information on whether a given entry is a BB start or an instruction
/// that changes control flow is encoded in the last (highest) bit of VALUE.
///
/// Notes:
/// Instructions that will never appear in LBR because they do not cause control
/// flow change are omitted from this map. Basic block locations are recorded
/// because they can be a target of a jump (To address in the LBR) and also to
/// recreate the BB layout of this function. We use the BB layout map to
/// recreate fall-through jumps in the profile, given an LBR trace.
class BoltAddressTranslation {};
} // namespace bolt

} // namespace llvm

#endif