cpython/Python/perf_jit_trampoline.c

#include "Python.h"
#include "pycore_ceval.h"         // _PyPerf_Callbacks
#include "pycore_frame.h"
#include "pycore_interp.h"


#ifdef PY_HAVE_PERF_TRAMPOLINE

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>             // mmap()
#include <sys/types.h>
#include <unistd.h>               // sysconf()
#include <sys/time.h>           // gettimeofday()
#include <sys/syscall.h>

// ----------------------------------
//         Perf jitdump API
// ----------------------------------

// NOTE(review): PerfMapJitState is presumably the state record of the jitdump
// writer (output file, mapping, counters, ...) — confirm against its fields.
PerfMapJitState;

// The single file-local instance of that state for this translation unit.
static PerfMapJitState perf_jit_map_state;

/*
Usually the binary and libraries are mapped in separate regions, like below:

  address ->
   --+---------------------+--//--+---------------------+--
     | .text | .data | ... |      | .text | .data | ... |
   --+---------------------+--//--+---------------------+--
         myprog                      libc.so

So it is easy and straightforward to find the mapped binary or library for a
given address.

For JIT code, however, the code arena only cares about the code section, while
the resulting DSOs (which are generated by perf inject -j) also contain ELF
headers and unwind info. That would produce the following address space with
synthesized MMAP events. Let's say there is a sample between addresses B and C.

                                               sample
                                                 |
  address ->                         A       B   v   C
  ---------------------------------------------------------------------------------------------------
  /tmp/jitted-PID-0.so   | (headers) | .text | unwind info |
  /tmp/jitted-PID-1.so           | (headers) | .text | unwind info |
  /tmp/jitted-PID-2.so                   | (headers) | .text | unwind info |
    ...
  ---------------------------------------------------------------------------------------------------

If perf only maps the .text section, it finds jitted-PID-1.so but cannot see
the unwind info. If it maps both the .text and the unwind sections, the sample
could be mapped to either jitted-PID-0.so or jitted-PID-1.so, and it is
ambiguous which one is right. So to make perf happy we use non-overlapping
ranges for each DSO:

  address ->
  -------------------------------------------------------------------------------------------------------
  /tmp/jitted-PID-0.so   | (headers) | .text | unwind info |
  /tmp/jitted-PID-1.so                         | (headers) | .text | unwind info |
  /tmp/jitted-PID-2.so                                               | (headers) | .text | unwind info |
    ...
  -------------------------------------------------------------------------------------------------------

As the trampolines are constant, we add a constant padding. In general, the
padding needs to have the size of the unwind info rounded up to a multiple of
16 bytes; for our trampolines this is 0x50.
 */

// Constant padding discussed in the comment above, which keeps the synthesized
// DSO ranges from overlapping (0x50 for our trampolines, per that comment).
#define PERF_JIT_CODE_PADDING
// NOTE(review): presumably shorthand for the trampoline API state the
// callbacks operate on — confirm the expansion.
#define trampoline_api

// NOTE(review): uword is presumably an unsigned machine-word integer type and
// CodeComments a code-annotation record — confirm both definitions.
uword;
CodeComments;

// NOTE(review): Pd looks like a printf format helper and MB like a
// megabyte size constant — confirm the expansions.
#define Pd
#define MB

// NOTE(review): presumably ELF e_machine identifiers for the supported
// architectures, consumed by GetElfMachineArchitecture() — confirm values.
#define EM_386
#define EM_X86_64
#define EM_ARM
#define EM_AARCH64
#define EM_RISCV

// NOTE(review): presumably build-target selectors. As written, every one of
// them is defined at once — confirm how the actual target is chosen.
#define TARGET_ARCH_IA32
#define TARGET_ARCH_X64
#define TARGET_ARCH_ARM
#define TARGET_ARCH_ARM64
#define TARGET_ARCH_RISCV32
#define TARGET_ARCH_RISCV64

// NOTE(review): presumably feature switches — confirm their values and users.
#define FLAG_generate_perf_jitdump
#define FLAG_write_protect_code
#define FLAG_write_protect_vm_isolate
#define FLAG_code_comments

// NOTE(review): by name, marks code paths that must never execute — confirm
// what (if anything) it expands to.
#define UNREACHABLE()

// NOTE(review): by its name and return type, reports the ELF machine
// identifier (one of the EM_* values) for the build target — confirm.
static uword GetElfMachineArchitecture(void) {}

// NOTE(review): the declarations below are presumably the record layouts of
// the perf jitdump format: the file header, the event kinds, the prefix shared
// by all events, and the code-load / unwinding-info events. Confirm each one
// against the jitdump specification.
Header;

 enum PerfEvent {};

struct BaseEvent {};

CodeLoadEvent;

CodeUnwindingInfoEvent;

// Conversion factor used for timestamps.
// NOTE(review): presumably 1000000000 — confirm the initializer.
static const intptr_t nanoseconds_per_second =;

// Dwarf encoding constants
// NOTE(review): presumably the DW_EH_PE_* pointer-encoding values used by
// .eh_frame / .eh_frame_hdr — confirm against the initializers.

static const uint8_t DwarfUData4 =;
static const uint8_t DwarfSData4 =;
static const uint8_t DwarfPcRel =;
static const uint8_t DwarfDataRel =;
// static uint8_t DwarfOmit = 0xff;
// NOTE(review): EhFrameHeader is presumably the .eh_frame_hdr layout — confirm.
EhFrameHeader;

// NOTE(review): by name, returns the current monotonic clock reading as a
// 64-bit tick count, presumably in nanoseconds (see nanoseconds_per_second)
// — confirm the clock source and the unit.
static int64_t get_current_monotonic_ticks(void) {}

// NOTE(review): by name, returns the current wall-clock time in microseconds,
// presumably via gettimeofday() (the file includes <sys/time.h> for it)
// — confirm, including what is returned on failure.
static int64_t get_current_time_microseconds(void) {}


/* Round `value` up to the nearest multiple of `multiple`.
 *
 * value:    quantity to round; expected to be non-negative because the result
 *           is returned as a size_t.
 * multiple: alignment to round to. A non-positive multiple means "no
 *           rounding" and `value` is returned unchanged, which also avoids a
 *           division by zero.
 *
 * Returns the smallest multiple of `multiple` that is >= `value`.
 * The addition is not checked for overflow; callers pass small sizes. */
static size_t round_up(int64_t value, int64_t multiple) {
    if (multiple <= 0) {
        return (size_t)value;
    }

    int64_t remainder = value % multiple;
    if (remainder == 0) {
        /* Already aligned. */
        return (size_t)value;
    }
    if (remainder < 0) {
        /* C's % truncates toward zero, so for a negative value the next
         * multiple upwards is reached by dropping the remainder. */
        return (size_t)(value - remainder);
    }

    return (size_t)(value + (multiple - remainder));
}


// NOTE(review): by name, writes all `size` bytes starting at `buffer` to the
// jitdump output, presumably looping over short writes — confirm the
// destination and the error handling.
static void perf_map_jit_write_fully(const void* buffer, size_t size) {}

// NOTE(review): by name, emits the jitdump file header for process `pid` to
// `out_file` — confirm which Header fields are filled in and how.
static void perf_map_jit_write_header(int pid, FILE* out_file) {}

// NOTE(review): by name, the initialization hook of the jitdump backend. The
// void* result is presumably the state pointer later passed to
// perf_map_jit_write_entry() and perf_map_jit_fini() — confirm, including the
// value returned on failure.
static void* perf_map_jit_init(void) {}

/* DWARF definitions. */
/* NOTE(review): the anonymous enums below presumably group DWARF constants
 * (call-frame opcodes, pointer encodings, tags, attributes, forms, register
 * numbers, ...) — confirm what each group contains. */

#define DWRF_CIE_VERSION

enum {};

enum
  {};

enum {};

enum {};

enum {};

enum {};

enum {};

enum {};

enum {};

/* Context passed to the elfctx_append_* helpers and to elf_init_ehframe().
 * NOTE(review): presumably holds the output buffer cursor and section
 * bookkeeping — confirm against its fields. */
ELFObjectContext;

/* Append a null-terminated string.
 *
 * ctx: ELF object context that receives the bytes.
 * str: NUL-terminated string to append.
 * NOTE(review): the uint32_t result is presumably the offset at which the
 * string was placed — confirm. */
static uint32_t
elfctx_append_string(ELFObjectContext* ctx, const char* str)
{}

/* Append a SLEB128 value.
 *
 * ctx: ELF object context that receives the encoded bytes.
 * v:   signed 32-bit value to encode as signed LEB128. */
static void
elfctx_append_sleb128(ELFObjectContext* ctx, int32_t v)
{}

/* Append a ULEB128 to buffer.
 *
 * ctx: ELF object context that receives the encoded bytes.
 * v:   unsigned 32-bit value to encode as unsigned LEB128. */
static void
elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v)
{}

/* Shortcuts to generate DWARF structures. */
/* NOTE(review): by their names, U8/I8/U16/U32/ADDR emit fixed-width values,
 * UV/SV emit ULEB128/SLEB128 values (cf. elfctx_append_uleb128/sleb128), STR
 * emits a string, ALIGNNOP pads to an alignment and SECTION wraps the
 * statements that build one section — confirm against the expansions,
 * including which local variables they expect to be in scope. */
#define DWRF_U8(x)
#define DWRF_I8(x)
#define DWRF_U16(x)
#define DWRF_U32(x)
#define DWRF_ADDR(x)
#define DWRF_UV(x)
#define DWRF_SV(x)
#define DWRF_STR(str)
#define DWRF_ALIGNNOP(s)
#define DWRF_SECTION(name, stmt)

/* Initialize .eh_frame section.
 *
 * ctx: ELF object context the section is built in.
 * NOTE(review): presumably emits the CIE and FDE describing the trampoline
 * frame using the DWRF_* shortcuts — confirm. */
static void
elf_init_ehframe(ELFObjectContext* ctx)
{}

/* Per-code-object hook of the jitdump backend.
 *
 * state:     opaque backend state.
 * code_addr: start address of the generated code.
 * code_size: size of the generated code in bytes.
 * co:        code object the generated code belongs to.
 * NOTE(review): by name, records one jitdump entry for the code at code_addr,
 * presumably an unwinding-info event followed by a code-load event, with
 * `state` being the value returned by perf_map_jit_init() — confirm. */
static void perf_map_jit_write_entry(void *state, const void *code_addr,
                         unsigned int code_size, PyCodeObject *co)
{}

// NOTE(review): by name, the teardown hook of the jitdump backend, presumably
// releasing whatever perf_map_jit_init() acquired — confirm, including the
// meaning of the int result.
static int perf_map_jit_fini(void* state) {}

// Non-static callback table of type _PyPerf_Callbacks (from pycore_ceval.h).
// NOTE(review): presumably wires up perf_map_jit_init,
// perf_map_jit_write_entry and perf_map_jit_fini — confirm the initializer.
_PyPerf_Callbacks _Py_perfmap_jit_callbacks =;

#endif