// cpython/Modules/_testexternalinspection.c

#define _GNU_SOURCE

#ifdef __linux__
#    include <elf.h>
#    include <stdint.h>  // INTPTR_MAX / INT64_MAX used in the test below
#    include <sys/uio.h>
// Select the ELF structure flavour that matches the pointer width of this
// build, so the rest of the file can use the width-neutral Elf_* names.
#    if INTPTR_MAX == INT64_MAX
#        define Elf_Ehdr Elf64_Ehdr
#        define Elf_Shdr Elf64_Shdr
#        define Elf_Phdr Elf64_Phdr
#    else
#        define Elf_Ehdr Elf32_Ehdr
#        define Elf_Shdr Elf32_Shdr
#        define Elf_Phdr Elf32_Phdr
#    endif
#    include <sys/mman.h>
#endif

#if defined(__APPLE__)
#  include <TargetConditionals.h>
// Older macOS SDKs do not define TARGET_OS_OSX.  The macro is evaluated with
// `#if` below, so the fallback must expand to an integer value.
#  if !defined(TARGET_OS_OSX)
#    define TARGET_OS_OSX 1
#  endif
#  if TARGET_OS_OSX
#    include <libproc.h>
#    include <mach-o/fat.h>
#    include <mach-o/loader.h>
#    include <mach-o/nlist.h>
#    include <mach/mach.h>
#    include <mach/mach_vm.h>
#    include <mach/machine.h>
#    include <sys/mman.h>
#    include <sys/proc.h>
#    include <sys/sysctl.h>
#  endif
#endif

#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#ifndef Py_BUILD_CORE_BUILTIN
#define Py_BUILD_CORE_MODULE
#endif
#include "Python.h"
#include <internal/pycore_debug_offsets.h>  // _Py_DebugOffsets
#include <internal/pycore_frame.h>          // FRAME_OWNED_BY_CSTACK
#include <internal/pycore_stackref.h>       // Py_TAG_BITS

// When the build configuration does not provide HAVE_PROCESS_VM_READV, fall
// back to an explicit 0 so the macro can be tested with `#if` and reads as
// "not available".
#ifndef HAVE_PROCESS_VM_READV
#    define HAVE_PROCESS_VM_READV 0
#endif

#if defined(__APPLE__) && TARGET_OS_OSX
// Locate the "PyRuntime" section of a 64-bit Mach-O image.
//
// `map` points at a local mapping of the image file; `base` is the address in
// the task `proc_ref` from which the VM region scan starts.  Load commands are
// walked until two `__DATA` segments have been examined or the commands run
// out.  For each `__DATA` segment the target's regions are queried forward
// until one whose size equals the segment's file size is reported; the offset
// derived from that region is then added to the section address recorded in
// the file.
//
// Returns the resulting address, or NULL when the section is not found (no
// exception is set here) or when a region query fails (RuntimeError is set).
static void*
analyze_macho64(mach_port_t proc_ref, void* base, void* map)
{
    struct mach_header_64* header = (struct mach_header_64*)map;
    int total_cmds = header->ncmds;
    // The load commands start right after the fixed-size header.
    struct segment_command_64* seg = map + sizeof(struct mach_header_64);

    // Region-scan state; it is shared across all segments examined below.
    mach_vm_address_t region_addr = (mach_vm_address_t)base;
    mach_vm_size_t region_size = 0;
    mach_msg_type_number_t info_count = sizeof(vm_region_basic_info_data_64_t);
    vm_region_basic_info_data_64_t info;
    mach_port_t obj_name;

    int data_segments = 0;
    int idx = 0;
    while (data_segments < 2 && idx < total_cmds) {
        if (seg->cmd == LC_SEGMENT_64 && strcmp(seg->segname, "__DATA") == 0) {
            // Advance through the target's regions until the reported size
            // matches this segment's size on disk.
            for (;;) {
                if (seg->filesize == region_size) {
                    break;
                }
                region_addr += region_size;
                kern_return_t kr = mach_vm_region(
                        proc_ref,
                        &region_addr,
                        &region_size,
                        VM_REGION_BASIC_INFO_64,
                        (vm_region_info_t)&info,  // cppcheck-suppress [uninitvar]
                        &info_count,
                        &obj_name);
                if (kr != KERN_SUCCESS) {
                    PyErr_SetString(PyExc_RuntimeError, "Cannot get any more VM maps.\n");
                    return NULL;
                }
            }
            // Difference between where the segment lives in the target and
            // the address recorded for it in the file.
            base = (void*)region_addr - seg->vmaddr;

            // The section headers follow the segment command directly.
            struct section_64* sections =
                    (struct section_64*)((void*)seg + sizeof(struct segment_command_64));
            int section_count = seg->nsects;
            int j = 0;
            while (j < section_count) {
                if (strcmp(sections[j].sectname, "PyRuntime") == 0) {
                    return base + sections[j].addr;
                }
                j++;
            }
            data_segments++;
        }

        // Step to the next load command using the size stored in this one.
        seg = (struct segment_command_64*)((void*)seg + seg->cmdsize);
        idx++;
    }
    return NULL;
}

// Map the Mach-O file at `path` read-only and hand it to analyze_macho64
// together with `base` and `proc_ref`.
//
// Only thin 64-bit images (MH_MAGIC_64 / MH_CIGAM_64) are analysed; 32-bit
// and fat magics, as well as unknown magics, set RuntimeError.  `size` is
// accepted for the caller's convenience but is not used here.
//
// Returns whatever analyze_macho64 returns, or NULL on failure.  A failure to
// close the file descriptor sets OSError and also yields NULL, so the function
// never returns a non-NULL address while an exception is pending.
static void*
analyze_macho(char* path, void* base, mach_vm_size_t size, mach_port_t proc_ref)
{
    int fd = open(path, O_RDONLY);
    if (fd == -1) {
        PyErr_Format(PyExc_RuntimeError, "Cannot open binary %s\n", path);
        return NULL;
    }

    struct stat fs;
    if (fstat(fd, &fs) == -1) {
        PyErr_Format(PyExc_RuntimeError, "Cannot get size of binary %s\n", path);
        close(fd);
        return NULL;
    }

    void* map = mmap(0, fs.st_size, PROT_READ, MAP_SHARED, fd, 0);
    if (map == MAP_FAILED) {
        PyErr_Format(PyExc_RuntimeError, "Cannot map binary %s\n", path);
        close(fd);
        return NULL;
    }

    void* result = NULL;

    // The magic number at the start of the file selects the image flavour.
    struct mach_header_64* hdr = (struct mach_header_64*)map;
    switch (hdr->magic) {
        case MH_MAGIC:
        case MH_CIGAM:
        case FAT_MAGIC:
        case FAT_CIGAM:
            PyErr_SetString(PyExc_RuntimeError, "32-bit Mach-O binaries are not supported");
            break;
        case MH_MAGIC_64:
        case MH_CIGAM_64:
            result = analyze_macho64(proc_ref, base, map);
            break;
        default:
            PyErr_SetString(PyExc_RuntimeError, "Unknown Mach-O magic");
            break;
    }

    munmap(map, fs.st_size);
    if (close(fd) != 0) {
        PyErr_SetFromErrno(PyExc_OSError);
        // An exception is now pending: do not report success alongside it.
        result = NULL;
    }
    return result;
}

// Obtain the Mach task port for process `pid` via task_for_pid().
//
// Returns the port on success.  On failure, sets PermissionError (the message
// includes the PID) and returns 0.
static mach_port_t
pid_to_task(pid_t pid)
{
    mach_port_t port;

    if (task_for_pid(mach_task_self(), pid, &port) == KERN_SUCCESS) {
        return port;
    }

    PyErr_Format(PyExc_PermissionError, "Cannot get task for PID %d", pid);
    return 0;
}

// Find the address of the Python runtime section in process `pid` on macOS.
//
// Walks the target task's VM regions starting at address 0, asks
// proc_regionfilename() for the file backing each region, and runs
// analyze_macho() on the first region whose file name starts with "python"
// and on the first whose name starts with "libpython".  A "libpython" match
// takes precedence and ends the scan.
//
// Returns the address reported by analyze_macho(), or NULL.  When the task
// port cannot be obtained, the PermissionError raised by pid_to_task() is
// left in place.
static void*
get_py_runtime_macos(pid_t pid)
{
    mach_vm_address_t address = 0;
    mach_vm_size_t size = 0;
    mach_msg_type_number_t count = sizeof(vm_region_basic_info_data_64_t);
    vm_region_basic_info_data_64_t region_info;
    mach_port_t object_name;

    mach_port_t proc_ref = pid_to_task(pid);
    if (proc_ref == 0) {
        // pid_to_task() already reports the failure (including the PID); only
        // supply a generic message if, unexpectedly, nothing was set.
        if (!PyErr_Occurred()) {
            PyErr_SetString(PyExc_PermissionError, "Cannot get task for PID");
        }
        return NULL;
    }

    int match_found = 0;
    char map_filename[MAXPATHLEN + 1];
    void* result_address = NULL;
    while (mach_vm_region(
                   proc_ref,
                   &address,
                   &size,
                   VM_REGION_BASIC_INFO_64,
                   (vm_region_info_t)&region_info,
                   &count,
                   &object_name)
           == KERN_SUCCESS)
    {
        // Regions without a backing file name are skipped.
        int path_len = proc_regionfilename(pid, address, map_filename, MAXPATHLEN);
        if (path_len == 0) {
            address += size;
            continue;
        }

        char* filename = strrchr(map_filename, '/');
        if (filename != NULL) {
            filename++;  // Move past the '/'
        } else {
            filename = map_filename;  // No path, use the whole string
        }

        // Check if the filename starts with "python" or "libpython"
        if (!match_found && strncmp(filename, "python", 6) == 0) {
            match_found = 1;
            result_address = analyze_macho(map_filename, (void*)address, size, proc_ref);
        }
        if (strncmp(filename, "libpython", 9) == 0) {
            match_found = 1;
            result_address = analyze_macho(map_filename, (void*)address, size, proc_ref);
            break;
        }

        address += size;
    }

    // Release the send right acquired by task_for_pid(); the port is not used
    // after this point.
    mach_port_deallocate(mach_task_self(), proc_ref);
    return result_address;
}
#endif

#ifdef __linux__
// NOTE(review): the body is empty in this view, so nothing below is verified
// against an implementation.  Judging by the signature only, this presumably
// returns the start address of the Python mapping in process `pid` and reports
// the mapped file through `result_filename` — confirm against the full source.
void*
find_python_map_start_address(pid_t pid, char* result_filename)
{}

// NOTE(review): the body is empty in this view.  Presumably the Linux
// counterpart of get_py_runtime_macos(), returning the runtime section
// address for process `pid` — confirm against the full source.
void*
get_py_runtime_linux(pid_t pid)
{}
#endif

// NOTE(review): the body is empty in this view.  From the signature, this
// presumably copies `len` bytes from `remote_address` in process `pid` into
// `dst`; the meaning of the ssize_t return value is not visible here — confirm.
ssize_t
read_memory(pid_t pid, void* remote_address, size_t len, void* dst)
{}

// NOTE(review): the body is empty in this view.  From the signature, this
// presumably reads a string object located at `address` in process `pid`
// into `buffer` (capacity `size`), using `debug_offsets` to find its fields;
// the return convention is not visible here — confirm.
int
read_string(pid_t pid, _Py_DebugOffsets* debug_offsets, void* address, char* buffer, Py_ssize_t size)
{}

// NOTE(review): the body is empty in this view.  Presumably dispatches to the
// platform-specific lookup (get_py_runtime_linux / get_py_runtime_macos) for
// process `pid` — confirm against the full source.
void*
get_py_runtime(pid_t pid)
{}

// NOTE(review): the body is empty in this view.  From the signature, this
// presumably inspects a code object at `address` in process `pid`, records
// something about it in `result`, and may update `*previous_frame`; the
// return convention is not visible here — confirm.
static int
parse_code_object(
        int pid,
        PyObject* result,
        struct _Py_DebugOffsets* offsets,
        void* address,
        void** previous_frame)
{}

// NOTE(review): the body is empty in this view.  From the signature, this
// presumably inspects a frame at `address` in process `pid`, records
// something about it in `result`, and stores the next frame to visit in
// `*previous_frame`; the return convention is not visible here — confirm.
static int
parse_frame_object(
        int pid,
        PyObject* result,
        struct _Py_DebugOffsets* offsets,
        void* address,
        void** previous_frame)
{}

// NOTE(review): the body is empty in this view.  The (self, args) signature
// matches a Python-callable C function; presumably it returns the stack trace
// of a target process identified through `args` — confirm.
static PyObject*
get_stack_trace(PyObject* self, PyObject* args)
{}

// Method table for this module.
// NOTE(review): the initializer is missing in this view — confirm the entries
// against the full source.
static PyMethodDef methods[] =;

// Module definition structure.
// NOTE(review): the initializer is missing in this view — confirm the fields
// against the full source.
static struct PyModuleDef module =;

// Module initialization entry point for `_testexternalinspection`.
// NOTE(review): the body is empty in this view; presumably it creates the
// module from the definition above — confirm against the full source.
PyMODINIT_FUNC
PyInit__testexternalinspection(void)
{}