/* Python's malloc wrappers (see pymem.h) */

#include "Python.h"
#include "pycore_code.h"          // stats
#include "pycore_object.h"        // _PyDebugAllocatorStats() definition
#include "pycore_obmalloc.h"
#include "pycore_pyerrors.h"      // _Py_FatalErrorFormat()
#include "pycore_pymem.h"
#include "pycore_pystate.h"       // _PyInterpreterState_GET
#include "pycore_obmalloc_init.h"

#include <stdlib.h>               // malloc()
#include <stdbool.h>

#ifdef WITH_MIMALLOC
// Forward declarations of functions used in our mimalloc modifications
static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
static void _PyMem_mi_page_reclaimed(mi_page_t *page);
static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
# include "pycore_mimalloc.h"
# include "mimalloc/static.c"
# include "mimalloc/internal.h"   // for stats
#endif

#if defined(Py_GIL_DISABLED) && !defined(WITH_MIMALLOC)
# error "Py_GIL_DISABLED requires WITH_MIMALLOC"
#endif

#undef uint
#define uint …

/* Defined in tracemalloc.c */
extern void _PyMem_DumpTraceback(int fd, const void *ptr);

static void _PyObject_DebugDumpAddress(const void *p);
static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);

static void set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain);
static void set_up_debug_hooks_unlocked(void);
static void get_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
static void set_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);


/***************************************/
/* low-level allocator implementations */
/***************************************/

/* the default raw allocator (wraps malloc) */

void *
_PyMem_RawMalloc(void *Py_UNUSED(ctx), size_t size)
{ … }

void *
_PyMem_RawCalloc(void *Py_UNUSED(ctx), size_t nelem, size_t elsize)
{ … }

void *
_PyMem_RawRealloc(void *Py_UNUSED(ctx), void *ptr, size_t size)
{ … }

void
_PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
{ … }

#ifdef WITH_MIMALLOC

static void
_PyMem_mi_page_clear_qsbr(mi_page_t *page)
{ … }

// Check if an empty, newly reclaimed page is safe to free now.
static bool
_PyMem_mi_page_is_safe_to_free(mi_page_t *page)
{ … }

static bool
_PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
{ … }

static void
_PyMem_mi_page_reclaimed(mi_page_t *page)
{ … }

static void
_PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
{ … }

void *
_PyMem_MiMalloc(void *ctx, size_t size)
{ … }

void *
_PyMem_MiCalloc(void *ctx, size_t nelem, size_t elsize)
{ … }

void *
_PyMem_MiRealloc(void *ctx, void *ptr, size_t size)
{ … }

void
_PyMem_MiFree(void *ctx, void *ptr)
{ … }

void *
_PyObject_MiMalloc(void *ctx, size_t nbytes)
{ … }

void *
_PyObject_MiCalloc(void *ctx, size_t nelem, size_t elsize)
{ … }

void *
_PyObject_MiRealloc(void *ctx, void *ptr, size_t nbytes)
{ … }

void
_PyObject_MiFree(void *ctx, void *ptr)
{ … }

#endif // WITH_MIMALLOC

#define MALLOC_ALLOC …
#ifdef WITH_MIMALLOC
#define MIMALLOC_ALLOC …
#define MIMALLOC_OBJALLOC …
#endif

/* the pymalloc allocator */

// The actual implementation is further down.
#if defined(WITH_PYMALLOC)
void* _PyObject_Malloc(void *ctx, size_t size);
void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
void _PyObject_Free(void *ctx, void *p);
void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);

#define PYMALLOC_ALLOC …
#endif  // WITH_PYMALLOC

#if defined(Py_GIL_DISABLED)
// Py_GIL_DISABLED requires using mimalloc for "mem" and "obj" domains.
#define PYRAW_ALLOC …
#define PYMEM_ALLOC …
#define PYOBJ_ALLOC …
#elif defined(WITH_PYMALLOC)
#define PYRAW_ALLOC …
#define PYMEM_ALLOC …
#define PYOBJ_ALLOC …
#else
#define PYRAW_ALLOC …
#define PYMEM_ALLOC …
#define PYOBJ_ALLOC …
#endif

/* the default debug allocators */

// The actual implementation is further down.
void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
void _PyMem_DebugRawFree(void *ctx, void *ptr);

void* _PyMem_DebugMalloc(void *ctx, size_t size);
void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
void _PyMem_DebugFree(void *ctx, void *p);

#define PYDBGRAW_ALLOC …
#define PYDBGMEM_ALLOC …
#define PYDBGOBJ_ALLOC …

/* the low-level virtual memory allocator */

#ifdef WITH_PYMALLOC
# ifdef MS_WINDOWS
#  include <windows.h>
# elif defined(HAVE_MMAP)
#  include <sys/mman.h>
#  ifdef MAP_ANONYMOUS
#   define ARENAS_USE_MMAP
#  endif
# endif
#endif

void *
_PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size)
{ … }

void
_PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
#if defined(ARENAS_USE_MMAP)
    size_t size
#else
    size_t Py_UNUSED(size)
#endif
)
{ … }

/*******************************************/
/* end low-level allocator implementations */
/*******************************************/
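
/* Illustrative sketch (not part of the original file): the bodies of
 * _PyMem_ArenaAlloc() and _PyMem_ArenaFree() are elided above.  When
 * ARENAS_USE_MMAP is defined, an arena can be obtained from the OS as
 * anonymous, private memory instead of going through malloc().  The
 * hypothetical, POSIX-only helpers below show that idea; the real
 * functions also handle Windows (VirtualAlloc) and the malloc() fallback. */
#ifdef ARENAS_USE_MMAP
static void *
example_arena_alloc(size_t size)
{
    void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    /* mmap() reports failure with MAP_FAILED, not NULL. */
    return (p == MAP_FAILED) ? NULL : p;
}

static void
example_arena_free(void *ptr, size_t size)
{
    /* Anonymous mappings must be returned with munmap(); free() would be
     * undefined behaviour here. */
    munmap(ptr, size);
}
#endif  /* ARENAS_USE_MMAP */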
#if defined(__has_feature)  /* Clang */
# if __has_feature(address_sanitizer)   /* is ASAN enabled? */
#define _Py_NO_SANITIZE_ADDRESS …
# endif
# if __has_feature(thread_sanitizer)    /* is TSAN enabled? */
#define _Py_NO_SANITIZE_THREAD …
# endif
# if __has_feature(memory_sanitizer)    /* is MSAN enabled? */
#define _Py_NO_SANITIZE_MEMORY …
# endif
#elif defined(__GNUC__)
# if defined(__SANITIZE_ADDRESS__)      /* GCC 4.8+, is ASAN enabled? */
#define _Py_NO_SANITIZE_ADDRESS …
# endif
// TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro
// is provided only since GCC 7.
# if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
#define _Py_NO_SANITIZE_THREAD …
# endif
#endif

#ifndef _Py_NO_SANITIZE_ADDRESS
#define _Py_NO_SANITIZE_ADDRESS
#endif
#ifndef _Py_NO_SANITIZE_THREAD
#define _Py_NO_SANITIZE_THREAD
#endif
#ifndef _Py_NO_SANITIZE_MEMORY
#define _Py_NO_SANITIZE_MEMORY
#endif

#define ALLOCATORS_MUTEX …
#define _PyMem_Raw …
#define _PyMem …
#define _PyObject …
#define _PyMem_Debug …
#define _PyObject_Arena …


/***************************/
/* managing the allocators */
/***************************/

static int
set_default_allocator_unlocked(PyMemAllocatorDomain domain, int debug,
                               PyMemAllocatorEx *old_alloc)
{ … }

#ifdef Py_DEBUG
static const int pydebug = 1;
#else
static const int pydebug = …;
#endif

int
_PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
                           PyMemAllocatorEx *old_alloc)
{ … }

int
_PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
{ … }

static int
set_up_allocators_unlocked(PyMemAllocatorName allocator)
{ … }

int
_PyMem_SetupAllocators(PyMemAllocatorName allocator)
{ … }

static int
pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
{ … }

static const char*
get_current_allocator_name_unlocked(void)
{ … }

const char*
_PyMem_GetCurrentAllocatorName(void)
{ … }

int
_PyMem_DebugEnabled(void)
{ … }

#ifdef WITH_PYMALLOC
static int
_PyMem_PymallocEnabled(void)
{ … }

#ifdef WITH_MIMALLOC
static int
_PyMem_MimallocEnabled(void)
{ … }
#endif  // WITH_MIMALLOC
#endif  // WITH_PYMALLOC

static void
set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain)
{ … }

static void
set_up_debug_hooks_unlocked(void)
{ … }

void
PyMem_SetupDebugHooks(void)
{ … }

static void
get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
{ … }

static void
set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
{ … }

void
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
{ … }

void
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
{ … }

void
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
{ … }

void
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
{ … }

/* Note that there is a possible, but very unlikely, race in any place
 * below where we call one of the allocator functions.  We access two
 * fields in each case:  "malloc", etc. and "ctx".
 *
 * It is unlikely that the allocator will be changed while one of those
 * calls is happening, much less in that very narrow window.
 * Furthermore, the likelihood of a race is drastically reduced by the
 * fact that the allocator may not be changed after runtime init
 * (except with a wrapper).
 *
 * With the above in mind, we currently don't worry about locking
 * around these uses of the runtime-global allocators state. */
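
/* Example (not part of the original file): the public PyMem_GetAllocator()
 * and PyMem_SetAllocator() API above can be used to wrap a domain's
 * allocator, as described by PEP 445.  This hypothetical hook counts raw
 * allocations; like the real hooks, it should be installed before
 * Py_Initialize(), and the counter below is not thread-safe. */
static PyMemAllocatorEx example_prev_raw;
static size_t example_raw_alloc_count;

static void *
example_raw_malloc(void *Py_UNUSED(ctx), size_t size)
{
    example_raw_alloc_count++;
    return example_prev_raw.malloc(example_prev_raw.ctx, size);
}

static void *
example_raw_calloc(void *Py_UNUSED(ctx), size_t nelem, size_t elsize)
{
    example_raw_alloc_count++;
    return example_prev_raw.calloc(example_prev_raw.ctx, nelem, elsize);
}

static void *
example_raw_realloc(void *Py_UNUSED(ctx), void *ptr, size_t size)
{
    return example_prev_raw.realloc(example_prev_raw.ctx, ptr, size);
}

static void
example_raw_free(void *Py_UNUSED(ctx), void *ptr)
{
    example_prev_raw.free(example_prev_raw.ctx, ptr);
}

static void
example_install_raw_hook(void)
{
    PyMemAllocatorEx hook = {
        .ctx = NULL,
        .malloc = example_raw_malloc,
        .calloc = example_raw_calloc,
        .realloc = example_raw_realloc,
        .free = example_raw_free,
    };
    /* Remember the previous allocator so the hook can delegate to it. */
    PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &example_prev_raw);
    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &hook);
}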

/*************************/
/* the "arena" allocator */
/*************************/

void *
_PyObject_VirtualAlloc(size_t size)
{ … }

void
_PyObject_VirtualFree(void *obj, size_t size)
{ … }


/***********************/
/* the "raw" allocator */
/***********************/

void *
PyMem_RawMalloc(size_t size)
{ … }

void *
PyMem_RawCalloc(size_t nelem, size_t elsize)
{ … }

void*
PyMem_RawRealloc(void *ptr, size_t new_size)
{ … }

void
PyMem_RawFree(void *ptr)
{ … }


/***********************/
/* the "mem" allocator */
/***********************/

void *
PyMem_Malloc(size_t size)
{ … }

void *
PyMem_Calloc(size_t nelem, size_t elsize)
{ … }

void *
PyMem_Realloc(void *ptr, size_t new_size)
{ … }

void
PyMem_Free(void *ptr)
{ … }


/***************************/
/* pymem utility functions */
/***************************/

wchar_t*
_PyMem_RawWcsdup(const wchar_t *str)
{ … }

char *
_PyMem_RawStrdup(const char *str)
{ … }

char *
_PyMem_Strdup(const char *str)
{ … }


/***********************************************/
/* Delayed freeing support for Py_GIL_DISABLED */
/***********************************************/

// So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
#define WORK_ITEMS_PER_CHUNK …

// A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
struct _mem_work_item { … };

// A fixed-size buffer of pointers to be freed
struct _mem_work_chunk { … };

static void
free_work_item(uintptr_t ptr)
{ … }

static void
free_delayed(uintptr_t ptr)
{ … }

void
_PyMem_FreeDelayed(void *ptr)
{ … }

void
_PyObject_FreeDelayed(void *ptr)
{ … }

static struct _mem_work_chunk *
work_queue_first(struct llist_node *head)
{ … }

static void
process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr,
              bool keep_empty)
{ … }

static void
process_interp_queue(struct _Py_mem_interp_free_queue *queue,
                     struct _qsbr_thread_state *qsbr)
{ … }

void
_PyMem_ProcessDelayed(PyThreadState *tstate)
{ … }

void
_PyMem_AbandonDelayed(PyThreadState *tstate)
{ … }

void
_PyMem_FiniDelayed(PyInterpreterState *interp)
{ … }
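
/* Illustrative sketch (not part of the original file): the fields of
 * struct _mem_work_item and struct _mem_work_chunk are elided above, and
 * WORK_ITEMS_PER_CHUNK is chosen "so that sizeof(struct _mem_work_chunk)
 * is 4096 bytes on 64-bit platforms".  Assuming a work item pairs the
 * pointer with its QSBR goal sequence, a hypothetical layout that lands on
 * exactly one 4096-byte page could look like this (all names below are
 * invented for illustration): */
struct example_work_item {
    uintptr_t ptr;          /* memory to free */
    uint64_t qsbr_goal;     /* free once the QSBR read sequence reaches this */
};

struct example_work_chunk {
    struct example_work_chunk *prev;    /* chunks form a doubly linked queue */
    struct example_work_chunk *next;
    size_t rd_idx;                      /* next item to process */
    size_t wr_idx;                      /* next free slot */
    /* Assuming 8-byte pointers and size_t: 4 * 8 = 32 header bytes, so
     * (4096 - 32) / 16 = 254 items make the whole chunk exactly one page. */
    struct example_work_item items[(4096 - 4 * sizeof(void *))
                                   / sizeof(struct example_work_item)];
};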

/**************************/
/* the "object" allocator */
/**************************/

void *
PyObject_Malloc(size_t size)
{ … }

void *
PyObject_Calloc(size_t nelem, size_t elsize)
{ … }

void *
PyObject_Realloc(void *ptr, size_t new_size)
{ … }

void
PyObject_Free(void *ptr)
{ … }


/* If we're using GCC, use __builtin_expect() to reduce overhead of
   the valgrind checks */
#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
#define UNLIKELY(value) …
#define LIKELY(value) …
#else
#define UNLIKELY …
#define LIKELY …
#endif

#ifdef WITH_PYMALLOC

#ifdef WITH_VALGRIND
#include <valgrind/valgrind.h>

/* -1 indicates that we haven't checked that we're running on valgrind yet. */
static int running_on_valgrind = -1;
#endif

typedef struct _obmalloc_state OMState;

/* obmalloc state for main interpreter and shared by all interpreters without
 * their own obmalloc state.  By not explicitly initializing this structure, it
 * will be allocated in the BSS which is a small performance win.  The radix
 * tree arrays are fairly large but are sparsely used.  */
static struct _obmalloc_state obmalloc_state_main;
static bool obmalloc_state_initialized;

static inline int
has_own_state(PyInterpreterState *interp)
{ … }

static inline OMState *
get_state(void)
{ … }

// These macros all rely on a local "state" variable.
#define usedpools …
#define allarenas …
#define maxarenas …
#define unused_arena_objects …
#define usable_arenas …
#define nfp2lasta …
#define narenas_currently_allocated …
#define ntimes_arena_allocated …
#define narenas_highwater …
#define raw_allocated_blocks …

#ifdef WITH_MIMALLOC
static bool
count_blocks(const mi_heap_t* heap, const mi_heap_area_t* area,
             void* block, size_t block_size, void* allocated_blocks)
{ … }

static Py_ssize_t
get_mimalloc_allocated_blocks(PyInterpreterState *interp)
{ … }
#endif

Py_ssize_t
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
{ … }

static void free_obmalloc_arenas(PyInterpreterState *interp);

void
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
{ … }

static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);

/* We preserve the number of blocks leaked during runtime finalization,
   so they can be reported if the runtime is initialized again. */
// XXX We don't lose any information by dropping this,
// so we should consider doing so.
static Py_ssize_t last_final_leaks = …;

void
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
{ … }

static Py_ssize_t
get_num_global_allocated_blocks(_PyRuntimeState *runtime)
{ … }

Py_ssize_t
_Py_GetGlobalAllocatedBlocks(void)
{ … }

#if WITH_PYMALLOC_RADIX_TREE
/*==========================================================================*/
/* radix tree for tracking arena usage. */

#define arena_map_root …
#ifdef USE_INTERIOR_NODES
#define arena_map_mid_count …
#define arena_map_bot_count …
#endif

/* Return a pointer to a bottom tree node.  Return NULL if it doesn't exist
 * or it cannot be created. */
static inline Py_ALWAYS_INLINE arena_map_bot_t *
arena_map_get(OMState *state, pymem_block *p, int create)
{ … }

/* The radix tree only tracks arenas.  So, for 16 MiB arenas, we throw
 * away 24 bits of the address.  That reduces the space requirement of
 * the tree compared to similar radix tree page-map schemes.  In
 * exchange for slashing the space requirement, it needs more
 * computation to check an address.
 *
 * Tracking coverage is done by "ideal" arena address.  It is easier to
 * explain in decimal so let's say that the arena size is 100 bytes.
 * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
 * pointer address is inside an actual arena, we have to check two ideal
 * arena addresses.  E.g. if the pointer is 357, we need to check 200 and
 * 300.  In the rare case that an arena is aligned in the ideal way
 * (e.g. the base address of the arena is 200) then we only have to check
 * one ideal address.
 *
 * The tree nodes for 200 and 300 both store the address of the arena.
 * There are two cases: the arena starts at a lower ideal arena and
 * extends to this one, or the arena starts in this arena and extends to
 * the next ideal arena.  The tail_lo and tail_hi members correspond to
 * these two cases.
 */

/* mark or unmark addresses covered by arena */
static int
arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
{ … }

/* Return true if 'p' is a pointer inside an obmalloc arena.
 * _PyObject_Free() calls this so it needs to be very fast. */
static int
arena_map_is_used(OMState *state, pymem_block *p)
{ … }

/* end of radix tree logic */
/*==========================================================================*/
#endif /* WITH_PYMALLOC_RADIX_TREE */
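
/* Illustrative sketch (not part of the original file): the radix tree
 * comment above explains arena tracking in terms of "ideal" arena
 * addresses.  Assuming 16 MiB (2**24-byte) arenas, the ideal slot for an
 * address is just its top bits, and an arena that is not ideally aligned
 * is registered under two consecutive slots.  This hypothetical helper
 * shows which two ideal slots a lookup for pointer p has to consider
 * (pointer 357 -> ideal addresses 300 and 200 in the decimal analogy). */
#define EXAMPLE_ARENA_BITS 24   /* assumption: 16 MiB arenas */

static inline void
example_ideal_slots(uintptr_t p, uintptr_t *slot_hi, uintptr_t *slot_lo)
{
    /* Slot whose ideal address range contains p. */
    *slot_hi = p >> EXAMPLE_ARENA_BITS;
    /* An unaligned arena covering p may also be registered under the
     * previous slot (the "arena starts at a lower ideal arena" case). */
    *slot_lo = *slot_hi - 1;
}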

/* Allocate a new arena.  If we run out of memory, return NULL.  Else
 * allocate a new arena, and return the address of an arena_object
 * describing the new arena.  It's expected that the caller will set
 * `usable_arenas` to the return value.
 */
static struct arena_object*
new_arena(OMState *state)
{ … }


#if WITH_PYMALLOC_RADIX_TREE
/* Return true if and only if P is an address that was allocated by
   pymalloc.  When the radix tree is used, 'poolp' is unused. */
static bool
address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
{ … }
#else
/*
address_in_range(P, POOL)

Return true if and only if P is an address that was allocated by pymalloc.
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
(the caller is asked to compute this because the macro expands POOL more than
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
variable and pass the latter to the macro; because address_in_range is called
on every alloc/realloc/free, micro-efficiency is important here).

Tricky:  Let B be the arena base address associated with the pool, B =
arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if

    B <= P < B + ARENA_SIZE

Subtracting B throughout, this is true iff

    0 <= P-B < ARENA_SIZE

By using unsigned arithmetic, the "0 <=" half of the test can be skipped.

Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
before the first arena has been allocated.  `arenas` is still NULL in that
case.  We're relying on the fact that maxarenas is also 0 in that case, so
that (POOL)->arenaindex < maxarenas must be false, saving us from trying to
index into a NULL arenas.

Details:  given P and POOL, the arena_object corresponding to P is
AO = arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring
wild stores, etc), POOL is the correct address of P's pool, AO.address is the
correct base address of the pool's arena, and P must be within ARENA_SIZE of
AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
(NULL)).  Therefore address_in_range correctly reports that obmalloc controls P.

Now suppose obmalloc does not control P (e.g., P was obtained via a direct
call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
in this case -- it may even be uninitialized trash.  If the trash arenaindex
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
control P.

Else arenaindex is < maxarenas, and AO is read up.  If AO corresponds to an
allocated arena, obmalloc controls all the memory in slice
AO.address : AO.address + ARENA_SIZE.  By case assumption, P is not controlled
by obmalloc, so P doesn't lie in that slice, so the macro correctly reports
that P is not controlled by obmalloc.

Finally, if P is not controlled by obmalloc and AO corresponds to an unused
arena_object (one not currently associated with an allocated arena),
AO.address is 0, and the second test in the macro reduces to:

    P < ARENA_SIZE

If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
of the test still passes, and the third clause (AO.address != 0) is necessary
to get the correct result:  AO.address is 0 in this case, so the macro
correctly reports that P is not controlled by obmalloc (despite that P lies in
slice AO.address : AO.address + ARENA_SIZE).

Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
2.5, arenas were never free()'ed, and an arenaindex < maxarenas always
corresponded to a currently-allocated arena, so the "P is not controlled by
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
was impossible.
Note that the logic is excruciating, and reading up possibly uninitialized
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
creates problems for some memory debuggers.  The overwhelming advantage is
that this test determines whether an arbitrary address is controlled by
obmalloc in a small constant time, independent of the number of arenas
obmalloc controls.  Since this test is needed at every entry point, it's
extremely desirable that it be this fast.
*/

static bool _Py_NO_SANITIZE_ADDRESS
            _Py_NO_SANITIZE_THREAD
            _Py_NO_SANITIZE_MEMORY
address_in_range(OMState *state, void *p, poolp pool)
{
    // Since address_in_range may be reading from memory which was not allocated
    // by Python, it is important that pool->arenaindex is read only once, as
    // another thread may be concurrently modifying the value without holding
    // the GIL. The following dance forces the compiler to read pool->arenaindex
    // only once.
    uint arenaindex = *((volatile uint *)&pool->arenaindex);
    return arenaindex < maxarenas &&
        (uintptr_t)p - allarenas[arenaindex].address < ARENA_SIZE &&
        allarenas[arenaindex].address != 0;
}

#endif /* !WITH_PYMALLOC_RADIX_TREE */

/*==========================================================================*/

// Called when the freelist is exhausted.  Extend the freelist if there is
// space for a block.  Otherwise, remove this pool from usedpools.
static void
pymalloc_pool_extend(poolp pool, uint size)
{ … }

/* Called when pymalloc_alloc cannot allocate a block from usedpool.
 * This function takes a new pool and allocates a block from it.
 */
static void*
allocate_from_new_pool(OMState *state, uint size)
{ … }

/* pymalloc allocator

   Return a pointer to newly allocated memory if pymalloc allocated memory.

   Return NULL if pymalloc failed to allocate the memory block: on bigger
   requests, on error in the code below (as a last chance to serve the request)
   or when the max memory limit has been reached.
*/
static inline void*
pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
{ … }

void *
_PyObject_Malloc(void *ctx, size_t nbytes)
{ … }

void *
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
{ … }

static void
insert_to_usedpool(OMState *state, poolp pool)
{ … }

static void
insert_to_freepool(OMState *state, poolp pool)
{ … }

/* Free a memory block allocated by pymalloc_alloc().
   Return 1 if it was freed.
   Return 0 if the block was not allocated by pymalloc_alloc(). */
static inline int
pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
{ … }

void
_PyObject_Free(void *ctx, void *p)
{ … }

/* pymalloc realloc.

   If nbytes==0, then as the Python docs promise, we do not treat this like
   free(p), and return a non-NULL result.

   Return 1 if pymalloc reallocated memory and wrote the new pointer into
   newptr_p.

   Return 0 if pymalloc didn't allocate p. */
static int
pymalloc_realloc(OMState *state, void *ctx,
                 void **newptr_p, void *p, size_t nbytes)
{ … }

void *
_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
{ … }
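
/* Illustrative sketch (not part of the original file): the comment before
 * address_in_range() notes that the two-sided test B <= P < B + ARENA_SIZE
 * collapses to a single unsigned comparison once B is subtracted.  This
 * hypothetical helper spells that out: when p < base, the unsigned
 * subtraction wraps around to a huge value, so the comparison fails without
 * an explicit lower-bound check. */
static inline int
example_in_arena(uintptr_t p, uintptr_t base, size_t arena_size)
{
    return p - base < arena_size;
}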
#else   /* ! WITH_PYMALLOC */

/*==========================================================================*/
/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
 * only be used by extensions that are compiled with pymalloc enabled. */

Py_ssize_t
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
{
    return 0;
}

Py_ssize_t
_Py_GetGlobalAllocatedBlocks(void)
{
    return 0;
}

void
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
{
    return;
}

void
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
{
    return;
}

#endif /* WITH_PYMALLOC */


/*==========================================================================*/

/* A cross-platform debugging allocator.  This doesn't manage memory directly,
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
 */

/* Uncomment this define to add the "serialno" field */
/* #define PYMEM_DEBUG_SERIALNO */

#ifdef PYMEM_DEBUG_SERIALNO
static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */

/* serialno is always incremented via calling this routine.  The point is
 * to supply a single place to set a breakpoint. */
static void
bumpserialno(void)
{
    ++serialno;
}
#endif

#define SST …

#ifdef PYMEM_DEBUG_SERIALNO
#define PYMEM_DEBUG_EXTRA_BYTES …
#else
#define PYMEM_DEBUG_EXTRA_BYTES …
#endif

/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
static size_t
read_size_t(const void *p)
{ … }

/* Write n as a big-endian size_t, MSB at address p, LSB at
 * p + sizeof(size_t) - 1. */
static void
write_size_t(void *p, size_t n)
{ … }

static void
fill_mem_debug(debug_alloc_api_t *api, void *data, int c, size_t nbytes,
               bool is_alloc)
{ … }

/* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
   fills them with useful stuff, here calling the underlying malloc's result p:

p[0: S]
    Number of bytes originally asked for.  This is a size_t, big-endian
    (easier to read in a memory dump).
p[S]
    API ID.  See PEP 445.  This is a character, but seems undocumented.
p[S+1: 2*S]
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch underwrites and reads.
p[2*S: 2*S+n]
    The requested memory, filled with copies of PYMEM_CLEANBYTE.
    Used to catch references to uninitialized memory.
    &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
    handled the request itself.
p[2*S+n: 2*S+n+S]
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch overwrites and reads.
p[2*S+n+S: 2*S+n+2*S]
    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
    and _PyMem_DebugRealloc.  This is a big-endian size_t.
    If "bad memory" is detected later, the serial number gives an
    excellent way to set a breakpoint on the next run, to capture the
    instant at which this block was passed out.

If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
for 3 * S extra bytes, and omits the last serialno field.
*/

static void *
_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
{ … }

void *
_PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
{ … }

void *
_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
{ … }
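
/* Illustrative sketch (not part of the original file): a hypothetical check
 * over the header/trailer layout documented above, for the default build
 * where PYMEM_DEBUG_SERIALNO is not defined (3*S extra bytes).  It assumes
 * PYMEM_FORBIDDENBYTE is the fill value named in that comment; the real
 * checks live in _PyMem_DebugCheckAddress() further down. */
static size_t
example_read_size_be(const unsigned char *q)
{
    /* Big-endian read, MSB first, mirroring what read_size_t() describes. */
    size_t n = 0;
    for (size_t i = 0; i < sizeof(size_t); i++) {
        n = (n << 8) | q[i];
    }
    return n;
}

/* 'data' is the pointer handed back to the caller, i.e. &p[2*S] above. */
static int
example_check_block(const unsigned char *data)
{
    const size_t S = sizeof(size_t);
    /* The original request size lives in the first S header bytes. */
    size_t nbytes = example_read_size_be(data - 2 * S);
    /* S-1 forbidden bytes sit just below the data (p[S+1: 2*S])... */
    for (size_t i = 1; i < S; i++) {
        if (*(data - i) != PYMEM_FORBIDDENBYTE) {
            return 0;
        }
    }
    /* ...and S forbidden bytes sit just above it (p[2*S+n: 2*S+n+S]). */
    for (size_t i = 0; i < S; i++) {
        if (data[nbytes + i] != PYMEM_FORBIDDENBYTE) {
            return 0;
        }
    }
    return 1;
}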
/* The debug free first checks the 2*SST bytes on each end for sanity (in
   particular, that the FORBIDDENBYTEs with the api ID are still intact).
   Then fills the original bytes with PYMEM_DEADBYTE.
   Then calls the underlying free.
*/
void
_PyMem_DebugRawFree(void *ctx, void *p)
{ … }

void *
_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
{ … }

static inline void
_PyMem_DebugCheckGIL(const char *func)
{ … }

void *
_PyMem_DebugMalloc(void *ctx, size_t nbytes)
{ … }

void *
_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
{ … }

void
_PyMem_DebugFree(void *ctx, void *ptr)
{ … }

void *
_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
{ … }

/* Check the forbidden bytes on both ends of the memory allocated for p.
 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
 * and call Py_FatalError to kill the program.
 * The API id is also checked.
 */
static void
_PyMem_DebugCheckAddress(const char *func, char api, const void *p)
{ … }

/* Display info to stderr about the memory block at p. */
static void
_PyObject_DebugDumpAddress(const void *p)
{ … }

static size_t
printone(FILE *out, const char* msg, size_t value)
{ … }

void
_PyDebugAllocatorStats(FILE *out, const char *block_name, int num_blocks,
                       size_t sizeof_block)
{ … }

// Return true if the obmalloc state structure is heap allocated,
// by PyMem_RawCalloc().  For the main interpreter, this structure is
// allocated in the BSS.  Allocating that way gives some memory savings
// and a small performance win (at least on a demand-paged OS).  On
// 64-bit platforms, the obmalloc structure is 256 kB.  Most of that
// memory is for the arena_map_top array.  Since normally only one entry
// of that array is used, only one page of resident memory is actually
// used, rather than the full 256 kB.
bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
{ … }

#ifdef WITH_PYMALLOC
static void
init_obmalloc_pools(PyInterpreterState *interp)
{ … }
#endif /* WITH_PYMALLOC */

int
_PyMem_init_obmalloc(PyInterpreterState *interp)
{ … }

#ifdef WITH_PYMALLOC

static void
free_obmalloc_arenas(PyInterpreterState *interp)
{ … }

#ifdef Py_DEBUG
/* Is target in the list?  The list is traversed via the nextpool pointers.
 * The list may be NULL-terminated, or circular.  Return 1 if target is in
 * list, else 0. */
static int
pool_is_in_list(const poolp target, poolp list)
{
    poolp origlist = list;
    assert(target != NULL);
    if (list == NULL)
        return 0;
    do {
        if (target == list)
            return 1;
        list = list->nextpool;
    } while (list != NULL && list != origlist);
    return 0;
}
#endif

#ifdef WITH_MIMALLOC
struct _alloc_stats { … };

static bool
_collect_alloc_stats(const mi_heap_t* heap, const mi_heap_area_t* area,
                     void* block, size_t block_size, void* arg)
{ … }

static void
py_mimalloc_print_stats(FILE *out)
{ … }
#endif

static void
pymalloc_print_stats(FILE *out)
{ … }

/* Print summary info to "out" about the state of pymalloc's structures.
 * In Py_DEBUG mode, also perform some expensive internal consistency
 * checks.
 *
 * Return 0 if the memory debug hooks are not installed or no statistics were
 * written into out, return 1 otherwise.
 */
int
_PyObject_DebugMallocStats(FILE *out)
{ … }

#endif /* #ifdef WITH_PYMALLOC */