// This implements the reference cycle garbage collector. // The Python module interface to the collector is in gcmodule.c. // See https://devguide.python.org/internals/garbage-collector/ #include "Python.h" #include "pycore_ceval.h" // _Py_set_eval_breaker_bit() #include "pycore_context.h" #include "pycore_dict.h" // _PyInlineValuesSize() #include "pycore_initconfig.h" #include "pycore_interp.h" // PyInterpreterState.gc #include "pycore_object.h" #include "pycore_object_alloc.h" // _PyObject_MallocWithType() #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_weakref.h" // _PyWeakref_ClearRef() #include "pydtrace.h" #ifndef Py_GIL_DISABLED GCState; #ifdef Py_DEBUG #define GC_DEBUG #endif // Define this when debugging the GC // #define GC_EXTRA_DEBUG #define GC_NEXT … #define GC_PREV … // update_refs() set this bit for all objects in current generation. // subtract_refs() and move_unreachable() uses this to distinguish // visited object is in GCing or not. // // move_unreachable() removes this flag from reachable objects. // Only unreachable objects have this flag. // // No objects in interpreter have this flag after GC ends. #define PREV_MASK_COLLECTING … // Lowest bit of _gc_next is used for UNREACHABLE flag. // // This flag represents the object is in unreachable list in move_unreachable() // // Although this flag is used only in move_unreachable(), move_unreachable() // doesn't clear this flag to skip unnecessary iteration. // move_legacy_finalizers() removes this flag instead. // Between them, unreachable list is not normal list and we can not use // most gc_list_* functions for it. #define NEXT_MASK_UNREACHABLE … #define AS_GC(op) … #define FROM_GC(gc) … // Automatically choose the generation that needs collecting. #define GENERATION_AUTO … static inline int gc_is_collecting(PyGC_Head *g) { … } static inline void gc_clear_collecting(PyGC_Head *g) { … } static inline Py_ssize_t gc_get_refs(PyGC_Head *g) { … } static inline void gc_set_refs(PyGC_Head *g, Py_ssize_t refs) { … } static inline void gc_reset_refs(PyGC_Head *g, Py_ssize_t refs) { … } static inline void gc_decref(PyGC_Head *g) { … } static inline int gc_old_space(PyGC_Head *g) { … } static inline int other_space(int space) { … } static inline void gc_flip_old_space(PyGC_Head *g) { … } static inline void gc_set_old_space(PyGC_Head *g, int space) { … } static PyGC_Head * GEN_HEAD(GCState *gcstate, int n) { … } static GCState * get_gc_state(void) { … } void _PyGC_InitState(GCState *gcstate) { … } PyStatus _PyGC_Init(PyInterpreterState *interp) { … } /* _gc_prev values --------------- Between collections, _gc_prev is used for doubly linked list. Lowest two bits of _gc_prev are used for flags. PREV_MASK_COLLECTING is used only while collecting and cleared before GC ends or _PyObject_GC_UNTRACK() is called. During a collection, _gc_prev is temporary used for gc_refs, and the gc list is singly linked until _gc_prev is restored. gc_refs At the start of a collection, update_refs() copies the true refcount to gc_refs, for each object in the generation being collected. subtract_refs() then adjusts gc_refs so that it equals the number of times an object is referenced directly from outside the generation being collected. PREV_MASK_COLLECTING Objects in generation being collected are marked PREV_MASK_COLLECTING in update_refs(). _gc_next values --------------- _gc_next takes these values: 0 The object is not tracked != 0 Pointer to the next object in the GC list. Additionally, lowest bit is used temporary for NEXT_MASK_UNREACHABLE flag described below. NEXT_MASK_UNREACHABLE move_unreachable() then moves objects not reachable (whether directly or indirectly) from outside the generation into an "unreachable" set and set this flag. Objects that are found to be reachable have gc_refs set to 1. When this flag is set for the reachable object, the object must be in "unreachable" set. The flag is unset and the object is moved back to "reachable" set. move_legacy_finalizers() will remove this flag from "unreachable" set. */ /*** list functions ***/ static inline void gc_list_init(PyGC_Head *list) { … } static inline int gc_list_is_empty(PyGC_Head *list) { … } /* Append `node` to `list`. */ static inline void gc_list_append(PyGC_Head *node, PyGC_Head *list) { … } /* Remove `node` from the gc list it's currently in. */ static inline void gc_list_remove(PyGC_Head *node) { … } /* Move `node` from the gc list it's currently in (which is not explicitly * named here) to the end of `list`. This is semantically the same as * gc_list_remove(node) followed by gc_list_append(node, list). */ static void gc_list_move(PyGC_Head *node, PyGC_Head *list) { … } /* append list `from` onto list `to`; `from` becomes an empty list */ static void gc_list_merge(PyGC_Head *from, PyGC_Head *to) { … } static Py_ssize_t gc_list_size(PyGC_Head *list) { … } /* Walk the list and mark all objects as non-collecting */ static inline void gc_list_clear_collecting(PyGC_Head *collectable) { … } /* Append objects in a GC list to a Python list. * Return 0 if all OK, < 0 if error (out of memory for list) */ static int append_objects(PyObject *py_list, PyGC_Head *gc_list) { … } // Constants for validate_list's flags argument. enum flagstates { … }; #ifdef GC_DEBUG // validate_list checks list consistency. And it works as document // describing when flags are expected to be set / unset. // `head` must be a doubly-linked gc list, although it's fine (expected!) if // the prev and next pointers are "polluted" with flags. // What's checked: // - The `head` pointers are not polluted. // - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all // `set or clear, as specified by the 'flags' argument. // - The prev and next pointers are mutually consistent. static void validate_list(PyGC_Head *head, enum flagstates flags) { assert((head->_gc_prev & ~_PyGC_PREV_MASK) == 0); assert((head->_gc_next & ~_PyGC_PREV_MASK) == 0); uintptr_t prev_value = 0, next_value = 0; switch (flags) { case collecting_clear_unreachable_clear: break; case collecting_set_unreachable_clear: prev_value = PREV_MASK_COLLECTING; break; case collecting_clear_unreachable_set: next_value = NEXT_MASK_UNREACHABLE; break; case collecting_set_unreachable_set: prev_value = PREV_MASK_COLLECTING; next_value = NEXT_MASK_UNREACHABLE; break; default: assert(! "bad internal flags argument"); } PyGC_Head *prev = head; PyGC_Head *gc = GC_NEXT(head); while (gc != head) { PyGC_Head *trueprev = GC_PREV(gc); PyGC_Head *truenext = GC_NEXT(gc); assert(truenext != NULL); assert(trueprev == prev); assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value); assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value); prev = gc; gc = truenext; } assert(prev == GC_PREV(head)); } #else #define validate_list(x, y) … #endif #ifdef GC_EXTRA_DEBUG static void gc_list_validate_space(PyGC_Head *head, int space) { PyGC_Head *gc = GC_NEXT(head); while (gc != head) { assert(gc_old_space(gc) == space); gc = GC_NEXT(gc); } } static void validate_spaces(GCState *gcstate) { int visited = gcstate->visited_space; int not_visited = other_space(visited); gc_list_validate_space(&gcstate->young.head, not_visited); for (int space = 0; space < 2; space++) { gc_list_validate_space(&gcstate->old[space].head, space); } gc_list_validate_space(&gcstate->permanent_generation.head, visited); } static void validate_consistent_old_space(PyGC_Head *head) { PyGC_Head *gc = GC_NEXT(head); if (gc == head) { return; } int old_space = gc_old_space(gc); while (gc != head) { PyGC_Head *truenext = GC_NEXT(gc); assert(truenext != NULL); assert(gc_old_space(gc) == old_space); gc = truenext; } } #else #define validate_spaces(g) … #define validate_consistent_old_space(l) … #define gc_list_validate_space(l, s) … #endif /*** end of list stuff ***/ /* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 and * PREV_MASK_COLLECTING bit is set for all objects in containers. */ static void update_refs(PyGC_Head *containers) { … } /* A traversal callback for subtract_refs. */ static int visit_decref(PyObject *op, void *parent) { … } int _PyGC_VisitStackRef(_PyStackRef *ref, visitproc visit, void *arg) { … } int _PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg) { … } /* Subtract internal references from gc_refs. After this, gc_refs is >= 0 * for all objects in containers, and is GC_REACHABLE for all tracked gc * objects not in containers. The ones with gc_refs > 0 are directly * reachable from outside containers, and so can't be collected. */ static void subtract_refs(PyGC_Head *containers) { … } /* A traversal callback for move_unreachable. */ static int visit_reachable(PyObject *op, void *arg) { … } /* Move the unreachable objects from young to unreachable. After this, * all objects in young don't have PREV_MASK_COLLECTING flag and * unreachable have the flag. * All objects in young after this are directly or indirectly reachable * from outside the original young; and all objects in unreachable are * not. * * This function restores _gc_prev pointer. young and unreachable are * doubly linked list after this function. * But _gc_next in unreachable list has NEXT_MASK_UNREACHABLE flag. * So we can not gc_list_* functions for unreachable until we remove the flag. */ static void move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) { … } /* In theory, all tuples should be younger than the * objects they refer to, as tuples are immortal. * Therefore, untracking tuples in oldest-first order in the * young generation before promoting them should have tracked * all the tuples that can be untracked. * * Unfortunately, the C API allows tuples to be created * and then filled in. So this won't untrack all tuples * that can be untracked. It should untrack most of them * and is much faster than a more complex approach that * would untrack all relevant tuples. */ static void untrack_tuples(PyGC_Head *head) { … } /* Return true if object has a pre-PEP 442 finalization method. */ static int has_legacy_finalizer(PyObject *op) { … } /* Move the objects in unreachable with tp_del slots into `finalizers`. * * This function also removes NEXT_MASK_UNREACHABLE flag * from _gc_next in unreachable. */ static void move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) { … } static inline void clear_unreachable_mask(PyGC_Head *unreachable) { … } /* A traversal callback for move_legacy_finalizer_reachable. */ static int visit_move(PyObject *op, void *arg) { … } /* Move objects that are reachable from finalizers, from the unreachable set * into finalizers set. */ static void move_legacy_finalizer_reachable(PyGC_Head *finalizers) { … } /* Clear all weakrefs to unreachable objects, and if such a weakref has a * callback, invoke it if necessary. Note that it's possible for such * weakrefs to be outside the unreachable set -- indeed, those are precisely * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for * overview & some details. Some weakrefs with callbacks may be reclaimed * directly by this routine; the number reclaimed is the return value. Other * weakrefs with callbacks may be moved into the `old` generation. Objects * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in * unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns, * no object in `unreachable` is weakly referenced anymore. */ static int handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) { … } static void debug_cycle(const char *msg, PyObject *op) { … } /* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable * only from such cycles). * If _PyGC_DEBUG_SAVEALL, all objects in finalizers are appended to the module * garbage list (a Python list), else only the objects in finalizers with * __del__ methods are appended to garbage. All objects in finalizers are * merged into the old list regardless. */ static void handle_legacy_finalizers(PyThreadState *tstate, GCState *gcstate, PyGC_Head *finalizers, PyGC_Head *old) { … } /* Run first-time finalizers (if any) on all the objects in collectable. * Note that this may remove some (or even all) of the objects from the * list, due to refcounts falling to 0. */ static void finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable) { … } /* Break reference cycles by clearing the containers involved. This is * tricky business as the lists can be changing and we don't know which * objects may be freed. It is possible I screwed something up here. */ static void delete_garbage(PyThreadState *tstate, GCState *gcstate, PyGC_Head *collectable, PyGC_Head *old) { … } /* Deduce which objects among "base" are unreachable from outside the list and move them to 'unreachable'. The process consist in the following steps: 1. Copy all reference counts to a different field (gc_prev is used to hold this copy to save memory). 2. Traverse all objects in "base" and visit all referred objects using "tp_traverse" and for every visited object, subtract 1 to the reference count (the one that we copied in the previous step). After this step, all objects that can be reached directly from outside must have strictly positive reference count, while all unreachable objects must have a count of exactly 0. 3. Identify all unreachable objects (the ones with 0 reference count) and move them to the "unreachable" list. This step also needs to move back to "base" all objects that were initially marked as unreachable but are referred transitively by the reachable objects (the ones with strictly positive reference count). Contracts: * The "base" has to be a valid list with no mask set. * The "unreachable" list must be uninitialized (this function calls gc_list_init over 'unreachable'). IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE flag set but it does not clear it to skip unnecessary iteration. Before the flag is cleared (for example, by using 'clear_unreachable_mask' function or by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal list and we can not use most gc_list_* functions for it. */ static inline void deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { … } /* Handle objects that may have resurrected after a call to 'finalize_garbage', moving them to 'old_generation' and placing the rest on 'still_unreachable'. Contracts: * After this function 'unreachable' must not be used anymore and 'still_unreachable' will contain the objects that did not resurrect. * The "still_unreachable" list must be uninitialized (this function calls gc_list_init over 'still_unreachable'). IMPORTANT: After a call to this function, the 'still_unreachable' set will have the PREV_MARK_COLLECTING set, but the objects in this set are going to be removed so we can skip the expense of clearing the flag to avoid extra iteration. */ static inline void handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, PyGC_Head *old_generation) { … } static void gc_collect_region(PyThreadState *tstate, PyGC_Head *from, PyGC_Head *to, struct gc_collection_stats *stats); static inline Py_ssize_t gc_list_set_space(PyGC_Head *list, int space) { … } /* Making progress in the incremental collector * In order to eventually collect all cycles * the incremental collector must progress through the old * space faster than objects are added to the old space. * * Each young or incremental collection adds a number of * objects, S (for survivors) to the old space, and * incremental collectors scan I objects from the old space. * I > S must be true. We also want I > S * N to be where * N > 1. Higher values of N mean that the old space is * scanned more rapidly. * The default incremental threshold of 10 translates to * N == 1.4 (1 + 4/threshold) */ /* Divide by 10, so that the default incremental threshold of 10 * scans objects at 1% of the heap size */ #define SCAN_RATE_DIVISOR … static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) { … } static void gc_collect_young(PyThreadState *tstate, struct gc_collection_stats *stats) { … } #ifndef NDEBUG static inline int IS_IN_VISITED(PyGC_Head *gc, int visited_space) { assert(visited_space == 0 || other_space(visited_space) == 0); return gc_old_space(gc) == visited_space; } #endif struct container_and_flag { … }; /* A traversal callback for adding to container) */ static int visit_add_to_container(PyObject *op, void *arg) { … } static intptr_t expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) { … } /* Do bookkeeping for a completed GC cycle */ static void completed_scavenge(GCState *gcstate) { … } static intptr_t move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) { … } static intptr_t mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) { … } static intptr_t mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start) { … } static intptr_t mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space) { … } static intptr_t mark_at_start(PyThreadState *tstate) { … } static intptr_t assess_work_to_do(GCState *gcstate) { … } static void gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) { … } static void gc_collect_full(PyThreadState *tstate, struct gc_collection_stats *stats) { … } /* This is the main function. Read this to understand how the * collection process works. */ static void gc_collect_region(PyThreadState *tstate, PyGC_Head *from, PyGC_Head *to, struct gc_collection_stats *stats) { … } /* Invoke progress callbacks to notify clients that garbage collection * is starting or stopping */ static void do_gc_callback(GCState *gcstate, const char *phase, int generation, struct gc_collection_stats *stats) { … } static void invoke_gc_callback(GCState *gcstate, const char *phase, int generation, struct gc_collection_stats *stats) { … } static int referrersvisit(PyObject* obj, void *arg) { … } static int gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist) { … } PyObject * _PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs) { … } PyObject * _PyGC_GetObjects(PyInterpreterState *interp, int generation) { … } void _PyGC_Freeze(PyInterpreterState *interp) { … } void _PyGC_Unfreeze(PyInterpreterState *interp) { … } Py_ssize_t _PyGC_GetFreezeCount(PyInterpreterState *interp) { … } /* C API for controlling the state of the garbage collector */ int PyGC_Enable(void) { … } int PyGC_Disable(void) { … } int PyGC_IsEnabled(void) { … } // Show stats for objects in each generations static void show_stats_each_generations(GCState *gcstate) { … } Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) { … } /* Public API to invoke gc.collect() from C */ Py_ssize_t PyGC_Collect(void) { … } void _PyGC_CollectNoFail(PyThreadState *tstate) { … } void _PyGC_DumpShutdownStats(PyInterpreterState *interp) { … } static void finalize_unlink_gc_head(PyGC_Head *gc) { … } void _PyGC_Fini(PyInterpreterState *interp) { … } /* for debugging */ void _PyGC_Dump(PyGC_Head *g) { … } #ifdef Py_DEBUG static int visit_validate(PyObject *op, void *parent_raw) { PyObject *parent = _PyObject_CAST(parent_raw); if (_PyObject_IsFreed(op)) { _PyObject_ASSERT_FAILED_MSG(parent, "PyObject_GC_Track() object is not valid"); } return 0; } #endif /* extension modules might be compiled with GC support so these functions must always be available */ void PyObject_GC_Track(void *op_raw) { … } void PyObject_GC_UnTrack(void *op_raw) { … } int PyObject_IS_GC(PyObject *obj) { … } void _Py_ScheduleGC(PyThreadState *tstate) { … } void _PyObject_GC_Link(PyObject *op) { … } void _Py_RunGC(PyThreadState *tstate) { … } static PyObject * gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize) { … } PyObject * _PyObject_GC_New(PyTypeObject *tp) { … } PyVarObject * _PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems) { … } PyObject * PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *tp, size_t extra_size) { … } PyVarObject * _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems) { … } void PyObject_GC_Del(void *op) { … } int PyObject_GC_IsTracked(PyObject* obj) { … } int PyObject_GC_IsFinalized(PyObject *obj) { … } static int visit_generation(gcvisitobjects_t callback, void *arg, struct gc_generation *gen) { … } void PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) { … } #endif // Py_GIL_DISABLED