cpython/Python/gc.c

//  This implements the reference cycle garbage collector.
//  The Python module interface to the collector is in gcmodule.c.
//  See https://devguide.python.org/internals/garbage-collector/

#include "Python.h"
#include "pycore_ceval.h"         // _Py_set_eval_breaker_bit()
#include "pycore_context.h"
#include "pycore_dict.h"          // _PyInlineValuesSize()
#include "pycore_initconfig.h"
#include "pycore_interp.h"        // PyInterpreterState.gc
#include "pycore_object.h"
#include "pycore_object_alloc.h"  // _PyObject_MallocWithType()
#include "pycore_pyerrors.h"
#include "pycore_pystate.h"       // _PyThreadState_GET()
#include "pycore_weakref.h"       // _PyWeakref_ClearRef()
#include "pydtrace.h"

#ifndef Py_GIL_DISABLED

GCState;

#ifdef Py_DEBUG
#define GC_DEBUG
#endif

// Define this when debugging the GC
// #define GC_EXTRA_DEBUG


#define GC_NEXT
#define GC_PREV

// update_refs() set this bit for all objects in current generation.
// subtract_refs() and move_unreachable() uses this to distinguish
// visited object is in GCing or not.
//
// move_unreachable() removes this flag from reachable objects.
// Only unreachable objects have this flag.
//
// No objects in interpreter have this flag after GC ends.
#define PREV_MASK_COLLECTING

// Lowest bit of _gc_next is used for UNREACHABLE flag.
//
// This flag represents the object is in unreachable list in move_unreachable()
//
// Although this flag is used only in move_unreachable(), move_unreachable()
// doesn't clear this flag to skip unnecessary iteration.
// move_legacy_finalizers() removes this flag instead.
// Between them, unreachable list is not normal list and we can not use
// most gc_list_* functions for it.
#define NEXT_MASK_UNREACHABLE

#define AS_GC(op)
#define FROM_GC(gc)

// Automatically choose the generation that needs collecting.
#define GENERATION_AUTO

static inline int
gc_is_collecting(PyGC_Head *g)
{}

static inline void
gc_clear_collecting(PyGC_Head *g)
{}

static inline Py_ssize_t
gc_get_refs(PyGC_Head *g)
{}

static inline void
gc_set_refs(PyGC_Head *g, Py_ssize_t refs)
{}

static inline void
gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
{}

static inline void
gc_decref(PyGC_Head *g)
{}

static inline int
gc_old_space(PyGC_Head *g)
{}

static inline int
other_space(int space)
{}

static inline void
gc_flip_old_space(PyGC_Head *g)
{}

static inline void
gc_set_old_space(PyGC_Head *g, int space)
{}

static PyGC_Head *
GEN_HEAD(GCState *gcstate, int n)
{}

static GCState *
get_gc_state(void)
{}


void
_PyGC_InitState(GCState *gcstate)
{}


PyStatus
_PyGC_Init(PyInterpreterState *interp)
{}


/*
_gc_prev values
---------------

Between collections, _gc_prev is used for doubly linked list.

Lowest two bits of _gc_prev are used for flags.
PREV_MASK_COLLECTING is used only while collecting and cleared before GC ends
or _PyObject_GC_UNTRACK() is called.

During a collection, _gc_prev is temporary used for gc_refs, and the gc list
is singly linked until _gc_prev is restored.

gc_refs
    At the start of a collection, update_refs() copies the true refcount
    to gc_refs, for each object in the generation being collected.
    subtract_refs() then adjusts gc_refs so that it equals the number of
    times an object is referenced directly from outside the generation
    being collected.

PREV_MASK_COLLECTING
    Objects in generation being collected are marked PREV_MASK_COLLECTING in
    update_refs().


_gc_next values
---------------

_gc_next takes these values:

0
    The object is not tracked

!= 0
    Pointer to the next object in the GC list.
    Additionally, lowest bit is used temporary for
    NEXT_MASK_UNREACHABLE flag described below.

NEXT_MASK_UNREACHABLE
    move_unreachable() then moves objects not reachable (whether directly or
    indirectly) from outside the generation into an "unreachable" set and
    set this flag.

    Objects that are found to be reachable have gc_refs set to 1.
    When this flag is set for the reachable object, the object must be in
    "unreachable" set.
    The flag is unset and the object is moved back to "reachable" set.

    move_legacy_finalizers() will remove this flag from "unreachable" set.
*/

/*** list functions ***/

static inline void
gc_list_init(PyGC_Head *list)
{}

static inline int
gc_list_is_empty(PyGC_Head *list)
{}

/* Append `node` to `list`. */
static inline void
gc_list_append(PyGC_Head *node, PyGC_Head *list)
{}

/* Remove `node` from the gc list it's currently in. */
static inline void
gc_list_remove(PyGC_Head *node)
{}

/* Move `node` from the gc list it's currently in (which is not explicitly
 * named here) to the end of `list`.  This is semantically the same as
 * gc_list_remove(node) followed by gc_list_append(node, list).
 */
static void
gc_list_move(PyGC_Head *node, PyGC_Head *list)
{}

/* append list `from` onto list `to`; `from` becomes an empty list */
static void
gc_list_merge(PyGC_Head *from, PyGC_Head *to)
{}

static Py_ssize_t
gc_list_size(PyGC_Head *list)
{}

/* Walk the list and mark all objects as non-collecting */
static inline void
gc_list_clear_collecting(PyGC_Head *collectable)
{}

/* Append objects in a GC list to a Python list.
 * Return 0 if all OK, < 0 if error (out of memory for list)
 */
static int
append_objects(PyObject *py_list, PyGC_Head *gc_list)
{}

// Constants for validate_list's flags argument.
enum flagstates {};

#ifdef GC_DEBUG
// validate_list checks list consistency.  And it works as document
// describing when flags are expected to be set / unset.
// `head` must be a doubly-linked gc list, although it's fine (expected!) if
// the prev and next pointers are "polluted" with flags.
// What's checked:
// - The `head` pointers are not polluted.
// - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all
//   `set or clear, as specified by the 'flags' argument.
// - The prev and next pointers are mutually consistent.
static void
validate_list(PyGC_Head *head, enum flagstates flags)
{
    assert((head->_gc_prev & ~_PyGC_PREV_MASK) == 0);
    assert((head->_gc_next & ~_PyGC_PREV_MASK) == 0);
    uintptr_t prev_value = 0, next_value = 0;
    switch (flags) {
        case collecting_clear_unreachable_clear:
            break;
        case collecting_set_unreachable_clear:
            prev_value = PREV_MASK_COLLECTING;
            break;
        case collecting_clear_unreachable_set:
            next_value = NEXT_MASK_UNREACHABLE;
            break;
        case collecting_set_unreachable_set:
            prev_value = PREV_MASK_COLLECTING;
            next_value = NEXT_MASK_UNREACHABLE;
            break;
        default:
            assert(! "bad internal flags argument");
    }
    PyGC_Head *prev = head;
    PyGC_Head *gc = GC_NEXT(head);
    while (gc != head) {
        PyGC_Head *trueprev = GC_PREV(gc);
        PyGC_Head *truenext = GC_NEXT(gc);
        assert(truenext != NULL);
        assert(trueprev == prev);
        assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value);
        assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value);
        prev = gc;
        gc = truenext;
    }
    assert(prev == GC_PREV(head));
}

#else
#define validate_list(x, y)
#endif

#ifdef GC_EXTRA_DEBUG


static void
gc_list_validate_space(PyGC_Head *head, int space) {
    PyGC_Head *gc = GC_NEXT(head);
    while (gc != head) {
        assert(gc_old_space(gc) == space);
        gc = GC_NEXT(gc);
    }
}

static void
validate_spaces(GCState *gcstate)
{
    int visited = gcstate->visited_space;
    int not_visited = other_space(visited);
    gc_list_validate_space(&gcstate->young.head, not_visited);
    for (int space = 0; space < 2; space++) {
        gc_list_validate_space(&gcstate->old[space].head, space);
    }
    gc_list_validate_space(&gcstate->permanent_generation.head, visited);
}

static void
validate_consistent_old_space(PyGC_Head *head)
{
    PyGC_Head *gc = GC_NEXT(head);
    if (gc == head) {
        return;
    }
    int old_space = gc_old_space(gc);
    while (gc != head) {
        PyGC_Head *truenext = GC_NEXT(gc);
        assert(truenext != NULL);
        assert(gc_old_space(gc) == old_space);
        gc = truenext;
    }
}


#else
#define validate_spaces(g)
#define validate_consistent_old_space(l)
#define gc_list_validate_space(l, s)
#endif

/*** end of list stuff ***/


/* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 and
 * PREV_MASK_COLLECTING bit is set for all objects in containers.
 */
static void
update_refs(PyGC_Head *containers)
{}

/* A traversal callback for subtract_refs. */
static int
visit_decref(PyObject *op, void *parent)
{}

int
_PyGC_VisitStackRef(_PyStackRef *ref, visitproc visit, void *arg)
{}

int
_PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg)
{}

/* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
 * for all objects in containers, and is GC_REACHABLE for all tracked gc
 * objects not in containers.  The ones with gc_refs > 0 are directly
 * reachable from outside containers, and so can't be collected.
 */
static void
subtract_refs(PyGC_Head *containers)
{}

/* A traversal callback for move_unreachable. */
static int
visit_reachable(PyObject *op, void *arg)
{}

/* Move the unreachable objects from young to unreachable.  After this,
 * all objects in young don't have PREV_MASK_COLLECTING flag and
 * unreachable have the flag.
 * All objects in young after this are directly or indirectly reachable
 * from outside the original young; and all objects in unreachable are
 * not.
 *
 * This function restores _gc_prev pointer.  young and unreachable are
 * doubly linked list after this function.
 * But _gc_next in unreachable list has NEXT_MASK_UNREACHABLE flag.
 * So we can not gc_list_* functions for unreachable until we remove the flag.
 */
static void
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
{}

/* In theory, all tuples should be younger than the
* objects they refer to, as tuples are immortal.
* Therefore, untracking tuples in oldest-first order in the
* young generation before promoting them should have tracked
* all the tuples that can be untracked.
*
* Unfortunately, the C API allows tuples to be created
* and then filled in. So this won't untrack all tuples
* that can be untracked. It should untrack most of them
* and is much faster than a more complex approach that
* would untrack all relevant tuples.
*/
static void
untrack_tuples(PyGC_Head *head)
{}

/* Return true if object has a pre-PEP 442 finalization method. */
static int
has_legacy_finalizer(PyObject *op)
{}

/* Move the objects in unreachable with tp_del slots into `finalizers`.
 *
 * This function also removes NEXT_MASK_UNREACHABLE flag
 * from _gc_next in unreachable.
 */
static void
move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
{}

static inline void
clear_unreachable_mask(PyGC_Head *unreachable)
{}

/* A traversal callback for move_legacy_finalizer_reachable. */
static int
visit_move(PyObject *op, void *arg)
{}

/* Move objects that are reachable from finalizers, from the unreachable set
 * into finalizers set.
 */
static void
move_legacy_finalizer_reachable(PyGC_Head *finalizers)
{}

/* Clear all weakrefs to unreachable objects, and if such a weakref has a
 * callback, invoke it if necessary.  Note that it's possible for such
 * weakrefs to be outside the unreachable set -- indeed, those are precisely
 * the weakrefs whose callbacks must be invoked.  See gc_weakref.txt for
 * overview & some details.  Some weakrefs with callbacks may be reclaimed
 * directly by this routine; the number reclaimed is the return value.  Other
 * weakrefs with callbacks may be moved into the `old` generation.  Objects
 * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in
 * unreachable are left at GC_TENTATIVELY_UNREACHABLE.  When this returns,
 * no object in `unreachable` is weakly referenced anymore.
 */
static int
handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
{}

static void
debug_cycle(const char *msg, PyObject *op)
{}

/* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
 * only from such cycles).
 * If _PyGC_DEBUG_SAVEALL, all objects in finalizers are appended to the module
 * garbage list (a Python list), else only the objects in finalizers with
 * __del__ methods are appended to garbage.  All objects in finalizers are
 * merged into the old list regardless.
 */
static void
handle_legacy_finalizers(PyThreadState *tstate,
                         GCState *gcstate,
                         PyGC_Head *finalizers, PyGC_Head *old)
{}

/* Run first-time finalizers (if any) on all the objects in collectable.
 * Note that this may remove some (or even all) of the objects from the
 * list, due to refcounts falling to 0.
 */
static void
finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable)
{}

/* Break reference cycles by clearing the containers involved.  This is
 * tricky business as the lists can be changing and we don't know which
 * objects may be freed.  It is possible I screwed something up here.
 */
static void
delete_garbage(PyThreadState *tstate, GCState *gcstate,
               PyGC_Head *collectable, PyGC_Head *old)
{}


/* Deduce which objects among "base" are unreachable from outside the list
   and move them to 'unreachable'. The process consist in the following steps:

1. Copy all reference counts to a different field (gc_prev is used to hold
   this copy to save memory).
2. Traverse all objects in "base" and visit all referred objects using
   "tp_traverse" and for every visited object, subtract 1 to the reference
   count (the one that we copied in the previous step). After this step, all
   objects that can be reached directly from outside must have strictly positive
   reference count, while all unreachable objects must have a count of exactly 0.
3. Identify all unreachable objects (the ones with 0 reference count) and move
   them to the "unreachable" list. This step also needs to move back to "base" all
   objects that were initially marked as unreachable but are referred transitively
   by the reachable objects (the ones with strictly positive reference count).

Contracts:

    * The "base" has to be a valid list with no mask set.

    * The "unreachable" list must be uninitialized (this function calls
      gc_list_init over 'unreachable').

IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE
flag set but it does not clear it to skip unnecessary iteration. Before the
flag is cleared (for example, by using 'clear_unreachable_mask' function or
by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal
list and we can not use most gc_list_* functions for it. */
static inline void
deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {}

/* Handle objects that may have resurrected after a call to 'finalize_garbage', moving
   them to 'old_generation' and placing the rest on 'still_unreachable'.

   Contracts:
       * After this function 'unreachable' must not be used anymore and 'still_unreachable'
         will contain the objects that did not resurrect.

       * The "still_unreachable" list must be uninitialized (this function calls
         gc_list_init over 'still_unreachable').

IMPORTANT: After a call to this function, the 'still_unreachable' set will have the
PREV_MARK_COLLECTING set, but the objects in this set are going to be removed so
we can skip the expense of clearing the flag to avoid extra iteration. */
static inline void
handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
                           PyGC_Head *old_generation)
{}

static void
gc_collect_region(PyThreadState *tstate,
                  PyGC_Head *from,
                  PyGC_Head *to,
                  struct gc_collection_stats *stats);

static inline Py_ssize_t
gc_list_set_space(PyGC_Head *list, int space)
{}

/* Making progress in the incremental collector
 * In order to eventually collect all cycles
 * the incremental collector must progress through the old
 * space faster than objects are added to the old space.
 *
 * Each young or incremental collection adds a number of
 * objects, S (for survivors) to the old space, and
 * incremental collectors scan I objects from the old space.
 * I > S must be true. We also want I > S * N to be where
 * N > 1. Higher values of N mean that the old space is
 * scanned more rapidly.
 * The default incremental threshold of 10 translates to
 * N == 1.4 (1 + 4/threshold)
 */

/* Divide by 10, so that the default incremental threshold of 10
 * scans objects at 1% of the heap size */
#define SCAN_RATE_DIVISOR

static void
add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats)
{}

static void
gc_collect_young(PyThreadState *tstate,
                 struct gc_collection_stats *stats)
{}

#ifndef NDEBUG
static inline int
IS_IN_VISITED(PyGC_Head *gc, int visited_space)
{
    assert(visited_space == 0 || other_space(visited_space) == 0);
    return gc_old_space(gc) == visited_space;
}
#endif

struct container_and_flag {};

/* A traversal callback for adding to container) */
static int
visit_add_to_container(PyObject *op, void *arg)
{}

static intptr_t
expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate)
{}

/* Do bookkeeping for a completed GC cycle */
static void
completed_scavenge(GCState *gcstate)
{}

static intptr_t
move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space)
{}

static intptr_t
mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space)
{}

static intptr_t
mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start)
{}

static intptr_t
mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space)
{}

static intptr_t
mark_at_start(PyThreadState *tstate)
{}

static intptr_t
assess_work_to_do(GCState *gcstate)
{}

static void
gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
{}

static void
gc_collect_full(PyThreadState *tstate,
                struct gc_collection_stats *stats)
{}

/* This is the main function. Read this to understand how the
 * collection process works. */
static void
gc_collect_region(PyThreadState *tstate,
                  PyGC_Head *from,
                  PyGC_Head *to,
                  struct gc_collection_stats *stats)
{}

/* Invoke progress callbacks to notify clients that garbage collection
 * is starting or stopping
 */
static void
do_gc_callback(GCState *gcstate, const char *phase,
                   int generation, struct gc_collection_stats *stats)
{}

static void
invoke_gc_callback(GCState *gcstate, const char *phase,
                   int generation, struct gc_collection_stats *stats)
{}

static int
referrersvisit(PyObject* obj, void *arg)
{}

static int
gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
{}

PyObject *
_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs)
{}

PyObject *
_PyGC_GetObjects(PyInterpreterState *interp, int generation)
{}

void
_PyGC_Freeze(PyInterpreterState *interp)
{}

void
_PyGC_Unfreeze(PyInterpreterState *interp)
{}

Py_ssize_t
_PyGC_GetFreezeCount(PyInterpreterState *interp)
{}

/* C API for controlling the state of the garbage collector */
int
PyGC_Enable(void)
{}

int
PyGC_Disable(void)
{}

int
PyGC_IsEnabled(void)
{}

// Show stats for objects in each generations
static void
show_stats_each_generations(GCState *gcstate)
{}

Py_ssize_t
_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
{}

/* Public API to invoke gc.collect() from C */
Py_ssize_t
PyGC_Collect(void)
{}

void
_PyGC_CollectNoFail(PyThreadState *tstate)
{}

void
_PyGC_DumpShutdownStats(PyInterpreterState *interp)
{}

static void
finalize_unlink_gc_head(PyGC_Head *gc) {}

void
_PyGC_Fini(PyInterpreterState *interp)
{}

/* for debugging */
void
_PyGC_Dump(PyGC_Head *g)
{}


#ifdef Py_DEBUG
static int
visit_validate(PyObject *op, void *parent_raw)
{
    PyObject *parent = _PyObject_CAST(parent_raw);
    if (_PyObject_IsFreed(op)) {
        _PyObject_ASSERT_FAILED_MSG(parent,
                                    "PyObject_GC_Track() object is not valid");
    }
    return 0;
}
#endif


/* extension modules might be compiled with GC support so these
   functions must always be available */

void
PyObject_GC_Track(void *op_raw)
{}

void
PyObject_GC_UnTrack(void *op_raw)
{}

int
PyObject_IS_GC(PyObject *obj)
{}

void
_Py_ScheduleGC(PyThreadState *tstate)
{}

void
_PyObject_GC_Link(PyObject *op)
{}

void
_Py_RunGC(PyThreadState *tstate)
{}

static PyObject *
gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize)
{}


PyObject *
_PyObject_GC_New(PyTypeObject *tp)
{}

PyVarObject *
_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
{}

PyObject *
PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *tp, size_t extra_size)
{}

PyVarObject *
_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
{}

void
PyObject_GC_Del(void *op)
{}

int
PyObject_GC_IsTracked(PyObject* obj)
{}

int
PyObject_GC_IsFinalized(PyObject *obj)
{}

static int
visit_generation(gcvisitobjects_t callback, void *arg, struct gc_generation *gen)
{}

void
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
{}

#endif  // Py_GIL_DISABLED