cpython/Objects/setobject.c


/* set object implementation

   Written and maintained by Raymond D. Hettinger <[email protected]>
   Derived from Objects/dictobject.c.

   The basic lookup function used by all operations.
   This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.

   The initial probe index is computed as hash mod the table size.
   Subsequent probe indices are computed as explained in Objects/dictobject.c.

   To improve cache locality, each probe inspects a series of consecutive
   nearby entries before moving on to probes elsewhere in memory.  This leaves
   us with a hybrid of linear probing and randomized probing.  The linear probing
   reduces the cost of hash collisions because consecutive memory accesses
   tend to be much cheaper than scattered probes.  After LINEAR_PROBES steps,
   we then use more of the upper bits from the hash value and apply a simple
   linear congruential random number generator.  This helps break-up long
   chains of collisions.

   All arithmetic on hash should ignore overflow.

   Unlike the dictionary implementation, the lookkey function can return
   NULL if the rich comparison returns an error.

   Use cases for sets differ considerably from dictionaries where looked-up
   keys are more likely to be present.  In contrast, sets are primarily
   about membership testing where the presence of an element is not known in
   advance.  Accordingly, the set implementation needs to optimize for both
   the found and not-found case.
*/

#include "Python.h"
#include "pycore_ceval.h"               // _PyEval_GetBuiltin()
#include "pycore_critical_section.h"    // Py_BEGIN_CRITICAL_SECTION, Py_END_CRITICAL_SECTION
#include "pycore_dict.h"                // _PyDict_Contains_KnownHash()
#include "pycore_modsupport.h"          // _PyArg_NoKwnames()
#include "pycore_object.h"              // _PyObject_GC_UNTRACK()
#include "pycore_pyatomic_ft_wrappers.h"  // FT_ATOMIC_LOAD_SSIZE_RELAXED()
#include "pycore_pyerrors.h"            // _PyErr_SetKeyError()
#include "pycore_setobject.h"           // _PySet_NextEntry() definition

#include "stringlib/eq.h"               // unicode_eq()
#include <stddef.h>                     // offsetof()
#include "clinic/setobject.c.h"

/*[clinic input]
class set "PySetObject *" "&PySet_Type"
class frozenset "PySetObject *" "&PyFrozenSet_Type"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=97ad1d3e9f117079]*/

/*[python input]
class setobject_converter(self_converter):
    type = "PySetObject *"
[python start generated code]*/
/*[python end generated code: output=da39a3ee5e6b4b0d input=33a44506d4d57793]*/

/* Object used as dummy key to fill deleted entries */
static PyObject _dummy_struct;

#define dummy


/* ======================================================================== */
/* ======= Begin logic for probing the hash table ========================= */

/* Set this to zero to turn-off linear probing */
#ifndef LINEAR_PROBES
#define LINEAR_PROBES
#endif

/* This must be >= 1 */
#define PERTURB_SHIFT

static setentry *
set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
{}

static int set_table_resize(PySetObject *, Py_ssize_t);

static int
set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
{}

/*
Internal routine used by set_table_resize() to insert an item which is
known to be absent from the set.  Besides the performance benefit,
there is also safety benefit since using set_add_entry() risks making
a callback in the middle of a set_table_resize(), see issue 1456209.
The caller is responsible for updating the key's reference count and
the setobject's fill and used fields.
*/
static void
set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
{}

/* ======== End logic for probing the hash table ========================== */
/* ======================================================================== */

/*
Restructure the table by allocating a new table and reinserting all
keys again.  When entries have been deleted, the new table may
actually be smaller than the old one.
*/
static int
set_table_resize(PySetObject *so, Py_ssize_t minused)
{}

static int
set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
{}

#define DISCARD_NOTFOUND
#define DISCARD_FOUND

static int
set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
{}

static int
set_add_key(PySetObject *so, PyObject *key)
{}

static int
set_contains_key(PySetObject *so, PyObject *key)
{}

static int
set_discard_key(PySetObject *so, PyObject *key)
{}

static void
set_empty_to_minsize(PySetObject *so)
{}

static int
set_clear_internal(PyObject *self)
{}

/*
 * Iterate over a set table.  Use like so:
 *
 *     Py_ssize_t pos;
 *     setentry *entry;
 *     pos = 0;   # important!  pos should not otherwise be changed by you
 *     while (set_next(yourset, &pos, &entry)) {
 *              Refer to borrowed reference in entry->key.
 *     }
 *
 * CAUTION:  In general, it isn't safe to use set_next in a loop that
 * mutates the table.
 */
static int
set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
{}

static void
set_dealloc(PyObject *self)
{}

static PyObject *
set_repr_lock_held(PySetObject *so)
{}

static PyObject *
set_repr(PyObject *self)
{}

static Py_ssize_t
set_len(PyObject *self)
{}

static int
set_merge_lock_held(PySetObject *so, PyObject *otherset)
{}

/*[clinic input]
@critical_section
set.pop
    so: setobject

Remove and return an arbitrary set element.

Raises KeyError if the set is empty.
[clinic start generated code]*/

static PyObject *
set_pop_impl(PySetObject *so)
/*[clinic end generated code: output=4d65180f1271871b input=9296c84921125060]*/
{}

static int
set_traverse(PyObject *self, visitproc visit, void *arg)
{}

/* Work to increase the bit dispersion for closely spaced hash values.
   This is important because some use cases have many combinations of a
   small number of elements with nearby hashes so that many distinct
   combinations collapse to only a handful of distinct hash values. */

static Py_uhash_t
_shuffle_bits(Py_uhash_t h)
{}

/* Most of the constants in this hash algorithm are randomly chosen
   large primes with "interesting bit patterns" and that passed tests
   for good collision statistics on a variety of problematic datasets
   including powersets and graph structures (such as David Eppstein's
   graph recipes in Lib/test/test_set.py).

   This hash algorithm can be used on either a frozenset or a set.
   When it is used on a set, it computes the hash value of the equivalent
   frozenset without creating a new frozenset object. */

static Py_hash_t
frozenset_hash_impl(PyObject *self)
{}

static Py_hash_t
frozenset_hash(PyObject *self)
{}

/***** Set iterator type ***********************************************/

setiterobject;

static void
setiter_dealloc(PyObject *self)
{}

static int
setiter_traverse(PyObject *self, visitproc visit, void *arg)
{}

static PyObject *
setiter_len(setiterobject *si, PyObject *Py_UNUSED(ignored))
{}

PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");

static PyObject *
setiter_reduce(setiterobject *si, PyObject *Py_UNUSED(ignored))
{}

PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");

static PyMethodDef setiter_methods[] =;

static PyObject *setiter_iternext(PyObject *self)
{}

PyTypeObject PySetIter_Type =;

static PyObject *
set_iter(PyObject *so)
{}

static int
set_update_dict_lock_held(PySetObject *so, PyObject *other)
{}

static int
set_update_iterable_lock_held(PySetObject *so, PyObject *other)
{}

static int
set_update_lock_held(PySetObject *so, PyObject *other)
{}

// set_update for a `so` that is only visible to the current thread
static int
set_update_local(PySetObject *so, PyObject *other)
{}

static int
set_update_internal(PySetObject *so, PyObject *other)
{}

/*[clinic input]
set.update
    so: setobject
    *others: array

Update the set, adding elements from all others.
[clinic start generated code]*/

static PyObject *
set_update_impl(PySetObject *so, PyObject * const *others,
                Py_ssize_t others_length)
/*[clinic end generated code: output=017c781c992d5c23 input=ed5d78885b076636]*/
{}

/* XXX Todo:
   If aligned memory allocations become available, make the
   set object 64 byte aligned so that most of the fields
   can be retrieved or updated in a single cache line.
*/

static PyObject *
make_new_set(PyTypeObject *type, PyObject *iterable)
{}

static PyObject *
make_new_set_basetype(PyTypeObject *type, PyObject *iterable)
{}

static PyObject *
make_new_frozenset(PyTypeObject *type, PyObject *iterable)
{}

static PyObject *
frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{}

static PyObject *
frozenset_vectorcall(PyObject *type, PyObject * const*args,
                     size_t nargsf, PyObject *kwnames)
{}

static PyObject *
set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{}

/* set_swap_bodies() switches the contents of any two sets by moving their
   internal data pointers and, if needed, copying the internal smalltables.
   Semantically equivalent to:

     t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t

   The function always succeeds and it leaves both objects in a stable state.
   Useful for operations that update in-place (by allowing an intermediate
   result to be swapped into one of the original inputs).
*/

static void
set_swap_bodies(PySetObject *a, PySetObject *b)
{}

/*[clinic input]
@critical_section
set.copy
    so: setobject

Return a shallow copy of a set.
[clinic start generated code]*/

static PyObject *
set_copy_impl(PySetObject *so)
/*[clinic end generated code: output=c9223a1e1cc6b041 input=c169a4fbb8209257]*/
{}

/*[clinic input]
@critical_section
frozenset.copy
    so: setobject

Return a shallow copy of a set.
[clinic start generated code]*/

static PyObject *
frozenset_copy_impl(PySetObject *so)
/*[clinic end generated code: output=b356263526af9e70 input=fbf5bef131268dd7]*/
{}

/*[clinic input]
@critical_section
set.clear
    so: setobject

Remove all elements from this set.
[clinic start generated code]*/

static PyObject *
set_clear_impl(PySetObject *so)
/*[clinic end generated code: output=4e71d5a83904161a input=c6f831b366111950]*/
{}

/*[clinic input]
set.union
    so: setobject
    *others: array

Return a new set with elements from the set and all others.
[clinic start generated code]*/

static PyObject *
set_union_impl(PySetObject *so, PyObject * const *others,
               Py_ssize_t others_length)
/*[clinic end generated code: output=b1bfa3d74065f27e input=55a2e81db6347a4f]*/
{}

static PyObject *
set_or(PyObject *self, PyObject *other)
{}

static PyObject *
set_ior(PyObject *self, PyObject *other)
{}

static PyObject *
set_intersection(PySetObject *so, PyObject *other)
{}

/*[clinic input]
set.intersection as set_intersection_multi
    so: setobject
    *others: array

Return a new set with elements common to the set and all others.
[clinic start generated code]*/

static PyObject *
set_intersection_multi_impl(PySetObject *so, PyObject * const *others,
                            Py_ssize_t others_length)
/*[clinic end generated code: output=db9ff9f875132b6b input=36c7b615694cadae]*/
{}

static PyObject *
set_intersection_update(PySetObject *so, PyObject *other)
{}

/*[clinic input]
set.intersection_update as set_intersection_update_multi
    so: setobject
    *others: array

Update the set, keeping only elements found in it and all others.
[clinic start generated code]*/

static PyObject *
set_intersection_update_multi_impl(PySetObject *so, PyObject * const *others,
                                   Py_ssize_t others_length)
/*[clinic end generated code: output=d768b5584675b48d input=782e422fc370e4fc]*/
{}

static PyObject *
set_and(PyObject *self, PyObject *other)
{}

static PyObject *
set_iand(PyObject *self, PyObject *other)
{}

/*[clinic input]
@critical_section so other
set.isdisjoint
    so: setobject
    other: object
    /

Return True if two sets have a null intersection.
[clinic start generated code]*/

static PyObject *
set_isdisjoint_impl(PySetObject *so, PyObject *other)
/*[clinic end generated code: output=273493f2d57c565e input=32f8dcab5e0fc7d6]*/
{}

static int
set_difference_update_internal(PySetObject *so, PyObject *other)
{}

/*[clinic input]
set.difference_update
    so: setobject
    *others: array

Update the set, removing elements found in others.
[clinic start generated code]*/

static PyObject *
set_difference_update_impl(PySetObject *so, PyObject * const *others,
                           Py_ssize_t others_length)
/*[clinic end generated code: output=04a22179b322cfe6 input=93ac28ba5b233696]*/
{}

static PyObject *
set_copy_and_difference(PySetObject *so, PyObject *other)
{}

static PyObject *
set_difference(PySetObject *so, PyObject *other)
{}

/*[clinic input]
set.difference as set_difference_multi
    so: setobject
    *others: array

Return a new set with elements in the set that are not in the others.
[clinic start generated code]*/

static PyObject *
set_difference_multi_impl(PySetObject *so, PyObject * const *others,
                          Py_ssize_t others_length)
/*[clinic end generated code: output=b0d33fb05d5477a7 input=c1eb448d483416ad]*/
{}

static PyObject *
set_sub(PyObject *self, PyObject *other)
{}

static PyObject *
set_isub(PyObject *self, PyObject *other)
{}

static int
set_symmetric_difference_update_dict(PySetObject *so, PyObject *other)
{}

static int
set_symmetric_difference_update_set(PySetObject *so, PySetObject *other)
{}

/*[clinic input]
set.symmetric_difference_update
    so: setobject
    other: object
    /

Update the set, keeping only elements found in either set, but not in both.
[clinic start generated code]*/

static PyObject *
set_symmetric_difference_update(PySetObject *so, PyObject *other)
/*[clinic end generated code: output=fbb049c0806028de input=a50acf0365e1f0a5]*/
{}

/*[clinic input]
@critical_section so other
set.symmetric_difference
    so: setobject
    other: object
    /

Return a new set with elements in either the set or other but not both.
[clinic start generated code]*/

static PyObject *
set_symmetric_difference_impl(PySetObject *so, PyObject *other)
/*[clinic end generated code: output=270ee0b5d42b0797 input=624f6e7bbdf70db1]*/
{}

static PyObject *
set_xor(PyObject *self, PyObject *other)
{}

static PyObject *
set_ixor(PyObject *self, PyObject *other)
{}

/*[clinic input]
@critical_section so other
set.issubset
    so: setobject
    other: object
    /

Report whether another set contains this set.
[clinic start generated code]*/

static PyObject *
set_issubset_impl(PySetObject *so, PyObject *other)
/*[clinic end generated code: output=b2b59d5f314555ce input=f2a4fd0f2537758b]*/
{}

/*[clinic input]
@critical_section so other
set.issuperset
    so: setobject
    other: object
    /

Report whether this set contains another set.
[clinic start generated code]*/

static PyObject *
set_issuperset_impl(PySetObject *so, PyObject *other)
/*[clinic end generated code: output=ecf00ce552c09461 input=5f2e1f262e6e4ccc]*/
{}

static PyObject *
set_richcompare(PyObject *self, PyObject *w, int op)
{}

/*[clinic input]
@critical_section
set.add
    so: setobject
    object as key: object
    /

Add an element to a set.

This has no effect if the element is already present.
[clinic start generated code]*/

static PyObject *
set_add_impl(PySetObject *so, PyObject *key)
/*[clinic end generated code: output=4cc4a937f1425c96 input=03baf62cb0e66514]*/
{}

static int
set_contains_lock_held(PySetObject *so, PyObject *key)
{}

int
_PySet_Contains(PySetObject *so, PyObject *key)
{}

static int
set_contains(PyObject *self, PyObject *key)
{}

/*[clinic input]
@critical_section
@coexist
set.__contains__
    so: setobject
    object as key: object
    /

x.__contains__(y) <==> y in x.
[clinic start generated code]*/

static PyObject *
set___contains___impl(PySetObject *so, PyObject *key)
/*[clinic end generated code: output=b44863d034b3c70e input=4a7d568459617f24]*/
{}

/*[clinic input]
@critical_section
set.remove
    so: setobject
    object as key: object
    /

Remove an element from a set; it must be a member.

If the element is not a member, raise a KeyError.
[clinic start generated code]*/

static PyObject *
set_remove_impl(PySetObject *so, PyObject *key)
/*[clinic end generated code: output=0b9134a2a2200363 input=893e1cb1df98227a]*/
{}

/*[clinic input]
@critical_section
set.discard
    so: setobject
    object as key: object
    /

Remove an element from a set if it is a member.

Unlike set.remove(), the discard() method does not raise
an exception when an element is missing from the set.
[clinic start generated code]*/

static PyObject *
set_discard_impl(PySetObject *so, PyObject *key)
/*[clinic end generated code: output=eec3b687bf32759e input=861cb7fb69b4def0]*/
{}

/*[clinic input]
@critical_section
set.__reduce__
    so: setobject

Return state information for pickling.
[clinic start generated code]*/

static PyObject *
set___reduce___impl(PySetObject *so)
/*[clinic end generated code: output=9af7d0e029df87ee input=59405a4249e82f71]*/
{}

/*[clinic input]
@critical_section
set.__sizeof__
    so: setobject

S.__sizeof__() -> size of S in memory, in bytes.
[clinic start generated code]*/

static PyObject *
set___sizeof___impl(PySetObject *so)
/*[clinic end generated code: output=4bfa3df7bd38ed88 input=09e1a09f168eaa23]*/
{}

static int
set_init(PyObject *so, PyObject *args, PyObject *kwds)
{}

static PyObject*
set_vectorcall(PyObject *type, PyObject * const*args,
               size_t nargsf, PyObject *kwnames)
{}

static PySequenceMethods set_as_sequence =;

/* set object ********************************************************/

static PyMethodDef set_methods[] =;

static PyNumberMethods set_as_number =;

PyDoc_STRVAR(set_doc,
"set(iterable=(), /)\n\
--\n\
\n\
Build an unordered collection of unique elements.");

PyTypeObject PySet_Type =;

/* frozenset object ********************************************************/


static PyMethodDef frozenset_methods[] =;

static PyNumberMethods frozenset_as_number =;

PyDoc_STRVAR(frozenset_doc,
"frozenset(iterable=(), /)\n\
--\n\
\n\
Build an immutable unordered collection of unique elements.");

PyTypeObject PyFrozenSet_Type =;


/***** C API functions *************************************************/

PyObject *
PySet_New(PyObject *iterable)
{}

PyObject *
PyFrozenSet_New(PyObject *iterable)
{}

Py_ssize_t
PySet_Size(PyObject *anyset)
{}

int
PySet_Clear(PyObject *set)
{}

void
_PySet_ClearInternal(PySetObject *so)
{}

int
PySet_Contains(PyObject *anyset, PyObject *key)
{}

int
PySet_Discard(PyObject *set, PyObject *key)
{}

int
PySet_Add(PyObject *anyset, PyObject *key)
{}

int
_PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
{}

int
_PySet_NextEntryRef(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
{}

PyObject *
PySet_Pop(PyObject *set)
{}

int
_PySet_Update(PyObject *set, PyObject *iterable)
{}

/* Exported for the gdb plugin's benefit. */
PyObject *_PySet_Dummy =;

/***** Dummy Struct  *************************************************/

static PyObject *
dummy_repr(PyObject *op)
{}

static void _Py_NO_RETURN
dummy_dealloc(PyObject* ignore)
{}

static PyTypeObject _PySetDummy_Type =;

static PyObject _dummy_struct =;