/* set object implementation Written and maintained by Raymond D. Hettinger <[email protected]> Derived from Objects/dictobject.c. The basic lookup function used by all operations. This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. The initial probe index is computed as hash mod the table size. Subsequent probe indices are computed as explained in Objects/dictobject.c. To improve cache locality, each probe inspects a series of consecutive nearby entries before moving on to probes elsewhere in memory. This leaves us with a hybrid of linear probing and randomized probing. The linear probing reduces the cost of hash collisions because consecutive memory accesses tend to be much cheaper than scattered probes. After LINEAR_PROBES steps, we then use more of the upper bits from the hash value and apply a simple linear congruential random number generator. This helps break-up long chains of collisions. All arithmetic on hash should ignore overflow. Unlike the dictionary implementation, the lookkey function can return NULL if the rich comparison returns an error. Use cases for sets differ considerably from dictionaries where looked-up keys are more likely to be present. In contrast, sets are primarily about membership testing where the presence of an element is not known in advance. Accordingly, the set implementation needs to optimize for both the found and not-found case. */ #include "Python.h" #include "pycore_ceval.h" // _PyEval_GetBuiltin() #include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION, Py_END_CRITICAL_SECTION #include "pycore_dict.h" // _PyDict_Contains_KnownHash() #include "pycore_modsupport.h" // _PyArg_NoKwnames() #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_SSIZE_RELAXED() #include "pycore_pyerrors.h" // _PyErr_SetKeyError() #include "pycore_setobject.h" // _PySet_NextEntry() definition #include "stringlib/eq.h" // unicode_eq() #include <stddef.h> // offsetof() #include "clinic/setobject.c.h" /*[clinic input] class set "PySetObject *" "&PySet_Type" class frozenset "PySetObject *" "&PyFrozenSet_Type" [clinic start generated code]*/ /*[clinic end generated code: output=da39a3ee5e6b4b0d input=97ad1d3e9f117079]*/ /*[python input] class setobject_converter(self_converter): type = "PySetObject *" [python start generated code]*/ /*[python end generated code: output=da39a3ee5e6b4b0d input=33a44506d4d57793]*/ /* Object used as dummy key to fill deleted entries */ static PyObject _dummy_struct; #define dummy … /* ======================================================================== */ /* ======= Begin logic for probing the hash table ========================= */ /* Set this to zero to turn-off linear probing */ #ifndef LINEAR_PROBES #define LINEAR_PROBES … #endif /* This must be >= 1 */ #define PERTURB_SHIFT … static setentry * set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) { … } static int set_table_resize(PySetObject *, Py_ssize_t); static int set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash) { … } /* Internal routine used by set_table_resize() to insert an item which is known to be absent from the set. Besides the performance benefit, there is also safety benefit since using set_add_entry() risks making a callback in the middle of a set_table_resize(), see issue 1456209. The caller is responsible for updating the key's reference count and the setobject's fill and used fields. */ static void set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash) { … } /* ======== End logic for probing the hash table ========================== */ /* ======================================================================== */ /* Restructure the table by allocating a new table and reinserting all keys again. When entries have been deleted, the new table may actually be smaller than the old one. */ static int set_table_resize(PySetObject *so, Py_ssize_t minused) { … } static int set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash) { … } #define DISCARD_NOTFOUND … #define DISCARD_FOUND … static int set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash) { … } static int set_add_key(PySetObject *so, PyObject *key) { … } static int set_contains_key(PySetObject *so, PyObject *key) { … } static int set_discard_key(PySetObject *so, PyObject *key) { … } static void set_empty_to_minsize(PySetObject *so) { … } static int set_clear_internal(PyObject *self) { … } /* * Iterate over a set table. Use like so: * * Py_ssize_t pos; * setentry *entry; * pos = 0; # important! pos should not otherwise be changed by you * while (set_next(yourset, &pos, &entry)) { * Refer to borrowed reference in entry->key. * } * * CAUTION: In general, it isn't safe to use set_next in a loop that * mutates the table. */ static int set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr) { … } static void set_dealloc(PyObject *self) { … } static PyObject * set_repr_lock_held(PySetObject *so) { … } static PyObject * set_repr(PyObject *self) { … } static Py_ssize_t set_len(PyObject *self) { … } static int set_merge_lock_held(PySetObject *so, PyObject *otherset) { … } /*[clinic input] @critical_section set.pop so: setobject Remove and return an arbitrary set element. Raises KeyError if the set is empty. [clinic start generated code]*/ static PyObject * set_pop_impl(PySetObject *so) /*[clinic end generated code: output=4d65180f1271871b input=9296c84921125060]*/ { … } static int set_traverse(PyObject *self, visitproc visit, void *arg) { … } /* Work to increase the bit dispersion for closely spaced hash values. This is important because some use cases have many combinations of a small number of elements with nearby hashes so that many distinct combinations collapse to only a handful of distinct hash values. */ static Py_uhash_t _shuffle_bits(Py_uhash_t h) { … } /* Most of the constants in this hash algorithm are randomly chosen large primes with "interesting bit patterns" and that passed tests for good collision statistics on a variety of problematic datasets including powersets and graph structures (such as David Eppstein's graph recipes in Lib/test/test_set.py). This hash algorithm can be used on either a frozenset or a set. When it is used on a set, it computes the hash value of the equivalent frozenset without creating a new frozenset object. */ static Py_hash_t frozenset_hash_impl(PyObject *self) { … } static Py_hash_t frozenset_hash(PyObject *self) { … } /***** Set iterator type ***********************************************/ setiterobject; static void setiter_dealloc(PyObject *self) { … } static int setiter_traverse(PyObject *self, visitproc visit, void *arg) { … } static PyObject * setiter_len(setiterobject *si, PyObject *Py_UNUSED(ignored)) { … } PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it))."); static PyObject * setiter_reduce(setiterobject *si, PyObject *Py_UNUSED(ignored)) { … } PyDoc_STRVAR(reduce_doc, "Return state information for pickling."); static PyMethodDef setiter_methods[] = …; static PyObject *setiter_iternext(PyObject *self) { … } PyTypeObject PySetIter_Type = …; static PyObject * set_iter(PyObject *so) { … } static int set_update_dict_lock_held(PySetObject *so, PyObject *other) { … } static int set_update_iterable_lock_held(PySetObject *so, PyObject *other) { … } static int set_update_lock_held(PySetObject *so, PyObject *other) { … } // set_update for a `so` that is only visible to the current thread static int set_update_local(PySetObject *so, PyObject *other) { … } static int set_update_internal(PySetObject *so, PyObject *other) { … } /*[clinic input] set.update so: setobject *others: array Update the set, adding elements from all others. [clinic start generated code]*/ static PyObject * set_update_impl(PySetObject *so, PyObject * const *others, Py_ssize_t others_length) /*[clinic end generated code: output=017c781c992d5c23 input=ed5d78885b076636]*/ { … } /* XXX Todo: If aligned memory allocations become available, make the set object 64 byte aligned so that most of the fields can be retrieved or updated in a single cache line. */ static PyObject * make_new_set(PyTypeObject *type, PyObject *iterable) { … } static PyObject * make_new_set_basetype(PyTypeObject *type, PyObject *iterable) { … } static PyObject * make_new_frozenset(PyTypeObject *type, PyObject *iterable) { … } static PyObject * frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { … } static PyObject * frozenset_vectorcall(PyObject *type, PyObject * const*args, size_t nargsf, PyObject *kwnames) { … } static PyObject * set_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { … } /* set_swap_bodies() switches the contents of any two sets by moving their internal data pointers and, if needed, copying the internal smalltables. Semantically equivalent to: t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t The function always succeeds and it leaves both objects in a stable state. Useful for operations that update in-place (by allowing an intermediate result to be swapped into one of the original inputs). */ static void set_swap_bodies(PySetObject *a, PySetObject *b) { … } /*[clinic input] @critical_section set.copy so: setobject Return a shallow copy of a set. [clinic start generated code]*/ static PyObject * set_copy_impl(PySetObject *so) /*[clinic end generated code: output=c9223a1e1cc6b041 input=c169a4fbb8209257]*/ { … } /*[clinic input] @critical_section frozenset.copy so: setobject Return a shallow copy of a set. [clinic start generated code]*/ static PyObject * frozenset_copy_impl(PySetObject *so) /*[clinic end generated code: output=b356263526af9e70 input=fbf5bef131268dd7]*/ { … } /*[clinic input] @critical_section set.clear so: setobject Remove all elements from this set. [clinic start generated code]*/ static PyObject * set_clear_impl(PySetObject *so) /*[clinic end generated code: output=4e71d5a83904161a input=c6f831b366111950]*/ { … } /*[clinic input] set.union so: setobject *others: array Return a new set with elements from the set and all others. [clinic start generated code]*/ static PyObject * set_union_impl(PySetObject *so, PyObject * const *others, Py_ssize_t others_length) /*[clinic end generated code: output=b1bfa3d74065f27e input=55a2e81db6347a4f]*/ { … } static PyObject * set_or(PyObject *self, PyObject *other) { … } static PyObject * set_ior(PyObject *self, PyObject *other) { … } static PyObject * set_intersection(PySetObject *so, PyObject *other) { … } /*[clinic input] set.intersection as set_intersection_multi so: setobject *others: array Return a new set with elements common to the set and all others. [clinic start generated code]*/ static PyObject * set_intersection_multi_impl(PySetObject *so, PyObject * const *others, Py_ssize_t others_length) /*[clinic end generated code: output=db9ff9f875132b6b input=36c7b615694cadae]*/ { … } static PyObject * set_intersection_update(PySetObject *so, PyObject *other) { … } /*[clinic input] set.intersection_update as set_intersection_update_multi so: setobject *others: array Update the set, keeping only elements found in it and all others. [clinic start generated code]*/ static PyObject * set_intersection_update_multi_impl(PySetObject *so, PyObject * const *others, Py_ssize_t others_length) /*[clinic end generated code: output=d768b5584675b48d input=782e422fc370e4fc]*/ { … } static PyObject * set_and(PyObject *self, PyObject *other) { … } static PyObject * set_iand(PyObject *self, PyObject *other) { … } /*[clinic input] @critical_section so other set.isdisjoint so: setobject other: object / Return True if two sets have a null intersection. [clinic start generated code]*/ static PyObject * set_isdisjoint_impl(PySetObject *so, PyObject *other) /*[clinic end generated code: output=273493f2d57c565e input=32f8dcab5e0fc7d6]*/ { … } static int set_difference_update_internal(PySetObject *so, PyObject *other) { … } /*[clinic input] set.difference_update so: setobject *others: array Update the set, removing elements found in others. [clinic start generated code]*/ static PyObject * set_difference_update_impl(PySetObject *so, PyObject * const *others, Py_ssize_t others_length) /*[clinic end generated code: output=04a22179b322cfe6 input=93ac28ba5b233696]*/ { … } static PyObject * set_copy_and_difference(PySetObject *so, PyObject *other) { … } static PyObject * set_difference(PySetObject *so, PyObject *other) { … } /*[clinic input] set.difference as set_difference_multi so: setobject *others: array Return a new set with elements in the set that are not in the others. [clinic start generated code]*/ static PyObject * set_difference_multi_impl(PySetObject *so, PyObject * const *others, Py_ssize_t others_length) /*[clinic end generated code: output=b0d33fb05d5477a7 input=c1eb448d483416ad]*/ { … } static PyObject * set_sub(PyObject *self, PyObject *other) { … } static PyObject * set_isub(PyObject *self, PyObject *other) { … } static int set_symmetric_difference_update_dict(PySetObject *so, PyObject *other) { … } static int set_symmetric_difference_update_set(PySetObject *so, PySetObject *other) { … } /*[clinic input] set.symmetric_difference_update so: setobject other: object / Update the set, keeping only elements found in either set, but not in both. [clinic start generated code]*/ static PyObject * set_symmetric_difference_update(PySetObject *so, PyObject *other) /*[clinic end generated code: output=fbb049c0806028de input=a50acf0365e1f0a5]*/ { … } /*[clinic input] @critical_section so other set.symmetric_difference so: setobject other: object / Return a new set with elements in either the set or other but not both. [clinic start generated code]*/ static PyObject * set_symmetric_difference_impl(PySetObject *so, PyObject *other) /*[clinic end generated code: output=270ee0b5d42b0797 input=624f6e7bbdf70db1]*/ { … } static PyObject * set_xor(PyObject *self, PyObject *other) { … } static PyObject * set_ixor(PyObject *self, PyObject *other) { … } /*[clinic input] @critical_section so other set.issubset so: setobject other: object / Report whether another set contains this set. [clinic start generated code]*/ static PyObject * set_issubset_impl(PySetObject *so, PyObject *other) /*[clinic end generated code: output=b2b59d5f314555ce input=f2a4fd0f2537758b]*/ { … } /*[clinic input] @critical_section so other set.issuperset so: setobject other: object / Report whether this set contains another set. [clinic start generated code]*/ static PyObject * set_issuperset_impl(PySetObject *so, PyObject *other) /*[clinic end generated code: output=ecf00ce552c09461 input=5f2e1f262e6e4ccc]*/ { … } static PyObject * set_richcompare(PyObject *self, PyObject *w, int op) { … } /*[clinic input] @critical_section set.add so: setobject object as key: object / Add an element to a set. This has no effect if the element is already present. [clinic start generated code]*/ static PyObject * set_add_impl(PySetObject *so, PyObject *key) /*[clinic end generated code: output=4cc4a937f1425c96 input=03baf62cb0e66514]*/ { … } static int set_contains_lock_held(PySetObject *so, PyObject *key) { … } int _PySet_Contains(PySetObject *so, PyObject *key) { … } static int set_contains(PyObject *self, PyObject *key) { … } /*[clinic input] @critical_section @coexist set.__contains__ so: setobject object as key: object / x.__contains__(y) <==> y in x. [clinic start generated code]*/ static PyObject * set___contains___impl(PySetObject *so, PyObject *key) /*[clinic end generated code: output=b44863d034b3c70e input=4a7d568459617f24]*/ { … } /*[clinic input] @critical_section set.remove so: setobject object as key: object / Remove an element from a set; it must be a member. If the element is not a member, raise a KeyError. [clinic start generated code]*/ static PyObject * set_remove_impl(PySetObject *so, PyObject *key) /*[clinic end generated code: output=0b9134a2a2200363 input=893e1cb1df98227a]*/ { … } /*[clinic input] @critical_section set.discard so: setobject object as key: object / Remove an element from a set if it is a member. Unlike set.remove(), the discard() method does not raise an exception when an element is missing from the set. [clinic start generated code]*/ static PyObject * set_discard_impl(PySetObject *so, PyObject *key) /*[clinic end generated code: output=eec3b687bf32759e input=861cb7fb69b4def0]*/ { … } /*[clinic input] @critical_section set.__reduce__ so: setobject Return state information for pickling. [clinic start generated code]*/ static PyObject * set___reduce___impl(PySetObject *so) /*[clinic end generated code: output=9af7d0e029df87ee input=59405a4249e82f71]*/ { … } /*[clinic input] @critical_section set.__sizeof__ so: setobject S.__sizeof__() -> size of S in memory, in bytes. [clinic start generated code]*/ static PyObject * set___sizeof___impl(PySetObject *so) /*[clinic end generated code: output=4bfa3df7bd38ed88 input=09e1a09f168eaa23]*/ { … } static int set_init(PyObject *so, PyObject *args, PyObject *kwds) { … } static PyObject* set_vectorcall(PyObject *type, PyObject * const*args, size_t nargsf, PyObject *kwnames) { … } static PySequenceMethods set_as_sequence = …; /* set object ********************************************************/ static PyMethodDef set_methods[] = …; static PyNumberMethods set_as_number = …; PyDoc_STRVAR(set_doc, "set(iterable=(), /)\n\ --\n\ \n\ Build an unordered collection of unique elements."); PyTypeObject PySet_Type = …; /* frozenset object ********************************************************/ static PyMethodDef frozenset_methods[] = …; static PyNumberMethods frozenset_as_number = …; PyDoc_STRVAR(frozenset_doc, "frozenset(iterable=(), /)\n\ --\n\ \n\ Build an immutable unordered collection of unique elements."); PyTypeObject PyFrozenSet_Type = …; /***** C API functions *************************************************/ PyObject * PySet_New(PyObject *iterable) { … } PyObject * PyFrozenSet_New(PyObject *iterable) { … } Py_ssize_t PySet_Size(PyObject *anyset) { … } int PySet_Clear(PyObject *set) { … } void _PySet_ClearInternal(PySetObject *so) { … } int PySet_Contains(PyObject *anyset, PyObject *key) { … } int PySet_Discard(PyObject *set, PyObject *key) { … } int PySet_Add(PyObject *anyset, PyObject *key) { … } int _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash) { … } int _PySet_NextEntryRef(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash) { … } PyObject * PySet_Pop(PyObject *set) { … } int _PySet_Update(PyObject *set, PyObject *iterable) { … } /* Exported for the gdb plugin's benefit. */ PyObject *_PySet_Dummy = …; /***** Dummy Struct *************************************************/ static PyObject * dummy_repr(PyObject *op) { … } static void _Py_NO_RETURN dummy_dealloc(PyObject* ignore) { … } static PyTypeObject _PySetDummy_Type = …; static PyObject _dummy_struct = …;