/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // // Docs: https://fburl.com/fbcref_hash // /** * folly::hash provides hashing algorithms, as well as algorithms to combine * multiple hashes/hashable objects together. * * @refcode folly/docs/examples/folly/hash/Hash.cpp * @file hash/Hash.h */ #pragma once #include <cstdint> #include <cstring> #include <limits> #include <memory> #include <string> #include <string_view> #include <tuple> #include <type_traits> #include <utility> #include <folly/CPortability.h> #include <folly/Portability.h> #include <folly/Traits.h> #include <folly/Utility.h> #include <folly/functional/ApplyTuple.h> #include <folly/hash/MurmurHash.h> #include <folly/hash/SpookyHashV1.h> #include <folly/hash/SpookyHashV2.h> #include <folly/lang/Bits.h> namespace folly { namespace hash { namespace detail { namespace { is_hashable_byte_v; is_hashable_byte_v; is_hashable_byte_v; is_hashable_byte_v; } // namespace } // namespace detail /** * Reduce two 64-bit hashes into one. * * hash_128_to_64 uses the Hash128to64 function from Google's cityhash (under * the MIT License). */ FOLLY_DISABLE_UNDEFINED_BEHAVIOR_SANITIZER(…) constexpr uint64_t hash_128_to_64( const uint64_t upper, const uint64_t lower) noexcept { … } /** * Order-independent reduction of two 64-bit hashes into one. 
* * Commutative accumulator taken from this paper: * https://www.preprints.org/manuscript/201710.0192/v1/download */ FOLLY_DISABLE_UNDEFINED_BEHAVIOR_SANITIZER(…) constexpr uint64_t commutative_hash_128_to_64( const uint64_t upper, const uint64_t lower) noexcept { … } /** * Thomas Wang 64 bit mix hash function. * * @methodset twang */ FOLLY_DISABLE_UNDEFINED_BEHAVIOR_SANITIZER(…) constexpr uint64_t twang_mix64(uint64_t key) noexcept { … } /** * Inverse of twang_mix64. * * @methodset twang */ constexpr uint64_t twang_unmix64(uint64_t key) noexcept { … } /** * Thomas Wang downscaling hash function. * * @methodset twang */ constexpr uint32_t twang_32from64(uint64_t key) noexcept { … } /** * Robert Jenkins' reversible 32 bit mix hash function. * * @methodset jenkins */ constexpr uint32_t jenkins_rev_mix32(uint32_t key) noexcept { … } /** * Inverse of jenkins_rev_mix32. * * @methodset jenkins */ constexpr uint32_t jenkins_rev_unmix32(uint32_t key) noexcept { … } // fnv // // Fowler / Noll / Vo (FNV) Hash // http://www.isthe.com/chongo/tech/comp/fnv/ // // Discouraged for poor performance in the smhasher suite. constexpr uint32_t fnv32_hash_start = …; constexpr uint64_t fnv64_hash_start = …; constexpr uint64_t fnva64_hash_start = …; /** * Append byte to FNV hash. * * @see fnv32 * @methodset fnv */ constexpr uint32_t fnv32_append_byte(uint32_t hash, uint8_t c) { … } /** * FNV hash of a byte-range. * * @param hash The initial hash seed. * * @see fnv32 * @methodset fnv */ template <typename C, std::enable_if_t<detail::is_hashable_byte_v<C>, int> = 0> constexpr uint32_t fnv32_buf( const C* buf, size_t n, uint32_t hash = fnv32_hash_start) noexcept { … } inline uint32_t fnv32_buf( const void* buf, size_t n, uint32_t hash = fnv32_hash_start) noexcept { … } /** * FNV hash of a c-str. * * Continues hashing until a null byte is reached. * * @param hash The initial hash seed. 
* * @methodset fnv */ constexpr uint32_t fnv32( const char* buf, uint32_t hash = fnv32_hash_start) noexcept { … } /** * @overloadbrief FNV hash of a string. * * FNV is the Fowler / Noll / Vo Hash: * http://www.isthe.com/chongo/tech/comp/fnv/ * * Discouraged for poor performance in the smhasher suite. * * @param hash The initial hash seed. * * @methodset fnv */ inline uint32_t fnv32( const std::string& str, uint32_t hash = fnv32_hash_start) noexcept { … } /** * Append a byte to FNV hash. * * @see fnv32 * @methodset fnv */ constexpr uint64_t fnv64_append_byte(uint64_t hash, uint8_t c) { … } /** * FNV hash of a byte-range. * * @param hash The initial hash seed. * * @see fnv32 * @methodset fnv */ template <typename C, std::enable_if_t<detail::is_hashable_byte_v<C>, int> = 0> constexpr uint64_t fnv64_buf( const C* buf, size_t n, uint64_t hash = fnv64_hash_start) noexcept { … } inline uint64_t fnv64_buf( const void* buf, size_t n, uint64_t hash = fnv64_hash_start) noexcept { … } /** * FNV hash of a c-str. * * Continues hashing until a null byte is reached. * * @param hash The initial hash seed. * * @see fnv32 * @methodset fnv */ constexpr uint64_t fnv64( const char* buf, uint64_t hash = fnv64_hash_start) noexcept { … } /** * @overloadbrief FNV hash of a string. * * FNV is the Fowler / Noll / Vo Hash: * http://www.isthe.com/chongo/tech/comp/fnv/ * * Discouraged for poor performance in the smhasher suite. * * @param hash The initial hash seed. * * @see fnv32 * @methodset fnv */ inline uint64_t fnv64( const std::string& str, uint64_t hash = fnv64_hash_start) noexcept { … } /** * Append a byte to FNVA hash. * * @see fnv32 * @methodset fnv */ constexpr uint64_t fnva64_append_byte(uint64_t hash, uint8_t c) { … } /** * FNVA hash of a byte-range. * * @param hash The initial hash seed. 
* * @see fnv32 * @methodset fnv */ template <typename C, std::enable_if_t<detail::is_hashable_byte_v<C>, int> = 0> constexpr uint64_t fnva64_buf( const C* buf, size_t n, uint64_t hash = fnva64_hash_start) noexcept { … } inline uint64_t fnva64_buf( const void* buf, size_t n, uint64_t hash = fnva64_hash_start) noexcept { … } /** * FNVA hash of a string. * * @param hash The initial hash seed. * * @see fnv32 * @methodset fnv */ inline uint64_t fnva64( const std::string& str, uint64_t hash = fnva64_hash_start) noexcept { … } // hsieh // // Paul Hsieh: http://www.azillionmonkeys.com/qed/hash.html #define get16bits … /** * hsieh hash a byte-range. * * @see hsieh_hash32_str * @methodset hsieh */ inline constexpr uint32_t hsieh_hash32_buf_constexpr( const unsigned char* buf, size_t len) noexcept { … } #undef get16bits /** * hsieh hash a void* byte-range. * * @see hsieh_hash32_str * @methodset hsieh */ inline uint32_t hsieh_hash32_buf(const void* buf, size_t len) noexcept { … } /** * hsieh hash a c-str. * * Computes the strlen of the input, then byte-range hashes it. * * @see hsieh_hash32_str * @methodset hsieh */ inline uint32_t hsieh_hash32(const char* s) noexcept { … } /** * hsieh hash a string. * * Paul Hsieh: http://www.azillionmonkeys.com/qed/hash.html * * @methodset hsieh */ inline uint32_t hsieh_hash32_str(const std::string& str) noexcept { … } } // namespace hash namespace detail { template <typename Int> struct integral_hasher { … }; template <typename F> struct float_hasher { … }; } // namespace detail template <class Key, class Enable = void> struct hasher; struct Hash { … }; // IsAvalanchingHasher<H, K> extends std::integral_constant<bool, V>. // V will be true if it is known that when a hasher of type H computes // the hash of a key of type K, any subset of B bits from the resulting // hash value is usable in a context that can tolerate a collision rate // of about 1/2^B. 
(Input bits lost implicitly converting between K and // the argument of H::operator() are not considered here; K is separate // to handle the case of generic hashers like folly::Hash). // // If std::hash<T> or folly::hasher<T> is specialized for a new type T and // the implementation avalanches input entropy across all of the bits of a // std::size_t result, the specialization should be marked as avalanching. // This can be done either by adding a member type folly_is_avalanching // to the functor H that contains a constexpr bool value of true, or by // specializing IsAvalanchingHasher<H, K>. The member type mechanism is // more convenient, but specializing IsAvalanchingHasher may be required // if a hasher is polymorphic on the key type or if its definition cannot // be modified. // // The standard's definition of hash quality is based on the chance of hash // collisions using the entire hash value. No requirement is made that // this property holds for subsets of the bits. In addition, hashed keys // in real-world workloads are not chosen uniformly from the entire domain // of keys, which can further increase the collision rate for a subset // of bits. For example, std::hash<uint64_t> in libstdc++-v3 and libc++ // is the identity function. This hash function has no collisions when // considering hash values in their entirety, but for real-world workloads // the high bits are likely to always be zero. // // Some hash functions provide a stronger guarantee -- the standard's // collision property is also preserved for subsets of the output bits and // for sub-domains of keys. Another way to say this is that each bit of // the hash value contains entropy from the entire input: changes to the // input avalanche across all of the bits of the output. The distinction // is useful when mapping the hash value onto a smaller space efficiently // (such as when implementing a hash table).
template <typename Hasher, typename Key> struct IsAvalanchingHasher; namespace detail { template <typename Hasher, typename Void = void> struct IsAvalanchingHasherFromMemberType : std::bool_constant<!require_sizeof<Hasher>> { … }; IsAvalanchingHasherFromMemberType<Hasher, void_t<typename Hasher::folly_is_avalanching>>; } // namespace detail template <typename Hasher, typename Key> struct IsAvalanchingHasher : detail::IsAvalanchingHasherFromMemberType<Hasher> { … }; // It's ugly to put this here, but folly::transparent isn't hash specific // so it seems even more ugly to put this near its declaration IsAvalanchingHasher<transparent<H>, K>; IsAvalanchingHasher<Hash, K>; template <> struct hasher<bool> { … }; IsAvalanchingHasher<hasher<bool>, K>; template <> struct hasher<unsigned long long> : detail::integral_hasher<unsigned long long> { … }; template <> struct hasher<signed long long> : detail::integral_hasher<signed long long> { … }; template <> struct hasher<unsigned long> : detail::integral_hasher<unsigned long> { … }; template <> struct hasher<signed long> : detail::integral_hasher<signed long> { … }; template <> struct hasher<unsigned int> : detail::integral_hasher<unsigned int> { … }; template <> struct hasher<signed int> : detail::integral_hasher<signed int> { … }; template <> struct hasher<unsigned short> : detail::integral_hasher<unsigned short> { … }; template <> struct hasher<signed short> : detail::integral_hasher<signed short> { … }; template <> struct hasher<unsigned char> : detail::integral_hasher<unsigned char> { … }; template <> struct hasher<signed char> : detail::integral_hasher<signed char> { … }; template <> // char is a different type from both signed char and unsigned char struct hasher<char> : detail::integral_hasher<char> { … }; #if FOLLY_HAVE_INT128_T template <> struct hasher<signed __int128> : detail::integral_hasher<signed __int128> {}; template <> struct hasher<unsigned __int128> : detail::integral_hasher<unsigned __int128> { }; #endif 
template <> struct hasher<float> : detail::float_hasher<float> { … }; template <> struct hasher<double> : detail::float_hasher<double> { … }; template <> struct hasher<std::string> { … }; IsAvalanchingHasher<hasher<std::string>, K>; template <> struct hasher<std::string_view> { … }; IsAvalanchingHasher<hasher<std::string_view>, K>; hasher<T, std::enable_if_t<std::is_enum<T>::value>>; IsAvalanchingHasher<hasher<T, std::enable_if_t<std::is_enum<T>::value>>, K>; hasher<std::pair<T1, T2>>; hasher<std::tuple<Ts...>>; hasher<T *>; hasher<std::unique_ptr<T>>; hasher<std::shared_ptr<T>>; // combiner for multi-arg tuple also mixes bits IsAvalanchingHasher<hasher<std::tuple<T>>, K>; IsAvalanchingHasher<hasher<std::tuple<T1, T2, Ts...>>, K>; namespace hash { // Compatible with std::hash implementation of hashing for std::string_view. // We use hash::murmurHash64 as a replacement of libstdc++ implementation // for better performance, for other implementations of C++ Standard Libraries // we fallback to std::hash. #if defined(_GLIBCXX_STRING) && FOLLY_X64 FOLLY_ALWAYS_INLINE size_t stdCompatibleHash(std::string_view sv) noexcept { static_assert(sizeof(size_t) == sizeof(uint64_t)); constexpr uint64_t kSeed = 0xc70f6907ULL; return hash::murmurHash64(sv.data(), sv.size(), kSeed); } #else FOLLY_ALWAYS_INLINE size_t stdCompatibleHash(std::string_view sv) noexcept( noexcept(std::hash<std::string_view>{ … } #endif // defined(_GLIBCXX_STRING) && FOLLY_X64 // Simply uses std::hash to hash. Note that std::hash is not guaranteed // to be a very good hash function; provided std::hash doesn't collide on // the individual inputs, you are fine, but that won't be true for, say, // strings or pairs class StdHasher { … }; // This is a general-purpose way to create a single hash from multiple // hashable objects. hash_combine_generic takes a class Hasher implementing // hash<T>; hash_combine uses a default hasher StdHasher that uses std::hash. 
// hash_combine_generic hashes each argument and combines those hashes in // an order-dependent way to yield a new hash; hash_range does so (also in an // order-dependent way) for items in the range [first, last); // commutative_hash_combine_* hashes values but combines them in an // order-independent way to yield a new hash. /** * Hash a value, and combine it with a seed. Commutative. * * @param hasher The function/callable which will hash the value. * * @methodset ranges */ template <class Hash, class Value> uint64_t commutative_hash_combine_value_generic( uint64_t seed, Hash const& hasher, Value const& value) { … } /** * Combine hashes of items in the range [first, last), order-dependently. * * For order-independent hashing, such as for hashing an unordered container * (e.g. folly::dynamic::object) use commutative_hash_combine_range instead. * * @param hash The base-case hash to use. * @param hasher The function/callable which will hash the value. * * @methodset ranges */ template < class Iter, class Hash = std::hash<typename std::iterator_traits<Iter>::value_type>> uint64_t hash_range( Iter begin, Iter end, uint64_t hash = 0, Hash hasher = Hash()) { … } /** * Create a hash from multiple hashable objects, order-independently. * * For order-dependent hashing use hash_range. * * @param seed The base-case hash to use. * @param hasher The function/callable which will hash the value. * * @methodset ranges */ template <class Hash, class Iter> uint64_t commutative_hash_combine_range_generic( uint64_t seed, Hash const& hasher, Iter first, Iter last) { … } /** * Create a hash from multiple hashable objects, order-independently. * * @methodset ranges */ template <class Iter> uint64_t commutative_hash_combine_range(Iter first, Iter last) { … } namespace detail { c_array_size_t; } // namespace detail // Never used, but gcc demands it. 
template <class Hasher> inline size_t hash_combine_generic(const Hasher&) noexcept { … } /** * Combine hashes of multiple items, order-dependently. * * @param h The function/callable which will hash the value. * * @methodset ranges */ template <class Hasher, typename T, typename... Ts> size_t hash_combine_generic( const Hasher& h, const T& t, const Ts&... ts) noexcept(noexcept(detail::c_array_size_t{ … } /** * Combine hashes of multiple items, order-independently. * * @param hasher The function/callable which will hash the value. * * @methodset ranges */ template <typename Hash, typename... Value> uint64_t commutative_hash_combine_generic( uint64_t seed, Hash const& hasher, Value const&... value) { … } /** * Combine hashes of multiple items, order-dependently. * * @methodset ranges */ template <typename T, typename... Ts> FOLLY_NODISCARD size_t hash_combine(const T& t, const Ts&... ts) noexcept( noexcept(hash_combine_generic(StdHasher{ … } /** * Combine hashes of multiple items, order-independently. * */ template <typename... Value> uint64_t commutative_hash_combine(Value const&... value) { … } } // namespace hash // recursion template <size_t index, typename... Ts> struct TupleHasher { … }; // base TupleHasher<0, Ts...>; } // namespace folly // Custom hash functions. namespace std { // Hash function for pairs. Requires default hash functions for both // items in the pair. hash<std::pair<T1, T2>>; // Hash function for tuples. Requires default hash functions for all types. hash<std::tuple<Ts...>>; } // namespace std namespace folly { // std::hash<std::string> is avalanching on libstdc++-v3 (code checked), // libc++ (code checked), and MSVC (based on online information). // std::hash for float and double on libstdc++-v3 are avalanching, // but they are not on libc++. std::hash for integral types is not // avalanching for libstdc++-v3 or libc++. We're conservative here and // just mark std::string as avalanching. std::string_view will also be // so, once it exists. 
IsAvalanchingHasher<std::hash<std::basic_string<Args...>>, K>; } // namespace folly