Hash.h | Explore in Territory

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// Docs: https://fburl.com/fbcref_hash
//

/**
 * folly::hash provides hashing algorithms, as well as algorithms to combine
 * multiple hashes/hashable objects together.
 *
 * @refcode folly/docs/examples/folly/hash/Hash.cpp
 * @file hash/Hash.h
 */

#pragma once

#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <utility>

#include <folly/CPortability.h>
#include <folly/Portability.h>
#include <folly/Traits.h>
#include <folly/Utility.h>
#include <folly/functional/ApplyTuple.h>
#include <folly/hash/MurmurHash.h>
#include <folly/hash/SpookyHashV1.h>
#include <folly/hash/SpookyHashV2.h>
#include <folly/lang/Bits.h>

namespace folly {
namespace hash {

namespace detail {

// is_hashable_byte_v is the trait the fnv32_buf / fnv64_buf / fnva64_buf
// templates below test (through std::enable_if_t) to constrain their element
// type C.
//
// NOTE(review): this is an unnamed namespace inside a header, so each
// translation unit that includes the header gets its own internal-linkage copy
// of whatever is declared here. The file already relies on C++17, so
// `inline constexpr` variables look like a possible replacement -- TODO confirm
// that no supported toolchain still needs the unnamed namespace.
namespace {

is_hashable_byte_v;
is_hashable_byte_v;
is_hashable_byte_v;
is_hashable_byte_v;

} // namespace

} // namespace detail

/**
 * Reduce two 64-bit hashes into one.
 *
 * hash_128_to_64 uses the Hash128to64 function from Google's cityhash (under
 * the MIT License).
 */
FOLLY_DISABLE_UNDEFINED_BEHAVIOR_SANITIZER(…)
constexpr uint64_t hash_128_to_64(
    const uint64_t upper, const uint64_t lower) noexcept { … }

/**
 * Order-independent reduction of two 64-bit hashes into one.
 *
 * Commutative accumulator taken from this paper:
 * https://www.preprints.org/manuscript/201710.0192/v1/download
 */
FOLLY_DISABLE_UNDEFINED_BEHAVIOR_SANITIZER(…)
constexpr uint64_t commutative_hash_128_to_64(
    const uint64_t upper, const uint64_t lower) noexcept { … }

/**
 * Thomas Wang 64 bit mix hash function.
 *
 * @methodset twang
 */
FOLLY_DISABLE_UNDEFINED_BEHAVIOR_SANITIZER(…)
constexpr uint64_t twang_mix64(uint64_t key) noexcept { … }

/**
 * Inverse of twang_mix64.
 *
 * @methodset twang
 */
constexpr uint64_t twang_unmix64(uint64_t key) noexcept { … }

/**
 * Thomas Wang downscaling hash function.
 *
 * @methodset twang
 */
constexpr uint32_t twang_32from64(uint64_t key) noexcept { … }

/**
 * Robert Jenkins' reversible 32 bit mix hash function.
 *
 * @methodset jenkins
 */
constexpr uint32_t jenkins_rev_mix32(uint32_t key) noexcept { … }

/**
 * Inverse of jenkins_rev_mix32.
 *
 * @methodset jenkins
 */
constexpr uint32_t jenkins_rev_unmix32(uint32_t key) noexcept { … }

//  fnv
//
//  Fowler / Noll / Vo (FNV) Hash
//    http://www.isthe.com/chongo/tech/comp/fnv/
//
//  Discouraged for poor performance in the smhasher suite.

// Initial hash values (seeds) for the three FNV variants. Each one is the
// default argument for the `hash` parameter of the matching buffer and string
// hashing functions below (fnv32*, fnv64* and fnva64* respectively).
constexpr uint32_t fnv32_hash_start = …;
constexpr uint64_t fnv64_hash_start = …;
constexpr uint64_t fnva64_hash_start = …;

/**
 * Append byte to FNV hash.
 *
 * @see fnv32
 * @methodset fnv
 */
constexpr uint32_t fnv32_append_byte(uint32_t hash, uint8_t c) { … }

/**
 * FNV hash of a byte-range.
 *
 * @param hash  The initial hash seed.
 *
 * @see fnv32
 * @methodset fnv
 */
template <typename C, std::enable_if_t<detail::is_hashable_byte_v<C>, int> = 0>
constexpr uint32_t fnv32_buf(
    const C* buf, size_t n, uint32_t hash = fnv32_hash_start) noexcept { … }
inline uint32_t fnv32_buf(
    const void* buf, size_t n, uint32_t hash = fnv32_hash_start) noexcept { … }

/**
 * FNV hash of a c-str.
 *
 * Continues hashing until a null byte is reached.
 *
 * @param hash  The initial hash seed.
 *
 * @methodset fnv
 */
constexpr uint32_t fnv32(
    const char* buf, uint32_t hash = fnv32_hash_start) noexcept { … }

/**
 * @overloadbrief FNV hash of a string.
 *
 * FNV is the Fowler / Noll / Vo Hash:
 * http://www.isthe.com/chongo/tech/comp/fnv/
 *
 * Discouraged for poor performance in the smhasher suite.
 *
 * @param hash  The initial hash seed.
 *
 * @methodset fnv
 */
inline uint32_t fnv32(
    const std::string& str, uint32_t hash = fnv32_hash_start) noexcept { … }

/**
 * Append a byte to FNV hash.
 *
 * @see fnv32
 * @methodset fnv
 */
constexpr uint64_t fnv64_append_byte(uint64_t hash, uint8_t c) { … }

/**
 * FNV hash of a byte-range.
 *
 * @param hash  The initial hash seed.
 *
 * @see fnv32
 * @methodset fnv
 */
template <typename C, std::enable_if_t<detail::is_hashable_byte_v<C>, int> = 0>
constexpr uint64_t fnv64_buf(
    const C* buf, size_t n, uint64_t hash = fnv64_hash_start) noexcept { … }
inline uint64_t fnv64_buf(
    const void* buf, size_t n, uint64_t hash = fnv64_hash_start) noexcept { … }

/**
 * FNV hash of a c-str.
 *
 * Continues hashing until a null byte is reached.
 *
 * @param hash  The initial hash seed.
 *
 * @see fnv32
 * @methodset fnv
 */
constexpr uint64_t fnv64(
    const char* buf, uint64_t hash = fnv64_hash_start) noexcept { … }

/**
 * @overloadbrief FNV hash of a string.
 *
 * FNV is the Fowler / Noll / Vo Hash:
 * http://www.isthe.com/chongo/tech/comp/fnv/
 *
 * Discouraged for poor performance in the smhasher suite.
 *
 * @param hash  The initial hash seed.
 *
 * @see fnv32
 * @methodset fnv
 */
inline uint64_t fnv64(
    const std::string& str, uint64_t hash = fnv64_hash_start) noexcept { … }

/**
 * Append a byte to FNVA hash.
 *
 * @see fnv32
 * @methodset fnv
 */
constexpr uint64_t fnva64_append_byte(uint64_t hash, uint8_t c) { … }

/**
 * FNVA hash of a byte-range.
 *
 * @param hash  The initial hash seed.
 *
 * @see fnv32
 * @methodset fnv
 */
template <typename C, std::enable_if_t<detail::is_hashable_byte_v<C>, int> = 0>
constexpr uint64_t fnva64_buf(
    const C* buf, size_t n, uint64_t hash = fnva64_hash_start) noexcept { … }
inline uint64_t fnva64_buf(
    const void* buf, size_t n, uint64_t hash = fnva64_hash_start) noexcept { … }

/**
 * FNVA hash of a string.
 *
 * @param hash  The initial hash seed.
 *
 * @see fnv32
 * @methodset fnv
 */
inline uint64_t fnva64(
    const std::string& str, uint64_t hash = fnva64_hash_start) noexcept { … }

//  hsieh
//
//  Paul Hsieh: http://www.azillionmonkeys.com/qed/hash.html

// Helper macro for the hsieh implementation that follows. It is #undef'd
// right after hsieh_hash32_buf_constexpr, so it is not visible to code that
// includes this header.
#define get16bits …

/**
 * hsieh hash a byte-range.
 *
 * @see hsieh_hash32_str
 * @methodset hsieh
 */
inline constexpr uint32_t hsieh_hash32_buf_constexpr(
    const unsigned char* buf, size_t len) noexcept { … }

#undef get16bits

/**
 * hsieh hash a void* byte-range.
 *
 * @see hsieh_hash32_str
 * @methodset hsieh
 */
inline uint32_t hsieh_hash32_buf(const void* buf, size_t len) noexcept { … }

/**
 * hsieh hash a c-str.
 *
 * Computes the strlen of the input, then byte-range hashes it.
 *
 * @see hsieh_hash32_str
 * @methodset hsieh
 */
inline uint32_t hsieh_hash32(const char* s) noexcept { … }

/**
 * hsieh hash a string.
 *
 * Paul Hsieh: http://www.azillionmonkeys.com/qed/hash.html
 *
 * @methodset hsieh
 */
inline uint32_t hsieh_hash32_str(const std::string& str) noexcept { … }

} // namespace hash

namespace detail {

// Base class shared by the folly::hasher specializations for the built-in
// integer types (hasher<unsigned long long> through hasher<char>, plus the
// __int128 ones when available); each specialization derives from
// integral_hasher<ThatType>.
template <typename Int>
struct integral_hasher { … };

// Base class shared by hasher<float> and hasher<double>, which derive from
// float_hasher<float> and float_hasher<double> respectively.
template <typename F>
struct float_hasher { … };

} // namespace detail

// Primary template for folly::hasher. Only the declaration appears here; the
// usable hashers are the specializations that follow (bool, integers,
// floating point, strings, enums, pairs, tuples, pointers, ...).
//
// Enable defaults to void and exists so that a partial specialization can be
// constrained with std::enable_if_t, as the enum specialization does.
template <class Key, class Enable = void>
struct hasher;

struct Hash { … };

// IsAvalanchingHasher<H, K> extends std::integral_constant<bool, V>.
// V will be true if it is known that when a hasher of type H computes
// the hash of a key of type K, any subset of B bits from the resulting
// hash value is usable in a context that can tolerate a collision rate
// of about 1/2^B.  (Input bits lost implicitly converting between K and
// the argument of H::operator() are not considered here; K is separate
// to handle the case of generic hashers like folly::Hash).
//
// If std::hash<T> or folly::hasher<T> is specialized for a new type T and
// the implementation avalanches input entropy across all of the bits of a
// std::size_t result, the specialization should be marked as avalanching.
// This can be done either by adding a member type folly_is_avalanching
// to the functor H that contains a constexpr bool value of true, or by
// specializing IsAvalanchingHasher<H, K>.  The member type mechanism is
// more convenient, but specializing IsAvalanchingHasher may be required
// if a hasher is polymorphic on the key type or if its definition cannot
// be modified.
//
// The standard's definition of hash quality is based on the chance of hash
// collisions using the entire hash value.  No requirement is made that
// this property holds for subsets of the bits.  In addition, hashed keys
// in real-world workloads are not chosen uniformly from the entire domain
// of keys, which can further increase the collision rate for a subset
// of bits.  For example, std::hash<uint64_t> in libstdc++-v3 and libc++
// is the identity function.  This hash function has no collisions when
// considering hash values in their entirety, but for real-world workloads
// the high bits are likely to always be zero.
//
// Some hash functions provide a stronger guarantee -- the standard's
// collision property is also preserved for subsets of the output bits and
// for sub-domains of keys.  Another way to say this is that each bit of
// the hash value contains entropy from the entire input: changes to the
// input avalanche across all of the bits of the output.  The distinction
// is useful when mapping the hash value onto a smaller space efficiently
// (such as when implementing a hash table).
template <typename Hasher, typename Key>
struct IsAvalanchingHasher;

namespace detail {
template <typename Hasher, typename Void = void>
struct IsAvalanchingHasherFromMemberType
    : std::bool_constant<!require_sizeof<Hasher>> { … };

IsAvalanchingHasherFromMemberType<Hasher, void_t<typename Hasher::folly_is_avalanching>>;
} // namespace detail

template <typename Hasher, typename Key>
struct IsAvalanchingHasher : detail::IsAvalanchingHasherFromMemberType<Hasher> { … };

// It's ugly to put this here, but folly::transparent isn't hash specific
// so it seems even more ugly to put this near its declaration
IsAvalanchingHasher<transparent<H>, K>;

IsAvalanchingHasher<Hash, K>;

template <>
struct hasher<bool> { … };
IsAvalanchingHasher<hasher<bool>, K>;

template <>
struct hasher<unsigned long long>
    : detail::integral_hasher<unsigned long long> { … };

template <>
struct hasher<signed long long> : detail::integral_hasher<signed long long> { … };

template <>
struct hasher<unsigned long> : detail::integral_hasher<unsigned long> { … };

template <>
struct hasher<signed long> : detail::integral_hasher<signed long> { … };

template <>
struct hasher<unsigned int> : detail::integral_hasher<unsigned int> { … };

template <>
struct hasher<signed int> : detail::integral_hasher<signed int> { … };

template <>
struct hasher<unsigned short> : detail::integral_hasher<unsigned short> { … };

template <>
struct hasher<signed short> : detail::integral_hasher<signed short> { … };

template <>
struct hasher<unsigned char> : detail::integral_hasher<unsigned char> { … };

template <>
struct hasher<signed char> : detail::integral_hasher<signed char> { … };

template <> // char is a different type from both signed char and unsigned char
struct hasher<char> : detail::integral_hasher<char> { … };

// 128-bit integer support. __int128 is a compiler extension rather than a
// standard type, so these two specializations are compiled only when
// FOLLY_HAVE_INT128_T is set. Like the other integer specializations, they
// derive from detail::integral_hasher.
#if FOLLY_HAVE_INT128_T
template <>
struct hasher<signed __int128> : detail::integral_hasher<signed __int128> {};

template <>
struct hasher<unsigned __int128> : detail::integral_hasher<unsigned __int128> {
};
#endif

template <>
struct hasher<float> : detail::float_hasher<float> { … };

template <>
struct hasher<double> : detail::float_hasher<double> { … };

template <>
struct hasher<std::string> { … };
IsAvalanchingHasher<hasher<std::string>, K>;

template <>
struct hasher<std::string_view> { … };
IsAvalanchingHasher<hasher<std::string_view>, K>;

hasher<T, std::enable_if_t<std::is_enum<T>::value>>;

IsAvalanchingHasher<hasher<T, std::enable_if_t<std::is_enum<T>::value>>, K>;

hasher<std::pair<T1, T2>>;

hasher<std::tuple<Ts...>>;

hasher<T *>;

hasher<std::unique_ptr<T>>;

hasher<std::shared_ptr<T>>;

// combiner for multi-arg tuple also mixes bits
IsAvalanchingHasher<hasher<std::tuple<T>>, K>;
IsAvalanchingHasher<hasher<std::tuple<T1, T2, Ts...>>, K>;

namespace hash {

// Compatible with std::hash implementation of hashing for std::string_view.
// We use hash::murmurHash64 as a replacement of libstdc++ implementation
// for better performance, for other implementations of C++ Standard Libraries
// we fallback to std::hash.
#if defined(_GLIBCXX_STRING) && FOLLY_X64
FOLLY_ALWAYS_INLINE size_t stdCompatibleHash(std::string_view sv) noexcept {
  // The 64-bit murmur result is returned as size_t without any narrowing, so
  // the two types must have the same width on this configuration.
  static_assert(sizeof(uint64_t) == sizeof(size_t));

  // Fixed seed passed to murmurHash64; presumably the seed libstdc++ uses for
  // its own string hashing, which is what keeps the result std-compatible.
  constexpr uint64_t kStdHashSeed = 0xc70f6907ULL;

  const auto* const bytes = sv.data();
  const auto length = sv.size();
  return hash::murmurHash64(bytes, length, kStdHashSeed);
}
#else
FOLLY_ALWAYS_INLINE size_t stdCompatibleHash(std::string_view sv) noexcept(
    noexcept(std::hash<std::string_view>{ … }
#endif // defined(_GLIBCXX_STRING) && FOLLY_X64

// Simply uses std::hash to hash.  Note that std::hash is not guaranteed
// to be a very good hash function; provided std::hash doesn't collide on
// the individual inputs, you are fine, but that won't be true for, say,
// strings or pairs
class StdHasher { … };

// This is a general-purpose way to create a single hash from multiple
// hashable objects. hash_combine_generic takes a class Hasher implementing
// hash<T>; hash_combine uses a default hasher StdHasher that uses std::hash.
// hash_combine_generic hashes each argument and combines those hashes in
// an order-dependent way to yield a new hash; hash_range does so (also in an
// order-dependent way) for items in the range [first, last);
// commutative_hash_combine_* hashes values but combines them in an
// order-independent way to yield a new hash.

/**
 * Hash a value, and combine it with a seed. Commutative.
 *
 * @param seed  The hash that the value's hash is combined with.
 * @param hasher  The function/callable which will hash the value.
 * @param value  The value to hash.
 *
 * @methodset ranges
 */
template <class Hash, class Value>
uint64_t commutative_hash_combine_value_generic(
    uint64_t seed, Hash const& hasher, Value const& value) { … }

/**
 * Combine hashes of items in the range [begin, end), order-dependently.
 *
 * For order-independent hashing, such as for hashing an unordered container
 * (e.g. folly::dynamic::object) use commutative_hash_combine_range instead.
 *
 * @param begin  Iterator to the first item of the range.
 * @param end  Iterator one past the last item of the range.
 * @param hash  The base-case hash to use. Defaults to 0.
 * @param hasher  The function/callable which will hash the value. Defaults to
 *                a default-constructed std::hash of the iterator's value_type.
 *
 * @methodset ranges
 */
template <
    class Iter,
    class Hash = std::hash<typename std::iterator_traits<Iter>::value_type>>
uint64_t hash_range(
    Iter begin, Iter end, uint64_t hash = 0, Hash hasher = Hash()) { … }

/**
 * Create a hash from multiple hashable objects, order-independently.
 *
 * For order-dependent hashing use hash_range.
 *
 * @param seed  The base-case hash to use.
 * @param hasher  The function/callable which will hash the value.
 *
 * @methodset ranges
 */
template <class Hash, class Iter>
uint64_t commutative_hash_combine_range_generic(
    uint64_t seed, Hash const& hasher, Iter first, Iter last) { … }

/**
 * Create a hash from multiple hashable objects, order-independently.
 *
 * @methodset ranges
 */
template <class Iter>
uint64_t commutative_hash_combine_range(Iter first, Iter last) { … }

namespace detail {
c_array_size_t;
} // namespace detail

// Never used, but gcc demands it.
template <class Hasher>
inline size_t hash_combine_generic(const Hasher&) noexcept { … }

/**
 * Combine hashes of multiple items, order-dependently.
 *
 * @param h  The function/callable which will hash the value.
 *
 * @methodset ranges
 */
template <class Hasher, typename T, typename... Ts>
size_t hash_combine_generic(
    const Hasher& h,
    const T& t,
    const Ts&... ts) noexcept(noexcept(detail::c_array_size_t{ … }

/**
 * Combine hashes of multiple items, order-independently.
 *
 * @param seed  The base-case hash to use.
 * @param hasher  The function/callable which will hash the value.
 * @param value  The items to hash and combine.
 *
 * @methodset ranges
 */
template <typename Hash, typename... Value>
uint64_t commutative_hash_combine_generic(
    uint64_t seed, Hash const& hasher, Value const&... value) { … }

/**
 * Combine hashes of multiple items, order-dependently.
 *
 * @methodset ranges
 */
template <typename T, typename... Ts>
FOLLY_NODISCARD size_t hash_combine(const T& t, const Ts&... ts) noexcept(
    noexcept(hash_combine_generic(StdHasher{ … }

/**
 * Combine hashes of multiple items, order-independently.
 *
 * For the order-dependent counterpart see hash_combine.
 *
 * @param value  The items to hash and combine.
 *
 * @methodset ranges
 */
template <typename... Value>
uint64_t commutative_hash_combine(Value const&... value) { … }
} // namespace hash

// recursion
template <size_t index, typename... Ts>
struct TupleHasher { … };

// base
TupleHasher<0, Ts...>;

} // namespace folly

// Custom hash functions.
namespace std {
// Hash function for pairs. Requires default hash functions for both
// items in the pair.
hash<std::pair<T1, T2>>;

// Hash function for tuples. Requires default hash functions for all types.
hash<std::tuple<Ts...>>;
} // namespace std

namespace folly {

// std::hash<std::string> is avalanching on libstdc++-v3 (code checked),
// libc++ (code checked), and MSVC (based on online information).
// std::hash for float and double on libstdc++-v3 are avalanching,
// but they are not on libc++.  std::hash for integral types is not
// avalanching for libstdc++-v3 or libc++.  We're conservative here and
// just mark std::string as avalanching.  std::string_view will also be
// so, once it exists.
IsAvalanchingHasher<std::hash<std::basic_string<Args...>>, K>;

} // namespace folly
folly/folly/hash/Hash.h