// folly/folly/lang/BitsClass.h

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#ifdef __BMI__
#include <immintrin.h>
#endif

#include <cstddef>
#include <limits>
#include <type_traits>

#include <glog/logging.h>

#include <folly/Portability.h>
#include <folly/Range.h>
#include <folly/lang/Bits.h>

namespace folly {

// Tag type layered on Unaligned<T>: selects the BitsTraits specialization
// below whose load/store/loadRMW accessors are compiled with
// FOLLY_DISABLE_ADDRESS_SANITIZER, allowing deliberate accesses that ASAN
// would otherwise flag.
template <class T>
struct UnalignedNoASan : public Unaligned<T> {};

// As a general rule, bit operations work on unsigned values only;
// right-shift is arithmetic for signed values, and that can lead to
// unpleasant bugs.

namespace detail {

/**
 * Helper class to make Bits<T> (below) work with both aligned values
 * (T, where T is an unsigned integral type) or unaligned values
 * (Unaligned<T>, where T is an unsigned integral type)
 */
template <class T, class Enable = void>
struct BitsTraits;

// Partial specialization for Unaligned<T>, where T is unsigned integral
// loadRMW is the same as load, but it indicates that it loads for a
// read-modify-write operation (we write back the bits we won't change);
// silence the GCC warning in that case.
// Partial specialization for Unaligned<T>, where T is an integral type.
// loadRMW reads a value exactly like load, but signals that the caller is
// about to write the unchanged bits back (a read-modify-write); GCC's
// uninitialized-use warnings are suppressed for that pattern.
template <class T>
struct BitsTraits<
    Unaligned<T>,
    typename std::enable_if<(std::is_integral<T>::value)>::type> {
  using UnderlyingType = T;
  static T load(const Unaligned<T>& u) { return u; }
  static T loadRMW(const Unaligned<T>& u) {
    FOLLY_PUSH_WARNING
    FOLLY_GNU_DISABLE_WARNING("-Wuninitialized")
    FOLLY_GCC_DISABLE_WARNING("-Wmaybe-uninitialized")
    return u;
    FOLLY_POP_WARNING
  }
  static void store(Unaligned<T>& u, T value) { u = value; }
};

// Special version that allows one to disable address sanitizer on demand.
// Specialization for UnalignedNoASan<T>: behaves like the Unaligned<T>
// traits above, except that every accessor runs with AddressSanitizer
// instrumentation disabled, permitting intentional accesses that ASAN
// would otherwise report.
template <class T>
struct BitsTraits<
    UnalignedNoASan<T>,
    typename std::enable_if<(std::is_integral<T>::value)>::type> {
  using UnderlyingType = T;
  static T FOLLY_DISABLE_ADDRESS_SANITIZER load(const UnalignedNoASan<T>& u) {
    return u.value;
  }
  static T FOLLY_DISABLE_ADDRESS_SANITIZER
  loadRMW(const UnalignedNoASan<T>& u) {
    FOLLY_PUSH_WARNING
    FOLLY_GNU_DISABLE_WARNING("-Wuninitialized")
    FOLLY_GCC_DISABLE_WARNING("-Wmaybe-uninitialized")
    return u.value;
    FOLLY_POP_WARNING
  }
  static void FOLLY_DISABLE_ADDRESS_SANITIZER
  store(UnalignedNoASan<T>& u, T value) {
    u.value = value;
  }
};

// Partial specialization for T, where T is unsigned integral
// Partial specialization for plain (aligned) T, where T is an integral
// type. loadRMW is identical to load but marks a read-modify-write, with
// GCC uninitialized-use warnings suppressed.
template <class T>
struct BitsTraits<
    T,
    typename std::enable_if<(std::is_integral<T>::value)>::type> {
  using UnderlyingType = T;
  static T load(const T& ref) { return ref; }
  static T loadRMW(const T& ref) {
    FOLLY_PUSH_WARNING
    FOLLY_GNU_DISABLE_WARNING("-Wuninitialized")
    FOLLY_GCC_DISABLE_WARNING("-Wmaybe-uninitialized")
    return ref;
    FOLLY_POP_WARNING
  }
  static void store(T& ref, T value) { ref = value; }
};

} // namespace detail

/**
 * Wrapper class with static methods for various bit-level operations,
 * treating an array of T as an array of bits (in little-endian order).
 * (T is either an unsigned integral type or Unaligned<X>, where X is
 * an unsigned integral type)
 */
/**
 * Wrapper class with static methods for various bit-level operations,
 * treating an array of T as an array of bits (in little-endian order).
 * (T is either an unsigned integral type or Unaligned<X>, where X is
 * an unsigned integral type)
 */
template <class T, class Traits = detail::BitsTraits<T>>
struct Bits {
  typedef typename Traits::UnderlyingType UnderlyingType;
  typedef T type;
  static_assert(sizeof(T) == sizeof(UnderlyingType), "Size mismatch");

  /**
   * Number of bits in a block.
   */
  static constexpr size_t bitsPerBlock = std::numeric_limits<
      typename std::make_unsigned<UnderlyingType>::type>::digits;

  /**
   * Block index of the given bit (which T in the array contains it).
   */
  static constexpr size_t blockIndex(size_t bit) { return bit / bitsPerBlock; }

  /**
   * Offset in block of the given bit.
   */
  static constexpr size_t bitOffset(size_t bit) { return bit % bitsPerBlock; }

  /**
   * Number of blocks used by the given number of bits.
   */
  static constexpr size_t blockCount(size_t nbits) {
    return nbits / bitsPerBlock + (nbits % bitsPerBlock != 0);
  }

  /**
   * Set the given bit.
   */
  static void set(T* p, size_t bit);

  /**
   * Clear the given bit.
   */
  static void clear(T* p, size_t bit);

  /**
   * Test the given bit.
   */
  static bool test(const T* p, size_t bit);

  /**
   * Set count contiguous bits starting at bitStart to the values
   * from the least significant count bits of value; little endian.
   * (value & 1 becomes the bit at bitStart, etc)
   * Precondition: count <= sizeof(T) * 8
   * Precondition: value can fit in 'count' bits
   */
  static void set(T* p, size_t bitStart, size_t count, UnderlyingType value);

  /**
   * Get count contiguous bits starting at bitStart.
   * Precondition: count <= sizeof(T) * 8
   */
  static UnderlyingType get(const T* p, size_t bitStart, size_t count);

  /**
   * Count the number of bits set in a range of blocks.
   */
  static size_t count(const T* begin, const T* end);

 private:
  // Same as set, assumes all bits are in the same block.
  // (bitStart < sizeof(T) * 8, bitStart + count <= sizeof(T) * 8)
  static void innerSet(
      T* p, size_t bitStart, size_t count, UnderlyingType value);

  // Same as get, assumes all bits are in the same block.
  // (bitStart < sizeof(T) * 8, bitStart + count <= sizeof(T) * 8)
  static UnderlyingType innerGet(const T* p, size_t bitStart, size_t count);

  // Convenience constants in the underlying type, used to build masks.
  static constexpr UnderlyingType zero = UnderlyingType(0);
  static constexpr UnderlyingType one = UnderlyingType(1);

  using UnsignedType = typename std::make_unsigned<UnderlyingType>::type;
  // Mask with the low `count` bits set. The shift is performed in the
  // unsigned type; count == bitsPerBlock is special-cased because a shift
  // by the full type width would be undefined behavior.
  static constexpr UnderlyingType ones(size_t count) {
    return (count < bitsPerBlock)
        ? static_cast<UnderlyingType>((UnsignedType{1} << count) - 1)
        : ~zero;
  }
};

template <class T, class Traits>
inline void Bits<T, Traits>::set(T* p, size_t bit) {
  // OR the target bit's mask into the block that contains it.
  T& blk = p[blockIndex(bit)];
  auto const setMask = static_cast<UnderlyingType>(one << bitOffset(bit));
  Traits::store(blk, Traits::loadRMW(blk) | setMask);
}

template <class T, class Traits>
inline void Bits<T, Traits>::clear(T* p, size_t bit) {
  // AND the block with the complement of the bit's mask to zero it out.
  T& blk = p[blockIndex(bit)];
  auto const setMask = static_cast<UnderlyingType>(one << bitOffset(bit));
  auto const clearMask = static_cast<UnderlyingType>(~setMask);
  Traits::store(blk, Traits::loadRMW(blk) & clearMask);
}

template <class T, class Traits>
inline void Bits<T, Traits>::set(
    T* p, size_t bitStart, size_t count, UnderlyingType value) {
  DCHECK_LE(count, sizeof(UnderlyingType) * 8);
  // Debug check that `value` fits in `count` bits: shifting the excess
  // high bits out and back must round-trip. Skipped when count == 0
  // (cut equals the full type width), where the shift itself would be UB.
  size_t cut = bitsPerBlock - count;
  if (cut != 8 * sizeof(UnderlyingType)) {
    using U = typename std::make_unsigned<UnderlyingType>::type;
    DCHECK_EQ(value, UnderlyingType(U(value) << cut) >> cut);
  }
  size_t idx = blockIndex(bitStart);
  size_t offset = bitOffset(bitStart);
  // For signed UnderlyingType, mask off sign-extension bits above `count`
  // so that only the requested field is written.
  if (std::is_signed<UnderlyingType>::value) {
    value &= ones(count);
  }
  if (offset + count <= bitsPerBlock) {
    // Fast path: the whole field lies within a single block.
    innerSet(p + idx, offset, count, value);
  } else {
    // The field straddles two adjacent blocks (little-endian bit order):
    // the low bits go into the tail of block idx, the high bits into the
    // head of block idx + 1.
    size_t countInThisBlock = bitsPerBlock - offset;
    size_t countInNextBlock = count - countInThisBlock;

    UnderlyingType thisBlock = UnderlyingType(value & ones(countInThisBlock));
    UnderlyingType nextBlock = UnderlyingType(value >> countInThisBlock);
    if (std::is_signed<UnderlyingType>::value) {
      // Arithmetic right shift may have sign-extended; mask again.
      nextBlock &= ones(countInNextBlock);
    }
    innerSet(p + idx, offset, countInThisBlock, thisBlock);
    innerSet(p + idx + 1, 0, countInNextBlock, nextBlock);
  }
}

template <class T, class Traits>
inline void Bits<T, Traits>::innerSet(
    T* p, size_t offset, size_t count, UnderlyingType value) {
  // Read the whole block, splice `value` into [offset, offset + count),
  // and write the block back.
  UnderlyingType block = Traits::loadRMW(*p);
  block &= ~(ones(count) << offset);
  block |= (value << offset);
  Traits::store(*p, block);
}

template <class T, class Traits>
inline bool Bits<T, Traits>::test(const T* p, size_t bit) {
  // True iff the addressed bit is set within its containing block.
  UnderlyingType const blk = Traits::load(p[blockIndex(bit)]);
  return (blk & (one << bitOffset(bit))) != 0;
}

template <class T, class Traits>
inline auto Bits<T, Traits>::get(const T* p, size_t bitStart, size_t count)
    -> UnderlyingType {
  // An empty field reads as zero; also avoids the UB full-width shifts
  // in the sign-extension step below.
  if (count == 0) {
    return UnderlyingType{};
  }

  DCHECK_LE(count, sizeof(UnderlyingType) * 8);
  size_t idx = blockIndex(bitStart);
  size_t offset = bitOffset(bitStart);
  UnderlyingType ret;
  if (offset + count <= bitsPerBlock) {
    // Fast path: the whole field lies within a single block.
    ret = innerGet(p + idx, offset, count);
  } else {
    // The field straddles two adjacent blocks (little-endian bit order):
    // low bits come from the tail of block idx, high bits from the head
    // of block idx + 1.
    size_t countInThisBlock = bitsPerBlock - offset;
    size_t countInNextBlock = count - countInThisBlock;
    UnderlyingType thisBlockValue = innerGet(p + idx, offset, countInThisBlock);
    UnderlyingType nextBlockValue = innerGet(p + idx + 1, 0, countInNextBlock);
    ret = (nextBlockValue << countInThisBlock) | thisBlockValue;
  }
  if (std::is_signed<UnderlyingType>::value) {
    // Sign-extend the count-bit value: shift the field to the top of the
    // block, then rely on arithmetic right shift of signed values to
    // replicate the sign bit back down.
    size_t emptyBits = bitsPerBlock - count;
    ret <<= emptyBits;
    ret >>= emptyBits;
  }
  return ret;
}

template <class T, class Traits>
inline auto Bits<T, Traits>::innerGet(const T* p, size_t offset, size_t count)
    -> UnderlyingType {
  // Extract the bit field [offset, offset + count) from a single block.
  UnderlyingType const blk = Traits::load(*p);
#ifdef __BMI__
  // BMI1 bit-field extract intrinsic.
  return _bextr_u64(blk, offset, count);
#else
  return (blk >> offset) & ones(count);
#endif
}

template <class T, class Traits>
inline size_t Bits<T, Traits>::count(const T* begin, const T* end) {
  // Sum the population count of every block in [begin, end).
  size_t total = 0;
  for (const T* cur = begin; cur != end; ++cur) {
    total += popcount(Traits::load(*cur));
  }
  return total;
}

} // namespace folly