folly/folly/compression/Instructions.h

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <glog/logging.h>

#ifdef _MSC_VER
#include <immintrin.h>
#endif

#include <string_view>

#include <folly/CpuId.h>
#include <folly/Portability.h>
#include <folly/lang/Assume.h>
#include <folly/portability/Builtins.h>

namespace folly {
namespace compression {
namespace instructions {

// NOTE: It's recommended to compile EF coding with -msse4.2. Starting with
// Nehalem, Intel CPUs support the POPCNT instruction, and gcc will emit it
// for the __builtin_popcountll intrinsic.
// Alternatively, client code can switch to the appropriate version of
// EliasFanoReader<> at runtime (the client must implement this switching
// logic itself) by explicitly specifying the instruction set to use.

// Base instruction-set type. It declares no members here; Nehalem (and,
// through it, Haswell) derive from it when those structs are compiled in, and
// the dispatch() overloads use it to compute their return type.
struct Default {};

#if FOLLY_X64 || defined(__i386__)
// Instruction set that adds a hardware POPCNT implementation on top of
// Default. POPCNT is supported starting with Intel Nehalem, AMD K10.
struct Nehalem : public Default {
  // Human-readable name of this instruction set.
  static std::string_view name() noexcept { return "Nehalem"; }

  // Returns true when `cpuId` reports POPCNT support. When no argument is
  // given, a default-constructed folly::CpuId is used.
  static bool supported(const folly::CpuId& cpuId = {}) {
    return cpuId.popcnt();
  }

  // Returns the number of set bits in `value` using the POPCNT instruction.
  // The caller is responsible for checking supported() first.
  static FOLLY_ALWAYS_INLINE uint64_t popcount(uint64_t value) {
#if defined(__GNUC__) && FOLLY_X64
    // GCC and Clang won't inline the intrinsics.
    uint64_t result;
    asm("popcntq %1, %0" : "=r"(result) : "r"(value));
    return result;
#elif defined(__GNUC__)
    // 32-bit x86: the 64-bit form (popcntq) cannot be encoded, so count each
    // 32-bit half with popcntl and add the two partial counts.
    const uint32_t low = static_cast<uint32_t>(value);
    const uint32_t high = static_cast<uint32_t>(value >> 32);
    uint32_t lowCount;
    uint32_t highCount;
    asm("popcntl %1, %0" : "=r"(lowCount) : "r"(low));
    asm("popcntl %1, %0" : "=r"(highCount) : "r"(high));
    return uint64_t(lowCount) + highCount;
#else
    return uint64_t(_mm_popcnt_u64(value));
#endif
  }
};

// Instruction set that adds BMI1/BMI2 bit-manipulation instructions (BLSR,
// BEXTR, BZHI) on top of Nehalem. BMI1 is supported starting with Intel
// Haswell, AMD Piledriver.
struct Haswell : public Nehalem {
  // Human-readable name of this instruction set.
  static std::string_view name() noexcept { return "Haswell"; }

  // Returns true when `cpuId` satisfies Nehalem::supported() and additionally
  // reports both BMI1 and BMI2. When no argument is given, a
  // default-constructed folly::CpuId is used.
  static bool supported(const folly::CpuId& cpuId = {}) {
    return Nehalem::supported(cpuId) && cpuId.bmi1() && cpuId.bmi2();
  }

  // Returns `value` with its lowest set bit cleared (0 stays 0).
  // BLSR combines two instructions into one and reduces register pressure.
  static FOLLY_ALWAYS_INLINE uint64_t blsr(uint64_t value) {
#if defined(__GNUC__) && FOLLY_X64
    // GCC and Clang won't inline the intrinsics.
    uint64_t result;
    asm("blsrq %1, %0" : "=r"(result) : "r"(value));
    return result;
#elif defined(__GNUC__)
    // 32-bit x86: blsrq cannot be encoded; compute the same result portably.
    return value & (value - 1);
#else
    return _blsr_u64(value);
#endif
  }

  // Extracts `length` bits from `value` starting at bit `start` and returns
  // them right-aligned. Only the low 8 bits of `start` and of `length` are
  // used, matching the control-word layout of the BEXTR instruction.
  static FOLLY_ALWAYS_INLINE uint64_t
  bextr(uint64_t value, uint32_t start, uint32_t length) {
#if defined(__GNUC__) && FOLLY_X64
    // GCC and Clang won't inline the intrinsics.
    // Encode parameters in `pattern` where `pattern[0:7]` is `start` and
    // `pattern[8:15]` is `length`.
    // Ref: Intel Advanced Vector Extensions Programming Reference
    uint64_t pattern = start & 0xFF;
    pattern = pattern | ((length & 0xFF) << 8);
    uint64_t result;
    asm("bextrq %2, %1, %0" : "=r"(result) : "r"(value), "r"(pattern));
    return result;
#elif defined(__GNUC__)
    // 32-bit x86: bextrq cannot be encoded; reproduce its semantics portably.
    const uint32_t first = start & 0xFF;
    const uint32_t count = length & 0xFF;
    if (first >= 64 || count == 0) {
      // Nothing of `value` falls inside the requested window.
      return 0;
    }
    const uint64_t shifted = value >> first;
    // A window of 64 or more bits keeps everything that is left after the
    // shift; the guard also avoids an out-of-range shift below.
    return count >= 64 ? shifted : shifted & ((uint64_t(1) << count) - 1);
#else
    return _bextr_u64(value, start, length);
#endif
  }

  // Returns `value` with all bits at positions >= `index` cleared. Only the
  // low 8 bits of `index` are used; if that is 64 or more, `value` is
  // returned unchanged, matching the BZHI instruction.
  static FOLLY_ALWAYS_INLINE uint64_t bzhi(uint64_t value, uint32_t index) {
#if defined(__GNUC__) && FOLLY_X64
    // GCC and Clang won't inline the intrinsics.
    const uint64_t index64 = index;
    uint64_t result;
    asm("bzhiq %2, %1, %0" : "=r"(result) : "r"(value), "r"(index64));
    return result;
#elif defined(__GNUC__)
    // 32-bit x86: bzhiq cannot be encoded; reproduce its semantics portably.
    const uint32_t keep = index & 0xFF;
    return keep >= 64 ? value : value & ((uint64_t(1) << keep) - 1);
#else
    return _bzhi_u64(value, index);
#endif
  }
};
#endif

// Scoped enumeration returned by detect() and accepted as the selector
// argument of dispatch(Type, F&&). No enumerators are declared here.
enum class Type {};

// Declared to return a Type value.
// NOTE(review): the body shown here is empty, so calling this function flows
// off the end of a value-returning function, which is undefined behavior.
// Presumably it is meant to report which instruction set the running CPU
// supports — confirm and supply the implementation.
inline Type detect() {}

// Overload taking an explicit `type` selector and a callable `f`. The return
// type is whatever `f` yields when called with a Default value, so `f` must be
// callable with Default.
// NOTE(review): the body shown here is empty — neither `type` nor `f` is
// used, and unless the deduced return type is void, flowing off the end is
// undefined behavior. Presumably it should invoke `f` with the instruction-set
// struct matching `type` — confirm.
template <class F>
auto dispatch(Type type, F&& f) -> decltype(f(std::declval<Default>())) {}

// Overload taking only a callable `f`. The return type is whatever `f` yields
// when called with a Default value, so `f` must be callable with Default.
// NOTE(review): the body shown here is empty — `f` is never invoked, and
// unless the deduced return type is void, flowing off the end is undefined
// behavior. Presumably it should forward to the two-argument overload using
// the result of detect() — confirm.
template <class F>
auto dispatch(F&& f) -> decltype(f(std::declval<Default>())) {}

} // namespace instructions
} // namespace compression
} // namespace folly