chromium/third_party/highway/src/hwy/targets.cc

// Copyright 2019 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "hwy/targets.h"

#include <stdint.h>
#include <stdio.h>

#include "hwy/base.h"
#include "hwy/detect_targets.h"
#include "hwy/highway.h"
#include "hwy/per_target.h"  // VectorBytes

#if HWY_ARCH_X86
#include <xmmintrin.h>
#if HWY_COMPILER_MSVC
#include <intrin.h>
#else  // !HWY_COMPILER_MSVC
#include <cpuid.h>
#endif  // HWY_COMPILER_MSVC

#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RISCV) && \
    HWY_OS_LINUX
// sys/auxv.h does not always include asm/hwcap.h, or define HWCAP*, hence we
// still include this directly. See #1199.
#ifndef TOOLCHAIN_MISS_ASM_HWCAP_H
#include <asm/hwcap.h>
#endif
#if HWY_HAVE_AUXV
#include <sys/auxv.h>
#endif

#endif  // HWY_ARCH_*

#ifdef __APPLE__
#include <sys/sysctl.h>
#include <sys/utsname.h>
#endif  // __APPLE__

namespace hwy {
namespace {

// When running tests, this value can be set to the mocked supported targets
// mask. Only written to from a single thread before the test starts.
// NOTE(review): presumably the writer is SetSupportedTargetsForTest() - confirm.
int64_t supported_targets_for_test_ =;

// Mask of targets disabled at runtime with DisableTargets.
// NOTE(review): the identifier suggests this holds the bits that remain
// *supported* rather than the disabled bits themselves - confirm the polarity
// against DisableTargets() before relying on this description.
int64_t supported_mask_ =;

#ifdef __APPLE__
// Queries the sysctl entry `feature_name` as an int. Returns true only if the
// query succeeds and the reported value is nonzero; a failed lookup is treated
// the same as an absent feature.
static HWY_INLINE HWY_MAYBE_UNUSED bool HasCpuFeature(
    const char* feature_name) {
  int value = 0;
  size_t value_size = sizeof(value);
  if (sysctlbyname(feature_name, &value, &value_size, nullptr, 0) != 0) {
    return false;  // Entry missing or not readable.
  }
  return value != 0;
}

// Parses a run of decimal digits starting at `ptr` into `parsed_val`.
//
// `ptr` is advanced past the digits that were consumed and is left pointing at
// the first non-digit character (or the terminating NUL).
//
// Returns true if at least one digit was consumed and the value fits in 32
// bits. If there are no digits, `parsed_val` is set to 0 and false is returned.
// If the value would exceed 0xFFFFFFFF, false is returned immediately:
// `parsed_val` is left untouched and `ptr` points at the offending digit.
static HWY_INLINE HWY_MAYBE_UNUSED bool ParseU32(const char*& ptr,
                                                 uint32_t& parsed_val) {
  constexpr uint64_t kMaxU32 = 0xFFFFFFFFu;
  const char* const begin = ptr;
  // 64-bit accumulator: one more digit on top of a 32-bit value cannot wrap.
  uint64_t accum = 0;

  while (*ptr != '\0') {
    const unsigned char ch = static_cast<unsigned char>(*ptr);
    if (ch < '0' || ch > '9') {
      break;  // First non-digit ends the number.
    }

    accum = (accum * 10u) + static_cast<unsigned>(ch - '0');
    if (accum > kMaxU32) {
      return false;
    }
    ++ptr;
  }

  parsed_val = static_cast<uint32_t>(accum);
  return ptr != begin;
}

// Returns true if the kernel release reported by uname() is "21.3" or newer.
// Darwin 21.3 corresponds to macOS 12.2, so this is a check for macOS 12.2+.
// Any failure to obtain or parse the "<major>.<minor>" prefix yields false.
static HWY_INLINE HWY_MAYBE_UNUSED bool IsMacOs12_2OrLater() {
  utsname kernel_info;
  ZeroBytes(&kernel_info, sizeof(utsname));
  if (uname(&kernel_info) != 0) {
    return false;
  }

  const char* cursor = kernel_info.release;
  if (cursor == nullptr) {
    return false;
  }

  // Only read after the corresponding ParseU32 call has succeeded.
  uint32_t darwin_major;
  uint32_t darwin_minor;

  // Expect "<major>.<minor>"; anything after the minor number is ignored.
  if (!ParseU32(cursor, darwin_major) || *cursor != '.') {
    return false;
  }
  ++cursor;  // Skip the '.' separator.
  if (!ParseU32(cursor, darwin_minor)) {
    return false;
  }

  // The minor number only matters for Darwin 21 itself.
  if (darwin_major != 21) {
    return darwin_major > 21;
  }
  return darwin_minor >= 3;
}
#endif  // __APPLE__

#if HWY_ARCH_X86 && HWY_HAVE_RUNTIME_DISPATCH
// Helpers for detecting x86 targets at runtime. Only compiled when
// HWY_ARCH_X86 and HWY_HAVE_RUNTIME_DISPATCH are both set.
namespace x86 {

// Calls CPUID instruction with eax=level and ecx=count and returns the result
// in abcd array where abcd = {eax, ebx, ecx, edx} (hence the name abcd).
HWY_INLINE void Cpuid(const uint32_t level, const uint32_t count,
                      uint32_t* HWY_RESTRICT abcd) {}

// NOTE(review): presumably reports whether bit `index` of `reg` is set -
// confirm against the implementation.
HWY_INLINE bool IsBitSet(const uint32_t reg, const int index) {}

// Returns the lower 32 bits of extended control register 0.
// Requires CPU support for "OSXSAVE" (see below).
uint32_t ReadXCR0() {}

// NOTE(review): presumably reports whether the CPU vendor is AMD - TODO
// confirm how the vendor is determined.
bool IsAMD() {}

// Arbitrary bit indices indicating which instruction set extensions are
// supported. Use enum to ensure values are distinct.
enum class FeatureIndex : uint32_t {};
// NOTE(review): presumably checks that every FeatureIndex fits in the 64-bit
// masks produced by Bit() - confirm.
static_assert;

// Converts a FeatureIndex into a uint64_t usable in the kGroup* masks below.
// NOTE(review): presumably a mask with only the bit for `index` set - confirm.
HWY_INLINE constexpr uint64_t Bit(FeatureIndex index) {}

// Returns bit array of FeatureIndex from CPUID feature flags.
uint64_t FlagsFromCPUID() {}

// Each Highway target requires a 'group' of multiple features/flags.
constexpr uint64_t kGroupSSE2 =;

constexpr uint64_t kGroupSSSE3 =;

constexpr uint64_t kGroupSSE4 =;

// We normally assume BMI/BMI2/FMA are available if AVX2 is. This allows us to
// use BZHI and (compiler-generated) MULX. However, VirtualBox lacks them
// [https://www.virtualbox.org/ticket/15471]. Thus we provide the option of
// avoiding using and requiring these so AVX2 can still be used.
#ifdef HWY_DISABLE_BMI2_FMA
constexpr uint64_t kGroupBMI2_FMA = 0;
#else
constexpr uint64_t kGroupBMI2_FMA =;
#endif

// Defining HWY_DISABLE_F16C zeroes this group, analogous to
// HWY_DISABLE_BMI2_FMA above.
#ifdef HWY_DISABLE_F16C
constexpr uint64_t kGroupF16C = 0;
#else
constexpr uint64_t kGroupF16C =;
#endif

constexpr uint64_t kGroupAVX2 =;

constexpr uint64_t kGroupAVX3 =;

constexpr uint64_t kGroupAVX3_DL =;

constexpr uint64_t kGroupAVX3_ZEN4 =;

constexpr uint64_t kGroupAVX3_SPR =;

// Returns the x86 targets as an int64_t bit mask.
// NOTE(review): presumably compares FlagsFromCPUID() against the kGroup*
// masks above and also consults ReadXCR0()/IsAMD() - confirm.
int64_t DetectTargets() {}

}  // namespace x86
#elif HWY_ARCH_ARM && HWY_HAVE_RUNTIME_DISPATCH
namespace arm {
// Returns a bit mask of the Arm HWY_* targets that the running CPU supports,
// derived from the ELF auxiliary vector entries AT_HWCAP and (on aarch64)
// AT_HWCAP2.
int64_t DetectTargets() {
  int64_t bits = 0;               // return value of supported targets.
  using CapBits = unsigned long;  // NOLINT
  // For Android, this has been supported since API 20 (2014).
  const CapBits hw = getauxval(AT_HWCAP);
  (void)hw;  // Unused if none of the HWCAP_* macros tested below are defined.

#if HWY_ARCH_ARM_A64
  bits |= HWY_NEON_WITHOUT_AES;  // aarch64 always has NEON and VFPv4..

  // .. but not necessarily AES, which is required for HWY_NEON.
#if defined(HWCAP_AES)
  if (hw & HWCAP_AES) {
    bits |= HWY_NEON;

    // NOTE(review): Linux's arm64 hwcap.h appears to expose BF16 only as
    // HWCAP2_BF16 (an AT_HWCAP2 bit). If no header defines HWCAP_ASIMDBF16,
    // this whole section is compiled out and HWY_NEON_BF16 is never reported -
    // confirm against the targeted toolchains.
#if defined(HWCAP_ASIMDHP) && defined(HWCAP_ASIMDDP) && defined(HWCAP_ASIMDBF16)
    // Same unsigned type as `hw` so the comparison below does not mix
    // signedness.
    const CapBits kGroupBF16 = HWCAP_ASIMDHP | HWCAP_ASIMDDP | HWCAP_ASIMDBF16;
    if ((hw & kGroupBF16) == kGroupBF16) {
      bits |= HWY_NEON_BF16;
    }
#endif  // HWCAP_ASIMDHP && HWCAP_ASIMDDP && HWCAP_ASIMDBF16
  }
#endif  // HWCAP_AES

#if defined(HWCAP_SVE)
  if (hw & HWCAP_SVE) {
    bits |= HWY_SVE;
  }
#endif

  // Older kernel/libc headers do not define these AT_HWCAP2 bits. Fall back to
  // the bit positions fixed by the Linux arm64 ABI so the checks below still
  // compile; an older kernel simply leaves the bits clear.
#ifndef HWCAP2_SVE2
#define HWCAP2_SVE2 (1 << 1)
#endif
#ifndef HWCAP2_SVEAES
#define HWCAP2_SVEAES (1 << 2)
#endif
  const CapBits hw2 = getauxval(AT_HWCAP2);
  // HWY_SVE2 is only reported if the SVE AES extension is present as well.
  if ((hw2 & HWCAP2_SVE2) && (hw2 & HWCAP2_SVEAES)) {
    bits |= HWY_SVE2;
  }

#else  // !HWY_ARCH_ARM_A64

// Some old auxv.h / hwcap.h do not define these. If not, treat as unsupported.
#if defined(HWCAP_NEON) && defined(HWCAP_VFPv4)
  if ((hw & HWCAP_NEON) && (hw & HWCAP_VFPv4)) {
    bits |= HWY_NEON_WITHOUT_AES;
  }
#endif

  // aarch32 would check getauxval(AT_HWCAP2) & HWCAP2_AES, but we do not yet
  // support that platform, and Armv7 lacks AES entirely. Because HWY_NEON
  // requires native AES instructions, we do not enable that target here.

#endif  // HWY_ARCH_ARM_A64
  return bits;
}
}  // namespace arm
#elif HWY_ARCH_PPC && HWY_HAVE_RUNTIME_DISPATCH
namespace ppc {

// Fallback definitions for toolchains whose headers lack these capability
// bits. The values are the ones fixed by the Linux powerpc ABI
// (uapi asm/cputable.h); without a value, the kGroup* constants below would
// not compile.

// AT_HWCAP bits.
#ifndef PPC_FEATURE_HAS_ALTIVEC
#define PPC_FEATURE_HAS_ALTIVEC 0x10000000
#endif

#ifndef PPC_FEATURE_HAS_VSX
#define PPC_FEATURE_HAS_VSX 0x00000080
#endif

// AT_HWCAP2 bits.
#ifndef PPC_FEATURE2_ARCH_2_07
#define PPC_FEATURE2_ARCH_2_07 0x80000000
#endif

#ifndef PPC_FEATURE2_VEC_CRYPTO
#define PPC_FEATURE2_VEC_CRYPTO 0x02000000
#endif

#ifndef PPC_FEATURE2_ARCH_3_00
#define PPC_FEATURE2_ARCH_3_00 0x00800000
#endif

#ifndef PPC_FEATURE2_ARCH_3_1
#define PPC_FEATURE2_ARCH_3_1 0x00040000
#endif

using CapBits = unsigned long;  // NOLINT

// For AT_HWCAP, the others are for AT_HWCAP2
constexpr CapBits kGroupVSX = PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_VSX;

// Defining HWY_DISABLE_PPC8_CRYPTO drops the vector crypto requirement from
// PPC8 and therefore also from PPC9/PPC10, which build on it.
#if defined(HWY_DISABLE_PPC8_CRYPTO)
constexpr CapBits kGroupPPC8 = PPC_FEATURE2_ARCH_2_07;
#else
constexpr CapBits kGroupPPC8 = PPC_FEATURE2_ARCH_2_07 | PPC_FEATURE2_VEC_CRYPTO;
#endif
constexpr CapBits kGroupPPC9 = kGroupPPC8 | PPC_FEATURE2_ARCH_3_00;
constexpr CapBits kGroupPPC10 = kGroupPPC9 | PPC_FEATURE2_ARCH_3_1;

// Returns a bit mask of the HWY_PPC* targets supported by the running CPU.
// Returns 0 if AT_HWCAP/AT_HWCAP2 are unavailable at compile time, or if the
// CPU lacks AltiVec or VSX.
int64_t DetectTargets() {
  int64_t bits = 0;  // return value of supported targets.

#if defined(AT_HWCAP) && defined(AT_HWCAP2)
  const CapBits hw = getauxval(AT_HWCAP);

  // All PPC targets require both AltiVec and VSX.
  if ((hw & kGroupVSX) == kGroupVSX) {
    const CapBits hw2 = getauxval(AT_HWCAP2);
    // Each group is a superset of the previous one, so a newer target is only
    // reported together with all older ones.
    if ((hw2 & kGroupPPC8) == kGroupPPC8) {
      bits |= HWY_PPC8;
    }
    if ((hw2 & kGroupPPC9) == kGroupPPC9) {
      bits |= HWY_PPC9;
    }
    if ((hw2 & kGroupPPC10) == kGroupPPC10) {
      bits |= HWY_PPC10;
    }
  }  // VSX
#endif  // defined(AT_HWCAP) && defined(AT_HWCAP2)

  return bits;
}
}  // namespace ppc
#elif HWY_ARCH_S390X && HWY_HAVE_RUNTIME_DISPATCH
namespace s390x {

// Fallback definitions for toolchains whose headers lack these AT_HWCAP bits.
// The values are the ones used by the Linux kernel and glibc on s390; without
// a value, the kGroup* constants below would not compile.
#ifndef HWCAP_S390_VX
#define HWCAP_S390_VX 2048
#endif

#ifndef HWCAP_S390_VXE
#define HWCAP_S390_VXE 8192
#endif

#ifndef HWCAP_S390_VXRS_EXT2
#define HWCAP_S390_VXRS_EXT2 32768
#endif

using CapBits = unsigned long;  // NOLINT

// Capability bits that must all be present for the respective target.
constexpr CapBits kGroupZ14 = HWCAP_S390_VX | HWCAP_S390_VXE;
constexpr CapBits kGroupZ15 =
    HWCAP_S390_VX | HWCAP_S390_VXE | HWCAP_S390_VXRS_EXT2;

// Returns a bit mask of the HWY_Z14/HWY_Z15 targets supported by the running
// CPU, or 0 if AT_HWCAP is unavailable at compile time.
int64_t DetectTargets() {
  int64_t bits = 0;

#if defined(AT_HWCAP)
  const CapBits hw = getauxval(AT_HWCAP);

  if ((hw & kGroupZ14) == kGroupZ14) {
    bits |= HWY_Z14;
  }

  // kGroupZ15 is a superset of kGroupZ14, so Z15 implies Z14.
  if ((hw & kGroupZ15) == kGroupZ15) {
    bits |= HWY_Z15;
  }
#endif

  return bits;
}
}  // namespace s390x
#elif HWY_ARCH_RISCV && HWY_HAVE_RUNTIME_DISPATCH
namespace rvv {

// AT_HWCAP bit for the single-letter 'V' ISA extension. Linux assigns one bit
// per extension letter, counted from 'A'. Guard on the macro that is actually
// used below so we neither redefine it nor leave it without a value.
#ifndef COMPAT_HWCAP_ISA_V
#define COMPAT_HWCAP_ISA_V (1 << ('V' - 'A'))
#endif

using CapBits = unsigned long;  // NOLINT

// Returns HWY_RVV if the kernel reports the V extension and the hardware
// provides usable vectors of at least 16 bytes; otherwise returns 0.
int64_t DetectTargets() {
  int64_t bits = 0;

  const CapBits hw = getauxval(AT_HWCAP);

  // Only execute vector instructions if the kernel advertises the V extension.
  if ((hw & COMPAT_HWCAP_ISA_V) == COMPAT_HWCAP_ISA_V) {
    size_t e8m1_vec_len;
    // Signed and register-sized, so that the MSB of VTYPE becomes the sign bit.
#if HWY_ARCH_RISCV_64
    int64_t vtype_reg_val;
#else
    int32_t vtype_reg_val;
#endif

    // Check that a vuint8m1_t vector is at least 16 bytes and that tail
    // agnostic and mask agnostic mode are supported
    asm volatile(
        // Avoid compiler error on GCC or Clang if -march=rv64gcv1p0 or
        // -march=rv32gcv1p0 option is not specified on the command line
        ".option push\n\t"
        ".option arch, +v\n\t"
        "vsetvli %0, zero, e8, m1, ta, ma\n\t"
        "csrr %1, vtype\n\t"
        ".option pop"
        : "=r"(e8m1_vec_len), "=r"(vtype_reg_val));

    // The RVV target is supported if the VILL bit of VTYPE (the MSB bit of
    // VTYPE) is not set and the length of a vuint8m1_t vector is at least 16
    // bytes
    if (vtype_reg_val >= 0 && e8m1_vec_len >= 16) {
      bits |= HWY_RVV;
    }
  }

  return bits;
}
}  // namespace rvv
#endif  // HWY_ARCH_*

// Returns targets supported by the CPU, independently of DisableTargets.
// Factored out of SupportedTargets to make its structure more obvious. Note
// that x86 CPUID may take several hundred cycles.
// NOTE(review): presumably forwards to the DetectTargets() of whichever
// architecture namespace (x86, arm, ppc, s390x or rvv) was compiled in by the
// preprocessor conditions above - confirm, including what is returned when
// none of them is.
int64_t DetectTargets() {}

}  // namespace

// Disables the targets whose bits are set in `disabled_targets` at runtime.
// NOTE(review): presumably records the request in supported_mask_, which a
// comment in this file ties to DisableTargets - confirm the exact update.
HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets) {}

// Test hook taking a mask of targets.
// NOTE(review): presumably stores `targets` in supported_targets_for_test_,
// which is described in this file as a mocked mask that is only written from a
// single thread before the test starts - confirm.
HWY_DLLEXPORT void SetSupportedTargetsForTest(int64_t targets) {}

// Returns a mask of targets as int64_t.
// NOTE(review): presumably the result of DetectTargets() restricted by
// supported_mask_, or the mocked value from supported_targets_for_test_ when
// one has been set - confirm.
HWY_DLLEXPORT int64_t SupportedTargets() {}

// Returns a reference to a ChosenTarget.
// NOTE(review): presumably a single shared instance with static storage
// duration - confirm the lifetime of the returned reference.
HWY_DLLEXPORT ChosenTarget& GetChosenTarget() {}

}  // namespace hwy