// chromium/third_party/highway/src/hwy/targets.h

// Copyright 2020 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef HIGHWAY_HWY_TARGETS_H_
#define HIGHWAY_HWY_TARGETS_H_

// Allows opting out of C++ standard library usage, which is not available in
// some Compiler Explorer environments.
#ifndef HWY_NO_LIBCXX
#include <vector>
#endif

// For SIMD module implementations and their callers. Defines which targets to
// generate and call.

#include "hwy/base.h"
#include "hwy/detect_targets.h"
#include "hwy/highway_export.h"

#if !defined(HWY_NO_LIBCXX)
#include <atomic>
#endif

namespace hwy {

// Returns a bitfield of the enabled targets that are supported on this CPU.
// There is always at least one such target, hence the return value is never 0.
// The targets returned may change after calling DisableTargets. This function
// is always defined, but the HWY_SUPPORTED_TARGETS wrapper may allow eliding
// calls to it if there is only a single target enabled.
HWY_DLLEXPORT int64_t SupportedTargets();

// Evaluates to a function call, or to a literal if there is a single target.
// A mask with at most one bit set satisfies (x & (x - 1)) == 0; in that case
// the set of targets is known at compile time and the call to
// hwy::SupportedTargets() can be elided.
#if (HWY_TARGETS & (HWY_TARGETS - 1)) == 0
#define HWY_SUPPORTED_TARGETS HWY_TARGETS
#else
#define HWY_SUPPORTED_TARGETS hwy::SupportedTargets()
#endif

// Subsequent calls to SupportedTargets will not return targets whose bit(s)
// are set in `disabled_targets`. Exception: if SupportedTargets would return 0,
// it will instead return HWY_STATIC_TARGET (there must always be one target to
// call).
//
// This function is useful for disabling targets known to be buggy, or if the
// best available target is undesirable (perhaps due to throttling or memory
// bandwidth limitations). Use SetSupportedTargetsForTest instead of this
// function for iteratively enabling specific targets for testing.
//
// `disabled_targets`: bitfield of targets to exclude.
HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets);

// Subsequent calls to SupportedTargets will return the given set of targets,
// except those disabled via DisableTargets. Call with a mask of 0 to disable
// the mock and return to the normal SupportedTargets behavior. Used to run
// tests for all targets.
//
// `targets`: bitfield of targets to report as supported, or 0 to stop mocking.
HWY_DLLEXPORT void SetSupportedTargetsForTest(int64_t targets);

#ifndef HWY_NO_LIBCXX

// Returns the targets in HWY_TARGETS that are supported by the CPU, as a list
// of individual HWY_* target macros such as HWY_SCALAR or HWY_NEON (exactly
// one bit set per element, in order of increasing bit value). This list is
// affected by the current SetSupportedTargetsForTest() mock, if any, because
// it is derived from SupportedTargets().
HWY_INLINE std::vector<int64_t> SupportedAndGeneratedTargets() {
  std::vector<int64_t> ret;
  // Work on an unsigned copy so the bit tricks below are well-defined for
  // every possible mask value.
  uint64_t remaining = static_cast<uint64_t>(SupportedTargets() & HWY_TARGETS);
  while (remaining != 0) {
    // Isolate the lowest set bit (two's complement negation trick).
    const uint64_t lowest = remaining & (~remaining + 1);
    ret.push_back(static_cast<int64_t>(lowest));
    // Clear the lowest set bit and continue with the rest.
    remaining &= remaining - 1;
  }
  return ret;
}

#endif  // HWY_NO_LIBCXX

// Takes a target as an int64_t and returns a C string.
// NOTE(review): the body is empty in this view, so control flows off the end
// of a function returning `const char*`; that is undefined behavior if the
// function is ever called. Presumably this maps a single HWY_* target bit to
// its human-readable name — confirm against the intended implementation.
static inline HWY_MAYBE_UNUSED const char* TargetName(int64_t target) {}

// The maximum number of dynamic targets on any architecture is defined by
// HWY_MAX_DYNAMIC_TARGETS and depends on the arch.

// For the ChosenTarget mask and index we use a different bit arrangement than
// in the HWY_TARGETS mask. Only the targets involved in the current
// architecture are used in this mask, and therefore only the least significant
// (HWY_MAX_DYNAMIC_TARGETS + 2) bits of the int64_t mask are used. The least
// significant bit is set when the mask is not initialized, the next
// HWY_MAX_DYNAMIC_TARGETS more significant bits are a range of bits from the
// HWY_TARGETS or SupportedTargets() mask for the given architecture shifted to
// that position and the next more significant bit is used for HWY_SCALAR (if
// HWY_COMPILE_ONLY_SCALAR is defined) or HWY_EMU128. Because of this we need to
// define equivalent values for HWY_TARGETS in this representation.
// This mask representation allows using ctz() on the mask to obtain a small
// number that is used as an index into the table for dynamic dispatch. In this
// way, the first entry is used when the mask is uninitialized, the following
// HWY_MAX_DYNAMIC_TARGETS entries are for dynamic dispatch and the last one is
// for scalar.

// The HWY_SCALAR/HWY_EMU128 bit in the ChosenTarget mask format.
// NOTE(review): defined with an empty replacement list in this view. Per the
// layout description above, this bit sits just above the
// HWY_MAX_DYNAMIC_TARGETS dynamic-target bits — confirm the intended value.
#define HWY_CHOSEN_TARGET_MASK_SCALAR

// Converts from a HWY_TARGETS mask to a ChosenTarget mask format for the
// current architecture. `X` is a mask in the HWY_TARGETS format.
// NOTE(review): defined with an empty replacement list in this view, so any
// use currently expands to nothing — confirm the intended expression.
#define HWY_CHOSEN_TARGET_SHIFT(X)

// The HWY_TARGETS mask in the ChosenTarget mask format.
// NOTE(review): defined with an empty replacement list in this view — confirm
// the intended expression.
#define HWY_CHOSEN_TARGET_MASK_TARGETS

// Per-architecture dispatch parameters. Every branch that defines
// HWY_CHOOSE_TARGET_LIST defines it as a function-like macro taking
// `func_name`, so that HWY_CHOOSE_TARGET_LIST(name) is consumed uniformly by
// the preprocessor regardless of the architecture being compiled for.
#if HWY_ARCH_X86
// Maximum number of dynamic targets. Changing this value is an
// ABI-incompatible change.
#define HWY_MAX_DYNAMIC_TARGETS
#define HWY_HIGHEST_TARGET_BIT
// These must match the order in which the HWY_TARGETS are defined, starting
// from the least significant (HWY_HIGHEST_TARGET_BIT + 1 -
// HWY_MAX_DYNAMIC_TARGETS) bit. This list must contain exactly
// HWY_MAX_DYNAMIC_TARGETS elements and does not include SCALAR. The first entry
// corresponds to the best target. Don't include a "," at the end of the list.
#define HWY_CHOOSE_TARGET_LIST(func_name)

#elif HWY_ARCH_ARM
// See HWY_ARCH_X86 above for details.
#define HWY_MAX_DYNAMIC_TARGETS
#define HWY_HIGHEST_TARGET_BIT
#define HWY_CHOOSE_TARGET_LIST(func_name)

#elif HWY_ARCH_RISCV
// See HWY_ARCH_X86 above for details.
#define HWY_MAX_DYNAMIC_TARGETS
#define HWY_HIGHEST_TARGET_BIT
#define HWY_CHOOSE_TARGET_LIST(func_name)

#elif HWY_ARCH_PPC || HWY_ARCH_S390X
// See HWY_ARCH_X86 above for details.
#define HWY_MAX_DYNAMIC_TARGETS
#define HWY_HIGHEST_TARGET_BIT
#define HWY_CHOOSE_TARGET_LIST(func_name)

#elif HWY_ARCH_WASM
// See HWY_ARCH_X86 above for details.
#define HWY_MAX_DYNAMIC_TARGETS
#define HWY_HIGHEST_TARGET_BIT
#define HWY_CHOOSE_TARGET_LIST(func_name)

#else
// Unknown architecture, will use HWY_SCALAR without dynamic dispatch, though
// still creating single-entry tables in HWY_EXPORT to ensure portability.
// HWY_CHOOSE_TARGET_LIST is intentionally not defined in this branch.
#define HWY_MAX_DYNAMIC_TARGETS
#define HWY_HIGHEST_TARGET_BIT
#endif

// Bitfield of supported and enabled targets. The format differs from that of
// HWY_TARGETS; the lowest bit governs the first function pointer (which is
// special in that it calls FunctionCache, then Update, then dispatches to the
// actual implementation) in the tables created by HWY_EXPORT. Monostate (see
// GetChosenTarget), thread-safe except on RVV.
// NOTE(review): no members are visible in this view, so the storage for the
// bitfield and the `Update` operation mentioned above cannot be confirmed
// from here.
struct ChosenTarget {};

// For internal use (e.g. by FunctionCache and DisableTargets). Returns a
// mutable reference to a ChosenTarget.
HWY_DLLEXPORT ChosenTarget& GetChosenTarget();

}  // namespace hwy

#endif  // HIGHWAY_HWY_TARGETS_H_