// Copyright 2020 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef HIGHWAY_HWY_TARGETS_H_
#define HIGHWAY_HWY_TARGETS_H_

// Allows opting out of C++ standard library usage, which is not available in
// some Compiler Explorer environments.
#ifndef HWY_NO_LIBCXX
#include <vector>
#endif

// For SIMD module implementations and their callers. Defines which targets to
// generate and call.

#include "hwy/base.h"
#include "hwy/detect_targets.h"
#include "hwy/highway_export.h"

// Like <vector> above, <atomic> is only included when the C++ standard library
// has not been opted out of via HWY_NO_LIBCXX.
// NOTE(review): presumably required by ChosenTarget, whose body is not visible
// in this view - confirm.
#if !defined(HWY_NO_LIBCXX)
#include <atomic>
#endif

namespace hwy {

// Returns a bitfield of enabled targets that are supported on this CPU. There
// is always at least one such target, hence the return value is never 0. The
// targets returned may change after calling DisableTargets. This function is
// always defined, but the HWY_SUPPORTED_TARGETS wrapper may allow eliding
// calls to it if there is only a single target enabled.
HWY_DLLEXPORT int64_t SupportedTargets();

// Evaluates to a function call, or to a literal if there is a single target.
// The condition below holds when HWY_TARGETS has at most one bit set:
// subtracting 1 from a value clears its lowest set bit and sets all bits below
// it, so the AND is zero only if no other bit was set.
#if (HWY_TARGETS & (HWY_TARGETS - 1)) == 0
#define HWY_SUPPORTED_TARGETS …
#else
#define HWY_SUPPORTED_TARGETS …
#endif

// Subsequent calls to SupportedTargets will not return targets whose bit(s)
// are set in `disabled_targets`. Exception: if SupportedTargets would return
// 0, it will instead return HWY_STATIC_TARGET (there must always be one target
// to call).
//
// This function is useful for disabling targets known to be buggy, or if the
// best available target is undesirable (perhaps due to throttling or memory
// bandwidth limitations). For iteratively enabling specific targets during
// testing, use SetSupportedTargetsForTest instead of this function.
HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets);

// Subsequent calls to SupportedTargets will return the given set of targets,
// except those disabled via DisableTargets. Call with a mask of 0 to disable
// the mock and return to the normal SupportedTargets behavior. Used to run
// tests for all targets.
HWY_DLLEXPORT void SetSupportedTargetsForTest(int64_t targets);

#ifndef HWY_NO_LIBCXX

// Returns the list of targets in HWY_TARGETS that are supported by the CPU, as
// a list of individual HWY_* target macros such as HWY_SCALAR or HWY_NEON.
// This list is affected by the current SetSupportedTargetsForTest() mock, if
// any. Only available when the C++ standard library is (std::vector is the
// return type).
HWY_INLINE std::vector<int64_t> SupportedAndGeneratedTargets() { … }

#endif  // HWY_NO_LIBCXX

// NOTE(review): the body is not visible in this view. Judging by the
// signature, this presumably maps a single HWY_* target value to its name as
// a C string - confirm, including what is returned for unrecognized values.
static inline HWY_MAYBE_UNUSED const char* TargetName(int64_t target) { … }

// The maximum number of dynamic targets on any architecture is defined by
// HWY_MAX_DYNAMIC_TARGETS and depends on the arch.

// For the ChosenTarget mask and index we use a different bit arrangement than
// in the HWY_TARGETS mask. Only the targets involved in the current
// architecture are used in this mask, and therefore only the least significant
// (HWY_MAX_DYNAMIC_TARGETS + 2) bits of the int64_t mask are used. From least
// to most significant:
//  - the lowest bit is set when the mask is not initialized;
//  - the next HWY_MAX_DYNAMIC_TARGETS bits are a range of bits from the
//    HWY_TARGETS or SupportedTargets() mask for the given architecture,
//    shifted to that position;
//  - the following bit is used for HWY_SCALAR (if HWY_COMPILE_ONLY_SCALAR is
//    defined) or HWY_EMU128.
// Because of this we need to define equivalent values for HWY_TARGETS in this
// representation.
//
// This mask representation allows using ctz() on the mask to obtain a small
// number that serves as an index into the table for dynamic dispatch. In this
// way the first entry is used when the mask is uninitialized, the following
// HWY_MAX_DYNAMIC_TARGETS entries are for dynamic dispatch and the last one is
// for scalar.

// The HWY_SCALAR/HWY_EMU128 bit in the ChosenTarget mask format.
#define HWY_CHOSEN_TARGET_MASK_SCALAR …

// Converts from a HWY_TARGETS mask to a ChosenTarget mask format for the
// current architecture.
#define HWY_CHOSEN_TARGET_SHIFT(X) …

// The HWY_TARGETS mask in the ChosenTarget mask format.
#define HWY_CHOSEN_TARGET_MASK_TARGETS …

#if HWY_ARCH_X86
// Maximum number of dynamic targets. Changing this value is an
// ABI-incompatible change.
#define HWY_MAX_DYNAMIC_TARGETS …
#define HWY_HIGHEST_TARGET_BIT …
// These must match the order in which the HWY_TARGETS are defined, starting
// from the least significant (HWY_HIGHEST_TARGET_BIT + 1 -
// HWY_MAX_DYNAMIC_TARGETS) bit. This list must contain exactly
// HWY_MAX_DYNAMIC_TARGETS elements and does not include SCALAR. The first
// entry corresponds to the best target. Don't include a "," at the end of the
// list.
#define HWY_CHOOSE_TARGET_LIST(func_name) …

#elif HWY_ARCH_ARM
// See HWY_ARCH_X86 above for details.
#define HWY_MAX_DYNAMIC_TARGETS …
#define HWY_HIGHEST_TARGET_BIT …
#define HWY_CHOOSE_TARGET_LIST …

#elif HWY_ARCH_RISCV
// See HWY_ARCH_X86 above for details.
#define HWY_MAX_DYNAMIC_TARGETS …
#define HWY_HIGHEST_TARGET_BIT …
#define HWY_CHOOSE_TARGET_LIST …

#elif HWY_ARCH_PPC || HWY_ARCH_S390X
// See HWY_ARCH_X86 above for details.
#define HWY_MAX_DYNAMIC_TARGETS …
#define HWY_HIGHEST_TARGET_BIT …
#define HWY_CHOOSE_TARGET_LIST …

#elif HWY_ARCH_WASM
// See HWY_ARCH_X86 above for details.
#define HWY_MAX_DYNAMIC_TARGETS …
#define HWY_HIGHEST_TARGET_BIT …
#define HWY_CHOOSE_TARGET_LIST …

#else
// Unknown architecture, will use HWY_SCALAR without dynamic dispatch, though
// still creating single-entry tables in HWY_EXPORT to ensure portability.
// Unlike the branches above, no HWY_CHOOSE_TARGET_LIST is defined here.
#define HWY_MAX_DYNAMIC_TARGETS …
#define HWY_HIGHEST_TARGET_BIT …
#endif

// Bitfield of supported and enabled targets. The format differs from that of
// HWY_TARGETS (see the description of the ChosenTarget mask format above): the
// lowest bit governs the first function pointer in the tables created by
// HWY_EXPORT. That first entry is special in that it calls FunctionCache, then
// Update, then dispatches to the actual implementation. Monostate (see
// GetChosenTarget), thread-safe except on RVV.
struct ChosenTarget { … };

// For internal use (e.g. by FunctionCache and DisableTargets). Returns a
// reference to the ChosenTarget monostate.
HWY_DLLEXPORT ChosenTarget& GetChosenTarget();

}  // namespace hwy

#endif  // HIGHWAY_HWY_TARGETS_H_