chromium/third_party/eigen3/src/Eigen/src/Core/util/ConfigureVectorization.h

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2018 Gael Guennebaud <[email protected]>
// Copyright (C) 2020, Arm Limited and Contributors
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CONFIGURE_VECTORIZATION_H
#define EIGEN_CONFIGURE_VECTORIZATION_H

//------------------------------------------------------------------------------------------
// Static and dynamic alignment control
//
// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
// as the maximal boundary in bytes on which dynamically and statically allocated data may be alignment respectively.
// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
// a default value is automatically computed based on architecture, compiler, and OS.
//
// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}
// to be used to declare statically aligned buffers.
//------------------------------------------------------------------------------------------

/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
 * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
 * so that vectorization doesn't affect binary compatibility.
 *
 * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
 * vectorized and non-vectorized code.
 */
#if (defined EIGEN_CUDACC)
#define EIGEN_ALIGN_TO_BOUNDARY
#define EIGEN_ALIGNOF
#else
#define EIGEN_ALIGN_TO_BOUNDARY(n)
#define EIGEN_ALIGNOF(x)
#endif

// If the user explicitly disable vectorization, then we also disable alignment
#if defined(EIGEN_DONT_VECTORIZE)
#if defined(EIGEN_GPUCC)
// GPU code is always vectorized and requires memory alignment for
// statically allocated buffers.
#define EIGEN_IDEAL_MAX_ALIGN_BYTES
#else
#define EIGEN_IDEAL_MAX_ALIGN_BYTES
#endif
#elif defined(__AVX512F__)
// 64 bytes static alignment is preferred only if really required
#define EIGEN_IDEAL_MAX_ALIGN_BYTES
#elif defined(__AVX__)
// 32 bytes static alignment is preferred only if really required
#define EIGEN_IDEAL_MAX_ALIGN_BYTES
#elif defined __HVX__ && (__HVX_LENGTH__ == 128)
#define EIGEN_IDEAL_MAX_ALIGN_BYTES
#else
#define EIGEN_IDEAL_MAX_ALIGN_BYTES
#endif

// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense
#define EIGEN_MIN_ALIGN_BYTES

// Defined the boundary (in bytes) on which the data needs to be aligned. Note
// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
// aligned at all regardless of the value of this #define.

#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && \
    EIGEN_MAX_STATIC_ALIGN_BYTES > 0
#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
#endif

// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated
// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0
#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
#ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
#endif
#define EIGEN_MAX_STATIC_ALIGN_BYTES
#endif

#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES

// Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES

// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
// certain common platform (compiler+architecture combinations) to avoid these problems.
// Only static alignment is really problematic (relies on nonstandard compiler extensions),
// try to keep heap alignment even when we have to disable static alignment.
#if EIGEN_COMP_GNUC && \
    !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS)
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT
#else
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT
#endif

// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT && !EIGEN_COMP_SUNCC && !EIGEN_OS_QNX
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT
#else
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT
#endif

#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
#define EIGEN_MAX_STATIC_ALIGN_BYTES
#else
#define EIGEN_MAX_STATIC_ALIGN_BYTES
#endif

#endif

// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_STATIC_ALIGN_BYTES
#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES < EIGEN_MAX_STATIC_ALIGN_BYTES
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
#define EIGEN_MAX_STATIC_ALIGN_BYTES
#endif

#if EIGEN_MAX_STATIC_ALIGN_BYTES == 0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
#define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#endif

// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.
// It takes into account both the user choice to explicitly enable/disable alignment (by setting
// EIGEN_MAX_STATIC_ALIGN_BYTES) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only
// EIGEN_MAX_STATIC_ALIGN_BYTES should be used.

// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY
#define EIGEN_ALIGN8
#define EIGEN_ALIGN16
#define EIGEN_ALIGN32
#define EIGEN_ALIGN64
#if EIGEN_MAX_STATIC_ALIGN_BYTES > 0
#define EIGEN_ALIGN_MAX
#else
#define EIGEN_ALIGN_MAX
#endif

// Dynamic alignment control

#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES > 0
#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
#endif

#ifdef EIGEN_DONT_ALIGN
#ifdef EIGEN_MAX_ALIGN_BYTES
#undef EIGEN_MAX_ALIGN_BYTES
#endif
#define EIGEN_MAX_ALIGN_BYTES
#elif !defined(EIGEN_MAX_ALIGN_BYTES)
#define EIGEN_MAX_ALIGN_BYTES
#endif

#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
#define EIGEN_DEFAULT_ALIGN_BYTES
#else
#define EIGEN_DEFAULT_ALIGN_BYTES
#endif

#ifndef EIGEN_UNALIGNED_VECTORIZE
#define EIGEN_UNALIGNED_VECTORIZE
#endif

//----------------------------------------------------------------------

// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
#if EIGEN_MAX_ALIGN_BYTES == 0
#ifndef EIGEN_DONT_VECTORIZE
#define EIGEN_DONT_VECTORIZE
#endif
#endif

// The following (except #include <malloc.h> and _M_IX86_FP ??) can likely be
// removed as gcc 4.1 and msvc 2008 are not supported anyways.
#if EIGEN_COMP_MSVC
#include <malloc.h>  // for _aligned_malloc -- need it regardless of whether vectorization is enabled
// a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
#if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
#define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
#endif
#else
#if defined(__SSE2__)
#define EIGEN_SSE2_ON_NON_MSVC
#endif
#endif

#if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))

#if defined(EIGEN_SSE2_ON_NON_MSVC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)

// Defines symbols for compile-time detection of which instructions are
// used.
// EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_SSE
#define EIGEN_VECTORIZE_SSE2

// Detect sse3/ssse3/sse4:
// gcc and icc defines __SSE3__, ...
// there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
// want to force the use of those instructions with msvc.
#ifdef __SSE3__
#define EIGEN_VECTORIZE_SSE3
#endif
#ifdef __SSSE3__
#define EIGEN_VECTORIZE_SSSE3
#endif
#ifdef __SSE4_1__
#define EIGEN_VECTORIZE_SSE4_1
#endif
#ifdef __SSE4_2__
#define EIGEN_VECTORIZE_SSE4_2
#endif
#ifdef __AVX__
#ifndef EIGEN_USE_SYCL
#define EIGEN_VECTORIZE_AVX
#endif
#define EIGEN_VECTORIZE_SSE3
#define EIGEN_VECTORIZE_SSSE3
#define EIGEN_VECTORIZE_SSE4_1
#define EIGEN_VECTORIZE_SSE4_2
#endif
#ifdef __AVX2__
#ifndef EIGEN_USE_SYCL
#define EIGEN_VECTORIZE_AVX2
#define EIGEN_VECTORIZE_AVX
#endif
#define EIGEN_VECTORIZE_SSE3
#define EIGEN_VECTORIZE_SSSE3
#define EIGEN_VECTORIZE_SSE4_1
#define EIGEN_VECTORIZE_SSE4_2
#endif
#if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
// MSVC does not expose a switch dedicated for FMA
// For MSVC, AVX2 => FMA
#define EIGEN_VECTORIZE_FMA
#endif
#if defined(__AVX512F__)
#ifndef EIGEN_VECTORIZE_FMA
#if EIGEN_COMP_GNUC
#error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
#else
#error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
#endif
#endif
#ifndef EIGEN_USE_SYCL
#define EIGEN_VECTORIZE_AVX512
#define EIGEN_VECTORIZE_AVX2
#define EIGEN_VECTORIZE_AVX
#endif
#define EIGEN_VECTORIZE_FMA
#define EIGEN_VECTORIZE_SSE3
#define EIGEN_VECTORIZE_SSSE3
#define EIGEN_VECTORIZE_SSE4_1
#define EIGEN_VECTORIZE_SSE4_2
#ifndef EIGEN_USE_SYCL
#ifdef __AVX512DQ__
#define EIGEN_VECTORIZE_AVX512DQ
#endif
#ifdef __AVX512ER__
#define EIGEN_VECTORIZE_AVX512ER
#endif
#ifdef __AVX512BF16__
#define EIGEN_VECTORIZE_AVX512BF16
#endif
#ifdef __AVX512VL__
#define EIGEN_VECTORIZE_AVX512VL
#endif
#ifdef __AVX512FP16__
#ifdef __AVX512VL__
#define EIGEN_VECTORIZE_AVX512FP16
#else
#if EIGEN_COMP_GNUC
#error Please add -mavx512vl to your compiler flags: compiling with -mavx512fp16 alone without AVX512-VL is not supported.
#else
#error Please enable AVX512-VL in your compiler flags (e.g. -mavx512vl): compiling with AVX512-FP16 alone without AVX512-VL is not supported.
#endif
#endif
#endif
#endif
#endif

// Disable AVX support on broken xcode versions
#if (EIGEN_COMP_CLANGAPPLE == 11000033) && (__MAC_OS_X_VERSION_MIN_REQUIRED == 101500)
// A nasty bug in the clang compiler shipped with xcode in a common compilation situation
// when XCode 11.0 and Mac deployment target macOS 10.15 is https://trac.macports.org/ticket/58776#no1
#ifdef EIGEN_VECTORIZE_AVX
#undef EIGEN_VECTORIZE_AVX
#warning \
    "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
#ifdef EIGEN_VECTORIZE_AVX2
#undef EIGEN_VECTORIZE_AVX2
#endif
#ifdef EIGEN_VECTORIZE_FMA
#undef EIGEN_VECTORIZE_FMA
#endif
#ifdef EIGEN_VECTORIZE_AVX512
#undef EIGEN_VECTORIZE_AVX512
#endif
#ifdef EIGEN_VECTORIZE_AVX512DQ
#undef EIGEN_VECTORIZE_AVX512DQ
#endif
#ifdef EIGEN_VECTORIZE_AVX512ER
#undef EIGEN_VECTORIZE_AVX512ER
#endif
#endif
// NOTE: Confirmed test failures in XCode 11.0, and XCode 11.2 with  -macosx-version-min=10.15 and AVX
// NOTE using -macosx-version-min=10.15 with Xcode 11.0 results in runtime segmentation faults in many tests, 11.2
// produce core dumps in 3 tests NOTE using -macosx-version-min=10.14 produces functioning and passing tests in all
// cases NOTE __clang_version__ "11.0.0 (clang-1100.0.33.8)"  XCode 11.0 <- Produces many segfault and core dumping
// tests
//                                                                    with  -macosx-version-min=10.15 and AVX
// NOTE __clang_version__ "11.0.0 (clang-1100.0.33.12)" XCode 11.2 <- Produces 3 core dumping tests with
//                                                                    -macosx-version-min=10.15 and AVX
#endif

// include files

// This extern "C" works around a MINGW-w64 compilation issue
// https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
// In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
// However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
// with conflicting linkage.  The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
// so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
// notice that since these are C headers, the extern "C" is theoretically needed anyways.
extern "C" {
// In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
// Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus:
#if EIGEN_COMP_ICC >= 1110 || EIGEN_COMP_EMSCRIPTEN
#include <immintrin.h>
#else
#include <mmintrin.h>
#include <emmintrin.h>
#include <xmmintrin.h>
#ifdef EIGEN_VECTORIZE_SSE3
#include <pmmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_SSSE3
#include <tmmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_SSE4_1
#include <smmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_SSE4_2
#include <nmmintrin.h>
#endif
#if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
#include <immintrin.h>
#endif
#endif
}  // end extern "C"

#elif defined(__VSX__) && !defined(__APPLE__)

#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_VSX
#define EIGEN_VECTORIZE_FMA
#include <altivec.h>
// We need to #undef all these ugly tokens defined in <altivec.h>
// => use __vector instead of vector
#undef bool
#undef vector
#undef pixel

#elif defined __ALTIVEC__

#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_ALTIVEC
#define EIGEN_VECTORIZE_FMA
#include <altivec.h>
// We need to #undef all these ugly tokens defined in <altivec.h>
// => use __vector instead of vector
#undef bool
#undef vector
#undef pixel

#elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)

#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_NEON
#include <arm_neon.h>

// We currently require SVE to be enabled explicitly via EIGEN_ARM64_USE_SVE and
// will not select the backend automatically
#elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)

#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_SVE
#include <arm_sve.h>

// Since we depend on knowing SVE vector lengths at compile-time, we need
// to ensure a fixed lengths is set
#if defined __ARM_FEATURE_SVE_BITS
#define EIGEN_ARM64_SVE_VL
#else
#error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."
#endif

#elif (defined __s390x__ && defined __VEC__)

#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_ZVECTOR
#include <vecintrin.h>

#elif defined __mips_msa

// Limit MSA optimizations to little-endian CPUs for now.
// TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#if defined(__LP64__)
#define EIGEN_MIPS_64
#else
#define EIGEN_MIPS_32
#endif
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_MSA
#include <msa.h>
#endif

#elif defined __HVX__ && (__HVX_LENGTH__ == 128)

#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_HVX
#include <hexagon_types.h>

#endif
#endif

// Following the Arm ACLE arm_neon.h should also include arm_fp16.h but not all
// compilers seem to follow this. We therefore include it explicitly.
// See also: https://bugs.llvm.org/show_bug.cgi?id=47955
#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
#include <arm_fp16.h>
#endif

// Enable FMA for ARM.
#if defined(__ARM_FEATURE_FMA)
#define EIGEN_VECTORIZE_FMA
#endif

#if defined(__F16C__) && !defined(EIGEN_GPUCC) && (!EIGEN_COMP_CLANG_STRICT || EIGEN_CLANG_STRICT_AT_LEAST(3, 8, 0))
// We can use the optimized fp16 to float and float to fp16 conversion routines
#define EIGEN_HAS_FP16_C

#if EIGEN_COMP_GNUC
// Make sure immintrin.h is included, even if e.g. vectorization is
// explicitly disabled (see also issue #2395).
// Note that FP16C intrinsics for gcc and clang are included by immintrin.h,
// as opposed to emmintrin.h as suggested by Intel:
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711
#include <immintrin.h>
#endif
#endif

#if defined EIGEN_CUDACC
#define EIGEN_VECTORIZE_GPU
#include <vector_types.h>
#if EIGEN_CUDA_SDK_VER >= 70500
#define EIGEN_HAS_CUDA_FP16
#endif
#endif

#if defined(EIGEN_HAS_CUDA_FP16)
#include <cuda_runtime_api.h>
#include <cuda_fp16.h>
#endif

#if defined(EIGEN_HIPCC)
#define EIGEN_VECTORIZE_GPU
#include <hip/hip_vector_types.h>
#define EIGEN_HAS_HIP_FP16
#include <hip/hip_fp16.h>
#define EIGEN_HAS_HIP_BF16
#include <hip/hip_bfloat16.h>
#endif

/** \brief Namespace containing all symbols from the %Eigen library. */
// IWYU pragma: private
#include "../InternalHeaderCheck.h"

namespace Eigen {

inline static const char *SimdInstructionSetsInUse(void) {}

}  // end namespace Eigen

#endif  // EIGEN_CONFIGURE_VECTORIZATION_H