#pragma once
#include "../sys/platform.h"
#include "../sys/intrinsics.h"
#include "constants.h"
#include <cmath>
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "math_sycl.h"
#else
#if defined(__ARM_NEON)
#include "../simd/arm/emulation.h"
#else
#include <emmintrin.h>
#include <xmmintrin.h>
#include <immintrin.h>
#endif
#if defined(__WIN32__)
#if defined(_MSC_VER) && (_MSC_VER <= 1700)
// Floating-point classification helpers injected into namespace std for
// old MSVC toolchains (_MSC_VER <= 1700), built on the CRT's _finite() and
// _isnan(). Presumably those toolchains do not ship these functions in
// <cmath> - TODO confirm.
namespace std
{
  // True only for +/-infinity. _finite() is zero for NaN as well as for
  // infinities, so NaN has to be excluded explicitly.
  __forceinline bool isinf ( const float x ) { return _finite(x) == 0 && _isnan(x) == 0; }
  // True when x is NaN.
  __forceinline bool isnan ( const float x ) { return _isnan(x) != 0; }
  // True when x is neither infinite nor NaN.
  __forceinline bool isfinite (const float x) { return _finite(x) != 0; }
}
#endif
#endif
namespace embree
{
__forceinline bool isvalid ( const float& v ) { … }
__forceinline int cast_f2i(float f) { … }
__forceinline float cast_i2f(int i) { … }
__forceinline int toInt (const float& a) { … }
__forceinline float toFloat(const int& a) { … }
__forceinline int asInt (const float& a) { … }
__forceinline float asFloat(const int& a) { … }
#if defined(__WIN32__)
// Windows-only helper: converts the integer result of the CRT's _finite()
// into a bool.
__forceinline bool finite ( const float x )
{
  const bool is_finite = (_finite(x) != 0);
  return is_finite;
}
#endif
__forceinline float sign ( const float x ) { … }
__forceinline float sqr ( const float x ) { … }
__forceinline float rcp ( const float x )
{ … }
__forceinline float signmsk ( const float x ) { … }
__forceinline float xorf( const float x, const float y ) { … }
__forceinline float andf( const float x, const unsigned y ) { … }
__forceinline float rsqrt( const float x )
{ … }
#if defined(__WIN32__) && defined(_MSC_VER) && (_MSC_VER <= 1700)
// Fallback float nextafter() for old MSVC: returns the representable float
// adjacent to x in the direction of y.
//  - if either argument is NaN, NaN is returned
//  - if x == y, y is returned
//  - stepping off +/-0 yields the smallest denormal carrying y's direction
// The previous implementation scaled x by roughly +/-10%, which is not the
// adjacent value and never moved away from zero.
__forceinline float nextafter(float x, float y)
{
  // NaN in, NaN out
  if (x != x || y != y) return x + y;

  // nothing to step over (also covers +0 vs -0)
  if (x == y) return y;

  // Reinterpret the float as its raw 32-bit pattern. Union punning is relied
  // upon here; this branch is only compiled with MSVC, where unsigned int is
  // 32 bits wide.
  union { float f; unsigned int u; } bits;

  if (x == 0.0f) {
    bits.u = (y > 0.0f) ? 0x00000001u : 0x80000001u;
    return bits.f;
  }

  bits.f = x;
  // IEEE-754 single precision is sign-magnitude: moving away from zero means
  // incrementing the raw pattern, moving towards zero means decrementing it.
  if ((x < y) == (x > 0.0f)) ++bits.u;
  else                       --bits.u;
  return bits.f;
}
// double overload for old MSVC: delegates to the CRT's _nextafter().
__forceinline double nextafter(double x, double y)
{
  const double neighbour = _nextafter(x, y);
  return neighbour;
}
// Fallback roundf() for old MSVC: rounds f to the nearest integer, with
// halfway cases going away from zero, and returns it as int.
// The cast truncates towards zero, so the 0.5 bias must carry the sign of f;
// adding +0.5 unconditionally rounded negative inputs the wrong way
// (e.g. -1.5 became -1 and -0.7 became 0).
__forceinline int roundf(float f)
{
  return (f < 0.0f) ? (int)(f - 0.5f) : (int)(f + 0.5f);
}
#else
__forceinline float nextafter(float x, float y) { … }
__forceinline double nextafter(double x, double y) { … }
#endif
__forceinline float abs ( const float x ) { … }
__forceinline float acos ( const float x ) { … }
__forceinline float asin ( const float x ) { … }
__forceinline float atan ( const float x ) { … }
__forceinline float atan2( const float y, const float x ) { … }
__forceinline float cos ( const float x ) { … }
__forceinline float cosh ( const float x ) { … }
__forceinline float exp ( const float x ) { … }
__forceinline float fmod ( const float x, const float y ) { … }
__forceinline float log ( const float x ) { … }
__forceinline float log10( const float x ) { … }
__forceinline float pow ( const float x, const float y ) { … }
__forceinline float sin ( const float x ) { … }
__forceinline float sinh ( const float x ) { … }
__forceinline float sqrt ( const float x ) { … }
__forceinline float tan ( const float x ) { … }
__forceinline float tanh ( const float x ) { … }
__forceinline float floor( const float x ) { … }
__forceinline float ceil ( const float x ) { … }
__forceinline float frac ( const float x ) { … }
__forceinline double abs ( const double x ) { … }
__forceinline double sign ( const double x ) { … }
__forceinline double acos ( const double x ) { … }
__forceinline double asin ( const double x ) { … }
__forceinline double atan ( const double x ) { … }
__forceinline double atan2( const double y, const double x ) { … }
__forceinline double cos ( const double x ) { … }
__forceinline double cosh ( const double x ) { … }
__forceinline double exp ( const double x ) { … }
__forceinline double fmod ( const double x, const double y ) { … }
__forceinline double log ( const double x ) { … }
__forceinline double log10( const double x ) { … }
__forceinline double pow ( const double x, const double y ) { … }
__forceinline double rcp ( const double x ) { … }
__forceinline double rsqrt( const double x ) { … }
__forceinline double sin ( const double x ) { … }
__forceinline double sinh ( const double x ) { … }
__forceinline double sqr ( const double x ) { … }
__forceinline double sqrt ( const double x ) { … }
__forceinline double tan ( const double x ) { … }
__forceinline double tanh ( const double x ) { … }
__forceinline double floor( const double x ) { … }
__forceinline double ceil ( const double x ) { … }
#if defined(__aarch64__)
// AArch64 variant of mini(): places a and b in lane 0 of two vectors, takes
// their lane-wise minimum with _mm_min_ps and returns lane 0 of the result.
// NOTE(review): lanes 1..3 of both vectors are never written; only lane 0 is
// read back. Unlike the SSE4.1 variant, the comparison here is done on float
// values rather than on integer bit patterns - confirm this is intended.
__forceinline float mini(float a, float b) {
  __m128 lhs;
  __m128 rhs;
  lhs[0] = a;
  rhs[0] = b;
  const __m128 lowest = _mm_min_ps(lhs, rhs);
  return lowest[0];
}
#elif defined(__SSE4_1__)
// SSE4.1 variant of mini(): reinterprets the bit patterns of a and b as
// signed 32-bit integers, takes the integer minimum (_mm_min_epi32) and
// reinterprets the winning pattern as a float again.
// NOTE(review): integer ordering of the bit patterns differs from float
// ordering for negative values - presumably callers pass non-negative
// inputs; verify against callers.
__forceinline float mini(float a, float b) {
  const __m128i bits_a = _mm_castps_si128(_mm_set_ss(a));
  const __m128i bits_b = _mm_castps_si128(_mm_set_ss(b));
  return _mm_cvtss_f32(_mm_castsi128_ps(_mm_min_epi32(bits_a, bits_b)));
}
#endif
#if defined(__aarch64__)
// AArch64 variant of maxi(): places a and b in lane 0 of two vectors, takes
// their lane-wise maximum with _mm_max_ps and returns lane 0 of the result.
// NOTE(review): lanes 1..3 of both vectors are never written; only lane 0 is
// read back. Unlike the SSE4.1 variant, the comparison here is done on float
// values rather than on integer bit patterns - confirm this is intended.
__forceinline float maxi(float a, float b) {
  __m128 lhs;
  __m128 rhs;
  lhs[0] = a;
  rhs[0] = b;
  const __m128 highest = _mm_max_ps(lhs, rhs);
  return highest[0];
}
#elif defined(__SSE4_1__)
// SSE4.1 variant of maxi(): reinterprets the bit patterns of a and b as
// signed 32-bit integers, takes the integer maximum (_mm_max_epi32) and
// reinterprets the winning pattern as a float again.
// NOTE(review): integer ordering of the bit patterns differs from float
// ordering for negative values - presumably callers pass non-negative
// inputs; verify against callers.
__forceinline float maxi(float a, float b) {
  const __m128i bits_a = _mm_castps_si128(_mm_set_ss(a));
  const __m128i bits_b = _mm_castps_si128(_mm_set_ss(b));
  return _mm_cvtss_f32(_mm_castsi128_ps(_mm_max_epi32(bits_a, bits_b)));
}
#endif
template<typename T>
__forceinline T twice(const T& a) { … }
__forceinline int min(int a, int b) { … }
__forceinline unsigned min(unsigned a, unsigned b) { … }
__forceinline int64_t min(int64_t a, int64_t b) { … }
__forceinline float min(float a, float b) { … }
__forceinline double min(double a, double b) { … }
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
__forceinline size_t min(size_t a, size_t b) { … }
#endif
#if defined(__EMSCRIPTEN__)
// Emscripten-only overload: smaller of two long values (b when they are equal).
__forceinline long min(long a, long b)
{
  if (a < b) return a;
  return b;
}
#endif
template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { … }
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { … }
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { … }
template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { … }
template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { … }
template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { … }
__forceinline int max(int a, int b) { … }
__forceinline unsigned max(unsigned a, unsigned b) { … }
__forceinline int64_t max(int64_t a, int64_t b) { … }
__forceinline float max(float a, float b) { … }
__forceinline double max(double a, double b) { … }
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
__forceinline size_t max(size_t a, size_t b) { … }
#endif
#if defined(__EMSCRIPTEN__)
// Emscripten-only overload: larger of two long values (a when they are equal).
__forceinline long max(long a, long b)
{
  if (a < b) return b;
  return a;
}
#endif
template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { … }
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { … }
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { … }
template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { … }
template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { … }
template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { … }
#if defined(__MACOSX__)
// macOS-only overload: smaller of two ssize_t values (b when they are equal).
__forceinline ssize_t min(ssize_t a, ssize_t b)
{
  if (a < b) return a;
  return b;
}
// macOS-only overload: larger of two ssize_t values (a when they are equal).
__forceinline ssize_t max(ssize_t a, ssize_t b)
{
  if (a < b) return b;
  return a;
}
#endif
#if defined(__MACOSX__) && !defined(__INTEL_COMPILER)
// macOS shim (non-Intel compilers): forwards x and both output pointers
// unchanged to the platform's __sincosf().
__forceinline void sincosf(float x, float *sin, float *cos)
{
  __sincosf(x, sin, cos);
}
#endif
#if defined(__WIN32__) || defined(__FreeBSD__)
// Windows / FreeBSD shim: computes sinf(x) and cosf(x) with two separate
// calls and stores them through s and c respectively.
__forceinline void sincosf(float x, float *s, float *c)
{
  const float sine = sinf(x);
  *s = sine;
  const float cosine = cosf(x);
  *c = cosine;
}
#endif
template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) { … }
template<typename T> __forceinline T clampz(const T& x, const T& upper) { … }
template<typename T> __forceinline T deg2rad ( const T& x ) { … }
template<typename T> __forceinline T rad2deg ( const T& x ) { … }
template<typename T> __forceinline T sin2cos ( const T& x ) { … }
template<typename T> __forceinline T cos2sin ( const T& x ) { … }
#if defined(__AVX2__)
// Scalar multiply-add through the _mm_fmadd_ss intrinsic: a*b + c.
__forceinline float madd ( const float a, const float b, const float c)
{
  const __m128 va = _mm_set_ss(a);
  const __m128 vb = _mm_set_ss(b);
  const __m128 vc = _mm_set_ss(c);
  return _mm_cvtss_f32(_mm_fmadd_ss(va, vb, vc));
}
// Scalar multiply-subtract through the _mm_fmsub_ss intrinsic: a*b - c.
__forceinline float msub ( const float a, const float b, const float c)
{
  const __m128 va = _mm_set_ss(a);
  const __m128 vb = _mm_set_ss(b);
  const __m128 vc = _mm_set_ss(c);
  return _mm_cvtss_f32(_mm_fmsub_ss(va, vb, vc));
}
// Scalar negated multiply-add through the _mm_fnmadd_ss intrinsic: -(a*b) + c.
__forceinline float nmadd ( const float a, const float b, const float c)
{
  const __m128 va = _mm_set_ss(a);
  const __m128 vb = _mm_set_ss(b);
  const __m128 vc = _mm_set_ss(c);
  return _mm_cvtss_f32(_mm_fnmadd_ss(va, vb, vc));
}
// Scalar negated multiply-subtract through the _mm_fnmsub_ss intrinsic: -(a*b) - c.
__forceinline float nmsub ( const float a, const float b, const float c)
{
  const __m128 va = _mm_set_ss(a);
  const __m128 vb = _mm_set_ss(b);
  const __m128 vc = _mm_set_ss(c);
  return _mm_cvtss_f32(_mm_fnmsub_ss(va, vb, vc));
}
#elif defined (__aarch64__) && defined(__clang__)
// AArch64 + clang: the multiply-add helpers are written as plain arithmetic
// and floating-point contraction is switched to "fast" around them, which
// permits clang to fuse each multiply with the following add/subtract.
#pragma clang fp contract(fast)
// a*b + c
__forceinline float madd ( const float a, const float b, const float c) { return a*b + c; }
// a*b - c
__forceinline float msub ( const float a, const float b, const float c) { return a*b - c; }
// c - a*b
__forceinline float nmadd ( const float a, const float b, const float c) { return c - a*b; }
// -(c + a*b)
__forceinline float nmsub ( const float a, const float b, const float c) { return -(c + a*b); }
// NOTE(review): this sets contraction to "on" for the code that follows; it
// does not restore whatever mode was active before the "fast" pragma above.
#pragma clang fp contract(on)
#else
__forceinline float madd ( const float a, const float b, const float c) { … }
__forceinline float msub ( const float a, const float b, const float c) { … }
__forceinline float nmadd ( const float a, const float b, const float c) { … }
__forceinline float nmsub ( const float a, const float b, const float c) { … }
#endif
template<typename T> T random() { … }
#if defined(_WIN32)
// Windows specialisation: builds an int by XOR-ing three rand() results,
// the second shifted left by 8 and the third by 16 bits.
// The draws are taken in separate statements because the operands of ^ may
// be evaluated in any order; sequencing them makes the value produced for a
// given seed independent of the compiler.
template<> __forceinline int random()
{
  const int r0 = int(rand());
  const int r1 = int(rand());
  const int r2 = int(rand());
  return r0 ^ (r1 << 8) ^ (r2 << 16);
}
// Windows specialisation: builds a uint32_t by XOR-ing three rand() results,
// the second shifted left by 8 and the third by 16 bits.
// The draws are taken in separate statements because the operands of ^ may
// be evaluated in any order; sequencing them makes the value produced for a
// given seed independent of the compiler.
template<> __forceinline uint32_t random()
{
  const uint32_t r0 = uint32_t(rand());
  const uint32_t r1 = uint32_t(rand());
  const uint32_t r2 = uint32_t(rand());
  return r0 ^ (r1 << 8) ^ (r2 << 16);
}
#else
template<> __forceinline int random() { … }
template<> __forceinline uint32_t random() { … }
#endif
template<> __forceinline float random() { … }
template<> __forceinline double random() { … }
#if _WIN32
// Windows stand-in for POSIX drand48(), built on rand().
// Returns a value in [0, 1): dividing by RAND_MAX + 1 (rather than RAND_MAX)
// keeps the result strictly below 1.0 even when rand() returns RAND_MAX,
// matching the half-open range drand48() is specified to produce.
__forceinline double drand48() {
  return double(rand())/(double(RAND_MAX) + 1.0);
}
// Windows stand-in for srand48(): seeds rand() via srand(), with the long
// seed converted to srand()'s unsigned int parameter.
__forceinline void srand48(long seed)
{
  srand(static_cast<unsigned int>(seed));
}
#endif
__forceinline bool select(bool s, bool t , bool f) { … }
__forceinline int select(bool s, int t, int f) { … }
__forceinline float select(bool s, float t, float f) { … }
__forceinline bool none(bool s) { … }
__forceinline bool all (bool s) { … }
__forceinline bool any (bool s) { … }
__forceinline unsigned movemask (bool s) { … }
__forceinline float lerp(const float v0, const float v1, const float t) { … }
template<typename T>
__forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) { … }
template<typename T> __forceinline void xchg ( T& a, T& b ) { … }
template<typename Ty> struct mem;
template<> struct mem<float> { … };
template<class T>
__forceinline T bitReverse(const T& vin)
{ … }
template<class T>
__forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)
{ … }
#if defined(__AVX2__) && !defined(__aarch64__)
// AVX2 (non-AArch64) specialisation of bitInterleave for unsigned int.
// Each input is passed through pdep() with a mask selecting every third bit
// (starting at bit 0 for x, bit 1 for y, bit 2 for z) and the three results
// are OR-ed together.
// pdep() is presumably the BMI2 parallel-bit-deposit wrapper from
// intrinsics.h - TODO confirm.
template<>
__forceinline unsigned int bitInterleave(const unsigned int &xi, const unsigned int& yi, const unsigned int& zi)
{
  const unsigned int spread_x = pdep(xi, 0x49249249);  // bits 0, 3, 6, ...
  const unsigned int spread_y = pdep(yi, 0x92492492);  // bits 1, 4, 7, ...
  const unsigned int spread_z = pdep(zi, 0x24924924);  // bits 2, 5, 8, ...
  const unsigned int interleaved = spread_x | spread_y | spread_z;
  return interleaved;
}
#endif
template<class T>
__forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){ … }
}
#endif