#include "pffft.h"
#include <stdint.h>
#include <stdlib.h>
#include <math.h>
#include <assert.h>
#if defined(_MSC_VER)
#define COMPILER_MSVC
#elif defined(__GNUC__)
#define COMPILER_GCC
#endif
#if defined(COMPILER_GCC)
#define ALWAYS_INLINE(return_type) …
#define NEVER_INLINE(return_type) …
#define RESTRICT …
#define VLA_ARRAY_ON_STACK(type__, varname__, size__) …
#define VLA_ARRAY_ON_STACK_FREE(varname__) …
#elif defined(COMPILER_MSVC)
#include <malloc.h>
#define ALWAYS_INLINE …
#define NEVER_INLINE …
#define RESTRICT …
#define VLA_ARRAY_ON_STACK …
#define VLA_ARRAY_ON_STACK_FREE …
#endif
#if !defined(PFFFT_SIMD_DISABLE) && (defined(__ppc__) || defined(__ppc64__))
typedef vector float v4sf;
#define SIMD_SZ …
#define VZERO …
#define VMUL …
#define VADD …
#define VMADD …
#define VSUB …
inline v4sf ld_ps1(const float *p) { v4sf v=vec_lde(0,p); return vec_splat(vec_perm(v, v, vec_lvsl(0, p)), 0); }
#define LD_PS1 …
#define INTERLEAVE2 …
#define UNINTERLEAVE2 …
#define VTRANSPOSE4 …
#define VSWAPHL …
#define VALIGNED …
#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(__i386__) || defined(_M_IX86))
#include <xmmintrin.h>
v4sf;
#define SIMD_SZ …
#define VZERO() …
#define VMUL(a,b) …
#define VADD(a,b) …
#define VMADD(a,b,c) …
#define VSUB(a,b) …
#define LD_PS1(p) …
#define INTERLEAVE2(in1, in2, out1, out2) …
#define UNINTERLEAVE2(in1, in2, out1, out2) …
#define VTRANSPOSE4(x0,x1,x2,x3) …
#define VSWAPHL(a,b) …
#define VALIGNED(ptr) …
#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__arm__) || defined(__ARMEL__) || defined(__aarch64__) || defined(_M_ARM64))
# include <arm_neon.h>
typedef float32x4_t v4sf;
#define SIMD_SZ …
#define VZERO …
#define VMUL …
#define VADD …
#define VMADD …
#define VSUB …
#define LD_PS1 …
#define INTERLEAVE2 …
#define UNINTERLEAVE2 …
#define VTRANSPOSE4 …
#define VSWAPHL …
#define VALIGNED …
#else
# if !defined(PFFFT_SIMD_DISABLE)
# warning "building with simd disabled !\n";
#define PFFFT_SIMD_DISABLE …
# endif
#endif
#ifdef PFFFT_SIMD_DISABLE
typedef float v4sf;
#define SIMD_SZ …
#define VZERO …
#define VMUL …
#define VADD …
#define VMADD …
#define VSUB …
#define LD_PS1 …
#define VALIGNED …
#endif
#define VCPLXMUL(ar,ai,br,bi) …
#define VCPLXMULCONJ(ar,ai,br,bi) …
#ifndef SVMUL
#define SVMUL(f,v) …
#endif
#if !defined(PFFFT_SIMD_DISABLE)
v4sf_union;
#include <string.h>
#define assertv4(v,f0,f1,f2,f3) …
void validate_pffft_simd() { … }
#endif
#define MALLOC_V4SF_ALIGNMENT …
void *pffft_aligned_malloc(size_t nb_bytes) { … }
void pffft_aligned_free(void *p) { … }
int pffft_simd_size() { … }
static NEVER_INLINE(void) passf2_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1, float fsign) { … }
static NEVER_INLINE(void) passf3_ps(int ido, int l1, const v4sf *cc, v4sf *ch,
const float *wa1, const float *wa2, float fsign) { … }
static NEVER_INLINE(void) passf4_ps(int ido, int l1, const v4sf *cc, v4sf *ch,
const float *wa1, const float *wa2, const float *wa3, float fsign) { … }
static NEVER_INLINE(void) passf5_ps(int ido, int l1, const v4sf *cc, v4sf *ch,
const float *wa1, const float *wa2,
const float *wa3, const float *wa4, float fsign) { … }
static NEVER_INLINE(void) radf2_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch, const float *wa1) { … }
static NEVER_INLINE(void) radb2_ps(int ido, int l1, const v4sf *cc, v4sf *ch, const float *wa1) { … }
static void radf3_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch,
const float *wa1, const float *wa2) { … }
static void radb3_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf *RESTRICT ch,
const float *wa1, const float *wa2)
{ … }
static NEVER_INLINE(void) radf4_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf * RESTRICT ch,
const float * RESTRICT wa1, const float * RESTRICT wa2, const float * RESTRICT wa3)
{ … }
static NEVER_INLINE(void) radb4_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch,
const float * RESTRICT wa1, const float * RESTRICT wa2, const float *RESTRICT wa3)
{ … }
static void radf5_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch,
const float *wa1, const float *wa2, const float *wa3, const float *wa4)
{ … }
static void radb5_ps(int ido, int l1, const v4sf *RESTRICT cc, v4sf *RESTRICT ch,
const float *wa1, const float *wa2, const float *wa3, const float *wa4)
{ … }
static NEVER_INLINE(v4sf *) rfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2,
const float *wa, const int *ifac) { … }
static NEVER_INLINE(v4sf *) rfftb1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2,
const float *wa, const int *ifac) { … }
static int decompose(int n, int *ifac, const int *ntryh) { … }
static void rffti1_ps(int n, float *wa, int *ifac)
{ … }
void cffti1_ps(int n, float *wa, int *ifac)
{ … }
v4sf *cfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *work2, const float *wa, const int *ifac, int isign) { … }
struct PFFFT_Setup { … };
PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform) { … }
void pffft_destroy_setup(PFFFT_Setup *s) { … }
#if !defined(PFFFT_SIMD_DISABLE)
static void reversed_copy(int N, const v4sf *in, int in_stride, v4sf *out) { … }
static void unreversed_copy(int N, const v4sf *in, v4sf *out, int out_stride) { … }
void pffft_zreorder(PFFFT_Setup *setup, const float *in, float *out, pffft_direction_t direction) { … }
void pffft_cplx_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { … }
void pffft_cplx_preprocess(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { … }
static ALWAYS_INLINE(void) pffft_real_finalize_4x4(const v4sf *in0, const v4sf *in1, const v4sf *in,
const v4sf *e, v4sf *out) { … }
static NEVER_INLINE(void) pffft_real_finalize(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { … }
static ALWAYS_INLINE(void) pffft_real_preprocess_4x4(const v4sf *in,
const v4sf *e, v4sf *out, int first) { … }
static NEVER_INLINE(void) pffft_real_preprocess(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { … }
void pffft_transform_internal(PFFFT_Setup *setup, const float *finput, float *foutput, v4sf *scratch,
pffft_direction_t direction, int ordered) { … }
void pffft_zconvolve_accumulate(PFFFT_Setup *s, const float *a, const float *b, float *ab, float scaling) { … }
#else
#define pffft_zreorder_nosimd …
void pffft_zreorder_nosimd(PFFFT_Setup *setup, const float *in, float *out, pffft_direction_t direction) {
int k, N = setup->N;
if (setup->transform == PFFFT_COMPLEX) {
for (k=0; k < 2*N; ++k) out[k] = in[k];
return;
}
else if (direction == PFFFT_FORWARD) {
float x_N = in[N-1];
for (k=N-1; k > 1; --k) out[k] = in[k-1];
out[0] = in[0];
out[1] = x_N;
} else {
float x_N = in[1];
for (k=1; k < N-1; ++k) out[k] = in[k+1];
out[0] = in[0];
out[N-1] = x_N;
}
}
#define pffft_transform_internal_nosimd …
void pffft_transform_internal_nosimd(PFFFT_Setup *setup, const float *input, float *output, float *scratch,
pffft_direction_t direction, int ordered) {
int Ncvec = setup->Ncvec;
int nf_odd = (setup->ifac[1] & 1);
int stack_allocate = (scratch == 0 ? Ncvec*2 : 1);
VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate);
float *buff[2];
int ib;
if (scratch == 0) scratch = scratch_on_stack;
buff[0] = output; buff[1] = scratch;
if (setup->transform == PFFFT_COMPLEX) ordered = 0;
ib = (nf_odd ^ ordered ? 1 : 0);
if (direction == PFFFT_FORWARD) {
if (setup->transform == PFFFT_REAL) {
ib = (rfftf1_ps(Ncvec*2, input, buff[ib], buff[!ib],
setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1);
} else {
ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib],
setup->twiddle, &setup->ifac[0], -1) == buff[0] ? 0 : 1);
}
if (ordered) {
pffft_zreorder(setup, buff[ib], buff[!ib], PFFFT_FORWARD); ib = !ib;
}
} else {
if (input == buff[ib]) {
ib = !ib;
}
if (ordered) {
pffft_zreorder(setup, input, buff[!ib], PFFFT_BACKWARD);
input = buff[!ib];
}
if (setup->transform == PFFFT_REAL) {
ib = (rfftb1_ps(Ncvec*2, input, buff[ib], buff[!ib],
setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1);
} else {
ib = (cfftf1_ps(Ncvec, input, buff[ib], buff[!ib],
setup->twiddle, &setup->ifac[0], +1) == buff[0] ? 0 : 1);
}
}
if (buff[ib] != output) {
int k;
assert(input==output);
for (k=0; k < Ncvec; ++k) {
float a = buff[ib][2*k], b = buff[ib][2*k+1];
output[2*k] = a; output[2*k+1] = b;
}
ib = !ib;
}
assert(buff[ib] == output);
VLA_ARRAY_ON_STACK_FREE(scratch_on_stack);
}
#define pffft_zconvolve_accumulate_nosimd …
void pffft_zconvolve_accumulate_nosimd(PFFFT_Setup *s, const float *a, const float *b,
float *ab, float scaling) {
int i, Ncvec = s->Ncvec;
if (s->transform == PFFFT_REAL) {
ab[0] += a[0]*b[0]*scaling;
ab[2*Ncvec-1] += a[2*Ncvec-1]*b[2*Ncvec-1]*scaling;
++ab; ++a; ++b; --Ncvec;
}
for (i=0; i < Ncvec; ++i) {
float ar, ai, br, bi;
ar = a[2*i+0]; ai = a[2*i+1];
br = b[2*i+0]; bi = b[2*i+1];
VCPLXMUL(ar, ai, br, bi);
ab[2*i+0] += ar*scaling;
ab[2*i+1] += ai*scaling;
}
}
#endif
void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction) { … }
void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction) { … }