#ifndef AOM_AOM_DSP_X86_LPF_COMMON_SSE2_H_
#define AOM_AOM_DSP_X86_LPF_COMMON_SSE2_H_
#include <emmintrin.h>
#include "config/aom_config.h"
// Store helpers taking a destination `dst` and a vector value `v`.
// NOTE(review): the expansions are not visible in this view. The names suggest
// "store low, unaligned" / "store high, unaligned" (i.e. the low and high
// 64-bit halves of a 128-bit vector) -- confirm against the macro bodies.
#define mm_storelu(dst, v) …
#define mm_storehu(dst, v) …
// Takes six __m128i inputs (x0..x5) and six __m128i outputs (d0..d5), all by
// pointer.
// NOTE(review): body not visible here. From the name this presumably
// transposes a 6x6 block of high-bitdepth (16-bit) samples, one row per input
// vector, writing one transposed row per output vector -- confirm lane usage
// (only 6 of the 8 16-bit lanes would carry data) against the implementation.
static inline void highbd_transpose6x6_sse2(__m128i *x0, __m128i *x1,
__m128i *x2, __m128i *x3,
__m128i *x4, __m128i *x5,
__m128i *d0, __m128i *d1,
__m128i *d2, __m128i *d3,
__m128i *d4, __m128i *d5) { … }
// Takes four __m128i inputs (x0..x3) and four __m128i outputs (d0..d3), all by
// pointer.
// NOTE(review): body not visible here. The name and the d0..d3 outputs suggest
// this produces the first four rows of a high-bitdepth 4x8 -> 8x4 transpose,
// presumably from the low halves of the inputs -- confirm.
static inline void highbd_transpose4x8_8x4_low_sse2(__m128i *x0, __m128i *x1,
__m128i *x2, __m128i *x3,
__m128i *d0, __m128i *d1,
__m128i *d2, __m128i *d3) { … }
// Takes four __m128i inputs (x0..x3) and four __m128i outputs (d4..d7), all by
// pointer.
// NOTE(review): body not visible here. The name and the d4..d7 outputs suggest
// this produces the last four rows of a high-bitdepth 4x8 -> 8x4 transpose,
// presumably from the high halves of the inputs -- confirm.
static inline void highbd_transpose4x8_8x4_high_sse2(__m128i *x0, __m128i *x1,
__m128i *x2, __m128i *x3,
__m128i *d4, __m128i *d5,
__m128i *d6, __m128i *d7) { … }
// Takes four __m128i inputs (x0..x3) and eight __m128i outputs (d0..d7), all
// by pointer.
// NOTE(review): body not visible here. Presumably the full high-bitdepth
// 4x8 -> 8x4 transpose, possibly built from the _low (d0..d3) and _high
// (d4..d7) variants declared above -- confirm.
static inline void highbd_transpose4x8_8x4_sse2(__m128i *x0, __m128i *x1,
__m128i *x2, __m128i *x3,
__m128i *d0, __m128i *d1,
__m128i *d2, __m128i *d3,
__m128i *d4, __m128i *d5,
__m128i *d6, __m128i *d7) { … }
// Takes eight __m128i inputs (x0..x7) and four __m128i outputs (d0..d3), all
// by pointer.
// NOTE(review): body not visible here. The name and the d0..d3 outputs suggest
// this produces the first four rows of a high-bitdepth (16-bit) 8x8 transpose
// -- confirm.
static inline void highbd_transpose8x8_low_sse2(__m128i *x0, __m128i *x1,
__m128i *x2, __m128i *x3,
__m128i *x4, __m128i *x5,
__m128i *x6, __m128i *x7,
__m128i *d0, __m128i *d1,
__m128i *d2, __m128i *d3) { … }
// Takes eight __m128i inputs (x0..x7) and four __m128i outputs (d4..d7), all
// by pointer.
// NOTE(review): body not visible here. The name and the d4..d7 outputs suggest
// this produces the last four rows of a high-bitdepth (16-bit) 8x8 transpose
// -- confirm.
static inline void highbd_transpose8x8_high_sse2(__m128i *x0, __m128i *x1,
__m128i *x2, __m128i *x3,
__m128i *x4, __m128i *x5,
__m128i *x6, __m128i *x7,
__m128i *d4, __m128i *d5,
__m128i *d6, __m128i *d7) { … }
// Takes eight __m128i inputs (x0..x7) and eight __m128i outputs (d0..d7), all
// by pointer.
// NOTE(review): body not visible here. Presumably the full high-bitdepth
// (16-bit) 8x8 transpose, possibly composed of the _low (d0..d3) and _high
// (d4..d7) variants declared above -- confirm.
static inline void highbd_transpose8x8_sse2(
__m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3, __m128i *x4,
__m128i *x5, __m128i *x6, __m128i *x7, __m128i *d0, __m128i *d1,
__m128i *d2, __m128i *d3, __m128i *d4, __m128i *d5, __m128i *d6,
__m128i *d7) { … }
// Takes eight __m128i inputs (x0..x7) and eight __m128i outputs (d0..d7), all
// by pointer -- the same parameter list as highbd_transpose8x8_sse2.
// NOTE(review): body not visible here. The "8x16" in the name is not
// explained by the signature alone; presumably the pointers are treated as
// arrays with more than one vector per row, or the function is called once
// per half -- confirm against the implementation and call sites.
static inline void highbd_transpose8x16_sse2(
__m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3, __m128i *x4,
__m128i *x5, __m128i *x6, __m128i *x7, __m128i *d0, __m128i *d1,
__m128i *d2, __m128i *d3, __m128i *d4, __m128i *d5, __m128i *d6,
__m128i *d7) { … }
// Takes four __m128i inputs (x0..x3) and four __m128i outputs (d0..d3), all by
// pointer.
// NOTE(review): body not visible here. With no "highbd" prefix this is
// presumably the 8-bit-sample counterpart of highbd_transpose4x8_8x4_low_sse2,
// producing the first four rows of a 4x8 -> 8x4 transpose -- confirm.
static inline void transpose4x8_8x4_low_sse2(__m128i *x0, __m128i *x1,
__m128i *x2, __m128i *x3,
__m128i *d0, __m128i *d1,
__m128i *d2, __m128i *d3) { … }
// Takes four __m128i inputs (x0..x3) and eight __m128i outputs (d0..d7), all
// by pointer.
// NOTE(review): body not visible here. Presumably the full 4x8 -> 8x4
// transpose for 8-bit samples, one output row per vector -- confirm.
static inline void transpose4x8_8x4_sse2(__m128i *x0, __m128i *x1, __m128i *x2,
__m128i *x3, __m128i *d0, __m128i *d1,
__m128i *d2, __m128i *d3, __m128i *d4,
__m128i *d5, __m128i *d6,
__m128i *d7) { … }
// Takes eight __m128i inputs (x0..x7) and four __m128i outputs (d0..d3), all
// by pointer.
// NOTE(review): body not visible here. Presumably produces the first four
// rows of an 8x8 transpose of 8-bit samples -- confirm.
static inline void transpose8x8_low_sse2(__m128i *x0, __m128i *x1, __m128i *x2,
__m128i *x3, __m128i *x4, __m128i *x5,
__m128i *x6, __m128i *x7, __m128i *d0,
__m128i *d1, __m128i *d2,
__m128i *d3) { … }
// Takes eight __m128i inputs (x0..x7) and four __m128i outputs (d0d1, d2d3,
// d4d5, d6d7), all by pointer.
// NOTE(review): body not visible here. The paired output names suggest each
// output vector packs two transposed 8-byte rows (e.g. row 0 and row 1 in
// d0d1) of an 8x8 transpose of 8-bit samples -- confirm which row sits in
// which half.
static inline void transpose8x8_sse2(__m128i *x0, __m128i *x1, __m128i *x2,
__m128i *x3, __m128i *x4, __m128i *x5,
__m128i *x6, __m128i *x7, __m128i *d0d1,
__m128i *d2d3, __m128i *d4d5,
__m128i *d6d7) { … }
// Takes sixteen __m128i inputs (x0..x15) and eight __m128i outputs (d0..d7),
// all by pointer.
// NOTE(review): body not visible here. Presumably transposes sixteen input
// rows of eight 8-bit samples each into eight output rows of sixteen samples
// (one full 128-bit vector per output row) -- confirm which half of each
// input vector is consumed.
static inline void transpose16x8_8x16_sse2(
__m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3, __m128i *x4,
__m128i *x5, __m128i *x6, __m128i *x7, __m128i *x8, __m128i *x9,
__m128i *x10, __m128i *x11, __m128i *x12, __m128i *x13, __m128i *x14,
__m128i *x15, __m128i *d0, __m128i *d1, __m128i *d2, __m128i *d3,
__m128i *d4, __m128i *d5, __m128i *d6, __m128i *d7) { … }
// Takes eight __m128i inputs (x0..x7) and eight __m128i outputs (d0d1 ..
// d14d15), all by pointer.
// NOTE(review): body not visible here. Presumably the inverse shape of
// transpose16x8_8x16_sse2: eight input rows of sixteen 8-bit samples become
// sixteen 8-byte output rows, with the paired names suggesting two rows packed
// per output vector -- confirm the packing order.
static inline void transpose8x16_16x8_sse2(
__m128i *x0, __m128i *x1, __m128i *x2, __m128i *x3, __m128i *x4,
__m128i *x5, __m128i *x6, __m128i *x7, __m128i *d0d1, __m128i *d2d3,
__m128i *d4d5, __m128i *d6d7, __m128i *d8d9, __m128i *d10d11,
__m128i *d12d13, __m128i *d14d15) { … }
// Memory-to-memory helper: takes two input byte pointers (in0, in1), an input
// step in_p, an output byte pointer out and an output step out_p.
// NOTE(review): body not visible here. in_p / out_p are presumably row
// strides in bytes, and in0 / in1 presumably address two 8-row halves of a
// 16-row source block -- confirm against the implementation and callers.
static inline void transpose_16x8(unsigned char *in0, unsigned char *in1,
int in_p, unsigned char *out, int out_p) { … }
// Memory-to-memory helper: takes a source byte pointer with step in_p and a
// destination byte pointer with step out_p.
// NOTE(review): body not visible here. Presumably reads a 16-wide by 8-tall
// block from src and writes its 8-wide by 16-tall transpose to dst, with
// in_p / out_p as row strides in bytes -- confirm orientation and stride units.
static inline void transpose_16x8_to_8x16(unsigned char *src, int in_p,
unsigned char *dst, int out_p) { … }
// Takes parallel arrays of source and destination byte pointers, an input
// step in_p, an output step out_p, and a count num_8x8_to_transpose.
// NOTE(review): body not visible here. Presumably transposes one 8x8 byte
// block per index i in [0, num_8x8_to_transpose), reading from src[i] and
// writing to dst[i]; both arrays would then need at least that many entries
// -- confirm.
static inline void transpose_8xn(unsigned char *src[], int in_p,
unsigned char *dst[], int out_p,
int num_8x8_to_transpose) { … }
#endif