#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE2)
#if !defined(USE_TRANSFORM_AC3)
#define USE_TRANSFORM_AC3 …
#endif
#include <emmintrin.h>
#include "src/dsp/common_sse2.h"
#include "src/dec/vp8i_dec.h"
#include "src/utils/utils.h"
static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) { … }
#if (USE_TRANSFORM_AC3 == 1)
// Adds a reduced inverse transform to a 4x4 block of 8-bit samples in place.
//
// Only the coefficients in[0], in[1] and in[4] are read. For the row 'y' and
// the column 'x' of the block, the value added to the sample is
//   (in[0] + 4 + col_offset[x] + row_offset[y]) >> 3
// where
//   col_offset[] = { d1, c1, -c1, -d1 }   (d1 / c1 derived from in[1])
//   row_offset[] = { d4, c4, -c4, -d4 }   (d4 / c4 derived from in[4])
// and the dX / cX terms are WEBP_TRANSFORM_AC3_MUL1() / _MUL2() applied to the
// matching coefficient. All intermediate sums use saturating signed 16-bit
// arithmetic, and the final samples are saturated to [0, 255].
//
// 'dst' addresses the top-left sample; consecutive rows are BPS bytes apart
// and four bytes are read and then written back on each of the four rows.
static void TransformAC3(const int16_t* in, uint8_t* dst) {
  const __m128i zero = _mm_setzero_si128();
  // Constant term, with the +4 rounding bias for the final '>> 3'.
  const __m128i dc = _mm_set1_epi16(in[0] + 4);
  // Per-row terms, broadcast to every lane.
  const __m128i row_c = _mm_set1_epi16(WEBP_TRANSFORM_AC3_MUL2(in[4]));
  const __m128i row_d = _mm_set1_epi16(WEBP_TRANSFORM_AC3_MUL1(in[4]));
  // Per-column terms. _mm_set_epi16() lists lanes from high to low, so lanes
  // 0..3 receive { d, c, -c, -d }. Lanes 4..7 are left at zero: they are
  // computed along with the rest but never reach memory, since only the low
  // 32 bits of each packed row are stored.
  const int col_c = WEBP_TRANSFORM_AC3_MUL2(in[1]);
  const int col_d = WEBP_TRANSFORM_AC3_MUL1(in[1]);
  const __m128i col_terms =
      _mm_set_epi16(0, 0, 0, 0, -col_d, -col_c, col_c, col_d);
  const __m128i base = _mm_adds_epi16(dc, col_terms);
  __m128i residual[4];
  __m128i packed[4];
  int y;

  // Un-shifted residual for each of the four rows.
  residual[0] = _mm_adds_epi16(base, row_d);
  residual[1] = _mm_adds_epi16(base, row_c);
  residual[2] = _mm_subs_epi16(base, row_c);
  residual[3] = _mm_subs_epi16(base, row_d);

  // Every row is loaded and reconstructed before anything is written back.
  for (y = 0; y < 4; ++y) {
    // Fetch four 8-bit samples and widen them to 16 bits.
    const __m128i samples8 = _mm_cvtsi32_si128(WebPMemToInt32(dst + y * BPS));
    const __m128i samples16 = _mm_unpacklo_epi8(samples8, zero);
    // Add the scaled-down residual, then saturate back to unsigned 8 bits.
    const __m128i sum =
        _mm_adds_epi16(samples16, _mm_srai_epi16(residual[y], 3));
    packed[y] = _mm_packus_epi16(sum, sum);
  }

  // Write the four reconstructed samples of each row.
  for (y = 0; y < 4; ++y) {
    WebPInt32ToMem(dst + y * BPS, _mm_cvtsi128_si32(packed[y]));
  }
}
#endif
#define MM_ABS(p, q) …
static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) { … }
#define FLIP_SIGN_BIT2(a, b) …
#define FLIP_SIGN_BIT4(a, b, c, d) …
static WEBP_INLINE void GetNotHEV_SSE2(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
int hev_thresh, __m128i* const not_hev) { … }
static WEBP_INLINE void GetBaseDelta_SSE2(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
__m128i* const delta) { … }
static WEBP_INLINE void DoSimpleFilter_SSE2(__m128i* const p0,
__m128i* const q0,
const __m128i* const fl) { … }
static WEBP_INLINE void Update2Pixels_SSE2(__m128i* const pi, __m128i* const qi,
const __m128i* const a0_lo,
const __m128i* const a0_hi) { … }
static WEBP_INLINE void NeedsFilter_SSE2(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
int thresh, __m128i* const mask) { … }
static WEBP_INLINE void DoFilter2_SSE2(__m128i* const p1, __m128i* const p0,
__m128i* const q0, __m128i* const q1,
int thresh) { … }
static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
__m128i* const q0, __m128i* const q1,
const __m128i* const mask,
int hev_thresh) { … }
static WEBP_INLINE void DoFilter6_SSE2(__m128i* const p2, __m128i* const p1,
__m128i* const p0, __m128i* const q0,
__m128i* const q1, __m128i* const q2,
const __m128i* const mask,
int hev_thresh) { … }
static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride,
__m128i* const p, __m128i* const q) { … }
static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
const uint8_t* const r8,
int stride,
__m128i* const p1, __m128i* const p0,
__m128i* const q0, __m128i* const q1) { … }
static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
uint8_t* dst, int stride) { … }
static WEBP_INLINE void Store16x4_SSE2(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
uint8_t* r0, uint8_t* r8,
int stride) { … }
static void SimpleVFilter16_SSE2(uint8_t* p, int stride, int thresh) { … }
static void SimpleHFilter16_SSE2(uint8_t* p, int stride, int thresh) { … }
static void SimpleVFilter16i_SSE2(uint8_t* p, int stride, int thresh) { … }
static void SimpleHFilter16i_SSE2(uint8_t* p, int stride, int thresh) { … }
#define MAX_DIFF1(p3, p2, p1, p0, m) …
#define MAX_DIFF2(p3, p2, p1, p0, m) …
#define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4) …
#define LOADUV_H_EDGE(p, u, v, stride) …
#define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) …
#define STOREUV(p, u, v, stride) …
static WEBP_INLINE void ComplexMask_SSE2(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
int thresh, int ithresh,
__m128i* const mask) { … }
static void VFilter16_SSE2(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) { … }
static void HFilter16_SSE2(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) { … }
static void VFilter16i_SSE2(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) { … }
static void HFilter16i_SSE2(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) { … }
static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) { … }
static void HFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) { … }
static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) { … }
static void HFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) { … }
#define DST …
#define AVG3 …
static void VE4_SSE2(uint8_t* dst) { … }
static void LD4_SSE2(uint8_t* dst) { … }
static void VR4_SSE2(uint8_t* dst) { … }
static void VL4_SSE2(uint8_t* dst) { … }
static void RD4_SSE2(uint8_t* dst) { … }
#undef DST
#undef AVG3
static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, int size) { … }
static void TM4_SSE2(uint8_t* dst) { … }
static void TM8uv_SSE2(uint8_t* dst) { … }
static void TM16_SSE2(uint8_t* dst) { … }
static void VE16_SSE2(uint8_t* dst) { … }
static void HE16_SSE2(uint8_t* dst) { … }
static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) { … }
static void DC16_SSE2(uint8_t* dst) { … }
static void DC16NoTop_SSE2(uint8_t* dst) { … }
static void DC16NoLeft_SSE2(uint8_t* dst) { … }
static void DC16NoTopLeft_SSE2(uint8_t* dst) { … }
static void VE8uv_SSE2(uint8_t* dst) { … }
static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) { … }
static void DC8uv_SSE2(uint8_t* dst) { … }
static void DC8uvNoLeft_SSE2(uint8_t* dst) { … }
static void DC8uvNoTop_SSE2(uint8_t* dst) { … }
static void DC8uvNoTopLeft_SSE2(uint8_t* dst) { … }
extern void VP8DspInitSSE2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE2(void) { … }
#else
WEBP_DSP_INIT_STUB(VP8DspInitSSE2)
#endif