// godot/thirdparty/libwebp/src/dsp/upsampling_sse2.c

// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE2 version of YUV to RGB upsampling functions.
//
// Author: [email protected] (Somnath Banerjee)

#include "src/dsp/dsp.h"

#if defined(WEBP_USE_SSE2)

#include <assert.h>
#include <emmintrin.h>
#include <string.h>
#include "src/dsp/yuv.h"

#ifdef FANCY_UPSAMPLING

// We compute (9*a + 3*b + 3*c + d + 8) / 16 as follows
// u = (9*a + 3*b + 3*c + d + 8) / 16
//   = (a + (a + 3*b + 3*c + d) / 8 + 1) / 2
//   = (a + m + 1) / 2
// where m = (a + 3*b + 3*c + d) / 8
//         = ((a + b + c + d) / 2 + b + c) / 4
//
// Let's say  k = (a + b + c + d) / 4.
// We can compute k as
// k = (s + t + 1) / 2 - (((a^d) | (b^c) | (s^t)) & 1)
// where s = (a + d + 1) / 2 and t = (b + c + 1) / 2
//
// Then m can be written as
// m = (k + t + 1) / 2 - ((((b^c) & (s^t)) | (k^t)) & 1)

// Computes out = (k + in + 1) / 2 - (((ij & (s^t)) | (k^in)) & 1)
// i.e. a rounded-up average of k and in, minus a 0/1 correction taken from
// the least-significant bit of the bracketed term.
// NOTE(review): as visible here GET_M is an object-like macro with an empty
// replacement list, so it expands to nothing -- confirm whether the body was
// elided from this view.
#define GET_M

// Pack and store two alternating pixel rows.
// NOTE(review): as visible here the macro takes no parameters and has an empty
// replacement list, so it expands to nothing -- confirm whether the body was
// elided from this view.
#define PACK_AND_STORE

// Loads 17 pixels each from rows r1 and r2 and generates 32 pixels.
// NOTE(review): as visible here the macro takes no parameters and has an empty
// replacement list, so it expands to nothing -- confirm whether the body was
// elided from this view.
#define UPSAMPLE_32PIXELS

// Out-of-line function form of the upsampling macro, meant for call sites
// where speed is non-critical and smaller code size is preferred.
//
//   r1, r2: the two input rows.
//   out:    destination buffer.
//
// In this revision the function contains no statements: it reads nothing from
// r1/r2 and writes nothing to out.
static void Upsample32Pixels_SSE2(const uint8_t r1[], const uint8_t r2[],
                                  uint8_t* const out) {
  // Intentionally empty; the casts only mark the parameters as unused.
  (void)r1;
  (void)r2;
  (void)out;
}

// Remaining helper macros of the fancy-upsampling section. All three are
// #undef'd again once the SSE2_UPSAMPLE_FUNC instantiations have been emitted.
// NOTE(review): as visible here each one is object-like with an empty
// replacement list and therefore expands to nothing -- confirm whether the
// bodies were elided from this view.

// Presumably handles the trailing block of a row -- TODO confirm.
#define UPSAMPLE_LAST_BLOCK

// Presumably converts a 32-pixel batch to RGB -- TODO confirm.
#define CONVERT2RGB_32

// Used below once per generated upsampler variant.
#define SSE2_UPSAMPLE_FUNC

// SSE2 variants of the fancy upsampler.
// Each SSE2_UPSAMPLE_FUNC token is one expansion of the macro of that name.
// Two expansions are emitted unconditionally; five more are emitted only when
// WEBP_REDUCE_CSP is not defined.
// NOTE(review): with the empty SSE2_UPSAMPLE_FUNC definition visible in this
// file, every line below expands to nothing -- confirm against the full source.
SSE2_UPSAMPLE_FUNC
SSE2_UPSAMPLE_FUNC

#if !defined(WEBP_REDUCE_CSP)
SSE2_UPSAMPLE_FUNC
SSE2_UPSAMPLE_FUNC
SSE2_UPSAMPLE_FUNC
SSE2_UPSAMPLE_FUNC
SSE2_UPSAMPLE_FUNC
#endif   // WEBP_REDUCE_CSP

// Remove the section-local helper macros so that their names are no longer
// defined for the remainder of the translation unit.
// NOTE(review): CONVERT2RGB is never #define'd in this file as shown. Its
// #undef is harmless (undefining an undefined name is a no-op) but looks
// stale unless an included header defines it -- confirm before removing.
#undef GET_M
#undef PACK_AND_STORE
#undef UPSAMPLE_32PIXELS
#undef UPSAMPLE_LAST_BLOCK
#undef CONVERT2RGB
#undef CONVERT2RGB_32
#undef SSE2_UPSAMPLE_FUNC

//------------------------------------------------------------------------------
// Entry point

// Table of upsampler function pointers; only declared here (incomplete array
// type), the definition lives in another translation unit.
extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];

// Prototype declared ahead of the definition so the function has a visible
// declaration with external linkage.
extern void WebPInitUpsamplersSSE2(void);

// SSE2 initialisation hook for the fancy upsamplers. Only compiled when both
// WEBP_USE_SSE2 and FANCY_UPSAMPLING are defined.
// NOTE(review): as visible here the body is empty, so calling it leaves
// WebPUpsamplers untouched. Presumably it is meant to install the SSE2
// variants into that table -- confirm whether the body was elided.
WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersSSE2(void) {}

#endif  // FANCY_UPSAMPLING

//------------------------------------------------------------------------------

// Table of YUV444 converter function pointers; only declared here (incomplete
// array type), the definition lives in another translation unit.
extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
// Prototype for the SSE2 init hook defined further down in this file.
extern void WebPInitYUV444ConvertersSSE2(void);

// Function-generating macro for the YUV444 converters.
//   FUNC_NAME: name of the function to generate.
//   CALL:      SSE2 routine named by the instantiation.
//   CALL_C:    plain-C routine named by the instantiation.
//   XSTEP:     small integer constant (2, 3 or 4 in the uses below);
//              presumably the number of output bytes per pixel -- TODO confirm.
// NOTE(review): as visible here the replacement list is empty, so every
// instantiation expands to nothing -- confirm whether the body was elided.
#define YUV444_FUNC(FUNC_NAME, CALL, CALL_C, XSTEP)

// Instantiations of YUV444_FUNC, one per output colorspace. The RGBA and BGRA
// variants are always emitted; the remaining five only when WEBP_REDUCE_CSP is
// not defined.
YUV444_FUNC(Yuv444ToRgba_SSE2, VP8YuvToRgba32_SSE2, WebPYuv444ToRgba_C, 4)
YUV444_FUNC(Yuv444ToBgra_SSE2, VP8YuvToBgra32_SSE2, WebPYuv444ToBgra_C, 4)
#if !defined(WEBP_REDUCE_CSP)
YUV444_FUNC(Yuv444ToRgb_SSE2, VP8YuvToRgb32_SSE2, WebPYuv444ToRgb_C, 3)
YUV444_FUNC(Yuv444ToBgr_SSE2, VP8YuvToBgr32_SSE2, WebPYuv444ToBgr_C, 3)
YUV444_FUNC(Yuv444ToArgb_SSE2, VP8YuvToArgb32_SSE2, WebPYuv444ToArgb_C, 4)
// The trailing backslash below merely splices the two physical lines; it is
// not required outside a #define but is harmless.
YUV444_FUNC(Yuv444ToRgba4444_SSE2, VP8YuvToRgba444432_SSE2, \
            WebPYuv444ToRgba4444_C, 2)
YUV444_FUNC(Yuv444ToRgb565_SSE2, VP8YuvToRgb56532_SSE2, WebPYuv444ToRgb565_C, 2)
#endif   // WEBP_REDUCE_CSP

// SSE2 initialisation hook for the YUV444 converters. Compiled whenever
// WEBP_USE_SSE2 is defined, independently of FANCY_UPSAMPLING.
// NOTE(review): as visible here the body is empty, so calling it leaves
// WebPYUV444Converters untouched. Presumably it is meant to install the
// Yuv444To*_SSE2 functions into that table -- confirm whether the body was
// elided.
WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersSSE2(void) {}

#else

// Reached only when WEBP_USE_SSE2 is not defined, i.e. when the real
// WebPInitYUV444ConvertersSSE2 above is not compiled.
// NOTE(review): WEBP_DSP_INIT_STUB is not defined in this file (presumably it
// comes from src/dsp/dsp.h and emits a placeholder init function so the symbol
// still exists) -- confirm.
WEBP_DSP_INIT_STUB(WebPInitYUV444ConvertersSSE2)

#endif  // WEBP_USE_SSE2

// The real WebPInitUpsamplersSSE2 is compiled only when both FANCY_UPSAMPLING
// and WEBP_USE_SSE2 are defined; in every other configuration this stub
// stands in for it.
// NOTE(review): WEBP_DSP_INIT_STUB is not defined in this file (presumably it
// comes from src/dsp/dsp.h) -- confirm what it expands to.
#if !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_SSE2))
WEBP_DSP_INIT_STUB(WebPInitUpsamplersSSE2)
#endif