godot/thirdparty/libwebp/src/dsp/upsampling_sse41.c

// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE41 version of YUV to RGB upsampling functions.
//
// Author: [email protected] (Somnath Banerjee)

#include "src/dsp/dsp.h"

#if defined(WEBP_USE_SSE41)

#include <assert.h>
#include <smmintrin.h>
#include <string.h>
#include "src/dsp/yuv.h"

#ifdef FANCY_UPSAMPLING

#if !defined(WEBP_REDUCE_CSP)

// We compute (9*a + 3*b + 3*c + d + 8) / 16 as follows
// u = (9*a + 3*b + 3*c + d + 8) / 16
//   = (a + (a + 3*b + 3*c + d) / 8 + 1) / 2
//   = (a + m + 1) / 2
// where m = (a + 3*b + 3*c + d) / 8
//         = ((a + b + c + d) / 2 + b + c) / 4
//
// Let's say  k = (a + b + c + d) / 4.
// We can compute k as
// k = (s + t + 1) / 2 - ((a^d) | (b^c) | (s^t)) & 1
// where s = (a + d + 1) / 2 and t = (b + c + 1) / 2
//
// Then m can be written as
// m = (k + t + 1) / 2 - (((b^c) & (s^t)) | (k^t)) & 1

// Computes out = (k + in + 1) / 2 - ((ij & (s^t)) | (k^in)) & 1
#define GET_M

// pack and store two alternating pixel rows
#define PACK_AND_STORE

// Loads 17 pixels each from rows r1 and r2 and generates 32 pixels.
#define UPSAMPLE_32PIXELS

// Turn the macro into a function for reducing code-size when non-critical
static void Upsample32Pixels_SSE41(const uint8_t r1[], const uint8_t r2[],
                                  uint8_t* const out) {
  UPSAMPLE_32PIXELS(r1, r2, out);
}

#define UPSAMPLE_LAST_BLOCK

#define CONVERT2RGB_32

#define SSE4_UPSAMPLE_FUNC

// SSE4 variants of the fancy upsampler.
SSE4_UPSAMPLE_FUNC(UpsampleRgbLinePair_SSE41,  VP8YuvToRgb,  3)
SSE4_UPSAMPLE_FUNC(UpsampleBgrLinePair_SSE41,  VP8YuvToBgr,  3)

#undef GET_M
#undef PACK_AND_STORE
#undef UPSAMPLE_32PIXELS
#undef UPSAMPLE_LAST_BLOCK
#undef CONVERT2RGB
#undef CONVERT2RGB_32
#undef SSE4_UPSAMPLE_FUNC

#endif   // WEBP_REDUCE_CSP

//------------------------------------------------------------------------------
// Entry point

extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];

extern void WebPInitUpsamplersSSE41(void);

WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersSSE41(void) {
#if !defined(WEBP_REDUCE_CSP)
  WebPUpsamplers[MODE_RGB]  = UpsampleRgbLinePair_SSE41;
  WebPUpsamplers[MODE_BGR]  = UpsampleBgrLinePair_SSE41;
#endif   // WEBP_REDUCE_CSP
}

#endif  // FANCY_UPSAMPLING

//------------------------------------------------------------------------------

extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
extern void WebPInitYUV444ConvertersSSE41(void);

#define YUV444_FUNC

#if !defined(WEBP_REDUCE_CSP)
YUV444_FUNC(Yuv444ToRgb_SSE41, VP8YuvToRgb32_SSE41, WebPYuv444ToRgb_C, 3)
YUV444_FUNC(Yuv444ToBgr_SSE41, VP8YuvToBgr32_SSE41, WebPYuv444ToBgr_C, 3)
#endif  // WEBP_REDUCE_CSP

WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersSSE41(void) {
#if !defined(WEBP_REDUCE_CSP)
  WebPYUV444Converters[MODE_RGB]       = Yuv444ToRgb_SSE41;
  WebPYUV444Converters[MODE_BGR]       = Yuv444ToBgr_SSE41;
#endif   // WEBP_REDUCE_CSP
}

#else

WEBP_DSP_INIT_STUB(WebPInitYUV444ConvertersSSE41)

#endif  // WEBP_USE_SSE41

#if !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_SSE41))
WEBP_DSP_INIT_STUB(WebPInitUpsamplersSSE41)
#endif