// Copyright 2011 Google Inc. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the COPYING file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // SSE41 version of YUV to RGB upsampling functions. // // Author: [email protected] (Somnath Banerjee) #include "src/dsp/dsp.h" #if defined(WEBP_USE_SSE41) #include <assert.h> #include <smmintrin.h> #include <string.h> #include "src/dsp/yuv.h" #ifdef FANCY_UPSAMPLING #if !defined(WEBP_REDUCE_CSP) // We compute (9*a + 3*b + 3*c + d + 8) / 16 as follows // u = (9*a + 3*b + 3*c + d + 8) / 16 // = (a + (a + 3*b + 3*c + d) / 8 + 1) / 2 // = (a + m + 1) / 2 // where m = (a + 3*b + 3*c + d) / 8 // = ((a + b + c + d) / 2 + b + c) / 4 // // Let's say k = (a + b + c + d) / 4. // We can compute k as // k = (s + t + 1) / 2 - ((a^d) | (b^c) | (s^t)) & 1 // where s = (a + d + 1) / 2 and t = (b + c + 1) / 2 // // Then m can be written as // m = (k + t + 1) / 2 - (((b^c) & (s^t)) | (k^t)) & 1 // Computes out = (k + in + 1) / 2 - ((ij & (s^t)) | (k^in)) & 1 #define GET_M … // pack and store two alternating pixel rows #define PACK_AND_STORE … // Loads 17 pixels each from rows r1 and r2 and generates 32 pixels. #define UPSAMPLE_32PIXELS … // Turn the macro into a function for reducing code-size when non-critical static void Upsample32Pixels_SSE41(const uint8_t r1[], const uint8_t r2[], uint8_t* const out) { UPSAMPLE_32PIXELS(r1, r2, out); } #define UPSAMPLE_LAST_BLOCK … #define CONVERT2RGB_32 … #define SSE4_UPSAMPLE_FUNC … // SSE4 variants of the fancy upsampler. 
// Instantiate the line-pair upsamplers for the two 3-channel output orders.
// NOTE(review): the trailing 3 is presumably the per-pixel byte step (compare
// the XSTEP parameter of YUV444_FUNC below) -- confirm against the
// SSE4_UPSAMPLE_FUNC definition.
SSE4_UPSAMPLE_FUNC(UpsampleRgbLinePair_SSE41, VP8YuvToRgb, 3)
SSE4_UPSAMPLE_FUNC(UpsampleBgrLinePair_SSE41, VP8YuvToBgr, 3)

// Drop the helper macros so they do not leak into the rest of the file.
#undef GET_M
#undef PACK_AND_STORE
#undef UPSAMPLE_32PIXELS
#undef UPSAMPLE_LAST_BLOCK
// NOTE(review): no CONVERT2RGB macro is defined in this file (only
// CONVERT2RGB_32). Undefining an unknown name is harmless, but this line looks
// stale -- confirm and consider removing.
#undef CONVERT2RGB
#undef CONVERT2RGB_32
#undef SSE4_UPSAMPLE_FUNC

#endif   // WEBP_REDUCE_CSP

//------------------------------------------------------------------------------
// Entry point

// Table of upsampler function pointers, defined elsewhere.
extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];

extern void WebPInitUpsamplersSSE41(void);

// SSE4.1 initializer for the fancy upsamplers. Only compiled when both
// WEBP_USE_SSE41 and FANCY_UPSAMPLING are defined; otherwise the stub at the
// bottom of the file is used instead.
WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersSSE41(void) { … }

#endif  // FANCY_UPSAMPLING

//------------------------------------------------------------------------------

// Table of YUV444 converter function pointers, defined elsewhere.
extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
extern void WebPInitYUV444ConvertersSSE41(void);

// Generates a YUV444 conversion function named FUNC_NAME from the SSE4.1
// routine CALL, the plain-C routine CALL_C and the step XSTEP.
#define YUV444_FUNC(FUNC_NAME, CALL, CALL_C, XSTEP) …

#if !defined(WEBP_REDUCE_CSP)
YUV444_FUNC(Yuv444ToRgb_SSE41, VP8YuvToRgb32_SSE41, WebPYuv444ToRgb_C, 3)
YUV444_FUNC(Yuv444ToBgr_SSE41, VP8YuvToBgr32_SSE41, WebPYuv444ToBgr_C, 3)
#endif  // WEBP_REDUCE_CSP

// SSE4.1 initializer for the YUV444 converters. Unlike the upsampler
// initializer above, it does not depend on FANCY_UPSAMPLING.
WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersSSE41(void) { … }

#else  // !WEBP_USE_SSE41

// SSE4.1 is not enabled: provide the stub in place of the real initializer.
WEBP_DSP_INIT_STUB(WebPInitYUV444ConvertersSSE41)

#endif  // WEBP_USE_SSE41

// The real WebPInitUpsamplersSSE41 only exists when both FANCY_UPSAMPLING and
// WEBP_USE_SSE41 are defined; in every other configuration emit the stub so
// the symbol is still available.
#if !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_SSE41))
WEBP_DSP_INIT_STUB(WebPInitUpsamplersSSE41)
#endif