/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
    last mod: $Id: mmxidct.c 16503 2009-08-22 18:14:02Z giles $

 ********************************************************************/

/*SSE2 acceleration of Theora's iDCT.*/
#include "x86int.h"
#include "sse2trans.h"
#include "../dct.h"

#if defined(OC_X86_ASM)

/*A table of constants used by the MMX routines.
  The table is declared 16-byte aligned and carries the `used` attribute, so
   the compiler emits it even when no C code references it.
  NOTE(review): the wording "MMX routines" may be left over from the MMX
   version of this file; presumably the SSE2 code below reads this table from
   inline assembly -- confirm against the macro bodies.*/
const unsigned short __attribute__((aligned(16),used)) OC_IDCT_CONSTS[64]= …;

/*Performs the first three stages of the iDCT.
  xmm2, xmm6, xmm3, and xmm5 must contain the corresponding rows of the input
   (accessed in that order).
  The remaining rows must be in _x at their corresponding locations.
  On output, xmm7 down to xmm4 contain rows 0 through 3, and xmm0 up to xmm3
   contain rows 4 through 7.*/
#define OC_IDCT_8x8_ABC(_x) … \

/*Performs the last stage of the iDCT.
  On input, xmm7 down to xmm4 contain rows 0 through 3, and xmm0 up to xmm3
   contain rows 4 through 7.
  On output, xmm0 through xmm7 contain the corresponding rows.*/
#define OC_IDCT_8x8_D … \

/*Performs the last stage of the iDCT.
  On input, xmm7 down to xmm4 contain rows 0 through 3, and xmm0 up to xmm3
   contain rows 4 through 7.
  On output, xmm0 through xmm7 contain the corresponding rows.
  NOTE(review): this description is identical to the one on OC_IDCT_8x8_D.
  The _STORE suffix suggests this variant also writes the rows out to memory
   rather than only leaving them in registers -- confirm against the macro
   body and update the output description accordingly.*/
#define OC_IDCT_8x8_D_STORE … \

/*File-local iDCT helper operating on two 64-element ogg_int16_t arrays.
  NOTE(review): presumably the general (all-coefficient) path, with _y the
   output block and _x the input coefficients -- confirm against the body.*/
static void oc_idct8x8_slow_sse2(ogg_int16_t _y[64],ogg_int16_t _x[64]){ … }

/*For the first step of the 10-coefficient version of the 8x8 iDCT, we only
   need to work with four columns at a time.
  Doing this in MMX is faster on processors with a 64-bit data path.*/
#define OC_IDCT_8x8_10_MMX … \

/*NOTE(review): presumably the 10-coefficient counterpart of OC_IDCT_8x8_ABC
   (the first three iDCT stages) -- confirm against the macro body.*/
#define OC_IDCT_8x8_10_ABC … \

/*File-local iDCT helper operating on two 64-element ogg_int16_t arrays.
  NOTE(review): presumably the 10-coefficient version of the 8x8 iDCT
   mentioned above, with _y the output block and _x the input coefficients --
   confirm against the body.*/
static void oc_idct8x8_10_sse2(ogg_int16_t _y[64],ogg_int16_t _x[64]){ … }

/*Performs an inverse 8x8 Type-II DCT transform.
  The input is assumed to be scaled by a factor of 4 relative to the
   orthonormal version of the transform.
  NOTE(review): _last_zzi is presumably the zig-zag index of the last
   coefficient that may be non-zero, used to choose between the 10-coefficient
   and the general implementation above -- confirm against the body.*/
void oc_idct8x8_sse2(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi){ … }

#endif