godot/thirdparty/libtheora/x86/mmxidct.c

/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
    last mod: $Id$

 ********************************************************************/

/*MMX acceleration of Theora's iDCT.
  Originally written by Rudolf Marek, based on code from On2's VP3.*/
#include "x86int.h"
#include "../dct.h"

#if defined(OC_X86_ASM)

/*These are offsets into the table of constants below.*/
/*7 rows of cosines, in order: pi/16 * (1 ... 7).*/
#define OC_COSINE_OFFSET
/*A row of 8's.*/
#define OC_EIGHT_OFFSET



/*38 cycles*/
#define OC_IDCT_BEGIN(_y,_x) \

/*38+8=46 cycles.*/
#define OC_ROW_IDCT(_y,_x) \

/*The following macro does two 4x4 transposes in place.
  At entry, we assume:
    r0 = a3 a2 a1 a0
  I(1) = b3 b2 b1 b0
    r2 = c3 c2 c1 c0
    r3 = d3 d2 d1 d0

    r4 = e3 e2 e1 e0
    r5 = f3 f2 f1 f0
    r6 = g3 g2 g1 g0
    r7 = h3 h2 h1 h0

  At exit, we have:
  I(0) = d0 c0 b0 a0
  I(1) = d1 c1 b1 a1
  I(2) = d2 c2 b2 a2
  I(3) = d3 c3 b3 a3

  J(4) = h0 g0 f0 e0
  J(5) = h1 g1 f1 e1
  J(6) = h2 g2 f2 e2
  J(7) = h3 g3 f3 e3

  I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3.
  J(4) J(5) J(6) J(7) is the transpose of r4  r5  r6 r7.

  Since r1 is free at entry, we calculate the Js first.*/
/*19 cycles.*/
#define OC_TRANSPOSE(_y) \

/*38+19=57 cycles.*/
#define OC_COLUMN_IDCT(_y) \

static void oc_idct8x8_slow_mmx(ogg_int16_t _y[64],ogg_int16_t _x[64]){}

/*25 cycles.*/
#define OC_IDCT_BEGIN_10(_y,_x) \

/*25+8=33 cycles.*/
#define OC_ROW_IDCT_10(_y,_x) \

/*25+19=44 cycles'*/
#define OC_COLUMN_IDCT_10(_y) \

static void oc_idct8x8_10_mmx(ogg_int16_t _y[64],ogg_int16_t _x[64]){}

/*Performs an inverse 8x8 Type-II DCT transform.
  The input is assumed to be scaled by a factor of 4 relative to orthonormal
   version of the transform.*/
void oc_idct8x8_mmx(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi){}

#endif