// Copyright 2009 Intel Corporation // All Rights Reserved // // Permission is granted to use, copy, distribute and prepare derivative works of this // software for any purpose and without fee, provided, that the above copyright notice // and this statement appear in all copies. Intel makes no representations about the // suitability of this software for any purpose. THIS SOFTWARE IS PROVIDED "AS IS." // INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY, // INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE, // INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Intel does not // assume any responsibility for any errors which may appear in this software nor any // responsibility to update it. // // From: // https://software.intel.com/sites/default/files/m/d/4/1/d/8/UsingIntelAVXToImplementIDCT-r1_5.pdf // https://software.intel.com/file/29048 // // Requires SSE // #ifdef _MSC_VER #include <intrin.h> #endif #include <immintrin.h> #ifdef _MSC_VER #define JPGD_SIMD_ALIGN … #else #define JPGD_SIMD_ALIGN(type, name) … #endif #define BITS_INV_ACC … #define SHIFT_INV_ROW … #define SHIFT_INV_COL … const short IRND_INV_ROW = …; //1 << (SHIFT_INV_ROW-1) const short IRND_INV_COL = …; // 1 << (SHIFT_INV_COL-1) const short IRND_INV_CORR = …; // correction -1.0 and round JPGD_SIMD_ALIGN(short, shortM128_one_corr[8]) = …; JPGD_SIMD_ALIGN(short, shortM128_round_inv_row[8]) = …; JPGD_SIMD_ALIGN(short, shortM128_round_inv_col[8]) = …; JPGD_SIMD_ALIGN(short, shortM128_round_inv_corr[8])= …; JPGD_SIMD_ALIGN(short, shortM128_tg_1_16[8]) = …; // tg * (2<<16) + 0.5 JPGD_SIMD_ALIGN(short, shortM128_tg_2_16[8]) = …; // tg * (2<<16) + 0.5 JPGD_SIMD_ALIGN(short, shortM128_tg_3_16[8]) = …; // tg * (2<<16) + 0.5 JPGD_SIMD_ALIGN(short, shortM128_cos_4_16[8]) = …;// cos * (2<<16) + 0.5 //----------------------------------------------------------------------------- // Table for rows 0,4 - constants are multiplied on cos_4_16 // w15 w14 w11 w10 w07 w06 w03 w02 // w29 w28 w25 w24 w21 w20 w17 w16 // w31 w30 w27 w26 w23 w22 w19 w18 //movq -> w05 w04 w01 w00 JPGD_SIMD_ALIGN(short, shortM128_tab_i_04[]) = …; // w31 w30 w27 w26 // Table for rows 1,7 - constants are multiplied on cos_1_16 //movq -> w05 w04 w01 w00 JPGD_SIMD_ALIGN(short, shortM128_tab_i_17[]) = …; // w31 w30 w27 w26 // Table for rows 2,6 - constants are multiplied on cos_2_16 //movq -> w05 w04 w01 w00 JPGD_SIMD_ALIGN(short, shortM128_tab_i_26[]) = …; // w31 w30 w27 w26 // Table for rows 3,5 - constants are multiplied on cos_3_16 //movq -> w05 w04 w01 w00 JPGD_SIMD_ALIGN(short, shortM128_tab_i_35[]) = …; // w31 w30 w27 w26 JPGD_SIMD_ALIGN(short, shortM128_128[8]) = …; void idctSSEShortU8(const short *pInput, uint8_t * pOutputUB) { … }