/* * Copyright (c) 2018, Alliance for Open Media. All rights reserved. * * This source code is subject to the terms of the BSD 2 Clause License and * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License * was not distributed with this source code in the LICENSE file, you can * obtain it at www.aomedia.org/license/software. If the Alliance for Open * Media Patent License 1.0 was not distributed with this source code in the * PATENTS file, you can obtain it at www.aomedia.org/license/patent. */ #include <assert.h> #include <smmintrin.h> #include <string.h> #include "config/av1_rtcd.h" #include "aom_dsp/x86/synonyms.h" #include "av1/common/enums.h" #include "av1/common/reconintra.h" //------------------------------------------------------------------------------ // filter_intra_predictor_sse4_1 // This shuffle mask selects 32-bit blocks in the order 0, 1, 0, 1, which // duplicates the first 8 bytes of a 128-bit vector into the second 8 bytes. #define DUPLICATE_FIRST_HALF … // Apply all filter taps to the given 7 packed 16-bit values, keeping the 8th // at zero to preserve the sum. /* Parameter notes for filter_4x2_sse4_1 (the body is not visible in this view, so roles are inferred from the signature and names; confirm against the implementation): dst and stride are the destination pointer and, presumably, its row pitch; pixels points to the vector holding the packed input values described above; taps_0_1, taps_2_3, taps_4_5 and taps_6_7 point to four vectors of filter taps, apparently two taps per vector. The function returns nothing; the name suggests it produces a 4x2 block of output pixels at dst. */ static inline void filter_4x2_sse4_1(uint8_t *dst, const ptrdiff_t stride, const __m128i *pixels, const __m128i *taps_0_1, const __m128i *taps_2_3, const __m128i *taps_4_5, const __m128i *taps_6_7) { … } // 4xH transform sizes are given special treatment because xx_loadl_64 goes out // of bounds and every block involves the left column. 
This implementation // loads TL from the top row for the first block, so it is not /* NOTE(review): the sentence above is cut off after "so it is not" in this file; restore the missing remainder from the upstream history rather than guessing at it. */ /* filter_4xh: predictor path for blocks that are 4 pixels wide and height rows tall, per the comment above. Body not visible in this view; parameter roles are inferred from names and should be confirmed: dest and stride are the output buffer and presumably its row pitch; top_ptr and left_ptr are presumably the neighbouring pixels above and to the left of the block; mode presumably selects the filter-intra tap set; height is the block height. */ static inline void filter_4xh(uint8_t *dest, ptrdiff_t stride, const uint8_t *const top_ptr, const uint8_t *const left_ptr, int mode, const int height) { … } /* filter_intra_predictor_sse4_1: file-local predictor taking explicit width and height and untyped (void pointer) destination, top_row and left_column arguments. Body not visible in this view; presumably it handles the general block sizes and hands 4-wide blocks to filter_4xh. TODO confirm. */ static inline void filter_intra_predictor_sse4_1(void *const dest, ptrdiff_t stride, const void *const top_row, const void *const left_column, int mode, const int width, const int height) { … } /* av1_filter_intra_predictor_sse4_1: the only non-static function in this listing, i.e. the externally visible entry point. It receives a TX_SIZE instead of explicit dimensions; presumably it converts tx_size to width and height and forwards dst, stride, above, left and mode to filter_intra_predictor_sse4_1. TODO confirm against the body, which is not visible here. */ void av1_filter_intra_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode) { … }