/* * Copyright 2019 The libgav1 Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LIBGAV1_SRC_DSP_DSP_H_ #define LIBGAV1_SRC_DSP_DSP_H_ #include <cstddef> #include <cstdint> #include <cstdlib> #include "src/dsp/common.h" #include "src/dsp/constants.h" #include "src/dsp/film_grain_common.h" #include "src/utils/cpu.h" #include "src/utils/reference_info.h" #include "src/utils/types.h" namespace libgav1 { namespace dsp { #if !defined(LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS) #define LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS … #endif enum IntraPredictor : uint8_t { … }; // List of valid 1D transforms. enum Transform1d : uint8_t { … }; // List of valid 1D transform sizes. Not all transforms may be available for all // the sizes. enum Transform1dSize : uint8_t { … }; // The maximum width of the loop filter, fewer pixels may be filtered depending // on strength thresholds. enum LoopFilterSize : uint8_t { … }; enum : uint8_t { … }; //------------------------------------------------------------------------------ // ToString() // // These functions are meant to be used only in debug logging and within tests. // They are defined inline to avoid including the strings in the release // library when logging is disabled; unreferenced functions will not be added to // any object file in that case. 
inline const char* ToString(const IntraPredictor predictor) { … } inline const char* ToString(const Transform1d transform) { … } inline const char* ToString(const Transform1dSize transform_size) { … } inline const char* ToString(const LoopFilterSize filter_size) { … } inline const char* ToString(const LoopFilterType filter_type) { … } //------------------------------------------------------------------------------ // Intra predictors. Section 7.11.2. // These require access to one or both of the top row and left column. Some may // access the top-left (top[-1]), top-right (top[width+N]), bottom-left // (left[height+N]) or upper-left (left[-1]). // Intra predictor function signature. Sections 7.11.2.2, 7.11.2.4 (#10,#11), // 7.11.2.5, 7.11.2.6. // |dst| is an unaligned pointer to the output block. Pixel size is determined // by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to // the row above |dst|. |left| is an aligned vector of the column to the left // of |dst|. top-left and bottom-left may be accessed. // The pointer arguments do not alias one another. IntraPredictorFunc; IntraPredictorFuncs; // Directional intra predictor function signature, zone 1 (0 < angle < 90). // Section 7.11.2.4 (#7). // |dst| is an unaligned pointer to the output block. Pixel size is determined // by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to // the row above |dst|. |width| and |height| give the dimensions of the block. // |xstep| is the scaled starting index to |top| from // kDirectionalIntraPredictorDerivative. |upsampled_top| indicates whether // |top| has been upsampled as described in '7.11.2.11. Intra edge upsample // process'. This can occur in cases with |width| + |height| <= 16. top-right // is accessed. // The pointer arguments do not alias one another. DirectionalIntraPredictorZone1Func; // Directional intra predictor function signature, zone 2 (90 < angle < 180). // Section 7.11.2.4 (#8). 
// |dst| is an unaligned pointer to the output block. Pixel size is determined // by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to // the row above |dst|. |left| is an aligned vector of the column to the left of // |dst|. |width| and |height| give the dimensions of the block. |xstep| and // |ystep| are the scaled starting index to |top| and |left|, respectively, // from kDirectionalIntraPredictorDerivative. |upsampled_top| and // |upsampled_left| indicate whether |top| and |left| have been upsampled as // described in '7.11.2.11. Intra edge upsample process'. This can occur in // cases with |width| + |height| <= 16. top-left and upper-left are accessed, // up to [-2] in each if |upsampled_top/left| are set. // The pointer arguments do not alias one another. DirectionalIntraPredictorZone2Func; // Directional intra predictor function signature, zone 3 (180 < angle < 270). // Section 7.11.2.4 (#9). // |dst| is an unaligned pointer to the output block. Pixel size is determined // by bitdepth with |stride| given in bytes. |left| is an aligned vector of the // column to the left of |dst|. |width| and |height| give the dimensions of the // block. |ystep| is the scaled starting index to |left| from // kDirectionalIntraPredictorDerivative. |upsampled_left| indicates whether // |left| has been upsampled as described in '7.11.2.11. Intra edge upsample // process'. This can occur in cases with |width| + |height| <= 16. bottom-left // is accessed. // The pointer arguments do not alias one another. DirectionalIntraPredictorZone3Func; // Filter intra predictor function signature. Section 7.11.2.3. // |dst| is an unaligned pointer to the output block. Pixel size is determined // by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to // the row above |dst|. |left| is an aligned vector of the column to the left // of |dst|. |width| and |height| are the size of the block in pixels. // The pointer arguments do not alias one another. 
FilterIntraPredictorFunc; //------------------------------------------------------------------------------ // Chroma from Luma (Cfl) prediction. Section 7.11.5. // Chroma from Luma (Cfl) intra prediction function signature. |dst| is an // unaligned pointer to the output block. Pixel size is determined by bitdepth // with |stride| given in bytes. |luma| contains subsampled luma pixels with 3 // fractional bits of precision. |alpha| is the signed Cfl alpha value for the // appropriate plane. CflIntraPredictorFunc; CflIntraPredictorFuncs; // Chroma from Luma (Cfl) subsampler function signature. |luma| is an unaligned // pointer to the output block. |src| is an unaligned pointer to the input // block. Pixel size is determined by bitdepth with |stride| given in bytes. CflSubsamplerFunc; CflSubsamplerFuncs; //------------------------------------------------------------------------------ // Intra Edge Filtering and Upsampling. Step 4 in section 7.11.2.4. // Intra edge filter function signature. |buffer| is a pointer to the top_row or // left_column that needs to be filtered. Typically the -1'th index of |top_row| // and |left_column| need to be filtered as well, so the caller can merely pass // the |buffer| as top_row[-1] or left_column[-1]. Pixel size is determined by // bitdepth. |size| is the number of pixels to be filtered. |strength| is the // filter strength. Section 7.11.2.12 in the spec. IntraEdgeFilterFunc; // Intra edge upsampler function signature. |buffer| is a pointer to the top_row // or left_column that needs to be upsampled. Pixel size is determined by // bitdepth. |size| is the number of pixels to be upsampled; valid values are: // 4, 8, 12, 16. This function needs access to negative indices -1 and -2 of // the |buffer|. Section 7.11.2.11 in the spec. IntraEdgeUpsamplerFunc; //------------------------------------------------------------------------------ // Inverse transform add function signature. 
// // Steps 2 and 3 of section 7.12.3 (contains the implementation of section // 7.13.3). // Apply the inverse transforms and add the residual to the destination frame // for the transform type and block size |tx_size| starting at position // |start_x| and |start_y|. |dst_frame| is a pointer to an Array2D of Pixel // values. |adjusted_tx_height| is the number of rows to process based on the // non-zero coefficient count in the block. It will be 1 (non-zero coefficient // count == 1), 4 or a multiple of 8 up to 32 or the original transform height, // whichever is less. |src_buffer| is a pointer to an Array2D of Residual // values. On input |src_buffer| contains the dequantized values, on output it // contains the residual. // The pointer arguments do not alias one another. InverseTransformAddFunc; // The final dimension holds row and column transforms indexed with kRow and // kColumn. InverseTransformAddFuncs; //------------------------------------------------------------------------------ // Post processing. // Loop filter function signature. Section 7.14. // |dst| is an unaligned pointer to the output block. Pixel size is determined // by bitdepth with |stride| given in bytes. // <threshold param> <spec name> <range> // |outer_thresh| blimit [7, 193] // |inner_thresh| limit [1, 63] // |hev_thresh| thresh [0, 63] // These are scaled by the implementation by 'bitdepth - 8' to produce // the spec variables blimitBd, limitBd and threshBd. // Note these functions are not called when the loop filter level is 0. LoopFilterFunc; LoopFilterFuncs; // Cdef direction function signature. Section 7.15.2. // |src| is a pointer to the source block. Pixel size is determined by bitdepth // with |stride| given in bytes. |direction| and |variance| are output // parameters and must not be nullptr. // The pointer arguments do not alias one another. CdefDirectionFunc; // Cdef filtering function signature. Section 7.15.3. 
// |source| is a pointer to the input block padded with kCdefLargeValue if at a // frame border. |source_stride| is given in units of uint16_t. // |block_width|, |block_height| are the width/height of the input block. // |primary_strength|, |secondary_strength|, and |damping| are Cdef filtering // parameters. // |direction| is the filtering direction. // |dest| is the output buffer. |dest_stride| is given in bytes. // The pointer arguments do not alias one another. CdefFilteringFunc; // The first index is block width: [0]: 4, [1]: 8. The second is based on // non-zero strengths: [0]: |primary_strength| and |secondary_strength|, [1]: // |primary_strength| only, [2]: |secondary_strength| only. CdefFilteringFuncs; // Upscaling coefficients function signature. Section 7.16. // This is an auxiliary function for SIMD optimizations and has no corresponding // C function. Different SIMD versions may have different outputs. So it must // pair with the corresponding version of SuperResFunc. // |upscaled_width| is the width of the output frame. // |step| is the number of subpixels to move the kernel for the next destination // pixel. // |initial_subpixel_x| is a base offset from which |step| increments. // |coefficients| is the upscale filter used by each pixel in a row. SuperResCoefficientsFunc; // Upscaling process function signature. Section 7.16. // |coefficients| is the upscale filter used by each pixel in a row. It is not // used by the C function. // |source| is the input frame buffer. It will be line extended. // |source_stride| is given in pixels. // |dest| is the output buffer. // |dest_stride| is given in pixels. // |height| is the height of the block to be processed. // |downscaled_width| is the width of the input frame. // |upscaled_width| is the width of the output frame. // |step| is the number of subpixels to move the kernel for the next destination // pixel. // |initial_subpixel_x| is a base offset from which |step| increments. 
// The pointer arguments do not alias one another. SuperResFunc; // Loop restoration function signature. Sections 7.16, 7.17. // |restoration_info| contains loop restoration information, such as filter // type, strength. // |source| is the input frame buffer, which is deblocked and cdef filtered. // |top_border| and |bottom_border| are the top and bottom borders. // |dest| is the output. // |stride| is given in pixels, and shared by |source| and |dest|. // |top_border_stride| and |bottom_border_stride| are given in pixels. // |restoration_buffer| contains buffers required for self guided filter and // wiener filter. They must be initialized before calling. // The pointer arguments do not alias one another. LoopRestorationFunc; // Index 0 is Wiener Filter. // Index 1 is Self Guided Restoration Filter. // This can be accessed as LoopRestorationType - 2. LoopRestorationFuncs; // Convolve function signature. Section 7.11.3.4. // This function applies a horizontal filter followed by a vertical filter. // |reference| is the input block (reference frame buffer). |reference_stride| // is the corresponding frame stride. // |vertical_filter_index|/|horizontal_filter_index| is the index to // retrieve the type of filter to be applied for vertical/horizontal direction // from the filter lookup table 'kSubPixelFilters'. // |horizontal_filter_id| and |vertical_filter_id| are the filter ids. // |width| and |height| are width and height of the block to be filtered. // |ref_last_x| and |ref_last_y| are the last pixel of the reference frame in // x/y direction. // |prediction| is the output block (output frame buffer). // Rounding precision is derived from the function being called. For horizontal // filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be // used. For compound vertical filtering kInterRoundBitsCompoundVertical will be // used. Otherwise kInterRoundBitsVertical & kInterRoundBitsVertical12bpp will // be used. 
// The pointer arguments do not alias one another. ConvolveFunc; // Convolve functions signature. Each points to one convolve function with // a specific setting: // ConvolveFunc[is_intra_block_copy][is_compound][has_vertical_filter] // [has_horizontal_filter]. // If is_compound is false, the prediction is clipped to Pixel. // If is_compound is true, the range of prediction is: // 8bpp: [-5132, 9212] (int16_t) // 10bpp: [ 3988, 61532] (uint16_t) // 12bpp: [ 3974, 61559] (uint16_t) // See src/dsp/convolve.cc ConvolveFuncs; // Convolve + scale function signature. Section 7.11.3.4. // This function applies a horizontal filter followed by a vertical filter. // |reference| is the input block (reference frame buffer). |reference_stride| // is the corresponding frame stride. // |vertical_filter_index|/|horizontal_filter_index| is the index to // retrieve the type of filter to be applied for vertical/horizontal direction // from the filter lookup table 'kSubPixelFilters'. // |subpixel_x| and |subpixel_y| are starting positions in units of 1/1024. // |step_x| and |step_y| are step sizes in units of 1/1024 of a pixel. // |width| and |height| are width and height of the block to be filtered. // |ref_last_x| and |ref_last_y| are the last pixel of the reference frame in // x/y direction. // |prediction| is the output block (output frame buffer). // Rounding precision is derived from the function being called. For horizontal // filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be // used. For compound vertical filtering kInterRoundBitsCompoundVertical will be // used. Otherwise kInterRoundBitsVertical & kInterRoundBitsVertical12bpp will // be used. // The pointer arguments do not alias one another. ConvolveScaleFunc; // Convolve functions signature for scaling version. // 0: single predictor. 1: compound predictor. ConvolveScaleFuncs; // Weight mask function signature. Section 7.11.3.12. // |prediction_0| is the first input block. 
// |prediction_1| is the second input block. Both blocks are int16_t* when // bitdepth == 8 and uint16_t* otherwise. // |width| and |height| are the prediction width and height. // The stride for the input buffers is equal to |width|. // The valid range of block size is [8x8, 128x128] for the luma plane. // |mask| is the output buffer. |mask_stride| is the output buffer stride. // The pointer arguments do not alias one another. WeightMaskFunc; // Weight mask functions signature. The dimensions (in order) are: // * Width index (4 => 0, 8 => 1, 16 => 2 and so on). // * Height index (4 => 0, 8 => 1, 16 => 2 and so on). // * mask_is_inverse. WeightMaskFuncs; // Average blending function signature. // Two predictors are averaged to generate the output. // Input predictor values are int16_t. Output type is uint8_t, with actual // range of Pixel value. // Average blending is described at the bottom of Section 7.11.3.1 (COMPOUND_AVERAGE). // |prediction_0| is the first input block. // |prediction_1| is the second input block. Both blocks are int16_t* when // bitdepth == 8 and uint16_t* otherwise. // |width| and |height| are the same for the first and second input blocks. // The stride for the input buffers is equal to |width|. // The valid range of block size is [8x8, 128x128] for the luma plane. // |dest| is the output buffer. |dest_stride| is the output buffer stride. // The pointer arguments do not alias one another. AverageBlendFunc; // Distance weighted blending function signature. // Weights are generated in Section 7.11.3.15. // Weighted blending is described at the bottom of Section 7.11.3.1 (COMPOUND_DISTANCE). // This function takes two blocks (inter frame prediction) and produces a // weighted output. // |prediction_0| is the first input block. // |prediction_1| is the second input block. Both blocks are int16_t* when // bitdepth == 8 and uint16_t* otherwise. // |weight_0| is the weight for the first block.
It is derived from the relative // distance of the first reference frame and the current frame. // |weight_1| is the weight for the second block. It is derived from the // relative distance of the second reference frame and the current frame. // |width| and |height| are the same for the first and second input blocks. // The stride for the input buffers is equal to |width|. // The valid range of block size is [8x8, 128x128] for the luma plane. // |dest| is the output buffer. |dest_stride| is the output buffer stride. // The pointer arguments do not alias one another. DistanceWeightedBlendFunc; // Mask blending function signature. Section 7.11.3.14. // This function takes two blocks and produces a blended output stored into the // output block |dest|. The blending is a weighted average process, controlled // by values of the mask. // |prediction_0| is the first input block. When prediction mode is inter_intra // (or wedge_inter_intra), this refers to the inter frame prediction. It is // int16_t* when bitdepth == 8 and uint16_t* otherwise. // The stride for |prediction_0| is equal to |width|. // |prediction_1| is the second input block. When prediction mode is inter_intra // (or wedge_inter_intra), this refers to the intra frame prediction and uses // Pixel values. It is only used for intra frame prediction when bitdepth >= 10. // It is int16_t* when bitdepth == 8 and uint16_t* otherwise. // |prediction_stride_1| is the stride, given in units of [u]int16_t. When // |is_inter_intra| is false (compound prediction) then |prediction_stride_1| is // equal to |width|. // |mask| is an integer array, whose value indicates the weight of the blending. // |mask_stride| is corresponding stride. // |width|, |height| are the same for both input blocks. // If it's inter_intra (or wedge_inter_intra), the valid range of block size is // [8x8, 32x32], no 4:1/1:4 blocks (Section 5.11.28). 
Otherwise (including // difference weighted prediction and compound average prediction), the valid // range is [8x8, 128x128]. // If there's subsampling, the corresponding width and height are halved for // chroma planes. // |is_inter_intra| stands for the prediction mode. If it is true, one of the // prediction blocks is from intra prediction of current frame. Otherwise, two // prediction blocks are both inter frame predictions. // |is_wedge_inter_intra| indicates if the mask is for the wedge prediction. // |dest| is the output block. // |dest_stride| is the corresponding stride for dest. // The pointer arguments do not alias one another. MaskBlendFunc; // Mask blending functions signature. Each points to one function with // a specific setting: // MaskBlendFunc[subsampling_x + subsampling_y][is_inter_intra]. MaskBlendFuncs; // This function is similar to the MaskBlendFunc. It is only used when // |is_inter_intra| is true and |bitdepth| == 8. // |prediction_[01]| are Pixel values (uint8_t). // |prediction_1| is also the output buffer. // The pointer arguments do not alias one another. InterIntraMaskBlendFunc8bpp; // InterIntra8bpp mask blending functions signature. When is_wedge_inter_intra // is false, the function at index 0 must be used. Otherwise, the function at // index subsampling_x + subsampling_y must be used. InterIntraMaskBlendFuncs8bpp; // Obmc (overlapped block motion compensation) blending function signature. // Section 7.11.3.10. // This function takes two blocks and produces a blended output stored into the // first input block. The blending is a weighted average process, controlled by // values of the mask. // Obmc is not a compound mode. It is different from other compound blending, // in terms of precision. The current block is computed using convolution with // clipping to the range of pixel values. Its above and left blocks are also // clipped. Therefore obmc blending process doesn't need to clip the output. 
// |prediction| is the first input block, which will be overwritten. // |prediction_stride| is the stride, given in bytes. // |width|, |height| are the same for both input blocks. The range is [4x2, // 32x32] for kObmcDirectionVertical and [2x4, 32x32] for // kObmcDirectionHorizontal, see Section 7.11.3.9. // |obmc_prediction| is the second input block. // |obmc_prediction_stride| is its stride, given in bytes. // The pointer arguments do not alias one another. ObmcBlendFunc; ObmcBlendFuncs; // Warp function signature. Section 7.11.3.5. // This function applies warp filtering for each 8x8 block inside the current // coding block. The filtering process is similar to 2d convolve filtering. // The horizontal filter is applied followed by the vertical filter. // The function has to calculate corresponding pixel positions before and // after warping. // |source| is the input reference frame buffer. // |source_stride|, |source_width|, |source_height| are corresponding frame // stride, width, and height. |source_stride| is given in bytes. // |warp_params| is the matrix of warp motion: warp_params[i] = mN. // z . [x', y', 1]^T = [m2 m3 m0; m4 m5 m1; m6 m7 1] * [x, y, 1]^T // (matrix rows are separated by ';'). // |subsampling_x/y| is the current frame's plane subsampling factor. // |block_start_x| and |block_start_y| are the starting position of the current // coding block. // |block_width| and |block_height| are width and height of the current coding // block. |block_width| and |block_height| are at least 8. // |alpha|, |beta|, |gamma|, |delta| are valid warp parameters. See the // comments in the definition of struct GlobalMotion for the range of their // values. // |dest| is the output buffer of type Pixel. The output values are clipped to // Pixel values. // |dest_stride| is the stride, in units of bytes. // Rounding precision is derived from the function being called. For horizontal // filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be // used.
For vertical filtering kInterRoundBitsVertical & // kInterRoundBitsVertical12bpp will be used. // // NOTE: WarpFunc assumes the source frame has left, right, top, and bottom // borders that extend the frame boundary pixels. // * The left and right borders must be at least 13 pixels wide. In addition, // Warp_NEON() may read up to 14 bytes after a row in the |source| buffer. // Therefore, there must be at least one extra padding byte after the right // border of the last row in the source buffer. // * The top and bottom borders must be at least 13 pixels high. // The pointer arguments do not alias one another. WarpFunc; // Warp for compound predictions. Section 7.11.3.5. // Similar to WarpFunc, but |dest| is a uint16_t predictor buffer, // |dest_stride| is given in units of uint16_t and |inter_round_bits_vertical| // is always 7 (kInterRoundBitsCompoundVertical). // Rounding precision is derived from the function being called. For horizontal // filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be // used. For vertical filtering kInterRoundBitsCompoundVertical will be used. WarpCompoundFunc; constexpr int kNumAutoRegressionLags = …; // Applies an auto-regressive filter to the white noise in |luma_grain_buffer|. // Section 7.18.3.3, second code block // |params| are parameters read from frame header, mainly providing // auto_regression_coeff_y for the filter and auto_regression_shift to right // shift the filter sum by. Note: This method assumes // params.auto_regression_coeff_lag is not 0. Do not call this method if // params.auto_regression_coeff_lag is 0. LumaAutoRegressionFunc; // Function index is auto_regression_coeff_lag - 1. LumaAutoRegressionFuncs; // Applies an auto-regressive filter to the white noise in u_grain and v_grain. // Section 7.18.3.3, third code block // The |luma_grain_buffer| provides samples that are added to the autoregressive // sum when num_y_points > 0.
// |u_grain_buffer| and |v_grain_buffer| point to the buffers of chroma noise // that were generated from the stored Gaussian sequence, and are overwritten // with the results of the autoregressive filter. |params| are parameters read // from frame header, mainly providing auto_regression_coeff_u and // auto_regression_coeff_v for each chroma plane's filter, and // auto_regression_shift to right shift the filter sums by. // The pointer arguments do not alias one another. ChromaAutoRegressionFunc; ChromaAutoRegressionFuncs; // Build an image-wide "stripe" of grain noise for every 32 rows in the image. // Section 7.18.3.5, first code block. // Each 32x32 luma block is copied at a random offset specified via // |grain_seed| from the grain template produced by autoregression, and the same // is done for chroma grains, subject to subsampling. // |width| and |height| are the dimensions of the overall image. // |noise_stripes_buffer| points to an Array2DView with one row for each stripe. // Because this function treats all planes identically and independently, it is // simplified to take one grain buffer at a time. This means duplicating some // random number generations, but that work can be reduced in other ways. // The pointer arguments do not alias one another. ConstructNoiseStripesFunc; ConstructNoiseStripesFuncs; // Compute the one or two overlap rows for each stripe copied to the noise // image. // Section 7.18.3.5, second code block. |width| and |height| are the // dimensions of the overall image. |noise_stripes_buffer| points to an // Array2DView with one row for each stripe. |noise_image_buffer| points to an // Array2D containing the allocated plane for this frame. Because this function // treats all planes identically and independently, it is simplified to take one // grain buffer at a time. // The pointer arguments do not alias one another. 
ConstructNoiseImageOverlapFunc; // Populate a scaling lookup table with interpolated values of a piecewise // linear function where values in |point_value| are mapped to the values in // |point_scaling|. // |num_points| can be between 0 and 15. When 0, the lookup table is set to // zero. // |point_value| and |point_scaling| have |num_points| valid elements. // The pointer arguments do not alias one another. InitializeScalingLutFunc; // Blend noise with image. Section 7.18.3.5, third code block. // |width| is the width of each row, while |height| is how many rows to compute. // |start_height| is an offset for the noise image, to support multithreading. // |min_value|, |max_luma|, and |max_chroma| are computed by the caller of these // functions, according to the code in the spec. // |source_plane_y| and |source_plane_uv| are the plane buffers of the decoded // frame. They are blended with the film grain noise and written to // |dest_plane_y| and |dest_plane_uv| as final output for display. // source_plane_* and dest_plane_* may point to the same buffer, in which case // the film grain noise is added in place. // |scaling_lut_y| and |scaling_lut| represent a piecewise linear mapping from // the frame's raw pixel value, to a scaling factor for the noise sample. // |scaling_shift| is applied as a right shift after scaling, so that scaling // down is possible. It is found in FilmGrainParams, but supplied directly to // BlendNoiseWithImageLumaFunc because it's the only member used. // The dest plane may point to the source plane, depending on the value of // frame_header.show_existing_frame. |noise_image_ptr| and scaling_lut.* do not // alias other arguments. BlendNoiseWithImageLumaFunc; BlendNoiseWithImageChromaFunc; BlendNoiseWithImageChromaFuncs; //------------------------------------------------------------------------------ struct FilmGrainFuncs { … }; // Motion field projection function signature. Section 7.9. 
// |reference_info| provides reference information for motion field projection. // |reference_to_current_with_sign| is the precalculated reference frame id // distance from current frame. // |dst_sign| is -1 for LAST_FRAME and LAST2_FRAME, or 0 (1 in spec) for others. // |y8_start| and |y8_end| are the start and end 8x8 rows of the current tile. // |x8_start| and |x8_end| are the start and end 8x8 columns of the current // tile. // |motion_field| is the output which saves the projected motion field // information. // Note: Only the entry from the 8-bit Dsp table is used as this function is // bitdepth agnostic. MotionFieldProjectionKernelFunc; // Compound temporal motion vector projection function signature. // Section 7.9.3 and 7.10.2.10. // |temporal_mvs| is the aligned set of temporal reference motion vectors. // |temporal_reference_offsets| specifies the number of frames covered by the // original motion vector. // |reference_offsets| specifies the number of frames to be covered by the // projected motion vector. // |count| is the number of the temporal motion vectors. // |candidate_mvs| is the aligned set of projected motion vectors. // The pointer arguments do not alias one another. // Note: Only the entry from the 8-bit Dsp table is used as this function is // bitdepth agnostic. MvProjectionCompoundFunc; // Single temporal motion vector projection function signature. // Section 7.9.3 and 7.10.2.10. // |temporal_mvs| is the aligned set of temporal reference motion vectors. // |temporal_reference_offsets| specifies the number of frames covered by the // original motion vector. // |reference_offset| specifies the number of frames to be covered by the // projected motion vector. // |count| is the number of the temporal motion vectors. // |candidate_mvs| is the aligned set of projected motion vectors. // The pointer arguments do not alias one another. // Note: Only the entry from the 8-bit Dsp table is used as this function is // bitdepth agnostic. 
MvProjectionSingleFunc; struct Dsp { … }; // Initializes function pointers based on build config and runtime // environment. Must be called once before first use. This function is // thread-safe. void DspInit(); // Returns the appropriate Dsp table for |bitdepth| or nullptr if one doesn't // exist. const Dsp* GetDspTable(int bitdepth); } // namespace dsp namespace dsp_internal { // Visual Studio builds don't have a way to detect SSE4_1. Only exclude the C // functions if /arch:AVX2 is used across all sources. #if !LIBGAV1_TARGETING_AVX2 && \ (defined(_MSC_VER) || (defined(_M_IX86) || defined(_M_X64))) #undef LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS #define LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS … #endif // Returns true if a more highly optimized version of |func| is not defined for // the associated bitdepth or if it is forcibly enabled with // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS. The define checked for |func| corresponds // to the LIBGAV1_Dsp<bitdepth>bpp_|func| define in the header file associated // with the module. // |func| is one of: // - FunctionName, e.g., SelfGuidedFilter. // - [sub-table-index1][...-indexN] e.g., // TransformSize4x4_IntraPredictorDc. The indices correspond to enum values // used as lookups with leading 'k' removed. // // NEON support is the only extension available for ARM and it is always // required. Because of this restriction DSP_ENABLED_8BPP_NEON(func) is always // true and can be omitted. #define DSP_ENABLED_8BPP_AVX2(func) … #define DSP_ENABLED_10BPP_AVX2(func) … #define DSP_ENABLED_8BPP_SSE4_1(func) … #define DSP_ENABLED_10BPP_SSE4_1(func) … // Initializes C-only function pointers. Note some entries may be set to // nullptr if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS is not defined. This is meant // for use in tests only, it is not thread-safe. void DspInit_C(); // Returns the appropriate Dsp table for |bitdepth| or nullptr if one doesn't // exist. This version is meant for use by test or dsp/*Init() functions only. 
dsp::Dsp* GetWritableDspTable(int bitdepth); } // namespace dsp_internal } // namespace libgav1 #endif // LIBGAV1_SRC_DSP_DSP_H_