// Copyright 2010 Google Inc. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the COPYING file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Frame-reconstruction function. Memory allocation. // // Author: Skal ([email protected]) #include <stdlib.h> #include "src/dec/vp8i_dec.h" #include "src/utils/utils.h" //------------------------------------------------------------------------------ // Main reconstruction function. static const uint16_t kScan[16] = …; static int CheckMode(int mb_x, int mb_y, int mode) { … } static void Copy32b(uint8_t* const dst, const uint8_t* const src) { … } static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src, uint8_t* const dst) { … } static void DoUVTransform(uint32_t bits, const int16_t* const src, uint8_t* const dst) { … } static void ReconstructRow(const VP8Decoder* const dec, const VP8ThreadContext* ctx) { … } //------------------------------------------------------------------------------ // Filtering // kFilterExtraRows[] = How many extra lines are needed on the MB boundary // for caching, given a filtering level. // Simple filter: up to 2 luma samples are read and 1 is written. // Complex filter: up to 4 luma samples are read and 3 are written. Same for // U/V, so it's 8 samples total (because of the 2x upsampling). 
static const uint8_t kFilterExtraRows[3] = …; static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) { … } // Filter the decoded macroblock row (if needed) static void FilterRow(const VP8Decoder* const dec) { … } //------------------------------------------------------------------------------ // Precompute the filtering strength for each segment and each i4x4/i16x16 mode. static void PrecomputeFilterStrengths(VP8Decoder* const dec) { … } //------------------------------------------------------------------------------ // Dithering // Minimal amplitude that will provide a non-zero dithering effect #define MIN_DITHER_AMP … #define DITHER_AMP_TAB_SIZE … static const uint8_t kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = …; void VP8InitDithering(const WebPDecoderOptions* const options, VP8Decoder* const dec) { … } // Convert to range: [-2,2] for dither=50, [-4,4] for dither=100 static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) { … } static void DitherRow(VP8Decoder* const dec) { … } //------------------------------------------------------------------------------ // This function is called after a row of macroblocks is finished decoding. // It also takes into account the following restrictions: // * In case of in-loop filtering, we must hold off sending some of the bottom // pixels as they are yet unfiltered. They will be filtered when the next macroblock // row is decoded. Meanwhile, we must preserve them by rotating them in the // cache area. This doesn't hold for the very bottom row of the uncropped // picture of course. // * We must clip the remaining pixels against the cropping area. The VP8Io // struct must have the following fields set correctly before calling put(): // NOTE(review): no field list follows this sentence here; confirm which VP8Io fields are required. #define MACROBLOCK_VPOS … // Finalize and transmit a complete row. Return false in case of user-abort. 
static int FinishRow(void* arg1, void* arg2) { … } #undef MACROBLOCK_VPOS //------------------------------------------------------------------------------ int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) { … } //------------------------------------------------------------------------------ // Finish setting up the decoding parameters once user's setup() is called. VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) { … } int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) { … } //------------------------------------------------------------------------------ // For multi-threaded decoding we need to use 3 rows of 16 pixels as a delay line. // // The reason is: the deblocking filter cannot deblock the bottom horizontal edges // immediately, and needs to wait for the first few rows of the next macroblock to // be decoded. Hence, deblocking is lagging behind by 4 or 8 pixels (depending // on strength). // With two threads, the vertical positions of the rows being decoded are: // Decode: [ 0..15][16..31][32..47][48..63][64..79][... // Deblock: [ 0..11][12..27][28..43][44..59][... // If we use two threads and two caches of 16 pixels, the sequence would be: // Decode: [ 0..15][16..31][ 0..15!!][16..31][ 0..15][... // Deblock: [ 0..11][12..27!!][-4..11][12..27][... // The problem occurs in rows [12..15!!], which both the decoding and // deblocking threads are writing simultaneously. // With 3 cache lines, one gets a safe write pattern: // Decode: [ 0..15][16..31][32..47][ 0..15][16..31][32..47][0.. // Deblock: [ 0..11][12..27][28..43][-4..11][12..27][28... // Note that multi-threaded output _without_ deblocking can make use of two // cache lines of 16 pixels only, since there's no lagging behind. The decoding // and output processes have non-concurrent writing: // Decode: [ 0..15][16..31][ 0..15][16..31][... // io->put: [ 0..15][16..31][ 0..15][... 
#define MT_CACHE_LINES … #define ST_CACHE_LINES … // Initialize multi/single-thread worker static int InitThreadContext(VP8Decoder* const dec) { … } int VP8GetThreadMethod(const WebPDecoderOptions* const options, const WebPHeaderStructure* const headers, int width, int height) { … } #undef MT_CACHE_LINES #undef ST_CACHE_LINES //------------------------------------------------------------------------------ // Memory setup static int AllocateMemory(VP8Decoder* const dec) { … } static void InitIo(VP8Decoder* const dec, VP8Io* io) { … } int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io) { … } //------------------------------------------------------------------------------