chromium/third_party/mediapipe/src/mediapipe/framework/formats/yuv_image.h

// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_FRAMEWORK_FORMATS_YUV_IMAGE_H_
#define MEDIAPIPE_FRAMEWORK_FORMATS_YUV_IMAGE_H_

#include <cstdint>
#include <functional>
#include <memory>
#include <utility>

#include "libyuv/video_common.h"

namespace mediapipe {

// Generic data structure for representing various 8-bit YUV image formats with
// pixel format specification in FourCC. The class is also capable of
// representing higher bit depth YUV image formats (10-bit, 12-bit, or 16-bit)
// where each format uses the lower bits of a uint16_t. For these high bit depth
// configurations, only the fully planar representation (i.e., u/v are not
// interleaved) with chroma subsampling of 420 is supported. Although there are
// high bit depth fourcc codes, none of them are defined or supported by libyuv,
// and there does not appear to be a standard code for the fully planar 10-bit
// format we use (this format is efficient for in memory manipulation but not
// necessarily for transport). Therefore, when bit_depth > 8, the only allowable
// chroma subsampling is 420 and the corresponding fourcc will be FOURCC_ANY.
//
// This class is primarily designed as a wrapper around 8-bit YUV image formats
// used by Android (NV21, YV12) and FFmpeg (I420 a.k.a. YCbCr420P).
//
// Note that YUV and YCbCr, although often used interchangeably, are different.
// The YUV color space was developed for analog systems and is not defined
// precisely in the technical and scientific literature; instead, it refers to a
// whole family of luminance/chrominance color spaces.  On the other hand, the
// YCbCr color space is defined in the ITU-R BT.601-5 and ITU-R BT.709-5
// standards of ITU (International Telecommunication Union) for digital systems.
// Thus, YCbCr420P is referring to a specific digital color space and a specific
// storage format.
//
// Class takes ownership of the pixel data buffers provided as input to the
// constructor or Initialize().
//
// A typical FFmpeg usage would be:
//
//   AVFrame frame;
//   avcodec_decode_video2(&codec_context, &frame, &got_frame, &av_packet);
//   const size_t y_size = frame.linesize[0] * height;
//   const size_t u_size = frame.linesize[1] * ((height + 1) / 2);
//   const size_t v_size = frame.linesize[2] * ((height + 1) / 2);
//   auto y = absl::make_unique<uint8_t[]>(y_size);
//   auto u = absl::make_unique<uint8_t[]>(u_size);
//   auto v = absl::make_unique<uint8_t[]>(v_size);
//   libyuv::I420Copy(frame.data[0], frame.linesize[0],
//                    frame.data[1], frame.linesize[1],
//                    frame.data[2], frame.linesize[2],
//                    y.get(), frame.linesize[0],
//                    u.get(), frame.linesize[1],
//                    v.get(), frame.linesize[2],
//                    width, height);
//   Outputs().Tag("VIDEO")->Add(new YUVImage(libyuv::FOURCC_I420,
//                                            std::move(y), frame.linesize[0],
//                                            std::move(u), frame.linesize[1],
//                                            std::move(v), frame.linesize[2],
//                                            width, height),
//                               timestamp);
//
// Note that for formats with subsampled U and V channels, like I420, the
// dimensions of the U and V channels are half the dimensions of the Y channel,
// rounded up. Rounding up can be accomplished by adding one to the Y dimensions
// before dividing by 2.
//
// Please do not add new constructors unless it is unavoidable; the default
// constructor followed by Initialize() should cover most of the use cases.
class YUVImage {
 public:
  // The matrix coefficients used (e.g., defines the conversion matrix from
  // Ycbcr
  // to RGB).
  enum ColorMatrixCoefficients {
    COLOR_MATRIX_COEFFICIENTS_RGB = 0,
    // Also ITU-R BT1361 / IEC 61966-2-4 xvYCC709 / SMPTE RP177 Annex B.
    COLOR_MATRIX_COEFFICIENTS_BT709 = 1,
    COLOR_MATRIX_COEFFICIENTS_UNSPECIFIED = 2,
    COLOR_MATRIX_COEFFICIENTS_FCC = 4,
    // Also ITU-R BT601-6 625 / ITU-R BT1358 625 / ITU-R BT1700 625 PAL &
    /// SECAM / IEC 61966-2-4 xvYCC601.
    COLOR_MATRIX_COEFFICIENTS_BT470BG = 5,
    // Also ITU-R BT601-6 525 / ITU-R BT1358 525 / ITU-R BT1700 NTSC /
    /// functionally identical to above.
    COLOR_MATRIX_COEFFICIENTS_SMPTE170M = 6,
    COLOR_MATRIX_COEFFICIENTS_SMPTE240M = 7,
    // Used by Dirac / VC-2 and H.264 FRext, see ITU-T SG16.
    COLOR_MATRIX_COEFFICIENTS_YCOCG = 8,
    // ITU-R BT2020 non-constant luminance system.
    COLOR_MATRIX_COEFFICIENTS_BT2020_NCL = 9,
    // ITU-R BT2020 constant luminance system.
    COLOR_MATRIX_COEFFICIENTS_BT2020_CL = 10,
    // SMPTE 2085, Y'D'zD'x
    COLOR_MATRIX_COEFFICIENTS_SMPTE2085 = 11,
    // Chromaticity-derived non-constant luminance.
    COLOR_MATRIX_COEFFICIENTS_CHROMA_DERIVED_NCL = 12,
    // Chromaticity-derived constant luminance.
    COLOR_MATRIX_COEFFICIENTS_CHROMA_DERIVED_CL = 13,
    // ITU-R BT.[HDR-TV] ICtCp
    COLOR_MATRIX_COEFFICIENTS_ICTCP = 14,
  };

  YUVImage() = default;
  ~YUVImage() { Clear(); }

  // Convenience constructor
  YUVImage(libyuv::FourCC fourcc,                     //
           std::unique_ptr<uint8_t[]> data_location,  //
           uint8_t* data0, int stride0,               //
           uint8_t* data1, int stride1,               //
           uint8_t* data2, int stride2,               //
           int width, int height, int bit_depth = 8) {
    uint8_t* tmp = data_location.release();
    std::function<void()> deallocate = [tmp]() { delete[] tmp; };
    Initialize(fourcc,          //
               deallocate,      //
               data0, stride0,  //
               data1, stride1,  //
               data2, stride2,  //
               width, height, bit_depth);
  }

  // Convenience constructor to construct the YUVImage with data stored
  // in three unique_ptrs.
  YUVImage(libyuv::FourCC fourcc,                          //
           std::unique_ptr<uint8_t[]> data0, int stride0,  //
           std::unique_ptr<uint8_t[]> data1, int stride1,  //
           std::unique_ptr<uint8_t[]> data2, int stride2,  //
           int width, int height, int bit_depth = 8) {
    uint8_t* tmp0 = data0.release();
    uint8_t* tmp1 = data1.release();
    uint8_t* tmp2 = data2.release();
    std::function<void()> deallocate = [tmp0, tmp1, tmp2]() {
      delete[] tmp0;
      delete[] tmp1;
      delete[] tmp2;
    };
    Initialize(fourcc,         //
               deallocate,     //
               tmp0, stride0,  //
               tmp1, stride1,  //
               tmp2, stride2,  //
               width, height, bit_depth);
  }

  // Clear and initialize member variables.
  //
  // First argument is an enum of FourCC (see http://www.fourcc.org/yuv.php)
  // defined in libyuv/video_common.h
  //
  // A deallocation function is provided which will be called on the next
  // Clear() or on destruction.
  //
  // The next three argument pairs are pointer to pixel data buffer for each
  // plane and its image stride (http://en.wikipedia.org/wiki/Stride).
  //
  // The class is very generic and it is up to the user how they want
  // to use this data holder class.  For example, if one intends to
  // use this for NV21, one can ignore data2 and stride2 by giving
  // nullptr and 0, respectively, and call the right libyuv functions
  // for actual processing.  This class is agnostic of the data and the
  // pixel format it holds.
  void Initialize(libyuv::FourCC fourcc,                        //
                  std::function<void()> deallocation_function,  //
                  uint8_t* data0, int stride0,                  //
                  uint8_t* data1, int stride1,                  //
                  uint8_t* data2, int stride2,                  //
                  int width, int height, int bit_depth = 8) {
    Clear();
    deallocation_function_ = deallocation_function;
    fourcc_ = fourcc;
    data_[0] = data0;
    stride_[0] = stride0;
    data_[1] = data1;
    stride_[1] = stride1;
    data_[2] = data2;
    stride_[2] = stride2;
    width_ = width;
    height_ = height;
    bit_depth_ = bit_depth;
  }

  void Clear() {
    if (deallocation_function_) {
      deallocation_function_();
      deallocation_function_ = nullptr;
    }
    fourcc_ = libyuv::FOURCC_ANY;
    data_[0] = nullptr;
    data_[1] = nullptr;
    data_[2] = nullptr;
    stride_[0] = 0;
    stride_[1] = 0;
    stride_[2] = 0;
    width_ = 0;
    height_ = 0;
    bit_depth_ = 0;
  }

  // Getters.
  libyuv::FourCC fourcc() const { return fourcc_; }
  const uint8_t* data(int index) const { return data_[index]; }
  int stride(int index) const { return stride_[index]; }
  int width() const { return width_; }
  int height() const { return height_; }
  int bit_depth() const { return bit_depth_; }
  ColorMatrixCoefficients matrix_coefficients() const {
    return matrix_coefficients_;
  }
  bool full_range() const { return full_range_; }

  // Setters.
  void set_fourcc(libyuv::FourCC fourcc) { fourcc_ = fourcc; }
  uint8_t* mutable_data(int index) { return data_[index]; }
  void set_stride(int index, int stride) { stride_[index] = stride; }
  void set_width(int width) { width_ = width; }
  void set_height(int height) { height_ = height; }
  void set_matrix_coefficients(ColorMatrixCoefficients coeffs) {
    matrix_coefficients_ = coeffs;
  }
  void set_full_range(bool full_range) { full_range_ = full_range; }

 private:
  static constexpr int kMaxNumPlanes = 3;

  std::function<void()> deallocation_function_;

  libyuv::FourCC fourcc_ = libyuv::FOURCC_ANY;
  uint8_t* data_[kMaxNumPlanes];
  int stride_[kMaxNumPlanes];
  int width_ = 0;
  int height_ = 0;
  int bit_depth_ = 0;
  ColorMatrixCoefficients matrix_coefficients_ =
      ColorMatrixCoefficients::COLOR_MATRIX_COEFFICIENTS_UNSPECIFIED;
  bool full_range_ = false;
};

}  // namespace mediapipe

#endif  // MEDIAPIPE_FRAMEWORK_FORMATS_YUV_IMAGE_H_