chromium/media/formats/mp4/avc_unittest.cc

// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/formats/mp4/avc.h"

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <optional>
#include <ostream>

#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "media/base/decrypt_config.h"
#include "media/base/stream_parser_buffer.h"
#include "media/formats/mp4/bitstream_converter.h"
#include "media/formats/mp4/box_definitions.h"
#include "media/formats/mp4/nalu_test_helper.h"
#include "media/parsers/h264_parser.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace media {
namespace mp4 {

// Payloads of the two NALUs used to build length-prefixed input frames.
// AnnexBToString() reports a frame made of them as "P,SDC" (see
// ParseCorrectly).
static constexpr uint8_t kNALU1[] = {0x01, 0x02, 0x03};
static constexpr uint8_t kNALU2[] = {0x04, 0x05, 0x06, 0x07};

// Annex B form of kNALU1 followed by kNALU2: each payload is preceded by the
// four-byte start code 00 00 00 01.
static constexpr uint8_t kExpected[] = {
    0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x03,        // start code + kNALU1
    0x00, 0x00, 0x00, 0x01, 0x04, 0x05, 0x06, 0x07,  // start code + kNALU2
};

// Annex B form of the two SPS entries and the single PPS entry that the tests
// store in AVCDecoderConfigurationRecord.
static constexpr uint8_t kExpectedParamSets[] = {
    0x00, 0x00, 0x00, 0x01, 0x67, 0x12,        // sps_list[0]
    0x00, 0x00, 0x00, 0x01, 0x67, 0x34,        // sps_list[1]
    0x00, 0x00, 0x00, 0x01, 0x68, 0x56, 0x78,  // pps_list[0]
};

// Translates an H264NALU type value into the short label used by the test
// strings in this file (e.g. "I", "P", "SPS"). Types listed in the final case
// group trigger CHECK(false); any value without a case yields
// "UnsupportedType".
static std::string NALUTypeToString(int type) {
  // Result for values that match no case label below.
  const char* label = "UnsupportedType";

  switch (type) {
    case H264NALU::kNonIDRSlice:
      label = "P";
      break;
    case H264NALU::kSliceDataA:
      label = "SDA";
      break;
    case H264NALU::kSliceDataB:
      label = "SDB";
      break;
    case H264NALU::kSliceDataC:
      label = "SDC";
      break;
    case H264NALU::kIDRSlice:
      label = "I";
      break;
    case H264NALU::kSEIMessage:
      label = "SEI";
      break;
    case H264NALU::kSPS:
      label = "SPS";
      break;
    case H264NALU::kSPSExt:
      label = "SPSExt";
      break;
    case H264NALU::kPPS:
      label = "PPS";
      break;
    case H264NALU::kAUD:
      label = "AUD";
      break;
    case H264NALU::kEOSeq:
      label = "EOSeq";
      break;
    case H264NALU::kEOStream:
      label = "EOStr";
      break;
    case H264NALU::kFiller:
      label = "FILL";
      break;
    case H264NALU::kPrefix:
      label = "Prefix";
      break;
    case H264NALU::kSubsetSPS:
      label = "SubsetSPS";
      break;
    case H264NALU::kDPS:
      label = "DPS";
      break;

    // Types the tests in this file never expect to see.
    case H264NALU::kUnspecified:
    case H264NALU::kReserved17:
    case H264NALU::kReserved18:
    case H264NALU::kCodedSliceAux:
    case H264NALU::kCodedSliceExtension:
      CHECK(false) << "Unexpected type: " << type;
      break;
  }

  return label;
}

// Stream insertion for AnalysisResult so that failing expectations print a
// readable value. Each field is rendered as "true", "false" or
// "nullopt/unknown" when it holds no value.
std::ostream& operator<<(std::ostream& os,
                         const BitstreamConverter::AnalysisResult& r) {
  const auto describe = [](const auto& flag) -> const char* {
    if (!flag.has_value()) {
      return "nullopt/unknown";
    }
    return flag.value() ? "true" : "false";
  };

  os << "{ is_conformant: " << describe(r.is_conformant);
  os << ", is_keyframe: " << describe(r.is_keyframe);
  os << " }";
  return os;
}

// Renders an Annex B buffer as a string of NALU type labels (see
// NALUTypeToString()) so tests can compare it against an expected layout.
//
// NALUs are visited in stream order. A NALU that AVC::FindSubsampleIndex()
// places in the same subsample as the previous one is joined with ","; a NALU
// in a different subsample is joined with " ". An empty |buffer| produces an
// empty string.
static std::string AnnexBToString(
    const std::vector<uint8_t>& buffer,
    const std::vector<SubsampleEntry>& subsamples) {
  // An empty buffer holds no NALUs. Return early rather than forming a
  // pointer to the first element of an empty vector, which is undefined
  // behavior.
  if (buffer.empty()) {
    return std::string();
  }

  std::stringstream ss;

  H264Parser parser;
  parser.SetEncryptedStream(buffer.data(), buffer.size(), subsamples);

  H264NALU nalu;
  bool first = true;
  size_t current_subsample_index = 0;
  while (parser.AdvanceToNextNALU(&nalu) == H264Parser::kOk) {
    size_t subsample_index = AVC::FindSubsampleIndex(buffer, &subsamples,
                                                     nalu.data);
    if (!first) {
      // Separator encodes whether a subsample boundary was crossed.
      ss << (subsample_index == current_subsample_index ? "," : " ");
    } else {
      // The first NALU is expected to live in subsample 0.
      DCHECK_EQ(subsample_index, current_subsample_index);
      first = false;
    }

    ss << NALUTypeToString(nalu.nal_unit_type);
    current_subsample_index = subsample_index;
  }
  return ss.str();
}

// Fixture shared by the tests in this file. The int test parameter is passed
// by the TEST_P cases as the byte width of the NALU length prefix.
class AVCConversionTest : public testing::TestWithParam<int> {
 protected:
  // Appends a |length_size|-byte length field holding |length| to |buf|:
  // (|length_size| - 1) zero bytes followed by one byte with the value itself.
  // Only values in [0, 255] are supported.
  void WriteLength(int length_size, int length, std::vector<uint8_t>* buf) {
    DCHECK_GE(length, 0);
    DCHECK_LE(length, 255);

    // Leading zero bytes of the field.
    for (int remaining = length_size; remaining > 1; --remaining) {
      buf->push_back(0);
    }
    // Final byte of the field carries the value.
    buf->push_back(length);
  }

  // Replaces the contents of |buf| with kNALU1 followed by kNALU2, each
  // preceded by a |length_size|-byte length field.
  void MakeInputForLength(int length_size, std::vector<uint8_t>* buf) {
    buf->clear();

    const auto append_nalu = [this, length_size, buf](const uint8_t* payload,
                                                      int payload_size) {
      WriteLength(length_size, payload_size, buf);
      buf->insert(buf->end(), payload, payload + payload_size);
    };

    append_nalu(kNALU1, sizeof(kNALU1));
    append_nalu(kNALU2, sizeof(kNALU2));
  }
};

// Converts a frame holding kNALU1 and kNALU2 with GetParam()-byte length
// prefixes to Annex B, then checks the analysis result, the exact output bytes
// and the NALU type string.
TEST_P(AVCConversionTest, ParseCorrectly) {
  std::vector<uint8_t> buf;
  std::vector<SubsampleEntry> subsamples;
  MakeInputForLength(GetParam(), &buf);
  // Fatal: nothing below is meaningful if the conversion itself failed.
  ASSERT_TRUE(AVC::ConvertFrameToAnnexB(GetParam(), &buf, &subsamples));

  BitstreamConverter::AnalysisResult expected;
  expected.is_conformant = true;
  expected.is_keyframe = false;
  EXPECT_PRED2(AnalysesMatch,
               AVC::AnalyzeAnnexB(buf.data(), buf.size(), subsamples),
               expected);

  // Fatal size check: the memcmp below reads sizeof(kExpected) bytes from
  // |buf| and must not run against a shorter buffer.
  ASSERT_EQ(buf.size(), sizeof(kExpected));
  EXPECT_EQ(0, memcmp(kExpected, buf.data(), sizeof(kExpected)));
  EXPECT_EQ("P,SDC", AnnexBToString(buf, subsamples));
}

// The length prefix claims ten times more payload than the buffer actually
// contains, so conversion is expected to fail.
TEST_P(AVCConversionTest, NALUSizeTooLarge) {
  const int length_size = GetParam();
  std::vector<uint8_t> frame;

  WriteLength(length_size, 10 * sizeof(kNALU1), &frame);
  frame.insert(frame.end(), kNALU1, kNALU1 + sizeof(kNALU1));

  EXPECT_FALSE(AVC::ConvertFrameToAnnexB(length_size, &frame, nullptr));
}

// A frame that contains zero-length NALUs is expected to be rejected.
TEST_P(AVCConversionTest, NALUSizeIsZero) {
  const int length_size = GetParam();
  std::vector<uint8_t> frame;

  // Layout: <empty NALU> <kNALU1> <empty NALU> <kNALU2>.
  WriteLength(length_size, 0, &frame);
  WriteLength(length_size, sizeof(kNALU1), &frame);
  frame.insert(frame.end(), kNALU1, kNALU1 + sizeof(kNALU1));
  WriteLength(length_size, 0, &frame);
  WriteLength(length_size, sizeof(kNALU2), &frame);
  frame.insert(frame.end(), kNALU2, kNALU2 + sizeof(kNALU2));

  EXPECT_FALSE(AVC::ConvertFrameToAnnexB(length_size, &frame, nullptr));
}

// Builds a frame out of three clear-only subsamples, converts it to Annex B
// and checks that each subsample's clear_bytes now counts four bytes per NALU
// in place of the GetParam()-byte length prefix.
TEST_P(AVCConversionTest, SubsampleSizesUpdatedAfterAnnexBConversion) {
  std::vector<uint8_t> buf;
  std::vector<SubsampleEntry> subsamples;
  SubsampleEntry subsample;

  // Write the first subsample, consisting of only one NALU
  WriteLength(GetParam(), sizeof(kNALU1), &buf);
  buf.insert(buf.end(), kNALU1, kNALU1 + sizeof(kNALU1));

  subsample.clear_bytes = GetParam() + sizeof(kNALU1);
  subsample.cypher_bytes = 0;
  subsamples.push_back(subsample);

  // Write the second subsample, containing two NALUs
  WriteLength(GetParam(), sizeof(kNALU1), &buf);
  buf.insert(buf.end(), kNALU1, kNALU1 + sizeof(kNALU1));
  WriteLength(GetParam(), sizeof(kNALU2), &buf);
  buf.insert(buf.end(), kNALU2, kNALU2 + sizeof(kNALU2));

  subsample.clear_bytes = 2 * GetParam() + sizeof(kNALU1) + sizeof(kNALU2);
  subsample.cypher_bytes = 0;
  subsamples.push_back(subsample);

  // Write the third subsample, containing a single one-byte NALU
  WriteLength(GetParam(), 1, &buf);
  buf.push_back(0);
  subsample.clear_bytes = GetParam() + 1;
  subsample.cypher_bytes = 0;
  subsamples.push_back(subsample);

  // Both checks are fatal: the expectations below index subsamples[0..2], so
  // they must not run if the conversion failed or the entry count changed.
  ASSERT_TRUE(AVC::ConvertFrameToAnnexB(GetParam(), &buf, &subsamples));
  ASSERT_EQ(subsamples.size(), 3u);
  EXPECT_EQ(subsamples[0].clear_bytes, 4 + sizeof(kNALU1));
  EXPECT_EQ(subsamples[0].cypher_bytes, 0u);
  EXPECT_EQ(subsamples[1].clear_bytes, 8 + sizeof(kNALU1) + sizeof(kNALU2));
  EXPECT_EQ(subsamples[1].cypher_bytes, 0u);
  EXPECT_EQ(subsamples[2].clear_bytes, 4 + 1u);
  EXPECT_EQ(subsamples[2].cypher_bytes, 0u);
}

// Truncated frames are expected to be rejected, whether the cut falls inside
// a NALU payload or inside a NALU length field.
TEST_P(AVCConversionTest, ParsePartial) {
  const int length_size = GetParam();
  std::vector<uint8_t> frame;

  // Case 1: the last NALU is missing its final payload byte.
  MakeInputForLength(length_size, &frame);
  frame.pop_back();
  EXPECT_FALSE(AVC::ConvertFrameToAnnexB(length_size, &frame, nullptr));

  // Case 2: the buffer ends in the middle of a NAL length. A one-byte length
  // cannot be split, so that parameter value stops here.
  if (length_size == 1) {
    return;
  }

  MakeInputForLength(length_size, &frame);
  // Drop all of kNALU2 plus the last byte of its length field.
  frame.resize(frame.size() - (sizeof(kNALU2) + 1));
  EXPECT_FALSE(AVC::ConvertFrameToAnnexB(length_size, &frame, nullptr));
}

// Converting an empty frame is expected to succeed and leave it empty.
TEST_P(AVCConversionTest, ParseEmpty) {
  const int length_size = GetParam();
  std::vector<uint8_t> frame;

  EXPECT_TRUE(AVC::ConvertFrameToAnnexB(length_size, &frame, nullptr));
  EXPECT_EQ(0u, frame.size());
}

// Instantiates every TEST_P(AVCConversionTest, ...) above once for each of the
// values 1, 2 and 4. Those tests pass GetParam() to WriteLength() /
// MakeInputForLength() and to AVC::ConvertFrameToAnnexB() as the size in bytes
// of the NALU length prefix.
INSTANTIATE_TEST_SUITE_P(AVCConversionTestValues,
                         AVCConversionTest,
                         ::testing::Values(1, 2, 4));

// Converts a configuration record holding two SPS entries and one PPS entry
// to Annex B and checks the output byte-for-byte against kExpectedParamSets.
TEST_F(AVCConversionTest, ConvertConfigToAnnexB) {
  AVCDecoderConfigurationRecord avc_config;
  avc_config.sps_list.resize(2);
  avc_config.sps_list[0].push_back(0x67);
  avc_config.sps_list[0].push_back(0x12);
  avc_config.sps_list[1].push_back(0x67);
  avc_config.sps_list[1].push_back(0x34);
  avc_config.pps_list.resize(1);
  avc_config.pps_list[0].push_back(0x68);
  avc_config.pps_list[0].push_back(0x56);
  avc_config.pps_list[0].push_back(0x78);

  std::vector<uint8_t> buf;
  std::vector<SubsampleEntry> subsamples;
  ASSERT_TRUE(AVC::ConvertConfigToAnnexB(avc_config, &buf));

  // Fatal size check: the memcmp below reads sizeof(kExpectedParamSets) bytes
  // from |buf|, so a shorter (or empty) buffer must stop the test here. It
  // also catches unexpected trailing bytes that memcmp alone would ignore.
  ASSERT_EQ(sizeof(kExpectedParamSets), buf.size());
  EXPECT_EQ(0, memcmp(kExpectedParamSets, buf.data(),
                      sizeof(kExpectedParamSets)));
  EXPECT_EQ("SPS,SPS,PPS", AnnexBToString(buf, subsamples));
}

// Round trip check: a NALU type string turned into Annex B by
// AvcStringToAnnexB() must be rendered back to the same string by
// AnnexBToString(), and the generated buffer must analyze as a conformant
// keyframe.
TEST_F(AVCConversionTest, StringConversionFunctions) {
  const std::string nalu_sequence =
      "AUD SPS SPSExt SPS PPS SEI SEI Prefix I P FILL EOSeq EOStr";

  std::vector<uint8_t> annexb;
  std::vector<SubsampleEntry> subsample_entries;
  AvcStringToAnnexB(nalu_sequence, &annexb, &subsample_entries);

  BitstreamConverter::AnalysisResult want;
  want.is_keyframe = true;
  want.is_conformant = true;
  EXPECT_PRED2(
      AnalysesMatch,
      AVC::AnalyzeAnnexB(annexb.data(), annexb.size(), subsample_entries),
      want);

  EXPECT_EQ(nalu_sequence, AnnexBToString(annexb, subsample_entries));
}

// Each case is a NALU layout string that AVC::AnalyzeAnnexB() is expected to
// report as conformant, together with the expected keyframe verdict.
TEST_F(AVCConversionTest, ValidAnnexBConstructs) {
  struct {
    const char* case_string;
    const bool is_keyframe;
  } test_cases[] = {
      {"I", true},
      {"I I I I", true},
      {"AUD I", true},
      {"AUD SPS PPS I", true},
      {"I EOSeq", true},
      {"I EOSeq EOStr", true},
      {"I EOStr", true},
      {"P", false},
      {"P P P P", false},
      {"AUD SPS PPS P", false},
      {"SEI SEI I", true},
      {"SEI SEI Prefix I", true},
      {"SPS SPSExt SPS PPS I P", true},
      {"Prefix SEI I", true},
      {"AUD,I", true},
      {"AUD,SEI I", true},
      {"AUD,SEI,SPS,PPS,I", true},

      // In reality, these might not always be conformant/valid, but assuming
      // they are, they're not keyframes because a non-IDR slice preceded the
      // IDR slice, if any.
      {"SDA SDB SDC", false},
      {"P I", false},
      {"SDA I", false},
      {"SDB I", false},
      {"SDC I", false},
  };

  for (const auto& test_case : test_cases) {
    std::vector<uint8_t> buf;
    // Left empty: no subsample information is requested from the generator.
    std::vector<SubsampleEntry> subsamples;
    AvcStringToAnnexB(test_case.case_string, &buf, nullptr);

    BitstreamConverter::AnalysisResult expected;
    expected.is_conformant = true;
    expected.is_keyframe = test_case.is_keyframe;
    EXPECT_PRED2(AnalysesMatch,
                 AVC::AnalyzeAnnexB(buf.data(), buf.size(), subsamples),
                 expected)
        << "'" << test_case.case_string << "' failed";
  }
}

// Each case is a NALU layout string that AVC::AnalyzeAnnexB() is expected to
// report as non-conformant, together with the keyframe verdict expected at
// the point where analysis stops.
TEST_F(AVCConversionTest, InvalidAnnexBConstructs) {
  struct {
    const char* case_string;
    const std::optional<bool> is_keyframe;
  } test_cases[] = {
      // For these cases, lack of conformance is determined before detecting any
      // IDR or non-IDR slices, so the non-conformant frames' keyframe analysis
      // reports std::nullopt (which means undetermined analysis result).
      {"AUD", std::nullopt},            // No VCL present.
      {"AUD,SEI", std::nullopt},        // No VCL present.
      {"SPS PPS", std::nullopt},        // No VCL present.
      {"SPS PPS AUD I", std::nullopt},  // Parameter sets must come after AUD.
      {"SPSExt SPS P", std::nullopt},   // SPS must come before SPSExt.
      {"SPS PPS SPSExt P", std::nullopt},  // SPSExt must follow an SPS.
      {"EOSeq", std::nullopt},             // EOSeq must come after a VCL.
      {"EOStr", std::nullopt},             // EOStr must come after a VCL.

      // For these cases, IDR slice is first VCL and is detected before
      // conformance failure, so the non-conformant frame is reported as a
      // keyframe.
      {"I EOStr EOSeq", true},  // EOSeq must come before EOStr.
      {"I Prefix", true},       // Reserved14-18 must come before first VCL.
      {"I SEI", true},          // SEI must come before first VCL.

      // For this case, P slice is first VCL and is detected before conformance
      // failure, so the non-conformant frame is reported as a non-keyframe.
      {"P SPS P",
       false},  // SPS after first VCL would indicate a new access unit.
  };

  // Every case shares the non-conformant verdict; only is_keyframe varies.
  BitstreamConverter::AnalysisResult expected;
  expected.is_conformant = false;

  for (const auto& test_case : test_cases) {
    std::vector<uint8_t> buf;
    // Left empty: no subsample information is requested from the generator.
    std::vector<SubsampleEntry> subsamples;
    AvcStringToAnnexB(test_case.case_string, &buf, nullptr);
    expected.is_keyframe = test_case.is_keyframe;
    EXPECT_PRED2(AnalysesMatch,
                 AVC::AnalyzeAnnexB(buf.data(), buf.size(), subsamples),
                 expected)
        << "'" << test_case.case_string << "' failed";
  }
}

// One InsertParamSetsAnnexB scenario: |input| is the NALU layout string the
// frame is generated from, |expected| is the layout string anticipated after
// the parameter sets have been inserted.
struct InsertTestCases {
  const char* input;
  const char* expected;
};

// For each input layout, inserts the parameter sets from a configuration
// record (two SPS, one PPS) into the generated Annex B frame and checks that
// the result is a conformant keyframe with the expected NALU layout.
TEST_F(AVCConversionTest, InsertParamSetsAnnexB) {
  static const InsertTestCases kCases[] = {
    { "I", "SPS,SPS,PPS,I" },
    { "AUD I", "AUD SPS,SPS,PPS,I" },

    // Cases where param sets in |config| are placed before
    // the existing ones.
    { "SPS,PPS,I", "SPS,SPS,PPS,SPS,PPS,I" },
    { "AUD,SPS,PPS,I", "AUD,SPS,SPS,PPS,SPS,PPS,I" },  // Note: params placed
                                                       // after AUD.

    // One or more NALUs might follow AUD in the first subsample, we need to
    // handle this correctly. Params should be inserted right after AUD.
    { "AUD,SEI I", "AUD,SPS,SPS,PPS,SEI I" },
  };

  // Parameter sets to insert: two SPS entries followed by one PPS entry.
  AVCDecoderConfigurationRecord config;
  config.sps_list.resize(2);
  config.pps_list.resize(1);
  config.sps_list[0].push_back(0x67);
  config.sps_list[0].push_back(0x12);
  config.sps_list[1].push_back(0x67);
  config.sps_list[1].push_back(0x34);
  config.pps_list[0].push_back(0x68);
  config.pps_list[0].push_back(0x56);
  config.pps_list[0].push_back(0x78);

  // Every case is expected to end up as a conformant keyframe.
  BitstreamConverter::AnalysisResult want;
  want.is_keyframe = true;
  want.is_conformant = true;

  for (const InsertTestCases& scenario : kCases) {
    std::vector<uint8_t> frame;
    std::vector<SubsampleEntry> subsample_entries;
    AvcStringToAnnexB(scenario.input, &frame, &subsample_entries);

    EXPECT_TRUE(AVC::InsertParamSetsAnnexB(config, &frame, &subsample_entries))
        << "'" << scenario.input << "' insert failed.";
    EXPECT_PRED2(
        AnalysesMatch,
        AVC::AnalyzeAnnexB(frame.data(), frame.size(), subsample_entries),
        want)
        << "'" << scenario.input << "' created invalid AnnexB.";
    EXPECT_EQ(scenario.expected, AnnexBToString(frame, subsample_entries))
        << "'" << scenario.input << "' generated unexpected output.";
  }
}

}  // namespace mp4
}  // namespace media