chromium/media/gpu/android/ndk_audio_encoder.cc

// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/gpu/android/ndk_audio_encoder.h"

#include <aaudio/AAudio.h>
#include <media/NdkMediaCodec.h>
#include <media/NdkMediaError.h>
#include <media/NdkMediaFormat.h>

#include <memory>
#include <optional>
#include <string>
#include <string_view>

#include "base/containers/heap_array.h"
#include "base/containers/span.h"
#include "base/logging.h"
#include "base/numerics/safe_conversions.h"
#include "base/sequence_checker.h"
#include "base/strings/stringprintf.h"
#include "base/time/time.h"
#include "media/base/android/media_codec_util.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_encoder.h"
#include "media/base/audio_sample_types.h"
#include "media/base/audio_timestamp_helper.h"
#include "media/base/converting_audio_fifo.h"
#include "media/base/encoder_status.h"
#include "media/base/media_util.h"
#include "media/base/sample_format.h"
#include "media/base/timestamp_constants.h"
#include "media/gpu/android/ndk_media_codec_wrapper.h"

#pragma clang attribute push DEFAULT_REQUIRES_ANDROID_API( \
    NDK_MEDIA_CODEC_MIN_API)

namespace media {

// Deleter functor for smart pointers that own an `AMediaFormat`. Null pointers
// are ignored; anything else is handed to AMediaFormat_delete().
struct AMediaFormatDeleter {
  void operator()(AMediaFormat* format) const {
    if (format == nullptr) {
      return;
    }
    AMediaFormat_delete(format);
  }
};

// AAC uses a frame size of 1024 samples. This is the number of frames per
// buffer that is fed to, and accounted for on output from, the encoder.
constexpr int kAacFramesPerBuffer = 1024;

// Bitrate (in bits per second) used when the caller does not specify one.
// Chosen since this offers high quality audio, while still saving some space.
// Apps might set a lower value for voice-only RTC applications, and a higher
// value for encoding music.
constexpr int kDefaultAacBitrate = 192000;

// Owning pointer to an `AMediaFormat`; the format is destroyed through
// `AMediaFormatDeleter` when the pointer goes out of scope.
using MediaFormatPtr = std::unique_ptr<AMediaFormat, AMediaFormatDeleter>;

// Returns the AAC output format requested in `options`. When `options.aac` is
// not set, the format of a default-constructed `AudioEncoder::AacOptions` is
// returned instead.
//
// `options` is taken by const reference: this helper is called for every
// drained output buffer, and copying the whole options struct each time is
// unnecessary.
AudioEncoder::AacOutputFormat GetOutputFormat(
    const AudioEncoder::Options& options) {
  return options.aac.value_or(AudioEncoder::AacOptions()).format;
}

// Builds the `AMediaFormat` used to configure the encoder.
//
// `options` supplies the channel count, sample rate, optional bitrate mode,
// optional bitrate (defaulting to `kDefaultAacBitrate`) and the AAC output
// format. `mime_type` is the MIME type string stored under
// AMEDIAFORMAT_KEY_MIME.
//
// Returns an owning pointer to the newly created format.
MediaFormatPtr CreateAudioParams(const AudioEncoder::Options& options,
                                 std::string_view mime_type) {
  MediaFormatPtr result(AMediaFormat_new());

  // AMediaFormat_setString() takes a C string. A `std::string_view` is not
  // guaranteed to be NUL-terminated, so materialize a `std::string` rather
  // than passing `mime_type.data()` directly.
  const std::string mime_type_str(mime_type);
  AMediaFormat_setString(result.get(), AMEDIAFORMAT_KEY_MIME,
                         mime_type_str.c_str());
  AMediaFormat_setInt32(result.get(), AMEDIAFORMAT_KEY_CHANNEL_COUNT,
                        options.channels);
  AMediaFormat_setInt32(result.get(), AMEDIAFORMAT_KEY_SAMPLE_RATE,
                        options.sample_rate);

  // AMediaCodec uses signed 16 bits input by default, so one input buffer
  // holds `kAacFramesPerBuffer` interleaved int16 frames.
  const int input_size =
      sizeof(int16_t) * kAacFramesPerBuffer * options.channels;

  AMediaFormat_setInt32(result.get(), AMEDIAFORMAT_KEY_MAX_INPUT_SIZE,
                        input_size);

  // TODO(crbug.com/40259205) Consider adding HE-AAC profile support.

  // Only set a bitrate mode when the caller asked for one; otherwise the key
  // is left unset.
  if (options.bitrate_mode) {
    constexpr int32_t BITRATE_MODE_VBR = 1;
    constexpr int32_t BITRATE_MODE_CBR = 2;
    switch (*options.bitrate_mode) {
      case media::AudioEncoder::BitrateMode::kConstant:
        AMediaFormat_setInt32(result.get(), AMEDIAFORMAT_KEY_BITRATE_MODE,
                              BITRATE_MODE_CBR);
        break;
      case media::AudioEncoder::BitrateMode::kVariable:
        AMediaFormat_setInt32(result.get(), AMEDIAFORMAT_KEY_BITRATE_MODE,
                              BITRATE_MODE_VBR);
        break;
    }
  }

  AMediaFormat_setInt32(result.get(), AMEDIAFORMAT_KEY_BIT_RATE,
                        options.bitrate.value_or(kDefaultAacBitrate));

  // Tell the codec whether ADTS output was requested.
  auto format = GetOutputFormat(options);
  AMediaFormat_setInt32(result.get(), AMEDIAFORMAT_KEY_IS_ADTS,
                        format == AudioEncoder::AacOutputFormat::ADTS ? 1 : 0);

  return result;
}

// Stores `runner` in `task_runner_`; it is later passed to
// NdkMediaCodecWrapper::CreateByMimeType() when the codec is created. No
// codec is created here.
NdkAudioEncoder::NdkAudioEncoder(
    scoped_refptr<base::SequencedTaskRunner> runner)
    : task_runner_(std::move(runner)) {}

// Stops and releases the codec, if one is currently held.
NdkAudioEncoder::~NdkAudioEncoder() {
  ClearMediaCodec();
}

// Creates a codec for `options_.codec`, configures it as an encoder using the
// format built from `options_`, and starts it.
//
// Returns true on success. On failure an error is recorded through LogError()
// (it is not reported to any callback here) and false is returned.
bool NdkAudioEncoder::CreateAndStartMediaCodec() {
  auto mime_type =
      MediaCodecUtil::CodecToAndroidMimeType(options_.codec, kSampleFormatS16);

  media_codec_ =
      NdkMediaCodecWrapper::CreateByMimeType(mime_type, this, task_runner_);

  if (!media_codec_) {
    LogError({EncoderStatus::Codes::kEncoderInitializationError,
              "Could not create AMediaCodec"});
    return false;
  }

  auto aac_format = CreateAudioParams(options_, mime_type);

  media_status_t status =
      AMediaCodec_configure(media_codec_->codec(), aac_format.get(), nullptr,
                            nullptr, AMEDIACODEC_CONFIGURE_FLAG_ENCODE);

  if (status != AMEDIA_OK) {
    // Report this as a configuration failure so that it can be told apart
    // from the creation failure above.
    LogError({EncoderStatus::Codes::kEncoderInitializationError,
              base::StringPrintf("Could not configure AMediaCodec. Status: %d",
                                 status)});
    return false;
  }

  status = media_codec_->Start();

  if (status != AMEDIA_OK) {
    LogError({EncoderStatus::Codes::kEncoderInitializationError,
              base::StringPrintf("Could not start AMediaCodec. Status: %d",
                                 status)});
    return false;
  }

  return true;
}

void NdkAudioEncoder::Initialize(const Options& options,
                                 OutputCB output_callback,
                                 EncoderStatusCB done_cb) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  done_cb = BindCallbackToCurrentLoopIfNeeded(std::move(done_cb));

  // Check for `fifo_` instead of `media_codec_`, as `media_codec_` is reset
  // during a flush.
  if (fifo_) {
    LogAndReportError({EncoderStatus::Codes::kEncoderInitializeTwice,
                       "Encoder initialized twice"},
                      std::move(done_cb));
    return;
  }

  if (options.codec != AudioCodec::kAAC) {
    LogAndReportError({EncoderStatus::Codes::kEncoderInitializationError,
                       "NdkAudioEncoder only supports AAC"},
                      std::move(done_cb));
    return;
  }

  options_ = options;

  if (!CreateAndStartMediaCodec()) {
    ReportPendingError(std::move(done_cb));
    return;
  }

  output_cb_ = BindCallbackToCurrentLoopIfNeeded(std::move(output_callback));

  output_params_.Reset(
      AudioParameters::Format::AUDIO_PCM_LINEAR,
      ChannelLayoutConfig(GuessChannelLayout(options_.channels),
                          options_.channels),
      options_.sample_rate, kAacFramesPerBuffer);

  // `fifo_` will upmix/downmix and repacketize inputs to make sure there are
  // the correct number of channels and samples per buffer, without resampling.
  fifo_ = std::make_unique<ConvertingAudioFifo>(output_params_, output_params_);

  input_timestamp_tracker_ =
      std::make_unique<AudioTimestampHelper>(options_.sample_rate);

  output_timestamp_tracker_ =
      std::make_unique<AudioTimestampHelper>(options_.sample_rate);

  ReportOk(std::move(done_cb));
}

// Queues `audio_bus` for encoding and feeds as much buffered audio to the
// codec as it currently has input buffers for.
//
// `capture_time` is only used to seed the input/output timestamp trackers when
// they have no base timestamp yet. `done_cb` is run with kOk, or with an error
// if the encoder is in the error state, was never initialized, or a Flush() is
// still in progress.
void NdkAudioEncoder::Encode(std::unique_ptr<AudioBus> audio_bus,
                             base::TimeTicks capture_time,
                             EncoderStatusCB done_cb) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  done_cb = BindCallbackToCurrentLoopIfNeeded(std::move(done_cb));

  if (error_occurred_) {
    ReportPendingError(std::move(done_cb));
    return;
  }

  // A completed flush tears the codec down; recreate it lazily on the next
  // Encode().
  if (flush_state_ == FlushState::kNeedsMediaCodec) {
    CHECK(!media_codec_);

    if (!CreateAndStartMediaCodec()) {
      ReportPendingError(std::move(done_cb));
      return;
    }

    flush_state_ = FlushState::kNone;
  }

  if (!media_codec_) {
    LogAndReportError(EncoderStatus::Codes::kEncoderInitializeNeverCompleted,
                      std::move(done_cb));
    return;
  }

  if (flush_state_ != FlushState::kNone) {
    CHECK(pending_flush_cb_);

    // The in-progress flush is aborted and its callback is consumed below.
    // Leave the flushing state as well, so that a later FeedAllInputs() does
    // not call MaybeFeedEos(), which CHECKs that a flush callback is pending
    // and that no error occurred.
    flush_state_ = FlushState::kNone;

    LogAndReportError({EncoderStatus::Codes::kEncoderFailedFlush,
                       "Received Encode() before Flush() completed."},
                      std::move(pending_flush_cb_));

    // The error itself was delivered to the flush callback above.
    ReportPendingError(std::move(done_cb));
    return;
  }

  // First input since initialization or since the last flush: anchor both
  // timestamp trackers at `capture_time`.
  if (!input_timestamp_tracker_->base_timestamp()) {
    input_timestamp_tracker_->SetBaseTimestamp(capture_time -
                                               base::TimeTicks());
    output_timestamp_tracker_->SetBaseTimestamp(capture_time -
                                                base::TimeTicks());
  }

  fifo_->Push(std::move(audio_bus));

  FeedAllInputs();

  // FeedAllInputs() records errors without reporting them; deliver the result
  // to `done_cb` here.
  if (error_occurred_) {
    ReportPendingError(std::move(done_cb));
  } else {
    ReportOk(std::move(done_cb));
  }
}

// Starts flushing all buffered audio through the codec.
//
// `done_cb` is run immediately when there is nothing to flush or when the
// flush cannot be started; otherwise it is stored in `pending_flush_cb_` and
// run once the flush finishes or fails.
void NdkAudioEncoder::Flush(EncoderStatusCB done_cb) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  done_cb = BindCallbackToCurrentLoopIfNeeded(std::move(done_cb));

  if (error_occurred_) {
    ReportPendingError(std::move(done_cb));
    return;
  }

  // We should have been initialized already.
  if (!fifo_) {
    LogAndReportError({EncoderStatus::Codes::kEncoderInitializeNeverCompleted,
                       "Cannot flush uninitialized encoder."},
                      std::move(done_cb));
    return;
  }

  // A previous flush has already completed (CompleteFlush() released the codec
  // and left us in kNeedsMediaCodec) and no input has been encoded since, so
  // there is nothing to flush. This is not an overlapping flush and must not
  // be reported as one.
  if (flush_state_ == FlushState::kNeedsMediaCodec) {
    ReportOk(std::move(done_cb));
    return;
  }

  if (flush_state_ != FlushState::kNone) {
    LogAndReportError({EncoderStatus::Codes::kEncoderIllegalState,
                       "Cannot start new Flush() before first one completes."},
                      std::move(done_cb));
    return;
  }

  // Nothing to flush if we never fed input to the encoder.
  if (!input_timestamp_tracker_->base_timestamp()) {
    ReportOk(std::move(done_cb));
    return;
  }

  CHECK(!pending_flush_cb_);
  pending_flush_cb_ = std::move(done_cb);

  flush_state_ = FlushState::kFlushingInputs;

  fifo_->Flush();

  // Feeds whatever the codec can take right now; the rest is fed from
  // OnInputAvailable().
  FeedAllInputs();
}

// Feeds buffered audio from `fifo_` to the codec for as long as both an input
// buffer and audio are available and no error has occurred. While a flush is
// feeding inputs, also attempts to queue the end-of-stream marker.
void NdkAudioEncoder::FeedAllInputs() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  while (InputReady()) {
    FeedInput(fifo_->PeekOutput());
    fifo_->PopOutput();
  }

  if (error_occurred_) {
    // Fail a pending flush, if there is one.
    if (pending_flush_cb_) {
      flush_state_ = FlushState::kNone;
      ReportPendingError(std::move(pending_flush_cb_));
    }

    // Never continue to MaybeFeedEos() after an error: it CHECKs that no error
    // occurred and that a flush callback is pending. The flush callback may
    // already have been consumed by an earlier error report while
    // `flush_state_` is still kFlushingInputs.
    return;
  }

  // When we have fed all inputs, send an EOS to `media_codec_`.
  if (flush_state_ == FlushState::kFlushingInputs) {
    MaybeFeedEos();
  }
}

// Queues the end-of-stream marker if, and only if, every buffered input has
// been handed to the codec and the codec has a free input buffer to carry it.
// Must only be called while a flush is feeding inputs and no error occurred.
void NdkAudioEncoder::MaybeFeedEos() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  CHECK_EQ(flush_state_, FlushState::kFlushingInputs);
  CHECK(!error_occurred_);
  CHECK(pending_flush_cb_);

  // EOS must come after the last input, so wait until `fifo_` is drained.
  const bool all_inputs_fed = !fifo_->HasOutput();

  // The codec is only queried for a free buffer once all inputs were fed.
  if (all_inputs_fed && media_codec_->HasInput()) {
    FeedEos();
  }
}

// Takes an input buffer from the codec and queues it back empty, flagged as
// end-of-stream. On success the flush moves to kPendingEOS. On failure the
// error is reported to the pending flush callback and the flush is abandoned.
void NdkAudioEncoder::FeedEos() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  CHECK_EQ(flush_state_, FlushState::kFlushingInputs);

  size_t capacity = 0;
  const size_t buffer_idx = media_codec_->TakeInput();

  uint8_t* buffer_ptr =
      AMediaCodec_getInputBuffer(media_codec_->codec(), buffer_idx, &capacity);

  if (!buffer_ptr) {
    // The flush callback is consumed below, so the flush is over. Leave the
    // flushing state so that later FeedAllInputs() calls do not reach
    // MaybeFeedEos(), which CHECKs for a pending callback and for no error.
    flush_state_ = FlushState::kNone;
    LogAndReportError({EncoderStatus::Codes::kEncoderFailedFlush,
                       "Unable to get input buffer during flush"},
                      std::move(pending_flush_cb_));
    return;
  }

  // The EOS buffer is stamped with the timestamp following the last fed input.
  const auto timestamp_us =
      input_timestamp_tracker_->GetTimestamp().InMicroseconds();

  media_status_t status = AMediaCodec_queueInputBuffer(
      media_codec_->codec(), buffer_idx, /*offset=*/0, /*size=*/0, timestamp_us,
      AMEDIACODEC_BUFFER_FLAG_END_OF_STREAM);

  if (status != AMEDIA_OK) {
    // Same as above: the flush has failed and its callback is consumed.
    flush_state_ = FlushState::kNone;
    LogAndReportError(
        {EncoderStatus::Codes::kEncoderFailedFlush,
         base::StringPrintf("Error queueing EOS input buffer: status=%d",
                            status)},
        std::move(pending_flush_cb_));
    return;
  }

  // EOS was queued; the flush completes when the codec echoes it on output.
  flush_state_ = FlushState::kPendingEOS;
}

// Returns true when one more buffer can be fed to the codec: no error has
// occurred, the codec has a free input buffer, and `fifo_` has audio ready.
bool NdkAudioEncoder::InputReady() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  if (error_occurred_) {
    return false;
  }

  if (!media_codec_->HasInput()) {
    return false;
  }

  return fifo_->HasOutput();
}

void NdkAudioEncoder::FeedInput(const AudioBus* audio_bus) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  CHECK(InputReady());
  CHECK(!error_occurred_);

  const size_t buffer_idx = media_codec_->TakeInput();

  size_t capacity = 0;
  uint8_t* buffer_ptr =
      AMediaCodec_getInputBuffer(media_codec_->codec(), buffer_idx, &capacity);

  if (!buffer_ptr) {
    LogError({EncoderStatus::Codes::kEncoderFailedEncode,
              "Unable to get input buffer"});
    return;
  }

  const size_t bytes_per_frame =
      audio_bus->channels() * SampleFormatToBytesPerChannel(kSampleFormatS16);
  const size_t total_bytes = bytes_per_frame * audio_bus->frames();

  if (capacity < total_bytes) {
    LogError({EncoderStatus::Codes::kEncoderFailedEncode,
              base::StringPrintf(
                  "Input capacity too small: needed=%zu, capacity=%zu",
                  total_bytes, capacity)});
    return;
  }

  // MediaCodec uses signed 16bit PCM encoding by default.
  // Configuring the encoder to use float PCM did not work in tests.
  audio_bus->ToInterleaved<SignedInt16SampleTypeTraits>(
      audio_bus->frames(), reinterpret_cast<int16_t*>(buffer_ptr));

  CHECK_EQ(audio_bus->frames(), kAacFramesPerBuffer);
  const auto timestamp_us =
      input_timestamp_tracker_->GetTimestamp().InMicroseconds();
  input_timestamp_tracker_->AddFrames(audio_bus->frames());

  media_status_t status =
      AMediaCodec_queueInputBuffer(media_codec_->codec(), buffer_idx,
                                   /*offset=*/0, total_bytes, timestamp_us,
                                   /*flags=*/0);

  if (status != AMEDIA_OK) {
    LogError(
        {EncoderStatus::Codes::kEncoderFailedEncode,
         base::StringPrintf("Error queueing input buffer: status=%d", status)});
  }
}

// Finishes a flush whose end-of-stream marker has come back from the codec:
// resets both timestamp trackers, stops and releases the codec, and reports
// success to the pending flush callback. Must only be called in kPendingEOS.
void NdkAudioEncoder::CompleteFlush() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  CHECK_EQ(flush_state_, FlushState::kPendingEOS);

  input_timestamp_tracker_->Reset();
  output_timestamp_tracker_->Reset();

  // The codec is released here; Encode() creates a new one when it sees
  // kNeedsMediaCodec.
  ClearMediaCodec();
  flush_state_ = FlushState::kNeedsMediaCodec;

  ReportOk(std::move(pending_flush_cb_));
}

// Stops the codec and releases it. Does nothing when no codec is held, so it
// is safe to call repeatedly.
void NdkAudioEncoder::ClearMediaCodec() {
  if (media_codec_) {
    media_codec_->Stop();
    media_codec_.reset();
  }
}

// Consumes the next output buffer if it carries codec config data.
//
// Requires that the codec has at least one pending output. Returns true when a
// config buffer was consumed and processed successfully. Returns false when
// the next buffer is not a config buffer (it is left in the queue), or when a
// config buffer was taken but could not be processed; in the latter case an
// error has been recorded via LogError().
//
// For ADTS output the config is fed to `aac_config_parser_`; otherwise it is
// stored in `codec_desc_`.
bool NdkAudioEncoder::DrainConfig() {
  CHECK(media_codec_->HasOutput());

  NdkMediaCodecWrapper::OutputInfo output_buffer = media_codec_->PeekOutput();
  AMediaCodecBufferInfo& mc_buffer_info = output_buffer.info;

  // Check whether the first buffer in the queue contains config data.
  if ((mc_buffer_info.flags & AMEDIACODEC_BUFFER_FLAG_CODEC_CONFIG) == 0) {
    return false;
  }

  // We already have the info we need from `output_buffer`
  std::ignore = media_codec_->TakeOutput();

  size_t capacity = 0;
  uint8_t* buf_data = AMediaCodec_getOutputBuffer(
      media_codec_->codec(), output_buffer.buffer_index, &capacity);

  if (!buf_data) {
    LogError({EncoderStatus::Codes::kEncoderFailedEncode,
              "Can't obtain config output buffer from media codec"});
    return false;
  }

  const size_t mc_buffer_size = base::checked_cast<size_t>(mc_buffer_info.size);

  // `offset` is a signed 32-bit value. Validate it on its own before mixing it
  // with unsigned sizes: adding a negative offset to a size_t would wrap
  // around and could slip past a plain `offset + size > capacity` check.
  const bool layout_is_valid =
      mc_buffer_info.offset >= 0 &&
      static_cast<size_t>(mc_buffer_info.offset) <= capacity &&
      mc_buffer_size <= capacity - static_cast<size_t>(mc_buffer_info.offset);

  if (!layout_is_valid) {
    // Hand the buffer back to the codec even though it is unusable.
    AMediaCodec_releaseOutputBuffer(media_codec_->codec(),
                                    output_buffer.buffer_index, false);
    LogError(
        {EncoderStatus::Codes::kEncoderFailedEncode,
         base::StringPrintf("Invalid config output buffer layout."
                            "offset: %d size: %zu capacity: %zu",
                            mc_buffer_info.offset, mc_buffer_size, capacity)});
    return false;
  }

  const uint8_t* data_start = buf_data + mc_buffer_info.offset;

  if (GetOutputFormat(options_) == AudioEncoder::AacOutputFormat::ADTS) {
    NullMediaLog null_log;
    if (!aac_config_parser_.Parse(base::make_span(data_start, mc_buffer_size),
                                  &null_log)) {
      AMediaCodec_releaseOutputBuffer(media_codec_->codec(),
                                      output_buffer.buffer_index, false);
      LogError({EncoderStatus::Codes::kInvalidOutputBuffer,
                "Could not parse output config"});
      return false;
    }
  } else {
    // Output format is AudioEncoder::AacOutputFormat::AAC
    codec_desc_.assign(data_start, data_start + mc_buffer_size);
  }

  AMediaCodec_releaseOutputBuffer(media_codec_->codec(),
                                  output_buffer.buffer_index, false);

  return true;
}

// Processes at most one pending codec output buffer.
//
// A config buffer is consumed by DrainConfig(). An end-of-stream buffer
// completes the pending flush. Any other buffer is copied (and wrapped in an
// ADTS header when ADTS output was requested) and delivered to `output_cb_`,
// together with the codec description if one is waiting in `codec_desc_`.
// Errors are recorded via LogError().
void NdkAudioEncoder::DrainOutput() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  if (error_occurred_) {
    return;
  }

  if (!media_codec_->HasOutput()) {
    return;
  }

  if (DrainConfig()) {
    return;
  }

  // DrainConfig() also returns false when it took the config buffer off the
  // queue and then failed to process it. In that case the buffer is gone and
  // an error was recorded; do not go on to take another output.
  if (error_occurred_) {
    return;
  }

  NdkMediaCodecWrapper::OutputInfo output_buffer = media_codec_->TakeOutput();
  AMediaCodecBufferInfo& mc_buffer_info = output_buffer.info;

  // The current output buffer should be data, and not a config.
  CHECK_EQ(mc_buffer_info.flags & AMEDIACODEC_BUFFER_FLAG_CODEC_CONFIG, 0u);

  if ((mc_buffer_info.flags & AMEDIACODEC_BUFFER_FLAG_END_OF_STREAM) != 0) {
    CompleteFlush();
    return;
  }

  size_t capacity = 0;
  uint8_t* buf_data = AMediaCodec_getOutputBuffer(
      media_codec_->codec(), output_buffer.buffer_index, &capacity);

  if (!buf_data) {
    LogError({EncoderStatus::Codes::kEncoderFailedEncode,
              "Unable to get output buffer"});
    return;
  }

  const size_t mc_buffer_size = base::checked_cast<size_t>(mc_buffer_info.size);
  const int32_t mc_buffer_offset = mc_buffer_info.offset;

  // `offset` is signed. Validate it on its own before mixing it with unsigned
  // sizes: adding a negative offset to a size_t would wrap around and could
  // slip past a plain `size + offset > capacity` check.
  const bool layout_is_valid =
      mc_buffer_offset >= 0 &&
      static_cast<size_t>(mc_buffer_offset) <= capacity &&
      mc_buffer_size <= capacity - static_cast<size_t>(mc_buffer_offset);

  if (!layout_is_valid) {
    // Release the buffer on this error path too, as is done when the ADTS
    // conversion below fails.
    AMediaCodec_releaseOutputBuffer(media_codec_->codec(),
                                    output_buffer.buffer_index, false);
    LogError(
        {EncoderStatus::Codes::kEncoderFailedEncode,
         base::StringPrintf("Invalid output buffer layout."
                            "offset: %d size: %zu capacity: %zu",
                            mc_buffer_info.offset, mc_buffer_size, capacity)});
    return;
  }

  auto output_format = GetOutputFormat(options_);

  auto mc_data = base::make_span(buf_data + mc_buffer_offset, mc_buffer_size);
  base::HeapArray<uint8_t> output_data;

  if (output_format == AudioEncoder::AacOutputFormat::ADTS) {
    int adts_header_size = 0;
    output_data =
        aac_config_parser_.CreateAdtsFromEsds(mc_data, &adts_header_size);
    if (output_data.empty()) {
      AMediaCodec_releaseOutputBuffer(media_codec_->codec(),
                                      output_buffer.buffer_index, false);
      LogError({EncoderStatus::Codes::kFormatConversionError,
                "Unable to convert to ADTS"});
      return;
    }

  } else {
    output_data = base::HeapArray<uint8_t>::CopiedFrom(mc_data);
  }

  // `output_data` owns its own storage, so the codec buffer can go back now.
  AMediaCodec_releaseOutputBuffer(media_codec_->codec(),
                                  output_buffer.buffer_index, false);

  // Output timestamps are generated locally, one AAC frame per output buffer,
  // rather than read from the codec's buffer info.
  const auto timestamp =
      output_timestamp_tracker_->GetTimestamp() + base::TimeTicks();
  output_timestamp_tracker_->AddFrames(kAacFramesPerBuffer);

  // Attach the codec description to the first output after it was received,
  // then clear it so that it is only sent once.
  std::optional<CodecDescription> desc;
  if (!codec_desc_.empty()) {
    desc = codec_desc_;
    codec_desc_.clear();
  }

  output_cb_.Run(
      EncodedAudioBuffer(
          output_params_, std::move(output_data), timestamp,
          output_timestamp_tracker_->GetFrameDuration(kAacFramesPerBuffer)),
      desc);
}

// Codec callback: an input buffer is available, so try to feed more buffered
// audio (and, during a flush, the end-of-stream marker).
void NdkAudioEncoder::OnInputAvailable() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  FeedAllInputs();
}

// Codec callback: an output buffer is available, so process it.
void NdkAudioEncoder::OnOutputAvailable() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DrainOutput();
}

// Codec callback: the codec reported an asynchronous error. Records the error
// and fails a pending flush, if there is one.
void NdkAudioEncoder::OnError(media_status_t error) {
  LogError({EncoderStatus::Codes::kEncoderFailedEncode,
            base::StringPrintf("MediaCodec async error:%d", error)});

  // A flush that is in progress can no longer complete. Nothing visible here
  // guarantees that another input/output callback will arrive after an error
  // to deliver the failure, so complete the flush callback right away instead
  // of leaving it pending.
  if (pending_flush_cb_) {
    flush_state_ = FlushState::kNone;
    ReportPendingError(std::move(pending_flush_cb_));
  }
}

// Records `status` as an error via LogError() and then runs `done_cb` through
// ReportPendingError(). `status` must not be ok (LogError() CHECKs this).
void NdkAudioEncoder::LogAndReportError(EncoderStatus status,
                                        EncoderStatusCB done_cb) {
  LogError(status);
  ReportPendingError(std::move(done_cb));
}

// Logs `status` and puts the encoder into the error state. Only the first
// error is remembered for later reporting; subsequent ones are just logged.
// `status` must not be ok.
void NdkAudioEncoder::LogError(EncoderStatus status) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  CHECK(!status.is_ok());
  LOG(ERROR) << "ReportError(): code=" << static_cast<int>(status.code())
             << ", message=" << status.message();

  // An earlier error is already recorded; keep it.
  if (error_occurred_) {
    return;
  }

  error_occurred_ = true;
  pending_error_status_ = status;
}

// Runs `done_cb` with the recorded error. The recorded status is handed out
// only once; every later call reports a generic kEncoderIllegalState error
// instead. Must only be called after an error occurred.
void NdkAudioEncoder::ReportPendingError(EncoderStatusCB done_cb) {
  CHECK(error_occurred_);

  if (pending_error_status_) {
    // First report: deliver the original error, then forget it.
    std::move(done_cb).Run(*pending_error_status_);
    pending_error_status_.reset();
    return;
  }

  // Already reported error.
  std::move(done_cb).Run({EncoderStatus::Codes::kEncoderIllegalState,
                          "Encoder already reported error"});
}

// Runs `done_cb` with kOk. Must only be called when no error has occurred and
// no error is waiting to be reported (both are CHECKed).
void NdkAudioEncoder::ReportOk(EncoderStatusCB done_cb) {
  CHECK(!error_occurred_);
  CHECK(!pending_error_status_);

  std::move(done_cb).Run(EncoderStatus::Codes::kOk);
}

}  // namespace media
#pragma clang attribute pop