chromium/media/filters/mac/audio_toolbox_audio_decoder.cc

// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#include "media/filters/mac/audio_toolbox_audio_decoder.h"

#include <optional>

#include "base/apple/osstatus_logging.h"
#include "base/auto_reset.h"
#include "base/functional/bind.h"
#include "base/logging.h"
#include "base/memory/raw_ptr.h"
#include "base/ranges/algorithm.h"
#include "base/task/bind_post_task.h"
#include "media/base/audio_buffer.h"
#include "media/base/audio_codecs.h"
#include "media/base/audio_discard_helper.h"
#include "media/base/channel_layout.h"
#include "media/base/limits.h"
#include "media/base/mac/channel_layout_util_mac.h"
#include "media/base/media_log.h"
#include "media/base/status.h"
#include "media/base/timestamp_constants.h"
#include "media/formats/mp4/es_descriptor.h"
#include "media/media_buildflags.h"

namespace media {

namespace {

bool CanUseAudioToolbox(const AudioDecoderConfig& config) {
#if BUILDFLAG(ENABLE_PLATFORM_AC3_EAC3_AUDIO)
  // AC3 is available on macOS 10.2 or later, and E-AC3 is available on
  // macOS 10.11 or later, so this is always true.
  if (config.codec() == AudioCodec::kEAC3 ||
      config.codec() == AudioCodec::kAC3) {
    return true;
  }
#endif
  // We use AudioToolbox for decoding xHE-AAC content and that's available on
  // macOS 10.15 or higher.
  return config.profile() == AudioCodecProfile::kXHE_AAC;
}

struct InputData {
  raw_ptr<DecoderBuffer> buffer = nullptr;
  AudioStreamPacketDescription packet = {};
};

// Special error code we use to differentiate real errors from end of buffer.
constexpr OSStatus kNoMoreDataError = -12345;

// Callback used to provide input data to the AudioConverter.
OSStatus ProvideInputCallback(AudioConverterRef decoder,
                              UInt32* num_packets,
                              AudioBufferList* buffer_list,
                              AudioStreamPacketDescription** packets,
                              void* user_data) {
  auto* input_data = reinterpret_cast<InputData*>(user_data);
  if (!input_data->buffer || input_data->buffer->end_of_stream()) {
    *num_packets = 0;
    return kNoMoreDataError;
  }

  *num_packets = buffer_list->mNumberBuffers = 1;
  buffer_list->mBuffers[0].mNumberChannels = 0;
  buffer_list->mBuffers[0].mDataByteSize = input_data->buffer->size();

  // No const version of this API unfortunately, so we need const_cast().
  buffer_list->mBuffers[0].mData =
      const_cast<uint8_t*>(input_data->buffer->data());

  if (packets)
    *packets = &input_data->packet;

  // This ensures that if this callback is called again, we'll exit via the
  // kNoMoreDataError path above.
  input_data->buffer = nullptr;
  return noErr;
}

}  // namespace

// static
AudioConverterRef
AudioToolboxAudioDecoder::ScopedAudioConverterRefTraits::Retain(
    AudioConverterRef converter) {
  NOTREACHED_IN_MIGRATION() << "Only compatible with ASSUME policy";
  return converter;
}

// static
void AudioToolboxAudioDecoder::ScopedAudioConverterRefTraits::Release(
    AudioConverterRef converter) {
  const auto result = AudioConverterDispose(converter);
  OSSTATUS_DLOG_IF(WARNING, result != noErr, result)
      << "AudioConverterDispose() failed";
}

AudioToolboxAudioDecoder::AudioToolboxAudioDecoder(
    std::unique_ptr<MediaLog> media_log)
    : media_log_(std::move(media_log)) {}

AudioToolboxAudioDecoder::~AudioToolboxAudioDecoder() = default;

AudioDecoderType AudioToolboxAudioDecoder::GetDecoderType() const {
  return AudioDecoderType::kAudioToolbox;
}

void AudioToolboxAudioDecoder::Initialize(const AudioDecoderConfig& config,
                                          CdmContext* cdm_context,
                                          InitCB init_cb,
                                          const OutputCB& output_cb,
                                          const WaitingCB& waiting_cb) {
  if (!CanUseAudioToolbox(config)) {
    DLOG(WARNING)
        << "Only xHE-AAC/AC3/E-AC3 decoding is supported by this decoder.";
    std::move(init_cb).Run(DecoderStatus::Codes::kUnsupportedCodec);
    return;
  }

  if (config.is_encrypted()) {
    DLOG(WARNING) << "Encrypted decoding not supported by platform decoder.";
    std::move(init_cb).Run(DecoderStatus::Codes::kUnsupportedEncryptionMode);
    return;
  }

  // This decoder supports re-initialization.
  decoder_.reset();

  output_cb_ = output_cb;
  base::BindPostTaskToCurrentDefault(std::move(init_cb))
      .Run(CreateDecoder(config)
               ? DecoderStatus::Codes::kOk
               : DecoderStatus::Codes::kFailedToCreateDecoder);
}

void AudioToolboxAudioDecoder::Decode(scoped_refptr<DecoderBuffer> buffer,
                                      DecodeCB decode_cb) {
  DecodeCB decode_cb_bound =
      base::BindPostTaskToCurrentDefault(std::move(decode_cb));

  // Make sure we are notified if https://crbug.com/49709 returns. Issue also
  // occurs with some damaged files.
  if (!buffer->end_of_stream() && buffer->timestamp() == kNoTimestamp) {
    DLOG(ERROR) << "Received a buffer without timestamps!";
    std::move(decode_cb_bound).Run(DecoderStatus::Codes::kMissingTimestamp);
    return;
  }

  if (!buffer->end_of_stream() && buffer->is_encrypted()) {
    DLOG(ERROR) << "Encrypted buffer not supported";
    std::move(decode_cb_bound)
        .Run(DecoderStatus::Codes::kUnsupportedEncryptionMode);
    return;
  }

  InputData input_data;
  input_data.buffer = buffer.get();
  if (!buffer->end_of_stream())
    input_data.packet.mDataByteSize = buffer->size();

  // Must be filled in each time in case AudioConverterFillComplexBuffer()
  // modified it during a previous call.
  output_buffer_list_->mNumberBuffers = output_bus_->channels();
  for (int i = 0; i < output_bus_->channels(); ++i) {
    output_buffer_list_->mBuffers[i].mNumberChannels = 1;
    output_buffer_list_->mBuffers[i].mDataByteSize =
        output_bus_->frames() * sizeof(float);
    output_buffer_list_->mBuffers[i].mData = output_bus_->channel(i);
  }

  // Decodes |num_frames| of encoded data into |output_bus_| by calling the
  // ProvideInputCallback to fill an AudioBufferList that points into
  // |input_data|. See media::AudioConverter for a similar mechanism.
  UInt32 num_frames = output_bus_->frames();
  auto result = AudioConverterFillComplexBuffer(
      decoder_.get(), ProvideInputCallback, &input_data, &num_frames,
      output_buffer_list_.get(), nullptr);

  if (result == kNoMoreDataError && !num_frames) {
    std::move(decode_cb_bound).Run(OkStatus());
    return;
  }

  if (result != noErr && result != kNoMoreDataError) {
    OSSTATUS_MEDIA_LOG(ERROR, result, media_log_)
        << "AudioConverterFillComplexBuffer() failed";
    std::move(decode_cb_bound)
        .Run(DecoderStatus::Codes::kPlatformDecodeFailure);
    return;
  }

  auto output_buffer =
      AudioBuffer::CopyFrom(channel_layout_, sample_rate_, buffer->timestamp(),
                            output_bus_.get(), pool_);

  if (num_frames != static_cast<UInt32>(output_bus_->frames()))
    output_buffer->TrimEnd(output_bus_->frames() - num_frames);
  if (discard_helper_->ProcessBuffers(buffer->time_info(),
                                      output_buffer.get())) {
    base::BindPostTaskToCurrentDefault(output_cb_)
        .Run(std::move(output_buffer));
  }

  std::move(decode_cb_bound).Run(OkStatus());
}

void AudioToolboxAudioDecoder::Reset(base::OnceClosure reset_cb) {
  // This could fail, but ResetCB has no error reporting mechanism, so just let
  // a subsequent decode call fail.
  const auto result = AudioConverterReset(decoder_.get());
  OSSTATUS_DLOG_IF(WARNING, result != noErr, result)
      << "AudioConverterReset() failed";
  discard_helper_->Reset(discard_helper_->decoder_delay());
  base::BindPostTaskToCurrentDefault(std::move(reset_cb)).Run();
}

bool AudioToolboxAudioDecoder::NeedsBitstreamConversion() const {
  // AAC shouldn't have an ADTS header for xHE-AAC since the AOT for xHE-AAC
  // doesn't fit within the ADTS header limitations.
  return false;
}

bool AudioToolboxAudioDecoder::CreateDecoder(const AudioDecoderConfig& config) {
  AudioStreamBasicDescription input_format = {};
  std::vector<uint8_t> magic_cookie;

  switch (config.codec()) {
    case AudioCodec::kAAC: {
      // Input is xHE-AAC / USAC.
      CHECK_EQ(config.profile(), AudioCodecProfile::kXHE_AAC);
      input_format.mFormatID = kAudioFormatMPEGD_USAC;
      magic_cookie = mp4::ESDescriptor::CreateEsds(config.aac_extra_data());

      // Have macOS fill in the rest of the input_format for us.
      UInt32 format_size = sizeof(input_format);
      auto status = AudioFormatGetProperty(
          kAudioFormatProperty_FormatInfo, magic_cookie.size(),
          magic_cookie.data(), &format_size, &input_format);
      if (status != noErr) {
        OSSTATUS_MEDIA_LOG(ERROR, status, media_log_)
            << "AudioFormatGetProperty() failed";
        return false;
      }
      break;
    }
#if BUILDFLAG(ENABLE_PLATFORM_AC3_EAC3_AUDIO)
    case AudioCodec::kAC3:
    case AudioCodec::kEAC3:
      // Input is AC3/E-AC3.
      input_format.mFormatID = config.codec() == AudioCodec::kAC3
                                   ? kAudioFormatAC3
                                   : kAudioFormatEnhancedAC3;

      // AC3/E-AC3 doesn't have an extra_data, so fill input format manually.
      input_format.mBytesPerPacket = 0;
      input_format.mSampleRate = config.samples_per_second();
      input_format.mChannelsPerFrame = config.channels();
      // This is a fixed value of 6 * 256 for AC3, and can be {1,2,3,6} * 256
      // for E-AC3. For now, set this value to 6 * 256 works for both codec
      // since we get frame count from AudioConverterFillComplexBuffer and trim
      // audio buffer each time during decoding.
      input_format.mFramesPerPacket = 6 * 256;
      break;
#endif
    default:
      NOTREACHED_IN_MIGRATION() << "Unsupported codec: " << config.codec();
      return false;
  }

  // Output is float planar.
  AudioStreamBasicDescription output_format = {};
  output_format.mFormatID = kAudioFormatLinearPCM;
  output_format.mFormatFlags =
      kLinearPCMFormatFlagIsFloat | kLinearPCMFormatFlagIsNonInterleaved;
  output_format.mFramesPerPacket = 1;
  output_format.mBitsPerChannel = 32;

  // We don't want any channel or sample rate conversion.
  sample_rate_ = output_format.mSampleRate = input_format.mSampleRate;
  channel_count_ = output_format.mChannelsPerFrame =
      input_format.mChannelsPerFrame;

  // Note: This is important to get right or AudioConverterNew will balk. For
  // interleaved data, this value should be multiplied by the channel count.
  output_format.mBytesPerPacket = output_format.mBytesPerFrame =
      output_format.mBitsPerChannel / 8;

  // Create the decoder.
  auto result = AudioConverterNew(&input_format, &output_format,
                                  decoder_.InitializeInto());
  if (result != noErr) {
    OSSTATUS_MEDIA_LOG(ERROR, result, media_log_)
        << "AudioConverterNew() failed";
    return false;
  }

  if (channel_count_ > kMaxConcurrentChannels) {
    channel_layout_ = CHANNEL_LAYOUT_DISCRETE;
  } else {
    // Get the decoder's output channel layout.
    UInt32 size;
    result = AudioConverterGetPropertyInfo(
        decoder_.get(), kAudioConverterOutputChannelLayout, &size, NULL);
    if (result != noErr) {
      OSSTATUS_MEDIA_LOG(ERROR, result, media_log_)
          << "AudioConverterGetPropertyInfo() failed";
      return false;
    }

    ScopedAudioChannelLayout output_layout(size);
    result = AudioConverterGetProperty(decoder_.get(),
                                       kAudioConverterOutputChannelLayout,
                                       &size, output_layout.layout());
    if (result != noErr) {
      OSSTATUS_MEDIA_LOG(ERROR, result, media_log_)
          << "AudioConverterGetProperty() failed";
      return false;
    }

    // First, try to find a channel layout that matches the layout from decoder.
    // NOTE: We should retrieve layout from decoder, instead of using
    // channel layout from audio decoder config. Test result shows that if audio
    // converter thinks the audio is a 7.1_WIDE one, and we set output layout
    // to 7.1, this always lead to a loss of left and right channels.
    if (!AudioChannelLayoutToChannelLayout(*output_layout.layout(),
                                           &channel_layout_)) {
      // If we couldn't find a matched layout, use the guess result and hope
      // for the best.
      channel_layout_ = GuessChannelLayout(channel_count_);
    }
  }

  if (channel_count_ != static_cast<UInt32>(config.channels()) ||
      channel_layout_ != config.channel_layout()) {
    MEDIA_LOG(INFO, media_log_)
        << "Audio config updated: channels: " << channel_count_
        << ", channel layout: " << ChannelLayoutToString(channel_layout_);
  }

  // Next, convert back this layout to an audio channel layout with the same
  // channel order description. This let decoder output correct orders.
  auto ordered_layout =
      ChannelLayoutToAudioChannelLayout(channel_layout_, channel_count_);
  result = AudioConverterSetProperty(
      decoder_.get(), kAudioConverterOutputChannelLayout,
      ordered_layout->layout_size(), ordered_layout->layout());
  if (result != noErr) {
    OSSTATUS_MEDIA_LOG(ERROR, result, media_log_)
        << "AudioConverterSetProperty() failed";
    return false;
  }

  if (config.codec() == AudioCodec::kAAC) {
    // Instill the magic!
    result = AudioConverterSetProperty(
        decoder_.get(), kAudioConverterDecompressionMagicCookie,
        magic_cookie.size(), magic_cookie.data());
    if (result != noErr) {
      OSSTATUS_MEDIA_LOG(ERROR, result, media_log_)
          << "AudioConverterSetProperty() failed";
      return false;
    }

    // macOS doesn't provide a default target loudness. Use the value
    // recommended by Fraunhofer. AC3/E-AC3 doesn't support set this,
    // so limit this to xHE-AAC only.
    const Float32 kDefaultLoudness = -16.0;
    result = AudioConverterSetProperty(
        decoder_.get(), kAudioCodecPropertyProgramTargetLevel,
        sizeof(kDefaultLoudness), &kDefaultLoudness);
    if (result != noErr) {
      OSSTATUS_MEDIA_LOG(ERROR, result, media_log_)
          << "AudioConverterSetProperty() failed to set loudness.";
      return false;
    }

    // Likewise set the effect type recommended by Fraunhofer. There doesn't
    // appear to be a key name available for this yet.
    // Values: 0=none, night=1, noisy=2, limited=3
    const UInt32 kDefaultEffectType = 3;
    result = AudioConverterSetProperty(decoder_.get(), 0x64726370 /* "drcp" */,
                                       sizeof(kDefaultEffectType),
                                       &kDefaultEffectType);
    if (result != noErr) {
      OSSTATUS_MEDIA_LOG(ERROR, result, media_log_)
          << "AudioConverterSetProperty() failed to set DRC effect type.";
      return false;
    }
  }

  discard_helper_ = std::make_unique<AudioDiscardHelper>(
      sample_rate_, config.codec_delay(), false);
  discard_helper_->Reset(config.codec_delay());

  // Create staging structures we'll give to macOS for writing data into.
  output_bus_ = AudioBus::Create(input_format.mChannelsPerFrame,
                                 input_format.mFramesPerPacket);

  // AudioBufferList is a strange variable length structure that by default only
  // includes one buffer slot, so we need to construct our own multichannel one.
  //
  // NOTE: While we can allocate the AudioBufferList once, we need to fill it in
  // each time since the call to AudioConverterFillComplexBuffer() may modify
  // the values within the structure (particularly upon underflow).
  output_buffer_list_.reset(reinterpret_cast<AudioBufferList*>(
      calloc(1, sizeof(AudioBufferList) +
                    output_bus_->channels() * sizeof(AudioBuffer))));
  return true;
}

}  // namespace media