chromium/media/audio/apple/audio_low_latency_input.cc

// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#include "media/audio/apple/audio_low_latency_input.h"

#include <CoreServices/CoreServices.h>
#include <dlfcn.h>
#include <memory>
#include <string>

#include "base/apple/foundation_util.h"
#include "base/apple/osstatus_logging.h"
#include "base/apple/scoped_cftyperef.h"
#include "base/apple/scoped_mach_port.h"
#include "base/feature_list.h"
#include "base/functional/bind.h"
#include "base/logging.h"
#include "base/mac/mac_util.h"
#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/strcat.h"
#include "base/strings/stringprintf.h"
#include "base/strings/sys_string_conversions.h"
#include "base/time/time.h"
#include "base/trace_event/trace_event.h"
#include "media/audio/apple/audio_manager_apple.h"
#include "media/audio/apple/scoped_audio_unit.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_timestamp_helper.h"
#include "media/base/data_buffer.h"

#if BUILDFLAG(IS_MAC)
#include "media/audio/mac/core_audio_util_mac.h"

namespace {
extern "C" {
// See:
// https://trac.webkit.org/browser/webkit/trunk/Source/WebCore/PAL/pal/spi/cf/CoreAudioSPI.h?rev=228264
OSStatus AudioDeviceDuck(AudioDeviceID inDevice,
                         Float32 inDuckedLevel,
                         const AudioTimeStamp* __nullable inStartTime,
                         Float32 inRampDuration) __attribute__((weak_import));
}

void UndoDucking(AudioDeviceID output_device_id) {
  if (AudioDeviceDuck != nullptr) {
    // Ramp the volume back up over half a second.
    AudioDeviceDuck(output_device_id, 1.0, nullptr, 0.5);
  }
}
}  // namespace
#endif

namespace media {

// Number of blocks of buffers used in the |fifo_|.
const int kNumberOfBlocksBufferInFifo = 2;

// Max length of sequence of TooManyFramesToProcessError errors.
// The stream will be stopped as soon as this time limit is passed.
constexpr base::TimeDelta kMaxErrorTimeout = base::Seconds(1);

// A one-shot timer is created and started in Start() and it triggers
// CheckInputStartupSuccess() after this amount of time. The UMA stat
// Media.Audio.InputStartupSuccessMac is then updated: true is recorded if
// input callbacks have started, and false otherwise.
constexpr base::TimeDelta kInputCallbackStartTimeout = base::Seconds(5);

// Returns true if |format_flags| has the "non-interleaved" flag
// (kAudioFormatFlagIsNonInterleaved) cleared (set to 0).
static bool FormatIsInterleaved(UInt32 format_flags) {
  return !(format_flags & kAudioFormatFlagIsNonInterleaved);
}

// Converts a 32-bit four-character code (FourCC) into a std::string.
// Example: code=1735354734 <=> 'goin' <=> kAudioDevicePropertyDeviceIsRunning.
static std::string FourCharFormatCodeToString(UInt32 code) {
  char code_string[5];
  // Converts a 32-bit integer from the host's native byte order to big-endian.
  UInt32 code_id = CFSwapInt32HostToBig(code);
  memcpy(code_string, &code_id, 4);
  code_string[4] = '\0';
  return std::string(code_string);
}

static std::ostream& operator<<(std::ostream& os,
                                const AudioStreamBasicDescription& format) {
  std::string format_string = FourCharFormatCodeToString(format.mFormatID);
  os << "sample rate       : " << format.mSampleRate << std::endl
     << "format ID         : " << format_string << std::endl
     << "format flags      : " << format.mFormatFlags << std::endl
     << "bytes per packet  : " << format.mBytesPerPacket << std::endl
     << "frames per packet : " << format.mFramesPerPacket << std::endl
     << "bytes per frame   : " << format.mBytesPerFrame << std::endl
     << "channels per frame: " << format.mChannelsPerFrame << std::endl
     << "bits per channel  : " << format.mBitsPerChannel << std::endl
     << "reserved          : " << format.mReserved << std::endl
     << "interleaved       : "
     << (FormatIsInterleaved(format.mFormatFlags) ? "yes" : "no");
  return os;
}

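// Render callback registered on the output element of the voice processing
// audio unit (see OpenVoiceProcessingAU()). The stream is capture-only, so
// the callback just flags the rendered output as silence.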
static OSStatus OnGetPlayoutData(void* in_ref_con,
                                 AudioUnitRenderActionFlags* flags,
                                 const AudioTimeStamp* time_stamp,
                                 UInt32 bus_number,
                                 UInt32 num_frames,
                                 AudioBufferList* io_data) {
  *flags |= kAudioUnitRenderAction_OutputIsSilence;
  return noErr;
}

// See "Technical Note TN2091 - Device input using the HAL Output Audio
// Unit"
// http://developer.apple.com/library/mac/#technotes/tn2091/_index.html
// for more details and background regarding this implementation.

AUAudioInputStream::AUAudioInputStream(
    AudioManagerApple* manager,
    const AudioParameters& input_params,
    AudioDeviceID audio_device_id,
    const AudioManager::LogCallback& log_callback,
    AudioManagerBase::VoiceProcessingMode voice_processing_mode)
    : manager_(manager),
      input_params_(input_params),
      number_of_frames_provided_(0),
      sink_(nullptr),
      audio_unit_(0),
      input_device_id_(audio_device_id),
      hardware_latency_(base::Seconds(0)),
      fifo_(input_params.channels(),
            input_params.frames_per_buffer(),
            kNumberOfBlocksBufferInFifo),
      got_input_callback_(false),
      input_callback_is_active_(false),
      noise_reduction_suppressed_(false),
      use_voice_processing_(voice_processing_mode ==
                            AudioManagerBase::VoiceProcessingMode::kEnabled),
      output_device_id_for_aec_(kAudioObjectUnknown),
      last_sample_time_(0.0),
      last_number_of_frames_(0),
      glitch_reporter_(SystemGlitchReporter::StreamType::kCapture),
      peak_detector_(base::BindRepeating(&AudioManager::TraceAmplitudePeak,
                                         base::Unretained(manager_),
                                         /*trace_start=*/true)),
      log_callback_(log_callback) {
  DCHECK(manager_);
  CHECK(log_callback_ != AudioManager::LogCallback());
  DVLOG(1) << __FUNCTION__ << " this " << this << " params "
           << input_params.AsHumanReadableString()
           << " use_voice_processing_: " << use_voice_processing_;

#if BUILDFLAG(IS_MAC)
  if (use_voice_processing_) {
    DCHECK(input_params.channels() == 1 || input_params.channels() == 2);
    const bool got_default_device =
        AudioManagerMac::GetDefaultOutputDevice(&output_device_id_for_aec_);
    DCHECK(got_default_device);
  }
#endif
  const SampleFormat kSampleFormat = kSampleFormatS16;

  // Set up the desired (output) format specified by the client.
  format_.mSampleRate = input_params.sample_rate();
  format_.mFormatID = kAudioFormatLinearPCM;
  format_.mFormatFlags =
      kLinearPCMFormatFlagIsPacked | kLinearPCMFormatFlagIsSignedInteger;
  DCHECK(FormatIsInterleaved(format_.mFormatFlags));
  format_.mBitsPerChannel = SampleFormatToBitsPerChannel(kSampleFormat);
  format_.mChannelsPerFrame = input_params.channels();
  format_.mFramesPerPacket = 1;  // uncompressed audio
  format_.mBytesPerPacket = format_.mBytesPerFrame =
      input_params.GetBytesPerFrame(kSampleFormat);
  format_.mReserved = 0;

  DVLOG(1) << __FUNCTION__ << " this " << this;
  DVLOG(1) << "device ID: 0x" << std::hex << audio_device_id;
  DVLOG(1) << "buffer size : " << input_params.frames_per_buffer();
  DVLOG(1) << "channels : " << input_params.channels();
  DVLOG(1) << "desired output format:\n" << format_;

  // Derive size (in bytes) of the buffers that we will render to.
  UInt32 data_byte_size =
      input_params.frames_per_buffer() * format_.mBytesPerFrame;
  DVLOG(1) << "size of data buffer in bytes : " << data_byte_size;

  // Allocate AudioBuffers to be used as storage for the received audio.
  // The AudioBufferList structure works as a placeholder for the
  // AudioBuffer structure, which holds a pointer to the actual data buffer.
  audio_data_buffer_.reset(new uint8_t[data_byte_size]);
  // We use interleaved audio: a single AudioBuffer holds all channels (see
  // the FormatIsInterleaved() DCHECK above).
  audio_buffer_list_.mNumberBuffers = 1;

  AudioBuffer* audio_buffer = audio_buffer_list_.mBuffers;
  audio_buffer->mNumberChannels = input_params.channels();
  audio_buffer->mDataByteSize = data_byte_size;
  audio_buffer->mData = audio_data_buffer_.get();
}

AUAudioInputStream::~AUAudioInputStream() {
  DVLOG(1) << __FUNCTION__ << " this " << this;
  ReportAndResetStats();
}

// Obtain and open the AUHAL or voice processing AudioOutputUnit for
// recording.
AudioInputStream::OpenOutcome AUAudioInputStream::Open() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DVLOG(1) << __FUNCTION__ << " this " << this;
  DCHECK(!audio_unit_);

  // Verify that we have a valid device. Send appropriate error code to
  // HandleError() to ensure that the error type is added to UMA stats.
#if BUILDFLAG(IS_MAC)
  if (input_device_id_ == kAudioObjectUnknown) {
    LOG(ERROR) << "Device ID is unknown";
    HandleError(kAudioUnitErr_InvalidElement);
    return OpenOutcome::kFailed;
  }
#endif

  // The requested sample-rate must match the hardware sample-rate.
  const int sample_rate =
      manager_->HardwareSampleRateForDevice(input_device_id_);
  DCHECK_EQ(sample_rate, format_.mSampleRate);

  log_callback_.Run(base::StrCat(
      {"AU in: Open using ", use_voice_processing_ ? "VPAU" : "AUHAL"}));

  const bool success =
      use_voice_processing_ ? OpenVoiceProcessingAU() : OpenAUHAL();

  if (!success)
    return OpenOutcome::kFailed;

    // The hardware latency is fixed and will not change during the call.
#if BUILDFLAG(IS_MAC)
  hardware_latency_ = core_audio_mac::GetHardwareLatency(
      audio_unit_, input_device_id_, kAudioDevicePropertyScopeInput,
      format_.mSampleRate, /*is_input=*/true);
#else
  AudioManagerIOS* manager_ios = static_cast<AudioManagerIOS*>(manager_);
  hardware_latency_ = base::Seconds(manager_ios->HardwareLatency(
      /*is_input=*/true));
#endif

  return OpenOutcome::kSuccess;
}

bool AUAudioInputStream::OpenAUHAL() {
  DVLOG(1) << __FUNCTION__ << " this " << this;

  // Start by obtaining an AudioOutputUnit using an AUHAL component description.

  // Description for the Audio Unit we want to use (AUHAL in this case).
  // The kAudioUnitSubType_HALOutput audio unit interfaces to any audio device.
  // The user specifies which audio device to track. The audio unit can do
  // input from the device as well as output to the device. Bus 0 is used for
  // the output side, bus 1 is used to get audio input from the device.
  AudioComponentDescription desc = {kAudioUnitType_Output,
#if BUILDFLAG(IS_MAC)
                                    kAudioUnitSubType_HALOutput,
#else
                                    kAudioUnitSubType_RemoteIO,  // for iOS
#endif
                                    kAudioUnitManufacturer_Apple, 0, 0};

  // Find a component that meets the description in |desc|.
  AudioComponent comp = AudioComponentFindNext(nullptr, &desc);
  DCHECK(comp);
  if (!comp) {
    HandleError(kAudioUnitErr_NoConnection);
    return false;
  }

  // Get access to the service provided by the specified Audio Unit.
  OSStatus result = AudioComponentInstanceNew(comp, &audio_unit_);
  if (result) {
    HandleError(result);
    return false;
  }

#if BUILDFLAG(IS_MAC)
  //  Initialize the AUHAL before making any changes or using it. The audio
  //  unit will be initialized once more as last operation in this method but
  //  that is intentional. This approach is based on a comment in the
  //  CAPlayThrough example from Apple, which states that "AUHAL needs to be
  //  initialized *before* anything is done to it".
  //  TODO(henrika): remove this extra call if we are unable to see any
  //  positive effects of it in our UMA stats.
  result = AudioUnitInitialize(audio_unit_);
  if (result != noErr) {
    HandleError(result);
    return false;
  }
#endif

  // Enable IO on the input scope of the Audio Unit.
  // Note that these changes must be done *before* setting the AUHAL's
  // current device.

  // After creating the AUHAL object, we must enable IO on the input scope
  // of the Audio Unit to obtain the device input. Input must be explicitly
  // enabled with the kAudioOutputUnitProperty_EnableIO property on Element 1
  // of the AUHAL. Because the AUHAL can be used for both input and output,
  // we must also disable IO on the output scope.

  // kAudioOutputUnitProperty_EnableIO is not a writable property of the
  // voice processing unit (we'd get kAudioUnitErr_PropertyNotWritable returned
  // back to us). IO is always enabled.

  // Enable input on the AUHAL.
  {
    const UInt32 enableIO = 1;
    result = AudioUnitSetProperty(
        audio_unit_, kAudioOutputUnitProperty_EnableIO, kAudioUnitScope_Input,
        AUElement::INPUT, &enableIO, sizeof(enableIO));
    if (result != noErr) {
      HandleError(result);
      return false;
    }
  }

  // Disable output on the AUHAL.
  {
    const UInt32 disableIO = 0;
    result = AudioUnitSetProperty(
        audio_unit_, kAudioOutputUnitProperty_EnableIO, kAudioUnitScope_Output,
        AUElement::OUTPUT, &disableIO, sizeof(disableIO));
    if (result != noErr) {
      HandleError(result);
      return false;
    }
  }

#if BUILDFLAG(IS_MAC)
  // Next, set the audio device to be the Audio Unit's current device.
  // Note that devices can only be set on the AUHAL after enabling IO.
  result =
      AudioUnitSetProperty(audio_unit_, kAudioOutputUnitProperty_CurrentDevice,
                           kAudioUnitScope_Global, AUElement::OUTPUT,
                           &input_device_id_, sizeof(input_device_id_));

  if (result != noErr) {
    HandleError(result);
    return false;
  }
#endif

  // Register the input procedure for the AUHAL. This procedure will be called
  // when the AUHAL has received new data from the input device.
  AURenderCallbackStruct callback;
  callback.inputProc = &DataIsAvailable;
  callback.inputProcRefCon = this;
  result = AudioUnitSetProperty(
      audio_unit_, kAudioOutputUnitProperty_SetInputCallback,
      kAudioUnitScope_Global, AUElement::OUTPUT, &callback, sizeof(callback));

  if (result != noErr) {
    HandleError(result);
    return false;
  }

  // Get the stream format for the selected input device and ensure that the
  // sample rate of the selected input device matches the desired (given at
  // construction) sample rate. We should not rely on sample rate conversion
  // in the AUHAL, only *simple* conversions, e.g., 32-bit float to 16-bit
  // signed integer format.
  AudioStreamBasicDescription input_device_format = {0};
  result =
      manager_->GetInputDeviceStreamFormat(audio_unit_, &input_device_format);
  if (result != noErr) {
    HandleError(result);
    return false;
  }

  DVLOG(1) << "Input device stream format: " << input_device_format;
  if (input_device_format.mSampleRate != format_.mSampleRate) {
    LOG(ERROR) << "Input device's sample rate does not match the client's "
                  "sample rate; input_device_format="
               << input_device_format;
    result = kAudioUnitErr_FormatNotSupported;
    HandleError(result);
    return false;
  }

  // Modify the IO buffer size if not already set correctly for the selected
  // device. The status of other active audio input and output streams is
  // involved in the final setting.

  if (!manager_->MaybeChangeBufferSize(input_device_id_, audio_unit_, 1,
                                       input_params_.frames_per_buffer())) {
    result = kAudioUnitErr_FormatNotSupported;
    HandleError(result);
    return false;
  }

  // If the requested number of frames is out of range, the closest valid
  // buffer size will be set instead. Check the current setting and log a
  // warning for a non-perfect match. Any such mismatch will be compensated
  // for in OnDataIsAvailable().
#if BUILDFLAG(IS_MAC)
  UInt32 buffer_frame_size = 0;
  UInt32 property_size = sizeof(buffer_frame_size);
  result = AudioUnitGetProperty(
      audio_unit_, kAudioDevicePropertyBufferFrameSize, kAudioUnitScope_Global,
      AUElement::OUTPUT, &buffer_frame_size, &property_size);
  LOG_IF(WARNING, buffer_frame_size !=
                      static_cast<UInt32>(input_params_.frames_per_buffer()))
      << "AUHAL is using best match of IO buffer size: " << buffer_frame_size;
#endif
  // Channel mapping should be supported but add a warning just in case.
  // TODO(henrika): perhaps add to UMA stat to track if this can happen.
  DLOG_IF(WARNING,
          input_device_format.mChannelsPerFrame != format_.mChannelsPerFrame)
      << "AUHAL's audio converter must do channel conversion";

  // Set up the desired (output) format.
  // For obtaining input from a device, the device format is always expressed
  // on the output scope of the AUHAL's Element 1.
  result = AudioUnitSetProperty(audio_unit_, kAudioUnitProperty_StreamFormat,
                                kAudioUnitScope_Output, AUElement::INPUT,
                                &format_, sizeof(format_));
  if (result != noErr) {
    HandleError(result);
    return false;
  }

  // Finally, initialize the audio unit and ensure that it is ready to render.
  // Allocates memory according to the maximum number of audio frames
  // it can produce in response to a single render call.
  result = AudioUnitInitialize(audio_unit_);
  if (result != noErr) {
    HandleError(result);
    return false;
  }

  return true;
}

bool AUAudioInputStream::OpenVoiceProcessingAU() {
  // Start by obtaining an AudioOutputUnit using a VoiceProcessingIO component
  // description.

  // Description for the Audio Unit we want to use (the voice processing I/O
  // unit in this case). The kAudioUnitSubType_VoiceProcessingIO audio unit
  // adds echo cancellation to the capture path, using playout to the output
  // device as the reference signal. As with the AUHAL, bus 0 is used for the
  // output side and bus 1 is used to get audio input from the device.
  AudioComponentDescription desc = {kAudioUnitType_Output,
                                    kAudioUnitSubType_VoiceProcessingIO,
                                    kAudioUnitManufacturer_Apple, 0, 0};

  // Find a component that meets the description in |desc|.
  AudioComponent comp = AudioComponentFindNext(nullptr, &desc);
  DCHECK(comp);
  if (!comp) {
    HandleError(kAudioUnitErr_NoConnection);
    return false;
  }

  // Get access to the service provided by the specified Audio Unit.
  OSStatus result = AudioComponentInstanceNew(comp, &audio_unit_);
  if (result) {
    HandleError(result);
    return false;
  }

  // Next, set the audio device to be the Audio Unit's input device.
  result =
      AudioUnitSetProperty(audio_unit_, kAudioOutputUnitProperty_CurrentDevice,
                           kAudioUnitScope_Global, AUElement::INPUT,
                           &input_device_id_, sizeof(input_device_id_));

  if (result != noErr) {
    HandleError(result);
    return false;
  }

  // Followed by the audio device to be the Audio Unit's output device.
  result = AudioUnitSetProperty(
      audio_unit_, kAudioOutputUnitProperty_CurrentDevice,
      kAudioUnitScope_Global, AUElement::OUTPUT, &output_device_id_for_aec_,
      sizeof(output_device_id_for_aec_));

  if (result != noErr) {
    HandleError(result);
    return false;
  }

  // Register the input procedure for the voice processing unit. This
  // procedure will be called when the unit has received new data from the
  // input device.
  AURenderCallbackStruct callback;
  callback.inputProc = &DataIsAvailable;
  callback.inputProcRefCon = this;

  result = AudioUnitSetProperty(
      audio_unit_, kAudioOutputUnitProperty_SetInputCallback,
      kAudioUnitScope_Global, AUElement::INPUT, &callback, sizeof(callback));
  if (result != noErr) {
    HandleError(result);
    return false;
  }

  callback.inputProc = OnGetPlayoutData;
  callback.inputProcRefCon = this;
  result = AudioUnitSetProperty(
      audio_unit_, kAudioUnitProperty_SetRenderCallback, kAudioUnitScope_Input,
      AUElement::OUTPUT, &callback, sizeof(callback));
  if (result != noErr) {
    HandleError(result);
    return false;
  }

  // Get the stream format for the selected input device and ensure that the
  // sample rate of the selected input device matches the desired (given at
  // construction) sample rate. We should not rely on sample rate conversion
  // in the audio unit, only *simple* conversions, e.g., 32-bit float to
  // 16-bit signed integer format.
  AudioStreamBasicDescription input_device_format = {0};
  result =
      manager_->GetInputDeviceStreamFormat(audio_unit_, &input_device_format);
  if (result != noErr) {
    HandleError(result);
    return false;
  }

  DVLOG(1) << "Input device stream format: " << input_device_format;
  if (input_device_format.mSampleRate != format_.mSampleRate) {
    LOG(ERROR)
        << "Input device's sample rate does not match the client's sample rate";
    result = kAudioUnitErr_FormatNotSupported;
    HandleError(result);
    return false;
  }

  // Modify the IO buffer size if not already set correctly for the selected
  // device. The status of other active audio input and output streams is
  // involved in the final setting.
  if (!manager_->MaybeChangeBufferSize(input_device_id_, audio_unit_, 1,
                                       input_params_.frames_per_buffer())) {
    result = kAudioUnitErr_FormatNotSupported;
    HandleError(result);
    return false;
  }

  // If the requested number of frames is out of range, the closest valid
  // buffer size will be set instead. Check the current setting and log a
  // warning for a non-perfect match. Any such mismatch will be compensated
  // for in OnDataIsAvailable().
#if BUILDFLAG(IS_MAC)
  UInt32 buffer_frame_size = 0;
  UInt32 property_size = sizeof(buffer_frame_size);
  result = AudioUnitGetProperty(
      audio_unit_, kAudioDevicePropertyBufferFrameSize, kAudioUnitScope_Global,
      AUElement::OUTPUT, &buffer_frame_size, &property_size);
  LOG_IF(WARNING, buffer_frame_size !=
                      static_cast<UInt32>(input_params_.frames_per_buffer()))
      << "AUHAL is using best match of IO buffer size: " << buffer_frame_size;

  // The built-in device claims to be stereo. The VPAU claims 5 channels (as
  // observed locally) but refuses to work in stereo. Just accept stereo for
  // now, use mono internally and upmix.
  AudioStreamBasicDescription mono_format = format_;
  if (format_.mChannelsPerFrame == 2) {
    mono_format.mChannelsPerFrame = 1;
    mono_format.mBytesPerPacket = mono_format.mBitsPerChannel / 8;
    mono_format.mBytesPerFrame = mono_format.mBytesPerPacket;
  }

  // Set up the desired (output) format.
  // For obtaining input from a device, the device format is always expressed
  // on the output scope of the AUHAL's Element 1.
  result = AudioUnitSetProperty(audio_unit_, kAudioUnitProperty_StreamFormat,
                                kAudioUnitScope_Output, AUElement::INPUT,
                                &mono_format, sizeof(mono_format));
  if (result != noErr) {
    HandleError(result);
    return false;
  }

  result = AudioUnitSetProperty(audio_unit_, kAudioUnitProperty_StreamFormat,
                                kAudioUnitScope_Input, AUElement::OUTPUT,
                                &mono_format, sizeof(mono_format));
  if (result != noErr) {
    HandleError(result);
    return false;
  }

  // Finally, initialize the audio unit and ensure that it is ready to render.
  // Allocates memory according to the maximum number of audio frames
  // it can produce in response to a single render call.
  result = AudioUnitInitialize(audio_unit_);
  if (result != noErr) {
    HandleError(result);
    return false;
  }

  UndoDucking(output_device_id_for_aec_);
#endif
  return true;
}

void AUAudioInputStream::Start(AudioInputCallback* callback) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DVLOG(1) << __FUNCTION__ << " this " << this;
  DCHECK(callback);
  DCHECK(!sink_);
  DLOG_IF(ERROR, !audio_unit_) << "Open() has not been called successfully";
  if (IsRunning())
    return;

#if BUILDFLAG(IS_MAC)
  // Check if we should defer Start() for http://crbug.com/160920.
  if (manager_->ShouldDeferStreamStart()) {
    LOG(WARNING) << "Start of input audio is deferred";
    // Use a cancellable closure so that if Stop() is called before Start()
    // actually runs, we can cancel the pending start.
    deferred_start_cb_.Reset(base::BindOnce(&AUAudioInputStream::Start,
                                            base::Unretained(this), callback));
    manager_->GetTaskRunner()->PostDelayedTask(
        FROM_HERE, deferred_start_cb_.callback(),
        base::Seconds(AudioManagerMac::kStartDelayInSecsForPowerEvents));
    return;
  }
#endif

  sink_ = callback;
  last_success_time_ = base::TimeTicks::Now();

  // Don't disable built-in noise suppression when using VPAU.
  if (!use_voice_processing_ &&
      !(input_params_.effects() & AudioParameters::NOISE_SUPPRESSION) &&
      manager_->DeviceSupportsAmbientNoiseReduction(input_device_id_)) {
    noise_reduction_suppressed_ =
        manager_->SuppressNoiseReduction(input_device_id_);
  }
  StartAgc();
  OSStatus result = AudioOutputUnitStart(audio_unit_);
  OSSTATUS_DLOG_IF(ERROR, result != noErr, result)
      << "Failed to start acquiring data";
  if (result != noErr) {
    Stop();
    return;
  }
  DCHECK(IsRunning()) << "Audio unit started OK but is not yet running";

  // For UMA stat purposes, start a one-shot timer which detects whether input
  // callbacks start as intended, i.e., whether input audio recording actually
  // begins. CheckInputStartupSuccess() will check if
  // |input_callback_is_active_| is true when the timer expires.
  input_callback_timer_ = std::make_unique<base::OneShotTimer>();
  input_callback_timer_->Start(FROM_HERE, kInputCallbackStartTimeout, this,
                               &AUAudioInputStream::CheckInputStartupSuccess);
  DCHECK(input_callback_timer_->IsRunning());
}

void AUAudioInputStream::Stop() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  deferred_start_cb_.Cancel();
  DVLOG(1) << __FUNCTION__ << " this " << this;

  StopAgc();
  if (noise_reduction_suppressed_) {
    manager_->UnsuppressNoiseReduction(input_device_id_);
    noise_reduction_suppressed_ = false;
  }
  if (input_callback_timer_ != nullptr) {
    input_callback_timer_->Stop();
    input_callback_timer_.reset();
  }

  if (audio_unit_ != nullptr) {
    // Stop the I/O audio unit.
    OSStatus result = AudioOutputUnitStop(audio_unit_);
    DCHECK_EQ(result, noErr);
    // Add a DCHECK here just in case. AFAIK, the call to AudioOutputUnitStop()
    // seems to set this state synchronously, hence it should always report
    // false after a successful call.
    DCHECK(!IsRunning()) << "Audio unit is stopped but still running";

    // Reset the audio unit's render state. This function clears memory.
    // It does not allocate or free memory resources.
    result = AudioUnitReset(audio_unit_, kAudioUnitScope_Global, 0);
    DCHECK_EQ(result, noErr);
    OSSTATUS_DLOG_IF(ERROR, result != noErr, result)
        << "AudioUnitReset() failed";
  }

  SetInputCallbackIsActive(false);
  ReportAndResetStats();
  sink_ = nullptr;
  fifo_.Clear();
  got_input_callback_ = false;
}

void AUAudioInputStream::Close() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DVLOG(1) << __FUNCTION__ << " this " << this;

  // It is valid to call Close() before calling Open() or Start().
  // It is also valid to call Close() after Start() has been called.
  if (IsRunning()) {
    Stop();
  }

  // Uninitialize and dispose the audio unit.
  CloseAudioUnit();

  // Inform the audio manager that we have been closed. This will cause our
  // destruction.
  manager_->ReleaseInputStream(this);
}

double AUAudioInputStream::GetMaxVolume() {
  return manager_->GetMaxInputVolume(input_device_id_);
}

void AUAudioInputStream::SetVolume(double volume) {
  DVLOG(1) << __FUNCTION__ << " this " << this << " volume=" << volume;

  manager_->SetInputVolume(input_device_id_, volume);

  // Update the AGC volume level based on the last setting above. Note that
  // the volume-level resolution is not infinite and it is therefore not
  // possible to assume that the volume provided as input parameter can be
  // used directly. Instead, a new query to the audio hardware is required.
  // This method does nothing if AGC is disabled.
  UpdateAgcVolume();
}

double AUAudioInputStream::GetVolume() {
  return manager_->GetInputVolume(input_device_id_);
}

bool AUAudioInputStream::IsMuted() {
  return manager_->IsInputMuted(input_device_id_);
}

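// Selects the output device whose playout is used as the echo cancellation
// reference by the voice processing unit. No-op unless voice processing is
// enabled. Aggregate devices are resolved to their first output subdevice.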
void AUAudioInputStream::SetOutputDeviceForAec(
    const std::string& output_device_id) {
#if BUILDFLAG(IS_MAC)
  if (!use_voice_processing_)
    return;

  AudioDeviceID audio_device_id =
      AudioManagerMac::GetAudioDeviceIdByUId(false, output_device_id);
  if (audio_device_id == output_device_id_for_aec_)
    return;

  if (audio_device_id == kAudioObjectUnknown) {
    log_callback_.Run(
        base::StringPrintf("AU in: Unable to resolve output device id '%s'",
                           output_device_id.c_str()));
    return;
  }

  // If the selected device is an aggregate device, try to use the first output
  // device of the aggregate device instead.
  if (core_audio_mac::GetDeviceTransportType(audio_device_id) ==
      kAudioDeviceTransportTypeAggregate) {
    const AudioDeviceID output_subdevice_id =
        AudioManagerMac::FindFirstOutputSubdevice(audio_device_id);

    if (output_subdevice_id == kAudioObjectUnknown) {
      log_callback_.Run(base::StringPrintf(
          "AU in: Unable to find an output subdevice in aggregate device '%s'",
          output_device_id.c_str()));
      return;
    }
    audio_device_id = output_subdevice_id;
  }

  if (audio_device_id != output_device_id_for_aec_) {
    output_device_id_for_aec_ = audio_device_id;
    log_callback_.Run(base::StringPrintf(
        "AU in: Output device for AEC changed to '%s' (%d)",
        output_device_id.c_str(), output_device_id_for_aec_));
    // Only reinitialize the audio unit if it has already been created.
    if (audio_unit_)
      ReinitializeVoiceProcessingAudioUnit();
  }
#endif
}

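// Tears down and rebuilds the voice processing audio unit so that it picks
// up |output_device_id_for_aec_| as its output device, restarting it
// afterwards if it was running.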
void AUAudioInputStream::ReinitializeVoiceProcessingAudioUnit() {
  DCHECK(use_voice_processing_);
  DCHECK(audio_unit_);

  const bool was_running = IsRunning();
  OSStatus result = noErr;

  if (was_running) {
    result = AudioOutputUnitStop(audio_unit_);
    DCHECK_EQ(result, noErr);
  }

  CloseAudioUnit();

  // Reset things to a state similar to before the audio unit was opened.
  // Most of these will be no-ops if the audio unit was opened but not started.
  SetInputCallbackIsActive(false);
  ReportAndResetStats();
  got_input_callback_ = false;

  OpenVoiceProcessingAU();

  if (was_running) {
    result = AudioOutputUnitStart(audio_unit_);
    if (result != noErr) {
      OSSTATUS_DLOG(ERROR, result) << "Failed to start acquiring data";
      Stop();
      return;
    }
  }

  log_callback_.Run(base::StringPrintf(
      "AU in: Successfully reinitialized AEC for output device id=%d.",
      output_device_id_for_aec_));
}

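// Called by CoreAudio (on its high-priority capture thread) each time new
// input data is available from the device. |context| is the
// AUAudioInputStream instance registered in inputProcRefCon.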
// static
OSStatus AUAudioInputStream::DataIsAvailable(void* context,
                                             AudioUnitRenderActionFlags* flags,
                                             const AudioTimeStamp* time_stamp,
                                             UInt32 bus_number,
                                             UInt32 number_of_frames,
                                             AudioBufferList* io_data) {
  DCHECK(context);
  // Recorded audio is always on the input bus (=1).
  DCHECK_EQ(bus_number, 1u);
  // No data buffer should be allocated at this stage.
  DCHECK(!io_data);
  AUAudioInputStream* self = reinterpret_cast<AUAudioInputStream*>(context);
  // Propagate render action flags, time stamp, bus number and number
  // of frames requested to the AudioUnitRender() call where the actual data
  // is received from the input device via the output scope of the audio unit.
  return self->OnDataIsAvailable(flags, time_stamp, bus_number,
                                 number_of_frames);
}

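// Renders the newly captured frames into |audio_buffer_list_| via
// AudioUnitRender() and forwards them to Provide(). Certain render errors are
// tolerated for up to kMaxErrorTimeout before HandleError() is invoked.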
OSStatus AUAudioInputStream::OnDataIsAvailable(
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 number_of_frames) {
  TRACE_EVENT1("audio", "AUAudioInputStream::OnDataIsAvailable", "frames",
               number_of_frames);

  // Indicate that input callbacks have started.
  if (!got_input_callback_) {
    got_input_callback_ = true;
    SetInputCallbackIsActive(true);
  }

  // Update the |mDataByteSize| value in |audio_buffer_list_| since
  // |number_of_frames| can change on the fly.
  // |mDataByteSize| needs to map exactly to |number_of_frames|; otherwise
  // CoreAudio is put into a bad state and AudioUnitRender() returns -50 for
  // the newly created stream.
  // We have also seen kAudioUnitErr_TooManyFramesToProcess (-10874) and
  // kAudioUnitErr_CannotDoInCurrentContext (-10863) as error codes.
  // See crbug/428706 for details.
  UInt32 new_size = number_of_frames * format_.mBytesPerFrame;
  AudioBuffer* audio_buffer = audio_buffer_list_.mBuffers;
  if (new_size != audio_buffer->mDataByteSize) {
    DVLOG(1) << __FUNCTION__ << " this " << this
             << " new number_of_frames detected: " << number_of_frames;
    if (new_size > audio_buffer->mDataByteSize) {
      // This can happen if the device is unplugged during recording. We
      // allocate enough memory here to avoid depending on how CoreAudio
      // handles it.
      // See http://www.crbug.com/434681 for one example of when we can enter
      // this scope.
      audio_data_buffer_.reset(new uint8_t[new_size]);
      audio_buffer->mData = audio_data_buffer_.get();
    }

    // Update the |mDataByteSize| to match |number_of_frames|.
    audio_buffer->mDataByteSize = new_size;
  }

  // Obtain the recorded audio samples by initiating a rendering cycle.
  // Since it happens on the input bus, the |&audio_buffer_list_| parameter is
  // a reference to the preallocated audio buffer list that the audio unit
  // renders into.
  OSStatus result;
  if (use_voice_processing_ && format_.mChannelsPerFrame != 1) {
    // Use the first part of the output buffer for mono data...
    AudioBufferList mono_buffer_list;
    mono_buffer_list.mNumberBuffers = 1;
    AudioBuffer* mono_buffer = mono_buffer_list.mBuffers;
    mono_buffer->mNumberChannels = 1;
    mono_buffer->mDataByteSize =
        audio_buffer->mDataByteSize / audio_buffer->mNumberChannels;
    mono_buffer->mData = audio_buffer->mData;

    TRACE_EVENT_BEGIN0("audio", "AudioUnitRender");
    result = AudioUnitRender(audio_unit_, flags, time_stamp, bus_number,
                             number_of_frames, &mono_buffer_list);
    TRACE_EVENT_END0("audio", "AudioUnitRender");
    // ... then upmix it by copying it out to two channels.
    UpmixMonoToStereoInPlace(audio_buffer, format_.mBitsPerChannel / 8);
  } else {
    TRACE_EVENT_BEGIN0("audio", "AudioUnitRender");
    result = AudioUnitRender(audio_unit_, flags, time_stamp, bus_number,
                             number_of_frames, &audio_buffer_list_);
    TRACE_EVENT_END0("audio", "AudioUnitRender");
  }

  if (result == noErr) {
    // Update time of successful call to AudioUnitRender().
    last_success_time_ = base::TimeTicks::Now();

    // Deliver recorded data to the consumer as a callback.
    return Provide(number_of_frames, &audio_buffer_list_, time_stamp);
  }

  TRACE_EVENT_INSTANT0("audio", "AudioUnitRender error",
                       TRACE_EVENT_SCOPE_THREAD);
  OSSTATUS_LOG(ERROR, result) << "AudioUnitRender() failed";

  if (result == kAudioUnitErr_TooManyFramesToProcess ||
      result == kAudioUnitErr_CannotDoInCurrentContext) {
    DCHECK(!last_success_time_.is_null());
    // We delay stopping the stream for kAudioUnitErr_TooManyFramesToProcess
    // since it has been observed that some USB headsets can cause this error
    // but only for a few initial frames at startup, after which the stream
    // returns to a stable state again. See b/19524368 for details.
    // Instead, we measure the time since the last valid audio frame and call
    // HandleError() only if the error sequence is too long. We do this to
    // avoid ending up in a non-recoverable bad core audio state.
    // kAudioUnitErr_CannotDoInCurrentContext is included as well, since long
    // sequences of it can be produced in combination with e.g. sample-rate
    // changes for input devices.
    if (base::TimeTicks::Now() - last_success_time_ <= kMaxErrorTimeout) {
      // Skip error handling for now.
      return result;
    }

    const char* err = (result == kAudioUnitErr_TooManyFramesToProcess)
                          ? "kAudioUnitErr_TooManyFramesToProcess"
                          : "kAudioUnitErr_CannotDoInCurrentContext";
    LOG(ERROR) << "Too long sequence of " << err << " errors!";
  }

  HandleError(result);
  return result;
}

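// Pushes the captured audio into |fifo_| and delivers one block of
// |input_params_.frames_per_buffer()| frames to |sink_| for each complete
// block available, compensating the capture time for audio already buffered
// in the FIFO.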
OSStatus AUAudioInputStream::Provide(UInt32 number_of_frames,
                                     AudioBufferList* io_data,
                                     const AudioTimeStamp* time_stamp) {
  TRACE_EVENT1("audio", "AUAudioInputStream::Provide", "number_of_frames",
               number_of_frames);
  UpdateCaptureTimestamp(time_stamp);
  last_number_of_frames_ = number_of_frames;

  // TODO(grunell): We only record the first buffer size change; any further
  // changes are ignored. This is in line with output side stats. It would be
  // nice to have all changes reflected in UMA stats.
  if (number_of_frames !=
          static_cast<UInt32>(input_params_.frames_per_buffer()) &&
      number_of_frames_provided_ == 0)
    number_of_frames_provided_ = number_of_frames;

  base::TimeTicks capture_time = GetCaptureTime(time_stamp);

  // The AGC volume level is updated once every second on a separate thread.
  // Note that the volume is also updated each time SetVolume() is called
  // through IPC by the render-side AGC.
  double normalized_volume = 0.0;
  GetAgcVolume(&normalized_volume);

  AudioBuffer& buffer = io_data->mBuffers[0];
  uint8_t* audio_data = reinterpret_cast<uint8_t*>(buffer.mData);
  DCHECK(audio_data);
  if (!audio_data)
    return kAudioUnitErr_InvalidElement;

  // Dynamically increase the capacity of the FIFO to handle larger buffers
  // from CoreAudio. This can happen in combination with Apple Thunderbolt
  // Displays when the Display Audio is used as capture source and the cable
  // is first removed and then inserted again.
  // See http://www.crbug.com/434681 for details.
  if (static_cast<int>(number_of_frames) > fifo_.GetUnfilledFrames()) {
    // Derive required increase in number of FIFO blocks. The increase is
    // typically one block.
    const int blocks =
        static_cast<int>((number_of_frames - fifo_.GetUnfilledFrames()) /
                         input_params_.frames_per_buffer()) +
        1;
    DLOG(WARNING) << "Increasing FIFO capacity by " << blocks << " blocks";
    TRACE_EVENT_INSTANT1("audio", "Increasing FIFO capacity",
                         TRACE_EVENT_SCOPE_THREAD, "increased by", blocks);
    fifo_.IncreaseCapacity(blocks);
  }

  // Compensate the capture time for the FIFO before pushing any new frames.
  capture_time -= AudioTimestampHelper::FramesToTime(fifo_.GetAvailableFrames(),
                                                     format_.mSampleRate);

  const int bytes_per_sample = format_.mBitsPerChannel / 8;

  peak_detector_.FindPeak(audio_data, number_of_frames, bytes_per_sample);

  // Copy captured (and interleaved) data into FIFO.
  fifo_.Push(audio_data, number_of_frames, bytes_per_sample);

  // Consume and deliver the data when the FIFO has a block of available data.
  while (fifo_.available_blocks()) {
    const AudioBus* audio_bus = fifo_.Consume();
    DCHECK_EQ(audio_bus->frames(),
              static_cast<int>(input_params_.frames_per_buffer()));

    sink_->OnData(audio_bus, capture_time, normalized_volume,
                  glitch_accumulator_.GetAndReset());

    // Move the capture time forward for each vended block.
    capture_time += AudioTimestampHelper::FramesToTime(audio_bus->frames(),
                                                       format_.mSampleRate);
  }

  return noErr;
}

base::TimeTicks AUAudioInputStream::GetCaptureTime(
    const AudioTimeStamp* input_time_stamp) {
  // We must subtract the hardware latency to calculate when the sample was
  // received by the hardware capture device.
  return (input_time_stamp->mFlags & kAudioTimeStampHostTimeValid
              ? base::TimeTicks::FromMachAbsoluteTime(
                    input_time_stamp->mHostTime)
              : base::TimeTicks::Now()) -
         hardware_latency_;
}

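// Returns true if the audio unit exists and reports itself as running via
// kAudioOutputUnitProperty_IsRunning.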
bool AUAudioInputStream::IsRunning() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  if (!audio_unit_)
    return false;
  UInt32 is_running = 0;
  UInt32 size = sizeof(is_running);
  OSStatus error = AudioUnitGetProperty(
      audio_unit_, kAudioOutputUnitProperty_IsRunning, kAudioUnitScope_Global,
      AUElement::OUTPUT, &is_running, &size);
  OSSTATUS_DLOG_IF(ERROR, error != noErr, error)
      << "AudioUnitGetProperty(kAudioOutputUnitProperty_IsRunning) failed";
  DVLOG(1) << " this " << this << " IsRunning: " << is_running;
  return (error == noErr && is_running);
}

void AUAudioInputStream::HandleError(OSStatus err,
                                     const base::Location& location) {
  // Log the latest OSStatus error message and also change the sign of the
  // error if no callbacks are active. I.e., the sign of the error message
  // carries one extra level of information.
  base::UmaHistogramSparse("Media.InputErrorMac",
                           GetInputCallbackIsActive() ? err : (err * -1));
  LOG(ERROR) << "Input error " << logging::DescriptionFromOSStatus(err) << " ("
             << err << ") at line " << location.line_number();
  if (sink_)
    sink_->OnError();
}

void AUAudioInputStream::SetInputCallbackIsActive(bool enabled) {
  base::subtle::Release_Store(&input_callback_is_active_, enabled);
}

bool AUAudioInputStream::GetInputCallbackIsActive() {
  return base::subtle::Acquire_Load(&input_callback_is_active_) != 0;
}

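// Fired by |input_callback_timer_| kInputCallbackStartTimeout after Start().
// Records (to UMA, for AGC-enabled streams only) whether input callbacks
// started as expected.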
void AUAudioInputStream::CheckInputStartupSuccess() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DCHECK(IsRunning());
  // Only add UMA stat related to failing input audio for streams where
  // the AGC has been enabled, e.g. WebRTC audio input streams.
  if (GetAutomaticGainControl()) {
    // Check if we have called Start() and input callbacks have actually
    // started in time as they should. If that is not the case, we have a
    // problem and the stream is considered dead.
    const bool input_callback_is_active = GetInputCallbackIsActive();
    base::UmaHistogramBoolean("Media.Audio.InputStartupSuccessMac",
                              input_callback_is_active);
    DVLOG(1) << __FUNCTION__ << " this " << this
             << " input_callback_is_active: " << input_callback_is_active;
  }
}

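// Uninitializes and disposes of the audio unit, if one has been created.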
void AUAudioInputStream::CloseAudioUnit() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DVLOG(1) << __FUNCTION__ << " this " << this;
  if (!audio_unit_)
    return;
  OSStatus result = AudioUnitUninitialize(audio_unit_);
  OSSTATUS_DLOG_IF(ERROR, result != noErr, result)
      << "AudioUnitUninitialize() failed.";
  result = AudioComponentInstanceDispose(audio_unit_);
  OSSTATUS_DLOG_IF(ERROR, result != noErr, result)
      << "AudioComponentInstanceDispose() failed.";
  audio_unit_ = 0;
}

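// Detects dropped audio: the difference between the current sample time and
// the previous one should equal the number of frames delivered last time;
// any excess is counted as lost frames and reported as a glitch.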
void AUAudioInputStream::UpdateCaptureTimestamp(
    const AudioTimeStamp* timestamp) {
  if ((timestamp->mFlags & kAudioTimeStampSampleTimeValid) == 0)
    return;

  if (last_sample_time_) {
    DCHECK_NE(0U, last_number_of_frames_);
    UInt32 sample_time_diff =
        static_cast<UInt32>(timestamp->mSampleTime - last_sample_time_);
    DCHECK_GE(sample_time_diff, last_number_of_frames_);
    UInt32 lost_frames = sample_time_diff - last_number_of_frames_;
    base::TimeDelta lost_audio_duration = AudioTimestampHelper::FramesToTime(
        lost_frames, input_params_.sample_rate());
    glitch_reporter_.UpdateStats(lost_audio_duration);
    if (lost_audio_duration.is_positive()) {
      glitch_accumulator_.Add(AudioGlitchInfo::SingleBoundedSystemGlitch(
          lost_audio_duration, AudioGlitchInfo::Direction::kCapture));
    }
  }

  // Store the last sample time for use next time we get called back.
  last_sample_time_ = timestamp->mSampleTime;
}

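// Reports accumulated glitch and buffer-size stats to UMA and the log
// callback, then resets them. Called from Stop() and the destructor.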
void AUAudioInputStream::ReportAndResetStats() {
  if (last_sample_time_ == 0)
    return;  // No stats gathered to report.

  // A value of 0 indicates that we got the buffer size we asked for.
  base::UmaHistogramCounts10000("Media.Audio.Capture.FramesProvided",
                                number_of_frames_provided_);

  SystemGlitchReporter::Stats stats =
      glitch_reporter_.GetLongTermStatsAndReset();

  std::string log_message = base::StringPrintf(
      "AU in: (num_glitches_detected=[%d], cumulative_audio_lost=[%llu ms], "
      "largest_glitch=[%llu ms])",
      stats.glitches_detected, stats.total_glitch_duration.InMilliseconds(),
      stats.largest_glitch_duration.InMilliseconds());

  log_callback_.Run(log_message);
  if (stats.glitches_detected != 0) {
    DLOG(WARNING) << log_message;
  }

  number_of_frames_provided_ = 0;
  last_sample_time_ = 0;
  last_number_of_frames_ = 0;
}

// TODO(ossu): Ideally, we'd just use the mono stream directly. However, since
// the choice of mono or stereo may depend on whether we want to run the echo
// canceller, and since we can't provide two sets of AudioParameters for a
// device, this is the best we can do right now.
//
// The algorithm works by copying a sample at offset N to 2*N and 2*N + 1, e.g.:
//  ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
// | a1 | a2 | a3 | b1 | b2 | b3 | c1 | c2 | c3 | -- | -- | -- | -- | -- | ...
//  ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
//  into
//  ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
// | a1 | a2 | a3 | a1 | a2 | a3 | b1 | b2 | b3 | b1 | b2 | b3 | c1 | c2 | ...
//  ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
//
// To support various different sample sizes, this is done byte-by-byte. Only
// the first half of the buffer will be used as input. It is expected to contain
// mono audio. The second half is output only. Since the data is expanding, the
// algorithm starts copying from the last sample. Otherwise it would overwrite
// data not already copied.
void AUAudioInputStream::UpmixMonoToStereoInPlace(AudioBuffer* audio_buffer,
                                                  int bytes_per_sample) {
  constexpr int channels = 2;
  DCHECK_EQ(audio_buffer->mNumberChannels, static_cast<UInt32>(channels));
  const int total_bytes = audio_buffer->mDataByteSize;
  const int frames = total_bytes / bytes_per_sample / channels;
  char* byte_ptr = reinterpret_cast<char*>(audio_buffer->mData);
  for (int i = frames - 1; i >= 0; --i) {
    int in_offset = (bytes_per_sample * i);
    int out_offset = (channels * bytes_per_sample * i);
    for (int b = 0; b < bytes_per_sample; ++b) {
      const char byte = byte_ptr[in_offset + b];
      byte_ptr[out_offset + b] = byte;
      byte_ptr[out_offset + bytes_per_sample + b] = byte;
    }
  }
}

}  // namespace media