chromium/chromeos/ash/services/libassistant/audio/audio_device_owner.cc

// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chromeos/ash/services/libassistant/audio/audio_device_owner.h"

#include <algorithm>
#include <utility>

#include "base/sequence_checker.h"
#include "chromeos/ash/services/libassistant/public/mojom/audio_output_delegate.mojom.h"
#include "media/audio/audio_device_description.h"
#include "media/base/audio_parameters.h"
#include "media/base/limits.h"
#include "services/media_session/public/mojom/media_session.mojom.h"

namespace ash::libassistant {

// Guard macro for member functions that must run on the sequence served by
// |background_task_runner_|. When the caller is on a different sequence, the
// macro re-posts |method| (bound to a weak pointer to this object, followed by
// any extra arguments) to |background_task_runner_| and returns from the
// enclosing function; otherwise execution falls through to the code after the
// macro. Because it expands to a bare `return;`, it can only be used inside
// functions returning void.
// NOTE(review): nothing in this file uses the macro, and it relies on
// |background_task_runner_| and |weak_factory_| members — confirm they still
// exist before using it.
#define ENSURE_BACKGROUND_THREAD(method, ...)                               \
  if (!background_task_runner_->RunsTasksInCurrentSequence()) {             \
    background_task_runner_->PostTask(                                      \
        FROM_HERE,                                                          \
        base::BindOnce(method, weak_factory_.GetWeakPtr(), ##__VA_ARGS__)); \
    return;                                                                 \
  }

namespace {

// The reduced audio (the ducked track) volume level while listening to user
// speech. Applied to the output device whenever the media session reports the
// ducking state; the volume is restored to 1.0 otherwise.
constexpr double kDuckingVolume = 0.2;

// Number of audio buffers per second of audio. The sample rate is divided by
// this value to obtain the frames per buffer, and the per-second byte rate is
// divided by it to size the staging buffer, so each buffer covers 1/10 s.
constexpr int kNumberOfBuffersPerSec = 10;

// Returns the size in bytes of a single sample for the encoding carried by
// |format|: 2 for signed 16-bit PCM, 4 for signed 32-bit and 32-bit float PCM.
// Any other encoding is unexpected; it trips NOTREACHED_IN_MIGRATION() and
// falls back to 1.
int32_t GetBytesPerSample(const assistant_client::OutputStreamFormat& format) {
  using Encoding = assistant_client::OutputStreamEncoding;

  const auto encoding = format.encoding;
  if (encoding == Encoding::STREAM_PCM_S16)
    return 2;

  if (encoding == Encoding::STREAM_PCM_S32 ||
      encoding == Encoding::STREAM_PCM_F32) {
    return 4;
  }

  NOTREACHED_IN_MIGRATION();
  return 1;
}

// Returns the size in bytes of one frame of |format|, i.e. one sample for
// every channel.
int32_t GetBytesPerFrame(const assistant_client::OutputStreamFormat& format) {
  const int32_t sample_bytes = GetBytesPerSample(format);
  return sample_bytes * format.pcm_num_channels;
}

// Pushes the first |num_bytes| bytes of |data| into |fifo|. The byte count is
// converted into whole frames of |output_format| (a trailing partial frame is
// dropped by the integer division), and the per-sample byte width is passed
// along with the raw data to the fifo.
void FillAudioFifoWithDataOfBufferFormat(
    media::AudioBlockFifo* fifo,
    const std::vector<uint8_t>& data,
    const assistant_client::OutputStreamFormat& output_format,
    int num_bytes) {
  const int frame_count = num_bytes / GetBytesPerFrame(output_format);
  const int sample_width = GetBytesPerSample(output_format);
  fifo->Push(data.data(), frame_count, sample_width);
}

// Returns the number of bytes needed to hold one buffer of audio in |format|:
// the byte rate per second divided by kNumberOfBuffersPerSec.
int32_t GetBufferSizeInBytesFromBufferFormat(
    const assistant_client::OutputStreamFormat& format) {
  // Multiply first and divide last so the rounding happens only once.
  const auto bytes_per_second =
      GetBytesPerFrame(format) * format.pcm_sample_rate;
  return bytes_per_second / kNumberOfBuffersPerSec;
}

// Builds the low-latency PCM media::AudioParameters matching |output_format|.
// The channel layout is guessed from the channel count, and each buffer holds
// 1/kNumberOfBuffersPerSec of a second worth of frames. Only mono and stereo
// formats are expected (checked in DCHECK-enabled builds).
media::AudioParameters GetAudioParametersFromBufferFormat(
    const assistant_client::OutputStreamFormat& output_format) {
  const auto channel_count = output_format.pcm_num_channels;
  DCHECK(channel_count <= 2 && channel_count > 0);

  const auto sample_rate = output_format.pcm_sample_rate;
  const auto frames_per_buffer = sample_rate / kNumberOfBuffersPerSec;

  return media::AudioParameters(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
      media::ChannelLayoutConfig::Guess(channel_count), sample_rate,
      frames_per_buffer);
}

}  // namespace

// Stores |device_id|; it is later handed to the audio::OutputDevice created in
// StartDevice(). No device is opened until Start() is called.
AudioDeviceOwner::AudioDeviceOwner(const std::string& device_id)
    : device_id_(device_id) {}

// Must be destroyed on the sequence guarded by |sequence_checker_| (checked in
// DCHECK-enabled builds).
AudioDeviceOwner::~AudioDeviceOwner() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
}

// Starts playback of a stream described by |format|.
//
// |delegate| is remembered as the source of audio data (see
// ScheduleFillLocked()) and as the receiver of end-of-stream / error
// notifications. |stream_factory| is used to create the output device; it is
// null in unittests, in which case no device is created and
// |audio_output_delegate| is not used. Must be called on the owning sequence
// and only while no output device exists.
//
// If |format| carries more channels than media::limits::kMaxChannels the
// stream is ended immediately via |delegate|->OnEndOfStream() and nothing
// else is set up.
void AudioDeviceOwner::Start(
    mojom::AudioOutputDelegate* audio_output_delegate,
    assistant_client::AudioOutput::Delegate* delegate,
    mojo::PendingRemote<media::mojom::AudioStreamFactory> stream_factory,
    const assistant_client::OutputStreamFormat& format) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(!output_device_);

  base::AutoLock lock(lock_);

  delegate_ = delegate;
  format_ = format;

  // TODO(wutao): There is a bug LibAssistant sends wrong format. Do not run
  // in this case.
  // This check must come before anything is derived from |format_|: the audio
  // parameters, the staging buffer and the fifo below are all sized from the
  // channel count, so a bogus count must never reach them.
  if (format_.pcm_num_channels >
      static_cast<int>(media::limits::kMaxChannels)) {
    delegate_->OnEndOfStream();
    return;
  }

  audio_param_ = GetAudioParametersFromBufferFormat(format_);
  audio_data_.resize(GetBufferSizeInBytesFromBufferFormat(format_));
  // |audio_fifo_| contains 8x the number of frames to render.
  audio_fifo_ = std::make_unique<media::AudioBlockFifo>(
      format.pcm_num_channels, audio_param_.frames_per_buffer(), 8);

  // Request the first chunk of data right away so that it is (ideally)
  // available by the time the device asks for its first buffer.
  ScheduleFillLocked(base::TimeTicks::Now());

  // |stream_factory| is null in unittest.
  if (stream_factory)
    StartDevice(std::move(stream_factory), audio_output_delegate);
}

// Tears down the output device and tells the delegate, if one is still set,
// that playback has stopped. The delegate is cleared afterwards so it receives
// no further callbacks from this object.
void AudioDeviceOwner::Stop() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  // The device is destroyed before |lock_| is taken.
  // NOTE(review): presumably so that an in-flight Render(), which also takes
  // |lock_|, cannot be blocked on us while the device shuts down — confirm.
  output_device_.reset();

  base::AutoLock lock(lock_);
  if (!delegate_)
    return;

  delegate_->OnStopped();
  delegate_ = nullptr;
}

// Media session observer callback. Lowers the output volume to
// kDuckingVolume while the session is in the ducking state and restores it to
// full volume for every other state. Has no effect when no output device
// exists.
void AudioDeviceOwner::MediaSessionInfoChanged(
    media_session::mojom::MediaSessionInfoPtr info) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  using SessionState = media_session::mojom::MediaSessionInfo::SessionState;

  // We only handle media ducking case here as intended. Other media
  // operations, such as pausing and resuming, are handled by Libassistant
  // |MediaManager| API in |AssistantManagerServiceImpl|.
  const double target_volume =
      (info->state == SessionState::kDucking) ? kDuckingVolume : 1.0;

  if (!output_device_)
    return;

  output_device_->SetVolume(target_volume);
}

// Creates the output device from |stream_factory| with the parameters
// computed in Start(), starts playback with |this| as the render callback, and
// registers this object with |audio_output_delegate| as a media session
// observer. Must be called on the owning sequence with |lock_| held.
void AudioDeviceOwner::StartDevice(
    mojo::PendingRemote<media::mojom::AudioStreamFactory> stream_factory,
    mojom::AudioOutputDelegate* audio_output_delegate) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  lock_.AssertAcquired();

  output_device_ = std::make_unique<audio::OutputDevice>(
      std::move(stream_factory), audio_param_, /*callback=*/this, device_id_);
  output_device_->Play();

  // Subscribe to media session changes so that ducking can be applied in
  // MediaSessionInfoChanged().
  auto session_observer = session_receiver_.BindNewPipeAndPassRemote();
  audio_output_delegate->AddMediaSessionObserver(std::move(session_observer));
}

// Runs on audio renderer thread (started internally in |output_device_|).
//
// Copies one block of buffered audio into |dest| and returns the number of
// frames written, or 0 when nothing is buffered. When the fifo is empty and no
// fill request is outstanding, the delegate is told the stream has ended.
// After a block is consumed, a refill is requested for the freed space.
int AudioDeviceOwner::Render(base::TimeDelta delay,
                             base::TimeTicks delay_timestamp,
                             const media::AudioGlitchInfo& glitch_info,
                             media::AudioBus* dest) {
  base::AutoLock lock(lock_);

  const int buffered_frames = audio_fifo_->GetAvailableFrames();
  if (buffered_frames <= 0) {
    // Nothing to play. If a fill is still pending we simply wait for the next
    // round of filling (this should only happen at the very beginning);
    // otherwise the stream has run dry and is over.
    if (!is_filling_ && delegate_)
      delegate_->OnEndOfStream();
    return 0;
  }

  const int requested_frames = dest->frames();
  if (buffered_frames < requested_frames) {
    // In our setting, dest->frames() == frames per block in |audio_fifo_|.
    DCHECK_EQ(audio_fifo_->available_blocks(), 0);

    const int silence_frames =
        audio_param_.frames_per_buffer() - buffered_frames;
    DCHECK_GE(silence_frames, 0);

    // Pad the partially filled block with zero data so that |audio_fifo_| has
    // one complete block to consume. This avoids the DCHECK in
    // audio_fifo_->Consume() and also prevents garbage data being copied to
    // |dest| in production.
    audio_fifo_->PushSilence(silence_frames);
  }

  audio_fifo_->Consume()->CopyTo(dest);

  ScheduleFillLocked(base::TimeTicks::Now() - delay);
  return requested_frames;
}

// Runs on audio renderer thread (started internally in |output_device_|).
//
// Forwards a rendering failure to the delegate, if any, as a fatal error.
void AudioDeviceOwner::OnRenderError() {
  DVLOG(1) << "OnRenderError()";

  base::AutoLock lock(lock_);
  if (!delegate_)
    return;

  delegate_->OnError(assistant_client::AudioOutput::Error::FATAL_ERROR);
}

// Replaces the delegate that supplies audio data and receives stream
// notifications. Must be called on the owning sequence; the swap is done under
// |lock_| because Render(), OnRenderError() and BufferFillDone() read
// |delegate_| under the same lock.
void AudioDeviceOwner::SetDelegate(
    assistant_client::AudioOutput::Delegate* delegate) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  base::AutoLock lock(lock_);
  delegate_ = delegate;
}

// Asks the delegate for more audio data unless a request is already
// outstanding. Called with |lock_| held from Start(), Render() and
// BufferFillDone(). |time| is forwarded to the delegate as microseconds since
// the TimeTicks origin. The delegate reports completion through
// BufferFillDone().
void AudioDeviceOwner::ScheduleFillLocked(const base::TimeTicks& time) {
  lock_.AssertAcquired();

  // Only one fill request may be in flight at a time.
  if (is_filling_)
    return;
  is_filling_ = true;

  // NOTE(review): when there is no delegate, |is_filling_| stays true and no
  // later call will issue a request — confirm this is intended.
  if (!delegate_)
    return;

  // Never ask for more than the staging buffer can hold, nor for more than
  // the fifo has room for.
  const int staging_capacity = static_cast<int>(audio_data_.size());
  const int fifo_free_bytes =
      GetBytesPerFrame(format_) * audio_fifo_->GetUnfilledFrames();
  const int bytes_to_request = std::min(staging_capacity, fifo_free_bytes);

  // FillBuffer will not be called after delegate_->OnEndOfStream, after which
  // AudioDeviceOwner will be destroyed. Thus |this| is valid for capture
  // here.
  delegate_->FillBuffer(
      audio_data_.data(), bytes_to_request,
      time.since_origin().InMicroseconds(),
      [this](int bytes_filled) { BufferFillDone(bytes_filled); });
}

// Completion callback for the FillBuffer() request issued by
// ScheduleFillLocked(). |num_bytes| is the amount of data the delegate wrote
// into |audio_data_|. A value of 0 leaves the fifo untouched and issues no
// further request; otherwise the data is moved into |audio_fifo_| and another
// fill is requested while the fifo still has room.
// NOTE(review): the original comment states this runs on the audio renderer
// thread; the calling thread is chosen by the delegate — confirm.
void AudioDeviceOwner::BufferFillDone(int num_bytes) {
  base::AutoLock lock(lock_);

  // The outstanding request has completed, whatever its outcome.
  is_filling_ = false;

  if (num_bytes != 0) {
    FillAudioFifoWithDataOfBufferFormat(audio_fifo_.get(), audio_data_,
                                        format_, num_bytes);

    const bool fifo_has_room = audio_fifo_->GetUnfilledFrames() > 0;
    if (fifo_has_room)
      ScheduleFillLocked(base::TimeTicks::Now());
  }
}

}  // namespace ash::libassistant