chromium/chrome/browser/speech/tts_ash.cc

// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/speech/tts_ash.h"

#include <memory>
#include <utility>
#include <vector>

#include "base/functional/bind.h"
#include "base/memory/weak_ptr.h"
#include "base/not_fatal_until.h"
#include "base/notreached.h"
#include "base/unguessable_token.h"
#include "chrome/browser/ash/crosapi/browser_util.h"
#include "chrome/browser/profiles/profile_manager.h"
#include "chrome/browser/speech/crosapi_tts_engine_delegate_ash.h"
#include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
#include "chrome/browser/speech/tts_crosapi_util.h"
#include "chromeos/crosapi/mojom/tts.mojom.h"
#include "content/public/browser/browser_context.h"
#include "content/public/browser/tts_controller.h"
#include "content/public/browser/tts_utterance.h"
#include "url/gurl.h"

namespace {

// This class is used as the UtteranceEventDelegate for the TtsUtterance
// instance to be processed by Ash's TtsController, which is created for the
// utterance sent from Lacros via crosapi.
// The lifetime of an instance of this class is bound to the lifetime of the
// associated TtsUtterance. It will be deleted when the associated TtsUtterance
// receives the final event.
class LacrosUtteranceEventDelegate : public content::UtteranceEventDelegate {
 public:
  // |utterance_id| identifies the TtsUtterance handled by Ash's TtsController,
  // |remote_utterance_id| identifies its counterpart in Lacros, and |client|
  // is the pipe used to send events back to that counterpart.
  LacrosUtteranceEventDelegate(
      int utterance_id,
      int remote_utterance_id,
      mojo::PendingRemote<crosapi::mojom::TtsUtteranceClient> client)
      : utterance_id_(utterance_id),
        remote_utterance_id_(remote_utterance_id),
        client_(std::move(client)) {
    // The disconnect handler is bound through a weak pointer to |this|.
    auto on_disconnect = base::BindOnce(
        &LacrosUtteranceEventDelegate::OnTtsUtteranceClientDisconnected,
        weak_ptr_factory_.GetWeakPtr());
    client_.set_disconnect_handler(std::move(on_disconnect));
  }

  LacrosUtteranceEventDelegate(const LacrosUtteranceEventDelegate&) = delete;
  LacrosUtteranceEventDelegate& operator=(const LacrosUtteranceEventDelegate&) =
      delete;
  ~LacrosUtteranceEventDelegate() override = default;

  // content::UtteranceEventDelegate methods:
  // Forwards the event to the Lacros client and destroys |this| once
  // |utterance| reports that it is finished.
  void OnTtsEvent(content::TtsUtterance* utterance,
                  content::TtsEventType event_type,
                  int char_index,
                  int char_length,
                  const std::string& error_message) override {
    // Note: If |client_| is disconnected, this will be a no-op.
    const auto mojo_event_type = tts_crosapi_util::ToMojo(event_type);
    client_->OnTtsEvent(mojo_event_type, char_index, char_length,
                        error_message);

    // Self-deletion must stay the very last action: no member may be touched
    // afterwards.
    const bool is_finished = utterance->IsFinished();
    if (is_finished) {
      delete this;
    }
  }

  // Returns the id of the associated TtsUtterance living in Lacros.
  int GetRemoteUtteranceId() const { return remote_utterance_id_; }

 private:
  // Invoked when the Lacros end of |client_| goes away; tells Ash's
  // TtsController that the utterance identified by |utterance_id_| is no
  // longer valid.
  void OnTtsUtteranceClientDisconnected() {
    auto* controller = content::TtsController::GetInstance();
    controller->OnTtsUtteranceBecameInvalid(utterance_id_);
  }

  // Id of the TtsUtterance to be processed by Ash's TtsController.
  int utterance_id_;

  // Id of the associated TtsUtterance living in Lacros.
  int remote_utterance_id_;

  // Can be used to forward the Tts events back to Lacros, or notify Ash
  // TtsController when the original utterance in Lacros becomes invalid.
  mojo::Remote<crosapi::mojom::TtsUtteranceClient> client_;

  // Declared last, as in the original member order.
  base::WeakPtrFactory<LacrosUtteranceEventDelegate> weak_ptr_factory_{this};
};

// Returns the id of the TtsUtterance living in Lacros that is associated with
// the given |utterance| (in Ash).
// Note: A Lacros utterance has a TtsUtterance object created in Lacros
// which forwards TTS events back to the callback function in Lacros; it also
// has a TtsUtterance object created in Ash to be queued in the Ash
// TtsController's utterance queue.
int GetRemoteUtteranceId(content::TtsUtterance* utterance) {
  // Only valid for utterances flagged ShouldAlwaysBeSpoken(); in this file
  // those are the ones created by TtsAsh::SpeakOrEnqueue, which installs a
  // LacrosUtteranceEventDelegate as their event delegate.
  DCHECK(utterance->ShouldAlwaysBeSpoken());
  auto* const delegate =
      static_cast<LacrosUtteranceEventDelegate*>(utterance->GetEventDelegate());
  DCHECK(delegate);
  return delegate->GetRemoteUtteranceId();
}

}  // namespace

namespace crosapi {

// This is bound as the pending remote of |ash_utterance_client| argument of
// TtsClient::SpeakWithLacrosVoice when TtsAsh::SpeakWithLacrosVoice calls
// this crosapi to send an Ash utterance to Lacros.
// The remote passed to Lacros will call OnTtsEvent to forward the Tts event
// generated by the remote speech engine (in Lacros) to the callback of the
// original TtsUtterance in Ash.
class TtsAsh::TtsUtteranceClient : public mojom::TtsUtteranceClient {
 public:
  // |owner| is the TtsAsh that created this client; |utterance_id| is the id
  // of the Ash utterance whose events this client relays.
  TtsUtteranceClient(TtsAsh* owner, int utterance_id)
      : owner_(owner), utterance_id_(utterance_id) {}

  TtsUtteranceClient(const TtsUtteranceClient&) = delete;
  TtsUtteranceClient& operator=(const TtsUtteranceClient&) = delete;
  ~TtsUtteranceClient() override = default;

  // crosapi::mojom::TtsUtteranceClient:
  // Relays an event coming from Lacros to Ash's TtsController and, for a
  // final event, asks the owner to destroy this client.
  void OnTtsEvent(crosapi::mojom::TtsEventType mojo_tts_event,
                  uint32_t char_index,
                  uint32_t char_length,
                  const std::string& error_message) override {
    const content::TtsEventType event_type =
        tts_crosapi_util::FromMojo(mojo_tts_event);
    auto* controller = content::TtsController::GetInstance();
    controller->OnTtsEvent(utterance_id_, event_type, char_index, char_length,
                           error_message);

    if (!content::IsFinalTtsEventType(event_type)) {
      return;
    }

    owner_->DeletePendingAshUtteranceClient(utterance_id_);
    // Note: |this| is deleted at this point.
  }

  // Binds |receiver_| to a new message pipe and returns the remote end, which
  // is handed to Lacros.
  mojo::PendingRemote<crosapi::mojom::TtsUtteranceClient>
  BindTtsUtteranceClient() {
    return receiver_.BindNewPipeAndPassRemoteWithVersion();
  }

 private:
  // TtsAsh is responsible for creating and destroying the instances of this
  // class, and its lifetime is guaranteed to outlive |this|.
  const raw_ptr<TtsAsh> owner_;  // not owned.
  // Id of the original TtsUtterance instance owned by Ash's TtsController.
  int utterance_id_;
  mojo::Receiver<crosapi::mojom::TtsUtteranceClient> receiver_{this};
};

// Constructs the Ash-side Tts crosapi service. |profile_manager| must be
// non-null. No primary-profile browser context is known yet, so its id starts
// out as the null token.
TtsAsh::TtsAsh(ProfileManager* profile_manager)
    : profile_manager_(profile_manager),
      primary_profile_browser_context_id_(base::UnguessableToken::Null()) {
  DCHECK(profile_manager_);
  // Observe the profile manager (see OnProfileAdded and
  // OnProfileManagerDestroying) and the TtsController (see OnVoicesChanged).
  profile_manager_observation_.Observe(profile_manager);
  voices_changed_observation_.Observe(content::TtsController::GetInstance());
}

// Defaulted: no teardown beyond destroying the members is performed here.
TtsAsh::~TtsAsh() = default;

// Adds |pending_receiver| to |receivers_| with |this| as the mojom::Tts
// implementation serving it.
void TtsAsh::BindReceiver(mojo::PendingReceiver<mojom::Tts> pending_receiver) {
  receivers_.Add(this, std::move(pending_receiver));
}

// Returns true when at least one TtsClient is currently registered.
bool TtsAsh::HasTtsClient() const {
  return !tts_clients_.empty();
}

// Returns the browser context id recorded by RegisterTtsClient() for the
// primary profile. The value is the null token before any such registration
// and after the primary profile's client has disconnected.
base::UnguessableToken TtsAsh::GetPrimaryProfileBrowserContextId() const {
  return primary_profile_browser_context_id_;
}

// Registers the TtsClient remote for |browser_context_id|. Only the primary
// profile is expected at the moment (DCHECKed); its browser context id is
// remembered so later calls can locate this client.
void TtsAsh::RegisterTtsClient(mojo::PendingRemote<mojom::TtsClient> client,
                               const base::UnguessableToken& browser_context_id,
                               bool from_primary_profile) {
  CHECK(tts_crosapi_util::ShouldEnableLacrosTtsSupport());
  DCHECK(from_primary_profile);
  if (from_primary_profile) {
    primary_profile_browser_context_id_ = browser_context_id;
  }

  // When the pipe disconnects, TtsClientDisconnected() is run with the id of
  // this client, provided |this| is still alive.
  auto on_disconnect =
      base::BindOnce(&TtsAsh::TtsClientDisconnected,
                     weak_ptr_factory_.GetWeakPtr(), browser_context_id);

  mojo::Remote<mojom::TtsClient> remote(std::move(client));
  remote.set_disconnect_handler(std::move(on_disconnect));
  tts_clients_.emplace(browser_context_id, std::move(remote));
}

// Called by Lacros when its set of voices changes. Converts |lacros_voices|
// to content::VoiceData, caches them under |browser_context_id| and notifies
// Ash's TtsController. Does nothing when no TtsClient is registered.
void TtsAsh::VoicesChanged(const base::UnguessableToken& browser_context_id,
                           std::vector<mojom::TtsVoicePtr> lacros_voices) {
  if (!HasTtsClient())
    return;

  // TODO(crbug.com/40792881): Support secondary profile.
  DCHECK(browser_context_id == primary_profile_browser_context_id_);

  std::vector<content::VoiceData> voices;
  // The final size is known up front, so allocate once instead of growing
  // the vector element by element.
  voices.reserve(lacros_voices.size());
  for (const auto& mojo_voice : lacros_voices)
    voices.push_back(tts_crosapi_util::FromMojo(mojo_voice));

  // Cache Lacros voices.
  crosapi_voices_[browser_context_id] = std::move(voices);

  // Notify TtsController about VoicesChanged.
  content::TtsController::GetInstance()->VoicesChanged();
}

// Handles a speak request coming from Lacros: creates the Ash-side
// TtsUtterance for |mojo_utterance|, attaches a delegate that forwards its
// events to |utterance_client|, and hands it to Ash's TtsController.
void TtsAsh::SpeakOrEnqueue(
    mojom::TtsUtterancePtr mojo_utterance,
    mojo::PendingRemote<mojom::TtsUtteranceClient> utterance_client) {
  std::unique_ptr<content::TtsUtterance> utterance =
      tts_crosapi_util::CreateUtteranceFromMojo(
          mojo_utterance, /*should_always_be_spoken=*/true);

  // The delegate deletes itself once the utterance has finished (see
  // LacrosUtteranceEventDelegate::OnTtsEvent), hence the raw allocation.
  const int ash_utterance_id = utterance->GetId();
  const int lacros_utterance_id = mojo_utterance->utterance_id;
  auto* event_delegate = new LacrosUtteranceEventDelegate(
      ash_utterance_id, lacros_utterance_id, std::move(utterance_client));
  utterance->SetEventDelegate(event_delegate);

  auto* controller = content::TtsController::GetInstance();
  controller->SpeakOrEnqueue(std::move(utterance));
}

// Forwards a stop request for |source_url| to Ash's TtsController.
void TtsAsh::Stop(const GURL& source_url) {
  auto* controller = content::TtsController::GetInstance();
  controller->Stop(source_url);
}

// Forwards a pause request to Ash's TtsController.
void TtsAsh::Pause() {
  auto* controller = content::TtsController::GetInstance();
  controller->Pause();
}

// Forwards a resume request to Ash's TtsController.
void TtsAsh::Resume() {
  auto* controller = content::TtsController::GetInstance();
  controller->Resume();
}

// Replies through |callback| with whether Ash's TtsController is currently
// speaking.
void TtsAsh::IsSpeaking(IsSpeakingCallback callback) {
  const bool is_speaking = content::TtsController::GetInstance()->IsSpeaking();
  std::move(callback).Run(is_speaking);
}

// Asks the Lacros speech engine to speak |utterance| with |voice|, which must
// come from a remote Tts engine. Does nothing when no TtsClient is
// registered, or when no client is registered for the primary profile.
void TtsAsh::SpeakWithLacrosVoice(content::TtsUtterance* utterance,
                                  const content::VoiceData& voice) {
  if (!HasTtsClient())
    return;

  DCHECK(voice.from_remote_tts_engine);
  auto mojo_voice = tts_crosapi_util::ToMojo(voice);
  auto mojo_utterance = tts_crosapi_util::ToMojo(utterance);
  // TODO(crbug.com/40792881): Add secondary profile Tts support for lacros.
  base::UnguessableToken browser_context_id =
      GetPrimaryProfileBrowserContextId();
  mojo_utterance->browser_context_id = browser_context_id;
  auto item = tts_clients_.find(browser_context_id);
  CHECK(item != tts_clients_.end(), base::NotFatalUntil::M130);
  // The CHECK above is not fatal before M130, so execution can continue past
  // a failed lookup. Bail out instead of dereferencing end().
  if (item == tts_clients_.end())
    return;
  auto& tts_client = item->second;
  // Note: TtsUtterance::ShouldAlwaysBeSpoken() is a misleading name. It should
  // be renamed as TtsUtterance::FromExternalPlatform(), which indicates whether
  // the utterance is originated from an external platform(Lacros) or not (Ash).
  // TODO(crbug.com/40189267): Rename TtsUtterance::ShouldAlwaysBeSpoken().
  if (utterance->ShouldAlwaysBeSpoken()) {
    // Speak Lacros utterance.
    mojo_utterance->utterance_id = GetRemoteUtteranceId(utterance);

    // Don't need to pass utterance text back to Lacros via crosapi, since its
    // associated TtsUtterance object living in Lacros already has it.
    mojo_utterance->text = "";

    tts_client->SpeakWithLacrosVoice(std::move(mojo_utterance),
                                     std::move(mojo_voice),
                                     /*ash_utterance_client=*/{});
  } else {
    // Speak Ash utterance. A TtsUtteranceClient is created so that Lacros can
    // send the engine's events back; it is kept alive in
    // |pending_ash_utterance_clients_| until it is deleted through
    // DeletePendingAshUtteranceClient().
    int utterance_id = utterance->GetId();
    auto pending_utterance_client =
        std::make_unique<TtsUtteranceClient>(this, utterance_id);
    tts_client->SpeakWithLacrosVoice(
        std::move(mojo_utterance), std::move(mojo_voice),
        pending_utterance_client->BindTtsUtteranceClient());
    // At most one Ash utterance is expected to be pending at a time.
    DCHECK_EQ(pending_ash_utterance_clients_.size(), 0u);
    pending_ash_utterance_clients_.emplace(utterance_id,
                                           std::move(pending_utterance_client));
  }
}

// Asks the Lacros speech engine identified by |utterance|'s engine id to stop
// speaking. For an Ash utterance, its pending utterance client is deleted
// first. The engine request is skipped when no TtsClient is registered for
// the primary profile.
void TtsAsh::StopRemoteEngine(content::TtsUtterance* utterance) {
  if (!utterance->ShouldAlwaysBeSpoken()) {
    // When an Ash utterance being spoken by a remote speech engine (in Lacros)
    // should be stopped due to tts.Stop or its associated WebContents being
    // destroyed, the TtsUtterance instance owned by Ash's TtsController will be
    // deleted. The pending utterance client should also be deleted, which will
    // trigger its disconnect handler in Lacros to notify that the utterance
    // has become invalid in Ash.
    DeletePendingAshUtteranceClient(utterance->GetId());
  }
  auto item = tts_clients_.find(GetPrimaryProfileBrowserContextId());
  CHECK(item != tts_clients_.end(), base::NotFatalUntil::M130);
  // The CHECK above is not fatal before M130, so execution can continue past
  // a failed lookup. Bail out instead of dereferencing end().
  if (item == tts_clients_.end())
    return;
  item->second->Stop(utterance->GetEngineId());
}

// Asks the Lacros speech engine identified by |utterance|'s engine id to
// pause. Does nothing when no TtsClient is registered for the primary
// profile.
void TtsAsh::PauseRemoteEngine(content::TtsUtterance* utterance) {
  auto item = tts_clients_.find(GetPrimaryProfileBrowserContextId());
  CHECK(item != tts_clients_.end(), base::NotFatalUntil::M130);
  // The CHECK above is not fatal before M130, so execution can continue past
  // a failed lookup. Bail out instead of dereferencing end().
  if (item == tts_clients_.end())
    return;
  item->second->Pause(utterance->GetEngineId());
}

// Asks the Lacros speech engine identified by |utterance|'s engine id to
// resume. Does nothing when no TtsClient is registered for the primary
// profile.
void TtsAsh::ResumeRemoteEngine(content::TtsUtterance* utterance) {
  auto item = tts_clients_.find(GetPrimaryProfileBrowserContextId());
  CHECK(item != tts_clients_.end(), base::NotFatalUntil::M130);
  // The CHECK above is not fatal before M130, so execution can continue past
  // a failed lookup. Bail out instead of dereferencing end().
  if (item == tts_clients_.end())
    return;
  item->second->Resume(utterance->GetEngineId());
}

// Appends the cached Lacros voices for |browser_context_id| to |out_voices|.
// |out_voices| is left untouched when nothing is cached for that id.
void TtsAsh::GetCrosapiVoices(base::UnguessableToken browser_context_id,
                              std::vector<content::VoiceData>* out_voices) {
  const auto it_voices = crosapi_voices_.find(browser_context_id);
  if (it_voices == crosapi_voices_.end())
    return;

  // One range insert copies each voice exactly once; the previous by-value
  // loop copied every element twice.
  const auto& cached_voices = it_voices->second;
  out_voices->insert(out_voices->end(), cached_voices.begin(),
                     cached_voices.end());
}

// Destroys the pending TtsUtteranceClient stored for |utterance_id|, if any.
// When invoked from TtsUtteranceClient::OnTtsEvent this deletes the caller.
void TtsAsh::DeletePendingAshUtteranceClient(int utterance_id) {
  pending_ash_utterance_clients_.erase(utterance_id);
  // At most one Ash utterance client is expected to be pending at a time (see
  // the matching DCHECK in SpeakWithLacrosVoice), so nothing should remain.
  DCHECK(pending_ash_utterance_clients_.empty());
}

void TtsAsh::OnVoicesChanged() {
  if (!HasTtsClient())
    return;

  // Notify Lacros about voices change in Ash's TtsController.
  // TtsController in ash manages all the voices from both Ash and Lacros,
  // which is the ultimate truth of source to return all the voices when
  // asked by Lacros.
  std::vector<content::VoiceData> all_voices;
  content::TtsController::GetInstance()->GetVoices(
      ProfileManager::GetActiveUserProfile(), GURL(), &all_voices);

  // Convert to mojo voices.
  std::vector<crosapi::mojom::TtsVoicePtr> mojo_voices;
  for (const auto& voice : all_voices)
    mojo_voices.push_back(tts_crosapi_util::ToMojo(voice));

  auto item = tts_clients_.find(primary_profile_browser_context_id_);
  CHECK(item != tts_clients_.end(), base::NotFatalUntil::M130);
  item->second->VoicesChanged(std::move(mojo_voices));
}

// Installs the crosapi remote Tts engine delegate on Ash's TtsController when
// Lacros Tts support is enabled. |profile| itself is not inspected.
void TtsAsh::OnProfileAdded(Profile* profile) {
  if (!tts_crosapi_util::ShouldEnableLacrosTtsSupport()) {
    return;
  }

  auto* controller = content::TtsController::GetInstance();
  controller->SetRemoteTtsEngineDelegate(
      CrosapiTtsEngineDelegateAsh::GetInstance());
}

// Stops observing the ProfileManager and clears the stored pointer to it.
void TtsAsh::OnProfileManagerDestroying() {
  profile_manager_observation_.Reset();
  profile_manager_ = nullptr;
}

// Disconnect handler for the TtsClient registered under |browser_context_id|:
// forgets the client, resets the primary profile id if it was that client,
// and drops its cached voices, notifying TtsController if any were removed.
void TtsAsh::TtsClientDisconnected(
    const base::UnguessableToken& browser_context_id) {
  tts_clients_.erase(browser_context_id);

  const bool was_primary =
      browser_context_id == primary_profile_browser_context_id_;
  if (was_primary) {
    primary_profile_browser_context_id_ = base::UnguessableToken::Null();
  }

  // Remove the cached lacros voices.
  const bool had_cached_voices = crosapi_voices_.erase(browser_context_id) > 0;
  if (had_cached_voices) {
    content::TtsController::GetInstance()->VoicesChanged();
  }
}

}  // namespace crosapi