// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/speech/tts_ash.h"
#include <memory>
#include <utility>
#include <vector>
#include "base/functional/bind.h"
#include "base/memory/weak_ptr.h"
#include "base/not_fatal_until.h"
#include "base/notreached.h"
#include "base/unguessable_token.h"
#include "chrome/browser/ash/crosapi/browser_util.h"
#include "chrome/browser/profiles/profile_manager.h"
#include "chrome/browser/speech/crosapi_tts_engine_delegate_ash.h"
#include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
#include "chrome/browser/speech/tts_crosapi_util.h"
#include "chromeos/crosapi/mojom/tts.mojom.h"
#include "content/public/browser/browser_context.h"
#include "content/public/browser/tts_controller.h"
#include "content/public/browser/tts_utterance.h"
#include "url/gurl.h"
namespace {
// UtteranceEventDelegate attached to the TtsUtterance that Ash creates for an
// utterance sent from Lacros via crosapi and that is processed by Ash's
// TtsController. Every TTS event is forwarded back to Lacros via |client_|.
//
// Lifetime: an instance is bound to its associated TtsUtterance and deletes
// itself once that utterance has received its final event.
class LacrosUtteranceEventDelegate : public content::UtteranceEventDelegate {
 public:
  // |utterance_id| is the id of the Ash-side TtsUtterance,
  // |remote_utterance_id| the id of its counterpart living in Lacros, and
  // |client| the pipe used to report events back to Lacros.
  LacrosUtteranceEventDelegate(
      int utterance_id,
      int remote_utterance_id,
      mojo::PendingRemote<crosapi::mojom::TtsUtteranceClient> client)
      : utterance_id_(utterance_id),
        remote_utterance_id_(remote_utterance_id),
        client_(std::move(client)) {
    // The handler is bound to a weak pointer, so it is dropped if |this| has
    // already been deleted when the pipe disconnects.
    auto on_disconnect = base::BindOnce(
        &LacrosUtteranceEventDelegate::OnTtsUtteranceClientDisconnected,
        weak_ptr_factory_.GetWeakPtr());
    client_.set_disconnect_handler(std::move(on_disconnect));
  }

  LacrosUtteranceEventDelegate(const LacrosUtteranceEventDelegate&) = delete;
  LacrosUtteranceEventDelegate& operator=(const LacrosUtteranceEventDelegate&) =
      delete;

  ~LacrosUtteranceEventDelegate() override = default;

  // content::UtteranceEventDelegate:
  // Forwards the event to Lacros and self-deletes once |utterance| is
  // finished.
  void OnTtsEvent(content::TtsUtterance* utterance,
                  content::TtsEventType event_type,
                  int char_index,
                  int char_length,
                  const std::string& error_message) override {
    // Note: If |client_| is disconnected, this will be a no-op.
    client_->OnTtsEvent(tts_crosapi_util::ToMojo(event_type), char_index,
                        char_length, error_message);

    const bool received_final_event = utterance->IsFinished();
    if (received_final_event) {
      // No member may be touched after this statement.
      delete this;
    }
  }

  // Returns the id of the associated TtsUtterance living in Lacros.
  int GetRemoteUtteranceId() const { return remote_utterance_id_; }

 private:
  // Invoked when the Lacros end of |client_| goes away; tells Ash's
  // TtsController that the Ash-side utterance has become invalid.
  void OnTtsUtteranceClientDisconnected() {
    content::TtsController::GetInstance()->OnTtsUtteranceBecameInvalid(
        utterance_id_);
  }

  // Id of the TtsUtterance to be processed by Ash's TtsController.
  int utterance_id_;

  // Id of the associated TtsUtterance living in Lacros.
  int remote_utterance_id_;

  // Used to forward the TTS events back to Lacros, and to notify Ash's
  // TtsController when the original utterance in Lacros becomes invalid.
  mojo::Remote<crosapi::mojom::TtsUtteranceClient> client_;

  base::WeakPtrFactory<LacrosUtteranceEventDelegate> weak_ptr_factory_{this};
};
// Returns the id of the TtsUtterance living in Lacros that is associated with
// the given |utterance| (in Ash).
//
// Note: A Lacros utterance is represented by two TtsUtterance objects: one
// created in Lacros, which forwards TTS events back to the callback function
// in Lacros, and one created in Ash, which is queued in the Ash
// TtsController's utterance queue.
//
// |utterance| must be a Lacros utterance (ShouldAlwaysBeSpoken() is true);
// its event delegate is then cast to LacrosUtteranceEventDelegate.
int GetRemoteUtteranceId(content::TtsUtterance* utterance) {
  DCHECK(utterance->ShouldAlwaysBeSpoken());
  auto* const delegate =
      static_cast<LacrosUtteranceEventDelegate*>(utterance->GetEventDelegate());
  DCHECK(delegate);
  return delegate->GetRemoteUtteranceId();
}
} // namespace
namespace crosapi {
// Receiver side of the |ash_utterance_client| argument that
// TtsAsh::SpeakWithLacrosVoice passes to TtsClient::SpeakWithLacrosVoice when
// it sends an Ash utterance to Lacros.
// The remote handed to Lacros calls OnTtsEvent to forward the TTS events
// generated by the remote speech engine (in Lacros) to the callback of the
// original TtsUtterance in Ash.
class TtsAsh::TtsUtteranceClient : public mojom::TtsUtteranceClient {
 public:
  // |owner| is the TtsAsh that created this client; |utterance_id| identifies
  // the original Ash utterance.
  TtsUtteranceClient(TtsAsh* owner, int utterance_id)
      : owner_(owner), utterance_id_(utterance_id) {}

  TtsUtteranceClient(const TtsUtteranceClient&) = delete;
  TtsUtteranceClient& operator=(const TtsUtteranceClient&) = delete;

  ~TtsUtteranceClient() override = default;

  // crosapi::mojom::TtsUtteranceClient:
  // Relays the event to Ash's TtsController; on a final event, asks the owner
  // to delete this client.
  void OnTtsEvent(crosapi::mojom::TtsEventType mojo_tts_event,
                  uint32_t char_index,
                  uint32_t char_length,
                  const std::string& error_message) override {
    const content::TtsEventType event_type =
        tts_crosapi_util::FromMojo(mojo_tts_event);
    content::TtsController::GetInstance()->OnTtsEvent(
        utterance_id_, event_type, char_index, char_length, error_message);

    if (!content::IsFinalTtsEventType(event_type)) {
      return;
    }

    owner_->DeletePendingAshUtteranceClient(utterance_id_);
    // Note: |this| is deleted at this point.
  }

  // Binds |receiver_| to a new message pipe and returns the remote end, which
  // is meant to be passed to Lacros.
  mojo::PendingRemote<crosapi::mojom::TtsUtteranceClient>
  BindTtsUtteranceClient() {
    return receiver_.BindNewPipeAndPassRemoteWithVersion();
  }

 private:
  // TtsAsh is responsible for creating and destroying the instances of this
  // class, and its lifetime is guaranteed to outlive |this|.
  const raw_ptr<TtsAsh> owner_;  // not owned.

  // Id of the original TtsUtterance instance owned by Ash's TtsController.
  int utterance_id_;

  mojo::Receiver<crosapi::mojom::TtsUtteranceClient> receiver_{this};
};
// Starts observing |profile_manager| (which must be non-null) and voice
// changes of the TtsController singleton. The primary profile's browser
// context id starts out as the null token.
TtsAsh::TtsAsh(ProfileManager* profile_manager)
    : profile_manager_(profile_manager),
      primary_profile_browser_context_id_(base::UnguessableToken::Null()) {
  DCHECK(profile_manager_);
  profile_manager_observation_.Observe(profile_manager);

  auto* const tts_controller = content::TtsController::GetInstance();
  voices_changed_observation_.Observe(tts_controller);
}
// Defaulted: all cleanup is left to the destructors of the members.
TtsAsh::~TtsAsh() = default;
// Adds |pending_receiver| to |receivers_| with this object as the
// mojom::Tts implementation.
void TtsAsh::BindReceiver(mojo::PendingReceiver<mojom::Tts> pending_receiver) {
receivers_.Add(this, std::move(pending_receiver));
}
// Returns true if at least one TtsClient is currently registered.
bool TtsAsh::HasTtsClient() const {
  return !tts_clients_.empty();
}
// Returns the browser context id recorded for the primary profile by
// RegisterTtsClient(). It is the null token until such a client registers, and
// again after that client disconnects.
base::UnguessableToken TtsAsh::GetPrimaryProfileBrowserContextId() const {
return primary_profile_browser_context_id_;
}
// Registers the TtsClient |client| under |browser_context_id|.
// Only clients from the primary profile are expected (|from_primary_profile|
// is DCHECKed); for those, |browser_context_id| is remembered as the primary
// profile's id. A disconnect handler is installed so the entry is cleaned up
// by TtsClientDisconnected() when the client goes away.
void TtsAsh::RegisterTtsClient(mojo::PendingRemote<mojom::TtsClient> client,
                               const base::UnguessableToken& browser_context_id,
                               bool from_primary_profile) {
  CHECK(tts_crosapi_util::ShouldEnableLacrosTtsSupport());
  DCHECK(from_primary_profile);
  if (from_primary_profile) {
    primary_profile_browser_context_id_ = browser_context_id;
  }

  mojo::Remote<mojom::TtsClient> tts_client(std::move(client));
  auto on_disconnect =
      base::BindOnce(&TtsAsh::TtsClientDisconnected,
                     weak_ptr_factory_.GetWeakPtr(), browser_context_id);
  tts_client.set_disconnect_handler(std::move(on_disconnect));

  tts_clients_.emplace(browser_context_id, std::move(tts_client));
}
// Called by Lacros when its voices change. Converts |lacros_voices| to
// content::VoiceData, caches them under |browser_context_id| and notifies
// Ash's TtsController that the voices changed.
// Does nothing when no TtsClient is registered.
void TtsAsh::VoicesChanged(const base::UnguessableToken& browser_context_id,
                           std::vector<mojom::TtsVoicePtr> lacros_voices) {
  if (!HasTtsClient()) {
    return;
  }

  // TODO(crbug.com/40792881): Support secondary profile.
  DCHECK(browser_context_id == primary_profile_browser_context_id_);

  std::vector<content::VoiceData> voices;
  // The final size is known up front; reserve once instead of growing the
  // vector repeatedly while appending.
  voices.reserve(lacros_voices.size());
  for (const auto& mojo_voice : lacros_voices) {
    voices.push_back(tts_crosapi_util::FromMojo(mojo_voice));
  }

  // Cache Lacros voices.
  crosapi_voices_[browser_context_id] = std::move(voices);

  // Notify TtsController about VoicesChanged.
  content::TtsController::GetInstance()->VoicesChanged();
}
// Handles an utterance sent from Lacros: creates the Ash-side TtsUtterance
// from |mojo_utterance|, attaches a LacrosUtteranceEventDelegate that reports
// events back through |utterance_client|, and hands the utterance to Ash's
// TtsController to be spoken or enqueued.
void TtsAsh::SpeakOrEnqueue(
    mojom::TtsUtterancePtr mojo_utterance,
    mojo::PendingRemote<mojom::TtsUtteranceClient> utterance_client) {
  std::unique_ptr<content::TtsUtterance> ash_side_utterance =
      tts_crosapi_util::CreateUtteranceFromMojo(
          mojo_utterance, /*should_always_be_spoken=*/true);

  // The delegate is not owned here: it deletes itself when the utterance
  // receives its final event.
  auto* const event_delegate = new LacrosUtteranceEventDelegate(
      ash_side_utterance->GetId(), mojo_utterance->utterance_id,
      std::move(utterance_client));
  ash_side_utterance->SetEventDelegate(event_delegate);

  content::TtsController::GetInstance()->SpeakOrEnqueue(
      std::move(ash_side_utterance));
}
// Forwards the stop request for |source_url| to Ash's TtsController.
void TtsAsh::Stop(const GURL& source_url) {
  auto* const tts_controller = content::TtsController::GetInstance();
  tts_controller->Stop(source_url);
}
// Forwards the pause request to Ash's TtsController.
void TtsAsh::Pause() {
  auto* const tts_controller = content::TtsController::GetInstance();
  tts_controller->Pause();
}
// Forwards the resume request to Ash's TtsController.
void TtsAsh::Resume() {
  auto* const tts_controller = content::TtsController::GetInstance();
  tts_controller->Resume();
}
// Runs |callback| with whether Ash's TtsController is currently speaking.
void TtsAsh::IsSpeaking(IsSpeakingCallback callback) {
  const bool is_speaking = content::TtsController::GetInstance()->IsSpeaking();
  std::move(callback).Run(is_speaking);
}
// Asks the primary profile's Lacros TtsClient to speak |utterance| with the
// remote |voice| (which must come from a remote TTS engine).
//
// Two kinds of utterances are handled:
//  - Lacros utterances (ShouldAlwaysBeSpoken() is true): only the remote
//    utterance id is sent back, without text and without an utterance client.
//  - Ash utterances: a TtsUtteranceClient is created and kept in
//    |pending_ash_utterance_clients_| so that Lacros can report TTS events
//    back to the original utterance in Ash.
//
// Does nothing when no TtsClient is registered, or when no client is
// registered for the primary profile.
void TtsAsh::SpeakWithLacrosVoice(content::TtsUtterance* utterance,
                                  const content::VoiceData& voice) {
  if (!HasTtsClient()) {
    return;
  }

  DCHECK(voice.from_remote_tts_engine);
  auto mojo_voice = tts_crosapi_util::ToMojo(voice);
  auto mojo_utterance = tts_crosapi_util::ToMojo(utterance);

  // TODO(crbug.com/40792881): Add secondary profile Tts support for lacros.
  base::UnguessableToken browser_context_id =
      GetPrimaryProfileBrowserContextId();
  mojo_utterance->browser_context_id = browser_context_id;

  auto item = tts_clients_.find(browser_context_id);
  CHECK(item != tts_clients_.end(), base::NotFatalUntil::M130);
  if (item == tts_clients_.end()) {
    // The CHECK above is not fatal before M130, so execution can reach this
    // point; bail out rather than dereference the end iterator.
    return;
  }
  auto& tts_client = item->second;

  // Note: TtsUtterance::ShouldAlwaysBeSpoken() is a misleading name. It should
  // be renamed as TtsUtterance::FromExternalPlatform(), which indicates whether
  // the utterance is originated from an external platform(Lacros) or not (Ash).
  // TODO(crbug.com/40189267): Rename TtsUtterance::ShouldAlwaysBeSpoken().
  if (utterance->ShouldAlwaysBeSpoken()) {
    // Speak Lacros utterance.
    mojo_utterance->utterance_id = GetRemoteUtteranceId(utterance);
    // Don't need to pass utterance text back to Lacros via crosapi, since its
    // associated TtsUtterance object living in Lacros already has it.
    mojo_utterance->text = "";
    tts_client->SpeakWithLacrosVoice(std::move(mojo_utterance),
                                     std::move(mojo_voice),
                                     /*ash_utterance_client=*/{});
  } else {
    // Speak Ash utterance.
    int utterance_id = utterance->GetId();
    auto pending_utterance_client =
        std::make_unique<TtsUtteranceClient>(this, utterance_id);
    tts_client->SpeakWithLacrosVoice(
        std::move(mojo_utterance), std::move(mojo_voice),
        pending_utterance_client->BindTtsUtteranceClient());
    // At most one Ash utterance client is expected to be pending at a time.
    DCHECK_EQ(pending_ash_utterance_clients_.size(), 0u);
    pending_ash_utterance_clients_.emplace(utterance_id,
                                           std::move(pending_utterance_client));
  }
}
// Asks the primary profile's Lacros TtsClient to stop the remote engine that
// is speaking |utterance|. For an Ash utterance, the pending utterance client
// is deleted first. If no client is registered for the primary profile, the
// stop request is not sent.
void TtsAsh::StopRemoteEngine(content::TtsUtterance* utterance) {
  if (!utterance->ShouldAlwaysBeSpoken()) {
    // When an Ash utterance being spoken by a remote speech engine (in Lacros)
    // should be stopped due to tts.Stop or its associated WebContents being
    // destroyed, the TtsUtterance instance owned by Ash's TtsController will be
    // deleted. The pending utterance client should also be deleted, which will
    // trigger its disconnect handler in Lacros to notify that the utterance
    // has become invalid in Ash.
    DeletePendingAshUtteranceClient(utterance->GetId());
  }

  auto item = tts_clients_.find(GetPrimaryProfileBrowserContextId());
  CHECK(item != tts_clients_.end(), base::NotFatalUntil::M130);
  if (item == tts_clients_.end()) {
    // The CHECK above is not fatal before M130, so execution can reach this
    // point; bail out rather than dereference the end iterator.
    return;
  }
  item->second->Stop(utterance->GetEngineId());
}
// Asks the primary profile's Lacros TtsClient to pause the remote engine
// identified by |utterance|'s engine id. If no client is registered for the
// primary profile, the request is not sent.
void TtsAsh::PauseRemoteEngine(content::TtsUtterance* utterance) {
  auto item = tts_clients_.find(GetPrimaryProfileBrowserContextId());
  CHECK(item != tts_clients_.end(), base::NotFatalUntil::M130);
  if (item == tts_clients_.end()) {
    // The CHECK above is not fatal before M130, so execution can reach this
    // point; bail out rather than dereference the end iterator.
    return;
  }
  item->second->Pause(utterance->GetEngineId());
}
// Asks the primary profile's Lacros TtsClient to resume the remote engine
// identified by |utterance|'s engine id. If no client is registered for the
// primary profile, the request is not sent.
void TtsAsh::ResumeRemoteEngine(content::TtsUtterance* utterance) {
  auto item = tts_clients_.find(GetPrimaryProfileBrowserContextId());
  CHECK(item != tts_clients_.end(), base::NotFatalUntil::M130);
  if (item == tts_clients_.end()) {
    // The CHECK above is not fatal before M130, so execution can reach this
    // point; bail out rather than dereference the end iterator.
    return;
  }
  item->second->Resume(utterance->GetEngineId());
}
// Appends the cached Lacros voices for |browser_context_id| to |out_voices|.
// Leaves |out_voices| untouched when nothing is cached for that id.
void TtsAsh::GetCrosapiVoices(base::UnguessableToken browser_context_id,
                              std::vector<content::VoiceData>* out_voices) {
  auto it_voices = crosapi_voices_.find(browser_context_id);
  if (it_voices == crosapi_voices_.end()) {
    return;
  }

  // Append the whole range in one call: each voice is copied once, straight
  // into |out_voices|, instead of via a by-value loop variable.
  const auto& cached_voices = it_voices->second;
  out_voices->insert(out_voices->end(), cached_voices.begin(),
                     cached_voices.end());
}
void TtsAsh::DeletePendingAshUtteranceClient(int utterance_id) {
pending_ash_utterance_clients_.erase(utterance_id);
DCHECK(pending_ash_utterance_clients_.empty());
}
void TtsAsh::OnVoicesChanged() {
if (!HasTtsClient())
return;
// Notify Lacros about voices change in Ash's TtsController.
// TtsController in ash manages all the voices from both Ash and Lacros,
// which is the ultimate truth of source to return all the voices when
// asked by Lacros.
std::vector<content::VoiceData> all_voices;
content::TtsController::GetInstance()->GetVoices(
ProfileManager::GetActiveUserProfile(), GURL(), &all_voices);
// Convert to mojo voices.
std::vector<crosapi::mojom::TtsVoicePtr> mojo_voices;
for (const auto& voice : all_voices)
mojo_voices.push_back(tts_crosapi_util::ToMojo(voice));
auto item = tts_clients_.find(primary_profile_browser_context_id_);
CHECK(item != tts_clients_.end(), base::NotFatalUntil::M130);
item->second->VoicesChanged(std::move(mojo_voices));
}
// When Lacros TTS support is enabled, installs CrosapiTtsEngineDelegateAsh as
// the remote TTS engine delegate of Ash's TtsController. |profile| is unused.
void TtsAsh::OnProfileAdded(Profile* profile) {
  if (!tts_crosapi_util::ShouldEnableLacrosTtsSupport()) {
    return;
  }

  auto* const tts_controller = content::TtsController::GetInstance();
  tts_controller->SetRemoteTtsEngineDelegate(
      CrosapiTtsEngineDelegateAsh::GetInstance());
}
// Stops observing the ProfileManager and clears the pointer to it.
void TtsAsh::OnProfileManagerDestroying() {
profile_manager_observation_.Reset();
profile_manager_ = nullptr;
}
// Disconnect handler for the TtsClient registered under |browser_context_id|.
// Removes the client, resets the primary profile id to the null token if it
// belonged to this client, and drops the voices cached for it. Ash's
// TtsController is notified only if cached voices were actually removed.
void TtsAsh::TtsClientDisconnected(
    const base::UnguessableToken& browser_context_id) {
  tts_clients_.erase(browser_context_id);

  if (browser_context_id == primary_profile_browser_context_id_) {
    primary_profile_browser_context_id_ = base::UnguessableToken::Null();
  }

  // Remove the cached lacros voices.
  const bool had_cached_voices = crosapi_voices_.erase(browser_context_id) > 0;
  if (had_cached_voices) {
    content::TtsController::GetInstance()->VoicesChanged();
  }
}
} // namespace crosapi