// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/ash/accessibility/service/tts_client_impl.h"
#include "content/public/browser/browser_context.h"
#include "content/public/browser/tts_controller.h"
#include "services/accessibility/public/mojom/tts.mojom.h"
#include "ui/base/l10n/l10n_util.h"
namespace ash {
namespace {
// Upper bound on utterance length accepted by the TTS extension API.
constexpr int kMaxUtteranceLength = 32768;
// Lowest speech rate accepted by the TTS extension API.
constexpr double kMinRate = 0.1;
// Highest speech rate accepted by the TTS extension API.
constexpr double kMaxRate = 10.0;
// Highest speech pitch accepted by the TTS extension API.
constexpr double kMaxPitch = 2.0;
// Highest speech volume accepted by the TTS extension API.
constexpr double kMaxVolume = 1.0;
// Maps a content::TtsEventType onto the ax::mojom::TtsEventType enumerator of
// the same name. The switch has no default label; every handled enumerator is
// listed explicitly.
ax::mojom::TtsEventType ToMojo(content::TtsEventType event_type) {
  using MojoEvent = ax::mojom::TtsEventType;
  switch (event_type) {
    case content::TTS_EVENT_START:
      return MojoEvent::kStart;
    case content::TTS_EVENT_END:
      return MojoEvent::kEnd;
    case content::TTS_EVENT_WORD:
      return MojoEvent::kWord;
    case content::TTS_EVENT_SENTENCE:
      return MojoEvent::kSentence;
    case content::TTS_EVENT_MARKER:
      return MojoEvent::kMarker;
    case content::TTS_EVENT_INTERRUPTED:
      return MojoEvent::kInterrupted;
    case content::TTS_EVENT_CANCELLED:
      return MojoEvent::kCancelled;
    case content::TTS_EVENT_ERROR:
      return MojoEvent::kError;
    case content::TTS_EVENT_PAUSE:
      return MojoEvent::kPause;
    case content::TTS_EVENT_RESUME:
      return MojoEvent::kResume;
  }
}
// Relays TTS events to a TtsUtteranceClient mojo remote.
//
// Instances are self-owned: they are heap-allocated through Create() and
// delete themselves inside OnTtsEvent() once the utterance reports that it is
// finished. Compare to TtsExtensionEventHandler.
class AtpTtsEventHandler : public content::UtteranceEventDelegate {
 public:
  // Factory that hides the "new" so clients don't have to think about it.
  static AtpTtsEventHandler* Create() { return new AtpTtsEventHandler(); }

  AtpTtsEventHandler(const AtpTtsEventHandler&) = delete;
  AtpTtsEventHandler& operator=(const AtpTtsEventHandler&) = delete;
  ~AtpTtsEventHandler() override = default;

  // Binds |utterance_client_| to a new message pipe and returns the pipe's
  // receiver end.
  mojo::PendingReceiver<ax::mojom::TtsUtteranceClient> PassReceiver() {
    return utterance_client_.BindNewPipeAndPassReceiver();
  }

  // content::UtteranceEventDelegate:
  //
  // Packages the event into an ax::mojom::TtsEvent and sends it through
  // |utterance_client_|. |error_message| is only copied into the event when
  // |event_type| is TTS_EVENT_ERROR.
  void OnTtsEvent(content::TtsUtterance* utterance,
                  content::TtsEventType event_type,
                  int char_index,
                  int length,
                  const std::string& error_message) override {
    auto event = ax::mojom::TtsEvent::New();
    event->type = ToMojo(event_type);
    event->char_index = char_index;
    event->length = length;
    event->is_final = utterance->IsFinished();
    if (event_type == content::TTS_EVENT_ERROR) {
      event->error_message = error_message;
    }
    utterance_client_->OnEvent(std::move(event));

    if (!utterance->IsFinished()) {
      return;
    }
    // Expected to self-destroy on call to TtsEvent, see
    // tts_utterance_impl.cc. Nothing may touch |this| after this line.
    delete this;
  }

 private:
  // Construction goes through Create() only.
  AtpTtsEventHandler() = default;

  // Remote end that receives the events; bound by PassReceiver().
  mojo::Remote<ax::mojom::TtsUtteranceClient> utterance_client_;
};
} // namespace
// Remembers |profile| for use by later TTS requests (Speak(), GetVoices()).
// A null |profile| is a fatal error.
TtsClientImpl::TtsClientImpl(content::BrowserContext* profile)
    : profile_(profile) {
  CHECK(profile_);
}
// Defaulted: no teardown beyond member destruction is written here.
TtsClientImpl::~TtsClientImpl() = default;
// Adds |tts_receiver| to |tts_receivers_|, with this object as the
// implementation that serves it.
void TtsClientImpl::Bind(mojo::PendingReceiver<Tts> tts_receiver) {
  tts_receivers_.Add(this, std::move(tts_receiver));
}
void TtsClientImpl::Speak(const std::string& utterance,
ax::mojom::TtsOptionsPtr options,
SpeakCallback callback) {
auto result = ax::mojom::TtsSpeakResult::New();
if (utterance.size() > kMaxUtteranceLength) {
result->error = ax::mojom::TtsError::kErrorUtteranceTooLong;
std::move(callback).Run(std::move(result));
return;
}
// Check for errors in options.
// TODO(crbug.com/41278287): Centralize the struct validation.
if (options->rate < kMinRate || options->rate > kMaxRate) {
result->error = ax::mojom::TtsError::kErrorInvalidRate;
std::move(callback).Run(std::move(result));
return;
}
if (options->pitch < 0.0 || options->pitch > kMaxPitch) {
result->error = ax::mojom::TtsError::kErrorInvalidPitch;
std::move(callback).Run(std::move(result));
return;
}
if (options->volume < 0.0 || options->volume > kMaxVolume) {
result->error = ax::mojom::TtsError::kErrorInvalidVolume;
std::move(callback).Run(std::move(result));
return;
}
// Only make the utterance once we know we aren't going to return early.
std::unique_ptr<content::TtsUtterance> tts_utterance =
content::TtsUtterance::Create(profile_);
tts_utterance->SetText(utterance);
// TODO(b:277221897): Pass a fake GURL matching the ash extension URL.
// This will support both UMA and using enhanced network voices in ATP
// select-to-speak.
tts_utterance->SetSrcUrl(GURL(""));
tts_utterance->SetContinuousParameters(options->rate, options->pitch,
options->volume);
tts_utterance->SetShouldClearQueue(!options->enqueue);
if (options->lang) {
std::string lang = options->lang.value();
if (!lang.empty() && !l10n_util::IsValidLocaleSyntax(lang)) {
result->error = ax::mojom::TtsError::kErrorInvalidLang;
std::move(callback).Run(std::move(result));
return;
}
tts_utterance->SetLang(options->lang.value());
}
if (options->voice_name) {
tts_utterance->SetVoiceName(options->voice_name.value());
}
if (options->engine_id) {
tts_utterance->SetEngineId(options->engine_id.value());
}
if (options->on_event) {
auto* atpTtsEventHandler = AtpTtsEventHandler::Create();
result->utterance_client = atpTtsEventHandler->PassReceiver();
tts_utterance->SetEventDelegate(atpTtsEventHandler);
}
// Note: we don't need desired/required event types because they aren't
// passed by ChromeVox or STS. We don't need an options_dict, it's redundant,
// and we don't need a src_id because each ATP utterance has its own callback.
// Send the callback back to ATP with the utterance client.
result->error = ax::mojom::TtsError::kNoError;
std::move(callback).Run(std::move(result));
// Start speech.
content::TtsController* controller = content::TtsController::GetInstance();
controller->SpeakOrEnqueue(std::move(tts_utterance));
}
// Asks the TtsController to stop speech, passing an empty source URL.
void TtsClientImpl::Stop() {
  // TODO(b:277221897): Pass a fake GURL matching the ash extension URL so that
  // extensions cannot clobber other speech.
  content::TtsController::GetInstance()->Stop(GURL(""));
}
void TtsClientImpl::Pause() {
content::TtsController::GetInstance()->Pause();
}
void TtsClientImpl::Resume() {
content::TtsController::GetInstance()->Resume();
}
// Runs |callback| with the TtsController's current IsSpeaking() value.
void TtsClientImpl::IsSpeaking(IsSpeakingCallback callback) {
  const bool speaking = content::TtsController::GetInstance()->IsSpeaking();
  std::move(callback).Run(speaking);
}
// Queries the TtsController for the voices available to |profile_| and runs
// |callback| with one ax::mojom::TtsVoice per voice returned.
//
// For each voice, name, language, remote flag and engine id are copied over.
// |event_types| is only populated when the voice lists at least one event;
// otherwise it is left unset.
void TtsClientImpl::GetVoices(GetVoicesCallback callback) {
  std::vector<content::VoiceData> voices;
  // TODO(b:277221897): Pass a fake GURL matching the extension URL so that
  // Select to Speak can get the enhanced network voices.
  content::TtsController::GetInstance()->GetVoices(profile_, GURL(""), &voices);

  std::vector<ax::mojom::TtsVoicePtr> results;
  // The output has exactly one entry per input voice, so size it once.
  results.reserve(voices.size());
  for (const auto& voice : voices) {
    auto result = ax::mojom::TtsVoice::New();
    result->voice_name = voice.name;
    result->lang = voice.lang;
    result->remote = voice.remote;
    result->engine_id = voice.engine_id;
    if (!voice.events.empty()) {
      std::vector<ax::mojom::TtsEventType> event_types;
      event_types.reserve(voice.events.size());
      for (const auto type : voice.events) {
        event_types.push_back(ToMojo(type));
      }
      result->event_types = std::move(event_types);
    }
    results.push_back(std::move(result));
  }
  std::move(callback).Run(std::move(results));
}
} // namespace ash