// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROMEOS_ASH_COMPONENTS_ENHANCED_NETWORK_TTS_ENHANCED_NETWORK_TTS_IMPL_H_
#define CHROMEOS_ASH_COMPONENTS_ENHANCED_NETWORK_TTS_ENHANCED_NETWORK_TTS_IMPL_H_
#include <list>
#include <memory>
#include <optional>
#include <string>
#include "base/component_export.h"
#include "base/feature_list.h"
#include "base/memory/scoped_refptr.h"
#include "base/memory/weak_ptr.h"
#include "base/metrics/field_trial_params.h"
#include "base/values.h"
#include "chromeos/ash/components/enhanced_network_tts/mojom/enhanced_network_tts.mojom.h"
#include "mojo/public/cpp/bindings/receiver.h"
#include "mojo/public/cpp/bindings/remote.h"
#include "services/data_decoder/public/cpp/data_decoder.h"
#include "services/network/public/cpp/shared_url_loader_factory.h"
#include "services/network/public/cpp/simple_url_loader.h"
namespace ash::enhanced_network_tts {
// Feature that controls whether the enhanced network TTS params are
// overridden. Its "api_key" param (declared as
// |EnhancedNetworkTtsImpl::kApiKey|) carries an override Google API key.
BASE_DECLARE_FEATURE(kEnhancedNetworkTtsOverride);
// The implementation of the enhanced network text-to-speech mojom receiver.
// The remote of this mojom pipe will be invoked from the enhanced network tts
// JS extension. This receiver fetches audio data on behalf of the remote. The
// audio data is generated by Google's Speakr API.
class COMPONENT_EXPORT(CHROMEOS_ASH_COMPONENTS_ENHANCED_NETWORK_TTS)
    EnhancedNetworkTtsImpl : public mojom::EnhancedNetworkTts {
 public:
  // Getter for the singleton.
  static EnhancedNetworkTtsImpl& GetInstance();

  EnhancedNetworkTtsImpl();
  // Not copyable.
  EnhancedNetworkTtsImpl(const EnhancedNetworkTtsImpl&) = delete;
  EnhancedNetworkTtsImpl& operator=(const EnhancedNetworkTtsImpl&) = delete;
  ~EnhancedNetworkTtsImpl() override;

  // Binds a pending receiver and a URL loader factory.
  void BindReceiverAndURLFactory(
      mojo::PendingReceiver<mojom::EnhancedNetworkTts> receiver,
      scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory);

  // mojom::EnhancedNetworkTts:
  void GetAudioData(mojom::TtsRequestPtr request,
                    GetAudioDataCallback callback) override;

  // Sets the character limit of the text piece in each |ServerRequest|. Unit
  // tests can use this method to modify the limit. Otherwise, the limit is set
  // to |mojom::kEnhancedNetworkTtsMaxCharacterSize|.
  void SetCharLimitPerRequestForTesting(int limit);

 private:
  // An input utterance may be chopped into several text pieces, which will be
  // sent over several |ServerRequest|s. A |ServerRequest| contains three
  // elements: |url_loader| holding one text piece, |start_index| indicating
  // the text piece's start in the original input utterance, and a bool
  // |is_last_request| indicating whether it is the last request.
  struct ServerRequest {
    ServerRequest(std::unique_ptr<network::SimpleURLLoader> url_loader,
                  int start_index,
                  bool is_last_request);
    ~ServerRequest();

    const std::unique_ptr<network::SimpleURLLoader> url_loader;
    const int start_index;
    const bool is_last_request;
  };

  // List of ServerRequest.
  using ServerRequestList = std::list<ServerRequest>;

  // Creates a URL loader for a network request with an attached API key.
  std::unique_ptr<network::SimpleURLLoader> MakeRequestLoader();

  // Processes the next |ServerRequest| in |server_requests_|. Resets
  // |server_requests_| and |on_data_received_observer_| if there are no more
  // requests.
  void ProcessNextServerRequest();

  // Called when the ReadAloud server responds with audio data, which is
  // encoded as a JSON string. |server_request_it| points at the request in
  // |server_requests_| that this response belongs to.
  void OnServerResponseReceived(ServerRequestList::iterator server_request_it,
                                std::unique_ptr<std::string> json_response);

  // Called when the data decoder service provides parsed JSON data for a
  // server response. The server response corresponds to the text piece that
  // has the |start_index| in the original input utterance. |is_last_request|
  // indicates if this is the last response we expect.
  void OnResponseJsonParsed(int start_index,
                            bool is_last_request,
                            data_decoder::DataDecoder::ValueOrError result);

  // Sends the response to the |mojom::AudioDataObserver|.
  void SendResponse(mojom::TtsResponsePtr response);

  // Judging by the name, resets |server_requests_| and
  // |on_data_received_observer_|; see the implementation file for details.
  void ResetServerRequestsAndObserver();

  // Judging by the name, performs the reset above and reports |error_code| to
  // the observer; see the implementation file for the exact ordering.
  void ResetAndSendErrorResponse(mojom::TtsRequestError error_code);

  // A list of HTTP requests to the ReadAloud server. The requests are
  // processed one by one from the front of the list. Any new request should be
  // pushed into the back of the list.
  ServerRequestList server_requests_;

  // The observer waiting for |TtsResponse|. This will be reset after we send
  // out the last response to it. It will also be reset if the receiver
  // (JS extension) is disconnected. For example, the JS extension actively
  // stops the request by calling |Reset()|, or the JS extension accidentally
  // gets shut down and closes the pipe passively.
  mojo::Remote<mojom::AudioDataObserver> on_data_received_observer_;

  // Decoder for the data decoding service.
  data_decoder::DataDecoder data_decoder_;

  // URL loader factory supplied through |BindReceiverAndURLFactory|.
  scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory_;

  // Receiver end of the |mojom::EnhancedNetworkTts| pipe, bound to |this|.
  mojo::Receiver<mojom::EnhancedNetworkTts> receiver_{this};

  // API key for server requests. Presumably resolved once at construction from
  // |kApiKey| or the build-time key -- confirm in the implementation file.
  const std::string api_key_;

  // The character limit of the text piece in each |ServerRequest|. The limit
  // is set to |mojom::kEnhancedNetworkTtsMaxCharacterSize| but can be
  // overridden by |SetCharLimitPerRequestForTesting|.
  int char_limit_per_request_;

  // Used for all callbacks. Keep this as the last non-static data member so
  // that weak pointers are invalidated before the other members are destroyed.
  base::WeakPtrFactory<EnhancedNetworkTtsImpl> weak_factory_{this};

  // An override Google API key. If empty, the API key with which the browser
  // was built (if any) will be used instead.
  static constexpr base::FeatureParam<std::string> kApiKey{
      &kEnhancedNetworkTtsOverride, "api_key", ""};
};
} // namespace ash::enhanced_network_tts
#endif // CHROMEOS_ASH_COMPONENTS_ENHANCED_NETWORK_TTS_ENHANCED_NETWORK_TTS_IMPL_H_