// enhanced_network_tts_impl.h

// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROMEOS_ASH_COMPONENTS_ENHANCED_NETWORK_TTS_ENHANCED_NETWORK_TTS_IMPL_H_
#define CHROMEOS_ASH_COMPONENTS_ENHANCED_NETWORK_TTS_ENHANCED_NETWORK_TTS_IMPL_H_

#include <list>
#include <memory>
#include <optional>
#include <string>

#include "base/component_export.h"
#include "base/feature_list.h"
#include "base/memory/scoped_refptr.h"
#include "base/memory/weak_ptr.h"
#include "base/metrics/field_trial_params.h"
#include "base/values.h"
#include "chromeos/ash/components/enhanced_network_tts/mojom/enhanced_network_tts.mojom.h"
#include "mojo/public/cpp/bindings/receiver.h"
#include "mojo/public/cpp/bindings/remote.h"
#include "services/data_decoder/public/cpp/data_decoder.h"
#include "services/network/public/cpp/shared_url_loader_factory.h"
#include "services/network/public/cpp/simple_url_loader.h"

namespace ash::enhanced_network_tts {

// Feature controlling whether or not to override enhanced TTS params. The
// "api_key" feature param (|EnhancedNetworkTtsImpl::kApiKey|) is attached to
// this feature.
BASE_DECLARE_FEATURE(kEnhancedNetworkTtsOverride);

// The implementation of the enhanced network text-to-speech mojom receiver.
// The remote of this mojom pipe will be invoked from the enhanced network tts
// JS extension. This receiver fetches audio data on behalf of the remote. The
// audio data is generated by Google's Speakr API.
//
// The class is not copyable. An utterance passed to |GetAudioData| may be
// split into several |ServerRequest|s, which are processed one at a time from
// the front of |server_requests_|.
class COMPONENT_EXPORT(CHROMEOS_ASH_COMPONENTS_ENHANCED_NETWORK_TTS)
    EnhancedNetworkTtsImpl : public mojom::EnhancedNetworkTts {
 public:
  // Getter for the singleton.
  static EnhancedNetworkTtsImpl& GetInstance();

  EnhancedNetworkTtsImpl();
  EnhancedNetworkTtsImpl(const EnhancedNetworkTtsImpl&) = delete;
  EnhancedNetworkTtsImpl& operator=(const EnhancedNetworkTtsImpl&) = delete;
  ~EnhancedNetworkTtsImpl() override;

  // Binds a pending receiver and a url factory.
  void BindReceiverAndURLFactory(
      mojo::PendingReceiver<mojom::EnhancedNetworkTts> receiver,
      scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory);

  // mojom::EnhancedNetworkTts:
  void GetAudioData(mojom::TtsRequestPtr request,
                    GetAudioDataCallback callback) override;

  // Set the character limit of text piece in each |ServerRequest|. Unit tests
  // can use this method to modify the limit. Otherwise, the limit is set to
  // |mojom::kEnhancedNetworkTtsMaxCharacterSize|.
  void SetCharLimitPerRequestForTesting(int limit);

 private:
  // An input utterance may be chopped into several text pieces, which will be
  // sent over several |ServerRequest|. A |ServerRequest| contains three
  // elements: |url_loader| holding one text piece, |start_index| indicating the
  // text piece's start in the original input utterance, and a bool
  // |is_last_request| indicating whether it is the last request.
  //
  // All members are const, so a |ServerRequest| cannot be assigned to after
  // construction.
  struct ServerRequest {
    ServerRequest(std::unique_ptr<network::SimpleURLLoader> url_loader,
                  int start_index,
                  bool is_last_request);
    ~ServerRequest();

    const std::unique_ptr<network::SimpleURLLoader> url_loader;
    const int start_index;
    const bool is_last_request;
  };

  // List of ServerRequest.
  using ServerRequestList = std::list<ServerRequest>;

  // Create a URL loader for a network request with an attached API key.
  std::unique_ptr<network::SimpleURLLoader> MakeRequestLoader();

  // Process the next |ServerRequest| in the |server_requests_|. Resets
  // |server_requests_| and |on_data_received_observer_| if there is no more
  // request.
  void ProcessNextServerRequest();

  // Called when the ReadAloud server responds with audio data, which is
  // encoded as a JSON string. |server_request_it| points at the
  // |ServerRequest| in |server_requests_| that this response belongs to.
  void OnServerResponseReceived(ServerRequestList::iterator server_request_it,
                                std::unique_ptr<std::string> json_response);

  // Called when the data decoder service provides parsed JSON data for a
  // server response. The server response corresponds to the text piece that
  // has the |start_index| in the original input utterance. |is_last_request|
  // indicates if this is the last response we expect.
  void OnResponseJsonParsed(int start_index,
                            bool is_last_request,
                            data_decoder::DataDecoder::ValueOrError result);

  // Sends the response to the |mojom::AudioDataObserver|.
  void SendResponse(mojom::TtsResponsePtr response);

  // Resets |server_requests_| and |on_data_received_observer_|.
  void ResetServerRequestsAndObserver();

  // Presumably resets the pending requests and the observer, then reports
  // |error_code| to the observer -- confirm the exact order in the .cc file.
  void ResetAndSendErrorResponse(mojom::TtsRequestError error_code);

  // A list of HTTP requests to the ReadAloud server. The requests are
  // processed one by one from the front of the list. Any new request should be
  // pushed into the back of the list.
  ServerRequestList server_requests_;

  // The observer waiting for |TtsResponse|. This will be reset after we send
  // out the last response to it. It will also be reset if the receiver
  // (JS extension) is disconnected. For example, the JS extension actively
  // stops the request by calling |Reset()|. Or the JS extension accidentally
  // gets shut down and closes the pipe passively.
  mojo::Remote<mojom::AudioDataObserver> on_data_received_observer_;

  // Decoder for data decoding service.
  data_decoder::DataDecoder data_decoder_;

  // URL loader factory supplied through |BindReceiverAndURLFactory|;
  // presumably used to issue the server requests.
  scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory_;

  // Receiver end of the |mojom::EnhancedNetworkTts| pipe, bound in
  // |BindReceiverAndURLFactory|.
  mojo::Receiver<mojom::EnhancedNetworkTts> receiver_{this};

  // API key attached to outgoing requests (see |MakeRequestLoader|). Fixed at
  // construction; presumably taken from |kApiKey| when that override is
  // non-empty -- confirm in the constructor.
  const std::string api_key_;

  // The character limit of text piece in each |ServerRequest|. The limit is set
  // to |mojom::kEnhancedNetworkTtsMaxCharacterSize| but can be overridden by
  // |SetCharLimitPerRequestForTesting|.
  int char_limit_per_request_;

  // Used for all callbacks. Keep this as the last non-static data member so
  // that outstanding weak pointers are invalidated before the other members
  // are destroyed.
  base::WeakPtrFactory<EnhancedNetworkTtsImpl> weak_factory_{this};

  // An override Google API key. If empty, the API key with which the browser
  // was built (if any) will be used instead.
  static constexpr base::FeatureParam<std::string> kApiKey{
      &kEnhancedNetworkTtsOverride, "api_key", ""};
};

}  // namespace ash::enhanced_network_tts

#endif  // CHROMEOS_ASH_COMPONENTS_ENHANCED_NETWORK_TTS_ENHANCED_NETWORK_TTS_IMPL_H_
// chromium/chromeos/ash/components/enhanced_network_tts/enhanced_network_tts_impl.h