tts.mojom | Explore in Territory

// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

module crosapi.mojom;

import "mojo/public/mojom/base/unguessable_token.mojom";
import "mojo/public/mojom/base/values.mojom";
import "url/mojom/url.mojom";

// Events sent back from the TTS engine indicating the progress of an
// utterance. Delivered through TtsUtteranceClient::OnTtsEvent, and also used
// to describe event capabilities/interest in TtsVoice::events and
// TtsUtterance::desired_event_types / required_event_types.
//
// The enum is [Extensible]: new values may be appended in later versions. A
// receiver that does not know a newly added value deserializes it as the
// [Default] entry (kStart). The existing numeric values are part of the
// [Stable] wire contract and must not be renumbered or reused.
// NOTE(review): the values presumably mirror the event types of the
// chrome.tts extension API -- confirm against the TtsController code.
[Stable, Extensible]
enum TtsEventType {
  [Default] kStart = 0,
  kEnd = 1,
  kWord = 2,
  kSentence = 3,
  kMarker = 4,
  kInterrupted = 5,
  kCanceled = 6,
  kError = 7,
  kPause = 8,
  kResume = 9,
};

// Describes a single TTS voice. Lists of voices are exchanged between Lacros
// and Ash via Tts::VoicesChanged and TtsClient::VoicesChanged, and a single
// voice is passed to TtsClient::SpeakWithLacrosVoice to select the voice used
// for an utterance.
// This struct is [Stable]: existing fields must keep their order and types.
[Stable]
struct TtsVoice {
  // Name of the voice.
  string voice_name;

  // The language that this voice supports.
  // NOTE(review): the expected format (presumably a BCP 47 language tag such
  // as "en-US") is not specified here -- confirm.
  string lang;

  // If true, the synthesis engine is a remote network resource.
  bool remote;

  // The ID of the extension providing this voice.
  string engine_id;

  // All of the callback events that this voice is capable of sending.
  array<TtsEventType> events;

  // If true, this is implemented by the ChromeOS platform's subclass of
  // TtsPlatformImpl. Otherwise, this is implemented in a content embedder.
  bool native;

  // Id of the native voice.
  // NOTE(review): presumably only meaningful when |native| is true -- confirm.
  string native_voice_identifier;
};

// Represents a Tts utterance, i.e. one speech request together with the
// options and routing information needed to speak it in the other process.
// Sent from Lacros to Ash in Tts::SpeakOrEnqueue and from Ash to Lacros in
// TtsClient::SpeakWithLacrosVoice.
// This struct is [Stable]: existing fields must keep their order and types.
[Stable]
struct TtsUtterance {
  // Unique id of the utterance that helps route a Tts event back to the
  // client. This is needed when speaking an Ash utterance with a Lacros voice:
  //   1. TtsController (in Ash) creates |utterance_id|.
  //   2. It is passed to the TtsEngine extension API (in Lacros) onSpeak event.
  //   3. The ttsEngine extension (in Lacros) passes it back in sendTtsEvent.
  //   4. It eventually reaches TtsController::OnTtsEvent (in Ash), so that
  //      TtsController can track and process the async TtsEvent for the
  //      utterance.
  int32 utterance_id;

  // Text to speak.
  string text;

  // Language to use for synthesis.
  string lang;

  // Name of the voice to use for synthesis.
  string voice_name;

  // Speaking volume.
  // NOTE(review): the valid range for |volume|, |rate| and |pitch| is not
  // specified in this file; presumably it follows the chrome.tts speak
  // options -- confirm.
  double volume;

  // Speaking rate.
  double rate;

  // Speaking pitch.
  double pitch;

  // Extension id of the speech engine to use.
  string engine_id;

  // If false, enqueues this utterance if TTS is already in progress. Otherwise,
  // interrupts any current speech and flushes the speech queue before speaking
  // this new utterance.
  bool should_clear_queue;

  // TTS event types that the client is interested in listening to.
  array<TtsEventType> desired_event_types;

  // The TTS event types the voice must support.
  array<TtsEventType> required_event_types;

  // The source engine's ID of this utterance, so that it can route the events
  // back to the correct tts.speak call.
  uint32 src_id;

  // The URL of the page from which the speech request is called.
  url.mojom.Url src_url;

  // Options passed from the tts.speak argument, which will be passed to the
  // ttsEngine onSpeak event after a sanitizing process.
  mojo_base.mojom.DictionaryValue options;

  // True if the utterance is associated with a WebContents.
  bool was_created_with_web_contents;

  // Unique id of the browser context of the utterance.
  mojo_base.mojom.UnguessableToken browser_context_id;
};

// Interface for Tts, implemented in ash-chrome. Used by lacros-chrome to
// communicate with ash TtsController to send the voice data and
// speech requests to ash.
//
// When adding a method, annotate it with [MinVersion=<Next version>], assign
// it <Next method id>, and bump both counters below. Existing methods and
// their ordinals are part of the [Stable] contract and must not change.
// Next version: 5
// Next method id: 7
[Stable, Uuid="8550e8d0-a818-49a3-93c1-d8053a33b2e6"]
interface Tts {
  // A TtsClient can register itself with Tts, so that Tts can communicate with
  // the remote TtsClient associated with a particular |browser_context_id| in
  // Lacros. |from_primary_profile| is true if |browser_context_id| is
  // associated with the primary user profile in Lacros.
  RegisterTtsClient@0(pending_remote<TtsClient> client,
      mojo_base.mojom.UnguessableToken browser_context_id,
      bool from_primary_profile);

  // Called when Lacros voices changed for the BrowserContext associated with
  // |browser_context_id|. |lacros_voices| contains the new Lacros voices.
  VoicesChanged@1(mojo_base.mojom.UnguessableToken browser_context_id,
    array<TtsVoice> lacros_voices);

  // Speaks or enqueues |utterance|. |utterance_client| is the remote through
  // which Tts events for |utterance| are forwarded back to Lacros (see
  // TtsUtteranceClient).
  [MinVersion=1]
  SpeakOrEnqueue@2(TtsUtterance utterance,
                   pending_remote<TtsUtteranceClient> utterance_client);

  // Requests Ash TtsController to stop the current utterance if it matches
  // the given |source_url|.
  [MinVersion=2]
  Stop@3(url.mojom.Url source_url);

  // Requests Ash TtsController to pause speech synthesis.
  [MinVersion=3]
  Pause@4();

  // Requests Ash TtsController to resume speech synthesis.
  [MinVersion=3]
  Resume@5();

  // Replies with |speaking| set to true if Ash TtsController is currently
  // speaking an utterance.
  [MinVersion=4]
  IsSpeaking@6() => (bool speaking);
};

// Interface for tts client. Implemented in lacros-chrome.
// Each Tts client is associated with a browser context object in Lacros.
// Used by ash-chrome to send voices to Lacros and to drive Lacros speech
// engines (speak/stop/pause/resume).
//
// When adding a method, annotate it with [MinVersion=<Next version>], assign
// it <Next method id>, and bump both counters below.
// Next version: 3
// Next method id: 5
[Stable, Uuid="60ce0365-451e-402d-9a1c-e57350f9a202"]
interface TtsClient {
  // Called when voices changed in ash TtsController.
  // |all_voices| contains the new voices (provided by both Ash and
  // Lacros).
  VoicesChanged@0(array<TtsVoice> all_voices);

  // Called from Ash to request a Lacros Tts engine to speak the |utterance|
  // with the given |voice|. |ash_utterance_client| is provided only if
  // |utterance| is issued by Ash; otherwise it is null.
  [MinVersion=1]
  SpeakWithLacrosVoice@1(TtsUtterance utterance, TtsVoice voice,
      pending_remote<TtsUtteranceClient>? ash_utterance_client);

  // Called from Ash to request the specified Lacros speech engine to stop
  // speaking. |engine_id| identifies the speech engine.
  [MinVersion=1]
  Stop@2(string engine_id);

  // Called from Ash to request the specified Lacros speech engine to pause
  // speaking. |engine_id| identifies the speech engine.
  [MinVersion=2]
  Pause@3(string engine_id);

  // Called from Ash to request the specified Lacros speech engine to resume
  // speaking. |engine_id| identifies the speech engine.
  [MinVersion=2]
  Resume@4(string engine_id);
};

// This interface receives events from an utterance which is being spoken by
// a remote process.
// Can be used in either lacros-chrome or ash.
//
// For example, when the Tts extension API or SpeechSynthesis web API is called
// from the Lacros browser to speak an utterance, we call Tts::SpeakOrEnqueue
// to send the utterance to Ash along with a pending_remote<TtsUtteranceClient>
// object, which can be used to forward the TtsEvent back to Lacros. In
// addition, its disconnect handler will be invoked in Ash when the original
// utterance becomes invalid in Lacros.
// Similarly, it is also passed as an argument in
// TtsClient::SpeakWithLacrosVoice when Ash calls this API to speak an Ash
// utterance with a Lacros speech engine.
[Stable, Uuid="edf0536d-53ea-445c-9519-cb39af32364f"]
interface TtsUtteranceClient {
  // Forwards a Tts event received from the remote speech engine to the callback
  // function of the TtsUtterance. |error_message| may contain an error message
  // from the speech engine only if |event_type| is TtsEventType::kError.
  // NOTE(review): |char_index| and |length| presumably give the character
  // offset into the utterance text and the length of the segment the event
  // refers to -- confirm against TtsController::OnTtsEvent.
  OnTtsEvent@0(TtsEventType event_type, uint32 char_index,
             uint32 length, string error_message);
};
chromium/chromeos/crosapi/mojom/tts.mojom