chromium/third_party/blink/public/mojom/mediastream/media_stream.mojom

// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

module blink.mojom;

import "media/capture/mojom/video_capture_types.mojom";
import "media/mojo/mojom/audio_parameters.mojom";
import "media/mojo/mojom/display_media_information.mojom";
import "mojo/public/mojom/base/token.mojom";
import "mojo/public/mojom/base/unguessable_token.mojom";
import "third_party/blink/public/mojom/mediastream/media_devices.mojom";

// Types of media streams. When updating this list, make sure to update the
// predicates declared in content/public/browser/media_stream_request.h,
// e.g. IsVideoScreenCaptureMediaType().
//
// No enumerator has an explicit value, so each numeric value is determined by
// the enumerator's position in this list.
enum MediaStreamType {
  // NOTE(review): presumably means "no stream of this kind is requested" --
  // confirm against the users of TrackControls.stream_type.
  NO_SERVICE,

  // A device provided by the operating system (e.g., webcam input).
  DEVICE_AUDIO_CAPTURE,
  DEVICE_VIDEO_CAPTURE,

  // The GUM_* types below represent streams generated by getUserMedia() calls
  // whose constraints are collected with the legacy APIs for desktop and tab
  // capture.
  // Mirroring of a browser tab.
  GUM_TAB_AUDIO_CAPTURE,
  GUM_TAB_VIDEO_CAPTURE,

  // Desktop media sources.
  GUM_DESKTOP_VIDEO_CAPTURE,
  // Capture system audio (post-mix loopback stream).
  GUM_DESKTOP_AUDIO_CAPTURE,

  // Enables the capture of a user's display, generated by a getDisplayMedia()
  // call.
  DISPLAY_VIDEO_CAPTURE,
  DISPLAY_AUDIO_CAPTURE,

  // Enables the capture of a user's display. Like DISPLAY_VIDEO_CAPTURE, this
  // is generated by the Screen Capture API (www.w3.org/TR/screen-capture/).
  // Unlike DISPLAY_VIDEO_CAPTURE, this one specifies that the current tab
  // is the one that should be captured.
  DISPLAY_VIDEO_CAPTURE_THIS_TAB,

  // Enables the capture of multiple display surfaces at the same time,
  // generated by a getAllScreensMedia() call.
  DISPLAY_VIDEO_CAPTURE_SET,

  // Placeholder whose implicit value equals the number of enumerators above;
  // it is not a real stream type and has to stay last for that to hold.
  // TODO(crbug.com/971228): Remove NUM_MEDIA_TYPES, as per the conventions
  // in docs/security/mojo.md#do-not-define-placeholder-enumerator-values.
  NUM_MEDIA_TYPES
};

// Outcome of a media stream request; returned as `result` by
// MediaStreamDispatcherHost.GenerateStreams() and GetOpenDevice().
//
// Elements in this enum should not be deleted or rearranged; the only
// permitted operation is to add new elements before NUM_MEDIA_REQUEST_RESULTS.
// Values are implicit (position-based), so any reordering would renumber them.
// NOTE(review): the stability requirement suggests the numeric values are
// persisted somewhere (e.g. metrics) -- confirm before relaxing it.
enum MediaStreamRequestResult {
  // The request succeeded; every other value below (except the trailing
  // placeholder) names a failure reason.
  OK,
  PERMISSION_DENIED,
  PERMISSION_DISMISSED,
  INVALID_STATE,
  NO_HARDWARE,
  INVALID_SECURITY_ORIGIN,
  TAB_CAPTURE_FAILURE,
  SCREEN_CAPTURE_FAILURE,
  CAPTURE_FAILURE,
  CONSTRAINT_NOT_SATISFIED,
  TRACK_START_FAILURE_AUDIO,
  TRACK_START_FAILURE_VIDEO,
  NOT_SUPPORTED,
  FAILED_DUE_TO_SHUTDOWN,
  KILL_SWITCH_ON,
  SYSTEM_PERMISSION_DENIED,
  DEVICE_IN_USE,
  REQUEST_CANCELLED,
  // Placeholder equal to the number of results above; must stay last.
  NUM_MEDIA_REQUEST_RESULTS
};

// Type of state change for the corresponding requests. Passed as `new_state`
// to MediaStreamDeviceObserver.OnDeviceRequestStateChange(), which asks the
// renderer to pause or resume a media stream device.
enum MediaStreamStateChange {
  // NOTE(review): presumably PLAY is the "resume" request -- confirm.
  PLAY,
  PAUSE,
};

// Type of preferred display surface, indicating which display surface type
// (screens, windows, or tabs) should be most prominently offered to the user.
// Carried in StreamControls.preferred_display_surface.
enum PreferredDisplaySurface {
  NO_PREFERENCE,
  // NOTE(review): MONITOR / WINDOW / BROWSER presumably correspond to the
  // screen / window / tab surface types mentioned above -- confirm.
  MONITOR,
  WINDOW,
  BROWSER,
};

// This empty struct is used as a union option (see StreamSelectionInfo). Its
// presence triggers searching only by device ID; no other information is
// needed, which is why it has no fields.
struct SearchOnlyByDeviceId {};
// Union option of StreamSelectionInfo that allows an existing session to be
// re-used for the selected device.
struct SearchBySessionId {
  // Maps a device id to a session id.
  // If `session_id_map` contains a mapping for the selected device id, then
  // the session will be re-used, otherwise a new session will be created.
  map<string, mojo_base.mojom.UnguessableToken> session_id_map;
};

// Selects how a stream is looked up for a request; exactly one option is set.
// Passed as `audio_stream_selection_info` to
// MediaStreamDispatcherHost.GenerateStreams(); see MediaStreamDispatcherHost
// below for information.
union StreamSelectionInfo {
  // Search only by device ID.
  SearchOnlyByDeviceId search_only_by_device_id;
  // Re-use the session mapped to the selected device id, if there is one.
  SearchBySessionId search_by_session_id;
};

// Describes a single media stream device as exchanged between the browser and
// the renderer. The authoritative field documentation lives with the C++ type;
// see public/common/mediastream/media_stream_request.h.
struct MediaStreamDevice {
  // Kind of stream this device provides.
  MediaStreamType type;
  // Device id. Together with `session_id` it identifies the device in
  // MediaStreamDispatcherHost.StopStreamDevice().
  string id;
  // NOTE(review): presumably the id of the display this device is associated
  // with -- confirm against the C++ header.
  int64 display_id;
  media.mojom.VideoFacingMode video_facing;
  // Optional; may be null.
  string? group_id;
  // Optional; may be null.
  string? matched_output_device_id;
  string name;
  // NOTE(review): presumably the audio input parameters of the device; only
  // meaningful for audio devices -- confirm.
  media.mojom.AudioParameters input;
  // Optional session id; null when no session id has been set for the device.
  mojo_base.mojom.UnguessableToken? session_id;
  // Optional; may be null.
  media.mojom.DisplayMediaInformation? display_media_info;
};

// Per-track request settings; used for both the `audio` and `video` members
// of StreamControls. See common/mediastream/media_stream_controls.h.
struct TrackControls {
  // Type of stream requested for this track.
  MediaStreamType stream_type;
  // Device ids associated with this track request.
  // NOTE(review): presumably the candidate devices that may satisfy the
  // request -- confirm against the C++ header.
  array<string> device_ids;
};

// Settings of a stream request; passed as `controls` to
// MediaStreamDispatcherHost.GenerateStreams(). The meaning of the individual
// flags is documented with the C++ type; see
// common/mediastream/media_stream_controls.h.
struct StreamControls {
  // Track-related settings (stream type and device ids) for each media kind.
  TrackControls audio;
  TrackControls video;
  bool hotword_enabled;
  bool disable_local_echo;
  bool suppress_local_audio_playback;
  bool exclude_system_audio;
  bool exclude_self_browser_surface;
  bool request_pan_tilt_zoom_permission;
  // NOTE(review): presumably set for getAllScreensMedia() requests (see
  // MediaStreamType.DISPLAY_VIDEO_CAPTURE_SET) -- confirm.
  bool request_all_screens;
  // Which display surface type should be most prominently offered to the user.
  PreferredDisplaySurface preferred_display_surface;
  bool dynamic_surface_switching_requested;
  bool exclude_monitor_type_surfaces;
};

// Results returned by a successful GetOpenDevice call. It is the nullable
// `response` of MediaStreamDispatcherHost.GetOpenDevice(), which is null if
// and only if that call's result is not OK.
struct GetOpenDeviceResponse {
  // NOTE(review): presumably the request label, as in GenerateStreams() --
  // confirm.
  string label;
  // The device metadata object obtained by the call.
  MediaStreamDevice device;
  // NOTE(review): presumably whether pan / tilt / zoom is allowed, as in
  // GenerateStreams() -- confirm.
  bool pan_tilt_zoom_allowed;
};

// Mouse-wheel event to be delivered from a capturing app to a captured tab;
// sent through MediaStreamDispatcherHost.SendWheel().
// https://screen-share.github.io/captured-surface-control/
//
// `relative_x` is a value from [0, 1). It denotes the relative position
// in the coordinate space of the captured surface, which is unknown to the
// capturer. A value of 0 denotes the leftmost pixel; increasing values denote
// positions further to the right. The sender of the message scales from its
// own coordinate space down to the relative values, and the receiver scales
// back up to its own coordinates.
//
// `relative_y` is defined analogously to `relative_x`.
//
// `wheel_delta_x` and `wheel_delta_y` represent the scroll deltas in pixels.
struct CapturedWheelAction {
  // Relative horizontal / vertical position, each in [0, 1).
  double relative_x;
  double relative_y;
  // Horizontal / vertical scroll deltas, in pixels.
  int32 wheel_delta_x;
  int32 wheel_delta_y;
};

// The result of a call to any of the Captured Surface Control APIs. In this
// file it is returned by MediaStreamDispatcherHost.SendWheel(),
// SetZoomLevel() and RequestCapturedSurfaceControlPermission().
// https://screen-share.github.io/captured-surface-control/
enum CapturedSurfaceControlResult {
  // The call succeeded; all remaining values name an error.
  kSuccess,
  kUnknownError,
  kNoPermissionError,
  kCapturerNotFoundError,
  kCapturedSurfaceNotFoundError,
  kDisallowedForSelfCaptureError,
  kCapturerNotFocusedError,
};

// Per-frame interface implemented on the renderer side. The browser process
// uses it to deliver device stopped/changed notifications and pause/resume
// requests.
//
// In every method, `label` and `device` name the stream device the message is
// about.
interface MediaStreamDeviceObserver {
  // Informs the renderer-side that `device` has been stopped.
  OnDeviceStopped(string label, MediaStreamDevice device);

  // Informs the renderer-side that `old_device` has changed to `new_device`.
  OnDeviceChanged(string label,
                  MediaStreamDevice old_device,
                  MediaStreamDevice new_device);

  // Requests to pause or resume the corresponding media stream device;
  // `new_state` says which of the two is requested.
  OnDeviceRequestStateChange(string label,
                             MediaStreamDevice device,
                             MediaStreamStateChange new_state);

  // Informs the renderer-side that a configuration change has been detected in
  // the corresponding media stream device.
  OnDeviceCaptureConfigurationChange(string label, MediaStreamDevice device);

  // Informs the renderer-side that the device's capture handle has changed.
  OnDeviceCaptureHandleChange(string label, MediaStreamDevice device);

  // Informs the renderer-side that the zoom level of the captured surface has
  // changed; `zoom_level` carries the zoom level reported with the change.
  OnZoomLevelChange(string label, MediaStreamDevice device, int32 zoom_level);
};

// Contains devices that are assigned to a specific stream. At least one of
// audio_device / video_device must be set. Both fields are nullable, so that
// requirement is not expressed by the types themselves.
struct StreamDevices {
  // Audio device of the stream; null if the stream has none.
  MediaStreamDevice? audio_device;
  // Video device of the stream; null if the stream has none.
  MediaStreamDevice? video_device;
};

// A list of devices per stream. Returned (nullable) as `stream_devices` by
// MediaStreamDispatcherHost.GenerateStreams().
struct StreamDevicesSet {
  // One StreamDevices entry per stream.
  array<StreamDevices> stream_devices;
};

// Per-frame interface implemented on the browser side. The renderer process
// uses it to make media stream requests.
interface MediaStreamDispatcherHost {
  // Requests a new media stream.
  //
  // Parameters:
  // `request_id`: Chosen by the renderer to identify this stream generation
  //   request.
  // `controls`: Track-related settings such as device ID or MediaStream type.
  // `user_gesture`: Whether the call was made in the context of a user
  //   gesture.
  // `audio_stream_selection_info`: Only used for audio devices. Selects how
  //   the audio stream is looked up:
  //   - `search_only_by_device_id`: the search is done only by device ID.
  //     NOTE(review): presumably an already-opened stream of the requested
  //     device is re-used when available and a new one is created otherwise
  //     -- confirm against the browser-side implementation.
  //   - `search_by_session_id`: if its `session_id_map` contains a mapping for
  //     the selected device ID, that session is re-used; otherwise a new
  //     session is created.
  //
  // The result callback provides:
  // `result`: The overall result of the stream generation.
  // `label`: The request label.
  // `stream_devices`: A list of devices per stream. If `result` is not ok,
  //   this list may be null.
  // `pan_tilt_zoom_allowed`: A flag indicating whether pan / tilt / zoom is
  //   allowed.
  // TODO(crbug.com/1327958): Use expected<T,E> for the return values.
  GenerateStreams(int32 request_id,
                  StreamControls controls,
                  bool user_gesture,
                  StreamSelectionInfo audio_stream_selection_info)
      => (MediaStreamRequestResult result,
          string label,
          StreamDevicesSet? stream_devices,
          bool pan_tilt_zoom_allowed);

  // Decides whether focus is switched to the captured source (tab/window):
  // it is switched when `focus` is `true`.
  // Passing `true` is an error for anything other than a video track derived
  // from tab/window screen-capture.
  [EnableIfNot=is_android_or_ios]
  FocusCapturedSurface(string label, bool focus);

  // Cancels the request for a new media stream or opening a device.
  CancelRequest(int32 request_id);

  // Closes a stream device that has been opened by GenerateStreams.
  // The MediaStreamDevice is identified by `device_id` and `session_id`. If it
  // has ongoing transfers (i.e. its MediaStreamTracks are being transferred),
  // it will not stop immediately and will be kept alive until all the
  // transfers are complete.
  StopStreamDevice(string device_id,
                   mojo_base.mojom.UnguessableToken? session_id);

  // Opens a device identified by `device_id`.
  OpenDevice(int32 request_id, string device_id, MediaStreamType type)
      => (bool success, string label, MediaStreamDevice device);

  // Cancels an open request identified by `label`.
  CloseDevice(string label);

  // Tells the browser process if the video capture is secure (i.e., all
  // connected video sinks meet the requirement of output protection).
  // Note: the browser process only trusts the `is_secure` value in this Mojo
  // message if it's coming from a trusted, allowlisted extension. Extensions
  // run in separate render processes. So it shouldn't be possible, for example,
  // for a user's visit to a malicious web page to compromise a render process
  // running a trusted extension to make it report falsehood in this Mojo
  // message.
  SetCapturingLinkSecured(mojo_base.mojom.UnguessableToken? session_id,
                          MediaStreamType type,
                          bool is_secure);

  // Start/stop cropping or restricting the video track.
  //
  // Non-empty `sub_capture_target` sets (or changes) the target.
  // Empty `sub_capture_target` reverts the capture to its original state.
  //
  // `sub_capture_target_version` is plumbed down to Viz, which associates that
  // value with all subsequent frames.
  //
  // For a given device, new calls to ApplySubCaptureTarget() must be with a
  // `sub_capture_target_version` that is greater than the value from the
  // previous call, but not necessarily by exactly one.
  // (If a call to cropTo() or restrictTo() is rejected earlier in the
  // pipeline, the sub-capture-target-version can increase in Blink, and later
  // calls to cropTo() or restrictTo() can appear over this mojom pipe with
  // a higher version.)
  //
  // `result` reports success/failure.
  [EnableIfNot=is_android_or_ios]
  ApplySubCaptureTarget(mojo_base.mojom.UnguessableToken session_id,
                        media.mojom.SubCaptureTargetType type,
                        mojo_base.mojom.Token sub_capture_target,
                        uint32 sub_capture_target_version)
      => (media.mojom.ApplySubCaptureTargetResult result);

  // Deliver a wheel event from a capturing app to a captured tab.
  //
  // `session_id` identifies the captured tab.
  // `action` is the action (mouse event) to be delivered.
  // `result` reports if the call was successful or what type of error
  // occurred.
  [EnableIfNot=is_android_or_ios]
  SendWheel(mojo_base.mojom.UnguessableToken session_id,
            CapturedWheelAction action)
      => (CapturedSurfaceControlResult result);

  // Sets the zoom level of the captured tab.
  //
  // `session_id` identifies the captured tab.
  // `zoom_level` is the zoom level to be set.
  // `result` reports if the call was successful or what type of error occurred.
  //
  // TODO(crbug.com/1512609): Create a new device mojo interface to replace the
  // use of the device_id token.
  [EnableIfNot=is_android_or_ios]
  SetZoomLevel(mojo_base.mojom.UnguessableToken session_id, int32 zoom_level)
      => (CapturedSurfaceControlResult result);

  // Request permission to use Captured Surface Control.
  // `session_id` identifies the captured tab.
  // `result` reports if the call was successful or what type of error occurred.
  [EnableIfNot=is_android_or_ios]
  RequestCapturedSurfaceControlPermission(
      mojo_base.mojom.UnguessableToken session_id)
      => (CapturedSurfaceControlResult result);

  // Get a MediaStreamDevice metadata object which refers to the same flow of
  // media backing an existing MediaStreamDevice.
  //
  // This allows for example transferring of MediaStreamTracks between
  // contexts: the receiving context calls GetOpenDevice with the session id
  // of the MediaStreamDevice created by the original context.
  //
  // This call is synchronous since the MediaStreamTrack needs to be created
  // synchronously during deserialization of a transferred track.
  //
  // `request_id` is used by the renderer to identify the stream generation
  // request.
  // `session_id` is used to uniquely identify a session/MediaStreamDevice.
  // `transfer_id` is used to identify a MediaStreamTrack transfer request.
  // This is generated in the original context and passed to the receiving
  // context to identify this transfer in the browser process.
  // `response` is null if and only if result != OK.
  [Sync]
  GetOpenDevice(int32 request_id,
                mojo_base.mojom.UnguessableToken session_id,
                mojo_base.mojom.UnguessableToken transfer_id)
      => (MediaStreamRequestResult result, GetOpenDeviceResponse? response);

  // Keeps a MediaStreamDevice alive even if it has been requested to stop.
  //
  // This method is called by the original renderer while transferring
  // MediaStreamTrack between contexts to ensure the MediaStreamDevice does not
  // stop before the transfer finishes. Once the transfer is complete, we check
  // for any other ongoing transfers of this MediaStreamDevice. If there are
  // none and the MediaStreamDevice is marked to be stopped, stop the
  // MediaStreamDevice.
  //
  // `session_id` is used to uniquely identify a session/MediaStreamDevice.
  // This is the same MediaStreamDevice that is to be cloned in GetOpenDevice
  // to be connected to the transferred track.
  // `transfer_id` is used to identify a MediaStreamTrack transfer request.
  // This is generated in the original context, which calls this function, and
  // is passed to the receiving context to identify this transfer in the
  // browser process.
  // `device_found` is true if the MediaStreamDevice was found.
  KeepDeviceAliveForTransfer(mojo_base.mojom.UnguessableToken session_id,
                             mojo_base.mojom.UnguessableToken transfer_id)
      => (bool device_found);
};

// Browser-side interface that is used by the renderer process to notify the
// addition or deletion of tracks. Neither method has a response.
interface MediaStreamTrackMetricsHost {
  // Adds the track with the specified information to the list of tracks.
  // `id` identifies the track; it is the value later passed to RemoveTrack().
  // NOTE(review): `is_audio` presumably distinguishes audio from video tracks
  // and `is_remote` remote from local ones -- confirm.
  AddTrack(uint64 id, bool is_audio, bool is_remote);

  // Removes the track with the specified ID from the list of tracks.
  RemoveTrack(uint64 id);
};