chromium/third_party/blink/public/mojom/content_extraction/inner_text.mojom

// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

module blink.mojom;

import "third_party/blink/public/mojom/tokens/tokens.mojom";

// Represents the inner-text content of a single frame. Frames can nest: a
// segment of this frame may itself be an InnerTextFrame (see the `frame`
// member of InnerTextSegment).
struct InnerTextFrame {
  // Token of the local frame whose content is held in `segments`.
  blink.mojom.LocalFrameToken token;
  // Content of the frame, as a list of segments.
  array<InnerTextSegment> segments;
};

// Identifies which position of a node a node-location segment marks (see
// InnerTextSegment.node_location).
enum NodeLocationType {
  // The segment marks the starting location of the node.
  kStart,
};

// One piece of a frame's content. Frame content consists of any number of
// text strings and frames and may contain a single node location.
union InnerTextSegment {
  // Used when the segment represents the starting location of a node. The
  // node is the one identified by InnerTextParams.node_id.
  NodeLocationType node_location;
  // Used when the segment is a string of text.
  string text;
  // Used when the segment is the content of a nested frame.
  InnerTextFrame frame;
};

// Parameters for InnerTextAgent.GetInnerText(). All fields except
// `max_passages` are nullable; a null value means "not set".
struct InnerTextParams {
  // If set, identifies a node to obtain the position of. The position will
  // be added to the segments of the frame containing the node. The id is
  // a DomNodeId (see WebNode::GetDomNodeId()).
  int32? node_id;

  // If set, a passage extraction algorithm is applied instead of collecting
  // inner text from all iframes, and this specifies an aggregation parameter:
  // The maximum number of words in a passage comprised of multiple nodes. A
  // passage with text from only a single node may exceed this max. Default 200.
  uint32? max_words_per_aggregate_passage;

  // If set, a passage extraction algorithm is applied instead of collecting
  // inner text from all iframes, and this specifies an aggregation parameter:
  // When true, sibling nodes are greedily aggregated into passages under
  // max_words_per_aggregate_passage words. When false, each sibling node is
  // output as a separate passage if they cannot all be combined into a single
  // passage under max_words_per_aggregate_passage words. Default is true.
  bool? greedily_aggregate_sibling_nodes;

  // If set to X, only the first X passages encountered during the extraction
  // will be returned. If X == 0, all passages extracted will be returned.
  // Note that this field is not nullable, so 0 is the "no limit" value.
  uint32 max_passages;

  // If set, will put a hard limit on the minimum number of words in each
  // passage; passages without enough words will be dropped.
  uint32? min_words_per_passage;
};

// Used to obtain the inner-text of a frame, as well as the inner-text of all
// iframes with the same origin.
interface InnerTextAgent {
  // Requests the snapshot for the frame. This implicitly includes any local
  // frames associated with the frame, but does not include any cross origin
  // frames.
  //
  // `params` configures the request (see InnerTextParams). The reply carries
  // the resulting InnerTextFrame; frames included in the snapshot are
  // represented as `frame` segments (see InnerTextSegment).
  GetInnerText(InnerTextParams params) => (InnerTextFrame frame);
};