chromium/services/screen_ai/proto/chrome_screen_ai.proto

// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This proto should be in sync in the following two locations:
// https://source.chromium.org/chromium/chromium/src/+/main:services/screen_ai/proto/chrome_screen_ai.proto
// http://google3/chrome/accessibility/machine_intelligence/chrome_screen_ai//chrome_screen_ai.proto

syntax = "proto3";

package chrome_screen_ai;

// Defines a rectangle with an optional rotation. Used in this file as the
// bounding-box type for lines, words and symbols.
message Rect {
  // Position of the rectangle.
  // NOTE(review): presumably the coordinates of the top-left corner, since
  // `angle` below is defined about that corner; units and origin are not
  // stated here - confirm against the producer.
  int32 x = 1;
  int32 y = 2;

  // Extent of the rectangle along each axis (same units as `x` and `y`).
  int32 width = 3;
  int32 height = 4;

  // Angle of rotation (in degrees, clockwise is positive) of the box about
  // the top-left corner.
  float angle = 5;
}

// Direction of the script in a piece of text. Referenced by
// `LineBox.direction` and `WordBox.direction`.
enum Direction {
  // Zero value: this is what a reader gets when the field was never set.
  DIRECTION_UNSPECIFIED = 0;
  DIRECTION_LEFT_TO_RIGHT = 1;
  DIRECTION_RIGHT_TO_LEFT = 2;
  DIRECTION_TOP_TO_BOTTOM = 3;
}

// Text line with associated bounding box.
// This proto is copied from google3/ocr/photo/proto/image.proto.
message LineBox {
  // Words in the text line with their bounding boxes.
  repeated WordBox words = 1;

  // Line bounding box relative to the original image.
  Rect bounding_box = 2;

  // Text of the line in UTF-8 format.
  string utf8_string = 3;

  // Language guess for the line. The format is the ISO 639-1 two-letter
  // language code if that is defined (e.g. "en"), or else the ISO 639-2
  // three-letter code if that is defined, or else a Google-specific code.
  string language = 4;

  // ID of the text block that this line belongs to.
  int32 block_id = 5;

  // Index of this line within the block that it belongs to.
  int32 order_within_block = 6;

  // The direction of the script contained in the line.
  Direction direction = 7;

  // Content type for this line.
  ContentType content_type = 8;

  // Line bounding box relative to the original image, with the bottom edge
  // representing the estimated baseline of the text.
  Rect baseline_box = 9;

  // Confidence as computed by the OCR engine. The value is in range [0, 1].
  float confidence = 10;
}

// Word with associated bounding box.
// This proto is copied from google3/ocr/photo/proto/image.proto.
message WordBox {
  // Symbols in the word with their bounding boxes (relative to the word
  // ImagePatch).
  repeated SymbolBox symbols = 1;

  // Word bounding box relative to the original image.
  Rect bounding_box = 2;

  // Word string in UTF-8 format.
  string utf8_string = 3;

  // True if the word passes the internal beamsearch dictionary check.
  bool dictionary_word = 4;

  // Language guess for the word. The format is the ISO 639-1 two-letter
  // language code if that is defined (e.g. "en"), or else the ISO 639-2
  // three-letter code if that is defined, or else a Google-specific code.
  string language = 5;

  // True if this word is separated from the next word by a space.
  bool has_space_after = 6;

  // If true, foreground and background colors were successfully detected.
  // NOTE(review): presumably this gates the `*_rgb_value` fields below, in
  // the same way `estimate_gray_success` gates the gray values - confirm.
  bool estimate_color_success = 7;

  // Estimated grayscale value of foreground. Only meaningful when
  // `estimate_gray_success` is true.
  int32 foreground_gray_value = 8;

  // Estimated grayscale value of background. Only meaningful when
  // `estimate_gray_success` is true.
  int32 background_gray_value = 9;

  // Estimated RGB of foreground, packed into a single integer. Extracting
  // RGB channels from this integer is best done using the leptonica helper
  // extractRGBValues().
  int32 foreground_rgb_value = 10;

  // Estimated RGB of background, packed into a single integer. Extracting
  // RGB channels from this integer is best done using the leptonica helper
  // extractRGBValues().
  int32 background_rgb_value = 11;

  // The direction of the script contained in the word.
  Direction direction = 12;

  // Content type for this word.
  ContentType content_type = 13;

  // Detected orientation of the text.
  Orientation orientation = 14;

  // Confidence as computed by the OCR engine.
  // NOTE(review): presumably in range [0, 1] like `LineBox.confidence` -
  // confirm.
  float confidence = 15;

  // If true, foreground and background gray values are valid.
  bool estimate_gray_success = 16;
}

// Symbol with associated bounding box. Symbols are the elements that make up
// a `WordBox` (see `WordBox.symbols`).
// This proto is copied from google3/ocr/photo/proto/image.proto.
message SymbolBox {
  // Bounding box of the symbol relative to the original image.
  Rect bounding_box = 1;

  // Symbol in UTF-8 format.
  string utf8_string = 2;

  // Confidence as computed by the OCR engine.
  // NOTE(review): presumably in range [0, 1] like `LineBox.confidence` -
  // confirm.
  float confidence = 3;
}

// Orientation of detected text. Referenced by `WordBox.orientation`.
// Copied from google3/ocr/photo/proto/image.proto.
enum Orientation {
  // Zero value: this is what a reader gets when the field was never set.
  ORIENTATION_DEFAULT = 0;
  ORIENTATION_HORIZONTAL = 1;
  ORIENTATION_VERTICAL = 2;
  // NOTE(review): the exact rotation implied by the two ROTATED_* values is
  // not described in this file - confirm against the upstream image.proto.
  ORIENTATION_ROTATED_HORIZONTAL = 3;
  ORIENTATION_ROTATED_VERTICAL = 4;
}

// Kind of content found in a line or word. Referenced by
// `LineBox.content_type` and `WordBox.content_type`.
// Copied from google3/ocr/goodoc/page-layout.proto
//
// Note: this enum has no UNSPECIFIED entry. Because this file uses proto3,
// a `ContentType` field that was never set reads back as the zero value,
// CONTENT_TYPE_PRINTED_TEXT. The numbering cannot be changed without
// breaking wire compatibility.
enum ContentType {
  CONTENT_TYPE_PRINTED_TEXT = 0;
  CONTENT_TYPE_HANDWRITTEN_TEXT = 1;
  CONTENT_TYPE_IMAGE = 2;
  CONTENT_TYPE_LINE_DRAWING = 3;

  // E.g. a dividing line.
  CONTENT_TYPE_SEPARATOR = 4;
  CONTENT_TYPE_UNREADABLE_TEXT = 5;

  // Formula: a mathematical or chemical formula.
  CONTENT_TYPE_FORMULA = 6;

  // Same as FORMULA, but handwritten.
  CONTENT_TYPE_HANDWRITTEN_FORMULA = 7;

  // Signature or initials.
  CONTENT_TYPE_SIGNATURE = 8;
}

// Collection of annotations for an image. Currently it carries only the
// detected text lines.
message VisualAnnotation {
  // `ui_component` deprecated in Chrome M128. Both its field number and its
  // name are reserved so that neither can be reused with a different meaning.
  reserved 1;
  reserved "ui_component";

  // Detected text lines, each with its words and bounding box.
  repeated LineBox lines = 2;
}