// chromium/third_party/mediapipe/src/mediapipe/modules/objectron/calculators/a_r_capture_metadata.proto

// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

// Info about the camera characteristics used to capture images and depth data.
// See developer.apple.com/documentation/avfoundation/avcameracalibrationdata
// for more information.
message AVCameraCalibrationData {
  // 3x3 row-major matrix relating a camera's internal properties to an ideal
  // pinhole-camera model. Stored flattened, one row after another.
  // See
  // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881135-intrinsicmatrix
  // for detailed usage information.
  repeated float intrinsic_matrix = 1 [packed = true];

  // Width of the image dimensions to which the intrinsic_matrix values are
  // relative.
  optional float intrinsic_matrix_reference_dimension_width = 2;
  // Height of the image dimensions to which the intrinsic_matrix values are
  // relative.
  optional float intrinsic_matrix_reference_dimension_height = 3;

  // 3x4 row-major matrix relating a camera's position and orientation to a
  // world or scene coordinate system. Consists of a unitless 3x3 rotation
  // matrix (R) on the left and a translation (t) 3x1 vector on the right. The
  // translation vector's units are millimeters. For example:
  //
  //            |r1,1  r2,1  r3,1 | t1|
  //  [R | t] = |r1,2  r2,2  r3,2 | t2|
  //            |r1,3  r2,3  r3,3 | t3|
  //
  //  is stored as [r11, r21, r31, t1, r12, r22, r32, t2, ...]
  //
  // See
  // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881130-extrinsicmatrix?language=objc
  // for more information.
  repeated float extrinsic_matrix = 4 [packed = true];

  // The size, in millimeters, of one image pixel.
  optional float pixel_size = 5;

  // A list of floating-point values describing radial distortions imparted by
  // the camera lens, for use in rectifying camera images.
  // See
  // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881129-lensdistortionlookuptable?language=objc
  // for more information.
  repeated float lens_distortion_lookup_values = 6 [packed = true];

  // A list of floating-point values describing radial distortions for use in
  // reapplying camera geometry to a rectified image (the inverse of
  // lens_distortion_lookup_values).
  // See
  // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881132-inverselensdistortionlookuptable?language=objc
  // for more information.
  repeated float inverse_lens_distortion_lookup_values = 7 [packed = true];

  // The offset of the distortion center of the camera lens from the top-left
  // corner of the image, split into its x and y components.
  // See
  // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881131-lensdistortioncenter?language=objc
  // for more information.
  optional float lens_distortion_center_x = 8;
  optional float lens_distortion_center_y = 9;
}

// Container for depth data information.
// See developer.apple.com/documentation/avfoundation/avdepthdata for more info.
message AVDepthData {
  // General accuracy of the values stored in depth_data_map.
  // See developer.apple.com/documentation/avfoundation/avdepthdataaccuracy for
  // more information.
  enum Accuracy {
    UNDEFINED_ACCURACY = 0;
    // Depth values are usable for foreground/background separation, but are
    // not absolutely accurate in the physical world.
    RELATIVE = 1;
    // Depth values are absolutely accurate in the physical world.
    ABSOLUTE = 2;
  }

  // Quality levels of the depth_data_map.
  enum Quality {
    UNDEFINED_QUALITY = 0;
    HIGH = 1;
    LOW = 2;
  }

  // Grayscale depth data map, encoded as a PNG. The pixel values are
  // normalized; see depth_data_map_original_minimum_value, below, for how to
  // interpret them.
  optional bytes depth_data_map = 1;

  // Pixel format type of the original captured depth data, stored as the
  // string form of the associated OSType/FourCharCode.
  // See
  // developer.apple.com/documentation/corevideo/1563591-pixel_format_identifiers?language=objc
  // for the complete list of possible pixel format types.
  optional string depth_data_type = 2;

  // General accuracy of the depth_data_map. Reads as RELATIVE when unset.
  optional Accuracy depth_data_accuracy = 3 [default = RELATIVE];

  // Whether the depth_data_map contains temporally smoothed data.
  optional bool depth_data_filtered = 4;

  // Quality of the depth_data_map.
  optional Quality depth_data_quality = 5;

  // Calibration data associated with the depth_data_map.
  optional AVCameraCalibrationData camera_calibration_data = 6;

  // Original range of values expressed by the depth_data_map, before
  // grayscale normalization. For example, with a [minimum, maximum] range of
  // [0.5, 2.2]:
  //   - if depth_data_type indicates a depth map, white pixels
  //     (255, 255, 255) map to 0.5 and black pixels (0, 0, 0) map to 2.2;
  //   - if depth_data_type indicates a disparity map, white pixels map to 2.2
  //     and black pixels map to 0.5.
  // In both cases the grayscale range is linearly interpolated in between.
  optional float depth_data_map_original_minimum_value = 7;
  optional float depth_data_map_original_maximum_value = 8;

  // Width of the depth buffer map.
  optional int32 depth_data_map_width = 9;

  // Height of the depth buffer map.
  optional int32 depth_data_map_height = 10;

  // Depth buffer map pixels as a row-major flattened array. The bytes hold
  // either float32 or float16 values, depending on 'depth_data_type'.
  optional bytes depth_data_map_raw_values = 11;
}

// Estimated scene lighting information associated with a captured video frame.
// See developer.apple.com/documentation/arkit/arlightestimate for more info.
message ARLightEstimate {
  // Three-component (x, y, z) vector used to express a light direction.
  message DirectionVector {
    optional float x = 1;
    optional float y = 2;
    optional float z = 3;
  }

  // Estimated intensity, in lumens, of ambient light throughout the scene.
  optional double ambient_intensity = 1;

  // Estimated color temperature, in degrees Kelvin, of ambient light
  // throughout the scene.
  optional double ambient_color_temperature = 2;

  // Estimated lighting environment in all directions, expressed as
  // second-level spherical harmonics in separate red, green, and blue data
  // planes. The buffer therefore holds 3 sets of 9 coefficients, 27 values in
  // total.
  // See
  // https://developer.apple.com/documentation/arkit/ardirectionallightestimate/2928222-sphericalharmonicscoefficients?language=objc
  // for more information.
  repeated float spherical_harmonics_coefficients = 3 [packed = true];

  // Orientation of the strongest directional light source, as a vector
  // normalized in the world-coordinate space.
  // See
  // https://developer.apple.com/documentation/arkit/ardirectionallightestimate/2928221-primarylightdirection?language=objc
  // for more information.
  optional DirectionVector primary_light_direction = 4;

  // Estimated intensity, in lumens, of the strongest directional light source
  // in the scene.
  // See
  // https://developer.apple.com/documentation/arkit/ardirectionallightestimate/2928219-primarylightintensity?language=objc
  // for more information.
  optional float primary_light_intensity = 5;
}

// Information about the camera position and imaging characteristics for a
// captured video frame.
// See developer.apple.com/documentation/arkit/arcamera for more information.
message ARCamera {
  // General quality of position tracking available when the camera captured a
  // frame.
  enum TrackingState {
    UNDEFINED_TRACKING_STATE = 0;
    // Camera position tracking is not available.
    UNAVAILABLE = 1;
    // Tracking is available, but the quality of results is questionable.
    LIMITED = 2;
    // Camera position tracking is providing optimal results.
    NORMAL = 3;
  }

  // Possible diagnosis for limited position tracking quality as of when the
  // frame was captured.
  enum TrackingStateReason {
    UNDEFINED_TRACKING_STATE_REASON = 0;
    // The current tracking state is not limited.
    NONE = 1;
    // Not yet enough camera or motion data to provide tracking information.
    INITIALIZING = 2;
    // The device is moving too fast for accurate image-based position tracking.
    EXCESSIVE_MOTION = 3;
    // Not enough distinguishable features for image-based position tracking.
    INSUFFICIENT_FEATURES = 4;
    // Tracking is limited due to a relocalization in progress.
    RELOCALIZING = 5;
  }

  // Camera orientation expressed as roll, pitch, and yaw values.
  message EulerAngles {
    optional float roll = 1;
    optional float pitch = 2;
    optional float yaw = 3;
  }

  // Tracking quality for the frame. Reads as UNAVAILABLE when unset.
  optional TrackingState tracking_state = 1 [default = UNAVAILABLE];

  // Diagnosis for limited tracking quality. Reads as NONE when unset.
  optional TrackingStateReason tracking_state_reason = 2 [default = NONE];

  // Position and orientation of the camera in world coordinate space, as a
  // 4x4 row-major matrix.
  // See developer.apple.com/documentation/arkit/arcamera/2866108-transform for
  // more information.
  repeated float transform = 3 [packed = true];

  // The orientation of the camera, expressed as roll, pitch, and yaw values.
  optional EulerAngles euler_angles = 4;

  // Width and height, in pixels, of the captured camera image.
  optional int32 image_resolution_width = 5;
  optional int32 image_resolution_height = 6;

  // 3x3 row-major matrix that converts between the 2D camera plane and 3D
  // world coordinate space.
  // See developer.apple.com/documentation/arkit/arcamera/2875730-intrinsics for
  // usage information.
  repeated float intrinsics = 7 [packed = true];

  // 4x4 row-major transform matrix appropriate for rendering 3D content to
  // match the image captured by the camera.
  // See
  // developer.apple.com/documentation/arkit/arcamera/2887458-projectionmatrix
  // for usage information.
  repeated float projection_matrix = 8 [packed = true];

  // 4x4 row-major transform matrix appropriate for converting from world-space
  // to camera space. Relativized for the captured_image orientation (i.e.
  // UILandscapeOrientationRight).
  // See
  // https://developer.apple.com/documentation/arkit/arcamera/2921672-viewmatrixfororientation?language=objc
  // for more information.
  repeated float view_matrix = 9 [packed = true];
}

// Container for a 3D mesh describing face topology.
message ARFaceGeometry {
  // A 3D point in the face mesh, in the face coordinate space.
  // See developer.apple.com/documentation/arkit/arfacegeometry/2928201-vertices
  // for more information.
  message Vertex {
    optional float x = 1;
    optional float y = 2;
    optional float z = 3;
  }

  // UV texture coordinates for the vertex at the corresponding index in the
  // vertices buffer.
  // See
  // developer.apple.com/documentation/arkit/arfacegeometry/2928203-texturecoordinates
  // for more information.
  message TextureCoordinate {
    optional float u = 1;
    optional float v = 2;
  }

  // The points of the face mesh.
  repeated Vertex vertices = 1;

  // The number of elements in the vertices list.
  optional int32 vertex_count = 2;

  // Texture coordinates, index-aligned with the vertices list.
  repeated TextureCoordinate texture_coordinates = 3;

  // The number of elements in the texture_coordinates list.
  optional int32 texture_coordinate_count = 4;

  // Ordered list of indices into the vertices and texture_coordinates lists.
  // Each set of three indices identifies the vertices comprising a single
  // triangle in the mesh, so the number of indices in this buffer is three
  // times the triangle_count value.
  // See
  // developer.apple.com/documentation/arkit/arfacegeometry/2928199-triangleindices
  // for more information.
  repeated int32 triangle_indices = 5 [packed = true];

  // The number of triangles described by the triangle_indices buffer.
  // See
  // developer.apple.com/documentation/arkit/arfacegeometry/2928207-trianglecount
  // for more information.
  optional int32 triangle_count = 6;
}

// Contains a list of blend shape entries wherein each item maps a specific
// blend shape location to its associated coefficient.
message ARBlendShapeMap {
  // One (location, coefficient) pair of the blend shape map.
  message MapEntry {
    // Identifier for the specific facial feature.
    // See developer.apple.com/documentation/arkit/arblendshapelocation for a
    // complete list of identifiers.
    optional string blend_shape_location = 1;

    // Indicates the current position of the feature relative to its neutral
    // configuration, ranging from 0.0 (neutral) to 1.0 (maximum movement).
    optional float blend_shape_coefficient = 2;
  }
  // The blend shape entries, stored as a list of pairs rather than as a
  // protobuf map field.
  repeated MapEntry entries = 1;
}

// Information about the pose, topology, and expression of a detected face.
// See developer.apple.com/documentation/arkit/arfaceanchor for more info.
message ARFaceAnchor {
  // A coarse triangle mesh representing the topology of the detected face.
  optional ARFaceGeometry geometry = 1;

  // A map of named coefficients representing the detected facial expression in
  // terms of the movement of specific facial features.
  optional ARBlendShapeMap blend_shapes = 2;

  // 4x4 row-major matrix encoding the position, orientation, and scale of the
  // anchor relative to the world coordinate space.
  // Declared packed for consistency with ARCamera.transform. Protobuf parsers
  // accept both the packed and the unpacked encoding of a repeated scalar
  // field, so data serialized before this option was added still parses.
  // See
  // https://developer.apple.com/documentation/arkit/aranchor/2867981-transform?language=objc
  // for more information.
  repeated float transform = 3 [packed = true];

  // Indicates whether the anchor's transform is valid. Frames that have a face
  // anchor with this value set to false should probably be ignored.
  optional bool is_tracked = 4;
}

// Container for a 3D mesh.
message ARPlaneGeometry {
  // A single 3D position, given by its x, y, and z components.
  message Vertex {
    optional float x = 1;
    optional float y = 2;
    optional float z = 3;
  }

  // Vertex positions for each point in the plane mesh.
  repeated Vertex vertices = 1;

  // The number of elements in the vertices buffer.
  optional int32 vertex_count = 2;

  // UV texture coordinates for the vertex at the corresponding index in the
  // vertices buffer.
  // See
  // https://developer.apple.com/documentation/arkit/arfacegeometry/2928203-texturecoordinates
  // for more information.
  message TextureCoordinate {
    optional float u = 1;
    optional float v = 2;
  }

  // Texture coordinate values for each point in the plane mesh.
  repeated TextureCoordinate texture_coordinates = 3;

  // The number of elements in the texture_coordinates buffer.
  optional int32 texture_coordinate_count = 4;

  // Ordered list of indices into the vertices and texture_coordinates lists.
  // Each set of three indices identifies the vertices comprising a single
  // triangle in the mesh, so the number of indices in this buffer is three
  // times the triangle_count value.
  // See
  // https://developer.apple.com/documentation/arkit/arplanegeometry/2941051-triangleindices
  // for more information.
  repeated int32 triangle_indices = 5 [packed = true];

  // The number of triangles described by the triangle_indices buffer. Each
  // set of three indices forms a triangle, so the number of indices in the
  // triangle_indices buffer is three times this value.
  // See
  // https://developer.apple.com/documentation/arkit/arplanegeometry/2941058-trianglecount
  // for more information.
  optional int32 triangle_count = 6;

  // Positions of the vertices along the boundary polygon of the estimated
  // plane. The owning plane anchor's transform matrix defines the coordinate
  // system for these points.
  // See
  // https://developer.apple.com/documentation/arkit/arplanegeometry/2941052-boundaryvertices
  // for more information.
  repeated Vertex boundary_vertices = 7;

  // The number of elements in the boundary_vertices buffer.
  optional int32 boundary_vertex_count = 8;
}

// Information about the position and orientation of a real-world flat surface.
// See https://developer.apple.com/documentation/arkit/arplaneanchor for more
// information.
message ARPlaneAnchor {
  // General orientation of a plane with respect to gravity.
  enum Alignment {
    UNDEFINED = 0;
    // The plane is perpendicular to gravity.
    HORIZONTAL = 1;
    // The plane is parallel to gravity.
    VERTICAL = 2;
  }

  // Wrapper for a 3D point / vector within the plane. See extent and center
  // values for more information.
  message PlaneVector {
    optional float x = 1;
    optional float y = 2;
    optional float z = 3;
  }

  // Kinds of real-world surface a plane anchor can be classified as.
  enum PlaneClassification {
    NONE = 0;
    WALL = 1;
    FLOOR = 2;
    CEILING = 3;
    TABLE = 4;
    SEAT = 5;
  }

  // The classification status for the plane.
  enum PlaneClassificationStatus {
    // The classification process for the plane anchor has completed but the
    // result is inconclusive.
    UNKNOWN = 0;
    // No classification information can be provided (set on error or if the
    // device does not support plane classification).
    UNAVAILABLE = 1;
    // The classification process has not completed.
    UNDETERMINED = 2;
    // The classification process for the plane anchor has completed.
    KNOWN = 3;
  }

  // The ID of the plane.
  optional string identifier = 1;

  // 4x4 row-major matrix encoding the position, orientation, and scale of the
  // anchor relative to the world coordinate space.
  // Declared packed for consistency with ARCamera.transform. Protobuf parsers
  // accept both the packed and the unpacked encoding of a repeated scalar
  // field, so data serialized before this option was added still parses.
  // See
  // https://developer.apple.com/documentation/arkit/aranchor/2867981-transform
  // for more information.
  repeated float transform = 2 [packed = true];

  // The general orientation of the detected plane with respect to gravity.
  optional Alignment alignment = 3;

  // A coarse triangle mesh representing the general shape of the detected
  // plane.
  optional ARPlaneGeometry geometry = 4;

  // The center point of the plane relative to its anchor position.
  // Although the type of this property is a 3D vector, a plane anchor is always
  // two-dimensional, and is always positioned in only the x and z directions
  // relative to its transform position. (That is, the y-component of this
  // vector is always zero.)
  // See
  // https://developer.apple.com/documentation/arkit/arplaneanchor/2882056-center
  // for more information.
  optional PlaneVector center = 5;

  // The estimated width and length of the detected plane.
  // See
  // https://developer.apple.com/documentation/arkit/arplaneanchor/2882055-extent
  // for more information.
  optional PlaneVector extent = 6;

  // A Boolean value that indicates whether plane classification is available on
  // the current device. On devices without plane classification support, all
  // plane anchors report a classification value of NONE
  // and a classification_status value of UNAVAILABLE.
  optional bool classification_supported = 7;

  // A general characterization of what kind of real-world surface the plane
  // anchor represents.
  // See
  // https://developer.apple.com/documentation/arkit/arplaneanchor/2990936-classification
  // for more information.
  optional PlaneClassification classification = 8;

  // The current state of ARKit's process for classifying the plane anchor.
  // When this property's value is KNOWN, the classification property represents
  // ARKit's characterization of the real-world surface corresponding to the
  // plane anchor.
  // See
  // https://developer.apple.com/documentation/arkit/arplaneanchor/2990937-classificationstatus
  // for more information.
  optional PlaneClassificationStatus classification_status = 9;
}

// A collection of points in the world coordinate space.
// See https://developer.apple.com/documentation/arkit/arpointcloud for more
// information.
message ARPointCloud {
  // The number of points in the cloud.
  optional int32 count = 1;

  // A single point of the cloud, given by its x, y, and z components.
  message Point {
    optional float x = 1;
    optional float y = 2;
    optional float z = 3;
  }

  // The list of detected points.
  repeated Point point = 2;

  // Unique identifiers corresponding to detected feature points. Each
  // identifier in this list corresponds to the entry at the same index in the
  // point list.
  repeated int64 identifier = 3 [packed = true];
}

// Video image and face position tracking information.
// See developer.apple.com/documentation/arkit/arframe for more information.
message ARFrame {
  // The timestamp for the frame.
  // NOTE(review): unit and epoch are not stated in this file - confirm against
  // the capture code before comparing with other clocks.
  optional double timestamp = 1;

  // The depth data associated with the frame. Not all frames have depth data.
  optional AVDepthData depth_data = 2;

  // The depth data object timestamp associated with the frame. May differ from
  // the frame timestamp value. Is only set when the frame has depth_data.
  optional double depth_data_timestamp = 3;

  // Camera information associated with the frame.
  optional ARCamera camera = 4;

  // Light information associated with the frame.
  optional ARLightEstimate light_estimate = 5;

  // Face anchor information associated with the frame. Not all frames have an
  // active face anchor.
  optional ARFaceAnchor face_anchor = 6;

  // Plane anchors associated with the frame. Not all frames have a plane
  // anchor. Plane anchors and face anchors are mutually exclusive; this is a
  // documented convention only, since the two fields are not declared in a
  // oneof.
  repeated ARPlaneAnchor plane_anchor = 7;

  // The current intermediate results of the scene analysis used to perform
  // world tracking.
  // See
  // https://developer.apple.com/documentation/arkit/arframe/2887449-rawfeaturepoints
  // for more information.
  optional ARPointCloud raw_feature_points = 8;
}