// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Describes camera motion between two frames using parametric motion models
// with various degrees of freedom.
// In addition, stores features describing how reliable the estimated motion
// model is.
// Flags indicate several properties derived from the camera motion, e.g. if a
// frame is sharp, blurry or contains overlays.
syntax = "proto2";
package mediapipe;
import "mediapipe/util/tracking/motion_models.proto";
// Next tag: 33
// Camera motion estimated between a pair of frames, expressed in several
// parametric motion models, together with statistics describing how reliable
// the estimate is and per-frame flags (e.g. sharp, blurry, overlay present).
//
// NOTE(review): tag 7 is neither used by a field below nor declared as
// reserved or as an extension range. Confirm whether it belonged to a deleted
// field before assigning it.
message CameraMotion {
// Background motion expressed in various models.
// These are per-frame pair motions (from current to previous frame).
// Models are expressed in the un-normalized domain frame_width x frame_height
// that is passed to MotionEstimation (stored below).
optional TranslationModel translation = 1;
optional SimilarityModel similarity = 2;
optional LinearSimilarityModel linear_similarity = 3;
optional AffineModel affine = 4;
optional Homography homography = 5;
optional MixtureHomography mixture_homography = 8;
// Frame dimensions camera motion was computed over.
optional float frame_width = 31;
optional float frame_height = 32;
// Mixture homographies computed w.r.t. exponentially increasing
// regularizers. The mixture_homography member above is selected from this
// spectrum based on the amount of rolling shutter present in the video.
repeated MixtureHomography mixture_homography_spectrum = 23;
// Relative row sigma w.r.t. frame_height for mixture models.
optional float mixture_row_sigma = 10;
// Average of all motion vector magnitudes (without accounting for any motion
// model), within 10th to 90th percentile (to remove outliers).
optional float average_magnitude = 24 [default = 0.0];
// Inlier-weighted variance of the translation model.
// Specified w.r.t. the unnormalized video domain that motion models
// are computed for.
optional float translation_variance = 25 [default = 0.0];
// Ratio of inliers w.r.t. regular and stricter thresholds. In [0, 1].
optional float similarity_inlier_ratio = 29 [default = 0];
optional float similarity_strict_inlier_ratio = 30 [default = 0];
// Average registration error of homography in pixels.
// Note: These two parameters default to zero in case homographies have not
// been estimated.
optional float average_homography_error = 11;
// Fraction, in [0,1], of homography inliers.
optional float homography_inlier_coverage = 12;
// Same as above but with stricter threshold.
// (For details, see: MotionEstimationOptions::strict_coverage_scale).
// Coverage is designed to measure the amount of significant outliers,
// which can affect the validity of the estimated homography.
// However, it does not discount small outliers, which occur in case
// of small rolling shutter wobbles. For this a stricter version of coverage
// is needed, which is essential for computing the rolling_shutter_guess,
// i.e. the increase in coverage by using mixtures vs. homographies.
optional float homography_strict_inlier_coverage = 22;
// Per-block inlier fraction for mixtures.
repeated float mixture_inlier_coverage = 13;
// Set based on stability analysis indicating if frame is likely to originate
// from a rolling shutter camera. (-1 is used to indicate frame was not
// tested, e.g. due to mixture deemed unstable for analysis).
// Guess is a scalar indicating by how much the mixture models (suitable for
// rolling shutter distortions) increased inlier coverage compared to a single
// homography. For example, a value of 1.3 indicates that the mixture models
// increased inlier coverage by 30%.
// If not -1, range is in [0, inf] (values slightly smaller than 1 are
// possible due to suppression of noisy feature tracks during estimation).
optional float rolling_shutter_guess = 14;
// Indicates if CameraMotion is deemed to originate from a rolling
// shutter camera (index >= 0), and if so, denotes the index in the
// mixture_homography_spectrum, where higher indices correspond to heavier
// regularized motions. If motion is not deemed to originate from a rolling
// shutter camera, index is set to -1.
optional int32 rolling_shutter_motion_index = 16 [default = -1];
// List of overlay indices (cell locations in column major format) over domain
// of size overlay_domain x overlay_domain, where
// overlay_domain is set by MotionEstimation to
// MotionEstimationOptions::OverlayDetectionOptions::analysis_mask_size.
// Overlay analysis is performed over chunk of frames, as specified by
// MotionEstimationOptions::overlay_analysis_chunk_size, with the resulting
// overlay indices being assigned to each frame of the chunk.
// Consequently it suffices to store the result only for the first frame
// of every chunk. Subsequent frames store a single negative index relative
// to the first chunk frame indicating where to locate the overlay indices.
// Specifically if for frame f, overlay_indices(0) == -2, overlay indices for
// corresponding chunk can be found at frame f - 2.
// For details about how overlay indices are used to flag a frame to contain
// an overlay, see MotionFilterOptions::OverlayOptions.
repeated int32 overlay_indices = 17;
// Side length of the square grid that overlay_indices refer to (see above).
optional int32 overlay_domain = 18 [default = 10];
// CameraMotion type indicates whether highest degree of freedom (DOF)
// model estimation was deemed stable, in which case CameraMotion::Type is set
// to VALID.
// If a model was deemed not stable (according to *StabilityBounds in
// MotionEstimationOptions), it is set to the lower dof type which was deemed
// stable.
enum Type {
VALID = 0; // All requested motion models estimated reliably.
UNSTABLE_HOMOG = 1; // Fallback to homographies, mixture unreliable.
UNSTABLE_SIM = 2; // Fallback to similarity model, homography
// unreliable.
UNSTABLE = 3; // Fallback to translation model, similarity
// unreliable, legacy naming.
INVALID = 4; // Identity model, translation unreliable.
}
optional Type type = 6 [default = VALID];
// If set, stores original type in case it was overridden (by filtering
// functions, etc.).
optional Type overridden_type = 15 [default = VALID];
// Set of optional *bit* flags set for various purposes.
// Each value occupies a distinct bit so that several can be combined in the
// int32 `flags` field below.
enum Flags {
FLAG_SHOT_BOUNDARY = 1; // Set to indicate presence of a
// shot boundary.
// NOTE(review): presumably set if the frame is considered blurry (see
// blur_score / bluriness below) -- confirm against MotionEstimation.
FLAG_BLURRY_FRAME = 2;
// NOTE(review): presumably set if the frame is flagged to contain an overlay
// (see overlay_indices above) -- confirm against
// MotionFilterOptions::OverlayOptions.
FLAG_MAJOR_OVERLAY = 4;
FLAG_SHARP_FRAME = 8; // Set if frame is considered sharp
// in a neighborhood of frames.
FLAG_SINGULAR_ESTIMATION = 16; // Indicates that estimation resulted
// in singular optimization problem.
// Used internally by MotionEstimation.
// Indicates if shot boundary is part of a fade. If so, all frames of the
// fade will be labeled with the FLAG but only the begin and end of the fade
// will have the FLAG_SHOT_BOUNDARY set.
FLAG_SHOT_FADE = 32;
FLAG_DUPLICATED = 64; // Set if frame is exact duplicate of
// previous frame.
FLAG_CENTER_FRAME = 128; // Indicates this frame is at the
// center of the sequence. Currently
// used to constrain stabilizing crop
// transform.
}
// Holds a bitwise combination of the Flags values above.
optional int32 flags = 19 [default = 0];
// Same as in RegionFlowFeatureList (from region_flow.proto), measures blur
// as average cornerness over textured areas. As it depends on the image
// content, should only be used relative.
optional float blur_score = 20;
// Quantifies amount of blur. Specified as ratio w.r.t. sharpest matching
// frame, i.e. 1 indicates no blur, values > 1 amount of blur w.r.t. sharpest
// frame.
optional float bluriness = 21 [default = 0.0];
// Same as in RegionFlowFeatureList (from region_flow.proto). Stores fraction
// of long feature tracks that got rejected for this frame.
optional float frac_long_features_rejected = 26;
// Same as in RegionFlowFeatureList (from region_flow.proto).
// Timestamp in micro seconds of the underlying frame.
optional int64 timestamp_usec = 27 [default = 0];
// Same as in RegionFlowFeatureList (from region_flow.proto).
// Denotes frame that motion was computed w.r.t., locally to the current
// frame. Values < 0 indicate backward tracking, while values > 0 indicate
// forward tracking. For example, match_frame = -1, indicates tracking is
// from current to previous frame.
optional int32 match_frame = 28 [default = 0];
// Deprecated fields.
// NOTE(review): tag 9 is declared as an extension range rather than
// `reserved`; presumably kept so existing extensions on tag 9 keep parsing --
// confirm before changing.
extensions 9;
}