// motion_estimation.proto

// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Settings for MotionEstimation.
syntax = "proto2";

package mediapipe;

// Configuration for MotionEstimation: selects which motion models are
// estimated, how they are estimated, and the bounds used to judge stability.
//
// Note: In general for Estimation modes, the prefixes are used as follows:
// L2:        minimize squared norm of error
// IRLS:      iterative reweighted least square, L2 minimization using multiple
//            iterations, downweighting outliers.
// Next tag: 69
message MotionEstimationOptions {
  // Specifies which camera models should be estimated, translation is always
  // estimated.
  optional bool estimate_translation_irls = 1 [default = true];

  enum LinearSimilarityEstimation {
    ESTIMATION_LS_NONE = 0;
    ESTIMATION_LS_L2 = 1;    // L2 estimation
    ESTIMATION_LS_IRLS = 4;  // good performance, robust to outliers.

    // DEPRECATED modes.
    ESTIMATION_LS_L2_RANSAC = 2;  // DEPRECATED, use IRLS instead.
    ESTIMATION_LS_L1 = 3;         // DEPRECATED, use IRLS instead, or static
                                  // function MotionEstimation::
                                  // EstimateLinearSimilarityL1
  }

  optional LinearSimilarityEstimation linear_similarity_estimation = 3
      [default = ESTIMATION_LS_IRLS];

  enum AffineEstimation {
    ESTIMATION_AFFINE_NONE = 0;
    ESTIMATION_AFFINE_L2 = 1;
    ESTIMATION_AFFINE_IRLS = 2;
  }

  optional AffineEstimation affine_estimation = 30
      [default = ESTIMATION_AFFINE_NONE];

  enum HomographyEstimation {
    ESTIMATION_HOMOG_NONE = 0;
    ESTIMATION_HOMOG_L2 = 1;
    ESTIMATION_HOMOG_IRLS = 2;
  }

  optional HomographyEstimation homography_estimation = 5
      [default = ESTIMATION_HOMOG_IRLS];

  // By default, homography estimation minimizes an objective that is not
  // strictly the L2 distance between matched points. If the flag is set, each
  // row of the linear system is scaled with the exact denominator which results
  // in an objective that minimizes the L2 distance.
  optional bool homography_exact_denominator_scaling = 53 [default = false];

  // By default, we use an exact solver for the over-determined system using a
  // well-conditioned QR decomposition. For better speed, set value to false to
  // use estimation via normal equations.
  optional bool use_exact_homography_estimation = 54 [default = true];

  // If set, uses double instead of float when computing normal equations.
  optional bool use_highest_accuracy_for_normal_equations = 55 [default = true];

  // Regularizer for perspective part of the homography. If zero, no
  // regularization is performed. Should be >= 0.
  optional float homography_perspective_regularizer = 61 [default = 0];

  // Note: Mixture models have high DOF and are much more affected by outliers
  // than models above. It is recommended that if IRLS estimation is NOT used,
  // that mixture_regularizer is increased by a factor >=3.
  enum MixtureHomographyEstimation {
    ESTIMATION_HOMOG_MIX_NONE = 0;
    ESTIMATION_HOMOG_MIX_L2 = 1;
    ESTIMATION_HOMOG_MIX_IRLS = 2;  // robust to outliers.
  }

  optional MixtureHomographyEstimation mix_homography_estimation = 12
      [default = ESTIMATION_HOMOG_MIX_NONE];

  // If row-wise mixture models are estimated, determines number of them.
  // Note, changing number of mixtures, interpolation sigma and regularizer
  // is very likely to impact the stability analysis for mixtures and rolling
  // shutter scoring. At least MixtureHomographyBounds would need to be adjusted
  // to the new values.
  optional int32 num_mixtures = 13 [default = 10];

  // If row-wise mixture models are estimated, determines how much each point is
  // influenced by its neighboring mixtures. Specified as relative sigma
  // (standard deviation) w.r.t. frame_height.
  optional float mixture_row_sigma = 14 [default = 0.1];

  // Mixture estimation uses L2 regularizer to assure that adjacent mixture
  // models are similar.
  optional float mixture_regularizer = 15 [default = 1e-4];

  // Mixtures are estimated across a spectrum of exponentially increasing
  // regularizers. In particular the regularizer at level L is given as
  // mixture_regularizer * mixture_regularizer_base^L.
  // A maximum of 10 levels are supported (checked!).
  // Note: When changing the number of levels you probably want to adapt the
  //       MotionStabilizationOptions::rolling_shutter_increment value as well,
  //       as the number of levels directly controls the highest threshold for
  //       the rolling shutter index analysis.
  // NOTE(review): mixture_regularizer_levels is typed float although it
  //       denotes a count of levels; changing the type would alter the wire
  //       encoding.
  optional float mixture_regularizer_levels = 42 [default = 3];
  optional float mixture_regularizer_base = 43 [default = 2.2];
  // NOTE(review): presumably the regularizer level used for the rolling
  // shutter analysis - confirm against the implementation.
  optional int32 mixture_rs_analysis_level = 44 [default = 2];

  // IRLS rounds to down-weight outliers (default across all models).
  // Note: IRLS in combination with full mixture models (as opposed to the
  // default reduced ones) is somewhat expensive.
  optional int32 irls_rounds = 17 [default = 10];

  // If set to > 0 (always needs to be less than 1.0), influence of supplied
  // prior irls weights is linearly decreased from the specified prior scale
  // (weight 1.0) to prior_scale. Effectively, biases the solution to the
  // supplied prior features.
  // Note: Without irls_weights_preinitialized set to true, this option is
  // effectively a no op.
  // TODO: Retire this option.
  optional float irls_prior_scale = 50 [default = 0.2];

  // Determine how to normalize irls weights w.r.t. average motion magnitude.
  // In general a residual of 1 pixel is assigned an IRLS weight of 1.
  // However as larger motions in general are affected by a larger error, we
  // normalize irls weights, such that a residual
  // of distance of irls_motion_magnitude_fraction times
  // <average translation magnitude> equals an IRLS weight of 1.
  // Must be larger than zero.
  optional float irls_motion_magnitude_fraction = 31 [default = 0.08];

  // Scale that is applied for mixture (where error is expected to be bigger).
  optional float irls_mixture_fraction_scale = 68 [default = 1.5];

  // By default, irls weights of all features are set uniformly to one before
  // estimating EACH model, refining them in subsequent irls iterations.
  // If flag below is set, input irls weights are used instead for each motion
  // model.
  optional bool irls_weights_preinitialized = 39 [default = false];

  // If weights are pre-initialized optionally min filter weights along track
  // ids when long tracks are used. This can be used to consistently label
  // outliers in time before estimation.
  optional bool filter_initialized_irls_weights = 67 [default = false];

  // If activated, irls weights of outlier features are reset. Outliers are
  // defined as those features, for which the best model fit after #rounds
  // iterations of RANSAC did NOT yield an error lower than cutoff.
  // Only applies to translation and similarity estimation.
  message IrlsOutlierInitialization {
    optional bool activated = 1 [default = false];

    optional int32 rounds = 2 [default = 100];
    optional float cutoff = 3 [default = 0.003];
  }
  optional IrlsOutlierInitialization irls_initialization = 56;

  // Normalizes features' irls weights prior to estimation such that
  // features in high density areas are downweighted. Multiplicative in case
  // irls_weights_preinitialized is set to true.
  optional bool feature_density_normalization = 62 [default = false];

  // A regular grid of size feature_mask_size x feature_mask_size
  // is used to normalize features w.r.t. their density.
  optional int32 feature_mask_size = 63 [default = 10];

  // In addition to above outlier and density initialization, long features
  // that are present for a specified ratio of the analysis interval can be
  // upweighted. This greatly improves temporal consistency.
  message LongFeatureInitialization {
    optional bool activated = 1 [default = false];

    // Tracks with a length greater or equal to the specified percentile
    // are upweighted by the specified upweight_multiplier.
    optional float min_length_percentile = 2 [default = 0.95];

    // Features passing above test have their irls weight increased by the
    // specified multiplier prior to estimation.
    optional float upweight_multiplier = 3 [default = 5];
  }

  optional LongFeatureInitialization long_feature_initialization = 66;

  // Irls initialization can be performed in a temporally dependent manner,
  // (if estimation_policy == TEMPORAL_IRLS_MASK), where
  // the previous frame's motion estimation biases the IrlsInitialization of the
  // currently processed frame. In particular the location and magnitude of
  // inliers is used during the RANSAC selection stage, to favor those features
  // that agree with the prior, represented as confidence mask of inliers
  // (using same dimension as above feature_mask_size).
  // After estimation, the prior is updated.
  message IrlsMaskOptions {
    // Amount prior is decayed after each iteration.
    optional float decay = 2 [default = 0.7];

    // Score that each inlier adds to the current prior. Specified w.r.t. total
    // number of features, i.e. each feature increases a bin's score by
    // inlier_score.
    optional float inlier_score = 3 [default = 0.4];

    // Each inlier scores at least this value regardless of the inlier mask
    // (additive).
    optional float base_score = 4 [default = 0.2];

    // Motions are scored relative to previous motion. Threshold denotes
    // absolute minimum of denominator.
    optional float min_translation_norm = 5 [default = 2e-3];

    // Translation is updated in every step by blending it with the previous
    // estimated translation. (alpha is within 0 to 1, where 0 indicates to use
    // only measured translation, i.e. no blending).
    optional float translation_blend_alpha = 6 [default = 0.7];

    // Every time translation is updated, prior (in [0, 1]) is increased by the
    // specified amount.
    optional float translation_prior_increase = 7 [default = 0.2];

    // Deprecated fields.
    // NOTE(review): `extensions` appears to be used here only to block reuse
    // of a retired tag; `reserved` is the idiomatic statement for that.
    // Confirm nothing extends this message before switching.
    extensions 1;
  }

  optional IrlsMaskOptions irls_mask_options = 57;

  // Describes how long feature tracks are leveraged for joint estimation across
  // many frames.
  message JointTrackEstimationOptions {
    // For each frame-pair motion model, describing the motion between frame
    // I and I - 1, estimate in addition several additional motion
    // models along long feature tracks describing the motion between frame
    // I and I - k * motion_stride (additional models are not output,
    // but help to filter irls weights).
    // Specifies total number of estimated motion models per frame-pair. Must be
    // greater than zero.
    optional int32 num_motion_models = 1 [default = 3];

    // Spacing in frames for additional motion models.
    optional int32 motion_stride = 2 [default = 15];

    // If set, performs temporal smoothing across frames of the obtained irls
    // weights.
    optional bool temporal_smoothing = 3 [default = false];

    // TODO: Specify which filter is used during temporal smoothing.
    // TODO: Limit joint estimation to static scenes
    //                 (don't violate rigidity of alignment in case of wide
    //                  baselines). Adopt stride and num_motion_models
    //                  accordingly.
  }

  optional JointTrackEstimationOptions joint_track_estimation = 59;

  // Options being used to bias IRLS features if estimation policy
  // TEMPORAL_LONG_FEATURE_BIAS is being used.
  // Next Tag: 15
  message LongFeatureBiasOptions {
    // Estimation is performed multiple times, alternating between model
    // estimation and smooth temporal feature biasing for the specified number
    // of rounds.
    optional int32 total_rounds = 13 [default = 1];

    // Controls how fast the bias for a track gets updated, in case feature is
    // an inlier. Use higher values for less decay of background motion over
    // time.
    optional float inlier_bias = 1 [default = 0.98];

    // Same as above for outliers (or features with low prior), i.e. those that
    // got recently seeded.
    optional float outlier_bias = 2 [default = 0.7];

    // Number of elements after which we deem estimation to be stable.
    // Used to control weight of bias if fewer than the specified number have
    // been observed. Also used as maximum ring buffer size (only most recent
    // number of observations are kept). Must be > 0.
    optional int32 num_irls_observations = 3 [default = 10];

    // Change in irls weight magnitude (from outlier to inlier) above which we
    // reset the current bias.
    optional float max_irls_change_ratio = 4 [default = 10.0];

    // Irls weight above which we consider it to be an inlier for bias update
    // purposes (see above inlier and outlier bias). By default, outliers are
    // allowed to update their bias faster than inliers. Must be > 0.
    optional float inlier_irls_weight = 5 [default = 0.2];

    // Standard deviation used during feature initialization. Current bias of a
    // track is used to pre-weight features via gaussian weighting with
    // specified standard deviation.
    optional float bias_stdev = 12 [default = 1.0];

    // When seeding new tracks (on the first frame), we bilaterally pool
    // neighboring feature biases as seed. Details are controlled by options
    // below. If false, the feature's estimation error is used instead
    // (faster, but less spatially smooth).
    // If activated it is advised to use a patch descriptor radius of at least
    // 20 pixels.
    optional bool use_spatial_bias = 6 [default = true];

    // Newly observed tracks' biases are seeded by similar looking features
    // in close spatial proximity. For efficiency a grid is used to determine
    // proximity.
    // Grid size in normalized coordinates w.r.t. frame domain.
    optional float grid_size = 7 [default = 0.04];

    // Sigmas for combining feature biases.
    optional float spatial_sigma = 8 [default = 0.02];
    optional float color_sigma = 9 [default = 20.0];

    // Defines what we consider to be a long track. Features spawned around
    // locations of similar looking long tracks are considered to have
    // high prior, e.g. their initialization is given more weight.
    optional int32 long_track_threshold = 10 [default = 30];

    // Determines which fraction of long tracks is considered to be sufficient
    // for highly confident bias seed.
    optional float long_track_confidence_fraction = 11 [default = 0.25];

    // If activated, uses the irls weights from the estimation of the lower
    // degree of freedom model to seed the bias of the higher degree of freedom
    // model. This improves rigidity of the computed motion.
    optional bool seed_priors_from_bias = 14 [default = false];
  }

  optional LongFeatureBiasOptions long_feature_bias_options = 64;

  // Controls how multiple models via EstimateMotionsParallel are estimated.
  enum EstimationPolicy {
    INDEPENDENT_PARALLEL = 1;  // Models are estimated independently across
                               // frames in parallel.
    TEMPORAL_IRLS_MASK = 2;    // Previous frame's estimation biases
                               // current one, controlled via above
                               // IrlsMaskOptions.
    TEMPORAL_LONG_FEATURE_BIAS = 4;  // Frame's estimation is biased along
                                     // long features, controlled via above
                                     // LongFeatureBiasOptions.
    JOINTLY_FROM_TRACKS = 3;         // Estimation is performed jointly over
                                     // chunks of frames, exercising a
                                     // consistent irls weight PER track.
                                     // Expects as input
                                     // RegionFlowFeatureList's with
                                     // long_tracks. Controlled via above
                                     // JointTrackEstimationOptions.
  }
  optional EstimationPolicy estimation_policy = 58
      [default = INDEPENDENT_PARALLEL];

  // NOTE(review): undocumented; presumably the grid size used when computing
  // feature/inlier coverage - confirm against the implementation.
  optional int32 coverage_grid_size = 51 [default = 10];

  // Degree of freedom of estimated homography mixtures. If desired, specific
  // parts of the homography can be held constant across the mixture.
  // For fast draft TRANSLATION_MIXTURE is recommended, for high quality
  // SKEW_ROTATION_MIXTURE.
  enum MixtureModelMode {
    FULL_MIXTURE = 0;           // 8 dof * num_mixtures
    TRANSLATION_MIXTURE = 1;    // 6 dof + 2 dof * num_mixtures
    SKEW_ROTATION_MIXTURE = 2;  // 4 dof + 4 dof * num_mixtures
  }

  optional MixtureModelMode mixture_model_mode = 23
      [default = SKEW_ROTATION_MIXTURE];

  // If specified, only features that agree with the estimated linear similarity
  // will be used to estimate the homography.
  // If set, linear_similarity_estimation can not be ESTIMATION_LS_NONE!
  // (checked)
  optional bool use_only_lin_sim_inliers_for_homography = 6 [default = true];

  // Max. deviation to be considered an inlier w.r.t. estimated similarity for
  // above flag. This value is set w.r.t. normalized frame diameter.
  // TODO: Should take GetIRLSResidualScale into account.
  optional float lin_sim_inlier_threshold = 20 [default = 0.003];

  // If any parameter of the input flow or estimated translation exceeds these
  // thresholds we deem the motion INVALID.
  message TranslationBounds {
    // Absolute minimum of features present.
    optional int32 min_features = 1 [default = 3];

    // Max magnitude of the translation expressed w.r.t. frame diameter
    optional float frac_max_motion_magnitude = 2 [default = 0.15];

    // Motion magnitude is only tested for if standard deviation of estimated
    // translation exceeds threshold.
    optional float max_motion_stdev_threshold = 4 [default = 0.01];

    // Max standard deviation of the estimated translation (normalized to frame
    // diameter).
    optional float max_motion_stdev = 3 [default = 0.065];

    // Maximum acceleration between frames. Specified relative to minimum
    // velocity across two adjacent frames (absolute minimum of 0.001 is
    // enforced, ~1 pix for 480p).
    // If exceeded for one frame, the whole batch passed to
    // EstimateMotionsParallel is labeled unstable.
    optional float max_acceleration = 5 [default = 20.0];
  }

  optional TranslationBounds stable_translation_bounds = 32;

  // If any test/bound is violated, the motion is deemed UNSTABLE.
  message SimilarityBounds {
    // Input frame has to be labeled stable, i.e. enough features and coverage
    // present.
    optional bool only_stable_input = 1 [default = true];

    // Minimum number of inlier features (absolute and as fraction of total
    // number of features).
    // TODO: Dataset run setting this to 0.15
    // NOTE(review): min_inliers is typed float although it is an absolute
    // count; changing the type would alter the wire encoding.
    optional float min_inlier_fraction = 2 [default = 0.2];
    optional float min_inliers = 3 [default = 30];

    // Bounds on valid similarities. We use larger values compared to
    // homographies. Note: Bounds are necessary to guarantee invertibility
    // of the resulting similarity.
    optional float lower_scale = 4 [default = 0.8];
    optional float upper_scale = 5 [default = 1.25];     // 1 / 0.8.
    optional float limit_rotation = 6 [default = 0.25];  // 15 degrees.

    // Thresholds for a feature to be considered inlier w.r.t similarity
    // transform, expressed in terms of pixel residual error. Max of absolute
    // and fractional thresholds is used.
    // Ratio of inliers that pass regular and strict thresholds are stored in
    // CameraMotion.
    //
    // TODO: Just use lin_sim_inlier_threshold directly, however that
    // recomputes the error, and requires regression testing. Using an extra
    // fractional inlier threshold for now.
    //
    // Absolute in pixels.
    optional float inlier_threshold = 7 [default = 4.0];
    // Scaled by frame diameter.
    optional float frac_inlier_threshold = 8 [default = 0];

    // TODO: Revisit after frame selection change.
    // Absolute in pixels.
    optional float strict_inlier_threshold = 9 [default = 0.5];
  }

  optional SimilarityBounds stable_similarity_bounds = 33;

  // If any parameter of the estimated homography exceeds these bounds,
  // we deem it UNSTABLE_SIM and use estimated similarity instead.
  message HomographyBounds {
    optional float lower_scale = 1 [default = 0.8];
    optional float upper_scale = 2 [default = 1.25];     // 1 / 0.8.
    optional float limit_rotation = 3 [default = 0.25];  // 15 degrees.
    optional float limit_perspective = 4 [default = 0.0004];

    // Inlier coverage is only tested for if average homography error exceeds
    // registration_thresholds. Max of the following two thresholds is used.
    //
    // Absolute in pixels.
    optional float registration_threshold = 5 [default = 0.1];
    // Scaled by frame diameter.
    optional float frac_registration_threshold = 8 [default = 0];

    // Minimum fraction of inlier features w.r.t. frame area.
    optional float min_inlier_coverage = 6 [default = 0.3];

    // Grid coverage inlier threshold. Pixel errors below this
    // threshold are considered inliers. Defined w.r.t. frame diameter, approx.
    // 1.5 for 16:9 SD video (480p), i.e. threshold is multiplied by frame
    // diameter.
    optional float frac_inlier_threshold = 7 [default = 2.0e-3];
  }

  optional HomographyBounds stable_homography_bounds = 11;

  // If any parameter of the estimated homography mixture exceeds these bounds,
  // we deem it UNSTABLE_HOMOG and use the estimated homography instead.
  message MixtureHomographyBounds {
    // Minimum fraction of inlier features w.r.t. block area.
    optional float min_inlier_coverage = 1 [default = 0.4];

    // Each block is tested to be stable, regarding the outliers.
    // A frame is labeled unstable, if at least the specified number of
    // adjacent blocks are labeled outliers.
    optional int32 max_adjacent_outlier_blocks = 2 [default = 5];

    // Maximum number of adjacent empty blocks (no inliers).
    optional int32 max_adjacent_empty_blocks = 3 [default = 3];

    // Grid coverage inlier threshold. See identical parameter in
    // HomographyBounds.
    optional float frac_inlier_threshold = 7 [default = 2.5e-3];
  }

  optional MixtureHomographyBounds stable_mixture_homography_bounds = 34;

  // Scale for stricter coverage evaluation. Used for rolling shutter guess
  // computation, by only using high quality inliers. Larger values reflect
  // stricter coverage.
  // Specifically, when computing coverage via GridCoverage call,
  // frac_inlier_threshold is reduced (divided) by specified scale below.
  optional float strict_coverage_scale = 41 [default = 1.333];

  // By default frames with zero trackable features (e.g. at the beginning,
  // empty frame or shot boundary) are set to the identity model but still
  // labeled as valid. If set to false, these frames are flagged as invalid,
  // which can be useful to locate shot boundaries, etc.
  optional bool label_empty_frames_as_valid = 22 [default = true];

  // Setting for temporal smoothing of irls weights in optional post-processing
  // step.
  // In normalized coordinates w.r.t. frame domain.
  optional float feature_grid_size = 24 [default = 0.05];
  optional float spatial_sigma = 25 [default = 0.01];

  // Frame diameter across which smoothing is performed.
  optional int32 temporal_irls_diameter = 26 [default = 20];
  optional float temporal_sigma = 27 [default = 5];  // in frames.

  // Bilateral weight (for un-normalized color domain [0, .. 255]).
  optional float feature_sigma = 28 [default = 30.0];

  // If set to false 3 taps are used.
  optional bool filter_5_taps = 29 [default = false];

  // If set, during temporal smoothing, each frame is weighted by its
  // confidence, defined as the square coverage (or square mean mixture
  // coverage). Therefore, low confidence fits do not erroneously propagate
  // over time. In addition, if the confidence is below the specified
  // confidence_threshold (relative to the maximum coverage observed in the
  // test interval), irls weights are reset to 1, i.e. biased to
  // agree with the (unknown) background motion.
  optional bool frame_confidence_weighting = 48 [default = true];
  optional float reset_confidence_threshold = 49 [default = 0.4];

  // Filters irls weights before smoothing them according to specified
  // operation.
  enum IRLSWeightFilter {
    IRLS_FILTER_NONE = 0;
    IRLS_FILTER_TEXTURE = 1;
    IRLS_FILTER_CORNER_RESPONSE = 2;
  }

  // Calls TextureFilteredRegionFlowFeatureIRLSWeights on computed irls weights
  // before smoothing them.
  optional IRLSWeightFilter irls_weight_filter = 35
      [default = IRLS_FILTER_NONE];

  // Attempts to detect overlays, i.e. static elements burned into the video
  // that potentially corrupt motion estimation.
  optional bool overlay_detection = 36 [default = false];
  // Overlay detection is performed over specified number of frames.
  optional int32 overlay_analysis_chunk_size = 37 [default = 8];

  message OverlayDetectionOptions {
    // Potential overlay features are aggregated over a mask with cells
    // analysis_mask_size x analysis_mask_size as specified below.
    optional int32 analysis_mask_size = 1 [default = 10];

    // There are two types of candidates of overlay features, strict and loose
    // ones. Strict candidates are used to determine if a grid bin should be
    // considered an overlay cell. If the grid is an overlay cell, *all*
    // candidates, i.e. strict and loose ones will be flagged as overlay
    // features by setting their corresponding irls weight to zero.

    // A feature is a strict overlay feature if its motion is less than
    // strict_near_zero_motion AND less than strict_max_translation_ratio times
    // the estimated translation magnitude at that frame AND its texturedness
    // is sufficiently high.
    optional float strict_near_zero_motion = 2 [default = 0.2];
    optional float strict_max_translation_ratio = 3 [default = 0.2];
    // Minimum texturedness of a feature to be considered an overlay.
    // Motivation: Overlays are mostly text or graphics, i.e. have visually
    // distinguished features.
    optional float strict_min_texturedness = 5 [default = 0.1];

    // A feature is a loose overlay feature if its motion is less than
    // loose_near_zero_motion.
    optional float loose_near_zero_motion = 4 [default = 1.0];

    // Minimum fraction of strict overlay features within a cell to be
    // considered an overlay cell.
    optional float overlay_min_ratio = 6 [default = 0.3];

    // Absolute minimum number of strict overlay features within a cell to be
    // considered an overlay cell.
    // NOTE(review): typed float although it is an absolute count; changing
    // the type would alter the wire encoding.
    optional float overlay_min_features = 7 [default = 10];
  }

  optional OverlayDetectionOptions overlay_detection_options = 38;

  // Shot boundaries are introduced in 3 different scenarios:
  // a) Frame has zero tracked features w.r.t. previous frame
  // b) Estimated motion is deemed invalid (CameraMotion::INVALID).
  // c) Visual consistency is above threshold of two adjacent frames.
  message ShotBoundaryOptions {
    // After cases a & b are determined from features/camera motion, they
    // are verified by ensuring visual consistency is above specified threshold,
    // if visual consistency has been computed. Only if this is the case will
    // the frame be labeled as shot boundary. Motivation is, that there should
    // always be some (even small) measurable increase in the frame difference
    // at a shot boundary.
    // Verification is only performed if visual_consistency has been evaluated
    // (value >= 0).
    optional float motion_consistency_threshold = 1 [default = 0.02];

    // Threshold for case c). Sometimes, motion estimation will miss shot
    // boundaries. We define shot boundaries for which the visual consistency is
    // higher than the specified threshold for at least two adjacent frames.
    optional float appearance_consistency_threshold = 2 [default = 0.075];
  }

  optional ShotBoundaryOptions shot_boundary_options = 60;

  // By default, irls weights of each feature are overwritten with refined irls
  // weights of the last iteration for the highest degree of freedom model that
  // was estimated stable. If set to false, original irls weights are retained.
  // Note: If overlay detection is activated, features deemed to be overlays
  //       have their irls weight set to zero, regardless of this setting.
  //       Similarly, an IRLSWeightFilter is applied if requested, regardless
  //       of this setting.
  optional bool output_refined_irls_weights = 40 [default = true];

  // Weight initialization for homography estimation. This is to bias homography
  // estimation either to foreground or background.
  enum HomographyIrlsWeightInitialization {
    IRLS_WEIGHT_CONSTANT_ONE = 1;     // Constant, treat all features equally.
    IRLS_WEIGHT_CENTER_GAUSSIAN = 2;  // Weight features in the center higher.
                                      // Tends to lock onto foreground.
    IRLS_WEIGHT_PERIMETER_GAUSSIAN = 3;  // Weight features around the
                                         // perimeter higher, tends to lock onto
                                         // background and improves rigidity of
                                         // the perspective degree of freedom.
  }

  // IRLS weights for homography estimation are initialized based on the
  // specified options. If option irls_weights_preinitialized is set,
  // weights are multiplied instead of reset.
  optional HomographyIrlsWeightInitialization
      homography_irls_weight_initialization = 45
      [default = IRLS_WEIGHT_PERIMETER_GAUSSIAN];

  // If set to false use L1 norm irls weights instead of L0 norm irls weights.
  optional bool irls_use_l0_norm = 46 [default = true];

  // IRLS weights are determined in a limited domain (in particular helpful
  // for stabilization analysis on HD videos).
  // TODO: Make this the default.
  optional bool domain_limited_irls_scaling = 65 [default = false];

  // For comparison and debugging purposes. Simply estimates requested models
  // without checking their stability via the stable_*_bounds parameters.
  // However, invertibility is still checked to avoid invalid data being passed
  // to later stages of the stabilizer.
  optional bool deactivate_stable_motion_estimation = 47 [default = false];

  // Projects higher order motions if estimated correctly down to lower order
  // motions, therefore replacing the previously estimated motions.
  optional bool project_valid_motions_down = 52 [default = false];

  // DEPRECATED functionality. Use static functions as indicated instead.
  //
  // Non-linear similarity, use MotionEstimation::EstimateSimilarityModelL2.
  optional bool estimate_similarity = 2 [deprecated = true];

  // Deprecated fields.
  // NOTE(review): `extensions` appears to be used here only to block reuse of
  // retired tags; `reserved` is the idiomatic statement for that. Confirm
  // nothing extends this message before switching.
  extensions 7, 8, 16;
}
// Source path:
// chromium/third_party/mediapipe/src/mediapipe/util/tracking/motion_estimation.proto