// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Fits several linear motion models to the tracked features obtained
// from RegionFlowComputation.
//
// --- Multi-threaded usage (parallel motion estimation over frames) ---
// assume input: vector<RegionFlowFeatureList*> feature_lists
// // Can be obtained after getting RegionFlowFrame from RegionFlowComputation
// // and executing GetRegionFlowFeatureList (region_flow.h)
// MotionEstimation motion_estimation(MotionEstimationOptions(),
// frame_width,
// frame_height);
// vector<CameraMotion> camera_motions;
// motion_estimation.EstimateMotionsParallel(false, // no IRLS smoothing.
// &feature_lists,
// &camera_motions);
// // RegionFlowFeatureList can be discarded or passed to Cropper.
//
//
// --- DEPRECATED, per-frame usage ---
// assume input: RegionFlowFrame* flow_frame // from RegionFlowComputation.
//
// // Initialize with standard options.
// MotionEstimation motion_estimation(MotionEstimationOptions(),
// frame_width,
// frame_height);
// CameraMotion estimated_motion;
// motion_estimation.EstimateMotion(flow_frame,
// NULL, // deprecated param.
// NULL, // deprecated param.
// &estimated_motion);
//
// // If features are not needed anymore, flow_frame can be discarded now.
//
// // Pass motion models in estimated_motion onto MotionStabilization,
// // if stabilization is desired.
#ifndef MEDIAPIPE_UTIL_TRACKING_MOTION_ESTIMATION_H_
#define MEDIAPIPE_UTIL_TRACKING_MOTION_ESTIMATION_H_
#include <algorithm>
#include <cstdint>
#include <deque>
#include <list>
#include <memory>
#include <unordered_map>
#include <vector>
#include "mediapipe/framework/port/vector.h"
#include "mediapipe/util/tracking/camera_motion.pb.h"
#include "mediapipe/util/tracking/motion_estimation.pb.h"
#include "mediapipe/util/tracking/motion_models.pb.h"
#include "mediapipe/util/tracking/region_flow.h"
namespace mediapipe {
class Homography;
class LinearSimilarityModel;
class MixtureHomography;
class MixtureRowWeights;
class RegionFlowFeature;
class RegionFlowFeatureList;
class RegionFlowFrame;
class EstimateMotionIRLSInvoker;
class InlierMask;
class IrlsInitializationInvoker;
// Thread local storage for pre-allocated memory.
class MotionEstimationThreadStorage;
class TrackFilterInvoker;
class MotionEstimation {
public:
MotionEstimation(const MotionEstimationOptions& options, int frame_width,
int frame_height);
virtual ~MotionEstimation();
MotionEstimation(const MotionEstimation&) = delete;
MotionEstimation& operator=(const MotionEstimation&) = delete;
// Can be used to re-initialize options between EstimateMotion /
// EstimateMotionsParallel calls.
void InitializeWithOptions(const MotionEstimationOptions& options);
// Estimates motion models from RegionFlowFeatureLists based on
// MotionEstimationOptions, in a multithreaded manner (frame parallel).
// The computed IRLS weights used in the last iteration of the highest
// degree of freedom model are *written* to the irls_weight member of each
// RegionFlowFeature in the RegionFlowFeatureList, which can be useful
// for later processing.
// In addition, the returned irls weights can be smoothed spatially
// and temporally before they are output.
// Note: The actual vector feature_lists is not modified.
virtual void EstimateMotionsParallel(
bool post_irls_weight_smoothing,
std::vector<RegionFlowFeatureList*>* feature_lists,
std::vector<CameraMotion>* camera_motions) const;
// DEPRECATED function estimating CameraMotion from a single
// RegionFlowFrame.
virtual void EstimateMotion(const RegionFlowFrame& region_flow_frame,
const int*, // deprecated, must be NULL.
const int*, // deprecated, must be NULL.
CameraMotion* camera_motion) const;
// Public facing API to directly estimate motion models (as opposed to
// a cascade of increasing degree of freedom motion models with appropriate
// stability analysis via above EstimateMotionsParallel).
// Use this if all you need is a specific motion
// model describing/summarizing the motion of the RegionFlowFeatureList.
// Returns false if model estimation failed (in this case an identity model
// is set in camera_motion).
// NOTE: All direct estimation functions assume normalized feature input,
// i.e. transformed via NormalizeRegionFlowFeatureList.
//
// NOTE 2: For easy direct use see the Fit* functions below the class.
bool EstimateTranslationModel(RegionFlowFeatureList* feature_list,
CameraMotion* camera_motion);
bool EstimateLinearSimilarityModel(RegionFlowFeatureList* feature_list,
CameraMotion* camera_motion);
bool EstimateAffineModel(RegionFlowFeatureList* feature_list,
CameraMotion* camera_motion);
bool EstimateHomography(RegionFlowFeatureList* feature_list,
CameraMotion* camera_motion);
bool EstimateMixtureHomography(RegionFlowFeatureList* feature_list,
CameraMotion* camera_motion);
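// For illustration, a hedged sketch of the direct estimation API (assumes
// feature_list was normalized beforehand, e.g. via
// NormalizeRegionFlowFeatureList in region_flow.h):
//   MotionEstimation estimator(MotionEstimationOptions(),
//                              frame_width, frame_height);
//   CameraMotion camera_motion;
//   if (!estimator.EstimateHomography(&feature_list, &camera_motion)) {
//     // Estimation failed; camera_motion holds an identity model.
//   }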
// Static function which sets motion models (requested in options) to identity
// models.
static void ResetMotionModels(const MotionEstimationOptions& options,
CameraMotion* camera_motion);
// The following ResetTo* functions reset all models that are
// present in camera_motion (tested via has_*) to identity or to
// the passed model (which is embedded in higher degree of freedom models
// if applicable). CameraMotion::Type is set in accordance with the
// function name.
static void ResetToIdentity(CameraMotion* camera_motion,
bool consider_valid = false); // Set to true
// for type VALID
// Resets every specified model to embedded translation model.
// CameraMotion type is set to UNSTABLE.
static void ResetToTranslation(const TranslationModel& translation,
CameraMotion* camera_motion);
// Resets every specified model with more or equal DOF than a similarity
// to the passed model.
// Camera Motion type is set to UNSTABLE_SIM.
static void ResetToSimilarity(const LinearSimilarityModel& model,
CameraMotion* camera_motion);
// Resets every specified model with more or equal DOF than a homography
// to the passed model. If flag_as_unstable_model is set, camera motion type
// is set to UNSTABLE_HOMOG.
static void ResetToHomography(const Homography& model,
bool flag_as_unstable_model,
CameraMotion* camera_motion);
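// For illustration, a hedged sketch of a reset (field names dx/dy are
// assumed from motion_models.proto):
//   TranslationModel translation;
//   translation.set_dx(2.0f);
//   translation.set_dy(-1.0f);
//   MotionEstimation::ResetToTranslation(translation, &camera_motion);
//   // camera_motion is now typed UNSTABLE; all present models embed the
//   // passed translation.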
private:
// Simple enum indicating which motion model should be estimated, mapped from
// MotionEstimationOptions.
enum MotionType {
MODEL_AVERAGE_MAGNITUDE = 0,
MODEL_TRANSLATION = 1,
MODEL_LINEAR_SIMILARITY = 2,
MODEL_AFFINE = 3,
MODEL_HOMOGRAPHY = 4,
MODEL_MIXTURE_HOMOGRAPHY = 5,
// ... internal enum values used for mixture spectrum (up to 10 mixtures are
// supported). Do not use directly.
// Change value if new motions are added.
MODEL_NUM_VALUES = 16,
};
// Determines shot boundaries from estimated motion models and input
// feature_lists by setting the corresponding flag in CameraMotion.
// Make sure this function is called after motion estimation.
void DetermineShotBoundaries(
const std::vector<RegionFlowFeatureList*>& feature_lists,
std::vector<CameraMotion>* camera_motions) const;
// Implementation function to estimate CameraMotion's from
// RegionFlowFeatureLists.
void EstimateMotionsParallelImpl(
bool irls_weights_preinitialized,
std::vector<RegionFlowFeatureList*>* feature_lists,
std::vector<CameraMotion>* camera_motions) const;
struct SingleTrackClipData;
struct EstimateModelOptions;
EstimateModelOptions DefaultModelOptions() const;
// Implementation function to estimate all motions for a specific type
// across multiple single tracks. Motions are only estimated for those
// CameraMotions with type less than or equal to max_unstable_type. Flag
// irls_weights_preinitialized enables some optimizations when set to
// false, as features are not pre-initialized.
// Optionally pass thread_storage.
// Returns true if estimation of the requested type was attempted
// (based on options), false otherwise.
bool EstimateMotionModels(
const MotionType& max_type, const CameraMotion::Type& max_unstable_type,
const EstimateModelOptions& options,
const MotionEstimationThreadStorage* thread_storage, // optional.
std::vector<SingleTrackClipData>* clip_datas) const;
// Multiplies input irls_weights by an upweight multiplier for each feature
// that is part of a sufficiently long track (the contribution of each
// track length is given by track_length_importance, mapping each track
// length to an importance weight in [0, 1]).
void LongFeatureInitialization(
const RegionFlowFeatureList& feature_list,
const LongFeatureInfo& feature_info,
const std::vector<float>& track_length_importance,
std::vector<float>* irls_weights) const;
// Multiplies input irls_weights by a normalization factor that downweights
// features in areas of high density.
void FeatureDensityNormalization(const RegionFlowFeatureList& feature_list,
std::vector<float>* irls_weights) const;
// Initializes irls weights (if
// MotionEstimationOptions::irls_initialization::activated is true),
// based on a multitude of options (RANSAC based pre-fitting of motion
// models, homography initialization, etc.).
// Processes one frame, or all frames (if frame = -1), within a clip_data.
void IrlsInitialization(const MotionType& type, int max_unstable_type,
int frame, // Specify -1 for all frames.
const EstimateModelOptions& options,
SingleTrackClipData* clip_data) const;
// Estimation functions for models, called via options by EstimateMotion and
// EstimateMotionsParallel.
// NOTE: All direct estimation functions assume normalized feature input,
// i.e. transformed via
// TransformRegionFlowFeatureList(normalization_transform, feature_list);
// where normalization_transform =
// LinearSimilarityAdapter::NormalizationTransform(frame_width,
// frame_height);
//
// Direct estimation functions perform estimation via iteratively reweighted
// least squares (IRLS). In this case specify the number of iterations (10
// is a good default), and optionally the PriorFeatureWeights for each
// iteration.
// The alphas specify how much weight should be given to the
// prior weight that the feature has before optimization. An alpha of zero
// indicates no prior weighting, whereas an alpha of one corresponds to
// full prior weighting. The actual prior is stored in priors.
// Each iteration is reweighted by numerator / error, where error is the L2
// fitting error after estimation and
// numerator = (1.0 - alpha) * 1.0 + alpha * prior.
struct PriorFeatureWeights {
explicit PriorFeatureWeights(int num_iterations)
: alphas(num_iterations, 0.0f) {}
PriorFeatureWeights(int num_iterations, int num_features)
: alphas(num_iterations, 0.0f), priors(num_features, 1.0f) {}
// Tests for correct dimensions of PriorFeatureWeights.
bool HasCorrectDimension(int num_iterations, int num_features) const {
  return static_cast<int>(alphas.size()) == num_iterations &&
         static_cast<int>(priors.size()) == num_features;
}
// Returns true if at least one alpha is non-zero.
bool HasNonZeroAlpha() const {
return !alphas.empty() &&
*std::max_element(alphas.begin(), alphas.end()) > 0;
}
// Returns true if a prior was specified.
bool HasPrior() const { return !priors.empty(); }
std::vector<float> alphas; // Alpha for each IRLS round.
std::vector<float> priors; // Prior weight for each feature.
// If set, the above alphas are not adjusted with iterations but are always
// set to 1.0, giving full weight to the prior.
bool use_full_prior = false;
};
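// Example: with alpha = 0.5 and a feature prior of 0.8,
//   numerator = (1.0 - 0.5) * 1.0 + 0.5 * 0.8 = 0.9,
// so that feature is reweighted by 0.9 / error on that iteration.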
// In addition, each estimation function can compute its corresponding
// stability features and store them in CameraMotion. These features are
// needed for testing via the IsStable* functions further below.
// Estimates 2 DOF translation model.
// Note: feature_list is assumed to be normalized/transformed by
// LinearSimilarity::NormalizationTransform N. Returned irls weights and
// linear similarity are expressed in original frame, i.e. for estimated model
// M, M' = N^(-1) M N is returned.
void EstimateTranslationModelIRLS(
int irls_rounds, bool compute_stability,
RegionFlowFeatureList* feature_list,
const PriorFeatureWeights* prior_weights, // optional.
CameraMotion* camera_motion) const;
// Estimates linear similarity from feature_list using irls_rounds iterative
// reweighted least squares iterations. For L2 estimation, use irls_rounds = 1.
// The irls_weight member of each RegionFlowFeature in feature_list will be
// set to the inverse residual w.r.t. estimated LinearSimilarityModel.
// Note: feature_list is assumed to be normalized/transformed by
// LinearSimilarity::NormalizationTransform N. Returned irls weights and
// linear similarity are expressed in original frame, i.e. for estimated model
// M, M' = N^(-1) M N is returned.
// Returns true if estimation was successful, otherwise returns false and sets
// the CameraMotion::type to INVALID.
bool EstimateLinearSimilarityModelIRLS(
int irls_rounds, bool compute_stability,
RegionFlowFeatureList* feature_list,
const PriorFeatureWeights* prior_weights, // optional.
CameraMotion* camera_motion) const;
// Same as above for affine motion.
// Note: feature_list is assumed to be normalized/transformed by
// LinearSimilarity::NormalizationTransform N. Returned irls weights and
// affine model are expressed in original frame, i.e. for estimated model
// M, M' = N^(-1) M N is returned.
bool EstimateAffineModelIRLS(int irls_rounds,
RegionFlowFeatureList* feature_list,
CameraMotion* camera_motion) const;
// Same as above for homography.
// Note: feature_list is assumed to be normalized/transformed by
// LinearSimilarity::NormalizationTransform N. Returned irls weights and
// homography are expressed in original frame, i.e. for estimated model
// M, M' = N^(-1) M N is returned.
// Returns true if estimation was successful, otherwise returns false and sets
// the CameraMotion::type to INVALID.
bool EstimateHomographyIRLS(
int irls_rounds, bool compute_stability,
const PriorFeatureWeights* prior_weights, // optional.
MotionEstimationThreadStorage* thread_storage, // optional.
RegionFlowFeatureList* feature_list, CameraMotion* camera_motion) const;
// Same as above for mixture homography.
// Note: feature_list is assumed to be normalized/transformed by
// LinearSimilarity::NormalizationTransform N. Returned irls weights and
// mixture homography are expressed in original frame, i.e. for estimated
// model M, M' = N^(-1) M N is returned.
// Mixture model estimation customized by MotionEstimationOptions.
// Returns true if estimation was successful, otherwise returns false and sets
// the CameraMotion::type to INVALID.
// Supports computation for mixture spectrum, i.e. mixtures with different
// regularizers. For default regularizer pass
// MotionEstimationOptions::mixture_regularizer. Estimated motion will be
// stored in CameraMotion::mixture_homography_spectrum(spectrum_idx).
bool EstimateMixtureHomographyIRLS(
int irls_rounds, bool compute_stability, float regularizer,
int spectrum_idx, // 0 by default.
const PriorFeatureWeights* prior_weights, // optional.
MotionEstimationThreadStorage* thread_storage, // optional.
RegionFlowFeatureList* feature_list, CameraMotion* camera_motion) const;
// Returns weighted variance for mean translation from feature_list (assumed
// to be in normalized coordinates). Returned variance is in unnormalized
// domain.
float TranslationVariance(const RegionFlowFeatureList& feature_list,
const Vector2_f& translation) const;
// Replaces each feature's irls weight by the robust min-filtered irls
// weight across its track.
void MinFilterIrlsWeightByTrack(SingleTrackClipData* clip_data) const;
// Performs filtering of irls weights across several tracking clip datas
// to yield consistent irls weights.
void EnforceTrackConsistency(
std::vector<SingleTrackClipData>* clip_datas) const;
// Initializes or modifies prior_weights for passed feature_list by
// biasing toward previous (filtered) IRLS weight for that feature.
// This enables temporal coherence.
void BiasLongFeatures(RegionFlowFeatureList* feature_list, MotionType type,
const EstimateModelOptions& model_options,
PriorFeatureWeights* prior_weights) const;
// Called by above function to determine the bias each feature is multiplied
// with.
void BiasFromFeatures(const RegionFlowFeatureList& feature_list,
MotionType type,
const EstimateModelOptions& model_options,
std::vector<float>* bias) const;
// Maps track index to tuple of spatial bias and number of similar
// looking long tracks.
typedef std::unordered_map<int, std::pair<float, float>> SpatialBiasMap;
void ComputeSpatialBias(MotionType type,
const EstimateModelOptions& model_options,
RegionFlowFeatureList* feature_list,
SpatialBiasMap* spatial_bias) const;
// Updates feature weights in feature_list by a temporally consistent bias.
void UpdateLongFeatureBias(MotionType type,
const EstimateModelOptions& model_options,
bool remove_terminated_tracks,
bool update_irls_observation,
RegionFlowFeatureList* feature_list) const;
// Bilateral filtering of irls weights across the passed list.
void SmoothIRLSWeights(std::deque<float>* irls) const;
// Helper function. Returns the number of irls iterations for the passed
// MotionType derived from the current MotionEstimationOptions. Returns
// zero if no estimation should be attempted.
int IRLSRoundsFromSettings(const MotionType& type) const;
// Partitions irls_rounds into several rounds of irls_per_round iterations
// each, based on MotionEstimationOptions::EstimationPolicy.
// Post-condition: total_rounds * irls_per_round == irls_rounds.
void PolicyToIRLSRounds(int irls_rounds, int* total_rounds,
int* irls_per_round) const;
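// For example, irls_rounds = 10 might be partitioned into total_rounds = 5
// rounds of irls_per_round = 2 iterations each, satisfying the
// post-condition; the actual split depends on the EstimationPolicy.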
// Checks, for the specified MotionType, whether the estimated model is
// stable. If not, resets the features' irls weights to reset_irls_weights
// (optional) and resets the motion model in camera_motion to the lower
// degree of freedom model. In this case,
// CameraMotion::Type is flagged as UNSTABLE_* where * denotes the lower
// degree of freedom model.
// Models are only checked for those CameraMotions with type less than or
// equal to max_unstable_type.
void CheckModelStability(
const MotionType& type, const CameraMotion::Type& max_unstable_type,
const std::vector<std::vector<float>>* reset_irls_weights,
std::vector<RegionFlowFeatureList*>* feature_lists,
std::vector<CameraMotion>* camera_motions) const;
// Implementation function called by above function, to check for a single
// model.
void CheckSingleModelStability(const MotionType& type,
const CameraMotion::Type& max_unstable_type,
const std::vector<float>* reset_irls_weights,
RegionFlowFeatureList* feature_list,
CameraMotion* camera_motion) const;
// Projects motion model specified by type to lower degree of freedom models.
void ProjectMotionsDown(const MotionType& type,
std::vector<CameraMotion>* camera_motions) const;
// Filters passed feature_lists based on
// MotionEstimationOptions::irls_weight_filter.
void IRLSWeightFilter(
std::vector<RegionFlowFeatureList*>* feature_lists) const;
// Inlier scale based on average motion magnitude and the fraction
// of the magnitude that is still considered an inlier.
// In general a residual of 1 pixel is assigned an IRLS weight of 1;
// this function returns a residual scale such that a residual
// of distance_fraction * translation_magnitude equals an IRLS weight of 1
// when multiplied by the returned scale.
float GetIRLSResidualScale(const float avg_motion_magnitude,
float distance_fraction) const;
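// Example (sketch): for avg_motion_magnitude = 20 and
// distance_fraction = 0.05, a residual of 0.05 * 20 = 1 pixel maps to an
// IRLS weight of 1 after multiplication by the returned scale.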
const LinearSimilarityModel& InverseNormalizationTransform() const {
return inv_normalization_transform_;
}
const LinearSimilarityModel& NormalizationTransform() const {
return normalization_transform_;
}
// Returns the domain that normalized features fall in.
Vector2_f NormalizedDomain() const { return normalized_domain_; }
// Returns index within the inlier mask for each feature point.
// Also returns, for each bin, a normalizer to account for the varying
// number of features per bin during weighting.
void ComputeFeatureMask(const RegionFlowFeatureList& feature_list,
std::vector<int>* mask_indices,
std::vector<float>* bin_normalizer) const;
// Runs multiple rounds of RANSAC, resetting outlier IRLS weights to
// a low score.
// Optionally performs temporally consistent selection if inlier_mask is
// specified.
// Returns the best model across all iterations in best_model, and true if
// the estimated model was deemed stable.
bool GetTranslationIrlsInitialization(
RegionFlowFeatureList* feature_list,
const EstimateModelOptions& model_options, float avg_camera_motion,
InlierMask* inlier_mask, // optional.
TranslationModel* best_model) const;
// Same as above for linear similarities.
bool GetSimilarityIrlsInitialization(
RegionFlowFeatureList* feature_list,
const EstimateModelOptions& model_options, float avg_camera_motion,
InlierMask* inlier_mask, // optional.
LinearSimilarityModel* best_model) const;
// Computes number of inliers and strict inliers (satisfying much stricter
// threshold) for a given feature list after model fitting.
void ComputeSimilarityInliers(const RegionFlowFeatureList& feature_list,
int* num_inliers,
int* num_strict_inliers) const;
// Initializes irls weights based on setting
// MotionEstimationOptions::homography_irls_weight_initialization.
void GetHomographyIRLSCenterWeights(const RegionFlowFeatureList& feature_list,
std::vector<float>* center_weights) const;
// Checks for unreasonably large accelerations between frames, as specified
// by MotionEstimationOptions::StableTranslationBounds.
void CheckTranslationAcceleration(
std::vector<CameraMotion>* camera_motions) const;
// The functions below test whether the passed model is deemed stable
// according to several heuristics set by Stable[MODEL]Bounds in
// MotionEstimationOptions.
bool IsStableTranslation(const TranslationModel& normalized_translation,
float translation_variance,
const RegionFlowFeatureList& features) const;
// Tests if passed similarity is stable. Pass number of inliers from
// ComputeSimilarityInliers.
bool IsStableSimilarity(const LinearSimilarityModel& model,
const RegionFlowFeatureList& features,
int num_inliers) const;
bool IsStableHomography(const Homography& homography,
float average_homography_error,
float inlier_coverage) const;
bool IsStableMixtureHomography(
const MixtureHomography& homography, float min_block_inlier_coverage,
const std::vector<float>& block_inlier_coverage) const;
// Computes fraction (in [0, 1]) of inliers w.r.t. frame area using a grid
// of occupancy cells. A feature is considered an inlier if its irls_weight
// is larger than or equal to min_inlier_score.
float GridCoverage(const RegionFlowFeatureList& feature_list,
float min_inlier_score,
MotionEstimationThreadStorage* thread_storage) const;
// Estimates per scanline-block coverage of the mixture. If
// assume_rolling_shutter_camera is set, low-textured features are allowed
// to have higher error, as registration errors would not be as visible
// there.
void ComputeMixtureCoverage(const RegionFlowFeatureList& feature_list,
float min_inlier_score,
bool assume_rolling_shutter_camera,
MotionEstimationThreadStorage* thread_storage,
CameraMotion* camera_motion) const;
// Returns average motion magnitude as mean of the translation magnitude from
// the 10th to 90th percentile.
void EstimateAverageMotionMagnitude(const RegionFlowFeatureList& feature_list,
CameraMotion* camera_motion) const;
// Returns the per-iteration weight of the feature's irls weight
// initialization.
float IRLSPriorWeight(int iteration, int irls_rounds) const;
// Implementation function for EstimateMixtureHomographyIRLS above.
// Estimates mixture homography from features and returns true if
// estimation was non-degenerate.
bool MixtureHomographyFromFeature(
const TranslationModel& translation, int irls_rounds, float regularizer,
const PriorFeatureWeights* prior_weights, // optional.
RegionFlowFeatureList* feature_list,
MixtureHomography* mix_homography) const;
// Determines overlay indices (spatial bin locations that are likely to be
// affected by overlays) and stores them in corresponding member in
// CameraMotion. Features that fall within these bins will be assigned a
// weight of zero.
void DetermineOverlayIndices(
bool irls_weights_preinitialized,
std::vector<CameraMotion>* camera_motions,
std::vector<RegionFlowFeatureList*>* feature_lists) const;
// Determines features likely to be part of a static overlay by setting
// their irls weight to zero.
// Returns fraction of the image domain that is considered to be occupied by
// overlays and specific overlay cell indices in overlay_indices.
float OverlayAnalysis(const std::vector<TranslationModel>& translations,
std::vector<RegionFlowFeatureList*>* feature_lists,
std::vector<int>* overlay_indices) const;
// Smooths feature's irls_weights spatio-temporally.
void PostIRLSSmoothing(
const std::vector<CameraMotion>& camera_motions,
std::vector<RegionFlowFeatureList*>* feature_lists) const;
// Initializes LUT for gaussian weighting. By default discretizes the
// domain [0, max_range] into 4K bins, returning the scale to map from a
// value in the domain to the corresponding bin. If scale is nullptr,
// max_range bins are created instead (in this case the scale would be 1.0,
// i.e. value equals bin index).
void InitGaussLUT(float sigma, float max_range, std::vector<float>* lut,
float* scale) const;
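// Example (sketch): after InitGaussLUT(sigma, max_range, &lut, &scale),
// the gaussian weight of a value v in [0, max_range] can be read as
// lut[static_cast<int>(v * scale)] instead of evaluating exp() per lookup.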
// Performs fast volumetric smoothing / filtering of irls weights. Weights are
// expected to be already binned using BuildFeatureGrid.
void RunTemporalIRLSSmoothing(
const std::vector<FeatureGrid<RegionFlowFeature>>& feature_grid,
const std::vector<std::vector<int>>& feature_taps_3,
const std::vector<std::vector<int>>& feature_taps_5,
const std::vector<float>& frame_confidence,
std::vector<RegionFlowFeatureView>* feature_views) const;
private:
MotionEstimationOptions options_;
int frame_width_;
int frame_height_;
LinearSimilarityModel normalization_transform_;
LinearSimilarityModel inv_normalization_transform_;
// Transform from normalized features to irls domain.
LinearSimilarityModel irls_transform_;
// Frame dimensions transformed by normalization transform.
Vector2_f normalized_domain_;
std::unique_ptr<MixtureRowWeights> row_weights_;
// For initialization biased towards previous frame.
std::unique_ptr<InlierMask> inlier_mask_;
// Stores current bias for each track and the last K irls observations.
struct LongFeatureBias {
explicit LongFeatureBias(float initial_weight) : bias(initial_weight) {
irls_values.push_back(1.0f / initial_weight);
}
LongFeatureBias() : LongFeatureBias(1.0f) {}
float bias = 1.0f; // Current bias, stores pixel error,
// i.e. 1 / IRLS.
std::vector<float> irls_values; // Recently observed IRLS values;
// Ring buffer.
int total_observations = 1;
};
// Maps track id to LongFeatureBias.
typedef std::unordered_map<int, LongFeatureBias> LongFeatureBiasMap;
// Bias map indexed by MotionType.
mutable std::vector<LongFeatureBiasMap> long_feature_bias_maps_ =
std::vector<LongFeatureBiasMap>(static_cast<int>(MODEL_NUM_VALUES));
// Lookup tables and scale for FeatureBias computation.
struct FeatureBiasLUT {
// For ComputeSpatialBias weighting.
std::vector<float> spatial_lut;
float spatial_scale;
std::vector<float> color_lut;
float color_scale;
// For BiasFromFeature computation.
std::vector<float> bias_weight_lut;
float bias_weight_scale;
};
FeatureBiasLUT feature_bias_lut_;
// Counts the number of consecutive duplicate frames for each motion model.
mutable std::vector<int> num_duplicate_frames_ =
std::vector<int>(static_cast<int>(MODEL_NUM_VALUES));
friend class EstimateMotionIRLSInvoker;
friend class IrlsInitializationInvoker;
friend class TrackFilterInvoker;
friend class MotionEstimationThreadStorage;
};
// Meta-data set in the header of filter streams to communicate information used
// during camera motion estimation.
struct CameraMotionStreamHeader {
CameraMotionStreamHeader() : frame_width(0), frame_height(0) {}
int32_t frame_width;
int32_t frame_height;
};
// Direct fitting functions.
TranslationModel FitTranslationModel(const RegionFlowFeatureList& features);
LinearSimilarityModel FitLinearSimilarityModel(
const RegionFlowFeatureList& features);
AffineModel FitAffineModel(const RegionFlowFeatureList& features);
Homography FitHomography(const RegionFlowFeatureList& features);
MixtureHomography FitMixtureHomography(const RegionFlowFeatureList& features);
// Templated fitting functions.
template <class Model>
Model FitModel(const RegionFlowFeatureList& features);
template <>
inline TranslationModel FitModel(const RegionFlowFeatureList& features) {
return FitTranslationModel(features);
}
template <>
inline LinearSimilarityModel FitModel(const RegionFlowFeatureList& features) {
return FitLinearSimilarityModel(features);
}
template <>
inline AffineModel FitModel(const RegionFlowFeatureList& features) {
return FitAffineModel(features);
}
template <>
inline Homography FitModel(const RegionFlowFeatureList& features) {
return FitHomography(features);
}
template <>
inline MixtureHomography FitModel(const RegionFlowFeatureList& features) {
return FitMixtureHomography(features);
}
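// For illustration, a hedged usage sketch of the templated fit:
//   RegionFlowFeatureList features = ...;  // e.g. via
//                                          // GetRegionFlowFeatureList.
//   Homography homography = FitModel<Homography>(features);
// This is equivalent to calling FitHomography(features) directly.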
// Generic projection function that projects models in an arbitrary
// direction (that is, from lower to higher degree of freedom or vice
// versa) via fast model fits, without any error bound checking.
// MixtureRowWeights are only necessary for ToModel == MixtureHomography.
template <class ToModel, class FromModel>
ToModel ProjectViaFit(const FromModel& model, int frame_width, int frame_height,
MixtureRowWeights* row_weights = nullptr,
int grid_dim = 10) {
// Build a grid of features.
const float dx = frame_width * 1.0f / grid_dim;
const float dy = frame_height * 1.0f / grid_dim;
// Create region flow from grid.
RegionFlowFeatureList grid_features;
grid_features.set_frame_width(frame_width);
grid_features.set_frame_height(frame_height);
grid_features.set_match_frame(-1);
for (int k = 0; k <= grid_dim; ++k) {
for (int l = 0; l <= grid_dim; ++l) {
auto* feat = grid_features.add_feature();
feat->set_x(l * dx);
feat->set_y(k * dy);
}
}
RegionFlowFeatureListViaTransform(model, &grid_features, 1.0f,
0.0f, // Replace flow.
false, // Don't change feature loc.
row_weights);
return FitModel<ToModel>(grid_features);
}
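// For illustration, a hedged usage sketch: project a homography down to a
// 4 DOF linear similarity via a synthetic feature grid:
//   LinearSimilarityModel similarity = ProjectViaFit<LinearSimilarityModel>(
//       homography, frame_width, frame_height);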
} // namespace mediapipe
#endif // MEDIAPIPE_UTIL_TRACKING_MOTION_ESTIMATION_H_