// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Computes MotionSaliency points that can be used for stabilization and
// retargeting.
#ifndef MEDIAPIPE_UTIL_TRACKING_MOTION_SALIENCY_H_
#define MEDIAPIPE_UTIL_TRACKING_MOTION_SALIENCY_H_
#include <utility>
#include <vector>
#include "mediapipe/framework/port/vector.h"
#include "mediapipe/util/tracking/motion_saliency.pb.h"
#include "mediapipe/util/tracking/region_flow.h"
namespace mediapipe {
class RegionFlowFeatureList;
class RegionFlowFrame;
class SalientPointFrame;
} // namespace mediapipe
namespace mediapipe {
class MotionSaliency {
public:
MotionSaliency(const MotionSaliencyOptions& options, int frame_width,
int frame_height);
~MotionSaliency();
// Finds modes in the RegionFlowFeatureList (clusters for high IRLS weight,
// per default features agreeing with the background motion).
// Optionally, per feature irls weights can be supplied instead of using the
// features weight to adapt modes that will be found, e.g. see
// ForegroundWeightsFromFeatures below.
void SaliencyFromFeatures(const RegionFlowFeatureList& feature_list,
std::vector<float>* irls_weights, // optional.
SalientPointFrame* salient_frame);
// Finds saliency points (modes) from a list of points and their respective
// weights, outputs a SalientPointFrame.
void SaliencyFromPoints(const std::vector<Vector2_f>* points,
const std::vector<float>* weights,
SalientPointFrame* salient_frame);
// Selects saliency inliers, by searching for close-by salient points
// (within fractional MotionSaliencyOptions::filtering_support_distance)
// across adjacent frames (considered are
// #MotionSaliencyOptions::filtering_frame_radius before and after the
// current frame).
// If at least #MotionSaliencyOptions::filtering_minimum_support
// supporting points are found the tested salient point is kept, otherwise
// discarded.
// If desired performs rescaling, such that the median salient point weight
// equals MotionSaliencyOptions::saliency_weight().
void SelectSaliencyInliers(std::vector<SalientPointFrame*>* motion_saliency,
bool rescale_to_median_saliency_weight);
// Averages all salient points (unweighted average) per frame. The resulting
// mean salient point is assigned weight one, and the specified normalized
// bounds (as tuple left, bottom, right, top).
void CollapseMotionSaliency(const SaliencyPointList& input_saliency,
const Vector4_f& bounds,
SaliencyPointList* output_saliency);
// Smooths saliency in space and time.
void FilterMotionSaliency(
std::vector<SalientPointFrame*>* saliency_point_list);
// Aggregates location in image domain and salient weight.
struct SalientLocation {
SalientLocation() {}
SalientLocation(const Vector2_f& _pt, float _weight)
: pt(_pt), weight(_weight) {}
Vector2_f pt;
float weight = 0;
};
private:
// Locates modes in a set of SalientLocation's.
// (using mean shift with bilateral weights, i.e. weight * spatial
// gaussian weighting).
// Only modes with for which the sum of total saliency weight is
// above min_irls_mode_sum are returned.
// Returns modes in the image domain as 2D points, sum of their
// assignment weights and spatial extend along major and minor axis.
// Modes are sorted w.r.t. their assignment irls weights (from highest to
// lowest).
struct SalientMode {
Vector2_f location;
// Total sum of irls weights assigned to this mode.
float assignment_weight = 0;
// Magnitude of major and minor axis storred in x and y, respectively.
Vector2_f axis_magnitude;
// Angle in radians w.r.t. x-axis.
float angle = 0;
};
// Note: input vector locations is not mutated by function.
void SalientModeFinding(std::vector<SalientLocation>* locations,
std::vector<SalientMode>* modes);
// Determines the salient frame for a list of SalientLocations by performing
// mode finding and scaling each point based on frame size.
void DetermineSalientFrame(std::vector<SalientLocation> locations,
SalientPointFrame* salient_frame);
MotionSaliencyOptions options_;
int frame_width_;
int frame_height_;
};
// Returns foregroundness weights in [0, 1] for each feature, by mapping irls
// weight to foreground score in [0, 1].
// In particular, the foreground threshold indicates the *inverse* registration
// error (i.e. the irls weight) that is deemed a complete inlier.
// Weights in the interval [0, foreground_threshold] (corresponding to
// pixel errors in the interval [1 / foreground_threshold, inf])
// are mapped to 1 - [0, 1], i.e. foreground threshold is mapped to zero
// with weights below the threshold being assigned values > 0.
// Therefore, larger values will increase amount of detected foreground
// as well as noise.
// In addition, foreground_gamma's < 1 can be used to increase the resolution
// of small foreground motions (irls weight close to the foreground_threshold)
// at the expense of larger foreground motions (irls weight close to zero).
// If optional parameter camera_motion is specified, the passed foreground
// threshold is scaled by the InlierCoverage of the camera_motion
// (which is in 0, 1). That is for unstable frames with small coverage,
// the threshold is tighter and fewer features are considered foreground.
void ForegroundWeightsFromFeatures(
const RegionFlowFeatureList& feature_list,
float foreground_threshold, // 0.5 is a good default value.
float foreground_gamma, // use 1.0 for default
const CameraMotion* camera_motion, // optional, can be nullptr.
std::vector<float>* weights);
} // namespace mediapipe
#endif // MEDIAPIPE_UTIL_TRACKING_MOTION_SALIENCY_H_