// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/tracking/flow_packager.h"
#include <math.h>
#include <algorithm>
#include <cmath>
#include <memory>
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/vector.h"
#include "mediapipe/util/tracking/camera_motion.h"
#include "mediapipe/util/tracking/camera_motion.pb.h"
#include "mediapipe/util/tracking/motion_estimation.h"
#include "mediapipe/util/tracking/motion_models.h"
#include "mediapipe/util/tracking/motion_models.pb.h"
#include "mediapipe/util/tracking/region_flow.pb.h"
namespace mediapipe {
FlowPackager::FlowPackager(const FlowPackagerOptions& options)
: options_(options) {
if (options_.binary_tracking_data_support()) {
ABSL_CHECK_LE(options.domain_width(), 256);
ABSL_CHECK_LE(options.domain_height(), 256);
}
}
namespace {
// Performs rounding of float vector position to int.
class FeatureIntegerPosition {
public:
// Scales a feature's location in x and y by scale_x and scale_y respectively.
// Limits feature position to the integer domain
// [0, width - 1] x [0, height - 1]
FeatureIntegerPosition(float scale_x, float scale_y, int width, int height)
: scale_x_(scale_x), scale_y_(scale_y), width_(width), height_(height) {}
Vector2_i ToIntPosition(const RegionFlowFeature& feature) const {
return Vector2_i(
std::max(0, std::min<int>(width_ - 1, feature.x() * scale_x_ + 0.5f)),
std::max(0, std::min<int>(height_ - 1, feature.y() * scale_y_ + 0.5f)));
}
private:
float scale_x_;
float scale_y_;
int width_;
int height_;
};
// Lexicographic compare (first in x, then in y) under scaled integer rounding
// as specified by FeatureIntegerPosition.
class IntegerColumnComparator {
public:
IntegerColumnComparator(float scale_x, float scale_y, int width, int height)
: integer_pos_(scale_x, scale_y, width, height) {}
bool operator()(const RegionFlowFeature& lhs,
const RegionFlowFeature& rhs) const {
const Vector2_i vec_lhs = integer_pos_.ToIntPosition(lhs);
const Vector2_i vec_rhs = integer_pos_.ToIntPosition(rhs);
return (vec_lhs.x() < vec_rhs.x()) ||
(vec_lhs.x() == vec_rhs.x() && vec_lhs.y() < vec_rhs.y());
}
private:
const FeatureIntegerPosition integer_pos_;
};
template <typename T>
inline std::string EncodeToString(const T& value) {
std::string s(sizeof(T), 0);
memcpy(&s[0], &value, sizeof(T));
return s;
}
template <typename T>
inline std::string EncodeVectorToString(const std::vector<T>& vec) {
std::string s(vec.size() * sizeof(T), 0);
typename std::vector<T>::const_iterator iter;
char* ptr;
for (iter = vec.begin(), ptr = &s[0]; iter != vec.end();
++iter, ptr += sizeof(T)) {
memcpy(ptr, &(*iter), sizeof(T));
}
return s;
}
template <typename T>
inline bool DecodeFromStringView(absl::string_view str, T* result) {
ABSL_CHECK(result != nullptr);
if (sizeof(*result) != str.size()) {
return false;
}
memcpy(result, str.data(), sizeof(T));
return true;
}
template <typename T>
inline bool DecodeVectorFromStringView(absl::string_view str,
std::vector<T>* result) {
ABSL_CHECK(result != nullptr);
if (str.size() % sizeof(T) != 0) return false;
result->clear();
result->reserve(str.size() / sizeof(T));
T value;
const char* begin = str.data();
const char* end = str.data() + str.size();
for (const char* ptr = begin; ptr != end; ptr += sizeof(T)) {
memcpy(&value, ptr, sizeof(T));
result->push_back(value);
}
return true;
}
} // namespace.
void FlowPackager::PackFlow(const RegionFlowFeatureList& feature_list,
const CameraMotion* camera_motion,
TrackingData* tracking_data) const {
ABSL_CHECK(tracking_data);
ABSL_CHECK_GT(feature_list.frame_width(), 0);
ABSL_CHECK_GT(feature_list.frame_height(), 0);
// Scale flow to output domain.
const float dim_x_scale =
options_.domain_width() * (1.0f / feature_list.frame_width());
const float dim_y_scale =
options_.domain_height() * (1.0f / feature_list.frame_height());
const bool long_tracks = feature_list.long_tracks();
// Sort features lexicographically.
RegionFlowFeatureList sorted_feature_list(feature_list);
SortRegionFlowFeatureList(dim_x_scale, dim_y_scale, &sorted_feature_list);
tracking_data->set_domain_width(options_.domain_width());
tracking_data->set_domain_height(options_.domain_height());
tracking_data->set_frame_aspect(feature_list.frame_width() * 1.0f /
feature_list.frame_height());
tracking_data->set_global_feature_count(feature_list.feature_size());
int flags = 0;
if (camera_motion == nullptr ||
camera_motion->type() > CameraMotion::UNSTABLE_SIM) {
flags |= TrackingData::FLAG_BACKGROUND_UNSTABLE;
} else {
Homography transform;
CameraMotionToHomography(*camera_motion, &transform);
Homography normalization = HomographyAdapter::Embed(
AffineAdapter::FromArgs(0, 0, dim_x_scale, 0, 0, dim_y_scale));
Homography inv_normalization =
HomographyAdapter::Embed(AffineAdapter::FromArgs(
0, 0, 1.0f / dim_x_scale, 0, 0, 1.0f / dim_y_scale));
*tracking_data->mutable_background_model() =
ModelCompose3(normalization, transform, inv_normalization);
}
if (camera_motion != nullptr) {
tracking_data->set_average_motion_magnitude(
camera_motion->average_magnitude());
}
if (feature_list.is_duplicated()) {
flags |= TrackingData::FLAG_DUPLICATED;
}
tracking_data->set_frame_flags(flags);
const int num_vectors = sorted_feature_list.feature_size();
TrackingData::MotionData* data = tracking_data->mutable_motion_data();
data->set_num_elements(num_vectors);
// Initialize col starts with "unseen" marker.
std::vector<float> col_start(options_.domain_width() + 1, -1);
int last_col = -1;
int last_row = -1;
FeatureIntegerPosition integer_pos(dim_x_scale, dim_y_scale,
options_.domain_width(),
options_.domain_height());
// Store feature and corresponding motion (minus camera motion) in
// compressed sparse column format:
// https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29
for (const auto& feature : sorted_feature_list.feature()) {
float flow_x = feature.dx() * dim_x_scale;
float flow_y = feature.dy() * dim_y_scale;
Vector2_i loc = integer_pos.ToIntPosition(feature);
// Convert back to float for accurate background model computation.
Vector2_f loc_f = Vector2_f::Cast(loc);
if (camera_motion) {
Vector2_f residual = HomographyAdapter::TransformPoint(
tracking_data->background_model(), loc_f) -
loc_f;
flow_x -= residual.x();
flow_y -= residual.y();
}
data->add_vector_data(flow_x);
data->add_vector_data(flow_y);
data->add_row_indices(loc.y());
if (feature.has_binary_feature_descriptor()) {
data->add_feature_descriptors()->set_data(
feature.binary_feature_descriptor().data());
}
if (long_tracks) {
data->add_track_id(feature.track_id());
}
const int curr_col = loc.x();
if (curr_col != last_col) {
ABSL_CHECK_LT(last_col, curr_col);
ABSL_CHECK_EQ(-1, col_start[curr_col]);
col_start[curr_col] = data->row_indices_size() - 1;
last_col = curr_col;
} else {
ABSL_CHECK_LE(last_row, loc.y());
}
last_row = loc.y();
}
col_start[0] = 0;
col_start[options_.domain_width()] = num_vectors;
// Fill unset values with previously set value. Propagate end value.
for (int i = options_.domain_width() - 1; i > 0; --i) {
if (col_start[i] < 0) {
ABSL_DCHECK_GE(col_start[i + 1], 0);
col_start[i] = col_start[i + 1];
}
}
for (const auto& col_idx : col_start) {
data->add_col_starts(col_idx);
}
// Check monotonicity of the row indices.
for (int c = 0; c < options_.domain_width(); ++c) {
const int r_start = data->col_starts(c);
const int r_end = data->col_starts(c + 1);
for (int r = r_start; r < r_end - 1; ++r) {
ABSL_CHECK_LE(data->row_indices(r), data->row_indices(r + 1));
}
}
ABSL_CHECK_EQ(data->vector_data_size(), 2 * data->row_indices_size());
*data->mutable_actively_discarded_tracked_ids() =
feature_list.actively_discarded_tracked_ids();
}
void FlowPackager::EncodeTrackingData(const TrackingData& tracking_data,
BinaryTrackingData* binary_data) const {
ABSL_CHECK(options_.binary_tracking_data_support());
ABSL_CHECK(binary_data != nullptr);
int32_t frame_flags = 0;
const bool high_profile = options_.use_high_profile();
if (high_profile) {
frame_flags |= TrackingData::FLAG_PROFILE_HIGH;
} else {
frame_flags |= TrackingData::FLAG_PROFILE_BASELINE; // No op.
}
if (options_.high_fidelity_16bit_encode()) {
frame_flags |= TrackingData::FLAG_HIGH_FIDELITY_VECTORS;
}
// Copy background flag.
frame_flags |=
tracking_data.frame_flags() & TrackingData::FLAG_BACKGROUND_UNSTABLE;
const TrackingData::MotionData& motion_data = tracking_data.motion_data();
int32_t num_vectors = motion_data.num_elements();
// Compute maximum vector or delta vector value.
float max_vector_value = 0;
if (high_profile) {
for (int k = 2; k < 2 * num_vectors; ++k) {
max_vector_value = std::max<float>(
max_vector_value,
fabs(motion_data.vector_data(k) - motion_data.vector_data(k - 2)) *
1.02f); // Expand by 2% to account for
// rounding issues.
}
} else {
for (const float vector_value : motion_data.vector_data()) {
max_vector_value = std::max<float>(max_vector_value, fabs(vector_value));
}
}
const int32_t domain_width = tracking_data.domain_width();
const int32_t domain_height = tracking_data.domain_height();
ABSL_CHECK_LT(domain_height, 256) << "Only heights below 256 are supported.";
const float frame_aspect = tracking_data.frame_aspect();
// Limit vector value from above (to 20% frame diameter) and below (small
// eps).
const float max_vector_threshold = hypot(domain_width, domain_height) * 0.2f;
// Warn if too much truncation.
if (max_vector_value > max_vector_threshold * 1.5f) {
ABSL_LOG(WARNING) << "A lot of truncation will occur during encoding. "
<< "Vector magnitudes are larger than 20% of the "
<< "frame diameter.";
}
max_vector_value =
std::min<float>(max_vector_threshold, std::max(1e-4f, max_vector_value));
// Compute scales for 16bit and 8bit float -> int conversion.
// Use highest bit for sign.
const int kByteMax16 = (1 << 15) - 1;
const int kByteMax8 = (1 << 7) - 1;
// Scale such that highest vector value is mapped to kByteMax
int scale_16 = std::ceil(kByteMax16 / max_vector_value);
int scale_8 = std::ceil(kByteMax8 / max_vector_value);
const int32_t scale =
options_.high_fidelity_16bit_encode() ? scale_16 : scale_8;
const float inv_scale = 1.0f / scale;
const int kByteMax =
options_.high_fidelity_16bit_encode() ? kByteMax16 : kByteMax8;
// Compressed flow to be encoded in binary format.
std::vector<int16_t> flow_compressed_16;
std::vector<int8_t> flow_compressed_8;
flow_compressed_16.reserve(num_vectors);
flow_compressed_8.reserve(num_vectors);
std::vector<uint8_t> row_idx;
row_idx.reserve(num_vectors);
float average_error = 0;
std::vector<int> col_starts(motion_data.col_starts().begin(),
motion_data.col_starts().end());
// Separate both implementations for easier readability.
// For details please refer to description in proto.
// Low profile:
// * Encode vectors by scaling to integer format.
// * Keep sparse matrix format as is
// High profile:
// * Encode deltas between vectors scaling them to integers
// * Re-use encoded vectors if delta is small, use ADVANCE flag in row
// index.
// * Delta encode row indices to reduce magnitude.
// * If two row deltas are small (< 8), encode in one byte
if (!high_profile) {
// Traverse columns.
for (int c = 0; c < col_starts.size() - 1; ++c) {
const int r_start = col_starts[c];
const int r_end = col_starts[c + 1];
for (int r = r_start; r < r_end; ++r) {
const float flow_x_32f = motion_data.vector_data(2 * r);
const float flow_y_32f = motion_data.vector_data(2 * r + 1);
const int flow_x =
std::max(-kByteMax, std::min<int>(kByteMax, flow_x_32f * scale));
const int flow_y =
std::max(-kByteMax, std::min<int>(kByteMax, flow_y_32f * scale));
average_error += 0.5f * (fabs(flow_x * inv_scale - flow_x_32f) +
fabs(flow_y * inv_scale - flow_y_32f));
if (options_.high_fidelity_16bit_encode()) {
flow_compressed_16.push_back(flow_x);
flow_compressed_16.push_back(flow_y);
} else {
flow_compressed_8.push_back(flow_x);
flow_compressed_8.push_back(flow_y);
}
ABSL_DCHECK_LT(motion_data.row_indices(r), 256);
row_idx.push_back(motion_data.row_indices(r));
}
}
} else {
// Compress flow.
int prev_flow_x = 0;
int prev_flow_y = 0;
const float reuse_threshold = options_.high_profile_reuse_threshold();
int compressible = 0;
std::vector<int> compressions_per_column(domain_width, 0);
const int kAdvanceFlag = FlowPackagerOptions::ADVANCE_FLAG;
const int kDoubleIndexEncode = FlowPackagerOptions::DOUBLE_INDEX_ENCODE;
const int kIndexMask = FlowPackagerOptions::INDEX_MASK;
// Traverse columns.
for (int c = 0; c < motion_data.col_starts().size() - 1; ++c) {
const int r_start = col_starts[c];
const int r_end = col_starts[c + 1];
for (int r = r_start; r < r_end; ++r) {
int flow_x = 0;
int flow_y = 0;
bool advance = true;
const float flow_x_32f = motion_data.vector_data(2 * r);
const float flow_y_32f = motion_data.vector_data(2 * r + 1);
// Delta coding of vectors.
const float diff_x = flow_x_32f - prev_flow_x * inv_scale;
const float diff_y = flow_y_32f - prev_flow_y * inv_scale;
// Determine if previous flow can be re-used.
if (fabs(diff_x) < reuse_threshold && fabs(diff_y) < reuse_threshold) {
advance = false;
} else {
flow_x = std::max(-kByteMax, std::min<int>(kByteMax, diff_x * scale));
flow_y = std::max(-kByteMax, std::min<int>(kByteMax, diff_y * scale));
prev_flow_x += flow_x;
prev_flow_y += flow_y;
}
average_error += 0.5f * (fabs(prev_flow_x * inv_scale - flow_x_32f) +
fabs(prev_flow_y * inv_scale - flow_y_32f));
// Combine into one 32 or 16 bit value (clear sign bits for the
// right part before combining).
if (advance) {
if (options_.high_fidelity_16bit_encode()) {
flow_compressed_16.push_back(flow_x);
flow_compressed_16.push_back(flow_y);
} else {
flow_compressed_8.push_back(flow_x);
flow_compressed_8.push_back(flow_y);
}
}
// Delta code row indices in high profile mode and use two top bits
// for status:
// 10: single row encode, use next vector data.
// (ADVANCE_FLAG)
//
// 11: double row encode: (3 bit + 3 bit = maximum of 7 + 7 row delta),
// use next vector data for each.
// (ADVANCE_FLAG | DOUBLE_INDEX_ENCODE)
//
// 00: single row encode + no advance (re-use previous vector data).
// (no flags set)
//
// 01: double row encode + no advance (re-use previous vector data for
// each).
// (DOUBLE_INDEX_ENCODE)
// Delta compress.
int delta_row = motion_data.row_indices(r) -
(r == r_start ? 0 : motion_data.row_indices(r - 1));
ABSL_CHECK_GE(delta_row, 0);
bool combined = false;
if (r > r_start) {
int prev_row_idx = row_idx.back();
if (!(prev_row_idx & kDoubleIndexEncode) && // Single encode.
(prev_row_idx & kAdvanceFlag) == advance) { // Same advance flag.
// Both compressible (each index fits in 3 bit).
if (delta_row < 8 && (prev_row_idx & kIndexMask) < 8) {
// Encode two deltas into 6 bit.
prev_row_idx = ((prev_row_idx & 0x07) << 3) | delta_row |
kDoubleIndexEncode | (advance ? kAdvanceFlag : 0);
row_idx.back() = prev_row_idx;
// Record as one compression for this column.
++compressions_per_column[c];
++compressible;
combined = true;
}
}
}
if (!combined) {
while (delta_row > kIndexMask) {
// Special case of large displacement. Duplicate vector until sum of
// deltas reaches target delta).
row_idx.push_back(kIndexMask | (advance ? kAdvanceFlag : 0));
delta_row -= kIndexMask;
advance = false; // Store same vector again, re-use previously
// encoded vector data.
// Record as one addition for the column.
--compressions_per_column[c];
++num_vectors;
}
row_idx.push_back(delta_row | (advance ? kAdvanceFlag : 0));
}
}
}
// Count number of advance flags encoded.
int encoded = 0;
for (int idx : row_idx) {
if (idx & kAdvanceFlag) {
encoded += (idx & kDoubleIndexEncode) ? 2 : 1;
}
}
if (options_.high_fidelity_16bit_encode()) {
ABSL_CHECK_EQ(2 * encoded, flow_compressed_16.size());
} else {
ABSL_CHECK_EQ(2 * encoded, flow_compressed_8.size());
}
// Adjust column start by compressions.
int curr_adjust = 0;
for (int k = 0; k < domain_width; ++k) {
curr_adjust -= compressions_per_column[k];
col_starts[k + 1] += curr_adjust;
ABSL_CHECK_LE(col_starts[k], col_starts[k + 1]);
}
ABSL_CHECK_EQ(row_idx.size(), col_starts.back());
ABSL_CHECK_EQ(num_vectors, row_idx.size() + compressible);
}
// Delta compress col_starts.
std::vector<uint8_t> col_start_delta(domain_width + 1, 0);
col_start_delta[0] = col_starts[0];
for (int k = 1; k < domain_width + 1; ++k) {
const int delta = col_starts[k] - col_starts[k - 1];
ABSL_CHECK_LT(delta, 256) << "Only up to 255 items per column supported.";
col_start_delta[k] = delta;
}
VLOG(1) << "error: " << average_error / (num_vectors + 1)
<< " additions: " << num_vectors - motion_data.num_elements();
const Homography& background_model = tracking_data.background_model();
const float scale_x = 1.0f / tracking_data.domain_width();
const float scale_y = 1.0f / tracking_data.domain_height();
Homography homog_scale = HomographyAdapter::Embed(
AffineAdapter::FromArgs(0, 0, scale_x, 0, 0, scale_y));
Homography inv_homog_scale = HomographyAdapter::Embed(
AffineAdapter::FromArgs(0, 0, 1.0f / scale_x, 0, 0, 1.0f / scale_y));
// Might be just the identity if not set.
const Homography background_model_scaled =
ModelCompose3(homog_scale, background_model, inv_homog_scale);
std::string background_model_string =
absl::StrCat(EncodeToString(background_model.h_00()),
EncodeToString(background_model.h_01()),
EncodeToString(background_model.h_02()),
EncodeToString(background_model.h_10()),
EncodeToString(background_model.h_11()),
EncodeToString(background_model.h_12()),
EncodeToString(background_model.h_20()),
EncodeToString(background_model.h_21()));
std::string* data = binary_data->mutable_data();
data->clear();
int32_t vector_size = options_.high_fidelity_16bit_encode()
? flow_compressed_16.size()
: flow_compressed_8.size();
int32_t row_idx_size = row_idx.size();
absl::StrAppend(data, EncodeToString(frame_flags),
EncodeToString(domain_width), EncodeToString(domain_height),
EncodeToString(frame_aspect), background_model_string,
EncodeToString(scale), EncodeToString(num_vectors),
EncodeVectorToString(col_start_delta),
EncodeToString(row_idx_size), EncodeVectorToString(row_idx),
EncodeToString(vector_size),
(options_.high_fidelity_16bit_encode()
? EncodeVectorToString(flow_compressed_16)
: EncodeVectorToString(flow_compressed_8)));
VLOG(1) << "Binary data size: " << data->size() << " for " << num_vectors
<< " (" << vector_size << ")";
}
std::string PopSubstring(int len, absl::string_view* piece) {
std::string result = std::string(piece->substr(0, len));
piece->remove_prefix(len);
return result;
}
void FlowPackager::DecodeTrackingData(const BinaryTrackingData& container_data,
TrackingData* tracking_data) const {
ABSL_CHECK(tracking_data != nullptr);
absl::string_view data(container_data.data());
int32_t frame_flags = 0;
int32_t domain_width = 0;
int32_t domain_height = 0;
std::vector<float> background_model;
int32_t scale = 0;
int32_t num_vectors = 0;
float frame_aspect = 0.0f;
DecodeFromStringView(PopSubstring(4, &data), &frame_flags);
DecodeFromStringView(PopSubstring(4, &data), &domain_width);
DecodeFromStringView(PopSubstring(4, &data), &domain_height);
DecodeFromStringView(PopSubstring(4, &data), &frame_aspect);
ABSL_CHECK_LE(domain_width, 256);
ABSL_CHECK_LE(domain_height, 256);
DecodeVectorFromStringView(
PopSubstring(4 * HomographyAdapter::NumParameters(), &data),
&background_model);
DecodeFromStringView(PopSubstring(4, &data), &scale);
DecodeFromStringView(PopSubstring(4, &data), &num_vectors);
tracking_data->set_frame_flags(frame_flags);
tracking_data->set_domain_width(domain_width);
tracking_data->set_domain_height(domain_height);
tracking_data->set_frame_aspect(frame_aspect);
*tracking_data->mutable_background_model() =
HomographyAdapter::FromFloatPointer(&background_model[0], false);
TrackingData::MotionData* motion_data = tracking_data->mutable_motion_data();
motion_data->set_num_elements(num_vectors);
const bool high_profile = frame_flags & TrackingData::FLAG_PROFILE_HIGH;
const bool high_fidelity =
frame_flags & TrackingData::FLAG_HIGH_FIDELITY_VECTORS;
const float flow_denom = 1.0f / scale;
std::vector<uint8_t> col_starts_delta;
DecodeVectorFromStringView(PopSubstring(domain_width + 1, &data),
&col_starts_delta);
// Delta decompress.
std::vector<int> col_starts;
col_starts.reserve(domain_width + 1);
int column = 0;
for (auto col : col_starts_delta) {
column += col;
col_starts.push_back(column);
}
std::vector<uint8_t> row_idx;
int32_t row_idx_size;
DecodeFromStringView(PopSubstring(4, &data), &row_idx_size);
// Should not have more row indices than vectors. (One for each in baseline
// profile, less in high profile).
ABSL_CHECK_LE(row_idx_size, num_vectors);
DecodeVectorFromStringView(PopSubstring(row_idx_size, &data), &row_idx);
// Records for each vector whether to advance pointer in the vector data array
// or re-use previously read data.
std::vector<bool> advance(num_vectors, true);
if (high_profile) {
// Unpack row indices, populate advance.
const int kAdvanceFlag = FlowPackagerOptions::ADVANCE_FLAG;
const int kDoubleIndexEncode = FlowPackagerOptions::DOUBLE_INDEX_ENCODE;
const int kIndexMask = FlowPackagerOptions::INDEX_MASK;
std::vector<int> column_expansions(domain_width, 0);
std::vector<uint8_t> row_idx_unpacked;
row_idx_unpacked.reserve(num_vectors);
advance.clear();
for (int c = 0; c < col_starts.size() - 1; ++c) {
const int r_start = col_starts[c];
const int r_end = col_starts[c + 1];
uint8_t prev_row_idx = 0;
for (int r = r_start; r < r_end; ++r) {
// Use top bit as indicator to advance.
advance.push_back(row_idx[r] & kAdvanceFlag);
// Double encode?
if (row_idx[r] & kDoubleIndexEncode) {
// Indices are encoded as each 3 bit offset within kIndexMask.
prev_row_idx += (row_idx[r] >> 3) & 0x7;
row_idx_unpacked.push_back(prev_row_idx);
prev_row_idx += row_idx[r] & 0x7;
row_idx_unpacked.push_back(prev_row_idx);
// Duplicate advance setting.
advance.push_back(advance.back());
++column_expansions[c];
} else {
// Single encode.
prev_row_idx += row_idx[r] & kIndexMask; // Clear status.
row_idx_unpacked.push_back(prev_row_idx);
}
}
}
row_idx.swap(row_idx_unpacked);
ABSL_CHECK_EQ(num_vectors, row_idx.size());
// Adjust column start by expansions.
int curr_adjust = 0;
for (int k = 0; k < domain_width; ++k) {
curr_adjust += column_expansions[k];
col_starts[k + 1] += curr_adjust;
}
}
ABSL_CHECK_EQ(num_vectors, col_starts.back());
int vector_data_size;
DecodeFromStringView(PopSubstring(4, &data), &vector_data_size);
int prev_flow_x = 0;
int prev_flow_y = 0;
if (high_fidelity) {
std::vector<int16_t> vector_data;
DecodeVectorFromStringView(
PopSubstring(sizeof(vector_data[0]) * vector_data_size, &data),
&vector_data);
int counter = 0;
for (int k = 0; k < num_vectors; ++k) {
if (advance[k]) { // Read new vector data.
int flow_x = vector_data[counter++];
int flow_y = vector_data[counter++];
if (high_profile) { // Delta decode in high profile.
flow_x += prev_flow_x;
flow_y += prev_flow_y;
prev_flow_x = flow_x;
prev_flow_y = flow_y;
}
motion_data->add_vector_data(flow_x * flow_denom);
motion_data->add_vector_data(flow_y * flow_denom);
} else { // Re-use previous vector data.
motion_data->add_vector_data(prev_flow_x * flow_denom);
motion_data->add_vector_data(prev_flow_y * flow_denom);
}
}
ABSL_CHECK_EQ(vector_data_size, counter);
} else {
std::vector<int8_t> vector_data;
DecodeVectorFromStringView(
PopSubstring(sizeof(vector_data[0]) * vector_data_size, &data),
&vector_data);
int counter = 0;
for (int k = 0; k < num_vectors; ++k) {
if (advance[k]) { // Read new vector data.
int flow_x = vector_data[counter++];
int flow_y = vector_data[counter++];
if (high_profile) { // Delta decode in high profile.
flow_x += prev_flow_x;
flow_y += prev_flow_y;
prev_flow_x = flow_x;
prev_flow_y = flow_y;
}
motion_data->add_vector_data(flow_x * flow_denom);
motion_data->add_vector_data(flow_y * flow_denom);
} else { // Re-use previous vector data.
motion_data->add_vector_data(prev_flow_x * flow_denom);
motion_data->add_vector_data(prev_flow_y * flow_denom);
}
}
ABSL_CHECK_EQ(vector_data_size, counter);
}
for (auto idx : row_idx) {
motion_data->add_row_indices(idx);
}
for (auto column : col_starts) {
motion_data->add_col_starts(column);
}
}
void FlowPackager::BinaryTrackingDataToContainer(
const BinaryTrackingData& binary_data, TrackingContainer* container) const {
ABSL_CHECK(container != nullptr);
container->Clear();
container->set_header("TRAK");
container->set_version(1);
container->set_size(binary_data.data().size());
*container->mutable_data() = binary_data.data();
}
void FlowPackager::BinaryTrackingDataFromContainer(
const TrackingContainer& container, BinaryTrackingData* binary_data) const {
ABSL_CHECK_EQ("TRAK", container.header());
ABSL_CHECK_EQ(1, container.version()) << "Unsupported version.";
*binary_data->mutable_data() = container.data();
}
void FlowPackager::DecodeMetaData(const TrackingContainer& container_data,
MetaData* meta_data) const {
ABSL_CHECK(meta_data != nullptr);
ABSL_CHECK_EQ("META", container_data.header());
ABSL_CHECK_EQ(1, container_data.version()) << "Unsupported version.";
absl::string_view data(container_data.data());
int32_t num_frames;
DecodeFromStringView(PopSubstring(4, &data), &num_frames);
meta_data->set_num_frames(num_frames);
for (int k = 0; k < num_frames; ++k) {
int32_t msec;
int32_t stream_offset;
DecodeFromStringView(PopSubstring(4, &data), &msec);
DecodeFromStringView(PopSubstring(4, &data), &stream_offset);
MetaData::TrackOffset* track_offset = meta_data->add_track_offsets();
track_offset->set_msec(msec);
track_offset->set_stream_offset(stream_offset);
}
}
void FlowPackager::FinalizeTrackingContainerFormat(
std::vector<uint32_t>* timestamps,
TrackingContainerFormat* container_format) {
ABSL_CHECK(container_format != nullptr);
// Compute binary sizes of track_data.
const int num_frames = container_format->track_data_size();
std::vector<uint32_t> msecs(num_frames, 0);
if (timestamps) {
ABSL_CHECK_EQ(num_frames, timestamps->size());
msecs = *timestamps;
}
std::vector<int> sizes(num_frames, 0);
for (int f = 0; f < num_frames; ++f) {
// Default size of container: 12 bytes + binary data size (see comment for
// TrackingContainer in flow_packager.proto).
sizes[f] = container_format->track_data(f).data().size() + 12;
}
// Store relative offsets w.r.t. end of MetaData.
MetaData meta_data;
InitializeMetaData(num_frames, msecs, sizes, &meta_data);
// Serialize metadata to binary.
TrackingContainer* meta = container_format->mutable_meta_data();
meta->Clear();
meta->set_header("META");
std::string* binary_metadata = meta->mutable_data();
absl::StrAppend(binary_metadata, EncodeToString(meta_data.num_frames()));
for (auto& track_offset : *meta_data.mutable_track_offsets()) {
absl::StrAppend(binary_metadata, EncodeToString(track_offset.msec()),
EncodeToString(track_offset.stream_offset()));
}
meta->set_size(binary_metadata->size());
// Add term header.
TrackingContainer* term = container_format->mutable_term_data();
term->set_header("TERM");
term->set_size(0);
}
void FlowPackager::FinalizeTrackingContainerProto(
std::vector<uint32_t>* timestamps, TrackingContainerProto* proto) {
ABSL_CHECK(proto != nullptr);
// Compute binary sizes of track_data.
const int num_frames = proto->track_data_size();
std::vector<uint32_t> msecs(num_frames, 0);
if (timestamps) {
ABSL_CHECK_EQ(num_frames, timestamps->size());
msecs = *timestamps;
}
std::vector<int> sizes(num_frames, 0);
TrackingContainerProto temp_proto;
BinaryTrackingData* temp_track_data = temp_proto.add_track_data();
for (int f = 0; f < num_frames; ++f) {
// Swap current track data in and out of temp_track_data to determine total
// encoding size with proto preamble.
proto->mutable_track_data(f)->Swap(temp_track_data);
sizes[f] = temp_proto.ByteSize();
proto->mutable_track_data(f)->Swap(temp_track_data);
}
proto->clear_meta_data();
InitializeMetaData(num_frames, msecs, sizes, proto->mutable_meta_data());
}
void FlowPackager::InitializeMetaData(int num_frames,
const std::vector<uint32_t>& msecs,
const std::vector<int>& data_sizes,
MetaData* meta_data) const {
meta_data->set_num_frames(num_frames);
ABSL_CHECK_EQ(num_frames, msecs.size());
ABSL_CHECK_EQ(num_frames, data_sizes.size());
int curr_offset = 0;
for (int f = 0; f < num_frames; ++f) {
MetaData::TrackOffset* track_offset = meta_data->add_track_offsets();
track_offset->set_msec(msecs[f]);
track_offset->set_stream_offset(curr_offset);
curr_offset += data_sizes[f];
}
}
void FlowPackager::AddContainerToString(const TrackingContainer& container,
std::string* binary_data) {
ABSL_CHECK(binary_data != nullptr);
std::string header_string(container.header());
ABSL_CHECK_EQ(4, header_string.size());
std::vector<char> header{header_string[0], header_string[1], header_string[2],
header_string[3]};
absl::StrAppend(binary_data, EncodeVectorToString(header),
EncodeToString(container.version()),
EncodeToString(container.size()), container.data());
}
std::string FlowPackager::SplitContainerFromString(
absl::string_view* binary_data, TrackingContainer* container) {
ABSL_CHECK(binary_data != nullptr);
ABSL_CHECK(container != nullptr);
ABSL_CHECK_GE(binary_data->size(), 12)
<< "Data does not contain " << "valid container";
container->set_header(PopSubstring(4, binary_data));
int version;
DecodeFromStringView(PopSubstring(4, binary_data), &version);
int size;
DecodeFromStringView(PopSubstring(4, binary_data), &size);
container->set_version(version);
container->set_size(size);
if (size > 0) {
container->set_data(PopSubstring(size, binary_data));
}
return container->header();
}
void FlowPackager::TrackingContainerFormatToBinary(
const TrackingContainerFormat& container_format, std::string* binary) {
ABSL_CHECK(binary != nullptr);
binary->clear();
AddContainerToString(container_format.meta_data(), binary);
for (const auto& track_data : container_format.track_data()) {
AddContainerToString(track_data, binary);
}
AddContainerToString(container_format.term_data(), binary);
}
void FlowPackager::TrackingContainerFormatFromBinary(
const std::string& binary, TrackingContainerFormat* container_format) {
ABSL_CHECK(container_format != nullptr);
container_format->Clear();
absl::string_view data(binary);
ABSL_CHECK_EQ("META", SplitContainerFromString(
&data, container_format->mutable_meta_data()));
MetaData meta_data;
DecodeMetaData(container_format->meta_data(), &meta_data);
for (int f = 0; f < meta_data.num_frames(); ++f) {
TrackingContainer* container = container_format->add_track_data();
ABSL_CHECK_EQ("TRAK", SplitContainerFromString(&data, container));
}
ABSL_CHECK_EQ("TERM", SplitContainerFromString(
&data, container_format->mutable_term_data()));
}
void FlowPackager::SortRegionFlowFeatureList(
float scale_x, float scale_y, RegionFlowFeatureList* feature_list) const {
ABSL_CHECK(feature_list != nullptr);
// Sort features lexicographically.
std::sort(feature_list->mutable_feature()->begin(),
feature_list->mutable_feature()->end(),
IntegerColumnComparator(scale_x, scale_y, options_.domain_width(),
options_.domain_height()));
}
bool FlowPackager::CompatibleForEncodeWithoutDuplication(
const TrackingData& tracking_data) const {
const TrackingData::MotionData& motion_data = tracking_data.motion_data();
for (int c = 0; c < motion_data.col_starts_size() - 1; ++c) {
const int r_start = motion_data.col_starts(c);
const int r_end = motion_data.col_starts(c + 1);
for (int r = r_start; r < r_end; ++r) {
if (motion_data.row_indices(r) -
(r == r_start ? 0 : motion_data.row_indices(r - 1)) >=
64) {
return false;
}
}
}
return true;
}
} // namespace mediapipe