// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/util/tracking/motion_models.proto";
import "mediapipe/util/tracking/region_flow.proto";
// Messages encapsulating compressed and uncompressed TrackingData.
//
// Uncompressed tracking data can be aggregated via a TrackingDataChunk
// (e.g. to be cached to file per chunk). The whole chunk persists in memory
// after reading.
//
// Compressed tracking data can be aggregated as binary encoded TrackingData
// messages into two container formats (with support for random seeking):
// 1) TrackingContainerProto:
// Encoding using proto buffer wire format, using default proto
// serialization and de-serialization to binary string.
// The container uses the MetaData message to store the stream offsets and
// milliseconds for each frame of TrackingData. TrackingData itself is custom
// encoded to binary using FlowPackager::EncodeTrackingData and the resulting
// binary blob wrapped in a BinaryTrackingData message.
// 2) TrackingContainerFormat:
// Encoding without any dependencies to protobuffers, for clients without
// proto buffer support.
// Encoding is based on encoding binary blobs of data wrapped into repeated
// containers. The layout of a container is described by the message
// TrackingContainer and serialized to binary data as described below
// (without using proto encoding). Therefore, message TrackingContainer is
// intended more for documentation purposes than for direct use.
// The format is described by the proto message TrackingContainerFormat (used
// internally by FlowPackager) however serialization and de-serialization
// to binary string is performed using custom methods supplied by
// FlowPackager (TrackingContainerFormatToBinary and
// TrackingContainerFormatFromBinary).
// The format stores the MetaData first as above, although using custom
// encoding. TrackingData is encoded to binary as above using
// FlowPackager::EncodeTrackingData and the resulting binary blob is stored
// within a TrackingContainer.
// Next tag: 9
message TrackingData {
  // Values used with frame_flags. Apart from the zero baseline value, each
  // entry is a distinct power of two; frame_flags itself is a plain int32
  // (presumably a bitwise combination of these values -- confirm against
  // FlowPackager).
  enum FrameFlags {
    // Baseline encode (see ">> Baseline encode <<" notes in this file).
    FLAG_PROFILE_BASELINE = 0;

    // High profile encode (see ">> High profile encode <<" notes in this
    // file).
    FLAG_PROFILE_HIGH = 1;

    // The width of binary encoded vector_data (8 bit or 16 bit) depends on
    // this flag.
    FLAG_HIGH_FIDELITY_VECTORS = 2;

    // Background model could not be estimated.
    FLAG_BACKGROUND_UNSTABLE = 4;

    // Frame is duplicated, i.e. identical to previous one.
    FLAG_DUPLICATED = 8;

    // Indicates the beginning of a new chunk. In this case the track_id's
    // are not compatible w.r.t. previous one.
    FLAG_CHUNK_BOUNDARY = 16;
  }

  // Flags describing this frame; see FrameFlags.
  optional int32 frame_flags = 1 [default = 0];

  // Domain that the tracking data is specified in. Tracking data is
  // resolution independent and expressed w.r.t. this domain.
  optional int32 domain_width = 2;
  optional int32 domain_height = 3;

  // Aspect ratio (w/h) of the original frame tracking data was computed from.
  optional float frame_aspect = 6 [default = 1.0];

  // Background motion model. Homography is declared outside this file
  // (presumably in the imported motion_models.proto).
  optional Homography background_model = 4;

  // Holds num_elements motion vectors. The (x,y) position of each vector is
  // encoded via row_indices and col_starts, using compressed sparse column
  // matrix storage:
  // (https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29),
  // The vector itself is stored as a (dx, dy) pair. The fitting error and
  // track id of each feature are stored optionally.
  message MotionData {
    // Number of motion vectors held by this message.
    optional int32 num_elements = 1;

    // #num_elements pairs (flow_x, flow_y) densely packed.
    repeated float vector_data = 2 [packed = true];

    // Track index of each feature. Features that belong to the same track
    // over time share the same id.
    // NOTE: Due to size, tracking ids are never stored as compressed binary
    // tracking data.
    repeated int32 track_id = 3 [packed = true];

    // # num_elements row indices.
    repeated int32 row_indices = 4 [packed = true];

    // Start index in above array for each column (#domain_width + 1 entries).
    repeated int32 col_starts = 5 [packed = true];

    // Feature descriptors for num_elements feature points.
    repeated BinaryFeatureDescriptor feature_descriptors = 6;

    // All tracked ids that have been discarded actively. Downstream consumers
    // use this information to avoid misjudging tracking continuity.
    // NOTE(review): unlike the other repeated scalar fields in this message,
    // this one is not declared [packed = true]; left untouched to keep the
    // serialized output unchanged.
    repeated int32 actively_discarded_tracked_ids = 7;
  }

  // Motion vectors for this frame.
  optional MotionData motion_data = 5;

  // Total number of features in our analysis
  optional uint32 global_feature_count = 7;

  // Average of all motion vector magnitudes (without accounting for any motion
  // model), within 10th to 90th percentile (to remove outliers).
  optional float average_motion_magnitude = 8;
}
message TrackingDataChunk {
  // One frame worth of tracking data together with its frame index and
  // timestamps.
  message Item {
    // Tracking data of this frame.
    optional TrackingData tracking_data = 1;

    // Global frame index.
    optional int32 frame_idx = 2;

    // Corresponding timestamp.
    optional int64 timestamp_usec = 3;

    // Previous frame timestamp.
    optional int64 prev_timestamp_usec = 4;
  }

  // Items aggregated by this chunk.
  repeated Item item = 1;

  // Set as marker for last chunk.
  optional bool last_chunk = 2 [default = false];

  // Set as marker for first chunk.
  optional bool first_chunk = 3 [default = false];
}
// TrackingData in compressed binary format. Obtainable via
// FlowPackager::EncodeTrackingData. Details of binary encode are below.
message BinaryTrackingData {
  // TrackingContainer::header = "TRAK"

  // Binary blob holding the encoded TrackingData.
  optional bytes data = 1;
}
// Detailed explanation of binary Tracking data encode (LITTLE ENDIAN encode!)
// TrackingData is stored in binary as a struct of the above fields and the
// compressed motion data in sparse column matrix storage format.
// (https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29)
// Specifically, TrackingData is encoded as:
// { frame_flags : 32 bit int (from member)
// domain_width : 32 bit int (from member)
// domain_height : 32 bit int (from member)
// frame_aspect : 32 bit float (from member)
//
// background_model : 6 * 32 bit float (dx, dy, a, b, c, d of AffineModel)
// scale : 32 bit float (scale vectors are multiplied with)
// num_vectors : 32 bit int (from member num_elements)
//
// col_start_delta : (domain_width + 1) * 8 bit uint (col starts delta
// encoded)
// row_idx_size : 32 bit int (size of row_idx array <= num_vectors)
// row_idx : row_idx_size * 8 bit uint
// vector_size : 32 bit int (size of vector_data)
// vector_data : vector_size * [8 bit | 16 bit] int
// (depending on FLAG_HIGH_FIDELITY_VECTORS)
// }
//
// >> Baseline encode <<
// Scale is determined such that maximum vector value (maximum across x and y)
// is mapped to highest 8 bit or 16 bit SIGNED int
// (i.e. 7 or 15 bit resolution respectively).
// Vector values are multiplied by this scale (storing float in int with
// truncation) and (dx, dy) is packed as [dy | dx] into a 16bit or 32 bit word.
// Unpacking requires therefore dividing the vector values by scale.
//
// Column starts are delta compressed, that is, col_start_delta[i] stores
// col_starts(i) - col_starts(i - 1) from MotionData.
//
// Row indices are directly stored as 8 bit uints, that is row_idx_size ==
// num_vectors in this case.
//
//
// >> High profile encode <<
// Scale is determined as above but for maximum vector deltas (maximum across x
// and y of magnitude in difference between two adjacent vectors). Vector value
// deltas are multiplied by this scale before encoding.
//
// Encoding is more complex compared to baseline. Instead of vector value, delta
// vector values (difference in dx = ddx, difference in dy = ddy)
// are multiplied by scale and stored packed as [ddy | ddx] into a 16bit or
// 32bit word. Compression algorithm accounts for error accumulation, so
// unpacking should first add deltas in integer domain (for x and y separately)
// and then divide by scale to yield (an approximation) of the
// original vector value.
// Most importantly, not every vector value is stored, but only if the delta is
// above the FlowPackagerOptions::high_profile_reuse_threshold, in which case we
// advance to the next vector data. Otherwise the previous vector is used.
//
// The information whether to advance is stored for each vector in the
// highest bit of the row index (FlowPackagerOptions::ADVANCE_FLAG). Row
// indices are not stored as in the baseline profile directly, but as deltas
// (reset at the beginning of every column). As deltas are small it is often
// possible to store two deltas (if both are < 8) in a single byte. This is
// indicated by the second highest flag in the row index
// (FlowPackagerOptions::DOUBLE_INDEX_ENCODE). If set, row index stores
// [row_delta_1 | row_delta_2] in the lower 6 bit. Note, that the advance flag
// applies uniformly to both deltas in this case.
// Sidenote (edge case): Due to the use of the top 2 bits as flags,
// at times we cannot store the full row delta in the lower 6 bits.
// In this case the vector is duplicated (using the ADVANCE_FLAG)
// until the delta sum of duplicated vectors reaches the original delta.
// Consequently, the compressed vector field in high profile may contain a few
// vectors more than the original.
//
// Column starts are delta compressed as in baseline, but account for double
// index encodes. Therefore each column delta is reduced by the number of double
// index encodes occurring for this column. This has to be replicated on the
// decoding side, each delta needs to be increased by the number of double index
// encodes encountered during encoding.
// Stores offsets for random seek and time offsets for each frame of
// TrackingData. Stream offsets are specified relative w.r.t. end of metadata
// blob.
// Offsets specify start of the corresponding binary encoded TrackingContainer
// (for TrackingContainerFormat) or BinaryTrackingData proto (for
// TrackingContainerProto).
message MetaData {
  // TrackingContainer::header = "META"

  // Number of frames.
  optional fixed32 num_frames = 2;

  // Time offset and stream position for one frame of TrackingData.
  message TrackOffset {
    // Time offset of the metadata in msec.
    optional fixed32 msec = 1;

    // Offset of TrackingContainer or respectively BinaryTrackingData in
    // stream. Specified w.r.t. end of the Metadata.
    optional fixed32 stream_offset = 2;
  }

  // Offsets of the individual frames.
  repeated TrackOffset track_offsets = 3;
}
// TrackingContainer is a self-describing container format to store arbitrary
// chunks of binary data. Each container is typed via its 4 character header,
// versioned via an int, and followed by the size of the binary data and the
// actual data. Designed for clients without availability of protobuffer
// support.
// Note: This message is mainly used for documentation purposes and uses custom
// encoding as specified by FlowPackager::TrackingContainerFormatToBinary.
// Default binary size of a TrackingContainer (DO NOT CHANGE!):
// header: 4 byte +
// version: 4 byte +
// size: 4 byte +
// data #size
// SUM: 12 + #size.
message TrackingContainer {
  // DO NOT alter layout of TrackingContainer.
  // Use version to extend or alter encoded binary data.

  // 4 character header.
  optional string header = 1;

  // Version information.
  optional fixed32 version = 2 [default = 1];

  // Size of binary data held by container
  optional fixed32 size = 3;

  // Binary data encoded.
  optional bytes data = 4;
}
// Container format for clients without proto support (written via
// FlowPackager::TrackingContainerFormatToBinary and read via
// FlowPackager::TrackingContainerFormatFromBinary).
// Proto here is intermediate format for documentation and internal use.
// Stores multiple TrackingContainers of different types.
// Meta data is stored first, to facilitate random seek (via stream offset
// positions) to arbitrary binary TrackingData. Termination container signals end
// of stream.
message TrackingContainerFormat {
  // Wraps binary meta data, via custom encode.
  optional TrackingContainer meta_data = 1;

  // Wraps BinaryTrackingData.
  repeated TrackingContainer track_data = 2;

  // Add new TrackingContainers above before end of stream indicator.

  // Zero sized termination container with TrackingContainer::header = "TERM".
  optional TrackingContainer term_data = 3;
}
// Simplified proto format of above TrackingContainerFormat. Instead of using
// self-describing TrackingContainer's, we simply use the proto wire format for
// encoding and decoding (proto format is typed and versioned via ids).
message TrackingContainerProto {
  // Stream offsets and time offsets for the frames in track_data.
  optional MetaData meta_data = 1;

  // Binary encoded TrackingData blobs.
  repeated BinaryTrackingData track_data = 2;
}
// Options controlling compression and encoding.
message FlowPackagerOptions {
  // Domain that the resolution independent tracking data is specified in.
  // Only values <= 256 are supported if binary tracking data is requested to
  // be supported (see binary_tracking_data_support).
  optional int32 domain_width = 1 [default = 256];
  optional int32 domain_height = 2 [default = 192];

  // Needs to be set for calls to FlowPackager::EncodeTrackingData. If encoding
  // is not required, can be set to false in which case a higher domain_width
  // can be used.
  optional bool binary_tracking_data_support = 6 [default = true];

  // NOTE(review): presumably selects the high profile encode over the
  // baseline encode -- confirm against FlowPackager.
  optional bool use_high_profile = 3 [default = false];

  // If set uses 16 bit encode for vector data, in BinaryTrackingData,
  // otherwise only 8 bits are used.
  optional bool high_fidelity_16bit_encode = 4 [default = true];

  // In high profile encode, re-use previously encoded vector when absolute
  // difference to current vector is below threshold.
  optional float high_profile_reuse_threshold = 5 [default = 0.5];

  // High profile encoding flags, applied to the 8 bit row index.
  enum HighProfileEncoding {
    // Highest bit of the row index.
    ADVANCE_FLAG = 0x80;

    // Second highest bit of the row index.
    DOUBLE_INDEX_ENCODE = 0x40;

    // Lower 6 bits of the row index.
    INDEX_MASK = 0x3F;
  }
}