syntax = "proto2";
package mediapipe;
import "mediapipe/util/tracking/box_tracker.proto";
import "mediapipe/util/tracking/region_flow.proto";
option java_package = "com.google.mediapipe.tracking";
option java_outer_classname = "BoxDetectorProto";
// Configuration for BoxDetector: index type, detection cadence, image-query
// pyramid settings, and feature matching / RANSAC thresholds.
message BoxDetectorOptions {
// Available types of the detector's index and search structure.
enum IndexType {
// Brute-force matcher (BFMatcher) from OpenCV.
optional IndexType index_type = 1 [default = OPENCV_BF];
// Decides whether the detector is forced to run every N frames.
// 0 means detection will never be called.
// 1 means detect every frame, 2 means detect every other frame, etc.
// Currently only applies to image query mode.
optional int32 detect_every_n_frame = 2 [default = 0];
// Enables box detection when a tracked box is out of the field of view (FOV).
// Detection ceases once the detector successfully re-acquires the box.
optional bool detect_out_of_fov = 4 [default = false];
// Options that apply only to detection from image queries.
message ImageQuerySettings {
// Resize the input image so that its longer edge equals this size. Resizing
// is skipped if the input is already smaller than this size.
optional int32 pyramid_bottom_size = 1 [default = 640];
// Scale factor between adjacent pyramid levels.
optional float pyramid_scale_factor = 2 [default = 1.2];
// Maximum number of pyramid levels.
optional int32 max_pyramid_levels = 3 [default = 4];
// Maximum number of features the detector uses.
optional int32 max_features = 4 [default = 500];
// Settings used by the detection function that takes an image query.
optional ImageQuerySettings image_query_settings = 3;
// Dimensions (number of elements) of each feature descriptor.
optional int32 descriptor_dims = 5 [default = 40];
// Minimum number of correspondences required to go through RANSAC.
optional int32 min_num_correspondence = 6 [default = 5];
// Reprojection threshold used by RANSAC to find inliers.
// NOTE(review): the small default suggests normalized (not pixel) coordinates
// -- confirm against the detector implementation.
optional float ransac_reprojection_threshold = 7 [default = 0.005];
// Maximum distance at which two NIMBY features are considered a match.
optional float max_match_distance = 8 [default = 0.9];
// Maximum perspective change factor.
optional float max_perspective_factor = 9 [default = 0.1];
// Proto that holds BoxDetector's internal search index.
message BoxDetectorIndex {
// Holds the keypoints and descriptors for a single box.
message BoxEntry {
// Holds the keypoints and descriptors for a single appearance of a box. One
// box can have multiple appearances to account for changes in shape,
// perspective, etc.
message FrameEntry {
// The timed box this appearance belongs to (TimedBoxProto is declared outside
// this file; presumably in the imported box_tracker.proto -- confirm).
optional TimedBoxProto box = 1;
// Keypoints stored as a flat list of floats.
// NOTE(review): presumably interleaved coordinates per keypoint -- confirm
// the layout against the code that fills this field.
repeated float keypoints = 2;
// Binary feature descriptors for this appearance.
// NOTE(review): presumably one descriptor per keypoint -- confirm.
repeated BinaryFeatureDescriptor descriptors = 3;
// All recorded appearances of this box.
repeated FrameEntry frame_entry = 1;
// One entry per box held in the index.
repeated BoxEntry box_entry = 1;