// chromium/third_party/mediapipe/src/mediapipe/util/tracking/box_detector.proto

// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This schema uses proto2 syntax: fields carry explicit `optional`/`repeated`
// labels and may declare explicit default values.
syntax = "proto2";

package mediapipe;

// Types referenced in this file but not defined here (TimedBoxProto,
// BinaryFeatureDescriptor) are expected to be provided by these imports.
import "mediapipe/util/tracking/box_tracker.proto";
import "mediapipe/util/tracking/region_flow.proto";

// Java code generation settings: target package and outer wrapper class name.
option java_package = "com.google.mediapipe.tracking";
option java_outer_classname = "BoxDetectorProto";

// Configuration options for the box detector.
//
// NOTE: Field numbers in this message are not in declaration order (e.g.
// detect_out_of_fov = 4 is declared before image_query_settings = 3). Field
// numbers identify fields on the wire, so do not renumber them to "tidy up".
message BoxDetectorOptions {
  // Available types of the detector's index and search structure.
  enum IndexType {
    // No index type specified.
    INDEX_UNSPECIFIED = 0;
    // BFMatcher from OpenCV.
    OPENCV_BF = 1;
  }

  // Index and search structure used by the detector. Defaults to OPENCV_BF.
  optional IndexType index_type = 1 [default = OPENCV_BF];

  // Decides whether the detector is forced to run every N frames.
  // 0 (the default) means detection will never be called.
  // 1 means detect every frame, 2 means detect every other frame, etc.
  // Currently only applied to image query mode.
  optional int32 detect_every_n_frame = 2 [default = 0];

  // Enables box detection when a tracked box is out of the field of view
  // (FOV). Detection ceases after the detector successfully re-acquires the
  // box. Disabled by default.
  optional bool detect_out_of_fov = 4 [default = false];

  // Options that apply only to detection from image queries.
  message ImageQuerySettings {
    // Resize the input image's longer edge to this size. Resizing is skipped
    // if the input size is already smaller than this size.
    // NOTE(review): presumably measured in pixels -- confirm.
    optional int32 pyramid_bottom_size = 1 [default = 640];

    // Scale factor between adjacent pyramid levels.
    optional float pyramid_scale_factor = 2 [default = 1.2];

    // Maximum number of pyramid levels.
    optional int32 max_pyramid_levels = 3 [default = 4];

    // Maximum number of features the detector uses.
    optional int32 max_features = 4 [default = 500];
  }

  // Settings used by the detection function that takes an image query.
  optional ImageQuerySettings image_query_settings = 3;

  // Dimensions (number of elements) of a feature descriptor.
  optional int32 descriptor_dims = 5 [default = 40];

  // Minimum number of correspondences required to go through RANSAC.
  optional int32 min_num_correspondence = 6 [default = 5];

  // Reprojection threshold used by RANSAC to find inliers.
  // NOTE(review): units are not stated here; the small default suggests
  // normalized coordinates rather than pixels -- confirm.
  optional float ransac_reprojection_threshold = 7 [default = 0.005];

  // Maximum distance allowed when matching two NIMBY features.
  // NOTE(review): the distance metric is not specified here -- confirm.
  optional float max_match_distance = 8 [default = 0.9];

  // Maximum perspective change factor.
  optional float max_perspective_factor = 9 [default = 0.1];
}

// Proto to hold BoxDetector's internal search index. The index is a list of
// boxes; each box holds one or more appearances (frame entries).
message BoxDetectorIndex {
  // Message to hold keypoints and descriptors for each box.
  message BoxEntry {
    // Message to hold keypoints and descriptors for each appearance. One box
    // can have multiple appearances to account for changes in shape,
    // perspective, etc.
    message FrameEntry {
      // The box for this appearance. TimedBoxProto is not defined in this
      // file; it is expected to come from one of the imported protos.
      optional TimedBoxProto box = 1;

      // Keypoint values for this appearance, stored as a flat list of floats.
      // NOTE(review): the layout is not specified here (presumably
      // interleaved x, y coordinates per keypoint) -- confirm.
      repeated float keypoints = 2;

      // Feature descriptors for this appearance. BinaryFeatureDescriptor is
      // not defined in this file; it is expected to come from one of the
      // imported protos.
      // NOTE(review): presumably one descriptor per keypoint -- confirm.
      repeated BinaryFeatureDescriptor descriptors = 3;
    }

    // All recorded appearances of this box.
    repeated FrameEntry frame_entry = 1;
  }

  // All boxes held in the index.
  repeated BoxEntry box_entry = 1;
}