// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The option proto for the TensorsToDetectionsCalculator.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
// Options for the TensorsToDetectionsCalculator, attached to a calculator node
// through the `CalculatorOptions` extension declared below.
//
// Fields documented as "[Required]" are still declared `optional`: protobuf
// itself does not enforce their presence, so they must be set explicitly by
// whoever configures the calculator.
message TensorsToDetectionsCalculatorOptions {
  extend .mediapipe.CalculatorOptions {
    optional TensorsToDetectionsCalculatorOptions ext = 335742639;
  }

  // [Required] The number of output classes predicted by the detection model.
  optional int32 num_classes = 1;

  // [Required] The number of output boxes predicted by the detection model.
  optional int32 num_boxes = 2;

  // [Required] The number of output values per box predicted by the detection
  // model. The values contain bounding boxes, keypoints, etc.
  optional int32 num_coords = 3;

  // The offset of keypoint coordinates in the location tensor.
  // No explicit default is declared; an unset field reads as 0.
  optional int32 keypoint_coord_offset = 9;

  // The number of predicted keypoints.
  optional int32 num_keypoints = 10 [default = 0];

  // The dimension of each keypoint, i.e. the number of values predicted for
  // each keypoint.
  optional int32 num_values_per_keypoint = 11 [default = 2];

  // The offset of box coordinates in the location tensor.
  optional int32 box_coord_offset = 12 [default = 0];

  // Parameters for decoding SSD detection model.
  // NOTE(review): presumably the scale factors applied to the raw x / y /
  // width / height predictions during box decoding; all default to 0.0, so
  // confirm against the calculator whether they must always be set.
  optional float x_scale = 4 [default = 0.0];
  optional float y_scale = 5 [default = 0.0];
  optional float w_scale = 6 [default = 0.0];
  optional float h_scale = 7 [default = 0.0];

  // NOTE(review): presumably selects whether an exponential is applied to the
  // predicted box width/height while decoding -- confirm against the
  // calculator implementation.
  optional bool apply_exponential_on_box_size = 13 [default = false];

  // Whether to reverse the order of predicted x, y from output.
  // If false, the order is [y_center, x_center, h, w], if true the order is
  // [x_center, y_center, w, h].
  // DEPRECATED. Use `box_format` instead.
  // Marked `deprecated = true` so generated code and tooling surface remaining
  // uses; the field number, type and default are unchanged, so existing
  // configs still parse and serialize exactly as before.
  optional bool reverse_output_order = 14
      [default = false, deprecated = true];

  // The ids of classes that should be ignored during decoding the score for
  // each predicted box. Can be overridden with IGNORE_CLASSES side packet.
  // `ignore_classes` and `allow_classes` are mutually exclusive.
  repeated int32 ignore_classes = 8;

  // The ids of classes that should be allowed during decoding the score for
  // each predicted box. `ignore_classes` and `allow_classes` are mutually
  // exclusive.
  repeated int32 allow_classes = 21 [packed = true];

  // NOTE(review): presumably selects whether a sigmoid is applied to the raw
  // class scores -- confirm against the calculator implementation.
  optional bool sigmoid_score = 15 [default = false];

  // NOTE(review): presumably a bound used to clip raw scores before they are
  // converted; the exact semantics are not visible in this schema. No explicit
  // default is declared, so use presence to tell "unset" from 0.
  optional float score_clipping_thresh = 16;

  // Whether the detection coordinates from the input tensors should be flipped
  // vertically (along the y-direction). This is useful, for example, when the
  // input tensors represent detections defined with a coordinate system where
  // the origin is at the top-left corner, whereas the desired detection
  // representation has a bottom-left origin (e.g., in OpenGL).
  optional bool flip_vertically = 18 [default = false];

  // Score threshold for preserving decoded detections.
  // No explicit default is declared; an unset field reads as 0.
  optional float min_score_thresh = 19;

  // The maximum number of the detection results to return. If < 0, all
  // available results will be returned.
  // For the detection models that have built-in non max suppression op, the
  // output detections are the top-scored results. Otherwise, the output
  // detections are the first N results that have higher scores than
  // `min_score_thresh`.
  optional int32 max_results = 20 [default = -1];

  // The maximum number of classes per detection.
  optional int32 max_classes_per_detection = 25 [default = 1];

  // The custom model output tensor mapping.
  // The indices of the "detections" tensor and the "scores" tensor are always
  // required. If the model outputs an "anchors" tensor, `anchors_tensor_index`
  // must be specified. If the model outputs both "classes" tensor and "number
  // of detections" tensors, `classes_tensor_index` and
  // `num_detections_tensor_index` must be set.
  message TensorMapping {
    // Index of the "detections" tensor. Always required.
    optional int32 detections_tensor_index = 1;
    // Index of the "classes" tensor, if the model outputs one.
    optional int32 classes_tensor_index = 2;
    // Index of the "scores" tensor. Always required.
    optional int32 scores_tensor_index = 3;
    // Index of the "number of detections" tensor, if the model outputs one.
    optional int32 num_detections_tensor_index = 4;
    // Index of the "anchors" tensor, if the model outputs one.
    optional int32 anchors_tensor_index = 5;
  }

  // Optional custom mapping of the model's output tensors; see
  // `TensorMapping`.
  optional TensorMapping tensor_mapping = 22;

  // Represents the bounding box by using the combination of boundaries,
  // {ymin, xmin, ymax, xmax}.
  // The default order is {ymin, xmin, ymax, xmax}, i.e. indices 0, 1, 2, 3.
  message BoxBoundariesIndices {
    optional int32 ymin = 1 [default = 0];
    optional int32 xmin = 2 [default = 1];
    optional int32 ymax = 3 [default = 2];
    optional int32 xmax = 4 [default = 3];
  }

  // How box coordinates are indexed in the model output. Being a oneof, at
  // most one member can be set at a time; it currently has a single member.
  oneof box_indices {
    BoxBoundariesIndices box_boundaries_indices = 23;
  }

  // Tells the calculator how to convert the detector output to bounding boxes.
  // Replaces `reverse_output_order` to support more bbox output formats.
  // As with `reverse_output_order`, this also informs calculator the order
  // of keypoint predictions.
  enum BoxFormat {
    // if UNSPECIFIED, the calculator assumes YXHW
    UNSPECIFIED = 0;
    // bbox [y_center, x_center, height, width], keypoint [y, x]
    YXHW = 1;
    // bbox [x_center, y_center, width, height], keypoint [x, y]
    XYWH = 2;
    // bbox [xmin, ymin, xmax, ymax], keypoint [x, y]
    XYXY = 3;
  }

  // Layout of the predicted boxes and keypoints; see `BoxFormat`.
  optional BoxFormat box_format = 24 [default = UNSPECIFIED];
}