object_detector_metadata_schema.fbs

// Copyright 2023 The MediaPipe Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

namespace mediapipe.tasks;

// ObjectDetectorOptions.min_parser_version indicates the minimum object
// detector metadata parser version required to fully understand all fields in
// a given metadata flatbuffer. This min_parser_version is specific to the
// object detector metadata defined in this schema file.
//
// New fields and types will have associated comments with the schema version
// for which they were added.
//
// Schema Semantic version: 1.0.0

// The file identifier indicates flatbuffer compatibility. The number is bumped
// whenever a breaking change is applied to the schema, such as removing fields
// or adding new fields to the middle of a table. Appending new fields to the
// end of a table is not a breaking change and does not require a bump.
file_identifier "V001";


// History:
// 1.0.0 - Initial version.

// A single fixed-size anchor box, described by its center point and its size.
// NOTE(review): the coordinate system and units (e.g. normalized to the model
// input size vs. absolute pixels) are not stated in this schema; confirm
// against the code that consumes the anchors.
//
// New fields must be appended after `height` to keep existing buffers
// readable.
table FixedAnchor {
  // Horizontal position of the anchor center.
  x_center: float;
  // Vertical position of the anchor center.
  y_center: float;
  // Extent of the anchor along the x axis.
  width: float;
  // Extent of the anchor along the y axis.
  height: float;
}

// The schema for a list of fixed-size anchors.
table FixedAnchorsSchema {
  // The anchors, stored as a vector of FixedAnchor tables.
  // NOTE(review): presumably the vector order must match the order of the
  // boxes in the model output; confirm against the consumer.
  anchors: [FixedAnchor];
}

// The SSD anchors options used in the object detector. Currently the only way
// to describe anchors is an explicit list of fixed-size anchors.
table SsdAnchorsOptions {
  // Explicit list of fixed-size anchors.
  fixed_anchors_schema: FixedAnchorsSchema;
}

// The options for decoding the raw model output tensors. The options are mostly
// used in TensorsToDetectionsCalculatorOptions.
//
// New fields must be appended after `sigmoid_score` to keep existing buffers
// readable.
table TensorsDecodingOptions {
  // The number of output classes predicted by the detection model.
  num_classes: int;
  // The number of output boxes predicted by the detection model.
  num_boxes: int;
  // The number of output values per box predicted by the detection
  // model. The values contain bounding boxes, keypoints, etc.
  num_coords: int;
  // The offset of keypoint coordinates in the location tensor.
  keypoint_coord_offset: int;
  // The number of predicted keypoints.
  num_keypoints: int;
  // The dimension of each keypoint, i.e. the number of values predicted for
  // each keypoint.
  num_values_per_keypoint: int;
  // Parameters for decoding the SSD detection model output.
  // NOTE(review): presumably these are the scale factors applied to the raw
  // box center (x, y) and size (w, h) values during decoding; confirm against
  // TensorsToDetectionsCalculatorOptions.
  x_scale: float;
  y_scale: float;
  w_scale: float;
  h_scale: float;
  // Whether to apply the exponential function on the box size.
  apply_exponential_on_box_size: bool;
  // Whether to apply the sigmoid function on the score.
  sigmoid_score: bool;
}

// The root table of this schema: the object detector specific metadata,
// bundling the parser version requirement, the SSD anchors configuration and
// the tensors decoding options.
table ObjectDetectorOptions {
  // TODO: automatically populate min parser string.
  // NOTE(review): the TODO above suggests automatic population is not yet
  // implemented, while the description below states that it is; confirm which
  // one is current.
  //
  // The minimum object detector metadata parser version required to fully
  // understand all fields in a given metadata flatbuffer. This field is
  // automatically populated by the MetadataPopulator when the metadata is
  // populated into a TFLite model. This min_parser_version is specific to the
  // object detector metadata defined in this schema file.
  min_parser_version:string;

  // The options of the SSD anchors configs used by the detection model.
  ssd_anchors_options:SsdAnchorsOptions;

  // The tensors decoding options to convert raw tensors to detection results.
  tensors_decoding_options:TensorsDecodingOptions;
}

// Buffers serialized with this schema have ObjectDetectorOptions as their root
// table.
root_type ObjectDetectorOptions;
chromium/third_party/mediapipe/src/mediapipe/tasks/metadata/object_detector_metadata_schema.fbs