// chromium/third_party/coremltools/mlmodel/format/VisionFeaturePrint.proto

// Copyright (c) 2018, Apple Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-3-clause license that can be
// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause

syntax = "proto3";
// Generate code against the lite protobuf runtime; lite messages do not
// provide descriptors or reflection.
option optimize_for = LITE_RUNTIME;

// Package namespace for every definition in this file.
package CoreML.Specification.CoreMLModels;

/*
 * A model that takes an input image and outputs one or more arrays of
 * features, according to the specified feature print type.
 */
message VisionFeaturePrint {
  // Specific vision feature print types. Each nested message below describes
  // one kind of feature extractor and declares its own version enum.

  // Scene extracts features useful for identifying the contents of natural
  // images in both indoor and outdoor environments.
  message Scene {
    // Known revisions of the scene feature extractor.
    enum SceneVersion {
      // Zero value; this is what `version` reads as when it is left unset.
      SCENE_VERSION_INVALID = 0;
      // SCENE_VERSION_1 is available on iOS, tvOS 12.0+, macOS 10.14+.
      // It uses a 299x299 input image and yields a 2048 float feature vector.
      SCENE_VERSION_1 = 1;

      // SCENE_VERSION_2 is available on iOS, tvOS 17.0+, macOS 14.0+.
      // It uses a 360x360 input image and yields a 768 float feature vector.
      SCENE_VERSION_2 = 2;
    }

    // Revision of the scene feature extractor to use.
    SceneVersion version = 1;
  }

  // Objects extracts features useful for identifying and localizing
  // objects in natural images.
  message Objects {
    // Known revisions of the objects feature extractor.
    enum ObjectsVersion {
      // Zero value; this is what `version` reads as when it is left unset.
      OBJECTS_VERSION_INVALID = 0;
      // OBJECTS_VERSION_1 is available on iOS, tvOS 14.0+, macOS 11.0+.
      // It uses a 299x299 input image and yields two multiarray
      // features: one at high resolution of shape (288, 35, 35),
      // the other at low resolution of shape (768, 17, 17).
      OBJECTS_VERSION_1 = 1;
    }

    // Revision of the objects feature extractor to use.
    ObjectsVersion version = 1;

    /*
     * Names of the output features, listed in the order in which the
     * neural network computes them: the first element is the earliest
     * computed and the last element is the latest computed. In general,
     * this order reflects feature resolution: the earlier a feature is
     * computed, the higher its resolution.
     */
    repeated string output = 100;
  }

  // Vision feature print type. As a oneof, at most one of the members below
  // is set at any time; setting one clears the other.
  oneof VisionFeaturePrintType {
    // Use the scene feature extractor.
    Scene scene = 20;
    // Use the objects feature extractor.
    Objects objects = 21;
  }
}