chromium/third_party/mediapipe/src/mediapipe/calculators/tensor/inference_calculator.proto

// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";
import "mediapipe/framework/calculator_options.proto";

option java_package = "com.google.mediapipe.calculator.proto";
option java_outer_classname = "InferenceCalculatorProto";

// Full Example:
//
// node {
//   calculator: "InferenceCalculator"
//   input_stream: "TENSOR_IN:image_tensors"
//   output_stream: "TENSOR_OUT:result_tensors"
//   options {
//     [mediapipe.InferenceCalculatorOptions.ext] {
//       model_path: "model.tflite"
//       try_mmap_model: true
//       delegate { gpu {} }
//     }
//   }
// }
message InferenceCalculatorOptions {
  extend mediapipe.CalculatorOptions {
    optional InferenceCalculatorOptions ext = 336783863;
  }

  message Delegate {
    // Default inference provided by tflite.
    message TfLite {}
    // Delegate to run GPU inference.
    // (Can use OpenGL, OpenCL, or Metal depending on the device.)
    message Gpu {
      // Experimental, Android/Linux only. Use TFLite GPU delegate API2 for
      // the NN inference.
      // example:
      //   delegate: { gpu { use_advanced_gpu_api: true } }
      optional bool use_advanced_gpu_api = 1 [default = false];

      // This option is valid for the TFLite GPU delegate API2 only.
      // Choose any of the available APIs to force inference to run on it.
      enum Api {
        ANY = 0;
        OPENGL = 1;
        OPENCL = 2;
      }
      optional Api api = 4 [default = ANY];

      // This option is valid for the TFLite GPU delegate API2 only.
      // Set to true to use 16-bit float precision. If maximum precision is
      // needed, set to false to use 32-bit float calculations only.
      optional bool allow_precision_loss = 3 [default = true];

      // Load a pre-compiled serialized binary cache to accelerate the init
      // process.
      //
      // Kernel caching will only be enabled if this path is set.
      //
      // NOTE: Available for the OpenCL delegate on Android only, when
      // "use_advanced_gpu_api" is set to true and either "model_path" or
      // "model_token" is specified. The cache is saved to:
      //   $cached_kernel_path/basename($model_path).ker
      // or, if model_path is not specified, to:
      //   $cached_kernel_path/$model_token.ker
      // TODO b/330806422 - update to prefer model_token
      //
      // NOTE: Binary cache usage may be skipped if a valid serialized model,
      // specified by "serialized_model_dir", exists.
      //
      // TODO b/202767296 - update to cached_kernel_dir
      optional string cached_kernel_path = 2;
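
      // A minimal example sketch of enabling kernel caching (the cache
      // directory below is an illustrative value):
      //   delegate { gpu { use_advanced_gpu_api: true
      //                    cached_kernel_path: "/data/local/tmp/" } }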

      // A directory used to load and save a pre-compiled serialized model that
      // accelerates the init process.
      //
      // NOTE: Available for the OpenCL delegate on Android only, when
      // "use_advanced_gpu_api" is set to true and "model_token" is set
      // properly.
      //
      // NOTE: The serialized model takes precedence over the binary cache
      // specified by "cached_kernel_path", which can still be used if the
      // serialized model is invalid or missing.
      optional string serialized_model_dir = 7;

      enum CacheWritingBehavior {
        // Do not write any caches.
        NO_WRITE = 0;

        // Try to write caches, log on failure.
        TRY_WRITE = 1;

        // Write caches or return an error if write fails.
        WRITE_OR_ERROR = 2;
      }
      // Specifies how GPU caches are written to disk.
      optional CacheWritingBehavior cache_writing_behavior = 10
          [default = WRITE_OR_ERROR];

      // Unique token identifying the model. Used in conjunction with
      // "serialized_model_dir". It is the caller's responsibility to ensure
      // there is no clash of the tokens.
      optional string model_token = 8;
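
      // An example sketch combining the serialized-model caching options
      // (the directory and token values below are illustrative):
      //   delegate { gpu { use_advanced_gpu_api: true
      //                    serialized_model_dir: "/data/local/tmp/"
      //                    model_token: "my_model_v1"
      //                    cache_writing_behavior: TRY_WRITE } }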

      // Encapsulated compilation/runtime tradeoffs.
      enum InferenceUsage {
        UNSPECIFIED = 0;

        // InferenceRunner will be used only once. Therefore, it is important to
        // minimize bootstrap time as well.
        FAST_SINGLE_ANSWER = 1;

        // Prefer maximizing throughput. The same inference runner will be used
        // repeatedly on different inputs.
        SUSTAINED_SPEED = 2;
      }
      optional InferenceUsage usage = 5 [default = SUSTAINED_SPEED];
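
      // An example sketch selecting the single-answer tradeoff for one-off
      // inference:
      //   delegate { gpu { use_advanced_gpu_api: true
      //                    usage: FAST_SINGLE_ANSWER } }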
    }

    // Android only.
    message Nnapi {
      // Directory to store compilation cache. If unspecified, NNAPI will not
      // try caching the compilation.
      optional string cache_dir = 1;
      // Unique token identifying the model. It is the caller's responsibility
      // to ensure there is no clash of the tokens. If unspecified, NNAPI will
      // not try caching the compilation.
      optional string model_token = 2;
      // The name of an accelerator to be used for the NNAPI delegate, e.g.
      // "google-edgetpu". When not specified, it will be selected by NNAPI.
      optional string accelerator_name = 3;
    }
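
    // An example sketch of an NNAPI delegate config (the cache directory and
    // token below are illustrative values):
    //   delegate { nnapi { cache_dir: "/data/local/tmp/"
    //                      model_token: "my_model_v1"
    //                      accelerator_name: "google-edgetpu" } }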
    message Xnnpack {
      reserved 3;
      // Number of threads for the XNNPACK delegate. (By default, the
      // calculator tries to choose the optimal number of threads depending on
      // the device.)
      optional int32 num_threads = 1 [default = -1];
      // Enables an experimental TfLite feature to directly access the MP input
      // tensors (thereby avoiding copying the data). Note that this requires
      // *all* input tensors to be aligned to tflite::kDefaultTensorAlignment
      // bytes and that the model has no duplicate output tensors (tensors with
      // identical TfLite tensor indices) and no passthrough input->output
      // tensors (input and output tensors with identical TfLite tensor
      // indices).
      optional bool enable_zero_copy_tensor_io = 7;
    }
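
    // An example sketch of an XNNPACK delegate config (the thread count is an
    // illustrative value):
    //   delegate { xnnpack { num_threads: 4 } }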

    oneof delegate {
      TfLite tflite = 1;
      Gpu gpu = 2;
      Nnapi nnapi = 3;
      Xnnpack xnnpack = 4;
    }
  }

  // Path to the TF Lite model (ex: /path/to/modelname.tflite).
  // On mobile, this is generally just modelname.tflite.
  optional string model_path = 1;

  // If true, and model_path is specified, tries to load the model as a
  // memory-mapped file if available. This can be significantly faster than
  // loading the model into a buffer first. If memory mapping is not available
  // or fails, falls back to loading from a buffer.
  optional bool try_mmap_model = 7;

  // Whether the TF Lite GPU or CPU backend should be used. Effective only when
  // input tensors are on CPU. For input tensors on GPU, the GPU backend is
  // always used.
  // DEPRECATED: configure "delegate" instead.
  optional bool use_gpu = 2 [deprecated = true, default = false];

  // Android only. When true, an NNAPI delegate will be used for inference.
  // If NNAPI is not available, then the default CPU delegate will be used
  // automatically.
  // DEPRECATED: configure "delegate" instead.
  optional bool use_nnapi = 3 [deprecated = true, default = false];

  // The number of threads available to the interpreter. Effective only when
  // input tensors are on CPU and 'use_gpu' is false.
  optional int32 cpu_num_thread = 4 [default = -1];

  // TfLite delegate to run inference.
  // If not specified, the TFLite GPU delegate is used by default (as if
  // "gpu {}" were specified), unless GPU support is disabled in the build
  // (i.e., with --define MEDIAPIPE_DISABLE_GPU=1), in which case regular
  // TFLite on CPU is used (as if "tflite {}" were specified), except when
  // building with Emscripten, where XNNPACK is used.
  // NOTE: use_gpu/use_nnapi are ignored if "delegate" is specified. (The
  // delegate takes precedence over the deprecated use_* options.)
  optional Delegate delegate = 5;

  // InputOutputConfig enables reordering the input and output tensors of the
  // InferenceCalculator. This is useful when the tensor order of the
  // InferenceCalculator's inputs and outputs does not match the tensor order
  // of the underlying TFLite model's inputs and outputs. In the example below,
  // Tensor_1 and Tensor_2 are swapped.
  //
  //                 Inference Calculator Inputs
  //    ┌─────▼─────────▼─────────▼────────▼──────────▼──────────┐
  //    │  Tensor_0, Tensor_1, Tensor_2, Tensor_3, Tensor_4      │
  //    └────────────────────────────────────────────────────────┘
  //                      [input_tensor_map]
  //    ┌────────────────────────────────────────────────────────┐
  //    │  Tensor_0, Tensor_2, Tensor_1, Tensor_3, Tensor_4      │
  //    └─────┬─────────┬─────────┬────────┬──────────┬──────────┘
  //          │         │         │        │          │
  //    ┌─────▼─────────▼─────────▼────────▼──────────▼──────────┐
  //    │                       Inference                        │
  //    └─────┬─────────┬─────────┬────────┬──────────┬──────────┘
  //          │         │         │        │          │
  //    ┌─────▼─────────▼─────────▼────────▼──────────▼──────────┐
  //    │  Tensor_0, Tensor_2, Tensor_1, Tensor_3, Tensor_4      │
  //    └────────────────────────────────────────────────────────┘
  //                     [output_tensor_map]
  //    ┌────────────────────────────────────────────────────────┐
  //    │  Tensor_0, Tensor_1, Tensor_2, Tensor_3, Tensor_4      │
  //    └──────┬─────────┬─────────┬────────┬──────────┬─────────┘
  //           ▼         ▼         ▼        ▼          ▼
  //                 Inference Calculator Outputs
  //
  message InputOutputConfig {
    // Defines tensor order described by the indices.
    message TensorIndicesMap {
      // Ordered list of tensor indices describing a bijective mapping between
      // the InferenceCalculator tensor index and the TFLite model I/O index.
      // The nth entry in the list maps the nth InferenceCalculator tensor to
      // the model I/O index given by model_tensor_indices[n]. The size of the
      // list must match the number of tensors in the input or output of the
      // InferenceCalculator.
      //
      // Example configurations:
      // Input (Input<std::vector<Tensor>> or Input<Tensor>::Multiple) config:
      // - Desired inference calculator input order: tensor0, tensor1, tensor2
      // - Given model tensor input order: [Tensor2, Tensor1, Tensor0]
      // - model_tensor_indices: [2, 1, 0]
      // Output (Output<std::vector<Tensor>> or Output<Tensor>::Multiple)
      // config:
      // - Given model tensor output order: [Tensor2, Tensor1, Tensor0]
      // - Desired InferenceCalculator output order: tensor0, tensor1, tensor2
      // - model_tensor_indices: [2, 1, 0]
      repeated int32 model_tensor_indices = 1 [packed = true];
    }
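
    // An example sketch of the index-based remapping described above, using
    // the [2, 1, 0] ordering from the comment:
    //   input_output_config {
    //     input_tensor_indices_map { model_tensor_indices: [2, 1, 0] }
    //   }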

    // Defines tensor order described by the TfLite signature names.
    message TensorNamesMap {
      // Ordered list of tensor names describing a bijective mapping between
      // the InferenceCalculator tensor I/O index and the TFLite model I/O
      // tensor names (only models with a single signature are supported). The
      // nth entry in the list maps the nth InferenceCalculator I/O tensor to
      // the model tensor with the given name. The size of the list must match
      // the number of tensors in the input or output of the
      // InferenceCalculator.
      //
      // Example configurations:
      // Input (Input<std::vector<Tensor>> or Input<Tensor>::Multiple) config:
      // - Desired inference calculator input order: tensor0, tensor1, tensor2
      // - Given model tensor input order: [Tensor2, Tensor1, Tensor0]
      // - tensor_names_map: ["Tensor0", "Tensor1", "Tensor2"]
      // Output (Output<std::vector<Tensor>> or Output<Tensor>::Multiple)
      // config:
      // - Given model tensor output order: [Tensor2, Tensor1, Tensor0]
      // - Desired InferenceCalculator output order: tensor0, tensor1, tensor2
      // - tensor_names_map: ["Tensor0", "Tensor1", "Tensor2"]
      repeated string tensor_names = 1;
    }
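
    // An example sketch of the name-based remapping described above, using
    // the tensor names from the comment:
    //   input_output_config {
    //     output_tensor_names_map {
    //       tensor_names: ["Tensor0", "Tensor1", "Tensor2"]
    //     }
    //   }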

    // Maps the input tensors of the InferenceCalculator to the input tensors of
    // the TFLite model. Values are model I/O tensor indices or tensor names.
    oneof InputTensorMap {
      TensorIndicesMap input_tensor_indices_map = 1;
      TensorNamesMap input_tensor_names_map = 3;
    }

    // Maps the output tensors of the TfLite model to the output tensors of
    // the InferenceCalculator. Values are model I/O tensor indices or tensor
    // names.
    oneof OutputTensorMap {
      TensorIndicesMap output_tensor_indices_map = 2;
      TensorNamesMap output_tensor_names_map = 4;
    }

    // Feedback tensor links are pairs of model input / output tensors where
    // the output should be fed back as input in the next model invocation.
    // This makes it possible to maintain a notion of temporal state by
    // continuously feeding the model's output back into the model's input on
    // each inference step. Note that these feedback tensors must be excluded
    // from the input/output tensor maps above, as they are not used as regular
    // inputs/outputs of the inference calculator.
    message FeedbackTensorLink {
      // TfLite output tensor name from the default TfLite signature to use as
      // the source.
      optional string from_output_tensor_name = 1;
      // TfLite tensor name from the default TfLite signature to pass the input
      // tensor to.
      optional string to_input_tensor_name = 2;
    }

    // Defines a mapping from output tensors to the input tensors they should
    // feed during the next inference invocation.
    repeated FeedbackTensorLink feedback_tensor_links = 5;
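
    // An example sketch of a feedback link (the tensor names below are
    // hypothetical and must match the model's default signature):
    //   input_output_config {
    //     feedback_tensor_links {
    //       from_output_tensor_name: "state_out"
    //       to_input_tensor_name: "state_in"
    //     }
    //   }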
  }

  // Optionally remaps input and output tensors to align the TfLite model's I/O
  // order with the InferenceCalculator's input/output stream order.
  optional InputOutputConfig input_output_config = 8;
}