chromium/components/ml/mojom/web_platform_model.mojom

// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file needs to be kept in sync between:
// - "components/ml/mojom/web_platform_model.mojom" in the chromium repo
// - "src/platform2/ml/mojom/web_platform_model.mojom" in the chromiumos repo
// The modification needed is:
// - "big_buffer.mojom" is included from mojo's base folder in the chromium
//   repo, whereas it is included from "ml/mojom" in the chromiumos repo.

module ml.model_loader.mojom;

import "mojo/public/mojom/base/big_buffer.mojom";

// Model formats that can be loaded. Currently only the TfLite flatbuffer
// format is supported.
// TODO(honglinyu): we may also need to add the versions of the formats, and/or
// we can add feature query interfaces to report the supported formats and
// versions. How to best version the API is still an open question; see
// https://github.com/webmachinelearning/model-loader/issues/25.
[Stable, Extensible]
enum ModelFormat {
  // This means the backend should decide the model format automatically.
  [Default] kAuto = 0,
  kTfLite = 1,
};

// The type of device used to run model inference.
// Currently only CPU is supported.
[Stable, Extensible]
enum DevicePreference {
  // The backend can select the device arbitrarily.
  [Default] kAuto = 0,
  // Prefers running model inference on CPU.
  kCpu = 1,
  // Prefers running the model inference on GPU. If some of the operators are
  // not supported on GPU, the backend can try to select the best fallback
  // strategy: it may run the whole graph on CPU or still run part of the OPs
  // on GPU. We do not directly return an error in this case, to avoid exposing
  // the user's GPU information.
  kGpu = 2,
};

[Stable]
struct CreateModelLoaderOptions {
  // Number of threads used in model inference.
  // 0 means the backend decides automatically (e.g. uses the number of
  // physical cores). Backends normally have an upper cap on the number of
  // threads a model can use, so setting a number larger than the cap makes no
  // difference.
  uint32 num_threads@0 = 0;
  // The format of the model (e.g. "tflite").
  ModelFormat model_format@1;
  // The preferred device to use (e.g. "auto", "cpu" or "gpu").
  DevicePreference device_preference@2;
};
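
// As an illustrative, non-normative sketch only: a C++ client built against
// the generated bindings could populate `CreateModelLoaderOptions` roughly as
// below. The variable name `options` is hypothetical, and the factory that
// consumes the options is defined outside this file.
//
//   auto options = ml::model_loader::mojom::CreateModelLoaderOptions::New();
//   options->num_threads = 0;  // Let the backend choose the thread count.
//   options->model_format = ml::model_loader::mojom::ModelFormat::kTfLite;
//   options->device_preference =
//       ml::model_loader::mojom::DevicePreference::kAuto;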

// Currently, the tensor type is only used to inform the user and is not
// meaningful to `compute()`. The `kUnknown` type just means the type is not
// exposed in the mojo interface; it does not mean the type is "unsupported".
// We define this type to avoid exposing unnecessary TfLite details from the
// beginning. So even if the type is `kUnknown`, the user can still use the
// `compute()` function by feeding in appropriate input binary buffers.
[Stable, Extensible]
enum DataType {
  [Default] kUnknown = 0,
  kInt64 = 1,
  kUint64 = 2,
  kFloat64 = 3,
  kInt32 = 4,
  kUint32 = 5,
  kFloat32 = 6,
  kInt16 = 7,
  kUint16 = 8,
  kFloat16 = 9,
  kInt8 = 10,
  kUint8 = 11,
  kBool = 12,
};

// Represents the information about a tensor. The tensor information of the
// input and output tensors is sent from ml-service to the client as a result
// of model loading.
[Stable]
struct TensorInfo {
  // The total size of the tensor buffer in bytes. This is the most important
  // field: ml-service uses it to check whether an input tensor is valid.
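  // For example, assuming a densely packed layout (an assumption made here
  // only for illustration), a `kFloat32` tensor with dimensions
  // [1, 224, 224, 3] would have
  // byte_size = 1 * 224 * 224 * 3 * 4 = 602112 bytes.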
  uint32 byte_size@0;
  // The type of the tensor data.
  DataType data_type@1;
  // The dimensions of the tensor.
  array<uint32> dimensions@2;
};

// Represents the model information. Currently it contains the tensor info of
// input and output tensors.
[Stable]
struct ModelInfo {
  map<string, TensorInfo> input_tensor_info@0;
  map<string, TensorInfo> output_tensor_info@1;
};

// Used by `Load()` of interface Model.
[Stable, Extensible]
enum LoadModelResult {
  kOk,
  kUnknownError,
  // The model is invalid (e.g. the interpreter fails to parse the model).
  kInvalidModel,
  // The model is not supported (e.g. some OPs are not supported).
  kNotSupported,
};

[Stable, Extensible]
enum CreateModelLoaderResult {
  kOk,
  kUnknownError,
  // The input configuration is not supported.
  kNotSupported,
};

// Used by `Compute()` of interface Model.
[Stable, Extensible]
enum ComputeResult {
  kOk,
  kUnknownError,
  // No model has been loaded yet.
  kModelNotLoaded,
  // The number of inputs is different from that required by the model.
  kIncorrectNumberOfInputs,
  // Some needed inputs are missing.
  kMissingInput,
  // The size of some input buffer is wrong.
  kInvalidInputBufferSize,
};

// Corresponds to the `MLModelLoader` object in the WebIDL definition.
[Stable]
interface ModelLoader {
  // We define this function in the `ModelLoader` interface rather than in
  // `MLService` to avoid unnecessary IPC hops for the model content.
  Load@0(mojo_base.mojom.BigBuffer model_content)
      => (LoadModelResult result, pending_remote<Model>? remote,
          ModelInfo? model_info);
};
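
// As an illustrative, non-normative sketch of how a C++ client of the
// generated bindings might call `Load()`; `loader`, `model_bytes`,
// `ReadTfLiteFlatbuffer` and `OnModelLoaded` are hypothetical names, not part
// of this interface:
//
//   // `loader` is a bound mojo::Remote<ml::model_loader::mojom::ModelLoader>.
//   std::vector<uint8_t> model_bytes = ReadTfLiteFlatbuffer();
//   loader->Load(mojo_base::BigBuffer(model_bytes),
//                base::BindOnce(&OnModelLoaded));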

// Represents a model instance. The user can use its `Compute` interface to run
// model inference.
// Corresponds to the `MLModelLoaded` object in the WebIDL definition.
[Stable]
interface Model {
  // The backend/frontend should already know the dimensions needed for each
  // input/output tensor, so only the buffer contents of the input/output
  // tensors need to be transferred.
  Compute@0(map<string, array<uint8>> input_tensors)
      => (ComputeResult result,
          map<string, array<uint8>>? output_tensors);
};
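
// As an illustrative, non-normative sketch of a `Compute()` call from C++;
// `model`, `info`, `inputs` and `OnComputed` are hypothetical names, and the
// key "input" must match a key in `ModelInfo::input_tensor_info`:
//
//   // `model` is a bound mojo::Remote<ml::model_loader::mojom::Model> and
//   // `info` is the `ModelInfo` returned by `Load()`.
//   base::flat_map<std::string, std::vector<uint8_t>> inputs;
//   inputs["input"] =
//       std::vector<uint8_t>(info->input_tensor_info["input"]->byte_size);
//   model->Compute(std::move(inputs), base::BindOnce(&OnComputed));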