chromium/third_party/mediapipe/src/mediapipe/tasks/cc/components/containers/proto/embeddings.proto

/* Copyright 2022 The MediaPipe Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// proto2 syntax: singular fields in this file carry explicit `optional` labels.
syntax = "proto2";

package mediapipe.tasks.components.containers.proto;

// Java code for this file is generated into the package below, wrapped in an
// outer class named EmbeddingsProto.
option java_package = "com.google.mediapipe.tasks.components.containers.proto";
option java_outer_classname = "EmbeddingsProto";

// Defines a dense floating-point embedding.
message FloatEmbedding {
  // The embedding values. Packed encoding is requested explicitly because
  // proto2 does not pack repeated scalar fields by default; with it, all values
  // are written as a single length-delimited record.
  repeated float values = 1 [packed = true];
}

// Defines a dense scalar-quantized embedding.
message QuantizedEmbedding {
  // The quantized embedding values, stored as a raw byte string.
  // NOTE(review): presumably one byte per embedding dimension; the signedness
  // and scale of each byte are not specified in this file - confirm against the
  // producer of this message.
  optional bytes values = 1;
}

// Embedding result for a given embedder head.
message Embedding {
  // The actual embedding, either floating-point or quantized. At most one of
  // the two members is set at a time: setting one clears the other. The oneof
  // members share the field-number space of this message (1 and 2 are taken).
  oneof embedding {
    // Floating-point representation of the embedding.
    FloatEmbedding float_embedding = 1;
    // Scalar-quantized representation of the embedding.
    QuantizedEmbedding quantized_embedding = 2;
  }
  // The index of the embedder head that produced this embedding. This is useful
  // for multi-head models.
  optional int32 head_index = 3;
  // The name of the embedder head, which is the corresponding tensor metadata
  // name (if any). This is useful for multi-head models.
  optional string head_name = 4;
}

// Embedding results for a given embedder model.
message EmbeddingResult {
  // The embedding results for each model head, i.e. one entry per output
  // tensor.
  repeated Embedding embeddings = 1;
  // The optional timestamp (in milliseconds) of the start of the chunk of data
  // corresponding to these results. Stored as a plain int64 rather than a
  // structured timestamp type.
  //
  // This is only used for embedding extraction on time series (e.g. audio
  // embedding). In these use cases, the amount of data to process might
  // exceed the maximum size that the model can process: to solve this, the
  // input data is split into multiple chunks starting at different timestamps.
  optional int64 timestamp_ms = 2;
}