// Container messages describing the embeddings produced by an embedder model.
syntax = "proto2";

package mediapipe.tasks.components.containers.proto;

// NOTE(review): java_package is explicitly set to the empty string. With an
// explicit empty value, protoc is expected to emit the generated Java outer
// class into the unnamed (default) package instead of deriving one from the
// proto package above. Confirm this is intended, or restore the real Java
// package name.
option java_package = "";
// Name of the generated Java wrapper class for this file.
option java_outer_classname = "EmbeddingsProto";

// Defines a dense floating-point embedding.
message FloatEmbedding {
  // The embedding components as 32-bit floats, serialized with the packed
  // repeated encoding.
  repeated float values = 1 [packed = true];
}

// Defines a dense scalar-quantized embedding.
message QuantizedEmbedding {
  // The quantized embedding components, carried as a single byte string.
  // NOTE(review): presumably one byte per embedding dimension; confirm the
  // exact layout and signedness against the producer.
  optional bytes values = 1;
}

// Embedding result for a given embedder head.
message Embedding {
  // The actual embedding, either floating-point or quantized. At most one of
  // the two members is set at a time.
  oneof embedding {
    FloatEmbedding float_embedding = 1;
    QuantizedEmbedding quantized_embedding = 2;
  }
  // The index of the embedder head that produced this embedding. This is useful
  // for multi-head models.
  optional int32 head_index = 3;
  // The name of the embedder head, which is the corresponding tensor metadata
  // name (if any). This is useful for multi-head models.
  optional string head_name = 4;
}

// Embedding results for a given embedder model.
message EmbeddingResult {
  // The embedding results for each model head, i.e. one for each output tensor.
  repeated Embedding embeddings = 1;
  // The optional timestamp (in milliseconds) of the start of the chunk of data
  // corresponding to these results.
  //
  // This is only used for embedding extraction on time series (e.g. audio
  // embedding). In these use cases, the amount of data to process might
  // exceed the maximum size that the model can process: to solve this, the
  // input data is split into multiple chunks starting at different timestamps.
  optional int64 timestamp_ms = 2;
}