// chromium/third_party/mediapipe/src/mediapipe/tasks/cc/genai/inference/proto/llm_file_metadata.proto

// Copyright 2024 The ODML Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package odml.infra.proto;

import "mediapipe/tasks/cc/genai/inference/proto/llm_params.proto";

option java_package = "com.google.odml.infra.proto";
option java_outer_classname = "LlmFileMetadataProto";

// Describes the contents of an LLM file that `GpuOptimizedTensorLoader` loads.
message LlmFileMetadata {
  // Location and encoding of a single tensor's data within the file.
  message TensorInfo {
    // Element encoding of a tensor's data. Needed because the byte `size`
    // alone can be ambiguous: a 4-bit element, for example, may be either
    // INT4 or UINT4.
    enum DataType {
      UNSPECIFIED = 0;
      FLOAT32 = 1;
      INT8 = 2;
      INT4 = 3;
      UINT4 = 4;
    }

    // Tensor name, for example "params.lm.softmax.logits_ffn.linear.w".
    string name = 1;

    // Byte offset within the file at which this tensor's data is located.
    uint64 offset = 2;

    // Length of this tensor's data, in bytes.
    uint64 size = 3;

    // More precise data type of the tensor, for the cases where `size` is not
    // enough to tell.
    DataType data_type = 4;
  }

  // Entries describing the tensors in the file.
  repeated TensorInfo tensors = 1;

  // Model parameters. NOTE(review): `LlmParameters` is presumably declared in
  // the imported llm_params.proto -- confirm there for its exact contents.
  odml.infra.proto.LlmParameters model_params = 2;

  // LoRA rank, when this file holds a set of LoRA weights.
  int32 lora_rank = 3;
}