// Copyright (c) 2019, Apple Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-3-clause license that can be
// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause
/*
* - A Program is the container with the following information
* - set of functions: Function defines a program block to be executed
* - A model can have multiple functions defined and will have a single
* point of entry.
* - A Function consists of
* - List of named inputs and output types
* - A block defining scope for a function - similar to a function in C/C++
* - A Block consists of
* - List of named inputs and output names
* - Topologically sorted Ops
* - An Op consists of
* - List of named inputs and outputs (name, type) pair
* - Optionally, blocks for Control-Flow
*
* Programs, functions, blocks, ops, and tensor types all can contain an
* optional set of attributes.
*
* == Identifiers ==
* Identifiers, generally used for names and keys, must match the
* regular expression [A-Za-z\_][A-Za-z0-9\_@]*
*/
// This schema is written in proto3 syntax.
syntax = "proto3";

// Every definition in this file belongs to the MILSpec package.
package CoreML.Specification.MILSpec;

// Generate code against the lite protobuf runtime.
option optimize_for = LITE_RUNTIME;
// Root container of a MIL program.
message Program {
  // Version of the program.
  int64 version = 1;

  // Functions of the program, keyed by function name.
  // Each name must be unique within the containing program and must be a
  // valid identifier (see the identifier rule in the file header).
  map<string, Function> functions = 2;

  // Documentation string for the program.
  string docString = 3;

  // Additional attributes that no other field describes.
  // Keys must be valid identifiers (see the file header).
  map<string, Value> attributes = 4;
}
// A function defined at program level.
message Function {
  // Unordered (name, ValueType) pairs describing the function inputs.
  // Names must be valid identifiers (see the file header).
  //
  // An input meant to carry an image must be a rank-4 Float32 tensor whose
  // dimensions are read as NCHW, where N == 1 and C is 1 (grayscale) or
  // 3 (RGB).
  repeated NamedValueType inputs = 1;

  // Name of the specialization that supplies the active block.
  // It must be present as a key in `block_specializations`.
  string opset = 2;

  // The named specializations of this function.
  //
  // Each key is the name of the opset that the specialization is written
  // in, and must be a valid identifier (see the file header).
  //
  // The outputs of all blocks must match; together they define the outputs
  // of the function. Every block inherits the function's lexical scope.
  map<string, Block> block_specializations = 3;

  // Additional attributes that no other field describes.
  // Keys must be valid identifiers (see the file header).
  map<string, Value> attributes = 4;
}
// A single-entry, single-exit basic block in SSA form.
message Block {
  // Block-local names for input values. Rarely needed; intended for
  // operators such as while_loop that must name their block inputs.
  repeated NamedValueType inputs = 1;

  // Names given to the values this block returns. Each must be a valid
  // identifier (see the file header).
  //
  // The ValueType of outputs[i] is Operation[j].outputs[k].type, where i
  // indexes the block outputs, j the block's operations, and k the outputs
  // of the j-th operation. The indirection exists because:
  //   1. an operation may produce more than one output, and
  //   2. any output of any operation may also be an output of the block.
  repeated string outputs = 2;

  // Operations contained in this block.
  repeated Operation operations = 3;

  // Additional attributes that no other field describes.
  // Keys must be valid identifiers (see the file header).
  map<string, Value> attributes = 4;
}
// An Argument is a list of Bindings, each of which refers either to a named
// value or to a compile-time constant.
message Argument {
  // One element of an argument list.
  message Binding {
    // Exactly one of the alternatives below is set.
    oneof binding {
      // The name of a previously defined value.
      string name = 1;

      // A compile time constant.
      Value value = 2;
    }
  }

  // The bindings that make up this argument.
  repeated Binding arguments = 1;
}
// One operation (also called a node or layer).
message Operation {
  // The operation type, e.g. "convolution" or "cropResize". The type
  // determines which inputs and outputs are expected.
  string type = 1;

  // Arguments of the operator.
  //
  //   Key:   parameter name
  //   Value: Argument, i.e. the list of bindings for that parameter
  //
  // Each binding is either a string name (an output of an earlier operation
  // or an input given to the model/block/function) or a Value (a value known
  // at compile time for this operation).
  //
  // An Argument usually has length 1, but may be of variable length (e.g.
  // for a concat layer):
  //   e.g. {'stride' : ['input_01']}
  //   e.g. {'x' : ['input_01', 'input_02', 'input_03', false]}
  map<string, Argument> inputs = 2;

  // Names bound to the values this operation returns.
  // Each name must be:
  //  (*) a valid identifier (see the file header); and
  //  (*) unique within the current scope.
  repeated NamedValueType outputs = 3;

  // Nested blocks used by loops and conditionals. A conditional, for
  // example, has two entries here.
  repeated Block blocks = 4;

  // Additional information that no other field captures.
  // Keys must be valid identifiers (see the file header).
  map<string, Value> attributes = 5;
}
// A named, typed parameter: a (name, type) pair.
message NamedValueType {
  // Parameter name. Must be a valid identifier (see the file header).
  string name = 1;

  // The type required for this parameter.
  ValueType type = 2;
}
/* ======== Types ======= */
// Primer: Two fundamental representations of state:
//
// Variable: Variables are NEVER materialized at compile time and are only
// available at run time. Therefore, for Variables we only have ValueType,
// which may have unknown shapes in the IR. Variable encompasses familiar
// concepts such as a placeholder or the output of an Op.
//
// Value: Values are ALWAYS materialized at compile time, and MAY be modified
// at runtime (e.g., during on-device training). Value describes notions
// such as parameter, attributes of an op. Value is either stored inside
// proto (e.g., attributes) or outside of proto (e.g. parameters) and
// NEVER contains unknown shape in the IR.
//
// Comment(daviddai): A Variable with the potential to be materialized at
// compile time (e.g., through constant propagation) is NOT precluded from
// being a Variable. The outputs of certain Ops, such as LoadParameter and
// Const, have the potential to be materialized at compile time but are still
// represented as Variables.
// A type of any kind: tensor, list, tuple, or dictionary.
message ValueType {
  // Exactly one of the alternatives below is set.
  oneof type {
    TensorType tensorType = 1;
    ListType listType = 2;
    TupleType tupleType = 3;
    DictionaryType dictionaryType = 4;
  }
}
// The supported element data types.
//
// There are two ways to hand out IDs: count up from 0 without reserving
// anything while tracking the next free ID, or assign a block of IDs to each
// family (int / float / uint, ...). The values below are grouped per family.
enum DataType {
  // Special types.
  // NOTE(review): the original note reserved 0-10 for this group, yet
  // FLOAT16 already occupies 10, so in practice only 0-9 remain available.
  UNUSED_TYPE = 0;  // not currently in use
  BOOL = 1;
  STRING = 2;  // arbitrary sequence of bytes

  // Floating point types.
  FLOAT16 = 10;
  FLOAT32 = 11;
  FLOAT64 = 12;
  BFLOAT16 = 13;

  // Signed integer types.
  INT8 = 21;
  INT16 = 22;
  INT32 = 23;
  INT64 = 24;

  // Unsigned integer types.
  UINT8 = 31;
  UINT16 = 32;
  UINT32 = 33;
  UINT64 = 34;
}
// The type of a tensor: element data type, rank, and shape.
message TensorType {
  // Data type of the elements stored in a tensor of this type.
  DataType dataType = 1;

  // Number of dimensions in the tensor shape.
  // rank == -1 means the rank is variable (not fixed).
  int64 rank = 2;

  // The shape, one entry per dimension; its length must equal "rank".
  repeated Dimension dimensions = 3;

  // Additional tensor type attributes that no other field describes.
  // Keys must be valid identifiers in MIL text syntax.
  map<string, Value> attributes = 4;
}
// The type of a tuple, defined recursively in terms of ValueType.
message TupleType {
  // The types that make up the tuple.
  repeated ValueType types = 1;
}
// The type of a list.
message ListType {
  // Type of the elements stored in a list of this type.
  ValueType type = 1;

  // Number of elements in a list of this type.
  // May be unknown, i.e. the list has variable length.
  Dimension length = 2;
}
// The type of an unordered key-value mapping.
message DictionaryType {
  // Type of the dictionary keys.
  ValueType keyType = 1;

  // Type of the dictionary values.
  ValueType valueType = 2;
}
// One dimension of a shape: either a constant size or unknown.
message Dimension {
  // A dimension with a fixed size.
  message ConstantDimension {
    // Size of the dimension.
    uint64 size = 1;
  }

  // A dimension with no fixed size.
  message UnknownDimension {
    // NOTE(review): the meaning of `variadic` is not documented in this
    // file - presumably it marks a variable number of dimensions; confirm.
    bool variadic = 1;
  }

  // Exactly one of the alternatives below is set.
  oneof dimension {
    ConstantDimension constant = 1;
    UnknownDimension unknown = 2;
  }
}
/* ======== Values ======= */
// See Variable vs Value primer above.
// A Value as described in the Variable-vs-Value primer earlier in this file:
// its type plus its payload, stored either inline or in a blob file.
message Value {
  // A value stored directly within the proto.
  message ImmediateValue {
    // Exactly one of the alternatives below is set.
    oneof value {
      TensorValue tensor = 1;
      TupleValue tuple = 2;
      ListValue list = 3;
      DictionaryValue dictionary = 4;
    }
  }

  // A reference into a "blob v2" storage file.
  message BlobFileValue {
    // Name of the file.
    string fileName = 1;

    // Byte offset to the metadata.
    uint64 offset = 2;
  }

  // Optional human-readable text.
  string docString = 1;

  // The type of this value.
  ValueType type = 2;

  // Where the payload lives: inline, or in an external blob file.
  // NOTE(review): field number 4 is not used by this message; if it once
  // belonged to a removed field it should be marked `reserved` - confirm.
  oneof value {
    ImmediateValue immediateValue = 3;
    BlobFileValue blobFileValue = 5;
  }
}
// The contents of a tensor, held in one of several element-typed containers.
message TensorValue {
  // Container of 32-bit floating point values.
  message RepeatedFloats {
    repeated float values = 1 [packed = true];
  }

  // Container of 64-bit floating point values.
  message RepeatedDoubles {
    repeated double values = 1 [packed = true];
  }

  // Container of 32-bit signed integers.
  message RepeatedInts {
    repeated int32 values = 1 [packed = true];
  }

  // Container of 64-bit signed integers.
  message RepeatedLongInts {
    repeated int64 values = 1 [packed = true];
  }

  // Container of booleans.
  message RepeatedBools {
    repeated bool values = 1 [packed = true];
  }

  // Container of strings.
  message RepeatedStrings {
    repeated string values = 1;
  }

  // Container holding one `bytes` field; unlike its siblings, `values` is
  // not a repeated field.
  message RepeatedBytes {
    bytes values = 1;
  }

  // Exactly one of the containers below is set.
  oneof value {
    RepeatedFloats floats = 1;
    RepeatedInts ints = 2;
    RepeatedBools bools = 3;
    RepeatedStrings strings = 4;
    RepeatedLongInts longInts = 5;
    RepeatedDoubles doubles = 6;
    RepeatedBytes bytes = 7;
  }
}
// The contents of a tuple, defined recursively in terms of Value.
message TupleValue {
  // The values that make up the tuple.
  repeated Value values = 1;
}
// The contents of a list.
message ListValue {
  // The elements of the list.
  repeated Value values = 1;
}
// The contents of a dictionary, stored as a list of key/value pairs.
message DictionaryValue {
  // A single dictionary entry.
  message KeyValuePair {
    // Key of the entry.
    Value key = 1;

    // Value stored under the key.
    Value value = 2;
  }

  // The entries of the dictionary.
  repeated KeyValuePair values = 1;
}