// Copyright (c) 2017-2019, Apple Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-3-clause license that can be
// found in LICENSE.txt or at https://opensource.org/licenses/BSD-3-Clause
/*
* A neural network is defined through a collection of layers
* and represents a directed acyclic graph (DAG).
* Each layer has a name, a layer type,
* a list of input names, a list of output names,
* and a collection of parameters specific to the layer type.
*
* The graph structure and connectivity of the neural network
* are inferred from the input and output names.
* A neural network starts with the layer
* whose input name is equal to the value specified in
* ``Model.description.input.name``,
* and ends with the layer
* whose output name is equal to the value specified in
* ``Model.description.output.name``.
* Layers must have unique input and output names,
* and a layer may not have input or output names that
* refer to layers that are not yet defined.
*
* For Core ML specification version <=3,
* all inputs are mapped to static rank 5 tensors, with axis notations
* [Sequence, Batch, Channel, Height, Width].
*
* From specification version 4 onwards (iOS >= 13, macOS >= 10.15), more
* options are available (see enums ``NeuralNetworkMultiArrayShapeMapping``,
* ``NeuralNetworkImageShapeMapping``) to map inputs to generic N-Dimensional
* (or N rank) tensors, where N >= 1.
*
* Each layer type may have specific constraints on the ranks of its inputs and
* outputs.
*
* Some of the layers (such as softmax, reduce, etc) have parameters that have
* been described in terms of notational axis "Channel", "Height", "Width" or
* "Sequence". They can be re-interpreted easily in the general ND setting by
* using the following rule:
* "width" is same as axis = -1 (i.e. the last axis from the end)
* "height" is same as axis = -2 (i.e. the second last axis from the end)
* "channel" is same as axis = -3 (i.e. the third last axis from the end)
* "sequence" is same as axis = -5 (i.e. the fifth last axis from the end)
*
* Several layers are available in 3 different variations, with the names
* ending in identifiers: ``like``, ``static`` and ``dynamic``. For instance,
* ``FillLike``, ``FillStatic`` and ``FillDynamic``.
*
* The ``static`` variation generally will have a property corresponding to
* the shape of the output. For instance, if the output of the ``FillStatic``
* layer is desired to be of shape (10, 4), the property ``targetShape`` will
* have to be set to [10, 4].
*
* In the ``dynamic`` case, the shape is an input, hence it can be changed at
* runtime. For instance, for a ``FillDynamic`` layer, the input would have to
* be an array containing the values 10 and 4, if the desired output is of
* shape (10, 4).
*
* Whereas in the ``like`` case, the additional input's shape is used as the
* output shape, ignoring its values. For instance, for a ``FillLike`` layer,
* for an input with shape (10, 4), the output generated will also be of shape
* (10, 4); the values of the input will be ignored.
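*
* As a minimal illustration of these three variations (numpy is used here
* purely to sketch the semantics; it is not part of the specification):
*
* .. code::
*
* import numpy as np
*
* value = 0.25  # the fill value
*
* # FillStatic: the output shape (10, 4) is a layer property (targetShape)
* y_static = np.full((10, 4), value)
*
* # FillDynamic: the output shape arrives at runtime as an input tensor
* shape_input = np.array([10, 4])
* y_dynamic = np.full(tuple(shape_input), value)
*
* # FillLike: only the shape of the extra input matters; its values are ignored
* like_input = np.random.rand(10, 4)
* y_like = np.full(like_input.shape, value)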
*/
syntax = "proto3";
option optimize_for = LITE_RUNTIME;
import public "DataStructures.proto";
import public "Parameters.proto";
package CoreML.Specification;
enum NeuralNetworkMultiArrayShapeMapping {
/*
* Describes how the MultiArray shape for the inputs,
* provided in Features Types proto via model description,
* is mapped to construct tensors that are fed into the Neural Network layers.
*/
/*
* Default legacy value. Only supported for Core ML Specification version
* <= 3.
*
* The default legacy shape mapping resolves all input shapes to a rank 5
* equivalent with axis notation of [Seq, Batch, Channel, Height, Width].
*
* When this enum value is selected,
* the repeated shape field in the message "ArrayFeatureType" in the feature
* types proto must be either of length 1 or of length 3.
*
* The following rule is used to map the values in the shape field to the
* actual tensor shape:
* a rank 1 shape is mapped to the shape [1,1,C,1,1]
* a rank 3 shape is mapped to the shape [1,1,C,H,W]
* At runtime, the first two dimensions (Seq or Batch) can be present as well,
* with non-1 values.
*
* It is invalid to use this enum value if any of the layers added in
* Specification version 4 (iOS >= 13, macOS >= 10.15) or later are used in
* the network. The validator will raise an error in that case.
*/
RANK5_ARRAY_MAPPING = 0;
/*
* The exact shape and rank (i.e. number of dimensions in the shape) of the
* input, as specified in the message "ArrayFeatureType", is passed through to
* the layers. Supported only for Specification version >= 4 (iOS >= 13, macOS
* >= 10.15).
*/
EXACT_ARRAY_MAPPING = 1;
}
enum NeuralNetworkImageShapeMapping {
/*
* Describes how the shape of the input tensors is constructed from image
* inputs.
*/
/*
* In this case, image input is mapped to a rank 5 tensor.
* For Color images, input tensor is shaped as [1,1,3,H,W].
* For Gray images, input tensor is shaped as [1,1,1,H,W].
*/
RANK5_IMAGE_MAPPING = 0;
/*
* For Color images, input tensor is shaped as [1,3,H,W].
* For Gray images, input tensor is shaped as [1,1,H,W].
* Supported only for Specification version >= 4 (iOS >= 13, macOS >= 10.15).
*/
RANK4_IMAGE_MAPPING = 1;
}
/*
A neural network.
*/
message NeuralNetwork {
repeated NeuralNetworkLayer layers = 1;
repeated NeuralNetworkPreprocessing preprocessing = 2;
// use this enum value to determine the input tensor shapes to the neural
// network, for multiarray inputs
NeuralNetworkMultiArrayShapeMapping arrayInputShapeMapping = 5;
// use this enum value to determine the input tensor shapes to the neural
// network, for image inputs
NeuralNetworkImageShapeMapping imageInputShapeMapping = 6;
NetworkUpdateParameters updateParams = 10;
}
// Preprocessing
// -------------
/*
* A neural network preprocessor that
* performs a scalar multiplication of an image
* followed by addition of scalar biases to the channels.
*
* Input: X
* An image in BGR or RGB format with shape ``[3, H, W]``
* or in grayscale format with shape ``[1, H, W]``.
* Output: Y
* An image with format and shape corresponding to the input.
*
* If the input image is in BGR format:
*
* .. code::
*
* Y[0, :, :] = channelScale * X[0, :, :] + blueBias
* Y[1, :, :] = channelScale * X[1, :, :] + greenBias
* Y[2, :, :] = channelScale * X[2, :, :] + redBias
*
* If the input image is in RGB format:
*
* .. code::
*
* Y[0, :, :] = channelScale * X[0, :, :] + redBias
* Y[1, :, :] = channelScale * X[1, :, :] + greenBias
* Y[2, :, :] = channelScale * X[2, :, :] + blueBias
*
* If the input image is in grayscale format:
*
* .. code::
*
* Y[0, :, :] = channelScale * X[0, :, :] + grayBias
*/
message NeuralNetworkImageScaler {
float channelScale = 10; // Scalar to be multiplied.
float blueBias = 20; // Scalar blue bias to be added.
float greenBias = 21; // Scalar green bias to be added.
float redBias = 22; // Scalar red bias to be added.
float grayBias = 30; // Scalar bias to be added for grayscale images.
}
/*
* A neural network preprocessor that
* subtracts the provided mean image from the input image.
* The mean image is subtracted from the input named
* ``NeuralNetworkPreprocessing.featureName``.
*/
message NeuralNetworkMeanImage {
/*
* Mean image stored as a flattened array of floats,
* representing shape [Channel,Height,Width].
*/
repeated float meanImage = 1;
}
// Preprocessing parameters for image inputs.
message NeuralNetworkPreprocessing {
string featureName = 1; // must be equal to the input name to which the
// preprocessing is applied
oneof preprocessor {
NeuralNetworkImageScaler scaler = 10;
NeuralNetworkMeanImage meanImage = 11;
}
}
// Activation Functions
// --------------------
/*
* A rectified linear unit (ReLU) activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \text{max}(0, x)
*/
message ActivationReLU {}
/*
* A leaky rectified linear unit (ReLU) activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \begin{cases}
* x & \text{if } x \geq 0 \\
* \alpha x & \text{if } x < 0
* \end{cases}
*/
message ActivationLeakyReLU {
float alpha = 1; // negative slope value for leakyReLU
}
/*
* A hyperbolic tangent activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \dfrac{1 - e^{-2x}}{1 + e^{-2x}}
*/
message ActivationTanh {}
/*
* A scaled hyperbolic tangent activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \alpha \tanh(\beta x)
*/
message ActivationScaledTanh {
float alpha = 1;
float beta = 2;
}
/*
* A sigmoid activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \dfrac{1}{1 + e^{-x}}
*/
message ActivationSigmoid {}
/*
* A linear activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \alpha x + \beta
*/
message ActivationLinear {
float alpha = 1;
float beta = 2;
}
/*
* A hard sigmoid activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \text{min}(\text{max}(\alpha x + \beta, 0), 1)
*/
message ActivationSigmoidHard {
float alpha = 1;
float beta = 2;
}
/*
* A parameterized rectified linear unit (PReLU) activation function.
* Input must be at least rank 3. Axis = -3 is denoted by "C", or channels.
* "alpha" parameter can be a vector of length C.
*
* This function has the following formula:
*
* .. math::
* f(x_i) = \begin{cases}
* x_i & \text{if } x_i \geq 0 \\
* \alpha_i x_i & \text{if } x_i < 0
* \end{cases} \;,\;i=1,...,C
*/
message ActivationPReLU {
// parameter of length C or 1.
// If length is 1, same value is used for all channels
WeightParams alpha = 1;
}
/*
* An exponential linear unit (ELU) activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \begin{cases}
* x & \text{if } x \geq 0 \\
* \alpha (e^x - 1) & \text{if } x < 0
* \end{cases}
*/
message ActivationELU {
float alpha = 1;
}
/*
* A thresholded rectified linear unit (ReLU) activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \begin{cases}
* x & \text{if } x \geq \alpha \\
* 0 & \text{if } x < \alpha
* \end{cases}
*/
message ActivationThresholdedReLU {
float alpha = 1;
}
/*
* A softsign activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \dfrac{x}{1 + |x|}
*/
message ActivationSoftsign {}
/*
* A softplus activation function.
*
* This function has the following formula:
*
* .. math::
* f(x) = \text{log}(1 + e^x)
*/
message ActivationSoftplus {}
/*
* A parametric softplus activation function.
* Input must be at least rank 3. axis = -3 is denoted by "C", or channels.
* "alpha"/"beta" parameter can be a vector of length C.
*
* This function has the following formula:
*
* .. math::
* f(x_i) = \alpha_i \text{log}(1 + e^{\beta_i x_i}) \;,\;i=1,...,C
*/
message ActivationParametricSoftplus {
// If length is 1, same value is used for all channels
WeightParams alpha = 1; // parameter of length C or 1
WeightParams beta = 2; // parameter of length C or 1
}
message ActivationParams {
oneof NonlinearityType {
ActivationLinear linear = 5;
ActivationReLU ReLU = 10;
ActivationLeakyReLU leakyReLU = 15;
ActivationThresholdedReLU thresholdedReLU = 20;
ActivationPReLU PReLU = 25;
ActivationTanh tanh = 30;
ActivationScaledTanh scaledTanh = 31;
ActivationSigmoid sigmoid = 40;
ActivationSigmoidHard sigmoidHard = 41;
ActivationELU ELU = 50;
ActivationSoftsign softsign = 60;
ActivationSoftplus softplus = 70;
ActivationParametricSoftplus parametricSoftplus = 71;
}
}
/*
* Representation of the intermediate tensors
*/
message Tensor {
// Number of dimensions in the tensor shape
uint32 rank = 1;
// actual value of the tensor shape.
// must be of length "rank". Can contain -1s for unknown dimensions.
repeated int64 dimValue = 2;
}
/*
* A single neural network layer.
*/
message NeuralNetworkLayer {
string name = 1; // descriptive name of the layer
repeated string input = 2;
repeated string output = 3;
repeated Tensor inputTensor =
4; // must be the same length as the "input" field
repeated Tensor outputTensor =
5; // must be the same length as the "output" field
// Must be set to true to mark the layer as updatable.
// If true, the weightParams in the layer's properties must also be set to
// updatable. If false, the value of the isUpdatable parameter within the
// layer's weights is ignored.
bool isUpdatable = 10;
oneof layer {
// Start at 100 here
ConvolutionLayerParams convolution = 100;
PoolingLayerParams pooling = 120;
ActivationParams activation = 130;
InnerProductLayerParams innerProduct = 140;
EmbeddingLayerParams embedding = 150;
// Normalization-related Layers
BatchnormLayerParams batchnorm = 160;
MeanVarianceNormalizeLayerParams mvn = 165;
L2NormalizeLayerParams l2normalize = 170;
SoftmaxLayerParams softmax = 175;
LRNLayerParams lrn = 180;
CropLayerParams crop = 190;
PaddingLayerParams padding = 200;
UpsampleLayerParams upsample = 210;
ResizeBilinearLayerParams resizeBilinear = 211;
CropResizeLayerParams cropResize = 212;
UnaryFunctionLayerParams unary = 220;
// Element-wise Operations
AddLayerParams add = 230;
MultiplyLayerParams multiply = 231;
AverageLayerParams average = 240;
ScaleLayerParams scale = 245;
BiasLayerParams bias = 250;
MaxLayerParams max = 260;
MinLayerParams min = 261;
DotProductLayerParams dot = 270;
ReduceLayerParams reduce = 280;
LoadConstantLayerParams loadConstant = 290;
// Data Reorganization
ReshapeLayerParams reshape = 300;
FlattenLayerParams flatten = 301;
PermuteLayerParams permute = 310;
ConcatLayerParams concat = 320;
SplitLayerParams split = 330;
SequenceRepeatLayerParams sequenceRepeat = 340;
ReorganizeDataLayerParams reorganizeData = 345;
SliceLayerParams slice = 350;
// Recurrent Layers
SimpleRecurrentLayerParams simpleRecurrent = 400;
GRULayerParams gru = 410;
UniDirectionalLSTMLayerParams uniDirectionalLSTM = 420;
BiDirectionalLSTMLayerParams biDirectionalLSTM = 430;
// Custom (user-implemented) Layer
CustomLayerParams custom = 500;
// Following layers are available only after Core ML Specification
// version >= 4 (iOS >= 13, macOS >= 10.15)
// Control Flow related Layers
CopyLayerParams copy = 600;
BranchLayerParams branch = 605;
LoopLayerParams loop = 615;
LoopBreakLayerParams loopBreak = 620;
LoopContinueLayerParams loopContinue = 625;
RangeStaticLayerParams rangeStatic = 635;
RangeDynamicLayerParams rangeDynamic = 640;
// Element-wise Unary Layers
ClipLayerParams clip = 660;
CeilLayerParams ceil = 665;
FloorLayerParams floor = 670;
SignLayerParams sign = 680;
RoundLayerParams round = 685;
Exp2LayerParams exp2 = 700;
SinLayerParams sin = 710;
CosLayerParams cos = 715;
TanLayerParams tan = 720;
AsinLayerParams asin = 730;
AcosLayerParams acos = 735;
AtanLayerParams atan = 740;
SinhLayerParams sinh = 750;
CoshLayerParams cosh = 755;
TanhLayerParams tanh = 760;
AsinhLayerParams asinh = 770;
AcoshLayerParams acosh = 775;
AtanhLayerParams atanh = 780;
ErfLayerParams erf = 790;
GeluLayerParams gelu = 795;
// Element-wise Binary with Broadcasting Support
EqualLayerParams equal = 815;
NotEqualLayerParams notEqual = 820;
LessThanLayerParams lessThan = 825;
LessEqualLayerParams lessEqual = 827;
GreaterThanLayerParams greaterThan = 830;
GreaterEqualLayerParams greaterEqual = 832;
LogicalOrLayerParams logicalOr = 840;
LogicalXorLayerParams logicalXor = 845;
LogicalNotLayerParams logicalNot = 850;
LogicalAndLayerParams logicalAnd = 855;
ModBroadcastableLayerParams modBroadcastable = 865;
MinBroadcastableLayerParams minBroadcastable = 870;
MaxBroadcastableLayerParams maxBroadcastable = 875;
AddBroadcastableLayerParams addBroadcastable = 880;
PowBroadcastableLayerParams powBroadcastable = 885;
DivideBroadcastableLayerParams divideBroadcastable = 890;
FloorDivBroadcastableLayerParams floorDivBroadcastable = 895;
MultiplyBroadcastableLayerParams multiplyBroadcastable = 900;
SubtractBroadcastableLayerParams subtractBroadcastable = 905;
// Tensor Manipulations
TileLayerParams tile = 920;
StackLayerParams stack = 925;
GatherLayerParams gather = 930;
ScatterLayerParams scatter = 935;
GatherNDLayerParams gatherND = 940;
ScatterNDLayerParams scatterND = 945;
SoftmaxNDLayerParams softmaxND = 950;
GatherAlongAxisLayerParams gatherAlongAxis = 952;
ScatterAlongAxisLayerParams scatterAlongAxis = 954;
ReverseLayerParams reverse = 960;
ReverseSeqLayerParams reverseSeq = 965;
SplitNDLayerParams splitND = 975;
ConcatNDLayerParams concatND = 980;
TransposeLayerParams transpose = 985;
SliceStaticLayerParams sliceStatic = 995;
SliceDynamicLayerParams sliceDynamic = 1000;
SlidingWindowsLayerParams slidingWindows = 1005;
TopKLayerParams topK = 1015;
ArgMinLayerParams argMin = 1020;
ArgMaxLayerParams argMax = 1025;
EmbeddingNDLayerParams embeddingND = 1040;
BatchedMatMulLayerParams batchedMatmul = 1045;
// Tensor Allocation / Reshape-related Operations
GetShapeLayerParams getShape = 1065;
LoadConstantNDLayerParams loadConstantND = 1070;
FillLikeLayerParams fillLike = 1080;
FillStaticLayerParams fillStatic = 1085;
FillDynamicLayerParams fillDynamic = 1090;
BroadcastToLikeLayerParams broadcastToLike = 1100;
BroadcastToStaticLayerParams broadcastToStatic = 1105;
BroadcastToDynamicLayerParams broadcastToDynamic = 1110;
SqueezeLayerParams squeeze = 1120;
ExpandDimsLayerParams expandDims = 1125;
FlattenTo2DLayerParams flattenTo2D = 1130;
ReshapeLikeLayerParams reshapeLike = 1135;
ReshapeStaticLayerParams reshapeStatic = 1140;
ReshapeDynamicLayerParams reshapeDynamic = 1145;
RankPreservingReshapeLayerParams rankPreservingReshape = 1150;
ConstantPaddingLayerParams constantPad = 1155;
// Random Distributions
RandomNormalLikeLayerParams randomNormalLike = 1170;
RandomNormalStaticLayerParams randomNormalStatic = 1175;
RandomNormalDynamicLayerParams randomNormalDynamic = 1180;
RandomUniformLikeLayerParams randomUniformLike = 1190;
RandomUniformStaticLayerParams randomUniformStatic = 1195;
RandomUniformDynamicLayerParams randomUniformDynamic = 1200;
RandomBernoulliLikeLayerParams randomBernoulliLike = 1210;
RandomBernoulliStaticLayerParams randomBernoulliStatic = 1215;
RandomBernoulliDynamicLayerParams randomBernoulliDynamic = 1220;
CategoricalDistributionLayerParams categoricalDistribution = 1230;
// Reduction-related Layers:
ReduceL1LayerParams reduceL1 = 1250;
ReduceL2LayerParams reduceL2 = 1255;
ReduceMaxLayerParams reduceMax = 1260;
ReduceMinLayerParams reduceMin = 1265;
ReduceSumLayerParams reduceSum = 1270;
ReduceProdLayerParams reduceProd = 1275;
ReduceMeanLayerParams reduceMean = 1280;
ReduceLogSumLayerParams reduceLogSum = 1285;
ReduceSumSquareLayerParams reduceSumSquare = 1290;
ReduceLogSumExpLayerParams reduceLogSumExp = 1295;
// Masking / Selection Layers
WhereNonZeroLayerParams whereNonZero = 1313;
MatrixBandPartLayerParams matrixBandPart = 1315;
LowerTriangularLayerParams lowerTriangular = 1320;
UpperTriangularLayerParams upperTriangular = 1325;
WhereBroadcastableLayerParams whereBroadcastable = 1330;
// Normalization Layers
LayerNormalizationLayerParams layerNormalization = 1350;
NonMaximumSuppressionLayerParams NonMaximumSuppression = 1400;
// Following layers are available only after Core ML Specification
// version >= 5 (iOS >= 14, macOS >= 11.0)
OneHotLayerParams oneHot = 1450;
CumSumLayerParams cumSum = 1455;
ClampedReLULayerParams clampedReLU = 1460;
ArgSortLayerParams argSort = 1461;
Pooling3DLayerParams pooling3d = 1465;
GlobalPooling3DLayerParams globalPooling3d = 1466;
SliceBySizeLayerParams sliceBySize = 1470;
Convolution3DLayerParams convolution3d = 1471;
}
}
/*
* Branching Layer
*
* A layer that provides the functionality of branching or an If-Else block.
*
* Must have 1 input. There are no outputs as the execution is transferred to
* either the if or the else branch based on the value of the input.
*
* Input is the condition predicate. Must be a scalar (length 1 tensor).
*
*/
message BranchLayerParams {
/*
* Execute this graph if the absolute value of the input tensor is greater
* than 1e-6. This must be present.
*/
NeuralNetwork ifBranch = 1;
/*
* Execute this graph if the absolute value of the input tensor is less than
* 1e-6. This is optional.
*/
NeuralNetwork elseBranch = 2;
}
/*
* Loop Layer
*
* A layer that provides the functionality of a "for" loop or a "while" loop.
*
* There are either no inputs or 1 input. When an input is present, it
* corresponds to the maximum loop count; in that case the value of the
* "maxLoopIterations" field is ignored. The input must be a scalar. (For the
* description below, maxLoopIterations is assumed to be the value of the
* input, when it is present.)
*
* No outputs are produced. Blobs produced by the condition or the body network
* are visible in the scope of the overall network.
*
* "conditionNetwork" must produce a tensor with the name specified in the
* "conditionVar" field.
*
* There are 3 possible cases for determining the termination condition:
*
* Case 1:
*
* If there is no "conditionNetwork", in this case the layer corresponds to a
* pure for loop, which is run "maxLoopIterations" number of times. Equivalent
* pseudo-code:
*
* for loopIterator = 0 : maxLoopIterations
* bodyNetwork()
*
*
* Case 2:
*
* "conditionNetwork" is present, and "maxLoopIterations" is 0 and there is no
* input, in this case the layer corresponds to a while loop. Equivalent
* pseudo-code:
*
* conditionVar = conditionNetwork()
* while conditionVar:
* bodyNetwork()
* conditionVar = conditionNetwork()
*
*
* Case 3:
*
* "conditionNetwork" is provided, and "maxLoopIterations" is positive or there
* is an input, in this case the layer corresponds to a while loop with a joint
* condition. Equivalent pseudo-code:
*
* loopIterator = 0
* conditionVar = conditionNetwork()
* while (conditionVar and loopIterator < maxLoopIterations):
* bodyNetwork()
* loopIterator = loopIterator + 1
* conditionVar = conditionNetwork()
*
*/
message LoopLayerParams {
/*
* maximum number of iterations. Ignored if input is present.
*/
uint64 maxLoopIterations = 1;
/*
* This field provides the name of the tensor which is produced by the
* conditionNetwork and whose value is checked to start/continue/terminate
* the loop. A value close to 0.0f is treated as False. This field is
* optional. Must be a non-empty string if and only if "conditionNetwork" is
* present.
*/
string conditionVar = 2;
/*
* Must generate a tensor with the name provided in the "conditionVar" field.
* This field is optional.
* Must be present if and only if the "conditionVar" field is a non-empty string.
*/
NeuralNetwork conditionNetwork = 3;
/*
* Body of the loop.
* This field must be present.
*/
NeuralNetwork bodyNetwork = 4;
}
/*
* Loop break Layer
*
* Terminate the loop that has this layer.
* If present, it should always reside in the "bodyNetwork" of the loop layer
*
* No inputs/outputs
*
*/
message LoopBreakLayerParams {}
/*
* Loop Continue Layer
*
* Stop the current loop iteration and continue on the next iteration.
* If present, it should always reside in the "bodyNetwork" of the loop layer
*
* No inputs/outputs
*
*/
message LoopContinueLayerParams {}
/*
* Copy Layer
*
* A layer that copies its input tensor to the output tensor.
* Must have 1 input and 1 output, with distinct names.
* This is the only layer that is allowed to re-generate an output that is
* already present in the neural network prior to this layer, in which case it
* will overwrite the output tensor.
*
*/
message CopyLayerParams {}
/*
* GreaterThan Layer
*
* Either 1 or 2 inputs.
* Produces 1 output.
* Perform elementwise greater than operation.
*
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = x1 > x2
* or
* y = x1 > alpha, if only one input is provided
*
* Broadcasting is supported.
*
*/
message GreaterThanLayerParams {
/*
* Compare to the scalar value provided here if there is 1 input
*/
float alpha = 2;
}
/*
* GreaterEqual Layer
*
* Either 1 or 2 inputs.
* Produces 1 output.
* Perform elementwise greater equal operation.
*
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = x1 >= x2
* or
* y = x1 >= alpha, if only one input is provided
*
* Broadcasting is supported.
*
*/
message GreaterEqualLayerParams {
/*
* Compare to the scalar value provided here if there is 1 input
*/
float alpha = 2;
}
/*
* LessThan Layer
*
* Either 1 or 2 inputs.
* Produces 1 output.
* Perform elementwise less than operation.
*
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = x1 < x2
* or
* y = x1 < alpha, if only one input is provided
*
* Broadcasting is supported.
*
*/
message LessThanLayerParams {
/*
* Compare to the scalar value provided here if there is 1 input
*/
float alpha = 2;
}
/*
* LessEqual Layer
*
* Either 1 or 2 inputs.
* Produces 1 output.
* Perform elementwise less equal operation.
*
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = x1 <= x2
* or
* y = x1 <= alpha, if only one input is provided
*
* Broadcasting is supported.
*
*/
message LessEqualLayerParams {
/*
* Compare to the scalar value provided here if there is 1 input
*/
float alpha = 2;
}
/*
* Equal Layer
*
* Either 1 or 2 inputs.
* Produces 1 output.
* Perform elementwise equal operation.
*
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = x1 == x2
* or
* y = x1 == alpha, if only one input is provided
*
* Broadcasting is supported.
*
*/
message EqualLayerParams {
/*
* Compare to the scalar value provided here if there is 1 input
*/
float alpha = 1;
}
/*
* NotEqual Layer
*
* Either 1 or 2 inputs.
* Produces 1 output.
* Perform elementwise not equal operation.
*
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = x1 != x2
* or
* y = x1 != alpha, if only one input is provided
*
* Broadcasting is supported.
*
*/
message NotEqualLayerParams {
/*
* Compare to the scalar value provided here if there is 1 input
*/
float alpha = 1;
}
/*
* LogicalAnd Layer
*
* Must have 2 inputs, produces 1 output.
* Perform elementwise logical AND operation.
*
* Input is considered False if equal to 0.0f otherwise True.
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = AND(x1, x2)
*
* Broadcasting is supported.
*
*/
message LogicalAndLayerParams {}
/*
* LogicalOr Layer
*
* Must have 2 inputs, produces 1 output.
* Perform elementwise logical OR operation.
*
* Input is considered False if equal to 0.0f otherwise True.
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = OR(x1, x2)
*
* Broadcasting is supported.
*
*/
message LogicalOrLayerParams {}
/*
* LogicalXor Layer
*
* Must have 2 inputs, produces 1 output.
* Perform elementwise logical XOR operation.
*
* Input is considered False if equal to 0.0f otherwise True.
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = XOR(x1, x2)
*
* Broadcasting is supported.
*
*/
message LogicalXorLayerParams {}
/*
* LogicalNot Layer
*
* Must have 1 input, produces 1 output.
* Perform elementwise logical NOT operation.
*
* Input is considered False if equal to 0.0f otherwise True.
* Output is 1.0f if the condition is true otherwise 0.0f.
*
* .. code::
*
* y = NOT(x)
*
*
*/
message LogicalNotLayerParams {}
// Border Amounts
// --------------
/*
* Specifies the amount of spatial border to be either padded or cropped.
*
* For padding:
*
* .. code::
*
* H_out = borderAmounts[0].startEdgeSize + H_in + borderAmounts[0].endEdgeSize
* W_out = borderAmounts[1].startEdgeSize + W_in + borderAmounts[1].endEdgeSize
*
* topPaddingAmount == Height startEdgeSize
* bottomPaddingAmount == Height endEdgeSize
* leftPaddingAmount == Width startEdgeSize
* rightPaddingAmount == Width endEdgeSize
*
* For cropping:
*
* .. code::
*
* H_out = (-borderAmounts[0].startEdgeSize) + H_in + (-borderAmounts[0].endEdgeSize)
* W_out = (-borderAmounts[1].startEdgeSize) + W_in + (-borderAmounts[1].endEdgeSize)
*
* topCropAmount == Height startEdgeSize
* bottomCropAmount == Height endEdgeSize
* leftCropAmount == Width startEdgeSize
* rightCropAmount == Width endEdgeSize
*/
message BorderAmounts {
message EdgeSizes {
/*
* The amount to be padded or cropped from the beginning.
*/
uint64 startEdgeSize = 1;
/*
* The amount to be padded or cropped from the end.
*/
uint64 endEdgeSize = 2;
}
/*
* The border amounts.
* This must be length 2 in the order ``[H, W]``.
*/
repeated EdgeSizes borderAmounts = 10;
}
/*
* Specifies the type of padding to be used with Convolution/Deconvolution and
* Pooling layers. After padding, input spatial shape: ``[H_in, W_in]``, gets
* modified to the output spatial shape ``[H_out, W_out]``.
*
* .. code::
*
* topPaddingAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize
* bottomPaddingAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize
* leftPaddingAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize
* rightPaddingAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize
*
* With Convolution or Pooling:
*
* .. code::
*
* H_out = int_division_round_down((H_in + topPaddingAmount + bottomPaddingAmount - KernelSize[0]), stride[0]) + 1
*
* which is same as:
*
* .. code::
*
* H_out = int_division_round_up((H_in + topPaddingAmount + bottomPaddingAmount - KernelSize[0] + 1), stride[0])
*
* With Deconvolution:
*
* .. code::
*
* H_out = (H_in - 1) * stride[0] + kernelSize[0] - (topPaddingAmount + bottomPaddingAmount)
*
*
* The equivalent expressions hold true for ``W_out`` as well.
*
*
* By default, the values of ``paddingAmounts`` are set to ``0``,
* which results in a "true" valid padding.
* If non-zero values are provided for ``paddingAmounts``,
* "valid" convolution/pooling is performed within the spatially expanded input.
*
*/
message ValidPadding {
BorderAmounts paddingAmounts = 1;
}
/*
* Specifies the type of padding to be used with Convolution/Deconvolution and
* pooling layers. After padding, input spatial shape: ``[H_in, W_in]``, gets
* modified to the output spatial shape ``[H_out, W_out]``. With Convolution or
* pooling:
*
* .. code::
*
* H_out = int_division_round_up(H_in,stride[0])
* W_out = int_division_round_up(W_in,stride[1])
*
* This is achieved by using the following padding amounts:
*
* .. code::
*
* totalPaddingHeight = max(0,(H_out-1) * stride[0] + KernelSize[0] - Hin)
* totalPaddingWidth = max(0,(W_out-1) * stride[1] + KernelSize[1] - Win)
*
* There are two modes of asymmetry:
* ``BOTTOM_RIGHT_HEAVY``, and ``TOP_LEFT_HEAVY``.
*
* If the mode is ``BOTTOM_RIGHT_HEAVY``:
*
* .. code::
*
* topPaddingAmount = floor(totalPaddingHeight / 2)
* bottomPaddingAmount = totalPaddingHeight - topPaddingAmount
* leftPaddingAmount = floor(totalPaddingWidth / 2)
* rightPaddingAmount = totalPaddingWidth - leftPaddingAmount
*
* If the mode is ``TOP_LEFT_HEAVY``:
*
* .. code::
*
* bottomPaddingAmount = floor(totalPaddingHeight / 2)
* topPaddingAmount = totalPaddingHeight - bottomPaddingAmount
* rightPaddingAmount = floor(totalPaddingWidth / 2)
* leftPaddingAmount = totalPaddingWidth - rightPaddingAmount
*
*
* With Deconvolution:
*
* .. code::
*
* H_out = H_in * stride[0]
* W_out = W_in * stride[1]
*/
message SamePadding {
enum SamePaddingMode {
BOTTOM_RIGHT_HEAVY = 0;
TOP_LEFT_HEAVY = 1;
}
SamePaddingMode asymmetryMode = 1;
}
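/*
* A sketch of how the ``SamePadding`` amounts described above can be computed
* for one spatial dimension (Python is used purely for illustration; the
* helper name is not part of the specification).
*
* .. code::
*
* import math
*
* def same_padding_1d(in_size, kernel, stride, top_left_heavy=False):
*     out_size = math.ceil(in_size / stride)  # output size for SAME padding
*     total = max(0, (out_size - 1) * stride + kernel - in_size)
*     small, large = total // 2, total - total // 2
*     # BOTTOM_RIGHT_HEAVY puts the extra padding at the end,
*     # TOP_LEFT_HEAVY puts it at the beginning.
*     return (large, small) if top_left_heavy else (small, large)
*
* # e.g. H_in = 8, kernel 3, stride 2 -> (0, 1) with BOTTOM_RIGHT_HEAVY
* print(same_padding_1d(8, 3, 2))
*/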
/*
* Specifies how grid points are sampled from an interval.
* Without loss of generality, assume the interval to be [0, X-1], from which
* N points are to be sampled. Here X may correspond to an input image's
* height or width. All the methods can be expressed in terms of numpy's
* linspace function, along with the constraint that grid points have to lie
* in the interval [0, X-1]. Note: numpy.linspace(start=start, stop=end,
* num=N, endpoint=True) corresponds to sampling N points uniformly from the
* interval [start, end], endpoints included. The methods vary in how the
* ``start`` and ``end`` values are computed.
*/
message SamplingMode {
enum Method {
/*
* start = 0, end = X-1
* grid points = numpy.linspace(start, end)
*/
STRICT_ALIGN_ENDPOINTS_MODE = 0;
/*
* if N == 1: start = end = (X-1)/2
* otherwise, start = 0, end = X-1
* grid points = numpy.linspace(start, end)
*/
ALIGN_ENDPOINTS_MODE = 1;
/*
* start = 0, end = X - X/N
* grid points = min(X-1, numpy.linspace(start, end))
* This is same as the mode used in the upsample layer in this
* specification, when used with bilinear interpolation. In that case N/X =
* upsample ratio.
*/
UPSAMPLE_MODE = 2;
/*
* spacing = max(1, X-1)/N
* start = 0.5 * spacing
* end = start + (N-1) * spacing
* grid points = min(X-1, numpy.linspace(start, end))
*/
ROI_ALIGN_MODE = 3;
}
Method samplingMethod = 1;
}
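/*
* The four sampling methods above can be written down with numpy as follows
* (an illustrative sketch only; clipping to [0, X-1] is applied where the
* descriptions call for it).
*
* .. code::
*
* import numpy as np
*
* def grid_points(X, N, method):
*     if method == "STRICT_ALIGN_ENDPOINTS_MODE":
*         return np.linspace(0, X - 1, num=N)
*     if method == "ALIGN_ENDPOINTS_MODE":
*         return np.array([(X - 1) / 2.0]) if N == 1 else np.linspace(0, X - 1, num=N)
*     if method == "UPSAMPLE_MODE":
*         return np.minimum(X - 1, np.linspace(0, X - X / N, num=N))
*     if method == "ROI_ALIGN_MODE":
*         spacing = max(1, X - 1) / N
*         start = 0.5 * spacing
*         return np.minimum(X - 1, np.linspace(start, start + (N - 1) * spacing, num=N))
*     raise ValueError(method)
*/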
/*
* Specifies the convention used to specify four bounding box coordinates for an
* image of size (Height, Width). The (0,0) coordinate corresponds to the
* top-left corner of the image.
*/
message BoxCoordinatesMode {
enum Coordinates {
/*
* [h_start, w_start, h_end, w_end]
*/
CORNERS_HEIGHT_FIRST = 0;
/*
* [w_start, h_start, w_end, h_end]
*/
CORNERS_WIDTH_FIRST = 1;
/*
* [h_center, w_center, box_height, box_width]
*/
CENTER_SIZE_HEIGHT_FIRST = 2;
/*
* [w_center, h_center, box_width, box_height]
*/
CENTER_SIZE_WIDTH_FIRST = 3;
}
Coordinates boxMode = 1;
}
/*
* Weights for layer parameters.
* Weights are stored as repeated floating point numbers
* using row-major ordering
* and can represent 1-, 2-, 3-, or 4-dimensional data.
*/
message WeightParams {
/*
* Values specified in single / float / FP32 precision.
*/
repeated float floatValue = 1;
/*
* Values in 16-bit half precision floating point.
*/
bytes float16Value = 2;
/*
* Raw value specification for quantized lower precisions.
*
* This field is interpreted as uintN, where N is the number of bits in
* quantization. E.g. if n=8, the field is interpreted as an array of UINT8.
* Use this field for quantized parameters unless specifically noted to use
* int8RawValue.
*/
bytes rawValue = 30;
/*
* Field to be used if int8DynamicQuantize is set in the parent layer.
* Cannot be set if rawValue is also set.
* The values in this field are interpreted as INT8.
*
* If this field is set, the following conditions must hold true:
* * QuantizationType == LinearQuantizationParams, such that
* * size of the "scale" field is 1 and "bias" field is empty in
* "LinearQuantizationParams"
*/
bytes int8RawValue = 31;
/*
* Quantization related parameters.
*/
QuantizationParams quantization = 40;
bool isUpdatable = 50;
}
/*
* Quantization parameters.
*/
message QuantizationParams {
uint64 numberOfBits = 1;
oneof QuantizationType {
LinearQuantizationParams linearQuantization = 101;
LookUpTableQuantizationParams lookupTableQuantization = 102;
}
}
message LinearQuantizationParams {
/*
* Stores scale and bias values corresponding to the quantized weights.
* Must be an array of 1 element, or an array of C elements, where C
* is the number of output channels. For recurrent layers it is equal to
* the output vector size.
*
* Relationship between quantized weights, unquantized weights, scale and
* bias:
*
* W_unquantized = W_quantized * scale + bias
*
*/
repeated float scale = 1;
repeated float bias = 2;
}
message LookUpTableQuantizationParams {
/* Stores look-up table quantization values. Must be an array of
(2^numberOfBits) Elements.
*/
repeated float floatValue = 1;
}
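/*
* A sketch (numpy) of how the stored quantized values relate to the
* dequantized weights for the two quantization types above; the helper names
* are illustrative and not part of the specification.
*
* .. code::
*
* import numpy as np
*
* def dequantize_linear(quantized, scale, bias=0.0):
*     # W_unquantized = W_quantized * scale + bias
*     # scale/bias have one element, or one element per output channel
*     return quantized.astype(np.float32) * scale + bias
*
* def dequantize_lut(indices, lookup_table):
*     # each stored value indexes a table of 2^numberOfBits floats
*     return np.asarray(lookup_table, dtype=np.float32)[indices]
*
* # 8-bit linear example with a single scale and no bias
* w_q = np.array([0, 128, 255], dtype=np.uint8)
* print(dequantize_linear(w_q, scale=0.1))
*/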
// Layers
// ------
/*
* A layer that performs spatial convolution or deconvolution.
*
* .. code::
*
* y = ConvolutionLayer(x)
*
* Requires 1 or 2 inputs and produces 1 output.
*
* Input
* First Input:
* A blob with rank greater than or equal to 4.
* Rank 4 blob represents [Batch, channels, height, width].
* For ranks greater than 4, the leading dimensions, starting from 0 to -4
* (inclusive), are all treated as batch.
*
* From Core ML specification version 4 onwards (iOS >= 13, macOS >= 10.15),
* the convolution layer can have 2 inputs, in which case the second input is
* the blob representing the weights. This is allowed when "isDeconvolution"
* = False. The weight blob should have shape
* ``[outputChannels, kernelChannels, kernelHeight, kernelWidth]``,
* where kernelChannels == inputChannels / nGroups.
*
* Output
* Rank is same as the input. e.g.: for rank 4 input, output shape is [B,
* C_out, H_out, W_out]
*
*
* If ``dilationFactor`` is not 1, effective kernel size is
* modified as follows:
*
* .. code::
*
* KernelSize[0] <-- (kernelSize[0]-1) * dilationFactor[0] + 1
* KernelSize[1] <-- (kernelSize[1]-1) * dilationFactor[1] + 1
*
* Type of padding can be ``valid`` or ``same``. Output spatial dimensions
* depend on the type of padding. For details, refer to the descriptions of
* the messages "ValidPadding" and "SamePadding". Padded values are all zeros.
*
* For Deconvolution, ``ConvolutionPaddingType`` (``valid`` or ``same``) is
* ignored when ``outputShape`` is set.
*
*
*/
message ConvolutionLayerParams {
/*
* The number of kernels.
* Same as ``C_out`` used in the layer description.
*/
uint64 outputChannels = 1;
/*
* Channel dimension of the kernels.
* Must be equal to ``inputChannels / nGroups``, if isDeconvolution == False
* Must be equal to ``inputChannels``, if isDeconvolution == True
*/
uint64 kernelChannels = 2;
/*
* Group convolution, i.e. weight reuse along channel axis.
* Input and kernels are divided into g groups
* and convolution / deconvolution is applied within the groups independently.
* If not set or 0, it is set to the default value 1.
*/
uint64 nGroups = 10;
/*
* Must be length 2 in the order ``[H, W]``.
* If not set, default value ``[3, 3]`` is used.
*/
repeated uint64 kernelSize = 20;
/*
* Must be length 2 in the order ``[H, W]``.
* If not set, default value ``[1, 1]`` is used.
*/
repeated uint64 stride = 30;
/*
* Must be length 2 in order ``[H, W]``.
* If not set, default value ``[1, 1]`` is used.
* It is ignored if ``isDeconvolution == true``.
*/
repeated uint64 dilationFactor = 40;
/*
* The type of padding.
*/
oneof ConvolutionPaddingType {
ValidPadding valid = 50;
SamePadding same = 51;
}
/*
* Flag to specify whether it is a deconvolution layer.
*/
bool isDeconvolution = 60;
/*
* Flag to specify whether a bias is to be added or not.
*/
bool hasBias = 70;
/*
* Weights associated with this layer.
* If convolution (``isDeconvolution == false``), weights have the shape
* ``[outputChannels, kernelChannels, kernelHeight, kernelWidth]``,
* where kernelChannels == inputChannels / nGroups.
* If deconvolution (``isDeconvolution == true``), weights have the shape
* ``[kernelChannels, outputChannels / nGroups, kernelHeight, kernelWidth]``,
* where kernelChannels == inputChannels.
*/
WeightParams weights = 90;
WeightParams bias = 91; // Must be of size [outputChannels].
/*
* The output shape, which has length 2 ``[H_out, W_out]``.
* This is used only for deconvolution (``isDeconvolution == true``).
* If not set, the deconvolution output shape is calculated
* based on ``ConvolutionPaddingType``.
*/
repeated uint64 outputShape = 100;
}
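/*
* A sketch of the output spatial size of the convolution layer above, for one
* spatial dimension (illustrative only; the helper name is not part of the
* specification).
*
* .. code::
*
* import math
*
* def conv_output_size(in_size, kernel, stride, dilation=1,
*                      pad_start=0, pad_end=0, same=False):
*     k_eff = (kernel - 1) * dilation + 1  # effective kernel size with dilation
*     if same:
*         return math.ceil(in_size / stride)  # SamePadding
*     # ValidPadding with explicit border amounts (default 0)
*     return (in_size + pad_start + pad_end - k_eff) // stride + 1
*
* # e.g. H_in = 32, 3x3 kernel, stride 1, dilation 2 -> effective kernel 5, H_out = 28
* print(conv_output_size(32, 3, 1, dilation=2))
*/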
/*
* A layer that performs a 3-dimensional convolution.
*
* .. code::
*
* y = Convolution3DLayer(x)
*
* Input
* A blob of rank 5.
* The input blob's shape should be ``[batch, channels, depth, height,
* width]``.
*
* Fields
* The bias field, if set, should have shape of ``[channelsOut]``.
*
* Output
* A blob of rank 5.
* The output blob's shape is ``[batch, channelsOut, depthOut, heightOut,
* widthOut]``.
*
* Type of padding can be ``custom``, ``valid``, or ``same``. Padded values are
* all zeros. Output spatial dimensions depend on the type of padding. For
* details, refer to the descriptions of the ``PaddingType`` field of this
* ``Convolution3DLayerParams`` message.
*
* Example
* For example, given an input of size ``[1, 3, 3, 8, 8]``, a stride of 2 in
* each dimension, a kernel of 3 in each dimension, 2 output channels, and
* ``same`` padding, this layer will compute the total padding applied in the
* depth, height, and width dimensions to be 2, 1, and 1, respectively. The
* depth padding is even and will be applied equally to both sides of the depth
* dimension. Since the height and width padding values are odd, they'll be
* applied to the bottom/right of the height/width dimensions. Thus, the padding
* applied to the input will be
* ``[1, 1, 0, 1, 0, 1]`` (front, back, top, bottom, left, right). Finally,
* the output produced will have size ``[1, 2, 2, 4, 4]``.
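*
* The padding arithmetic in this example can be checked with a few lines of
* Python (a sketch only, not part of the specification):
*
* .. code::
*
* import math
*
* def same_pad_3d(in_size, kernel, stride):
*     out = math.ceil(in_size / stride)
*     total = max(0, (out - 1) * stride + kernel - in_size)
*     # odd totals put the extra padding on the back/bottom/right side
*     return total // 2, total - total // 2
*
* # input [1, 3, 3, 8, 8], kernel 3 and stride 2 in every spatial dimension
* for dim in (3, 8, 8):                # depth, height, width
*     print(same_pad_3d(dim, 3, 2))    # -> (1, 1), (0, 1), (0, 1)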
*
*/
message Convolution3DLayerParams {
/*
* The number of channels in the output (channelsOut). Must be a positive
* integer.
*/
int32 outputChannels = 1;
/*
* The number of channels in the input (channels). Must be a positive integer.
*/
int32 inputChannels = 2;
/*
* Group convolution, i.e., weight reuse along the channel axis.
* It must evenly divide both the number of input and output channels and be
* at most the number of input channels (a depthwise convolution). Input and
* kernels are divided into g groups and convolution is applied within the
* groups independently.
*/
int32 nGroups = 10;
/* Depth of the convolution kernel. Must be a positive integer.
*/
int32 kernelDepth = 20;
/* Height of the convolution kernel. Must be a positive integer.
*/
int32 kernelHeight = 21;
/* Width of the convolution kernel. Must be a positive integer.
*/
int32 kernelWidth = 22;
/* Stride along the depth direction. Must be a positive integer.
*/
int32 strideDepth = 31;
/* Stride along the height direction. Must be a positive integer.
*/
int32 strideHeight = 32;
/* Stride along the width direction. Must be a positive integer.
*/
int32 strideWidth = 33;
/* Dilation along the depth direction. Must be a positive integer.
*/
int32 dilationDepth = 40;
/* Dilation along the height direction. Must be a positive integer.
*/
int32 dilationHeight = 41;
/* Dilation along the width direction. Must be a positive integer.
*/
int32 dilationWidth = 42;
/*
* Flag to specify whether a bias is to be added or not.
* If false, then no bias is added.
*/
bool hasBias = 50;
/*
* Weights associated with this layer.
* If deconvolution == False, weights have the shape
* ``[outputChannels, kernelChannels, kernelDepth, kernelHeight, kernelWidth]``,
* where kernelChannels == inputChannels / nGroups.
* If deconvolution == True, weights have the shape
* ``[outputChannels / nGroups, kernelChannels, kernelDepth, kernelHeight, kernelWidth]``,
* where kernelChannels == inputChannels.
*/
WeightParams weights = 60;
/*
* Must be of size ``[outputChannels]``.
*/
WeightParams bias = 61;
/*
* The type of padding.
* All padding types pad the input shape with zeros.
* CUSTOM padding will add the custom padding values specified below to their
* respective dimensions, e.g., `customPaddingFront` number of zeros will be
* added to one side of the input's depth dimension and `customPaddingBack`
* number of zeros will be added to the other side of the input's depth
* dimension. VALID padding adds no padding to any dimension. In this case,
* the last convolution along each dimension will be dropped if the input
* dimension and the kernel size, stride, and dilation do not match. SAME
* padding adds enough padding to each dimension such that the output of the
* convolution has size ``Ceiling(inputShape / stride)``. Padding is added
* evenly to both sides of each dimension unless the total padding to add is
* odd, in which case it is added to the back/bottom/right side of the
* respective dimension. For example, if the total padding needed in the depth
* dimension is 3, 1 zero will be added to the front side of the depth
* dimension and 2 zeros will be added to the back side.
*/
enum PaddingType {
CUSTOM = 0;
VALID = 1;
SAME = 2;
}
PaddingType paddingType = 70;
/* Padding before the input in the depth direction. Must be zero or a positive
* integer. Used when the `PaddingType` is `CustomPadding`, otherwise ignored
* by other padding types.
*/
int32 customPaddingFront = 80;
/* Padding after the input in the depth direction. Must be zero or a positive
* integer. Used when the `PaddingType` is `CustomPadding`, otherwise ignored
* by other padding types.
*/
int32 customPaddingBack = 81;
/* Padding before the input in the height direction. Must be zero or a
* positive integer. Used when the `PaddingType` is `CustomPadding`, otherwise
* ignored by other padding types.
*/
int32 customPaddingTop = 82;
/* Padding after the input in the height direction. Must be zero or a positive
* integer. Used when the `PaddingType` is `CustomPadding`, otherwise ignored
* by other padding types.
*/
int32 customPaddingBottom = 83;
/* Padding before the input in the width direction. Must be zero or a positive
* integer. Used when the `PaddingType` is `CustomPadding`, otherwise ignored
* by other padding types.
*/
int32 customPaddingLeft = 84;
/* Padding after the input in the width direction. Must be zero or a positive
* integer. Used when the `PaddingType` is `CustomPadding`, otherwise ignored
* by other padding types.
*/
int32 customPaddingRight = 85;
/* Flag to specify if this is Convolution Transpose or not.
*/
bool isDeconvolution = 86;
/*
* The output shape, which has length 3 ``[D_out, H_out, W_out]``.
* This is used only for deconvolution (``isDeconvolution == true``).
* If not set, the deconvolution output shape is calculated
* based on ``PaddingType``.
*/
repeated uint64 outputShape = 87;
}
/*
* A layer that performs a matrix-vector or matrix-matrix product.
* This is equivalent to a fully-connected, or dense layer.
* The weight parameters correspond to a matrix of dimensions (inputChannels,
* outputChannels) i.e. (C_in, C_out)
*
* .. code::
*
* y = InnerProductLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* Input can have rank 1 to rank 5. This is how it is reshaped into the
* matrix (for rank > 1):
* rank 1 (x1): in this case, the layer corresponds to a matrix-vector
* product. x1 must be equal to C_in
* rank 2 (x1, x2): x2 must be equal to C_in
* rank 3 (x1, x2, x3) --> (x1 * x2, x3). x3 must be equal to C_in
* rank 4 (x1, x2, x3, x4) --> (x1, x2 * x3 * x4). x2 * x3 * x4 must be equal to C_in
* rank 5 (x1, x2, x3, x4, x5) --> (x1 * x2, x3 * x4 * x5). x3 * x4 * x5 must be equal to C_in
*
* Output
* Output rank is same as the input rank
* rank 1: (C_out)
* rank 2: (x1, C_out)
* rank 3: (x1, x2, C_out)
* rank 4: (x1, C_out, 1, 1)
* rank 5: (x1, x2, C_out, 1, 1)
*
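* As an illustration, the rank 3 case can be written with numpy as follows
* (a sketch; the weight layout follows the ``weights`` field below):
*
* .. code::
*
* import numpy as np
*
* C_in, C_out = 8, 16
* x = np.random.rand(2, 5, C_in)       # rank 3 input (x1, x2, C_in)
* W = np.random.rand(C_out, C_in)      # weights stored as [C_out, C_in]
* b = np.random.rand(C_out)
*
* y = x.reshape(-1, C_in) @ W.T + b    # (x1 * x2, C_in) x (C_in, C_out)
* y = y.reshape(2, 5, C_out)           # rank 3 output (x1, x2, C_out)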
*/
message InnerProductLayerParams {
uint64 inputChannels = 1; // Input size: C_in.
uint64 outputChannels = 2; // Output size: C_out.
bool hasBias = 10; // Whether a bias is added or not.
WeightParams weights = 20; // Weight matrix [C_out, C_in].
WeightParams bias = 21; // Bias vector [C_out].
/*
* If set, this layer, at runtime, quantizes the floating point input blob to
* int8 before applying an inner product using INT8 weight matrix parameters,
* as provided in weights->int8RawValue. The result is then dequantized.
* Requires:
* * hasBias == false
* * QuantizationType == LinearQuantizationParams, such that
* * size of the "scale" field is 1 and "bias" field is empty in
* "LinearQuantizationParams"
* * numberOfBits == 8
* * weights->rawValue_size to be empty
*/
bool int8DynamicQuantize = 22;
}
/*
* A layer that performs a matrix lookup and optionally adds a bias.
* The weights matrix is stored with dimensions [outputChannels, inputDim].
*
* .. code::
*
* y = EmbeddingLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* Input values must be in the range ``[0, inputDim - 1]``.
*
* Input must have rank equal to 4 or 5, such that the last 3 dimensions are
* all 1.
* rank 4: shape (x1, 1, 1, 1). x1 is effectively the batch/sequence length.
* rank 5: shape (x1, x2, 1, 1, 1). x1 * x2 is effectively the combined
* batch/sequence length.
*
* Output
* Output rank is same as the input rank. Please see input description above.
* rank 4: shape (x1, outputChannels, 1, 1)
* rank 5: shape (x1, x2, outputChannels, 1, 1)
*
*/
message EmbeddingLayerParams {
uint64 inputDim = 1; // Size of the input dictionary.
uint64 outputChannels = 2; // Size of the output vectors.
bool hasBias = 10; // Whether a bias is added or not.
WeightParams weights =
20; // 2-D weights of dimensions [outputChannels, inputDim].
WeightParams bias = 21; // Bias of size [outputChannels].
}
/*
* A layer that performs a matrix lookup and optionally adds a bias.
* The weights matrix is stored with dimensions [embeddingSize, vocabSize].
*
* .. code::
*
* y = EmbeddingNDLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* Input values must be in the range ``[0, vocabSize - 1]``.
* Input must have rank at least 2. The last dimension must always be 1.
* rank 2: shape (x1, 1). x1 is the batch/sequence length.
* rank 3: shape (x1, x2, 1). x1 * x2 is effectively the combined
* batch/sequence length.
* rank 4: shape (x1, x2, x3, 1). x1 * x2 * x3 is effectively the combined
* batch/sequence length.
* rank 5: shape (x1, x2, x3, x4, 1). x1 * x2 * x3 * x4 is effectively the
* combined batch/sequence length.
*
* Output
* Output rank is same as the input rank. Please see input description above.
* rank 2: shape (x1, embeddingSize)
* rank 3: shape (x1, x2, embeddingSize)
* rank 4: shape (x1, x2, x3, embeddingSize)
* rank 5: shape (x1, x2, x3, x4, embeddingSize)
*
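* A numpy sketch of the lookup for the rank 2 case (illustrative only; the
* weight layout follows the ``weights`` field below, i.e.
* [embeddingSize, vocabSize]):
*
* .. code::
*
* import numpy as np
*
* vocab_size, embedding_size = 100, 32
* W = np.random.rand(embedding_size, vocab_size)   # [embeddingSize, vocabSize]
* b = np.random.rand(embedding_size)
*
* x = np.array([[3], [41], [7]])                   # rank 2 input, shape (x1, 1)
* y = W[:, x[:, 0]].T + b                          # output shape (x1, embeddingSize)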
*/
message EmbeddingNDLayerParams {
uint64 vocabSize = 1; // Size of the input dictionary.
uint64 embeddingSize = 2; // Size of the output vectors.
bool hasBias = 3; // Whether a bias is added or not.
WeightParams weights =
20; // 2-D weights of dimensions [embeddingSize, vocabSize].
WeightParams bias = 21; // Bias of size [embeddingSize].
}
/*
* A layer that performs batch normalization,
* which is performed along axis = -3,
* and repeated along the other axes, if present.
*
* .. code::
*
* y = BatchnormLayer(x)
*
* Requires 1 input and produces 1 output.
*
* This operation is described by the following formula:
*
* .. math::
* y_i = \gamma_i \dfrac{ (x_i - \mu_i)}{\sqrt{\sigma_i^2 + \epsilon}} +
* \beta_i \;,\;i=1,....,C
*
* Input
* A blob with rank greater than or equal to 3.
* Example: Rank 4 blob represents [Batch, channels, height, width]
* For ranks greater than 3, the leading dimensions, starting from 0 to -4
* (inclusive), are all treated as batch.
*
* Output
* A blob with the same shape as the input.
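*
* A numpy sketch of the formula for a rank 4 input, normalizing along
* axis = -3 (channels); the parameter names follow the fields below:
*
* .. code::
*
* import numpy as np
*
* def batchnorm(x, gamma, beta, mean, variance, epsilon=1e-5):
*     # x: [Batch, C, H, W]; per-channel parameters of length C are
*     # broadcast over the batch and spatial axes
*     shape = (1, x.shape[-3], 1, 1)
*     x_hat = (x - mean.reshape(shape)) / np.sqrt(variance.reshape(shape) + epsilon)
*     return gamma.reshape(shape) * x_hat + beta.reshape(shape)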
*/
message BatchnormLayerParams {
uint64 channels = 1; // Size of the channel dimension in the input.
/*
* If ``computeMeanVar == true``,
* the mean and variance are calculated from either
* the single input instance, if ``instanceNormalization == true``,
* or the whole batch, if ``instanceNormalization == false``,
* and the values provided in the parameters "mean" and "variance" are ignored.
*/
bool computeMeanVar = 5;
bool instanceNormalization = 6;
/*
* A small constant to avoid division by 0 while normalizing by variance.
* Defaults to ``1e-5`` if not set or set to ``0``.
*/
float epsilon = 10;
WeightParams gamma = 15; // Parameter of length [channels]
WeightParams beta = 16; // Parameter of length [channels]
WeightParams mean = 17; // Parameter of length [channels]
WeightParams variance = 18; // Parameter of length [channels]
}
/*
* A spatial pooling layer.
*
* .. code::
*
* y = PoolingLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank greater than or equal to 4.
* Rank 4 blob represents [Batch, channels, height, width]
* For ranks greater than 4, the leading dimensions, starting from 0 to -4
* (inclusive), are all treated as batch.
*
* Output
* Rank is same as the input. e.g.: for rank 4 input, output shape is [B, C,
* H_out, W_out]
*
* Padding options are similar to ``ConvolutionLayerParams``
* with the additional option of ``ValidCompletePadding``
* (``includeLastPixel``), which ensures that the last application of the kernel
* always includes the last pixel of the input image, if there is padding.
*
* .. code::
*
* H_out = ceil(float(H_in + 2 * paddingAmounts[0] - kernelSize[0]) / float(Stride[0])) + 1
* if (paddingAmounts[0] > 0 or paddingAmounts[1] > 0) {
*     if ((H_out - 1) * Stride >= H_in + paddingAmounts[0]) {
*         H_out = H_out - 1
*     }
* }
*
* The equivalent expressions hold true for ``W_out`` as well.
* Only symmetric padding is supported with this option.
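*
* The ``includeLastPixel`` rule above, written as a small Python helper
* (a sketch per spatial dimension, assuming symmetric padding; the helper
* name is not part of the specification):
*
* .. code::
*
* import math
*
* def include_last_pixel_out(in_size, kernel, stride, pad):
*     out = math.ceil((in_size + 2 * pad - kernel) / stride) + 1
*     if pad > 0 and (out - 1) * stride >= in_size + pad:
*         out -= 1  # the last window would start past the padded input
*     return out
*
* # e.g. H_in = 7, kernel 3, stride 2, padding 1 -> H_out = 4
* print(include_last_pixel_out(7, 3, 2, 1))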
*/
message PoolingLayerParams {
enum PoolingType {
MAX = 0;
AVERAGE = 1;
L2 = 2;
}
PoolingType type = 1; // Type of pooling operation.
/*
* Must be length 2 in the order ``[H, W]``.
* If not set, default value ``[3, 3]`` is used.
*/
repeated uint64 kernelSize = 10;
/*
* Must be length 2 in the order ``[H, W]``.
* If not set, default value ``[1, 1]`` is used.
*/
repeated uint64 stride = 20;
message ValidCompletePadding {
/*
* Must be length 2 in order ``[H, W]``.
* If not set, value ``[0, 0]`` is used.
*/
repeated uint64 paddingAmounts = 10;
}
oneof PoolingPaddingType {
ValidPadding valid = 30;
SamePadding same = 31;
ValidCompletePadding includeLastPixel = 32;
}
/*
* If true, padded values are excluded from the count (denominator)
* when computing average pooling.
*/
bool avgPoolExcludePadding = 50;
/*
* If true, global pooling is performed.
* Kernel size is inferred from the input data spatial dimensions.
*/
bool globalPooling = 60;
}
/*
* A layer to pool three spatial dimensions
*
* Input
* A blob with rank equal to 5, representing [Batch, channels, depth,
* height, width].
*
* Output
* Rank is same as the input: A blob with rank equal to 5, representing
* [Batch, channels, depth, height, width].
*
* Requires 1 input and produces 1 output.
*
* For example, given an input of shape (1,1,2,3,3):
* +----+----+----+
* / | 10 | 11 | 12 |
* / +----+----+----+
* / | 13 | 14 | 15 |
* / +----+----+----+
* / | 16 | 17 | 18 |
* / +----+----+----+
* +----+----+----+ /
* | 1 | 2 | 3 | /
* +----+----+----+ /
* | 4 | 5 | 6 | /
* +----+----+----+ /
* | 7 | 8 | 9 | /
* +----+----+----+
*
* And applying MAX pooling using:
* Kernel: 2x2x2
* Stride: 1x1x1
* Valid Padding
* We expect to get an output with shape: (1,1,1,2,2) and value:
* +----+----+
* | 14 | 15 |
* +----+----+
* | 17 | 18 |
* +----+----+
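*
* The example above can be reproduced with a short numpy sketch (illustrative
* only, not part of the specification):
*
* .. code::
*
* import numpy as np
*
* x = np.arange(1, 19).reshape(1, 1, 2, 3, 3)   # the (1,1,2,3,3) input above
*
* # MAX pooling with kernel 2x2x2, stride 1x1x1, and VALID padding
* out = np.empty((1, 1, 1, 2, 2))
* for h in range(2):
*     for w in range(2):
*         out[0, 0, 0, h, w] = x[0, 0, 0:2, h:h+2, w:w+2].max()
* print(out[0, 0, 0])   # [[14. 15.]
*                       #  [17. 18.]]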
*/
message Pooling3DLayerParams {
enum PoolingType3D {
MAX = 0;
AVERAGE = 1;
}
// Whether to use Max or Average
PoolingType3D type = 1;
// Depth of the pooling region.
int32 kernelDepth = 2;
// Height of the pooling region.
int32 kernelHeight = 3;
// Width of the pooling region.
int32 kernelWidth = 4;
// Stride along the depth direction
int32 strideDepth = 5;
// Stride along the height direction
int32 strideHeight = 6;
// Stride along the width direction
int32 strideWidth = 7;
/*
* The type of padding.
* All padding types pad the input shape with zeros.
* CUSTOM padding will add the custom padding values specified below to their
* respective dimensions, e.g., `customPaddingFront` number of zeros will be
* added to one side of the input's depth dimension and `customPaddingBack`
* number of zeros will be added to the other side of the input's depth
* dimension. VALID padding adds no padding to any dimension. In this case,
* the last pool along each dimension will be dropped if the input dimension
* and the kernel size, and stride do not match. SAME padding adds enough
* padding to each dimension such that the output has the same spatial
* dimensions as the input. Padding is added evenly to both sides of each
* dimension unless the total padding to add is odd, in which case the extra
* padding is added to the back/bottom/right side of the respective dimension.
* For example, if the total horizontal padding is 3, then there will be 1
* padding on the left, and 2 padding on the right.
*/
enum Pooling3DPaddingType {
CUSTOM = 0;
VALID = 1;
SAME = 2;
}
Pooling3DPaddingType paddingType = 15;
// Padding before the input in the depth direction.
int32 customPaddingFront = 8;
// Padding after the input in the depth direction.
int32 customPaddingBack = 9;
// Padding before the input in the height direction.
int32 customPaddingTop = 10;
// Padding after the input in the height direction.
int32 customPaddingBottom = 11;
// Padding before the input in the width direction.
int32 customPaddingLeft = 12;
// Padding after the input in the width direction.
int32 customPaddingRight = 13;
// If true, exclude zeros from padding in Average pooling. Meaningless in Max
// Pooling.
bool countExcludePadding = 14;
}
/*
* A layer to pool three spatial dimensions down to one value.
* This behaves like a special case of Pooling3DLayerParams in which
* the Kernel is the size of the input and there is no padding.
*
* Input
* A blob with rank equal to 5, representing [Batch, channels, depth,
* height, width].
*
* Output
* Rank is same as the input: A blob with rank equal to 5, representing
* [Batch, channels, depth, height, width]. Depth, height, and width of the
* output will always be 1.
*
* Requires 1 input and produces 1 output.
*
* For example, given an input of shape (1,1,2,3,3):
* +----+----+----+
* / | 10 | 11 | 12 |
* / +----+----+----+
* / | 13 | 14 | 15 |
* / +----+----+----+
* / | 16 | 17 | 18 |
* / +----+----+----+
* +----+----+----+ /
* | 1 | 2 | 3 | /
* +----+----+----+ /
* | 4 | 5 | 6 | /
* +----+----+----+ /
* | 7 | 8 | 9 | /
* +----+----+----+
*
* And applying MAX global 3d pooling, we expect to get an output with shape:
* (1,1,1,1,1) and value:
* +----+
* | 18 |
* +----+
*/
message GlobalPooling3DLayerParams {
enum GlobalPoolingType3D {
MAX = 0;
AVERAGE = 1;
}
// Whether to use Max or Average
GlobalPoolingType3D type = 1;
}
/*
* A layer that performs padding along spatial dimensions.
*
* .. code::
*
* y = PaddingLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank at least 2.
* e.g.: blob with shape ``[H_in, W_in]``.
 * For ranks greater than 2, the leading dimensions, starting from 0 to -3
 * (inclusive), are all treated as batch, i.e. padding is applied on the
 * last two dimensions.
*
* Output
* Same rank as the input.
* e.g.: blob with shape ``[H_out, W_out]``.
*
* Output dimensions are calculated as follows:
*
* .. code::
*
* H_out = H_in + topPaddingAmount + bottomPaddingAmount
* W_out = W_in + leftPaddingAmount + rightPaddingAmount
*
 * topPaddingAmount    == Height startEdgeSize == borderAmounts[0].startEdgeSize
 * bottomPaddingAmount == Height endEdgeSize   == borderAmounts[0].endEdgeSize
 * leftPaddingAmount   == Width startEdgeSize  == borderAmounts[1].startEdgeSize
 * rightPaddingAmount  == Width endEdgeSize    == borderAmounts[1].endEdgeSize
*
* There are three types of padding:
*
* - ``PaddingConstant``, which fills a constant value at the border.
* - ``PaddingReflection``, which reflects the values at the border.
* - ``PaddingReplication``, which replicates the values at the border.
*
* Given the following input:
*
* .. code::
*
* [1, 3, 4] : 1 2 3 4
* 5 6 7 8
* 9 10 11 12
*
* Here is the output of applying the padding
* ``(top=2, left=2, bottom=0, right=0)``
* with each of the supported types:
*
* - ``PaddingConstant`` (``value = 0``):
* .. code::
*
* [1, 5, 6] : 0 0 0 0 0 0
* 0 0 0 0 0 0
* 0 0 1 2 3 4
* 0 0 5 6 7 8
* 0 0 9 10 11 12
*
* - ``PaddingReflection``:
* .. code::
*
* [1, 5, 6] : 11 10 9 10 11 12
* 7 6 5 6 7 8
* 3 2 1 2 3 4
* 7 6 5 6 7 8
* 11 10 9 10 11 12
*
* - ``PaddingReplication``:
* .. code::
*
* [1, 5, 6] : 1 1 1 2 3 4
* 1 1 1 2 3 4
* 1 1 1 2 3 4
* 5 5 5 6 7 8
* 9 9 9 10 11 12
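 *
 * A minimal NumPy sketch (illustrative only, not part of the specification)
 * reproducing the three examples above with ``numpy.pad``:
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     x = np.array([[1, 2, 3, 4],
 *                   [5, 6, 7, 8],
 *                   [9, 10, 11, 12]])
 *     pad = ((2, 0), (2, 0))  # (top, bottom), (left, right)
 *
 *     constant = np.pad(x, pad, mode="constant", constant_values=0)
 *     reflection = np.pad(x, pad, mode="reflect")
 *     replication = np.pad(x, pad, mode="edge")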
*/
message PaddingLayerParams {
/*
* Fill a constant value in the padded region.
*/
message PaddingConstant {
float value = 1;
}
/*
* Reflect the values at the border for padding.
*/
message PaddingReflection {}
/*
* Replicate the values at the border for padding.
*/
message PaddingReplication {}
oneof PaddingType {
PaddingConstant constant = 1;
PaddingReflection reflection = 2;
PaddingReplication replication = 3;
}
BorderAmounts paddingAmounts = 10; // Amounts to be padded to the input.
}
/*
* A layer that concatenates along the axis = -3 or -5.
* For general concatenation along any axis, see ConcatNDLayer.
*
* .. code::
*
* y = ConcatLayer(x1,x2,....)
*
* Requires more than 1 input and produces 1 output.
*
* Input
 * All input blobs must have the same rank.
 * If "sequenceConcat" = False, rank must be greater than or equal to 3, and
 * concatenation is along axis = -3.
 * If "sequenceConcat" = True, rank must be greater than or equal to 5, and
 * concatenation is along axis = -5.
*
* Output
* Same rank as the input.
*
*/
message ConcatLayerParams {
/*
* If true, concatenate along the axis = -5 instead of axis = -3.
*/
bool sequenceConcat = 100;
}
/*
* A layer that performs local response normalization (LRN).
*
* .. code::
*
* y = LRNLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
 * A blob with rank greater than or equal to 3.
 * Example: Rank 4 blob represents [Batch, channels, height, width]
 * For ranks greater than 3, the leading dimensions, starting from 0 to -4
 * (inclusive), are all treated as batch.
 *
 * Output
 * A blob with the same shape as the input.
*
* This layer is described by the following formula:
*
* .. math::
* x_i \leftarrow \dfrac{x_i}{\left ( k + \dfrac{\alpha}{\text{localSize}}
* \sum_j x_j^2 \right )^\beta}
*
* where the summation is done over a ``(localSize, 1, 1)`` neighborhood ---
* that is, over a window "across" channels in 1x1 spatial neighborhoods.
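 *
 * A minimal NumPy sketch (illustrative only, not part of the specification)
 * of the formula above, assuming a rank 4 input ``[B, C, H, W]`` and a
 * window that is clipped at the channel boundaries:
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def lrn(x, alpha, beta, k, local_size):
 *         y = np.empty_like(x)
 *         half = local_size // 2
 *         num_channels = x.shape[1]
 *         for c in range(num_channels):
 *             lo, hi = max(0, c - half), min(num_channels, c + half + 1)
 *             denom = (k + (alpha / local_size) *
 *                      np.sum(x[:, lo:hi] ** 2, axis=1)) ** beta
 *             y[:, c] = x[:, c] / denom
 *         return y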
*/
message LRNLayerParams {
float alpha = 1;
float beta = 2;
uint64 localSize = 3; // Number of channels in the normalization window.
float k = 4; // Defaults to 1 if not set or 0. Must be strictly positive.
}
/*
* Softmax Normalization Layer
*
* A layer that performs softmax normalization.
* Normalization is applied along axis = -3 or N-3 (where N is the rank of the
* input) For softmax layer that can operate on any axis, see SoftmaxNDLayer.
*
*
* .. code::
*
* y = SoftmaxLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* Must be a blob with rank >= 3.
* Output
* A blob with the same shape as the input.
*
* This layer is described by the following formula:
*
* .. math::
 * x_i \leftarrow \dfrac{e^{x_i}}{\sum_j{e^{x_j}}}
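 *
 * A minimal NumPy sketch (illustrative only, not part of the specification)
 * of the formula above, applied along axis = -3:
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def softmax_channel(x):
 *         # subtracting the max is a numerical-stability trick;
 *         # it does not change the result
 *         e = np.exp(x - np.max(x, axis=-3, keepdims=True))
 *         return e / np.sum(e, axis=-3, keepdims=True)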
*/
message SoftmaxLayerParams {}
/*
* A layer that uniformly splits across axis = -3 to produce a specified number
* of outputs. For general split operation along any axis, see SplitNDLayer.
*
* .. code::
*
* (y1,y2,...yN) = SplitLayer(x), where N = nOutputs
*
* Requires 1 input and produces multiple outputs.
*
* Input
* A blob with rank at least 3.
* e.g.: blob with shape ``[C, H, W]``
* Output
* ``nOutputs`` blobs each with same rank as the input.
* e.g.: For input that is of shape ``[C, H, W]``, output shapes will be
* ``[C/nOutputs, H, W]``
*/
message SplitLayerParams {
uint64 nOutputs = 1; // The number of outputs.
}
/*
* A layer that performs elementwise addition.
* This layer has limited broadcasting support. For general broadcasting see
* AddBroadcastableLayer.
*
* .. code::
*
* y = AddLayer(x1,x2,...)
*
* Requires 1 or more than 1 input and produces 1 output.
*
* Input
* In general, there are no rank constraints.
* However, only certain set of shapes are broadcastable. For example:
* [B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W]
* Output
* A blob with shape equal to the input blob.
*
* If only one input is provided, scalar addition is performed:
*
* .. math::
* y = x + \alpha
*
*/
message AddLayerParams {
/*
* Scalar to be added to the input.
* Only used if there is a single input.
*/
float alpha = 1;
}
/*
* A layer that performs elementwise multiplication.
* This layer has limited broadcasting support. For general broadcasting see
* MultiplyBroadcastableLayer.
*
* .. code::
*
* y = MultiplyLayer(x1,x2,...)
*
* Requires 1 or more than 1 input and produces 1 output.
*
* Input
* In general, there are no rank constraints.
* However, only certain set of shapes are broadcastable. For example:
* [B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W]
* Output
* A blob with shape equal to the first input blob.
*
* If only one input is provided, scalar multiplication is performed:
*
* .. math::
* y = \alpha x
*
*/
message MultiplyLayerParams {
/*
* Scalar to be multiplied with the input.
* Only used if there is a single input.
*/
float alpha = 1;
}
/*
* A layer that applies a unary function.
*
* .. code::
*
* y = UnaryFunctionLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with no rank constraints.
* Output
* A blob with the same shape as the input.
*
* The input is first modified by shifting and scaling:
*
* .. math::
* x \leftarrow \text{scale} \cdot x + \text{shift}
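 *
 * A minimal NumPy sketch (illustrative only, not part of the specification)
 * of the shift/scale step followed by a few of the operations listed below:
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def unary(x, op, alpha=1.0, epsilon=1e-6, shift=0.0, scale=1.0):
 *         x = scale * x + shift
 *         if op == "SQRT":
 *             return np.sqrt(x)
 *         if op == "RSQRT":
 *             return 1.0 / np.sqrt(x + epsilon)
 *         if op == "INVERSE":
 *             return 1.0 / (x + epsilon)
 *         if op == "POWER":
 *             return x ** alpha
 *         if op == "THRESHOLD":
 *             return np.maximum(alpha, x)
 *         raise ValueError(op)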
*/
message UnaryFunctionLayerParams {
/*
* A unary operator.
*
* The following functions are supported:
*
* ``SQRT``
* .. math:: f(x) = \sqrt{x}
*
* ``RSQRT``
* .. math:: f(x) = \dfrac{1}{\sqrt{x + \epsilon}}
*
* ``INVERSE``
* .. math:: f(x) = \dfrac{1}{x + \epsilon}
*
* ``POWER``
* .. math:: f(x) = x^\alpha
*
* ``EXP``
* .. math:: f(x) = e^x
*
* ``LOG``
* .. math:: f(x) = \log x
*
* ``ABS``
* .. math:: f(x) = |x|
*
* ``THRESHOLD``
* .. math:: f(x) = \text{max}(\alpha, x)
*/
enum Operation {
SQRT = 0;
RSQRT = 1;
INVERSE = 2;
POWER = 3;
EXP = 4;
LOG = 5;
ABS = 6;
THRESHOLD = 7;
}
Operation type = 1; // The type of unary function.
/*
* A constant used in ``POWER`` and ``THRESHOLD`` functions.
*/
float alpha = 2;
/*
 * A small constant to avoid division by 0, used by the ``RSQRT`` and
 * ``INVERSE`` operations.
* Defaults to ``1e-6`` if not set or set to ``0``.
*/
float epsilon = 3;
/*
* Input is shifted by this amount
* before the unary function is applied.
* Defaults to ``0.0`` if not set.
*/
float shift = 4;
/*
* Input is scaled by this amount
* before the unary function is applied.
* Defaults to ``1.0`` if not set or set to ``0``.
*/
float scale = 5;
}
/*
* A layer that scales up spatial dimensions.
* It supports two modes: nearest neighbour (default) and bilinear.
*
* .. code::
*
* y = UpsampleLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank at least 3.
* e.g.: blob with shape ``[C, H, W]``.
* For ranks greater than 3, the leading dimensions, starting from 0 to -4
* (inclusive), are all treated as batch.
*
* Output
* Same rank as the input.
* e.g.: blob with shape ``[C, scalingFactor[0] * H, scalingFactor[1] * W]``
*/
message UpsampleLayerParams {
/*
* Scaling Factor. Mutually exclusive with fractionalScalingFactor.
* Must be length 2 in order ``[H, W]``.
* If not set, default value ``[1, 1]`` is used.
*/
repeated uint64 scalingFactor = 1;
/*
* Fractional scaling factor. Mutually exclusive with scalingFactor.
* Must be length 2 in order ``[H, W]``.
* If not set, default value ``[1.0, 1.0]`` is used.
*/
repeated float fractionalScalingFactor = 7;
/*
 * Overall mode for interpolating new elements when upsampling.
 * NN - Nearest Neighbors: simply pick the nearest true value for
 * interpolated values.
 * BILINEAR - Use bilinear interpolation. See LinearUpsampleMode for
 * behavior.
*/
enum InterpolationMode {
NN = 0; // Nearest Neighbour
BILINEAR = 1; // Bilinear
}
InterpolationMode mode = 5;
/*
* LinearUpsampleMode specifies the behavior for linear upsampling. Only valid
* when Interpolation Mode is BILINEAR. If input grid is [0, Xin-1]
* (corresponding to an input size of Xin), and if the output size is Xout,
* then the grid points are sampled in the following manner:
 * DEFAULT:
 *   spacing = (Xin - Xin/Xout) / (Xout - 1)
 *   grid_point[i] = min(Xin - 1, max(0, i * spacing)),
 *   for i = 0, 1, 2, ..., Xout-1
 * ALIGN_CORNERS_TRUE:
 *   spacing = (Xin - 1) / (Xout - 1)
 *   grid_point[i] = min(Xin - 1, max(0, i * spacing)),
 *   for i = 0, 1, 2, ..., Xout-1
 * ALIGN_CORNERS_FALSE:
 *   spacing = Xin / Xout
 *   grid_point[i] = min(Xin - 1, max(0, i * spacing + 0.5 * spacing - 0.5)),
 *   for i = 0, 1, 2, ..., Xout-1
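 *
 * A minimal NumPy sketch (illustrative only, not part of the specification)
 * of the grid computation above, assuming Xout > 1:
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def grid_points(x_in, x_out, mode):
 *         i = np.arange(x_out, dtype=np.float64)
 *         if mode == "DEFAULT":
 *             spacing = (x_in - x_in / x_out) / (x_out - 1)
 *             g = i * spacing
 *         elif mode == "ALIGN_CORNERS_TRUE":
 *             spacing = (x_in - 1) / (x_out - 1)
 *             g = i * spacing
 *         else:  # ALIGN_CORNERS_FALSE
 *             spacing = x_in / x_out
 *             g = i * spacing + 0.5 * spacing - 0.5
 *         return np.clip(g, 0, x_in - 1)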
*/
enum LinearUpsampleMode {
DEFAULT = 0;
ALIGN_CORNERS_TRUE = 1;
ALIGN_CORNERS_FALSE = 2;
}
LinearUpsampleMode linearUpsampleMode = 6;
}
/*
* A layer that resizes the input to a pre-specified spatial size using bilinear
* interpolation.
*
* .. code::
*
* y = ResizeBilinearLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank at least 3.
* e.g.: blob with shape ``[C, H_in, W_in]``.
* For ranks greater than 3, the leading dimensions, starting from 0 to -4
* (inclusive), are all treated as batch.
*
* Output
* Same rank as the input.
* e.g.: blob with shape ``[C, H_out, W_out]``.
*
*/
message ResizeBilinearLayerParams {
/*
* Target Spatial Size.
* Must be length 2 in order ``[Height, Width]``, i.e. ``[H_out, W_out]``.
* If not set, default value ``[1, 1]`` is used.
*/
repeated uint64 targetSize = 1;
/*
* Mode used to compute the grid on which the spatial output values are
* evaluated. Same mode is applied to both the height and width axes.
*/
SamplingMode mode = 2;
}
/*
* A layer that extracts cropped spatial patches or RoIs (regions of interest)
* from the input and resizes them to a pre-specified size using bilinear
* interpolation. Note that RoI Align layer can be implemented with this layer
* followed by a pooling layer.
*
* .. code::
*
* y = CropResizeLayer(x)
*
* Requires 2 inputs and produces 1 output.
*
* Input
* There are two inputs.
* First input represents an image feature map.
* Second input represents the bounding box coordinates for N patches or
* RoIs (region of interest).
*
* First input is rank 5: [1, Batch, C, H_in, W_in].
* Second input is rank 5. Its shape can be either [N, 1, 4, 1, 1] or [N, 1,
* 5, 1, 1].
*
* N: number of patches/RoIs to be extracted
*
* If RoI shape = ``[N, 1, 4, 1, 1]``
* The axis=-3 corresponds to the four coordinates specifying
* the bounding box. All the N RoIs are extracted from all the batches of the
* input.
*
* If RoI shape = ``[N, 1, 5, 1, 1]``
* The first element of the axis=-3 specifies the input
* batch id from which to extract the RoI and must be in the interval ``[0,
* Batch - 1]``. That is, n-th RoI is extracted from the RoI[n,0,0,0,0]-th input
* batch id. The last four elements of the axis=-3 specify the bounding box
* coordinates.
*
* Output
* A blob with rank 5.
* - Shape is [N, Batch, C, H_out, W_out] if input RoI shape is [N, 1,
* 4, 1, 1]
* - Shape is [N, 1, C, H_out, W_out] if input RoI shape is [N, 1, 5,
* 1, 1]
*
*/
message CropResizeLayerParams {
/*
* Target Spatial Size.
* Must be length 2 in order ``[Height, Width]``, i.e. ``[H_out, W_out]``.
* If not set, default value ``[1, 1]`` is used.
*/
repeated uint64 targetSize = 1;
/*
* If true the bounding box coordinates must be in the interval [0, 1].
* They are scaled by (H_in - 1), (W_in - 1), i.e. based on the input spatial
* dimensions. If false the bounding box coordinates must be in the interval
* [0, H_in -1] and [0, W_in - 1], respectively for height and width
* dimensions.
*/
bool normalizedCoordinates = 2;
/*
* Mode used to compute the grid on which the spatial output values are
* evaluated. Same mode is applied to both the height and width axes.
*/
SamplingMode mode = 3;
/*
* Representation used to express the bounding box coordinates.
* It determines how the values of the second input are interpreted.
*/
BoxCoordinatesMode boxIndicesMode = 4;
/*
* Additional spatial scale that multiplies the bounding box coordinates.
* Generally used while implementing the RoI Align layer,
* which uses unnormalized RoI coordinates along with a spatial scale less
* than or equal to 1.
*/
float spatialScale = 5;
}
/*
* A layer that performs elementwise addition of a bias,
* which is broadcasted to match the input shape.
*
* .. code::
*
* y = BiasLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank at least 3.
* e.g.: blob with shape ``[C, H, W]``.
 * For ranks greater than 3, the leading dimensions, starting from 0 to -4
 * (inclusive), are all treated as batch.
 *
 * Output
 * A blob with the same shape as the input.
*/
message BiasLayerParams {
/*
* The shape of the bias.
* Must be one of the following:
* ``[1]``, ``[C]``, ``[1, H, W]`` or ``[C, H, W]``.
*/
repeated uint64 shape = 1;
/*
* The bias values.
* The size must be equal to the product of the ``shape`` dimensions.
*/
WeightParams bias = 2;
}
/*
 * A layer that performs elementwise multiplication by a scale factor
* and optionally adds a bias;
* both the scale and bias are broadcasted to match the input shape.
*
* .. code::
*
* y = ScaleLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank at least 3.
* e.g.: blob with shape ``[C, H, W]``.
 * For ranks greater than 3, the leading dimensions, starting from 0 to -4
 * (inclusive), are all treated as batch.
 *
 * Output
 * A blob with the same shape as the input.
*/
message ScaleLayerParams {
/*
* The shape of the scale.
* Must be one of the following:
* ``[1]``, ``[C]``, ``[1, H, W]`` or ``[C, H, W]``.
*/
repeated uint64 shapeScale = 1;
  /*
   * The scale values.
   * The size must be equal to the product of the ``shapeScale`` dimensions.
   */
  WeightParams scale = 2;
bool hasBias = 3; // If true, a bias is added after scaling.
/*
* The shape of the bias.
* Must be one of the following:
* ``[1]``, ``[C]``, ``[1, H, W]`` or ``[C, H, W]``.
*/
repeated uint64 shapeBias = 4;
/*
* The bias values.
 * The size must be equal to the product of the ``shapeBias`` dimensions.
*/
WeightParams bias = 5;
}
/*
* A layer that loads data as a parameter and provides it as an output.
* The output is rank 5. For general rank, see LoadConstantNDLayer.
*
* .. code::
*
* y = LoadConstantLayer()
*
* Requires no input and produces 1 output.
*
* Output:
* A blob with rank 5 and shape ``[1, 1, C, H, W]``
*/
message LoadConstantLayerParams {
/*
* The shape of the constant to be loaded,
 * which must be ``[C, H, W]``, that is, of length 3.
*/
repeated uint64 shape = 1;
/*
* The data values,
* of size ``C * H * W``.
*/
WeightParams data = 2;
}
/*
 * A layer that performs L2 normalization, i.e. divides by the
 * square root of the sum of squares of all elements of the input.
*
* .. code::
*
* y = L2NormalizeLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
 * A blob with rank greater than or equal to 3.
 * For ranks greater than 3, the leading dimensions, starting from 0 to -4
 * (inclusive), are all treated as batch.
 *
 * Output
 * A blob with the same shape as the input.
*
* This layer is described by the following formula:
*
* .. math::
* x_i \leftarrow \dfrac{x_i}{\sqrt{\sum{x_i^2} + \epsilon}}
*/
message L2NormalizeLayerParams {
/*
 * A small constant to avoid division by 0 during normalization.
* Defaults to ``1e-6`` if not set or set to ``0``.
*/
float epsilon = 1;
}
// Data Reorganization Layers
// --------------------------
/*
* A layer that flattens the input.
*
* .. code::
*
* y = FlattenLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
 * A blob with rank greater than or equal to 3.
 * e.g.: Rank 4 blob represents [Batch, C, H, W]
 * For ranks greater than 3, the leading dimensions, starting from 0 to -4
 * (inclusive), are all treated as batch.
 *
 * Output
 * Same rank as the input, such that the last two dimensions are both 1.
 * e.g.: For rank 4 input, output shape is ``[Batch, C * H * W, 1, 1]``
*
 * There are two flatten orders: ``CHANNEL_FIRST`` and ``CHANNEL_LAST``.
* ``CHANNEL_FIRST`` does not require data to be rearranged,
* because row major ordering is used by internal storage.
* ``CHANNEL_LAST`` requires data to be rearranged.
*/
message FlattenLayerParams {
enum FlattenOrder {
CHANNEL_FIRST = 0;
CHANNEL_LAST = 1;
}
FlattenOrder mode = 1;
}
/*
* A layer that recasts the input into a new shape.
*
* .. code::
*
* y = ReshapeLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank 5.
* e.g.: ``[1, 1, C, H, W]`` or ``[Seq, 1, C, H, W]``.
* Output
* A blob with rank 5.
* e.g.: ``[1, 1, C_out, H_out, W_out]`` or ``[Seq_out, 1, C_out, H_out,
* W_out]``.
*
* There are two reshape orders: ``CHANNEL_FIRST`` and ``CHANNEL_LAST``.
* ``CHANNEL_FIRST`` is equivalent to
* flattening the input to ``[Seq, 1, C * H * W, 1, 1]`` in channel first order
* and then reshaping it to the target shape;
* no data rearrangement is required.
* ``CHANNEL_LAST`` is equivalent to
* flattening the input to ``[Seq, 1, H * W * C, 1, 1]`` in channel last order,
 * reshaping it to ``[Seq_out, 1, H_out, W_out, C_out]`` (it is now in
 * "H_out-major" order), and then permuting it to ``[C_out, H_out, W_out]``;
 * both the flattening and permuting require the data to be rearranged.
*/
message ReshapeLayerParams {
/*
* The shape of the output.
* Must be of length 3 or 4.
* If set to 3, ``targetShape`` is interpreted as
* ``[1, 1, C_out, H_out, W_out]``, and sequence length of the input is
* preserved. If set to 4, ``targetShape`` is interpreted as
* ``[Seq_out, 1, C_out, H_out, W_out]``,
* where ``Seq_out`` is the new sequence length.
*/
repeated int64 targetShape = 1;
enum ReshapeOrder {
CHANNEL_FIRST = 0;
CHANNEL_LAST = 1;
}
ReshapeOrder mode = 2;
}
/*
* A layer that rearranges the dimensions and data of an input.
* For generic transpose/permute operation see TransposeLayer.
*
* .. code::
*
* y = PermuteLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* Must be a rank 5 blob.
* e.g.: shape ``[Seq, B, C, H, W]``.
* Output
 * Rank 5 blob. Transposed version of the input, such that the dimension at
 * axis=1 (i.e. axis=-4) is unchanged.
*
*
* Examples:
*
* Assume input shape is [Seq, B, C, H, W]
*
* - If ``axis`` is set to ``[0, 3, 1, 2]``,
* then the output has shape ``[Seq, B, W, C, H]``
*
* - If ``axis`` is set to ``[3, 1, 2, 0]``,
* then the output has shape ``[W, B, C, H, Seq]``
*
* - If ``axis`` is set to ``[0, 3, 2, 1]``,
* then the output has shape ``[Seq, B, W, H, C]``
*
* - If ``axis`` is not set, or is set to ``[0, 1, 2, 3]``,
* the output is the same as the input.
*/
message PermuteLayerParams {
/*
* The order in which to permute the dimensions.
* Must have length 4 and a permutation of ``[0, 1, 2, 3]``.
*/
repeated uint64 axis = 1;
}
/*
* A layer that reorganizes data in the input in specific ways.
*
* .. code::
*
* y = ReorganizeDataLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank at least 3.
* e.g.: blob with shape ``[C, H, W]``.
 * For ranks greater than 3, the leading dimensions, starting from 0 to -4
 * (inclusive), are all treated as batch.
 *
 * Output
 * Same rank as the input.
 * e.g.: blob with shape ``[C_out, H_out, W_out]``.
*
* mode == SPACE_TO_DEPTH
* ``[C_out, H_out, W_out]`` : ``[C * blockSize * blockSize, H/blockSize,
* W/blockSize]``. blockSize must divide H and W. Data is moved from the spatial
* dimensions to the channel dimension. Input is spatially divided into
* non-overlapping blocks of size blockSize X blockSize and data from each
* block is moved into the channel dimension.
*
* mode == DEPTH_TO_SPACE
* ``[C_out, H_out, W_out]`` : ``[C/(blockSize * blockSize), H * blockSize, W *
* blockSize]``. Square of blockSize must divide C. Reverse of SPACE_TO_DEPTH.
* Data is moved from the channel dimension to the spatial dimensions.
*
* mode == PIXEL_SHUFFLE
* ``[C_out, H_out, W_out]`` : ``[C/(blockSize * blockSize), H * blockSize, W *
* blockSize]``. Square of blockSize must divide C. Similar to DEPTH_TO_SPACE,
* but using the pixel-shuffle semantics for channel order in the output space.
* In both modes, elements along the channel dimension are collapsed into
* blocks in the spatial dimensions. The difference is in the arrangement of
* the input-channels' data in the output space. See below example for more
* detail.
 * Only available in Core ML Specification >= 5 (iOS >= 14, macOS >= 11.0).
*
*
* Examples:
*
* Assume input is the following [C = 8, H = 1, W = 2] tensor:
*
* .. code::
*
* [[[1 2]] [[3 4]] [[5 6]] [[7 8]] [[9 10]] [[11 12]] [[13 14]] [[15 16]]]
*
* If block_size == 2 and mode == DEPTH_TO_SPACE, output will be the following
* [C = 2, H = 2, W = 4] tensor:
*
* .. code::
*
* [[[ 1 5 2 6]
* [ 9 13 10 14]]
*
* [[ 3 7 4 8]
* [11 15 12 16]]]
*
* For mode == SPACE_TO_DEPTH, the behavior is the same as mode ==
* DEPTH_TO_SPACE, but with the input and output swapped.
*
* If block_size == 2 and mode == PIXEL_SHUFFLE, output will be the following
* [C = 2, H = 2, W = 4] tensor:
*
* .. code::
*
* [[[ 1 3 2 4]
* [ 5 7 6 8]]
*
* [[ 9 11 10 12]
* [13 15 14 16]]]
*
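 * A minimal NumPy sketch (illustrative only, not part of the specification)
 * of the two channel orderings for a single ``[C, H, W]`` blob, with
 * ``r = blockSize``; it reproduces the two example outputs above:
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def depth_to_space(x, r):
 *         c, h, w = x.shape
 *         y = x.reshape(r, r, c // (r * r), h, w)
 *         y = y.transpose(2, 3, 0, 4, 1)  # -> [C_out, H, r, W, r]
 *         return y.reshape(c // (r * r), h * r, w * r)
 *
 *     def pixel_shuffle(x, r):
 *         c, h, w = x.shape
 *         y = x.reshape(c // (r * r), r, r, h, w)
 *         y = y.transpose(0, 3, 1, 4, 2)  # -> [C_out, H, r, W, r]
 *         return y.reshape(c // (r * r), h * r, w * r)
 *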
*/
message ReorganizeDataLayerParams {
enum ReorganizationType {
SPACE_TO_DEPTH = 0;
DEPTH_TO_SPACE = 1;
PIXEL_SHUFFLE = 2;
}
ReorganizationType mode = 1;
uint64 blockSize = 2; // must be greater than 1
}
/*
* A layer that slices the input data along axis = -1 or -2 or -3.
* For general slice along any axis, please see
* SliceStaticLayer/SliceDynamicLayer.
*
* .. code::
*
* y = SliceLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
 * A blob that can, in general, have any rank. However, depending on the
 * value of "axis", there may be additional rank constraints.
 *
 * Output
 * A blob with the same rank as the input.
*
* Sliced section is taken from the interval ``[startIndex, endIndex)``, i.e.
* startIndex is inclusive while endIndex is exclusive.
* stride must be positive and represents the step size for slicing.
* Negative indexing is supported for startIndex and endIndex.
* -1 denotes N-1, -2 denotes N-2 and so on, where N is the length of the
* dimension to be sliced.
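 *
 * For example, for a rank 3 input of shape ``[C, H, W]`` and
 * ``axis = WIDTH_AXIS``, the layer corresponds to the NumPy-style slice
 * below (illustrative only):
 *
 * .. code::
 *
 *     y = x[:, :, startIndex:endIndex:stride]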
*
*/
message SliceLayerParams {
int64 startIndex = 1; // start of the sliced section. Inclusive.
int64 endIndex = 2; // end of sliced section. Exclusive.
uint64 stride = 3; // The step size. Must be positive.
enum SliceAxis {
CHANNEL_AXIS = 0;
HEIGHT_AXIS = 1;
WIDTH_AXIS = 2;
}
// The following mapping is used for interpreting this parameter:
// CHANNEL_AXIS => axis = -3, input must have rank at least 3.
// HEIGHT_AXIS => axis = -2, input must have rank at least 2.
// WIDTH_AXIS => axis = -1
SliceAxis axis = 4;
}
/*
* A layer that reduces the input using a specified operation.
*
* .. code::
*
* y = ReduceLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
 * A blob that can, in general, have any rank. However, depending on the
 * value of "axis", there may be additional rank constraints.
 *
 * Output
 * A blob with the same rank as the input, which has 1s on the dimensions
 * specified in the parameter "axis"
*
* Values supported for axis are [-1], [-2], [-3], [-2,-1], [-3,-2,-1]
* and the equivalent positive values (depending on the rank of the input)
* For mode == 'ArgMax', axis must be [-1] or [-2] or [-3].
*/
message ReduceLayerParams {
/*
* The following reduction operations are supported
* and are applied on the specified axis of the input array:
*
* ``SUM``
* Sum of all elements
*
* .. math:: \sum{x_i}
*
* ``AVG``
* Sum of all elements divided by the number of elements
*
* .. math:: \dfrac{\sum^n{x_i}}{n}
*
* ``PROD``
* Product of all elements
*
* .. math:: \prod{x_i}
*
* ``LOGSUM``
* Sum of the natural logarithm of all elements
*
* .. math:: \sum{\ln{(x_i + \epsilon)}}
*
* ``SUMSQUARE``
* Sum of squares of all elements
*
 * .. math:: \sum{x_i^2}
*
* ``L1``
 * L1 norm of all elements
*
* .. math:: ||x||_1 = \sum{|x_i|}
*
* ``L2``
 * L2 norm of all elements
*
* .. math:: ||x||_2 = \sqrt{\sum{x_i^2}}
*
* ``MAX``
* Maximum of all elements
*
* .. math:: \text{max}(x_i)
*
* ``MIN``
 * Minimum of all elements
*
* .. math:: \text{min}(x_i)
*
* ``ARGMAX``
* Argument of the maximum of all elements
*
* .. math:: \text{argmax}(x_i)
*
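 * A minimal NumPy sketch (illustrative only, not part of the specification)
 * of the reductions above over a tuple of axes, keeping the reduced
 * dimensions as size 1; ``ARGMAX`` is omitted since it reduces a single
 * axis:
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def reduce_tensor(x, mode, axes, epsilon=1e-6):
 *         ops = {
 *             "SUM": np.sum,
 *             "AVG": np.mean,
 *             "PROD": np.prod,
 *             "LOGSUM": lambda t, **kw: np.sum(np.log(t + epsilon), **kw),
 *             "SUMSQUARE": lambda t, **kw: np.sum(t * t, **kw),
 *             "L1": lambda t, **kw: np.sum(np.abs(t), **kw),
 *             "L2": lambda t, **kw: np.sqrt(np.sum(t * t, **kw)),
 *             "MAX": np.max,
 *             "MIN": np.min,
 *         }
 *         return ops[mode](x, axis=axes, keepdims=True)
 *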
*/
enum ReduceOperation {
SUM = 0;
AVG = 1;
PROD = 2;
LOGSUM = 3;
SUMSQUARE = 4;
L1 = 5;
L2 = 6;
MAX = 7;
MIN = 8;
ARGMAX = 9; // only supported with axis = C, H or W.
}
ReduceOperation mode = 1; // Specifies function used to reduce.
/*
* Used if mode is ``LOGSUM``.
* Defaults to ``1e-6`` if not set or is set to ``0``.
*/
float epsilon = 2;
enum ReduceAxis {
CHW = 0;
HW = 1;
C = 2;
H = 3;
W = 4;
}
// The following mapping is used for interpreting this parameter:
// CHW = axis [-3, -2, -1], input must have rank at least 3.
// HW = axis [-2, -1], input must have rank at least 2.
// C = axis [-3]
// H = axis [-2]
// W = axis [-1]
ReduceAxis axis = 3;
}
/*
* A layer that crops the spatial dimensions of an input.
* If two inputs are provided, the shape of the second input is used as the
* reference shape.
*
* .. code::
*
* y = CropLayer(x1) or y = CropLayer(x1,x2)
*
* Requires 1 or 2 inputs and produces 1 output.
*
* Input
* 1 or 2 tensors, each with rank at least 3, both inputs must have equal
* rank. Example:
* - 1 input case: A blob with shape ``[C, H_in, W_in]``.
* - 2 input case: 1st blob with shape ``[C, H_in, W_in]``, 2nd blob with
* shape ``[C, H_out, W_out]``.
*
* For ranks greater than 3, the leading dimensions, starting from 0 to -4
* (inclusive), are all treated as batch.
*
* Output
* Same rank as the inputs.
* e.g.: A blob with shape ``[C, H_out, W_out]``.
*
* If one input is used, output is computed as follows:
*
* .. code::
*
* y = x1[:, topCropAmount:H_in - bottomCropAmount, leftCropAmount:W_in -
* rightCropAmount]
*
* topCropAmount == Height startEdgeSize == borderAmounts[0].startEdgeSize
* bottomCropAmount == Height endEdgeSize == borderAmounts[0].endEdgeSize
* leftCropAmount == Width startEdgeSize == borderAmounts[1].startEdgeSize
* rightCropAmount == Width endEdgeSize == borderAmounts[1].endEdgeSize
*
* H_out = H_in - topCropAmount - bottomCropAmount
* W_out = W_in - leftCropAmount - rightCropAmount
*
* If two inputs are used, output is computed as follows:
*
* .. code::
*
* y = x1[:, offset[0]:offset[0] + H_out, offset[1]:offset[1] + W_out]
*/
message CropLayerParams {
/*
* The amounts to be cropped from the input.
* Used only if a single input is provided.
*/
BorderAmounts cropAmounts = 1;
/*
* The offset amounts.
* Used only if two inputs are provided.
* Must be of length 2, in order ``[H, W]``.
*/
repeated uint64 offset = 5;
}
/*
* A layer that computes the elementwise average of the inputs.
* This layer has limited broadcasting support. For general broadcasting see
* AddBroadcastableLayer.
*
* .. code::
*
* y = AverageLayer(x1,x2,...)
*
* Requires multiple inputs and produces 1 output.
*
* Input
* In general, there are no rank constraints.
* However, only certain set of shapes are broadcastable. For example:
* [B, 1, 1, 1], [B, C, 1, 1], [B, 1, H, W], [B, C, H, W]
* Output
* A blob with the same shape as each input.
*/
message AverageLayerParams {}
/*
* A layer that computes the elementwise maximum over the inputs.
*
* .. code::
*
* y = MaxLayer(x1,x2,...)
*
* Requires multiple inputs and produces 1 output.
*
* Input
* In general, there are no rank constraints.
* However, only certain set of shapes are broadcastable. For example:
* [B, C, 1, 1], [B, C, H, W]
* Output
* A blob with the same shape as each input.
*/
message MaxLayerParams {}
/*
* A layer that computes the elementwise minimum over the inputs.
*
* .. code::
*
* y = MinLayer(x1,x2,...)
*
* Requires multiple inputs and produces 1 output.
*
* Input
* In general, there are no rank constraints.
* However, only certain set of shapes are broadcastable. For example:
* [B, C, 1, 1], [B, C, H, W]
* Output
* A blob with the same shape as each input.
*/
message MinLayerParams {}
/*
* A layer that computes the dot product of two vectors.
*
* .. code::
*
* y = DotProductLayer(x1,x2)
*
* Requires 2 inputs and produces 1 output.
*
* Input
* Two blobs with rank at least 3, such that the last two dimensions must
* be 1. e.g.: blobs with shape ``[B, C, 1, 1]``. For ranks greater than 3, the
* leading dimensions, starting from 0 to -4 (inclusive), are all treated as
* batch.
*
* Output
* Same rank as the input.
* e.g. for rank 4 inputs, output shape: [B, 1, 1, 1]
*/
message DotProductLayerParams {
/*
* If true, inputs are normalized first,
* thereby computing the cosine similarity.
*/
bool cosineSimilarity = 1;
}
/*
* A layer that performs mean variance normalization, along axis = -3.
*
* .. code::
*
* y = MeanVarianceNormalizeLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
 * A blob with rank greater than or equal to 3.
* Example: Rank 4 blob represents [Batch, channels, height, width]
* For ranks greater than 3, the leading dimensions, starting from 0 to -4
* (inclusive), are all treated as batch.
*
* Output
* A blob with the same shape as the input.
*
* If ``acrossChannels == true``
* normalization is performed on flattened input, i.e. the input is reshaped to
* (Batch,C), where "Batch" contains all dimensions from 0 to -4 (inclusive),
* and C contains dimensions -1, -2, -3.
*
* If ``acrossChannels == false``
* normalization is performed within a channel,
* across spatial dimensions (i.e. last two dimensions).
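 *
 * A minimal NumPy sketch (illustrative only, not part of the specification),
 * assuming a rank 4 input ``[B, C, H, W]``:
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def mvn(x, across_channels, normalize_variance, epsilon=1e-6):
 *         axes = (1, 2, 3) if across_channels else (2, 3)
 *         y = x - np.mean(x, axis=axes, keepdims=True)
 *         if normalize_variance:
 *             var = np.mean(y * y, axis=axes, keepdims=True)
 *             y = y / np.sqrt(var + epsilon)
 *         return y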
*/
message MeanVarianceNormalizeLayerParams {
/*
* If true, mean and variance are computed across channels.
*/
bool acrossChannels = 1;
/*
* If false, only mean is subtracted.
*/
bool normalizeVariance = 2;
/*
* A small constant to avoid division by 0 while normalizing variance.
* Defaults to ``1e-6`` if not set or set to ``0``.
*/
float epsilon = 3;
}
/*
* A layer that repeats a sequence or the dimension sitting at axis = -5
*
* .. code::
*
* y = SequenceRepeatLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A blob with rank at least 5.
* e.g: shape ``[Seq, B, C, H, W]``
* Output
* A blob with the same rank as the input.
* e.g.: for input shape ``[Seq, B, C, H, W]``, output shape is
* ``[nRepetitions * Seq, B, C, H, W]``.
*/
message SequenceRepeatLayerParams {
/*
* Number of repetitions.
* Defaults to ``1`` if not set or set to ``0``.
*/
uint64 nRepetitions = 1;
}
// Recurrent Layers
// ----------------
/*
* The following activations are supported with recurrent layers:
* - Linear
* - Sigmoid
* - Tanh
* - ReLU
* - Scaled Hyperbolic Tangent: alpha * tanh(beta * x), currently only supported
* for alpha = 1.7159, beta = 2/3
* - Hard Sigmoid: min(max(alpha * x + beta, 0), 1), currently only supported
* for alpha = 0.2, beta = 0.5
*/
/*
* A simple recurrent layer.
*
* .. code::
*
* y_t = SimpleRecurrentLayer(x_t, y_{t-1})
*
* Input
 * A blob of rank 5, with shape ``[Seq, Batch, inputVectorSize, 1, 1]``.
* This represents a sequence of vectors of size ``inputVectorSize``.
* Output
* Same rank as the input.
* Represents a vector of size ``outputVectorSize``. It is either the final
* output or a sequence of outputs at all time steps.
*
* - Output Shape: ``[1, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput
* == false``
* - Output Shape: ``[Seq, Batch, outputVectorSize, 1, 1]`` , if
* ``sequenceOutput == true``
*
* This layer is described by the following equation:
*
* .. math::
* \boldsymbol{y_t} = f(\mathrm{clip}(W \boldsymbol{x_t} + \
* R \boldsymbol{y_{t-1}} + b))
*
* - ``W`` is a 2-dimensional weight matrix
* (``[outputVectorSize, inputVectorSize]``, row-major)
* - ``R`` is a 2-dimensional recursion matrix
* (``[outputVectorSize, outputVectorSize]``, row-major)
* - ``b`` is a 1-dimensional bias vector (``[outputVectorSize]``)
* - ``f()`` is an activation
* - ``clip()`` is a function that constrains values between ``[-50.0, 50.0]``
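 *
 * A minimal NumPy sketch (illustrative only, not part of the specification)
 * of a single time step of the recurrence above:
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def simple_rnn_step(x_t, y_prev, W, R, b, f=np.tanh):
 *         z = W @ x_t + R @ y_prev + b
 *         return f(np.clip(z, -50.0, 50.0))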
*/
message SimpleRecurrentLayerParams {
uint64 inputVectorSize = 1; // The size of the input vectors.
uint64 outputVectorSize = 2; // The size of the output vectors.
/*
* Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha
* = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5)
*/
ActivationParams activation = 10; // The activation function.
  /*
   * If false, output is just the result after the final state update.
   * If true, output is a sequence, containing outputs at all time steps.
   */
bool sequenceOutput = 15;
bool hasBiasVector = 20; // If false, no bias is added.
WeightParams weightMatrix = 30; // Weight matrix W.
WeightParams recursionMatrix = 31; // Recursion Weight matrix R.
WeightParams biasVector = 32; // Bias vector b.
bool reverseInput = 100;
// If true, then the node processes the input sequence from right to left
}
/*
* Gated-Recurrent Unit (GRU) Layer
*
* .. code::
*
* y_t = GRULayer(x_t, y_{t-1})
*
* Input
 * A blob of rank 5, with shape ``[Seq, Batch, inputVectorSize, 1, 1]``.
* This represents a sequence of vectors of size ``inputVectorSize``.
* Output
* Same rank as the input.
* Represents a vector of size ``outputVectorSize``. It is either the final
* output or a sequence of outputs at all time steps.
*
* - Output Shape: ``[1, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput
* == false``
* - Output Shape: ``[Seq, Batch, outputVectorSize, 1, 1]`` , if
* ``sequenceOutput == true``
*
* This layer is described by the following equations:
*
* Update Gate
* .. math::
* \boldsymbol{z_t} = \
* f(\mathrm{clip}(W_z \boldsymbol{x_t} + \
 * R_z \boldsymbol{y_{t-1}} + b_z))
*
* Reset Gate
* .. math::
* \boldsymbol{r_t} = \
* f(\mathrm{clip}(W_r \boldsymbol{x_t} + \
* R_r \boldsymbol{y_{t-1}} + b_r))
*
* Cell Memory State
* .. math::
* \boldsymbol{c_t} = \
* \boldsymbol{y_{t-1}} \odot \boldsymbol{r_t}
*
* Output Gate
* .. math::
* \boldsymbol{o_t} = \
* g(\mathrm{clip}(W_o \boldsymbol{x_t} + \
* R_o \boldsymbol{c_t} + b_o))
*
* Output
* .. math::
* \boldsymbol{y_t} = \
* (1 - \boldsymbol{z_t}) \odot \boldsymbol{o_t} + \
* \boldsymbol{z_t} \odot \boldsymbol{y_{t-1}}
*
* - ``W_z``, ``W_r``, ``W_o`` are 2-dimensional input weight matrices
* (``[outputVectorSize, inputVectorSize]``, row-major)
* - ``R_z``, ``R_r``, ``R_o`` are 2-dimensional recursion matrices
* (``[outputVectorSize, outputVectorSize]``, row-major)
* - ``b_z``, ``b_r``, ``b_o`` are 1-dimensional bias vectors
* (``[outputVectorSize]``)
* - ``f()``, ``g()`` are activations
* - ``clip()`` is a function that constrains values between ``[-50.0, 50.0]``
* - ``⊙`` denotes the elementwise product of matrices
*/
message GRULayerParams {
uint64 inputVectorSize = 1; // Size of the input vectors.
uint64 outputVectorSize = 2; // Size of the output vectors.
/*
* 2 element array representing activations [f(), g()] in that order.
* Typical values used = [sigmoid, tanh].
* Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha
* = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5)
*/
repeated ActivationParams activations = 10;
/*
* If false output is just the result after final state update.
* If true, output is a sequence, containing outputs at all time steps.
*/
bool sequenceOutput = 15;
/*
* If false, no biases (``b_z``, ``b_r``, ``b_o``) are added.
*/
bool hasBiasVectors = 20;
WeightParams updateGateWeightMatrix = 30; // Weight Matrix W_z.
WeightParams resetGateWeightMatrix = 31; // Weight Matrix W_r.
WeightParams outputGateWeightMatrix = 32; // Weight Matrix W_o.
WeightParams updateGateRecursionMatrix = 50; // Recursion Weight Matrix R_z.
WeightParams resetGateRecursionMatrix = 51; // Recursion Weight Matrix R_r.
WeightParams outputGateRecursionMatrix = 52; // Recursion Weight Matrix R_o.
WeightParams updateGateBiasVector = 70; // Bias vector b_z.
WeightParams resetGateBiasVector = 71; // Bias vector b_r.
WeightParams outputGateBiasVector = 72; // Bias vector b_o.
// If true, then the node processes the input sequence from right to left
bool reverseInput = 100;
}
/*
* Long short-term memory (LSTM) parameters.
*
* This is described by the following equations:
*
* Input Gate
* .. math::
* \boldsymbol{i_t} = \
* f(\mathrm{clip}(W_i \boldsymbol{x_t} + \
* R_i \boldsymbol{y_{t-1}} + \
* p_i \odot c_{t-1} + b_i))
*
* Forget Gate
* .. math::
* \boldsymbol{f_t} = \
* f(\mathrm{clip}(W_f \boldsymbol{x_t} + \
* R_f \boldsymbol{y_{t-1}} + \
* p_f \odot c_{t-1} + b_f))
*
* Block Input
* .. math::
* \boldsymbol{z_t} = \
* g(\mathrm{clip}(W_z \boldsymbol{x_t} + \
* R_z \boldsymbol{y_{t-1}} + b_z))
*
* Cell Memory State
* .. math::
* \boldsymbol{c_t} = \
* \boldsymbol{c_{t-1}} \odot \boldsymbol{f_t} + \
* \boldsymbol{i_t} \odot \boldsymbol{z_t}
*
* Output Gate
* .. math::
* \boldsymbol{o_t} = \
* f(\mathrm{clip}(W_o \boldsymbol{x_t} + \
* R_o \boldsymbol{y_{t-1}} + \
* p_o \odot c_t + b_o))
*
* Output
* .. math::
* \boldsymbol{y_t} = \
* h(\boldsymbol{c_t}) \odot \boldsymbol{o_t}
*
* - ``W_i``, ``W_f``, ``W_z``, ``W_o`` are 2-dimensional input weight matrices
* (``[outputVectorSize, inputVectorSize]``, row-major)
* - ``R_i``, ``R_f``, ``R_z``, ``R_o`` are 2-dimensional recursion matrices
* (``[outputVectorSize, outputVectorSize]``, row-major)
* - ``b_i``, ``b_f``, ``b_z``, ``b_o`` are 1-dimensional bias vectors
* (``[outputVectorSize]``)
 * - ``p_i``, ``p_f``, ``p_o`` are 1-dimensional peephole vectors
* (``[outputVectorSize]``)
* - ``f()``, ``g()``, ``h()`` are activations
* - ``clip()`` is a function that constrains values between ``[-50.0, 50.0]``
* - ``⊙`` denotes the elementwise product of matrices
*/
message LSTMParams {
/*
* If true, output is a sequence, containing outputs at all time steps.
* If false, output is just the result after final state update.
*/
bool sequenceOutput = 10;
/*
* If false, no biases (``b_i``, ``b_f``, ``b_z``, ``b_o``) are added.
*/
bool hasBiasVectors = 20;
/*
* If true, a vector of ``1`` values is added to ``b_f``.
*/
bool forgetBias = 30;
/*
* If true, peephole vectors are included.
*/
bool hasPeepholeVectors = 40;
/*
* If the coupled Input and Forget flag is on, the behaviour of
* ``c_t`` is changed to the following (i.e. forget gate is not used):
*
* .. math::
* \boldsymbol{c_t} = \
* \boldsymbol{c_{t-1}} \odot (1 - \boldsymbol{i_t}) + \
* \boldsymbol{i_t} \odot \boldsymbol{z_t}
*
*/
bool coupledInputAndForgetGate = 50;
/*
* Places a limit on the maximum and minimum values of ``c_t``.
* c_t = min(c_t, cellClipThreshold)
* c_t = max(c_t, -cellClipThreshold)
* If 0, it is set to its default value = 50.0.
*/
float cellClipThreshold = 60;
}
/*
* Weights for long short-term memory (LSTM) layers
*/
message LSTMWeightParams {
WeightParams inputGateWeightMatrix = 1; // Weight Matrix W_i.
WeightParams forgetGateWeightMatrix = 2; // Weight Matrix W_f.
WeightParams blockInputWeightMatrix = 3; // Weight Matrix W_z.
WeightParams outputGateWeightMatrix = 4; // Weight Matrix W_o.
WeightParams inputGateRecursionMatrix = 20; // Recursion Weight Matrix R_i.
WeightParams forgetGateRecursionMatrix = 21; // Recursion Weight Matrix R_f.
WeightParams blockInputRecursionMatrix = 22; // Recursion Weight Matrix R_z.
WeightParams outputGateRecursionMatrix = 23; // Recursion Weight Matrix R_o.
// biases:
WeightParams inputGateBiasVector = 40; // Bias vector b_i.
WeightParams forgetGateBiasVector = 41; // Bias vector b_f.
WeightParams blockInputBiasVector = 42; // Bias vector b_z.
WeightParams outputGateBiasVector = 43; // Bias vector b_o.
// peepholes:
WeightParams inputGatePeepholeVector = 60; // Peephole vector p_i.
WeightParams forgetGatePeepholeVector = 61; // Peephole vector p_f.
WeightParams outputGatePeepholeVector = 62; // Peephole vector p_o.
}
/*
* A unidirectional long short-term memory (LSTM) layer.
*
* .. code::
*
* (y_t, c_t) = UniDirectionalLSTMLayer(x_t, y_{t-1}, c_{t-1})
*
* Input
 * A blob of rank 5, with shape ``[Seq, Batch, inputVectorSize, 1, 1]``.
* This represents a sequence of vectors of size ``inputVectorSize``.
* Output
* Same rank as the input.
* Represents a vector of size ``outputVectorSize``. It is either the final
* output or a sequence of outputs at all time steps.
*
* - Output Shape: ``[1, Batch, outputVectorSize, 1, 1]`` , if ``sequenceOutput
* == false``
* - Output Shape: ``[Seq, Batch, outputVectorSize, 1, 1]`` , if
* ``sequenceOutput == true``
*
*/
message UniDirectionalLSTMLayerParams {
uint64 inputVectorSize = 1; // Size of the input vectors.
uint64 outputVectorSize = 2; // Size of the output vectors.
/*
* 3 element array representing activations [f(),g(),h()] in that order.
* Typical values used = [sigmoid, tanh, tanh].
* Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha
* = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5)
*/
repeated ActivationParams activations = 10;
LSTMParams params = 15;
LSTMWeightParams weightParams = 20; // Weights, biases and peepholes.
// If true, then the node processes the input sequence from right to left
bool reverseInput = 100;
}
/*
* Bidirectional long short-term memory (LSTM) layer
*
* .. code::
*
* (y_t, c_t, y_t_reverse, c_t_reverse) = BiDirectionalLSTMLayer(x_t,
* y_{t-1}, c_{t-1}, y_{t-1}_reverse, c_{t-1}_reverse)
*
* Input
 * A blob of rank 5, with shape ``[Seq, Batch, inputVectorSize, 1, 1]``.
* This represents a sequence of vectors of size ``inputVectorSize``.
* Output
* Same rank as the input.
* Represents a vector of size ``2 * outputVectorSize``. It is either the
* final output or a sequence of outputs at all time steps.
*
* - Output Shape: ``[1, Batch, 2 * outputVectorSize, 1, 1]`` , if
* ``sequenceOutput == false``
* - Output Shape: ``[Seq, Batch, 2 * outputVectorSize, 1, 1]`` , if
* ``sequenceOutput == true``
*
*
* The first LSTM operates on the input sequence in the forward direction.
* The second LSTM operates on the input sequence in the reverse direction.
*
* Example: given the input sequence ``[x_1, x_2, x_3]``,
* where ``x_i`` are vectors at time index ``i``:
*
* The forward LSTM output is ``[yf_1, yf_2, yf_3]``,
*
* where ``yf_i`` are vectors of size ``outputVectorSize``:
*
* - ``yf_1`` is the output at the end of sequence {``x_1``}
* - ``yf_2`` is the output at the end of sequence {``x_1``, ``x_2``}
* - ``yf_3`` is the output at the end of sequence {``x_1``, ``x_2``, ``x_3``}
*
* The backward LSTM output: ``[yb_1, yb_2, yb_3]``,
*
* where ``yb_i`` are vectors of size ``outputVectorSize``:
*
* - ``yb_1`` is the output at the end of sequence {``x_3``}
* - ``yb_2`` is the output at the end of sequence {``x_3``, ``x_2``}
* - ``yb_3`` is the output at the end of sequence {``x_3``, ``x_2``, ``x_1``}
*
* Output of the bi-dir layer:
*
* - if ``sequenceOutput = True`` : { ``[yf_1, yb_3]``, ``[yf_2, yb_2]``,
* ``[yf_3, yb_1]`` }
* - if ``sequenceOutput = False`` : { ``[yf_3, yb_3]`` }
*/
message BiDirectionalLSTMLayerParams {
/*
* Size of the input vectors.
*/
uint64 inputVectorSize = 1;
/*
 * Size of the output vectors.
* It is same for both forward and backward LSTMs.
*/
uint64 outputVectorSize = 2;
/*
* 3 element array representing activations [f(),g(),h()] in that order.
* Typical values used = [sigmoid, tanh, tanh].
* Activations supported are Linear, Sigmoid, Tanh, ReLU, Scaled Tanh (alpha
* = 1.71, beta = 2/3), Hard sigmoid (alpha = 0.2, beta = 0.5)
*/
repeated ActivationParams activationsForwardLSTM = 10;
/*
* Currently, backward LSTM activations
* must be same as the ones for the forward LSTM.
*/
repeated ActivationParams activationsBackwardLSTM = 11;
/*
* Common parameters shared by the forward and backward LSTMs.
*/
LSTMParams params = 15;
/*
* Weights and biases.
* Must be a length 2 message,
* for the forward and backward LSTM respectively.
*/
repeated LSTMWeightParams weightParams = 20;
}
message CustomLayerParams {
message CustomLayerParamValue {
oneof value {
double doubleValue = 10;
string stringValue = 20;
int32 intValue = 30;
int64 longValue = 40;
bool boolValue = 50;
}
}
string className = 10; // The name of the class (conforming to MLCustomLayer)
// corresponding to this layer
repeated WeightParams weights = 20; // Any weights -- these are serialized in
// binary format and memmapped at runtime
map<string, CustomLayerParamValue> parameters =
30; // these may be handled as strings, so this should not be large
string description =
40; // An (optional) description of the layer provided by the model
// creator. This information is displayed when viewing the model, but
// does not affect the model's execution on device.
}
/*
* A layer that rearranges the dimensions and data of an input.
*
* .. code::
*
* y = TransposeLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* A N-Dimensional tensor.
* Output
* A N-Dimensional tensor of the same rank but with dimensions and data
* permuted according to axes. Shape: ``[InputShape[axis[0]],
* InputShape[axis[1]], ... , InputShape[axis[N-1]]]``
*
* Examples:
*
* - If ``axes`` is set to ``[3, 1, 2, 0]`` and the input shape is
* ``[6,7,8,9]``, then the output has shape ``[9,7,8,6]``
*/
message TransposeLayerParams {
/*
* Length of "axes" should match the rank of input & output tensor
* "axes" should be a permutation of "[0,1,2,...,N-1]" where N is the rank.
*/
repeated uint64 axes = 1; //
}
/*
* A layer that computes the matrix multiplication of two tensors with
* numpy-like broadcasting where the matrices reside in the last two indices of
* the tensor.
*
* .. code::
*
* y = BatchedMatMul(a,b)
*
* Requires 1 or 2 inputs and produces 1 output.
*
* The first tensor, "a", must be provided as an input. The second tensor can
* either be an input or provided as a weight matrix parameter.
*
* Input
* - a: First N-Dimensional tensor
* - b: Second N-Dimensional tensor (either a rank-N input or a matrix, i.e.
* N=2, provided as a layer parameter)
*
* Output
* A tensor containing the matrix product of two tensors.
* When there are two inputs: rank is max(2, rank(a), rank(b))
* When there is one input: rank is same as that of the input.
*
* This operation behaves as following:
*
* When there are two inputs:
* - If N >= 2 for both tensors, it is treated as a batch of matrices
* residing in the last two indices. All the indices, except for the last two,
* are broadcasted using conventional rules.
* - If the first tensor is 1-D, it is converted to a 2-D tensor by
* prepending a 1 to its shape. Eg. (D) -> (1,D)
* - If the second tensor is 1-D, it is converted to a 2-D tensor by
* appending a 1 to its shape. Eg. (D) -> (D,1)
*
* When there is one input:
* - The weight matrix corresponds to a matrix, of shape (X1, X2). Values
* of X1, X2 must be provided as layer parameters.
* - The input, "a", is reshaped into a matrix by combining all the leading
* dimensions, except the last, into a batch dimension. eg:
* - if "a" is rank 1 (X1,) --> (1, X1). Output shape will be (X2,)
* - if "a" is rank 2 (B1, X1) --> no need to reshape. Output shape
* will be (B1, X2)
* - if "a" is rank 3 (B1, B2, X1) --> (B1 * B2, X1). Output shape
* will be (B1, B2, X2)
* - etc
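 *
 * For the two-input case with both ranks >= 2, the broadcasting behavior
 * matches NumPy's ``matmul`` (illustrative only):
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     a = np.ones((3, 1, 4, 5))
 *     b = np.ones((7, 5, 2))
 *     y = np.matmul(a, b)  # batch dims broadcast -> shape (3, 7, 4, 2)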
*/
message BatchedMatMulLayerParams {
/*
* If transposeA is true, it transposes the left matrix on the fly before
* matrix multiplication. (is ignored when there is one input)
*/
bool transposeA = 1;
/*
* If transposeB is true, it transposes the right matrix on the fly before
* matrix multiplication. (is ignored when there is one input)
*/
bool transposeB = 2;
/*
* Following parameters are ignored when there are two inputs.
*/
uint64 weightMatrixFirstDimension =
5; // X1: same as the last dimension of the input tensor
uint64 weightMatrixSecondDimension =
6; // X2: same as the last dimension of the output tensor
bool hasBias = 7; // Whether a bias is added or not. Supported only when
// there is one input.
/*
* Weight matrix representing shape [X1, X2].
* Values are however stored in column major order,
* in the "repeated float" or "bytes" fields of the message "WeightParams"
*/
WeightParams weights = 8;
WeightParams bias =
9; // Bias vector [X2]. Supported only when there is one input.
/*
* If set, this layer, at runtime, quantizes the floating point input blob to
* int8 before applying the matrix multiplication using the INT8 weight
* parameters provided in weights->int8RawValue. The result is then
* dequantized. Requires:
* * number of inputs to be 1
* * hasBias == false
* * QuantizationType == LinearQuantizationParams, such that
* * size of the "scale" field is 1 and "bias" field is empty in
* "LinearQuantizationParams"
* * numberOfBits == 8
* * weights->rawValue_size to be empty
*/
bool int8DynamicQuantize = 10;
}
/*
* A layer that concatenates a list of tensors along a specified axis.
*
* .. code::
*
* y = ConcatNDLayer(x1,x2,....)
*
 * Requires at least 2 inputs and produces 1 output.
*
* Input
* The rank of the input tensors must match and all dimensions also must
* match, except for the dimension 'axis'.
*
*
* Output
* Same rank as the input. The dimension along "axis", is the sum of the
* dimensions of the inputs.
*
* example:
*
* in1 : shape (3, 2), value = [[1, 2], [3, 4], [5, 6]]
* in2 : shape (3, 2), value = [[7, 8], [9, 10], [11, 12]]
* axis = 0
*
* if interleave = False (default)
* output : shape (6, 2)
* output[0:3, :] = in1
* output[3:6, :] = in2
* value = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]]
*
* if interleave = True
* output : shape (6, 2)
* output[0::2, :] = in1
* output[1::2, :] = in2
* value = [[1, 2], [7, 8], [3, 4], [9, 10], [5, 6], [11, 12]]
*
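 * A minimal NumPy sketch (illustrative only, not part of the specification)
 * of the example above:
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     in1 = np.array([[1, 2], [3, 4], [5, 6]])
 *     in2 = np.array([[7, 8], [9, 10], [11, 12]])
 *
 *     plain = np.concatenate([in1, in2], axis=0)  # interleave = False
 *
 *     interleaved = np.empty((6, 2), dtype=in1.dtype)  # interleave = True
 *     interleaved[0::2] = in1
 *     interleaved[1::2] = in2
 *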
*/
message ConcatNDLayerParams {
/*
* Dimension along which to concatenate. Supports negative values of the
* parameter 'axis'.
*/
int64 axis = 1;
/*
 * Only available in Core ML Specification >= 5 (iOS >= 14, macOS >= 11.0).
* Interleave option. If True, concatenation is done via interleaving the
* inputs. This requires all inputs to have the exact same shape.
*/
bool interleave = 2;
}
/*
* A layer that performs softmax normalization along a specified axis.
*
* .. code::
*
* y = SoftmaxNDLayer(x)
*
* Requires 1 input and produces 1 output.
*
* Output shape is same as the input.
*/
message SoftmaxNDLayerParams {
/*
* Dimension on which the softmax would be performed. Supports negative values
* of the parameter 'axis'.
*/
int64 axis = 1;
}
/*
* A layer that reverses specific dimensions of the input tensor.
* It is similar in functionality to the numpy.flip method.
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*/
message ReverseLayerParams {
/*
* Reverses each dimension of the input tensor for which corresponding
* reverseDim is set to True. Requires len(reverseDim) == rank(inputTensor)
*/
repeated bool reverseDim = 1;
}
/*
* A layer that reverses variable length slices.
*
* Requires 2 inputs and produces 1 output.
*
* 2 inputs, in order are denoted by "data", "seq_lengths".
* "seq_lenghts" must be a rank 1 tensor, i.e. seq_lengths.shape = (B,)
* which contains the lengths of the amount of sequence to be reversed, for each
* element of the batch. Dimension "batchAxis" in "data" must be equal to B,
* i.e, data.shape[batchAxis] = B.
*
* According to the batch axis, input "data" is first divided into a batch of B
* inputs, each of which is flipped along the dimension "sequenceAxis", by the
* amount specified in "seq_lengths", the second input.
*
* e.g.:
*
* data [shape = (2,4)]:
* [0 1 2 3]
* [4 5 6 7]
* seq_lengths [shape = (2,)]:
* [3, 0]
* batchAxis = 0
* sequenceAxis = 1
*
* output [shape = (2,4)]:
* [2 1 0 3]
* [4 5 6 7]
*
*
* data [shape = (2,3,2)]:
* [0 1]
* [2 3]
* [4 5] (slice = 0)
* [6 7]
* [8 9]
* [10 11] (slice = 1)
* seq_lengths [shape = (2,)]:
* [2, 3]
* batchAxis = 0
* sequenceAxis = 1
*
* output [shape = (2,3,2)]:
* [2 3]
* [0 1]
* [4 5] (slice = 0)
* [10 11]
* [8 9]
* [6 7] (slice = 1)
*
* Output shape is same as the input.
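 *
 * A minimal NumPy sketch (illustrative only, not part of the specification)
 * of the behavior above:
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def reverse_seq(data, seq_lengths, batch_axis=0, sequence_axis=1):
 *         x = np.moveaxis(data, (batch_axis, sequence_axis), (0, 1))
 *         out = x.copy()
 *         for b, n in enumerate(seq_lengths):
 *             out[b, :n] = x[b, :n][::-1]
 *         return np.moveaxis(out, (0, 1), (batch_axis, sequence_axis))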
*/
message ReverseSeqLayerParams {
int64 batchAxis = 1; // batch axis has to be strictly less than seq_axis
int64 sequenceAxis = 2;
}
/*
* A layer that loads data as a parameter and provides it as an output.
*
* .. code::
*
* y = LoadConstantNDLayer()
*
* Requires no input and produces 1 output.
*
* Output: A tensor with shape as provided in the parameter "shape"
*/
message LoadConstantNDLayerParams {
/*
* The shape of the constant to be loaded.
*/
repeated uint64 shape = 1;
WeightParams data = 2;
}
/*
* A layer that generates an output tensor with a constant value.
* Input is only used to determine the shape of the output.
* This layer is used to allocate a tensor with a dynamic shape (that of the
* input) and constant value.
*
* Requires 1 input and produces 1 output.
*
* .. code::
*
* y = FillLikeLayer(x)
*
* Input
* A N-Dimensional tensor, whose values are ignored. Only the shape is used
* to infer the shape of the output.
*
* Output
* A N-Dimensional tensor with the same shape as the input tensor.
*
*/
message FillLikeLayerParams {
float value = 1;
}
/*
* A layer that generates an output tensor with a constant value.
* This layer is used to allocate a tensor with a static shape and constant
* value.
*
* Requires no input and produces 1 output.
*
* .. code::
*
 * y = FillStaticLayer()
*
* Output
* A N-Dimensional tensor of shape "targetShape".
*
*/
message FillStaticLayerParams {
float value = 1;
repeated uint64 targetShape = 2;
}
/*
* A layer that generates an output tensor with a constant value.
* This layer is used to allocate a tensor with a dynamic shape (as specified by
* the input) and constant value.
*
* Requires 1 input and produces 1 output.
*
* .. code::
*
* y = FillDynamicLayer(x)
*
* Input
* A rank 1 tensor specifying the shape of the output
*
* Output
* An N-Dimensional tensor with the shape specified by the values in the
* input tensor.
*
*/
message FillDynamicLayerParams {
float value = 1;
}
/*
* A layer that returns the elements either from tensor x or tensor y,
* depending on the value in the condition tensor.
* It is similar in functionality to the numpy.where method with 3 inputs.
*
* Requires 3 inputs and produces 1 output.
* Inputs, in order, are the condition tensor, x and y.
*
* for each vector index (i,...,j):
* output[i,...,j] = x[i,...,j] if condition[i,...,j] = True
* y[i,...,j] if condition[i,...,j] = False
*
* All the 3 inputs are first broadcasted to a common shape.
* (the shapes must be broadcastable)
*
* output.rank = max(input[0].rank, input[1].rank, input[2].rank)
*
*/
message WhereBroadcastableLayerParams {}
/*
* A layer that computes elementwise trigonometric sine function.
*
*
* .. code::
*
* y = SinLayer(x)
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message SinLayerParams {}
/*
* A layer that computes elementwise trigonometric cosine function.
*
*
* .. code::
*
* y = CosLayer(x)
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message CosLayerParams {}
/*
* A layer that computes elementwise trigonometric tangent function.
*
*
* .. code::
*
* y = TanLayer(x)
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message TanLayerParams {}
/*
* A layer that computes elementwise trigonometric arcsine function.
*
*
* .. code::
*
* y = AsinLayer(x)
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message AsinLayerParams {}
/*
* A layer that computes elementwise trigonometric arccosine function.
*
*
* .. code::
*
* y = AcosLayer(x)
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message AcosLayerParams {}
/*
* A layer that computes elementwise trigonometric arctangent function.
*
*
* .. code::
*
* y = AtanLayer(x)
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message AtanLayerParams {}
/*
 * A layer that computes elementwise hyperbolic sine function.
*
*
* .. code::
*
* y = SinhLayer(x)
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message SinhLayerParams {}
/*
* A layer that computes elementwise trigonometric hyperbolic cosine function.
*
*
* .. code::
*
* y = CoshLayer(x)
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message CoshLayerParams {}
/*
* A layer that computes elementwise trigonometric hyperbolic tangent function.
*
*
* .. code::
*
* y = TanhLayer(x)
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message TanhLayerParams {}
/*
* A layer that computes elementwise trigonometric hyperbolic arcsine function.
*
*
* .. code::
*
* y = AsinhLayer(x)
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message AsinhLayerParams {}
/*
* A layer that computes elementwise trigonometric hyperbolic arccosine
* function.
*
*
* .. code::
*
* y = AcoshLayer(x)
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message AcoshLayerParams {}
/*
* A layer that computes elementwise trigonometric hyperbolic arctangent
* function.
*
*
* .. code::
*
* y = AtanhLayer(x)
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message AtanhLayerParams {}
/*
* A layer that raises each element in first tensor to the power of
* corresponding element in the second tensor.
* Supports conventional numpy-like broadcasting.
*
* .. code::
*
* y = PowBroadcastableLayer(x1, x2)
*
* Requires 2 inputs and produces 1 output.
*
* Input
* - First N-Dimensional tensor
* - Second N-Dimensional tensor
*
* Output
* An N-Dimensional tensor with the broadcast shape.
*
*/
message PowBroadcastableLayerParams {}
/*
* A layer that computes the exponential of all elements in the input tensor,
* with the base 2.
*
*
* .. code::
*
* y = Exp2Layer(x)
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message Exp2LayerParams {}
/*
* A layer that returns a tensor containing the indices of all non-zero
* elements of input tensor.
* It is similar in functionality to the numpy.where method with 1 input.
*
* Requires 1 input and produces 1 output.
* Output is of rank 2, of shape (N,R),
* where N is the number of non-zero elements in the input and R is the rank of
* the input.
*
* Output contains indices represented in the multi-index form
*
* e.g.:
* input {shape = (4,)}:
* [0 1 0 2]
* output {shape = (2,1)}:
* [1]
* [3]
*
*
* input {shape = (3, 3)}:
* [1 2 1]
* [0 2 2]
* [2 1 0]
* output {shape = (7,2)}:
* [0. 0.]
* [0. 1.]
* [0. 2.]
* [1. 1.]
* [1. 2.]
* [2. 0.]
* [2. 1.]
*
*/
message WhereNonZeroLayerParams {}
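/*
 * A minimal NumPy sketch of the (N, R) index output described above, using
 * ``numpy.argwhere`` as the assumed reference for the multi-index form; the
 * input matches the second example.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     x = np.array([[1, 2, 1],
 *                   [0, 2, 2],
 *                   [2, 1, 0]])
 *     indices = np.argwhere(x)        # one row of R indices per non-zero element
 *     assert indices.shape == (7, 2)  # N = 7 non-zeros, R = 2 (rank of input)
 */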
/*
* A layer that copies a tensor setting everything outside a central band in
* each inner-most matrix to zero.
*
* Requires 1 input and produces 1 output.
*
* Parameters for the matrix_band_part layer:
*
* band(m, n) = (num_lower < 0 || (m-n) <= num_lower) &&
*              (num_upper < 0 || (n-m) <= num_upper)
* output[i, j, k, ..., m, n] = band(m, n) * input[i, j, k, ..., m, n]
*
*
* Output shape is same as the input shape.
* Rank of the input must be at least 2.
* For rank higher than 2, the last 2 dimensions are treated as the matrix,
* while the rest are treated as batch.
*/
message MatrixBandPartLayerParams {
int64 numLower = 1;
int64 numUpper = 2;
}
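/*
 * A minimal NumPy sketch of the band masking defined above; the helper name
 * ``matrix_band_part`` is illustrative, and only the band(m, n) formula from
 * the description is assumed.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def matrix_band_part(x, num_lower, num_upper):
 *         # Band mask over the last two axes; any leading axes act as batch.
 *         rows, cols = x.shape[-2], x.shape[-1]
 *         m = np.arange(rows).reshape(-1, 1)
 *         n = np.arange(cols).reshape(1, -1)
 *         band = ((num_lower < 0) | ((m - n) <= num_lower)) & \
 *                ((num_upper < 0) | ((n - m) <= num_upper))
 *         return x * band
 *
 *     x = np.arange(16.0).reshape(4, 4)
 *     y = matrix_band_part(x, 1, 1)   # keeps only the tridiagonal band
 */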
/*
* A layer that copies a tensor setting everything outside upper triangular to
* zero.
*
* Requires 1 input and produces 1 output.
*
* Output shape is same as the input shape.
* Rank of the input must be at least 2.
* For rank higher than 2, the last 2 dimensions are treated as the matrix,
* while the rest are treated as batch.
*/
message UpperTriangularLayerParams {
int64 k = 1; // Diagonal below which to zero elements. k = 0 (the default) is
// the main diagonal, k < 0 is below it and k > 0 is above
}
/*
* A layer that copies a tensor setting everything outside lower triangular to
* zero.
*
* Requires 1 input and produces 1 output.
*
* Output shape is same as the input shape.
* Rank of the input must be at least 2.
* For rank higher than 2, the last 2 dimensions are treated as the matrix,
* while the rest are treated as batch.
*/
message LowerTriangularLayerParams {
int64 k = 1; // Diagonal above which to zero elements. k = 0 (the default) is
// the main diagonal, k < 0 is below it and k > 0 is above
}
/*
*
* A layer that broadcasts a tensor to a new shape.
*
* Requires 2 inputs and produces 1 output.
*
* First input is broadcast to produce the output, while the second input is
* only used to determine the shape of the output. Values of second input are
* not used.
*
* Output is a tensor with the same shape as the second input.
*
*/
message BroadcastToLikeLayerParams {}
/*
*
* A layer that broadcasts a tensor to a new shape.
*
* Requires 1 input and produces 1 output.
*
* Output tensor is the broadcasted version of the input and has shape as
* specified in the parameter "targetShape".
*/
message BroadcastToStaticLayerParams {
repeated uint64 targetShape = 1;
}
/*
*
* A layer that broadcasts a tensor to a new shape.
*
* Requires 2 inputs and produces 1 output.
*
* First input is the one that is broadcasted to produce the output.
* Second input is a rank 1 tensor specifying the shape of the output.
* Output tensor has shape as specified by the values in the 2nd input tensor.
*/
message BroadcastToDynamicLayerParams {}
/*
* A layer that performs element-wise addition operation with broadcast support.
*
* Requires 2 inputs and produces 1 output.
*/
message AddBroadcastableLayerParams {}
/*
* A layer that performs element-wise maximum operation with broadcast support.
*
* Requires 2 inputs and produces 1 output.
*/
message MaxBroadcastableLayerParams {}
/*
* A layer that performs element-wise minimum operation with broadcast support.
*
* Requires 2 inputs and produces 1 output.
*/
message MinBroadcastableLayerParams {}
/*
* A layer that performs element-wise modular operation with broadcast support.
*
* Requires 2 inputs and produces 1 output.
*/
message ModBroadcastableLayerParams {}
/*
* A layer that performs element-wise floor division operation with broadcast
* support.
*
* Requires 2 inputs and produces 1 output.
*/
message FloorDivBroadcastableLayerParams {}
/*
* A layer that performs element-wise subtract operation with broadcast support.
*
* Requires 2 inputs and produces 1 output.
*/
message SubtractBroadcastableLayerParams {}
/*
* A layer that performs element-wise multiply operation with broadcast support.
*
* Requires 2 inputs and produces 1 output.
*/
message MultiplyBroadcastableLayerParams {}
/*
* A layer that performs element-wise division operation with broadcast support.
*
* Requires 2 inputs and produces 1 output.
*/
message DivideBroadcastableLayerParams {}
/*
* Gather layer that gathers elements from the first input, along a specified
* axis, at indices specified in the second input. It is similar in
* functionality to the numpy.take method.
*
* Requires 2 inputs and produces 1 output.
*
* Given two inputs, 'data' and 'indices', gather the slices of 'data'
* and store into output.
* e.g.
* for i in [0, length(indices) - 1]
* output[i] = data[indices[i]] (1-D case, axis=0)
*
* if axis = 0:
* for each vector index (i,...,j)
* output[i,...,j,:,..,:] = data[indices[i,...,j],:,..,:]
*
* output.rank = (data.rank - 1) + indices.rank
*
* Negative indices and negative axis are supported.
*
* e.g:
*
* data shape = (2, 3)
* indices shape = (6, 8)
* axis = 0
* output shape = (6, 8) + (3,) = (6, 8, 3)
*
* data shape = (2, 3, 5)
* indices shape = (6, 8)
* axis = 1
* output shape = (2,) + (6, 8) + (5,) = (2, 6, 8, 5)
*
*/
message GatherLayerParams {
int64 axis = 1;
}
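/*
 * A minimal NumPy sketch of the gather behavior described above, assuming
 * ``numpy.take`` as the reference; shapes follow the second example.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     data = np.random.rand(2, 3, 5)
 *     indices = np.random.randint(0, 3, size=(6, 8))   # indexes axis 1 of data
 *     output = np.take(data, indices, axis=1)
 *     assert output.shape == (2, 6, 8, 5)              # (2,) + (6, 8) + (5,)
 */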
/*
* Scatter accumulation mode.
*/
enum ScatterMode {
SCATTER_UPDATE = 0;
SCATTER_ADD = 1; // add
SCATTER_SUB = 2; // subtract
SCATTER_MUL = 3; // multiply
SCATTER_DIV = 4; // divide
SCATTER_MAX = 5; // maximum
SCATTER_MIN = 6; // minimum
}
/*
* A layer that scatters data into a new tensor according to indices from the
* input. This is the inverse operation of Gather.
*
* Requires 3 inputs and produces 1 output.
*
* Output is initialized with the first input.
* Then updated with the values in the third input, at indices specified by the
* second input.
*
* An example when axis=0:
* Given three inputs, in order, "container", "indices", "updates", where
*
* - "container" is a rank R+1 tensor of shape [D_0, D_1, ..., D_R], which
* contains D_0 number of tensors, each with shape [D_1, ..., D_R].
*
* - "indices" is a rank 1 tensor with shape [N], where N is the number of
* updates. The values in this tensor must be in the range [0, D_0 - 1].
* (negative indexing is supported)
*
* - "updates" is a rank R+1 tensor with shape [N, D_1, ..., D_R], which
* represents a total number of N tensors, each of shape [D_1, ..., D_R].
*
* The effect of this operation is as follows:
*
* output = container;
* For each i in 0, ..., N - 1
* output[indices[i], :, ..., :] = updates[i, :, ..., :] // if mode ==
* "SCATTER_UPDATE"
*
* or
* For each i in 0, ..., N - 1
* output[indices[i], :, ..., :] += updates[i, :, ..., :] // if mode ==
* "SCATTER_ADD"
*
* etc
*
* When "indices" is a tensor of rank greater than 1, the equation becomes (for
* axis=0): For each vector index (i,...,j) output[indices[i,...,j],...] -=
* updates[i,...,j,...] // if mode == "SCATTER_SUB"
*
*
* The output has the same shape as the first input.
* "indices" input must have rank less than or equal to the "updates" input and
* its shape must be a subset of the the shape of the "updates" input.
*
* e.g:
*
* container shape = (4, 3)
* indices shape = (5, 2, 3)
* updates shape = (4, 5, 2, 3)
* axis = 1
* output shape = (4, 3)
*
* container shape = (4, 4, 3)
* indices shape = (6,)
* updates shape = (4, 6, 3)
* axis = -2
* output shape = (4, 4, 3)
*
* container shape = (5,)
* indices shape = (5, 7, 5, 6)
* updates shape = (5, 7, 5, 6)
* axis = -1
* output shape = (5,)
*/
message ScatterLayerParams {
int64 axis = 1;
ScatterMode mode = 2; // mode of accumulation.
}
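/*
 * A minimal NumPy sketch of the axis = 0, rank-1 "indices" case described
 * above; ``np.add.at`` is assumed only to illustrate the accumulation modes.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     container = np.zeros((4, 3))
 *     indices = np.array([0, 2, 3])        # N = 3 updates, values in [0, 3]
 *     updates = np.ones((3, 3))            # shape (N,) + container.shape[1:]
 *
 *     # SCATTER_UPDATE
 *     output = container.copy()
 *     output[indices] = updates
 *
 *     # SCATTER_ADD
 *     output_add = container.copy()
 *     np.add.at(output_add, indices, updates)
 */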
/*
* A layer that gathers elements from the first input, 'params', at the
* multi-indices specified by the second input, 'indices'.
*
* Requires 2 inputs and produces 1 output.
*
* 'params' = input[0], 'indices' = input[1]
*
* 'indices' is a rank K+1 tensor of shape [I_0, I_1, .., I_(K-1), I_K] which is
* viewed as a collection of indices of (I_0 * I_1 * ... * I_(K-1)) points in
* the I_K dimensional space. For instance, the multi-index of the first point
* is indices[0,0,...,0,:].
*
* Here is how the output is constructed:
*
* for i = 0,1,...,(I_0-1)
* ...
* for j = 0,1,....,(I_(K-1)-1)
* output[i,....,j,:,:,..,:] = params[indices[i,...,j,:], :,:,..,:]
*
* Hence, output shape is [I_0, I_1,...,I(K-1)] + params.shape[I_K:]
*
* output.rank = indices.rank - 1 + params.rank - indices.shape[-1]
*
* e.g:
*
* input[0] shape = (4, 2, 3, 4)
* input[1] shape = (6, 2)
* output shape = (6,) + (3, 4) = (6, 3, 4)
*
* input[0] shape = (3, 3, 3, 4, 7)
* input[1] shape = (3, 5)
* output shape = (3,) + () = (3,)
*
* input[0] shape = (5, 3, 2, 5)
* input[1] shape = (2, 7, 3, 2)
* output shape = (2, 7, 3) + (2, 5) = (2, 7, 3, 2, 5)
*
*/
message GatherNDLayerParams {}
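/*
 * A minimal NumPy sketch of the multi-index gather described above; the
 * helper name ``gather_nd`` is illustrative and the shapes follow the first
 * example.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def gather_nd(params, indices):
 *         # The last axis of 'indices' holds multi-indices into the leading
 *         # axes of 'params'.
 *         return params[tuple(np.moveaxis(indices, -1, 0))]
 *
 *     params = np.random.rand(4, 2, 3, 4)
 *     indices = np.random.randint(0, 2, size=(6, 2))   # 6 points, 2-D index space
 *     output = gather_nd(params, indices)
 *     assert output.shape == (6, 3, 4)                 # (6,) + (3, 4)
 */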
/*
* A layer that scatters data into a new tensor according to multi-indices from
* the input. This is the inverse operation of GatherND.
*
* Requires 3 inputs and produces 1 output.
* 3 inputs, in order are denoted as "container", "indices", "updates".
*
* 'indices' is a rank K+1 tensor of shape [I_0, I_1, .., I_(K-1), I_K] which is
* viewed as a collection of indices of (I_0 * I_1 * ... * I_(K-1)) points in
* the I_K dimensional space. For instance, the multi-index of the first point
* is indices[0,0,...,0,:].
*
* container.rank >= I_K
* updates.rank = K + (container.rank - I_K)
* shape of 'updates' = [I_0, I_1,...,I(K-1)] + container.shape[I_K:]
*
* output = container
* For each vector index (i,...,j) s.t. 0 <= i < I_0, ..., 0 <= j < I_(K-1)
* output[indices[i,...,j,:], :,:,..,:] = updates[i,....,j,:,:,..,:] // if
* mode == "SCATTER_UPDATE"
*
* The output has the same shape as the first input.
*
* e.g:
*
* container shape = (3, 2)
* indices shape = (4, 2)
* updates shape = (4,)
* output shape = (3, 2)
*
* container shape = (7, 6)
* indices shape = (4, 7, 2, 5, 1)
* updates shape = (4, 7, 2, 5, 6)
* output shape = (7, 6)
*
*/
message ScatterNDLayerParams {
ScatterMode mode = 1; // mode of accumulation.
}
/*
* Gather layer that gathers elements from the first input, along a specified
* axis, at indices specified in the second input. It is similar in
* functionality to the numpy.take_along_axis method.
*
* Requires 2 inputs and produces 1 output.
*
* Given two inputs, 'data' and 'indices', gather the slices of 'data'
* and store into output.
*
* Both inputs and output have the same rank.
* Output shape is same as the shape of 'indices'
* Shapes of 'indices' and 'data' match, except at the 'axis' dimension.
*
* This operation performs the following operation for axis=0:
* for each vector index (i,j,....,k)
* output[i,j,....,k] = data[index[i,j,....,k],j,....,k]
*
* Negative indices and negative axis are supported.
*
* e.g:
*
* data shape = (4, 4, 7)
* indices shape = (4, 5, 7)
* axis = 1
* output shape = (4, 5, 7)
*
*/
message GatherAlongAxisLayerParams {
int64 axis = 1;
}
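/*
 * A minimal NumPy sketch of the behavior described above, assuming
 * ``numpy.take_along_axis`` as the reference; shapes follow the example.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     data = np.random.rand(4, 4, 7)
 *     indices = np.random.randint(0, 4, size=(4, 5, 7))   # same rank as data
 *     output = np.take_along_axis(data, indices, axis=1)
 *     assert output.shape == (4, 5, 7)
 */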
/*
* A layer that scatters data into a new tensor according to indices from
* the input along the given axis into the output tensor.
* This is the inverse operation of GatherAlongAxis.
* It is similar in functionality to the numpy.put_along_axis method.
*
* Requires 3 inputs and produces 1 output.
* 3 inputs, in order are denoted as "container", "indices", "updates".
*
* All inputs and output have the same rank.
* Output shape is same as the shape of 'container'
* Shapes of 'indices' and 'updates' match, which is same as the shape of
* 'container' except at the 'axis' dimension.
*
* Negative indices and negative axis are supported.
*
* This operation performs the following operation for axis=0:
* output = container
* for each vector index (i,j,....,k)
* output[index[i,j,....,k],j,....,k] = updates[i,j,....,k]
*
* e.g.:
*
* container shape = (2, 5, 6)
* indices shape = (2, 2, 6)
* updates shape = (2, 2, 6)
* axis = -2
* output shape = (2, 5, 6)
*
*/
message ScatterAlongAxisLayerParams {
int64 axis = 1;
ScatterMode mode = 2; // mode of accumulation.
}
/*
* A layer that stacks the input tensors along the given axis.
* It is similar in functionality to the numpy.stack method.
*
* Requires at least 2 inputs and produces 1 output.
* All inputs must have the same shape.
* Rank of the output is 1 greater than the rank of the inputs.
*
* Negative indexing is supported for the "axis" parameter.
*
* e.g.:
*
* input shape = (2, 4, 2)
* number of inputs = 5
* axis = 3
* output shape = (2, 4, 2, 5)
*
* input shape = (2, 4, 2)
* number of inputs = 5
* axis = -2
* output shape = (2, 4, 5, 2)
*/
message StackLayerParams {
int64 axis = 1;
}
/*
* A layer that reshapes a tensor without altering the rank of the input.
* Order of the data is left unchanged.
*
* Requires 1 input and produces 1 output.
*
* e.g:
*
* input shape = (20,10)
* targetShape = (5,-1)
* output shape = (5,40)
*
* input shape = (20,10,5)
* targetShape = (0,2,25)
* output shape = (20,2,25)
*
* input shape = (10,3,5)
* targetShape = (25,0,-1)
* output shape = (25,3,2)
*/
message RankPreservingReshapeLayerParams {
/*
* Length of this field must be same as the input/output rank.
* It can have 0's, in which case the corresponding input dimension is kept
* intact. At most one element can be -1, in which case the output dimension
* is calculated from rest of the shape.
*/
repeated int64 targetShape = 1;
}
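/*
 * A minimal NumPy sketch of how "targetShape" is resolved, assuming 0 keeps
 * the input dimension and a single -1 is inferred by the reshape; the helper
 * name is illustrative and the shapes follow the third example.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def rank_preserving_reshape(x, target_shape):
 *         # 0 keeps the corresponding input dimension; np.reshape infers the -1.
 *         shape = [dim if t == 0 else t for dim, t in zip(x.shape, target_shape)]
 *         return x.reshape(shape)
 *
 *     x = np.zeros((10, 3, 5))
 *     y = rank_preserving_reshape(x, (25, 0, -1))
 *     assert y.shape == (25, 3, 2)
 */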
/*
* Constant padding layer.
* Pad the input array with a constant value, either along a single given axis
* or along a set of axes.
*
* Requires 1 or 2 inputs and produces 1 output.
* The amount of padding can be either set as a parameter ("padAmounts") or
* provided as a second input.
*
* Output rank is same as the rank of the first input.
*
* when "padToGivenOutputSizeMode" is False:
*
* output_shape[i] = input_shape[i] + padAmounts[2*i] + padAmounts[2*i+1],
* i=0,...,rank-1
*
* Examples:
*
* input shape = (20,10)
* padAmounts = [0,1,4,0]
* output shape = (21,14)
*
* input shape = (20,10,5)
* padAmounts = [0,0,3,4,0,9]
* output shape = (20,17,14)
*
*
* when "padToGivenOutputSizeMode" is True
*
* output_shape[i] = max(input_shape[i], padAmounts[2*i] + padAmounts[2*i+1]),
* i=0,...,rank-1
*
* input shape = (20,10)
* padAmounts = [0,21,14,0]
* output shape = (21,14)
*
* input shape = (20,10,5)
* padAmounts = [0,0,17,0,0,14]
* output shape = (20,17,14)
*/
message ConstantPaddingLayerParams {
/*
* The value to be used for padding.
*/
float value = 1;
/*
* Length of this repeated field must be twice the rank of the first input.
* 2*i-th and (2*i+1)-th values represent the amount of padding to be applied
* to the i-th input dimension, "before" and "after" the input values,
* respectively.
*/
repeated uint64 padAmounts = 2;
/*
* When this is True, a positive value in "padAmounts" specifies the desired
* output size along that dimension rather than an amount of padding. In that
* case only one of padAmounts[2*i] and padAmounts[2*i+1] can be non-zero, for
* i=0,..,rank-1.
*/
bool padToGivenOutputSizeMode = 3;
}
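/*
 * A minimal NumPy sketch of the padAmounts layout when
 * "padToGivenOutputSizeMode" is False, assuming ``numpy.pad`` as the
 * reference; shapes follow the second example.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     x = np.zeros((20, 10, 5))
 *     pad_amounts = [0, 0, 3, 4, 0, 9]      # (before, after) pairs per dimension
 *     pad_width = [(pad_amounts[2 * i], pad_amounts[2 * i + 1])
 *                  for i in range(x.ndim)]
 *     y = np.pad(x, pad_width, mode="constant", constant_values=0.0)
 *     assert y.shape == (20, 17, 14)
 */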
/*
* A layer that returns a tensor filled with values from the normal
* distribution.
*
* Requires 1 input and produces 1 output.
*
* Parameters
* seed: seed used for the normal distribution.
* mean: mean of the normal distribution.
* stdDev: standard deviation of the normal distribution.
*
* Input
* An N-Dimensional tensor, whose values are ignored. Only the shape is used
* to infer the shape of the output.
*
* Output
* An N-Dimensional tensor with the same shape as the input tensor.
*
*/
message RandomNormalLikeLayerParams {
int64 seed = 1;
float mean = 2;
float stdDev = 3;
}
/*
* A layer that returns a tensor filled with values from the normal
* distribution.
*
* Requires no input and produces 1 output.
*
* Parameters
* seed: seed used for the normal distribution.
* mean: mean of the normal distribution.
* stdDev: standard deviation of the normal distribution.
* outputShape: shape of the output tensor.
*
* Output
* An N-Dimensional tensor of shape "outputShape".
*
*/
message RandomNormalStaticLayerParams {
int64 seed = 1;
float mean = 2;
float stdDev = 3;
repeated uint64 outputShape = 4;
}
/*
* A layer that returns a tensor filled with values from the normal
* distribution.
*
* Requires 1 input and produces 1 output.
*
* Parameters:
* seed: seed used for the normal distribution.
* mean: mean of the normal distribution.
* stdDev: standard deviation of the normal distribution.
*
* Input
* A rank 1 tensor specifying the shape of the output
*
* Output
* An N-Dimensional tensor with the shape specified by the values in the
* input tensor.
*/
message RandomNormalDynamicLayerParams {
int64 seed = 1;
float mean = 2;
float stdDev = 3;
}
/*
* A layer that returns a tensor filled with values from the uniform
* distribution.
*
* Requires 1 input and produces 1 output.
*
* Parameters
* seed: seed used for the uniform distribution.
* minVal: lower bound on the range of random values for the uniform distribution.
* maxVal: upper bound on the range of random values for the uniform distribution.
*
* Input
* An N-Dimensional tensor, whose values are ignored. Only the shape is used
* to infer the shape of the output.
*
* Output
* An N-Dimensional tensor with the same shape as the input tensor.
*
*/
message RandomUniformLikeLayerParams {
int64 seed = 1;
float minVal = 2;
float maxVal = 3;
}
/*
* A layer that returns a tensor filled with values from the uniform
* distribution.
*
* Requires no input and produces 1 output.
*
* Parameters
* seed: seed used for the uniform distribution.
* minVal: lower bound on the range of random values for the uniform distribution.
* maxVal: upper bound on the range of random values for the uniform distribution.
* outputShape: shape of the output tensor.
*
* Output
* An N-Dimensional tensor of shape "outputShape".
*
*/
message RandomUniformStaticLayerParams {
int64 seed = 1;
float minVal = 2;
float maxVal = 3;
repeated uint64 outputShape = 4;
}
/*
* A layer that returns a tensor filled with values from the uniform
* distribution.
*
* Requires 1 input and produces 1 output.
*
* Parameters:
* seed: seed used for the uniform distribution.
* minVal: lower bound on the range of random values for the uniform distribution.
* maxVal: upper bound on the range of random values for the uniform distribution.
*
* Input
* A rank 1 tensor specifying the shape of the output
*
* Output
* An N-Dimensional tensor with the shape specified by the values in the
* input tensor.
*
*/
message RandomUniformDynamicLayerParams {
int64 seed = 1;
float minVal = 2;
float maxVal = 3;
}
/*
* A layer that returns a tensor filled with values from the Bernoulli
* distribution.
*
* Requires 1 input and produces 1 output.
*
* Parameters
* seed: seed used for the Bernoulli distribution.
* prob: probability of a 1 event.
*
* Input
* An N-Dimensional tensor, whose values are ignored. Only the shape is used
* to infer the shape of the output.
*
* Output
* An N-Dimensional tensor with the same shape as the input tensor.
*
*/
message RandomBernoulliLikeLayerParams {
int64 seed = 1;
float prob = 2;
}
/*
* A layer that returns a tensor filled with values from the Bernoulli
* distribution.
*
* Requires no input and produces 1 output.
*
* Parameters
* seed: seed used for the Bernoulli distribution.
* prob: probability of a 1 event.
* outputShape: shape of the output tensor.
*
* Output
* An N-Dimensional tensor of shape "outputShape".
*/
message RandomBernoulliStaticLayerParams {
int64 seed = 1;
float prob = 2;
repeated uint64 outputShape = 3;
}
/*
* A layer that returns a tensor filled with values from the Bernoulli
* distribution.
*
* Requires 1 input and produces 1 output.
*
* Parameters:
* seed: seed used for the Bernoulli distribution.
* prob: probability of a 1 event.
*
* Input
* A rank 1 tensor specifying the shape of the output
*
* Output
* An N-Dimensional tensor with the shape specified by the values in the
* input tensor.
*/
message RandomBernoulliDynamicLayerParams {
int64 seed = 1;
float prob = 2;
}
/*
* A layer that returns a tensor of the specified shape filled with values from
* the categorical distribution.
*
* Requires 1 input and produces 1 output.
*
* Parameters:
* seed: seed used for the categorical distribution.
* numSamples: number of samples to draw.
* isLogits: true if the inputs are logits, false if the inputs are probabilities.
* eps: default value is 1e-10.
* temperature: default value is 1.0.
*
* If the input tensor shape is [D_1, D_2, ... , D_(R-1), D_R] (rank R),
* then the output shape is [D_1, D_2, ... , D_(R-1), numSamples] (rank R).
*
*/
message CategoricalDistributionLayerParams {
int64 seed = 1;
int64 numSamples = 2;
bool isLogits = 3;
float eps = 4;
float temperature = 5;
}
/*
* A layer that performs reduction with L1 normalization operation.
*
* Negative indexing is supported.
* Requires 1 input and produces 1 output.
*
* Parameters:
* axes: dimensions along which to perform reduction
* keepDims: if True, keep the reduced dimensions (value will be 1);
* otherwise, reduced dimensions are squeezed
* reduceAll: ignore the "axes" parameter, perform reduction along all axes
*
*/
message ReduceL1LayerParams {
repeated int64 axes = 1;
bool keepDims = 2;
bool reduceAll = 3;
}
/*
* A layer that performs reduction with L2 normalization operation.
*
* Negative indexing is supported.
* Requires 1 input and produces 1 output.
*
* Parameters:
* axes: dimensions along which to perform reduction
* keepDims: if True, keep the reduced dimensions (value will be 1);
* otherwise, reduced dimensions are squeezed
* reduceAll: ignore the "axes" parameter, perform reduction along all axes
*
*/
message ReduceL2LayerParams {
repeated int64 axes = 1;
bool keepDims = 2;
bool reduceAll = 3;
}
/*
* A layer that performs reduction with max operation.
*
* Negative indexing is supported.
* Requires 1 input and produces 1 output.
*
* Parameters:
* axes: dimensions along which to perform reduction
* keepDims: if True, keep the reduced dimensions (value will be 1);
* otherwise, reduced dimensions are squeezed
* reduceAll: ignore the "axes" parameter, perform reduction along all axes
*
*/
message ReduceMaxLayerParams {
repeated int64 axes = 1;
bool keepDims = 2;
bool reduceAll = 3;
}
/*
* A layer that performs reduction with min operation.
*
* Negative indexing is supported.
* Requires 1 input and produces 1 output.
*
* Parameters:
* axes: dimensions along which to perform reduction
* keepDims: if True, keep the reduced dimensions (value will be 1);
* otherwise, reduced dimensions are squeezed
* reduceAll: ignore the "axes" parameter, perform reduction along all axes
*
*/
message ReduceMinLayerParams {
repeated int64 axes = 1;
bool keepDims = 2;
bool reduceAll = 3;
}
/*
* A layer that performs reduction with sum operation.
*
* Negative indexing is supported.
* Requires 1 input and produces 1 output.
*
* Parameters:
* axes: dimensions along which to perform reduction
* keepDims: if True, keep the reduced dimensions (value will be 1);
* otherwise, reduced dimensions are squeezed
* reduceAll: ignore the "axes" parameter, perform reduction along all axes
*
*/
message ReduceSumLayerParams {
repeated int64 axes = 1;
bool keepDims = 2;
bool reduceAll = 3;
}
/*
* A layer that performs reduction with prod operation.
*
* Negative indexing is supported.
* Requires 1 input and produces 1 output.
*
* Parameters:
* axes: dimensions along which to perform reduction
* keepDims: if True, keep the reduced dimensions (value will be 1);
* otherwise, reduced dimensions are squeezed
* reduceAll: ignore the "axes" parameter, perform reduction along all axes
*
*/
message ReduceProdLayerParams {
repeated int64 axes = 1;
bool keepDims = 2;
bool reduceAll = 3;
}
/*
* A layer that performs reduction with mean operation.
*
* Negative indexing is supported.
* Requires 1 input and produces 1 output.
*
* Parameters:
* axes: dimensions along which to perform reduction
* keepDims: if True, keep the reduced dimensions (value will be 1);
* otherwise, reduced dimensions are squeezed
* reduceAll: ignore the "axes" parameter, perform reduction along all axes
*
*/
message ReduceMeanLayerParams {
repeated int64 axes = 1;
bool keepDims = 2;
bool reduceAll = 3;
}
/*
* A layer that performs reduction with logSum operation.
*
* Negative indexing is supported.
* Requires 1 input and produces 1 output.
*
* Parameters:
* axes: dimensions along which to perform reduction
* keepDims: if True, keep the reduced dimensions (value will be 1);
* otherwise, reduced dimensions are squeezed
* reduceAll: ignore the "axes" parameter, perform reduction along all axes
*
*/
message ReduceLogSumLayerParams {
repeated int64 axes = 1;
bool keepDims = 2;
bool reduceAll = 3;
}
/*
* A layer that performs reduction with sumSquare operation.
*
* Negative indexing is supported.
* Requires 1 input and produces 1 output.
*
* Parameters:
* axes: dimensions along which to perform reduction
* keepDims: if True, keep the reduced dimensions (value will be 1);
* otherwise, reduced dimensions are squeezed
* reduceAll: ignore the "axes" parameter, perform reduction along all axes
*
*/
message ReduceSumSquareLayerParams {
repeated int64 axes = 1;
bool keepDims = 2;
bool reduceAll = 3;
}
/*
* A layer that performs reduction with logSumExp operation.
*
* Negative indexing is supported.
* Requires 1 input and produces 1 output.
*
* Parameters:
* axes: dimensions along which to perform reduction
* keepDims: if True, keep the reduced dimensions (value will be 1);
* otherwise, reduced dimensions are squeezed
* reduceAll: ignore the "axes" parameter, perform reduction along all axes
*
*/
message ReduceLogSumExpLayerParams {
repeated int64 axes = 1;
bool keepDims = 2;
bool reduceAll = 3;
}
/*
* A layer that increases the rank of the input tensor by adding unit
* dimensions.
*
* Requires 1 input and produces 1 output.
*
* e.g.:
*
* input shape = (10,5)
* axes = (0,1)
* output shape = (1,1,10,5)
*
* input shape = (10,5)
* axes = (0,2)
* output shape = (1,10,1,5)
*
* input shape = (10,5)
* axes = (-2,-1)
* output shape = (10,5,1,1)
*
*/
message ExpandDimsLayerParams {
/*
* Axis values provided here get dimension 1 in the output tensor.
* Negative indexing is supported.
*/
repeated int64 axes = 1;
}
/*
* A layer that flattens the input tensor into a 2-dimensional matrix.
*
* Requires 1 input and produces 1 output.
* Output tensor is always rank 2.
*
* First dimension of the output is the product of all the dimensions in
* input[:axis] ("axis" is exclusive).
* Second dimension of the output is the product of all the dimensions in
* input[axis:] ("axis" is inclusive).
*
* e.g.:
* input shape: (3,)
* axis: -1
* output shape: (1, 3)
*
* input shape: (3,)
* axis: 1
* output shape: (3, 1)
*
* input shape: (4, 3)
* axis: -1
* output shape: (4, 3)
*
* input shape: (5, 2)
* axis: 0
* output shape: (1, 10)
*
* input shape: (5, 5, 3)
* axis: -2
* output shape: (5, 15)
*
* input shape: (2, 3, 2)
* axis: -1
* output shape: (6, 2)
*
*/
message FlattenTo2DLayerParams {
int64 axis = 1;
}
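/*
 * A minimal NumPy sketch of the two output dimensions described above; the
 * helper name ``flatten_to_2d`` is illustrative and the shapes follow the
 * fifth example.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def flatten_to_2d(x, axis):
 *         axis = axis % x.ndim   # negative axis support
 *         first = int(np.prod(x.shape[:axis], dtype=np.int64))
 *         second = int(np.prod(x.shape[axis:], dtype=np.int64))
 *         return x.reshape(first, second)
 *
 *     x = np.zeros((5, 5, 3))
 *     assert flatten_to_2d(x, -2).shape == (5, 15)
 */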
/*
* A layer that reshapes a tensor.
*
* Requires 1 input and produces 1 output.
*
* Output tensor is the reshaped version of the input and has shape as specified
* in the parameter "targetShape".
*
*/
message ReshapeStaticLayerParams {
repeated int64 targetShape = 1;
}
/*
* A layer that reshapes a tensor.
*
* Requires 2 inputs and produces 1 output.
*
* First input is reshaped to produce the output, while the second input is only
* used to determine the shape of the output. Values of the second input are not
* used.
*
* Output is a tensor with the same shape as the second input.
*
*/
message ReshapeLikeLayerParams {}
/*
* A layer that reshapes a tensor.
*
* Requires 2 inputs and produces 1 output.
*
* First input is the one that is reshaped to produce the output.
* Second input is a rank 1 tensor specifying the shape of the output.
* Output tensor has shape as specified by the values in the 2nd input tensor.
*/
message ReshapeDynamicLayerParams {}
/*
* A layer that decreases the rank of the input tensor by removing unit
* dimensions.
*
* Requires 1 input and produces 1 output.
*
* Output rank is the input rank minus the number of squeezed axes, as the
* examples below show; if the input rank is 1, the output rank is also 1.
*
* e.g.:
*
* input shape = (1,1,10,5)
* axes = (0,1)
* output shape = (10,5)
*
* input shape = (1,10,5,1)
* axes = (0,3)
* output shape = (10,5)
*
* input shape = (10,5,1,1)
* axes = (-2,-1)
* output shape = (10,5)
*
* input shape = (1,)
* axes = (0)
* output shape = (1,)
*
*/
message SqueezeLayerParams {
/*
* Axis values provided here get removed from the input tensor.
* Negative indexing is supported.
*/
repeated int64 axes = 1;
bool squeezeAll = 2; // if true squeeze all dimensions that are 1.
}
/*
* A layer that returns top K (or bottom K) values and the corresponding indices
* of the input along a given axis.
*
* Requires 1 or 2 inputs and produces 2 outputs.
*
* The second input, which is optional, holds the value of K.
* If there is only one input, the value of K specified in the layer parameter
* is used.
*
* Both outputs have the same rank as the first input.
* Second input must correspond to a scalar tensor.
*
* e.g.:
*
* first input's shape = (45, 34, 10, 5)
* axis = 1
* output shape, for both outputs = (45, K, 10, 5)
*
*/
message TopKLayerParams {
int64 axis = 1; // negative indexing is supported
uint64 K = 2; // is ignored if a second input is present.
bool useBottomK = 3; // if true, bottom K (values, indices) are returned instead
}
/*
* A layer that returns the indices of the maximum value along a specified axis
* in a tensor.
*
* Requires 1 input and produces 1 output. Negative indexing is supported.
*
* Output has the same rank as the input if "removeDim" is False (default).
* Output has rank one less than the input if "removeDim" is True and input rank
* is more than 1.
*
* e.g.:
*
* input shape = (45, 34, 10, 5)
* axis = -2
* output shape = (45, 1, 10, 5), if removeDim = False (default)
* output shape = (45, 10, 5), if removeDim = True
*
* input shape = (5,)
* axis = 0
* output shape = (1,), if removeDim = False or True
*
*/
message ArgMaxLayerParams {
int64 axis = 1;
bool removeDim = 2;
}
/*
* A layer that returns the indices of the minimum value along a specified axis
* in a tensor.
*
* Requires 1 input and produces 1 output. Negative indexing is supported.
*
* Output has the same rank as the input if "removeDim" is False (default).
* Output has rank one less than the input if "removeDim" is True and input rank
* is more than 1.
*
* e.g.:
*
* input shape = (45, 34, 10, 5)
* axis = -2
* output shape = (45, 1, 10, 5), if removeDim = False (default)
* output shape = (45, 10, 5), if removeDim = True
*
* input shape = (5,)
* axis = 0
* output shape = (1,), if removeDim = False or True
*
*/
message ArgMinLayerParams {
int64 axis = 1;
bool removeDim = 2;
}
/*
* A layer that splits the input tensor into multiple output tensors,
* along the specified axis.
*
* The layer either uniformly splits the input tensor into ``num_splits``
* tensors, or splits according to the given split sizes in ``split_sizes``.
* Supports unequal splits and negative indexing.
*
* Requires 1 input and produces at least 2 outputs.
* Rank of all the outputs is same as that of the input.
*
* If parameter "splitSizes" is provided, value of the parameter "numSplits" is
* ignored, since in that case "numSplits" is automatically inferred to be the
* length of "splitSizes".
*
*
* e.g.:
* input shape: (5, 3, 4)
* axis = -3, split_sizes = [3, 2]
* output shape: (3, 3, 4)
* output shape: (2, 3, 4)
*/
message SplitNDLayerParams {
int64 axis = 1;
uint64 numSplits = 2;
repeated uint64 splitSizes = 3;
}
/*
* A layer that performs element-wise ceil operation on the input tensor that
* rounds the value to the smallest integer not less than x.
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message CeilLayerParams {}
/*
* A layer that performs element-wise round operation on the input tensor
* that rounds the value to the nearest integer.
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message RoundLayerParams {}
/*
* A layer that performs element-wise floor operation on the input tensor
* that rounds the value to the largest integer not greater than x.
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message FloorLayerParams {}
/*
* A layer that performs element-wise sign operation (+1 for positive values,
* -1 for negative values, 0 for zeros).
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message SignLayerParams {}
/*
* A layer that performs element-wise clip operation. Clip the values in the
* input tensor to the threshold values [min_value, max_value].
*
* Requires 1 input and produces 1 output.
*
* Parameter minVal: the minimum threshold.
* Parameter maxVal: the maximum threshold.
*
* output = min(max(input, minVal), maxVal)
*
* Output shape is same as the input.
*/
message ClipLayerParams {
float minVal = 1;
float maxVal = 2;
}
/*
* A layer that extracts a slice of size ``(end - begin) / stride``
* from the given input tensor.
* Supports negative indexing and negative strides.
*
* Requires 1 input and produces 1 output.
* Output rank is same as the input rank.
*
* Value of beginIds, beginMasks, endIds, endMasks, strides are required
* parameters. Lengths of all the parameters must equal the rank of the input.
*
* i-th element of "beginIds" is ignored and assumed to be 0 if the i-th element
* of "beginMasks" is True
*
* i-th element of "endIds" is ignored and assumed to be -1 if the i-th element
* of "endMasks" is True
*
* Note:
* if the i-th element of "squeezeMasks" is set to True, only beginIds[i] is
* sliced out along that axis, and all other masks and inputs are ignored.
*
* e.g. (without squeezeMasks):
* input shape: (5, 5, 5)
* beginIds: [1, 2, 3]
* beginMasks: [True, False, True]
* endIds: [3, -3, 2]
* endMasks: [False, True, True]
* strides: [2, 2, 2]
* SqueezeMasks: [False, False, False]
* output shape: (2, 2, 3)
* This is equivalent to input[:3:2, 2::2, ::2]
*
* e.g. (with squeezeMasks):
* input shape: (5, 5, 5)
* beginIds: [1, 2, 3]
* beginMasks: [True, False, True]
* endIds: [3, -3, 2]
* endMasks: [False, True, True]
* strides: [2, 2, 2]
* SqueezeMasks: [False, True, False]
* output shape: (2, 3)
* This is equivalent to input[:3:2, 2, ::2]
*
*/
message SliceStaticLayerParams {
repeated int64 beginIds = 1;
repeated bool beginMasks = 2;
repeated int64 endIds = 3;
repeated bool endMasks = 4;
repeated int64 strides = 5;
repeated bool squeezeMasks = 6;
}
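/*
 * A minimal NumPy sketch of the first (no squeezeMasks) example above,
 * assuming the stated equivalence with Python slice notation.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     x = np.zeros((5, 5, 5))
 *     # beginIds=[1,2,3], beginMasks=[T,F,T], endIds=[3,-3,2],
 *     # endMasks=[F,T,T], strides=[2,2,2], squeezeMasks all False
 *     y = x[:3:2, 2::2, ::2]
 *     assert y.shape == (2, 2, 3)
 */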
/*
* A layer that extracts a slice of size ``(end - begin) / stride``
* from the given input tensor.
* Supports negative indexing and negative strides.
* See "SliceStaticLayerParams" for the description and an example of the
* functionality of the layer.
*
* Requires 2 to 7 inputs and produces 1 output.
* Rank of the output is same as the rank of the first input unless squeezeMask
* is set.
*
* Value of beginIds, beginMasks, endIds, endMasks, strides can be passed in
* either as dynamic inputs or as static parameters. Lengths of all the
* parameters or inputs from 2-6 must equal the rank of the first input.
*
* The 2nd input represents the "beginIds".
* The 3rd input, if present, corresponds to "endIds"; in this case the value
* of the "endIds" parameter is ignored.
* The 4th input, if present, corresponds to "strides"; in this case the value
* of the "strides" parameter is ignored.
* The 5th input, if present, corresponds to "beginMasks"; in this case the
* value of the "beginMasks" parameter is ignored.
* The 6th input, if present, corresponds to "endMasks"; in this case the value
* of the "endMasks" parameter is ignored.
* The 7th input, if present, corresponds to "squeezeMasks"; in this case the
* value of the "squeezeMasks" parameter is ignored.
*
*/
message SliceDynamicLayerParams {
repeated bool beginMasks = 2;
repeated int64 endIds = 3;
repeated bool endMasks = 4;
repeated int64 strides = 5;
repeated bool squeezeMasks = 6;
}
/*
* A layer that constructs a tensor by repeating the input tensor multiple
* number of times.
*
* Requires 1 or 2 inputs and produces 1 output.
* Output rank is same as the input rank.
*
* If two inputs are provided, second input is used as "reps"
* and "reps" parameter is ignored.
*
* If only one input is provided,
* length of the "reps" parameter must be at least 1 and
* not greater than the rank of the input.
* If it is less than the input rank, it is made equal to the input rank by
* prepending 1's to it.
*
* e.g.:
*
* input shape = (2, 4, 2)
* reps = (1, 2, 6)
* output shape = (2, 8, 12)
*
* input shape = (2, 4, 2)
* reps = (6)
* reps after prepending ones = (1, 1, 6)
* output shape = (2, 4, 12)
*
* input shape = (2, 4, 2)
* second input = [1, 2, 6] -> shape: (3,)
* reps = N/A [Ignored]
* output shape = (2, 8, 12)
*
*/
message TileLayerParams {
repeated uint64 reps = 1;
}
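/*
 * A minimal NumPy sketch of the repetition described above, assuming
 * ``numpy.tile`` as the reference; shapes follow the first example.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     x = np.zeros((2, 4, 2))
 *     y = np.tile(x, (1, 2, 6))
 *     assert y.shape == (2, 8, 12)
 */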
/*
* A layer that returns the shape of an input tensor.
*
* Requires 1 input and produces 1 output.
*
* Input: a tensor.
* Output: a rank 1 tensor (vector) of length R, where R is the rank of the
* input tensor.
*/
message GetShapeLayerParams {}
/*
* A layer that computes the Gauss error function,
* which is defined as:
*
* .. math::
* f(x) = \dfrac{1}{\sqrt{\pi}}\int_{-x}^{x}{e^{-t^2}dt}
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*/
message ErfLayerParams {}
/*
* A layer that evaluates the Gaussian Error Linear Unit (GELU) activation.
* Following equations are used to compute the activation based on the value of
* the "mode" parameter:
*
* mode == 'EXACT':
* .. math::
* f(x) = 0.5x\left ( 1+\rm{erf}\left ( \frac{x}{\sqrt{2}} \right ) \right )
*
* mode == 'TANH_APPROXIMATION':
* .. math::
* f(x) = 0.5x\left ( 1+\rm{tanh}\left ( \sqrt{2/\pi}\left ( x + 0.044715x^3
* \right ) \right ) \right )
*
* mode == 'SIGMOID_APPROXIMATION':
* .. math::
* f(x) = x*\rm{sigmoid}(1.702x)
*
* Requires 1 input and produces 1 output.
* Output shape is same as the input.
*
*/
message GeluLayerParams {
enum GeluMode {
EXACT = 0;
TANH_APPROXIMATION = 1;
SIGMOID_APPROXIMATION = 2;
}
GeluMode mode = 1; // mode of GELU operation.
}
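/*
 * A minimal NumPy sketch of the two approximation modes defined above; the
 * helper name ``gelu`` is illustrative, and the EXACT mode is omitted because
 * it needs an erf implementation (e.g. ``scipy.special.erf``).
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def gelu(x, mode="TANH_APPROXIMATION"):
 *         if mode == "TANH_APPROXIMATION":
 *             return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi)
 *                                             * (x + 0.044715 * x ** 3)))
 *         if mode == "SIGMOID_APPROXIMATION":
 *             return x / (1.0 + np.exp(-1.702 * x))   # x * sigmoid(1.702 x)
 *         raise NotImplementedError("EXACT requires an erf implementation")
 *
 *     y = gelu(np.linspace(-3.0, 3.0, 7))
 */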
/*
* RangeStatic layer that returns a tensor that contains evenly spaced values.
* It is similar in functionality to the numpy.arange method.
*
* Requires no input and produces 1 output.
* Output is a rank 1 tensor.
*/
message RangeStaticLayerParams {
float endValue = 1;
float startValue = 2;
float stepSizeValue = 3;
}
/*
* A layer that returns a tensor that contains evenly spaced values.
* Its functionality is similar to the numpy.arange method.
*
* Requires at least 1 input, up to a maximum of 3 inputs.
* Produces 1 output, which is a rank 1 tensor.
*
* Each input must be a scalar, or rank 1 and shape (1,).
*
* The first input represents the "endValue".
* The second input, if present, corresponds to "startValue"; in this case the
* value of the "startValue" parameter is ignored.
* The third input, if present, corresponds to "stepSizeValue"; in this case
* the value of the "stepSizeValue" parameter is ignored.
*
*/
message RangeDynamicLayerParams {
float startValue = 2;
float stepSizeValue = 3;
}
/*
* A layer that returns a tensor containing all windows of size ``windowSize``
* separated by ``step`` along the dimension ``axis``.
*
* .. code::
*
* y = SlidingWindows(x)
*
* Requires 1 input and produces 1 output.
*
* Input
* An N-Dimensional tensor.
*
* Output
* An (N+1)-Dimensional tensor.
*
* This operation behaves as follows:
* - if axis = 0 & input is rank 1 (L,), the output shape will be (M, W)
* - if axis = 1 & input is rank 3 (B1, L, C1), the output shape will be
*   (B1, M, W, C1)
* - if axis = 2 & input is rank 5 (B1, B2, L, C1, C2), the input is treated as
*   (B1 * B2, L, C1 * C2) --> (B1 * B2, M, W, C1 * C2), and the output shape
*   will be (B1, B2, M, W, C1, C2)
* - etc.
* where
* - L, C, B refer to the input length, feature dimension length & batch size
*   respectively
* - W is the window size
* - M is the number of windows/slices, calculated as M = (L - W) / step + 1
*/
message SlidingWindowsLayerParams {
int64 axis = 1;
uint64 windowSize = 2;
uint64 step = 3;
}
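/*
 * A minimal NumPy sketch of the windowing described above; the helper name
 * ``sliding_windows`` is illustrative, and the shapes follow the rank-3,
 * axis = 1 case.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def sliding_windows(x, axis, window_size, step):
 *         length = x.shape[axis]
 *         num_windows = (length - window_size) // step + 1   # M
 *         windows = [np.take(x, np.arange(s, s + window_size), axis=axis)
 *                    for s in np.arange(num_windows) * step]
 *         return np.stack(windows, axis=axis)                # inserts the M axis
 *
 *     x = np.zeros((3, 10, 4))                               # (B1, L, C1)
 *     y = sliding_windows(x, axis=1, window_size=4, step=2)
 *     assert y.shape == (3, 4, 4, 4)                         # (B1, M, W, C1)
 */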
/*
* A layer that applies layer normalization over the input tensor.
*
* Requires 1 input and produces 1 output.
*
* output = gamma * (input - computed_mean) / (sqrt(computed_variance + eps)) +
* beta
*
* Parameters
* normalizedShape: subset of the input shape, along which layer norm is
* performed; the rest of the input shape is treated as the batch dimension.
* The mean and variance are computed for the input over the last few
* dimensions, as specified by the normalizedShape parameter.
* gamma: must have shape = "normalizedShape"
* beta: must have shape = "normalizedShape"
* eps: small constant to avoid division by 0
*
* Output shape is same as the input.
*
* e.g.:
* input shape = (10,5)
* normalized shape = (5,) or (10,5)
*
* input shape = (10,5,6,7)
* normalized shape = (7,) or (6,7) or (5,6,7) or (10,5,6,7)
*/
message LayerNormalizationLayerParams {
repeated int64 normalizedShape = 1;
float eps = 2;
WeightParams gamma = 3;
WeightParams beta = 4;
}
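/*
 * A minimal NumPy sketch of the normalization formula above; the helper name
 * ``layer_norm`` and the eps value are illustrative.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     def layer_norm(x, normalized_shape, gamma, beta, eps=1e-5):
 *         # Normalize over the trailing len(normalized_shape) axes; the
 *         # remaining leading axes are treated as batch.
 *         axes = tuple(range(x.ndim - len(normalized_shape), x.ndim))
 *         mean = x.mean(axis=axes, keepdims=True)
 *         var = x.var(axis=axes, keepdims=True)
 *         return gamma * (x - mean) / np.sqrt(var + eps) + beta
 *
 *     x = np.random.rand(10, 5)
 *     y = layer_norm(x, (5,), gamma=np.ones(5), beta=np.zeros(5))
 *     assert y.shape == (10, 5)
 */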
/*
* Non maximum suppression (NMS) layer.
* Applies the non maximum suppression algorithm to input bounding box
* coordinates. The effect of this layer is similar to the functionality of the
* "NonMaximumSuppression" model type (for details please see
* NonMaximumSuppression.proto) with a couple of differences. One, this is a
* layer in a neural network model, whereas that is a different model type.
* Second, this layer supports a batch of bounding boxes.
*
* The NMS layer requires at least 2 inputs, and up to a maximum of 5 inputs. It
* produces 4 outputs. Following is the description of inputs and outputs:
*
* input 1, shape (B,N,4): coordinates of N boxes, for a batch size B.
* input 2, shape (B,N,C): class scores for each box. C can be 1 when there is
* only 1 score per box, i.e., no class specific score.
*
* input 3, optional, shape (1,): IoU threshold. When present, it overwrites
* the value provided in the layer parameter "iouThreshold".
* input 4, optional, shape (1,): Score threshold. When present, it overwrites
* the value provided in the layer parameter "scoreThreshold".
* input 5, optional, shape (1,): Maximum number of boxes. When present, it
* overwrites the value provided in the layer parameter "maxBoxes".
*
* output 1, shape (B,maxBoxes,4): box coordinates, corresponding to the
* surviving boxes.
* output 2, shape (B,maxBoxes,C): box scores, corresponding to the surviving
* boxes.
* output 3, shape (B,maxBoxes): indices of the surviving boxes. Hence it will
* have values in the range [0,N-1], except for padding.
* output 4, shape (B,): number of boxes selected after the NMS algorithm, for
* each batch.
*
* When surviving boxes are less than "maxBoxes", the first 3 outputs are
* padded. For the first two outputs, the padding is done using values 0,
* whereas for the third output the padding value used is -1, since the output
* values represent indices.
*
* If no box survives, that is, all the scores are below the "scoreThreshold",
* then for that batch, number of boxes (value of the fourth output) will be 1.
* The first 3 outputs will correspond to the box with the highest score. This
* is to avoid generating an "empty" output.
*
* The four values that describe the box dimensions are (in order):
*
* - x (center location of the box along the horizontal axis)
* - y (center location of the box along the vertical axis)
* - width (size of box along the horizontal axis)
* - height (size of box along the vertical axis)
*
* In each batch,
* the N scores for N boxes, used for suppression, are generated by taking the
* max of the matrix (N,C) along the columns. If "perClassSuppression" flag is
* false, suppression happens across all classes. If "perClassSuppression" flag
* is true, each box is assigned to the class with the highest score and then
* the suppression happens separately for boxes within the same class.
*
* Note that the 4th output can be used to dynamically slice the first 3
* outputs, in case the padded outputs are not required.
*
*/
message NonMaximumSuppressionLayerParams {
/*
* The intersection over union (IoU) threshold over which boxes are
* suppressed.
*/
float iouThreshold = 1;
/*
* Before IoU suppression is performed, boxes with class scores below this
* threshold are rejected.
*/
float scoreThreshold = 2;
/*
* The maximum number of boxes to be given out as output.
* If the number of surviving boxes are less, output is padded up to this
* number.
*/
uint64 maxBoxes = 3;
/*
* If true, suppression is performed independently within boxes of each class.
*/
bool perClassSuppression = 4;
}
/*
* A layer that performs element-wise clamped ReLU operation.
*
* Requires 1 input and produces 1 output.
*
* This function has the following formula:
*
* .. math::
* f(x) = \begin{cases}
* \text{min}(\text{beta},x) \;\; \text{if} \;\; x \geq 0\\
* \text{min}(\text{beta} ,\text{alpha}\cdot x) \;\; \text{if}
* \;\; x<0 \end{cases}
*
* Output shape is same as the input.
*
* Available (iOS >= 14, macOS >= 11.0, watchOS >= 7)
*/
message ClampedReLULayerParams {
float alpha = 1;
float beta = 2;
}
/*
* A layer that returns the indices that would sort the input tensor, along a
* specified axis.
*
* Requires 1 input and produces 1 output.
*
* Output has the same rank and shape as the input.
*
* Value of "axis" must be positive and less than the rank of the input.
*
* e.g.:
*
* input shape = (5,)
* axis = 0
* input values = [3.1, 5.4, 32.9, 3.2, 77.0]
* output shape = (5,)
* output values = [0, 3, 1, 2, 4], descending = False
* output values = [4, 2, 1, 3, 0], descending = True
*
* input shape = (2,3)
* axis = 1
* input values = [[3, 5, 32], [3, 77, 6]]
* output shape = (2,3)
* output values = [[0, 1, 2], [0, 2, 1]], descending = False
* output values = [[2, 1, 0], [1, 2, 0]], descending = True
*
*/
message ArgSortLayerParams {
int64 axis = 1; // must be between [0, input_rank - 1]
bool descending = 2;
}
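/*
 * A minimal NumPy sketch of the second example above, assuming
 * ``numpy.argsort`` (stable sort) matches the intended ordering.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     x = np.array([[3, 5, 32], [3, 77, 6]])
 *     ascending = np.argsort(x, axis=1, kind="stable")    # [[0, 1, 2], [0, 2, 1]]
 *     descending = np.argsort(-x, axis=1, kind="stable")  # [[2, 1, 0], [1, 2, 0]]
 */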
/*
* A layer that performs a slice operation, given the size to be extracted
* from the input tensor.
*
* Requires 2 inputs and produces 1 output.
* Rank of the output is same as the rank of the first input.
*
* The 1st input represents the tensor to be sliced.
* The 2nd input represents the beginning index to be sliced from.
*
* Example:
* Input 1: x (x.shape = (2, 3, 4))
* Input 2: begin
* size: 2
* axis: 1
*
* Output: x[:, begin:begin+2, :]
*
*/
message SliceBySizeLayerParams {
int64 size = 2;
int64 axis = 3;
}
// Neural Network Specializations
// ------------------------------
/*
* A neural network specialized as a classifier.
*/
message NeuralNetworkClassifier {
repeated NeuralNetworkLayer layers = 1;
repeated NeuralNetworkPreprocessing preprocessing = 2;
// use this enum value to determine the input tensor shapes to the neural
// network, for multiarray inputs
NeuralNetworkMultiArrayShapeMapping arrayInputShapeMapping = 5;
// use this enum value to determine the input tensor shapes to the neural
// network, for image inputs
NeuralNetworkImageShapeMapping imageInputShapeMapping = 6;
NetworkUpdateParameters updateParams = 10;
// The set of labels for every possible class.
oneof ClassLabels {
StringVector stringClassLabels = 100;
Int64Vector int64ClassLabels = 101;
}
// The name of the output blob containing the probability of each class.
// In other words, the score vector. Must be a 1-D tensor with the same
// number and order of elements as ClassLabels.
string labelProbabilityLayerName = 200;
}
/*
* A layer that computes the one hot representation of the input.
*
* Requires 1 or 2 inputs and produces 1 output.
* Rank of the output is one more than the first input.
* If the second input is present, it is used to determine the value of
* "oneHotVectorSize" and the parameter "oneHotVectorSize" is ignored.
*
* Input values correspond to indices and should typically be in the range
* [0, "oneHotVectorSize" - 1]. If a value is outside this range, a vector of
* all "offValue" will be chosen.
*
* Typically one hot vectors contain 0s everywhere, except 1 at the index that
* the input corresponds to. However, instead of 0, any float value could be
* generated by using the "offValue" parameter. Similarly, instead of 1, any
* other value can be used by employing the "onValue" parameter.
*
* e.g.:
* input shape: (10,), "oneHotVectorSize" : 32, axis=-1, then the output shape
* will be (10,32)
* input shape: (10,23), "oneHotVectorSize" : 32, axis=1, then the output shape
* will be (10,32,23)
* input shape: (10,), "oneHotVectorSize" : 32, axis=0, then the output shape
* will be (32,10)
*
* input shape: (2,), "oneHotVectorSize" : 4, axis=-1, then the output shape
* will be (2,4).
* Say input values = [2, 0], "onValue" = 5, and "offValue" = -1; then the
* output will be:
* [-1, -1, 5, -1
* 5, -1, -1, -1]
*
* Say input values = [2, -1], "onValue" = 5, and "offValue" = -1; then the
* output will be:
* [-1, -1, 5, -1
* -1, -1, -1, -1]
*
* Available (iOS >= 14, macOS >= 11.0, watchOS >= 7)
*/
message OneHotLayerParams {
uint64 oneHotVectorSize = 1; // size of the one hot vector
int64 axis = 2; // negative indexing is supported. It refers to the axis in
// the output tensor.
float onValue = 3;
float offValue = 4;
}
/*
* A layer that computes the cumsum values of the input along a given axis.
*
* Requires 1 or 2 inputs and produces 1 output.
*
* Output shape and rank is same as the first input.
* If the second input is present, it is used to determine the value of "axis"
* and the parameter "axis" is ignored.
*
* e.g.:
* Input shape = (3,), values it has: [4, 6, 7]
*
* Then output values will be:
*
* if "excludeFinalSum" = False and "reverse" = False:
* output values : [4, 10, 17]
*
* if "excludeFinalSum" = True and "reverse" = False:
* output values : [0, 4, 10]
*
* if "excludeFinalSum" = False and "reverse" = True:
* output values : [17, 13, 7]
*
* if "excludeFinalSum" = True and "reverse" = True:
* output values : [13, 7, 0]
*
*
* Available (iOS >= 14, macOS >= 11.0, watchOS >= 7)
*/
message CumSumLayerParams {
int64 axis = 1; // negative indexing is supported
// if true, the first element of the output is 0, and the last element
// contains the sum of the input up to the penultimate value;
// if false, the first element of the output is the same as the first element
// of the input, and the last element is the sum of all the input values
// (this behavior is reversed when the "reverse" flag is True)
bool excludeFinalSum = 2;
bool reverse = 3; // if true, cumsum is performed in the opposite direction
}
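/*
 * A minimal NumPy sketch of the four flag combinations in the example above,
 * assuming ``numpy.cumsum`` as the reference.
 *
 * .. code::
 *
 *     import numpy as np
 *
 *     x = np.array([4, 6, 7])
 *
 *     np.cumsum(x)                                          # [ 4, 10, 17]
 *     np.concatenate(([0], np.cumsum(x)[:-1]))              # [ 0,  4, 10]
 *     np.cumsum(x[::-1])[::-1]                              # [17, 13,  7]
 *     np.concatenate(([0], np.cumsum(x[::-1])[:-1]))[::-1]  # [13,  7,  0]
 */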
/*
* A neural network specialized as a regressor.
*/
message NeuralNetworkRegressor {
repeated NeuralNetworkLayer layers = 1;
repeated NeuralNetworkPreprocessing preprocessing = 2;
// use this enum value to determine the input tensor shapes to the neural
// network, for multiarray inputs
NeuralNetworkMultiArrayShapeMapping arrayInputShapeMapping = 5;
// use this enum value to determine the input tensor shapes to the neural
// network, for image inputs
NeuralNetworkImageShapeMapping imageInputShapeMapping = 6;
NetworkUpdateParameters updateParams = 10;
}
// ---------------------------------------------------------
// On-device Training related messages
// ---------------------------------------------------------
/*
* Details on how the network will be updated
*/
message NetworkUpdateParameters {
repeated LossLayer lossLayers = 1;
Optimizer optimizer = 2;
Int64Parameter epochs = 3;
/*
* Describes whether to shuffle the batch of data between epochs.
*/
BoolParameter shuffle = 10;
/*
* The seed to be used in an associated random number generator.
*/
Int64Parameter seed = 20;
}
/*
* Loss layer - categorical cross entropy and mean squared error are the only
* supported loss functions currently
*/
message LossLayer {
string name = 1;
oneof LossLayerType {
CategoricalCrossEntropyLossLayer categoricalCrossEntropyLossLayer = 10;
MeanSquaredErrorLossLayer meanSquaredErrorLossLayer = 11;
}
}
/*
* Categorical cross entropy loss layer
* Categorical cross entropy is used for single label categorization (only one
* category is applicable for each data point).
*
* The input is a vector of length N representing the distribution over N
* categories. It must be the output of a softmax.
*
* The target is a single value representing the true category or class label.
* If the target is the predictedFeatureName of a neural network classifier it
* will be inverse mapped to the corresponding categorical index for you.
*
* math:
* Loss_{CCE}(input, target) = -\sum_{i=1}^{N} (target == i) \log(input[i])
*                           = -\log(input[target])
*/
message CategoricalCrossEntropyLossLayer {
string input = 1;
string target = 2;
}
/*
* Mean squared error loss layer,
* specifying input and target
*/
message MeanSquaredErrorLossLayer {
string input = 1;
string target = 2;
}
/*
* Optimizer - stochastic gradient descent and adam are the only supported
* optimizers currently
*/
message Optimizer {
oneof OptimizerType {
SGDOptimizer sgdOptimizer = 10;
AdamOptimizer adamOptimizer = 11;
}
}
/*
* Stochastic gradient descent optimizer,
* specifying configurable learning rate, mini batch size, and momentum
*/
message SGDOptimizer {
DoubleParameter learningRate = 1;
Int64Parameter miniBatchSize = 2;
DoubleParameter momentum = 3;
}
/*
* Adam optimizer,
* specifying configurable learning rate, mini batch size, betas, and eps
*/
message AdamOptimizer {
DoubleParameter learningRate = 1;
Int64Parameter miniBatchSize = 2;
DoubleParameter beta1 = 3;
DoubleParameter beta2 = 4;
DoubleParameter eps = 5;
}