/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_

#include <algorithm>
#include <cmath>
#include <cstdint>

#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {
namespace reference_ops {

// Float LSTM cell: the overload of LstmCell whose data buffers are all
// `float`.
//
// Every array is passed as a RuntimeShape plus a pointer to its data.
//
// Read-only (const) arrays: input, prev_activ, weights, bias, prev_state.
// Writable arrays: output_state, output_activ, concat_temp, activ_temp.
//
// NOTE(review): the `unextended_` prefix on most shape arguments suggests the
// shapes are extended to a common rank before use -- confirm against the
// implementation.
// NOTE(review): concat_temp and activ_temp look like caller-provided scratch
// buffers (for the concatenated inputs and the intermediate activations,
// respectively) -- confirm against the implementation.
inline void LstmCell(
    const LstmCellParams& params, const RuntimeShape& unextended_input_shape,
    const float* input_data, const RuntimeShape& unextended_prev_activ_shape,
    const float* prev_activ_data, const RuntimeShape& weights_shape,
    const float* weights_data, const RuntimeShape& unextended_bias_shape,
    const float* bias_data, const RuntimeShape& unextended_prev_state_shape,
    const float* prev_state_data,
    const RuntimeShape& unextended_output_state_shape, float* output_state_data,
    const RuntimeShape& unextended_output_activ_shape, float* output_activ_data,
    const RuntimeShape& unextended_concat_temp_shape, float* concat_temp_data,
    const RuntimeShape& unextended_activ_temp_shape, float* activ_temp_data) { … }

// Quantized LSTM cell implementation.
// The quantization of the input, output arrays is as follows:
//  - The input activations are quantized as uint8 on the interval
//    [-1, 127/128].
//    The rationale for that is that it is the natural interval for output
//    activations (see next point) and these need to be concatenated together.
//    We could accommodate different ranges by re-scaling, but we empirically
//    found that setting the input activations range to be [-1, 127/128] in the
//    first place, removing the need for re-scaling, greatly improves accuracy.
//  - The output activations are quantized as uint8 on the interval
//    [-1, 127/128].
//    The rationale for that is that the definition of a LSTM cell makes them
//    intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128]
//    makes for simpler, more accurate fixed-point arithmetic.
//  - The output-at-previous-timestep state array is obviously quantized as
//    the output activations.
//  - The internal LSTM memory (not the output-at-previous-timestep, the other
//    internal state array) is int16-quantized and may use any power-of-two,
//    symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call
//    StateIntegerBits below, see the below discussion of that template
//    parameter ("The StateIntegerBits template parameter").
//  - The output of the internal fully-connected node is int16-quantized
//    on the interval [-8, 8 * 32767/32768], the rationale for which is
//    explained just below ("Why [-8, 8] for fully-connected output?").
//
//
// === The StateIntegerBits template parameter ===
//
// The StateIntegerBits template parameter controls the fixed-point format used
// to represent the internal memory of the LSTM cell (not the
// output-at-previous-timestep, the other internal state array). It's currently
// a template parameter so that the model can control that. The most typical
// value for StateIntegerBits is 4. Other plausible values are anywhere between
// 3 and 5. We might eventually standardize on a single supported value, e.g.
// 4,
// and drop that template parameter. The reason why it can't be a runtime
// parameter is that this controls the fixed-point format used, i.e. we need to
// generate actually different code based on it. In particular, we generate code
// for a fixed-point tanh() implementation for that format, which internally
// uses a fixed-point exp() implementation, which internally uses a
// barrel-shifter with a number of steps that depends on StateIntegerBits.
// Another consequence of that is that a higher value of StateIntegerBits
// results in a more expensive implementation (more barrel shifter steps
// needed).
//
//
// === Why [-8, 8] for fully-connected output? ===
//
// This array is only fed to Logistic and Tanh functions, for which
// the quantized implementation will want to use fixed-point arithmetic,
// requiring a power-of-two representation interval. Thus, we should right
// away quantize this array to a power-of-two interval; otherwise,
// the implementation will need to rescale that, losing any benefit that a
// tighter representation interval might otherwise yield, while introducing
// some numerical error and computational overhead.
//
// Now, Logistic and Tanh
// are nearly constant (nearly equal to their horizontal asymptotes)
// outside of a small bounded interval around 0:
//
//   Logistic(4) = 1 - 1.8e-2     Tanh(4) = 1 - 6.7e-4
//   Logistic(8) = 1 - 3.4e-4     Tanh(8) = 1 - 2.3e-7
//   Logistic(16) = 1 - 1.1e-7    Tanh(16) = 1 - 2.5e-14
//
// From this, we see that clamping to [-4, 4] would be too inaccurate
// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision)
// while clamping to [-16, 16] would make no difference even in float32.
// However, for a fixed-point implementation in 16-bit integers, using 5
// integer bits to represent the [-16, 16] range would leave only 11
// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
// representable values.
// Notice that this is higher than the
// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
// Using [-8, 8] thus seems like the better compromise overall, enjoying
// an increment of 2.4e-4 between representable values and a worst-case
// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with
// [-16, 16].
//
// Moreover, all other things being equal, it is nice to choose the narrower
// representation range, as that makes the implementation of fixed-point
// math functions a little cheaper (each integer bit requires an additional
// barrel-shifter step in the implementation of exp(-x)). That is further
// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make
// sense for 32-bit float or 32-bit fixed-point quantization, but we are
// aiming for 16-bit fixed-point quantization of these internal nodes here.
//
// Argument summary (from the signature):
//  - Every array is passed as a RuntimeShape plus a pointer to its data.
//  - uint8 arrays: input, prev_activ, weights (all const), output_activ and
//    concat_temp (writable).
//  - int32 array: bias (const).
//  - int16 arrays: prev_state (const), output_state and activ_temp (writable).
//  - gemmlowp_context is an opaque void*.
//    NOTE(review): how (or whether) gemmlowp_context is used is not visible
//    from the signature -- confirm against the implementation.
//  - NOTE(review): concat_temp and activ_temp look like caller-provided
//    scratch buffers -- confirm against the implementation.
template <int StateIntegerBits>
inline void LstmCell(const LstmCellParams& params,
                     const RuntimeShape& unextended_input_shape,
                     const uint8_t* input_data_uint8,
                     const RuntimeShape& unextended_prev_activ_shape,
                     const uint8_t* prev_activ_data_uint8,
                     const RuntimeShape& weights_shape,
                     const uint8_t* weights_data_uint8,
                     const RuntimeShape& unextended_bias_shape,
                     const int32_t* bias_data_int32,
                     const RuntimeShape& unextended_prev_state_shape,
                     const int16_t* prev_state_data_int16,
                     const RuntimeShape& unextended_output_state_shape,
                     int16_t* output_state_data_int16,
                     const RuntimeShape& unextended_output_activ_shape,
                     uint8_t* output_activ_data_uint8,
                     const RuntimeShape& unextended_concat_temp_shape,
                     uint8_t* concat_temp_data_uint8,
                     const RuntimeShape& unextended_activ_temp_shape,
                     int16_t* activ_temp_data_int16, void* gemmlowp_context) { … }

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_