#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/kernels/internal/runtime_shape.h"
#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
#ifdef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
#endif
#endif
#include <cmath>
#include <functional>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/core/macros.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
constexpr int kReverseShift = …;
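// Collapses the dimensions of two broadcastable inputs into at most MAX_DIM
// compressed dimensions, producing per-input strides and a compressed output
// shape so that broadcast loops can walk both inputs with simple strides.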
template <int MAX_DIM = 6>
bool ReduceDimensionsForBroadcast(const RuntimeShape& input1_shape,
const RuntimeShape& input2_shape,
size_t* compressed_input1_stride,
size_t* compressed_input2_stride,
size_t* compressed_output_shape) { … }
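// Converts a fused activation function (kNone, kRelu, kRelu1, kRelu6) into
// the float clamp bounds to apply to an op's output.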
inline void GetActivationMinMax(FusedActivationFunctionType ac,
float* output_activation_min,
float* output_activation_max) { … }
template <typename T>
inline T ActivationFunctionWithMinMax(T x, T output_activation_min,
T output_activation_max) { … }
template <FusedActivationFunctionType Ac>
float ActivationFunction(float x) { … }
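// Adds the bias vector (repeated across array_size / bias_size groups) to
// array_data in place, then clamps every element to [clamp_min, clamp_max].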
inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size,
const float* bias_data, int array_size,
float* array_data) { … }
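// Requantization helpers: multiply an accumulator by a non-negative Q0.31
// fixed-point multiplier and apply a power-of-two shift, i.e. approximately
//   result = round(x * quantized_multiplier * 2^(shift - 31)).
//
// Illustrative sketch (not part of this header): requantizing an int32
// accumulator `acc` to int8 with hypothetical `output_multiplier`,
// `output_shift`, `output_zero_point` and clamp bounds typically looks like:
//
//   int32_t scaled = MultiplyByQuantizedMultiplier(acc, output_multiplier,
//                                                  output_shift);
//   scaled += output_zero_point;
//   scaled = std::min(std::max(scaled, output_activation_min),
//                     output_activation_max);
//   int8_t out = static_cast<int8_t>(scaled);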
TFLITE_NOINLINE int32_t MultiplyByQuantizedMultiplier(
int32_t x, int32_t quantized_multiplier, int shift);
TFLITE_NOINLINE int32_t MultiplyByQuantizedMultiplier(
int64_t x, int32_t quantized_multiplier, int shift);
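// Single-rounding variants of the quantized-multiplier helpers: only one
// rounding step is applied when mapping the wide product back to 32 bits.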
#if TFLITE_SINGLE_ROUNDING
inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
int32_t x, int32_t quantized_multiplier, int shift) {
TFLITE_DCHECK_LE(shift, 0);
return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}
inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
int32_t x, int32_t quantized_multiplier, int shift) {
TFLITE_DCHECK_GE(shift, 0);
return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}
#ifdef USE_NEON
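// NEON version: applies the same multiply-and-shift to four int32x4_t rows.
// Each row is pre-shifted left, multiplied with vqdmulhq_s32 (saturating
// doubling high multiply), and the single rounding step is performed by the
// final vrshlq_s32 rounding right shift.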
inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows(
int32x4x4_t input_val, int32_t quantized_multiplier, int shift) {
TFLITE_DCHECK(quantized_multiplier >= 0);
const int right_shift = std::min(-1, shift);
const int left_shift = shift - right_shift;
const int32x4_t multiplier_dup = vdupq_n_s32(quantized_multiplier);
const int32x4_t left_shift_dup = vdupq_n_s32(left_shift);
const int32x4_t right_shift_dup = vdupq_n_s32(right_shift);
int32x4x4_t result;
result.val[0] = vrshlq_s32(
vqdmulhq_s32(vshlq_s32(input_val.val[0], left_shift_dup), multiplier_dup),
right_shift_dup);
result.val[1] = vrshlq_s32(
vqdmulhq_s32(vshlq_s32(input_val.val[1], left_shift_dup), multiplier_dup),
right_shift_dup);
result.val[2] = vrshlq_s32(
vqdmulhq_s32(vshlq_s32(input_val.val[2], left_shift_dup), multiplier_dup),
right_shift_dup);
result.val[3] = vrshlq_s32(
vqdmulhq_s32(vshlq_s32(input_val.val[3], left_shift_dup), multiplier_dup),
right_shift_dup);
return result;
}
#endif  // USE_NEON
#else
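// Double-rounding variants: one rounding in the saturating doubling high
// multiply, a second one in the rounding right shift.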
inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
int32_t x, int32_t quantized_multiplier, int left_shift) { … }
inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
int32_t x, int32_t quantized_multiplier, int left_shift) { … }
#ifdef USE_NEON
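// NEON version of the double-rounding multiply: vqrdmulhq_s32 rounds the
// high half of the doubling multiply, and vrshlq_s32 rounds again on the
// right shift.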
inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows(
int32x4x4_t input_val, int32_t quantized_multiplier, int shift) {
const int left_shift = std::max(shift, 0);
const int right_shift = std::min(shift, 0);
int32x4x4_t result;
int32x4_t multiplier_dup = vdupq_n_s32(quantized_multiplier);
int32x4_t left_shift_dup = vdupq_n_s32(left_shift);
int32x4_t right_shift_dup = vdupq_n_s32(right_shift);
result.val[0] =
vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[0], left_shift_dup),
multiplier_dup),
right_shift_dup);
result.val[1] =
vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[1], left_shift_dup),
multiplier_dup),
right_shift_dup);
result.val[2] =
vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[2], left_shift_dup),
multiplier_dup),
right_shift_dup);
result.val[3] =
vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[3], left_shift_dup),
multiplier_dup),
right_shift_dup);
return result;
}
#endif  // USE_NEON
#endif  // TFLITE_SINGLE_ROUNDING
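// Counts the number of leading zero bits in integer_input.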
template <typename T>
int CountLeadingZeros(T integer_input) { … }
template <typename T>
inline int CountLeadingSignBits(T integer_input) { … }
template <typename Integer>
inline Integer FloorLog2(Integer n) { … }
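// Helpers for building quantized activation lookup tables (LUTs). The
// detail::LUTTransform overloads allow the table to be populated either from
// a plain float -> float function or from one that also takes an opaque
// params pointer.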
namespace detail {
template <typename FloatT, typename Func>
inline typename std::enable_if<std::is_same<Func, FloatT (*)(FloatT)>::value,
FloatT>::type
LUTTransform(Func transform, const void* /*transform_params*/, FloatT value) { … }
template <typename FloatT, typename Func>
inline typename std::enable_if<
std::is_same<Func, FloatT (*)(FloatT, const void*)>::value, FloatT>::type
LUTTransform(Func transform, const void* transform_params, FloatT value) { … }
template <typename T, typename Func>
inline void LUTPopulateInt8(float input_scale, int32_t input_zero_point,
float output_scale, int32_t output_zero_point,
Func transform, const void* transform_params,
T* lut) { … }
template <typename FloatT, typename Func>
inline void LUTPopulateInt16(FloatT input_scale, int32_t input_zero_point,
FloatT output_scale, int32_t output_zero_point,
Func transform, const void* transform_params,
int16_t* lut) { … }
}  // namespace detail
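// Public LUT API. For uint8/int8 the table has 256 entries indexed directly
// by the quantized input; for int16 a larger (513-entry) table is used and
// LUTLookup interpolates between neighbouring entries.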
template <typename T>
inline typename std::enable_if<std::is_same<T, uint8_t>::value ||
std::is_same<T, int8_t>::value,
void>::type
LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale,
int32_t output_zero_point, float (*transform)(float), T* lut) { … }
template <typename T>
inline typename std::enable_if<std::is_same<T, uint8_t>::value ||
std::is_same<T, int8_t>::value,
void>::type
LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale,
int32_t output_zero_point, float (*transform)(float, const void*),
const void* transform_params, T* lut) { … }
template <typename T>
inline typename std::enable_if<std::is_same<T, int16_t>::value, void>::type
LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale,
int32_t output_zero_point, float (*transform)(float), T* lut) { … }
template <typename T>
inline typename std::enable_if<std::is_same<T, int16_t>::value, void>::type
LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale,
int32_t output_zero_point, float (*transform)(float, const void*),
const void* transform_params, T* lut) { … }
template <typename T>
inline typename std::enable_if<std::is_same<T, int16_t>::value, void>::type
LUTPopulate(double input_scale, int32_t input_zero_point, double output_scale,
int32_t output_zero_point, double (*transform)(double), T* lut) { … }
template <typename T>
constexpr int LUTSize() { … }
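// Looks up a quantized value in a table built by LUTPopulate. The int16_t
// overload interpolates between adjacent table entries; the 8-bit overloads
// index the 256-entry table directly.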
inline int16_t LUTLookup(int16_t value, const int16_t* lut) { … }
inline int8_t LUTLookup(int8_t value, const int8_t* lut) { … }
inline uint8_t LUTLookup(uint8_t value, const uint8_t* lut) { … }
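// 256-entry table of sigmoid values in 0.16 fixed-point, shared by the
// 16-bit sigmoid/tanh kernels (tanh(x) = 2*sigmoid(2*x) - 1, and both
// functions are symmetric, so only non-negative inputs need to be tabulated).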
static const uint16_t sigmoid_table_uint16[256] = …;
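// Saturating add/subtract helpers for plain integers and for
// gemmlowp::FixedPoint values.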
template <typename IntegerType>
IntegerType SaturatingAddNonGemmlowp(IntegerType a, IntegerType b) { … }
template <>
inline std::int32_t SaturatingAddNonGemmlowp(std::int32_t a, std::int32_t b) { … }
template <typename tRawType, int tIntegerBits>
gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingAddNonGemmlowp(
gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
gemmlowp::FixedPoint<tRawType, tIntegerBits> b) { … }
template <typename IntegerType>
IntegerType SaturatingSub(IntegerType a, IntegerType b) { … }
template <>
inline std::int16_t SaturatingSub(std::int16_t a, std::int16_t b) { … }
template <>
inline std::int32_t SaturatingSub(std::int32_t a, std::int32_t b) { … }
template <typename tRawType, int tIntegerBits>
gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingSub(
gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
gemmlowp::FixedPoint<tRawType, tIntegerBits> b) { … }
template <typename IntegerType>
IntegerType SaturatingRoundingMultiplyByPOTParam(IntegerType x, int exponent) { … }
template <typename tRawType, int tIntegerBits>
gemmlowp::FixedPoint<tRawType, tIntegerBits>
SaturatingRoundingMultiplyByPOTParam(
gemmlowp::FixedPoint<tRawType, tIntegerBits> a, int exponent) { … }
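// Narrows an int32 fixed-point multiplier to int16 with rounding, for kernels
// that work with 16-bit multipliers.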
inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32_t,
int16_t* multiplier_int16_t) { … }
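// Fixed-point helpers for computing ln(x) for x >= 1, used by the quantized
// log-softmax implementation. min_log_x_output_bits returns the number of
// integer bits needed to represent the result for the given input range.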
constexpr int min_log_x_output_bits(int input_bits) { … }
template <int OutputIntegerBits, int InputIntegerBits>
inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
log_x_for_x_greater_than_or_equal_to_1_impl(
gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) { … }
template <int OutputIntegerBits, int InputIntegerBits>
inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
log_x_for_x_greater_than_or_equal_to_1(
gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) { … }
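// Computes a fixed-point reciprocal of x (which has x_integer_digits integer
// bits), reporting via num_bits_over_unit how the result is scaled.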
inline int32_t GetReciprocal(int32_t x, int x_integer_digits,
int* num_bits_over_unit) { … }
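// Computes a quantized multiplier and exponent approximating 1/sqrt(input),
// used e.g. by the quantized L2 normalization kernel.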
inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
int32_t* output_inv_sqrt,
int* output_shift) { … }
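// Describes the extents and strides of an N-dimensional array so that
// element-wise broadcast loops can address it; SubscriptToIndex converts a
// multi-index into a flat buffer offset.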
template <int N>
struct NdArrayDesc { … };
inline int SubscriptToIndex(const NdArrayDesc<4>& desc, int i0, int i1, int i2,
int i3) { … }
inline int SubscriptToIndex(const NdArrayDesc<5>& desc, int indexes[5]) { … }
inline int SubscriptToIndex(const NdArrayDesc<8>& desc, int indexes[8]) { … }
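// Builds NdArrayDescs for the inputs of an element-wise broadcast op: extents
// are expanded to the broadcast output shape and broadcast dimensions get a
// stride of 0, so the same loop can index all inputs directly.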
template <int N>
inline void NdArrayDescsForElementwiseBroadcast(const Dims<N>& input0_dims,
const Dims<N>& input1_dims,
NdArrayDesc<N>* desc0_out,
NdArrayDesc<N>* desc1_out) { … }
template <int N>
TFLITE_NOINLINE void CopyDimsToDesc(const RuntimeShape& input_shape,
NdArrayDesc<N>* desc_out) { … }
template <int N>
inline void NdArrayDescsForElementwiseBroadcast(
const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
NdArrayDesc<N>* desc0_out, NdArrayDesc<N>* desc1_out) { … }
template <int N>
inline void NdArrayDescsForElementwiseBroadcast(
const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
const RuntimeShape& input2_shape, NdArrayDesc<N>* desc0_out,
NdArrayDesc<N>* desc1_out, NdArrayDesc<N>* desc2_out) { … }
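// NDOpsHelper iterates over every coordinate of the output NdArrayDesc and
// invokes calc(indexes) for each element; the DIM == N - 1 specialization is
// the innermost loop.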
template <int N, int DIM, typename Calc>
typename std::enable_if<DIM != N - 1, void>::type NDOpsHelperImpl(
const NdArrayDesc<N>& output, const Calc& calc, int indexes[N]) { … }
template <int N, int DIM, typename Calc>
typename std::enable_if<DIM == N - 1, void>::type NDOpsHelperImpl(
const NdArrayDesc<N>& output, const Calc& calc, int indexes[N]) { … }
template <int N, typename Calc>
inline void NDOpsHelper(const NdArrayDesc<N>& output, const Calc& calc) { … }
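// Integer rounding helpers: RoundDown/RoundUp round to a multiple of Modulus,
// and CeilQuotient performs ceiling division.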
template <unsigned Modulus, typename Integer>
Integer RoundDown(Integer i) { … }
template <unsigned Modulus, typename Integer>
Integer RoundUp(Integer i) { … }
template <typename Integer>
Integer CeilQuotient(Integer a, Integer b) { … }
template <int KernelRows>
inline int LegacyHowManyThreads(int max_num_threads, int rows, int cols,
int depth) { … }
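// Cache prefetch hints for the optimized kernels; these compile to
// __builtin_prefetch where available and to no-ops otherwise.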
template <typename T>
void optimized_ops_preload_l1_stream(const T* ptr) { … }
template <typename T>
void optimized_ops_preload_l1_keep(const T* ptr) { … }
template <typename T>
void optimized_ops_prefetch_write_l1_keep(const T* ptr) { … }
}  // namespace tflite
#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_