#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include <algorithm>
#include <cmath>
#include <limits>
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
namespace tflite {
namespace {
// File-local compile-time constants; the unnamed namespace keeps them private
// to this translation unit.
// NOTE(review): the initializers are not visible in this view. Judging by the
// names only, these look like the sign / exponent / fraction bit-field
// descriptors of a 64-bit floating-point encoding (masks are uint64_t), plus
// rounding helpers for narrowing the fraction -- confirm against the actual
// values before relying on this description.
constexpr uint64_t kSignMask = …;
constexpr uint64_t kExponentMask = …;
constexpr int32_t kExponentShift = …;
constexpr int32_t kExponentBias = …;
// NOTE(review): presumably the exponent value that marks NaN/Inf -- verify.
constexpr uint32_t kExponentIsBadNum = …;
constexpr uint64_t kFractionMask = …;
constexpr uint32_t kFractionShift = …;
constexpr uint32_t kFractionRoundingMask = …;
constexpr uint32_t kFractionRoundingThreshold = …;
}  // namespace
// Takes a real-valued multiplier and reports its result through the two
// writable out-pointers `quantized_multiplier` and `shift`; returns nothing.
// NOTE(review): body not visible in this view. Presumably it decomposes
// `double_multiplier` into a 32-bit fixed-point significand and a
// power-of-two exponent -- confirm the exact format, rounding and the
// handling of zero against the implementation / header documentation.
void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
int* shift) { … }
// Same parameter shape as QuantizeMultiplier, but the exponent out-parameter
// is named `left_shift`.
// NOTE(review): body not visible in this view. The name suggests the caller
// must pass a multiplier > 1 and that `*left_shift` comes back non-negative;
// whether that precondition is checked here is not visible -- TODO confirm.
void QuantizeMultiplierGreaterThanOne(double double_multiplier,
int32_t* quantized_multiplier,
int* left_shift) { … }
// Counterpart of QuantizeMultiplierGreaterThanOne with identical parameter
// types; results are written through `quantized_multiplier` and `left_shift`.
// NOTE(review): body not visible in this view. The name suggests the
// multiplier is expected to lie in (0, 1) and that `*left_shift` is reported
// as an exponent (likely <= 0) rather than a right-shift count -- verify the
// sign convention against the implementation before changing callers.
void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
int32_t* quantized_multiplier,
int* left_shift) { … }
// Takes a double and a writable `shift` out-pointer; returns a 64-bit integer.
// NOTE(review): body not visible in this view. Presumably an integer-only
// analogue of std::frexp (fraction returned as a fixed-point integer,
// exponent stored in `*shift`), possibly built on the bit-field constants at
// the top of this file -- confirm the fraction scaling and the treatment of
// zero / NaN / infinity against the implementation.
int64_t IntegerFrExp(double input, int* shift) { … }
// Takes a 64-bit integer `fraction` and an int `shift`; returns a double.
// NOTE(review): body not visible in this view. By its name and signature this
// looks like the inverse of IntegerFrExp (rebuilding a double from a
// fixed-point fraction and exponent) -- confirm, including how out-of-range
// shifts are handled.
double DoubleFromFractionAndShift(int64_t fraction, int shift) { … }
// Takes two doubles and returns a double.
// NOTE(review): body not visible in this view. The name suggests it computes
// a * b using integer arithmetic only (no hardware floating-point multiply);
// the result may therefore differ from `a * b` in the low bits -- verify
// the precision guarantees against the implementation.
double IntegerDoubleMultiply(double a, double b) { … }
// Takes two doubles and returns an int.
// NOTE(review): body not visible in this view. Presumably a three-way
// comparison of `a` and `b` done with integer arithmetic; the concrete return
// values (e.g. -1 / 0 / 1) and the NaN behaviour are not visible here --
// TODO confirm before relying on a specific encoding.
int IntegerDoubleCompare(double a, double b) { … }
// Takes `beta`, `input_scale` and `input_integer_bits`, and reports its result
// through the writable out-pointers `quantized_multiplier` and `left_shift`.
// NOTE(review): body not visible in this view. Presumably it folds
// beta * input_scale into a fixed-point multiplier/shift pair for a quantized
// softmax kernel, with `input_integer_bits` selecting the fixed-point format
// -- confirm the exact formula and any clamping against the implementation.
void PreprocessSoftmaxScaling(double beta, double input_scale,
int input_integer_bits,
int32_t* quantized_multiplier, int* left_shift) { … }
// Takes the same three inputs as PreprocessSoftmaxScaling and has four
// writable out-pointers: `quantized_multiplier`, `left_shift`,
// `reverse_scaling_divisor` and `reverse_scaling_left_shift`.
// NOTE(review): body not visible in this view. Presumably the first pair is
// the forward input scaling and the "reverse" pair encodes the reciprocal
// scaling needed by a quantized log-softmax kernel -- confirm how the reverse
// pair is derived and its sign convention against the implementation.
void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
int input_integer_bits,
int32_t* quantized_multiplier,
int* left_shift,
int32_t* reverse_scaling_divisor,
int* reverse_scaling_left_shift) { … }
// Takes three ints describing a fixed-point configuration and returns an int.
// NOTE(review): body not visible in this view. Presumably the returned
// "radius" is the largest input magnitude that can be rescaled by
// `input_left_shift` without overflowing a `total_signed_bits`-wide value --
// confirm the formula and rounding direction against the implementation.
int CalculateInputRadius(int input_integer_bits, int input_left_shift,
int total_signed_bits) { … }
// Takes a float range [min, max] and an integer range [quant_min, quant_max],
// and reports three floats through the writable out-pointers `nudged_min`,
// `nudged_max` and `nudged_scale`.
// NOTE(review): body not visible in this view. Presumably it adjusts the float
// range slightly so that 0.0 maps exactly onto an integer quantized value and
// outputs the resulting scale -- confirm; behaviour for min == max or
// quant_min == quant_max is not visible here.
void NudgeQuantizationRange(const float min, const float max,
const int quant_min, const int quant_max,
float* nudged_min, float* nudged_max,
float* nudged_scale) { … }
// Reads from `input_data` (pointer-to-const) and writes to `output_data`,
// parameterised by `nudged_scale`, `nudged_min` and `nudged_max`.
// NOTE(review): body not visible in this view. Presumably each input element
// is clamped to [nudged_min, nudged_max] and snapped to the grid defined by
// `nudged_scale`, i.e. quantize-then-dequantize in float -- confirm.
// NOTE(review): `size` is declared `const float` although it looks like an
// element count for both arrays; this is part of the public signature, so it
// is only flagged here. Counts above 2^24 cannot be represented exactly in a
// float -- verify callers never exceed that.
void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
const float nudged_max, const float* input_data,
float* output_data, const float size) { … }
// Takes a float `x`, writes an int through `log2_result`, and returns a bool.
// NOTE(review): body not visible in this view. Presumably stores a rounded
// log2(x) in `*log2_result` and returns whether x is (close to) an exact
// power of two -- the tolerance used and the behaviour for x <= 0 are not
// visible here; TODO confirm.
bool CheckedLog2(const float x, int* log2_result) { … }
// Array variant: reads `effective_scales` (pointer-to-const) together with a
// `size_t size`, and writes through `effective_scale_significand` and
// `effective_shift`.
// NOTE(review): body not visible in this view. Presumably it applies
// QuantizeMultiplier to each of the `size` scales and stores element i's
// results at index i of the two output arrays, which would then each need
// room for at least `size` elements -- confirm against the implementation.
void QuantizeMultiplierArray(const double* effective_scales, size_t size,
int32_t* effective_scale_significand,
int* effective_shift) { … }
}