#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_FULLY_CONNECTED_4BIT_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_FULLY_CONNECTED_4BIT_H_
#include <stdint.h>
#ifndef TFLITE_MMAP_DISABLED
#include <sys/mman.h>
#endif
#include <cstdlib>
#include <memory>
#if defined(FC_4BIT_SSE) && defined(__SSSE3__)
#include "tensorflow/lite/kernels/internal/optimized/4bit/sse_fully_connected.h"
#elif defined(FC_4BIT_NEON) && (defined(__ARM_NEON__) || defined(__ARM_NEON))
#include "tensorflow/lite/kernels/internal/optimized/4bit/neon_fully_connected.h"
#else
#include "tensorflow/lite/kernels/internal/optimized/4bit/fully_connected_reference.h"
#endif
namespace tflite {
namespace optimized_4bit {
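// Block geometry for prepacked 4-bit weights: filters are packed in
// FilterWidth x FilterDepth tiles; kDefaultAlignmentPadding is the extra
// byte count reserved so packed buffers can be aligned.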
constexpr int FilterWidth = …;
constexpr int FilterDepth = …;
constexpr int kDefaultAlignmentPadding = …;
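
// Deleter for prepacked weight buffers held through std::unique_ptr. The
// buffer may have been obtained via mmap (hence <sys/mman.h> above), or via
// plain allocation when TFLITE_MMAP_DISABLED is defined.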
struct Deleter { … };
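
// Per-op state for the 4-bit fully connected kernel, e.g. the cached
// prepacked filter (owned through Deleter) and its layout bookkeeping.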
struct OpData4Bit { … };
namespace api {
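// Repacks the int8 `tensor` (each element holding a signed 4-bit weight) into
// the dense blocked layout the 4-bit kernels consume, two 4-bit values per
// destination byte. `layout_rows` / `layout_cols` are `src_rows` / `src_cols`
// rounded up to multiples of `width` / `depth`.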
inline void Prepack(uint8_t* dest, const int8_t* tensor, int layout_rows,
int layout_cols, int src_rows, int src_cols, int width,
int depth) { … }
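
// Quantizes `n_batch` rows of `n_data` floats to int8, producing one scaling
// factor per batch row in `scaling_factors`. `input_offsets` receives the
// per-row sums of the quantized values, consumed by the offset-correction
// step below; `width` and `depth` describe the block padding of the output.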
inline void BatchQuantizeFloats4Bit(const float* float_data_ptr, int n_batch,
int n_data, int8_t* quantized_data_ptr,
float* scaling_factors, int width,
int depth, int32_t* input_offsets) { … }
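
// Seeds the float output with the bias and the zero-point correction term
// before accumulation; conceptually,
//   output[b][o] = bias[o] + input_offsets[b] * batch_scales[b] * filter_scales[o].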
inline void AssignBiasAndComputeOffsets(const int32_t* input_offsets,
const float* batch_scales,
float* filter_scales,
const float* bias_ptr,
float* output_ptr, int output_depth,
int batch_size) { … }
inline void RunAndUnpack(int rhs_width, const uint8_t* lhs, const int8_t* rhs,
int32_t* dst, int output_depth, int batch_size,
int lhs_layout_rows, int lhs_layout_cols,
int rhs_layout_rows, int rhs_layout_cols,
int dst_layout_rows, int dst_layout_cols,
float* output_ptr, const float* scaling_factors,
const float* filter_scales) { … }
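
// A minimal sketch of the intended call sequence. Variable names and buffer
// sizing below are illustrative assumptions, not part of this API:
//
//   // Round the filter dimensions up to whole blocks.
//   int lhs_layout_rows =
//       ((output_depth + FilterWidth - 1) / FilterWidth) * FilterWidth;
//   int lhs_layout_cols =
//       ((input_depth + FilterDepth - 1) / FilterDepth) * FilterDepth;
//   std::vector<uint8_t> packed(lhs_layout_rows * lhs_layout_cols / 2);
//   Prepack(packed.data(), filter_int8, lhs_layout_rows, lhs_layout_cols,
//           output_depth, input_depth, FilterWidth, FilterDepth);
//   BatchQuantizeFloats4Bit(input_float, batch_size, input_depth, quantized,
//                           scaling_factors, FilterWidth, FilterDepth,
//                           input_offsets);
//   AssignBiasAndComputeOffsets(input_offsets, scaling_factors, filter_scales,
//                               bias, output, output_depth, batch_size);
//   RunAndUnpack(rhs_width, packed.data(), quantized, accum, output_depth,
//                batch_size, lhs_layout_rows, lhs_layout_cols, rhs_layout_rows,
//                rhs_layout_cols, dst_layout_rows, dst_layout_cols, output,
//                scaling_factors, filter_scales);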
}  // namespace api
}  // namespace optimized_4bit
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_FULLY_CONNECTED_4BIT_H_