#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <fp16/fp16.h>
#include "xnnpack.h"
#include "xnnpack/allocator.h"
#include "xnnpack/cache.h"
#include "xnnpack/common.h"
#include "xnnpack/compute.h"
#include "xnnpack/config.h"
#include "xnnpack/log.h"
#include "xnnpack/math.h"
#include "xnnpack/microfnptr.h"
#include "xnnpack/microkernel-type.h"
#include "xnnpack/microparams-init.h"
#include "xnnpack/microparams.h"
#include "xnnpack/operator-type.h"
#include "xnnpack/operator-utils.h"
#include "xnnpack/operator.h"
#include "xnnpack/pack.h"
#include "xnnpack/packq.h"
#include "xnnpack/params.h"
#include "pthreadpool.h"
// File-local helper that the signature suggests is the shared creation routine
// for the fully-connected operator variants defined in this file.
//
// What the signature shows:
//   - input_channels / output_channels / input_stride / output_stride: size_t
//     dimensions and strides.
//   - kernel / bias: untyped (const void*) pointers; element sizes are passed
//     separately via log2_filter_element_size and bias_element_size.
//   - block_size, extra_bl_bytes, blockwise_kernel_scale_params and
//     pack_gemm_goi_bl_w: presumably only meaningful for blockwise-quantized
//     weights — TODO confirm.
//   - filter_is_nibble: presumably marks 4-bit packed weights — TODO confirm.
//   - pack_gemm_gio_w / pack_gemm_goi_w: two weight-packing function pointers;
//     which one is used is presumably selected by `flags` — TODO confirm.
//   - init_scale_params + scale_params and init_kernel_scale_params +
//     kernel_scale_params: two (initializer, float array) pairs.
//   - params / params_size: opaque microkernel parameter blob and its size.
//   - gemm_config, gemm_ukernels, jit_gemm_params: GEMM configuration inputs.
//   - code_cache / weights_cache: cache handles.
//   - fully_connected_op_out: out-parameter of type xnn_operator_t*.
//
// Returns an enum xnn_status.
// NOTE(review): the body is elided in this view, so validation rules,
// allocation and ownership behaviour cannot be documented from here.
static enum xnn_status create_fully_connected_nc(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
const void* kernel,
const void* bias,
uint32_t flags,
size_t block_size,
size_t extra_bl_bytes,
const uint16_t* blockwise_kernel_scale_params,
uint32_t log2_input_element_size,
uint32_t log2_filter_element_size,
bool filter_is_nibble,
uint32_t bias_element_size,
xnn_packw_gemm_gio_ukernel_fn pack_gemm_gio_w,
xnn_packw_gemm_goi_ukernel_fn pack_gemm_goi_w,
xnn_packw_gemm_goi_bl_ukernel_fn pack_gemm_goi_bl_w,
const void* packing_params,
int packed_weights_padding_byte,
size_t extra_weights_bytes,
xnn_init_qs8_qc8w_scale_params_fn init_scale_params,
const float* scale_params,
xnn_init_qs8_qc8w_scale_params_fn init_kernel_scale_params,
const float* kernel_scale_params,
const void* params,
size_t params_size,
const struct xnn_gemm_config* gemm_config,
const struct gemm_fused_ukernels* gemm_ukernels,
const struct jit_gemm_params *jit_gemm_params,
enum xnn_operator_type operator_type,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out)
{ … }
// Creation entry point for the f16 fully-connected operator variant.
// kernel and bias are untyped (const void*) here, while output_min/output_max
// are passed as float.
// Returns an enum xnn_status; the operator is presumably written to
// *fully_connected_op_out on success — TODO confirm.
// NOTE(review): body elided in this view; presumably forwards to
// create_fully_connected_nc — confirm.
enum xnn_status xnn_create_fully_connected_nc_f16(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
const void* kernel,
const void* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out)
{ … }
// Creation entry point for the qd8_f16_qc4w fully-connected operator variant.
// Takes a uint8_t kernel_zero_point, a float array kernel_scale, an untyped
// kernel pointer and a float bias array, plus float output_min/output_max.
// Returns an enum xnn_status; the operator is presumably written to
// *fully_connected_op_out on success — TODO confirm.
// NOTE(review): body elided in this view; kernel_scale is presumably one
// value per output channel — confirm against the implementation.
enum xnn_status xnn_create_fully_connected_nc_qd8_f16_qc4w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
uint8_t kernel_zero_point,
const float* kernel_scale,
const void* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out)
{ … }
// Creation entry point for the qd8_f16_qb4w fully-connected operator variant.
// Compared with the qc4w signature it adds a block_size argument, and
// kernel_scale is a const uint16_t* rather than const float*.
// Returns an enum xnn_status; the operator is presumably written to
// *fully_connected_op_out on success — TODO confirm.
// NOTE(review): body elided in this view; the encoding of the 16-bit scales
// and the constraints on block_size cannot be determined from here.
enum xnn_status xnn_create_fully_connected_nc_qd8_f16_qb4w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
size_t block_size,
uint8_t kernel_zero_point,
const uint16_t* kernel_scale,
const void* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out)
{ … }
// Creation entry point for the qd8_f32_qc4w fully-connected operator variant.
// Takes a uint8_t kernel_zero_point, a float array kernel_scale, an untyped
// kernel pointer and a float bias array, plus float output_min/output_max.
// Returns an enum xnn_status; the operator is presumably written to
// *fully_connected_op_out on success — TODO confirm.
// NOTE(review): body elided in this view; accepted kernel_zero_point values
// are not visible here.
enum xnn_status xnn_create_fully_connected_nc_qd8_f32_qc4w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
uint8_t kernel_zero_point,
const float* kernel_scale,
const void* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out)
{ … }
// Creation entry point for the qp8_f32_qc4w fully-connected operator variant.
// The parameter list matches the qd8_f32_qc4w creator (zero point, float
// kernel_scale array, untyped kernel, float bias, float output bounds).
// Returns an enum xnn_status; the operator is presumably written to
// *fully_connected_op_out on success — TODO confirm.
// NOTE(review): body elided in this view; how this variant differs from
// qd8_f32_qc4w internally cannot be determined from here.
enum xnn_status xnn_create_fully_connected_nc_qp8_f32_qc4w(
size_t input_channels, size_t output_channels, size_t input_stride,
size_t output_stride, uint8_t kernel_zero_point, const float* kernel_scale,
const void* kernel, const float* bias, float output_min, float output_max,
uint32_t flags, xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache, xnn_operator_t* fully_connected_op_out) { … }
// Creation entry point for the qd8_f32_qb4w fully-connected operator variant.
// Compared with the qc4w signature it adds a block_size argument, and
// kernel_scale is a const uint16_t* rather than const float*.
// Returns an enum xnn_status; the operator is presumably written to
// *fully_connected_op_out on success — TODO confirm.
// NOTE(review): body elided in this view; the encoding of the 16-bit scales
// and the constraints on block_size cannot be determined from here.
enum xnn_status xnn_create_fully_connected_nc_qd8_f32_qb4w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
size_t block_size,
uint8_t kernel_zero_point,
const uint16_t* kernel_scale,
const void* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out)
{ … }
// Creation entry point for the qd8_f32_qc8w fully-connected operator variant.
// The kernel is typed as const int8_t*, with a float array kernel_scale and a
// float bias array; there is no kernel zero-point argument.
// Returns an enum xnn_status; the operator is presumably written to
// *fully_connected_op_out on success — TODO confirm.
// NOTE(review): body elided in this view; kernel_scale is presumably one
// value per output channel — confirm.
enum xnn_status xnn_create_fully_connected_nc_qd8_f32_qc8w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
const float* kernel_scale,
const int8_t* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out)
{ … }
// Creation entry point for the qd8_f16_qc8w fully-connected operator variant.
// The kernel is typed as const int8_t*, with a float array kernel_scale and a
// float bias array; output_min/output_max are passed as float.
// Returns an enum xnn_status; the operator is presumably written to
// *fully_connected_op_out on success — TODO confirm.
// NOTE(review): body elided in this view; any conversion of the float bounds
// to half precision is not visible here.
enum xnn_status xnn_create_fully_connected_nc_qd8_f16_qc8w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
const float* kernel_scale,
const int8_t* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out)
{ … }
// Creation entry point for the f32 fully-connected operator variant.
// kernel and bias are float arrays; output_min/output_max are float bounds.
// Returns an enum xnn_status; the operator is presumably written to
// *fully_connected_op_out on success — TODO confirm.
// NOTE(review): body elided in this view; argument validation (e.g. NaN or
// inverted bounds) cannot be documented from here.
enum xnn_status xnn_create_fully_connected_nc_f32(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
const float* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out)
{ … }
// Creation entry point for the f32_qc4w fully-connected operator variant.
// The kernel is typed as const uint8_t* and is accompanied by a uint8_t
// kernel_zero_point and a float array kernel_scale; bias is a float array.
// Returns an enum xnn_status; the operator is presumably written to
// *fully_connected_op_out on success — TODO confirm.
// NOTE(review): body elided in this view; presumably each kernel byte holds
// two 4-bit weights — confirm.
enum xnn_status xnn_create_fully_connected_nc_f32_qc4w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
uint8_t kernel_zero_point,
const float* kernel_scale,
const uint8_t* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out)
{ … }
// Creation entry point for the f32_qc8w fully-connected operator variant.
// The kernel is typed as const int8_t* with a float array kernel_scale;
// bias is a float array and there is no kernel zero-point argument.
// Returns an enum xnn_status; the operator is presumably written to
// *fully_connected_op_out on success — TODO confirm.
// NOTE(review): body elided in this view; kernel_scale is presumably one
// value per output channel — confirm.
enum xnn_status xnn_create_fully_connected_nc_f32_qc8w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
const float* kernel_scale,
const int8_t* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out)
{ … }
// Creation entry point for the qs8 fully-connected operator variant.
// Quantization inputs are scalars: int8_t input/output zero points, float
// input_scale, kernel_scale and output_scale. The kernel is const int8_t*,
// bias is const int32_t*, and output_min/output_max are int8_t.
// Returns an enum xnn_status; the operator is presumably written to
// *fully_connected_op_out on success — TODO confirm.
// NOTE(review): body elided in this view; the accepted ranges for the scales
// are not visible here.
enum xnn_status xnn_create_fully_connected_nc_qs8(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
int8_t input_zero_point,
float input_scale,
float kernel_scale,
const int8_t* kernel,
const int32_t* bias,
int8_t output_zero_point,
float output_scale,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out)
{ … }
// Creation entry point for the qs8_qc8w fully-connected operator variant.
// Same parameter list as the qs8 creator except that kernel_scale is a
// const float* array instead of a single float.
// Returns an enum xnn_status; the operator is presumably written to
// *fully_connected_op_out on success — TODO confirm.
// NOTE(review): body elided in this view; kernel_scale is presumably one
// value per output channel — confirm.
enum xnn_status xnn_create_fully_connected_nc_qs8_qc8w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
int8_t input_zero_point,
float input_scale,
const float* kernel_scale,
const int8_t* kernel,
const int32_t* bias,
int8_t output_zero_point,
float output_scale,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out)
{ … }
// Creation entry point for the qu8 fully-connected operator variant.
// All quantized quantities are unsigned: uint8_t input, kernel and output
// zero points, a const uint8_t* kernel and uint8_t output_min/output_max.
// Scales are single floats and bias is const int32_t*.
// Returns an enum xnn_status; the operator is presumably written to
// *fully_connected_op_out on success — TODO confirm.
// NOTE(review): body elided in this view; the accepted ranges for the scales
// are not visible here.
enum xnn_status xnn_create_fully_connected_nc_qu8(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
uint8_t input_zero_point,
float input_scale,
uint8_t kernel_zero_point,
float kernel_scale,
const uint8_t* kernel,
const int32_t* bias,
uint8_t output_zero_point,
float output_scale,
uint8_t output_min,
uint8_t output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out)
{ … }
// File-local helper that the signature suggests is the shared reshape routine
// for the fully-connected operator variants defined in this file.
//
// What the signature shows:
//   - fully_connected_op: the operator handle to reshape.
//   - expected_operator_type: presumably compared against the operator's own
//     type to reject mismatched calls — TODO confirm.
//   - batch_size: the new batch size.
//   - log2_input_element_size / log2_filter_element_size /
//     log2_output_element_size: element sizes expressed as log2 values.
//   - filter_is_nibble, dynamic_quantization: boolean mode switches.
//   - params / params_size: opaque microkernel parameter blob and its size.
//   - threadpool: a pthreadpool_t handle.
//
// Returns an enum xnn_status.
// NOTE(review): the body is elided in this view, so the tiling and
// parallelization decisions made here cannot be documented.
static enum xnn_status reshape_fully_connected_nc(
xnn_operator_t fully_connected_op,
enum xnn_operator_type expected_operator_type,
size_t batch_size,
uint32_t log2_input_element_size,
uint32_t log2_filter_element_size,
bool filter_is_nibble,
bool dynamic_quantization,
uint32_t log2_output_element_size,
const void* params,
size_t params_size,
pthreadpool_t threadpool)
{ … }
// Reshape entry point for the f16 fully-connected operator variant.
// Takes the operator handle, the new batch_size and a threadpool handle, and
// returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// reshape_fully_connected_nc with f16 element sizes — confirm.
enum xnn_status xnn_reshape_fully_connected_nc_f16(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool)
{ … }
// Reshape entry point for the f32 fully-connected operator variant.
// Takes the operator handle, the new batch_size and a threadpool handle, and
// returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// reshape_fully_connected_nc with f32 element sizes — confirm.
enum xnn_status xnn_reshape_fully_connected_nc_f32(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool)
{ … }
// Reshape entry point for the f32_qc4w fully-connected operator variant.
// Takes the operator handle, the new batch_size and a threadpool handle, and
// returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// reshape_fully_connected_nc with filter_is_nibble set — confirm.
enum xnn_status xnn_reshape_fully_connected_nc_f32_qc4w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool)
{ … }
// Reshape entry point for the f32_qc8w fully-connected operator variant.
// Takes the operator handle, the new batch_size and a threadpool handle, and
// returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// reshape_fully_connected_nc — confirm.
enum xnn_status xnn_reshape_fully_connected_nc_f32_qc8w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool)
{ … }
// Reshape entry point for the qd8_f16_qc4w fully-connected operator variant.
// Takes the operator handle, the new batch_size and a threadpool handle, and
// returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// reshape_fully_connected_nc with dynamic_quantization and filter_is_nibble
// set — confirm.
enum xnn_status xnn_reshape_fully_connected_nc_qd8_f16_qc4w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool)
{ … }
// Reshape entry point for the qd8_f16_qb4w fully-connected operator variant.
// Takes the operator handle, the new batch_size and a threadpool handle, and
// returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// reshape_fully_connected_nc with dynamic_quantization and filter_is_nibble
// set — confirm.
enum xnn_status xnn_reshape_fully_connected_nc_qd8_f16_qb4w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool)
{ … }
// Reshape entry point for the qd8_f32_qc4w fully-connected operator variant.
// Takes the operator handle, the new batch_size and a threadpool handle, and
// returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// reshape_fully_connected_nc with dynamic_quantization and filter_is_nibble
// set — confirm.
enum xnn_status xnn_reshape_fully_connected_nc_qd8_f32_qc4w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool)
{ … }
// Reshape entry point for the qd8_f32_qb4w fully-connected operator variant.
// Takes the operator handle, the new batch_size and a threadpool handle, and
// returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// reshape_fully_connected_nc with dynamic_quantization and filter_is_nibble
// set — confirm.
enum xnn_status xnn_reshape_fully_connected_nc_qd8_f32_qb4w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool)
{ … }
// Reshape entry point for the qd8_f16_qc8w fully-connected operator variant.
// Takes the operator handle, the new batch_size and a threadpool handle, and
// returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// reshape_fully_connected_nc with dynamic_quantization set — confirm.
enum xnn_status xnn_reshape_fully_connected_nc_qd8_f16_qc8w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool)
{ … }
// Reshape entry point for the qd8_f32_qc8w fully-connected operator variant.
// Takes the operator handle, the new batch_size and a threadpool handle, and
// returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// reshape_fully_connected_nc with dynamic_quantization set — confirm.
enum xnn_status xnn_reshape_fully_connected_nc_qd8_f32_qc8w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool)
{ … }
// Reshape entry point for the qp8_f32_qc4w fully-connected operator variant.
// Takes the operator handle, the new batch_size and a threadpool handle, and
// returns an enum xnn_status.
// NOTE(review): body elided in this view; whether this variant shares
// reshape_fully_connected_nc or uses its own path is not visible — confirm.
enum xnn_status xnn_reshape_fully_connected_nc_qp8_f32_qc4w(
xnn_operator_t fully_connected_op, size_t batch_size,
pthreadpool_t threadpool) { … }
// Reshape entry point for the qs8 fully-connected operator variant.
// Takes the operator handle, the new batch_size and a threadpool handle, and
// returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// reshape_fully_connected_nc with 8-bit element sizes — confirm.
enum xnn_status xnn_reshape_fully_connected_nc_qs8(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool)
{ … }
// Reshape entry point for the qs8_qc8w fully-connected operator variant.
// Takes the operator handle, the new batch_size and a threadpool handle, and
// returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// reshape_fully_connected_nc with 8-bit element sizes — confirm.
enum xnn_status xnn_reshape_fully_connected_nc_qs8_qc8w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool)
{ … }
// Reshape entry point for the qu8 fully-connected operator variant.
// Takes the operator handle, the new batch_size and a threadpool handle, and
// returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// reshape_fully_connected_nc with 8-bit element sizes — confirm.
enum xnn_status xnn_reshape_fully_connected_nc_qu8(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool)
{ … }
// File-local helper that the signature suggests is the shared setup routine
// for the fully-connected operator variants defined in this file.
//
// What the signature shows:
//   - fully_connected_op: the operator handle to set up.
//   - expected_operator_type: presumably compared against the operator's own
//     type to reject mismatched calls — TODO confirm.
//   - input / output: untyped data pointers (const void* and void*).
//   - quantization_params: untyped pointer; presumably NULL for variants
//     whose public setup function has no such argument — TODO confirm.
//
// Returns an enum xnn_status.
// NOTE(review): the body is elided in this view, so any state preconditions
// (e.g. requiring a prior reshape) cannot be documented from here.
static enum xnn_status setup_fully_connected_nc(
xnn_operator_t fully_connected_op,
enum xnn_operator_type expected_operator_type,
const void* input,
void* output,
const void* quantization_params)
{ … }
// Setup entry point for the f16 fully-connected operator variant.
// input and output are untyped (const void* / void*) in this signature.
// Returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// setup_fully_connected_nc without quantization parameters — confirm.
enum xnn_status xnn_setup_fully_connected_nc_f16(
xnn_operator_t fully_connected_op,
const void* input,
void* output)
{ … }
// Setup entry point for the f32 fully-connected operator variant.
// Takes a const float* input and a float* output; returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// setup_fully_connected_nc without quantization parameters — confirm.
enum xnn_status xnn_setup_fully_connected_nc_f32(
xnn_operator_t fully_connected_op,
const float* input,
float* output)
{ … }
// Setup entry point for the f32_qc4w fully-connected operator variant.
// Takes a const float* input and a float* output; returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// setup_fully_connected_nc without quantization parameters — confirm.
enum xnn_status xnn_setup_fully_connected_nc_f32_qc4w(
xnn_operator_t fully_connected_op,
const float* input,
float* output)
{ … }
// Setup entry point for the f32_qc8w fully-connected operator variant.
// Takes a const float* input and a float* output; returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// setup_fully_connected_nc without quantization parameters — confirm.
enum xnn_status xnn_setup_fully_connected_nc_f32_qc8w(
xnn_operator_t fully_connected_op,
const float* input,
float* output)
{ … }
// Setup entry point for the qd8_f16_qc4w fully-connected operator variant.
// Takes a const int8_t* input, an untyped (void*) output and a pointer to
// struct xnn_dynamic_quantization_params; returns an enum xnn_status.
// NOTE(review): body elided in this view; how many quantization_params
// entries are expected (presumably one per batch row) must be confirmed.
enum xnn_status xnn_setup_fully_connected_nc_qd8_f16_qc4w(
xnn_operator_t fully_connected_op,
const int8_t* input,
void* output,
const struct xnn_dynamic_quantization_params* quantization_params)
{ … }
// Setup entry point for the qd8_f16_qb4w fully-connected operator variant.
// Takes a const int8_t* input, an untyped (void*) output and a pointer to
// struct xnn_dynamic_quantization_params; returns an enum xnn_status.
// NOTE(review): body elided in this view; how many quantization_params
// entries are expected (presumably one per batch row) must be confirmed.
enum xnn_status xnn_setup_fully_connected_nc_qd8_f16_qb4w(
xnn_operator_t fully_connected_op,
const int8_t* input,
void* output,
const struct xnn_dynamic_quantization_params* quantization_params)
{ … }
// Setup entry point for the qd8_f32_qc4w fully-connected operator variant.
// Takes a const int8_t* input, a float* output and a pointer to
// struct xnn_dynamic_quantization_params; returns an enum xnn_status.
// NOTE(review): body elided in this view; how many quantization_params
// entries are expected (presumably one per batch row) must be confirmed.
enum xnn_status xnn_setup_fully_connected_nc_qd8_f32_qc4w(
xnn_operator_t fully_connected_op,
const int8_t* input,
float* output,
const struct xnn_dynamic_quantization_params* quantization_params)
{ … }
// Setup entry point for the qd8_f32_qb4w fully-connected operator variant.
// Takes a const int8_t* input, a float* output and a pointer to
// struct xnn_dynamic_quantization_params; returns an enum xnn_status.
// NOTE(review): body elided in this view; how many quantization_params
// entries are expected (presumably one per batch row) must be confirmed.
enum xnn_status xnn_setup_fully_connected_nc_qd8_f32_qb4w(
xnn_operator_t fully_connected_op,
const int8_t* input,
float* output,
const struct xnn_dynamic_quantization_params* quantization_params)
{ … }
// Setup entry point for the qd8_f16_qc8w fully-connected operator variant.
// Takes a const int8_t* input, an untyped (void*) output and a pointer to
// struct xnn_dynamic_quantization_params; returns an enum xnn_status.
// NOTE(review): body elided in this view; how many quantization_params
// entries are expected (presumably one per batch row) must be confirmed.
enum xnn_status xnn_setup_fully_connected_nc_qd8_f16_qc8w(
xnn_operator_t fully_connected_op,
const int8_t* input,
void* output,
const struct xnn_dynamic_quantization_params* quantization_params)
{ … }
// Setup entry point for the qp8_f32_qc4w fully-connected operator variant.
// Takes a const int8_t* input and a float* output. Unlike the qd8 setup
// functions in this file, it has no quantization_params argument.
// Returns an enum xnn_status.
// NOTE(review): body elided in this view; presumably the quantization data
// travels inside the input buffer for this variant — confirm.
enum xnn_status xnn_setup_fully_connected_nc_qp8_f32_qc4w(
xnn_operator_t fully_connected_op, const int8_t* input, float* output) { … }
// Setup entry point for the qd8_f32_qc8w fully-connected operator variant.
// Takes a const int8_t* input, a float* output and a pointer to
// struct xnn_dynamic_quantization_params; returns an enum xnn_status.
// NOTE(review): body elided in this view; how many quantization_params
// entries are expected (presumably one per batch row) must be confirmed.
enum xnn_status xnn_setup_fully_connected_nc_qd8_f32_qc8w(
xnn_operator_t fully_connected_op,
const int8_t* input,
float* output,
const struct xnn_dynamic_quantization_params* quantization_params)
{ … }
// Setup entry point for the qs8 fully-connected operator variant.
// Takes a const int8_t* input and an int8_t* output; returns an
// enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// setup_fully_connected_nc without quantization parameters — confirm.
enum xnn_status xnn_setup_fully_connected_nc_qs8(
xnn_operator_t fully_connected_op,
const int8_t* input,
int8_t* output)
{ … }
// Setup entry point for the qs8_qc8w fully-connected operator variant.
// Takes a const int8_t* input and an int8_t* output; returns an
// enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// setup_fully_connected_nc without quantization parameters — confirm.
enum xnn_status xnn_setup_fully_connected_nc_qs8_qc8w(
xnn_operator_t fully_connected_op,
const int8_t* input,
int8_t* output)
{ … }
// Setup entry point for the qu8 fully-connected operator variant.
// Takes a const uint8_t* input and a uint8_t* output; returns an
// enum xnn_status.
// NOTE(review): body elided in this view; presumably forwards to
// setup_fully_connected_nc without quantization parameters — confirm.
enum xnn_status xnn_setup_fully_connected_nc_qu8(
xnn_operator_t fully_connected_op,
const uint8_t* input,
uint8_t* output)
{ … }