#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "xnnpack.h"
#include "xnnpack/common.h"
#include "xnnpack/config-types.h"
#include "xnnpack/log.h"
#include "xnnpack/math.h"
#include "xnnpack/microfnptr.h"
#include "xnnpack/operator.h"
#include "xnnpack/pack.h"
#include "xnnpack/unaligned.h"
#if XNN_ENABLE_KLEIDIAI
#include "kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qsu4cxs1s0.h"
#include "kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0.h"
#endif
#include <fp16/fp16.h>
void xnn_pack_f32_gemm_goi_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const float* k,
const float* b,
const void* scale,
float* packed_weights,
size_t extra_bytes,
const void* params)
{ … }
void xnn_pack_f16_gemm_goi_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const uint16_t* k,
const uint16_t* b,
const void* scale,
uint16_t* packed_weights,
size_t extra_bytes,
const void* params)
{ … }
void xnn_pack_f32_to_f16_gemm_goi_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const float* k,
const float* b,
const void* scale,
uint16_t* packed_weights,
size_t extra_bytes,
const void* params)
{ … }
void xnn_pack_qu8_gemm_goi_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const uint8_t* k,
const int32_t* b,
const void* scale,
void* packed_weights,
size_t extra_bytes,
const struct xnn_qu8_packing_params* params)
{ … }
void xnn_pack_qs8_gemm_goi_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const int8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t extra_bytes,
const struct xnn_qs8_packing_params* params)
{ … }
void xnn_pack_qs8_to_qu8_gemm_goi_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const int8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t extra_bytes,
const struct xnn_qs8_packing_params* params)
{ … }
static int8_t sign_extend_int4(int8_t value) { … }
void xnn_pack_qs8_qc4w_gemm_goi_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const uint8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t extra_bytes,
const struct xnn_qs8_qc4w_packing_params* params)
{ … }
void xnn_pack_qs8_qc4uw_gemm_goi_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const uint8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t extra_bytes,
const struct xnn_qs8_qc4w_packing_params* params)
{ … }
void xnn_pack_qs8_qb4w_gemm_goi_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
size_t bl,
const uint8_t* k,
const float* bias,
const uint16_t* scale,
void* packed_weights,
size_t extra_bytes_bl,
size_t extra_bytes_n,
const struct xnn_qs8_qc4w_packing_params* params)
{ … }
void xnn_pack_qs8_qb4w_gemm_gio_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
size_t k_stride,
size_t bl,
const uint8_t* k,
const float* bias,
const uint16_t* scale,
void* packed_weights,
size_t extra_bytes_bl,
size_t extra_bytes_n,
const struct xnn_qs8_qc4w_packing_params* params)
{ … }
void xnn_pack_qs8_qc4w_gemm_gio_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
size_t k_stride,
const uint8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t extra_bytes,
const struct xnn_qs8_qc4w_packing_params* params)
{ … }
void xnn_pack_qs8_qc4uw_gemm_gio_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
size_t k_stride,
const uint8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t extra_bytes,
const struct xnn_qs8_qc4w_packing_params* params)
{ … }
void xnn_pack_f32_qs8w_gemm_goi_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const int8_t* k,
const float* bias,
const float* scale,
void* packed_weights,
size_t extra_bytes,
const void* params)
{ … }
void xnn_pack_f32_qc4w_gemm_goi_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const void* k,
const float* bias,
const float* scale,
void* packed_weights,
size_t extra_bytes,
const void* params)
{ … }
void xnn_pack_f32_gemm_gio_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
size_t k_stride,
const float* k,
const float* b,
const void* scale,
float* packed_weights,
size_t extra_bytes,
const void* params)
{ … }
void xnn_pack_f16_gemm_gio_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
size_t k_stride,
const uint16_t* k,
const uint16_t* b,
const void* scale,
uint16_t* packed_weights,
size_t extra_bytes,
const void* params)
{ … }
void xnn_pack_f32_to_f16_gemm_gio_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
size_t k_stride,
const float* k,
const float* b,
const void* scale,
uint16_t* packed_weights,
size_t extra_bytes,
const void* params)
{ … }
void xnn_pack_qu8_gemm_gio_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
size_t k_stride,
const uint8_t* k,
const int32_t* b,
const void* scale,
void* packed_weights,
size_t extra_bytes,
const struct xnn_qu8_packing_params* params)
{ … }
void xnn_pack_qs8_to_qu8_gemm_gio_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
size_t k_stride,
const int8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t extra_bytes,
const struct xnn_qs8_packing_params* params)
{ … }
void xnn_pack_qs8_gemm_gio_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
size_t k_stride,
const int8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t extra_bytes,
const struct xnn_qs8_packing_params* params)
{ … }
void pack_weights_and_biases(uint32_t flags,
const struct xnn_gemm_config* gemm_config,
size_t input_channels,
size_t output_channels,
size_t groups,
size_t weights_stride,
xnn_packw_gemm_gio_ukernel_fn pack_gemm_gio_w,
xnn_packw_gemm_goi_ukernel_fn pack_gemm_goi_w,
const void* accumulator_init,
const void* weights,
xnn_init_scale_params_fn init_extra_data0_fn,
const void* extra_data0,
size_t extra_data0_element_size,
xnn_init_scale_params_fn init_extra_data1_fn,
const void* extra_data1,
size_t extra_data1_element_size,
void* packed_weights_ptr,
size_t extra_bytes,
const void* params) { … }
size_t xnn_packed_stride_qs8_weights_and_biases(
const struct xnn_gemm_config* gemm_config, size_t unused_k, size_t k_stride,
size_t extra_bytes) { … }
void xnn_pack_qs8_weights_and_biases(
uint32_t flags, const struct xnn_gemm_config* gemm_config,
size_t input_channels, size_t output_channels, size_t groups,
size_t k_stride, const void* accumulator_init, const void* weights,
xnn_init_scale_params_fn init_extra_data0_fn, const void* extra_data0,
size_t extra_data0_element_size,
xnn_init_scale_params_fn init_extra_data1_fn, const void* extra_data1,
size_t extra_data1_element_size, void* packed_weights_ptr,
const void* params) { … }
size_t xnn_packed_stride_qs4_weights_and_biases(
const struct xnn_gemm_config* gemm_config, size_t unused_k, size_t k_stride,
size_t extra_bytes) { … }
void xnn_pack_qs4_weights_and_biases(
uint32_t flags, const struct xnn_gemm_config* gemm_config,
size_t input_channels, size_t output_channels, size_t groups,
size_t k_stride, const void* accumulator_init, const void* weights,
xnn_init_scale_params_fn init_extra_data0_fn, const void* extra_data0,
size_t extra_data0_element_size,
xnn_init_scale_params_fn init_extra_data1_fn, const void* extra_data1,
size_t extra_data1_element_size, void* packed_weights_ptr,
const void* params) { … }
size_t xnn_packed_stride_qu8_weights_and_biases(
const struct xnn_gemm_config* gemm_config, size_t unused_k, size_t k_stride,
size_t extra_bytes) { … }
void xnn_pack_qu8_weights_and_biases(
uint32_t flags, const struct xnn_gemm_config* gemm_config,
size_t input_channels, size_t output_channels, size_t groups,
size_t k_stride, const void* accumulator_init, const void* weights,
xnn_init_scale_params_fn init_extra_data0_fn, const void* extra_data0,
size_t extra_data0_element_size,
xnn_init_scale_params_fn init_extra_data1_fn, const void* extra_data1,
size_t extra_data1_element_size, void* packed_weights_ptr,
const void* params) { … }
#if XNN_ENABLE_KLEIDIAI
// Returns the packed-RHS stride that KleidiAI reports for the
// kxn_qsi4cxp_qsu4cxs1s0 packing routine.
//
// The tile parameters are read from `gemm_config`: `nr` is used as stored,
// while kr and sr are rebuilt as powers of two from `log2_kr` and `log2_sr`.
// `k` is forwarded unchanged as the first argument of the KleidiAI query.
// `unused_k_stride` and `extra_bytes` are accepted but never read here.
size_t xnn_packed_stride_kai_qs4_weights_and_biases(
    const struct xnn_gemm_config* gemm_config, size_t k, size_t unused_k_stride,
    size_t extra_bytes) {
  const uint32_t s_tile = UINT32_C(1) << gemm_config->log2_sr;
  const uint32_t k_tile = UINT32_C(1) << gemm_config->log2_kr;
  const uint32_t n_tile = gemm_config->nr;

  return kai_get_rhs_packed_stride_rhs_pack_kxn_qsi4cxp_qsu4cxs1s0(
      k, n_tile, k_tile, s_tile);
}
// Packs weights through the KleidiAI qsi4cxp_qsu4cxs1s0 RHS packers.
//
// `params` is interpreted as a `struct xnn_qs8_qc4w_packing_params`; its
// `input_zero_point` and `kernel_zero_point` are copied into the KleidiAI
// params struct as `lhs_zero_point` and `rhs_zero_point` respectively.
//
// Dispatch:
//   - XNN_FLAG_TRANSPOSE_WEIGHTS set in `flags`   -> kxn packer
//   - XNN_FLAG_TRANSPOSE_WEIGHTS clear in `flags` -> nxk packer
//
// Both packers receive (groups, output_channels, input_channels, nr, kr, sr),
// then `weights`, `extra_data0`, `extra_data1`, `packed_weights_ptr`, a
// literal 0, and the KleidiAI params struct.
// NOTE(review): `extra_data0` / `extra_data1` are presumably the bias and the
// scale expected by KleidiAI, and the literal 0 its extra-bytes argument;
// confirm against the KleidiAI headers.
//
// `k_stride`, `accumulator_init`, both `init_extra_data*_fn` callbacks and
// both `extra_data*_element_size` values are not used by this function.
void xnn_pack_kai_qs4_weights_and_biases(
    uint32_t flags, const struct xnn_gemm_config* gemm_config,
    size_t input_channels, size_t output_channels, size_t groups,
    size_t k_stride, const void* accumulator_init, const void* weights,
    xnn_init_scale_params_fn init_extra_data0_fn, const void* extra_data0,
    size_t extra_data0_element_size,
    xnn_init_scale_params_fn init_extra_data1_fn, const void* extra_data1,
    size_t extra_data1_element_size, void* packed_weights_ptr,
    const void* params) {
  const struct xnn_qs8_qc4w_packing_params* packing_params = params;

  // Tile sizes: nr is stored directly, kr and sr are stored as log2 values.
  const uint32_t n_tile = gemm_config->nr;
  const uint32_t k_tile = UINT32_C(1) << gemm_config->log2_kr;
  const uint32_t s_tile = UINT32_C(1) << gemm_config->log2_sr;

  if ((flags & XNN_FLAG_TRANSPOSE_WEIGHTS) == 0) {
    // Flag not set: hand the weights to the nxk packer.
    struct kai_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0_params nxk_params;
    nxk_params.lhs_zero_point = packing_params->input_zero_point;
    nxk_params.rhs_zero_point = packing_params->kernel_zero_point;

    kai_run_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0(
        groups, output_channels, input_channels, n_tile, k_tile, s_tile,
        weights, extra_data0, extra_data1, packed_weights_ptr, 0,
        &nxk_params);
    return;
  }

  // Flag set: hand the weights to the kxn packer.
  struct kai_rhs_pack_kxn_qsi4cxp_qsu4cxs1s0_params kxn_params;
  kxn_params.lhs_zero_point = packing_params->input_zero_point;
  kxn_params.rhs_zero_point = packing_params->kernel_zero_point;

  kai_run_rhs_pack_kxn_qsi4cxp_qsu4cxs1s0(
      groups, output_channels, input_channels, n_tile, k_tile, s_tile,
      weights, extra_data0, extra_data1, packed_weights_ptr, 0,
      &kxn_params);
}
#endif
void xnn_pack_f32_qs8w_gemm_gio_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
size_t k_stride,
const int8_t* k,
const float* bias,
const float* scale,
void* packed_weights,
size_t extra_bytes,
const void* params)
{ … }
void xnn_pack_f32_conv_goki_w(
size_t g,
size_t nc,
size_t ks,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const float* k,
const float* b,
const void* scale,
float* packed_weights,
size_t extra_bytes,
const void* params)
{ … }
void xnn_pack_f16_conv_goki_w(
size_t g,
size_t nc,
size_t ks,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const uint16_t* k,
const uint16_t* b,
const void* scale,
uint16_t* packed_weights,
size_t extra_bytes,
const void* params)
{ … }
void xnn_pack_f32_to_f16_conv_goki_w(
size_t g,
size_t nc,
size_t ks,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const float* k,
const float* b,
const void* scale,
uint16_t* packed_weights,
size_t extra_bytes,
const void* params)
{ … }
void xnn_pack_qu8_conv_goki_w(
size_t g,
size_t nc,
size_t ks,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const uint8_t* k,
const int32_t* b,
const void* scale,
void* packed_weights,
size_t extra_bytes,
const struct xnn_qu8_packing_params* params)
{ … }
void xnn_pack_qs8_to_qu8_conv_goki_w(
size_t g,
size_t nc,
size_t ks,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const int8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t extra_bytes,
const struct xnn_qs8_packing_params* params)
{ … }
void xnn_pack_qs8_conv_goki_w(
size_t g,
size_t nc,
size_t ks,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const int8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t extra_bytes,
const struct xnn_qs8_packing_params* params)
{ … }
void xnn_pack_f32_conv_kgo_w(
size_t g,
size_t nc,
size_t ks,
size_t nr,
size_t kr,
size_t sr,
const float* k,
const float* b,
const void* scale,
float* packed_weights,
size_t extra_bytes,
const void* params)
{ … }
void xnn_pack_f16_conv_kgo_w(
size_t g,
size_t nc,
size_t ks,
size_t nr,
size_t kr,
size_t sr,
const uint16_t* k,
const uint16_t* b,
const void* scale,
uint16_t* packed_weights,
size_t extra_bytes,
const void* params)
{ … }
void xnn_pack_f32_to_f16_conv_kgo_w(
size_t g,
size_t nc,
size_t ks,
size_t nr,
size_t kr,
size_t sr,
const float* k,
const float* b,
const void* scale,
uint16_t* packed_weights,
size_t extra_bytes,
const void* params)
{ … }
void xnn_pack_qu8_conv_kgo_w(
size_t g,
size_t nc,
size_t ks,
size_t nr,
size_t kr,
size_t sr,
const uint8_t* k,
const int32_t* b,
const void* scale,
void* packed_weights,
size_t extra_bytes,
const struct xnn_qu8_packing_params* params)
{ … }
void pack_qs8_conv_kgo_w(
size_t g,
size_t nc,
size_t ks,
size_t nr,
size_t kr,
size_t sr,
const int8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t extra_bytes,
int32_t zero_point_offset,
const struct xnn_qs8_packing_params* params)
{ … }
void xnn_pack_qs8_conv_kgo_w(
size_t g,
size_t nc,
size_t ks,
size_t nr,
size_t kr,
size_t sr,
const int8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t extra_bytes,
const struct xnn_qs8_packing_params* params)
{ … }
void xnn_pack_qs8_to_qu8_conv_kgo_w(
size_t g,
size_t nc,
size_t ks,
size_t nr,
size_t kr,
size_t sr,
const int8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t extra_bytes,
const struct xnn_qs8_packing_params* params)
{ … }
void xnn_pack_f32_deconv_goki_w(
size_t g,
size_t nc,
size_t kh,
size_t kw,
size_t kc,
size_t sh,
size_t sw,
size_t nr,
size_t kr,
size_t sr,
const float* k,
const float* b,
const void* scale,
float* packed_weights,
size_t extra_bytes,
struct subconvolution_params* subconv_params,
const void* params)
{ … }
void xnn_pack_f16_deconv_goki_w(
size_t g,
size_t nc,
size_t kh,
size_t kw,
size_t kc,
size_t sh,
size_t sw,
size_t nr,
size_t kr,
size_t sr,
const uint16_t* k,
const uint16_t* b,
const void* scale,
uint16_t* packed_weights,
size_t extra_bytes,
struct subconvolution_params* subconv_params,
const void* params)
{ … }
void xnn_pack_f32_to_f16_deconv_goki_w(
size_t g,
size_t nc,
size_t kh,
size_t kw,
size_t kc,
size_t sh,
size_t sw,
size_t nr,
size_t kr,
size_t sr,
const float* k,
const float* b,
const void* scale,
uint16_t* packed_weights,
size_t extra_bytes,
struct subconvolution_params* subconv_params,
const void* params)
{ … }
void pack_qs8_deconv_goki_w(
size_t groups,
size_t nc,
size_t kh,
size_t kw,
size_t kc,
size_t sh,
size_t sw,
size_t nr,
size_t kr,
size_t sr,
const int8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t extra_bytes,
int32_t zero_point_offset,
struct subconvolution_params* subconv_params,
const struct xnn_qs8_packing_params* params)
{ … }
void xnn_pack_qs8_deconv_goki_w(
size_t g,
size_t nc,
size_t kh,
size_t kw,
size_t kc,
size_t sh,
size_t sw,
size_t nr,
size_t kr,
size_t sr,
const int8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t extra_bytes,
struct subconvolution_params* subconv_params,
const struct xnn_qs8_packing_params* params)
{ … }
void xnn_pack_qs8_to_qu8_deconv_goki_w(
size_t g,
size_t nc,
size_t kh,
size_t kw,
size_t kc,
size_t sh,
size_t sw,
size_t nr,
size_t kr,
size_t sr,
const int8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t extra_bytes,
struct subconvolution_params* subconv_params,
const struct xnn_qs8_packing_params* params)
{ … }
void xnn_pack_qu8_deconv_goki_w(
size_t g,
size_t nc,
size_t kh,
size_t kw,
size_t kc,
size_t sh,
size_t sw,
size_t nr,
size_t kr,
size_t sr,
const uint8_t* k,
const int32_t* b,
const void* scale,
void* packed_weights,
size_t extra_bytes,
struct subconvolution_params* subconv_params,
const struct xnn_qu8_packing_params* params)
{ … }
inline static void advance_x_y(size_t h, size_t* x, size_t* y) { … }
void xnn_pack_f32_dwconv_ghw_w(
size_t first_pass_tile,
size_t middle_pass_tile,
size_t last_pass_tile,
size_t h,
size_t w,
size_t c,
size_t channel_tile,
size_t channel_subtile,
size_t channel_round,
const float* k,
const float* b,
const void* scale,
float* packed_weights,
size_t per_tile_extra_bytes,
size_t per_subtile_extra_bytes,
const void* params)
{ … }
void xnn_pack_f16_dwconv_ghw_w(
size_t first_pass_tile,
size_t middle_pass_tile,
size_t last_pass_tile,
size_t h,
size_t w,
size_t c,
size_t channel_tile,
size_t channel_subtile,
size_t channel_round,
const uint16_t* k,
const uint16_t* b,
const void* scale,
uint16_t* packed_weights,
size_t per_tile_extra_bytes,
size_t per_subtile_extra_bytes,
const void* params)
{ … }
void xnn_pack_f32_to_f16_dwconv_ghw_w(
size_t first_pass_tile,
size_t middle_pass_tile,
size_t last_pass_tile,
size_t h,
size_t w,
size_t c,
size_t channel_tile,
size_t channel_subtile,
size_t channel_round,
const float* k,
const float* b,
const void* scale,
uint16_t* packed_weights,
size_t per_tile_extra_bytes,
size_t per_subtile_extra_bytes,
const void* params)
{ … }
void xnn_pack_qu8_dwconv_ghw_w(
size_t first_pass_tile,
size_t middle_pass_tile,
size_t last_pass_tile,
size_t h,
size_t w,
size_t c,
size_t channel_tile,
size_t channel_subtile,
size_t channel_round,
const uint8_t* k,
const int32_t* b,
const void* scale,
void* packed_weights,
size_t per_tile_extra_bytes,
size_t per_subtile_extra_bytes,
const struct xnn_qu8_packing_params* params)
{ … }
void xnn_pack_qs8_dwconv_ghw_w(
size_t first_pass_tile,
size_t middle_pass_tile,
size_t last_pass_tile,
size_t h,
size_t w,
size_t c,
size_t channel_tile,
size_t channel_subtile,
size_t channel_round,
const int8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t per_tile_extra_bytes,
size_t per_subtile_extra_bytes,
const struct xnn_qs8_packing_params* params)
{ … }
void xnn_pack_f32_dwconv_hwg_w(
size_t first_pass_tile,
size_t middle_pass_tile,
size_t last_pass_tile,
size_t h,
size_t w,
size_t c,
size_t channel_tile,
size_t channel_subtile,
size_t channel_round,
const float* k,
const float* b,
const void* scale,
float* packed_weights,
size_t per_tile_extra_bytes,
size_t per_subtile_extra_bytes,
const void* params)
{ … }
void xnn_pack_f16_dwconv_hwg_w(
size_t first_pass_tile,
size_t middle_pass_tile,
size_t last_pass_tile,
size_t h,
size_t w,
size_t c,
size_t channel_tile,
size_t channel_subtile,
size_t channel_round,
const uint16_t* k,
const uint16_t* b,
const void* scale,
uint16_t* packed_weights,
size_t per_tile_extra_bytes,
size_t per_subtile_extra_bytes,
const void* params)
{ … }
void xnn_pack_f32_to_f16_dwconv_hwg_w(
size_t first_pass_tile,
size_t middle_pass_tile,
size_t last_pass_tile,
size_t h,
size_t w,
size_t c,
size_t channel_tile,
size_t channel_subtile,
size_t channel_round,
const float* k,
const float* b,
const void* scale,
uint16_t* packed_weights,
size_t per_tile_extra_bytes,
size_t per_subtile_extra_bytes,
const void* params)
{ … }
void xnn_pack_qu8_dwconv_hwg_w(
size_t first_pass_tile,
size_t middle_pass_tile,
size_t last_pass_tile,
size_t h,
size_t w,
size_t c,
size_t channel_tile,
size_t channel_subtile,
size_t channel_round,
const uint8_t* k,
const int32_t* b,
const void* scale,
void* packed_weights,
size_t per_tile_extra_bytes,
size_t per_subtile_extra_bytes,
const struct xnn_qu8_packing_params* params)
{ … }
void xnn_pack_qs8_dwconv_hwg_w(
size_t first_pass_tile,
size_t middle_pass_tile,
size_t last_pass_tile,
size_t h,
size_t w,
size_t c,
size_t channel_tile,
size_t channel_subtile,
size_t channel_round,
const int8_t* k,
const int32_t* b,
const float* scale,
void* packed_weights,
size_t per_tile_extra_bytes,
size_t per_subtile_extra_bytes,
const struct xnn_qs8_packing_params* params)
{ … }
void xnn_pack_f32_gemminc_goi_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const float* k,
float* packed_weights,
const void* params)
{ … }
void xnn_pack_f16_gemminc_goi_w(
size_t g,
size_t nc,
size_t kc,
size_t nr,
size_t kr,
size_t sr,
const uint16_t* k,
uint16_t* packed_weights,
const void* params)
{ … }
void xnn_pack_f32_dconv_oki_w(
size_t nc,
size_t kc,
size_t nr,
size_t kh,
size_t kw,
const float* k,
const float* b,
float* packed_weights,
const void* params)
{ … }
void xnn_pack_f32_to_f16_dconv_oki_w(
size_t nc,
size_t kc,
size_t nr,
size_t kh,
size_t kw,
const float* k,
const float* b,
uint16_t* packed_weights,
const void* params)
{ … }
void xnn_pack_f16_dconv_oki_w(
size_t nc,
size_t kc,
size_t nr,
size_t kh,
size_t kw,
const uint16_t* k,
const uint16_t* b,
uint16_t* packed_weights,
const void* params)
{ … }
void xnn_pack_f32_chw_dwconv_ghw_w(
size_t kernel_size,
size_t groups,
const float* k,
const float* b,
float* packed_weights,
const void* params)
{ … }
void xnn_pack_f32_to_f16_chw_dwconv_ghw_w(
size_t kernel_size,
size_t groups,
const float* k,
const float* b,
uint16_t* packed_weights,
const void* params)
{ … }
void xnn_pack_f16_chw_dwconv_ghw_w(
size_t kernel_size,
size_t groups,
const uint16_t* k,
const uint16_t* b,
uint16_t* packed_weights,
const void* params)
{ … }
void xnn_pack_f32_chw_dwconv_hwg_w(
size_t kernel_size,
size_t groups,
const float* k,
const float* b,
float* packed_weights,
const void* params)
{ … }
void xnn_pack_f16_chw_dwconv_hwg_w(
size_t kernel_size,
size_t groups,
const uint16_t* k,
const uint16_t* b,
uint16_t* packed_weights,
const void* params)
{ … }
void xnn_pack_f32_to_f16_chw_dwconv_hwg_w(
size_t kernel_size,
size_t groups,
const float* k,
const float* b,
uint16_t* packed_weights,
const void* params)
{ … }
void xnn_pack_f32_vmulcaddc_w(
size_t c,
size_t cr,
const float* s,
const float* b,
float* packed_weights,
const void* params)
{ … }
void xnn_pack_f16_vmulcaddc_w(
size_t c,
size_t cr,
const uint16_t* s,
const uint16_t* b,
uint16_t* packed_weights,
const void* params)
{ … }
void xnn_pack_f32_to_f16_vmulcaddc_w(
size_t c,
size_t cr,
const float* s,
const float* b,
uint16_t* packed_weights,
const void* params)
{ … }
void xnn_pack_f32_prelu_w(
size_t input_channels,
size_t slope_channels,
const float* s,
float* packed_weights)
{ … }
void xnn_pack_f16_prelu_w(
size_t input_channels,
size_t slope_channels,
const uint16_t* s,
uint16_t* packed_weights)
{ … }
void xnn_pack_f32_to_f16_prelu_w(
size_t input_channels,
size_t slope_channels,
const float* s,
uint16_t* packed_weights)
{ … }
void xnn_analyze_f32_spmm_w(
size_t group_output_channels,
size_t group_input_channels,
const float* kernel,
struct xnn_spmm_packing_params* params)
{ … }
void xnn_analyze_f16_spmm_w(
size_t group_output_channels,
size_t group_input_channels,
const uint16_t* kernel,
struct xnn_spmm_packing_params* params)
{ … }
enum xnn_status xnn_pack_f32_spmm_w(
size_t group_output_channels,
size_t output_channels_block_size,
size_t group_input_channels,
const float* kernel,
const float* bias,
int32_t* input_channel_diffs,
uint32_t* output_channel_nonzeros,
float* nonzero_values,
size_t* first_input_channel)
{ … }
enum xnn_status xnn_pack_f32_to_f16_spmm_w(
size_t group_output_channels,
size_t output_channels_block_size,
size_t group_input_channels,
const float* kernel,
const float* bias,
int32_t* input_channel_diffs,
uint32_t* output_channel_nonzeros,
uint16_t* nonzero_values,
size_t* first_input_channel)
{ … }
enum xnn_status xnn_pack_f16_spmm_w(
size_t group_output_channels,
size_t output_channels_block_size,
size_t group_input_channels,
const uint16_t* kernel,
const uint16_t* bias,
int32_t* input_channel_diffs,
uint32_t* output_channel_nonzeros,
uint16_t* nonzero_values,
size_t* first_input_channel)
{ … }