#include <assert.h>
#include <immintrin.h>
#include "xnnpack/common.h"
#include "xnnpack/dwconv.h"
#include "xnnpack/gemm.h"
#include "xnnpack/igemm.h"
#include "xnnpack/intrinsics-polyfill.h"
#include "xnnpack/lut.h"
#include "xnnpack/math.h"
#include "xnnpack/prefetch.h"
#include "xnnpack/reduce.h"
#include "xnnpack/unaligned.h"
#include "xnnpack/vbinary.h"
#include "xnnpack/vcvt.h"
void xnn_f16_f32_vcvt_ukernel__avx512skx_u16(
size_t batch,
const void* input,
float* output,
const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
void xnn_f16_f32acc_rdsum_ukernel_7p7x__avx512skx_c64(
size_t rows,
size_t channels,
const void* input,
size_t input_stride,
const void* zero,
float* output,
const union xnn_f16_f32acc_scale_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
void xnn_f16_f32acc_rsum_ukernel__avx512skx_u64_acc4(
size_t batch,
const void* input,
float* output,
const union xnn_f16_f32acc_scale_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
void xnn_f16_rmax_ukernel__avx512skx_u64_acc4(
size_t batch,
const void* input,
void* output,
const union xnn_f16_default_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
void xnn_f16_rminmax_ukernel__avx512skx_u64_acc4(
size_t batch,
const void* input,
void* output,
const union xnn_f16_default_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
void xnn_f32_f16_vcvt_ukernel__avx512skx_u16(
size_t batch,
const float* input,
void* output,
const union xnn_f32_f16_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
void xnn_f32_qc4w_gemm_minmax_ukernel_1x32__avx512skx_broadcast(
size_t mr,
size_t nc,
size_t kc,
const float* restrict a,
size_t a_stride,
const void* restrict w,
float* restrict c,
size_t cm_stride,
size_t cn_stride,
const union xnn_f32_qc4w_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
void xnn_f32_qc4w_gemm_minmax_ukernel_7x32__avx512skx_broadcast(
size_t mr,
size_t nc,
size_t kc,
const float* restrict a,
size_t a_stride,
const void* restrict w,
float* restrict c,
size_t cm_stride,
size_t cn_stride,
const union xnn_f32_qc4w_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
void xnn_f32_qc8w_gemm_minmax_ukernel_1x32__avx512skx_broadcast(
size_t mr,
size_t nc,
size_t kc,
const float* restrict a,
size_t a_stride,
const void* restrict w,
float* restrict c,
size_t cm_stride,
size_t cn_stride,
const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
void xnn_f32_qc8w_gemm_minmax_ukernel_7x32__avx512skx_broadcast(
size_t mr,
size_t nc,
size_t kc,
const float* restrict a,
size_t a_stride,
const void* restrict w,
float* restrict c,
size_t cm_stride,
size_t cn_stride,
const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
void xnn_f32_qs8_vcvt_ukernel__avx512skx_u128(
size_t batch,
const float* input,
int8_t* output,
const union xnn_f32_qs8_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
void xnn_f32_qu8_vcvt_ukernel__avx512skx_u128(
size_t batch,
const float* input,
uint8_t* output,
const union xnn_f32_qu8_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
void xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x16c8__avx512skx_madd_prfm(
size_t mr,
size_t nc,
size_t kc,
const int8_t* restrict a,
size_t a_stride,
const void* restrict w,
float* restrict c,
size_t cm_stride,
size_t cn_stride,
const union xnn_f32_qc4w_minmax_params params[restrict XNN_MIN_ELEMENTS(1)],
const struct xnn_qd8_quantization_params quantization_params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x16c8__avx512skx_madd_prfm(
size_t mr,
size_t nc,
size_t kc,
const int8_t* restrict a,
size_t a_stride,
const void* restrict w,
float* restrict c,
size_t cm_stride,
size_t cn_stride,
const union xnn_f32_qc4w_minmax_params params[restrict XNN_MIN_ELEMENTS(1)],
const struct xnn_qd8_quantization_params quantization_params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qd8_f32_qc8w_gemm_minmax_ukernel_1x16c8__avx512skx_prfm(
size_t mr,
size_t nc,
size_t kc,
const int8_t* restrict a,
size_t a_stride,
const void* restrict w,
float* restrict c,
size_t cm_stride,
size_t cn_stride,
const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)],
const struct xnn_qd8_quantization_params quantization_params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qd8_f32_qc8w_gemm_minmax_ukernel_8x16c8__avx512skx_prfm(
size_t mr,
size_t nc,
size_t kc,
const int8_t* restrict a,
size_t a_stride,
const void* restrict w,
float* restrict c,
size_t cm_stride,
size_t cn_stride,
const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)],
const struct xnn_qd8_quantization_params quantization_params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qd8_f32_qc8w_igemm_minmax_ukernel_1x16c8__avx512skx_prfm(
size_t mr,
size_t nc,
size_t kc,
size_t ks,
const int8_t** restrict a,
const void* restrict w,
float* restrict c,
size_t cm_stride,
size_t cn_stride,
size_t a_offset,
const int8_t* zero,
const int8_t* zero_data,
const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)],
const struct xnn_qd8_quantization_params quantization_params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qd8_f32_qc8w_igemm_minmax_ukernel_8x16c8__avx512skx_prfm(
size_t mr,
size_t nc,
size_t kc,
size_t ks,
const int8_t** restrict a,
const void* restrict w,
float* restrict c,
size_t cm_stride,
size_t cn_stride,
size_t a_offset,
const int8_t* zero,
const int8_t* zero_data,
const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)],
const struct xnn_qd8_quantization_params quantization_params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qs8_dwconv_minmax_fp32_ukernel_25p32c__avx512skx_mul32(
size_t channels,
size_t output_width,
const int8_t** input,
const void* weights,
int8_t* output,
intptr_t input_stride,
size_t output_increment,
size_t input_offset,
const int8_t* zero,
const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qs8_dwconv_minmax_fp32_ukernel_9p32c__avx512skx_mul32(
size_t channels,
size_t output_width,
const int8_t** input,
const void* weights,
int8_t* output,
intptr_t input_stride,
size_t output_increment,
size_t input_offset,
const int8_t* zero,
const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qs8_f32_vcvt_ukernel__avx512skx_u32(
size_t batch,
const int8_t* input,
float* output,
const union xnn_qs8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qs8_qc8w_dwconv_minmax_fp32_ukernel_25p32c__avx512skx_mul32(
size_t channels,
size_t output_width,
const int8_t** input,
const void* weights,
int8_t* output,
intptr_t input_stride,
size_t output_increment,
size_t input_offset,
const int8_t* zero,
const union xnn_qs8_qc8w_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qs8_qc8w_dwconv_minmax_fp32_ukernel_3p32c__avx512skx_mul32(
size_t channels,
size_t output_width,
const int8_t** input,
const void* weights,
int8_t* output,
intptr_t input_stride,
size_t output_increment,
size_t input_offset,
const int8_t* zero,
const union xnn_qs8_qc8w_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qs8_qc8w_dwconv_minmax_fp32_ukernel_9p32c__avx512skx_mul32(
size_t channels,
size_t output_width,
const int8_t** input,
const void* weights,
int8_t* output,
intptr_t input_stride,
size_t output_increment,
size_t input_offset,
const int8_t* zero,
const union xnn_qs8_qc8w_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x16c8__avx512skx_prfm(
size_t mr,
size_t nc,
size_t kc,
const int8_t* restrict a,
size_t a_stride,
const void* restrict w,
int8_t* restrict c,
size_t cm_stride,
size_t cn_stride,
const union xnn_qs8_qc8w_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_7x16c8__avx512skx_prfm(
size_t mr,
size_t nc,
size_t kc,
const int8_t* restrict a,
size_t a_stride,
const void* restrict w,
int8_t* restrict c,
size_t cm_stride,
size_t cn_stride,
const union xnn_qs8_qc8w_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_1x16c8__avx512skx_prfm(
size_t mr,
size_t nc,
size_t kc,
size_t ks,
const int8_t** restrict a,
const void* restrict w,
int8_t* restrict c,
size_t cm_stride,
size_t cn_stride,
size_t a_offset,
const int8_t* zero,
const union xnn_qs8_qc8w_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_7x16c8__avx512skx_prfm(
size_t mr,
size_t nc,
size_t kc,
size_t ks,
const int8_t** restrict a,
const void* restrict w,
int8_t* restrict c,
size_t cm_stride,
size_t cn_stride,
size_t a_offset,
const int8_t* zero,
const union xnn_qs8_qc8w_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_u16(
size_t batch,
const int8_t* input_a,
const int8_t* input_b,
int8_t* output,
const union xnn_qs8_add_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
void xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_u16(
size_t batch,
const int8_t* input_a,
const int8_t* input_b,
int8_t* output,
const union xnn_qs8_add_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
void xnn_qu8_dwconv_minmax_fp32_ukernel_25p32c__avx512skx_mul32(
size_t channels,
size_t output_width,
const uint8_t** input,
const void* weights,
uint8_t* output,
intptr_t input_stride,
size_t output_increment,
size_t input_offset,
const uint8_t* zero,
const union xnn_qu8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qu8_dwconv_minmax_fp32_ukernel_9p32c__avx512skx_mul32(
size_t channels,
size_t output_width,
const uint8_t** input,
const void* weights,
uint8_t* output,
intptr_t input_stride,
size_t output_increment,
size_t input_offset,
const uint8_t* zero,
const union xnn_qu8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qu8_f32_vcvt_ukernel__avx512skx_u32(
size_t batch,
const uint8_t* input,
float* output,
const union xnn_qu8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qu8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx_prfm(
size_t mr,
size_t nc,
size_t kc,
const uint8_t* restrict a,
size_t a_stride,
const void* restrict w,
uint8_t* restrict c,
size_t cm_stride,
size_t cn_stride,
const union xnn_qu8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qu8_gemm_minmax_fp32_ukernel_7x16c8__avx512skx_prfm(
size_t mr,
size_t nc,
size_t kc,
const uint8_t* restrict a,
size_t a_stride,
const void* restrict w,
uint8_t* restrict c,
size_t cm_stride,
size_t cn_stride,
const union xnn_qu8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx_prfm(
size_t mr,
size_t nc,
size_t kc,
size_t ks,
const uint8_t** restrict a,
const void* restrict w,
uint8_t* restrict c,
size_t cm_stride,
size_t cn_stride,
size_t a_offset,
const uint8_t* zero,
const union xnn_qu8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qu8_igemm_minmax_fp32_ukernel_7x16c8__avx512skx_prfm(
size_t mr,
size_t nc,
size_t kc,
size_t ks,
const uint8_t** restrict a,
const void* restrict w,
uint8_t* restrict c,
size_t cm_stride,
size_t cn_stride,
size_t a_offset,
const uint8_t* zero,
const union xnn_qu8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS
{ … }
void xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_u16(
size_t batch,
const uint8_t* input_a,
const uint8_t* input_b,
uint8_t* output,
const union xnn_qu8_add_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
void xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_u16(
size_t batch,
const uint8_t* input_a,
const uint8_t* input_b,
uint8_t* output,
const union xnn_qu8_add_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
void xnn_x8_lut_ukernel__avx512skx_vpshufb_u64(
size_t batch,
const uint8_t* input,
uint8_t* output,
const uint8_t table[restrict XNN_MIN_ELEMENTS(256)])
{ … }