#include <assert.h>
#include <immintrin.h>
#include "xnnpack/common.h"
#include "xnnpack/gemm.h"
#include "xnnpack/igemm.h"
#include "xnnpack/intrinsics-polyfill.h"
#include "xnnpack/reduce.h"
#include "xnnpack/vbinary.h"
// Half-precision ("f16") GEMM micro-kernel taking min/max parameters; the name
// indicates a 1x64 output tile and an AVX512-FP16 "broadcast" variant.
// NOTE(review): the parameter roles below are inferred from names and the
// signature only; confirm against the kernel body.
//   mr, nc, kc  - presumably rows to process, output columns, and reduction
//                 extent (units: elements or bytes - TODO confirm).
//   a, a_stride - read-only untyped pointer and a stride; presumably the
//                 left-hand matrix and its row stride.
//   w           - read-only untyped pointer; presumably packed weights - verify.
//   c, cm_stride, cn_stride
//               - untyped output pointer with two strides; presumably the row
//                 stride and the step between column tiles - verify.
//   params      - union xnn_f16_minmax_params; presumably the clamping bounds.
// a, w, c and params are restrict-qualified, so callers must not alias them.
void xnn_f16_gemm_minmax_ukernel_1x64__avx512fp16_broadcast(
size_t mr,
size_t nc,
size_t kc,
const void* restrict a,
size_t a_stride,
const void* restrict w,
void* restrict c,
size_t cm_stride,
size_t cn_stride,
const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") GEMM micro-kernel taking min/max parameters; the name
// indicates a 7x64 output tile and an AVX512-FP16 "broadcast" variant.
// NOTE(review): the parameter roles below are inferred from names and the
// signature only; confirm against the kernel body.
//   mr, nc, kc  - presumably rows to process (likely at most 7 - verify),
//                 output columns, and reduction extent (units - TODO confirm).
//   a, a_stride - read-only untyped pointer and a stride; presumably the
//                 left-hand matrix and its row stride.
//   w           - read-only untyped pointer; presumably packed weights - verify.
//   c, cm_stride, cn_stride
//               - untyped output pointer with two strides; presumably the row
//                 stride and the step between column tiles - verify.
//   params      - union xnn_f16_minmax_params; presumably the clamping bounds.
// a, w, c and params are restrict-qualified, so callers must not alias them.
void xnn_f16_gemm_minmax_ukernel_7x64__avx512fp16_broadcast(
size_t mr,
size_t nc,
size_t kc,
const void* restrict a,
size_t a_stride,
const void* restrict w,
void* restrict c,
size_t cm_stride,
size_t cn_stride,
const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") indirect-GEMM ("igemm") micro-kernel taking min/max
// parameters; the name indicates a 1x64 output tile and an AVX512-FP16
// "broadcast" variant.
// NOTE(review): the parameter roles below are inferred from names and the
// signature only; confirm against the kernel body.
//   mr, nc, kc  - presumably rows to process, output columns, and reduction
//                 extent (units: elements or bytes - TODO confirm).
//   ks          - presumably the extent of the indirection data consumed per
//                 output tile (units - TODO confirm).
//   a           - restrict-qualified pointer to read-only untyped pointers;
//                 presumably an indirection buffer of input row pointers.
//   w           - read-only untyped pointer; presumably packed weights - verify.
//   c, cm_stride, cn_stride
//               - untyped output pointer with two strides - roles to confirm.
//   a_offset, zero
//               - presumably an offset added to the pointers read from `a`,
//                 except those equal to `zero` - verify against the body.
//   params      - union xnn_f16_minmax_params; presumably the clamping bounds.
// a, w, c and params are restrict-qualified; `zero` is not.
void xnn_f16_igemm_minmax_ukernel_1x64__avx512fp16_broadcast(
size_t mr,
size_t nc,
size_t kc,
size_t ks,
const void** restrict a,
const void* restrict w,
void* restrict c,
size_t cm_stride,
size_t cn_stride,
size_t a_offset,
const void* zero,
const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") indirect-GEMM ("igemm") micro-kernel taking min/max
// parameters; the name indicates a 7x64 output tile and an AVX512-FP16
// "broadcast" variant.
// NOTE(review): the parameter roles below are inferred from names and the
// signature only; confirm against the kernel body.
//   mr, nc, kc  - presumably rows to process (likely at most 7 - verify),
//                 output columns, and reduction extent (units - TODO confirm).
//   ks          - presumably the extent of the indirection data consumed per
//                 output tile (units - TODO confirm).
//   a           - restrict-qualified pointer to read-only untyped pointers;
//                 presumably an indirection buffer of input row pointers.
//   w           - read-only untyped pointer; presumably packed weights - verify.
//   c, cm_stride, cn_stride
//               - untyped output pointer with two strides - roles to confirm.
//   a_offset, zero
//               - presumably an offset added to the pointers read from `a`,
//                 except those equal to `zero` - verify against the body.
//   params      - union xnn_f16_minmax_params; presumably the clamping bounds.
// a, w, c and params are restrict-qualified; `zero` is not.
void xnn_f16_igemm_minmax_ukernel_7x64__avx512fp16_broadcast(
size_t mr,
size_t nc,
size_t kc,
size_t ks,
const void** restrict a,
const void* restrict w,
void* restrict c,
size_t cm_stride,
size_t cn_stride,
size_t a_offset,
const void* zero,
const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") reduction micro-kernel; the name suggests it reduces
// the input to its maximum ("rmax") using AVX512-FP16, with "u128" and "acc4"
// presumably denoting the unroll width and the number of accumulators.
// NOTE(review): inferred from the name and signature only; confirm against
// the kernel body.
//   batch  - presumably the amount of input to reduce (bytes or elements -
//            TODO confirm).
//   input  - read-only untyped pointer to the data.
//   output - untyped pointer; presumably receives a single result - verify.
//   params - union xnn_f16_default_params; usage not visible from the signature.
// input and output are not restrict-qualified in this signature; only params is.
void xnn_f16_rmax_ukernel__avx512fp16_u128_acc4(
size_t batch,
const void* input,
void* output,
const union xnn_f16_default_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") reduction micro-kernel; the name suggests it reduces
// the input to both its minimum and maximum ("rminmax") using AVX512-FP16,
// with "u128" and "acc4" presumably denoting the unroll width and the number
// of accumulators.
// NOTE(review): inferred from the name and signature only; confirm against
// the kernel body.
//   batch  - presumably the amount of input to reduce (bytes or elements -
//            TODO confirm).
//   input  - read-only untyped pointer to the data.
//   output - untyped pointer; presumably receives two results (min and max;
//            order to be confirmed).
//   params - union xnn_f16_default_params; usage not visible from the signature.
// input and output are not restrict-qualified in this signature; only params is.
void xnn_f16_rminmax_ukernel__avx512fp16_u128_acc4(
size_t batch,
const void* input,
void* output,
const union xnn_f16_default_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") element-wise addition micro-kernel taking min/max
// parameters (AVX512-FP16, "u64" variant), as suggested by its name.
// NOTE(review): inferred from the name and signature only; confirm against
// the kernel body.
//   batch            - presumably the amount of data to process (bytes or
//                      elements - TODO confirm).
//   input_a, input_b - read-only, restrict-qualified operand pointers.
//   output           - restrict-qualified destination pointer.
//   params           - union xnn_f16_minmax_params; presumably the bounds the
//                      result is clamped to.
void xnn_f16_vadd_minmax_ukernel__avx512fp16_u64(
size_t batch,
const void* restrict input_a,
const void* restrict input_b,
void* restrict output,
const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") addition micro-kernel taking min/max parameters
// (AVX512-FP16, "u64" variant); the "c" suffix presumably means the second
// operand is a single constant applied to every element of input_a.
// NOTE(review): inferred from the name and signature only; confirm against
// the kernel body.
//   batch   - presumably the amount of data to process (bytes or elements -
//             TODO confirm).
//   input_a - read-only, restrict-qualified operand pointer.
//   input_b - read-only, restrict-qualified pointer; presumably to one scalar.
//   output  - restrict-qualified destination pointer.
//   params  - union xnn_f16_minmax_params; presumably the bounds the result is
//             clamped to.
void xnn_f16_vaddc_minmax_ukernel__avx512fp16_u64(
size_t batch,
const void* restrict input_a,
const void* restrict input_b,
void* restrict output,
const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") element-wise division micro-kernel taking min/max
// parameters (AVX512-FP16, "u64" variant), as suggested by its name.
// NOTE(review): inferred from the name and signature only; confirm against
// the kernel body.
//   batch            - presumably the amount of data to process (bytes or
//                      elements - TODO confirm).
//   input_a, input_b - read-only, restrict-qualified operand pointers;
//                      presumably dividend and divisor respectively - verify.
//   output           - restrict-qualified destination pointer.
//   params           - union xnn_f16_minmax_params; presumably the bounds the
//                      result is clamped to.
void xnn_f16_vdiv_minmax_ukernel__avx512fp16_u64(
size_t batch,
const void* restrict input_a,
const void* restrict input_b,
void* restrict output,
const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") division micro-kernel taking min/max parameters
// (AVX512-FP16, "u64" variant); the "c" suffix presumably means the divisor
// is a single constant applied to every element of input_a.
// NOTE(review): inferred from the name and signature only; confirm against
// the kernel body.
//   batch   - presumably the amount of data to process (bytes or elements -
//             TODO confirm).
//   input_a - read-only, restrict-qualified operand pointer.
//   input_b - read-only, restrict-qualified pointer; presumably to one scalar.
//   output  - restrict-qualified destination pointer.
//   params  - union xnn_f16_minmax_params; presumably the bounds the result is
//             clamped to.
void xnn_f16_vdivc_minmax_ukernel__avx512fp16_u64(
size_t batch,
const void* restrict input_a,
const void* restrict input_b,
void* restrict output,
const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") element-wise maximum micro-kernel (AVX512-FP16,
// "u64" variant), as suggested by its name.
// NOTE(review): inferred from the name and signature only; confirm against
// the kernel body.
//   batch            - presumably the amount of data to process (bytes or
//                      elements - TODO confirm).
//   input_a, input_b - read-only, restrict-qualified operand pointers.
//   output           - restrict-qualified destination pointer.
//   params           - union xnn_f16_default_params; usage not visible from
//                      the signature.
void xnn_f16_vmax_ukernel__avx512fp16_u64(
size_t batch,
const void* restrict input_a,
const void* restrict input_b,
void* restrict output,
const union xnn_f16_default_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") maximum micro-kernel (AVX512-FP16, "u64" variant);
// the "c" suffix presumably means the second operand is a single constant
// compared against every element of input_a.
// NOTE(review): inferred from the name and signature only; confirm against
// the kernel body.
//   batch   - presumably the amount of data to process (bytes or elements -
//             TODO confirm).
//   input_a - read-only, restrict-qualified operand pointer.
//   input_b - read-only, restrict-qualified pointer; presumably to one scalar.
//   output  - restrict-qualified destination pointer.
//   params  - union xnn_f16_default_params; usage not visible from the
//             signature.
void xnn_f16_vmaxc_ukernel__avx512fp16_u64(
size_t batch,
const void* restrict input_a,
const void* restrict input_b,
void* restrict output,
const union xnn_f16_default_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") element-wise minimum micro-kernel (AVX512-FP16,
// "u64" variant), as suggested by its name.
// NOTE(review): inferred from the name and signature only; confirm against
// the kernel body.
//   batch            - presumably the amount of data to process (bytes or
//                      elements - TODO confirm).
//   input_a, input_b - read-only, restrict-qualified operand pointers.
//   output           - restrict-qualified destination pointer.
//   params           - union xnn_f16_default_params; usage not visible from
//                      the signature.
void xnn_f16_vmin_ukernel__avx512fp16_u64(
size_t batch,
const void* restrict input_a,
const void* restrict input_b,
void* restrict output,
const union xnn_f16_default_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") minimum micro-kernel (AVX512-FP16, "u64" variant);
// the "c" suffix presumably means the second operand is a single constant
// compared against every element of input_a.
// NOTE(review): inferred from the name and signature only; confirm against
// the kernel body.
//   batch   - presumably the amount of data to process (bytes or elements -
//             TODO confirm).
//   input_a - read-only, restrict-qualified operand pointer.
//   input_b - read-only, restrict-qualified pointer; presumably to one scalar.
//   output  - restrict-qualified destination pointer.
//   params  - union xnn_f16_default_params; usage not visible from the
//             signature.
void xnn_f16_vminc_ukernel__avx512fp16_u64(
size_t batch,
const void* restrict input_a,
const void* restrict input_b,
void* restrict output,
const union xnn_f16_default_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") element-wise multiplication micro-kernel taking
// min/max parameters (AVX512-FP16, "u64" variant), as suggested by its name.
// NOTE(review): inferred from the name and signature only; confirm against
// the kernel body.
//   batch            - presumably the amount of data to process (bytes or
//                      elements - TODO confirm).
//   input_a, input_b - read-only, restrict-qualified operand pointers.
//   output           - restrict-qualified destination pointer.
//   params           - union xnn_f16_minmax_params; presumably the bounds the
//                      result is clamped to.
void xnn_f16_vmul_minmax_ukernel__avx512fp16_u64(
size_t batch,
const void* restrict input_a,
const void* restrict input_b,
void* restrict output,
const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") multiplication micro-kernel taking min/max parameters
// (AVX512-FP16, "u64" variant); the "c" suffix presumably means the second
// operand is a single constant applied to every element of input_a.
// NOTE(review): inferred from the name and signature only; confirm against
// the kernel body.
//   batch   - presumably the amount of data to process (bytes or elements -
//             TODO confirm).
//   input_a - read-only, restrict-qualified operand pointer.
//   input_b - read-only, restrict-qualified pointer; presumably to one scalar.
//   output  - restrict-qualified destination pointer.
//   params  - union xnn_f16_minmax_params; presumably the bounds the result is
//             clamped to.
void xnn_f16_vmulc_minmax_ukernel__avx512fp16_u64(
size_t batch,
const void* restrict input_a,
const void* restrict input_b,
void* restrict output,
const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") reversed-division-by-constant micro-kernel taking
// min/max parameters (AVX512-FP16, "u64" variant). The "r...c" naming
// presumably means the constant is the dividend, i.e. constant / input_a -
// operand order must be verified against the kernel body.
// NOTE(review): inferred from the name and signature only.
//   batch   - presumably the amount of data to process (bytes or elements -
//             TODO confirm).
//   input_a - read-only, restrict-qualified operand pointer.
//   input_b - read-only, restrict-qualified pointer; presumably to one scalar.
//   output  - restrict-qualified destination pointer.
//   params  - union xnn_f16_minmax_params; presumably the bounds the result is
//             clamped to.
void xnn_f16_vrdivc_minmax_ukernel__avx512fp16_u64(
size_t batch,
const void* restrict input_a,
const void* restrict input_b,
void* restrict output,
const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") reversed-subtraction-by-constant micro-kernel taking
// min/max parameters (AVX512-FP16, "u64" variant). The "r...c" naming
// presumably means the constant is the minuend, i.e. constant - input_a -
// operand order must be verified against the kernel body.
// NOTE(review): inferred from the name and signature only.
//   batch   - presumably the amount of data to process (bytes or elements -
//             TODO confirm).
//   input_a - read-only, restrict-qualified operand pointer.
//   input_b - read-only, restrict-qualified pointer; presumably to one scalar.
//   output  - restrict-qualified destination pointer.
//   params  - union xnn_f16_minmax_params; presumably the bounds the result is
//             clamped to.
void xnn_f16_vrsubc_minmax_ukernel__avx512fp16_u64(
size_t batch,
const void* restrict input_a,
const void* restrict input_b,
void* restrict output,
const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") element-wise squared-difference micro-kernel
// (AVX512-FP16, "u64" variant); presumably computes (a - b)^2 per element,
// as suggested by its name.
// NOTE(review): inferred from the name and signature only; confirm against
// the kernel body.
//   batch            - presumably the amount of data to process (bytes or
//                      elements - TODO confirm).
//   input_a, input_b - read-only, restrict-qualified operand pointers.
//   output           - restrict-qualified destination pointer.
//   params           - union xnn_f16_default_params; usage not visible from
//                      the signature.
void xnn_f16_vsqrdiff_ukernel__avx512fp16_u64(
size_t batch,
const void* restrict input_a,
const void* restrict input_b,
void* restrict output,
const union xnn_f16_default_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") squared-difference micro-kernel (AVX512-FP16, "u64"
// variant); the "c" suffix presumably means the second operand is a single
// constant, giving (a - constant)^2 per element.
// NOTE(review): inferred from the name and signature only; confirm against
// the kernel body.
//   batch   - presumably the amount of data to process (bytes or elements -
//             TODO confirm).
//   input_a - read-only, restrict-qualified operand pointer.
//   input_b - read-only, restrict-qualified pointer; presumably to one scalar.
//   output  - restrict-qualified destination pointer.
//   params  - union xnn_f16_default_params; usage not visible from the
//             signature.
void xnn_f16_vsqrdiffc_ukernel__avx512fp16_u64(
size_t batch,
const void* restrict input_a,
const void* restrict input_b,
void* restrict output,
const union xnn_f16_default_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") element-wise subtraction micro-kernel taking min/max
// parameters (AVX512-FP16, "u64" variant), as suggested by its name.
// NOTE(review): inferred from the name and signature only; confirm against
// the kernel body.
//   batch            - presumably the amount of data to process (bytes or
//                      elements - TODO confirm).
//   input_a, input_b - read-only, restrict-qualified operand pointers;
//                      presumably minuend and subtrahend respectively - verify.
//   output           - restrict-qualified destination pointer.
//   params           - union xnn_f16_minmax_params; presumably the bounds the
//                      result is clamped to.
void xnn_f16_vsub_minmax_ukernel__avx512fp16_u64(
size_t batch,
const void* restrict input_a,
const void* restrict input_b,
void* restrict output,
const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }
// Half-precision ("f16") subtraction micro-kernel taking min/max parameters
// (AVX512-FP16, "u64" variant); the "c" suffix presumably means the
// subtrahend is a single constant applied to every element of input_a.
// NOTE(review): inferred from the name and signature only; confirm against
// the kernel body.
//   batch   - presumably the amount of data to process (bytes or elements -
//             TODO confirm).
//   input_a - read-only, restrict-qualified operand pointer.
//   input_b - read-only, restrict-qualified pointer; presumably to one scalar.
//   output  - restrict-qualified destination pointer.
//   params  - union xnn_f16_minmax_params; presumably the bounds the result is
//             clamped to.
void xnn_f16_vsubc_minmax_ukernel__avx512fp16_u64(
size_t batch,
const void* restrict input_a,
const void* restrict input_b,
void* restrict output,
const union xnn_f16_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{ … }