#pragma once
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include "pthreadpool.h"
#ifdef __cplusplus
extern "C" {
#endif
// XNN_EXTRA_BYTES gets one definition when XNN_ARCH_HEXAGON evaluates to
// non-zero and a different one otherwise (both values are elided in this view).
// NOTE(review): presumably the number of bytes a caller must leave readable
// past the end of its buffers -- confirm against the library documentation.
#if XNN_ARCH_HEXAGON
#define XNN_EXTRA_BYTES …
#else
#define XNN_EXTRA_BYTES …
#endif
// Library-wide constants and flag macros. Their values are elided in this
// view, so every note below is based on the macro names only.

// NOTE(review): presumably the largest tensor rank accepted by the API -- confirm.
#define XNN_MAX_TENSOR_DIMS …

// XNN_FLAG_* macros.
// NOTE(review): presumably bit values meant to be OR-ed into the `flags`
// arguments of the functions declared in this header; which function honors
// which flag is not visible here -- confirm per function.
#define XNN_FLAG_HINT_SPARSE_INFERENCE …
#define XNN_FLAG_HINT_FP16_INFERENCE …
#define XNN_FLAG_FORCE_FP16_INFERENCE …
#define XNN_FLAG_BASIC_PROFILING …
#define XNN_FLAG_JIT …
#define XNN_FLAG_DEPTHWISE_CONVOLUTION …
#define XNN_FLAG_TRANSPOSE_WEIGHTS …
#define XNN_FLAG_INPUT_NHWC …
#define XNN_FLAG_TENSORFLOW_SAME_PADDING …
#define XNN_FLAG_TRANSPOSE_B …
#define XNN_FLAG_TRANSPOSE_A …
#define XNN_FLAG_TENSORFLOW_RESHAPE_2D …
#define XNN_FLAG_TENSORFLOW_LEGACY_MODE …
#define XNN_FLAG_FP32_STATIC_WEIGHTS …
#define XNN_FLAG_ALIGN_CORNERS …
#define XNN_FLAG_YIELD_WORKERS …
#define XNN_FLAG_TRANSIENT_INDIRECTION_BUFFER …
#define XNN_FLAG_KEEP_DIMS …

// NOTE(review): presumably sizing constants for the quantized-tensor APIs
// (extra quantization parameters, minimum block size) -- confirm.
#define XNN_EXTRA_QUANTIZATION_PARAMS …
#define XNN_MIN_BLOCKSIZE …
// Quantization parameters handed by pointer to the dynamically quantized
// operator setup functions (see the `quantization_params` argument of
// xnn_setup_batch_matrix_multiply_nc_qd8_f32_qc8w). Fields are elided in this
// view.
struct xnn_dynamic_quantization_params { … };
// Status code returned by almost every function declared in this header.
// Enumerators are elided in this view.
enum xnn_status { … };
// Allocator description passed by pointer to xnn_initialize. Members are
// elided in this view.
struct xnn_allocator { … };
// Library start-up and shutdown entry points; both report an xnn_status.
// xnn_initialize receives a pointer to an xnn_allocator.
// NOTE(review): whether `allocator` may be NULL is not visible here -- confirm.
enum xnn_status xnn_initialize(
    const struct xnn_allocator* allocator);

enum xnn_status xnn_deinitialize(void);
// Build-identifier helpers (experimental API, going by the name prefix).
//
// The two getters take no arguments, so they are declared with an explicit
// `(void)` parameter list: in C before C23 an empty `()` declares a function
// with unspecified parameters rather than a prototype, which disables argument
// checking at call sites. `(void)` has the same meaning in C++.
//
// NOTE(review): inferred from names and types only -- the first getter returns
// a pointer to the identifier bytes and the second their size; the checker
// takes a (data, size) buffer and presumably returns true when it matches the
// identifier of this build. Confirm against the implementation.
const void* xnn_experimental_get_build_identifier_data(void);
size_t xnn_experimental_get_build_identifier_size(void);
bool xnn_experimental_check_build_identifier(const void* data, size_t size);
// Opaque subgraph handle: passed by value to the xnn_define_* functions and
// produced through the `subgraph_out` argument of xnn_create_subgraph.
// Declared as a pointer to an incomplete struct so the layout stays hidden
// from callers.
// NOTE(review): the struct tag `xnn_subgraph` is assumed -- confirm it matches
// the tag used by the implementation.
typedef struct xnn_subgraph* xnn_subgraph_t;
// Subgraph lifetime management.
// xnn_create_subgraph takes `external_value_ids` (presumably the number of IDs
// reserved for external values -- confirm) and a flags word, and hands the new
// handle back through `subgraph_out`. xnn_delete_subgraph takes the handle by
// value. Both report an xnn_status.
enum xnn_status xnn_create_subgraph(
    uint32_t external_value_ids, uint32_t flags, xnn_subgraph_t* subgraph_out);

enum xnn_status xnn_delete_subgraph(xnn_subgraph_t subgraph);
// XNN_VALUE_FLAG_* macros (values elided in this view).
// NOTE(review): presumably bits for the `flags` argument of the
// xnn_define_*_tensor_value functions -- confirm.
#define XNN_VALUE_FLAG_EXTERNAL_INPUT …
#define XNN_VALUE_FLAG_EXTERNAL_OUTPUT …
#define XNN_VALUE_FLAG_PERSISTENT …
// NOTE(review): presumably a sentinel meaning "no value ID" for the uint32_t
// ID arguments used throughout the subgraph API -- confirm.
#define XNN_INVALID_VALUE_ID …
// Element type selector taken by the tensor-value definition and validation
// functions. Enumerators are elided in this view.
enum xnn_datatype { … };
// Tensor value definitions (plain and per-tensor quantized).
// Both take the target subgraph, a datatype, a shape given as
// (`num_dims`, `dims`), a `data` pointer, an `external_id`, a flags word, and
// write the resulting value ID through `id_out`. The quantized variant
// additionally takes a single `zero_point` and `scale`.
// NOTE(review): whether `data` may be NULL and how long it must stay valid is
// not visible here -- confirm.
enum xnn_status xnn_define_tensor_value(
    xnn_subgraph_t subgraph, enum xnn_datatype datatype, size_t num_dims,
    const size_t* dims, const void* data, uint32_t external_id, uint32_t flags,
    uint32_t* id_out);

enum xnn_status xnn_define_quantized_tensor_value(
    xnn_subgraph_t subgraph, enum xnn_datatype datatype, int32_t zero_point,
    float scale, size_t num_dims, const size_t* dims, const void* data,
    uint32_t external_id, uint32_t flags, uint32_t* id_out);
// Channelwise-quantized tensor values and quantization validators.
// The channelwise variants take `scale` as a pointer plus a `channel_dim`
// index; the _v2 form adds an explicit `zero_point`. The two validators take
// only quantization parameters and a shape (no subgraph).
// NOTE(review): the required length of the `scale` array is not visible here
// (presumably dims[channel_dim]) -- confirm.
enum xnn_status xnn_define_channelwise_quantized_tensor_value(
    xnn_subgraph_t subgraph, enum xnn_datatype datatype, const float* scale,
    size_t num_dims, size_t channel_dim, const size_t* dims, const void* data,
    uint32_t external_id, uint32_t flags, uint32_t* id_out);

enum xnn_status xnn_validate_quantized_tensor(
    enum xnn_datatype datatype, int32_t zero_point, float scale,
    size_t num_dims, const size_t* dims);

enum xnn_status xnn_validate_channelwise_quantized_tensor(
    enum xnn_datatype datatype, int32_t zero_point, const float* scale,
    size_t num_dims, size_t channel_dim, const size_t* dims);

enum xnn_status xnn_define_channelwise_quantized_tensor_value_v2(
    xnn_subgraph_t subgraph, enum xnn_datatype datatype, int32_t zero_point,
    const float* scale, size_t num_dims, size_t channel_dim,
    const size_t* dims, const void* data, uint32_t external_id, uint32_t flags,
    uint32_t* id_out);
// Blockwise-quantized and dynamically quantized tensor values.
// The blockwise variant takes its scales as `const uint16_t*` together with a
// `block_size`; the dynamically quantized variant takes no data pointer and no
// quantization parameters, only a shape plus `num_nonbatch_dims`.
// NOTE(review): the encoding of the 16-bit scales is not visible here
// (presumably a 16-bit floating-point format) -- confirm.
enum xnn_status xnn_define_blockwise_quantized_tensor_value(
    xnn_subgraph_t subgraph, enum xnn_datatype datatype, int32_t zero_point,
    const uint16_t* scale, size_t num_dims, size_t channel_dim,
    size_t block_size, const size_t* dims, const void* data,
    uint32_t external_id, uint32_t flags, uint32_t* id_out);

enum xnn_status xnn_define_dynamically_quantized_tensor_value(
    xnn_subgraph_t subgraph, enum xnn_datatype datatype, size_t num_dims,
    size_t num_nonbatch_dims, const size_t* dims, uint32_t external_id,
    uint32_t flags, uint32_t* id_out);
// Declares the "convert" node: one input value ID, one output value ID and a
// flags word on the given subgraph.
// NOTE(review): presumably a datatype conversion between the two values --
// confirm.
enum xnn_status xnn_define_convert(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);
// Convolution-family node definitions (2D convolution, 2D deconvolution and
// 2D depthwise convolution). Each takes the target subgraph, padding and
// kernel geometry, an [output_min, output_max] pair, the value IDs of input,
// filter, bias and output, and a flags word; each reports an xnn_status.
// NOTE(review): units and valid ranges of the geometry arguments, and whether
// `bias_id` is optional, are not visible here -- confirm.
enum xnn_status xnn_define_convolution_2d(
    xnn_subgraph_t subgraph,
    uint32_t input_padding_top, uint32_t input_padding_right,
    uint32_t input_padding_bottom, uint32_t input_padding_left,
    uint32_t kernel_height, uint32_t kernel_width,
    uint32_t subsampling_height, uint32_t subsampling_width,
    uint32_t dilation_height, uint32_t dilation_width,
    uint32_t groups,
    size_t group_input_channels, size_t group_output_channels,
    float output_min, float output_max,
    uint32_t input_id, uint32_t filter_id, uint32_t bias_id,
    uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_deconvolution_2d(
    xnn_subgraph_t subgraph,
    uint32_t padding_top, uint32_t padding_right,
    uint32_t padding_bottom, uint32_t padding_left,
    uint32_t adjustment_height, uint32_t adjustment_width,
    uint32_t kernel_height, uint32_t kernel_width,
    uint32_t upsampling_height, uint32_t upsampling_width,
    uint32_t dilation_height, uint32_t dilation_width,
    uint32_t groups,
    size_t group_input_channels, size_t group_output_channels,
    float output_min, float output_max,
    uint32_t input_id, uint32_t filter_id, uint32_t bias_id,
    uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_depthwise_convolution_2d(
    xnn_subgraph_t subgraph,
    uint32_t input_padding_top, uint32_t input_padding_right,
    uint32_t input_padding_bottom, uint32_t input_padding_left,
    uint32_t kernel_height, uint32_t kernel_width,
    uint32_t subsampling_height, uint32_t subsampling_width,
    uint32_t dilation_height, uint32_t dilation_width,
    uint32_t depth_multiplier,
    size_t input_channels,
    float output_min, float output_max,
    uint32_t input_id, uint32_t filter_id, uint32_t bias_id,
    uint32_t output_id,
    uint32_t flags);
// Depth-to-space node definitions. The two declarations take the same set of
// arguments but in a different order: the _2d form puts `block_size` right
// after the subgraph, the unsuffixed form puts it after `output_id`.
// NOTE(review): the unsuffixed form looks like an older spelling kept for
// compatibility -- confirm before using it in new code.
enum xnn_status xnn_define_depth_to_space_2d(
    xnn_subgraph_t subgraph, uint32_t block_size, uint32_t input_id,
    uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_depth_to_space(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t block_size, uint32_t flags);
// Global pooling (average / sum, 1D / 2D) and windowed 2D average pooling
// node definitions. The four global variants share one signature: an
// [output_min, output_max] pair, input and output value IDs, and flags. The
// windowed variant adds padding, pooling-window and stride arguments.
// NOTE(review): which tensor dimensions are reduced by the 1D and 2D variants
// is not visible here -- confirm.
enum xnn_status xnn_define_global_average_pooling_1d(
    xnn_subgraph_t subgraph, float output_min, float output_max,
    uint32_t input_id, uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_global_average_pooling_2d(
    xnn_subgraph_t subgraph, float output_min, float output_max,
    uint32_t input_id, uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_global_sum_pooling_1d(
    xnn_subgraph_t subgraph, float output_min, float output_max,
    uint32_t input_id, uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_global_sum_pooling_2d(
    xnn_subgraph_t subgraph, float output_min, float output_max,
    uint32_t input_id, uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_average_pooling_2d(
    xnn_subgraph_t subgraph,
    uint32_t input_padding_top, uint32_t input_padding_right,
    uint32_t input_padding_bottom, uint32_t input_padding_left,
    uint32_t pooling_height, uint32_t pooling_width,
    uint32_t stride_height, uint32_t stride_width,
    float output_min, float output_max,
    uint32_t input_id, uint32_t output_id,
    uint32_t flags);
// Fully connected node definitions (dense and "sparse" spellings). Both take
// an [output_min, output_max] pair, the value IDs of input, filter, bias and
// output, and a flags word.
// NOTE(review): how the sparse variant differs in behavior is not visible
// from the declarations -- confirm.
enum xnn_status xnn_define_fully_connected(
    xnn_subgraph_t subgraph, float output_min, float output_max,
    uint32_t input_id, uint32_t filter_id, uint32_t bias_id,
    uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_fully_connected_sparse(
    xnn_subgraph_t subgraph, float output_min, float output_max,
    uint32_t input_id, uint32_t filter_id, uint32_t bias_id,
    uint32_t output_id, uint32_t flags);
// Max pooling, argmax pooling and unpooling node definitions.
// Argmax pooling has two outputs (`output_value_id`, `output_index_id`);
// unpooling has the mirrored pair of inputs (`input_value_id`,
// `input_index_id`). Only max pooling takes stride, dilation and an
// [output_min, output_max] pair.
enum xnn_status xnn_define_max_pooling_2d(
    xnn_subgraph_t subgraph,
    uint32_t input_padding_top, uint32_t input_padding_right,
    uint32_t input_padding_bottom, uint32_t input_padding_left,
    uint32_t pooling_height, uint32_t pooling_width,
    uint32_t stride_height, uint32_t stride_width,
    uint32_t dilation_height, uint32_t dilation_width,
    float output_min, float output_max,
    uint32_t input_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_argmax_pooling_2d(
    xnn_subgraph_t subgraph,
    uint32_t input_padding_top, uint32_t input_padding_right,
    uint32_t input_padding_bottom, uint32_t input_padding_left,
    uint32_t pooling_height, uint32_t pooling_width,
    uint32_t input_id,
    uint32_t output_value_id, uint32_t output_index_id,
    uint32_t flags);

enum xnn_status xnn_define_unpooling_2d(
    xnn_subgraph_t subgraph,
    uint32_t padding_top, uint32_t padding_right,
    uint32_t padding_bottom, uint32_t padding_left,
    uint32_t pooling_height, uint32_t pooling_width,
    uint32_t input_value_id, uint32_t input_index_id,
    uint32_t output_id,
    uint32_t flags);
// Two-input add and multiply node definitions. Each takes an
// [output_min, output_max] pair, two input value IDs, one output value ID and
// a flags word.
// NOTE(review): shape compatibility rules between the two inputs are not
// visible here -- confirm.
enum xnn_status xnn_define_add2(
    xnn_subgraph_t subgraph, float output_min, float output_max,
    uint32_t input1_id, uint32_t input2_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_multiply2(
    xnn_subgraph_t subgraph, float output_min, float output_max,
    uint32_t input1_id, uint32_t input2_id, uint32_t output_id,
    uint32_t flags);
// Selector passed as `cap_type` to xnn_define_scaled_dot_product_attention.
// Enumerators are elided in this view.
enum xnn_attention_logits_cap_type { … };
// NOTE(review): presumably the structure expected behind the untyped
// `cap_params` pointer of xnn_define_scaled_dot_product_attention when a
// tanh cap type is selected -- confirm. Fields are elided in this view.
struct xnn_attention_logits_cap_tanh_params { … };
// Scaled dot-product attention node definition. Takes a logits-cap selector
// with an untyped parameter pointer, the value IDs of query, key, value,
// scale and mask, the output value ID and a flags word.
// NOTE(review): the type `cap_params` must point to for each `cap_type`, and
// whether it may be NULL, is not visible here -- confirm.
enum xnn_status xnn_define_scaled_dot_product_attention(
    xnn_subgraph_t subgraph,
    enum xnn_attention_logits_cap_type cap_type, const void* cap_params,
    uint32_t query_id, uint32_t key_id, uint32_t value_id,
    uint32_t scale_id, uint32_t mask_id,
    uint32_t output_id,
    uint32_t flags);
// Remaining two-input elementwise node definitions. Subtract and divide take
// an [output_min, output_max] pair; maximum2, minimum2 and squared_difference
// do not. All take two input value IDs, one output value ID and flags.
enum xnn_status xnn_define_subtract(
    xnn_subgraph_t subgraph, float output_min, float output_max,
    uint32_t input1_id, uint32_t input2_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_divide(
    xnn_subgraph_t subgraph, float output_min, float output_max,
    uint32_t input1_id, uint32_t input2_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_maximum2(
    xnn_subgraph_t subgraph, uint32_t input1_id, uint32_t input2_id,
    uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_minimum2(
    xnn_subgraph_t subgraph, uint32_t input1_id, uint32_t input2_id,
    uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_squared_difference(
    xnn_subgraph_t subgraph, uint32_t input1_id, uint32_t input2_id,
    uint32_t output_id, uint32_t flags);
// Static constant-pad and static mean node definitions.
// The pad node takes two size_t arrays (`pre_paddings`, `post_paddings`) and
// a float `padding_value`; the mean node takes an explicit count plus an array
// of reduction axes.
// NOTE(review): the pad arrays carry no explicit length (presumably one entry
// per input dimension) -- confirm.
enum xnn_status xnn_define_static_constant_pad(
    xnn_subgraph_t subgraph, const size_t* pre_paddings,
    const size_t* post_paddings, float padding_value, uint32_t input_id,
    uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_static_mean(
    xnn_subgraph_t subgraph, size_t num_reduction_axes,
    const size_t* reduction_axes, uint32_t input_id, uint32_t output_id,
    uint32_t flags);
// Concatenation node definitions for two to five inputs. Each takes a signed
// `axis`, the input value IDs, a single output value ID and a flags word.
// NOTE(review): `axis` is int32_t, so negative values may be meaningful --
// confirm how they are interpreted.
enum xnn_status xnn_define_concatenate2(
    xnn_subgraph_t subgraph, int32_t axis,
    uint32_t input1_id, uint32_t input2_id,
    uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_concatenate3(
    xnn_subgraph_t subgraph, int32_t axis,
    uint32_t input1_id, uint32_t input2_id, uint32_t input3_id,
    uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_concatenate4(
    xnn_subgraph_t subgraph, int32_t axis,
    uint32_t input1_id, uint32_t input2_id, uint32_t input3_id,
    uint32_t input4_id,
    uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_concatenate5(
    xnn_subgraph_t subgraph, int32_t axis,
    uint32_t input1_id, uint32_t input2_id, uint32_t input3_id,
    uint32_t input4_id, uint32_t input5_id,
    uint32_t output_id, uint32_t flags);
// Copysign (two inputs) and copy (one input) node definitions; each writes to
// a single output value ID and takes a flags word.
enum xnn_status xnn_define_copysign(
    xnn_subgraph_t subgraph, uint32_t input1_id, uint32_t input2_id,
    uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_copy(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);
// Even-split node definitions producing two, three or four outputs from one
// input along a signed `split_dim`.
// NOTE(review): behavior when the split dimension is not divisible by the
// number of outputs is not visible here -- confirm.
enum xnn_status xnn_define_even_split2(
    xnn_subgraph_t subgraph, int32_t split_dim, uint32_t input_id,
    uint32_t output1_id, uint32_t output2_id,
    uint32_t flags);

enum xnn_status xnn_define_even_split3(
    xnn_subgraph_t subgraph, int32_t split_dim, uint32_t input_id,
    uint32_t output1_id, uint32_t output2_id, uint32_t output3_id,
    uint32_t flags);

enum xnn_status xnn_define_even_split4(
    xnn_subgraph_t subgraph, int32_t split_dim, uint32_t input_id,
    uint32_t output1_id, uint32_t output2_id, uint32_t output3_id,
    uint32_t output4_id,
    uint32_t flags);
// Reshape and resize node definitions.
// static_reshape takes the target shape as (`num_dims`, `new_shape`);
// reshape_2d takes no shape arguments at all; static_resize_bilinear_2d takes
// the target `new_height` and `new_width`.
// NOTE(review): how reshape_2d derives its output shape is not visible here
// -- confirm.
enum xnn_status xnn_define_static_reshape(
    xnn_subgraph_t subgraph, size_t num_dims, const size_t* new_shape,
    uint32_t input_id, uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_reshape_2d(
    xnn_subgraph_t subgraph,
    uint32_t input_id,
    uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_static_resize_bilinear_2d(
    xnn_subgraph_t subgraph, size_t new_height, size_t new_width,
    uint32_t input_id, uint32_t output_id, uint32_t flags);
// PReLU and RoPE node definitions. Each takes a second operand by value ID
// (`slope_id` and `weights_id` respectively); RoPE additionally takes
// `max_sequence_size`.
enum xnn_status xnn_define_prelu(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t slope_id,
    uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_rope(
    xnn_subgraph_t subgraph, size_t max_sequence_size, uint32_t input_id,
    uint32_t weights_id, uint32_t output_id, uint32_t flags);
// Node definitions: abs, bankers_rounding, batch_matrix_multiply, ceiling,
// clamp and elu. Except for batch_matrix_multiply (two inputs) all are
// single-input, single-output; clamp adds an [output_min, output_max] pair and
// elu adds `alpha`.
enum xnn_status xnn_define_abs(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_bankers_rounding(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_batch_matrix_multiply(
    xnn_subgraph_t subgraph, uint32_t input1_id, uint32_t input2_id,
    uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_ceiling(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_clamp(
    xnn_subgraph_t subgraph, float output_min, float output_max,
    uint32_t input_id, uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_elu(
    xnn_subgraph_t subgraph, float alpha, uint32_t input_id,
    uint32_t output_id, uint32_t flags);
// Single-input, single-output node definitions: exp, floor, gelu, hardswish,
// leaky_relu, log, negate, sigmoid and softmax. All share the
// (subgraph, input_id, output_id, flags) shape; leaky_relu additionally takes
// `negative_slope`.
enum xnn_status xnn_define_exp(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_floor(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_gelu(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_hardswish(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_leaky_relu(
    xnn_subgraph_t subgraph, float negative_slope, uint32_t input_id,
    uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_log(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_negate(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_sigmoid(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_softmax(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);
// Node definitions: space_to_depth_2d, square, square_root,
// reciprocal_square_root, static_slice, static_transpose and tanh.
// static_slice takes per-dimension `offsets` and `sizes` arrays and
// static_transpose a `perm` array, each alongside `num_dims`.
// NOTE(review): `offsets`, `sizes` and `perm` presumably hold `num_dims`
// entries each -- confirm.
enum xnn_status xnn_define_space_to_depth_2d(
    xnn_subgraph_t subgraph, uint32_t block_size, uint32_t input_id,
    uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_square(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_square_root(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_reciprocal_square_root(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_static_slice(
    xnn_subgraph_t subgraph, size_t num_dims, const size_t* offsets,
    const size_t* sizes, uint32_t input_id, uint32_t output_id,
    uint32_t flags);

enum xnn_status xnn_define_static_transpose(
    xnn_subgraph_t subgraph, size_t num_dims, const size_t* perm,
    uint32_t input_id, uint32_t output_id, uint32_t flags);

enum xnn_status xnn_define_tanh(
    xnn_subgraph_t subgraph, uint32_t input_id, uint32_t output_id,
    uint32_t flags);
// Opaque code-cache handle, declared as a pointer to an incomplete struct so
// the layout stays hidden from callers.
// NOTE(review): the struct tag `xnn_code_cache` is assumed -- confirm it
// matches the tag used by the implementation.
typedef struct xnn_code_cache* xnn_code_cache_t;
// Selector passed as `finalization_kind` to xnn_finalize_weights_cache.
// Enumerators are elided in this view.
enum xnn_weights_cache_finalization_kind { … };
// NOTE(review): presumably the key used to look up packed weights in a
// weights cache -- confirm. Fields are elided in this view.
struct xnn_weights_cache_look_up_key { … };
// NOTE(review): presumably the provider interface behind a weights cache
// handle -- confirm. Members are elided in this view.
struct xnn_weights_cache_provider { … };
// Weights-cache handle: produced through the `weights_cache_out` argument of
// the xnn_create_weights_cache* functions and passed by value to the
// finalize / delete / create-runtime functions.
// NOTE(review): assumed to be a pointer to struct xnn_weights_cache_provider,
// the struct declared immediately before this typedef -- confirm against the
// implementation.
typedef struct xnn_weights_cache_provider* xnn_weights_cache_t;
// Weights-cache lifetime management: two creators (with and without an
// explicit `size`), a finalizer taking a finalization kind, a boolean query
// and a deleter.
// NOTE(review): the unit of `size` (presumably bytes) is not visible here --
// confirm.
enum xnn_status xnn_create_weights_cache_with_size(
    size_t size,
    xnn_weights_cache_t* weights_cache_out);

enum xnn_status xnn_create_weights_cache(
    xnn_weights_cache_t* weights_cache_out);

enum xnn_status xnn_finalize_weights_cache(
    xnn_weights_cache_t weights_cache,
    enum xnn_weights_cache_finalization_kind finalization_kind);

bool xnn_weights_cache_is_finalized(
    xnn_weights_cache_t cache);

enum xnn_status xnn_delete_weights_cache(
    xnn_weights_cache_t weights_cache);
// Opaque workspace handle: produced by xnn_create_workspace and passed by
// value to xnn_release_workspace and xnn_create_runtime_v4. Declared as a
// pointer to an incomplete struct so the layout stays hidden from callers.
// NOTE(review): the struct tag `xnn_workspace` is assumed -- confirm it
// matches the tag used by the implementation.
typedef struct xnn_workspace* xnn_workspace_t;
// Workspace lifetime management. The creator hands the new handle back
// through `workspace_out`; the counterpart is named "release" rather than
// "delete".
// NOTE(review): the name suggests shared or reference-counted ownership --
// confirm.
enum xnn_status xnn_create_workspace(
    xnn_workspace_t* workspace_out);

enum xnn_status xnn_release_workspace(
    xnn_workspace_t workspace);
// Opaque runtime handle: produced through the `runtime_out` argument of the
// xnn_create_runtime* functions and passed by value to the reshape / setup /
// invoke / delete functions. Declared as a pointer to an incomplete struct so
// the layout stays hidden from callers.
// NOTE(review): the struct tag `xnn_runtime` is assumed -- confirm it matches
// the tag used by the implementation.
typedef struct xnn_runtime* xnn_runtime_t;
// Selector passed as `param_name` to xnn_get_runtime_profiling_info.
// Enumerators are elided in this view.
enum xnn_profile_info { … };
// Profiling query on a runtime. The caller names the item with `param_name`,
// supplies a buffer (`param_value`, `param_value_size`) and receives a size
// through `param_value_size_ret`.
// NOTE(review): presumably `param_value_size_ret` reports the number of bytes
// needed or written -- confirm, including the too-small-buffer case.
enum xnn_status xnn_get_runtime_profiling_info(
    xnn_runtime_t runtime,
    enum xnn_profile_info param_name,
    size_t param_value_size,
    void* param_value,
    size_t* param_value_size_ret);
// Runtime creation from a subgraph, in four spellings with progressively
// fewer arguments:
//   _v4: weights cache, workspace, thread pool, flags
//   _v3: weights cache, thread pool, flags
//   _v2: thread pool, flags
//   (unsuffixed): subgraph only
// Each hands the new handle back through `runtime_out`.
// NOTE(review): which optional arguments accept NULL is not visible here --
// confirm.
enum xnn_status xnn_create_runtime_v4(
    xnn_subgraph_t subgraph, xnn_weights_cache_t weights_cache,
    xnn_workspace_t workspace, pthreadpool_t threadpool, uint32_t flags,
    xnn_runtime_t* runtime_out);

enum xnn_status xnn_create_runtime_v3(
    xnn_subgraph_t subgraph, xnn_weights_cache_t weights_cache,
    pthreadpool_t threadpool, uint32_t flags, xnn_runtime_t* runtime_out);

enum xnn_status xnn_create_runtime_v2(
    xnn_subgraph_t subgraph, pthreadpool_t threadpool, uint32_t flags,
    xnn_runtime_t* runtime_out);

enum xnn_status xnn_create_runtime(
    xnn_subgraph_t subgraph, xnn_runtime_t* runtime_out);
// Element type of the `external_values` array passed to xnn_setup_runtime and
// xnn_setup_runtime_v2. Fields are elided in this view.
struct xnn_external_value { … };
// Runtime operations: set / query the shape of an external value, reshape,
// set up with an array of external values, invoke and delete.
// xnn_setup_runtime and xnn_setup_runtime_v2 have identical signatures.
// NOTE(review): the behavioral difference between the two setup functions,
// and the capacity required for the `dims` output of
// xnn_get_external_value_shape, are not visible here -- confirm.
enum xnn_status xnn_reshape_external_value(
    xnn_runtime_t runtime, uint32_t external_id, size_t num_dims,
    const size_t* dims);

enum xnn_status xnn_get_external_value_shape(
    xnn_runtime_t runtime, uint32_t external_id, size_t* num_dims,
    size_t* dims);

enum xnn_status xnn_reshape_runtime(xnn_runtime_t runtime);

enum xnn_status xnn_setup_runtime(
    xnn_runtime_t runtime, size_t num_external_values,
    const struct xnn_external_value* external_values);

enum xnn_status xnn_setup_runtime_v2(
    xnn_runtime_t runtime, size_t num_external_values,
    const struct xnn_external_value* external_values);

enum xnn_status xnn_invoke_runtime(xnn_runtime_t runtime);

enum xnn_status xnn_delete_runtime(xnn_runtime_t runtime);
// Opaque operator handle: produced through the `*_op_out` argument of the
// xnn_create_* functions and passed by value to the matching reshape / setup
// functions and to xnn_run_operator / xnn_delete_operator. Declared as a
// pointer to an incomplete struct so the layout stays hidden from callers.
// NOTE(review): the struct tag `xnn_operator` is assumed -- confirm it matches
// the tag used by the implementation.
typedef struct xnn_operator* xnn_operator_t;
// Generic operator entry points: run an operator on a thread pool, and
// delete an operator.
// NOTE(review): presumably the operator must have been reshaped and set up
// before it is run -- confirm.
enum xnn_status xnn_run_operator(xnn_operator_t op, pthreadpool_t threadpool);

enum xnn_status xnn_delete_operator(xnn_operator_t op);
// Abs operator, f16 and f32. Each type has create / reshape / setup; f32
// also has a one-shot `run` form. The f16 setup takes untyped pointers, the
// f32 one takes float pointers.
// NOTE(review): the units of `input_stride` / `output_stride` (presumably
// elements between consecutive batch rows) are not visible here -- confirm.
// Note the `run` form lists `channels` before the strides and `batch_size`
// after them, unlike `reshape`.
enum xnn_status xnn_create_abs_nc_f16(
    uint32_t flags, xnn_operator_t* abs_op_out);

enum xnn_status xnn_reshape_abs_nc_f16(
    xnn_operator_t abs_op, size_t batch_size, size_t channels,
    size_t input_stride, size_t output_stride, pthreadpool_t threadpool);

enum xnn_status xnn_setup_abs_nc_f16(
    xnn_operator_t abs_op, const void* input, void* output);

enum xnn_status xnn_create_abs_nc_f32(
    uint32_t flags, xnn_operator_t* abs_op_out);

enum xnn_status xnn_reshape_abs_nc_f32(
    xnn_operator_t abs_op, size_t batch_size, size_t channels,
    size_t input_stride, size_t output_stride, pthreadpool_t threadpool);

enum xnn_status xnn_setup_abs_nc_f32(
    xnn_operator_t abs_op, const float* input, float* output);

enum xnn_status xnn_run_abs_nc_f32(
    size_t channels, size_t input_stride, size_t output_stride,
    size_t batch_size, const float* input, float* output, uint32_t flags,
    pthreadpool_t threadpool);
// N-dimensional add operator, f16 and f32. `create` fixes the
// [output_min, output_max] pair; `reshape` takes both input shapes as
// (count, array) pairs; `setup` binds the two inputs and the output. f32 also
// has a one-shot `run` form combining all of these.
// NOTE(review): the two shapes may differ in rank, which suggests
// broadcasting -- confirm the exact rules.
enum xnn_status xnn_create_add_nd_f16(
    float output_min, float output_max, uint32_t flags,
    xnn_operator_t* add_op_out);

enum xnn_status xnn_reshape_add_nd_f16(
    xnn_operator_t add_op,
    size_t num_input1_dims, const size_t* input1_shape,
    size_t num_input2_dims, const size_t* input2_shape,
    pthreadpool_t threadpool);

enum xnn_status xnn_setup_add_nd_f16(
    xnn_operator_t add_op, const void* input1, const void* input2,
    void* output);

enum xnn_status xnn_create_add_nd_f32(
    float output_min, float output_max, uint32_t flags,
    xnn_operator_t* add_op_out);

enum xnn_status xnn_reshape_add_nd_f32(
    xnn_operator_t add_op,
    size_t num_input1_dims, const size_t* input1_shape,
    size_t num_input2_dims, const size_t* input2_shape,
    pthreadpool_t threadpool);

enum xnn_status xnn_setup_add_nd_f32(
    xnn_operator_t add_op, const float* input1, const float* input2,
    float* output);

enum xnn_status xnn_run_add_nd_f32(
    size_t num_input1_dims, const size_t* input1_shape,
    size_t num_input2_dims, const size_t* input2_shape,
    const float* input1, const float* input2, float* output,
    float output_min, float output_max,
    uint32_t flags, pthreadpool_t threadpool);
// N-dimensional multiply operator on int32_t data: create / reshape / setup.
// Unlike the add operators, `create` takes no output clamping range.
// NOTE(review): overflow behavior of the 32-bit products is not visible here
// -- confirm.
enum xnn_status xnn_create_multiply_nd_s32(
    uint32_t flags, xnn_operator_t* multiply_op_out);

enum xnn_status xnn_reshape_multiply_nd_s32(
    xnn_operator_t mul_op,
    size_t num_input1_dims, const size_t* input1_shape,
    size_t num_input2_dims, const size_t* input2_shape,
    pthreadpool_t threadpool);

enum xnn_status xnn_setup_multiply_nd_s32(
    xnn_operator_t mul_op, const int32_t* input1, const int32_t* input2,
    int32_t* output);
// N-dimensional add operator on int8_t quantized data: create / reshape /
// setup / run. `create` (and `run`) take a zero point and scale for each
// input and for the output, plus an int8_t [output_min, output_max] pair.
// In `run`, each input's quantization parameters directly follow that
// input's shape.
enum xnn_status xnn_create_add_nd_qs8(
    int8_t input1_zero_point, float input1_scale,
    int8_t input2_zero_point, float input2_scale,
    int8_t output_zero_point, float output_scale,
    int8_t output_min, int8_t output_max,
    uint32_t flags, xnn_operator_t* add_op_out);

enum xnn_status xnn_reshape_add_nd_qs8(
    xnn_operator_t add_op,
    size_t num_input1_dims, const size_t* input1_shape,
    size_t num_input2_dims, const size_t* input2_shape,
    pthreadpool_t threadpool);

enum xnn_status xnn_setup_add_nd_qs8(
    xnn_operator_t add_op, const int8_t* input1, const int8_t* input2,
    int8_t* output);

enum xnn_status xnn_run_add_nd_qs8(
    size_t num_input1_dims, const size_t* input1_shape,
    int8_t input1_zero_point, float input1_scale,
    size_t num_input2_dims, const size_t* input2_shape,
    int8_t input2_zero_point, float input2_scale,
    const int8_t* input1, const int8_t* input2, int8_t* output,
    int8_t output_zero_point, float output_scale,
    int8_t output_min, int8_t output_max,
    uint32_t flags, pthreadpool_t threadpool);
// N-dimensional add operator on uint8_t quantized data: create / reshape /
// setup / run. Same argument layout as the int8_t variant, with uint8_t zero
// points, clamping bounds and data pointers.
enum xnn_status xnn_create_add_nd_qu8(
    uint8_t input1_zero_point, float input1_scale,
    uint8_t input2_zero_point, float input2_scale,
    uint8_t output_zero_point, float output_scale,
    uint8_t output_min, uint8_t output_max,
    uint32_t flags, xnn_operator_t* add_op_out);

enum xnn_status xnn_reshape_add_nd_qu8(
    xnn_operator_t add_op,
    size_t num_input1_dims, const size_t* input1_shape,
    size_t num_input2_dims, const size_t* input2_shape,
    pthreadpool_t threadpool);

enum xnn_status xnn_setup_add_nd_qu8(
    xnn_operator_t add_op, const uint8_t* input1, const uint8_t* input2,
    uint8_t* output);

enum xnn_status xnn_run_add_nd_qu8(
    size_t num_input1_dims, const size_t* input1_shape,
    uint8_t input1_zero_point, float input1_scale,
    size_t num_input2_dims, const size_t* input2_shape,
    uint8_t input2_zero_point, float input2_scale,
    const uint8_t* input1, const uint8_t* input2, uint8_t* output,
    uint8_t output_zero_point, float output_scale,
    uint8_t output_min, uint8_t output_max,
    uint32_t flags, pthreadpool_t threadpool);
// 2D argmax pooling operator on float data: create / reshape / setup.
// `reshape` reports a workspace size and alignment plus the output height and
// width through pointer arguments; `setup` then takes the workspace pointer
// together with the input, the value output and a uint32_t index output.
// NOTE(review): presumably the caller allocates the workspace according to
// the reported size and alignment -- confirm.
enum xnn_status xnn_create_argmax_pooling2d_nhwc_f32(
    uint32_t input_padding_top, uint32_t input_padding_right,
    uint32_t input_padding_bottom, uint32_t input_padding_left,
    uint32_t pooling_height, uint32_t pooling_width,
    uint32_t flags, xnn_operator_t* argmax_pooling_op_out);

enum xnn_status xnn_reshape_argmax_pooling2d_nhwc_f32(
    xnn_operator_t argmax_pooling_op,
    size_t batch_size, size_t input_height, size_t input_width,
    size_t channels, size_t input_pixel_stride, size_t output_pixel_stride,
    size_t* workspace_size, size_t* workspace_alignment,
    size_t* output_height_out, size_t* output_width_out,
    pthreadpool_t threadpool);

enum xnn_status xnn_setup_argmax_pooling2d_nhwc_f32(
    xnn_operator_t argmax_pooling_op, void* workspace, const float* input,
    float* output, uint32_t* index);
// 2D average pooling operator, f16 and f32: create / reshape / setup.
// `create` fixes padding, pooling window, stride and the float
// [output_min, output_max] pair; `reshape` reports workspace requirements and
// output height and width through pointer arguments; `setup` binds workspace,
// input and output (untyped pointers for f16, float pointers for f32).
enum xnn_status xnn_create_average_pooling2d_nhwc_f16(
    uint32_t input_padding_top, uint32_t input_padding_right,
    uint32_t input_padding_bottom, uint32_t input_padding_left,
    uint32_t pooling_height, uint32_t pooling_width,
    uint32_t stride_height, uint32_t stride_width,
    float output_min, float output_max,
    uint32_t flags, xnn_operator_t* average_pooling_op_out);

enum xnn_status xnn_reshape_average_pooling2d_nhwc_f16(
    xnn_operator_t average_pooling_op,
    size_t batch_size, size_t input_height, size_t input_width,
    size_t channels, size_t input_pixel_stride, size_t output_pixel_stride,
    size_t* workspace_size, size_t* workspace_alignment,
    size_t* output_height_out, size_t* output_width_out,
    pthreadpool_t threadpool);

enum xnn_status xnn_setup_average_pooling2d_nhwc_f16(
    xnn_operator_t average_pooling_op, void* workspace, const void* input,
    void* output);

enum xnn_status xnn_create_average_pooling2d_nhwc_f32(
    uint32_t input_padding_top, uint32_t input_padding_right,
    uint32_t input_padding_bottom, uint32_t input_padding_left,
    uint32_t pooling_height, uint32_t pooling_width,
    uint32_t stride_height, uint32_t stride_width,
    float output_min, float output_max,
    uint32_t flags, xnn_operator_t* average_pooling_op_out);

enum xnn_status xnn_reshape_average_pooling2d_nhwc_f32(
    xnn_operator_t average_pooling_op,
    size_t batch_size, size_t input_height, size_t input_width,
    size_t channels, size_t input_pixel_stride, size_t output_pixel_stride,
    size_t* workspace_size, size_t* workspace_alignment,
    size_t* output_height_out, size_t* output_width_out,
    pthreadpool_t threadpool);

enum xnn_status xnn_setup_average_pooling2d_nhwc_f32(
    xnn_operator_t average_pooling_op, void* workspace, const float* input,
    float* output);
// 2D average pooling operator on uint8_t quantized data: create / reshape /
// setup. `create` additionally takes zero point and scale for input and
// output, and a uint8_t [output_min, output_max] pair; `reshape` and `setup`
// mirror the floating-point variants.
enum xnn_status xnn_create_average_pooling2d_nhwc_qu8(
    uint32_t input_padding_top, uint32_t input_padding_right,
    uint32_t input_padding_bottom, uint32_t input_padding_left,
    uint32_t pooling_height, uint32_t pooling_width,
    uint32_t stride_height, uint32_t stride_width,
    uint8_t input_zero_point, float input_scale,
    uint8_t output_zero_point, float output_scale,
    uint8_t output_min, uint8_t output_max,
    uint32_t flags, xnn_operator_t* average_pooling_op_out);

enum xnn_status xnn_reshape_average_pooling2d_nhwc_qu8(
    xnn_operator_t average_pooling_op,
    size_t batch_size, size_t input_height, size_t input_width,
    size_t channels, size_t input_pixel_stride, size_t output_pixel_stride,
    size_t* workspace_size, size_t* workspace_alignment,
    size_t* output_height_out, size_t* output_width_out,
    pthreadpool_t threadpool);

enum xnn_status xnn_setup_average_pooling2d_nhwc_qu8(
    xnn_operator_t average_pooling_op, void* workspace, const uint8_t* input,
    uint8_t* output);
// Bankers-rounding operator, f16 and f32: create / reshape / setup for both,
// plus a one-shot `run` form for f32.
// NOTE(review): going by the name this rounds half-way cases to even --
// confirm against the implementation.
enum xnn_status xnn_create_bankers_rounding_nc_f16(
    uint32_t flags, xnn_operator_t* rounding_op_out);

enum xnn_status xnn_reshape_bankers_rounding_nc_f16(
    xnn_operator_t rounding_op, size_t batch_size, size_t channels,
    size_t input_stride, size_t output_stride, pthreadpool_t threadpool);

enum xnn_status xnn_setup_bankers_rounding_nc_f16(
    xnn_operator_t rounding_op, const void* input, void* output);

enum xnn_status xnn_create_bankers_rounding_nc_f32(
    uint32_t flags, xnn_operator_t* rounding_op_out);

enum xnn_status xnn_reshape_bankers_rounding_nc_f32(
    xnn_operator_t rounding_op, size_t batch_size, size_t channels,
    size_t input_stride, size_t output_stride, pthreadpool_t threadpool);

enum xnn_status xnn_setup_bankers_rounding_nc_f32(
    xnn_operator_t rounding_op, const float* input, float* output);

enum xnn_status xnn_run_bankers_rounding_nc_f32(
    size_t channels, size_t input_stride, size_t output_stride,
    size_t batch_size, const float* input, float* output, uint32_t flags,
    pthreadpool_t threadpool);
// Batch matrix multiply operator in three flavors: f16, f32 and
// qd8_f32_qc8w.
// For f16 / f32, both operands are bound at setup time and `reshape` reports
// a workspace size and alignment. For qd8_f32_qc8w the B operand
// (`data_b`, `scale_b`) is supplied at creation, `reshape` reports no
// workspace, and `setup` takes the int8_t A operand with a pointer to
// xnn_dynamic_quantization_params.
// NOTE(review): the out-parameter of the create functions is named
// `batch_matrix_multiply_op`, without the `_out` suffix used by the other
// operator families; it is still written through a pointer.
enum xnn_status xnn_create_batch_matrix_multiply_nc_f16(
    uint32_t flags,
    xnn_operator_t* batch_matrix_multiply_op);

enum xnn_status xnn_reshape_batch_matrix_multiply_nc_f16(
    xnn_operator_t batch_matrix_multiply_op,
    size_t num_batch_dims,
    const size_t* batch_dims_a,
    const size_t* batch_dims_b,
    size_t m,
    size_t k,
    size_t n,
    size_t* workspace_size,
    size_t* workspace_alignment,
    pthreadpool_t threadpool);

enum xnn_status xnn_setup_batch_matrix_multiply_nc_f16(
    xnn_operator_t batch_matrix_multiply_op,
    void* workspace,
    const void* input_a,
    const void* input_b,
    void* output);

enum xnn_status xnn_create_batch_matrix_multiply_nc_f32(
    uint32_t flags,
    xnn_operator_t* batch_matrix_multiply_op);

enum xnn_status xnn_reshape_batch_matrix_multiply_nc_f32(
    xnn_operator_t batch_matrix_multiply_op,
    size_t num_batch_dims,
    const size_t* batch_dims_a,
    const size_t* batch_dims_b,
    size_t m,
    size_t k,
    size_t n,
    size_t* workspace_size,
    size_t* workspace_alignment,
    pthreadpool_t threadpool);

enum xnn_status xnn_setup_batch_matrix_multiply_nc_f32(
    xnn_operator_t batch_matrix_multiply_op,
    void* workspace,
    const float* input_a,
    const float* input_b,
    float* output);

enum xnn_status xnn_create_batch_matrix_multiply_nc_qd8_f32_qc8w(
    size_t batch_size_b,
    size_t k,
    size_t n,
    const int8_t* data_b,
    const float* scale_b,
    uint32_t flags,
    xnn_operator_t* batch_matrix_multiply_op);

enum xnn_status xnn_reshape_batch_matrix_multiply_nc_qd8_f32_qc8w(
    xnn_operator_t batch_matrix_multiply_op,
    size_t num_batch_dims,
    const size_t* batch_dims_a,
    const size_t* batch_dims_b,
    size_t m,
    size_t k,
    size_t n,
    pthreadpool_t threadpool);

enum xnn_status xnn_setup_batch_matrix_multiply_nc_qd8_f32_qc8w(
    xnn_operator_t batch_matrix_multiply_op,
    const int8_t* input_a,
    const struct xnn_dynamic_quantization_params* quantization_params,
    float* output);
// Ceiling operator, f16 and f32: create / reshape / setup for both, plus a
// one-shot `run` form for f32. The declarations are listed in the same
// create, reshape, setup, run order as the other operator families in this
// header.
enum xnn_status xnn_create_ceiling_nc_f16(
    uint32_t flags, xnn_operator_t* ceiling_op_out);

enum xnn_status xnn_reshape_ceiling_nc_f16(
    xnn_operator_t ceiling_op, size_t batch_size, size_t channels,
    size_t input_stride, size_t output_stride, pthreadpool_t threadpool);

enum xnn_status xnn_setup_ceiling_nc_f16(
    xnn_operator_t ceiling_op, const void* input, void* output);

enum xnn_status xnn_create_ceiling_nc_f32(
    uint32_t flags, xnn_operator_t* ceiling_op_out);

enum xnn_status xnn_reshape_ceiling_nc_f32(
    xnn_operator_t ceiling_op, size_t batch_size, size_t channels,
    size_t input_stride, size_t output_stride, pthreadpool_t threadpool);

enum xnn_status xnn_setup_ceiling_nc_f32(
    xnn_operator_t ceiling_op, const float* input, float* output);

enum xnn_status xnn_run_ceiling_nc_f32(
    size_t channels, size_t input_stride, size_t output_stride,
    size_t batch_size, const float* input, float* output, uint32_t flags,
    pthreadpool_t threadpool);
// Channel-shuffle operator, x8 and x32 variants: create / reshape / setup.
// Group count, channels per group and both strides are fixed at creation;
// `reshape` only takes the batch size. Data is passed as untyped pointers.
// NOTE(review): presumably x8 / x32 denote the element width in bits --
// confirm.
enum xnn_status xnn_create_channel_shuffle_nc_x8(
    size_t groups, size_t group_channels, size_t input_stride,
    size_t output_stride, uint32_t flags,
    xnn_operator_t* channel_shuffle_op_out);

enum xnn_status xnn_reshape_channel_shuffle_nc_x8(
    xnn_operator_t channel_shuffle_op, size_t batch_size,
    pthreadpool_t threadpool);

enum xnn_status xnn_setup_channel_shuffle_nc_x8(
    xnn_operator_t channel_shuffle_op, const void* input, void* output);

enum xnn_status xnn_create_channel_shuffle_nc_x32(
    size_t groups, size_t group_channels, size_t input_stride,
    size_t output_stride, uint32_t flags,
    xnn_operator_t* channel_shuffle_op_out);

enum xnn_status xnn_reshape_channel_shuffle_nc_x32(
    xnn_operator_t channel_shuffle_op, size_t batch_size,
    pthreadpool_t threadpool);

enum xnn_status xnn_setup_channel_shuffle_nc_x32(
    xnn_operator_t channel_shuffle_op, const void* input, void* output);
// Clamp operator, f16 and f32: create / reshape / setup for both, plus a
// one-shot `run` form for f32. The clamping bounds are float in both variants
// and are fixed at creation (or passed directly to `run`).
enum xnn_status xnn_create_clamp_nc_f16(
    float output_min, float output_max, uint32_t flags,
    xnn_operator_t* clamp_op_out);

enum xnn_status xnn_reshape_clamp_nc_f16(
    xnn_operator_t clamp_op, size_t batch_size, size_t channels,
    size_t input_stride, size_t output_stride, pthreadpool_t threadpool);

enum xnn_status xnn_setup_clamp_nc_f16(
    xnn_operator_t clamp_op, const void* input, void* output);

enum xnn_status xnn_create_clamp_nc_f32(
    float output_min, float output_max, uint32_t flags,
    xnn_operator_t* clamp_op_out);

enum xnn_status xnn_reshape_clamp_nc_f32(
    xnn_operator_t clamp_op, size_t batch_size, size_t channels,
    size_t input_stride, size_t output_stride, pthreadpool_t threadpool);

enum xnn_status xnn_setup_clamp_nc_f32(
    xnn_operator_t clamp_op, const float* input, float* output);

enum xnn_status xnn_run_clamp_nc_f32(
    size_t channels, size_t input_stride, size_t output_stride,
    size_t batch_size, const float* input, float* output,
    float output_min, float output_max, uint32_t flags,
    pthreadpool_t threadpool);
// Clamp operator on 8-bit integer data, signed (s8) and unsigned (u8):
// create / reshape / setup. The bounds use the same integer type as the data.
enum xnn_status xnn_create_clamp_nc_s8(
    int8_t output_min, int8_t output_max, uint32_t flags,
    xnn_operator_t* clamp_op_out);

enum xnn_status xnn_reshape_clamp_nc_s8(
    xnn_operator_t clamp_op, size_t batch_size, size_t channels,
    size_t input_stride, size_t output_stride, pthreadpool_t threadpool);

enum xnn_status xnn_setup_clamp_nc_s8(
    xnn_operator_t clamp_op, const int8_t* input, int8_t* output);

enum xnn_status xnn_create_clamp_nc_u8(
    uint8_t output_min, uint8_t output_max, uint32_t flags,
    xnn_operator_t* clamp_op_out);

enum xnn_status xnn_reshape_clamp_nc_u8(
    xnn_operator_t clamp_op, size_t batch_size, size_t channels,
    size_t input_stride, size_t output_stride, pthreadpool_t threadpool);

enum xnn_status xnn_setup_clamp_nc_u8(
    xnn_operator_t clamp_op, const uint8_t* input, uint8_t* output);
// constant_pad_nd_x8: create / reshape / setup / run declarations.
//
// Create takes the padding value by untyped pointer (const void*), a flags
// word, and constant_pad_op_out, a pointer to an xnn_operator_t.
// NOTE(review): presumably padding_value points at one 8-bit element - confirm.
enum xnn_status xnn_create_constant_pad_nd_x8(
const void* padding_value,
uint32_t flags,
xnn_operator_t* constant_pad_op_out);
// Reshape takes num_dims and three read-only size_t arrays: input_shape,
// pre_padding and post_padding.
// NOTE(review): presumably each array holds num_dims entries - confirm.
enum xnn_status xnn_reshape_constant_pad_nd_x8(
xnn_operator_t constant_pad_op,
size_t num_dims,
const size_t* input_shape,
const size_t* pre_padding,
const size_t* post_padding,
pthreadpool_t threadpool);
// Setup takes untyped input (read-only) and output buffer pointers.
enum xnn_status xnn_setup_constant_pad_nd_x8(
xnn_operator_t constant_pad_op,
const void* input,
void* output);
// Single-call form without an operator handle. Here flags is the first
// parameter, and the padding arrays are named pre_paddings/post_paddings
// (plural), unlike pre_padding/post_padding in the reshape declaration.
enum xnn_status xnn_run_constant_pad_nd_x8(
uint32_t flags,
size_t num_dims,
const size_t* input_shape,
const size_t* pre_paddings,
const size_t* post_paddings,
const void* input,
void* output,
const void* padding_value,
pthreadpool_t threadpool);
// constant_pad_nd_x16: create / reshape / setup / run declarations.
//
// Create takes the padding value by untyped pointer (const void*), a flags
// word, and constant_pad_op_out, a pointer to an xnn_operator_t.
// NOTE(review): presumably padding_value points at one 16-bit element - confirm.
enum xnn_status xnn_create_constant_pad_nd_x16(
const void* padding_value,
uint32_t flags,
xnn_operator_t* constant_pad_op_out);
// Reshape takes num_dims and three read-only size_t arrays: input_shape,
// pre_padding and post_padding.
enum xnn_status xnn_reshape_constant_pad_nd_x16(
xnn_operator_t constant_pad_op,
size_t num_dims,
const size_t* input_shape,
const size_t* pre_padding,
const size_t* post_padding,
pthreadpool_t threadpool);
// Setup takes untyped input (read-only) and output buffer pointers.
enum xnn_status xnn_setup_constant_pad_nd_x16(
xnn_operator_t constant_pad_op,
const void* input,
void* output);
// Single-call form without an operator handle. flags comes first, and the
// padding arrays use the plural names pre_paddings/post_paddings.
enum xnn_status xnn_run_constant_pad_nd_x16(
uint32_t flags,
size_t num_dims,
const size_t* input_shape,
const size_t* pre_paddings,
const size_t* post_paddings,
const void* input,
void* output,
const void* padding_value,
pthreadpool_t threadpool);
// constant_pad_nd_x32: create / reshape / setup / run declarations.
//
// Create takes the padding value by untyped pointer (const void*), a flags
// word, and constant_pad_op_out, a pointer to an xnn_operator_t.
// NOTE(review): presumably padding_value points at one 32-bit element - confirm.
enum xnn_status xnn_create_constant_pad_nd_x32(
const void* padding_value,
uint32_t flags,
xnn_operator_t* constant_pad_op_out);
// Reshape takes num_dims and three read-only size_t arrays: input_shape,
// pre_padding and post_padding.
enum xnn_status xnn_reshape_constant_pad_nd_x32(
xnn_operator_t constant_pad_op,
size_t num_dims,
const size_t* input_shape,
const size_t* pre_padding,
const size_t* post_padding,
pthreadpool_t threadpool);
// Setup takes untyped input (read-only) and output buffer pointers.
enum xnn_status xnn_setup_constant_pad_nd_x32(
xnn_operator_t constant_pad_op,
const void* input,
void* output);
// Single-call form without an operator handle. flags comes first, and the
// padding arrays use the plural names pre_paddings/post_paddings.
enum xnn_status xnn_run_constant_pad_nd_x32(
uint32_t flags,
size_t num_dims,
const size_t* input_shape,
const size_t* pre_paddings,
const size_t* post_paddings,
const void* input,
void* output,
const void* padding_value,
pthreadpool_t threadpool);
// convert_nc_f16_f32: create / reshape / setup / run declarations.
//
// Create takes only a flags word and convert_op_out, a pointer to an
// xnn_operator_t; this conversion has no numeric parameters.
enum xnn_status xnn_create_convert_nc_f16_f32(
uint32_t flags,
xnn_operator_t* convert_op_out);
// Reshape takes batch_size, channels, and input/output strides (all size_t)
// plus a pthreadpool_t.
enum xnn_status xnn_reshape_convert_nc_f16_f32(
xnn_operator_t convert_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup takes the input as an untyped const void* and the output as float*.
enum xnn_status xnn_setup_convert_nc_f16_f32(
xnn_operator_t convert_op,
const void* input,
float* output);
// Single-call form without an operator handle; channels and strides precede
// batch_size.
enum xnn_status xnn_run_convert_nc_f16_f32(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const void* input,
float* output,
uint32_t flags,
pthreadpool_t threadpool);
// convert_nc_f16_qd8: create / reshape / setup declarations.
//
// Create takes only a flags word and convert_op_out, a pointer to an
// xnn_operator_t.
enum xnn_status xnn_create_convert_nc_f16_qd8(
uint32_t flags,
xnn_operator_t* convert_op_out);
// Reshape takes batch_size, channels, and input/output strides (all size_t)
// plus a pthreadpool_t.
enum xnn_status xnn_reshape_convert_nc_f16_qd8(
xnn_operator_t convert_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup takes an untyped input, an int8_t output, and a non-const pointer to
// struct xnn_dynamic_quantization_params.
// NOTE(review): the pointer is non-const, so presumably the operator writes
// the quantization parameters it computes; the required array length is not
// stated here - confirm against the implementation.
enum xnn_status xnn_setup_convert_nc_f16_qd8(
xnn_operator_t convert_op,
const void* input,
int8_t* output,
struct xnn_dynamic_quantization_params* quantization_params);
// convert_nc_f32_qd8: create / reshape / setup declarations.
//
// Create takes only a flags word and convert_op_out, a pointer to an
// xnn_operator_t.
enum xnn_status xnn_create_convert_nc_f32_qd8(
uint32_t flags,
xnn_operator_t* convert_op_out);
// Reshape takes batch_size, channels, and input/output strides (all size_t)
// plus a pthreadpool_t.
enum xnn_status xnn_reshape_convert_nc_f32_qd8(
xnn_operator_t convert_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup takes a float input, an int8_t output, and a non-const pointer to
// struct xnn_dynamic_quantization_params.
// NOTE(review): the pointer is non-const, so presumably the operator writes
// the quantization parameters it computes; the required array length is not
// stated here - confirm against the implementation.
enum xnn_status xnn_setup_convert_nc_f32_qd8(
xnn_operator_t convert_op,
const float* input,
int8_t* output,
struct xnn_dynamic_quantization_params* quantization_params);
// convert_nc_f32_f16: create / reshape / setup / run declarations.
//
// Create takes only a flags word and convert_op_out, a pointer to an
// xnn_operator_t.
enum xnn_status xnn_create_convert_nc_f32_f16(
uint32_t flags,
xnn_operator_t* convert_op_out);
// Reshape takes batch_size, channels, and input/output strides (all size_t)
// plus a pthreadpool_t.
enum xnn_status xnn_reshape_convert_nc_f32_f16(
xnn_operator_t convert_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup takes the input as const float* and the output as an untyped void*.
enum xnn_status xnn_setup_convert_nc_f32_f16(
xnn_operator_t convert_op,
const float* input,
void* output);
// Single-call form without an operator handle; channels and strides precede
// batch_size.
enum xnn_status xnn_run_convert_nc_f32_f16(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const float* input,
void* output,
uint32_t flags,
pthreadpool_t threadpool);
// convert_nc_f32_qs8: create / reshape / setup / run declarations.
//
// Create takes the output quantization parameters (float output_scale,
// int8_t output_zero_point), int8_t output_min/output_max, a flags word, and
// convert_op_out, a pointer to an xnn_operator_t.
enum xnn_status xnn_create_convert_nc_f32_qs8(
float output_scale,
int8_t output_zero_point,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_operator_t* convert_op_out);
// Reshape takes batch_size, channels, and input/output strides (all size_t)
// plus a pthreadpool_t.
enum xnn_status xnn_reshape_convert_nc_f32_qs8(
xnn_operator_t convert_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup takes a float input and an int8_t output buffer.
enum xnn_status xnn_setup_convert_nc_f32_qs8(
xnn_operator_t convert_op,
const float* input,
int8_t* output);
// Single-call form without an operator handle. Unlike create, it has no
// output_min/output_max parameters.
// NOTE(review): which output range the run form applies is not visible
// here - confirm against the implementation.
enum xnn_status xnn_run_convert_nc_f32_qs8(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const float* input,
int8_t* output,
float output_scale,
int8_t output_zero_point,
uint32_t flags,
pthreadpool_t threadpool);
// convert_nc_f32_qu8: create / reshape / setup / run declarations.
//
// Create takes the output quantization parameters (float output_scale,
// uint8_t output_zero_point), uint8_t output_min/output_max, a flags word,
// and convert_op_out, a pointer to an xnn_operator_t.
enum xnn_status xnn_create_convert_nc_f32_qu8(
float output_scale,
uint8_t output_zero_point,
uint8_t output_min,
uint8_t output_max,
uint32_t flags,
xnn_operator_t* convert_op_out);
// Reshape takes batch_size, channels, and input/output strides (all size_t)
// plus a pthreadpool_t.
enum xnn_status xnn_reshape_convert_nc_f32_qu8(
xnn_operator_t convert_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup takes a float input and a uint8_t output buffer.
enum xnn_status xnn_setup_convert_nc_f32_qu8(
xnn_operator_t convert_op,
const float* input,
uint8_t* output);
// Single-call form without an operator handle. Unlike create, it has no
// output_min/output_max parameters.
// NOTE(review): which output range the run form applies is not visible
// here - confirm against the implementation.
enum xnn_status xnn_run_convert_nc_f32_qu8(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const float* input,
uint8_t* output,
float output_scale,
uint8_t output_zero_point,
uint32_t flags,
pthreadpool_t threadpool);
// convert_nc_qs8: create / reshape / setup declarations. Input and output
// are both int8_t; create takes a separate scale and zero point for each side.
enum xnn_status xnn_create_convert_nc_qs8(
float input_scale,
int8_t input_zero_point,
float output_scale,
int8_t output_zero_point,
uint32_t flags,
xnn_operator_t* convert_op_out);
// Reshape takes batch_size, channels, and input/output strides (all size_t)
// plus a pthreadpool_t.
enum xnn_status xnn_reshape_convert_nc_qs8(
xnn_operator_t convert_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup takes int8_t input (read-only) and output buffers.
enum xnn_status xnn_setup_convert_nc_qs8(
xnn_operator_t convert_op,
const int8_t* input,
int8_t* output);
// convert_nc_qs8_f16: create / reshape / setup declarations.
//
// Create takes the input quantization parameters (float input_scale, int8_t
// input_zero_point), a flags word, and convert_op_out.
enum xnn_status xnn_create_convert_nc_qs8_f16(
float input_scale,
int8_t input_zero_point,
uint32_t flags,
xnn_operator_t* convert_op_out);
// Reshape takes batch_size, channels, and input/output strides (all size_t)
// plus a pthreadpool_t.
enum xnn_status xnn_reshape_convert_nc_qs8_f16(
xnn_operator_t convert_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup takes an int8_t input and an untyped void* output.
enum xnn_status xnn_setup_convert_nc_qs8_f16(
xnn_operator_t convert_op,
const int8_t* input,
void* output);
// convert_nc_qs8_f32: create / reshape / setup / run declarations.
//
// Create takes the input quantization parameters (float input_scale, int8_t
// input_zero_point), a flags word, and convert_op_out.
enum xnn_status xnn_create_convert_nc_qs8_f32(
float input_scale,
int8_t input_zero_point,
uint32_t flags,
xnn_operator_t* convert_op_out);
// Reshape takes batch_size, channels, and input/output strides (all size_t)
// plus a pthreadpool_t.
enum xnn_status xnn_reshape_convert_nc_qs8_f32(
xnn_operator_t convert_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup takes an int8_t input and a float output buffer.
enum xnn_status xnn_setup_convert_nc_qs8_f32(
xnn_operator_t convert_op,
const int8_t* input,
float* output);
// Single-call form without an operator handle; channels and strides precede
// batch_size, and the quantization parameters follow the buffers.
enum xnn_status xnn_run_convert_nc_qs8_f32(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const int8_t* input,
float* output,
float input_scale,
int8_t input_zero_point,
uint32_t flags,
pthreadpool_t threadpool);
// convert_nc_qs16_qs8: create / reshape / setup / run declarations.
//
// Create takes input_scale, output_scale and an int8_t output_zero_point.
// There is no input zero-point parameter in this signature.
enum xnn_status xnn_create_convert_nc_qs16_qs8(
float input_scale,
float output_scale,
int8_t output_zero_point,
uint32_t flags,
xnn_operator_t* convert_op_out);
// Reshape takes batch_size, channels, and input/output strides (all size_t)
// plus a pthreadpool_t.
enum xnn_status xnn_reshape_convert_nc_qs16_qs8(
xnn_operator_t convert_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup takes an int16_t input and an int8_t output buffer.
enum xnn_status xnn_setup_convert_nc_qs16_qs8(
xnn_operator_t convert_op,
const int16_t* input,
int8_t* output);
// Single-call form without an operator handle; channels and strides precede
// batch_size, and the quantization parameters follow the buffers.
enum xnn_status xnn_run_convert_nc_qs16_qs8(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const int16_t* input,
int8_t* output,
float input_scale,
float output_scale,
int8_t output_zero_point,
uint32_t flags,
pthreadpool_t threadpool);
// convert_nc_qu8: create / reshape / setup declarations. Input and output
// are both uint8_t; create takes a separate scale and zero point for each side.
enum xnn_status xnn_create_convert_nc_qu8(
float input_scale,
uint8_t input_zero_point,
float output_scale,
uint8_t output_zero_point,
uint32_t flags,
xnn_operator_t* convert_op_out);
// Reshape takes batch_size, channels, and input/output strides (all size_t)
// plus a pthreadpool_t.
enum xnn_status xnn_reshape_convert_nc_qu8(
xnn_operator_t convert_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup takes uint8_t input (read-only) and output buffers.
enum xnn_status xnn_setup_convert_nc_qu8(
xnn_operator_t convert_op,
const uint8_t* input,
uint8_t* output);
// convert_nc_qu8_f32: create / reshape / setup / run declarations.
//
// Create takes the input quantization parameters (float input_scale, uint8_t
// input_zero_point), a flags word, and convert_op_out.
enum xnn_status xnn_create_convert_nc_qu8_f32(
float input_scale,
uint8_t input_zero_point,
uint32_t flags,
xnn_operator_t* convert_op_out);
// Reshape takes batch_size, channels, and input/output strides (all size_t)
// plus a pthreadpool_t.
enum xnn_status xnn_reshape_convert_nc_qu8_f32(
xnn_operator_t convert_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup takes a uint8_t input and a float output buffer.
enum xnn_status xnn_setup_convert_nc_qu8_f32(
xnn_operator_t convert_op,
const uint8_t* input,
float* output);
// Single-call form without an operator handle; channels and strides precede
// batch_size, and the quantization parameters follow the buffers.
enum xnn_status xnn_run_convert_nc_qu8_f32(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const uint8_t* input,
float* output,
float input_scale,
uint8_t input_zero_point,
uint32_t flags,
pthreadpool_t threadpool);
// convolution2d_nchw_f16: create / reshape / setup declarations.
//
// Create parameters, in order: the four input paddings (top, right, bottom,
// left), kernel height/width, subsampling height/width, dilation
// height/width, groups, per-group input and output channel counts,
// input/output channel strides, kernel and bias as untyped const void*,
// output_min/output_max as float, flags, a code cache, a weights cache, and
// convolution_op_out (pointer to an xnn_operator_t).
// NOTE(review): whether bias, code_cache or weights_cache may be NULL is not
// visible in this header - confirm.
enum xnn_status xnn_create_convolution2d_nchw_f16(
uint32_t input_padding_top,
uint32_t input_padding_right,
uint32_t input_padding_bottom,
uint32_t input_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t subsampling_height,
uint32_t subsampling_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_channel_stride,
size_t output_channel_stride,
const void* kernel,
const void* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* convolution_op_out);
// Reshape takes batch_size and the input height/width, and has two size_t
// out-pointers named output_height_out/output_width_out. Unlike the nhwc
// convolution reshape declarations, there are no workspace parameters.
enum xnn_status xnn_reshape_convolution2d_nchw_f16(
xnn_operator_t convolution_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Setup takes untyped input (read-only) and output buffer pointers.
enum xnn_status xnn_setup_convolution2d_nchw_f16(
xnn_operator_t convolution_op,
const void* input,
void* output);
// convolution2d_nchw_f32: create / reshape / setup declarations.
//
// Create parameters, in order: the four input paddings (top, right, bottom,
// left), kernel height/width, subsampling height/width, dilation
// height/width, groups, per-group input and output channel counts,
// input/output channel strides, kernel and bias as const float*,
// output_min/output_max, flags, a code cache, a weights cache, and
// convolution_op_out (pointer to an xnn_operator_t).
// NOTE(review): whether bias, code_cache or weights_cache may be NULL is not
// visible in this header - confirm.
enum xnn_status xnn_create_convolution2d_nchw_f32(
uint32_t input_padding_top,
uint32_t input_padding_right,
uint32_t input_padding_bottom,
uint32_t input_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t subsampling_height,
uint32_t subsampling_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_channel_stride,
size_t output_channel_stride,
const float* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* convolution_op_out);
// Reshape takes batch_size and the input height/width, and has two size_t
// out-pointers named output_height_out/output_width_out. There are no
// workspace parameters.
enum xnn_status xnn_reshape_convolution2d_nchw_f32(
xnn_operator_t convolution_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Setup takes float input (read-only) and output buffers.
enum xnn_status xnn_setup_convolution2d_nchw_f32(
xnn_operator_t convolution_op,
const float* input,
float* output);
// convolution2d_nhwc_f16: create / reshape / setup declarations.
//
// Create parameters, in order: the four input paddings (top, right, bottom,
// left), kernel height/width, subsampling height/width, dilation
// height/width, groups, per-group input and output channel counts,
// input/output channel strides, kernel and bias as untyped const void*,
// output_min/output_max as float, flags, a code cache, a weights cache, and
// convolution_op_out (pointer to an xnn_operator_t).
enum xnn_status xnn_create_convolution2d_nhwc_f16(
uint32_t input_padding_top,
uint32_t input_padding_right,
uint32_t input_padding_bottom,
uint32_t input_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t subsampling_height,
uint32_t subsampling_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_channel_stride,
size_t output_channel_stride,
const void* kernel,
const void* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* convolution_op_out);
// Reshape takes batch_size and the input height/width. It has four size_t
// pointer parameters: workspace_size, workspace_alignment, output_height_out
// and output_width_out.
// NOTE(review): presumably the workspace pointers report the scratch buffer
// requirements the caller must satisfy before setup - confirm.
enum xnn_status xnn_reshape_convolution2d_nhwc_f16(
xnn_operator_t convolution_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t* workspace_size,
size_t* workspace_alignment,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Setup takes a caller-provided workspace pointer in addition to the untyped
// input and output buffers.
enum xnn_status xnn_setup_convolution2d_nhwc_f16(
xnn_operator_t convolution_op,
void* workspace,
const void* input,
void* output);
// convolution2d_nhwc_f32: create, fused create, reshape and setup
// declarations.
//
// Create parameters, in order: the four input paddings (top, right, bottom,
// left), kernel height/width, subsampling height/width, dilation
// height/width, groups, per-group input and output channel counts,
// input/output channel strides, kernel and bias as const float*,
// output_min/output_max, flags, a code cache, a weights cache, and
// convolution_op_out (pointer to an xnn_operator_t).
enum xnn_status xnn_create_convolution2d_nhwc_f32(
uint32_t input_padding_top,
uint32_t input_padding_right,
uint32_t input_padding_bottom,
uint32_t input_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t subsampling_height,
uint32_t subsampling_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_channel_stride,
size_t output_channel_stride,
const float* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* convolution_op_out);
// Forward declaration only; the struct layout is not defined at this point
// in the header.
struct xnn_post_operation;
// Fused create variant: same leading parameters as the plain create above,
// but output_min/output_max are replaced by num_post_operations and a
// non-const pointer to struct xnn_post_operation.
// NOTE(review): presumably post_operations is an array of
// num_post_operations entries; ownership and whether it is modified are not
// visible here - confirm.
enum xnn_status xnn_create_fused_convolution2d_nhwc_f32(
uint32_t input_padding_top,
uint32_t input_padding_right,
uint32_t input_padding_bottom,
uint32_t input_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t subsampling_height,
uint32_t subsampling_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_channel_stride,
size_t output_channel_stride,
const float* kernel,
const float* bias,
size_t num_post_operations,
struct xnn_post_operation* post_operations,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* convolution_op_out);
// Reshape takes batch_size and the input height/width. It has four size_t
// pointer parameters: workspace_size, workspace_alignment, output_height_out
// and output_width_out.
enum xnn_status xnn_reshape_convolution2d_nhwc_f32(
xnn_operator_t convolution_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t* workspace_size,
size_t* workspace_alignment,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Setup takes a caller-provided workspace pointer in addition to the float
// input and output buffers.
enum xnn_status xnn_setup_convolution2d_nhwc_f32(
xnn_operator_t convolution_op,
void* workspace,
const float* input,
float* output);
// Create declarations for the qd8_f16_qc8w and qd8_f32_qc8w variants of the
// NHWC 2D convolution. Both signatures are identical apart from the function
// name.
//
// Parameters, in order: the four input paddings (top, right, bottom, left),
// kernel height/width, subsampling height/width, dilation height/width,
// groups, per-group input and output channel counts, input/output channel
// strides, kernel_scale as const float*, kernel as const int8_t*, bias as
// const float*, output_min/output_max as float, flags, a code cache, a
// weights cache, and convolution_op_out (pointer to an xnn_operator_t).
// NOTE(review): kernel_scale is a pointer, so presumably one scale per
// output channel - confirm the expected length.
enum xnn_status xnn_create_convolution2d_nhwc_qd8_f16_qc8w(
uint32_t input_padding_top,
uint32_t input_padding_right,
uint32_t input_padding_bottom,
uint32_t input_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t subsampling_height,
uint32_t subsampling_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_channel_stride,
size_t output_channel_stride,
const float* kernel_scale,
const int8_t* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* convolution_op_out);
enum xnn_status xnn_create_convolution2d_nhwc_qd8_f32_qc8w(
uint32_t input_padding_top,
uint32_t input_padding_right,
uint32_t input_padding_bottom,
uint32_t input_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t subsampling_height,
uint32_t subsampling_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_channel_stride,
size_t output_channel_stride,
const float* kernel_scale,
const int8_t* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* convolution_op_out);
// Create declaration for the qs8 NHWC 2D convolution.
//
// After the geometry parameters (paddings, kernel, subsampling, dilation,
// groups, channel counts and strides) come the quantization parameters:
// int8_t input_zero_point, float input_scale, a single float kernel_scale,
// the int8_t kernel, an int32_t bias, int8_t output_zero_point, float
// output_scale, and int8_t output_min/output_max. The list ends with flags,
// a code cache, a weights cache, and convolution_op_out.
enum xnn_status xnn_create_convolution2d_nhwc_qs8(
uint32_t input_padding_top,
uint32_t input_padding_right,
uint32_t input_padding_bottom,
uint32_t input_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t subsampling_height,
uint32_t subsampling_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_channel_stride,
size_t output_channel_stride,
int8_t input_zero_point,
float input_scale,
float kernel_scale,
const int8_t* kernel,
const int32_t* bias,
int8_t output_zero_point,
float output_scale,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* convolution_op_out);
// Reshape declarations for the qd8_f16_qc8w and qd8_f32_qc8w NHWC 2D
// convolutions. Both take batch_size and the input height/width, have four
// size_t pointer parameters (workspace_size, workspace_alignment,
// output_height_out, output_width_out), and end with a pthreadpool_t.
enum xnn_status xnn_reshape_convolution2d_nhwc_qd8_f16_qc8w(
xnn_operator_t convolution_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t* workspace_size,
size_t* workspace_alignment,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
enum xnn_status xnn_reshape_convolution2d_nhwc_qd8_f32_qc8w(
xnn_operator_t convolution_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t* workspace_size,
size_t* workspace_alignment,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Reshape declaration for the qs8 NHWC 2D convolution: takes batch_size and
// the input height/width, has four size_t pointer parameters
// (workspace_size, workspace_alignment, output_height_out,
// output_width_out), and ends with a pthreadpool_t.
enum xnn_status xnn_reshape_convolution2d_nhwc_qs8(
xnn_operator_t convolution_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t* workspace_size,
size_t* workspace_alignment,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Setup declarations for the qd8_f16_qc8w and qd8_f32_qc8w NHWC 2D
// convolutions. Both take a workspace pointer, an int8_t input, and a
// read-only pointer to struct xnn_dynamic_quantization_params. They differ
// only in the output type: untyped void* for f16, float* for f32.
enum xnn_status xnn_setup_convolution2d_nhwc_qd8_f16_qc8w(
xnn_operator_t convolution_op,
void* workspace,
const int8_t* input,
void* output,
const struct xnn_dynamic_quantization_params* quantization_params);
enum xnn_status xnn_setup_convolution2d_nhwc_qd8_f32_qc8w(
xnn_operator_t convolution_op,
void* workspace,
const int8_t* input,
float* output,
const struct xnn_dynamic_quantization_params* quantization_params);
// Setup declaration for the qs8 NHWC 2D convolution: takes a workspace
// pointer plus int8_t input (read-only) and output buffers.
enum xnn_status xnn_setup_convolution2d_nhwc_qs8(
xnn_operator_t convolution_op,
void* workspace,
const int8_t* input,
int8_t* output);
// convolution2d_nhwc_qs8_qc8w: create / reshape / setup declarations.
//
// Create has the same parameter list as xnn_create_convolution2d_nhwc_qs8
// except that kernel_scale is a const float* here rather than a single float.
// NOTE(review): presumably kernel_scale holds one scale per output
// channel - confirm the expected length.
enum xnn_status xnn_create_convolution2d_nhwc_qs8_qc8w(
uint32_t input_padding_top,
uint32_t input_padding_right,
uint32_t input_padding_bottom,
uint32_t input_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t subsampling_height,
uint32_t subsampling_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_channel_stride,
size_t output_channel_stride,
int8_t input_zero_point,
float input_scale,
const float* kernel_scale,
const int8_t* kernel,
const int32_t* bias,
int8_t output_zero_point,
float output_scale,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* convolution_op_out);
// Reshape takes batch_size and the input height/width, has four size_t
// pointer parameters (workspace_size, workspace_alignment,
// output_height_out, output_width_out), and ends with a pthreadpool_t.
enum xnn_status xnn_reshape_convolution2d_nhwc_qs8_qc8w(
xnn_operator_t convolution_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t* workspace_size,
size_t* workspace_alignment,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Setup takes a workspace pointer plus int8_t input (read-only) and output
// buffers.
enum xnn_status xnn_setup_convolution2d_nhwc_qs8_qc8w(
xnn_operator_t convolution_op,
void* workspace,
const int8_t* input,
int8_t* output);
// convolution2d_nhwc_qu8: create / reshape / setup declarations.
//
// After the geometry parameters, create takes uint8_t input_zero_point,
// float input_scale, a uint8_t kernel_zero_point (not present in the qs8
// create signatures), a single float kernel_scale, the uint8_t kernel, an
// int32_t bias, uint8_t output_zero_point, float output_scale, and uint8_t
// output_min/output_max. The list ends with flags, a code cache, a weights
// cache, and convolution_op_out.
enum xnn_status xnn_create_convolution2d_nhwc_qu8(
uint32_t input_padding_top,
uint32_t input_padding_right,
uint32_t input_padding_bottom,
uint32_t input_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t subsampling_height,
uint32_t subsampling_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_channel_stride,
size_t output_channel_stride,
uint8_t input_zero_point,
float input_scale,
uint8_t kernel_zero_point,
float kernel_scale,
const uint8_t* kernel,
const int32_t* bias,
uint8_t output_zero_point,
float output_scale,
uint8_t output_min,
uint8_t output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* convolution_op_out);
// Reshape takes batch_size and the input height/width, has four size_t
// pointer parameters (workspace_size, workspace_alignment,
// output_height_out, output_width_out), and ends with a pthreadpool_t.
enum xnn_status xnn_reshape_convolution2d_nhwc_qu8(
xnn_operator_t convolution_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t* workspace_size,
size_t* workspace_alignment,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Setup takes a workspace pointer plus uint8_t input (read-only) and output
// buffers.
enum xnn_status xnn_setup_convolution2d_nhwc_qu8(
xnn_operator_t convolution_op,
void* workspace,
const uint8_t* input,
uint8_t* output);
// copy_nc_x8: create / reshape / setup declarations.
//
// Create takes only a flags word and copy_op_out, a pointer to an
// xnn_operator_t.
enum xnn_status xnn_create_copy_nc_x8(
uint32_t flags,
xnn_operator_t* copy_op_out);
// Reshape takes batch_size, channels, and input/output strides (all size_t)
// plus a pthreadpool_t.
enum xnn_status xnn_reshape_copy_nc_x8(
xnn_operator_t copy_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup takes untyped input (read-only) and output buffer pointers.
enum xnn_status xnn_setup_copy_nc_x8(
xnn_operator_t copy_op,
const void* input,
void* output);
// copy_nc_x16: create / reshape / setup declarations.
//
// Create takes only a flags word and copy_op_out, a pointer to an
// xnn_operator_t.
enum xnn_status xnn_create_copy_nc_x16(
uint32_t flags,
xnn_operator_t* copy_op_out);
// Reshape takes batch_size, channels, and input/output strides (all size_t)
// plus a pthreadpool_t.
enum xnn_status xnn_reshape_copy_nc_x16(
xnn_operator_t copy_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup takes untyped input (read-only) and output buffer pointers.
enum xnn_status xnn_setup_copy_nc_x16(
xnn_operator_t copy_op,
const void* input,
void* output);
// copy_nc_x32: create / reshape / setup / run declarations.
//
// Create takes only a flags word and copy_op_out, a pointer to an
// xnn_operator_t.
enum xnn_status xnn_create_copy_nc_x32(
uint32_t flags,
xnn_operator_t* copy_op_out);
// Reshape takes batch_size, channels, and input/output strides (all size_t)
// plus a pthreadpool_t.
enum xnn_status xnn_reshape_copy_nc_x32(
xnn_operator_t copy_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup takes untyped input (read-only) and output buffer pointers.
enum xnn_status xnn_setup_copy_nc_x32(
xnn_operator_t copy_op,
const void* input,
void* output);
// Single-call form without an operator handle. The buffers are typed as
// uint32_t* here, whereas the setup declaration above uses void*.
enum xnn_status xnn_run_copy_nc_x32(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const uint32_t* input,
uint32_t* output,
uint32_t flags,
pthreadpool_t threadpool);
// deconvolution2d_nhwc_f16: create / reshape / setup declarations.
//
// Create parameters, in order: the four output paddings (top, right, bottom,
// left), kernel height/width, stride height/width, dilation height/width,
// groups, per-group input and output channel counts, input/output pixel
// strides, kernel and bias as untyped const void*, output_min/output_max as
// float, flags, a code cache, a weights cache, and deconvolution_op_out.
// The naming differs from the convolution create declarations, which use
// input_padding_*, subsampling_* and *_channel_stride.
enum xnn_status xnn_create_deconvolution2d_nhwc_f16(
uint32_t output_padding_top,
uint32_t output_padding_right,
uint32_t output_padding_bottom,
uint32_t output_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t stride_height,
uint32_t stride_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
const void* kernel,
const void* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* deconvolution_op_out);
// Reshape takes batch_size, the input height/width, and uint32_t
// adjustment_height/adjustment_width; it has two size_t out-pointers named
// output_height_out/output_width_out and no workspace parameters.
// NOTE(review): the meaning of the adjustment values is not stated
// here - confirm against the implementation.
enum xnn_status xnn_reshape_deconvolution2d_nhwc_f16(
xnn_operator_t deconvolution_op,
size_t batch_size,
size_t input_height,
size_t input_width,
uint32_t adjustment_height,
uint32_t adjustment_width,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Setup takes untyped input (read-only) and output buffer pointers.
enum xnn_status xnn_setup_deconvolution2d_nhwc_f16(
xnn_operator_t deconvolution_op,
const void* input,
void* output);
// deconvolution2d_nhwc_f32: create / reshape / setup declarations.
//
// Create parameters, in order: the four output paddings (top, right, bottom,
// left), kernel height/width, stride height/width, dilation height/width,
// groups, per-group input and output channel counts, input/output pixel
// strides, kernel and bias as const float*, output_min/output_max, flags, a
// code cache, a weights cache, and deconvolution_op_out.
enum xnn_status xnn_create_deconvolution2d_nhwc_f32(
uint32_t output_padding_top,
uint32_t output_padding_right,
uint32_t output_padding_bottom,
uint32_t output_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t stride_height,
uint32_t stride_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
const float* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* deconvolution_op_out);
// Reshape takes batch_size, the input height/width, and uint32_t
// adjustment_height/adjustment_width; it has two size_t out-pointers named
// output_height_out/output_width_out.
enum xnn_status xnn_reshape_deconvolution2d_nhwc_f32(
xnn_operator_t deconvolution_op,
size_t batch_size,
size_t input_height,
size_t input_width,
uint32_t adjustment_height,
uint32_t adjustment_width,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Setup takes float input (read-only) and output buffers.
enum xnn_status xnn_setup_deconvolution2d_nhwc_f32(
xnn_operator_t deconvolution_op,
const float* input,
float* output);
// deconvolution2d_nhwc_qd8_f32_qc8w: create / reshape / setup declarations.
//
// After the geometry parameters (output paddings, kernel, stride, dilation,
// groups, channel counts, pixel strides), create takes kernel_scale as
// const float*, the kernel as const int8_t*, bias as const float*, and float
// output_min/output_max, followed by flags, a code cache, a weights cache,
// and deconvolution_op_out.
// NOTE(review): presumably kernel_scale holds one scale per output
// channel - confirm the expected length.
enum xnn_status xnn_create_deconvolution2d_nhwc_qd8_f32_qc8w(
uint32_t output_padding_top,
uint32_t output_padding_right,
uint32_t output_padding_bottom,
uint32_t output_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t stride_height,
uint32_t stride_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
const float* kernel_scale,
const int8_t* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* deconvolution_op_out);
// Reshape takes batch_size, the input height/width, and uint32_t
// adjustment_height/adjustment_width; it has two size_t out-pointers named
// output_height_out/output_width_out.
enum xnn_status xnn_reshape_deconvolution2d_nhwc_qd8_f32_qc8w(
xnn_operator_t deconvolution_op,
size_t batch_size,
size_t input_height,
size_t input_width,
uint32_t adjustment_height,
uint32_t adjustment_width,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Setup takes an int8_t input, a float output, and a read-only pointer to
// struct xnn_dynamic_quantization_params.
enum xnn_status xnn_setup_deconvolution2d_nhwc_qd8_f32_qc8w(
xnn_operator_t deconvolution_op,
const int8_t* input,
float* output,
const struct xnn_dynamic_quantization_params* quantization_params);
// deconvolution2d_nhwc_qs8: create / reshape / setup declarations.
//
// After the geometry parameters, create takes int8_t input_zero_point, float
// input_scale, a single float kernel_scale, the int8_t kernel, an int32_t
// bias, int8_t output_zero_point, float output_scale, and int8_t
// output_min/output_max, followed by flags, a code cache, a weights cache,
// and deconvolution_op_out.
enum xnn_status xnn_create_deconvolution2d_nhwc_qs8(
uint32_t output_padding_top,
uint32_t output_padding_right,
uint32_t output_padding_bottom,
uint32_t output_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t stride_height,
uint32_t stride_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
int8_t input_zero_point,
float input_scale,
float kernel_scale,
const int8_t* kernel,
const int32_t* bias,
int8_t output_zero_point,
float output_scale,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* deconvolution_op_out);
// Reshape takes batch_size, the input height/width, and uint32_t
// adjustment_height/adjustment_width; it has two size_t out-pointers named
// output_height_out/output_width_out.
enum xnn_status xnn_reshape_deconvolution2d_nhwc_qs8(
xnn_operator_t deconvolution_op,
size_t batch_size,
size_t input_height,
size_t input_width,
uint32_t adjustment_height,
uint32_t adjustment_width,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Setup takes int8_t input (read-only) and output buffers.
enum xnn_status xnn_setup_deconvolution2d_nhwc_qs8(
xnn_operator_t deconvolution_op,
const int8_t* input,
int8_t* output);
// deconvolution2d_nhwc_qs8_qc8w: create / reshape / setup declarations.
//
// Create has the same parameter list as xnn_create_deconvolution2d_nhwc_qs8
// except that kernel_scale is a const float* here rather than a single float.
// NOTE(review): presumably kernel_scale holds one scale per output
// channel - confirm the expected length.
enum xnn_status xnn_create_deconvolution2d_nhwc_qs8_qc8w(
uint32_t output_padding_top,
uint32_t output_padding_right,
uint32_t output_padding_bottom,
uint32_t output_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t stride_height,
uint32_t stride_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
int8_t input_zero_point,
float input_scale,
const float* kernel_scale,
const int8_t* kernel,
const int32_t* bias,
int8_t output_zero_point,
float output_scale,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* deconvolution_op_out);
// Reshape takes batch_size, the input height/width, and uint32_t
// adjustment_height/adjustment_width; it has two size_t out-pointers named
// output_height_out/output_width_out.
enum xnn_status xnn_reshape_deconvolution2d_nhwc_qs8_qc8w(
xnn_operator_t deconvolution_op,
size_t batch_size,
size_t input_height,
size_t input_width,
uint32_t adjustment_height,
uint32_t adjustment_width,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Setup takes int8_t input (read-only) and output buffers.
enum xnn_status xnn_setup_deconvolution2d_nhwc_qs8_qc8w(
xnn_operator_t deconvolution_op,
const int8_t* input,
int8_t* output);
// deconvolution2d_nhwc_qu8: create / reshape / setup declarations.
//
// After the geometry parameters, create takes uint8_t input_zero_point,
// float input_scale, a uint8_t kernel_zero_point, a single float
// kernel_scale, the uint8_t kernel, an int32_t bias, uint8_t
// output_zero_point, float output_scale, and uint8_t output_min/output_max,
// followed by flags, a code cache, a weights cache, and deconvolution_op_out.
enum xnn_status xnn_create_deconvolution2d_nhwc_qu8(
uint32_t output_padding_top,
uint32_t output_padding_right,
uint32_t output_padding_bottom,
uint32_t output_padding_left,
uint32_t kernel_height,
uint32_t kernel_width,
uint32_t stride_height,
uint32_t stride_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint32_t groups,
size_t group_input_channels,
size_t group_output_channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
uint8_t input_zero_point,
float input_scale,
uint8_t kernel_zero_point,
float kernel_scale,
const uint8_t* kernel,
const int32_t* bias,
uint8_t output_zero_point,
float output_scale,
uint8_t output_min,
uint8_t output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* deconvolution_op_out);
// Reshape takes batch_size, the input height/width, and uint32_t
// adjustment_height/adjustment_width; it has two size_t out-pointers named
// output_height_out/output_width_out.
enum xnn_status xnn_reshape_deconvolution2d_nhwc_qu8(
xnn_operator_t deconvolution_op,
size_t batch_size,
size_t input_height,
size_t input_width,
uint32_t adjustment_height,
uint32_t adjustment_width,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Setup takes uint8_t input (read-only) and output buffers.
enum xnn_status xnn_setup_deconvolution2d_nhwc_qu8(
xnn_operator_t deconvolution_op,
const uint8_t* input,
uint8_t* output);
// depth_to_space_nchw2nhwc_x16: create / reshape / setup declarations.
//
// Create takes a uint32_t block_size, a flags word, and
// depth_to_space_op_out, a pointer to an xnn_operator_t.
enum xnn_status xnn_create_depth_to_space_nchw2nhwc_x16(
uint32_t block_size,
uint32_t flags,
xnn_operator_t* depth_to_space_op_out);
// Reshape takes batch_size and the input height, width and channel count,
// and has three size_t out-pointers: output_height_out, output_width_out
// and output_channels_out.
enum xnn_status xnn_reshape_depth_to_space_nchw2nhwc_x16(
xnn_operator_t depth_to_space_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t input_channels,
size_t* output_height_out,
size_t* output_width_out,
size_t* output_channels_out,
pthreadpool_t threadpool);
// Setup takes untyped input (read-only) and output buffer pointers.
enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x16(
xnn_operator_t depth_to_space_op,
const void* input,
void* output);
// depth_to_space_nchw2nhwc_x32: create / reshape / setup declarations.
//
// Create takes a uint32_t block_size, a flags word, and
// depth_to_space_op_out, a pointer to an xnn_operator_t.
enum xnn_status xnn_create_depth_to_space_nchw2nhwc_x32(
uint32_t block_size,
uint32_t flags,
xnn_operator_t* depth_to_space_op_out);
// Reshape takes batch_size and the input height, width and channel count,
// and has three size_t out-pointers: output_height_out, output_width_out
// and output_channels_out.
enum xnn_status xnn_reshape_depth_to_space_nchw2nhwc_x32(
xnn_operator_t depth_to_space_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t input_channels,
size_t* output_height_out,
size_t* output_width_out,
size_t* output_channels_out,
pthreadpool_t threadpool);
// Setup takes untyped input (read-only) and output buffer pointers.
enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x32(
xnn_operator_t depth_to_space_op,
const void* input,
void* output);
// depth_to_space_nhwc_x8: create / reshape / setup declarations.
//
// Create takes a uint32_t block_size, a flags word, and
// depth_to_space_op_out, a pointer to an xnn_operator_t.
enum xnn_status xnn_create_depth_to_space_nhwc_x8(
uint32_t block_size,
uint32_t flags,
xnn_operator_t* depth_to_space_op_out);
// Reshape takes batch_size and the input height, width and channel count,
// and has three size_t out-pointers: output_height_out, output_width_out
// and output_channels_out.
enum xnn_status xnn_reshape_depth_to_space_nhwc_x8(
xnn_operator_t depth_to_space_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t input_channels,
size_t* output_height_out,
size_t* output_width_out,
size_t* output_channels_out,
pthreadpool_t threadpool);
// Setup takes untyped input (read-only) and output buffer pointers.
enum xnn_status xnn_setup_depth_to_space_nhwc_x8(
xnn_operator_t depth_to_space_op,
const void* input,
void* output);
// depth_to_space_nhwc_x16: create / reshape / setup declarations.
//
// Create takes a uint32_t block_size, a flags word, and
// depth_to_space_op_out, a pointer to an xnn_operator_t.
enum xnn_status xnn_create_depth_to_space_nhwc_x16(
uint32_t block_size,
uint32_t flags,
xnn_operator_t* depth_to_space_op_out);
// Reshape takes batch_size and the input height, width and channel count,
// and has three size_t out-pointers: output_height_out, output_width_out
// and output_channels_out.
enum xnn_status xnn_reshape_depth_to_space_nhwc_x16(
xnn_operator_t depth_to_space_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t input_channels,
size_t* output_height_out,
size_t* output_width_out,
size_t* output_channels_out,
pthreadpool_t threadpool);
// Setup takes untyped input (read-only) and output buffer pointers.
enum xnn_status xnn_setup_depth_to_space_nhwc_x16(
xnn_operator_t depth_to_space_op,
const void* input,
void* output);
// depth_to_space_nhwc_x32: create / reshape / setup declarations.
//
// Create takes a uint32_t block_size, a flags word, and
// depth_to_space_op_out, a pointer to an xnn_operator_t.
enum xnn_status xnn_create_depth_to_space_nhwc_x32(
uint32_t block_size,
uint32_t flags,
xnn_operator_t* depth_to_space_op_out);
// Reshape takes batch_size and the input height, width and channel count,
// and has three size_t out-pointers: output_height_out, output_width_out
// and output_channels_out.
enum xnn_status xnn_reshape_depth_to_space_nhwc_x32(
xnn_operator_t depth_to_space_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t input_channels,
size_t* output_height_out,
size_t* output_width_out,
size_t* output_channels_out,
pthreadpool_t threadpool);
// Setup takes untyped input (read-only) and output buffer pointers.
enum xnn_status xnn_setup_depth_to_space_nhwc_x32(
xnn_operator_t depth_to_space_op,
const void* input,
void* output);
// divide_nd_f16: create / reshape / setup declarations.
//
// Create takes output_min/output_max as float (even though this is the f16
// variant), a flags word, and divide_op_out, a pointer to an xnn_operator_t.
enum xnn_status xnn_create_divide_nd_f16(
float output_min,
float output_max,
uint32_t flags,
xnn_operator_t* divide_op_out);
// Reshape takes each input's shape as a dimension count plus a read-only
// size_t array; the two inputs have independent shapes.
// NOTE(review): shape compatibility rules (e.g. broadcasting) are not
// visible in this header - confirm.
enum xnn_status xnn_reshape_divide_nd_f16(
xnn_operator_t divide_op,
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
pthreadpool_t threadpool);
// Setup takes two untyped read-only inputs and an untyped output.
enum xnn_status xnn_setup_divide_nd_f16(
xnn_operator_t divide_op,
const void* input1,
const void* input2,
void* output);
// copysign_nd_f32: create / reshape / setup declarations.
//
// Create takes only a flags word and copysign_op_out, a pointer to an
// xnn_operator_t; there are no output_min/output_max parameters.
enum xnn_status xnn_create_copysign_nd_f32(
uint32_t flags,
xnn_operator_t* copysign_op_out);
// Reshape takes the shapes of the two inputs, named mag and sign, each as a
// dimension count plus a read-only size_t array.
enum xnn_status xnn_reshape_copysign_nd_f32(
xnn_operator_t copysign_op,
size_t num_mag_dims,
const size_t* mag_shape,
size_t num_sign_dims,
const size_t* sign_shape,
pthreadpool_t threadpool);
// Setup takes the two read-only float inputs (mag, sign) and a float output.
// NOTE(review): presumably output takes magnitude from mag and sign from
// sign, as in C copysignf - confirm against the implementation.
enum xnn_status xnn_setup_copysign_nd_f32(
xnn_operator_t copysign_op,
const float* mag,
const float* sign,
float* output);
// divide_nd_f32: create / reshape / setup / run declarations.
//
// Create takes float output_min/output_max, a flags word, and divide_op_out,
// a pointer to an xnn_operator_t.
enum xnn_status xnn_create_divide_nd_f32(
float output_min,
float output_max,
uint32_t flags,
xnn_operator_t* divide_op_out);
// Reshape takes each input's shape as a dimension count plus a read-only
// size_t array; the two inputs have independent shapes.
enum xnn_status xnn_reshape_divide_nd_f32(
xnn_operator_t divide_op,
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
pthreadpool_t threadpool);
// Setup takes two read-only float inputs and a float output.
enum xnn_status xnn_setup_divide_nd_f32(
xnn_operator_t divide_op,
const float* input1,
const float* input2,
float* output);
// Single-call form without an operator handle: shapes first, then the
// buffers, then output_min/output_max, flags and the pthreadpool_t.
enum xnn_status xnn_run_divide_nd_f32(
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
const float* input1,
const float* input2,
float* output,
float output_min,
float output_max,
uint32_t flags,
pthreadpool_t threadpool);
// dynamic_fully_connected_nc_f16 operator family: create / reshape / setup.
// Here the kernel and bias are supplied at setup time, not at create time.
// Data buffers are untyped (void*).

// Receives the output clamp range and flags; returns the handle through
// dynamic_fully_connected_op_out.
enum xnn_status xnn_create_dynamic_fully_connected_nc_f16(
float output_min,
float output_max,
uint32_t flags,
xnn_operator_t* dynamic_fully_connected_op_out);
// Receives batch size, channel counts and strides.
// NOTE(review): workspace_size and workspace_alignment are non-const pointers,
// presumably outputs describing the buffer that setup expects - confirm.
enum xnn_status xnn_reshape_dynamic_fully_connected_nc_f16(
xnn_operator_t dynamic_fully_connected_op,
size_t batch_size,
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
size_t* workspace_size,
size_t* workspace_alignment,
pthreadpool_t threadpool);
// Receives the caller-provided workspace plus input, kernel, bias and output
// buffers.
enum xnn_status xnn_setup_dynamic_fully_connected_nc_f16(
xnn_operator_t dynamic_fully_connected_op,
void* workspace,
const void* input,
const void* kernel,
const void* bias,
void* output);
// dynamic_fully_connected_nc_f32 operator family: create / reshape / setup.
// Here the kernel and bias are supplied at setup time, not at create time.

// Receives the output clamp range and flags; returns the handle through
// dynamic_fully_connected_op_out.
enum xnn_status xnn_create_dynamic_fully_connected_nc_f32(
float output_min,
float output_max,
uint32_t flags,
xnn_operator_t* dynamic_fully_connected_op_out);
// Receives batch size, channel counts and strides.
// NOTE(review): workspace_size and workspace_alignment are non-const pointers,
// presumably outputs describing the buffer that setup expects - confirm.
enum xnn_status xnn_reshape_dynamic_fully_connected_nc_f32(
xnn_operator_t dynamic_fully_connected_op,
size_t batch_size,
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
size_t* workspace_size,
size_t* workspace_alignment,
pthreadpool_t threadpool);
// Receives the caller-provided workspace plus float input, kernel, bias and
// output buffers.
enum xnn_status xnn_setup_dynamic_fully_connected_nc_f32(
xnn_operator_t dynamic_fully_connected_op,
void* workspace,
const float* input,
const float* kernel,
const float* bias,
float* output);
// elu_nc_f16 operator family: create / reshape / setup.
// alpha is passed as float; the data buffers are untyped (void*).
// NOTE(review): the units of input_stride / output_stride (elements vs bytes)
// are not visible from this header - confirm.

// Receives alpha and flags; returns the handle through elu_op_out.
enum xnn_status xnn_create_elu_nc_f16(
float alpha,
uint32_t flags,
xnn_operator_t* elu_op_out);
// Receives batch size, channel count and the two strides.
enum xnn_status xnn_reshape_elu_nc_f16(
xnn_operator_t elu_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Receives the input and output buffers.
enum xnn_status xnn_setup_elu_nc_f16(
xnn_operator_t elu_op,
const void* input,
void* output);
// elu_nc_f32 operator family: create / reshape / setup, plus a run variant.
// NOTE(review): the units of input_stride / output_stride (elements vs bytes)
// are not visible from this header - confirm.

// Receives alpha and flags; returns the handle through elu_op_out.
enum xnn_status xnn_create_elu_nc_f32(
float alpha,
uint32_t flags,
xnn_operator_t* elu_op_out);
// Receives batch size, channel count and the two strides.
enum xnn_status xnn_reshape_elu_nc_f32(
xnn_operator_t elu_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Receives the float input and output buffers.
enum xnn_status xnn_setup_elu_nc_f32(
xnn_operator_t elu_op,
const float* input,
float* output);
// Single-call variant with no xnn_operator_t parameter. Note the parameter
// order: channels and strides come before batch_size here, unlike reshape.
enum xnn_status xnn_run_elu_nc_f32(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const float* input,
float* output,
float alpha,
uint32_t flags,
pthreadpool_t threadpool);
// elu_nc_qs8 operator family: create / reshape / setup.
// Data is int8_t; create additionally takes zero point / scale pairs for the
// input and the output, and an int8_t output clamp range.

// Receives alpha, the quantization parameters, the clamp range and flags;
// returns the handle through elu_op_out.
enum xnn_status xnn_create_elu_nc_qs8(
float alpha,
int8_t input_zero_point,
float input_scale,
int8_t output_zero_point,
float output_scale,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_operator_t* elu_op_out);
// Receives batch size, channel count and the two strides.
enum xnn_status xnn_reshape_elu_nc_qs8(
xnn_operator_t elu_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Receives the int8_t input and output buffers.
enum xnn_status xnn_setup_elu_nc_qs8(
xnn_operator_t elu_op,
const int8_t* input,
int8_t* output);
// exp_nc_f32 operator family: create / reshape / setup.
// No run variant is declared for this family in this part of the header.

// Receives only flags; returns the handle through exp_op_out.
enum xnn_status xnn_create_exp_nc_f32(
uint32_t flags,
xnn_operator_t* exp_op_out);
// Receives batch size, channel count and the two strides.
enum xnn_status xnn_reshape_exp_nc_f32(
xnn_operator_t exp_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Receives the float input and output buffers.
enum xnn_status xnn_setup_exp_nc_f32(
xnn_operator_t exp_op,
const float* input,
float* output);
// floor_nc_f16 operator family: create / reshape / setup.
// Data buffers are untyped (void*) in this f16 variant.

// Receives only flags; returns the handle through floor_op_out.
enum xnn_status xnn_create_floor_nc_f16(
uint32_t flags,
xnn_operator_t* floor_op_out);
// Receives batch size, channel count and the two strides.
enum xnn_status xnn_reshape_floor_nc_f16(
xnn_operator_t floor_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Receives the input and output buffers.
enum xnn_status xnn_setup_floor_nc_f16(
xnn_operator_t floor_op,
const void* input,
void* output);
// floor_nc_f32 operator family: create / reshape / setup, plus a run variant.

// Receives only flags; returns the handle through floor_op_out.
enum xnn_status xnn_create_floor_nc_f32(
uint32_t flags,
xnn_operator_t* floor_op_out);
// Receives batch size, channel count and the two strides.
enum xnn_status xnn_reshape_floor_nc_f32(
xnn_operator_t floor_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Receives the float input and output buffers.
enum xnn_status xnn_setup_floor_nc_f32(
xnn_operator_t floor_op,
const float* input,
float* output);
// Single-call variant with no xnn_operator_t parameter. Note the parameter
// order: channels and strides come before batch_size here, unlike reshape.
enum xnn_status xnn_run_floor_nc_f32(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const float* input,
float* output,
uint32_t flags,
pthreadpool_t threadpool);
// fully_connected_nc_f16 operator family: create / reshape / setup.
// Kernel and bias are supplied at create time as untyped (void*) buffers;
// the clamp range is float.
// NOTE(review): whether kernel / bias must outlive the create call, and
// whether code_cache / weights_cache may be NULL, is not visible here - confirm.

// Receives channel counts, strides, weights, clamp range, flags and the two
// cache handles; returns the operator through fully_connected_op_out.
enum xnn_status xnn_create_fully_connected_nc_f16(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
const void* kernel,
const void* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out);
// Receives only the batch size and the thread pool.
enum xnn_status xnn_reshape_fully_connected_nc_f16(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool);
// Receives the input and output buffers.
enum xnn_status xnn_setup_fully_connected_nc_f16(
xnn_operator_t fully_connected_op,
const void* input,
void* output);
// fully_connected_nc_f32 operator family: create / reshape / setup.
// Kernel and bias are float arrays supplied at create time.
// NOTE(review): whether kernel / bias must outlive the create call, and
// whether code_cache / weights_cache may be NULL, is not visible here - confirm.

// Receives channel counts, strides, weights, clamp range, flags and the two
// cache handles; returns the operator through fully_connected_op_out.
enum xnn_status xnn_create_fully_connected_nc_f32(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
const float* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out);
// Receives only the batch size and the thread pool.
enum xnn_status xnn_reshape_fully_connected_nc_f32(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool);
// Receives the float input and output buffers.
enum xnn_status xnn_setup_fully_connected_nc_f32(
xnn_operator_t fully_connected_op,
const float* input,
float* output);
// fully_connected_nc_f32_qc4w operator family: create / reshape / setup.
// Input, bias and output are float; the kernel is a uint8_t array with a single
// kernel_zero_point and a kernel_scale array.
// NOTE(review): "qc4w" suggests 4-bit weights with per-channel scales, which
// would make kernel_scale one entry per output channel and kernel two packed
// values per byte - this is inferred from the name; confirm.

// Receives channel counts, strides, quantized weights with their parameters,
// bias, clamp range, flags and the two cache handles.
enum xnn_status xnn_create_fully_connected_nc_f32_qc4w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
uint8_t kernel_zero_point,
const float* kernel_scale,
const uint8_t* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out);
// Receives only the batch size and the thread pool.
enum xnn_status xnn_reshape_fully_connected_nc_f32_qc4w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool);
// Receives the float input and output buffers.
enum xnn_status xnn_setup_fully_connected_nc_f32_qc4w(
xnn_operator_t fully_connected_op,
const float* input,
float* output);
// fully_connected_nc_f32_qc8w operator family: create / reshape / setup.
// Input, bias and output are float; the kernel is an int8_t array accompanied
// by a kernel_scale array. No kernel zero point is passed.
// NOTE(review): "qc8w" suggests per-channel scales (one per output channel) -
// inferred from the name; confirm.

// Receives channel counts, strides, quantized weights with their scales, bias,
// clamp range, flags and the two cache handles.
enum xnn_status xnn_create_fully_connected_nc_f32_qc8w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
const float* kernel_scale,
const int8_t* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out);
// Receives only the batch size and the thread pool.
enum xnn_status xnn_reshape_fully_connected_nc_f32_qc8w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool);
// Receives the float input and output buffers.
enum xnn_status xnn_setup_fully_connected_nc_f32_qc8w(
xnn_operator_t fully_connected_op,
const float* input,
float* output);
// fully_connected_nc_qd8_f16_qc4w operator family.
// Declared here in the order create, setup, reshape.
// Input is int8_t with a struct xnn_dynamic_quantization_params array passed
// at setup; output is untyped (void*); the kernel is untyped with a single
// kernel_zero_point and a float kernel_scale array.
// NOTE(review): presumably quantization_params holds one entry per batch row -
// confirm against the implementation.

// Receives channel counts, strides, quantized weights with their parameters,
// bias, clamp range, flags and the two cache handles.
enum xnn_status xnn_create_fully_connected_nc_qd8_f16_qc4w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
uint8_t kernel_zero_point,
const float* kernel_scale,
const void* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out);
// Receives the int8_t input, the output buffer and the input quantization
// parameters.
enum xnn_status xnn_setup_fully_connected_nc_qd8_f16_qc4w(
xnn_operator_t fully_connected_op,
const int8_t* input,
void* output,
const struct xnn_dynamic_quantization_params* quantization_params);
// Receives only the batch size and the thread pool.
enum xnn_status xnn_reshape_fully_connected_nc_qd8_f16_qc4w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool);
// fully_connected_nc_qd8_f16_qb4w operator family: create / reshape / setup.
// Compared with the qc4w declarations in this header, create takes an extra
// block_size and kernel_scale is a uint16_t array instead of a float array.
// NOTE(review): the encoding of the uint16_t scales (e.g. bf16 or fp16) and the
// relation between block_size and input_channels are not visible here - confirm.

// Receives channel counts, strides, block size, quantized weights with their
// parameters, bias, clamp range, flags and the two cache handles.
enum xnn_status xnn_create_fully_connected_nc_qd8_f16_qb4w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
size_t block_size,
uint8_t kernel_zero_point,
const uint16_t* kernel_scale,
const void* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out);
// Receives only the batch size and the thread pool.
enum xnn_status xnn_reshape_fully_connected_nc_qd8_f16_qb4w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool);
// Receives the int8_t input, the untyped output buffer and the input
// quantization parameters.
enum xnn_status xnn_setup_fully_connected_nc_qd8_f16_qb4w(
xnn_operator_t fully_connected_op,
const int8_t* input,
void* output,
const struct xnn_dynamic_quantization_params* quantization_params);
// fully_connected_nc_qd8_f32_qc4w operator family.
// Declared here in the order create, setup, reshape.
// Input is int8_t with a struct xnn_dynamic_quantization_params array passed
// at setup; output is float; the kernel is untyped with a single
// kernel_zero_point and a float kernel_scale array.
// NOTE(review): presumably quantization_params holds one entry per batch row -
// confirm against the implementation.

// Receives channel counts, strides, quantized weights with their parameters,
// bias, clamp range, flags and the two cache handles.
enum xnn_status xnn_create_fully_connected_nc_qd8_f32_qc4w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
uint8_t kernel_zero_point,
const float* kernel_scale,
const void* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out);
// Receives the int8_t input, the float output buffer and the input
// quantization parameters.
enum xnn_status xnn_setup_fully_connected_nc_qd8_f32_qc4w(
xnn_operator_t fully_connected_op,
const int8_t* input,
float* output,
const struct xnn_dynamic_quantization_params* quantization_params);
// Receives only the batch size and the thread pool.
enum xnn_status xnn_reshape_fully_connected_nc_qd8_f32_qc4w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool);
// fully_connected_nc_qd8_f32_qb4w operator family: create / reshape / setup.
// Compared with the qc4w declarations in this header, create takes an extra
// block_size and kernel_scale is a uint16_t array instead of a float array.
// NOTE(review): the encoding of the uint16_t scales (e.g. bf16 or fp16) and the
// relation between block_size and input_channels are not visible here - confirm.

// Receives channel counts, strides, block size, quantized weights with their
// parameters, bias, clamp range, flags and the two cache handles.
enum xnn_status xnn_create_fully_connected_nc_qd8_f32_qb4w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
size_t block_size,
uint8_t kernel_zero_point,
const uint16_t* kernel_scale,
const void* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out);
// Receives only the batch size and the thread pool.
enum xnn_status xnn_reshape_fully_connected_nc_qd8_f32_qb4w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool);
// Receives the int8_t input, the float output buffer and the input
// quantization parameters.
enum xnn_status xnn_setup_fully_connected_nc_qd8_f32_qb4w(
xnn_operator_t fully_connected_op,
const int8_t* input,
float* output,
const struct xnn_dynamic_quantization_params* quantization_params);
// fully_connected_nc_qd8_f16_qc8w operator family.
// Declared here in the order create, setup, reshape.
// Input is int8_t with a struct xnn_dynamic_quantization_params array passed
// at setup; output is untyped (void*); the kernel is int8_t with a float
// kernel_scale array and no kernel zero point.
// NOTE(review): presumably quantization_params holds one entry per batch row -
// confirm against the implementation.

// Receives channel counts, strides, quantized weights with their scales, bias,
// clamp range, flags and the two cache handles.
enum xnn_status xnn_create_fully_connected_nc_qd8_f16_qc8w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
const float* kernel_scale,
const int8_t* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out);
// Receives the int8_t input, the output buffer and the input quantization
// parameters.
enum xnn_status xnn_setup_fully_connected_nc_qd8_f16_qc8w(
xnn_operator_t fully_connected_op,
const int8_t* input,
void* output,
const struct xnn_dynamic_quantization_params* quantization_params);
// Receives only the batch size and the thread pool.
enum xnn_status xnn_reshape_fully_connected_nc_qd8_f16_qc8w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool);
// fully_connected_nc_qd8_f32_qc8w operator family.
// Declared here in the order create, setup, reshape.
// Input is int8_t with a struct xnn_dynamic_quantization_params array passed
// at setup; output is float; the kernel is int8_t with a float kernel_scale
// array and no kernel zero point.
// NOTE(review): presumably quantization_params holds one entry per batch row -
// confirm against the implementation.

// Receives channel counts, strides, quantized weights with their scales, bias,
// clamp range, flags and the two cache handles.
enum xnn_status xnn_create_fully_connected_nc_qd8_f32_qc8w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
const float* kernel_scale,
const int8_t* kernel,
const float* bias,
float output_min,
float output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out);
// Receives the int8_t input, the float output buffer and the input
// quantization parameters.
enum xnn_status xnn_setup_fully_connected_nc_qd8_f32_qc8w(
xnn_operator_t fully_connected_op,
const int8_t* input,
float* output,
const struct xnn_dynamic_quantization_params* quantization_params);
// Receives only the batch size and the thread pool.
enum xnn_status xnn_reshape_fully_connected_nc_qd8_f32_qc8w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool);
// fully_connected_nc_qs8 operator family: create / reshape / setup.
// Input, kernel and output are int8_t; bias is int32_t. The kernel has a
// single scalar kernel_scale and no kernel zero point parameter.

// Receives channel counts, strides, input / kernel / output quantization
// parameters, weights, int8_t clamp range, flags and the two cache handles.
enum xnn_status xnn_create_fully_connected_nc_qs8(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
int8_t input_zero_point,
float input_scale,
float kernel_scale,
const int8_t* kernel,
const int32_t* bias,
int8_t output_zero_point,
float output_scale,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out);
// Receives only the batch size and the thread pool.
enum xnn_status xnn_reshape_fully_connected_nc_qs8(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool);
// Receives the int8_t input and output buffers.
enum xnn_status xnn_setup_fully_connected_nc_qs8(
xnn_operator_t fully_connected_op,
const int8_t* input,
int8_t* output);
// fully_connected_nc_qs8_qc8w operator family: create / reshape / setup.
// Same parameter list as the qs8 declarations in this header, except that
// kernel_scale is a float array rather than a single float.
// NOTE(review): presumably one scale per output channel - confirm.

// Receives channel counts, strides, input / kernel / output quantization
// parameters, weights, int8_t clamp range, flags and the two cache handles.
enum xnn_status xnn_create_fully_connected_nc_qs8_qc8w(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
int8_t input_zero_point,
float input_scale,
const float* kernel_scale,
const int8_t* kernel,
const int32_t* bias,
int8_t output_zero_point,
float output_scale,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out);
// Receives only the batch size and the thread pool.
enum xnn_status xnn_reshape_fully_connected_nc_qs8_qc8w(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool);
// Receives the int8_t input and output buffers.
enum xnn_status xnn_setup_fully_connected_nc_qs8_qc8w(
xnn_operator_t fully_connected_op,
const int8_t* input,
int8_t* output);
// fully_connected_nc_qu8 operator family: create / reshape / setup.
// Input, kernel and output are uint8_t; bias is int32_t. Unlike the qs8
// declarations in this header, the kernel also has a kernel_zero_point.

// Receives channel counts, strides, input / kernel / output quantization
// parameters, weights, uint8_t clamp range, flags and the two cache handles.
enum xnn_status xnn_create_fully_connected_nc_qu8(
size_t input_channels,
size_t output_channels,
size_t input_stride,
size_t output_stride,
uint8_t input_zero_point,
float input_scale,
uint8_t kernel_zero_point,
float kernel_scale,
const uint8_t* kernel,
const int32_t* bias,
uint8_t output_zero_point,
float output_scale,
uint8_t output_min,
uint8_t output_max,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* fully_connected_op_out);
// Receives only the batch size and the thread pool.
enum xnn_status xnn_reshape_fully_connected_nc_qu8(
xnn_operator_t fully_connected_op,
size_t batch_size,
pthreadpool_t threadpool);
// Receives the uint8_t input and output buffers.
enum xnn_status xnn_setup_fully_connected_nc_qu8(
xnn_operator_t fully_connected_op,
const uint8_t* input,
uint8_t* output);
// gelu_nc_f32 operator family: create / reshape / setup, plus a run variant.
// NOTE(review): which GELU formulation (exact or approximated) is used is not
// visible from this header - confirm against the implementation.

// Receives only flags; returns the handle through gelu_op_out.
enum xnn_status xnn_create_gelu_nc_f32(
uint32_t flags,
xnn_operator_t* gelu_op_out);
// Receives batch size, channel count and the two strides.
enum xnn_status xnn_reshape_gelu_nc_f32(
xnn_operator_t gelu_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Receives the float input and output buffers.
enum xnn_status xnn_setup_gelu_nc_f32(
xnn_operator_t gelu_op,
const float* input,
float* output);
// Single-call variant with no xnn_operator_t parameter. Note the parameter
// order: channels and strides come before batch_size here, unlike reshape.
enum xnn_status xnn_run_gelu_nc_f32(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const float* input,
float* output,
uint32_t flags,
pthreadpool_t threadpool);
// global_average_pooling_ncw_f16 operator family: create / reshape / setup.
// Unlike the nwc declarations in this header, reshape takes no strides and no
// workspace parameters. Data buffers are untyped (void*).

// Receives the output clamp range (as float) and flags; returns the handle
// through global_average_pooling_op_out.
enum xnn_status xnn_create_global_average_pooling_ncw_f16(
float output_min,
float output_max,
uint32_t flags,
xnn_operator_t* global_average_pooling_op_out);
// Receives batch size, width and channel count.
enum xnn_status xnn_reshape_global_average_pooling_ncw_f16(
xnn_operator_t global_average_pooling_op,
size_t batch_size,
size_t width,
size_t channels,
pthreadpool_t threadpool);
// Receives the input and output buffers.
enum xnn_status xnn_setup_global_average_pooling_ncw_f16(
xnn_operator_t global_average_pooling_op,
const void* input,
void* output);
// global_average_pooling_ncw_f32 operator family: create / reshape / setup.
// Unlike the nwc declarations in this header, reshape takes no strides and no
// workspace parameters.

// Receives the output clamp range and flags; returns the handle through
// global_average_pooling_op_out.
enum xnn_status xnn_create_global_average_pooling_ncw_f32(
float output_min,
float output_max,
uint32_t flags,
xnn_operator_t* global_average_pooling_op_out);
// Receives batch size, width and channel count.
enum xnn_status xnn_reshape_global_average_pooling_ncw_f32(
xnn_operator_t global_average_pooling_op,
size_t batch_size,
size_t width,
size_t channels,
pthreadpool_t threadpool);
// Receives the float input and output buffers.
enum xnn_status xnn_setup_global_average_pooling_ncw_f32(
xnn_operator_t global_average_pooling_op,
const float* input,
float* output);
// global_average_pooling_nwc_f16 operator family: create / reshape / setup.
// Data buffers are untyped (void*); setup additionally takes a workspace.
// NOTE(review): workspace_size and workspace_alignment are non-const pointers,
// presumably outputs describing the buffer that setup expects - confirm.

// Receives the output clamp range (as float) and flags; returns the handle
// through global_average_pooling_op_out.
enum xnn_status xnn_create_global_average_pooling_nwc_f16(
float output_min,
float output_max,
uint32_t flags,
xnn_operator_t* global_average_pooling_op_out);
// Receives batch size, width, channel count, strides and the workspace
// size / alignment pointers.
enum xnn_status xnn_reshape_global_average_pooling_nwc_f16(
xnn_operator_t global_average_pooling_op,
size_t batch_size,
size_t width,
size_t channels,
size_t input_stride,
size_t output_stride,
size_t* workspace_size,
size_t* workspace_alignment,
pthreadpool_t threadpool);
// Receives the caller-provided workspace and the input and output buffers.
enum xnn_status xnn_setup_global_average_pooling_nwc_f16(
xnn_operator_t global_average_pooling_op,
void* workspace,
const void* input,
void* output);
// global_average_pooling_nwc_f32 operator family: create / reshape / setup.
// Setup additionally takes a workspace.
// NOTE(review): workspace_size and workspace_alignment are non-const pointers,
// presumably outputs describing the buffer that setup expects - confirm.

// Receives the output clamp range and flags; returns the handle through
// global_average_pooling_op_out.
enum xnn_status xnn_create_global_average_pooling_nwc_f32(
float output_min,
float output_max,
uint32_t flags,
xnn_operator_t* global_average_pooling_op_out);
// Receives batch size, width, channel count, strides and the workspace
// size / alignment pointers.
enum xnn_status xnn_reshape_global_average_pooling_nwc_f32(
xnn_operator_t global_average_pooling_op,
size_t batch_size,
size_t width,
size_t channels,
size_t input_stride,
size_t output_stride,
size_t* workspace_size,
size_t* workspace_alignment,
pthreadpool_t threadpool);
// Receives the caller-provided workspace and the float input and output
// buffers.
enum xnn_status xnn_setup_global_average_pooling_nwc_f32(
xnn_operator_t global_average_pooling_op,
void* workspace,
const float* input,
float* output);
// global_average_pooling_nwc_qs8 operator family: create / reshape / setup.
// Data is int8_t; create takes zero point / scale pairs for input and output
// and an int8_t clamp range. Setup additionally takes a workspace.
// NOTE(review): workspace_size and workspace_alignment are non-const pointers,
// presumably outputs describing the buffer that setup expects - confirm.

// Receives the quantization parameters, clamp range and flags; returns the
// handle through global_average_pooling_op_out.
enum xnn_status xnn_create_global_average_pooling_nwc_qs8(
int8_t input_zero_point,
float input_scale,
int8_t output_zero_point,
float output_scale,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_operator_t* global_average_pooling_op_out);
// Receives batch size, width, channel count, strides and the workspace
// size / alignment pointers.
enum xnn_status xnn_reshape_global_average_pooling_nwc_qs8(
xnn_operator_t global_average_pooling_op,
size_t batch_size,
size_t width,
size_t channels,
size_t input_stride,
size_t output_stride,
size_t* workspace_size,
size_t* workspace_alignment,
pthreadpool_t threadpool);
// Receives the caller-provided workspace and the int8_t input and output
// buffers.
enum xnn_status xnn_setup_global_average_pooling_nwc_qs8(
xnn_operator_t global_average_pooling_op,
void* workspace,
const int8_t* input,
int8_t* output);
// global_average_pooling_nwc_qu8 operator family: create / reshape / setup.
// Data is uint8_t; create takes zero point / scale pairs for input and output
// and a uint8_t clamp range. Setup additionally takes a workspace.
// NOTE(review): workspace_size and workspace_alignment are non-const pointers,
// presumably outputs describing the buffer that setup expects - confirm.

// Receives the quantization parameters, clamp range and flags; returns the
// handle through global_average_pooling_op_out.
enum xnn_status xnn_create_global_average_pooling_nwc_qu8(
uint8_t input_zero_point,
float input_scale,
uint8_t output_zero_point,
float output_scale,
uint8_t output_min,
uint8_t output_max,
uint32_t flags,
xnn_operator_t* global_average_pooling_op_out);
// Receives batch size, width, channel count, strides and the workspace
// size / alignment pointers.
enum xnn_status xnn_reshape_global_average_pooling_nwc_qu8(
xnn_operator_t global_average_pooling_op,
size_t batch_size,
size_t width,
size_t channels,
size_t input_stride,
size_t output_stride,
size_t* workspace_size,
size_t* workspace_alignment,
pthreadpool_t threadpool);
// Receives the caller-provided workspace and the uint8_t input and output
// buffers.
enum xnn_status xnn_setup_global_average_pooling_nwc_qu8(
xnn_operator_t global_average_pooling_op,
void* workspace,
const uint8_t* input,
uint8_t* output);
// global_sum_pooling_nwc_f16 operator family: create / reshape / setup.
// The signatures mirror the global_average_pooling_nwc_f16 declarations in
// this header. Data buffers are untyped (void*).
// NOTE(review): workspace_size and workspace_alignment are non-const pointers,
// presumably outputs describing the buffer that setup expects - confirm.

// Receives the output clamp range (as float) and flags; returns the handle
// through global_sum_pooling_op_out.
enum xnn_status xnn_create_global_sum_pooling_nwc_f16(
float output_min,
float output_max,
uint32_t flags,
xnn_operator_t* global_sum_pooling_op_out);
// Receives batch size, width, channel count, strides and the workspace
// size / alignment pointers.
enum xnn_status xnn_reshape_global_sum_pooling_nwc_f16(
xnn_operator_t global_sum_pooling_op,
size_t batch_size,
size_t width,
size_t channels,
size_t input_stride,
size_t output_stride,
size_t* workspace_size,
size_t* workspace_alignment,
pthreadpool_t threadpool);
// Receives the caller-provided workspace and the input and output buffers.
enum xnn_status xnn_setup_global_sum_pooling_nwc_f16(
xnn_operator_t global_sum_pooling_op,
void* workspace,
const void* input,
void* output);
// global_sum_pooling_nwc_f32 operator family: create / reshape / setup.
// The signatures mirror the global_average_pooling_nwc_f32 declarations in
// this header.
// NOTE(review): workspace_size and workspace_alignment are non-const pointers,
// presumably outputs describing the buffer that setup expects - confirm.

// Receives the output clamp range and flags; returns the handle through
// global_sum_pooling_op_out.
enum xnn_status xnn_create_global_sum_pooling_nwc_f32(
float output_min,
float output_max,
uint32_t flags,
xnn_operator_t* global_sum_pooling_op_out);
// Receives batch size, width, channel count, strides and the workspace
// size / alignment pointers.
enum xnn_status xnn_reshape_global_sum_pooling_nwc_f32(
xnn_operator_t global_sum_pooling_op,
size_t batch_size,
size_t width,
size_t channels,
size_t input_stride,
size_t output_stride,
size_t* workspace_size,
size_t* workspace_alignment,
pthreadpool_t threadpool);
// Receives the caller-provided workspace and the float input and output
// buffers.
enum xnn_status xnn_setup_global_sum_pooling_nwc_f32(
xnn_operator_t global_sum_pooling_op,
void* workspace,
const float* input,
float* output);
// hardswish_nc_f16 operator family: create / reshape / setup.
// Data buffers are untyped (void*) in this f16 variant.

// Receives only flags; returns the handle through hardswish_op_out.
enum xnn_status xnn_create_hardswish_nc_f16(
uint32_t flags,
xnn_operator_t* hardswish_op_out);
// Receives batch size, channel count and the two strides.
enum xnn_status xnn_reshape_hardswish_nc_f16(
xnn_operator_t hardswish_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Receives the input and output buffers.
enum xnn_status xnn_setup_hardswish_nc_f16(
xnn_operator_t hardswish_op,
const void* input,
void* output);
// hardswish_nc_f32 operator family: create / reshape / setup, plus a run
// variant.

// Receives only flags; returns the handle through hardswish_op_out.
enum xnn_status xnn_create_hardswish_nc_f32(
uint32_t flags,
xnn_operator_t* hardswish_op_out);
// Receives batch size, channel count and the two strides.
enum xnn_status xnn_reshape_hardswish_nc_f32(
xnn_operator_t hardswish_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Receives the float input and output buffers.
enum xnn_status xnn_setup_hardswish_nc_f32(
xnn_operator_t hardswish_op,
const float* input,
float* output);
// Single-call variant with no xnn_operator_t parameter. Note the parameter
// order: channels and strides come before batch_size here, unlike reshape.
enum xnn_status xnn_run_hardswish_nc_f32(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const float* input,
float* output,
uint32_t flags,
pthreadpool_t threadpool);
// leaky_relu_nc_f16 operator family: create / reshape / setup.
// negative_slope is passed as float; the data buffers are untyped (void*).

// Receives the negative slope and flags; returns the handle through
// leaky_relu_op_out.
enum xnn_status xnn_create_leaky_relu_nc_f16(
float negative_slope,
uint32_t flags,
xnn_operator_t* leaky_relu_op_out);
// Receives batch size, channel count and the two strides.
enum xnn_status xnn_reshape_leaky_relu_nc_f16(
xnn_operator_t leaky_relu_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Receives the input and output buffers.
enum xnn_status xnn_setup_leaky_relu_nc_f16(
xnn_operator_t leaky_relu_op,
const void* input,
void* output);
// leaky_relu_nc_f32 operator family: create / reshape / setup, plus a run
// variant.

// Receives the negative slope and flags; returns the handle through
// leaky_relu_op_out.
enum xnn_status xnn_create_leaky_relu_nc_f32(
float negative_slope,
uint32_t flags,
xnn_operator_t* leaky_relu_op_out);
// Receives batch size, channel count and the two strides.
enum xnn_status xnn_reshape_leaky_relu_nc_f32(
xnn_operator_t leaky_relu_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Receives the float input and output buffers.
enum xnn_status xnn_setup_leaky_relu_nc_f32(
xnn_operator_t leaky_relu_op,
const float* input,
float* output);
// Single-call variant with no xnn_operator_t parameter. Note the parameter
// order: channels and strides come before batch_size here, unlike reshape.
enum xnn_status xnn_run_leaky_relu_nc_f32(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const float* input,
float* output,
float negative_slope,
uint32_t flags,
pthreadpool_t threadpool);
// leaky_relu_nc_qs8 operator family: create / reshape / setup.
// Data is int8_t; create takes zero point / scale pairs for input and output.
// No output clamp range is passed for this operator.

// Receives the negative slope, quantization parameters and flags; returns the
// handle through leaky_relu_op_out.
enum xnn_status xnn_create_leaky_relu_nc_qs8(
float negative_slope,
int8_t input_zero_point,
float input_scale,
int8_t output_zero_point,
float output_scale,
uint32_t flags,
xnn_operator_t* leaky_relu_op_out);
// Receives batch size, channel count and the two strides.
enum xnn_status xnn_reshape_leaky_relu_nc_qs8(
xnn_operator_t leaky_relu_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Receives the int8_t input and output buffers.
enum xnn_status xnn_setup_leaky_relu_nc_qs8(
xnn_operator_t leaky_relu_op,
const int8_t* input,
int8_t* output);
// leaky_relu_nc_qu8 operator family: create / reshape / setup.
// Data is uint8_t; create takes zero point / scale pairs for input and output.
// No output clamp range is passed for this operator.
// The three declarations are kept contiguous so the family reads as a unit;
// prototype order has no effect on callers.

// Receives the negative slope, quantization parameters and flags; returns the
// handle through leaky_relu_op_out.
enum xnn_status xnn_create_leaky_relu_nc_qu8(
float negative_slope,
uint8_t input_zero_point,
float input_scale,
uint8_t output_zero_point,
float output_scale,
uint32_t flags,
xnn_operator_t* leaky_relu_op_out);
// Receives batch size, channel count and the two strides.
enum xnn_status xnn_reshape_leaky_relu_nc_qu8(
xnn_operator_t leaky_relu_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Receives the uint8_t input and output buffers.
enum xnn_status xnn_setup_leaky_relu_nc_qu8(
xnn_operator_t leaky_relu_op,
const uint8_t* input,
uint8_t* output);

// log_nc_f32 operator family: create / reshape / setup.
// NOTE(review): presumably the natural logarithm - the base is not visible
// from this header; confirm against the implementation.

// Receives only flags; returns the handle through log_op_out.
enum xnn_status xnn_create_log_nc_f32(
uint32_t flags,
xnn_operator_t* log_op_out);
// Receives batch size, channel count and the two strides.
enum xnn_status xnn_reshape_log_nc_f32(
xnn_operator_t log_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Receives the float input and output buffers.
enum xnn_status xnn_setup_log_nc_f32(
xnn_operator_t log_op,
const float* input,
float* output);
// max_pooling2d_nhwc_f16 operator family: create / reshape / setup.
// The clamp range is float; the data buffers are untyped (void*).
// Note the padding parameter order in create: top, right, bottom, left.

// Receives padding, pooling window, stride, dilation, clamp range and flags;
// returns the handle through max_pooling_op_out.
enum xnn_status xnn_create_max_pooling2d_nhwc_f16(
uint32_t input_padding_top,
uint32_t input_padding_right,
uint32_t input_padding_bottom,
uint32_t input_padding_left,
uint32_t pooling_height,
uint32_t pooling_width,
uint32_t stride_height,
uint32_t stride_width,
uint32_t dilation_height,
uint32_t dilation_width,
float output_min,
float output_max,
uint32_t flags,
xnn_operator_t* max_pooling_op_out);
// Receives the input dimensions and pixel strides. The two *_out pointers are
// presumably filled with the resulting output size - TODO confirm whether they
// may be NULL.
enum xnn_status xnn_reshape_max_pooling2d_nhwc_f16(
xnn_operator_t max_pooling_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Receives the input and output buffers.
enum xnn_status xnn_setup_max_pooling2d_nhwc_f16(
xnn_operator_t max_pooling_op,
const void* input,
void* output);
// max_pooling2d_nhwc_f32 operator family: create / reshape / setup.
// Note the padding parameter order in create: top, right, bottom, left.

// Receives padding, pooling window, stride, dilation, clamp range and flags;
// returns the handle through max_pooling_op_out.
enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
uint32_t input_padding_top,
uint32_t input_padding_right,
uint32_t input_padding_bottom,
uint32_t input_padding_left,
uint32_t pooling_height,
uint32_t pooling_width,
uint32_t stride_height,
uint32_t stride_width,
uint32_t dilation_height,
uint32_t dilation_width,
float output_min,
float output_max,
uint32_t flags,
xnn_operator_t* max_pooling_op_out);
// Receives the input dimensions and pixel strides. The two *_out pointers are
// presumably filled with the resulting output size - TODO confirm whether they
// may be NULL.
enum xnn_status xnn_reshape_max_pooling2d_nhwc_f32(
xnn_operator_t max_pooling_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Receives the float input and output buffers.
enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
xnn_operator_t max_pooling_op,
const float* input,
float* output);
// max_pooling2d_nhwc_s8 operator family: create / reshape / setup.
// Data and clamp range are int8_t; no zero point or scale is passed.
// Note the padding parameter order in create: top, right, bottom, left.

// Receives padding, pooling window, stride, dilation, clamp range and flags;
// returns the handle through max_pooling_op_out.
enum xnn_status xnn_create_max_pooling2d_nhwc_s8(
uint32_t input_padding_top,
uint32_t input_padding_right,
uint32_t input_padding_bottom,
uint32_t input_padding_left,
uint32_t pooling_height,
uint32_t pooling_width,
uint32_t stride_height,
uint32_t stride_width,
uint32_t dilation_height,
uint32_t dilation_width,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_operator_t* max_pooling_op_out);
// Receives the input dimensions and pixel strides. The two *_out pointers are
// presumably filled with the resulting output size - TODO confirm whether they
// may be NULL.
enum xnn_status xnn_reshape_max_pooling2d_nhwc_s8(
xnn_operator_t max_pooling_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Receives the int8_t input and output buffers.
enum xnn_status xnn_setup_max_pooling2d_nhwc_s8(
xnn_operator_t max_pooling_op,
const int8_t* input,
int8_t* output);
// max_pooling2d_nhwc_u8 operator family: create / reshape / setup.
// Data and clamp range are uint8_t; no zero point or scale is passed.
// Note the padding parameter order in create: top, right, bottom, left.

// Receives padding, pooling window, stride, dilation, clamp range and flags;
// returns the handle through max_pooling_op_out.
enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
uint32_t input_padding_top,
uint32_t input_padding_right,
uint32_t input_padding_bottom,
uint32_t input_padding_left,
uint32_t pooling_height,
uint32_t pooling_width,
uint32_t stride_height,
uint32_t stride_width,
uint32_t dilation_height,
uint32_t dilation_width,
uint8_t output_min,
uint8_t output_max,
uint32_t flags,
xnn_operator_t* max_pooling_op_out);
// Receives the input dimensions and pixel strides. The two *_out pointers are
// presumably filled with the resulting output size - TODO confirm whether they
// may be NULL.
enum xnn_status xnn_reshape_max_pooling2d_nhwc_u8(
xnn_operator_t max_pooling_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Receives the uint8_t input and output buffers.
enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
xnn_operator_t max_pooling_op,
const uint8_t* input,
uint8_t* output);
// maximum_nd_f16 operator family: create / reshape / setup.
// Two-input operator; data buffers are untyped (void*).
// NOTE(review): the two shapes are passed independently, so broadcasting is
// presumably supported - confirm the exact rules in the implementation.

// Receives only flags; returns the handle through maximum_op_out.
enum xnn_status xnn_create_maximum_nd_f16(
uint32_t flags,
xnn_operator_t* maximum_op_out);
// Receives the rank and shape array of each of the two inputs.
enum xnn_status xnn_reshape_maximum_nd_f16(
xnn_operator_t maximum_op,
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
pthreadpool_t threadpool);
// Receives the two input buffers and the output buffer.
enum xnn_status xnn_setup_maximum_nd_f16(
xnn_operator_t maximum_op,
const void* input1,
const void* input2,
void* output);
// maximum_nd_f32 operator family: create / reshape / setup, plus a run variant.
// NOTE(review): the two shapes are passed independently, so broadcasting is
// presumably supported - confirm the exact rules in the implementation.

// Receives only flags; returns the handle through maximum_op_out.
enum xnn_status xnn_create_maximum_nd_f32(
uint32_t flags,
xnn_operator_t* maximum_op_out);
// Receives the rank and shape array of each of the two inputs.
enum xnn_status xnn_reshape_maximum_nd_f32(
xnn_operator_t maximum_op,
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
pthreadpool_t threadpool);
// Receives the two float input buffers and the float output buffer.
enum xnn_status xnn_setup_maximum_nd_f32(
xnn_operator_t maximum_op,
const float* input1,
const float* input2,
float* output);
// Single-call variant: takes shapes, buffers and flags together and has no
// xnn_operator_t parameter.
enum xnn_status xnn_run_maximum_nd_f32(
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
const float* input1,
const float* input2,
float* output,
uint32_t flags,
pthreadpool_t threadpool);
// mean_nd_f16 operator family: create / reshape / setup.
// Reshape takes the list of axes to reduce over plus the input shape, and
// setup takes a workspace; the data buffers are untyped (void*).
// NOTE(review): workspace_size and workspace_alignment are non-const pointers,
// presumably outputs describing the buffer that setup expects - confirm.

// Receives only flags; returns the handle through mean_op_out.
enum xnn_status xnn_create_mean_nd_f16(
uint32_t flags,
xnn_operator_t* mean_op_out);
// Receives the reduction axes, the input shape and the workspace
// size / alignment pointers.
enum xnn_status xnn_reshape_mean_nd_f16(
xnn_operator_t mean_op,
size_t num_reduction_axes,
const size_t* reduction_axes,
size_t num_input_dims,
const size_t* input_shape,
size_t* workspace_size,
size_t* workspace_alignment,
pthreadpool_t threadpool);
// Receives the caller-provided workspace and the input and output buffers.
enum xnn_status xnn_setup_mean_nd_f16(
xnn_operator_t mean_op,
void* workspace,
const void* input,
void* output);
// mean_nd_f32 operator family: create / reshape / setup.
// Reshape takes the list of axes to reduce over plus the input shape. Unlike
// the mean_nd_f16 declarations in this header, no workspace parameters appear
// in reshape or setup.

// Receives only flags; returns the handle through mean_op_out.
enum xnn_status xnn_create_mean_nd_f32(
uint32_t flags,
xnn_operator_t* mean_op_out);
// Receives the reduction axes and the input shape.
enum xnn_status xnn_reshape_mean_nd_f32(
xnn_operator_t mean_op,
size_t num_reduction_axes,
const size_t* reduction_axes,
size_t num_input_dims,
const size_t* input_shape,
pthreadpool_t threadpool);
// Receives the float input and output buffers.
enum xnn_status xnn_setup_mean_nd_f32(
xnn_operator_t mean_op,
const float* input,
float* output);
// minimum_nd_f16 operator family: create / reshape / setup.
// Two-input operator; data buffers are untyped (void*).
// NOTE(review): the two shapes are passed independently, so broadcasting is
// presumably supported - confirm the exact rules in the implementation.

// Receives only flags; returns the handle through minimum_op_out.
enum xnn_status xnn_create_minimum_nd_f16(
uint32_t flags,
xnn_operator_t* minimum_op_out);
// Receives the rank and shape array of each of the two inputs.
enum xnn_status xnn_reshape_minimum_nd_f16(
xnn_operator_t minimum_op,
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
pthreadpool_t threadpool);
// Receives the two input buffers and the output buffer.
enum xnn_status xnn_setup_minimum_nd_f16(
xnn_operator_t minimum_op,
const void* input1,
const void* input2,
void* output);
// minimum_nd_f32 operator family: create / reshape / setup, plus a run variant.
// NOTE(review): the two shapes are passed independently, so broadcasting is
// presumably supported - confirm the exact rules in the implementation.

// Receives only flags; returns the handle through minimum_op_out.
enum xnn_status xnn_create_minimum_nd_f32(
uint32_t flags,
xnn_operator_t* minimum_op_out);
// Receives the rank and shape array of each of the two inputs.
enum xnn_status xnn_reshape_minimum_nd_f32(
xnn_operator_t minimum_op,
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
pthreadpool_t threadpool);
// Receives the two float input buffers and the float output buffer.
enum xnn_status xnn_setup_minimum_nd_f32(
xnn_operator_t minimum_op,
const float* input1,
const float* input2,
float* output);
// Single-call variant: takes shapes, buffers and flags together and has no
// xnn_operator_t parameter.
enum xnn_status xnn_run_minimum_nd_f32(
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
const float* input1,
const float* input2,
float* output,
uint32_t flags,
pthreadpool_t threadpool);
// multiply_nd_f16 operator family: create / reshape / setup.
// The output clamp bounds are passed as float even in this f16 variant, while
// the data buffers are untyped (void*).

// Receives the output clamp range and flags; returns the handle through
// multiply_op_out.
enum xnn_status xnn_create_multiply_nd_f16(
float output_min,
float output_max,
uint32_t flags,
xnn_operator_t* multiply_op_out);
// Receives the rank and shape array of each of the two inputs.
enum xnn_status xnn_reshape_multiply_nd_f16(
xnn_operator_t multiply_op,
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
pthreadpool_t threadpool);
// Receives the two input buffers and the output buffer.
enum xnn_status xnn_setup_multiply_nd_f16(
xnn_operator_t multiply_op,
const void* input1,
const void* input2,
void* output);
// multiply_nd_f32 operator family: create / reshape / setup, plus a run
// variant.

// Receives the output clamp range and flags; returns the handle through
// multiply_op_out.
enum xnn_status xnn_create_multiply_nd_f32(
float output_min,
float output_max,
uint32_t flags,
xnn_operator_t* multiply_op_out);
// Receives the rank and shape array of each of the two inputs.
enum xnn_status xnn_reshape_multiply_nd_f32(
xnn_operator_t multiply_op,
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
pthreadpool_t threadpool);
// Receives the two float input buffers and the float output buffer.
enum xnn_status xnn_setup_multiply_nd_f32(
xnn_operator_t multiply_op,
const float* input1,
const float* input2,
float* output);
// Single-call variant: takes shapes, buffers, clamp range and flags together
// and has no xnn_operator_t parameter.
enum xnn_status xnn_run_multiply_nd_f32(
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
const float* input1,
const float* input2,
float* output,
float output_min,
float output_max,
uint32_t flags,
pthreadpool_t threadpool);
// multiply_nd_qs8 operator family: create / reshape / setup, plus a run
// variant. Data is int8_t; each input and the output has its own zero point /
// scale pair, and the clamp range is int8_t.

// Receives the quantization parameters, clamp range and flags; returns the
// handle through multiply_op_out.
enum xnn_status xnn_create_multiply_nd_qs8(
int8_t input1_zero_point,
float input1_scale,
int8_t input2_zero_point,
float input2_scale,
int8_t output_zero_point,
float output_scale,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_operator_t* multiply_op_out);
// Receives the rank and shape array of each of the two inputs.
enum xnn_status xnn_reshape_multiply_nd_qs8(
xnn_operator_t multiply_op,
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
pthreadpool_t threadpool);
// Receives the two int8_t input buffers and the int8_t output buffer.
enum xnn_status xnn_setup_multiply_nd_qs8(
xnn_operator_t multiply_op,
const int8_t* input1,
const int8_t* input2,
int8_t* output);
// Single-call variant with no xnn_operator_t parameter. Here each input's
// quantization parameters directly follow that input's shape, which differs
// from the grouping used by create.
enum xnn_status xnn_run_multiply_nd_qs8(
size_t num_input1_dims,
const size_t* input1_shape,
int8_t input1_zero_point,
float input1_scale,
size_t num_input2_dims,
const size_t* input2_shape,
int8_t input2_zero_point,
float input2_scale,
const int8_t* input1,
const int8_t* input2,
int8_t* output,
int8_t output_zero_point,
float output_scale,
int8_t output_min,
int8_t output_max,
uint32_t flags,
pthreadpool_t threadpool);
// multiply_nd_qu8 operator family: create / reshape / setup, plus a run
// variant. Data is uint8_t; each input and the output has its own zero point /
// scale pair, and the clamp range is uint8_t.

// Receives the quantization parameters, clamp range and flags; returns the
// handle through multiply_op_out.
enum xnn_status xnn_create_multiply_nd_qu8(
uint8_t input1_zero_point,
float input1_scale,
uint8_t input2_zero_point,
float input2_scale,
uint8_t output_zero_point,
float output_scale,
uint8_t output_min,
uint8_t output_max,
uint32_t flags,
xnn_operator_t* multiply_op_out);
// Receives the rank and shape array of each of the two inputs.
enum xnn_status xnn_reshape_multiply_nd_qu8(
xnn_operator_t multiply_op,
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
pthreadpool_t threadpool);
// Receives the two uint8_t input buffers and the uint8_t output buffer.
enum xnn_status xnn_setup_multiply_nd_qu8(
xnn_operator_t multiply_op,
const uint8_t* input1,
const uint8_t* input2,
uint8_t* output);
// Single-call variant with no xnn_operator_t parameter. Here each input's
// quantization parameters directly follow that input's shape, which differs
// from the grouping used by create.
enum xnn_status xnn_run_multiply_nd_qu8(
size_t num_input1_dims,
const size_t* input1_shape,
uint8_t input1_zero_point,
float input1_scale,
size_t num_input2_dims,
const size_t* input2_shape,
uint8_t input2_zero_point,
float input2_scale,
const uint8_t* input1,
const uint8_t* input2,
uint8_t* output,
uint8_t output_zero_point,
float output_scale,
uint8_t output_min,
uint8_t output_max,
uint32_t flags,
pthreadpool_t threadpool);
// negate_nc_f16 operator family: create / reshape / setup.
// Data buffers are untyped (void*) in this f16 variant.

// Receives only flags; returns the handle through negate_op_out.
enum xnn_status xnn_create_negate_nc_f16(
uint32_t flags,
xnn_operator_t* negate_op_out);
// Receives batch size, channel count and the two strides.
enum xnn_status xnn_reshape_negate_nc_f16(
xnn_operator_t negate_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Receives the input and output buffers.
enum xnn_status xnn_setup_negate_nc_f16(
xnn_operator_t negate_op,
const void* input,
void* output);
// negate_nc_f32 operator family: create / reshape / setup, plus a run variant.

// Receives only flags; returns the handle through negate_op_out.
enum xnn_status xnn_create_negate_nc_f32(
uint32_t flags,
xnn_operator_t* negate_op_out);
// Receives batch size, channel count and the two strides.
enum xnn_status xnn_reshape_negate_nc_f32(
xnn_operator_t negate_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Receives the float input and output buffers.
enum xnn_status xnn_setup_negate_nc_f32(
xnn_operator_t negate_op,
const float* input,
float* output);
// Single-call variant with no xnn_operator_t parameter. Note the parameter
// order: channels and strides come before batch_size here, unlike reshape.
enum xnn_status xnn_run_negate_nc_f32(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const float* input,
float* output,
uint32_t flags,
pthreadpool_t threadpool);
// Declarations for the `prelu_nc_f16` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: channel counts and strides are fixed at creation time (they are not
// reshape arguments). `negative_slope` is a read-only untyped pointer;
// presumably it holds `slope_channels` half-precision values -- TODO confirm.
// Also takes a code cache and a weights cache; whether either may be NULL is
// not visible here.
enum xnn_status xnn_create_prelu_nc_f16(
size_t input_channels,
size_t slope_channels,
size_t input_stride,
size_t output_stride,
const void* negative_slope,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* prelu_op_out);
// Reshape: only the batch size is supplied here.
enum xnn_status xnn_reshape_prelu_nc_f16(
xnn_operator_t prelu_op,
size_t batch_size,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_prelu_nc_f16(
xnn_operator_t prelu_op,
const void* input,
void* output);
// Declarations for the `prelu_nc_f32` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: channel counts and strides are fixed at creation time (they are not
// reshape arguments). `negative_slope` is a read-only float array; presumably
// it holds `slope_channels` values -- TODO confirm. Also takes a code cache
// and a weights cache; whether either may be NULL is not visible here.
enum xnn_status xnn_create_prelu_nc_f32(
size_t input_channels,
size_t slope_channels,
size_t input_stride,
size_t output_stride,
const float* negative_slope,
uint32_t flags,
xnn_code_cache_t code_cache,
xnn_weights_cache_t weights_cache,
xnn_operator_t* prelu_op_out);
// Reshape: only the batch size is supplied here.
enum xnn_status xnn_reshape_prelu_nc_f32(
xnn_operator_t prelu_op,
size_t batch_size,
pthreadpool_t threadpool);
// Setup: takes a read-only float input pointer and a writable float output
// pointer.
enum xnn_status xnn_setup_prelu_nc_f32(
xnn_operator_t prelu_op,
const float* input,
float* output);
// Declarations for the `resize_bilinear2d_nchw_f32` operator: create /
// reshape / setup. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: the target output height and width are fixed at creation time.
enum xnn_status xnn_create_resize_bilinear2d_nchw_f32(
size_t output_height,
size_t output_width,
uint32_t flags,
xnn_operator_t* resize_op_out);
// Reshape: takes batch size, input spatial size, channel count and pixel
// strides. Unlike the NHWC variants in this header, no workspace size or
// alignment is reported.
enum xnn_status xnn_reshape_resize_bilinear2d_nchw_f32(
xnn_operator_t resize_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
pthreadpool_t threadpool);
// Setup: takes a read-only float input pointer and a writable float output
// pointer; no workspace argument.
enum xnn_status xnn_setup_resize_bilinear2d_nchw_f32(
xnn_operator_t resize_op,
const float* input,
float* output);
// Declarations for the `resize_bilinear2d_nchw_f16` operator: create /
// reshape / setup. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: the target output height and width are fixed at creation time.
enum xnn_status xnn_create_resize_bilinear2d_nchw_f16(
size_t output_height,
size_t output_width,
uint32_t flags,
xnn_operator_t* resize_op_out);
// Reshape: takes batch size, input spatial size, channel count and pixel
// strides. Unlike the NHWC variants in this header, no workspace size or
// alignment is reported.
enum xnn_status xnn_reshape_resize_bilinear2d_nchw_f16(
xnn_operator_t resize_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only. No workspace argument.
enum xnn_status xnn_setup_resize_bilinear2d_nchw_f16(
xnn_operator_t resize_op,
const void* input,
void* output);
// Declarations for the `resize_bilinear2d_nhwc_f16` operator: create /
// reshape / setup. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: the target output height and width are fixed at creation time.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f16(
size_t output_height,
size_t output_width,
uint32_t flags,
xnn_operator_t* resize_op_out);
// Reshape: besides the input geometry, takes two size_t out-pointers,
// `workspace_size` and `workspace_alignment`. Presumably these report the
// requirements for the `workspace` buffer later passed to setup -- TODO
// confirm.
enum xnn_status xnn_reshape_resize_bilinear2d_nhwc_f16(
xnn_operator_t resize_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
size_t* workspace_size,
size_t* workspace_alignment,
pthreadpool_t threadpool);
// Setup: takes a writable workspace buffer in addition to the untyped input
// (read-only) and output pointers.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f16(
xnn_operator_t resize_op,
void* workspace,
const void* input,
void* output);
// Declarations for the `resize_bilinear2d_nhwc_f32` operator: create /
// reshape / setup. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: the target output height and width are fixed at creation time.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
size_t output_height,
size_t output_width,
uint32_t flags,
xnn_operator_t* resize_op_out);
// Reshape: besides the input geometry, takes two size_t out-pointers,
// `workspace_size` and `workspace_alignment`. Presumably these report the
// requirements for the `workspace` buffer later passed to setup -- TODO
// confirm.
enum xnn_status xnn_reshape_resize_bilinear2d_nhwc_f32(
xnn_operator_t resize_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
size_t* workspace_size,
size_t* workspace_alignment,
pthreadpool_t threadpool);
// Setup: takes a writable workspace buffer in addition to the float input
// (read-only) and output pointers.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
xnn_operator_t resize_op,
void* workspace,
const float* input,
float* output);
// Create call for the `resize_bilinear2d_nhwc_s8` operator. Returns an enum
// xnn_status. The target output height and width are fixed at creation time;
// `resize_op_out` is presumably the out-parameter for the new operator
// handle -- TODO confirm against the implementation.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_s8(
size_t output_height,
size_t output_width,
uint32_t flags,
xnn_operator_t* resize_op_out);
// Reshape call for the `resize_bilinear2d_nhwc_s8` operator. Returns an enum
// xnn_status.
//
// Takes the batch size, input spatial size, channel count and pixel strides,
// plus two size_t out-pointers. The second out-pointer is named
// `workspace_alignment`, matching the f16/f32/u8 reshape declarations in this
// header; it is distinct from the `void* workspace` buffer that the matching
// setup call takes.
// NOTE(review): presumably the two out-pointers report the size and alignment
// required for that workspace buffer -- confirm against the implementation.
enum xnn_status xnn_reshape_resize_bilinear2d_nhwc_s8(
xnn_operator_t resize_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
size_t* workspace_size,
size_t* workspace_alignment,
pthreadpool_t threadpool);
// Setup call for the `resize_bilinear2d_nhwc_s8` operator. Returns an enum
// xnn_status. Takes a writable workspace buffer in addition to the int8_t
// input (read-only) and output pointers.
// NOTE(review): workspace size/alignment requirements are not visible here.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_s8(
xnn_operator_t resize_op,
void* workspace,
const int8_t* input,
int8_t* output);
// Declarations for the `resize_bilinear2d_nhwc_u8` operator: create /
// reshape / setup. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: the target output height and width are fixed at creation time.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_u8(
size_t output_height,
size_t output_width,
uint32_t flags,
xnn_operator_t* resize_op_out);
// Reshape: besides the input geometry, takes two size_t out-pointers,
// `workspace_size` and `workspace_alignment`. Presumably these report the
// requirements for the `workspace` buffer later passed to setup -- TODO
// confirm.
enum xnn_status xnn_reshape_resize_bilinear2d_nhwc_u8(
xnn_operator_t resize_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
size_t* workspace_size,
size_t* workspace_alignment,
pthreadpool_t threadpool);
// Setup: takes a writable workspace buffer in addition to the uint8_t input
// (read-only) and output pointers.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_u8(
xnn_operator_t resize_op,
void* workspace,
const uint8_t* input,
uint8_t* output);
// Declarations for the `rope_nthc_f16` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes `max_tokens` and flags. NOTE(review): presumably an upper
// bound for the `tokens` value later given to reshape -- verify.
enum xnn_status xnn_create_rope_nthc_f16(
size_t max_tokens,
uint32_t flags,
xnn_operator_t* rope_op_out);
// Reshape: takes batch size, token count, head count and channel count.
enum xnn_status xnn_reshape_rope_nthc_f16(
xnn_operator_t rope_op,
size_t batch_size,
size_t tokens,
size_t heads,
size_t channels,
pthreadpool_t threadpool);
// Setup: input, weights and output are untyped (void) pointers; input and
// weights are read-only. The expected layout of `weights` is not visible here.
enum xnn_status xnn_setup_rope_nthc_f16(
xnn_operator_t rope_op,
const void* input,
const void* weights,
void* output);
// Declarations for the `rope_nthc_f32` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes `max_tokens` and flags. NOTE(review): presumably an upper
// bound for the `tokens` value later given to reshape -- verify.
enum xnn_status xnn_create_rope_nthc_f32(
size_t max_tokens,
uint32_t flags,
xnn_operator_t* rope_op_out);
// Reshape: takes batch size, token count, head count and channel count.
enum xnn_status xnn_reshape_rope_nthc_f32(
xnn_operator_t rope_op,
size_t batch_size,
size_t tokens,
size_t heads,
size_t channels,
pthreadpool_t threadpool);
// Setup: float input, weights and output pointers; input and weights are
// read-only. The expected layout of `weights` is not visible here.
enum xnn_status xnn_setup_rope_nthc_f32(
xnn_operator_t rope_op,
const float* input,
const float* weights,
float* output);
// Declarations for the `scaled_dot_product_attention_nhtc_f16` operator:
// create / reshape / setup. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes a logits-cap type and an untyped read-only `cap_params`
// pointer. NOTE(review): the concrete type behind `cap_params` presumably
// depends on `cap_type` -- verify.
enum xnn_status xnn_create_scaled_dot_product_attention_nhtc_f16(
enum xnn_attention_logits_cap_type cap_type,
const void* cap_params,
uint32_t flags,
xnn_operator_t* attention_op_out);
// Reshape: query and key/value have separate head and token counts. Also
// takes `workspace_size` / `workspace_alignment` out-pointers, presumably
// describing the `workspace` buffer passed to setup -- TODO confirm.
enum xnn_status xnn_reshape_scaled_dot_product_attention_nhtc_f16(
xnn_operator_t attention_op,
size_t batch_size,
size_t query_heads,
size_t query_tokens,
size_t key_value_heads,
size_t key_value_tokens,
size_t query_key_channels,
size_t value_channels,
size_t* workspace_size,
size_t* workspace_alignment,
pthreadpool_t threadpool);
// Setup: takes a writable workspace, five read-only untyped inputs (query,
// key, value, scale, mask) and a writable untyped output. The shapes of
// `scale` and `mask` are not visible here.
enum xnn_status xnn_setup_scaled_dot_product_attention_nhtc_f16(
xnn_operator_t attention_op,
void* workspace,
const void* query,
const void* key,
const void* value,
const void* scale,
const void* mask,
void* output);
// Declarations for the `scaled_dot_product_attention_nhtc_f32` operator:
// create / reshape / setup. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes a logits-cap type and an untyped read-only `cap_params`
// pointer. NOTE(review): the concrete type behind `cap_params` presumably
// depends on `cap_type` -- verify.
enum xnn_status xnn_create_scaled_dot_product_attention_nhtc_f32(
enum xnn_attention_logits_cap_type cap_type,
const void* cap_params,
uint32_t flags,
xnn_operator_t* attention_op_out);
// Reshape: query and key/value have separate head and token counts. Also
// takes `workspace_size` / `workspace_alignment` out-pointers, presumably
// describing the `workspace` buffer passed to setup -- TODO confirm.
enum xnn_status xnn_reshape_scaled_dot_product_attention_nhtc_f32(
xnn_operator_t attention_op,
size_t batch_size,
size_t query_heads,
size_t query_tokens,
size_t key_value_heads,
size_t key_value_tokens,
size_t query_key_channels,
size_t value_channels,
size_t* workspace_size,
size_t* workspace_alignment,
pthreadpool_t threadpool);
// Setup: takes a writable workspace, five read-only float inputs (query, key,
// value, scale, mask) and a writable float output. The shapes of `scale` and
// `mask` are not visible here.
enum xnn_status xnn_setup_scaled_dot_product_attention_nhtc_f32(
xnn_operator_t attention_op,
void* workspace,
const float* query,
const float* key,
const float* value,
const float* scale,
const float* mask,
float* output);
// Declarations for the `sigmoid_nc_f16` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `sigmoid_op_out` is presumably the out-parameter
// for the new operator handle.
enum xnn_status xnn_create_sigmoid_nc_f16(
uint32_t flags,
xnn_operator_t* sigmoid_op_out);
// Reshape: takes batch size, channel count and input/output strides.
// NOTE(review): stride units (elements vs. bytes) are not visible here.
enum xnn_status xnn_reshape_sigmoid_nc_f16(
xnn_operator_t sigmoid_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_sigmoid_nc_f16(
xnn_operator_t sigmoid_op,
const void* input,
void* output);
// Declarations for the `sigmoid_nc_f32` operator: create / reshape / setup,
// plus a one-shot `run` variant. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `sigmoid_op_out` is presumably the out-parameter
// for the new operator handle.
enum xnn_status xnn_create_sigmoid_nc_f32(
uint32_t flags,
xnn_operator_t* sigmoid_op_out);
// Reshape: takes batch size, channel count and input/output strides.
// NOTE(review): stride units (elements vs. bytes) are not visible here.
enum xnn_status xnn_reshape_sigmoid_nc_f32(
xnn_operator_t sigmoid_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup: takes a read-only float input pointer and a writable float output
// pointer.
enum xnn_status xnn_setup_sigmoid_nc_f32(
xnn_operator_t sigmoid_op,
const float* input,
float* output);
// Run: single call with no operator handle. Note the argument order differs
// from reshape: channels and strides come first, batch_size follows them.
enum xnn_status xnn_run_sigmoid_nc_f32(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const float* input,
float* output,
uint32_t flags,
pthreadpool_t threadpool);
// Declarations for the `sigmoid_nc_qs8` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes an int8_t zero point and float scale for input and output,
// plus an int8_t output_min/output_max pair and flags.
enum xnn_status xnn_create_sigmoid_nc_qs8(
int8_t input_zero_point,
float input_scale,
int8_t output_zero_point,
float output_scale,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_operator_t* sigmoid_op_out);
// Reshape: takes batch size, channel count and input/output strides.
// NOTE(review): stride units (elements vs. bytes) are not visible here.
enum xnn_status xnn_reshape_sigmoid_nc_qs8(
xnn_operator_t sigmoid_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup: takes a read-only int8_t input pointer and a writable int8_t output
// pointer.
enum xnn_status xnn_setup_sigmoid_nc_qs8(
xnn_operator_t sigmoid_op,
const int8_t* input,
int8_t* output);
// Declarations for the `sigmoid_nc_qu8` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes a uint8_t zero point and float scale for input and output,
// plus a uint8_t output_min/output_max pair and flags.
enum xnn_status xnn_create_sigmoid_nc_qu8(
uint8_t input_zero_point,
float input_scale,
uint8_t output_zero_point,
float output_scale,
uint8_t output_min,
uint8_t output_max,
uint32_t flags,
xnn_operator_t* sigmoid_op_out);
// Reshape: takes batch size, channel count and input/output strides.
// NOTE(review): stride units (elements vs. bytes) are not visible here.
enum xnn_status xnn_reshape_sigmoid_nc_qu8(
xnn_operator_t sigmoid_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup: takes a read-only uint8_t input pointer and a writable uint8_t
// output pointer.
enum xnn_status xnn_setup_sigmoid_nc_qu8(
xnn_operator_t sigmoid_op,
const uint8_t* input,
uint8_t* output);
// Declarations for the `slice_nd_x16` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `slice_op_out` is presumably the out-parameter
// for the new operator handle.
enum xnn_status xnn_create_slice_nd_x16(
uint32_t flags,
xnn_operator_t* slice_op_out);
// Reshape: takes `num_dims` and three read-only size_t arrays (input_shape,
// offsets, sizes). Presumably each array holds `num_dims` entries -- TODO
// confirm.
enum xnn_status xnn_reshape_slice_nd_x16(
xnn_operator_t slice_op,
size_t num_dims,
const size_t* input_shape,
const size_t* offsets,
const size_t* sizes,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_slice_nd_x16(
xnn_operator_t slice_op,
const void* input,
void* output);
// Declarations for the `slice_nd_x32` operator: create / reshape / setup,
// plus a one-shot `run` variant. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `slice_op_out` is presumably the out-parameter
// for the new operator handle.
enum xnn_status xnn_create_slice_nd_x32(
uint32_t flags,
xnn_operator_t* slice_op_out);
// Reshape: takes `num_dims` and three read-only size_t arrays (input_shape,
// offsets, sizes). Presumably each array holds `num_dims` entries -- TODO
// confirm.
enum xnn_status xnn_reshape_slice_nd_x32(
xnn_operator_t slice_op,
size_t num_dims,
const size_t* input_shape,
const size_t* offsets,
const size_t* sizes,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_slice_nd_x32(
xnn_operator_t slice_op,
const void* input,
void* output);
// Run: single call that takes the slice description, data pointers, flags and
// threadpool together; no operator handle is passed.
enum xnn_status xnn_run_slice_nd_x32(
size_t num_dims,
const size_t* input_shape,
const size_t* offsets,
const size_t* sizes,
const void* input,
void* output,
uint32_t flags,
pthreadpool_t threadpool);
// Declarations for the `softmax_nc_f16` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `softmax_op_out` is presumably the out-parameter
// for the new operator handle.
enum xnn_status xnn_create_softmax_nc_f16(
uint32_t flags,
xnn_operator_t* softmax_op_out);
// Reshape: note the parameter order -- channels and strides come first and
// batch_size comes last, unlike most other `_nc_` reshape declarations in this
// header, which take batch_size first.
enum xnn_status xnn_reshape_softmax_nc_f16(
xnn_operator_t softmax_op,
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_softmax_nc_f16(
xnn_operator_t softmax_op,
const void* input,
void* output);
// Declarations for the `softmax_nc_f32` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `softmax_op_out` is presumably the out-parameter
// for the new operator handle.
enum xnn_status xnn_create_softmax_nc_f32(
uint32_t flags,
xnn_operator_t* softmax_op_out);
// Reshape: note the parameter order -- channels and strides come first and
// batch_size comes last, unlike most other `_nc_` reshape declarations in this
// header, which take batch_size first.
enum xnn_status xnn_reshape_softmax_nc_f32(
xnn_operator_t softmax_op,
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
pthreadpool_t threadpool);
// Setup: takes a read-only float input pointer and a writable float output
// pointer.
enum xnn_status xnn_setup_softmax_nc_f32(
xnn_operator_t softmax_op,
const float* input,
float* output);
// Declarations for the `softmax_nc_qu8` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes an input scale, an output zero point and an output scale.
// There is no input zero point parameter in this signature.
enum xnn_status xnn_create_softmax_nc_qu8(
float input_scale,
uint8_t output_zero_point,
float output_scale,
uint32_t flags,
xnn_operator_t* softmax_op_out);
// Reshape: note the parameter order -- channels and strides come first and
// batch_size comes last.
enum xnn_status xnn_reshape_softmax_nc_qu8(
xnn_operator_t softmax_op,
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
pthreadpool_t threadpool);
// Setup: takes a read-only uint8_t input pointer and a writable uint8_t
// output pointer.
enum xnn_status xnn_setup_softmax_nc_qu8(
xnn_operator_t softmax_op,
const uint8_t* input,
uint8_t* output);
// Declarations for the `space_to_depth_nhwc_x16` operator: create / reshape /
// setup. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: `block_size` is fixed at creation time.
enum xnn_status xnn_create_space_to_depth_nhwc_x16(
uint32_t block_size,
uint32_t flags,
xnn_operator_t* space_to_depth_op_out);
// Reshape: takes the input geometry and three size_t out-pointers
// (`output_height_out`, `output_width_out`, `output_channels_out`), presumably
// filled with the resulting output dimensions. Whether NULL is accepted for
// them is not visible here.
enum xnn_status xnn_reshape_space_to_depth_nhwc_x16(
xnn_operator_t space_to_depth_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t input_channels,
size_t* output_height_out,
size_t* output_width_out,
size_t* output_channels_out,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_space_to_depth_nhwc_x16(
xnn_operator_t space_to_depth_op,
const void* input,
void* output);
// Declarations for the `space_to_depth_nhwc_x32` operator: create / reshape /
// setup. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: `block_size` is fixed at creation time.
enum xnn_status xnn_create_space_to_depth_nhwc_x32(
uint32_t block_size,
uint32_t flags,
xnn_operator_t* space_to_depth_op_out);
// Reshape: takes the input geometry and three size_t out-pointers
// (`output_height_out`, `output_width_out`, `output_channels_out`), presumably
// filled with the resulting output dimensions. Whether NULL is accepted for
// them is not visible here.
enum xnn_status xnn_reshape_space_to_depth_nhwc_x32(
xnn_operator_t space_to_depth_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t input_channels,
size_t* output_height_out,
size_t* output_width_out,
size_t* output_channels_out,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_space_to_depth_nhwc_x32(
xnn_operator_t space_to_depth_op,
const void* input,
void* output);
// Declarations for the `square_nc_f16` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `square_op_out` is presumably the out-parameter
// for the new operator handle.
enum xnn_status xnn_create_square_nc_f16(
uint32_t flags,
xnn_operator_t* square_op_out);
// Reshape: takes batch size, channel count and input/output strides.
// NOTE(review): stride units (elements vs. bytes) are not visible here.
enum xnn_status xnn_reshape_square_nc_f16(
xnn_operator_t square_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_square_nc_f16(
xnn_operator_t square_op,
const void* input,
void* output);
// Declarations for the `square_nc_f32` operator: create / reshape / setup,
// plus a one-shot `run` variant. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `square_op_out` is presumably the out-parameter
// for the new operator handle.
enum xnn_status xnn_create_square_nc_f32(
uint32_t flags,
xnn_operator_t* square_op_out);
// Reshape: takes batch size, channel count and input/output strides.
// NOTE(review): stride units (elements vs. bytes) are not visible here.
enum xnn_status xnn_reshape_square_nc_f32(
xnn_operator_t square_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup: takes a read-only float input pointer and a writable float output
// pointer.
enum xnn_status xnn_setup_square_nc_f32(
xnn_operator_t square_op,
const float* input,
float* output);
// Run: single call with no operator handle. Note the argument order differs
// from reshape: channels and strides come first, batch_size follows them.
enum xnn_status xnn_run_square_nc_f32(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const float* input,
float* output,
uint32_t flags,
pthreadpool_t threadpool);
// Declarations for the `square_root_nc_f16` operator: create / reshape /
// setup. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `sqrt_op_out` is presumably the out-parameter for
// the new operator handle.
enum xnn_status xnn_create_square_root_nc_f16(
uint32_t flags,
xnn_operator_t* sqrt_op_out);
// Reshape: takes batch size, channel count and input/output strides.
// NOTE(review): stride units (elements vs. bytes) are not visible here.
enum xnn_status xnn_reshape_square_root_nc_f16(
xnn_operator_t sqrt_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_square_root_nc_f16(
xnn_operator_t sqrt_op,
const void* input,
void* output);
// Declarations for the `square_root_nc_f32` operator: create / reshape /
// setup, plus a one-shot `run` variant. Every function returns an enum
// xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `sqrt_op_out` is presumably the out-parameter for
// the new operator handle.
enum xnn_status xnn_create_square_root_nc_f32(
uint32_t flags,
xnn_operator_t* sqrt_op_out);
// Reshape: takes batch size, channel count and input/output strides.
// NOTE(review): stride units (elements vs. bytes) are not visible here.
enum xnn_status xnn_reshape_square_root_nc_f32(
xnn_operator_t sqrt_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup: takes a read-only float input pointer and a writable float output
// pointer.
enum xnn_status xnn_setup_square_root_nc_f32(
xnn_operator_t sqrt_op,
const float* input,
float* output);
// Run: single call with no operator handle. Note the argument order differs
// from reshape: channels and strides come first, batch_size follows them.
enum xnn_status xnn_run_square_root_nc_f32(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const float* input,
float* output,
uint32_t flags,
pthreadpool_t threadpool);
// Declarations for the `reciprocal_square_root_nc_f16` operator: create /
// reshape / setup. Every function returns an enum xnn_status.
// The operator parameters are named `sqrt_op` / `sqrt_op_out` here even though
// the functions belong to the reciprocal-square-root family.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `sqrt_op_out` is presumably the out-parameter for
// the new operator handle.
enum xnn_status xnn_create_reciprocal_square_root_nc_f16(
uint32_t flags,
xnn_operator_t* sqrt_op_out);
// Reshape: takes batch size, channel count and input/output strides.
// NOTE(review): stride units (elements vs. bytes) are not visible here.
enum xnn_status xnn_reshape_reciprocal_square_root_nc_f16(
xnn_operator_t sqrt_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_reciprocal_square_root_nc_f16(
xnn_operator_t sqrt_op,
const void* input,
void* output);
// Declarations for the `reciprocal_square_root_nc_f32` operator: create /
// reshape / setup, plus a one-shot `run` variant. Every function returns an
// enum xnn_status.
// The operator parameters are named `sqrt_op` / `sqrt_op_out` here even though
// the functions belong to the reciprocal-square-root family.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `sqrt_op_out` is presumably the out-parameter for
// the new operator handle.
enum xnn_status xnn_create_reciprocal_square_root_nc_f32(
uint32_t flags,
xnn_operator_t* sqrt_op_out);
// Reshape: takes batch size, channel count and input/output strides.
// NOTE(review): stride units (elements vs. bytes) are not visible here.
enum xnn_status xnn_reshape_reciprocal_square_root_nc_f32(
xnn_operator_t sqrt_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup: takes a read-only float input pointer and a writable float output
// pointer.
enum xnn_status xnn_setup_reciprocal_square_root_nc_f32(
xnn_operator_t sqrt_op,
const float* input,
float* output);
// Run: single call with no operator handle. Note the argument order differs
// from reshape: channels and strides come first, batch_size follows them.
enum xnn_status xnn_run_reciprocal_square_root_nc_f32(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const float* input,
float* output,
uint32_t flags,
pthreadpool_t threadpool);
// Declarations for the `squared_difference_nd_f16` operator: create /
// reshape / setup. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags (no output clamp range in this signature).
enum xnn_status xnn_create_squared_difference_nd_f16(
uint32_t flags,
xnn_operator_t* squared_difference_op_out);
// Reshape: takes a dimension count and a read-only shape array for each input.
// NOTE(review): the required relationship between the two shapes is not
// visible here.
enum xnn_status xnn_reshape_squared_difference_nd_f16(
xnn_operator_t squared_difference_op,
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
pthreadpool_t threadpool);
// Setup: both inputs and the output are untyped (void) pointers; the inputs
// are read-only.
enum xnn_status xnn_setup_squared_difference_nd_f16(
xnn_operator_t squared_difference_op,
const void* input1,
const void* input2,
void* output);
// Declarations for the `squared_difference_nd_f32` operator: create /
// reshape / setup, plus a one-shot `run` variant. Every function returns an
// enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags (no output clamp range in this signature).
enum xnn_status xnn_create_squared_difference_nd_f32(
uint32_t flags,
xnn_operator_t* squared_difference_op_out);
// Reshape: takes a dimension count and a read-only shape array for each input.
// NOTE(review): the required relationship between the two shapes is not
// visible here.
enum xnn_status xnn_reshape_squared_difference_nd_f32(
xnn_operator_t squared_difference_op,
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
pthreadpool_t threadpool);
// Setup: takes two read-only float input pointers and a writable float output
// pointer.
enum xnn_status xnn_setup_squared_difference_nd_f32(
xnn_operator_t squared_difference_op,
const float* input1,
const float* input2,
float* output);
// Run: single call that takes shapes, data pointers, flags and threadpool
// together; no operator handle is passed.
enum xnn_status xnn_run_squared_difference_nd_f32(
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
const float* input1,
const float* input2,
float* output,
uint32_t flags,
pthreadpool_t threadpool);
// Declarations for the `subtract_nd_f16` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes an output_min/output_max pair as float values (even for this
// f16 variant) plus flags. Presumably a clamp range for the result -- TODO
// confirm.
enum xnn_status xnn_create_subtract_nd_f16(
float output_min,
float output_max,
uint32_t flags,
xnn_operator_t* subtract_op_out);
// Reshape: takes a dimension count and a read-only shape array for each input.
// NOTE(review): the required relationship between the two shapes is not
// visible here.
enum xnn_status xnn_reshape_subtract_nd_f16(
xnn_operator_t subtract_op,
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
pthreadpool_t threadpool);
// Setup: both inputs and the output are untyped (void) pointers; the inputs
// are read-only. Which input is the minuend is not stated in this header --
// presumably input1.
enum xnn_status xnn_setup_subtract_nd_f16(
xnn_operator_t subtract_op,
const void* input1,
const void* input2,
void* output);
// Declarations for the `subtract_nd_f32` operator: create / reshape / setup,
// plus a one-shot `run` variant. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes a float output_min/output_max pair plus flags. Presumably a
// clamp range for the result -- TODO confirm.
enum xnn_status xnn_create_subtract_nd_f32(
float output_min,
float output_max,
uint32_t flags,
xnn_operator_t* subtract_op_out);
// Reshape: takes a dimension count and a read-only shape array for each input.
// NOTE(review): the required relationship between the two shapes is not
// visible here.
enum xnn_status xnn_reshape_subtract_nd_f32(
xnn_operator_t subtract_op,
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
pthreadpool_t threadpool);
// Setup: takes two read-only float input pointers and a writable float output
// pointer. Which input is the minuend is not stated in this header --
// presumably input1.
enum xnn_status xnn_setup_subtract_nd_f32(
xnn_operator_t subtract_op,
const float* input1,
const float* input2,
float* output);
// Run: single call that takes shapes, data pointers, output range, flags and
// threadpool together; no operator handle is passed.
enum xnn_status xnn_run_subtract_nd_f32(
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
const float* input1,
const float* input2,
float* output,
float output_min,
float output_max,
uint32_t flags,
pthreadpool_t threadpool);
// Declarations for the `subtract_nd_qs8` operator: create / reshape / setup,
// plus a one-shot `run` variant. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes an int8_t zero point and float scale for each of the two
// inputs and for the output, an int8_t output_min/output_max pair, and flags.
enum xnn_status xnn_create_subtract_nd_qs8(
int8_t input1_zero_point,
float input1_scale,
int8_t input2_zero_point,
float input2_scale,
int8_t output_zero_point,
float output_scale,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_operator_t* subtract_op_out);
// Reshape: takes a dimension count and a read-only shape array for each input.
// NOTE(review): the required relationship between the two shapes is not
// visible here.
enum xnn_status xnn_reshape_subtract_nd_qs8(
xnn_operator_t subtract_op,
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
pthreadpool_t threadpool);
// Setup: takes two read-only int8_t input pointers and a writable int8_t
// output pointer.
enum xnn_status xnn_setup_subtract_nd_qs8(
xnn_operator_t subtract_op,
const int8_t* input1,
const int8_t* input2,
int8_t* output);
// Run: single call that takes shapes, quantization parameters, data pointers,
// flags and threadpool together; no operator handle is passed.
enum xnn_status xnn_run_subtract_nd_qs8(
size_t num_input1_dims,
const size_t* input1_shape,
int8_t input1_zero_point,
float input1_scale,
size_t num_input2_dims,
const size_t* input2_shape,
int8_t input2_zero_point,
float input2_scale,
const int8_t* input1,
const int8_t* input2,
int8_t* output,
int8_t output_zero_point,
float output_scale,
int8_t output_min,
int8_t output_max,
uint32_t flags,
pthreadpool_t threadpool);
// Declarations for the `subtract_nd_qu8` operator: create / reshape / setup,
// plus a one-shot `run` variant. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes a uint8_t zero point and float scale for each of the two
// inputs and for the output, a uint8_t output_min/output_max pair, and flags.
enum xnn_status xnn_create_subtract_nd_qu8(
uint8_t input1_zero_point,
float input1_scale,
uint8_t input2_zero_point,
float input2_scale,
uint8_t output_zero_point,
float output_scale,
uint8_t output_min,
uint8_t output_max,
uint32_t flags,
xnn_operator_t* subtract_op_out);
// Reshape: takes a dimension count and a read-only shape array for each input.
// NOTE(review): the required relationship between the two shapes is not
// visible here.
enum xnn_status xnn_reshape_subtract_nd_qu8(
xnn_operator_t subtract_op,
size_t num_input1_dims,
const size_t* input1_shape,
size_t num_input2_dims,
const size_t* input2_shape,
pthreadpool_t threadpool);
// Setup: takes two read-only uint8_t input pointers and a writable uint8_t
// output pointer.
enum xnn_status xnn_setup_subtract_nd_qu8(
xnn_operator_t subtract_op,
const uint8_t* input1,
const uint8_t* input2,
uint8_t* output);
// Run: single call that takes shapes, quantization parameters, data pointers,
// flags and threadpool together; no operator handle is passed.
enum xnn_status xnn_run_subtract_nd_qu8(
size_t num_input1_dims,
const size_t* input1_shape,
uint8_t input1_zero_point,
float input1_scale,
size_t num_input2_dims,
const size_t* input2_shape,
uint8_t input2_zero_point,
float input2_scale,
const uint8_t* input1,
const uint8_t* input2,
uint8_t* output,
uint8_t output_zero_point,
float output_scale,
uint8_t output_min,
uint8_t output_max,
uint32_t flags,
pthreadpool_t threadpool);
// Declarations for the `tanh_nc_f16` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `tanh_op_out` is presumably the out-parameter for
// the new operator handle.
enum xnn_status xnn_create_tanh_nc_f16(
uint32_t flags,
xnn_operator_t* tanh_op_out);
// Reshape: takes batch size, channel count and input/output strides.
// NOTE(review): stride units (elements vs. bytes) are not visible here.
enum xnn_status xnn_reshape_tanh_nc_f16(
xnn_operator_t tanh_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_tanh_nc_f16(
xnn_operator_t tanh_op,
const void* input,
void* output);
// Declarations for the `tanh_nc_f32` operator: create / reshape / setup, plus
// a one-shot `run` variant. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `tanh_op_out` is presumably the out-parameter for
// the new operator handle.
enum xnn_status xnn_create_tanh_nc_f32(
uint32_t flags,
xnn_operator_t* tanh_op_out);
// Reshape: takes batch size, channel count and input/output strides.
// NOTE(review): stride units (elements vs. bytes) are not visible here.
enum xnn_status xnn_reshape_tanh_nc_f32(
xnn_operator_t tanh_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup: takes a read-only float input pointer and a writable float output
// pointer.
enum xnn_status xnn_setup_tanh_nc_f32(
xnn_operator_t tanh_op,
const float* input,
float* output);
// Run: single call with no operator handle. Note the argument order differs
// from reshape: channels and strides come first, batch_size follows them.
enum xnn_status xnn_run_tanh_nc_f32(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const float* input,
float* output,
uint32_t flags,
pthreadpool_t threadpool);
// Declarations for the `tanh_nc_qs8` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes an int8_t zero point and float scale for input and output,
// plus an int8_t output_min/output_max pair and flags.
enum xnn_status xnn_create_tanh_nc_qs8(
int8_t input_zero_point,
float input_scale,
int8_t output_zero_point,
float output_scale,
int8_t output_min,
int8_t output_max,
uint32_t flags,
xnn_operator_t* tanh_op_out);
// Reshape: takes batch size, channel count and input/output strides.
// NOTE(review): stride units (elements vs. bytes) are not visible here.
enum xnn_status xnn_reshape_tanh_nc_qs8(
xnn_operator_t tanh_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup: takes a read-only int8_t input pointer and a writable int8_t output
// pointer.
enum xnn_status xnn_setup_tanh_nc_qs8(
xnn_operator_t tanh_op,
const int8_t* input,
int8_t* output);
// Declarations for the `tanh_nc_qu8` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes a uint8_t zero point and float scale for input and output,
// plus a uint8_t output_min/output_max pair and flags.
enum xnn_status xnn_create_tanh_nc_qu8(
uint8_t input_zero_point,
float input_scale,
uint8_t output_zero_point,
float output_scale,
uint8_t output_min,
uint8_t output_max,
uint32_t flags,
xnn_operator_t* tanh_op_out);
// Reshape: takes batch size, channel count and input/output strides.
// NOTE(review): stride units (elements vs. bytes) are not visible here.
enum xnn_status xnn_reshape_tanh_nc_qu8(
xnn_operator_t tanh_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup: takes a read-only uint8_t input pointer and a writable uint8_t
// output pointer.
enum xnn_status xnn_setup_tanh_nc_qu8(
xnn_operator_t tanh_op,
const uint8_t* input,
uint8_t* output);
// Declarations for the `transpose_nd_x8` operator: create / reshape / setup,
// plus a one-shot `run` variant. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `transpose_op_out` is presumably the out-parameter
// for the new operator handle.
enum xnn_status xnn_create_transpose_nd_x8(
uint32_t flags,
xnn_operator_t* transpose_op_out);
// Reshape: takes `num_dims` and two read-only size_t arrays (input_shape,
// output_perm). Presumably each holds `num_dims` entries -- TODO confirm.
enum xnn_status xnn_reshape_transpose_nd_x8(
xnn_operator_t transpose_op,
size_t num_dims,
const size_t* input_shape,
const size_t* output_perm,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_transpose_nd_x8(
xnn_operator_t transpose_op,
const void* input,
void* output);
// Run: single call with no operator handle. Note the data pointers come first
// here, ahead of the shape and permutation arguments.
enum xnn_status xnn_run_transpose_nd_x8(
const void* input,
void* output,
size_t num_dims,
const size_t* input_shape,
const size_t* output_perm,
uint32_t flags,
pthreadpool_t threadpool);
// Declarations for the `transpose_nd_x16` operator: create / reshape / setup,
// plus a one-shot `run` variant. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `transpose_op_out` is presumably the out-parameter
// for the new operator handle.
enum xnn_status xnn_create_transpose_nd_x16(
uint32_t flags,
xnn_operator_t* transpose_op_out);
// Reshape: takes `num_dims` and two read-only size_t arrays (input_shape,
// output_perm). Presumably each holds `num_dims` entries -- TODO confirm.
enum xnn_status xnn_reshape_transpose_nd_x16(
xnn_operator_t transpose_op,
size_t num_dims,
const size_t* input_shape,
const size_t* output_perm,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_transpose_nd_x16(
xnn_operator_t transpose_op,
const void* input,
void* output);
// Run: single call with no operator handle. Note the data pointers come first
// here, ahead of the shape and permutation arguments.
enum xnn_status xnn_run_transpose_nd_x16(
const void* input,
void* output,
size_t num_dims,
const size_t* input_shape,
const size_t* output_perm,
uint32_t flags,
pthreadpool_t threadpool);
// Declarations for the `transpose_nd_x32` operator: create / reshape / setup,
// plus a one-shot `run` variant. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `transpose_op_out` is presumably the out-parameter
// for the new operator handle.
enum xnn_status xnn_create_transpose_nd_x32(
uint32_t flags,
xnn_operator_t* transpose_op_out);
// Reshape: takes `num_dims` and two read-only size_t arrays (input_shape,
// output_perm). Presumably each holds `num_dims` entries -- TODO confirm.
enum xnn_status xnn_reshape_transpose_nd_x32(
xnn_operator_t transpose_op,
size_t num_dims,
const size_t* input_shape,
const size_t* output_perm,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_transpose_nd_x32(
xnn_operator_t transpose_op,
const void* input,
void* output);
// Run: single call with no operator handle. Note the data pointers come first
// here, ahead of the shape and permutation arguments.
enum xnn_status xnn_run_transpose_nd_x32(
const void* input,
void* output,
size_t num_dims,
const size_t* input_shape,
const size_t* output_perm,
uint32_t flags,
pthreadpool_t threadpool);
// Declarations for the `transpose_nd_x64` operator: create / reshape / setup,
// plus a one-shot `run` variant. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `transpose_op_out` is presumably the out-parameter
// for the new operator handle.
enum xnn_status xnn_create_transpose_nd_x64(
uint32_t flags,
xnn_operator_t* transpose_op_out);
// Reshape: takes `num_dims` and two read-only size_t arrays (input_shape,
// output_perm). Presumably each holds `num_dims` entries -- TODO confirm.
enum xnn_status xnn_reshape_transpose_nd_x64(
xnn_operator_t transpose_op,
size_t num_dims,
const size_t* input_shape,
const size_t* output_perm,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_transpose_nd_x64(
xnn_operator_t transpose_op,
const void* input,
void* output);
// Run: single call with no operator handle. Note the data pointers come first
// here, ahead of the shape and permutation arguments.
enum xnn_status xnn_run_transpose_nd_x64(
const void* input,
void* output,
size_t num_dims,
const size_t* input_shape,
const size_t* output_perm,
uint32_t flags,
pthreadpool_t threadpool);
// Declarations for the `truncation_nc_f16` operator: create / reshape /
// setup. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `truncation_op_out` is presumably the
// out-parameter for the new operator handle.
enum xnn_status xnn_create_truncation_nc_f16(
uint32_t flags,
xnn_operator_t* truncation_op_out);
// Reshape: takes batch size, channel count and input/output strides.
// NOTE(review): stride units (elements vs. bytes) are not visible here.
enum xnn_status xnn_reshape_truncation_nc_f16(
xnn_operator_t truncation_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_truncation_nc_f16(
xnn_operator_t truncation_op,
const void* input,
void* output);
// Declarations for the `truncation_nc_f32` operator: create / reshape /
// setup, plus a one-shot `run` variant. Every function returns an enum
// xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `truncation_op_out` is presumably the
// out-parameter for the new operator handle.
enum xnn_status xnn_create_truncation_nc_f32(
uint32_t flags,
xnn_operator_t* truncation_op_out);
// Reshape: takes batch size, channel count and input/output strides.
// NOTE(review): stride units (elements vs. bytes) are not visible here.
enum xnn_status xnn_reshape_truncation_nc_f32(
xnn_operator_t truncation_op,
size_t batch_size,
size_t channels,
size_t input_stride,
size_t output_stride,
pthreadpool_t threadpool);
// Setup: takes a read-only float input pointer and a writable float output
// pointer.
enum xnn_status xnn_setup_truncation_nc_f32(
xnn_operator_t truncation_op,
const float* input,
float* output);
// Run: single call with no operator handle. Note the argument order differs
// from reshape: channels and strides come first, batch_size follows them.
enum xnn_status xnn_run_truncation_nc_f32(
size_t channels,
size_t input_stride,
size_t output_stride,
size_t batch_size,
const float* input,
float* output,
uint32_t flags,
pthreadpool_t threadpool);
// Declarations for the `unpooling2d_nhwc_x32` operator: create / reshape /
// setup. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: padding is given in the order top, right, bottom, left. Pooling
// window size, channel count and pixel strides are also fixed at creation
// time.
enum xnn_status xnn_create_unpooling2d_nhwc_x32(
uint32_t input_padding_top,
uint32_t input_padding_right,
uint32_t input_padding_bottom,
uint32_t input_padding_left,
uint32_t pooling_height,
uint32_t pooling_width,
size_t channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
uint32_t flags,
xnn_operator_t* unpooling_op_out);
// Reshape: takes the batch size and input spatial size, plus two size_t
// out-pointers presumably filled with the resulting output height and width.
// Whether NULL is accepted for them is not visible here.
enum xnn_status xnn_reshape_unpooling2d_nhwc_x32(
xnn_operator_t unpooling_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t* output_height_out,
size_t* output_width_out,
pthreadpool_t threadpool);
// Setup: besides the untyped input/output pointers, takes a read-only
// uint32_t `index` array. NOTE(review): the meaning and layout of `index` are
// not visible here.
enum xnn_status xnn_setup_unpooling2d_nhwc_x32(
xnn_operator_t unpooling_op,
const void* input,
const uint32_t* index,
void* output);
// Declarations for the `slice_nd_x8` operator: create / reshape / setup.
// Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: takes only flags; `slice_op_out` is presumably the out-parameter
// for the new operator handle.
enum xnn_status xnn_create_slice_nd_x8(
uint32_t flags,
xnn_operator_t* slice_op_out);
// Reshape: takes `num_dims` and three read-only size_t arrays (input_shape,
// offsets, sizes). Presumably each array holds `num_dims` entries -- TODO
// confirm.
enum xnn_status xnn_reshape_slice_nd_x8(
xnn_operator_t slice_op,
size_t num_dims,
const size_t* input_shape,
const size_t* offsets,
const size_t* sizes,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_slice_nd_x8(
xnn_operator_t slice_op,
const void* input,
void* output);
// Declarations for the `space_to_depth_nhwc_x8` operator: create / reshape /
// setup. Every function returns an enum xnn_status.
// NOTE(review): semantics are inferred from names only -- confirm against the
// implementation.

// Create: `block_size` is fixed at creation time.
enum xnn_status xnn_create_space_to_depth_nhwc_x8(
uint32_t block_size,
uint32_t flags,
xnn_operator_t* space_to_depth_op_out);
// Reshape: takes the input geometry and three size_t out-pointers
// (`output_height_out`, `output_width_out`, `output_channels_out`), presumably
// filled with the resulting output dimensions. Whether NULL is accepted for
// them is not visible here.
enum xnn_status xnn_reshape_space_to_depth_nhwc_x8(
xnn_operator_t space_to_depth_op,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t input_channels,
size_t* output_height_out,
size_t* output_width_out,
size_t* output_channels_out,
pthreadpool_t threadpool);
// Setup: input and output are passed as untyped (void) pointers; the input is
// read-only.
enum xnn_status xnn_setup_space_to_depth_nhwc_x8(
xnn_operator_t space_to_depth_op,
const void* input,
void* output);
#ifdef __cplusplus
}
#endif