#include <stdbool.h>
#include <assert.h>
#include "config/av1_rtcd.h"
#include "av1/encoder/ml.h"
#include "av1/encoder/x86/ml_sse3.h"
// File-local helper. Takes two read-only float arrays (`inputs`, `weights`)
// and a pointer to a single __m128 `output`; returns nothing.
// NOTE(review): the body is not visible in this view. The name suggests it
// combines 8 inputs with 8 weights toward one output node -- confirm, along
// with whether `output` is overwritten or accumulated into.
static void nn_propagate_8to1(const float *const inputs,
const float *const weights,
__m128 *const output) { … }
// Externally visible (non-static) routine. Takes two read-only float arrays
// (`inputs`, `weights`) and a pointer to a single __m128 `output`; returns
// nothing.
// NOTE(review): the body is not visible in this view. The name suggests it
// combines 4 inputs with 4 weights toward one output node -- confirm, along
// with any alignment requirement on `inputs`/`weights`.
void av1_nn_propagate_4to1_sse3(const float *const inputs,
const float *const weights,
__m128 *const output) { … }
// Externally visible (non-static) routine. Takes two read-only float arrays
// (`inputs`, `weights`), a pointer to __m128 `outputs`, and an integer
// `num_inputs`; returns nothing.
// NOTE(review): the body is not visible in this view. The name suggests 4
// inputs feeding 4 output nodes per call, with `num_inputs` presumably being
// the row stride of the weight matrix -- confirm against the caller.
void av1_nn_propagate_4to4_sse3(const float *const inputs,
const float *const weights,
__m128 *const outputs, const int num_inputs) { … }
// Externally visible (non-static) routine. Takes two read-only float arrays
// (`inputs`, `weights`), two __m128 pointers (`out_h`, `out_l`), and an
// integer `num_inputs`; returns nothing.
// NOTE(review): the body is not visible in this view. The name suggests 4
// inputs feeding 8 output nodes, with the 8 results split across `out_h` and
// `out_l` (presumably high/low halves) -- confirm which half maps to which
// output indices and how `num_inputs` is used.
void av1_nn_propagate_4to8_sse3(const float *const inputs,
const float *const weights, __m128 *const out_h,
__m128 *const out_l, const int num_inputs) { … }
// File-local helper. Takes two read-only float arrays (`inputs`, `weights`),
// a pointer to __m128 `outputs`, and an integer `num_inputs`; returns
// nothing.
// NOTE(review): the body is not visible in this view. The name suggests 8
// inputs feeding 4 output nodes per call, with `num_inputs` presumably being
// the row stride of the weight matrix -- confirm.
static void nn_propagate_8to4(const float *const inputs,
const float *const weights, __m128 *const outputs,
const int num_inputs) { … }
// File-local helper operating on two __m128 values through non-const
// pointers (`out_h`, `out_l`), so it can modify both in place.
// NOTE(review): the body is not visible in this view. The name suggests an
// activation function applied to 8 lanes; which activation (e.g. ReLU) must
// be confirmed from the implementation.
static void nn_activate8(__m128 *out_h, __m128 *out_l) { … }
// File-local helper operating on one __m128 value through a non-const
// pointer (`x`), so it can modify it in place.
// NOTE(review): the body is not visible in this view. The name suggests an
// activation function applied to 4 lanes; which activation (e.g. ReLU) must
// be confirmed from the implementation.
static void nn_activate4(__m128 *x) { … }
// Externally visible (non-static) entry point. Parameters:
//   input_nodes - read-only float array.
//   nn_config   - read-only NN_CONFIG describing the network (type declared
//                 outside this view).
//   reduce_prec - integer flag.
//   output      - writable float array for results.
// NOTE(review): the body is not visible in this view. Presumably this runs a
// forward pass of the network in `nn_config` over `input_nodes` and writes
// the final layer to `output`, with `reduce_prec` selecting reduced-precision
// output -- confirm, including the required length of `output`.
void av1_nn_predict_sse3(const float *input_nodes,
const NN_CONFIG *const nn_config, int reduce_prec,
float *const output) { … }
// File-local inline helper: takes one __m128 by value and returns an __m128.
// NOTE(review): the body is not visible in this view. The name suggests a
// per-lane approximation of exp(y); its accuracy and valid input range are
// not determinable from here -- confirm before relying on it.
static inline __m128 approx_exp(__m128 y) { … }
// File-local inline helper: takes one __m128 by value and returns an __m128.
// NOTE(review): the body is not visible in this view. The name suggests a
// horizontal maximum across the four lanes; whether the result is broadcast
// to every lane or held in only one lane must be confirmed.
static inline __m128 reduce_max(__m128 reg) { … }
// File-local inline helper: takes one __m128 by value and returns an __m128.
// NOTE(review): the body is not visible in this view. The name suggests a
// horizontal sum across the four lanes; whether the result is broadcast to
// every lane or held in only one lane must be confirmed.
static inline __m128 reduce_sum(__m128 reg) { … }
// Externally visible (non-static) routine. Reads from the read-only float
// array `input` and writes to the float array `output`; returns nothing.
// NOTE(review): the body is not visible in this view. The name suggests a
// fast (approximate) softmax over exactly 16 floats -- confirm the element
// count, any alignment requirement, and whether `input` and `output` may
// alias.
void av1_nn_fast_softmax_16_sse3(const float *input, float *output) { … }