ml_sse3.c | Explore in Territory

/*
 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <stdbool.h>
#include <assert.h>

#include "config/av1_rtcd.h"
#include "av1/encoder/ml.h"
#include "av1/encoder/x86/ml_sse3.h"

// In order to avoid the high latency of swapping between FPU and SIMD
// operations, we keep the result in a 128-bit register even though we only
// care about a single value.
//
//   inputs  - read-only float array.
//   weights - read-only float array.
//   output  - __m128 that holds the result (a full register, per the note
//             above, even though only one value is of interest).
//
// NOTE(review): the body is not visible in this view. Judging by the name,
// this presumably combines 8 inputs into 1 output value -- confirm against
// the implementation, including which lane carries the result.
static void nn_propagate_8to1(const float *const inputs,
                              const float *const weights,
                              __m128 *const output) { … }

// Non-static propagate helper taking the same parameter list as the 8to1
// variant: two read-only float arrays and one writable __m128.
//
//   inputs  - read-only float array.
//   weights - read-only float array.
//   output  - writable __m128 result register.
//
// NOTE(review): the body is not visible in this view. The name suggests a
// 4-input, 1-output step -- confirm against the implementation.
void av1_nn_propagate_4to1_sse3(const float *const inputs,
                                const float *const weights,
                                __m128 *const output) { … }

// Non-static propagate helper that, unlike the *to1 variants, also receives
// the layer's input count.
//
//   inputs     - read-only float array.
//   weights    - read-only float array.
//   outputs    - writable __m128 result register.
//   num_inputs - input count supplied by the caller.
//
// NOTE(review): the body is not visible in this view. The name suggests a
// 4-input, 4-output step, with num_inputs presumably used to stride through
// the weights -- confirm against the implementation.
void av1_nn_propagate_4to4_sse3(const float *const inputs,
                                const float *const weights,
                                __m128 *const outputs, const int num_inputs) { … }

// Non-static propagate helper that writes through two separate __m128
// pointers instead of one.
//
//   inputs     - read-only float array.
//   weights    - read-only float array.
//   out_h      - first writable __m128 result register.
//   out_l      - second writable __m128 result register.
//   num_inputs - input count supplied by the caller.
//
// NOTE(review): the body is not visible in this view. The name suggests a
// 4-input, 8-output step with the eight results split across out_h/out_l;
// which four outputs land in which register must be confirmed against the
// implementation.
void av1_nn_propagate_4to8_sse3(const float *const inputs,
                                const float *const weights, __m128 *const out_h,
                                __m128 *const out_l, const int num_inputs) { … }

// File-local propagate helper.
//
//   inputs     - read-only float array.
//   weights    - read-only float array.
//   outputs    - writable __m128 result register.
//   num_inputs - input count supplied by the caller.
//
// NOTE(review): the body is not visible in this view. The name suggests an
// 8-input, 4-output step -- confirm against the implementation.
static void nn_propagate_8to4(const float *const inputs,
                              const float *const weights, __m128 *const outputs,
                              const int num_inputs) { … }

// File-local helper operating on two __m128 registers passed by (non-const)
// pointer, so it is able to modify both in place.
// NOTE(review): the body is not visible in this view; presumably this applies
// the layer's activation function to eight values -- confirm which activation
// against the implementation.
static void nn_activate8(__m128 *out_h, __m128 *out_l) { … }

// File-local helper operating on a single __m128 passed by (non-const)
// pointer, so it is able to modify the register in place.
// NOTE(review): the body is not visible in this view; presumably this applies
// the layer's activation function to four values -- confirm which activation
// against the implementation.
static void nn_activate4(__m128 *x) { … }

// Calculate prediction based on the given input features and neural net config.
// Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden
// layer.
//
//   input_nodes - read-only float array of input features.
//   nn_config   - read-only network description (NN_CONFIG).
//   reduce_prec - integer flag supplied by the caller.
//   output      - float array that receives the prediction.
//
// NOTE(review): the body is not visible in this view. reduce_prec presumably
// requests reduced-precision output, and the required length of `output`
// presumably comes from nn_config -- confirm both against the implementation.
void av1_nn_predict_sse3(const float *input_nodes,
                         const NN_CONFIG *const nn_config, int reduce_prec,
                         float *const output) { … }

// Based on N. N. Schraudolph. A Fast, Compact Approximation of the Exponential
// Function. Neural Computation, 11(4):853–862, 1999.
//
// Takes one __m128 and returns one __m128.
// NOTE(review): the body is not visible in this view; presumably the result is
// an approximate exp() of each of the four float lanes of y -- confirm, along
// with the accuracy and valid input range, against the implementation.
static inline __m128 approx_exp(__m128 y) { … }

// Takes one __m128 and returns one __m128.
// NOTE(review): the body is not visible in this view; presumably this is a
// horizontal maximum over the four float lanes of reg -- confirm, including
// which lane(s) of the result hold the maximum.
static inline __m128 reduce_max(__m128 reg) { … }

// Takes one __m128 and returns one __m128.
// NOTE(review): the body is not visible in this view; presumably this is a
// horizontal sum over the four float lanes of reg -- confirm, including
// which lane(s) of the result hold the sum.
static inline __m128 reduce_sum(__m128 reg) { … }

// Non-static entry point reading from a const float array and writing to a
// float array.
//
//   input  - read-only float array.
//   output - writable float array.
//
// NOTE(review): the body is not visible in this view. The name suggests a
// softmax over exactly 16 values using an approximate exponential; the array
// lengths and any alignment requirement must be confirmed against the
// implementation.
void av1_nn_fast_softmax_16_sse3(const float *input, float *output) { … }
chromium/third_party/libaom/source/libaom/av1/encoder/x86/ml_sse3.c