chromium/third_party/webrtc/common_audio/vad/vad_core.c

/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "common_audio/vad/vad_core.h"

#include "rtc_base/sanitizer.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "common_audio/vad/vad_filterbank.h"
#include "common_audio/vad/vad_gmm.h"
#include "common_audio/vad/vad_sp.h"

// Constant tables for the Gaussian models.
//
// NOTE(review): every initializer in this section is missing (`=;`), so these
// declarations do not compile as shown. The values must be restored before
// this file can be built.
//
// Spectrum weighting, one entry per channel.
static const int16_t kSpectrumWeight[kNumChannels] =;
// Update constants for the noise and speech models, and the "back eta"
// constant. Only the Q-formats are recorded here; how they are applied is not
// visible in this section.
static const int16_t kNoiseUpdateConst =; // Q15
static const int16_t kSpeechUpdateConst =; // Q15
static const int16_t kBackEta =; // Q8
// Minimum difference between the two models, Q5. One entry per channel.
static const int16_t kMinimumDifference[kNumChannels] =;
// Upper limit of mean value for speech model, Q7. One entry per channel.
static const int16_t kMaximumSpeech[kNumChannels] =;
// Minimum value for mean value. One entry per Gaussian.
static const int16_t kMinimumMean[kNumGaussians] =;
// Upper limit of mean value for noise model, Q7. One entry per channel.
static const int16_t kMaximumNoise[kNumChannels] =;
// Start values for the Gaussian models, Q7.
// Each table below has `kTableSize` entries (presumably
// kNumChannels * kNumGaussians - TODO confirm against vad_core.h).
//
// Weights for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataWeights[kTableSize] =;
// Weights for the two Gaussians for the six channels (speech)
static const int16_t kSpeechDataWeights[kTableSize] =;
// Means for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataMeans[kTableSize] =;
// Means for the two Gaussians for the six channels (speech)
static const int16_t kSpeechDataMeans[kTableSize] =;
// Stds for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataStds[kTableSize] =;
// Stds for the two Gaussians for the six channels (speech)
static const int16_t kSpeechDataStds[kTableSize] =;

// Constants used in GmmProbability().
//
// NOTE(review): both initializers are missing (`=;`); restore the values
// before building.
//
// Maximum number of counted speech (VAD = 1) frames in a row.
static const int16_t kMaxSpeechFrames =;
// Minimum standard deviation for both speech and noise.
static const int16_t kMinStd =;

// Constants in WebRtcVad_InitCore().
//
// NOTE(review): both initializers are missing (`=;`); restore the values
// before building.
//
// Default aggressiveness mode.
static const short kDefaultMode =;
// Presumably a marker value stored in the instance once initialization has
// completed - TODO confirm against WebRtcVad_InitCore() and its callers.
static const int kInitCheck =;

// Constants used in WebRtcVad_set_mode_core().
//
// Thresholds for different frame lengths (10 ms, 20 ms and 30 ms).
// Every table has three entries, one per frame length (presumably in the
// order 10 ms, 20 ms, 30 ms - TODO confirm against the code that indexes
// them).
//
// NOTE(review): every initializer in this section is missing (`=;`); restore
// the values before building.
//
// Mode 0, Quality.
static const int16_t kOverHangMax1Q[3] =;
static const int16_t kOverHangMax2Q[3] =;
static const int16_t kLocalThresholdQ[3] =;
static const int16_t kGlobalThresholdQ[3] =;
// Mode 1, Low bitrate.
static const int16_t kOverHangMax1LBR[3] =;
static const int16_t kOverHangMax2LBR[3] =;
static const int16_t kLocalThresholdLBR[3] =;
static const int16_t kGlobalThresholdLBR[3] =;
// Mode 2, Aggressive.
static const int16_t kOverHangMax1AGG[3] =;
static const int16_t kOverHangMax2AGG[3] =;
static const int16_t kLocalThresholdAGG[3] =;
static const int16_t kGlobalThresholdAGG[3] =;
// Mode 3, Very aggressive.
static const int16_t kOverHangMax1VAG[3] =;
static const int16_t kOverHangMax2VAG[3] =;
static const int16_t kLocalThresholdVAG[3] =;
static const int16_t kGlobalThresholdVAG[3] =;

// Calculates the weighted average w.r.t. number of Gaussians. The `data` are
// updated with an `offset` before averaging.
//
// - data     [i/o] : Data to average. Non-const because the offset is applied
//                    in place.
// - offset   [i]   : An offset added to `data`.
// - weights  [i]   : Weights used for averaging. Read-only.
//
// returns          : The weighted average.
//
// NOTE(review): the body is empty in this view, so this non-void function
// falls off the end without returning a value. The implementation must be
// restored before any caller uses the result.
static int32_t WeightedAverage(int16_t* data, int16_t offset,
                               const int16_t* weights) {}

// An s16 x s32 -> s32 multiplication that's allowed to overflow. (It's still
// undefined behavior, so not a good idea; this just makes UBSan ignore the
// violation, so that our old code can continue to do what it's always been
// doing.)
//
// - a  [i] : 16-bit signed factor; undergoes integer promotion before the
//            multiplication.
// - b  [i] : 32-bit signed factor.
//
// returns  : The product `a * b`, converted to int32_t. If the mathematical
//            result does not fit, the value is whatever the platform's
//            overflowing multiply produces.
static inline int32_t RTC_NO_SANITIZE("signed-integer-overflow")
    OverflowingMulS16ByS32ToS32(int16_t a, int32_t b) {
  // Deliberately a plain multiply with no widening or saturation: callers
  // rely on the historical behavior, and the attribute above only silences
  // the sanitizer report.
  return a * b;
}

// Calculates the probabilities for both speech and background noise using
// Gaussian Mixture Models (GMM). A hypothesis-test is performed to decide which
// type of signal is most probable.
//
// - self           [i/o] : Pointer to VAD instance
// - features       [i]   : Feature vector of length `kNumChannels`
//                          = log10(energy in frequency band)
// - total_power    [i]   : Total power in audio frame.
// - frame_length   [i]   : Number of input samples
//
// - returns              : the VAD decision (0 - noise, 1 - speech).
//
// NOTE(review): the body is empty in this view, so this non-void function
// falls off the end without returning a value. The implementation must be
// restored before any caller uses the result.
static int16_t GmmProbability(VadInstT* self, int16_t* features,
                              int16_t total_power, size_t frame_length) {}

// Initializes the VAD and sets the aggressiveness mode to its default value.
//
// - self  [i/o] : Pointer to the VAD instance to initialize.
//
// returns       : An int status code; its values are not visible here
//                 (presumably documented in vad_core.h - TODO confirm).
//
// NOTE(review): the body is empty in this view, so this non-void function
// falls off the end without returning a value. The implementation must be
// restored before any caller uses the result.
int WebRtcVad_InitCore(VadInstT* self) {}

// Sets the aggressiveness mode.
//
// - self  [i/o] : Pointer to the VAD instance.
// - mode  [i]   : Requested aggressiveness mode. The threshold tables in this
//                 file are labelled for modes 0 (Quality), 1 (Low bitrate),
//                 2 (Aggressive) and 3 (Very aggressive).
//
// returns       : An int status code; its values are not visible here
//                 (presumably documented in vad_core.h - TODO confirm).
//
// NOTE(review): the body is empty in this view, so this non-void function
// falls off the end without returning a value. The implementation must be
// restored before any caller uses the result.
int WebRtcVad_set_mode_core(VadInstT* self, int mode) {}

// Calculates the VAD decision by first extracting feature values and then
// calculating the probability of both speech and background noise.

// Variant for 48 kHz input (per the function name).
//
// - inst          [i/o] : Pointer to the VAD instance.
// - speech_frame  [i]   : Input samples. Read-only.
// - frame_length  [i]   : Length of `speech_frame` (presumably in samples -
//                         TODO confirm).
//
// returns               : An int result; its exact values are not visible
//                         here (presumably the VAD decision - TODO confirm).
//
// NOTE(review): the body is empty in this view, so this non-void function
// falls off the end without returning a value. The implementation must be
// restored before any caller uses the result.
int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,
                           size_t frame_length) {}

// VAD calculation variant for 32 kHz input (per the function name).
//
// - inst          [i/o] : Pointer to the VAD instance.
// - speech_frame  [i]   : Input samples. Read-only.
// - frame_length  [i]   : Length of `speech_frame` (presumably in samples -
//                         TODO confirm).
//
// returns               : An int result; its exact values are not visible
//                         here (presumably the VAD decision - TODO confirm).
//
// NOTE(review): the body is empty in this view, so this non-void function
// falls off the end without returning a value. The implementation must be
// restored before any caller uses the result.
int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame,
                           size_t frame_length)
{}

// VAD calculation variant for 16 kHz input (per the function name).
//
// - inst          [i/o] : Pointer to the VAD instance.
// - speech_frame  [i]   : Input samples. Read-only.
// - frame_length  [i]   : Length of `speech_frame` (presumably in samples -
//                         TODO confirm).
//
// returns               : An int result; its exact values are not visible
//                         here (presumably the VAD decision - TODO confirm).
//
// NOTE(review): the body is empty in this view, so this non-void function
// falls off the end without returning a value. The implementation must be
// restored before any caller uses the result.
int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame,
                           size_t frame_length)
{}

// VAD calculation variant for 8 kHz input (per the function name).
//
// - inst          [i/o] : Pointer to the VAD instance.
// - speech_frame  [i]   : Input samples. Read-only.
// - frame_length  [i]   : Length of `speech_frame` (presumably in samples -
//                         TODO confirm).
//
// returns               : An int result; its exact values are not visible
//                         here (presumably the VAD decision - TODO confirm).
//
// NOTE(review): the body is empty in this view, so this non-void function
// falls off the end without returning a value. The implementation must be
// restored before any caller uses the result.
int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame,
                          size_t frame_length)
{}