/* * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h" #include <stdlib.h> #include <algorithm> #include <cmath> #include <cstddef> #include <numeric> #include "modules/audio_processing/agc2/rnn_vad/common.h" #include "modules/audio_processing/agc2/rnn_vad/vector_math.h" #include "rtc_base/checks.h" #include "rtc_base/numerics/safe_compare.h" #include "rtc_base/numerics/safe_conversions.h" #include "rtc_base/system/arch.h" namespace webrtc { namespace rnn_vad { namespace { /* Single-lag overload: takes one `inverted_lag` and returns one float. NOTE(review): presumably the auto-correlation coefficient of `pitch_buffer` for that lag; the body is not visible here, confirm. */ float ComputeAutoCorrelation( int inverted_lag, rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, const VectorMath& vector_math) { … } // Given an auto-correlation coefficient `curr_auto_correlation` and its // neighboring values `prev_auto_correlation` and `next_auto_correlation`, // computes a pseudo-interpolation offset to be applied to the pitch period // associated with `curr_auto_correlation`. The output is a lag in {-1, 0, +1}. // TODO(bugs.webrtc.org/9076): Consider removing this method. // `GetPitchPseudoInterpolationOffset()` is relevant only if the spectral // analysis works at a sample rate that is twice that of the pitch buffer; // in particular, it is not relevant for the estimated pitch period feature fed // into the RNN. int GetPitchPseudoInterpolationOffset(float prev_auto_correlation, float curr_auto_correlation, float next_auto_correlation) { … } // Refines a pitch period `lag` encoded as lag with pseudo-interpolation. The // output sample rate is twice that of `lag`. 
int PitchPseudoInterpolationLagPitchBuf( int lag, rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, const VectorMath& vector_math) { … } // Integer multipliers used in ComputeExtendedPitchPeriod48kHz() when // looking for sub-harmonics. // The values have been chosen to serve the following algorithm. Given the // initial pitch period T, we examine whether one of its harmonics is the true // fundamental frequency. We consider T/k with k in {2, ..., 15}. For each of // these harmonics, in addition to its own pitch strength, we choose one // multiple of its pitch period, n*T/k, to validate it (by averaging their pitch // strengths). The multiplier n is chosen so that n*T/k is used only once // over all k. When, for example, k = 4, we should also expect a peak at 3*T/4. // When k = 8, instead, we don't want to look at 2*T/8, since we have already // checked T/4 before. Instead, we look at T*3/8. // The array can be generated in Python as follows: // from fractions import Fraction // # Smallest positive integer not in X. // def mex(X): // for i in range(1, int(max(X)+2)): // if i not in X: // return i // # Visited multiples of the period. // S = {1} // for n in range(2, 16): // sn = mex({n * i for i in S} | {1}) // S = S | {Fraction(1, n), Fraction(sn, n)} // print(sn, end=', ') constexpr std::array<int, 14> kSubHarmonicMultipliers = …; /* Used in this file to describe a closed interval of inverted lags (see the `Range inverted_lags` parameter of ComputeAutoCorrelation()). */ struct Range { … }; // Number of analyzed pitches to the left (right) of a pitch candidate. constexpr int kPitchNeighborhoodRadius = …; // Creates a pitch period interval centered on `inverted_lag` with hard-coded // radius. Clipping is applied so that the interval is always valid for a 24 kHz // pitch buffer. Range CreateInvertedLagRange(int inverted_lag) { … } constexpr int kNumPitchCandidates = …; // Best and second best. // Maximum number of analyzed pitch periods. constexpr int kMaxPitchPeriods24kHz = …; // Collection of inverted lags. 
class InvertedLagsIndex { … }; // Computes the auto-correlation coefficients for the inverted lags in the // closed interval `inverted_lags`. Updates `inverted_lags_index` by appending // the inverted lags for the computed auto-correlation values. void ComputeAutoCorrelation( Range inverted_lags, rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, rtc::ArrayView<float, kInitialNumLags24kHz> auto_correlation, InvertedLagsIndex& inverted_lags_index, const VectorMath& vector_math) { … } // Searches for the strongest pitch period at 24 kHz and returns its inverted // lag at 48 kHz. int ComputePitchPeriod48kHz( rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, rtc::ArrayView<const int> inverted_lags, rtc::ArrayView<const float, kInitialNumLags24kHz> auto_correlation, rtc::ArrayView<const float, kRefineNumLags24kHz> y_energy, const VectorMath& vector_math) { … } // Returns an alternative pitch period for `pitch_period` given a `multiplier` // and a `divisor` of the period. constexpr int GetAlternativePitchPeriod(int pitch_period, int multiplier, int divisor) { … } // Returns true if the alternative pitch period is stronger than the initial one // given the last estimated pitch and the value of `period_divisor` used to // compute the alternative pitch period via `GetAlternativePitchPeriod()`. 
bool IsAlternativePitchStrongerThanInitial(PitchInfo last, PitchInfo initial, PitchInfo alternative, int period_divisor) { … } } // namespace /* Reads `src` (kBufSize24kHz samples) and writes `dst` (kBufSize12kHz samples). NOTE(review): presumably a plain 2x decimation as the name says; whether any filtering is applied cannot be told from here, confirm. */ void Decimate2x(rtc::ArrayView<const float, kBufSize24kHz> src, rtc::ArrayView<float, kBufSize12kHz> dst) { … } /* Writes the kRefineNumLags24kHz values of `y_energy` from the read-only `pitch_buffer`. NOTE(review): presumably the square energies of sliding frames, as the name says; confirm against the body. */ void ComputeSlidingFrameSquareEnergies24kHz( rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, rtc::ArrayView<float, kRefineNumLags24kHz> y_energy, AvailableCpuFeatures cpu_features) { … } /* Returns `CandidatePitchPeriods` given the 12 kHz pitch buffer and its kNumLags12kHz auto-correlation values. */ CandidatePitchPeriods ComputePitchPeriod12kHz( rtc::ArrayView<const float, kBufSize12kHz> pitch_buffer, rtc::ArrayView<const float, kNumLags12kHz> auto_correlation, AvailableCpuFeatures cpu_features) { … } /* Public overload taking `pitch_candidates` and `cpu_features`; not to be confused with the file-local overload of the same name in the anonymous namespace, which takes inverted lags and auto-correlation values. */ int ComputePitchPeriod48kHz( rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, rtc::ArrayView<const float, kRefineNumLags24kHz> y_energy, CandidatePitchPeriods pitch_candidates, AvailableCpuFeatures cpu_features) { … } /* Returns a `PitchInfo` given the initial pitch period at 48 kHz and the last estimated pitch. According to the comment on `kSubHarmonicMultipliers`, this function looks for sub-harmonics using those multipliers. */ PitchInfo ComputeExtendedPitchPeriod48kHz( rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, rtc::ArrayView<const float, kRefineNumLags24kHz> y_energy, int initial_pitch_period_48kHz, PitchInfo last_pitch_48kHz, AvailableCpuFeatures cpu_features) { … } } // namespace rnn_vad } // namespace webrtc