diff --git a/modules/audio_processing/agc2/rnn_vad/BUILD.gn b/modules/audio_processing/agc2/rnn_vad/BUILD.gn index e05dcab604..395e5224ee 100644 --- a/modules/audio_processing/agc2/rnn_vad/BUILD.gn +++ b/modules/audio_processing/agc2/rnn_vad/BUILD.gn @@ -25,12 +25,15 @@ source_set("lib") { "pitch_search_internal.cc", "pitch_search_internal.h", "ring_buffer.h", + "rnn.cc", + "rnn.h", "sequence_buffer.h", "symmetric_matrix_buffer.h", ] deps = [ "../../../../api:array_view", "../../../../rtc_base:checks", + "//third_party/rnnoise:rnn_vad", ] } @@ -53,6 +56,8 @@ if (rtc_include_tests) { unittest_resources = [ "../../../../resources/audio_processing/agc2/rnn_vad/pitch_buf_24k.dat", "../../../../resources/audio_processing/agc2/rnn_vad/pitch_lp_res.dat", + "../../../../resources/audio_processing/agc2/rnn_vad/sil_features.dat", + "../../../../resources/audio_processing/agc2/rnn_vad/vad_prob.dat", ] if (is_ios) { @@ -72,6 +77,7 @@ if (rtc_include_tests) { "pitch_search_internal_unittest.cc", "pitch_search_unittest.cc", "ring_buffer_unittest.cc", + "rnn_unittest.cc", "sequence_buffer_unittest.cc", "symmetric_matrix_buffer_unittest.cc", ] @@ -79,7 +85,9 @@ if (rtc_include_tests) { ":lib", ":lib_test", "../../../../api:array_view", + "../../../../rtc_base:checks", "../../../../test:test_support", + "//third_party/rnnoise:rnn_vad", ] data = unittest_resources if (is_ios) { diff --git a/modules/audio_processing/agc2/rnn_vad/DEPS b/modules/audio_processing/agc2/rnn_vad/DEPS new file mode 100644 index 0000000000..773c2d7edd --- /dev/null +++ b/modules/audio_processing/agc2/rnn_vad/DEPS @@ -0,0 +1,3 @@ +include_rules = [ + "+third_party/rnnoise", +] diff --git a/modules/audio_processing/agc2/rnn_vad/common.h b/modules/audio_processing/agc2/rnn_vad/common.h index 252bf8472c..3af0719c16 100644 --- a/modules/audio_processing/agc2/rnn_vad/common.h +++ b/modules/audio_processing/agc2/rnn_vad/common.h @@ -43,6 +43,8 @@ constexpr size_t kMaxPitch12kHz = kMaxPitch24kHz / 2; constexpr 
size_t kMinPitch48kHz = kMinPitch24kHz * 2; constexpr size_t kMaxPitch48kHz = kMaxPitch24kHz * 2; +constexpr size_t kFeatureVectorSize = 42; + } // namespace rnn_vad } // namespace webrtc diff --git a/modules/audio_processing/agc2/rnn_vad/rnn.cc b/modules/audio_processing/agc2/rnn_vad/rnn.cc new file mode 100644 index 0000000000..f88fb75e71 --- /dev/null +++ b/modules/audio_processing/agc2/rnn_vad/rnn.cc @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/rnn.h" + +#include +#include +#include + +#include "rtc_base/checks.h" +#include "third_party/rnnoise/src/rnn_activations.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { + +using rnnoise::kWeightsScale; + +using rnnoise::kInputLayerInputSize; +static_assert(kFeatureVectorSize == kInputLayerInputSize, ""); +using rnnoise::kInputDenseWeights; +using rnnoise::kInputDenseBias; +using rnnoise::kInputLayerOutputSize; +static_assert(kInputLayerOutputSize <= kFullyConnectedLayersMaxUnits, + "Increase kFullyConnectedLayersMaxUnits."); + +using rnnoise::kHiddenGruRecurrentWeights; +using rnnoise::kHiddenGruWeights; +using rnnoise::kHiddenGruBias; +using rnnoise::kHiddenLayerOutputSize; +static_assert(kHiddenLayerOutputSize <= kRecurrentLayersMaxUnits, + "Increase kRecurrentLayersMaxUnits."); + +using rnnoise::kOutputDenseWeights; +using rnnoise::kOutputDenseBias; +using rnnoise::kOutputLayerOutputSize; +static_assert(kOutputLayerOutputSize <= kFullyConnectedLayersMaxUnits, + "Increase kFullyConnectedLayersMaxUnits."); + +using 
rnnoise::RectifiedLinearUnit; +using rnnoise::SigmoidApproximated; +using rnnoise::TansigApproximated; + +FullyConnectedLayer::FullyConnectedLayer( + const size_t input_size, + const size_t output_size, + const rtc::ArrayView bias, + const rtc::ArrayView weights, + float (*const activation_function)(float)) + : input_size_(input_size), + output_size_(output_size), + bias_(bias), + weights_(weights), + activation_function_(activation_function) { + RTC_DCHECK_LE(output_size_, kFullyConnectedLayersMaxUnits) + << "Static over-allocation of fully-connected layers output vectors is " + "not sufficient."; + RTC_DCHECK_EQ(output_size_, bias_.size()) + << "Mismatching output size and bias terms array size."; + RTC_DCHECK_EQ(input_size_ * output_size_, weights_.size()) + << "Mismatching input-output size and weight coefficients array size."; +} + +FullyConnectedLayer::~FullyConnectedLayer() = default; + +rtc::ArrayView FullyConnectedLayer::GetOutput() const { + return rtc::ArrayView(output_.data(), output_size_); +} + +void FullyConnectedLayer::ComputeOutput(rtc::ArrayView input) { + // TODO(bugs.chromium.org/9076): Optimize using SSE/AVX fused multiply-add + // operations. + for (size_t o = 0; o < output_size_; ++o) { + output_[o] = bias_[o]; + // TODO(bugs.chromium.org/9076): Benchmark how different layouts for + // |weights_| change the performance across different platforms. 
+ for (size_t i = 0; i < input_size_; ++i) { + output_[o] += input[i] * weights_[i * output_size_ + o]; + } + output_[o] = (*activation_function_)(kWeightsScale * output_[o]); + } +} + +GatedRecurrentLayer::GatedRecurrentLayer( + const size_t input_size, + const size_t output_size, + const rtc::ArrayView bias, + const rtc::ArrayView weights, + const rtc::ArrayView recurrent_weights, + float (*const activation_function)(float)) + : input_size_(input_size), + output_size_(output_size), + bias_(bias), + weights_(weights), + recurrent_weights_(recurrent_weights), + activation_function_(activation_function) { + RTC_DCHECK_LE(output_size_, kRecurrentLayersMaxUnits) + << "Static over-allocation of recurrent layers state vectors is not " + << "sufficient."; + RTC_DCHECK_EQ(3 * output_size_, bias_.size()) + << "Mismatching output size and bias terms array size."; + RTC_DCHECK_EQ(3 * input_size_ * output_size_, weights_.size()) + << "Mismatching input-output size and weight coefficients array size."; + RTC_DCHECK_LE(3 * output_size_ * output_size_, recurrent_weights_.size()) + << "Too few recurrent weight coefficients: 3 * output_size^2 values are" + << " read."; + Reset(); +} + +GatedRecurrentLayer::~GatedRecurrentLayer() = default; + +rtc::ArrayView GatedRecurrentLayer::GetOutput() const { + return rtc::ArrayView(state_.data(), output_size_); +} + +void GatedRecurrentLayer::Reset() { + state_.fill(0.f); +} + +void GatedRecurrentLayer::ComputeOutput(rtc::ArrayView input) { + // TODO(bugs.chromium.org/9076): Optimize using SSE/AVX fused multiply-add + // operations. + // Stride and offset used to read parameter arrays. + const size_t stride = 3 * output_size_; + size_t offset = 0; + + // Compute update gates. + std::array update; + for (size_t o = 0; o < output_size_; ++o) { + update[o] = bias_[o]; + // TODO(bugs.chromium.org/9076): Benchmark how different layouts for + // |weights_| and |recurrent_weights_| change the performance across + // different platforms. 
+ for (size_t i = 0; i < input_size_; ++i) { // Add input. + update[o] += input[i] * weights_[i * stride + o]; + } + for (size_t s = 0; s < output_size_; ++s) { + update[o] += state_[s] * recurrent_weights_[s * stride + o]; + } // Add state. + update[o] = SigmoidApproximated(kWeightsScale * update[o]); + } + + // Compute reset gates. + offset += output_size_; + std::array reset; + for (size_t o = 0; o < output_size_; ++o) { + reset[o] = bias_[offset + o]; + for (size_t i = 0; i < input_size_; ++i) { // Add input. + reset[o] += input[i] * weights_[offset + i * stride + o]; + } + for (size_t s = 0; s < output_size_; ++s) { // Add state. + reset[o] += state_[s] * recurrent_weights_[offset + s * stride + o]; + } + reset[o] = SigmoidApproximated(kWeightsScale * reset[o]); + } + + // Compute output. + offset += output_size_; + std::array output; + for (size_t o = 0; o < output_size_; ++o) { + output[o] = bias_[offset + o]; + for (size_t i = 0; i < input_size_; ++i) { // Add input. + output[o] += input[i] * weights_[offset + i * stride + o]; + } + for (size_t s = 0; s < output_size_; + ++s) { // Add state through reset gates. + output[o] += + state_[s] * recurrent_weights_[offset + s * stride + o] * reset[s]; + } + output[o] = (*activation_function_)(kWeightsScale * output[o]); + // Update output through the update gates. + output[o] = update[o] * state_[o] + (1.f - update[o]) * output[o]; + } + + // Update the state. Not done in the previous loop since that would pollute + // the current state and lead to incorrect output values. 
+ std::copy_n(output.begin(), output_size_, state_.begin()); // Used units only. +} + +RnnBasedVad::RnnBasedVad() + : input_layer_(kInputLayerInputSize, + kInputLayerOutputSize, + kInputDenseBias, + kInputDenseWeights, + TansigApproximated), + hidden_layer_(kInputLayerOutputSize, + kHiddenLayerOutputSize, + kHiddenGruBias, + kHiddenGruWeights, + kHiddenGruRecurrentWeights, + RectifiedLinearUnit), + output_layer_(kHiddenLayerOutputSize, + kOutputLayerOutputSize, + kOutputDenseBias, + kOutputDenseWeights, + SigmoidApproximated) { + // Input-output chaining size checks. + RTC_DCHECK_EQ(input_layer_.output_size(), hidden_layer_.input_size()) + << "The input and the hidden layers sizes do not match."; + RTC_DCHECK_EQ(hidden_layer_.output_size(), output_layer_.input_size()) + << "The hidden and the output layers sizes do not match."; +} + +RnnBasedVad::~RnnBasedVad() = default; + +void RnnBasedVad::Reset() { + hidden_layer_.Reset(); +} + +void RnnBasedVad::ComputeVadProbability( + rtc::ArrayView feature_vector) { + input_layer_.ComputeOutput(feature_vector); + hidden_layer_.ComputeOutput(input_layer_.GetOutput()); + output_layer_.ComputeOutput(hidden_layer_.GetOutput()); + const auto vad_output = output_layer_.GetOutput(); + vad_probability_ = vad_output[0]; +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/modules/audio_processing/agc2/rnn_vad/rnn.h b/modules/audio_processing/agc2/rnn_vad/rnn.h new file mode 100644 index 0000000000..81ab87ed48 --- /dev/null +++ b/modules/audio_processing/agc2/rnn_vad/rnn.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_H_ + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" + +namespace webrtc { +namespace rnn_vad { + +// Maximum number of units for a fully-connected layer. This value is used to +// over-allocate space for fully-connected layers output vectors (implemented as +// std::array). The value should equal the number of units of the largest +// fully-connected layer. +constexpr size_t kFullyConnectedLayersMaxUnits = 24; + +// Maximum number of units for a recurrent layer. This value is used to +// over-allocate space for recurrent layers state vectors (implemented as +// std::array). The value should equal the number of units of the largest +// recurrent layer. +constexpr size_t kRecurrentLayersMaxUnits = 24; + +// Fully-connected layer. +class FullyConnectedLayer { + public: + FullyConnectedLayer(const size_t input_size, + const size_t output_size, + const rtc::ArrayView bias, + const rtc::ArrayView weights, + float (*const activation_function)(float)); + FullyConnectedLayer(const FullyConnectedLayer&) = delete; + FullyConnectedLayer& operator=(const FullyConnectedLayer&) = delete; + ~FullyConnectedLayer(); + size_t input_size() const { return input_size_; } + size_t output_size() const { return output_size_; } + rtc::ArrayView GetOutput() const; + // Computes the fully-connected layer output. + void ComputeOutput(rtc::ArrayView input); + + private: + const size_t input_size_; + const size_t output_size_; + const rtc::ArrayView bias_; + const rtc::ArrayView weights_; + float (*const activation_function_)(float); + // The output vector of a fully-connected layer has length equal to + // |output_size_|. However, for efficiency, over-allocation is used. + std::array output_; +}; + +// Recurrent layer with gated recurrent units (GRUs). 
+class GatedRecurrentLayer { + public: + GatedRecurrentLayer(const size_t input_size, + const size_t output_size, + const rtc::ArrayView bias, + const rtc::ArrayView weights, + const rtc::ArrayView recurrent_weights, + float (*const activation_function)(float)); + GatedRecurrentLayer(const GatedRecurrentLayer&) = delete; + GatedRecurrentLayer& operator=(const GatedRecurrentLayer&) = delete; + ~GatedRecurrentLayer(); + size_t input_size() const { return input_size_; } + size_t output_size() const { return output_size_; } + rtc::ArrayView GetOutput() const; + void Reset(); + // Computes the recurrent layer output and updates the state. + void ComputeOutput(rtc::ArrayView input); + + private: + const size_t input_size_; + const size_t output_size_; + const rtc::ArrayView bias_; + const rtc::ArrayView weights_; + const rtc::ArrayView recurrent_weights_; + float (*const activation_function_)(float); + // The state vector of a recurrent layer has length equal to |output_size_|. + // However, to avoid dynamic allocation, over-allocation is used. + std::array state_; +}; + +// Recurrent network based VAD. +class RnnBasedVad { + public: + RnnBasedVad(); + RnnBasedVad(const RnnBasedVad&) = delete; + RnnBasedVad& operator=(const RnnBasedVad&) = delete; + ~RnnBasedVad(); + float vad_probability() const { return vad_probability_; } + void Reset(); + // Computes the voice probability (range: [0.0, 1.0]); see vad_probability(). 
+ void ComputeVadProbability( + rtc::ArrayView feature_vector); + + private: + FullyConnectedLayer input_layer_; + GatedRecurrentLayer hidden_layer_; + FullyConnectedLayer output_layer_; + float vad_probability_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_H_ diff --git a/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc b/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc new file mode 100644 index 0000000000..d774c6d557 --- /dev/null +++ b/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include +#include + +#include "modules/audio_processing/agc2/rnn_vad/rnn.h" +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +#include "rtc_base/checks.h" +#include "test/gtest.h" +#include "third_party/rnnoise/src/rnn_activations.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace test { + +using rnnoise::RectifiedLinearUnit; +using rnnoise::SigmoidApproximated; + +namespace { + +void TestFullyConnectedLayer(FullyConnectedLayer* fc, + rtc::ArrayView input_vector, + const float expected_output) { + RTC_CHECK(fc); + fc->ComputeOutput(input_vector); + const auto output = fc->GetOutput(); + EXPECT_NEAR(expected_output, output[0], 3e-6f); +} + +void TestGatedRecurrentLayer( + GatedRecurrentLayer* gru, + rtc::ArrayView input_sequence, + rtc::ArrayView expected_output_sequence) { + RTC_CHECK(gru); + auto gru_output_view = gru->GetOutput(); + const size_t input_sequence_length = + 
rtc::CheckedDivExact(input_sequence.size(), gru->input_size()); + const size_t output_sequence_length = + rtc::CheckedDivExact(expected_output_sequence.size(), gru->output_size()); + ASSERT_EQ(input_sequence_length, output_sequence_length) + << "The test data length is invalid."; + // Feed the GRU layer and check the output at every step. + gru->Reset(); + for (size_t i = 0; i < input_sequence_length; ++i) { + SCOPED_TRACE(i); + gru->ComputeOutput( + input_sequence.subview(i * gru->input_size(), gru->input_size())); + const auto expected_output = expected_output_sequence.subview( + i * gru->output_size(), gru->output_size()); + ExpectNearAbsolute(expected_output, gru_output_view, 3e-6f); + } +} + +} // namespace + +// Bit-exactness check for fully connected layers. +TEST(RnnVadTest, CheckFullyConnectedLayerOutput) { + const std::array bias = {-50}; + const std::array weights = { + 127, 127, 127, 127, 127, 20, 127, -126, -126, -54, 14, 125, + -126, -126, 127, -125, -126, 127, -127, -127, -57, -30, 127, 80}; + FullyConnectedLayer fc(24, 1, bias, weights, SigmoidApproximated); + // Test on different inputs. 
+ { + const std::array input_vector = { + 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.215833917f, 0.290601075f, 0.238759011f, 0.244751841f, + 0.f, 0.0461241305f, 0.106401242f, 0.223070428f, 0.630603909f, + 0.690453172f, 0.f, 0.387645692f, 0.166913897f, 0.f, + 0.0327451192f, 0.f, 0.136149868f, 0.446351469f}; + TestFullyConnectedLayer(&fc, {input_vector}, 0.436567038f); + } + { + const std::array input_vector = { + 0.592162728f, 0.529089332f, 1.18205106f, + 1.21736848f, 0.f, 0.470851123f, + 0.130675942f, 0.320903003f, 0.305496395f, + 0.0571633279f, 1.57001138f, 0.0182026215f, + 0.0977443159f, 0.347477973f, 0.493206412f, + 0.9688586f, 0.0320267938f, 0.244722098f, + 0.312745273f, 0.f, 0.00650715502f, + 0.312553257f, 1.62619662f, 0.782880902f}; + TestFullyConnectedLayer(&fc, {input_vector}, 0.874741316f); + } + { + const std::array input_vector = { + 0.395022154f, 0.333681047f, 0.76302278f, + 0.965480626f, 0.f, 0.941198349f, + 0.0892967582f, 0.745046318f, 0.635769248f, + 0.238564298f, 0.970656633f, 0.014159563f, + 0.094203949f, 0.446816623f, 0.640755892f, + 1.20532358f, 0.0254284926f, 0.283327013f, + 0.726210058f, 0.0550272502f, 0.000344108557f, + 0.369803518f, 1.56680179f, 0.997883797f}; + TestFullyConnectedLayer(&fc, {input_vector}, 0.672785878f); + } +} + +TEST(RnnVadTest, CheckGatedRecurrentLayer) { + const std::array bias = {96, -99, -81, -114, 49, 119, + -118, 68, -76, 91, 121, 125}; + const std::array weights = { + 124, 9, 1, 116, -66, -21, -118, -110, 104, 75, -23, -51, + -72, -111, 47, 93, 77, -98, 41, -8, 40, -23, -43, -107, + 9, -73, 30, -32, -2, 64, -26, 91, -48, -24, -28, -104, + 74, -46, 116, 15, 32, 52, -126, -38, -121, 12, -16, 110, + -95, 66, -103, -35, -38, 3, -126, -61, 28, 98, -117, -43}; + const std::array recurrent_weights = { + -3, 87, 50, 51, -22, 27, -39, 62, 31, -83, -52, -48, + -6, 83, -19, 104, 105, 48, 23, 68, 23, 40, 7, -120, + 64, -62, 117, 85, -51, -43, 54, -105, 120, 56, -128, -107, + 39, 50, -17, -47, -117, 14, 108, 12, -7, -72, 103, -87, + -66, 
82, 84, 100, -98, 102, -49, 44, 122, 106, -20, -69}; + GatedRecurrentLayer gru(5, 4, bias, weights, recurrent_weights, + RectifiedLinearUnit); + // Test on different inputs. + { + const std::array input_sequence = { + 0.89395463f, 0.93224651f, 0.55788344f, 0.32341808f, 0.93355054f, + 0.13475326f, 0.97370994f, 0.14253306f, 0.93710381f, 0.76093364f, + 0.65780413f, 0.41657975f, 0.49403164f, 0.46843281f, 0.75138855f, + 0.24517593f, 0.47657707f, 0.57064998f, 0.435184f, 0.19319285f}; + const std::array expected_output_sequence = { + 0.0239123f, 0.5773077f, 0.f, 0.f, + 0.01282811f, 0.64330572f, 0.f, 0.04863098f, + 0.00781069f, 0.75267816f, 0.f, 0.02579715f, + 0.00471378f, 0.59162533f, 0.11087593f, 0.01334511f}; + TestGatedRecurrentLayer(&gru, input_sequence, expected_output_sequence); + } +} + +// TODO(bugs.webrtc.org/9076): Remove when the issue is fixed. +// Bit-exactness test checking that precomputed frame-wise features lead to the +// expected VAD probabilities. +TEST(RnnVadTest, RnnBitExactness) { + // Init. + auto features_reader = CreateSilenceFlagsFeatureMatrixReader(); + auto vad_probs_reader = CreateVadProbsReader(); + ASSERT_EQ(features_reader.second, vad_probs_reader.second); + const size_t num_frames = features_reader.second; + // Frame-wise buffers. + float expected_vad_probability; + float is_silence; + std::array features; + + // Compute VAD probability using the precomputed features. + RnnBasedVad vad; + for (size_t i = 0; i < num_frames; ++i) { + SCOPED_TRACE(i); + // Read frame data. + RTC_CHECK(vad_probs_reader.first->ReadValue(&expected_vad_probability)); + // The features file also includes a silence flag for each frame. + RTC_CHECK(features_reader.first->ReadValue(&is_silence)); + RTC_CHECK( + features_reader.first->ReadChunk({features.data(), features.size()})); + // Skip silent frames. 
+ ASSERT_TRUE(is_silence == 0.f || is_silence == 1.f); + if (is_silence == 1.f) { + ASSERT_EQ(expected_vad_probability, 0.f); + continue; + } + // Compute and check VAD probability. + vad.ComputeVadProbability({features.data(), features.size()}); + EXPECT_NEAR(expected_vad_probability, vad.vad_probability(), 3e-6f); + } +} + +} // namespace test +} // namespace rnn_vad +} // namespace webrtc diff --git a/modules/audio_processing/agc2/rnn_vad/test_utils.cc b/modules/audio_processing/agc2/rnn_vad/test_utils.cc index c6cf21e61b..ff91ef7d13 100644 --- a/modules/audio_processing/agc2/rnn_vad/test_utils.cc +++ b/modules/audio_processing/agc2/rnn_vad/test_utils.cc @@ -53,6 +53,21 @@ ReaderPairType CreateLpResidualAndPitchPeriodGainReader() { rtc::CheckedDivExact(ptr->data_length(), 2 + num_lp_residual_coeffs)}; } +ReaderPairType CreateSilenceFlagsFeatureMatrixReader() { + auto ptr = rtc::MakeUnique>( + test::ResourcePath("audio_processing/agc2/rnn_vad/sil_features", "dat"), + 42); + // Features (42) and silence flag. + return {std::move(ptr), + rtc::CheckedDivExact(ptr->data_length(), static_cast(43))}; +} + +ReaderPairType CreateVadProbsReader() { + auto ptr = rtc::MakeUnique>( + test::ResourcePath("audio_processing/agc2/rnn_vad/vad_prob", "dat")); + return {std::move(ptr), ptr->data_length()}; +} + } // namespace test } // namespace rnn_vad } // namespace webrtc diff --git a/modules/audio_processing/agc2/rnn_vad/test_utils.h b/modules/audio_processing/agc2/rnn_vad/test_utils.h index 3f580ab48c..92d370675e 100644 --- a/modules/audio_processing/agc2/rnn_vad/test_utils.h +++ b/modules/audio_processing/agc2/rnn_vad/test_utils.h @@ -95,6 +95,12 @@ CreatePitchBuffer24kHzReader(); // and gain values. std::pair>, const size_t> CreateLpResidualAndPitchPeriodGainReader(); +// Instance a reader for the silence flags and the feature matrix. +std::pair>, const size_t> +CreateSilenceFlagsFeatureMatrixReader(); +// Instance a reader for the VAD probabilities. 
+std::pair>, const size_t> +CreateVadProbsReader(); } // namespace test } // namespace rnn_vad diff --git a/resources/audio_processing/agc2/rnn_vad/sil_features.dat.sha1 b/resources/audio_processing/agc2/rnn_vad/sil_features.dat.sha1 new file mode 100644 index 0000000000..bc591e9d6c --- /dev/null +++ b/resources/audio_processing/agc2/rnn_vad/sil_features.dat.sha1 @@ -0,0 +1 @@ +e0a92782c2903be9da10385d924d34e8bf212d5e \ No newline at end of file diff --git a/resources/audio_processing/agc2/rnn_vad/vad_prob.dat.sha1 b/resources/audio_processing/agc2/rnn_vad/vad_prob.dat.sha1 new file mode 100644 index 0000000000..1aa3bd0d83 --- /dev/null +++ b/resources/audio_processing/agc2/rnn_vad/vad_prob.dat.sha1 @@ -0,0 +1 @@ +05735ede0b457318e307d12f5acfd11bbbbd0afd \ No newline at end of file diff --git a/tools_webrtc/libs/generate_licenses.py b/tools_webrtc/libs/generate_licenses.py index 9bbe7526a3..df7ad8210e 100755 --- a/tools_webrtc/libs/generate_licenses.py +++ b/tools_webrtc/libs/generate_licenses.py @@ -44,6 +44,7 @@ LIB_TO_LICENSES_DICT = { 'openmax_dl': ['third_party/openmax_dl/LICENSE'], 'opus': ['third_party/opus/src/COPYING'], 'protobuf': ['third_party/protobuf/LICENSE'], + 'rnnoise': ['third_party/rnnoise/COPYING'], 'usrsctp': ['third_party/usrsctp/LICENSE'], 'webrtc': ['LICENSE', 'LICENSE_THIRD_PARTY'], 'zlib': ['third_party/zlib/LICENSE'],