mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-14 06:10:40 +01:00

The implementations for the fully connected layer can be simplified by using `VectorMath::DotProduct()`. In this way, it is also possible to remove (nearly) duplicated SIMD code, reduce the binary size and more easily maintain the code. This CL also forces unoptimized code for the output layer of the VAD, which is a FC 24x1 layer. A slight improvement of the realtime factor has been measured (delta ~ +5x). Bug: webrtc:10480 Change-Id: Iee93bd59f7905ebf96275dbbfeb3c921baf4e8db Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/195580 Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Reviewed-by: Ivo Creusen <ivoc@webrtc.org> Cr-Commit-Position: refs/heads/master@{#32806}
91 lines
2.9 KiB
C++
91 lines
2.9 KiB
C++
/*
|
|
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "modules/audio_processing/agc2/rnn_vad/rnn.h"
|
|
|
|
#include "rtc_base/checks.h"
|
|
#include "third_party/rnnoise/src/rnn_vad_weights.h"
|
|
|
|
namespace webrtc {
|
|
namespace rnn_vad {
|
|
namespace {
|
|
|
|
using ::rnnoise::kInputLayerInputSize;
|
|
static_assert(kFeatureVectorSize == kInputLayerInputSize, "");
|
|
using ::rnnoise::kInputDenseBias;
|
|
using ::rnnoise::kInputDenseWeights;
|
|
using ::rnnoise::kInputLayerOutputSize;
|
|
static_assert(kInputLayerOutputSize <= kFullyConnectedLayerMaxUnits, "");
|
|
|
|
using ::rnnoise::kHiddenGruBias;
|
|
using ::rnnoise::kHiddenGruRecurrentWeights;
|
|
using ::rnnoise::kHiddenGruWeights;
|
|
using ::rnnoise::kHiddenLayerOutputSize;
|
|
static_assert(kHiddenLayerOutputSize <= kGruLayerMaxUnits, "");
|
|
|
|
using ::rnnoise::kOutputDenseBias;
|
|
using ::rnnoise::kOutputDenseWeights;
|
|
using ::rnnoise::kOutputLayerOutputSize;
|
|
static_assert(kOutputLayerOutputSize <= kFullyConnectedLayerMaxUnits, "");
|
|
|
|
} // namespace
|
|
|
|
// Builds the three layers of the VAD network ("FC1" -> "GRU1" -> "FC2") from
// the rnnoise weights. `cpu_features` is forwarded to the input and hidden
// layers only; the output layer is always built without CPU features.
RnnVad::RnnVad(const AvailableCpuFeatures& cpu_features)
    : input_(kInputLayerInputSize,
             kInputLayerOutputSize,
             kInputDenseBias,
             kInputDenseWeights,
             ActivationFunction::kTansigApproximated,
             cpu_features,
             /*layer_name=*/"FC1"),
      hidden_(kInputLayerOutputSize,
              kHiddenLayerOutputSize,
              kHiddenGruBias,
              kHiddenGruWeights,
              kHiddenGruRecurrentWeights,
              cpu_features,
              /*layer_name=*/"GRU1"),
      output_(kHiddenLayerOutputSize,
              kOutputLayerOutputSize,
              kOutputDenseBias,
              kOutputDenseWeights,
              ActivationFunction::kSigmoidApproximated,
              // With only 24x1 weights, the plain (non-SIMD) implementation
              // is the faster one for this layer.
              NoAvailableCpuFeatures(),
              /*layer_name=*/"FC2") {
  // Each layer must produce exactly as many values as the next one consumes.
  RTC_DCHECK_EQ(input_.size(), hidden_.input_size())
      << "The input and the hidden layers sizes do not match.";
  RTC_DCHECK_EQ(hidden_.size(), output_.input_size())
      << "The hidden and the output layers sizes do not match.";
}
|
|
|
|
// Compiler-generated destructor, defaulted out of line in this translation
// unit.
RnnVad::~RnnVad() = default;
|
|
|
|
// Resets the hidden (GRU) layer. The input and output fully connected layers
// are not touched.
void RnnVad::Reset() {
  hidden_.Reset();
}
|
|
|
|
// Computes the VAD output for one feature vector.
//
// When `is_silence` is true, the network is not evaluated: the hidden layer is
// reset via Reset() and 0 is returned. Otherwise `feature_vector` is fed
// through the input, hidden and output layers in that order and the single
// value produced by the output layer is returned.
float RnnVad::ComputeVadProbability(
    rtc::ArrayView<const float, kFeatureVectorSize> feature_vector,
    bool is_silence) {
  // Silence short-circuits the inference.
  if (is_silence) {
    Reset();
    return 0.f;
  }

  // Forward pass; each layer reads the output of the previous one.
  input_.ComputeOutput(feature_vector);
  hidden_.ComputeOutput(input_);
  output_.ComputeOutput(hidden_);

  // The output layer is expected to produce exactly one value.
  RTC_DCHECK_EQ(output_.size(), 1);
  const float vad_probability = output_.data()[0];
  return vad_probability;
}
|
|
|
|
} // namespace rnn_vad
|
|
} // namespace webrtc
|