webrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc
Alessio Bazzica e8ee462681 RNN VAD: FC layer simplified
The implementation of the fully connected layer can be simplified by
using `VectorMath::DotProduct()`. In this way, it is also possible to
remove (nearly) duplicated SIMD code, reduce the binary size and more
easily maintain the code.
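The simplification amounts to expressing every fully connected unit as a
single dot product. A minimal standalone sketch of the idea follows; the
function and parameter names below are illustrative, only the reference to
`VectorMath::DotProduct()` in the comment points at the actual helper:

#include <cassert>
#include <cstddef>
#include <vector>

// Scalar stand-in for VectorMath::DotProduct(); in the actual code this is
// the only routine that needs SSE2/AVX2/NEON specializations.
float DotProduct(const float* x, const float* y, std::size_t size) {
  float acc = 0.f;
  for (std::size_t i = 0; i < size; ++i) {
    acc += x[i] * y[i];
  }
  return acc;
}

// Fully connected forward pass: each output unit is one bias plus one dot
// product over the input, followed by the activation function. No layer-level
// SIMD code is needed once the dot product itself is optimized.
void FullyConnectedForward(const std::vector<float>& input,
                           const std::vector<float>& weights,  // Row-major, output_size x input_size.
                           const std::vector<float>& bias,
                           float (*activation)(float),
                           std::vector<float>& output) {
  const std::size_t input_size = input.size();
  const std::size_t output_size = output.size();
  assert(weights.size() == input_size * output_size);
  assert(bias.size() == output_size);
  for (std::size_t o = 0; o < output_size; ++o) {
    output[o] = activation(bias[o] + DotProduct(input.data(),
                                                weights.data() + o * input_size,
                                                input_size));
  }
}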

This CL also forces unoptimized code for the output layer of the VAD,
which is a 24x1 FC layer. A slight improvement of the real-time factor
has been measured (delta ~ +5x).
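The effect of passing `NoAvailableCpuFeatures()` to that layer can be pictured
as a kernel selector; a hedged sketch of the dispatch idea follows (the struct
and flag names below are assumptions, not the exact `AvailableCpuFeatures`
fields):

#include <cstddef>

// Assumed shape of the CPU feature flags; the real struct lives elsewhere in
// the AGC2 code.
struct CpuFeaturesSketch {
  bool sse2 = false;
  bool avx2 = false;
  bool neon = false;
};

// Dispatch idea: SIMD paths are taken only when the corresponding flag is set,
// so an all-false feature set (as used for the 24x1 output layer) always falls
// through to the plain loop, avoiding SIMD setup overhead on a tiny layer.
float DotProductDispatch(const CpuFeaturesSketch& features,
                         const float* x,
                         const float* y,
                         std::size_t size) {
  // Hypothetical specializations would be selected here, e.g.:
  // if (features.avx2) return DotProductAvx2(x, y, size);
  // if (features.neon) return DotProductNeon(x, y, size);
  float acc = 0.f;
  for (std::size_t i = 0; i < size; ++i) {
    acc += x[i] * y[i];
  }
  return acc;
}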

Bug: webrtc:10480
Change-Id: Iee93bd59f7905ebf96275dbbfeb3c921baf4e8db
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/195580
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Ivo Creusen <ivoc@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32806}
2020-12-09 00:04:50 +00:00

/*
 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "modules/audio_processing/agc2/rnn_vad/rnn.h"
#include "rtc_base/checks.h"
#include "third_party/rnnoise/src/rnn_vad_weights.h"
namespace webrtc {
namespace rnn_vad {
namespace {

using ::rnnoise::kInputLayerInputSize;
static_assert(kFeatureVectorSize == kInputLayerInputSize, "");
using ::rnnoise::kInputDenseBias;
using ::rnnoise::kInputDenseWeights;
using ::rnnoise::kInputLayerOutputSize;
static_assert(kInputLayerOutputSize <= kFullyConnectedLayerMaxUnits, "");

using ::rnnoise::kHiddenGruBias;
using ::rnnoise::kHiddenGruRecurrentWeights;
using ::rnnoise::kHiddenGruWeights;
using ::rnnoise::kHiddenLayerOutputSize;
static_assert(kHiddenLayerOutputSize <= kGruLayerMaxUnits, "");

using ::rnnoise::kOutputDenseBias;
using ::rnnoise::kOutputDenseWeights;
using ::rnnoise::kOutputLayerOutputSize;
static_assert(kOutputLayerOutputSize <= kFullyConnectedLayerMaxUnits, "");

} // namespace

RnnVad::RnnVad(const AvailableCpuFeatures& cpu_features)
    : input_(kInputLayerInputSize,
             kInputLayerOutputSize,
             kInputDenseBias,
             kInputDenseWeights,
             ActivationFunction::kTansigApproximated,
             cpu_features,
             /*layer_name=*/"FC1"),
      hidden_(kInputLayerOutputSize,
              kHiddenLayerOutputSize,
              kHiddenGruBias,
              kHiddenGruWeights,
              kHiddenGruRecurrentWeights,
              cpu_features,
              /*layer_name=*/"GRU1"),
      output_(kHiddenLayerOutputSize,
              kOutputLayerOutputSize,
              kOutputDenseBias,
              kOutputDenseWeights,
              ActivationFunction::kSigmoidApproximated,
              // The output layer is just 24x1. The unoptimized code is faster.
              NoAvailableCpuFeatures(),
              /*layer_name=*/"FC2") {
  // Input-output chaining size checks.
  RTC_DCHECK_EQ(input_.size(), hidden_.input_size())
      << "The input and the hidden layers sizes do not match.";
  RTC_DCHECK_EQ(hidden_.size(), output_.input_size())
      << "The hidden and the output layers sizes do not match.";
}

RnnVad::~RnnVad() = default;

void RnnVad::Reset() {
  hidden_.Reset();
}

float RnnVad::ComputeVadProbability(
    rtc::ArrayView<const float, kFeatureVectorSize> feature_vector,
    bool is_silence) {
  if (is_silence) {
    Reset();
    return 0.f;
  }
  input_.ComputeOutput(feature_vector);
  hidden_.ComputeOutput(input_);
  output_.ComputeOutput(hidden_);
  RTC_DCHECK_EQ(output_.size(), 1);
  return output_.data()[0];
}

} // namespace rnn_vad
} // namespace webrtc
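
A minimal usage sketch of the class above; `GetAvailableCpuFeatures()`, the
header paths and the way the feature vector reaches the VAD are assumptions
about the surrounding AGC2 pipeline, not something defined in this file:

#include "api/array_view.h"
#include "modules/audio_processing/agc2/cpu_features.h"
#include "modules/audio_processing/agc2/rnn_vad/common.h"
#include "modules/audio_processing/agc2/rnn_vad/rnn.h"

namespace webrtc {
namespace rnn_vad {

// Hedged sketch: in real use the RnnVad instance is created once and reused
// across frames so that the GRU state carries over.
float ExampleVadProbability(
    rtc::ArrayView<const float, kFeatureVectorSize> feature_vector,
    bool is_silence) {
  static RnnVad vad(GetAvailableCpuFeatures());
  // Returns 0 and resets the recurrent state on silent frames, otherwise the
  // estimated probability of speech in [0, 1].
  return vad.ComputeVadProbability(feature_vector, is_silence);
}

} // namespace rnn_vad
} // namespace webrtc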