mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-15 14:50:39 +01:00

We put back the old noise estimator from LevelController. We add a few new unit tests. We also re-arrange the code so that it fits with how it is used in AGC2. The differences are: 1. The NoiseLevelEstimator is now fully self-contained. 2. The NoiseLevelEstimator is responsible for calling SignalClassifier and computing the signal energy. Previously the signal type and energy were used in several places. It made sense to compute the values independently of the noise calculation. 3. Re-initialization doesn't have to be done by the caller. 4. The interface is AudioFrameView instead of AudioBuffer. # Bots are green, nothing should break internal stuff NOTRY=True Bug: webrtc:7494 Change-Id: I442bdbbeb3796eb2518e96000aec9dc5a039ae6d Reviewed-on: https://webrtc-review.googlesource.com/66380 Commit-Queue: Alex Loiko <aleloi@webrtc.org> Reviewed-by: Sam Zackrisson <saza@webrtc.org> Cr-Commit-Position: refs/heads/master@{#22738}
167 lines
5.5 KiB
C++
167 lines
5.5 KiB
C++
/*
|
|
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "modules/audio_processing/agc2/signal_classifier.h"
|
|
|
|
#include <algorithm>
|
|
#include <numeric>
|
|
#include <vector>
|
|
|
|
#include "api/array_view.h"
|
|
#include "modules/audio_processing/agc2/down_sampler.h"
|
|
#include "modules/audio_processing/agc2/noise_spectrum_estimator.h"
|
|
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
|
#include "rtc_base/constructormagic.h"
|
|
|
|
namespace webrtc {
|
|
namespace {
|
|
|
|
// Subtracts the arithmetic mean of |x| from every element of |x|, in place.
// |x| is expected to be non-empty; this is only enforced in debug builds.
void RemoveDcLevel(rtc::ArrayView<float> x) {
  RTC_DCHECK_LT(0, x.size());

  // Sum the samples front to back in single precision.
  float sum = 0.f;
  for (const float sample : x) {
    sum += sample;
  }
  const float dc_level = sum / x.size();

  for (size_t i = 0; i < x.size(); ++i) {
    x[i] -= dc_level;
  }
}
|
|
|
|
void PowerSpectrum(const OouraFft* ooura_fft,
|
|
rtc::ArrayView<const float> x,
|
|
rtc::ArrayView<float> spectrum) {
|
|
RTC_DCHECK_EQ(65, spectrum.size());
|
|
RTC_DCHECK_EQ(128, x.size());
|
|
float X[128];
|
|
std::copy(x.data(), x.data() + x.size(), X);
|
|
ooura_fft->Fft(X);
|
|
|
|
float* X_p = X;
|
|
RTC_DCHECK_EQ(X_p, &X[0]);
|
|
spectrum[0] = (*X_p) * (*X_p);
|
|
++X_p;
|
|
RTC_DCHECK_EQ(X_p, &X[1]);
|
|
spectrum[64] = (*X_p) * (*X_p);
|
|
for (int k = 1; k < 64; ++k) {
|
|
++X_p;
|
|
RTC_DCHECK_EQ(X_p, &X[2 * k]);
|
|
spectrum[k] = (*X_p) * (*X_p);
|
|
++X_p;
|
|
RTC_DCHECK_EQ(X_p, &X[2 * k + 1]);
|
|
spectrum[k] += (*X_p) * (*X_p);
|
|
}
|
|
}
|
|
|
|
// Classifies a frame as stationary or non-stationary by comparing, for bands
// 1 through 39, the signal power against the estimated noise power.
//
// A band counts as stationary when its signal power lies strictly within a
// factor of 3 of the noise power, and as highly non-stationary when it exceeds
// 9 times the noise power. Both counts are dumped through |data_dumper|; only
// the stationary count decides the returned type.
webrtc::SignalClassifier::SignalType ClassifySignal(
    rtc::ArrayView<const float> signal_spectrum,
    rtc::ArrayView<const float> noise_spectrum,
    ApmDataDumper* data_dumper) {
  int num_stationary_bands = 0;
  int num_highly_nonstationary_bands = 0;

  for (size_t band = 1; band < 40; ++band) {
    const float signal_power = signal_spectrum[band];
    const float noise_power = noise_spectrum[band];

    const bool is_stationary =
        signal_power < 3 * noise_power && signal_power * 3 > noise_power;
    if (is_stationary) {
      ++num_stationary_bands;
      continue;
    }
    if (signal_power > 9 * noise_power) {
      ++num_highly_nonstationary_bands;
    }
  }

  data_dumper->DumpRaw("lc_num_stationary_bands", 1, &num_stationary_bands);
  data_dumper->DumpRaw("lc_num_highly_nonstationary_bands", 1,
                       &num_highly_nonstationary_bands);

  // The frame is stationary only when more than 15 bands are.
  return num_stationary_bands > 15
             ? SignalClassifier::SignalType::kStationary
             : SignalClassifier::SignalType::kNonStationary;
}
|
|
|
|
} // namespace
|
|
|
|
// Creates the history buffer that ExtendFrame() prepends to each new frame.
// It holds |extended_frame_size| - |frame_size| samples, all starting at zero.
SignalClassifier::FrameExtender::FrameExtender(size_t frame_size,
                                               size_t extended_frame_size)
    : x_old_(/*count=*/extended_frame_size - frame_size, /*value=*/0.f) {}
|
|
|
|
// Defaulted out of line; no cleanup beyond the members' own destructors.
SignalClassifier::FrameExtender::~FrameExtender() = default;
|
|
|
|
// Builds |x_extended| as the stored history followed by the new frame |x|,
// then stores the tail of |x_extended| as the history for the next call.
// |x_extended| must be exactly as long as the history plus |x|.
void SignalClassifier::FrameExtender::ExtendFrame(
    rtc::ArrayView<const float> x,
    rtc::ArrayView<float> x_extended) {
  const size_t history_size = x_old_.size();
  RTC_DCHECK_EQ(history_size + x.size(), x_extended.size());

  float* const extended = x_extended.data();

  // Layout of the output: [ history | new frame ].
  std::copy_n(x_old_.data(), history_size, extended);
  std::copy_n(x.data(), x.size(), extended + history_size);

  // Keep the last |history_size| output samples for the next call.
  std::copy_n(extended + x_extended.size() - history_size, history_size,
              x_old_.data());
}
|
|
|
|
// Stores |data_dumper|, hands it to the down-sampler and the noise spectrum
// estimator, and initializes the classifier for a 48 kHz sample rate.
SignalClassifier::SignalClassifier(ApmDataDumper* data_dumper)
    : data_dumper_(data_dumper),
      down_sampler_(data_dumper),
      noise_spectrum_estimator_(data_dumper) {
  constexpr int kDefaultSampleRateHz = 48000;
  Initialize(kDefaultSampleRateHz);
}
|
|
// Defaulted out of line, matching FrameExtender's destructor; no cleanup
// beyond the members' own destructors.
SignalClassifier::~SignalClassifier() = default;
|
|
|
|
// Resets the classifier for input sampled at |sample_rate_hz|: re-initializes
// the down-sampler and the noise spectrum estimator, replaces the frame
// extender, and restores the start-up counters and the last signal type.
void SignalClassifier::Initialize(int sample_rate_hz) {
  // Analyze() works on 80-sample down-sampled frames extended to 128 samples.
  constexpr size_t kDownSampledFrameSize = 80;
  constexpr size_t kExtendedFrameSize = 128;

  down_sampler_.Initialize(sample_rate_hz);
  noise_spectrum_estimator_.Initialize();
  frame_extender_.reset(
      new FrameExtender(kDownSampledFrameSize, kExtendedFrameSize));

  sample_rate_hz_ = sample_rate_hz;

  // Start-up state: the first 2 frames are flagged as initialization frames to
  // the noise estimator, and a classification has to repeat before Analyze()
  // reports it.
  last_signal_type_ = SignalClassifier::SignalType::kNonStationary;
  consistent_classification_counter_ = 3;
  initialization_frames_left_ = 2;
}
|
|
|
|
// Classifies one 10 ms frame (|signal| holds sample_rate_hz_ / 100 samples).
//
// The frame is down-sampled to 80 samples, extended to 128 samples with the
// stored history, has its mean removed and is turned into a power spectrum.
// That spectrum is compared with the current noise spectrum estimate, which is
// updated afterwards. A classification is reported only after it has repeated
// on consecutive frames; until then kNonStationary is returned.
SignalClassifier::SignalType SignalClassifier::Analyze(
    rtc::ArrayView<const float> signal) {
  RTC_DCHECK_EQ(signal.size(), sample_rate_hz_ / 100);

  // Signal power spectrum of the current frame.
  float downsampled_frame[80];
  float extended_frame[128];
  float signal_spectrum[65];
  down_sampler_.DownSample(signal, downsampled_frame);
  frame_extender_->ExtendFrame(downsampled_frame, extended_frame);
  RemoveDcLevel(extended_frame);
  PowerSpectrum(&ooura_fft_, extended_frame, signal_spectrum);

  // Classify against the noise estimate from before this frame's update.
  const SignalType signal_type = ClassifySignal(
      signal_spectrum, noise_spectrum_estimator_.GetNoiseSpectrum(),
      data_dumper_);

  // Feed the frame to the noise estimator, flagging the initialization frames,
  // then count one initialization frame as consumed.
  const bool is_initialization_frame = initialization_frames_left_ > 0;
  noise_spectrum_estimator_.Update(signal_spectrum, is_initialization_frame);
  initialization_frames_left_ = std::max(0, initialization_frames_left_ - 1);

  // A change of classification restarts the countdown; a repeat advances it.
  const bool classification_changed = (last_signal_type_ != signal_type);
  if (classification_changed) {
    last_signal_type_ = signal_type;
    consistent_classification_counter_ = 3;
  } else {
    consistent_classification_counter_ =
        std::max(0, consistent_classification_counter_ - 1);
  }

  // While the countdown is running, report non-stationary.
  return consistent_classification_counter_ > 0
             ? SignalClassifier::SignalType::kNonStationary
             : signal_type;
}
|
|
|
|
} // namespace webrtc
|