AEC3: Make RenderSignalAnalyzer multi-channel

In this CL:
 - The render signal analyzer considers a frequency bin a narrow band
(or narrow peak) if any channel exhibits narrow-band (or narrow-peak)
behavior in that bin.
 - The unit tests have to fill frames with noise because small
inaccuracies in the FFT spectrum otherwise lead to consistent "narrow
bands" being detected despite the spectrum being essentially flat.

Bug: webrtc:10913
Change-Id: I8fa181412c0ee1beeacfda37ffef18251d5f0cd7
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/151912
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29176}
This commit is contained in:
Sam Zackrisson 2019-09-12 12:32:44 +02:00 committed by Commit Bot
parent b5a4ae8a57
commit 3f17221d98
5 changed files with 178 additions and 111 deletions

View file

@ -29,19 +29,29 @@ void IdentifySmallNarrowBandRegions(
const RenderBuffer& render_buffer,
const absl::optional<size_t>& delay_partitions,
std::array<size_t, kFftLengthBy2 - 1>* narrow_band_counters) {
// Updates the per-bin narrow-band counters from the render spectrum at the
// partition given by |delay_partitions|. When no delay is available all
// counters are reset to zero.
// NOTE(review): this listing shows two bodies back to back -- a loop that
// reads channel 0 only, followed by a per-channel loop -- and the first loop
// has no closing brace here. It looks like a before/after diff view; confirm
// which lines are live before editing.
RTC_DCHECK(narrow_band_counters);
if (!delay_partitions) {
narrow_band_counters->fill(0);
return;
}
// Channel-0-only variant: an interior bin k extends its counter when it
// exceeds 3x the larger of its two neighbours; otherwise the counter is reset.
rtc::ArrayView<const float> X2 =
render_buffer.Spectrum(*delay_partitions, /*channel=*/0);
RTC_DCHECK_EQ(kFftLengthBy2Plus1, X2.size());
for (size_t k = 1; k < (X2.size() - 1); ++k) {
(*narrow_band_counters)[k - 1] = X2[k] > 3 * std::max(X2[k - 1], X2[k + 1])
? (*narrow_band_counters)[k - 1] + 1
: 0;
// Multi-channel variant: for every interior bin, count how many channels show
// a local peak (bin value above 3x the larger neighbour).
std::array<size_t, kFftLengthBy2 - 1> channel_counters;
channel_counters.fill(0);
for (size_t channel = 0; channel < render_buffer.Block(0)[0].size();
++channel) {
rtc::ArrayView<const float> X2 =
render_buffer.Spectrum(*delay_partitions, channel);
RTC_DCHECK_EQ(kFftLengthBy2Plus1, X2.size());
for (size_t k = 1; k < kFftLengthBy2; ++k) {
if (X2[k] > 3 * std::max(X2[k - 1], X2[k + 1])) {
++channel_counters[k - 1];
}
}
}
// A bin's counter is incremented if at least one channel flagged it, and
// reset to 0 otherwise. Counter index k - 1 corresponds to spectrum bin k.
for (size_t k = 1; k < kFftLengthBy2; ++k) {
(*narrow_band_counters)[k - 1] =
channel_counters[k - 1] > 0 ? (*narrow_band_counters)[k - 1] + 1 : 0;
}
}
@ -50,47 +60,58 @@ void IdentifyStrongNarrowBandComponent(const RenderBuffer& render_buffer,
int strong_peak_freeze_duration,
absl::optional<int>* narrow_peak_band,
size_t* narrow_peak_counter) {
// Looks for a single dominant spectral peak in the most recent render
// spectrum (Spectrum(0, ...)) and stores its bin in |narrow_peak_band|,
// resetting |narrow_peak_counter|. A previously stored peak is cleared once
// |narrow_peak_counter| exceeds |strong_peak_freeze_duration|.
// NOTE(review): this listing interleaves two bodies -- one that inspects
// channel 0 only and one that loops over all channels -- and the braces do
// not balance as shown. It looks like a before/after diff view; confirm which
// lines are live before editing.
const auto X2_latest = render_buffer.Spectrum(0, /*channel=*/0);
// Identify the spectral peak.
const int peak_bin = static_cast<int>(
std::max_element(X2_latest.begin(), X2_latest.end()) - X2_latest.begin());
// Compute the level around the peak: the maximum over up to 10 bins on each
// side, skipping the 4 bins closest to the peak.
float non_peak_power = 0.f;
for (int k = std::max(0, peak_bin - 14); k < peak_bin - 4; ++k) {
non_peak_power = std::max(X2_latest[k], non_peak_power);
}
for (int k = peak_bin + 5;
k < std::min(peak_bin + 15, static_cast<int>(kFftLengthBy2Plus1)); ++k) {
non_peak_power = std::max(X2_latest[k], non_peak_power);
RTC_DCHECK(narrow_peak_band);
RTC_DCHECK(narrow_peak_counter);
// Age the stored peak: the counter is incremented while a peak is stored, and
// the peak is dropped once the counter exceeds |strong_peak_freeze_duration|.
if (*narrow_peak_band &&
++(*narrow_peak_counter) >
static_cast<size_t>(strong_peak_freeze_duration)) {
*narrow_peak_band = absl::nullopt;
}
// Assess the render signal strength.
const std::vector<std::vector<std::vector<float>>>& x_latest =
render_buffer.Block(0);
auto result0 =
std::minmax_element(x_latest[0][0].begin(), x_latest[0][0].end());
float max_abs = std::max(fabs(*result0.first), fabs(*result0.second));
// Strongest qualifying peak level found so far across channels.
float max_peak_level = 0.f;
for (size_t channel = 0; channel < x_latest[0].size(); ++channel) {
const auto X2_latest = render_buffer.Spectrum(0, channel);
if (x_latest.size() > 1) {
const auto result1 =
std::minmax_element(x_latest[1][0].begin(), x_latest[1][0].end());
max_abs =
std::max(max_abs, static_cast<float>(std::max(fabs(*result1.first),
fabs(*result1.second))));
}
// Identify the spectral peak.
const int peak_bin =
static_cast<int>(std::max_element(X2_latest.begin(), X2_latest.end()) -
X2_latest.begin());
// Detect whether the spectral peak has a strong narrowband nature: the peak
// must not be in bin 0, the block amplitude must exceed 100, and the peak must
// exceed 100x the strongest surrounding bin.
if (peak_bin > 0 && max_abs > 100 &&
X2_latest[peak_bin] > 100 * non_peak_power) {
*narrow_peak_band = peak_bin;
*narrow_peak_counter = 0;
} else {
if (*narrow_peak_band &&
++(*narrow_peak_counter) >
static_cast<size_t>(strong_peak_freeze_duration)) {
*narrow_peak_band = absl::nullopt;
// Compute the level around the peak: the maximum over up to 10 bins on each
// side, skipping the 4 bins closest to the peak.
float non_peak_power = 0.f;
for (int k = std::max(0, peak_bin - 14); k < peak_bin - 4; ++k) {
non_peak_power = std::max(X2_latest[k], non_peak_power);
}
for (int k = peak_bin + 5;
k < std::min(peak_bin + 15, static_cast<int>(kFftLengthBy2Plus1));
++k) {
non_peak_power = std::max(X2_latest[k], non_peak_power);
}
// Assess the render signal strength: largest absolute sample of this channel
// in x_latest[0] and, when present, x_latest[1].
auto result0 = std::minmax_element(x_latest[0][channel].begin(),
x_latest[0][channel].end());
float max_abs = std::max(fabs(*result0.first), fabs(*result0.second));
if (x_latest.size() > 1) {
const auto result1 = std::minmax_element(x_latest[1][channel].begin(),
x_latest[1][channel].end());
max_abs =
std::max(max_abs, static_cast<float>(std::max(
fabs(*result1.first), fabs(*result1.second))));
}
// Detect whether the spectral peak has a strong narrowband nature: the peak
// must not be in bin 0, the block amplitude must exceed 100, and the peak must
// exceed 100x the strongest surrounding bin.
const float peak_level = X2_latest[peak_bin];
if (peak_bin > 0 && max_abs > 100 && peak_level > 100 * non_peak_power) {
// Store the strongest peak across channels.
if (peak_level > max_peak_level) {
max_peak_level = peak_level;
*narrow_peak_band = peak_bin;
*narrow_peak_counter = 0;
}
}
}
}

View file

@ -23,6 +23,7 @@
#include "modules/audio_processing/aec3/render_delay_buffer.h"
#include "modules/audio_processing/test/echo_canceller_test_tools.h"
#include "rtc_base/random.h"
#include "rtc_base/strings/string_builder.h"
#include "test/gtest.h"
namespace webrtc {
@ -30,87 +31,42 @@ namespace {
constexpr float kPi = 3.141592f;
void ProduceSinusoid(int sample_rate_hz,
float sinusoidal_frequency_hz,
size_t* sample_counter,
std::vector<std::vector<std::vector<float>>>* x) {
// Produce a sinusoid of the specified frequency.
// NOTE(review): a second function header (ProduceSinusoidInNoise) follows
// directly inside this body, and two sinusoid-writing statements appear in the
// sample loop below. It looks like a before/after diff view; confirm which
// lines are live before editing.
// ProduceSinusoidInNoise: fills every band and channel of |x| with noise of
// amplitude 500 and adds a 32000-amplitude sinusoid to band 0 of
// |sinusoid_channel|. |sample_counter| is the running sample index used for
// the sinusoid phase; it is advanced by kBlockSize on each call.
void ProduceSinusoidInNoise(int sample_rate_hz,
size_t sinusoid_channel,
float sinusoidal_frequency_hz,
Random* random_generator,
size_t* sample_counter,
std::vector<std::vector<std::vector<float>>>* x) {
// Fill x with low-amplitude noise.
for (auto& band : *x) {
for (auto& channel : band) {
RandomizeSampleVector(random_generator, channel,
/*amplitude=*/500.f);
}
}
// Produce a sinusoid of the specified frequency in the specified channel.
// k is the absolute sample index, j the index within the current block.
for (size_t k = *sample_counter, j = 0; k < (*sample_counter + kBlockSize);
++k, ++j) {
// Variant that overwrites band 0 of every channel with a 32767-amplitude
// sinusoid.
for (size_t channel = 0; channel < (*x)[0].size(); ++channel) {
(*x)[0][channel][j] =
32767.f *
std::sin(2.f * kPi * sinusoidal_frequency_hz * k / sample_rate_hz);
}
// Variant that adds a 32000-amplitude sinusoid on top of the existing content
// of band 0 in |sinusoid_channel| only.
(*x)[0][sinusoid_channel][j] +=
32000.f *
std::sin(2.f * kPi * sinusoidal_frequency_hz * k / sample_rate_hz);
}
*sample_counter = *sample_counter + kBlockSize;
// Zero every band above band 0, for all channels.
for (size_t band = 1; band < x->size(); ++band) {
for (size_t channel = 0; channel < (*x)[band].size(); ++channel) {
std::fill((*x)[band][channel].begin(), (*x)[band][channel].end(), 0.f);
}
}
}
} // namespace
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
// Death test: handing a null mask to MaskRegionsAroundNarrowBands must trip
// the non-null check on the output parameter.
TEST(RenderSignalAnalyzer, NullMaskOutput) {
  EchoCanceller3Config config;
  RenderSignalAnalyzer analyzer(config);
  EXPECT_DEATH(analyzer.MaskRegionsAroundNarrowBands(nullptr), "");
}
#endif
// Verify that no narrow bands are detected in a Gaussian noise signal.
TEST(RenderSignalAnalyzer, NoFalseDetectionOfNarrowBands) {
void RunNarrowBandDetectionTest(size_t num_channels) {
RenderSignalAnalyzer analyzer(EchoCanceller3Config{});
Random random_generator(42U);
std::vector<std::vector<std::vector<float>>> x(
3,
std::vector<std::vector<float>>(1, std::vector<float>(kBlockSize, 0.f)));
std::array<float, kBlockSize> x_old;
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
RenderDelayBuffer::Create(EchoCanceller3Config(), 48000, 1));
std::array<float, kFftLengthBy2Plus1> mask;
x_old.fill(0.f);
for (size_t k = 0; k < 100; ++k) {
RandomizeSampleVector(&random_generator, x[0][0]);
render_delay_buffer->Insert(x);
if (k == 0) {
render_delay_buffer->Reset();
}
render_delay_buffer->PrepareCaptureProcessing();
analyzer.Update(*render_delay_buffer->GetRenderBuffer(),
absl::optional<size_t>(0));
}
mask.fill(1.f);
analyzer.MaskRegionsAroundNarrowBands(&mask);
EXPECT_TRUE(
std::all_of(mask.begin(), mask.end(), [](float a) { return a == 1.f; }));
EXPECT_FALSE(analyzer.PoorSignalExcitation());
}
// Verify that a sinusoid signal is detected as narrow bands.
TEST(RenderSignalAnalyzer, NarrowBandDetection) {
RenderSignalAnalyzer analyzer(EchoCanceller3Config{});
Random random_generator(42U);
constexpr size_t kNumChannels = 1;
constexpr int kSampleRateHz = 48000;
constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
std::vector<std::vector<std::vector<float>>> x(
kNumBands, std::vector<std::vector<float>>(
kNumChannels, std::vector<float>(kBlockSize, 0.f)));
num_channels, std::vector<float>(kBlockSize, 0.f)));
std::array<float, kBlockSize> x_old;
Aec3Fft fft;
EchoCanceller3Config config;
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
RenderDelayBuffer::Create(config, kSampleRateHz, kNumChannels));
RenderDelayBuffer::Create(config, kSampleRateHz, num_channels));
std::array<float, kFftLengthBy2Plus1> mask;
x_old.fill(0.f);
@ -119,8 +75,9 @@ TEST(RenderSignalAnalyzer, NarrowBandDetection) {
auto generate_sinusoid_test = [&](bool known_delay) {
size_t sample_counter = 0;
for (size_t k = 0; k < 100; ++k) {
ProduceSinusoid(16000, 16000 / 2 * kSinusFrequencyBin / kFftLengthBy2,
&sample_counter, &x);
ProduceSinusoidInNoise(16000, num_channels - 1,
16000 / 2 * kSinusFrequencyBin / kFftLengthBy2,
&random_generator, &sample_counter, &x);
render_delay_buffer->Insert(x);
if (k == 0) {
@ -140,6 +97,8 @@ TEST(RenderSignalAnalyzer, NarrowBandDetection) {
EXPECT_EQ(abs(k - kSinusFrequencyBin) <= 2 ? 0.f : 1.f, mask[k]);
}
EXPECT_TRUE(analyzer.PoorSignalExcitation());
EXPECT_TRUE(static_cast<bool>(analyzer.NarrowPeakBand()));
EXPECT_EQ(*analyzer.NarrowPeakBand(), 32);
// Verify that no bands are detected as narrow when the delay is unknown.
generate_sinusoid_test(false);
@ -149,4 +108,68 @@ TEST(RenderSignalAnalyzer, NarrowBandDetection) {
EXPECT_FALSE(analyzer.PoorSignalExcitation());
}
std::string ProduceDebugText(size_t num_channels) {
rtc::StringBuilder ss;
ss << "number of channels: " << num_channels;
return ss.Release();
}
} // namespace
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
// Death test: handing a null mask to MaskRegionsAroundNarrowBands must trip
// the non-null check on the output parameter.
TEST(RenderSignalAnalyzer, NullMaskOutput) {
  EchoCanceller3Config config;
  RenderSignalAnalyzer analyzer(config);
  EXPECT_DEATH(analyzer.MaskRegionsAroundNarrowBands(nullptr), "");
}
#endif
// Verify that no narrow bands, no narrow peak and no poor excitation are
// reported for a random noise signal, for 1, 2 and 8 render channels.
TEST(RenderSignalAnalyzer, NoFalseDetectionOfNarrowBands) {
  constexpr int kSampleRateHz = 48000;
  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
  // Use size_t directly so the channel count is not implicitly converted from
  // int at every call site below.
  for (size_t num_channels : {1, 2, 8}) {
    SCOPED_TRACE(ProduceDebugText(num_channels));
    RenderSignalAnalyzer analyzer(EchoCanceller3Config{});
    Random random_generator(42U);
    std::vector<std::vector<std::vector<float>>> x(
        kNumBands, std::vector<std::vector<float>>(
                       num_channels, std::vector<float>(kBlockSize, 0.f)));
    std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
        RenderDelayBuffer::Create(EchoCanceller3Config(), kSampleRateHz,
                                  num_channels));
    std::array<float, kFftLengthBy2Plus1> mask;

    for (size_t k = 0; k < 100; ++k) {
      // Fresh noise in every band and channel for each block.
      for (auto& band : x) {
        for (auto& channel : band) {
          RandomizeSampleVector(&random_generator, channel);
        }
      }

      render_delay_buffer->Insert(x);
      // NOTE(review): the buffer is reset right after the first insert,
      // presumably to start the analysis from a clean state -- confirm.
      if (k == 0) {
        render_delay_buffer->Reset();
      }
      render_delay_buffer->PrepareCaptureProcessing();

      analyzer.Update(*render_delay_buffer->GetRenderBuffer(),
                      absl::optional<size_t>(0));
    }

    // The mask must be left untouched: no bin may be flagged as narrow band.
    mask.fill(1.f);
    analyzer.MaskRegionsAroundNarrowBands(&mask);
    EXPECT_TRUE(std::all_of(mask.begin(), mask.end(),
                            [](float a) { return a == 1.f; }));
    EXPECT_FALSE(analyzer.PoorSignalExcitation());
    EXPECT_FALSE(static_cast<bool>(analyzer.NarrowPeakBand()));
  }
}
// Verify that a sinusoid signal is detected as narrow bands, running the
// shared detection scenario once per render channel count.
TEST(RenderSignalAnalyzer, NarrowBandDetection) {
  constexpr size_t kChannelCounts[] = {1, 2, 8};
  for (const size_t channel_count : kChannelCounts) {
    SCOPED_TRACE(ProduceDebugText(channel_count));
    RunNarrowBandDetectionTest(channel_count);
  }
}
} // namespace webrtc

View file

@ -15,8 +15,15 @@
namespace webrtc {
// Convenience overload: randomizes |v| with an amplitude of 32767 by
// forwarding to the amplitude-taking overload.
void RandomizeSampleVector(Random* random_generator, rtc::ArrayView<float> v) {
  constexpr float kDefaultAmplitude = 32767.f;
  RandomizeSampleVector(random_generator, v, kDefaultAmplitude);
}
// Overwrites every element of |v| with
//   2 * amplitude * r - amplitude,
// where r is a fresh random_generator->Rand<float>() draw.
// NOTE(review): presumably Rand<float>() lies in [0, 1], which yields values
// in [-amplitude, amplitude] -- confirm against the Random class.
// Exactly one random number is drawn per element; the earlier hard-coded
// 32767 assignment was immediately overwritten and only wasted a draw.
void RandomizeSampleVector(Random* random_generator,
                           rtc::ArrayView<float> v,
                           float amplitude) {
  for (auto& v_k : v) {
    v_k = 2 * amplitude * random_generator->Rand<float>() - amplitude;
  }
}

View file

@ -23,6 +23,11 @@ namespace webrtc {
// Overwrites every element of |v| with a random value in the range
// -32767.f:32767.f, drawn from |random_generator|.
void RandomizeSampleVector(Random* random_generator, rtc::ArrayView<float> v);
// Overwrites every element of |v| with a random value in the range
// -amplitude:amplitude, drawn from |random_generator|.
void RandomizeSampleVector(Random* random_generator,
rtc::ArrayView<float> v,
float amplitude);
// Class for delaying a signal a fixed number of samples.
template <typename T>
class DelayBuffer {

View file

@ -68,4 +68,15 @@ TEST(EchoCancellerTestTools, RandomizeSampleVector) {
EXPECT_NE(v, v_ref);
}
// Checks that the amplitude-taking overload keeps every generated sample
// within [-amplitude, amplitude], first for 1000 and then for 100.
TEST(EchoCancellerTestTools, RandomizeSampleVectorWithAmplitude) {
  Random random_generator(42U);
  std::vector<float> v(50, 0.f);
  for (const float amplitude : {1000.f, 100.f}) {
    RandomizeSampleVector(&random_generator, v, amplitude);
    const auto extremes = std::minmax_element(v.begin(), v.end());
    EXPECT_GE(amplitude, *extremes.second);
    EXPECT_LE(-amplitude, *extremes.first);
  }
}
} // namespace webrtc