mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-12 21:30:45 +01:00
AEC3: Make RenderSignalAnalyzer multi-channel
In this CL: - The render signal analyzer considers a frequency bin a narrow band (peak) if any channel exhibits narrow-band (narrow-peak) behavior. - The unit tests have to fill frames with noise because small inaccuracies in the FFT spectrum lead to consistent "narrow bands" despite the spectrum being essentially flat. Bug: webrtc:10913 Change-Id: I8fa181412c0ee1beeacfda37ffef18251d5f0cd7 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/151912 Reviewed-by: Per Åhgren <peah@webrtc.org> Commit-Queue: Sam Zackrisson <saza@webrtc.org> Cr-Commit-Position: refs/heads/master@{#29176}
This commit is contained in:
parent
b5a4ae8a57
commit
3f17221d98
5 changed files with 178 additions and 111 deletions
|
@ -29,19 +29,29 @@ void IdentifySmallNarrowBandRegions(
|
|||
const RenderBuffer& render_buffer,
|
||||
const absl::optional<size_t>& delay_partitions,
|
||||
std::array<size_t, kFftLengthBy2 - 1>* narrow_band_counters) {
|
||||
RTC_DCHECK(narrow_band_counters);
|
||||
|
||||
if (!delay_partitions) {
|
||||
narrow_band_counters->fill(0);
|
||||
return;
|
||||
}
|
||||
|
||||
rtc::ArrayView<const float> X2 =
|
||||
render_buffer.Spectrum(*delay_partitions, /*channel=*/0);
|
||||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, X2.size());
|
||||
|
||||
for (size_t k = 1; k < (X2.size() - 1); ++k) {
|
||||
(*narrow_band_counters)[k - 1] = X2[k] > 3 * std::max(X2[k - 1], X2[k + 1])
|
||||
? (*narrow_band_counters)[k - 1] + 1
|
||||
: 0;
|
||||
std::array<size_t, kFftLengthBy2 - 1> channel_counters;
|
||||
channel_counters.fill(0);
|
||||
for (size_t channel = 0; channel < render_buffer.Block(0)[0].size();
|
||||
++channel) {
|
||||
rtc::ArrayView<const float> X2 =
|
||||
render_buffer.Spectrum(*delay_partitions, channel);
|
||||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, X2.size());
|
||||
for (size_t k = 1; k < kFftLengthBy2; ++k) {
|
||||
if (X2[k] > 3 * std::max(X2[k - 1], X2[k + 1])) {
|
||||
++channel_counters[k - 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
for (size_t k = 1; k < kFftLengthBy2; ++k) {
|
||||
(*narrow_band_counters)[k - 1] =
|
||||
channel_counters[k - 1] > 0 ? (*narrow_band_counters)[k - 1] + 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -50,47 +60,58 @@ void IdentifyStrongNarrowBandComponent(const RenderBuffer& render_buffer,
|
|||
int strong_peak_freeze_duration,
|
||||
absl::optional<int>* narrow_peak_band,
|
||||
size_t* narrow_peak_counter) {
|
||||
const auto X2_latest = render_buffer.Spectrum(0, /*channel=*/0);
|
||||
|
||||
// Identify the spectral peak.
|
||||
const int peak_bin = static_cast<int>(
|
||||
std::max_element(X2_latest.begin(), X2_latest.end()) - X2_latest.begin());
|
||||
|
||||
// Compute the level around the peak.
|
||||
float non_peak_power = 0.f;
|
||||
for (int k = std::max(0, peak_bin - 14); k < peak_bin - 4; ++k) {
|
||||
non_peak_power = std::max(X2_latest[k], non_peak_power);
|
||||
}
|
||||
for (int k = peak_bin + 5;
|
||||
k < std::min(peak_bin + 15, static_cast<int>(kFftLengthBy2Plus1)); ++k) {
|
||||
non_peak_power = std::max(X2_latest[k], non_peak_power);
|
||||
RTC_DCHECK(narrow_peak_band);
|
||||
RTC_DCHECK(narrow_peak_counter);
|
||||
if (*narrow_peak_band &&
|
||||
++(*narrow_peak_counter) >
|
||||
static_cast<size_t>(strong_peak_freeze_duration)) {
|
||||
*narrow_peak_band = absl::nullopt;
|
||||
}
|
||||
|
||||
// Assess the render signal strength.
|
||||
const std::vector<std::vector<std::vector<float>>>& x_latest =
|
||||
render_buffer.Block(0);
|
||||
auto result0 =
|
||||
std::minmax_element(x_latest[0][0].begin(), x_latest[0][0].end());
|
||||
float max_abs = std::max(fabs(*result0.first), fabs(*result0.second));
|
||||
float max_peak_level = 0.f;
|
||||
for (size_t channel = 0; channel < x_latest[0].size(); ++channel) {
|
||||
const auto X2_latest = render_buffer.Spectrum(0, channel);
|
||||
|
||||
if (x_latest.size() > 1) {
|
||||
const auto result1 =
|
||||
std::minmax_element(x_latest[1][0].begin(), x_latest[1][0].end());
|
||||
max_abs =
|
||||
std::max(max_abs, static_cast<float>(std::max(fabs(*result1.first),
|
||||
fabs(*result1.second))));
|
||||
}
|
||||
// Identify the spectral peak.
|
||||
const int peak_bin =
|
||||
static_cast<int>(std::max_element(X2_latest.begin(), X2_latest.end()) -
|
||||
X2_latest.begin());
|
||||
|
||||
// Detect whether the spectal peak has as strong narrowband nature.
|
||||
if (peak_bin > 0 && max_abs > 100 &&
|
||||
X2_latest[peak_bin] > 100 * non_peak_power) {
|
||||
*narrow_peak_band = peak_bin;
|
||||
*narrow_peak_counter = 0;
|
||||
} else {
|
||||
if (*narrow_peak_band &&
|
||||
++(*narrow_peak_counter) >
|
||||
static_cast<size_t>(strong_peak_freeze_duration)) {
|
||||
*narrow_peak_band = absl::nullopt;
|
||||
// Compute the level around the peak.
|
||||
float non_peak_power = 0.f;
|
||||
for (int k = std::max(0, peak_bin - 14); k < peak_bin - 4; ++k) {
|
||||
non_peak_power = std::max(X2_latest[k], non_peak_power);
|
||||
}
|
||||
for (int k = peak_bin + 5;
|
||||
k < std::min(peak_bin + 15, static_cast<int>(kFftLengthBy2Plus1));
|
||||
++k) {
|
||||
non_peak_power = std::max(X2_latest[k], non_peak_power);
|
||||
}
|
||||
|
||||
// Assess the render signal strength.
|
||||
auto result0 = std::minmax_element(x_latest[0][channel].begin(),
|
||||
x_latest[0][channel].end());
|
||||
float max_abs = std::max(fabs(*result0.first), fabs(*result0.second));
|
||||
|
||||
if (x_latest.size() > 1) {
|
||||
const auto result1 = std::minmax_element(x_latest[1][channel].begin(),
|
||||
x_latest[1][channel].end());
|
||||
max_abs =
|
||||
std::max(max_abs, static_cast<float>(std::max(
|
||||
fabs(*result1.first), fabs(*result1.second))));
|
||||
}
|
||||
|
||||
// Detect whether the spectral peak has a strong narrowband nature.
|
||||
const float peak_level = X2_latest[peak_bin];
|
||||
if (peak_bin > 0 && max_abs > 100 && peak_level > 100 * non_peak_power) {
|
||||
// Store the strongest peak across channels.
|
||||
if (peak_level > max_peak_level) {
|
||||
max_peak_level = peak_level;
|
||||
*narrow_peak_band = peak_bin;
|
||||
*narrow_peak_counter = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "modules/audio_processing/aec3/render_delay_buffer.h"
|
||||
#include "modules/audio_processing/test/echo_canceller_test_tools.h"
|
||||
#include "rtc_base/random.h"
|
||||
#include "rtc_base/strings/string_builder.h"
|
||||
#include "test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
@ -30,87 +31,42 @@ namespace {
|
|||
|
||||
constexpr float kPi = 3.141592f;
|
||||
|
||||
void ProduceSinusoid(int sample_rate_hz,
|
||||
float sinusoidal_frequency_hz,
|
||||
size_t* sample_counter,
|
||||
std::vector<std::vector<std::vector<float>>>* x) {
|
||||
// Produce a sinusoid of the specified frequency.
|
||||
void ProduceSinusoidInNoise(int sample_rate_hz,
|
||||
size_t sinusoid_channel,
|
||||
float sinusoidal_frequency_hz,
|
||||
Random* random_generator,
|
||||
size_t* sample_counter,
|
||||
std::vector<std::vector<std::vector<float>>>* x) {
|
||||
// Fill x with low-amplitude noise.
|
||||
for (auto& band : *x) {
|
||||
for (auto& channel : band) {
|
||||
RandomizeSampleVector(random_generator, channel,
|
||||
/*amplitude=*/500.f);
|
||||
}
|
||||
}
|
||||
// Produce a sinusoid of the specified frequency in the specified channel.
|
||||
for (size_t k = *sample_counter, j = 0; k < (*sample_counter + kBlockSize);
|
||||
++k, ++j) {
|
||||
for (size_t channel = 0; channel < (*x)[0].size(); ++channel) {
|
||||
(*x)[0][channel][j] =
|
||||
32767.f *
|
||||
std::sin(2.f * kPi * sinusoidal_frequency_hz * k / sample_rate_hz);
|
||||
}
|
||||
(*x)[0][sinusoid_channel][j] +=
|
||||
32000.f *
|
||||
std::sin(2.f * kPi * sinusoidal_frequency_hz * k / sample_rate_hz);
|
||||
}
|
||||
*sample_counter = *sample_counter + kBlockSize;
|
||||
|
||||
for (size_t band = 1; band < x->size(); ++band) {
|
||||
for (size_t channel = 0; channel < (*x)[band].size(); ++channel) {
|
||||
std::fill((*x)[band][channel].begin(), (*x)[band][channel].end(), 0.f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
|
||||
// Verifies that the check for non-null output parameter works.
|
||||
TEST(RenderSignalAnalyzer, NullMaskOutput) {
|
||||
RenderSignalAnalyzer analyzer(EchoCanceller3Config{});
|
||||
EXPECT_DEATH(analyzer.MaskRegionsAroundNarrowBands(nullptr), "");
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Verify that no narrow bands are detected in a Gaussian noise signal.
|
||||
TEST(RenderSignalAnalyzer, NoFalseDetectionOfNarrowBands) {
|
||||
void RunNarrowBandDetectionTest(size_t num_channels) {
|
||||
RenderSignalAnalyzer analyzer(EchoCanceller3Config{});
|
||||
Random random_generator(42U);
|
||||
std::vector<std::vector<std::vector<float>>> x(
|
||||
3,
|
||||
std::vector<std::vector<float>>(1, std::vector<float>(kBlockSize, 0.f)));
|
||||
std::array<float, kBlockSize> x_old;
|
||||
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
|
||||
RenderDelayBuffer::Create(EchoCanceller3Config(), 48000, 1));
|
||||
std::array<float, kFftLengthBy2Plus1> mask;
|
||||
x_old.fill(0.f);
|
||||
|
||||
for (size_t k = 0; k < 100; ++k) {
|
||||
RandomizeSampleVector(&random_generator, x[0][0]);
|
||||
|
||||
render_delay_buffer->Insert(x);
|
||||
if (k == 0) {
|
||||
render_delay_buffer->Reset();
|
||||
}
|
||||
render_delay_buffer->PrepareCaptureProcessing();
|
||||
|
||||
analyzer.Update(*render_delay_buffer->GetRenderBuffer(),
|
||||
absl::optional<size_t>(0));
|
||||
}
|
||||
|
||||
mask.fill(1.f);
|
||||
analyzer.MaskRegionsAroundNarrowBands(&mask);
|
||||
EXPECT_TRUE(
|
||||
std::all_of(mask.begin(), mask.end(), [](float a) { return a == 1.f; }));
|
||||
EXPECT_FALSE(analyzer.PoorSignalExcitation());
|
||||
}
|
||||
|
||||
// Verify that a sinusiod signal is detected as narrow bands.
|
||||
TEST(RenderSignalAnalyzer, NarrowBandDetection) {
|
||||
RenderSignalAnalyzer analyzer(EchoCanceller3Config{});
|
||||
Random random_generator(42U);
|
||||
constexpr size_t kNumChannels = 1;
|
||||
constexpr int kSampleRateHz = 48000;
|
||||
constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
|
||||
std::vector<std::vector<std::vector<float>>> x(
|
||||
kNumBands, std::vector<std::vector<float>>(
|
||||
kNumChannels, std::vector<float>(kBlockSize, 0.f)));
|
||||
num_channels, std::vector<float>(kBlockSize, 0.f)));
|
||||
std::array<float, kBlockSize> x_old;
|
||||
Aec3Fft fft;
|
||||
EchoCanceller3Config config;
|
||||
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
|
||||
RenderDelayBuffer::Create(config, kSampleRateHz, kNumChannels));
|
||||
RenderDelayBuffer::Create(config, kSampleRateHz, num_channels));
|
||||
|
||||
std::array<float, kFftLengthBy2Plus1> mask;
|
||||
x_old.fill(0.f);
|
||||
|
@ -119,8 +75,9 @@ TEST(RenderSignalAnalyzer, NarrowBandDetection) {
|
|||
auto generate_sinusoid_test = [&](bool known_delay) {
|
||||
size_t sample_counter = 0;
|
||||
for (size_t k = 0; k < 100; ++k) {
|
||||
ProduceSinusoid(16000, 16000 / 2 * kSinusFrequencyBin / kFftLengthBy2,
|
||||
&sample_counter, &x);
|
||||
ProduceSinusoidInNoise(16000, num_channels - 1,
|
||||
16000 / 2 * kSinusFrequencyBin / kFftLengthBy2,
|
||||
&random_generator, &sample_counter, &x);
|
||||
|
||||
render_delay_buffer->Insert(x);
|
||||
if (k == 0) {
|
||||
|
@ -140,6 +97,8 @@ TEST(RenderSignalAnalyzer, NarrowBandDetection) {
|
|||
EXPECT_EQ(abs(k - kSinusFrequencyBin) <= 2 ? 0.f : 1.f, mask[k]);
|
||||
}
|
||||
EXPECT_TRUE(analyzer.PoorSignalExcitation());
|
||||
EXPECT_TRUE(static_cast<bool>(analyzer.NarrowPeakBand()));
|
||||
EXPECT_EQ(*analyzer.NarrowPeakBand(), 32);
|
||||
|
||||
// Verify that no bands are detected as narrow when the delay is unknown.
|
||||
generate_sinusoid_test(false);
|
||||
|
@ -149,4 +108,68 @@ TEST(RenderSignalAnalyzer, NarrowBandDetection) {
|
|||
EXPECT_FALSE(analyzer.PoorSignalExcitation());
|
||||
}
|
||||
|
||||
std::string ProduceDebugText(size_t num_channels) {
|
||||
rtc::StringBuilder ss;
|
||||
ss << "number of channels: " << num_channels;
|
||||
return ss.Release();
|
||||
}
|
||||
} // namespace
|
||||
|
||||
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
|
||||
// Verifies that the check for non-null output parameter works.
|
||||
TEST(RenderSignalAnalyzer, NullMaskOutput) {
|
||||
RenderSignalAnalyzer analyzer(EchoCanceller3Config{});
|
||||
EXPECT_DEATH(analyzer.MaskRegionsAroundNarrowBands(nullptr), "");
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Verify that no narrow bands are detected in a Gaussian noise signal.
|
||||
TEST(RenderSignalAnalyzer, NoFalseDetectionOfNarrowBands) {
|
||||
for (auto num_channels : {1, 2, 8}) {
|
||||
SCOPED_TRACE(ProduceDebugText(num_channels));
|
||||
RenderSignalAnalyzer analyzer(EchoCanceller3Config{});
|
||||
Random random_generator(42U);
|
||||
std::vector<std::vector<std::vector<float>>> x(
|
||||
3, std::vector<std::vector<float>>(
|
||||
num_channels, std::vector<float>(kBlockSize, 0.f)));
|
||||
std::array<float, kBlockSize> x_old;
|
||||
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
|
||||
RenderDelayBuffer::Create(EchoCanceller3Config(), 48000, num_channels));
|
||||
std::array<float, kFftLengthBy2Plus1> mask;
|
||||
x_old.fill(0.f);
|
||||
|
||||
for (size_t k = 0; k < 100; ++k) {
|
||||
for (auto& band : x) {
|
||||
for (auto& channel : band) {
|
||||
RandomizeSampleVector(&random_generator, channel);
|
||||
}
|
||||
}
|
||||
|
||||
render_delay_buffer->Insert(x);
|
||||
if (k == 0) {
|
||||
render_delay_buffer->Reset();
|
||||
}
|
||||
render_delay_buffer->PrepareCaptureProcessing();
|
||||
|
||||
analyzer.Update(*render_delay_buffer->GetRenderBuffer(),
|
||||
absl::optional<size_t>(0));
|
||||
}
|
||||
|
||||
mask.fill(1.f);
|
||||
analyzer.MaskRegionsAroundNarrowBands(&mask);
|
||||
EXPECT_TRUE(std::all_of(mask.begin(), mask.end(),
|
||||
[](float a) { return a == 1.f; }));
|
||||
EXPECT_FALSE(analyzer.PoorSignalExcitation());
|
||||
EXPECT_FALSE(static_cast<bool>(analyzer.NarrowPeakBand()));
|
||||
}
|
||||
}
|
||||
|
||||
// Verify that a sinusoid signal is detected as narrow bands.
|
||||
TEST(RenderSignalAnalyzer, NarrowBandDetection) {
|
||||
for (auto num_channels : {1, 2, 8}) {
|
||||
SCOPED_TRACE(ProduceDebugText(num_channels));
|
||||
RunNarrowBandDetectionTest(num_channels);
|
||||
}
|
||||
}
|
||||
} // namespace webrtc
|
||||
|
|
|
@ -15,8 +15,15 @@
|
|||
namespace webrtc {
|
||||
|
||||
void RandomizeSampleVector(Random* random_generator, rtc::ArrayView<float> v) {
|
||||
RandomizeSampleVector(random_generator, v,
|
||||
/*amplitude=*/32767.f);
|
||||
}
|
||||
|
||||
void RandomizeSampleVector(Random* random_generator,
|
||||
rtc::ArrayView<float> v,
|
||||
float amplitude) {
|
||||
for (auto& v_k : v) {
|
||||
v_k = 2 * 32767.f * random_generator->Rand<float>() - 32767.f;
|
||||
v_k = 2 * amplitude * random_generator->Rand<float>() - amplitude;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -23,6 +23,11 @@ namespace webrtc {
|
|||
// Randomizes the elements in a vector with values -32767.f:32767.f.
|
||||
void RandomizeSampleVector(Random* random_generator, rtc::ArrayView<float> v);
|
||||
|
||||
// Randomizes the elements in a vector with values -amplitude:amplitude.
|
||||
void RandomizeSampleVector(Random* random_generator,
|
||||
rtc::ArrayView<float> v,
|
||||
float amplitude);
|
||||
|
||||
// Class for delaying a signal a fixed number of samples.
|
||||
template <typename T>
|
||||
class DelayBuffer {
|
||||
|
|
|
@ -68,4 +68,15 @@ TEST(EchoCancellerTestTools, RandomizeSampleVector) {
|
|||
EXPECT_NE(v, v_ref);
|
||||
}
|
||||
|
||||
TEST(EchoCancellerTestTools, RandomizeSampleVectorWithAmplitude) {
|
||||
Random random_generator(42U);
|
||||
std::vector<float> v(50, 0.f);
|
||||
RandomizeSampleVector(&random_generator, v, 1000.f);
|
||||
EXPECT_GE(1000.f, *std::max_element(v.begin(), v.end()));
|
||||
EXPECT_LE(-1000.f, *std::min_element(v.begin(), v.end()));
|
||||
RandomizeSampleVector(&random_generator, v, 100.f);
|
||||
EXPECT_GE(100.f, *std::max_element(v.begin(), v.end()));
|
||||
EXPECT_LE(-100.f, *std::min_element(v.begin(), v.end()));
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
Loading…
Reference in a new issue