mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-13 13:50:40 +01:00

This change adds a Block class to reduce the need for std::vector<std::vector<std::vector<float>>>. This makes the code easier to read and less error-prone. It also enables future changes to the underlying data structure of a block. For instance, the data of all bands and channels could be stored in a single vector. The change has been verified to be bit-exact. Bug: webrtc:14089 Change-Id: Ied9a78124c0bbafe0e912017aef91f7c311de2ae Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/262252 Reviewed-by: Per Åhgren <peah@webrtc.org> Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org> Cr-Commit-Position: refs/heads/main@{#36968}
288 lines
12 KiB
C++
288 lines
12 KiB
C++
/*
|
|
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "modules/audio_processing/aec3/erle_estimator.h"
|
|
|
|
#include <cmath>
|
|
|
|
#include "api/array_view.h"
|
|
#include "modules/audio_processing/aec3/render_delay_buffer.h"
|
|
#include "modules/audio_processing/aec3/spectrum_buffer.h"
|
|
#include "rtc_base/random.h"
|
|
#include "rtc_base/strings/string_builder.h"
|
|
#include "test/gtest.h"
|
|
|
|
namespace webrtc {
|
|
|
|
namespace {
|
|
// First FFT bin that VerifyErleBands() compares against the high-frequency
// reference; all bins below it are compared against the low-frequency one.
constexpr int kLowFrequencyLimit = kFftLengthBy2 / 2;

// ERLE used by the tests when synthesizing the error spectrum of regular
// far-end frames, and the value the time-domain ERLE is checked against in
// VerifyErle().
constexpr float kTrueErle = 10.f;

// ERLE used when synthesizing the error spectrum for the blocks at the start
// of each far-end burst in the onset test.
constexpr float kTrueErleOnsets = 1.0f;

// Gain of the simulated echo path: applied twice (i.e. squared) to the render
// power in FormFarendFrame() and written into the filter frequency response
// by GetFilterFreq().
constexpr float kEchoPathGain = 3.f;
|
|
|
|
// Expects, for every channel in `erle`, that each bin below
// kLowFrequencyLimit is within 0.001 of `reference_lf` and that each
// remaining bin is within 0.001 of `reference_hf`.
void VerifyErleBands(
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle,
    float reference_lf,
    float reference_hf) {
  for (const auto& channel_erle : erle) {
    const auto band_split = channel_erle.begin() + kLowFrequencyLimit;

    // Low-frequency bins.
    for (auto it = channel_erle.begin(); it != band_split; ++it) {
      const float a = *it;
      EXPECT_NEAR(reference_lf, a, 0.001);
    }

    // High-frequency bins.
    for (auto it = band_split; it != channel_erle.end(); ++it) {
      const float a = *it;
      EXPECT_NEAR(reference_hf, a, 0.001);
    }
  }
}
|
|
|
|
// Combined check of the frequency-domain and time-domain ERLE estimates.
//
// `erle`             - per-channel, per-bin ERLE; verified band-wise against
//                      `reference_lf` / `reference_hf` via VerifyErleBands().
// `erle_time_domain` - fullband ERLE in the linear domain; expected to be
//                      within 0.5 of kTrueErle.
void VerifyErle(
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle,
    float erle_time_domain,
    float reference_lf,
    float reference_hf) {
  // Per-bin estimates first, then the fullband estimate.
  VerifyErleBands(erle, reference_lf, reference_hf);
  EXPECT_NEAR(kTrueErle, erle_time_domain, 0.5);
}
|
|
|
|
// Expects `erle1[ch][k] >= erle2[ch][k]` for every channel and every bin.
//
// Both views must cover the same number of channels; a mismatch is reported
// as a fatal failure of this helper instead of indexing `erle2` out of
// bounds.
void VerifyErleGreaterOrEqual(
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle1,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle2) {
  // The loop below is driven by erle1.size(), so erle2 must not be shorter.
  ASSERT_EQ(erle1.size(), erle2.size());
  for (size_t ch = 0; ch < erle1.size(); ++ch) {
    for (size_t i = 0; i < kFftLengthBy2Plus1; ++i) {
      EXPECT_GE(erle1[ch][i], erle2[ch][i]);
    }
  }
}
|
|
|
|
// Fills every band and every channel of `x` with the same fixed block of
// time-domain samples, used by the tests as the far-end (render) signal.
void FormFarendTimeFrame(Block* x) {
  // Built once at compile time instead of on every call.
  static constexpr std::array<float, kBlockSize> kFrame = {
      7459.88, 17209.6, 17383,   20768.9, 16816.7, 18386.3, 4492.83, 9675.85,
      6665.52, 14808.6, 9342.3,  7483.28, 19261.7, 4145.98, 1622.18, 13475.2,
      7166.32, 6856.61, 21937,   7263.14, 9569.07, 14919,   8413.32, 7551.89,
      7848.65, 6011.27, 13080.6, 15865.2, 12656,   17459.6, 4263.93, 4503.03,
      9311.79, 21095.8, 12657.9, 13906.6, 19267.2, 11338.1, 16828.9, 11501.6,
      11405,   15031.4, 14541.6, 19765.5, 18346.3, 19350.2, 3157.47, 18095.8,
      1743.68, 21328.2, 19727.5, 7295.16, 10332.4, 11055.5, 20107.4, 14708.4,
      12416.2, 16434,   2454.69, 9840.8,  6867.23, 1615.75, 6059.9,  8394.19};
  // The size relation never changes at run time, so it is verified once by
  // the compiler rather than per band/channel inside the loops.
  static_assert(kFrame.size() <= kBlockSize,
                "The far-end frame must fit in one block.");

  for (int band = 0; band < x->NumBands(); ++band) {
    for (int channel = 0; channel < x->NumChannels(); ++channel) {
      std::copy(kFrame.begin(), kFrame.end(), x->begin(band, channel));
    }
  }
}
|
|
|
|
void FormFarendFrame(const RenderBuffer& render_buffer,
|
|
float erle,
|
|
std::array<float, kFftLengthBy2Plus1>* X2,
|
|
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> E2,
|
|
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> Y2) {
|
|
const auto& spectrum_buffer = render_buffer.GetSpectrumBuffer();
|
|
const int num_render_channels = spectrum_buffer.buffer[0].size();
|
|
const int num_capture_channels = Y2.size();
|
|
|
|
X2->fill(0.f);
|
|
for (int ch = 0; ch < num_render_channels; ++ch) {
|
|
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
|
(*X2)[k] += spectrum_buffer.buffer[spectrum_buffer.write][ch][k] /
|
|
num_render_channels;
|
|
}
|
|
}
|
|
|
|
for (int ch = 0; ch < num_capture_channels; ++ch) {
|
|
std::transform(X2->begin(), X2->end(), Y2[ch].begin(),
|
|
[](float a) { return a * kEchoPathGain * kEchoPathGain; });
|
|
std::transform(Y2[ch].begin(), Y2[ch].end(), E2[ch].begin(),
|
|
[erle](float a) { return a / erle; });
|
|
}
|
|
}
|
|
|
|
// Synthesizes a frame without any far-end signal: the render block `x` and
// the render spectrum `X2` are zeroed, while every bin of the capture (`Y2`)
// and error (`E2`) spectra of each channel is set to the same large constant.
void FormNearendFrame(
    Block* x,
    std::array<float, kFftLengthBy2Plus1>* X2,
    rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> E2,
    rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> Y2) {
  // Silence the render signal in all bands and channels.
  const int num_bands = x->NumBands();
  for (int b = 0; b < num_bands; ++b) {
    const int num_channels = x->NumChannels();
    for (int c = 0; c < num_channels; ++c) {
      std::fill(x->begin(b, c), x->end(b, c), 0.f);
    }
  }
  X2->fill(0.f);

  // With equal capture and error power, no echo is being removed.
  const size_t num_capture_channels = Y2.size();
  for (size_t c = 0; c < num_capture_channels; ++c) {
    auto& capture_power = Y2[c];
    capture_power.fill(500.f * 1000.f * 1000.f);
    E2[c].fill(capture_power.front());
  }
}
|
|
|
|
void GetFilterFreq(
|
|
size_t delay_headroom_samples,
|
|
rtc::ArrayView<std::vector<std::array<float, kFftLengthBy2Plus1>>>
|
|
filter_frequency_response) {
|
|
const size_t delay_headroom_blocks = delay_headroom_samples / kBlockSize;
|
|
for (size_t ch = 0; ch < filter_frequency_response[0].size(); ++ch) {
|
|
for (auto& block_freq_resp : filter_frequency_response) {
|
|
block_freq_resp[ch].fill(0.f);
|
|
}
|
|
|
|
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
|
filter_frequency_response[delay_headroom_blocks][ch][k] = kEchoPathGain;
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace
|
|
|
|
class ErleEstimatorMultiChannel
|
|
: public ::testing::Test,
|
|
public ::testing::WithParamInterface<std::tuple<size_t, size_t>> {};
|
|
|
|
// Runs each ErleEstimatorMultiChannel test for every combination of
// {1, 2, 4, 8} as the first tuple element (render channels) and {1, 2, 8} as
// the second (capture channels).
INSTANTIATE_TEST_SUITE_P(MultiChannel,
                         ErleEstimatorMultiChannel,
                         ::testing::Combine(::testing::Values(1, 2, 4, 8),
                                            ::testing::Values(1, 2, 8)));
|
|
|
|
// Checks that the ERLE estimate reaches the configured maxima after a long
// stretch of far-end frames, and that it still sits there after a short
// stretch of frames that contain only near-end activity.
TEST_P(ErleEstimatorMultiChannel, VerifyErleIncreaseAndHold) {
  using Spectrum = std::array<float, kFftLengthBy2Plus1>;

  const size_t num_render_channels = std::get<0>(GetParam());
  const size_t num_capture_channels = std::get<1>(GetParam());
  constexpr int kSampleRateHz = 48000;
  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);

  // Spectra handed to the estimator on every update.
  Spectrum X2;
  std::vector<Spectrum> E2(num_capture_channels);
  std::vector<Spectrum> Y2(num_capture_channels);
  std::vector<bool> converged_filters(num_capture_channels, true);

  EchoCanceller3Config config;
  config.erle.onset_detection = true;

  Block x(kNumBands, num_render_channels);
  std::vector<std::vector<Spectrum>> filter_frequency_response(
      config.filter.refined.length_blocks,
      std::vector<Spectrum>(num_capture_channels));
  std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
      RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels));

  GetFilterFreq(config.delay.delay_headroom_samples, filter_frequency_response);

  ErleEstimator estimator(/*startup_phase_length_blocks=*/0, config,
                          num_capture_channels);

  // Inserts the current render block `num_blocks` times, updating the
  // estimator with the current spectra after each insertion.
  const auto process_blocks = [&](size_t num_blocks) {
    for (size_t k = 0; k < num_blocks; ++k) {
      render_delay_buffer->Insert(x);
      render_delay_buffer->PrepareCaptureProcessing();
      estimator.Update(*render_delay_buffer->GetRenderBuffer(),
                       filter_frequency_response, X2, Y2, E2,
                       converged_filters);
    }
  };

  // Expects the ERLE to sit at the configured maxima and the different ERLE
  // variants to be ordered: unbounded >= uncompensated >= onset-compensated.
  const auto expect_erle_at_maximum = [&] {
    VerifyErle(estimator.Erle(/*onset_compensated=*/true),
               std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l,
               config.erle.max_h);
    VerifyErleGreaterOrEqual(estimator.Erle(/*onset_compensated=*/false),
                             estimator.Erle(/*onset_compensated=*/true));
    VerifyErleGreaterOrEqual(estimator.ErleUnbounded(),
                             estimator.Erle(/*onset_compensated=*/false));
  };

  FormFarendTimeFrame(&x);
  render_delay_buffer->Insert(x);
  render_delay_buffer->PrepareCaptureProcessing();

  // Verifies that the ERLE estimate is properly increased to higher values.
  FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), kTrueErle, &X2, E2,
                  Y2);
  process_blocks(1000);
  expect_erle_at_maximum();

  // Verifies that the ERLE is not immediately decreased during nearend
  // activity.
  FormNearendFrame(&x, &X2, E2, Y2);
  process_blocks(50);
  expect_erle_at_maximum();
}
|
|
|
|
// Runs 20 far-end bursts, each made of 10 blocks synthesized with
// kTrueErleOnsets, 1000 blocks synthesized with kTrueErle and 300 near-end
// blocks. Afterwards the ERLE reported for onsets, and eventually the
// onset-compensated ERLE during near-end activity, are expected to equal
// config.erle.min.
TEST_P(ErleEstimatorMultiChannel, VerifyErleTrackingOnOnsets) {
  using Spectrum = std::array<float, kFftLengthBy2Plus1>;

  const size_t num_render_channels = std::get<0>(GetParam());
  const size_t num_capture_channels = std::get<1>(GetParam());
  constexpr int kSampleRateHz = 48000;
  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);

  // Spectra handed to the estimator on every update.
  Spectrum X2;
  std::vector<Spectrum> E2(num_capture_channels);
  std::vector<Spectrum> Y2(num_capture_channels);
  std::vector<bool> converged_filters(num_capture_channels, true);

  EchoCanceller3Config config;
  config.erle.onset_detection = true;

  Block x(kNumBands, num_render_channels);
  std::vector<std::vector<Spectrum>> filter_frequency_response(
      config.filter.refined.length_blocks,
      std::vector<Spectrum>(num_capture_channels));
  std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
      RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels));

  GetFilterFreq(config.delay.delay_headroom_samples, filter_frequency_response);

  ErleEstimator estimator(/*startup_phase_length_blocks=*/0, config,
                          num_capture_channels);

  // Updates the estimator with the current spectra and render buffer state.
  const auto update_estimator = [&] {
    estimator.Update(*render_delay_buffer->GetRenderBuffer(),
                     filter_frequency_response, X2, Y2, E2, converged_filters);
  };

  // Inserts the current render block `num_blocks` times, updating the
  // estimator after each insertion.
  const auto process_blocks = [&](size_t num_blocks) {
    for (size_t k = 0; k < num_blocks; ++k) {
      render_delay_buffer->Insert(x);
      render_delay_buffer->PrepareCaptureProcessing();
      update_estimator();
    }
  };

  FormFarendTimeFrame(&x);
  render_delay_buffer->Insert(x);
  render_delay_buffer->PrepareCaptureProcessing();

  for (size_t burst = 0; burst < 20; ++burst) {
    // Start of the burst, synthesized with the onset ERLE.
    FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), kTrueErleOnsets,
                    &X2, E2, Y2);
    process_blocks(10);

    // Remainder of the burst, synthesized with the regular ERLE.
    FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), kTrueErle, &X2, E2,
                    Y2);
    process_blocks(1000);

    // Near-end-only stretch separating the bursts.
    FormNearendFrame(&x, &X2, E2, Y2);
    process_blocks(300);
  }
  VerifyErleBands(estimator.ErleDuringOnsets(), config.erle.min,
                  config.erle.min);

  // These updates deliberately insert no new render blocks.
  FormNearendFrame(&x, &X2, E2, Y2);
  for (size_t k = 0; k < 1000; ++k) {
    update_estimator();
  }

  // Verifies that during nearend activity, the ERLE converges to the ERLE
  // for onsets.
  VerifyErle(estimator.Erle(/*onset_compensated=*/true),
             std::pow(2.f, estimator.FullbandErleLog2()), config.erle.min,
             config.erle.min);
}
|
|
|
|
} // namespace webrtc
|