mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-16 15:20:42 +01:00

This CL renames the main filter in AEC3 to have the more accurate name refined filter. The CL consists of 3 main initial patch sets, designed to simplify the review: 1) Replaces "main" with "refined" and adds a fall-back functionality to support the old filter naming. 2) Renames the files according to the new naming. 3) Performs a "git cl format" Bug: webrtc:8671 Change-Id: Ifd0aab34e291736a2250e0986348404618630b1d Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/170825 Reviewed-by: Sam Zackrisson <saza@webrtc.org> Commit-Queue: Per Åhgren <peah@webrtc.org> Cr-Commit-Position: refs/heads/master@{#30843}
406 lines
18 KiB
C++
406 lines
18 KiB
C++
/*
|
|
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h"
|
|
|
|
#include <algorithm>
|
|
#include <functional>
|
|
#include <numeric>
|
|
|
|
#include "modules/audio_processing/aec3/spectrum_buffer.h"
|
|
#include "rtc_base/numerics/safe_minmax.h"
|
|
|
|
namespace webrtc {
|
|
|
|
namespace {
|
|
|
|
constexpr std::array<size_t, SignalDependentErleEstimator::kSubbands + 1>
|
|
kBandBoundaries = {1, 8, 16, 24, 32, 48, kFftLengthBy2Plus1};
|
|
|
|
std::array<size_t, kFftLengthBy2Plus1> FormSubbandMap() {
|
|
std::array<size_t, kFftLengthBy2Plus1> map_band_to_subband;
|
|
size_t subband = 1;
|
|
for (size_t k = 0; k < map_band_to_subband.size(); ++k) {
|
|
RTC_DCHECK_LT(subband, kBandBoundaries.size());
|
|
if (k >= kBandBoundaries[subband]) {
|
|
subband++;
|
|
RTC_DCHECK_LT(k, kBandBoundaries[subband]);
|
|
}
|
|
map_band_to_subband[k] = subband - 1;
|
|
}
|
|
return map_band_to_subband;
|
|
}
|
|
|
|
// Defines the size in blocks of the sections that are used for dividing the
|
|
// linear filter. The sections are split in a non-linear manner so that lower
|
|
// sections that typically represent the direct path have a larger resolution
|
|
// than the higher sections which typically represent more reverberant acoustic
|
|
// paths.
|
|
std::vector<size_t> DefineFilterSectionSizes(size_t delay_headroom_blocks,
|
|
size_t num_blocks,
|
|
size_t num_sections) {
|
|
size_t filter_length_blocks = num_blocks - delay_headroom_blocks;
|
|
std::vector<size_t> section_sizes(num_sections);
|
|
size_t remaining_blocks = filter_length_blocks;
|
|
size_t remaining_sections = num_sections;
|
|
size_t estimator_size = 2;
|
|
size_t idx = 0;
|
|
while (remaining_sections > 1 &&
|
|
remaining_blocks > estimator_size * remaining_sections) {
|
|
RTC_DCHECK_LT(idx, section_sizes.size());
|
|
section_sizes[idx] = estimator_size;
|
|
remaining_blocks -= estimator_size;
|
|
remaining_sections--;
|
|
estimator_size *= 2;
|
|
idx++;
|
|
}
|
|
|
|
size_t last_groups_size = remaining_blocks / remaining_sections;
|
|
for (; idx < num_sections; idx++) {
|
|
section_sizes[idx] = last_groups_size;
|
|
}
|
|
section_sizes[num_sections - 1] +=
|
|
remaining_blocks - last_groups_size * remaining_sections;
|
|
return section_sizes;
|
|
}
|
|
|
|
// Forms the limits in blocks for each filter section. Those sections
|
|
// are used for analyzing the echo estimates and investigating which
|
|
// linear filter sections contribute most to the echo estimate energy.
|
|
std::vector<size_t> SetSectionsBoundaries(size_t delay_headroom_blocks,
|
|
size_t num_blocks,
|
|
size_t num_sections) {
|
|
std::vector<size_t> estimator_boundaries_blocks(num_sections + 1);
|
|
if (estimator_boundaries_blocks.size() == 2) {
|
|
estimator_boundaries_blocks[0] = 0;
|
|
estimator_boundaries_blocks[1] = num_blocks;
|
|
return estimator_boundaries_blocks;
|
|
}
|
|
RTC_DCHECK_GT(estimator_boundaries_blocks.size(), 2);
|
|
const std::vector<size_t> section_sizes =
|
|
DefineFilterSectionSizes(delay_headroom_blocks, num_blocks,
|
|
estimator_boundaries_blocks.size() - 1);
|
|
|
|
size_t idx = 0;
|
|
size_t current_size_block = 0;
|
|
RTC_DCHECK_EQ(section_sizes.size() + 1, estimator_boundaries_blocks.size());
|
|
estimator_boundaries_blocks[0] = delay_headroom_blocks;
|
|
for (size_t k = delay_headroom_blocks; k < num_blocks; ++k) {
|
|
current_size_block++;
|
|
if (current_size_block >= section_sizes[idx]) {
|
|
idx = idx + 1;
|
|
if (idx == section_sizes.size()) {
|
|
break;
|
|
}
|
|
estimator_boundaries_blocks[idx] = k + 1;
|
|
current_size_block = 0;
|
|
}
|
|
}
|
|
estimator_boundaries_blocks[section_sizes.size()] = num_blocks;
|
|
return estimator_boundaries_blocks;
|
|
}
|
|
|
|
std::array<float, SignalDependentErleEstimator::kSubbands>
|
|
SetMaxErleSubbands(float max_erle_l, float max_erle_h, size_t limit_subband_l) {
|
|
std::array<float, SignalDependentErleEstimator::kSubbands> max_erle;
|
|
std::fill(max_erle.begin(), max_erle.begin() + limit_subband_l, max_erle_l);
|
|
std::fill(max_erle.begin() + limit_subband_l, max_erle.end(), max_erle_h);
|
|
return max_erle;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
SignalDependentErleEstimator::SignalDependentErleEstimator(
|
|
const EchoCanceller3Config& config,
|
|
size_t num_capture_channels)
|
|
: min_erle_(config.erle.min),
|
|
num_sections_(config.erle.num_sections),
|
|
num_blocks_(config.filter.refined.length_blocks),
|
|
delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize),
|
|
band_to_subband_(FormSubbandMap()),
|
|
max_erle_(SetMaxErleSubbands(config.erle.max_l,
|
|
config.erle.max_h,
|
|
band_to_subband_[kFftLengthBy2 / 2])),
|
|
section_boundaries_blocks_(SetSectionsBoundaries(delay_headroom_blocks_,
|
|
num_blocks_,
|
|
num_sections_)),
|
|
erle_(num_capture_channels),
|
|
S2_section_accum_(
|
|
num_capture_channels,
|
|
std::vector<std::array<float, kFftLengthBy2Plus1>>(num_sections_)),
|
|
erle_estimators_(
|
|
num_capture_channels,
|
|
std::vector<std::array<float, kSubbands>>(num_sections_)),
|
|
erle_ref_(num_capture_channels),
|
|
correction_factors_(
|
|
num_capture_channels,
|
|
std::vector<std::array<float, kSubbands>>(num_sections_)),
|
|
num_updates_(num_capture_channels),
|
|
n_active_sections_(num_capture_channels) {
|
|
RTC_DCHECK_LE(num_sections_, num_blocks_);
|
|
RTC_DCHECK_GE(num_sections_, 1);
|
|
Reset();
|
|
}
|
|
|
|
SignalDependentErleEstimator::~SignalDependentErleEstimator() = default;
|
|
|
|
void SignalDependentErleEstimator::Reset() {
|
|
for (size_t ch = 0; ch < erle_.size(); ++ch) {
|
|
erle_[ch].fill(min_erle_);
|
|
for (auto& erle_estimator : erle_estimators_[ch]) {
|
|
erle_estimator.fill(min_erle_);
|
|
}
|
|
erle_ref_[ch].fill(min_erle_);
|
|
for (auto& factor : correction_factors_[ch]) {
|
|
factor.fill(1.0f);
|
|
}
|
|
num_updates_[ch].fill(0);
|
|
n_active_sections_[ch].fill(0);
|
|
}
|
|
}
|
|
|
|
// Updates the Erle estimate by analyzing the current input signals. It takes
|
|
// the render buffer and the filter frequency response in order to do an
|
|
// estimation of the number of sections of the linear filter that are needed
|
|
// for getting the majority of the energy in the echo estimate. Based on that
|
|
// number of sections, it updates the erle estimation by introducing a
|
|
// correction factor to the erle that is given as an input to this method.
|
|
void SignalDependentErleEstimator::Update(
|
|
const RenderBuffer& render_buffer,
|
|
rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
|
|
filter_frequency_responses,
|
|
rtc::ArrayView<const float, kFftLengthBy2Plus1> X2,
|
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
|
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
|
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> average_erle,
|
|
const std::vector<bool>& converged_filters) {
|
|
RTC_DCHECK_GT(num_sections_, 1);
|
|
|
|
// Gets the number of filter sections that are needed for achieving 90 %
|
|
// of the power spectrum energy of the echo estimate.
|
|
ComputeNumberOfActiveFilterSections(render_buffer,
|
|
filter_frequency_responses);
|
|
|
|
// Updates the correction factors that is used for correcting the erle and
|
|
// adapt it to the particular characteristics of the input signal.
|
|
UpdateCorrectionFactors(X2, Y2, E2, converged_filters);
|
|
|
|
// Applies the correction factor to the input erle for getting a more refined
|
|
// erle estimation for the current input signal.
|
|
for (size_t ch = 0; ch < erle_.size(); ++ch) {
|
|
for (size_t k = 0; k < kFftLengthBy2; ++k) {
|
|
RTC_DCHECK_GT(correction_factors_[ch].size(), n_active_sections_[ch][k]);
|
|
float correction_factor =
|
|
correction_factors_[ch][n_active_sections_[ch][k]]
|
|
[band_to_subband_[k]];
|
|
erle_[ch][k] = rtc::SafeClamp(average_erle[ch][k] * correction_factor,
|
|
min_erle_, max_erle_[band_to_subband_[k]]);
|
|
}
|
|
}
|
|
}
|
|
|
|
void SignalDependentErleEstimator::Dump(
|
|
const std::unique_ptr<ApmDataDumper>& data_dumper) const {
|
|
for (auto& erle : erle_estimators_[0]) {
|
|
data_dumper->DumpRaw("aec3_all_erle", erle);
|
|
}
|
|
data_dumper->DumpRaw("aec3_ref_erle", erle_ref_[0]);
|
|
for (auto& factor : correction_factors_[0]) {
|
|
data_dumper->DumpRaw("aec3_erle_correction_factor", factor);
|
|
}
|
|
}
|
|
|
|
// Estimates for each band the smallest number of sections in the filter that
|
|
// together constitute 90% of the estimated echo energy.
|
|
void SignalDependentErleEstimator::ComputeNumberOfActiveFilterSections(
|
|
const RenderBuffer& render_buffer,
|
|
rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
|
|
filter_frequency_responses) {
|
|
RTC_DCHECK_GT(num_sections_, 1);
|
|
// Computes an approximation of the power spectrum if the filter would have
|
|
// been limited to a certain number of filter sections.
|
|
ComputeEchoEstimatePerFilterSection(render_buffer,
|
|
filter_frequency_responses);
|
|
// For each band, computes the number of filter sections that are needed for
|
|
// achieving the 90 % energy in the echo estimate.
|
|
ComputeActiveFilterSections();
|
|
}
|
|
|
|
void SignalDependentErleEstimator::UpdateCorrectionFactors(
|
|
rtc::ArrayView<const float, kFftLengthBy2Plus1> X2,
|
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
|
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
|
|
const std::vector<bool>& converged_filters) {
|
|
for (size_t ch = 0; ch < converged_filters.size(); ++ch) {
|
|
if (converged_filters[ch]) {
|
|
constexpr float kX2BandEnergyThreshold = 44015068.0f;
|
|
constexpr float kSmthConstantDecreases = 0.1f;
|
|
constexpr float kSmthConstantIncreases = kSmthConstantDecreases / 2.f;
|
|
auto subband_powers = [](rtc::ArrayView<const float> power_spectrum,
|
|
rtc::ArrayView<float> power_spectrum_subbands) {
|
|
for (size_t subband = 0; subband < kSubbands; ++subband) {
|
|
RTC_DCHECK_LE(kBandBoundaries[subband + 1], power_spectrum.size());
|
|
power_spectrum_subbands[subband] = std::accumulate(
|
|
power_spectrum.begin() + kBandBoundaries[subband],
|
|
power_spectrum.begin() + kBandBoundaries[subband + 1], 0.f);
|
|
}
|
|
};
|
|
|
|
std::array<float, kSubbands> X2_subbands, E2_subbands, Y2_subbands;
|
|
subband_powers(X2, X2_subbands);
|
|
subband_powers(E2[ch], E2_subbands);
|
|
subband_powers(Y2[ch], Y2_subbands);
|
|
std::array<size_t, kSubbands> idx_subbands;
|
|
for (size_t subband = 0; subband < kSubbands; ++subband) {
|
|
// When aggregating the number of active sections in the filter for
|
|
// different bands we choose to take the minimum of all of them. As an
|
|
// example, if for one of the bands it is the direct path its refined
|
|
// contributor to the final echo estimate, we consider the direct path
|
|
// is as well the refined contributor for the subband that contains that
|
|
// particular band. That aggregate number of sections will be later used
|
|
// as the identifier of the erle estimator that needs to be updated.
|
|
RTC_DCHECK_LE(kBandBoundaries[subband + 1],
|
|
n_active_sections_[ch].size());
|
|
idx_subbands[subband] = *std::min_element(
|
|
n_active_sections_[ch].begin() + kBandBoundaries[subband],
|
|
n_active_sections_[ch].begin() + kBandBoundaries[subband + 1]);
|
|
}
|
|
|
|
std::array<float, kSubbands> new_erle;
|
|
std::array<bool, kSubbands> is_erle_updated;
|
|
is_erle_updated.fill(false);
|
|
new_erle.fill(0.f);
|
|
for (size_t subband = 0; subband < kSubbands; ++subband) {
|
|
if (X2_subbands[subband] > kX2BandEnergyThreshold &&
|
|
E2_subbands[subband] > 0) {
|
|
new_erle[subband] = Y2_subbands[subband] / E2_subbands[subband];
|
|
RTC_DCHECK_GT(new_erle[subband], 0);
|
|
is_erle_updated[subband] = true;
|
|
++num_updates_[ch][subband];
|
|
}
|
|
}
|
|
|
|
for (size_t subband = 0; subband < kSubbands; ++subband) {
|
|
const size_t idx = idx_subbands[subband];
|
|
RTC_DCHECK_LT(idx, erle_estimators_[ch].size());
|
|
float alpha = new_erle[subband] > erle_estimators_[ch][idx][subband]
|
|
? kSmthConstantIncreases
|
|
: kSmthConstantDecreases;
|
|
alpha = static_cast<float>(is_erle_updated[subband]) * alpha;
|
|
erle_estimators_[ch][idx][subband] +=
|
|
alpha * (new_erle[subband] - erle_estimators_[ch][idx][subband]);
|
|
erle_estimators_[ch][idx][subband] = rtc::SafeClamp(
|
|
erle_estimators_[ch][idx][subband], min_erle_, max_erle_[subband]);
|
|
}
|
|
|
|
for (size_t subband = 0; subband < kSubbands; ++subband) {
|
|
float alpha = new_erle[subband] > erle_ref_[ch][subband]
|
|
? kSmthConstantIncreases
|
|
: kSmthConstantDecreases;
|
|
alpha = static_cast<float>(is_erle_updated[subband]) * alpha;
|
|
erle_ref_[ch][subband] +=
|
|
alpha * (new_erle[subband] - erle_ref_[ch][subband]);
|
|
erle_ref_[ch][subband] = rtc::SafeClamp(erle_ref_[ch][subband],
|
|
min_erle_, max_erle_[subband]);
|
|
}
|
|
|
|
for (size_t subband = 0; subband < kSubbands; ++subband) {
|
|
constexpr int kNumUpdateThr = 50;
|
|
if (is_erle_updated[subband] &&
|
|
num_updates_[ch][subband] > kNumUpdateThr) {
|
|
const size_t idx = idx_subbands[subband];
|
|
RTC_DCHECK_GT(erle_ref_[ch][subband], 0.f);
|
|
// Computes the ratio between the erle that is updated using all the
|
|
// points and the erle that is updated only on signals that share the
|
|
// same number of active filter sections.
|
|
float new_correction_factor =
|
|
erle_estimators_[ch][idx][subband] / erle_ref_[ch][subband];
|
|
|
|
correction_factors_[ch][idx][subband] +=
|
|
0.1f *
|
|
(new_correction_factor - correction_factors_[ch][idx][subband]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void SignalDependentErleEstimator::ComputeEchoEstimatePerFilterSection(
|
|
const RenderBuffer& render_buffer,
|
|
rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
|
|
filter_frequency_responses) {
|
|
const SpectrumBuffer& spectrum_render_buffer =
|
|
render_buffer.GetSpectrumBuffer();
|
|
const size_t num_render_channels = spectrum_render_buffer.buffer[0].size();
|
|
const size_t num_capture_channels = S2_section_accum_.size();
|
|
const float one_by_num_render_channels = 1.f / num_render_channels;
|
|
|
|
RTC_DCHECK_EQ(S2_section_accum_.size(), filter_frequency_responses.size());
|
|
|
|
for (size_t capture_ch = 0; capture_ch < num_capture_channels; ++capture_ch) {
|
|
RTC_DCHECK_EQ(S2_section_accum_[capture_ch].size() + 1,
|
|
section_boundaries_blocks_.size());
|
|
size_t idx_render = render_buffer.Position();
|
|
idx_render = spectrum_render_buffer.OffsetIndex(
|
|
idx_render, section_boundaries_blocks_[0]);
|
|
|
|
for (size_t section = 0; section < num_sections_; ++section) {
|
|
std::array<float, kFftLengthBy2Plus1> X2_section;
|
|
std::array<float, kFftLengthBy2Plus1> H2_section;
|
|
X2_section.fill(0.f);
|
|
H2_section.fill(0.f);
|
|
const size_t block_limit =
|
|
std::min(section_boundaries_blocks_[section + 1],
|
|
filter_frequency_responses[capture_ch].size());
|
|
for (size_t block = section_boundaries_blocks_[section];
|
|
block < block_limit; ++block) {
|
|
for (size_t render_ch = 0;
|
|
render_ch < spectrum_render_buffer.buffer[idx_render].size();
|
|
++render_ch) {
|
|
for (size_t k = 0; k < X2_section.size(); ++k) {
|
|
X2_section[k] +=
|
|
spectrum_render_buffer.buffer[idx_render][render_ch][k] *
|
|
one_by_num_render_channels;
|
|
}
|
|
}
|
|
std::transform(H2_section.begin(), H2_section.end(),
|
|
filter_frequency_responses[capture_ch][block].begin(),
|
|
H2_section.begin(), std::plus<float>());
|
|
idx_render = spectrum_render_buffer.IncIndex(idx_render);
|
|
}
|
|
|
|
std::transform(X2_section.begin(), X2_section.end(), H2_section.begin(),
|
|
S2_section_accum_[capture_ch][section].begin(),
|
|
std::multiplies<float>());
|
|
}
|
|
|
|
for (size_t section = 1; section < num_sections_; ++section) {
|
|
std::transform(S2_section_accum_[capture_ch][section - 1].begin(),
|
|
S2_section_accum_[capture_ch][section - 1].end(),
|
|
S2_section_accum_[capture_ch][section].begin(),
|
|
S2_section_accum_[capture_ch][section].begin(),
|
|
std::plus<float>());
|
|
}
|
|
}
|
|
}
|
|
|
|
void SignalDependentErleEstimator::ComputeActiveFilterSections() {
|
|
for (size_t ch = 0; ch < n_active_sections_.size(); ++ch) {
|
|
std::fill(n_active_sections_[ch].begin(), n_active_sections_[ch].end(), 0);
|
|
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
|
size_t section = num_sections_;
|
|
float target = 0.9f * S2_section_accum_[ch][num_sections_ - 1][k];
|
|
while (section > 0 && S2_section_accum_[ch][section - 1][k] >= target) {
|
|
n_active_sections_[ch][k] = --section;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} // namespace webrtc
|