mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-13 13:50:40 +01:00
AEC3: Add multichannel configuration and multichannel detection
The features have two safety fallbacks: - multichannel config has a killswitch WebRTC-Aec3SetupSpecificDefaultConfigDefaultsKillSwitch - stereo detection has a killswitch WebRTC-Aec3StereoContentDetectionKillSwitch Both features are enabled by default in the AEC3 config. Tested: Bitexact on a large number of aecdumps. Bug: chromium:1295710 Change-Id: I340cdc9140dacd4ca22d0911eb9f732b6cf8b226 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/258129 Reviewed-by: Per Åhgren <peah@webrtc.org> Commit-Queue: Sam Zackrisson <saza@webrtc.org> Cr-Commit-Position: refs/heads/main@{#36482}
This commit is contained in:
parent
f236ac73de
commit
64cdcc0792
17 changed files with 870 additions and 87 deletions
|
@ -236,6 +236,11 @@ struct RTC_EXPORT EchoCanceller3Config {
|
|||
float floor_first_increase = 0.00001f;
|
||||
bool conservative_hf_suppression = false;
|
||||
} suppressor;
|
||||
|
||||
// Settings that EchoCanceller3 reads when setting up its
// MultiChannelContentDetector and its ConfigSelector.
struct MultiChannel {
|
||||
// Handed to the MultiChannelContentDetector. The field trial
// "WebRTC-Aec3StereoContentDetectionKillSwitch" forces this to false. When it
// is false and there is more than one render channel, the ConfigSelector
// starts out on the multichannel config (if one was supplied).
bool detect_stereo_content = true;
|
||||
// Threshold handed to the MultiChannelContentDetector; its exact meaning is
// defined by the detector.
float stereo_detection_threshold = 0.0f;
|
||||
} multi_channel;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
|
|
|
@ -415,6 +415,13 @@ void Aec3ConfigFromJsonString(absl::string_view json_string,
|
|||
ReadParam(section, "conservative_hf_suppression",
|
||||
&cfg.suppressor.conservative_hf_suppression);
|
||||
}
|
||||
|
||||
if (rtc::GetValueFromJsonObject(aec3_root, "multi_channel", &section)) {
|
||||
ReadParam(section, "detect_stereo_content",
|
||||
&cfg.multi_channel.detect_stereo_content);
|
||||
ReadParam(section, "stereo_detection_threshold",
|
||||
&cfg.multi_channel.stereo_detection_threshold);
|
||||
}
|
||||
}
|
||||
|
||||
EchoCanceller3Config Aec3ConfigFromJsonString(absl::string_view json_string) {
|
||||
|
@ -574,7 +581,8 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) {
|
|||
ost << "\"erle_onset_compensation_in_dominant_nearend\": "
|
||||
<< (config.ep_strength.erle_onset_compensation_in_dominant_nearend
|
||||
? "true"
|
||||
: "false") << ",";
|
||||
: "false")
|
||||
<< ",";
|
||||
ost << "\"use_conservative_tail_frequency_response\": "
|
||||
<< (config.ep_strength.use_conservative_tail_frequency_response
|
||||
? "true"
|
||||
|
@ -736,7 +744,15 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) {
|
|||
<< ",";
|
||||
ost << "\"conservative_hf_suppression\": "
|
||||
<< config.suppressor.conservative_hf_suppression;
|
||||
ost << "},";
|
||||
|
||||
ost << "\"multi_channel\": {";
|
||||
ost << "\"detect_stereo_content\": "
|
||||
<< (config.multi_channel.detect_stereo_content ? "true" : "false") << ",";
|
||||
ost << "\"stereo_detection_threshold\": "
|
||||
<< config.multi_channel.stereo_detection_threshold;
|
||||
ost << "}";
|
||||
|
||||
ost << "}";
|
||||
ost << "}";
|
||||
|
||||
|
|
|
@ -25,7 +25,8 @@ std::unique_ptr<EchoControl> EchoCanceller3Factory::Create(
|
|||
int num_render_channels,
|
||||
int num_capture_channels) {
|
||||
return std::make_unique<EchoCanceller3>(
|
||||
config_, sample_rate_hz, num_render_channels, num_capture_channels);
|
||||
config_, /*multichannel_config=*/absl::nullopt, sample_rate_hz,
|
||||
num_render_channels, num_capture_channels);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
|
@ -31,6 +31,10 @@ TEST(EchoCanceller3JsonHelpers, ToStringAndParseJson) {
|
|||
cfg.suppressor.subband_nearend_detection.subband1 = {4, 5};
|
||||
cfg.suppressor.subband_nearend_detection.nearend_threshold = 2.f;
|
||||
cfg.suppressor.subband_nearend_detection.snr_threshold = 100.f;
|
||||
cfg.multi_channel.detect_stereo_content =
|
||||
!cfg.multi_channel.detect_stereo_content;
|
||||
cfg.multi_channel.stereo_detection_threshold =
|
||||
cfg.multi_channel.stereo_detection_threshold + 1.0f;
|
||||
std::string json_string = Aec3ConfigToJsonString(cfg);
|
||||
EchoCanceller3Config cfg_transformed = Aec3ConfigFromJsonString(json_string);
|
||||
|
||||
|
@ -75,5 +79,9 @@ TEST(EchoCanceller3JsonHelpers, ToStringAndParseJson) {
|
|||
cfg_transformed.suppressor.subband_nearend_detection.nearend_threshold);
|
||||
EXPECT_EQ(cfg.suppressor.subband_nearend_detection.snr_threshold,
|
||||
cfg_transformed.suppressor.subband_nearend_detection.snr_threshold);
|
||||
EXPECT_EQ(cfg.multi_channel.detect_stereo_content,
|
||||
cfg_transformed.multi_channel.detect_stereo_content);
|
||||
EXPECT_EQ(cfg.multi_channel.stereo_detection_threshold,
|
||||
cfg_transformed.multi_channel.stereo_detection_threshold);
|
||||
}
|
||||
} // namespace webrtc
|
||||
|
|
|
@ -37,6 +37,8 @@ rtc_library("aec3") {
|
|||
"coarse_filter_update_gain.h",
|
||||
"comfort_noise_generator.cc",
|
||||
"comfort_noise_generator.h",
|
||||
"config_selector.cc",
|
||||
"config_selector.h",
|
||||
"decimator.cc",
|
||||
"decimator.h",
|
||||
"delay_estimate.h",
|
||||
|
@ -72,6 +74,8 @@ rtc_library("aec3") {
|
|||
"matched_filter_lag_aggregator.h",
|
||||
"moving_average.cc",
|
||||
"moving_average.h",
|
||||
"multi_channel_content_detector.cc",
|
||||
"multi_channel_content_detector.h",
|
||||
"nearend_detector.h",
|
||||
"refined_filter_update_gain.cc",
|
||||
"refined_filter_update_gain.h",
|
||||
|
@ -338,6 +342,7 @@ if (rtc_include_tests) {
|
|||
"clockdrift_detector_unittest.cc",
|
||||
"coarse_filter_update_gain_unittest.cc",
|
||||
"comfort_noise_generator_unittest.cc",
|
||||
"config_selector_unittest.cc",
|
||||
"decimator_unittest.cc",
|
||||
"echo_canceller3_unittest.cc",
|
||||
"echo_path_delay_estimator_unittest.cc",
|
||||
|
@ -352,6 +357,7 @@ if (rtc_include_tests) {
|
|||
"matched_filter_lag_aggregator_unittest.cc",
|
||||
"matched_filter_unittest.cc",
|
||||
"moving_average_unittest.cc",
|
||||
"multi_channel_content_detector_unittest.cc",
|
||||
"refined_filter_update_gain_unittest.cc",
|
||||
"render_buffer_unittest.cc",
|
||||
"render_delay_buffer_unittest.cc",
|
||||
|
|
69
modules/audio_processing/aec3/config_selector.cc
Normal file
69
modules/audio_processing/aec3/config_selector.cc
Normal file
|
@ -0,0 +1,69 @@
|
|||
|
||||
/*
|
||||
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/config_selector.h"
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Validates that the mono and the multichannel configs have compatible fields.
|
||||
bool CompatibleConfigs(const EchoCanceller3Config& mono_config,
|
||||
const EchoCanceller3Config& multichannel_config) {
|
||||
if (mono_config.delay.fixed_capture_delay_samples !=
|
||||
multichannel_config.delay.fixed_capture_delay_samples) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (mono_config.filter.export_linear_aec_output !=
|
||||
multichannel_config.filter.export_linear_aec_output) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (mono_config.filter.high_pass_filter_echo_reference !=
|
||||
multichannel_config.filter.high_pass_filter_echo_reference) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (mono_config.multi_channel.detect_stereo_content !=
|
||||
multichannel_config.multi_channel.detect_stereo_content) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Stores copies of both configs and selects the initial active one: the
// multichannel config is chosen up front only when stereo content detection is
// turned off and more than one render channel is present.
ConfigSelector::ConfigSelector(
    const EchoCanceller3Config& config,
    const absl::optional<EchoCanceller3Config>& multichannel_config,
    int num_render_input_channels)
    : config_(config), multichannel_config_(multichannel_config) {
  // A supplied multichannel config must agree with the mono config on the
  // fields checked by CompatibleConfigs().
  RTC_DCHECK(!multichannel_config_.has_value() ||
             CompatibleConfigs(config_, *multichannel_config_));

  const bool detection_disabled = !config_.multi_channel.detect_stereo_content;
  const bool several_render_channels = num_render_input_channels > 1;
  Update(detection_disabled && several_render_channels);

  RTC_DCHECK(active_config_);
}
|
||||
|
||||
void ConfigSelector::Update(bool multichannel_content) {
|
||||
if (multichannel_content && multichannel_config_.has_value()) {
|
||||
active_config_ = &(*multichannel_config_);
|
||||
} else {
|
||||
active_config_ = &config_;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
41
modules/audio_processing/aec3/config_selector.h
Normal file
41
modules/audio_processing/aec3/config_selector.h
Normal file
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Selects the config to use.
|
||||
class ConfigSelector {
|
||||
public:
|
||||
ConfigSelector(
|
||||
const EchoCanceller3Config& config,
|
||||
const absl::optional<EchoCanceller3Config>& multichannel_config,
|
||||
int num_render_input_channels);
|
||||
|
||||
// Updates the config selection based on the detection of multichannel
|
||||
// content.
|
||||
void Update(bool multichannel_content);
|
||||
|
||||
const EchoCanceller3Config& active_config() const { return *active_config_; }
|
||||
|
||||
private:
|
||||
const EchoCanceller3Config config_;
|
||||
const absl::optional<EchoCanceller3Config> multichannel_config_;
|
||||
const EchoCanceller3Config* active_config_ = nullptr;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_
|
116
modules/audio_processing/aec3/config_selector_unittest.cc
Normal file
116
modules/audio_processing/aec3/config_selector_unittest.cc
Normal file
|
@ -0,0 +1,116 @@
|
|||
/*
|
||||
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/config_selector.h"
|
||||
|
||||
#include <tuple>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ConfigSelectorChannelsAndContentDetection
|
||||
: public ::testing::Test,
|
||||
public ::testing::WithParamInterface<std::tuple<int, bool>> {};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(ConfigSelectorMultiParameters,
|
||||
ConfigSelectorChannelsAndContentDetection,
|
||||
::testing::Combine(::testing::Values(1, 2, 8),
|
||||
::testing::Values(false, true)));
|
||||
|
||||
class ConfigSelectorChannels : public ::testing::Test,
|
||||
public ::testing::WithParamInterface<int> {};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(ConfigSelectorMultiParameters,
|
||||
ConfigSelectorChannels,
|
||||
::testing::Values(1, 2, 8));
|
||||
|
||||
// With no multichannel config supplied, the mono config has to remain active
// regardless of what Update() is told.
TEST_P(ConfigSelectorChannelsAndContentDetection,
       MonoConfigIsSelectedWhenNoMultiChannelConfigPresent) {
  const int num_channels = std::get<0>(GetParam());
  const bool detect_stereo_content = std::get<1>(GetParam());

  EchoCanceller3Config config;
  config.multi_channel.detect_stereo_content = detect_stereo_content;
  // Move the delay away from its default so the config is recognizable.
  config.delay.default_delay += 1;
  const size_t expected_delay = config.delay.default_delay;

  absl::optional<EchoCanceller3Config> absent_multichannel_config;
  ConfigSelector cs(config, absent_multichannel_config,
                    /*num_render_input_channels=*/num_channels);
  EXPECT_EQ(cs.active_config().delay.default_delay, expected_delay);

  for (const bool multichannel_content : {false, true}) {
    cs.Update(multichannel_content);
    EXPECT_EQ(cs.active_config().delay.default_delay, expected_delay);
  }
}
|
||||
|
||||
// Right after construction the mono config is expected unless detection is
// disabled and more than one render channel is used.
TEST_P(ConfigSelectorChannelsAndContentDetection,
       CorrectInitialConfigIsSelected) {
  const int num_channels = std::get<0>(GetParam());
  const bool detect_stereo_content = std::get<1>(GetParam());

  EchoCanceller3Config config;
  config.multi_channel.detect_stereo_content = detect_stereo_content;
  absl::optional<EchoCanceller3Config> multichannel_config = config;

  // Give the two configs distinct, non-default delays so that the active one
  // can be identified.
  config.delay.default_delay = config.delay.default_delay + 1;
  multichannel_config->delay.default_delay =
      multichannel_config->delay.default_delay + 2;
  const size_t mono_delay = config.delay.default_delay;
  const size_t multichannel_delay = multichannel_config->delay.default_delay;

  ConfigSelector cs(config, multichannel_config,
                    /*num_render_input_channels=*/num_channels);

  const bool expect_mono = num_channels == 1 || detect_stereo_content;
  const size_t expected_delay = expect_mono ? mono_delay : multichannel_delay;
  EXPECT_EQ(cs.active_config().delay.default_delay, expected_delay);
}
|
||||
|
||||
// Update(false) must select the mono config; a following Update(true) (only
// issued for more than one channel) must select the multichannel config.
TEST_P(ConfigSelectorChannels, CorrectConfigUpdateBehavior) {
  const int num_channels = GetParam();

  EchoCanceller3Config config;
  config.multi_channel.detect_stereo_content = true;
  absl::optional<EchoCanceller3Config> multichannel_config = config;

  // Give the two configs distinct, non-default delays so that the active one
  // can be identified.
  config.delay.default_delay = config.delay.default_delay + 1;
  multichannel_config->delay.default_delay =
      multichannel_config->delay.default_delay + 2;
  const size_t mono_delay = config.delay.default_delay;
  const size_t multichannel_delay = multichannel_config->delay.default_delay;

  ConfigSelector cs(config, multichannel_config,
                    /*num_render_input_channels=*/num_channels);

  cs.Update(/*multichannel_content=*/false);
  EXPECT_EQ(cs.active_config().delay.default_delay, mono_delay);

  // The mono case repeats the mono update; all other cases switch over to
  // multichannel content.
  const bool multichannel_content = num_channels != 1;
  cs.Update(multichannel_content);
  EXPECT_EQ(cs.active_config().delay.default_delay,
            multichannel_content ? multichannel_delay : mono_delay);
}
|
||||
|
||||
} // namespace webrtc
|
|
@ -101,13 +101,24 @@ void FillSubFrameView(
|
|||
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
|
||||
RTC_DCHECK_GE(1, sub_frame_index);
|
||||
RTC_DCHECK_EQ(frame->size(), sub_frame_view->size());
|
||||
RTC_DCHECK_EQ((*frame)[0].size(), (*sub_frame_view)[0].size());
|
||||
for (size_t band = 0; band < frame->size(); ++band) {
|
||||
for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) {
|
||||
(*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
|
||||
&(*frame)[band][channel][sub_frame_index * kSubFrameLength],
|
||||
if ((*frame)[0].size() > (*sub_frame_view)[0].size()) {
|
||||
RTC_DCHECK_EQ((*sub_frame_view)[0].size(), 1);
|
||||
// Downmix the audio to mono (should only be done when the audio contains
|
||||
// fake-stereo or fake-multichannel).
|
||||
for (size_t band = 0; band < frame->size(); ++band) {
|
||||
(*sub_frame_view)[band][/*channel=*/0] = rtc::ArrayView<float>(
|
||||
&(*frame)[band][/*channel=*/0][sub_frame_index * kSubFrameLength],
|
||||
kSubFrameLength);
|
||||
}
|
||||
} else {
|
||||
RTC_DCHECK_EQ((*frame)[0].size(), (*sub_frame_view)[0].size());
|
||||
for (size_t band = 0; band < frame->size(); ++band) {
|
||||
for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) {
|
||||
(*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
|
||||
&(*frame)[band][channel][sub_frame_index * kSubFrameLength],
|
||||
kSubFrameLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -221,6 +232,10 @@ void CopyBufferIntoFrame(const AudioBuffer& buffer,
|
|||
EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
|
||||
EchoCanceller3Config adjusted_cfg = config;
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3StereoContentDetectionKillSwitch")) {
|
||||
adjusted_cfg.multi_channel.detect_stereo_content = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3AntiHowlingMinimizationKillSwitch")) {
|
||||
adjusted_cfg.suppressor.high_bands_suppression
|
||||
.anti_howling_activation_threshold = 25.f;
|
||||
|
@ -667,68 +682,71 @@ void EchoCanceller3::RenderWriter::Insert(const AudioBuffer& input) {
|
|||
|
||||
int EchoCanceller3::instance_count_ = 0;
|
||||
|
||||
EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
|
||||
int sample_rate_hz,
|
||||
size_t num_render_channels,
|
||||
size_t num_capture_channels)
|
||||
EchoCanceller3::EchoCanceller3(
|
||||
const EchoCanceller3Config& config,
|
||||
const absl::optional<EchoCanceller3Config>& multichannel_config,
|
||||
int sample_rate_hz,
|
||||
size_t num_render_channels,
|
||||
size_t num_capture_channels)
|
||||
: data_dumper_(
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
|
||||
config_(AdjustConfig(config)),
|
||||
sample_rate_hz_(sample_rate_hz),
|
||||
num_bands_(NumBandsForRate(sample_rate_hz_)),
|
||||
num_render_channels_(num_render_channels),
|
||||
num_render_input_channels_(num_render_channels),
|
||||
num_capture_channels_(num_capture_channels),
|
||||
config_selector_(AdjustConfig(config),
|
||||
multichannel_config,
|
||||
num_render_input_channels_),
|
||||
multichannel_content_detector_(
|
||||
config_selector_.active_config().multi_channel.detect_stereo_content,
|
||||
num_render_input_channels_,
|
||||
config_selector_.active_config()
|
||||
.multi_channel.stereo_detection_threshold),
|
||||
output_framer_(num_bands_, num_capture_channels_),
|
||||
capture_blocker_(num_bands_, num_capture_channels_),
|
||||
render_blocker_(num_bands_, num_render_channels_),
|
||||
render_transfer_queue_(
|
||||
kRenderTransferQueueSizeFrames,
|
||||
std::vector<std::vector<std::vector<float>>>(
|
||||
num_bands_,
|
||||
std::vector<std::vector<float>>(
|
||||
num_render_channels_,
|
||||
num_render_input_channels_,
|
||||
std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
|
||||
Aec3RenderQueueItemVerifier(num_bands_,
|
||||
num_render_channels_,
|
||||
num_render_input_channels_,
|
||||
AudioBuffer::kSplitBandSize)),
|
||||
render_queue_output_frame_(
|
||||
num_bands_,
|
||||
std::vector<std::vector<float>>(
|
||||
num_render_channels_,
|
||||
num_render_input_channels_,
|
||||
std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
|
||||
render_block_(
|
||||
num_bands_,
|
||||
std::vector<std::vector<float>>(num_render_channels_,
|
||||
std::vector<std::vector<float>>(num_render_input_channels_,
|
||||
std::vector<float>(kBlockSize, 0.f))),
|
||||
capture_block_(
|
||||
num_bands_,
|
||||
std::vector<std::vector<float>>(num_capture_channels_,
|
||||
std::vector<float>(kBlockSize, 0.f))),
|
||||
render_sub_frame_view_(
|
||||
num_bands_,
|
||||
std::vector<rtc::ArrayView<float>>(num_render_channels_)),
|
||||
capture_sub_frame_view_(
|
||||
num_bands_,
|
||||
std::vector<rtc::ArrayView<float>>(num_capture_channels_)) {
|
||||
RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));
|
||||
|
||||
block_processor_.reset(BlockProcessor::Create(
|
||||
config_, sample_rate_hz_, num_render_channels_, num_capture_channels_));
|
||||
|
||||
if (config_.delay.fixed_capture_delay_samples > 0) {
|
||||
if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) {
|
||||
block_delay_buffer_.reset(new BlockDelayBuffer(
|
||||
num_capture_channels_, num_bands_, AudioBuffer::kSplitBandSize,
|
||||
config_.delay.fixed_capture_delay_samples));
|
||||
}
|
||||
|
||||
render_writer_.reset(new RenderWriter(data_dumper_.get(), config_,
|
||||
&render_transfer_queue_, num_bands_,
|
||||
num_render_channels_));
|
||||
render_writer_.reset(new RenderWriter(
|
||||
data_dumper_.get(), config_selector_.active_config(),
|
||||
&render_transfer_queue_, num_bands_, num_render_input_channels_));
|
||||
|
||||
RTC_DCHECK_EQ(num_bands_, std::max(sample_rate_hz_, 16000) / 16000);
|
||||
RTC_DCHECK_GE(kMaxNumBands, num_bands_);
|
||||
|
||||
if (config_.filter.export_linear_aec_output) {
|
||||
if (config_selector_.active_config().filter.export_linear_aec_output) {
|
||||
linear_output_framer_.reset(new BlockFramer(1, num_capture_channels_));
|
||||
linear_output_block_ =
|
||||
std::make_unique<std::vector<std::vector<std::vector<float>>>>(
|
||||
|
@ -739,17 +757,49 @@ EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
|
|||
1, std::vector<rtc::ArrayView<float>>(num_capture_channels_));
|
||||
}
|
||||
|
||||
Initialize();
|
||||
|
||||
RTC_LOG(LS_INFO) << "AEC3 created with sample rate: " << sample_rate_hz_
|
||||
<< " Hz, num render channels: " << num_render_channels_
|
||||
<< " Hz, num render channels: " << num_render_input_channels_
|
||||
<< ", num capture channels: " << num_capture_channels_;
|
||||
}
|
||||
|
||||
EchoCanceller3::~EchoCanceller3() = default;
|
||||
|
||||
void EchoCanceller3::Initialize() {
|
||||
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
|
||||
|
||||
num_render_channels_to_aec_ =
|
||||
multichannel_content_detector_.IsMultiChannelContentDetected()
|
||||
? num_render_input_channels_
|
||||
: 1;
|
||||
|
||||
config_selector_.Update(
|
||||
multichannel_content_detector_.IsMultiChannelContentDetected());
|
||||
|
||||
for (std::vector<std::vector<float>>& block_band : render_block_) {
|
||||
block_band.resize(num_render_channels_to_aec_);
|
||||
for (std::vector<float>& block_channel : block_band) {
|
||||
block_channel.resize(kBlockSize, 0.0f);
|
||||
}
|
||||
}
|
||||
|
||||
render_blocker_.reset(
|
||||
new FrameBlocker(num_bands_, num_render_channels_to_aec_));
|
||||
|
||||
block_processor_.reset(BlockProcessor::Create(
|
||||
config_selector_.active_config(), sample_rate_hz_,
|
||||
num_render_channels_to_aec_, num_capture_channels_));
|
||||
|
||||
render_sub_frame_view_ = std::vector<std::vector<rtc::ArrayView<float>>>(
|
||||
num_bands_,
|
||||
std::vector<rtc::ArrayView<float>>(num_render_channels_to_aec_));
|
||||
}
|
||||
|
||||
void EchoCanceller3::AnalyzeRender(const AudioBuffer& render) {
|
||||
RTC_DCHECK_RUNS_SERIALIZED(&render_race_checker_);
|
||||
|
||||
RTC_DCHECK_EQ(render.num_channels(), num_render_channels_);
|
||||
RTC_DCHECK_EQ(render.num_channels(), num_render_input_channels_);
|
||||
data_dumper_->DumpRaw("aec3_call_order",
|
||||
static_cast<int>(EchoCanceller3ApiCall::kRender));
|
||||
|
||||
|
@ -797,7 +847,7 @@ void EchoCanceller3::ProcessCapture(AudioBuffer* capture,
|
|||
api_call_metrics_.ReportCaptureCall();
|
||||
|
||||
// Optionally delay the capture signal.
|
||||
if (config_.delay.fixed_capture_delay_samples > 0) {
|
||||
if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) {
|
||||
RTC_DCHECK(block_delay_buffer_);
|
||||
block_delay_buffer_->DelaySignal(capture);
|
||||
}
|
||||
|
@ -853,22 +903,18 @@ bool EchoCanceller3::ActiveProcessing() const {
|
|||
return true;
|
||||
}
|
||||
|
||||
EchoCanceller3Config EchoCanceller3::CreateDefaultConfig(
|
||||
size_t num_render_channels,
|
||||
size_t num_capture_channels) {
|
||||
EchoCanceller3Config EchoCanceller3::CreateDefaultMultichannelConfig() {
|
||||
EchoCanceller3Config cfg;
|
||||
if (num_render_channels > 1) {
|
||||
// Use shorter and more rapidly adapting coarse filter to compensate for
|
||||
// the increased number of total filter parameters to adapt.
|
||||
cfg.filter.coarse.length_blocks = 11;
|
||||
cfg.filter.coarse.rate = 0.95f;
|
||||
cfg.filter.coarse_initial.length_blocks = 11;
|
||||
cfg.filter.coarse_initial.rate = 0.95f;
|
||||
// Use shorter and more rapidly adapting coarse filter to compensate for
|
||||
// the increased number of total filter parameters to adapt.
|
||||
cfg.filter.coarse.length_blocks = 11;
|
||||
cfg.filter.coarse.rate = 0.95f;
|
||||
cfg.filter.coarse_initial.length_blocks = 11;
|
||||
cfg.filter.coarse_initial.rate = 0.95f;
|
||||
|
||||
// Use more conservative suppressor behavior for non-nearend speech.
|
||||
cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f;
|
||||
cfg.suppressor.normal_tuning.max_inc_factor = 1.5f;
|
||||
}
|
||||
// Use more conservative suppressor behavior for non-nearend speech.
|
||||
cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f;
|
||||
cfg.suppressor.normal_tuning.max_inc_factor = 1.5f;
|
||||
return cfg;
|
||||
}
|
||||
|
||||
|
@ -887,16 +933,23 @@ void EchoCanceller3::EmptyRenderQueue() {
|
|||
// Report render call in the metrics.
|
||||
api_call_metrics_.ReportRenderCall();
|
||||
|
||||
BufferRenderFrameContent(&render_queue_output_frame_, 0, &render_blocker_,
|
||||
block_processor_.get(), &render_block_,
|
||||
&render_sub_frame_view_);
|
||||
if (multichannel_content_detector_.UpdateDetection(
|
||||
render_queue_output_frame_)) {
|
||||
// Reinitialize the AEC when proper stereo is detected.
|
||||
Initialize();
|
||||
}
|
||||
|
||||
BufferRenderFrameContent(&render_queue_output_frame_, 1, &render_blocker_,
|
||||
block_processor_.get(), &render_block_,
|
||||
&render_sub_frame_view_);
|
||||
// Buffer frame content.
|
||||
BufferRenderFrameContent(&render_queue_output_frame_, 0,
|
||||
render_blocker_.get(), block_processor_.get(),
|
||||
&render_block_, &render_sub_frame_view_);
|
||||
|
||||
BufferRemainingRenderFrameContent(&render_blocker_, block_processor_.get(),
|
||||
&render_block_);
|
||||
BufferRenderFrameContent(&render_queue_output_frame_, 1,
|
||||
render_blocker_.get(), block_processor_.get(),
|
||||
&render_block_, &render_sub_frame_view_);
|
||||
|
||||
BufferRemainingRenderFrameContent(render_blocker_.get(),
|
||||
block_processor_.get(), &render_block_);
|
||||
|
||||
frame_to_buffer =
|
||||
render_transfer_queue_.Remove(&render_queue_output_frame_);
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "api/audio/echo_control.h"
|
||||
|
@ -23,7 +24,9 @@
|
|||
#include "modules/audio_processing/aec3/block_delay_buffer.h"
|
||||
#include "modules/audio_processing/aec3/block_framer.h"
|
||||
#include "modules/audio_processing/aec3/block_processor.h"
|
||||
#include "modules/audio_processing/aec3/config_selector.h"
|
||||
#include "modules/audio_processing/aec3/frame_blocker.h"
|
||||
#include "modules/audio_processing/aec3/multi_channel_content_detector.h"
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
@ -84,10 +87,12 @@ class Aec3RenderQueueItemVerifier {
|
|||
// AnalyzeRender call which can be called concurrently with the other methods.
|
||||
class EchoCanceller3 : public EchoControl {
|
||||
public:
|
||||
EchoCanceller3(const EchoCanceller3Config& config,
|
||||
int sample_rate_hz,
|
||||
size_t num_render_channels,
|
||||
size_t num_capture_channels);
|
||||
EchoCanceller3(
|
||||
const EchoCanceller3Config& config,
|
||||
const absl::optional<EchoCanceller3Config>& multichannel_config,
|
||||
int sample_rate_hz,
|
||||
size_t num_render_channels,
|
||||
size_t num_capture_channels);
|
||||
|
||||
~EchoCanceller3() override;
|
||||
|
||||
|
@ -130,20 +135,37 @@ class EchoCanceller3 : public EchoControl {
|
|||
block_processor_->UpdateEchoLeakageStatus(leakage_detected);
|
||||
}
|
||||
|
||||
// Produces a default configuration that is suitable for a certain combination
|
||||
// of render and capture channels.
|
||||
static EchoCanceller3Config CreateDefaultConfig(size_t num_render_channels,
|
||||
size_t num_capture_channels);
|
||||
// Produces a default configuration for multichannel.
|
||||
static EchoCanceller3Config CreateDefaultMultichannelConfig();
|
||||
|
||||
private:
|
||||
class RenderWriter;
|
||||
friend class EchoCanceller3Tester;
|
||||
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3Metrics, EchoReturnLossEnhancement);
|
||||
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3, DetectionOfProperStereo);
|
||||
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
|
||||
DetectionOfProperStereoUsingThreshold);
|
||||
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
|
||||
StereoContentDetectionForMonoSignals);
|
||||
|
||||
// Replaces the internal block processor with a custom one for testing.
|
||||
class RenderWriter;
|
||||
|
||||
// (Re-)Initializes the selected subset of the EchoCanceller3 fields, at
|
||||
// creation as well as during reconfiguration.
|
||||
void Initialize();
|
||||
|
||||
// Only for testing. Replaces the internal block processor.
|
||||
void SetBlockProcessorForTesting(
|
||||
std::unique_ptr<BlockProcessor> block_processor);
|
||||
|
||||
// Only for testing. Returns whether stereo processing is active.
|
||||
bool StereoRenderProcessingActiveForTesting() const {
|
||||
return multichannel_content_detector_.IsMultiChannelContentDetected();
|
||||
}
|
||||
|
||||
// Only for testing.
|
||||
const EchoCanceller3Config& GetActiveConfigForTesting() const {
|
||||
return config_selector_.active_config();
|
||||
}
|
||||
|
||||
// Empties the render SwapQueue.
|
||||
void EmptyRenderQueue();
|
||||
|
||||
|
@ -166,13 +188,17 @@ class EchoCanceller3 : public EchoControl {
|
|||
const EchoCanceller3Config config_;
|
||||
const int sample_rate_hz_;
|
||||
const int num_bands_;
|
||||
const size_t num_render_channels_;
|
||||
const size_t num_render_input_channels_;
|
||||
size_t num_render_channels_to_aec_;
|
||||
const size_t num_capture_channels_;
|
||||
ConfigSelector config_selector_;
|
||||
MultiChannelContentDetector multichannel_content_detector_;
|
||||
std::unique_ptr<BlockFramer> linear_output_framer_
|
||||
RTC_GUARDED_BY(capture_race_checker_);
|
||||
BlockFramer output_framer_ RTC_GUARDED_BY(capture_race_checker_);
|
||||
FrameBlocker capture_blocker_ RTC_GUARDED_BY(capture_race_checker_);
|
||||
FrameBlocker render_blocker_ RTC_GUARDED_BY(capture_race_checker_);
|
||||
std::unique_ptr<FrameBlocker> render_blocker_
|
||||
RTC_GUARDED_BY(capture_race_checker_);
|
||||
SwapQueue<std::vector<std::vector<std::vector<float>>>,
|
||||
Aec3RenderQueueItemVerifier>
|
||||
render_transfer_queue_;
|
||||
|
|
|
@ -190,6 +190,32 @@ std::string ProduceDebugText(int sample_rate_hz, int variant) {
|
|||
return ss.Release();
|
||||
}
|
||||
|
||||
// Sets every sample of channel 0 and channel 1 in `buffer` to the respective
// constant and then passes the buffer through one render analysis, capture
// analysis and capture processing call of `aec3`.
void RunAecInStereo(AudioBuffer& buffer,
                    EchoCanceller3& aec3,
                    float channel_0_value,
                    float channel_1_value) {
  const float values[] = {channel_0_value, channel_1_value};
  for (size_t ch = 0; ch < 2; ++ch) {
    rtc::ArrayView<float> samples(&buffer.channels()[ch][0],
                                  buffer.num_frames());
    std::fill(samples.begin(), samples.end(), values[ch]);
  }

  aec3.AnalyzeRender(&buffer);
  aec3.AnalyzeCapture(&buffer);
  aec3.ProcessCapture(&buffer, /*level_change=*/false);
}
|
||||
|
||||
// Sets every sample of channel 0 in `buffer` to `channel_0_value` and then
// passes the buffer through one render analysis, capture analysis and capture
// processing call of `aec3`.
void RunAecInSMono(AudioBuffer& buffer,
                   EchoCanceller3& aec3,
                   float channel_0_value) {
  rtc::ArrayView<float> samples(&buffer.channels()[0][0], buffer.num_frames());
  for (float& sample : samples) {
    sample = channel_0_value;
  }

  aec3.AnalyzeRender(&buffer);
  aec3.AnalyzeCapture(&buffer);
  aec3.ProcessCapture(&buffer, /*level_change=*/false);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
class EchoCanceller3Tester {
|
||||
|
@ -220,7 +246,9 @@ class EchoCanceller3Tester {
|
|||
// and that the processor data is properly passed to the EchoCanceller3
|
||||
// output.
|
||||
void RunCaptureTransportVerificationTest() {
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1);
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(),
|
||||
/*multichannel_config=*/absl::nullopt, sample_rate_hz_,
|
||||
1, 1);
|
||||
aec3.SetBlockProcessorForTesting(
|
||||
std::make_unique<CaptureTransportVerificationProcessor>(num_bands_));
|
||||
|
||||
|
@ -244,7 +272,9 @@ class EchoCanceller3Tester {
|
|||
// Test method for testing that the render data is properly received by the
|
||||
// block processor.
|
||||
void RunRenderTransportVerificationTest() {
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1);
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(),
|
||||
/*multichannel_config=*/absl::nullopt, sample_rate_hz_,
|
||||
1, 1);
|
||||
aec3.SetBlockProcessorForTesting(
|
||||
std::make_unique<RenderTransportVerificationProcessor>(num_bands_));
|
||||
|
||||
|
@ -313,7 +343,9 @@ class EchoCanceller3Tester {
|
|||
break;
|
||||
}
|
||||
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1);
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(),
|
||||
/*multichannel_config=*/absl::nullopt, sample_rate_hz_,
|
||||
1, 1);
|
||||
aec3.SetBlockProcessorForTesting(std::move(block_processor_mock));
|
||||
|
||||
for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
|
||||
|
@ -393,7 +425,9 @@ class EchoCanceller3Tester {
|
|||
} break;
|
||||
}
|
||||
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1);
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(),
|
||||
/*multichannel_config=*/absl::nullopt, sample_rate_hz_,
|
||||
1, 1);
|
||||
aec3.SetBlockProcessorForTesting(std::move(block_processor_mock));
|
||||
|
||||
for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
|
||||
|
@ -479,7 +513,9 @@ class EchoCanceller3Tester {
|
|||
} break;
|
||||
}
|
||||
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1);
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(),
|
||||
/*multichannel_config=*/absl::nullopt, sample_rate_hz_,
|
||||
1, 1);
|
||||
aec3.SetBlockProcessorForTesting(std::move(block_processor_mock));
|
||||
for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
|
||||
++frame_index) {
|
||||
|
@ -518,7 +554,8 @@ class EchoCanceller3Tester {
|
|||
// capture and render API calls.
|
||||
void RunRenderSwapQueueVerificationTest() {
|
||||
const EchoCanceller3Config config;
|
||||
EchoCanceller3 aec3(config, sample_rate_hz_, 1, 1);
|
||||
EchoCanceller3 aec3(config, /*multichannel_config=*/absl::nullopt,
|
||||
sample_rate_hz_, 1, 1);
|
||||
aec3.SetBlockProcessorForTesting(
|
||||
std::make_unique<RenderTransportVerificationProcessor>(num_bands_));
|
||||
|
||||
|
@ -566,7 +603,9 @@ class EchoCanceller3Tester {
|
|||
// This test verifies that a buffer overrun in the render swapqueue is
|
||||
// properly reported.
|
||||
void RunRenderPipelineSwapQueueOverrunReturnValueTest() {
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1);
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(),
|
||||
/*multichannel_config=*/absl::nullopt, sample_rate_hz_,
|
||||
1, 1);
|
||||
|
||||
constexpr size_t kRenderTransferQueueSize = 30;
|
||||
for (size_t k = 0; k < 2; ++k) {
|
||||
|
@ -591,7 +630,9 @@ class EchoCanceller3Tester {
|
|||
// Set aec3_sample_rate_hz to be different from sample_rate_hz_ in such a
|
||||
// way that the number of bands for the rates are different.
|
||||
const int aec3_sample_rate_hz = sample_rate_hz_ == 48000 ? 32000 : 48000;
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(), aec3_sample_rate_hz, 1, 1);
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(),
|
||||
/*multichannel_config=*/absl::nullopt,
|
||||
aec3_sample_rate_hz, 1, 1);
|
||||
PopulateInputFrame(frame_length_, 0, &render_buffer_.channels_f()[0][0], 0);
|
||||
|
||||
EXPECT_DEATH(aec3.AnalyzeRender(&render_buffer_), "");
|
||||
|
@ -604,7 +645,9 @@ class EchoCanceller3Tester {
|
|||
// Set aec3_sample_rate_hz to be different from sample_rate_hz_ in such a
|
||||
// way that the number of bands for the rates are different.
|
||||
const int aec3_sample_rate_hz = sample_rate_hz_ == 48000 ? 32000 : 48000;
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(), aec3_sample_rate_hz, 1, 1);
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(),
|
||||
/*multichannel_config=*/absl::nullopt,
|
||||
aec3_sample_rate_hz, 1, 1);
|
||||
PopulateInputFrame(frame_length_, num_bands_, 0,
|
||||
&capture_buffer_.split_bands_f(0)[0], 100);
|
||||
EXPECT_DEATH(aec3.ProcessCapture(&capture_buffer_, false), "");
|
||||
|
@ -887,6 +930,147 @@ TEST(EchoCanceller3FieldTrials, Aec3UseNearendReverb) {
|
|||
EXPECT_FLOAT_EQ(adjusted_config.ep_strength.nearend_len, 0.8);
|
||||
}
|
||||
|
||||
// Verifies that, with stereo detection enabled and a zero detection threshold,
// the AEC starts out using the mono configuration, keeps using it while both
// channels carry identical samples, and switches to the multichannel
// configuration as soon as the two channels differ. The active configuration
// is identified via the coarse filter length, which is set to different values
// in the two configurations.
TEST(EchoCanceller3, DetectionOfProperStereo) {
  constexpr int kSampleRateHz = 16000;
  constexpr int kNumChannels = 2;
  AudioBuffer buffer(/*input_rate=*/kSampleRateHz,
                     /*input_num_channels=*/kNumChannels,
                     /*buffer_rate=*/kSampleRateHz,
                     /*buffer_num_channels=*/kNumChannels,
                     /*output_rate=*/kSampleRateHz,
                     /*output_num_channels=*/kNumChannels);

  constexpr size_t kNumBlocksForMonoConfig = 1;
  constexpr size_t kNumBlocksForSurroundConfig = 2;
  EchoCanceller3Config mono_config;
  absl::optional<EchoCanceller3Config> multichannel_config;

  mono_config.multi_channel.detect_stereo_content = true;
  mono_config.multi_channel.stereo_detection_threshold = 0.0f;
  // The multichannel config is a copy of the mono config that only differs in
  // the coarse filter length used as a marker below.
  multichannel_config = mono_config;
  mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
  multichannel_config->filter.coarse_initial.length_blocks =
      kNumBlocksForSurroundConfig;

  EchoCanceller3 aec3(mono_config, multichannel_config,
                      /*sample_rate_hz=*/kSampleRateHz,
                      /*num_render_channels=*/kNumChannels,
                      /*num_capture_input_channels=*/kNumChannels);

  // Before any audio has been processed, the mono setup is active.
  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
  EXPECT_EQ(
      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
      kNumBlocksForMonoConfig);

  // Identical channels: no stereo content should be detected.
  RunAecInStereo(buffer, aec3, 100.0f, 100.0f);
  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
  EXPECT_EQ(
      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
      kNumBlocksForMonoConfig);

  // Differing channels: stereo content should be detected.
  RunAecInStereo(buffer, aec3, 100.0f, 101.0f);
  EXPECT_TRUE(aec3.StereoRenderProcessingActiveForTesting());
  EXPECT_EQ(
      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
      kNumBlocksForSurroundConfig);
}
|
||||
|
||||
// Verifies that a non-zero stereo detection threshold is respected: a channel
// difference below the threshold keeps the mono configuration active, whereas
// a difference above the threshold activates the multichannel configuration.
// The active configuration is identified via the coarse filter length, which
// is set to different values in the two configurations.
TEST(EchoCanceller3, DetectionOfProperStereoUsingThreshold) {
  constexpr int kSampleRateHz = 16000;
  constexpr int kNumChannels = 2;
  AudioBuffer buffer(/*input_rate=*/kSampleRateHz,
                     /*input_num_channels=*/kNumChannels,
                     /*buffer_rate=*/kSampleRateHz,
                     /*buffer_num_channels=*/kNumChannels,
                     /*output_rate=*/kSampleRateHz,
                     /*output_num_channels=*/kNumChannels);

  constexpr size_t kNumBlocksForMonoConfig = 1;
  constexpr size_t kNumBlocksForSurroundConfig = 2;
  EchoCanceller3Config mono_config;
  absl::optional<EchoCanceller3Config> multichannel_config;

  constexpr float kStereoDetectionThreshold = 2.0f;
  mono_config.multi_channel.detect_stereo_content = true;
  mono_config.multi_channel.stereo_detection_threshold =
      kStereoDetectionThreshold;
  // The multichannel config is a copy of the mono config that only differs in
  // the coarse filter length used as a marker below.
  multichannel_config = mono_config;
  mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
  multichannel_config->filter.coarse_initial.length_blocks =
      kNumBlocksForSurroundConfig;

  EchoCanceller3 aec3(mono_config, multichannel_config,
                      /*sample_rate_hz=*/kSampleRateHz,
                      /*num_render_channels=*/kNumChannels,
                      /*num_capture_input_channels=*/kNumChannels);

  // Before any audio has been processed, the mono setup is active.
  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
  EXPECT_EQ(
      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
      kNumBlocksForMonoConfig);

  // Channel difference of (threshold - 1): below the threshold.
  RunAecInStereo(buffer, aec3, 100.0f,
                 100.0f + kStereoDetectionThreshold - 1.0f);
  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
  EXPECT_EQ(
      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
      kNumBlocksForMonoConfig);

  // Channel difference of (threshold + 10): above the threshold.
  RunAecInStereo(buffer, aec3, 100.0f,
                 100.0f + kStereoDetectionThreshold + 10.0f);
  EXPECT_TRUE(aec3.StereoRenderProcessingActiveForTesting());
  EXPECT_EQ(
      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
      kNumBlocksForSurroundConfig);
}
|
||||
|
||||
// Verifies that an AEC set up for a single render and a single capture channel
// uses the mono configuration both before and after processing audio,
// regardless of whether stereo content detection is enabled. The active
// configuration is identified via the coarse filter length, which is set to
// different values in the two configurations.
TEST(EchoCanceller3, StereoContentDetectionForMonoSignals) {
  constexpr int kSampleRateHz = 16000;

  constexpr size_t kNumBlocksForMonoConfig = 1;
  constexpr size_t kNumBlocksForSurroundConfig = 2;
  EchoCanceller3Config mono_config;
  absl::optional<EchoCanceller3Config> multichannel_config;

  for (bool detect_stereo_content : {false, true}) {
    mono_config.multi_channel.detect_stereo_content = detect_stereo_content;
    // The multichannel config is a copy of the mono config that only differs
    // in the coarse filter length used as a marker below.
    multichannel_config = mono_config;
    mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
    multichannel_config->filter.coarse_initial.length_blocks =
        kNumBlocksForSurroundConfig;

    AudioBuffer mono_buffer(/*input_rate=*/kSampleRateHz,
                            /*input_num_channels=*/1,
                            /*buffer_rate=*/kSampleRateHz,
                            /*buffer_num_channels=*/1,
                            /*output_rate=*/kSampleRateHz,
                            /*output_num_channels=*/1);

    EchoCanceller3 aec3(mono_config, multichannel_config,
                        /*sample_rate_hz=*/kSampleRateHz,
                        /*num_render_channels=*/1,
                        /*num_capture_input_channels=*/1);

    EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
    EXPECT_EQ(
        aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
        kNumBlocksForMonoConfig);

    RunAecInSMono(mono_buffer, aec3, 100.0f);
    EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
    EXPECT_EQ(
        aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
        kNumBlocksForMonoConfig);
  }
}
|
||||
|
||||
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
|
||||
|
||||
TEST(EchoCanceller3InputCheckDeathTest, WrongCaptureNumBandsCheckVerification) {
|
||||
|
@ -899,9 +1083,11 @@ TEST(EchoCanceller3InputCheckDeathTest, WrongCaptureNumBandsCheckVerification) {
|
|||
// Verifies that the verification for null input to the capture processing api
|
||||
// call works.
|
||||
TEST(EchoCanceller3InputCheckDeathTest, NullCaptureProcessingParameter) {
|
||||
EXPECT_DEATH(EchoCanceller3(EchoCanceller3Config(), 16000, 1, 1)
|
||||
.ProcessCapture(nullptr, false),
|
||||
"");
|
||||
EXPECT_DEATH(
|
||||
EchoCanceller3(EchoCanceller3Config(),
|
||||
/*multichannel_config_=*/absl::nullopt, 16000, 1, 1)
|
||||
.ProcessCapture(nullptr, false),
|
||||
"");
|
||||
}
|
||||
|
||||
// Verifies the check for correct sample rate.
|
||||
|
@ -909,7 +1095,10 @@ TEST(EchoCanceller3InputCheckDeathTest, NullCaptureProcessingParameter) {
|
|||
// tests on test bots has been fixed.
|
||||
TEST(EchoCanceller3InputCheckDeathTest, DISABLED_WrongSampleRate) {
|
||||
ApmDataDumper data_dumper(0);
|
||||
EXPECT_DEATH(EchoCanceller3(EchoCanceller3Config(), 8001, 1, 1), "");
|
||||
EXPECT_DEATH(
|
||||
EchoCanceller3(EchoCanceller3Config(),
|
||||
/*multichannel_config_=*/absl::nullopt, 8001, 1, 1),
|
||||
"");
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
|
||||
/*
|
||||
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/multi_channel_content_detector.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Compares the left and right channels in the render `frame` to determine
// whether the signal is a proper stereo signal. To allow for differences
// introduced by hardware drivers, a threshold `detection_threshold` is used for
// the detection.
//
// `frame` is indexed as frame[band][channel][sample]. Returns true as soon as
// any sample of channel 0 and channel 1 differs, in absolute value, by strictly
// more than `detection_threshold`. Returns false for an empty frame and for
// frames whose first band has fewer than two channels.
bool IsProperStereo(const std::vector<std::vector<std::vector<float>>>& frame,
                    float detection_threshold) {
  // Guard against an empty frame before inspecting the first band.
  if (frame.empty() || frame[0].size() < 2) {
    return false;
  }

  for (const auto& band : frame) {
    // A band without a second channel has nothing to compare.
    if (band.size() < 2) {
      continue;
    }
    const std::vector<float>& left = band[0];
    const std::vector<float>& right = band[1];
    // Only compare the samples that are present in both channels so that a
    // shorter second channel is never indexed out of bounds.
    const size_t num_samples =
        left.size() < right.size() ? left.size() : right.size();
    for (size_t k = 0; k < num_samples; ++k) {
      if (std::fabs(left[k] - right[k]) > detection_threshold) {
        return true;
      }
    }
  }
  return false;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Stores the detection settings. When stereo detection is disabled, any setup
// with more than one render input channel is treated as proper multichannel
// content from the start; otherwise nothing is considered detected initially.
MultiChannelContentDetector::MultiChannelContentDetector(
    bool detect_stereo_content,
    int num_render_input_channels,
    float detection_threshold)
    : detect_stereo_content_(detect_stereo_content),
      detection_threshold_(detection_threshold),
      proper_multichannel_content_detected_(num_render_input_channels > 1 &&
                                            !detect_stereo_content) {}
|
||||
|
||||
bool MultiChannelContentDetector::UpdateDetection(
|
||||
const std::vector<std::vector<std::vector<float>>>& frame) {
|
||||
bool previous_proper_multichannel_content_detected_ =
|
||||
proper_multichannel_content_detected_;
|
||||
if (detect_stereo_content_ && !proper_multichannel_content_detected_) {
|
||||
proper_multichannel_content_detected_ =
|
||||
IsProperStereo(frame, detection_threshold_);
|
||||
}
|
||||
return previous_proper_multichannel_content_detected_ !=
|
||||
proper_multichannel_content_detected_;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_MULTI_CHANNEL_CONTENT_DETECTOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_MULTI_CHANNEL_CONTENT_DETECTOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Keeps track of whether the analyzed audio has been found to be proper
// multichannel content rather than mono content that has merely been upmixed.
// Small channel differences, e.g. ones introduced by hardware drivers, can be
// tolerated by means of the threshold `detection_threshold`.
class MultiChannelContentDetector {
 public:
  // `detect_stereo_content` selects whether the content of the frames is
  // analyzed, `num_render_input_channels` is the number of render input
  // channels and `detection_threshold` is the threshold used when comparing
  // the channels.
  MultiChannelContentDetector(bool detect_stereo_content,
                              int num_render_input_channels,
                              float detection_threshold);

  // Compares the left and right channels in the render `frame` to determine
  // whether the signal is a proper multichannel signal. Returns true if the
  // detection state changed as a result of the call.
  bool UpdateDetection(
      const std::vector<std::vector<std::vector<float>>>& frame);

  // Returns the current detection state.
  bool IsMultiChannelContentDetected() const {
    return proper_multichannel_content_detected_;
  }

 private:
  const bool detect_stereo_content_;
  const float detection_threshold_;
  bool proper_multichannel_content_detected_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_MULTI_CHANNEL_CONTENT_DETECTOR_H_
|
|
@ -0,0 +1,138 @@
|
|||
/*
|
||||
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/multi_channel_content_detector.h"
|
||||
|
||||
#include "test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// A single render input channel with detection enabled must not be reported
// as multichannel content.
TEST(MultiChannelContentDetector, HandlingOfMono) {
  MultiChannelContentDetector detector(/*detect_stereo_content=*/true,
                                       /*num_render_input_channels=*/1,
                                       /*detection_threshold=*/0.0f);
  EXPECT_FALSE(detector.IsMultiChannelContentDetected());
}
|
||||
|
||||
// A single render input channel with detection disabled must not be reported
// as multichannel content.
TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) {
  MultiChannelContentDetector detector(/*detect_stereo_content=*/false,
                                       /*num_render_input_channels=*/1,
                                       /*detection_threshold=*/0.0f);
  EXPECT_FALSE(detector.IsMultiChannelContentDetected());
}
|
||||
|
||||
// With detection disabled and two render input channels, multichannel content
// is reported from the start and updates never signal a change.
TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
  constexpr size_t kNumBands = 1;
  constexpr size_t kNumChannels = 2;
  constexpr size_t kNumSamples = 160;

  MultiChannelContentDetector detector(/*detect_stereo_content=*/false,
                                       /*num_render_input_channels=*/2,
                                       /*detection_threshold=*/0.0f);
  EXPECT_TRUE(detector.IsMultiChannelContentDetected());

  std::vector<std::vector<std::vector<float>>> frame(
      kNumBands, std::vector<std::vector<float>>(
                     kNumChannels, std::vector<float>(kNumSamples)));
  frame[0][0].assign(kNumSamples, 100.0f);
  frame[0][1].assign(kNumSamples, 101.0f);

  EXPECT_FALSE(detector.UpdateDetection(frame));
  EXPECT_TRUE(detector.IsMultiChannelContentDetected());

  EXPECT_FALSE(detector.UpdateDetection(frame));
}
|
||||
|
||||
// With detection enabled, nothing is reported as multichannel content before
// any frame has been analyzed, even for two render input channels.
TEST(MultiChannelContentDetector, InitialDetectionOfStereo) {
  MultiChannelContentDetector detector(/*detect_stereo_content=*/true,
                                       /*num_render_input_channels=*/2,
                                       /*detection_threshold=*/0.0f);
  EXPECT_FALSE(detector.IsMultiChannelContentDetected());
}
|
||||
|
||||
// Two channels carrying identical samples must not be detected as proper
// multichannel content, no matter how many times the frame is analyzed.
TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) {
  constexpr size_t kNumBands = 1;
  constexpr size_t kNumChannels = 2;
  constexpr size_t kNumSamples = 160;

  MultiChannelContentDetector detector(/*detect_stereo_content=*/true,
                                       /*num_render_input_channels=*/2,
                                       /*detection_threshold=*/0.0f);
  std::vector<std::vector<std::vector<float>>> frame(
      kNumBands, std::vector<std::vector<float>>(
                     kNumChannels, std::vector<float>(kNumSamples)));
  frame[0][0].assign(kNumSamples, 100.0f);
  frame[0][1].assign(kNumSamples, 100.0f);

  EXPECT_FALSE(detector.UpdateDetection(frame));
  EXPECT_FALSE(detector.IsMultiChannelContentDetected());

  EXPECT_FALSE(detector.UpdateDetection(frame));
}
|
||||
|
||||
// Two channels carrying different samples must be detected as proper
// multichannel content on the first update; the following update must not
// signal any further change.
TEST(MultiChannelContentDetector, DetectionWhenStereo) {
  constexpr size_t kNumBands = 1;
  constexpr size_t kNumChannels = 2;
  constexpr size_t kNumSamples = 160;

  MultiChannelContentDetector detector(/*detect_stereo_content=*/true,
                                       /*num_render_input_channels=*/2,
                                       /*detection_threshold=*/0.0f);
  std::vector<std::vector<std::vector<float>>> frame(
      kNumBands, std::vector<std::vector<float>>(
                     kNumChannels, std::vector<float>(kNumSamples)));
  frame[0][0].assign(kNumSamples, 100.0f);
  frame[0][1].assign(kNumSamples, 101.0f);

  EXPECT_TRUE(detector.UpdateDetection(frame));
  EXPECT_TRUE(detector.IsMultiChannelContentDetected());

  EXPECT_FALSE(detector.UpdateDetection(frame));
}
|
||||
|
||||
// Identical channels are first analyzed without any detection taking place;
// once the channels start to differ, the change is detected on the first
// update and not signalled again on the following one.
TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) {
  constexpr size_t kNumBands = 1;
  constexpr size_t kNumChannels = 2;
  constexpr size_t kNumSamples = 160;

  MultiChannelContentDetector detector(/*detect_stereo_content=*/true,
                                       /*num_render_input_channels=*/2,
                                       /*detection_threshold=*/0.0f);
  std::vector<std::vector<std::vector<float>>> frame(
      kNumBands, std::vector<std::vector<float>>(
                     kNumChannels, std::vector<float>(kNumSamples)));

  // Phase 1: both channels are identical.
  frame[0][0].assign(kNumSamples, 100.0f);
  frame[0][1].assign(kNumSamples, 100.0f);
  EXPECT_FALSE(detector.UpdateDetection(frame));
  EXPECT_FALSE(detector.IsMultiChannelContentDetected());

  EXPECT_FALSE(detector.UpdateDetection(frame));

  // Phase 2: the channels differ.
  frame[0][0].assign(kNumSamples, 100.0f);
  frame[0][1].assign(kNumSamples, 101.0f);

  EXPECT_TRUE(detector.UpdateDetection(frame));
  EXPECT_TRUE(detector.IsMultiChannelContentDetected());

  EXPECT_FALSE(detector.UpdateDetection(frame));
}
|
||||
|
||||
// A channel difference that equals the detection threshold must not trigger
// detection.
TEST(MultiChannelContentDetector, DetectionWithStereoBelowThreshold) {
  constexpr float kThreshold = 1.0f;
  constexpr size_t kNumBands = 1;
  constexpr size_t kNumChannels = 2;
  constexpr size_t kNumSamples = 160;

  MultiChannelContentDetector detector(/*detect_stereo_content=*/true,
                                       /*num_render_input_channels=*/2,
                                       /*detection_threshold=*/kThreshold);
  std::vector<std::vector<std::vector<float>>> frame(
      kNumBands, std::vector<std::vector<float>>(
                     kNumChannels, std::vector<float>(kNumSamples)));
  frame[0][0].assign(kNumSamples, 100.0f);
  frame[0][1].assign(kNumSamples, 100.0f + kThreshold);

  EXPECT_FALSE(detector.UpdateDetection(frame));
  EXPECT_FALSE(detector.IsMultiChannelContentDetected());

  EXPECT_FALSE(detector.UpdateDetection(frame));
}
|
||||
|
||||
// A channel difference slightly above the detection threshold must trigger
// detection on the first update; the following update must not signal any
// further change.
TEST(MultiChannelContentDetector, DetectionWithStereoAboveThreshold) {
  constexpr float kThreshold = 1.0f;
  constexpr size_t kNumBands = 1;
  constexpr size_t kNumChannels = 2;
  constexpr size_t kNumSamples = 160;

  MultiChannelContentDetector detector(/*detect_stereo_content=*/true,
                                       /*num_render_input_channels=*/2,
                                       /*detection_threshold=*/kThreshold);
  std::vector<std::vector<std::vector<float>>> frame(
      kNumBands, std::vector<std::vector<float>>(
                     kNumChannels, std::vector<float>(kNumSamples)));
  frame[0][0].assign(kNumSamples, 100.0f);
  frame[0][1].assign(kNumSamples, 100.0f + kThreshold + 0.1f);

  EXPECT_TRUE(detector.UpdateDetection(frame));
  EXPECT_TRUE(detector.IsMultiChannelContentDetected());

  EXPECT_FALSE(detector.UpdateDetection(frame));
}
|
||||
|
||||
} // namespace webrtc
|
|
@ -1726,14 +1726,14 @@ void AudioProcessingImpl::InitializeEchoController() {
|
|||
proc_sample_rate_hz(), num_reverse_channels(), num_proc_channels());
|
||||
RTC_DCHECK(submodules_.echo_controller);
|
||||
} else {
|
||||
EchoCanceller3Config config =
|
||||
use_setup_specific_default_aec3_config_
|
||||
? EchoCanceller3::CreateDefaultConfig(num_reverse_channels(),
|
||||
num_proc_channels())
|
||||
: EchoCanceller3Config();
|
||||
EchoCanceller3Config config;
|
||||
absl::optional<EchoCanceller3Config> multichannel_config;
|
||||
if (use_setup_specific_default_aec3_config_) {
|
||||
multichannel_config = EchoCanceller3::CreateDefaultMultichannelConfig();
|
||||
}
|
||||
submodules_.echo_controller = std::make_unique<EchoCanceller3>(
|
||||
config, proc_sample_rate_hz(), num_reverse_channels(),
|
||||
num_proc_channels());
|
||||
config, multichannel_config, proc_sample_rate_hz(),
|
||||
num_reverse_channels(), num_proc_channels());
|
||||
}
|
||||
|
||||
// Setup the storage for returning the linear AEC output.
|
||||
|
|
|
@ -560,6 +560,7 @@ webrtc_fuzzer_test("aec3_fuzzer") {
|
|||
"../../modules/audio_processing:audio_buffer",
|
||||
"../../modules/audio_processing/aec3",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
|
||||
}
|
||||
|
||||
webrtc_fuzzer_test("comfort_noise_decoder_fuzzer") {
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "modules/audio_processing/aec3/echo_canceller3.h"
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "modules/audio_processing/include/audio_processing.h"
|
||||
|
@ -51,7 +52,8 @@ void FuzzOneInput(const uint8_t* data, size_t size) {
|
|||
const size_t num_capture_channels =
|
||||
1 + fuzz_data.ReadOrDefaultValue<uint8_t>(0) % (kMaxNumChannels - 1);
|
||||
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz,
|
||||
EchoCanceller3 aec3(EchoCanceller3Config(),
|
||||
/*multichannel_config=*/absl::nullopt, sample_rate_hz,
|
||||
num_render_channels, num_capture_channels);
|
||||
|
||||
AudioBuffer capture_audio(sample_rate_hz, num_capture_channels,
|
||||
|
|
Loading…
Reference in a new issue