diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h index 1fd403652a..efbc5b7845 100644 --- a/api/audio/echo_canceller3_config.h +++ b/api/audio/echo_canceller3_config.h @@ -236,6 +236,11 @@ struct RTC_EXPORT EchoCanceller3Config { float floor_first_increase = 0.00001f; bool conservative_hf_suppression = false; } suppressor; + + struct MultiChannel { + bool detect_stereo_content = true; + float stereo_detection_threshold = 0.0f; + } multi_channel; }; } // namespace webrtc diff --git a/api/audio/echo_canceller3_config_json.cc b/api/audio/echo_canceller3_config_json.cc index 71966c13b3..aa490b0d52 100644 --- a/api/audio/echo_canceller3_config_json.cc +++ b/api/audio/echo_canceller3_config_json.cc @@ -415,6 +415,13 @@ void Aec3ConfigFromJsonString(absl::string_view json_string, ReadParam(section, "conservative_hf_suppression", &cfg.suppressor.conservative_hf_suppression); } + + if (rtc::GetValueFromJsonObject(aec3_root, "multi_channel", &section)) { + ReadParam(section, "detect_stereo_content", + &cfg.multi_channel.detect_stereo_content); + ReadParam(section, "stereo_detection_threshold", + &cfg.multi_channel.stereo_detection_threshold); + } } EchoCanceller3Config Aec3ConfigFromJsonString(absl::string_view json_string) { @@ -574,7 +581,8 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) { ost << "\"erle_onset_compensation_in_dominant_nearend\": " << (config.ep_strength.erle_onset_compensation_in_dominant_nearend ? "true" - : "false") << ","; + : "false") + << ","; ost << "\"use_conservative_tail_frequency_response\": " << (config.ep_strength.use_conservative_tail_frequency_response ? 
"true" @@ -736,7 +744,15 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) { << ","; ost << "\"conservative_hf_suppression\": " << config.suppressor.conservative_hf_suppression; + ost << "},"; + + ost << "\"multi_channel\": {"; + ost << "\"detect_stereo_content\": " + << (config.multi_channel.detect_stereo_content ? "true" : "false") << ","; + ost << "\"stereo_detection_threshold\": " + << config.multi_channel.stereo_detection_threshold; ost << "}"; + ost << "}"; ost << "}"; diff --git a/api/audio/echo_canceller3_factory.cc b/api/audio/echo_canceller3_factory.cc index d65a7262fa..284b117bea 100644 --- a/api/audio/echo_canceller3_factory.cc +++ b/api/audio/echo_canceller3_factory.cc @@ -25,7 +25,8 @@ std::unique_ptr EchoCanceller3Factory::Create( int num_render_channels, int num_capture_channels) { return std::make_unique( - config_, sample_rate_hz, num_render_channels, num_capture_channels); + config_, /*multichannel_config=*/absl::nullopt, sample_rate_hz, + num_render_channels, num_capture_channels); } } // namespace webrtc diff --git a/api/audio/test/echo_canceller3_config_json_unittest.cc b/api/audio/test/echo_canceller3_config_json_unittest.cc index bb28b4feb3..5610aeb8fd 100644 --- a/api/audio/test/echo_canceller3_config_json_unittest.cc +++ b/api/audio/test/echo_canceller3_config_json_unittest.cc @@ -31,6 +31,10 @@ TEST(EchoCanceller3JsonHelpers, ToStringAndParseJson) { cfg.suppressor.subband_nearend_detection.subband1 = {4, 5}; cfg.suppressor.subband_nearend_detection.nearend_threshold = 2.f; cfg.suppressor.subband_nearend_detection.snr_threshold = 100.f; + cfg.multi_channel.detect_stereo_content = + !cfg.multi_channel.detect_stereo_content; + cfg.multi_channel.stereo_detection_threshold = + cfg.multi_channel.stereo_detection_threshold + 1.0f; std::string json_string = Aec3ConfigToJsonString(cfg); EchoCanceller3Config cfg_transformed = Aec3ConfigFromJsonString(json_string); @@ -75,5 +79,9 @@ TEST(EchoCanceller3JsonHelpers, 
ToStringAndParseJson) { cfg_transformed.suppressor.subband_nearend_detection.nearend_threshold); EXPECT_EQ(cfg.suppressor.subband_nearend_detection.snr_threshold, cfg_transformed.suppressor.subband_nearend_detection.snr_threshold); + EXPECT_EQ(cfg.multi_channel.detect_stereo_content, + cfg_transformed.multi_channel.detect_stereo_content); + EXPECT_EQ(cfg.multi_channel.stereo_detection_threshold, + cfg_transformed.multi_channel.stereo_detection_threshold); } } // namespace webrtc diff --git a/modules/audio_processing/aec3/BUILD.gn b/modules/audio_processing/aec3/BUILD.gn index 128e8f3bd4..928afef13f 100644 --- a/modules/audio_processing/aec3/BUILD.gn +++ b/modules/audio_processing/aec3/BUILD.gn @@ -37,6 +37,8 @@ rtc_library("aec3") { "coarse_filter_update_gain.h", "comfort_noise_generator.cc", "comfort_noise_generator.h", + "config_selector.cc", + "config_selector.h", "decimator.cc", "decimator.h", "delay_estimate.h", @@ -72,6 +74,8 @@ rtc_library("aec3") { "matched_filter_lag_aggregator.h", "moving_average.cc", "moving_average.h", + "multi_channel_content_detector.cc", + "multi_channel_content_detector.h", "nearend_detector.h", "refined_filter_update_gain.cc", "refined_filter_update_gain.h", @@ -338,6 +342,7 @@ if (rtc_include_tests) { "clockdrift_detector_unittest.cc", "coarse_filter_update_gain_unittest.cc", "comfort_noise_generator_unittest.cc", + "config_selector_unittest.cc", "decimator_unittest.cc", "echo_canceller3_unittest.cc", "echo_path_delay_estimator_unittest.cc", @@ -352,6 +357,7 @@ if (rtc_include_tests) { "matched_filter_lag_aggregator_unittest.cc", "matched_filter_unittest.cc", "moving_average_unittest.cc", + "multi_channel_content_detector_unittest.cc", "refined_filter_update_gain_unittest.cc", "render_buffer_unittest.cc", "render_delay_buffer_unittest.cc", diff --git a/modules/audio_processing/aec3/config_selector.cc b/modules/audio_processing/aec3/config_selector.cc new file mode 100644 index 0000000000..9a37da6ccc --- /dev/null +++ 
b/modules/audio_processing/aec3/config_selector.cc @@ -0,0 +1,69 @@ + +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/config_selector.h" + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// Validates that the mono and the multichannel configs have compatible fields. +bool CompatibleConfigs(const EchoCanceller3Config& mono_config, + const EchoCanceller3Config& multichannel_config) { + if (mono_config.delay.fixed_capture_delay_samples != + multichannel_config.delay.fixed_capture_delay_samples) { + return false; + } + + if (mono_config.filter.export_linear_aec_output != + multichannel_config.filter.export_linear_aec_output) { + return false; + } + + if (mono_config.filter.high_pass_filter_echo_reference != + multichannel_config.filter.high_pass_filter_echo_reference) { + return false; + } + + if (mono_config.multi_channel.detect_stereo_content != + multichannel_config.multi_channel.detect_stereo_content) { + return false; + } + return true; +} + +} // namespace + +ConfigSelector::ConfigSelector( + const EchoCanceller3Config& config, + const absl::optional& multichannel_config, + int num_render_input_channels) + : config_(config), multichannel_config_(multichannel_config) { + if (multichannel_config_.has_value()) { + RTC_DCHECK(CompatibleConfigs(config_, *multichannel_config_)); + } + + Update(!config_.multi_channel.detect_stereo_content && + num_render_input_channels > 1); + + RTC_DCHECK(active_config_); +} + +void ConfigSelector::Update(bool multichannel_content) { + if (multichannel_content && multichannel_config_.has_value()) { + 
active_config_ = &(*multichannel_config_); + } else { + active_config_ = &config_; + } +} + +} // namespace webrtc diff --git a/modules/audio_processing/aec3/config_selector.h b/modules/audio_processing/aec3/config_selector.h new file mode 100644 index 0000000000..3b3f94e5ac --- /dev/null +++ b/modules/audio_processing/aec3/config_selector.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_ + +#include "absl/types/optional.h" +#include "api/audio/echo_canceller3_config.h" + +namespace webrtc { + +// Selects the config to use. +class ConfigSelector { + public: + ConfigSelector( + const EchoCanceller3Config& config, + const absl::optional& multichannel_config, + int num_render_input_channels); + + // Updates the config selection based on the detection of multichannel + // content. + void Update(bool multichannel_content); + + const EchoCanceller3Config& active_config() const { return *active_config_; } + + private: + const EchoCanceller3Config config_; + const absl::optional multichannel_config_; + const EchoCanceller3Config* active_config_ = nullptr; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_ diff --git a/modules/audio_processing/aec3/config_selector_unittest.cc b/modules/audio_processing/aec3/config_selector_unittest.cc new file mode 100644 index 0000000000..1826bfcace --- /dev/null +++ b/modules/audio_processing/aec3/config_selector_unittest.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. 
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/config_selector.h" + +#include + +#include "absl/types/optional.h" +#include "api/audio/echo_canceller3_config.h" +#include "test/gtest.h" + +namespace webrtc { + +class ConfigSelectorChannelsAndContentDetection + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P(ConfigSelectorMultiParameters, + ConfigSelectorChannelsAndContentDetection, + ::testing::Combine(::testing::Values(1, 2, 8), + ::testing::Values(false, true))); + +class ConfigSelectorChannels : public ::testing::Test, + public ::testing::WithParamInterface {}; + +INSTANTIATE_TEST_SUITE_P(ConfigSelectorMultiParameters, + ConfigSelectorChannels, + ::testing::Values(1, 2, 8)); + +TEST_P(ConfigSelectorChannelsAndContentDetection, + MonoConfigIsSelectedWhenNoMultiChannelConfigPresent) { + const auto [num_channels, detect_stereo_content] = GetParam(); + EchoCanceller3Config config; + config.multi_channel.detect_stereo_content = detect_stereo_content; + absl::optional multichannel_config; + + config.delay.default_delay = config.delay.default_delay + 1; + const size_t custom_delay_value_in_config = config.delay.default_delay; + + ConfigSelector cs(config, multichannel_config, + /*num_render_input_channels=*/num_channels); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_config); + + cs.Update(/*multichannel_content=*/false); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_config); + + cs.Update(/*multichannel_content=*/true); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_config); +} + 
+TEST_P(ConfigSelectorChannelsAndContentDetection, + CorrectInitialConfigIsSelected) { + const auto [num_channels, detect_stereo_content] = GetParam(); + EchoCanceller3Config config; + config.multi_channel.detect_stereo_content = detect_stereo_content; + absl::optional multichannel_config = config; + + config.delay.default_delay += 1; + const size_t custom_delay_value_in_config = config.delay.default_delay; + multichannel_config->delay.default_delay += 2; + const size_t custom_delay_value_in_multichannel_config = + multichannel_config->delay.default_delay; + + ConfigSelector cs(config, multichannel_config, + /*num_render_input_channels=*/num_channels); + + if (num_channels == 1 || detect_stereo_content) { + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_config); + } else { + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_multichannel_config); + } +} + +TEST_P(ConfigSelectorChannels, CorrectConfigUpdateBehavior) { + const int num_channels = GetParam(); + EchoCanceller3Config config; + config.multi_channel.detect_stereo_content = true; + absl::optional multichannel_config = config; + + config.delay.default_delay += 1; + const size_t custom_delay_value_in_config = config.delay.default_delay; + multichannel_config->delay.default_delay += 2; + const size_t custom_delay_value_in_multichannel_config = + multichannel_config->delay.default_delay; + + ConfigSelector cs(config, multichannel_config, + /*num_render_input_channels=*/num_channels); + + cs.Update(/*multichannel_content=*/false); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_config); + + if (num_channels == 1) { + cs.Update(/*multichannel_content=*/false); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_config); + } else { + cs.Update(/*multichannel_content=*/true); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_multichannel_config); + } +} + +} // namespace webrtc diff --git 
a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc index fcc941b0db..ef58314c0b 100644 --- a/modules/audio_processing/aec3/echo_canceller3.cc +++ b/modules/audio_processing/aec3/echo_canceller3.cc @@ -101,13 +101,24 @@ void FillSubFrameView( std::vector>>* sub_frame_view) { RTC_DCHECK_GE(1, sub_frame_index); RTC_DCHECK_EQ(frame->size(), sub_frame_view->size()); - RTC_DCHECK_EQ((*frame)[0].size(), (*sub_frame_view)[0].size()); - for (size_t band = 0; band < frame->size(); ++band) { - for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) { - (*sub_frame_view)[band][channel] = rtc::ArrayView( - &(*frame)[band][channel][sub_frame_index * kSubFrameLength], + if ((*frame)[0].size() > (*sub_frame_view)[0].size()) { + RTC_DCHECK_EQ((*sub_frame_view)[0].size(), 1); + // Downmix the audio to mono (should only be done when the audio contains + // fake-stereo or fake-multichannel). + for (size_t band = 0; band < frame->size(); ++band) { + (*sub_frame_view)[band][/*channel=*/0] = rtc::ArrayView( + &(*frame)[band][/*channel=*/0][sub_frame_index * kSubFrameLength], kSubFrameLength); } + } else { + RTC_DCHECK_EQ((*frame)[0].size(), (*sub_frame_view)[0].size()); + for (size_t band = 0; band < frame->size(); ++band) { + for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) { + (*sub_frame_view)[band][channel] = rtc::ArrayView( + &(*frame)[band][channel][sub_frame_index * kSubFrameLength], + kSubFrameLength); + } + } } } @@ -221,6 +232,10 @@ void CopyBufferIntoFrame(const AudioBuffer& buffer, EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) { EchoCanceller3Config adjusted_cfg = config; + if (field_trial::IsEnabled("WebRTC-Aec3StereoContentDetectionKillSwitch")) { + adjusted_cfg.multi_channel.detect_stereo_content = false; + } + if (field_trial::IsEnabled("WebRTC-Aec3AntiHowlingMinimizationKillSwitch")) { adjusted_cfg.suppressor.high_bands_suppression 
.anti_howling_activation_threshold = 25.f; @@ -667,68 +682,71 @@ void EchoCanceller3::RenderWriter::Insert(const AudioBuffer& input) { int EchoCanceller3::instance_count_ = 0; -EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config, - int sample_rate_hz, - size_t num_render_channels, - size_t num_capture_channels) +EchoCanceller3::EchoCanceller3( + const EchoCanceller3Config& config, + const absl::optional& multichannel_config, + int sample_rate_hz, + size_t num_render_channels, + size_t num_capture_channels) : data_dumper_( new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), config_(AdjustConfig(config)), sample_rate_hz_(sample_rate_hz), num_bands_(NumBandsForRate(sample_rate_hz_)), - num_render_channels_(num_render_channels), + num_render_input_channels_(num_render_channels), num_capture_channels_(num_capture_channels), + config_selector_(AdjustConfig(config), + multichannel_config, + num_render_input_channels_), + multichannel_content_detector_( + config_selector_.active_config().multi_channel.detect_stereo_content, + num_render_input_channels_, + config_selector_.active_config() + .multi_channel.stereo_detection_threshold), output_framer_(num_bands_, num_capture_channels_), capture_blocker_(num_bands_, num_capture_channels_), - render_blocker_(num_bands_, num_render_channels_), render_transfer_queue_( kRenderTransferQueueSizeFrames, std::vector>>( num_bands_, std::vector>( - num_render_channels_, + num_render_input_channels_, std::vector(AudioBuffer::kSplitBandSize, 0.f))), Aec3RenderQueueItemVerifier(num_bands_, - num_render_channels_, + num_render_input_channels_, AudioBuffer::kSplitBandSize)), render_queue_output_frame_( num_bands_, std::vector>( - num_render_channels_, + num_render_input_channels_, std::vector(AudioBuffer::kSplitBandSize, 0.f))), render_block_( num_bands_, - std::vector>(num_render_channels_, + std::vector>(num_render_input_channels_, std::vector(kBlockSize, 0.f))), capture_block_( num_bands_, 
std::vector>(num_capture_channels_, std::vector(kBlockSize, 0.f))), - render_sub_frame_view_( - num_bands_, - std::vector>(num_render_channels_)), capture_sub_frame_view_( num_bands_, std::vector>(num_capture_channels_)) { RTC_DCHECK(ValidFullBandRate(sample_rate_hz_)); - block_processor_.reset(BlockProcessor::Create( - config_, sample_rate_hz_, num_render_channels_, num_capture_channels_)); - - if (config_.delay.fixed_capture_delay_samples > 0) { + if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) { block_delay_buffer_.reset(new BlockDelayBuffer( num_capture_channels_, num_bands_, AudioBuffer::kSplitBandSize, config_.delay.fixed_capture_delay_samples)); } - render_writer_.reset(new RenderWriter(data_dumper_.get(), config_, - &render_transfer_queue_, num_bands_, - num_render_channels_)); + render_writer_.reset(new RenderWriter( + data_dumper_.get(), config_selector_.active_config(), + &render_transfer_queue_, num_bands_, num_render_input_channels_)); RTC_DCHECK_EQ(num_bands_, std::max(sample_rate_hz_, 16000) / 16000); RTC_DCHECK_GE(kMaxNumBands, num_bands_); - if (config_.filter.export_linear_aec_output) { + if (config_selector_.active_config().filter.export_linear_aec_output) { linear_output_framer_.reset(new BlockFramer(1, num_capture_channels_)); linear_output_block_ = std::make_unique>>>( @@ -739,17 +757,49 @@ EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config, 1, std::vector>(num_capture_channels_)); } + Initialize(); + RTC_LOG(LS_INFO) << "AEC3 created with sample rate: " << sample_rate_hz_ - << " Hz, num render channels: " << num_render_channels_ + << " Hz, num render channels: " << num_render_input_channels_ << ", num capture channels: " << num_capture_channels_; } EchoCanceller3::~EchoCanceller3() = default; +void EchoCanceller3::Initialize() { + RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_); + + num_render_channels_to_aec_ = + multichannel_content_detector_.IsMultiChannelContentDetected() + ? 
num_render_input_channels_ + : 1; + + config_selector_.Update( + multichannel_content_detector_.IsMultiChannelContentDetected()); + + for (std::vector>& block_band : render_block_) { + block_band.resize(num_render_channels_to_aec_); + for (std::vector& block_channel : block_band) { + block_channel.resize(kBlockSize, 0.0f); + } + } + + render_blocker_.reset( + new FrameBlocker(num_bands_, num_render_channels_to_aec_)); + + block_processor_.reset(BlockProcessor::Create( + config_selector_.active_config(), sample_rate_hz_, + num_render_channels_to_aec_, num_capture_channels_)); + + render_sub_frame_view_ = std::vector>>( + num_bands_, + std::vector>(num_render_channels_to_aec_)); +} + void EchoCanceller3::AnalyzeRender(const AudioBuffer& render) { RTC_DCHECK_RUNS_SERIALIZED(&render_race_checker_); - RTC_DCHECK_EQ(render.num_channels(), num_render_channels_); + RTC_DCHECK_EQ(render.num_channels(), num_render_input_channels_); data_dumper_->DumpRaw("aec3_call_order", static_cast(EchoCanceller3ApiCall::kRender)); @@ -797,7 +847,7 @@ void EchoCanceller3::ProcessCapture(AudioBuffer* capture, api_call_metrics_.ReportCaptureCall(); // Optionally delay the capture signal. - if (config_.delay.fixed_capture_delay_samples > 0) { + if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) { RTC_DCHECK(block_delay_buffer_); block_delay_buffer_->DelaySignal(capture); } @@ -853,22 +903,18 @@ bool EchoCanceller3::ActiveProcessing() const { return true; } -EchoCanceller3Config EchoCanceller3::CreateDefaultConfig( - size_t num_render_channels, - size_t num_capture_channels) { +EchoCanceller3Config EchoCanceller3::CreateDefaultMultichannelConfig() { EchoCanceller3Config cfg; - if (num_render_channels > 1) { - // Use shorter and more rapidly adapting coarse filter to compensate for - // thge increased number of total filter parameters to adapt. 
- cfg.filter.coarse.length_blocks = 11; - cfg.filter.coarse.rate = 0.95f; - cfg.filter.coarse_initial.length_blocks = 11; - cfg.filter.coarse_initial.rate = 0.95f; + // Use shorter and more rapidly adapting coarse filter to compensate for + // the increased number of total filter parameters to adapt. + cfg.filter.coarse.length_blocks = 11; + cfg.filter.coarse.rate = 0.95f; + cfg.filter.coarse_initial.length_blocks = 11; + cfg.filter.coarse_initial.rate = 0.95f; - // Use more concervative suppressor behavior for non-nearend speech. - cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f; - cfg.suppressor.normal_tuning.max_inc_factor = 1.5f; - } + // Use more conservative suppressor behavior for non-nearend speech. + cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f; + cfg.suppressor.normal_tuning.max_inc_factor = 1.5f; return cfg; } @@ -887,16 +933,23 @@ void EchoCanceller3::EmptyRenderQueue() { // Report render call in the metrics. api_call_metrics_.ReportRenderCall(); - BufferRenderFrameContent(&render_queue_output_frame_, 0, &render_blocker_, - block_processor_.get(), &render_block_, - &render_sub_frame_view_); + if (multichannel_content_detector_.UpdateDetection( + render_queue_output_frame_)) { + // Reinitialize the AEC when proper stereo is detected. + Initialize(); + } - BufferRenderFrameContent(&render_queue_output_frame_, 1, &render_blocker_, - block_processor_.get(), &render_block_, - &render_sub_frame_view_); + // Buffer frame content. 
+ BufferRenderFrameContent(&render_queue_output_frame_, 0, + render_blocker_.get(), block_processor_.get(), + &render_block_, &render_sub_frame_view_); - BufferRemainingRenderFrameContent(&render_blocker_, block_processor_.get(), - &render_block_); + BufferRenderFrameContent(&render_queue_output_frame_, 1, + render_blocker_.get(), block_processor_.get(), + &render_block_, &render_sub_frame_view_); + + BufferRemainingRenderFrameContent(render_blocker_.get(), + block_processor_.get(), &render_block_); frame_to_buffer = render_transfer_queue_.Remove(&render_queue_output_frame_); diff --git a/modules/audio_processing/aec3/echo_canceller3.h b/modules/audio_processing/aec3/echo_canceller3.h index 44e0f38cb9..ba5895f34a 100644 --- a/modules/audio_processing/aec3/echo_canceller3.h +++ b/modules/audio_processing/aec3/echo_canceller3.h @@ -16,6 +16,7 @@ #include #include +#include "absl/types/optional.h" #include "api/array_view.h" #include "api/audio/echo_canceller3_config.h" #include "api/audio/echo_control.h" @@ -23,7 +24,9 @@ #include "modules/audio_processing/aec3/block_delay_buffer.h" #include "modules/audio_processing/aec3/block_framer.h" #include "modules/audio_processing/aec3/block_processor.h" +#include "modules/audio_processing/aec3/config_selector.h" #include "modules/audio_processing/aec3/frame_blocker.h" +#include "modules/audio_processing/aec3/multi_channel_content_detector.h" #include "modules/audio_processing/audio_buffer.h" #include "modules/audio_processing/logging/apm_data_dumper.h" #include "rtc_base/checks.h" @@ -84,10 +87,12 @@ class Aec3RenderQueueItemVerifier { // AnalyzeRender call which can be called concurrently with the other methods. 
class EchoCanceller3 : public EchoControl { public: - EchoCanceller3(const EchoCanceller3Config& config, - int sample_rate_hz, - size_t num_render_channels, - size_t num_capture_channels); + EchoCanceller3( + const EchoCanceller3Config& config, + const absl::optional& multichannel_config, + int sample_rate_hz, + size_t num_render_channels, + size_t num_capture_channels); ~EchoCanceller3() override; @@ -130,20 +135,37 @@ class EchoCanceller3 : public EchoControl { block_processor_->UpdateEchoLeakageStatus(leakage_detected); } - // Produces a default configuration that is suitable for a certain combination - // of render and capture channels. - static EchoCanceller3Config CreateDefaultConfig(size_t num_render_channels, - size_t num_capture_channels); + // Produces a default configuration for multichannel. + static EchoCanceller3Config CreateDefaultMultichannelConfig(); private: - class RenderWriter; friend class EchoCanceller3Tester; - FRIEND_TEST_ALL_PREFIXES(EchoCanceller3Metrics, EchoReturnLossEnhancement); + FRIEND_TEST_ALL_PREFIXES(EchoCanceller3, DetectionOfProperStereo); + FRIEND_TEST_ALL_PREFIXES(EchoCanceller3, + DetectionOfProperStereoUsingThreshold); + FRIEND_TEST_ALL_PREFIXES(EchoCanceller3, + StereoContentDetectionForMonoSignals); - // Replaces the internal block processor with a custom one for testing. + class RenderWriter; + + // (Re-)Initializes the selected subset of the EchoCanceller3 fields, at + // creation as well as during reconfiguration. + void Initialize(); + + // Only for testing. Replaces the internal block processor. void SetBlockProcessorForTesting( std::unique_ptr block_processor); + // Only for testing. Returns whether stereo processing is active. + bool StereoRenderProcessingActiveForTesting() const { + return multichannel_content_detector_.IsMultiChannelContentDetected(); + } + + // Only for testing. 
+ const EchoCanceller3Config& GetActiveConfigForTesting() const { + return config_selector_.active_config(); + } + // Empties the render SwapQueue. void EmptyRenderQueue(); @@ -166,13 +188,17 @@ class EchoCanceller3 : public EchoControl { const EchoCanceller3Config config_; const int sample_rate_hz_; const int num_bands_; - const size_t num_render_channels_; + const size_t num_render_input_channels_; + size_t num_render_channels_to_aec_; const size_t num_capture_channels_; + ConfigSelector config_selector_; + MultiChannelContentDetector multichannel_content_detector_; std::unique_ptr linear_output_framer_ RTC_GUARDED_BY(capture_race_checker_); BlockFramer output_framer_ RTC_GUARDED_BY(capture_race_checker_); FrameBlocker capture_blocker_ RTC_GUARDED_BY(capture_race_checker_); - FrameBlocker render_blocker_ RTC_GUARDED_BY(capture_race_checker_); + std::unique_ptr render_blocker_ + RTC_GUARDED_BY(capture_race_checker_); SwapQueue>>, Aec3RenderQueueItemVerifier> render_transfer_queue_; diff --git a/modules/audio_processing/aec3/echo_canceller3_unittest.cc b/modules/audio_processing/aec3/echo_canceller3_unittest.cc index ce0fa50c88..81cefb6f92 100644 --- a/modules/audio_processing/aec3/echo_canceller3_unittest.cc +++ b/modules/audio_processing/aec3/echo_canceller3_unittest.cc @@ -190,6 +190,32 @@ std::string ProduceDebugText(int sample_rate_hz, int variant) { return ss.Release(); } +void RunAecInStereo(AudioBuffer& buffer, + EchoCanceller3& aec3, + float channel_0_value, + float channel_1_value) { + rtc::ArrayView data_channel_0(&buffer.channels()[0][0], + buffer.num_frames()); + std::fill(data_channel_0.begin(), data_channel_0.end(), channel_0_value); + rtc::ArrayView data_channel_1(&buffer.channels()[1][0], + buffer.num_frames()); + std::fill(data_channel_1.begin(), data_channel_1.end(), channel_1_value); + aec3.AnalyzeRender(&buffer); + aec3.AnalyzeCapture(&buffer); + aec3.ProcessCapture(&buffer, /*level_change=*/false); +} + +void RunAecInSMono(AudioBuffer& buffer, 
+ EchoCanceller3& aec3, + float channel_0_value) { + rtc::ArrayView data_channel_0(&buffer.channels()[0][0], + buffer.num_frames()); + std::fill(data_channel_0.begin(), data_channel_0.end(), channel_0_value); + aec3.AnalyzeRender(&buffer); + aec3.AnalyzeCapture(&buffer); + aec3.ProcessCapture(&buffer, /*level_change=*/false); +} + } // namespace class EchoCanceller3Tester { @@ -220,7 +246,9 @@ class EchoCanceller3Tester { // and that the processor data is properly passed to the EchoCanceller3 // output. void RunCaptureTransportVerificationTest() { - EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1); + EchoCanceller3 aec3(EchoCanceller3Config(), + /*multichannel_config=*/absl::nullopt, sample_rate_hz_, + 1, 1); aec3.SetBlockProcessorForTesting( std::make_unique(num_bands_)); @@ -244,7 +272,9 @@ class EchoCanceller3Tester { // Test method for testing that the render data is properly received by the // block processor. void RunRenderTransportVerificationTest() { - EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1); + EchoCanceller3 aec3(EchoCanceller3Config(), + /*multichannel_config=*/absl::nullopt, sample_rate_hz_, + 1, 1); aec3.SetBlockProcessorForTesting( std::make_unique(num_bands_)); @@ -313,7 +343,9 @@ class EchoCanceller3Tester { break; } - EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1); + EchoCanceller3 aec3(EchoCanceller3Config(), + /*multichannel_config=*/absl::nullopt, sample_rate_hz_, + 1, 1); aec3.SetBlockProcessorForTesting(std::move(block_processor_mock)); for (size_t frame_index = 0; frame_index < kNumFramesToProcess; @@ -393,7 +425,9 @@ class EchoCanceller3Tester { } break; } - EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1); + EchoCanceller3 aec3(EchoCanceller3Config(), + /*multichannel_config=*/absl::nullopt, sample_rate_hz_, + 1, 1); aec3.SetBlockProcessorForTesting(std::move(block_processor_mock)); for (size_t frame_index = 0; frame_index < kNumFramesToProcess; @@ -479,7 
+513,9 @@ class EchoCanceller3Tester { } break; } - EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1); + EchoCanceller3 aec3(EchoCanceller3Config(), + /*multichannel_config=*/absl::nullopt, sample_rate_hz_, + 1, 1); aec3.SetBlockProcessorForTesting(std::move(block_processor_mock)); for (size_t frame_index = 0; frame_index < kNumFramesToProcess; ++frame_index) { @@ -518,7 +554,8 @@ class EchoCanceller3Tester { // capture and render API calls. void RunRenderSwapQueueVerificationTest() { const EchoCanceller3Config config; - EchoCanceller3 aec3(config, sample_rate_hz_, 1, 1); + EchoCanceller3 aec3(config, /*multichannel_config=*/absl::nullopt, + sample_rate_hz_, 1, 1); aec3.SetBlockProcessorForTesting( std::make_unique(num_bands_)); @@ -566,7 +603,9 @@ class EchoCanceller3Tester { // This test verifies that a buffer overrun in the render swapqueue is // properly reported. void RunRenderPipelineSwapQueueOverrunReturnValueTest() { - EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, 1, 1); + EchoCanceller3 aec3(EchoCanceller3Config(), + /*multichannel_config=*/absl::nullopt, sample_rate_hz_, + 1, 1); constexpr size_t kRenderTransferQueueSize = 30; for (size_t k = 0; k < 2; ++k) { @@ -591,7 +630,9 @@ class EchoCanceller3Tester { // Set aec3_sample_rate_hz to be different from sample_rate_hz_ in such a // way that the number of bands for the rates are different. const int aec3_sample_rate_hz = sample_rate_hz_ == 48000 ? 32000 : 48000; - EchoCanceller3 aec3(EchoCanceller3Config(), aec3_sample_rate_hz, 1, 1); + EchoCanceller3 aec3(EchoCanceller3Config(), + /*multichannel_config=*/absl::nullopt, + aec3_sample_rate_hz, 1, 1); PopulateInputFrame(frame_length_, 0, &render_buffer_.channels_f()[0][0], 0); EXPECT_DEATH(aec3.AnalyzeRender(&render_buffer_), ""); @@ -604,7 +645,9 @@ class EchoCanceller3Tester { // Set aec3_sample_rate_hz to be different from sample_rate_hz_ in such a // way that the number of bands for the rates are different. 
const int aec3_sample_rate_hz = sample_rate_hz_ == 48000 ? 32000 : 48000; - EchoCanceller3 aec3(EchoCanceller3Config(), aec3_sample_rate_hz, 1, 1); + EchoCanceller3 aec3(EchoCanceller3Config(), + /*multichannel_config=*/absl::nullopt, + aec3_sample_rate_hz, 1, 1); PopulateInputFrame(frame_length_, num_bands_, 0, &capture_buffer_.split_bands_f(0)[0], 100); EXPECT_DEATH(aec3.ProcessCapture(&capture_buffer_, false), ""); @@ -887,6 +930,147 @@ TEST(EchoCanceller3FieldTrials, Aec3UseNearendReverb) { EXPECT_FLOAT_EQ(adjusted_config.ep_strength.nearend_len, 0.8); } +TEST(EchoCanceller3, DetectionOfProperStereo) { + constexpr int kSampleRateHz = 16000; + constexpr int kNumChannels = 2; + AudioBuffer buffer(/*input_rate=*/kSampleRateHz, + /*input_num_channels=*/kNumChannels, + /*buffer_rate=*/kSampleRateHz, + /*buffer_num_channels=*/kNumChannels, + /*output_rate=*/kSampleRateHz, + /*output_num_channels=*/kNumChannels); + + constexpr size_t kNumBlocksForMonoConfig = 1; + constexpr size_t kNumBlocksForSurroundConfig = 2; + EchoCanceller3Config mono_config; + absl::optional multichannel_config; + + mono_config.multi_channel.detect_stereo_content = true; + mono_config.multi_channel.stereo_detection_threshold = 0.0f; + multichannel_config = mono_config; + mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig; + multichannel_config->filter.coarse_initial.length_blocks = + kNumBlocksForSurroundConfig; + + EchoCanceller3 aec3(mono_config, multichannel_config, + /*sample_rate_hz=*/kSampleRateHz, + /*num_render_channels=*/kNumChannels, + /*num_capture_input_channels=*/kNumChannels); + + EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting()); + EXPECT_EQ( + aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks, + kNumBlocksForMonoConfig); + + RunAecInStereo(buffer, aec3, 100.0f, 100.0f); + EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting()); + EXPECT_EQ( + aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks, + 
kNumBlocksForMonoConfig); + + RunAecInStereo(buffer, aec3, 100.0f, 101.0f); + EXPECT_TRUE(aec3.StereoRenderProcessingActiveForTesting()); + EXPECT_EQ( + aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks, + kNumBlocksForSurroundConfig); +} + +TEST(EchoCanceller3, DetectionOfProperStereoUsingThreshold) { + constexpr int kSampleRateHz = 16000; + constexpr int kNumChannels = 2; + AudioBuffer buffer(/*input_rate=*/kSampleRateHz, + /*input_num_channels=*/kNumChannels, + /*buffer_rate=*/kSampleRateHz, + /*buffer_num_channels=*/kNumChannels, + /*output_rate=*/kSampleRateHz, + /*output_num_channels=*/kNumChannels); + + constexpr size_t kNumBlocksForMonoConfig = 1; + constexpr size_t kNumBlocksForSurroundConfig = 2; + EchoCanceller3Config mono_config; + absl::optional multichannel_config; + + constexpr float kStereoDetectionThreshold = 2.0f; + mono_config.multi_channel.detect_stereo_content = true; + mono_config.multi_channel.stereo_detection_threshold = + kStereoDetectionThreshold; + multichannel_config = mono_config; + mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig; + multichannel_config->filter.coarse_initial.length_blocks = + kNumBlocksForSurroundConfig; + + EchoCanceller3 aec3(mono_config, multichannel_config, + /*sample_rate_hz=*/kSampleRateHz, + /*num_render_channels=*/kNumChannels, + /*num_capture_input_channels=*/kNumChannels); + + EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting()); + EXPECT_EQ( + aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks, + kNumBlocksForMonoConfig); + + RunAecInStereo(buffer, aec3, 100.0f, + 100.0f + kStereoDetectionThreshold - 1.0f); + EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting()); + EXPECT_EQ( + aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks, + kNumBlocksForMonoConfig); + + RunAecInStereo(buffer, aec3, 100.0f, + 100.0f + kStereoDetectionThreshold + 10.0f); + EXPECT_TRUE(aec3.StereoRenderProcessingActiveForTesting()); + EXPECT_EQ( + 
aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks, + kNumBlocksForSurroundConfig); +} + +TEST(EchoCanceller3, StereoContentDetectionForMonoSignals) { + constexpr int kSampleRateHz = 16000; + constexpr int kNumChannels = 2; + AudioBuffer buffer(/*input_rate=*/kSampleRateHz, + /*input_num_channels=*/kNumChannels, + /*buffer_rate=*/kSampleRateHz, + /*buffer_num_channels=*/kNumChannels, + /*output_rate=*/kSampleRateHz, + /*output_num_channels=*/kNumChannels); + + constexpr size_t kNumBlocksForMonoConfig = 1; + constexpr size_t kNumBlocksForSurroundConfig = 2; + EchoCanceller3Config mono_config; + absl::optional multichannel_config; + + for (bool detect_stereo_content : {false, true}) { + mono_config.multi_channel.detect_stereo_content = detect_stereo_content; + multichannel_config = mono_config; + mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig; + multichannel_config->filter.coarse_initial.length_blocks = + kNumBlocksForSurroundConfig; + + AudioBuffer mono_buffer(/*input_rate=*/kSampleRateHz, + /*input_num_channels=*/1, + /*buffer_rate=*/kSampleRateHz, + /*buffer_num_channels=*/1, + /*output_rate=*/kSampleRateHz, + /*output_num_channels=*/1); + + EchoCanceller3 aec3(mono_config, multichannel_config, + /*sample_rate_hz=*/kSampleRateHz, + /*num_render_channels=*/1, + /*num_capture_input_channels=*/1); + + EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting()); + EXPECT_EQ( + aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks, + kNumBlocksForMonoConfig); + + RunAecInSMono(mono_buffer, aec3, 100.0f); + EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting()); + EXPECT_EQ( + aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks, + kNumBlocksForMonoConfig); + } +} + #if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) TEST(EchoCanceller3InputCheckDeathTest, WrongCaptureNumBandsCheckVerification) { @@ -899,9 +1083,11 @@ TEST(EchoCanceller3InputCheckDeathTest, 
WrongCaptureNumBandsCheckVerification) { // Verifiers that the verification for null input to the capture processing api // call works. TEST(EchoCanceller3InputCheckDeathTest, NullCaptureProcessingParameter) { - EXPECT_DEATH(EchoCanceller3(EchoCanceller3Config(), 16000, 1, 1) - .ProcessCapture(nullptr, false), - ""); + EXPECT_DEATH( + EchoCanceller3(EchoCanceller3Config(), + /*multichannel_config=*/absl::nullopt, 16000, 1, 1) + .ProcessCapture(nullptr, false), + ""); } // Verifies the check for correct sample rate. @@ -909,7 +1095,10 @@ TEST(EchoCanceller3InputCheckDeathTest, NullCaptureProcessingParameter) { // tests on test bots has been fixed. TEST(EchoCanceller3InputCheckDeathTest, DISABLED_WrongSampleRate) { ApmDataDumper data_dumper(0); - EXPECT_DEATH(EchoCanceller3(EchoCanceller3Config(), 8001, 1, 1), ""); + EXPECT_DEATH( + EchoCanceller3(EchoCanceller3Config(), + /*multichannel_config=*/absl::nullopt, 8001, 1, 1), + ""); } #endif diff --git a/modules/audio_processing/aec3/multi_channel_content_detector.cc b/modules/audio_processing/aec3/multi_channel_content_detector.cc new file mode 100644 index 0000000000..62b3ae0840 --- /dev/null +++ b/modules/audio_processing/aec3/multi_channel_content_detector.cc @@ -0,0 +1,64 @@ + +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/multi_channel_content_detector.h" + +#include + +namespace webrtc { + +namespace { + +// Compares the left and right channels in the render `frame` to determine +// whether the signal is a proper stereo signal. 
To allow for differences +// introduced by hardware drivers, a threshold `detection_threshold` is used for +// the detection. Returns false when band 0 has fewer than two channels. +bool IsProperStereo(const std::vector>>& frame, + float detection_threshold) { + if (frame[0].size() < 2) { + return false; + } + + for (size_t band = 0; band < frame.size(); ++band) { + for (size_t k = 0; k < frame[band][0].size(); ++k) { + if (std::fabs(frame[band][0][k] - frame[band][1][k]) > + detection_threshold) { + return true; + } + } + } + return false; +} + +} // namespace + +MultiChannelContentDetector::MultiChannelContentDetector( + bool detect_stereo_content, + int num_render_input_channels, + float detection_threshold) + : detect_stereo_content_(detect_stereo_content), + detection_threshold_(detection_threshold), + proper_multichannel_content_detected_(!detect_stereo_content && + num_render_input_channels > 1) {} + +bool MultiChannelContentDetector::UpdateDetection( + const std::vector>>& frame) { + const bool previous_proper_multichannel_content_detected = + proper_multichannel_content_detected_; + if (detect_stereo_content_ && !proper_multichannel_content_detected_) { + proper_multichannel_content_detected_ = + IsProperStereo(frame, detection_threshold_); + } + return previous_proper_multichannel_content_detected != + proper_multichannel_content_detected_; +} + +} // namespace webrtc diff --git a/modules/audio_processing/aec3/multi_channel_content_detector.h b/modules/audio_processing/aec3/multi_channel_content_detector.h new file mode 100644 index 0000000000..119cd1f2a4 --- /dev/null +++ b/modules/audio_processing/aec3/multi_channel_content_detector.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MULTI_CHANNEL_CONTENT_DETECTOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MULTI_CHANNEL_CONTENT_DETECTOR_H_ + +#include + +#include + +namespace webrtc { + +// Analyzes audio content to determine whether the contained audio is proper +// multichannel, or only upmixed mono. To allow for differences introduced by +// hardware drivers, a threshold `detection_threshold` is used for the +// detection. Once detected, the multichannel state is never cleared. +class MultiChannelContentDetector { + public: + MultiChannelContentDetector(bool detect_stereo_content, + int num_render_input_channels, + float detection_threshold); + + // Compares the left and right channels in the render `frame` to determine + // whether the signal is a proper multichannel signal. Returns a bool + // indicating whether the detection state changed as a result of this call. + bool UpdateDetection( + const std::vector>>& frame); + + bool IsMultiChannelContentDetected() const { + return proper_multichannel_content_detected_; + } + + private: + const bool detect_stereo_content_; + const float detection_threshold_; + bool proper_multichannel_content_detected_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MULTI_CHANNEL_CONTENT_DETECTOR_H_ diff --git a/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc b/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc new file mode 100644 index 0000000000..ae2a9336ef --- /dev/null +++ b/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/multi_channel_content_detector.h" + +#include "test/gtest.h" + +namespace webrtc { + +TEST(MultiChannelContentDetector, HandlingOfMono) { + MultiChannelContentDetector mc(/*detect_stereo_content=*/true, + /*num_render_input_channels=*/1, + /*detection_threshold=*/0.0f); + EXPECT_FALSE(mc.IsMultiChannelContentDetected()); +} + +TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) { + MultiChannelContentDetector mc(/*detect_stereo_content=*/false, + /*num_render_input_channels=*/1, + /*detection_threshold=*/0.0f); + EXPECT_FALSE(mc.IsMultiChannelContentDetected()); +} + +TEST(MultiChannelContentDetector, HandlingOfDetectionOff) { + MultiChannelContentDetector mc(/*detect_stereo_content=*/false, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f); + EXPECT_TRUE(mc.IsMultiChannelContentDetected()); + + std::vector>> frame( + 1, std::vector>(2, std::vector(160, 0.0f))); + std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); + std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f); + + EXPECT_FALSE(mc.UpdateDetection(frame)); + EXPECT_TRUE(mc.IsMultiChannelContentDetected()); + + EXPECT_FALSE(mc.UpdateDetection(frame)); +} + +TEST(MultiChannelContentDetector, InitialDetectionOfStereo) { + MultiChannelContentDetector mc(/*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f); + EXPECT_FALSE(mc.IsMultiChannelContentDetected()); +} + +TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) { + MultiChannelContentDetector mc(/*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f); + std::vector>> frame( + 1, std::vector>(2, std::vector(160, 0.0f))); + std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); + std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f); + 
EXPECT_FALSE(mc.UpdateDetection(frame)); + EXPECT_FALSE(mc.IsMultiChannelContentDetected()); + + EXPECT_FALSE(mc.UpdateDetection(frame)); +} + +TEST(MultiChannelContentDetector, DetectionWhenStereo) { + MultiChannelContentDetector mc(/*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f); + std::vector>> frame( + 1, std::vector>(2, std::vector(160, 0.0f))); + std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); + std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f); + EXPECT_TRUE(mc.UpdateDetection(frame)); + EXPECT_TRUE(mc.IsMultiChannelContentDetected()); + + EXPECT_FALSE(mc.UpdateDetection(frame)); +} + +TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) { + MultiChannelContentDetector mc(/*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f); + std::vector>> frame( + 1, std::vector>(2, std::vector(160, 0.0f))); + + std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); + std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f); + EXPECT_FALSE(mc.UpdateDetection(frame)); + EXPECT_FALSE(mc.IsMultiChannelContentDetected()); + + EXPECT_FALSE(mc.UpdateDetection(frame)); + + std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); + std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f); + + EXPECT_TRUE(mc.UpdateDetection(frame)); + EXPECT_TRUE(mc.IsMultiChannelContentDetected()); + + EXPECT_FALSE(mc.UpdateDetection(frame)); +} + +TEST(MultiChannelContentDetector, DetectionWithStereoBelowThreshold) { + constexpr float kThreshold = 1.0f; + MultiChannelContentDetector mc(/*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/kThreshold); + std::vector>> frame( + 1, std::vector>(2, std::vector(160, 0.0f))); + std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); + std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f + kThreshold); + + EXPECT_FALSE(mc.UpdateDetection(frame)); + 
EXPECT_FALSE(mc.IsMultiChannelContentDetected()); + + EXPECT_FALSE(mc.UpdateDetection(frame)); +} + +TEST(MultiChannelContentDetector, DetectionWithStereoAboveThreshold) { + constexpr float kThreshold = 1.0f; + MultiChannelContentDetector mc(/*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/kThreshold); + std::vector>> frame( + 1, std::vector>(2, std::vector(160, 0.0f))); + std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); + std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f + kThreshold + 0.1f); + + EXPECT_TRUE(mc.UpdateDetection(frame)); + EXPECT_TRUE(mc.IsMultiChannelContentDetected()); + + EXPECT_FALSE(mc.UpdateDetection(frame)); +} + +} // namespace webrtc diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index 9a1aaee821..20e826d730 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -1726,14 +1726,14 @@ void AudioProcessingImpl::InitializeEchoController() { proc_sample_rate_hz(), num_reverse_channels(), num_proc_channels()); RTC_DCHECK(submodules_.echo_controller); } else { - EchoCanceller3Config config = - use_setup_specific_default_aec3_config_ - ? EchoCanceller3::CreateDefaultConfig(num_reverse_channels(), - num_proc_channels()) - : EchoCanceller3Config(); + EchoCanceller3Config config; + absl::optional multichannel_config; + if (use_setup_specific_default_aec3_config_) { + multichannel_config = EchoCanceller3::CreateDefaultMultichannelConfig(); + } submodules_.echo_controller = std::make_unique( - config, proc_sample_rate_hz(), num_reverse_channels(), - num_proc_channels()); + config, multichannel_config, proc_sample_rate_hz(), + num_reverse_channels(), num_proc_channels()); } // Setup the storage for returning the linear AEC output. 
diff --git a/test/fuzzers/BUILD.gn b/test/fuzzers/BUILD.gn index 72fa6d7926..487594d864 100644 --- a/test/fuzzers/BUILD.gn +++ b/test/fuzzers/BUILD.gn @@ -560,6 +560,7 @@ webrtc_fuzzer_test("aec3_fuzzer") { "../../modules/audio_processing:audio_buffer", "../../modules/audio_processing/aec3", ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] } webrtc_fuzzer_test("comfort_noise_decoder_fuzzer") { diff --git a/test/fuzzers/aec3_fuzzer.cc b/test/fuzzers/aec3_fuzzer.cc index a9b4a9ea94..a12ca30f63 100644 --- a/test/fuzzers/aec3_fuzzer.cc +++ b/test/fuzzers/aec3_fuzzer.cc @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "absl/types/optional.h" #include "modules/audio_processing/aec3/echo_canceller3.h" #include "modules/audio_processing/audio_buffer.h" #include "modules/audio_processing/include/audio_processing.h" @@ -51,7 +52,8 @@ void FuzzOneInput(const uint8_t* data, size_t size) { const size_t num_capture_channels = 1 + fuzz_data.ReadOrDefaultValue(0) % (kMaxNumChannels - 1); - EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz, + EchoCanceller3 aec3(EchoCanceller3Config(), + /*multichannel_config=*/absl::nullopt, sample_rate_hz, num_render_channels, num_capture_channels); AudioBuffer capture_audio(sample_rate_hz, num_capture_channels,