diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index 745e45ce72..c80cc76a3d 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -506,13 +506,16 @@ AudioProcessing::Config AudioProcessingImpl::AdjustConfig( !config.gain_controller2.enabled; const bool one_and_only_one_input_volume_controller = hybrid_agc_config_detected != full_agc1_config_detected; + const bool agc2_input_volume_controller_enabled = + config.gain_controller2.enabled && + config.gain_controller2.input_volume_controller.enabled; if (!one_and_only_one_input_volume_controller || - config.gain_controller2.input_volume_controller.enabled) { + agc2_input_volume_controller_enabled) { RTC_LOG(LS_ERROR) << "Cannot adjust AGC config (precondition failed)"; if (!one_and_only_one_input_volume_controller) RTC_LOG(LS_ERROR) << "One and only one input volume controller must be enabled."; - if (config.gain_controller2.input_volume_controller.enabled) + if (agc2_input_volume_controller_enabled) RTC_LOG(LS_ERROR) << "The AGC2 input volume controller must be disabled."; } else { @@ -530,19 +533,18 @@ AudioProcessing::Config AudioProcessingImpl::AdjustConfig( return adjusted_config; } -TransientSuppressor::VadMode AudioProcessingImpl::GetTransientSuppressorVadMode( - const absl::optional& - params) { - if (params.has_value() && params->agc2_config.has_value() && - !params->disallow_transient_suppressor_usage) { - // When the experiment is active, the gain control switches to AGC2 and TS - // can be active, use the RNN VAD to control TS. This choice will also - // disable the internal RNN VAD in AGC2. - return TransientSuppressor::VadMode::kRnnVad; - } - // If TS is disabled, the returned value does not matter. If enabled, use the - // default VAD. - return TransientSuppressor::VadMode::kDefault; +bool AudioProcessingImpl::UseApmVadSubModule( + const AudioProcessing::Config& config, + const absl::optional& experiment_params) { + // The VAD as an APM sub-module is needed only in one case, that is when TS + // and AGC2 are both enabled and when the AGC2 experiment is running and its + // parameters require to fully switch the gain control to AGC2. + return config.transient_suppression.enabled && + config.gain_controller2.enabled && + (config.gain_controller2.input_volume_controller.enabled || + config.gain_controller2.adaptive_digital.enabled) && + experiment_params.has_value() && + experiment_params->agc2_config.has_value(); } AudioProcessingImpl::SubmoduleStates::SubmoduleStates( @@ -663,8 +665,7 @@ AudioProcessingImpl::AudioProcessingImpl( use_setup_specific_default_aec3_config_( UseSetupSpecificDefaultAec3Congfig()), gain_controller2_experiment_params_(GetGainController2ExperimentParams()), - transient_suppressor_vad_mode_( - GetTransientSuppressorVadMode(gain_controller2_experiment_params_)), + transient_suppressor_vad_mode_(TransientSuppressor::VadMode::kDefault), capture_runtime_settings_(RuntimeSettingQueueSize()), render_runtime_settings_(RuntimeSettingQueueSize()), capture_runtime_settings_enqueuer_(&capture_runtime_settings_), @@ -809,8 +810,8 @@ void AudioProcessingImpl::InitializeLocked() { InitializeHighPassFilter(true); InitializeResidualEchoDetector(); InitializeEchoController(); - InitializeGainController2(/*config_has_changed=*/true); - InitializeVoiceActivityDetector(/*config_has_changed=*/true); + InitializeGainController2(); + InitializeVoiceActivityDetector(); InitializeNoiseSuppressor(); InitializeAnalyzer(); InitializePostProcessor(); @@ -977,8 +978,12 @@ void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) { config_.gain_controller2 = AudioProcessing::Config::GainController2(); } - InitializeGainController2(agc2_config_changed); - InitializeVoiceActivityDetector(agc2_config_changed); + if (agc2_config_changed || ts_config_changed) { + // AGC2 also depends on TS because of the possible dependency on the APM VAD + // sub-module. + InitializeGainController2(); + InitializeVoiceActivityDetector(); + } if (pre_amplifier_config_changed || gain_adjustment_config_changed) { InitializeCaptureLevelsAdjuster(); @@ -2144,10 +2149,20 @@ bool AudioProcessingImpl::UpdateActiveSubmoduleStates() { } void AudioProcessingImpl::InitializeTransientSuppressor() { + // Choose the VAD mode for TS and detect a VAD mode change. + const TransientSuppressor::VadMode previous_vad_mode = + transient_suppressor_vad_mode_; + transient_suppressor_vad_mode_ = TransientSuppressor::VadMode::kDefault; + if (UseApmVadSubModule(config_, gain_controller2_experiment_params_)) { + transient_suppressor_vad_mode_ = TransientSuppressor::VadMode::kRnnVad; + } + const bool vad_mode_changed = + previous_vad_mode != transient_suppressor_vad_mode_; + if (config_.transient_suppression.enabled && !constants_.transient_suppressor_forced_off) { // Attempt to create a transient suppressor, if one is not already created. - if (!submodules_.transient_suppressor) { + if (!submodules_.transient_suppressor || vad_mode_changed) { submodules_.transient_suppressor = CreateTransientSuppressor( submodule_creation_overrides_, transient_suppressor_vad_mode_, proc_fullband_sample_rate_hz(), capture_nonlocked_.split_rate, @@ -2341,54 +2356,48 @@ void AudioProcessingImpl::InitializeGainController1() { capture_.capture_output_used); } -void AudioProcessingImpl::InitializeGainController2(bool config_has_changed) { - if (!config_has_changed) { - return; - } +void AudioProcessingImpl::InitializeGainController2() { if (!config_.gain_controller2.enabled) { submodules_.gain_controller2.reset(); return; } - if (!submodules_.gain_controller2 || config_has_changed) { - const bool use_internal_vad = - transient_suppressor_vad_mode_ != TransientSuppressor::VadMode::kRnnVad; - const bool input_volume_controller_config_overridden = - gain_controller2_experiment_params_.has_value() && - gain_controller2_experiment_params_->agc2_config.has_value(); - const InputVolumeController::Config input_volume_controller_config = - input_volume_controller_config_overridden - ? gain_controller2_experiment_params_->agc2_config - ->input_volume_controller - : InputVolumeController::Config{}; - submodules_.gain_controller2 = std::make_unique( - config_.gain_controller2, input_volume_controller_config, - proc_fullband_sample_rate_hz(), num_proc_channels(), use_internal_vad); - submodules_.gain_controller2->SetCaptureOutputUsed( - capture_.capture_output_used); - } + // Override the input volume controller configuration if the AGC2 experiment + // is running and its parameters require to fully switch the gain control to + // AGC2. + const bool input_volume_controller_config_overridden = + gain_controller2_experiment_params_.has_value() && + gain_controller2_experiment_params_->agc2_config.has_value(); + const InputVolumeController::Config input_volume_controller_config = + input_volume_controller_config_overridden + ? gain_controller2_experiment_params_->agc2_config + ->input_volume_controller + : InputVolumeController::Config{}; + // If the APM VAD sub-module is not used, let AGC2 use its internal VAD. + const bool use_internal_vad = + !UseApmVadSubModule(config_, gain_controller2_experiment_params_); + submodules_.gain_controller2 = std::make_unique( + config_.gain_controller2, input_volume_controller_config, + proc_fullband_sample_rate_hz(), num_proc_channels(), use_internal_vad); + submodules_.gain_controller2->SetCaptureOutputUsed( + capture_.capture_output_used); } -void AudioProcessingImpl::InitializeVoiceActivityDetector( - bool config_has_changed) { - if (!config_has_changed) { - return; - } - const bool use_vad = - transient_suppressor_vad_mode_ == TransientSuppressor::VadMode::kRnnVad && - config_.gain_controller2.enabled && - (config_.gain_controller2.adaptive_digital.enabled || - config_.gain_controller2.input_volume_controller.enabled); - if (!use_vad) { +void AudioProcessingImpl::InitializeVoiceActivityDetector() { + if (!UseApmVadSubModule(config_, gain_controller2_experiment_params_)) { submodules_.voice_activity_detector.reset(); return; } - if (!submodules_.voice_activity_detector || config_has_changed) { + + if (!submodules_.voice_activity_detector) { RTC_DCHECK(!!submodules_.gain_controller2); // TODO(bugs.webrtc.org/13663): Cache CPU features in APM and use here. submodules_.voice_activity_detector = std::make_unique( submodules_.gain_controller2->GetCpuFeatures(), proc_fullband_sample_rate_hz()); + } else { + submodules_.voice_activity_detector->Initialize( + proc_fullband_sample_rate_hz()); } } diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h index b4b3de2f2b..fe80e0d912 100644 --- a/modules/audio_processing/audio_processing_impl.h +++ b/modules/audio_processing/audio_processing_impl.h @@ -227,10 +227,12 @@ class AudioProcessingImpl : public AudioProcessing { static AudioProcessing::Config AdjustConfig( const AudioProcessing::Config& config, const absl::optional& experiment_params); - static TransientSuppressor::VadMode GetTransientSuppressorVadMode( + // Returns true if the APM VAD sub-module should be used. + static bool UseApmVadSubModule( + const AudioProcessing::Config& config, const absl::optional& experiment_params); - const TransientSuppressor::VadMode transient_suppressor_vad_mode_; + TransientSuppressor::VadMode transient_suppressor_vad_mode_; SwapQueue capture_runtime_settings_; SwapQueue render_runtime_settings_; @@ -317,14 +319,15 @@ class AudioProcessingImpl : public AudioProcessing { void InitializeGainController1() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); void InitializeTransientSuppressor() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); - // Initializes the `GainController2` sub-module. If the sub-module is enabled - // and `config_has_changed` is true, recreates the sub-module. - void InitializeGainController2(bool config_has_changed) - RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + // Initializes the `GainController2` sub-module. If the sub-module is enabled, + // recreates it. + void InitializeGainController2() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); // Initializes the `VoiceActivityDetectorWrapper` sub-module. If the - // sub-module is enabled and `config_has_changed` is true, recreates the - // sub-module. - void InitializeVoiceActivityDetector(bool config_has_changed) + // sub-module is enabled, recreates it. Call `InitializeGainController2()` + // first. + // TODO(bugs.webrtc.org/13663): Remove if TS is removed otherwise remove call + // order requirement - i.e., decouple from `InitializeGainController2()`. + void InitializeVoiceActivityDetector() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); void InitializeNoiseSuppressor() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); void InitializeCaptureLevelsAdjuster() diff --git a/modules/audio_processing/audio_processing_impl_unittest.cc b/modules/audio_processing/audio_processing_impl_unittest.cc index 633dd85d9d..7c12a07ed9 100644 --- a/modules/audio_processing/audio_processing_impl_unittest.cc +++ b/modules/audio_processing/audio_processing_impl_unittest.cc @@ -1287,7 +1287,10 @@ TEST_P(Agc2FieldTrialParametrizedTest, TEST_P(Agc2FieldTrialParametrizedTest, ProcessSucceedsWithTs) { AudioProcessing::Config config = GetParam(); - config.transient_suppression.enabled = true; + if (!config.transient_suppression.enabled) { + GTEST_SKIP() << "TS is disabled, skip."; + } + webrtc::test::ScopedFieldTrials field_trials( "WebRTC-Audio-GainController2/Disabled/"); auto apm = AudioProcessingBuilder().SetConfig(config).Create(); @@ -1340,7 +1343,10 @@ TEST_P(Agc2FieldTrialParametrizedTest, ProcessSucceedsWithoutTs) { TEST_P(Agc2FieldTrialParametrizedTest, ProcessSucceedsWhenSwitchToFullAgc2WithTs) { AudioProcessing::Config config = GetParam(); - config.transient_suppression.enabled = true; + if (!config.transient_suppression.enabled) { + GTEST_SKIP() << "TS is disabled, skip."; + } + webrtc::test::ScopedFieldTrials field_trials( "WebRTC-Audio-GainController2/Enabled," "switch_to_agc2:true," @@ -1397,15 +1403,34 @@ INSTANTIATE_TEST_SUITE_P( AudioProcessingImplTest, Agc2FieldTrialParametrizedTest, ::testing::Values( - // Full AGC1. + // Full AGC1, TS disabled. AudioProcessing::Config{ + .transient_suppression = {.enabled = false}, .gain_controller1 = {.enabled = true, .analog_gain_controller = {.enabled = true, .enable_digital_adaptive = true}}, .gain_controller2 = {.enabled = false}}, - // Hybrid AGC. + // Full AGC1, TS enabled. AudioProcessing::Config{ + .transient_suppression = {.enabled = true}, + .gain_controller1 = + {.enabled = true, + .analog_gain_controller = {.enabled = true, + .enable_digital_adaptive = true}}, + .gain_controller2 = {.enabled = false}}, + // Hybrid AGC, TS disabled. + AudioProcessing::Config{ + .transient_suppression = {.enabled = false}, + .gain_controller1 = + {.enabled = true, + .analog_gain_controller = {.enabled = true, + .enable_digital_adaptive = false}}, + .gain_controller2 = {.enabled = true, + .adaptive_digital = {.enabled = true}}}, + // Hybrid AGC, TS enabled. + AudioProcessing::Config{ + .transient_suppression = {.enabled = true}, .gain_controller1 = {.enabled = true, .analog_gain_controller = {.enabled = true, diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc index 9beaf00823..dd3521268d 100644 --- a/modules/audio_processing/gain_controller2.cc +++ b/modules/audio_processing/gain_controller2.cc @@ -183,8 +183,13 @@ void GainController2::Process(absl::optional speech_probability, audio->num_frames()); // Compute speech probability. if (vad_) { + // When the VAD component runs, `speech_probability` should not be specified + // because APM should not run the same VAD twice (as an APM sub-module and + // internally in AGC2). + RTC_DCHECK(!speech_probability.has_value()); speech_probability = vad_->Analyze(float_frame); - } else if (speech_probability.has_value()) { + } + if (speech_probability.has_value()) { RTC_DCHECK_GE(*speech_probability, 0.0f); RTC_DCHECK_LE(*speech_probability, 1.0f); } diff --git a/modules/audio_processing/gain_controller2_unittest.cc b/modules/audio_processing/gain_controller2_unittest.cc index c3d0e5947a..5023bab617 100644 --- a/modules/audio_processing/gain_controller2_unittest.cc +++ b/modules/audio_processing/gain_controller2_unittest.cc @@ -455,74 +455,26 @@ TEST(GainController2, CheckFinalGainWithAdaptiveDigitalController) { EXPECT_NEAR(applied_gain_db, kExpectedGainDb, kToleranceDb); } -// Processes a test audio file and checks that the injected speech probability -// is ignored when the internal VAD is used. -TEST(GainController2, - CheckInjectedVadProbabilityNotUsedWithAdaptiveDigitalController) { +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// Checks that `GainController2` crashes in debug mode if it runs its internal +// VAD and the speech probability values are provided by the caller. +TEST(GainController2DeathTest, + DebugCrashIfUseInternalVadAndSpeechProbabilityGiven) { constexpr int kSampleRateHz = AudioProcessing::kSampleRate48kHz; constexpr int kStereo = 2; - - // Create AGC2 enabling only the adaptive digital controller. - Agc2Config config; - config.fixed_digital.gain_db = 0.0f; - config.adaptive_digital.enabled = true; - GainController2 agc2(config, /*input_volume_controller_config=*/{}, - kSampleRateHz, kStereo, - /*use_internal_vad=*/true); - GainController2 agc2_reference(config, /*input_volume_controller_config=*/{}, - kSampleRateHz, kStereo, - /*use_internal_vad=*/true); - - test::InputAudioFile input_file( - test::GetApmCaptureTestVectorFileName(kSampleRateHz), - /*loop_at_end=*/true); - const StreamConfig stream_config(kSampleRateHz, kStereo); - - // Init buffers. - constexpr int kFrameDurationMs = 10; - std::vector frame(kStereo * stream_config.num_frames()); AudioBuffer audio_buffer(kSampleRateHz, kStereo, kSampleRateHz, kStereo, kSampleRateHz, kStereo); - AudioBuffer audio_buffer_reference(kSampleRateHz, kStereo, kSampleRateHz, - kStereo, kSampleRateHz, kStereo); + // Create AGC2 so that the interval VAD is also created. + GainController2 agc2(/*config=*/{.adaptive_digital = {.enabled = true}}, + /*input_volume_controller_config=*/{}, kSampleRateHz, + kStereo, + /*use_internal_vad=*/true); - // Simulate. - constexpr float kGainDb = -6.0f; - const float gain = std::pow(10.0f, kGainDb / 20.0f); - constexpr int kDurationMs = 10000; - constexpr int kNumFramesToProcess = kDurationMs / kFrameDurationMs; - constexpr float kSpeechProbabilities[] = {1.0f, 0.3f}; - constexpr float kEpsilon = 0.0001f; - bool all_samples_zero = true; - for (int i = 0, j = 0; i < kNumFramesToProcess; ++i, j = 1 - j) { - ReadFloatSamplesFromStereoFile(stream_config.num_frames(), - stream_config.num_channels(), &input_file, - frame); - // Apply a fixed gain to the input audio. - for (float& x : frame) { - x *= gain; - } - test::CopyVectorToAudioBuffer(stream_config, frame, &audio_buffer); - agc2.Process(kSpeechProbabilities[j], /*input_volume_changed=*/false, - &audio_buffer); - test::CopyVectorToAudioBuffer(stream_config, frame, - &audio_buffer_reference); - agc2_reference.Process(/*speech_probability=*/absl::nullopt, - /*input_volume_changed=*/false, - &audio_buffer_reference); - - // Check the output buffers. - for (int i = 0; i < kStereo; ++i) { - for (int j = 0; j < static_cast(audio_buffer.num_frames()); ++j) { - all_samples_zero &= - fabs(audio_buffer.channels_const()[i][j]) < kEpsilon; - EXPECT_FLOAT_EQ(audio_buffer.channels_const()[i][j], - audio_buffer_reference.channels_const()[i][j]); - } - } - } - EXPECT_FALSE(all_samples_zero); + EXPECT_DEATH(agc2.Process(/*speech_probability=*/0.123f, + /*input_volume_changed=*/false, &audio_buffer), + ""); } +#endif // Processes a test audio file and checks that the injected speech probability // is not ignored when the internal VAD is not used.