From 5825aa673cf5bda080d1570a8c905f598d07d72c Mon Sep 17 00:00:00 2001 From: Alex Loiko Date: Mon, 18 Dec 2017 16:02:40 +0100 Subject: [PATCH] Render-side pre-processing in APM. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL adds a way to insert a custom render-side pre-processor to APM. The pre-processor operates in full-band mode before anything else. Currently the render processing chain is (if everything is enabled): Network --> [Pre processing] --> [Band split] --> [IntelligibilityEnhancer] --> [Echo canceller (read-only)] --> [Band merge] --> Playout Since the render pre processor and capture post processor have the same interface, I renamed webrtc::PostProcessing into webrtc::CustomProcessing. The old APM factory method PostProcessing will be deprecated and dependencies updated as part of webrtc:8665 NOTRY=True Bug: webrtc:8665 Change-Id: Ia381cbf12e336d6587406a14d77243d931f69a31 Reviewed-on: https://webrtc-review.googlesource.com/29201 Commit-Queue: Alex Loiko Reviewed-by: Per Åhgren Cr-Commit-Position: refs/heads/master@{#21327} --- .../audio_processing/audio_processing_impl.cc | 73 +++++++++++++++---- .../audio_processing/audio_processing_impl.h | 9 ++- .../audio_processing_unittest.cc | 31 ++++++-- .../include/audio_processing.h | 25 +++++-- .../include/mock_audio_processing.h | 4 +- .../test/audio_processing_simulator.cc | 2 +- .../audio_processing/test/debug_dump_test.cc | 1 + 7 files changed, 112 insertions(+), 33 deletions(-) diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index d61ab97c6e..2fbf369e51 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -172,8 +172,10 @@ webrtc::InternalAPMStreamsConfig ToStreamsConfig( static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero"); AudioProcessingImpl::ApmSubmoduleStates::ApmSubmoduleStates( - bool 
capture_post_processor_enabled) - : capture_post_processor_enabled_(capture_post_processor_enabled) {} + bool capture_post_processor_enabled, + bool render_pre_processor_enabled) + : capture_post_processor_enabled_(capture_post_processor_enabled), + render_pre_processor_enabled_(render_pre_processor_enabled) {} bool AudioProcessingImpl::ApmSubmoduleStates::Update( bool low_cut_filter_enabled, @@ -264,6 +266,11 @@ bool AudioProcessingImpl::ApmSubmoduleStates::RenderMultiBandSubModulesActive() echo_controller_enabled_; } +bool AudioProcessingImpl::ApmSubmoduleStates::RenderFullBandProcessingActive() + const { + return render_pre_processor_enabled_; +} + bool AudioProcessingImpl::ApmSubmoduleStates::RenderMultiBandProcessingActive() const { #if WEBRTC_INTELLIGIBILITY_ENHANCER @@ -294,9 +301,11 @@ struct AudioProcessingImpl::ApmPublicSubmodules { struct AudioProcessingImpl::ApmPrivateSubmodules { ApmPrivateSubmodules(NonlinearBeamformer* beamformer, - std::unique_ptr capture_post_processor) + std::unique_ptr capture_post_processor, + std::unique_ptr render_pre_processor) : beamformer(beamformer), - capture_post_processor(std::move(capture_post_processor)) {} + capture_post_processor(std::move(capture_post_processor)), + render_pre_processor(std::move(render_pre_processor)) {} // Accessed internally from capture or during initialization std::unique_ptr beamformer; std::unique_ptr agc_manager; @@ -305,31 +314,43 @@ struct AudioProcessingImpl::ApmPrivateSubmodules { std::unique_ptr level_controller; std::unique_ptr residual_echo_detector; std::unique_ptr echo_controller; - std::unique_ptr capture_post_processor; + std::unique_ptr capture_post_processor; + std::unique_ptr render_pre_processor; }; AudioProcessing* AudioProcessing::Create() { webrtc::Config config; - return Create(config, nullptr, nullptr, nullptr); + return Create(config, nullptr, nullptr, nullptr, nullptr); } AudioProcessing* AudioProcessing::Create(const webrtc::Config& config) { - return Create(config, 
nullptr, nullptr, nullptr); + return Create(config, nullptr, nullptr, nullptr, nullptr); } AudioProcessing* AudioProcessing::Create(const webrtc::Config& config, NonlinearBeamformer* beamformer) { - return Create(config, nullptr, nullptr, beamformer); + return Create(config, nullptr, nullptr, nullptr, beamformer); } AudioProcessing* AudioProcessing::Create( const webrtc::Config& config, - std::unique_ptr capture_post_processor, + std::unique_ptr capture_post_processor, + std::unique_ptr echo_control_factory, + NonlinearBeamformer* beamformer) { + return Create(config, std::move(capture_post_processor), nullptr, + std::move(echo_control_factory), beamformer); +} + +AudioProcessing* AudioProcessing::Create( + const webrtc::Config& config, + std::unique_ptr capture_post_processor, + std::unique_ptr render_pre_processor, std::unique_ptr echo_control_factory, NonlinearBeamformer* beamformer) { AudioProcessingImpl* apm = new rtc::RefCountedObject( config, std::move(capture_post_processor), - std::move(echo_control_factory), beamformer); + std::move(render_pre_processor), std::move(echo_control_factory), + beamformer); if (apm->Initialize() != kNoError) { delete apm; apm = nullptr; @@ -339,20 +360,22 @@ AudioProcessing* AudioProcessing::Create( } AudioProcessingImpl::AudioProcessingImpl(const webrtc::Config& config) - : AudioProcessingImpl(config, nullptr, nullptr, nullptr) {} + : AudioProcessingImpl(config, nullptr, nullptr, nullptr, nullptr) {} AudioProcessingImpl::AudioProcessingImpl( const webrtc::Config& config, - std::unique_ptr capture_post_processor, + std::unique_ptr capture_post_processor, + std::unique_ptr render_pre_processor, std::unique_ptr echo_control_factory, NonlinearBeamformer* beamformer) : high_pass_filter_impl_(new HighPassFilterImpl(this)), echo_control_factory_(std::move(echo_control_factory)), - submodule_states_(!!capture_post_processor), + submodule_states_(!!capture_post_processor, !!render_pre_processor), public_submodules_(new 
ApmPublicSubmodules()), private_submodules_( new ApmPrivateSubmodules(beamformer, - std::move(capture_post_processor))), + std::move(capture_post_processor), + std::move(render_pre_processor))), constants_(config.Get().startup_min_volume, config.Get().clipped_level_min, #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) @@ -405,6 +428,9 @@ AudioProcessingImpl::AudioProcessingImpl( RTC_LOG(LS_INFO) << "Capture post processor activated: " << !!private_submodules_->capture_post_processor; + + RTC_LOG(LS_INFO) << "Render pre processor activated: " + << !!private_submodules_->render_pre_processor; } SetExtraOptions(config); @@ -560,6 +586,7 @@ int AudioProcessingImpl::InitializeLocked() { InitializeEchoController(); InitializeGainController2(); InitializePostProcessor(); + InitializePreProcessor(); if (aec_dump_) { aec_dump_->WriteInitMessage(ToStreamsConfig(formats_.api_format)); @@ -1345,7 +1372,8 @@ int AudioProcessingImpl::ProcessReverseStream(const float* const* src, TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_StreamConfig"); rtc::CritScope cs(&crit_render_); RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, input_config, output_config)); - if (submodule_states_.RenderMultiBandProcessingActive()) { + if (submodule_states_.RenderMultiBandProcessingActive() || + submodule_states_.RenderFullBandProcessingActive()) { render_.render_audio->CopyTo(formats_.api_format.reverse_output_stream(), dest); } else if (formats_.api_format.reverse_input_stream() != @@ -1434,7 +1462,8 @@ int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) { render_.render_audio->DeinterleaveFrom(frame); RETURN_ON_ERR(ProcessRenderStreamLocked()); render_.render_audio->InterleaveTo( - frame, submodule_states_.RenderMultiBandProcessingActive()); + frame, submodule_states_.RenderMultiBandProcessingActive() || + submodule_states_.RenderFullBandProcessingActive()); return kNoError; } @@ -1443,6 +1472,10 @@ int AudioProcessingImpl::ProcessRenderStreamLocked() { 
QueueNonbandedRenderAudio(render_buffer); + if (private_submodules_->render_pre_processor) { + private_submodules_->render_pre_processor->Process(render_buffer); + } + if (submodule_states_.RenderMultiBandSubModulesActive() && SampleRateSupportsMultiBand( formats_.render_processing_format.sample_rate_hz())) { @@ -1792,6 +1825,14 @@ void AudioProcessingImpl::InitializePostProcessor() { } } +void AudioProcessingImpl::InitializePreProcessor() { + if (private_submodules_->render_pre_processor) { + private_submodules_->render_pre_processor->Initialize( + formats_.render_processing_format.sample_rate_hz(), + formats_.render_processing_format.num_channels()); + } +} + void AudioProcessingImpl::MaybeUpdateHistograms() { static const int kMinDiffDelayMs = 60; diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h index 021a52037c..c05d23838a 100644 --- a/modules/audio_processing/audio_processing_impl.h +++ b/modules/audio_processing/audio_processing_impl.h @@ -42,7 +42,8 @@ class AudioProcessingImpl : public AudioProcessing { // AudioProcessingImpl takes ownership of capture post processor and // beamformer. AudioProcessingImpl(const webrtc::Config& config, - std::unique_ptr capture_post_processor, + std::unique_ptr capture_post_processor, + std::unique_ptr render_pre_processor, std::unique_ptr echo_control_factory, NonlinearBeamformer* beamformer); ~AudioProcessingImpl() override; @@ -148,7 +149,8 @@ class AudioProcessingImpl : public AudioProcessing { class ApmSubmoduleStates { public: - explicit ApmSubmoduleStates(bool capture_post_processor_enabled); + ApmSubmoduleStates(bool capture_post_processor_enabled, + bool render_pre_processor_enabled); // Updates the submodule state and returns true if it has changed. 
bool Update(bool low_cut_filter_enabled, bool echo_canceller_enabled, @@ -168,10 +170,12 @@ class AudioProcessingImpl : public AudioProcessing { bool CaptureMultiBandProcessingActive() const; bool CaptureFullBandProcessingActive() const; bool RenderMultiBandSubModulesActive() const; + bool RenderFullBandProcessingActive() const; bool RenderMultiBandProcessingActive() const; private: const bool capture_post_processor_enabled_ = false; + const bool render_pre_processor_enabled_ = false; bool low_cut_filter_enabled_ = false; bool echo_canceller_enabled_ = false; bool mobile_echo_controller_enabled_ = false; @@ -228,6 +232,7 @@ class AudioProcessingImpl : public AudioProcessing { void InitializeEchoController() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); void InitializeGainController2() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); void InitializePostProcessor() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + void InitializePreProcessor() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_); void EmptyQueuedRenderAudio(); void AllocateRenderQueue() diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc index 5984ed72a5..b19a56bd4c 100644 --- a/modules/audio_processing/audio_processing_unittest.cc +++ b/modules/audio_processing/audio_processing_unittest.cc @@ -1318,7 +1318,7 @@ TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) { testing::NiceMock* beamformer = new testing::NiceMock(geometry, 1u); std::unique_ptr apm( - AudioProcessing::Create(config, nullptr, nullptr, beamformer)); + AudioProcessing::Create(config, nullptr, nullptr, nullptr, beamformer)); EXPECT_EQ(kNoErr, apm->gain_control()->Enable(true)); ChannelBuffer src_buf(kSamplesPerChannel, kNumInputChannels); ChannelBuffer dest_buf(kSamplesPerChannel, kNumOutputChannels); @@ -2912,11 +2912,11 @@ TEST(ApmConfiguration, EnablePostProcessing) { // Verify that apm uses a capture post processing module if one is provided. 
webrtc::Config webrtc_config; auto mock_post_processor_ptr = - new testing::NiceMock(); + new testing::NiceMock(); auto mock_post_processor = - std::unique_ptr(mock_post_processor_ptr); + std::unique_ptr(mock_post_processor_ptr); rtc::scoped_refptr apm = AudioProcessing::Create( - webrtc_config, std::move(mock_post_processor), nullptr, nullptr); + webrtc_config, std::move(mock_post_processor), nullptr, nullptr, nullptr); AudioFrame audio; audio.num_channels_ = 1; @@ -2926,6 +2926,24 @@ TEST(ApmConfiguration, EnablePostProcessing) { apm->ProcessStream(&audio); } +TEST(ApmConfiguration, EnablePreProcessing) { + // Verify that apm uses a render pre processing module if one is provided. + webrtc::Config webrtc_config; + auto mock_pre_processor_ptr = + new testing::NiceMock(); + auto mock_pre_processor = + std::unique_ptr(mock_pre_processor_ptr); + rtc::scoped_refptr apm = AudioProcessing::Create( + webrtc_config, nullptr, std::move(mock_pre_processor), nullptr, nullptr); + + AudioFrame audio; + audio.num_channels_ = 1; + SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz); + + EXPECT_CALL(*mock_pre_processor_ptr, Process(testing::_)).Times(1); + apm->ProcessReverseStream(&audio); +} + class MyEchoControlFactory : public EchoControlFactory { public: std::unique_ptr Create(int sample_rate_hz) { @@ -2943,8 +2961,9 @@ TEST(ApmConfiguration, EchoControlInjection) { std::unique_ptr echo_control_factory( new MyEchoControlFactory()); - rtc::scoped_refptr apm = AudioProcessing::Create( - webrtc_config, nullptr, std::move(echo_control_factory), nullptr); + rtc::scoped_refptr apm = + AudioProcessing::Create(webrtc_config, nullptr, nullptr, + std::move(echo_control_factory), nullptr); AudioFrame audio; audio.num_channels_ = 1; diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h index 3b8d2c42c0..4a3a30baf8 100644 --- a/modules/audio_processing/include/audio_processing.h +++ 
b/modules/audio_processing/include/audio_processing.h @@ -52,9 +52,12 @@ class GainControl; class HighPassFilter; class LevelEstimator; class NoiseSuppression; -class PostProcessing; +class CustomProcessing; class VoiceDetection; +// webrtc:8665, added temporarily to avoid breaking dependencies. +typedef CustomProcessing PostProcessing; + // Use to enable the extended filter mode in the AEC, along with robustness // measures around the reported system delays. It comes with a significant // increase in AEC complexity, but is much more robust to unreliable reported @@ -317,14 +320,24 @@ class AudioProcessing : public rtc::RefCountInterface { static AudioProcessing* Create(); // Allows passing in an optional configuration at create-time. static AudioProcessing* Create(const webrtc::Config& config); - // Deprecated. Use the Create below, with nullptr PostProcessing. + // Deprecated. Use the Create below, with nullptr CustomProcessing. RTC_DEPRECATED static AudioProcessing* Create(const webrtc::Config& config, NonlinearBeamformer* beamformer); + + // Will be deprecated and removed as part of webrtc:8665. Use the + // Create below, with nullptr CustomProcessing. + static AudioProcessing* Create( + const webrtc::Config& config, + std::unique_ptr capture_post_processor, + std::unique_ptr echo_control_factory, + NonlinearBeamformer* beamformer); + // Allows passing in optional user-defined processing modules. static AudioProcessing* Create( const webrtc::Config& config, - std::unique_ptr capture_post_processor, + std::unique_ptr capture_post_processor, + std::unique_ptr render_pre_processor, std::unique_ptr echo_control_factory, NonlinearBeamformer* beamformer); ~AudioProcessing() override {} @@ -1087,8 +1100,8 @@ class NoiseSuppression { virtual ~NoiseSuppression() {} }; -// Interface for a post processing submodule. -class PostProcessing { +// Interface for a custom processing submodule. +class CustomProcessing { public: // (Re-)Initializes the submodule. 
virtual void Initialize(int sample_rate_hz, int num_channels) = 0; @@ -1097,7 +1110,7 @@ class PostProcessing { // Returns a string representation of the module state. virtual std::string ToString() const = 0; - virtual ~PostProcessing() {} + virtual ~CustomProcessing() {} }; // The voice activity detection (VAD) component analyzes the stream to diff --git a/modules/audio_processing/include/mock_audio_processing.h b/modules/audio_processing/include/mock_audio_processing.h index f2bdc2f241..8a1e463b23 100644 --- a/modules/audio_processing/include/mock_audio_processing.h +++ b/modules/audio_processing/include/mock_audio_processing.h @@ -105,9 +105,9 @@ class MockNoiseSuppression : public NoiseSuppression { MOCK_METHOD0(NoiseEstimate, std::vector()); }; -class MockPostProcessing : public PostProcessing { +class MockCustomProcessing : public CustomProcessing { public: - virtual ~MockPostProcessing() {} + virtual ~MockCustomProcessing() {} MOCK_METHOD2(Initialize, void(int sample_rate_hz, int num_channels)); MOCK_METHOD1(Process, void(AudioBuffer* audio)); MOCK_CONST_METHOD0(ToString, std::string()); diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc index af1024312f..2477a1f7db 100644 --- a/modules/audio_processing/test/audio_processing_simulator.cc +++ b/modules/audio_processing/test/audio_processing_simulator.cc @@ -348,7 +348,7 @@ void AudioProcessingSimulator::CreateAudioProcessor() { apm_config.residual_echo_detector.enabled = *settings_.use_ed; } - ap_.reset(AudioProcessing::Create(config, nullptr, + ap_.reset(AudioProcessing::Create(config, nullptr, nullptr, std::move(echo_control_factory), nullptr)); RTC_CHECK(ap_); diff --git a/modules/audio_processing/test/debug_dump_test.cc b/modules/audio_processing/test/debug_dump_test.cc index 2c839d3bac..b901fd0657 100644 --- a/modules/audio_processing/test/debug_dump_test.cc +++ b/modules/audio_processing/test/debug_dump_test.cc @@ 
-142,6 +142,7 @@ DebugDumpGenerator::DebugDumpGenerator(const std::string& input_file_name, apm_(AudioProcessing::Create( config, nullptr, + nullptr, (enable_aec3 ? std::unique_ptr( new EchoCanceller3Factory()) : nullptr),