Don't always downsample to 16kHz in the reverse stream in APM

The first approach landed here: https://codereview.webrtc.org/1773173002 . However, it was partially reverted because it affected the AEC performance; the revert is here: https://codereview.webrtc.org/1867483003/ . The main difference in this approach is that it doesn't use the 3-band splitting filter in the reverse stream, which seems to be the culprit of the AEC regression. Also, the 2-band splitting filter has been used for the 32kHz case for a long time without any problem, and this CL extends its use to cover the 48kHz case as well. BUG=webrtc:5725 TBR=tina.legrand@webrtc.org Review URL: https://codereview.webrtc.org/1865633005 Cr-Commit-Position: refs/heads/master@{#12451}
2025-05-13 05:40:42 +01:00 · 2016-04-20 15:27:58 -07:00 · 2016-04-20 15:27:58 -07:00 · eb3603bd5e
commit eb3603bd5e
parent 0a2c054f42
5 changed files with 20 additions and 13 deletions
--- a/data/audio_processing/output_data_fixed.pb
+++ b/data/audio_processing/output_data_fixed.pb
--- a/data/audio_processing/output_data_float.pb
+++ b/data/audio_processing/output_data_float.pb
--- a/data/audio_processing/output_data_mac.pb
+++ b/data/audio_processing/output_data_mac.pb
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@ -366,18 +366,20 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
      std::min(formats_.api_format.input_stream().sample_rate_hz(),
               formats_.api_format.output_stream().sample_rate_hz())));
-  // We normally process the reverse stream at 16 kHz. Unless...
+  int rev_proc_rate = ClosestHigherNativeRate(std::min(
-  int rev_proc_rate = kSampleRate16kHz;
+      formats_.api_format.reverse_input_stream().sample_rate_hz(),
      formats_.api_format.reverse_output_stream().sample_rate_hz()));
  // TODO(aluebs): Remove this restriction once we figure out why the 3-band
  // splitting filter degrades the AEC performance.
  if (rev_proc_rate > kSampleRate32kHz) {
    rev_proc_rate = is_rev_processed() ? kSampleRate32kHz : kSampleRate16kHz;
  }
  // If the forward sample rate is 8 kHz, the reverse stream is also processed
  // at this rate.
  if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate8kHz) {
    // ...the forward stream is at 8 kHz.
    rev_proc_rate = kSampleRate8kHz;
  } else {
-    if (formats_.api_format.reverse_input_stream().sample_rate_hz() ==
+    rev_proc_rate = std::max(rev_proc_rate, static_cast<int>(kSampleRate16kHz));
        kSampleRate32kHz) {
      // ...or the input is at 32 kHz, in which case we use the splitting
      // filter rather than the resampler.
      rev_proc_rate = kSampleRate32kHz;
    }
  }
  // Always downmix the reverse stream to mono for analysis. This has been
@ -1151,11 +1153,11 @@ bool AudioProcessingImpl::is_rev_processed() const {
 bool AudioProcessingImpl::rev_synthesis_needed() const {
  return (is_rev_processed() &&
-          formats_.rev_proc_format.sample_rate_hz() == kSampleRate32kHz);
+          is_multi_band(formats_.rev_proc_format.sample_rate_hz()));
 }
 bool AudioProcessingImpl::rev_analysis_needed() const {
-  return formats_.rev_proc_format.sample_rate_hz() == kSampleRate32kHz &&
+  return is_multi_band(formats_.rev_proc_format.sample_rate_hz()) &&
         (is_rev_processed() ||
          public_submodules_->echo_cancellation
              ->is_enabled_render_side_query() ||
--- a/webrtc/modules/audio_processing/audio_processing_unittest.cc
+++ b/webrtc/modules/audio_processing/audio_processing_unittest.cc
@ -54,7 +54,12 @@ bool write_ref_data = false;
 const google::protobuf::int32 kChannels[] = {1, 2};
 const int kSampleRates[] = {8000, 16000, 32000, 48000};
 #if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
 // Android doesn't support 48kHz.
 const int kProcessSampleRates[] = {8000, 16000, 32000};
 #elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
 const int kProcessSampleRates[] = {8000, 16000, 32000, 48000};
 #endif
 enum StreamDirection { kForward = 0, kReverse };
@ -2692,7 +2697,7 @@ INSTANTIATE_TEST_CASE_P(
                    std::tr1::make_tuple(16000, 32000, 32000, 32000, 25, 0),
                    std::tr1::make_tuple(16000, 32000, 16000, 32000, 25, 20),
                    std::tr1::make_tuple(16000, 16000, 48000, 16000, 40, 20),
-                    std::tr1::make_tuple(16000, 16000, 32000, 16000, 50, 20),
+                    std::tr1::make_tuple(16000, 16000, 32000, 16000, 40, 20),
                    std::tr1::make_tuple(16000, 16000, 16000, 16000, 0, 0)));
 #elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
@ -2748,7 +2753,7 @@ INSTANTIATE_TEST_CASE_P(
                    std::tr1::make_tuple(16000, 32000, 32000, 32000, 25, 0),
                    std::tr1::make_tuple(16000, 32000, 16000, 32000, 25, 20),
                    std::tr1::make_tuple(16000, 16000, 48000, 16000, 35, 20),
-                    std::tr1::make_tuple(16000, 16000, 32000, 16000, 40, 20),
+                    std::tr1::make_tuple(16000, 16000, 32000, 16000, 35, 20),
                    std::tr1::make_tuple(16000, 16000, 16000, 16000, 0, 0)));
 #endif