AGC2: Return the recommended volume from RecommendInputVolume()

In InputVolumeController, rename AnalyzePreProcess() and Process() to reflect their use and replace the use of the getter recommended_input_volume() with an optional return value from the latter one. The added return value carries the recommended input volume if the call sequence follows the API contract. Make the member applied_input_volume_ optional. Restrict the use of the getter recommended_input_volume() for test use. Add a method capture_output_used() for test use. In GainController2, store the output of InputVolumeController::Process() in a new member variable that's updated in Analyze() and Process(). Use a trivial getter to read the value in APM. Bug: webrtc:7494 Change-Id: Ifcfb466c4f558be560eb6d2f45410d04adb7e2ad Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/287862 Reviewed-by: Alessio Bazzica <alessiob@webrtc.org> Commit-Queue: Hanna Silen <silen@webrtc.org> Cr-Commit-Position: refs/heads/main@{#38889}
2025-05-13 05:40:42 +01:00 · 2022-12-14 12:48:37 +01:00 · 2022-12-14 12:48:37 +01:00 · 597a2ba41a
commit 597a2ba41a
parent c0d44d9d63
7 changed files with 538 additions and 412 deletions
--- a/modules/audio_processing/agc2/input_volume_controller.cc
+++ b/modules/audio_processing/agc2/input_volume_controller.cc
@ -426,9 +426,17 @@ void InputVolumeController::Initialize() {
  AggregateChannelLevels();
  clipping_rate_log_ = 0.0f;
  clipping_rate_log_counter_ = 0;
+
+  applied_input_volume_ = absl::nullopt;
 }

-void InputVolumeController::AnalyzePreProcess(const AudioBuffer& audio_buffer) {
+void InputVolumeController::AnalyzeInputAudio(int applied_input_volume,
+                                              const AudioBuffer& audio_buffer) {
+  RTC_DCHECK_GE(applied_input_volume, 0);
+  RTC_DCHECK_LE(applied_input_volume, 255);
+
+  SetAppliedInputVolume(applied_input_volume);
+
  RTC_DCHECK_EQ(audio_buffer.num_channels(), channel_controllers_.size());
  const float* const* audio = audio_buffer.channels_const();
  size_t samples_per_channel = audio_buffer.num_frames();
@ -513,13 +521,20 @@ void InputVolumeController::AnalyzePreProcess(const AudioBuffer& audio_buffer) {
  AggregateChannelLevels();
 }

-void InputVolumeController::Process(float speech_probability,
-                                    absl::optional<float> speech_level_dbfs) {
+absl::optional<int> InputVolumeController::RecommendInputVolume(
+    float speech_probability,
+    absl::optional<float> speech_level_dbfs) {
+  // Only process if applied input volume is set.
+  if (!applied_input_volume_.has_value()) {
+    RTC_LOG(LS_ERROR) << "[AGC2] Applied input volume not set.";
+    return absl::nullopt;
+  }
+
  AggregateChannelLevels();
  const int volume_after_clipping_handling = recommended_input_volume_;

  if (!capture_output_used_) {
-    return;
+    return applied_input_volume_;
  }

  absl::optional<int> rms_error_db;
@ -540,6 +555,9 @@ void InputVolumeController::Process(float speech_probability,
    UpdateHistogramOnRecommendedInputVolumeChangeToMatchTarget(
        recommended_input_volume_);
  }
+
+  applied_input_volume_ = absl::nullopt;
+  return recommended_input_volume();
 }

 void InputVolumeController::HandleCaptureOutputUsedChange(
@ -574,7 +592,7 @@ void InputVolumeController::AggregateChannelLevels() {
  }

  // Enforce the minimum input volume when a recommendation is made.
-  if (applied_input_volume_ > 0) {
+  if (applied_input_volume_.has_value() && *applied_input_volume_ > 0) {
    new_recommended_input_volume =
        std::max(new_recommended_input_volume, min_input_volume_);
  }
--- a/modules/audio_processing/agc2/input_volume_controller.h
+++ b/modules/audio_processing/agc2/input_volume_controller.h
@ -81,31 +81,24 @@ class InputVolumeController final {
  // TODO(webrtc:7494): Integrate initialization into ctor and remove.
  void Initialize();

-  // Sets the applied input volume.
-  void SetAppliedInputVolume(int level);
+  // Analyzes `audio_buffer` before `RecommendInputVolume()` is called so that
+  // the analysis can be performed before digital processing operations take
+  // place (e.g., echo cancellation). The analysis consists of input clipping
+  // detection and prediction (if enabled).
+  void AnalyzeInputAudio(int applied_input_volume,
+                         const AudioBuffer& audio_buffer);

-  // TODO(bugs.webrtc.org/7494): Add argument for the applied input volume and
-  // remove `set_stream_analog_level()`.
-  // Analyzes `audio` before `Process()` is called so that the analysis can be
-  // performed before digital processing operations take place (e.g., echo
-  // cancellation). The analysis consists of input clipping detection and
-  // prediction (if enabled). Must be called after `set_stream_analog_level()`.
-  void AnalyzePreProcess(const AudioBuffer& audio_buffer);
-
-  // TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore.
  // Adjusts the recommended input volume upwards/downwards based on the result
-  // of `AnalyzePreProcess()` and on  `speech_level_dbfs` (if specified). Must
-  // be called after `AnalyzePreProcess()`. The value of `speech_probability` is
-  // expected to be in the range [0, 1] and `speech_level_dbfs` in the the range
-  // [-90, 30].
-  void Process(float speech_probability,
-               absl::optional<float> speech_level_dbfs);
-
-  // Returns the recommended input volume. If the input volume contoller is
-  // disabled, returns the input volume set via the latest
-  // `SetAppliedInputVolume()` call. Must be called after `AnalyzePreProcess()`
-  // and `Process()`.
-  int recommended_input_volume() const { return recommended_input_volume_; }
+  // of `AnalyzeInputAudio()` and on `speech_level_dbfs` (if specified). Must
+  // be called after `AnalyzeInputAudio()`.  The value of `speech_probability`
+  // is expected to be in the range [0, 1] and `speech_level_dbfs` in the range
+  // [-90, 30] and both should be estimated after echo cancellation and noise
+  // suppression are applied. Returns a non-empty input volume recommendation if
+  // available. If `capture_output_used_` is false, returns the applied input
+  // volume.
+  absl::optional<int> RecommendInputVolume(
+      float speech_probability,
+      absl::optional<float> speech_level_dbfs);

  // Stores whether the capture output will be used or not. Call when the
  // capture stream output has been flagged to be used/not-used. If unused, the
@ -122,6 +115,14 @@ class InputVolumeController final {
    return use_clipping_predictor_step_;
  }

+  // Only use for testing: Use `RecommendInputVolume()` elsewhere.
+  // Returns the value of a member variable, needed for testing
+  // `AnalyzeInputAudio()`.
+  int recommended_input_volume() const { return recommended_input_volume_; }
+
+  // Only use for testing.
+  bool capture_output_used() const { return capture_output_used_; }
+
 private:
  friend class InputVolumeControllerTestHelper;

@ -135,6 +136,9 @@ class InputVolumeController final {
  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerParametrizedTest,
                           ClippingParametersVerified);

+  // Sets the applied input volume and resets the recommended input volume.
+  void SetAppliedInputVolume(int level);
+
  void AggregateChannelLevels();

  const int num_capture_channels_;
@ -152,7 +156,7 @@ class InputVolumeController final {
  int recommended_input_volume_ = 0;
  // Applied input volume. After `SetAppliedInputVolume()` is called it holds
  // the current applied volume.
-  int applied_input_volume_ = 0;
+  absl::optional<int> applied_input_volume_;

  bool capture_output_used_;

--- a/modules/audio_processing/agc2/input_volume_controller_unittest.cc
+++ b/modules/audio_processing/agc2/input_volume_controller_unittest.cc
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@ -2058,7 +2058,7 @@ void AudioProcessingImpl::UpdateRecommendedInputVolumeLocked() {
  if (submodules_.gain_controller2 &&
      config_.gain_controller2.input_volume_controller.enabled) {
    capture_.recommended_input_volume =
-        submodules_.gain_controller2->GetRecommendedInputVolume();
+        submodules_.gain_controller2->recommended_input_volume();
    return;
  }

--- a/modules/audio_processing/gain_controller2.cc
+++ b/modules/audio_processing/gain_controller2.cc
@ -153,26 +153,22 @@ void GainController2::SetFixedGainDb(float gain_db) {

 void GainController2::Analyze(int applied_input_volume,
                              const AudioBuffer& audio_buffer) {
+  recommended_input_volume_ = absl::nullopt;
+
  RTC_DCHECK_GE(applied_input_volume, 0);
  RTC_DCHECK_LE(applied_input_volume, 255);

  if (input_volume_controller_) {
-    // TODO(bugs.webrtc.org/7494): Pass applied volume to `AnalyzePreProcess()`.
-    input_volume_controller_->SetAppliedInputVolume(applied_input_volume);
-    input_volume_controller_->AnalyzePreProcess(audio_buffer);
+    input_volume_controller_->AnalyzeInputAudio(applied_input_volume,
+                                                audio_buffer);
  }
 }

-absl::optional<int> GainController2::GetRecommendedInputVolume() const {
-  return input_volume_controller_
-             ? absl::optional<int>(
-                   input_volume_controller_->recommended_input_volume())
-             : absl::nullopt;
-}
-
 void GainController2::Process(absl::optional<float> speech_probability,
                              bool input_volume_changed,
                              AudioBuffer* audio) {
+  recommended_input_volume_ = absl::nullopt;
+
  data_dumper_.DumpRaw("agc2_applied_input_volume_changed",
                       input_volume_changed);
  if (input_volume_changed) {
@ -220,13 +216,12 @@ void GainController2::Process(absl::optional<float> speech_probability,
    RTC_DCHECK(speech_level.has_value());
    RTC_DCHECK(speech_probability.has_value());
    if (speech_probability.has_value()) {
-      // TODO(bugs.webrtc.org/7494): Rename `Process()` to `RecommendVolume()`
-      // and let it return the recommended input volume.
-      input_volume_controller_->Process(
-          *speech_probability,
-          speech_level->is_confident
-              ? absl::optional<float>(speech_level->rms_dbfs)
-              : absl::nullopt);
+      recommended_input_volume_ =
+          input_volume_controller_->RecommendInputVolume(
+              *speech_probability,
+              speech_level->is_confident
+                  ? absl::optional<float>(speech_level->rms_dbfs)
+                  : absl::nullopt);
    }
  }

--- a/modules/audio_processing/gain_controller2.h
+++ b/modules/audio_processing/gain_controller2.h
@ -76,9 +76,9 @@ class GainController2 {

  AvailableCpuFeatures GetCpuFeatures() const { return cpu_features_; }

-  // Returns the recommended input volume if input volume controller is enabled
-  // and if a volume recommendation is available.
-  absl::optional<int> GetRecommendedInputVolume() const;
+  absl::optional<int> recommended_input_volume() const {
+    return recommended_input_volume_;
+  }

 private:
  static std::atomic<int> instance_count_;
@ -96,6 +96,13 @@ class GainController2 {
  Limiter limiter_;

  int calls_since_last_limiter_log_;
+
+  // TODO(bugs.webrtc.org/7494): Remove intermediate storing at this level once
+  // APM refactoring is completed.
+  // Recommended input volume from `InputVolumeController`. Non-empty after
+  // `Process()` if input volume controller is enabled and its
+  // `RecommendInputVolume()` call has returned a non-empty value.
+  absl::optional<int> recommended_input_volume_;
 };

 }  // namespace webrtc
--- a/modules/audio_processing/gain_controller2_unittest.cc
+++ b/modules/audio_processing/gain_controller2_unittest.cc
@ -55,10 +55,9 @@ float RunAgc2WithConstantInput(GainController2& agc2,
  // Give time to the level estimator to converge.
  for (int i = 0; i < num_frames + 1; ++i) {
    SetAudioBufferSamples(input_level, ab);
-    const auto applied_volume = agc2.GetRecommendedInputVolume();
-    agc2.Analyze(i > 0 && applied_volume.has_value() ? *applied_volume
-                                                     : applied_initial_volume,
-                 ab);
+    const auto applied_volume = agc2.recommended_input_volume();
+    agc2.Analyze(applied_volume.value_or(applied_initial_volume), ab);
+
    agc2.Process(/*speech_probability=*/absl::nullopt,
                 /*input_volume_changed=*/false, &ab);
  }
@ -179,19 +178,19 @@ TEST(GainController2,
      config, InputVolumeControllerConfig{}, kSampleRateHz, kNumChannels,
      /*use_internal_vad=*/true);

-  EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
+  EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());

  // Run AGC for a signal with no clipping or detected speech.
  RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames,
                           kSampleRateHz, kNumChannels, kInitialInputVolume);

-  EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
+  EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());

  // Run AGC for a signal with clipping.
  RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames,
                           kSampleRateHz, kNumChannels, kInitialInputVolume);

-  EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
+  EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
 }

 TEST(
@ -211,19 +210,19 @@ TEST(
      config, kTestInputVolumeControllerConfig, kSampleRateHz, kNumChannels,
      /*use_internal_vad=*/true);

-  EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
+  EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());

  // Run AGC for a signal with no clipping or detected speech.
  RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames,
                           kSampleRateHz, kNumChannels, kInitialInputVolume);

-  EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
+  EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());

  // Run AGC for a signal with clipping.
  RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames,
                           kSampleRateHz, kNumChannels, kInitialInputVolume);

-  EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
+  EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
 }

 TEST(GainController2,
@ -243,19 +242,19 @@ TEST(GainController2,
      config, InputVolumeControllerConfig{}, kSampleRateHz, kNumChannels,
      /*use_internal_vad=*/true);

-  EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
+  EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());

  // Run AGC for a signal with no clipping or detected speech.
  RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames,
                           kSampleRateHz, kNumChannels, kInitialInputVolume);

-  EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
+  EXPECT_TRUE(gain_controller->recommended_input_volume().has_value());

  // Run AGC for a signal with clipping.
  RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames,
                           kSampleRateHz, kNumChannels, kInitialInputVolume);

-  EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
+  EXPECT_TRUE(gain_controller->recommended_input_volume().has_value());
 }

 TEST(
@ -276,19 +275,19 @@ TEST(
      config, kTestInputVolumeControllerConfig, kSampleRateHz, kNumChannels,
      /*use_internal_vad=*/true);

-  EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
+  EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());

  // Run AGC for a signal with no clipping or detected speech.
  RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames,
                           kSampleRateHz, kNumChannels, kInitialInputVolume);

-  EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
+  EXPECT_TRUE(gain_controller->recommended_input_volume().has_value());

  // Run AGC for a signal with clipping.
  RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames,
                           kSampleRateHz, kNumChannels, kInitialInputVolume);

-  EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
+  EXPECT_TRUE(gain_controller->recommended_input_volume().has_value());
 }

 // Checks that the default config is applied.