AGC2: Return the recommended volume from RecommendInputVolume()

In InputVolumeController, rename AnalyzePreProcess() and Process() to
reflect their use and replace the use of the getter
recommended_input_volume() with an optional return value from the
latter one. The added return value carries the recommended input
volume if the call sequence follows the API contract. Make the member
applied_input_volume_ optional. Restrict the use of the getter
recommended_input_volume() to test use. Add a method
capture_output_used() for test use.

In GainController2, store the output of InputVolumeController::Process()
in a new member variable that's updated in Analyze() and Process(). Use
a trivial getter to read the value in APM.

Bug: webrtc:7494
Change-Id: Ifcfb466c4f558be560eb6d2f45410d04adb7e2ad
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/287862
Reviewed-by: Alessio Bazzica <alessiob@webrtc.org>
Commit-Queue: Hanna Silen <silen@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38889}
This commit is contained in:
Hanna Silen 2022-12-14 12:48:37 +01:00 committed by WebRTC LUCI CQ
parent c0d44d9d63
commit 597a2ba41a
7 changed files with 538 additions and 412 deletions

View file

@ -426,9 +426,17 @@ void InputVolumeController::Initialize() {
AggregateChannelLevels();
clipping_rate_log_ = 0.0f;
clipping_rate_log_counter_ = 0;
applied_input_volume_ = absl::nullopt;
}
void InputVolumeController::AnalyzePreProcess(const AudioBuffer& audio_buffer) {
void InputVolumeController::AnalyzeInputAudio(int applied_input_volume,
const AudioBuffer& audio_buffer) {
RTC_DCHECK_GE(applied_input_volume, 0);
RTC_DCHECK_LE(applied_input_volume, 255);
SetAppliedInputVolume(applied_input_volume);
RTC_DCHECK_EQ(audio_buffer.num_channels(), channel_controllers_.size());
const float* const* audio = audio_buffer.channels_const();
size_t samples_per_channel = audio_buffer.num_frames();
@ -513,13 +521,20 @@ void InputVolumeController::AnalyzePreProcess(const AudioBuffer& audio_buffer) {
AggregateChannelLevels();
}
void InputVolumeController::Process(float speech_probability,
absl::optional<float> speech_level_dbfs) {
absl::optional<int> InputVolumeController::RecommendInputVolume(
float speech_probability,
absl::optional<float> speech_level_dbfs) {
// Only process if applied input volume is set.
if (!applied_input_volume_.has_value()) {
RTC_LOG(LS_ERROR) << "[AGC2] Applied input volume not set.";
return absl::nullopt;
}
AggregateChannelLevels();
const int volume_after_clipping_handling = recommended_input_volume_;
if (!capture_output_used_) {
return;
return applied_input_volume_;
}
absl::optional<int> rms_error_db;
@ -540,6 +555,9 @@ void InputVolumeController::Process(float speech_probability,
UpdateHistogramOnRecommendedInputVolumeChangeToMatchTarget(
recommended_input_volume_);
}
applied_input_volume_ = absl::nullopt;
return recommended_input_volume();
}
void InputVolumeController::HandleCaptureOutputUsedChange(
@ -574,7 +592,7 @@ void InputVolumeController::AggregateChannelLevels() {
}
// Enforce the minimum input volume when a recommendation is made.
if (applied_input_volume_ > 0) {
if (applied_input_volume_.has_value() && *applied_input_volume_ > 0) {
new_recommended_input_volume =
std::max(new_recommended_input_volume, min_input_volume_);
}

View file

@ -81,31 +81,24 @@ class InputVolumeController final {
// TODO(webrtc:7494): Integrate initialization into ctor and remove.
void Initialize();
// Sets the applied input volume.
void SetAppliedInputVolume(int level);
// Analyzes `audio_buffer` before `RecommendInputVolume()` is called so that
// the analysis can be performed before digital processing operations take
// place (e.g., echo cancellation). The analysis consists of input clipping
// detection and prediction (if enabled).
void AnalyzeInputAudio(int applied_input_volume,
const AudioBuffer& audio_buffer);
// TODO(bugs.webrtc.org/7494): Add argument for the applied input volume and
// remove `set_stream_analog_level()`.
// Analyzes `audio` before `Process()` is called so that the analysis can be
// performed before digital processing operations take place (e.g., echo
// cancellation). The analysis consists of input clipping detection and
// prediction (if enabled). Must be called after `set_stream_analog_level()`.
void AnalyzePreProcess(const AudioBuffer& audio_buffer);
// TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore.
// Adjusts the recommended input volume upwards/downwards based on the result
// of `AnalyzePreProcess()` and on `speech_level_dbfs` (if specified). Must
// be called after `AnalyzePreProcess()`. The value of `speech_probability` is
// expected to be in the range [0, 1] and `speech_level_dbfs` in the the range
// [-90, 30].
void Process(float speech_probability,
absl::optional<float> speech_level_dbfs);
// Returns the recommended input volume. If the input volume contoller is
// disabled, returns the input volume set via the latest
// `SetAppliedInputVolume()` call. Must be called after `AnalyzePreProcess()`
// and `Process()`.
int recommended_input_volume() const { return recommended_input_volume_; }
// of `AnalyzeInputAudio()` and on `speech_level_dbfs` (if specified). Must
// be called after `AnalyzeInputAudio()`. The value of `speech_probability`
// is expected to be in the range [0, 1] and `speech_level_dbfs` in the range
// [-90, 30] and both should be estimated after echo cancellation and noise
// suppression are applied. Returns a non-empty input volume recommendation if
// available. If `capture_output_used_` is false, returns the applied input
// volume.
absl::optional<int> RecommendInputVolume(
float speech_probability,
absl::optional<float> speech_level_dbfs);
// Stores whether the capture output will be used or not. Call when the
// capture stream output has been flagged to be used/not-used. If unused, the
@ -122,6 +115,14 @@ class InputVolumeController final {
return use_clipping_predictor_step_;
}
// Only use for testing: Use `RecommendInputVolume()` elsewhere.
// Returns the value of a member variable, needed for testing
// `AnalyzeInputAudio()`.
int recommended_input_volume() const { return recommended_input_volume_; }
// Only use for testing.
bool capture_output_used() const { return capture_output_used_; }
private:
friend class InputVolumeControllerTestHelper;
@ -135,6 +136,9 @@ class InputVolumeController final {
FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerParametrizedTest,
ClippingParametersVerified);
// Sets the applied input volume and resets the recommended input volume.
void SetAppliedInputVolume(int level);
void AggregateChannelLevels();
const int num_capture_channels_;
@ -152,7 +156,7 @@ class InputVolumeController final {
int recommended_input_volume_ = 0;
// Applied input volume. After `SetAppliedInputVolume()` is called it holds
// the current applied volume.
int applied_input_volume_ = 0;
absl::optional<int> applied_input_volume_;
bool capture_output_used_;

View file

@ -2058,7 +2058,7 @@ void AudioProcessingImpl::UpdateRecommendedInputVolumeLocked() {
if (submodules_.gain_controller2 &&
config_.gain_controller2.input_volume_controller.enabled) {
capture_.recommended_input_volume =
submodules_.gain_controller2->GetRecommendedInputVolume();
submodules_.gain_controller2->recommended_input_volume();
return;
}

View file

@ -153,26 +153,22 @@ void GainController2::SetFixedGainDb(float gain_db) {
void GainController2::Analyze(int applied_input_volume,
const AudioBuffer& audio_buffer) {
recommended_input_volume_ = absl::nullopt;
RTC_DCHECK_GE(applied_input_volume, 0);
RTC_DCHECK_LE(applied_input_volume, 255);
if (input_volume_controller_) {
// TODO(bugs.webrtc.org/7494): Pass applied volume to `AnalyzePreProcess()`.
input_volume_controller_->SetAppliedInputVolume(applied_input_volume);
input_volume_controller_->AnalyzePreProcess(audio_buffer);
input_volume_controller_->AnalyzeInputAudio(applied_input_volume,
audio_buffer);
}
}
absl::optional<int> GainController2::GetRecommendedInputVolume() const {
return input_volume_controller_
? absl::optional<int>(
input_volume_controller_->recommended_input_volume())
: absl::nullopt;
}
void GainController2::Process(absl::optional<float> speech_probability,
bool input_volume_changed,
AudioBuffer* audio) {
recommended_input_volume_ = absl::nullopt;
data_dumper_.DumpRaw("agc2_applied_input_volume_changed",
input_volume_changed);
if (input_volume_changed) {
@ -220,13 +216,12 @@ void GainController2::Process(absl::optional<float> speech_probability,
RTC_DCHECK(speech_level.has_value());
RTC_DCHECK(speech_probability.has_value());
if (speech_probability.has_value()) {
// TODO(bugs.webrtc.org/7494): Rename `Process()` to `RecommendVolume()`
// and let it return the recommended input volume.
input_volume_controller_->Process(
*speech_probability,
speech_level->is_confident
? absl::optional<float>(speech_level->rms_dbfs)
: absl::nullopt);
recommended_input_volume_ =
input_volume_controller_->RecommendInputVolume(
*speech_probability,
speech_level->is_confident
? absl::optional<float>(speech_level->rms_dbfs)
: absl::nullopt);
}
}

View file

@ -76,9 +76,9 @@ class GainController2 {
AvailableCpuFeatures GetCpuFeatures() const { return cpu_features_; }
// Returns the recommended input volume if input volume controller is enabled
// and if a volume recommendation is available.
absl::optional<int> GetRecommendedInputVolume() const;
absl::optional<int> recommended_input_volume() const {
return recommended_input_volume_;
}
private:
static std::atomic<int> instance_count_;
@ -96,6 +96,13 @@ class GainController2 {
Limiter limiter_;
int calls_since_last_limiter_log_;
// TODO(bugs.webrtc.org/7494): Remove intermediate storing at this level once
// APM refactoring is completed.
// Recommended input volume from `InputVolumeController`. Non-empty after
// `Process()` if input volume controller is enabled and
// `InputVolumeController::RecommendInputVolume()` has returned a non-empty
// value.
absl::optional<int> recommended_input_volume_;
};
} // namespace webrtc

View file

@ -55,10 +55,9 @@ float RunAgc2WithConstantInput(GainController2& agc2,
// Give time to the level estimator to converge.
for (int i = 0; i < num_frames + 1; ++i) {
SetAudioBufferSamples(input_level, ab);
const auto applied_volume = agc2.GetRecommendedInputVolume();
agc2.Analyze(i > 0 && applied_volume.has_value() ? *applied_volume
: applied_initial_volume,
ab);
const auto applied_volume = agc2.recommended_input_volume();
agc2.Analyze(applied_volume.value_or(applied_initial_volume), ab);
agc2.Process(/*speech_probability=*/absl::nullopt,
/*input_volume_changed=*/false, &ab);
}
@ -179,19 +178,19 @@ TEST(GainController2,
config, InputVolumeControllerConfig{}, kSampleRateHz, kNumChannels,
/*use_internal_vad=*/true);
EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
// Run AGC for a signal with no clipping or detected speech.
RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames,
kSampleRateHz, kNumChannels, kInitialInputVolume);
EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
// Run AGC for a signal with clipping.
RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames,
kSampleRateHz, kNumChannels, kInitialInputVolume);
EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
}
TEST(
@ -211,19 +210,19 @@ TEST(
config, kTestInputVolumeControllerConfig, kSampleRateHz, kNumChannels,
/*use_internal_vad=*/true);
EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
// Run AGC for a signal with no clipping or detected speech.
RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames,
kSampleRateHz, kNumChannels, kInitialInputVolume);
EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
// Run AGC for a signal with clipping.
RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames,
kSampleRateHz, kNumChannels, kInitialInputVolume);
EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
}
TEST(GainController2,
@ -243,19 +242,19 @@ TEST(GainController2,
config, InputVolumeControllerConfig{}, kSampleRateHz, kNumChannels,
/*use_internal_vad=*/true);
EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
// Run AGC for a signal with no clipping or detected speech.
RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames,
kSampleRateHz, kNumChannels, kInitialInputVolume);
EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
EXPECT_TRUE(gain_controller->recommended_input_volume().has_value());
// Run AGC for a signal with clipping.
RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames,
kSampleRateHz, kNumChannels, kInitialInputVolume);
EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
EXPECT_TRUE(gain_controller->recommended_input_volume().has_value());
}
TEST(
@ -276,19 +275,19 @@ TEST(
config, kTestInputVolumeControllerConfig, kSampleRateHz, kNumChannels,
/*use_internal_vad=*/true);
EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
// Run AGC for a signal with no clipping or detected speech.
RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames,
kSampleRateHz, kNumChannels, kInitialInputVolume);
EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
EXPECT_TRUE(gain_controller->recommended_input_volume().has_value());
// Run AGC for a signal with clipping.
RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames,
kSampleRateHz, kNumChannels, kInitialInputVolume);
EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
EXPECT_TRUE(gain_controller->recommended_input_volume().has_value());
}
// Checks that the default config is applied.