mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-13 05:40:42 +01:00
AGC2: Return the recommended volume from RecommendInputVolume()
In InputVolumeController, rename AnalyzePreProcess() and Process() to reflect their use and replace the use of the getter recommended_input_volume() with an optional return value from the latter one. The added return value carries the recommended input volume if the call sequence follows the API contract. Make the member applied_input_volume_ optional. Restrict the use of the getter recommended_input_volume() for test use. Add a method capture_output_used() for test use. In GainController2, store the output of InputVolumeController::Process() in a new member variable that's updated in Analyze() and Process(). Use a trivial getter to read the value in APM. Bug: webrtc:7494 Change-Id: Ifcfb466c4f558be560eb6d2f45410d04adb7e2ad Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/287862 Reviewed-by: Alessio Bazzica <alessiob@webrtc.org> Commit-Queue: Hanna Silen <silen@webrtc.org> Cr-Commit-Position: refs/heads/main@{#38889}
This commit is contained in:
parent
c0d44d9d63
commit
597a2ba41a
7 changed files with 538 additions and 412 deletions
|
@ -426,9 +426,17 @@ void InputVolumeController::Initialize() {
|
|||
AggregateChannelLevels();
|
||||
clipping_rate_log_ = 0.0f;
|
||||
clipping_rate_log_counter_ = 0;
|
||||
|
||||
applied_input_volume_ = absl::nullopt;
|
||||
}
|
||||
|
||||
void InputVolumeController::AnalyzePreProcess(const AudioBuffer& audio_buffer) {
|
||||
void InputVolumeController::AnalyzeInputAudio(int applied_input_volume,
|
||||
const AudioBuffer& audio_buffer) {
|
||||
RTC_DCHECK_GE(applied_input_volume, 0);
|
||||
RTC_DCHECK_LE(applied_input_volume, 255);
|
||||
|
||||
SetAppliedInputVolume(applied_input_volume);
|
||||
|
||||
RTC_DCHECK_EQ(audio_buffer.num_channels(), channel_controllers_.size());
|
||||
const float* const* audio = audio_buffer.channels_const();
|
||||
size_t samples_per_channel = audio_buffer.num_frames();
|
||||
|
@ -513,13 +521,20 @@ void InputVolumeController::AnalyzePreProcess(const AudioBuffer& audio_buffer) {
|
|||
AggregateChannelLevels();
|
||||
}
|
||||
|
||||
void InputVolumeController::Process(float speech_probability,
|
||||
absl::optional<float> speech_level_dbfs) {
|
||||
absl::optional<int> InputVolumeController::RecommendInputVolume(
|
||||
float speech_probability,
|
||||
absl::optional<float> speech_level_dbfs) {
|
||||
// Only process if applied input volume is set.
|
||||
if (!applied_input_volume_.has_value()) {
|
||||
RTC_LOG(LS_ERROR) << "[AGC2] Applied input volume not set.";
|
||||
return absl::nullopt;
|
||||
}
|
||||
|
||||
AggregateChannelLevels();
|
||||
const int volume_after_clipping_handling = recommended_input_volume_;
|
||||
|
||||
if (!capture_output_used_) {
|
||||
return;
|
||||
return applied_input_volume_;
|
||||
}
|
||||
|
||||
absl::optional<int> rms_error_db;
|
||||
|
@ -540,6 +555,9 @@ void InputVolumeController::Process(float speech_probability,
|
|||
UpdateHistogramOnRecommendedInputVolumeChangeToMatchTarget(
|
||||
recommended_input_volume_);
|
||||
}
|
||||
|
||||
applied_input_volume_ = absl::nullopt;
|
||||
return recommended_input_volume();
|
||||
}
|
||||
|
||||
void InputVolumeController::HandleCaptureOutputUsedChange(
|
||||
|
@ -574,7 +592,7 @@ void InputVolumeController::AggregateChannelLevels() {
|
|||
}
|
||||
|
||||
// Enforce the minimum input volume when a recommendation is made.
|
||||
if (applied_input_volume_ > 0) {
|
||||
if (applied_input_volume_.has_value() && *applied_input_volume_ > 0) {
|
||||
new_recommended_input_volume =
|
||||
std::max(new_recommended_input_volume, min_input_volume_);
|
||||
}
|
||||
|
|
|
@ -81,31 +81,24 @@ class InputVolumeController final {
|
|||
// TODO(webrtc:7494): Integrate initialization into ctor and remove.
|
||||
void Initialize();
|
||||
|
||||
// Sets the applied input volume.
|
||||
void SetAppliedInputVolume(int level);
|
||||
// Analyzes `audio_buffer` before `RecommendInputVolume()` is called so that
|
||||
// the analysis can be performed before digital processing operations take
|
||||
// place (e.g., echo cancellation). The analysis consists of input clipping
|
||||
// detection and prediction (if enabled).
|
||||
void AnalyzeInputAudio(int applied_input_volume,
|
||||
const AudioBuffer& audio_buffer);
|
||||
|
||||
// TODO(bugs.webrtc.org/7494): Add argument for the applied input volume and
|
||||
// remove `set_stream_analog_level()`.
|
||||
// Analyzes `audio` before `Process()` is called so that the analysis can be
|
||||
// performed before digital processing operations take place (e.g., echo
|
||||
// cancellation). The analysis consists of input clipping detection and
|
||||
// prediction (if enabled). Must be called after `set_stream_analog_level()`.
|
||||
void AnalyzePreProcess(const AudioBuffer& audio_buffer);
|
||||
|
||||
// TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore.
|
||||
// Adjusts the recommended input volume upwards/downwards based on the result
|
||||
// of `AnalyzePreProcess()` and on `speech_level_dbfs` (if specified). Must
|
||||
// be called after `AnalyzePreProcess()`. The value of `speech_probability` is
|
||||
// expected to be in the range [0, 1] and `speech_level_dbfs` in the the range
|
||||
// [-90, 30].
|
||||
void Process(float speech_probability,
|
||||
absl::optional<float> speech_level_dbfs);
|
||||
|
||||
// Returns the recommended input volume. If the input volume contoller is
|
||||
// disabled, returns the input volume set via the latest
|
||||
// `SetAppliedInputVolume()` call. Must be called after `AnalyzePreProcess()`
|
||||
// and `Process()`.
|
||||
int recommended_input_volume() const { return recommended_input_volume_; }
|
||||
// of `AnalyzeInputAudio()` and on `speech_level_dbfs` (if specified). Must
|
||||
// be called after `AnalyzeInputAudio()`. The value of `speech_probability`
|
||||
// is expected to be in the range [0, 1] and `speech_level_dbfs` in the range
|
||||
// [-90, 30] and both should be estimated after echo cancellation and noise
|
||||
// suppression are applied. Returns a non-empty input volume recommendation if
|
||||
// available. If `capture_output_used_` is false, returns the applied input
|
||||
// volume.
|
||||
absl::optional<int> RecommendInputVolume(
|
||||
float speech_probability,
|
||||
absl::optional<float> speech_level_dbfs);
|
||||
|
||||
// Stores whether the capture output will be used or not. Call when the
|
||||
// capture stream output has been flagged to be used/not-used. If unused, the
|
||||
|
@ -122,6 +115,14 @@ class InputVolumeController final {
|
|||
return use_clipping_predictor_step_;
|
||||
}
|
||||
|
||||
// Only use for testing: Use `RecommendInputVolume()` elsewhere.
|
||||
// Returns the value of a member variable, needed for testing
|
||||
// `AnalyzeInputAudio()`.
|
||||
int recommended_input_volume() const { return recommended_input_volume_; }
|
||||
|
||||
// Only use for testing.
|
||||
bool capture_output_used() const { return capture_output_used_; }
|
||||
|
||||
private:
|
||||
friend class InputVolumeControllerTestHelper;
|
||||
|
||||
|
@ -135,6 +136,9 @@ class InputVolumeController final {
|
|||
FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerParametrizedTest,
|
||||
ClippingParametersVerified);
|
||||
|
||||
// Sets the applied input volume and resets the recommended input volume.
|
||||
void SetAppliedInputVolume(int level);
|
||||
|
||||
void AggregateChannelLevels();
|
||||
|
||||
const int num_capture_channels_;
|
||||
|
@ -152,7 +156,7 @@ class InputVolumeController final {
|
|||
int recommended_input_volume_ = 0;
|
||||
// Applied input volume. After `SetAppliedInputVolume()` is called it holds
|
||||
// the current applied volume.
|
||||
int applied_input_volume_ = 0;
|
||||
absl::optional<int> applied_input_volume_;
|
||||
|
||||
bool capture_output_used_;
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2058,7 +2058,7 @@ void AudioProcessingImpl::UpdateRecommendedInputVolumeLocked() {
|
|||
if (submodules_.gain_controller2 &&
|
||||
config_.gain_controller2.input_volume_controller.enabled) {
|
||||
capture_.recommended_input_volume =
|
||||
submodules_.gain_controller2->GetRecommendedInputVolume();
|
||||
submodules_.gain_controller2->recommended_input_volume();
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -153,26 +153,22 @@ void GainController2::SetFixedGainDb(float gain_db) {
|
|||
|
||||
void GainController2::Analyze(int applied_input_volume,
|
||||
const AudioBuffer& audio_buffer) {
|
||||
recommended_input_volume_ = absl::nullopt;
|
||||
|
||||
RTC_DCHECK_GE(applied_input_volume, 0);
|
||||
RTC_DCHECK_LE(applied_input_volume, 255);
|
||||
|
||||
if (input_volume_controller_) {
|
||||
// TODO(bugs.webrtc.org/7494): Pass applied volume to `AnalyzePreProcess()`.
|
||||
input_volume_controller_->SetAppliedInputVolume(applied_input_volume);
|
||||
input_volume_controller_->AnalyzePreProcess(audio_buffer);
|
||||
input_volume_controller_->AnalyzeInputAudio(applied_input_volume,
|
||||
audio_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
absl::optional<int> GainController2::GetRecommendedInputVolume() const {
|
||||
return input_volume_controller_
|
||||
? absl::optional<int>(
|
||||
input_volume_controller_->recommended_input_volume())
|
||||
: absl::nullopt;
|
||||
}
|
||||
|
||||
void GainController2::Process(absl::optional<float> speech_probability,
|
||||
bool input_volume_changed,
|
||||
AudioBuffer* audio) {
|
||||
recommended_input_volume_ = absl::nullopt;
|
||||
|
||||
data_dumper_.DumpRaw("agc2_applied_input_volume_changed",
|
||||
input_volume_changed);
|
||||
if (input_volume_changed) {
|
||||
|
@ -220,13 +216,12 @@ void GainController2::Process(absl::optional<float> speech_probability,
|
|||
RTC_DCHECK(speech_level.has_value());
|
||||
RTC_DCHECK(speech_probability.has_value());
|
||||
if (speech_probability.has_value()) {
|
||||
// TODO(bugs.webrtc.org/7494): Rename `Process()` to `RecommendVolume()`
|
||||
// and let it return the recommended input volume.
|
||||
input_volume_controller_->Process(
|
||||
*speech_probability,
|
||||
speech_level->is_confident
|
||||
? absl::optional<float>(speech_level->rms_dbfs)
|
||||
: absl::nullopt);
|
||||
recommended_input_volume_ =
|
||||
input_volume_controller_->RecommendInputVolume(
|
||||
*speech_probability,
|
||||
speech_level->is_confident
|
||||
? absl::optional<float>(speech_level->rms_dbfs)
|
||||
: absl::nullopt);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -76,9 +76,9 @@ class GainController2 {
|
|||
|
||||
AvailableCpuFeatures GetCpuFeatures() const { return cpu_features_; }
|
||||
|
||||
// Returns the recommended input volume if input volume controller is enabled
|
||||
// and if a volume recommendation is available.
|
||||
absl::optional<int> GetRecommendedInputVolume() const;
|
||||
absl::optional<int> recommended_input_volume() const {
|
||||
return recommended_input_volume_;
|
||||
}
|
||||
|
||||
private:
|
||||
static std::atomic<int> instance_count_;
|
||||
|
@ -96,6 +96,13 @@ class GainController2 {
|
|||
Limiter limiter_;
|
||||
|
||||
int calls_since_last_limiter_log_;
|
||||
|
||||
// TODO(bugs.webrtc.org/7494): Remove intermediate storing at this level once
|
||||
// APM refactoring is completed.
|
||||
// Recommended input volume from `InputVolumeController`. Non-empty after
|
||||
// `Process()` if input volume controller is enabled and
|
||||
// `InputVolumeController::RecommendInputVolume()` has returned a non-empty value.
|
||||
absl::optional<int> recommended_input_volume_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
|
@ -55,10 +55,9 @@ float RunAgc2WithConstantInput(GainController2& agc2,
|
|||
// Give time to the level estimator to converge.
|
||||
for (int i = 0; i < num_frames + 1; ++i) {
|
||||
SetAudioBufferSamples(input_level, ab);
|
||||
const auto applied_volume = agc2.GetRecommendedInputVolume();
|
||||
agc2.Analyze(i > 0 && applied_volume.has_value() ? *applied_volume
|
||||
: applied_initial_volume,
|
||||
ab);
|
||||
const auto applied_volume = agc2.recommended_input_volume();
|
||||
agc2.Analyze(applied_volume.value_or(applied_initial_volume), ab);
|
||||
|
||||
agc2.Process(/*speech_probability=*/absl::nullopt,
|
||||
/*input_volume_changed=*/false, &ab);
|
||||
}
|
||||
|
@ -179,19 +178,19 @@ TEST(GainController2,
|
|||
config, InputVolumeControllerConfig{}, kSampleRateHz, kNumChannels,
|
||||
/*use_internal_vad=*/true);
|
||||
|
||||
EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
|
||||
EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
|
||||
|
||||
// Run AGC for a signal with no clipping or detected speech.
|
||||
RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames,
|
||||
kSampleRateHz, kNumChannels, kInitialInputVolume);
|
||||
|
||||
EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
|
||||
EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
|
||||
|
||||
// Run AGC for a signal with clipping.
|
||||
RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames,
|
||||
kSampleRateHz, kNumChannels, kInitialInputVolume);
|
||||
|
||||
EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
|
||||
EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
|
||||
}
|
||||
|
||||
TEST(
|
||||
|
@ -211,19 +210,19 @@ TEST(
|
|||
config, kTestInputVolumeControllerConfig, kSampleRateHz, kNumChannels,
|
||||
/*use_internal_vad=*/true);
|
||||
|
||||
EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
|
||||
EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
|
||||
|
||||
// Run AGC for a signal with no clipping or detected speech.
|
||||
RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames,
|
||||
kSampleRateHz, kNumChannels, kInitialInputVolume);
|
||||
|
||||
EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
|
||||
EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
|
||||
|
||||
// Run AGC for a signal with clipping.
|
||||
RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames,
|
||||
kSampleRateHz, kNumChannels, kInitialInputVolume);
|
||||
|
||||
EXPECT_FALSE(gain_controller->GetRecommendedInputVolume().has_value());
|
||||
EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
|
||||
}
|
||||
|
||||
TEST(GainController2,
|
||||
|
@ -243,19 +242,19 @@ TEST(GainController2,
|
|||
config, InputVolumeControllerConfig{}, kSampleRateHz, kNumChannels,
|
||||
/*use_internal_vad=*/true);
|
||||
|
||||
EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
|
||||
EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
|
||||
|
||||
// Run AGC for a signal with no clipping or detected speech.
|
||||
RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames,
|
||||
kSampleRateHz, kNumChannels, kInitialInputVolume);
|
||||
|
||||
EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
|
||||
EXPECT_TRUE(gain_controller->recommended_input_volume().has_value());
|
||||
|
||||
// Run AGC for a signal with clipping.
|
||||
RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames,
|
||||
kSampleRateHz, kNumChannels, kInitialInputVolume);
|
||||
|
||||
EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
|
||||
EXPECT_TRUE(gain_controller->recommended_input_volume().has_value());
|
||||
}
|
||||
|
||||
TEST(
|
||||
|
@ -276,19 +275,19 @@ TEST(
|
|||
config, kTestInputVolumeControllerConfig, kSampleRateHz, kNumChannels,
|
||||
/*use_internal_vad=*/true);
|
||||
|
||||
EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
|
||||
EXPECT_FALSE(gain_controller->recommended_input_volume().has_value());
|
||||
|
||||
// Run AGC for a signal with no clipping or detected speech.
|
||||
RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames,
|
||||
kSampleRateHz, kNumChannels, kInitialInputVolume);
|
||||
|
||||
EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
|
||||
EXPECT_TRUE(gain_controller->recommended_input_volume().has_value());
|
||||
|
||||
// Run AGC for a signal with clipping.
|
||||
RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames,
|
||||
kSampleRateHz, kNumChannels, kInitialInputVolume);
|
||||
|
||||
EXPECT_TRUE(gain_controller->GetRecommendedInputVolume().has_value());
|
||||
EXPECT_TRUE(gain_controller->recommended_input_volume().has_value());
|
||||
}
|
||||
|
||||
// Checks that the default config is applied.
|
||||
|
|
Loading…
Reference in a new issue