mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-17 23:57:59 +01:00

This CL does *not* change the behavior of the AGC2 adaptive digital controller - bitexactness verified with audioproc_f on a collection of AEC dumps and Wav files (42 recordings in total). Tested: compiled Chrome with this patch and made an appr.tc test call Bug: webrtc:7494 Change-Id: Ia8a9f6fbc3a3459b888a2eed87e108f0d39cfe99 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/233520 Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Reviewed-by: Sam Zackrisson <saza@webrtc.org> Cr-Commit-Position: refs/heads/main@{#35140}
381 lines
15 KiB
C++
381 lines
15 KiB
C++
/*
|
|
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
|
|
|
|
#include <algorithm>
|
|
#include <memory>
|
|
|
|
#include "common_audio/include/audio_util.h"
|
|
#include "modules/audio_processing/agc2/agc2_common.h"
|
|
#include "modules/audio_processing/agc2/vector_float_frame.h"
|
|
#include "modules/audio_processing/include/audio_processing.h"
|
|
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
|
#include "rtc_base/gunit.h"
|
|
|
|
namespace webrtc {
|
|
namespace {
|
|
|
|
constexpr int kMono = 1;
|
|
constexpr int kStereo = 2;
|
|
constexpr int kFrameLen10ms8kHz = 80;
|
|
constexpr int kFrameLen10ms48kHz = 480;
|
|
|
|
constexpr float kMaxSpeechProbability = 1.0f;
|
|
|
|
// Constants used in place of estimated noise levels.
|
|
constexpr float kNoNoiseDbfs = kMinLevelDbfs;
|
|
constexpr float kWithNoiseDbfs = -20.0f;
|
|
|
|
// Number of additional frames to process in the tests to ensure that the tested
|
|
// adaptation processes have converged.
|
|
constexpr int kNumExtraFrames = 10;
|
|
|
|
constexpr float GetMaxGainChangePerFrameDb(
|
|
float max_gain_change_db_per_second) {
|
|
return max_gain_change_db_per_second * kFrameDurationMs / 1000.0f;
|
|
}
|
|
|
|
using AdaptiveDigitalConfig =
|
|
AudioProcessing::Config::GainController2::AdaptiveDigital;
|
|
|
|
constexpr AdaptiveDigitalConfig kDefaultConfig{};
|
|
|
|
// Helper to create initialized `AdaptiveDigitalGainApplier` objects.
|
|
struct GainApplierHelper {
|
|
explicit GainApplierHelper(const AdaptiveDigitalConfig& config)
|
|
: apm_data_dumper(0),
|
|
gain_applier(
|
|
std::make_unique<AdaptiveDigitalGainApplier>(&apm_data_dumper,
|
|
config)) {}
|
|
ApmDataDumper apm_data_dumper;
|
|
std::unique_ptr<AdaptiveDigitalGainApplier> gain_applier;
|
|
};
|
|
|
|
// Returns a `FrameInfo` sample to simulate noiseless speech detected with
|
|
// maximum probability and with level, headroom and limiter envelope chosen
|
|
// so that the resulting gain equals the default initial adaptive digital gain
|
|
// i.e., no gain adaptation is expected.
|
|
AdaptiveDigitalGainApplier::FrameInfo GetFrameInfoToNotAdapt(
|
|
const AdaptiveDigitalConfig& config) {
|
|
AdaptiveDigitalGainApplier::FrameInfo info;
|
|
info.speech_probability = kMaxSpeechProbability;
|
|
info.speech_level_dbfs = -config.initial_gain_db - config.headroom_db;
|
|
info.speech_level_reliable = true;
|
|
info.noise_rms_dbfs = kNoNoiseDbfs;
|
|
info.headroom_db = config.headroom_db;
|
|
info.limiter_envelope_dbfs = -2.0f;
|
|
return info;
|
|
}
|
|
|
|
TEST(GainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
|
|
GainApplierHelper helper(kDefaultConfig);
|
|
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo);
|
|
// Make one call with reasonable audio level values and settings.
|
|
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
|
|
helper.gain_applier->Process(GetFrameInfoToNotAdapt(kDefaultConfig),
|
|
fake_audio.float_frame_view());
|
|
}
|
|
|
|
// Checks that the maximum allowed gain is applied.
|
|
TEST(GainController2AdaptiveGainApplier, MaxGainApplied) {
|
|
constexpr int kNumFramesToAdapt =
|
|
static_cast<int>(kDefaultConfig.max_gain_db /
|
|
GetMaxGainChangePerFrameDb(
|
|
kDefaultConfig.max_gain_change_db_per_second)) +
|
|
kNumExtraFrames;
|
|
|
|
GainApplierHelper helper(kDefaultConfig);
|
|
helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
|
|
AdaptiveDigitalGainApplier::FrameInfo info =
|
|
GetFrameInfoToNotAdapt(kDefaultConfig);
|
|
info.speech_level_dbfs = -60.0f;
|
|
float applied_gain;
|
|
for (int i = 0; i < kNumFramesToAdapt; ++i) {
|
|
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
|
|
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
|
applied_gain = fake_audio.float_frame_view().channel(0)[0];
|
|
}
|
|
const float applied_gain_db = 20.0f * std::log10f(applied_gain);
|
|
EXPECT_NEAR(applied_gain_db, kDefaultConfig.max_gain_db, 0.1f);
|
|
}
|
|
|
|
TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
|
|
GainApplierHelper helper(kDefaultConfig);
|
|
helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
|
|
|
|
constexpr float initial_level_dbfs = -25.0f;
|
|
constexpr float kMaxGainChangeDbPerFrame =
|
|
GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
|
|
constexpr int kNumFramesToAdapt =
|
|
static_cast<int>(initial_level_dbfs / kMaxGainChangeDbPerFrame) +
|
|
kNumExtraFrames;
|
|
|
|
const float max_change_per_frame_linear = DbToRatio(kMaxGainChangeDbPerFrame);
|
|
|
|
float last_gain_linear = 1.f;
|
|
for (int i = 0; i < kNumFramesToAdapt; ++i) {
|
|
SCOPED_TRACE(i);
|
|
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
|
|
AdaptiveDigitalGainApplier::FrameInfo info =
|
|
GetFrameInfoToNotAdapt(kDefaultConfig);
|
|
info.speech_level_dbfs = initial_level_dbfs;
|
|
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
|
float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
|
|
EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
|
|
max_change_per_frame_linear);
|
|
last_gain_linear = current_gain_linear;
|
|
}
|
|
|
|
// Check that the same is true when gain decreases as well.
|
|
for (int i = 0; i < kNumFramesToAdapt; ++i) {
|
|
SCOPED_TRACE(i);
|
|
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
|
|
AdaptiveDigitalGainApplier::FrameInfo info =
|
|
GetFrameInfoToNotAdapt(kDefaultConfig);
|
|
info.speech_level_dbfs = 0.f;
|
|
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
|
float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
|
|
EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
|
|
max_change_per_frame_linear);
|
|
last_gain_linear = current_gain_linear;
|
|
}
|
|
}
|
|
|
|
TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
|
|
GainApplierHelper helper(kDefaultConfig);
|
|
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
|
|
|
constexpr float initial_level_dbfs = -25.0f;
|
|
|
|
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
|
AdaptiveDigitalGainApplier::FrameInfo info =
|
|
GetFrameInfoToNotAdapt(kDefaultConfig);
|
|
info.speech_level_dbfs = initial_level_dbfs;
|
|
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
|
float maximal_difference = 0.0f;
|
|
float current_value = 1.0f * DbToRatio(kDefaultConfig.initial_gain_db);
|
|
for (const auto& x : fake_audio.float_frame_view().channel(0)) {
|
|
const float difference = std::abs(x - current_value);
|
|
maximal_difference = std::max(maximal_difference, difference);
|
|
current_value = x;
|
|
}
|
|
|
|
const float max_change_per_frame_linear = DbToRatio(
|
|
GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second));
|
|
const float max_change_per_sample =
|
|
max_change_per_frame_linear / kFrameLen10ms48kHz;
|
|
|
|
EXPECT_LE(maximal_difference, max_change_per_sample);
|
|
}
|
|
|
|
TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) {
|
|
GainApplierHelper helper(kDefaultConfig);
|
|
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
|
|
|
constexpr float initial_level_dbfs = -25.0f;
|
|
constexpr int num_initial_frames =
|
|
kDefaultConfig.initial_gain_db /
|
|
GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
|
|
constexpr int num_frames = 50;
|
|
|
|
ASSERT_GT(kWithNoiseDbfs, kDefaultConfig.max_output_noise_level_dbfs)
|
|
<< "kWithNoiseDbfs is too low";
|
|
|
|
for (int i = 0; i < num_initial_frames + num_frames; ++i) {
|
|
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
|
AdaptiveDigitalGainApplier::FrameInfo info =
|
|
GetFrameInfoToNotAdapt(kDefaultConfig);
|
|
info.speech_level_dbfs = initial_level_dbfs;
|
|
info.noise_rms_dbfs = kWithNoiseDbfs;
|
|
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
|
|
|
// Wait so that the adaptive gain applier has time to lower the gain.
|
|
if (i > num_initial_frames) {
|
|
const float maximal_ratio =
|
|
*std::max_element(fake_audio.float_frame_view().channel(0).begin(),
|
|
fake_audio.float_frame_view().channel(0).end());
|
|
|
|
EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) {
|
|
GainApplierHelper helper(kDefaultConfig);
|
|
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo);
|
|
|
|
// Make one call with positive audio level values and settings.
|
|
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
|
|
AdaptiveDigitalGainApplier::FrameInfo info =
|
|
GetFrameInfoToNotAdapt(kDefaultConfig);
|
|
info.speech_level_dbfs = 5.0f;
|
|
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
|
}
|
|
|
|
TEST(GainController2GainApplier, AudioLevelLimitsGain) {
|
|
GainApplierHelper helper(kDefaultConfig);
|
|
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
|
|
|
constexpr float initial_level_dbfs = -25.0f;
|
|
constexpr int num_initial_frames =
|
|
kDefaultConfig.initial_gain_db /
|
|
GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
|
|
constexpr int num_frames = 50;
|
|
|
|
ASSERT_GT(kWithNoiseDbfs, kDefaultConfig.max_output_noise_level_dbfs)
|
|
<< "kWithNoiseDbfs is too low";
|
|
|
|
for (int i = 0; i < num_initial_frames + num_frames; ++i) {
|
|
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
|
AdaptiveDigitalGainApplier::FrameInfo info =
|
|
GetFrameInfoToNotAdapt(kDefaultConfig);
|
|
info.speech_level_dbfs = initial_level_dbfs;
|
|
info.limiter_envelope_dbfs = 1.0f;
|
|
info.speech_level_reliable = false;
|
|
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
|
|
|
// Wait so that the adaptive gain applier has time to lower the gain.
|
|
if (i > num_initial_frames) {
|
|
const float maximal_ratio =
|
|
*std::max_element(fake_audio.float_frame_view().channel(0).begin(),
|
|
fake_audio.float_frame_view().channel(0).end());
|
|
|
|
EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f);
|
|
}
|
|
}
|
|
}
|
|
|
|
class AdaptiveDigitalGainApplierTest : public ::testing::TestWithParam<int> {
|
|
protected:
|
|
int adjacent_speech_frames_threshold() const { return GetParam(); }
|
|
};
|
|
|
|
TEST_P(AdaptiveDigitalGainApplierTest,
|
|
DoNotIncreaseGainWithTooFewSpeechFrames) {
|
|
AdaptiveDigitalConfig config;
|
|
config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold();
|
|
GainApplierHelper helper(config);
|
|
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
|
|
|
// Lower the speech level so that the target gain will be increased.
|
|
AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
|
|
info.speech_level_dbfs -= 12.0f;
|
|
|
|
float prev_gain = 0.0f;
|
|
for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) {
|
|
SCOPED_TRACE(i);
|
|
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
|
helper.gain_applier->Process(info, audio.float_frame_view());
|
|
const float gain = audio.float_frame_view().channel(0)[0];
|
|
if (i > 0) {
|
|
EXPECT_EQ(prev_gain, gain); // No gain increase applied.
|
|
}
|
|
prev_gain = gain;
|
|
}
|
|
}
|
|
|
|
TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
|
|
AdaptiveDigitalConfig config;
|
|
config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold();
|
|
GainApplierHelper helper(config);
|
|
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
|
|
|
// Lower the speech level so that the target gain will be increased.
|
|
AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
|
|
info.speech_level_dbfs -= 12.0f;
|
|
|
|
float prev_gain = 0.0f;
|
|
for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) {
|
|
SCOPED_TRACE(i);
|
|
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
|
helper.gain_applier->Process(info, audio.float_frame_view());
|
|
prev_gain = audio.float_frame_view().channel(0)[0];
|
|
}
|
|
|
|
// Process one more speech frame.
|
|
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
|
helper.gain_applier->Process(info, audio.float_frame_view());
|
|
|
|
// An increased gain has been applied.
|
|
EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain);
|
|
}
|
|
|
|
INSTANTIATE_TEST_SUITE_P(GainController2,
|
|
AdaptiveDigitalGainApplierTest,
|
|
::testing::Values(1, 7, 31));
|
|
|
|
// Checks that the input is never modified when running in dry run mode.
|
|
TEST(GainController2GainApplier, DryRunDoesNotChangeInput) {
|
|
AdaptiveDigitalConfig config;
|
|
config.dry_run = true;
|
|
GainApplierHelper helper(config);
|
|
|
|
// Simulate an input signal with log speech level.
|
|
AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
|
|
info.speech_level_dbfs = -60.0f;
|
|
const int num_frames_to_adapt =
|
|
static_cast<int>(
|
|
config.max_gain_db /
|
|
GetMaxGainChangePerFrameDb(config.max_gain_change_db_per_second)) +
|
|
kNumExtraFrames;
|
|
constexpr float kPcmSamples = 123.456f;
|
|
// Run the gain applier and check that the PCM samples are not modified.
|
|
helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
|
|
for (int i = 0; i < num_frames_to_adapt; ++i) {
|
|
SCOPED_TRACE(i);
|
|
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, kPcmSamples);
|
|
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
|
EXPECT_FLOAT_EQ(fake_audio.float_frame_view().channel(0)[0], kPcmSamples);
|
|
}
|
|
}
|
|
|
|
// Checks that no sample is modified before and after the sample rate changes.
|
|
TEST(GainController2GainApplier, DryRunHandlesSampleRateChange) {
|
|
AdaptiveDigitalConfig config;
|
|
config.dry_run = true;
|
|
GainApplierHelper helper(config);
|
|
|
|
AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
|
|
info.speech_level_dbfs = -60.0f;
|
|
constexpr float kPcmSamples = 123.456f;
|
|
VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
|
|
helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
|
|
helper.gain_applier->Process(info, fake_audio_8k.float_frame_view());
|
|
EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
|
|
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
|
VectorFloatFrame fake_audio_48k(kMono, kFrameLen10ms48kHz, kPcmSamples);
|
|
helper.gain_applier->Process(info, fake_audio_48k.float_frame_view());
|
|
EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
|
|
}
|
|
|
|
// Checks that no sample is modified before and after the number of channels
|
|
// changes.
|
|
TEST(GainController2GainApplier, DryRunHandlesNumChannelsChange) {
|
|
AdaptiveDigitalConfig config;
|
|
config.dry_run = true;
|
|
GainApplierHelper helper(config);
|
|
|
|
AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
|
|
info.speech_level_dbfs = -60.0f;
|
|
constexpr float kPcmSamples = 123.456f;
|
|
VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
|
|
helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
|
|
helper.gain_applier->Process(info, fake_audio_8k.float_frame_view());
|
|
EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
|
|
VectorFloatFrame fake_audio_48k(kStereo, kFrameLen10ms8kHz, kPcmSamples);
|
|
helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kStereo);
|
|
helper.gain_applier->Process(info, fake_audio_48k.float_frame_view());
|
|
EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
|
|
EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(1)[0], kPcmSamples);
|
|
}
|
|
|
|
} // namespace
|
|
} // namespace webrtc
|