mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-13 22:00:47 +01:00

Bug: webrtc:15874
Change-Id: Ie8a6e031c0f0505cfe238f7d252c47e9c34408d4
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/347983
Commit-Queue: Harald Alvestrand <hta@webrtc.org>
Auto-Submit: Florent Castelli <orphis@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42128}
207 lines
9.3 KiB
C++
207 lines
9.3 KiB
C++
/*
|
|
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "modules/audio_processing/agc2/speech_level_estimator.h"
|
|
|
|
#include <memory>
|
|
|
|
#include "api/audio/audio_processing.h"
|
|
#include "modules/audio_processing/agc2/agc2_common.h"
|
|
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
|
#include "rtc_base/gunit.h"
|
|
|
|
namespace webrtc {
|
|
namespace {
|
|
|
|
using AdaptiveDigitalConfig =
|
|
AudioProcessing::Config::GainController2::AdaptiveDigital;
|
|
|
|
// Number of speech frames that the level estimator must observe in order to
|
|
// become confident about the estimated level.
|
|
constexpr int kNumFramesToConfidence =
|
|
kLevelEstimatorTimeToConfidenceMs / kFrameDurationMs;
|
|
static_assert(kNumFramesToConfidence > 0, "");
|
|
|
|
constexpr float kConvergenceSpeedTestsLevelTolerance = 0.5f;
|
|
|
|
// Provides the `vad_level` value `num_iterations` times to `level_estimator`.
|
|
void RunOnConstantLevel(int num_iterations,
|
|
float rms_dbfs,
|
|
float peak_dbfs,
|
|
float speech_probability,
|
|
SpeechLevelEstimator& level_estimator) {
|
|
for (int i = 0; i < num_iterations; ++i) {
|
|
level_estimator.Update(rms_dbfs, peak_dbfs, speech_probability);
|
|
}
|
|
}
|
|
|
|
constexpr float kNoSpeechProbability = 0.0f;
|
|
constexpr float kLowSpeechProbability = kVadConfidenceThreshold / 2.0f;
|
|
constexpr float kMaxSpeechProbability = 1.0f;
|
|
|
|
// Level estimator with data dumper.
|
|
struct TestLevelEstimator {
|
|
explicit TestLevelEstimator(int adjacent_speech_frames_threshold)
|
|
: data_dumper(0),
|
|
estimator(std::make_unique<SpeechLevelEstimator>(
|
|
&data_dumper,
|
|
AdaptiveDigitalConfig{},
|
|
adjacent_speech_frames_threshold)),
|
|
initial_speech_level_dbfs(estimator->level_dbfs()),
|
|
level_rms_dbfs(initial_speech_level_dbfs / 2.0f),
|
|
level_peak_dbfs(initial_speech_level_dbfs / 3.0f) {
|
|
RTC_DCHECK_LT(level_rms_dbfs, level_peak_dbfs);
|
|
RTC_DCHECK_LT(initial_speech_level_dbfs, level_rms_dbfs);
|
|
RTC_DCHECK_GT(level_rms_dbfs - initial_speech_level_dbfs, 5.0f)
|
|
<< "Adjust `level_rms_dbfs` so that the difference from the initial "
|
|
"level is wide enough for the tests";
|
|
}
|
|
ApmDataDumper data_dumper;
|
|
std::unique_ptr<SpeechLevelEstimator> estimator;
|
|
const float initial_speech_level_dbfs;
|
|
const float level_rms_dbfs;
|
|
const float level_peak_dbfs;
|
|
};
|
|
|
|
// Checks that the level estimator converges to a constant input speech level.
|
|
TEST(GainController2SpeechLevelEstimator, LevelStabilizes) {
|
|
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
|
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
|
|
level_estimator.level_rms_dbfs,
|
|
level_estimator.level_peak_dbfs, kMaxSpeechProbability,
|
|
*level_estimator.estimator);
|
|
const float estimated_level_dbfs = level_estimator.estimator->level_dbfs();
|
|
RunOnConstantLevel(/*num_iterations=*/1, level_estimator.level_rms_dbfs,
|
|
level_estimator.level_peak_dbfs, kMaxSpeechProbability,
|
|
*level_estimator.estimator);
|
|
EXPECT_NEAR(level_estimator.estimator->level_dbfs(), estimated_level_dbfs,
|
|
0.1f);
|
|
}
|
|
|
|
// Checks that the level controller does not become confident when too few
|
|
// speech frames are observed.
|
|
TEST(GainController2SpeechLevelEstimator, IsNotConfident) {
|
|
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
|
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence / 2,
|
|
level_estimator.level_rms_dbfs,
|
|
level_estimator.level_peak_dbfs, kMaxSpeechProbability,
|
|
*level_estimator.estimator);
|
|
EXPECT_FALSE(level_estimator.estimator->is_confident());
|
|
}
|
|
|
|
// Checks that the level controller becomes confident when enough speech frames
|
|
// are observed.
|
|
TEST(GainController2SpeechLevelEstimator, IsConfident) {
|
|
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
|
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
|
|
level_estimator.level_rms_dbfs,
|
|
level_estimator.level_peak_dbfs, kMaxSpeechProbability,
|
|
*level_estimator.estimator);
|
|
EXPECT_TRUE(level_estimator.estimator->is_confident());
|
|
}
|
|
|
|
// Checks that the estimated level is not affected by the level of non-speech
|
|
// frames.
|
|
TEST(GainController2SpeechLevelEstimator, EstimatorIgnoresNonSpeechFrames) {
|
|
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
|
// Simulate speech.
|
|
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
|
|
level_estimator.level_rms_dbfs,
|
|
level_estimator.level_peak_dbfs, kMaxSpeechProbability,
|
|
*level_estimator.estimator);
|
|
const float estimated_level_dbfs = level_estimator.estimator->level_dbfs();
|
|
// Simulate full-scale non-speech.
|
|
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
|
|
/*rms_dbfs=*/0.0f, /*peak_dbfs=*/0.0f,
|
|
kNoSpeechProbability, *level_estimator.estimator);
|
|
// No estimated level change is expected.
|
|
EXPECT_FLOAT_EQ(level_estimator.estimator->level_dbfs(),
|
|
estimated_level_dbfs);
|
|
}
|
|
|
|
// Checks the convergence speed of the estimator before it becomes confident.
|
|
TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedBeforeConfidence) {
|
|
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
|
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
|
|
level_estimator.level_rms_dbfs,
|
|
level_estimator.level_peak_dbfs, kMaxSpeechProbability,
|
|
*level_estimator.estimator);
|
|
EXPECT_NEAR(level_estimator.estimator->level_dbfs(),
|
|
level_estimator.level_rms_dbfs,
|
|
kConvergenceSpeedTestsLevelTolerance);
|
|
}
|
|
|
|
// Checks the convergence speed of the estimator after it becomes confident.
|
|
TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedAfterConfidence) {
|
|
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
|
// Reach confidence using the initial level estimate.
|
|
RunOnConstantLevel(
|
|
/*num_iterations=*/kNumFramesToConfidence,
|
|
/*rms_dbfs=*/level_estimator.initial_speech_level_dbfs,
|
|
/*peak_dbfs=*/level_estimator.initial_speech_level_dbfs + 6.0f,
|
|
kMaxSpeechProbability, *level_estimator.estimator);
|
|
// No estimate change should occur, but confidence is achieved.
|
|
ASSERT_FLOAT_EQ(level_estimator.estimator->level_dbfs(),
|
|
level_estimator.initial_speech_level_dbfs);
|
|
ASSERT_TRUE(level_estimator.estimator->is_confident());
|
|
// After confidence.
|
|
constexpr float kConvergenceTimeAfterConfidenceNumFrames = 700; // 7 seconds.
|
|
static_assert(
|
|
kConvergenceTimeAfterConfidenceNumFrames > kNumFramesToConfidence, "");
|
|
RunOnConstantLevel(
|
|
/*num_iterations=*/kConvergenceTimeAfterConfidenceNumFrames,
|
|
level_estimator.level_rms_dbfs, level_estimator.level_peak_dbfs,
|
|
kMaxSpeechProbability, *level_estimator.estimator);
|
|
EXPECT_NEAR(level_estimator.estimator->level_dbfs(),
|
|
level_estimator.level_rms_dbfs,
|
|
kConvergenceSpeedTestsLevelTolerance);
|
|
}
|
|
|
|
class SpeechLevelEstimatorParametrization
|
|
: public ::testing::TestWithParam<int> {
|
|
protected:
|
|
int adjacent_speech_frames_threshold() const { return GetParam(); }
|
|
};
|
|
|
|
// Verifies that the estimate does not change while fewer than
// `adjacent_speech_frames_threshold()` speech frames in a row are observed,
// nor when that run is followed by a frame with low speech probability.
TEST_P(SpeechLevelEstimatorParametrization, DoNotAdaptToShortSpeechSegments) {
  TestLevelEstimator fixture(adjacent_speech_frames_threshold());
  SpeechLevelEstimator& estimator = *fixture.estimator;
  const float initial_level = estimator.level_dbfs();
  ASSERT_LT(initial_level, fixture.level_peak_dbfs);

  // One speech frame short of the threshold; the estimate is checked after
  // every frame.
  const int num_speech_frames = adjacent_speech_frames_threshold() - 1;
  for (int frame = 0; frame < num_speech_frames; ++frame) {
    SCOPED_TRACE(frame);
    estimator.Update(fixture.level_rms_dbfs, fixture.level_peak_dbfs,
                     kMaxSpeechProbability);
    EXPECT_EQ(initial_level, estimator.level_dbfs());
  }

  // A frame with low speech probability follows the short speech run.
  estimator.Update(fixture.level_rms_dbfs, fixture.level_peak_dbfs,
                   kLowSpeechProbability);
  EXPECT_EQ(initial_level, estimator.level_dbfs());
}
|
|
|
|
// Verifies that the estimate rises above its initial value once
// `adjacent_speech_frames_threshold()` speech frames in a row have been
// observed.
TEST_P(SpeechLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) {
  TestLevelEstimator fixture(adjacent_speech_frames_threshold());
  SpeechLevelEstimator& estimator = *fixture.estimator;
  const float initial_level = estimator.level_dbfs();
  ASSERT_LT(initial_level, fixture.level_peak_dbfs);

  // Feed exactly as many consecutive speech frames as the threshold.
  RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold(),
                     fixture.level_rms_dbfs, fixture.level_peak_dbfs,
                     kMaxSpeechProbability, estimator);

  EXPECT_LT(initial_level, estimator.level_dbfs());
}
|
|
|
|
// Runs the parametrized tests with adjacent speech frames thresholds of 1, 9
// and 17.
INSTANTIATE_TEST_SUITE_P(GainController2,
                         SpeechLevelEstimatorParametrization,
                         ::testing::Values(1, 9, 17));
|
|
|
|
} // namespace
|
|
} // namespace webrtc
|