mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-13 05:40:42 +01:00
Removing the intelligibility enhancer.
The intelligibility enhancer is always disabled and it is the only non-test target using the lapped transform in common_audio (which we planned to remove).

Bug: webrtc:9689, webrtc:5298
Change-Id: Ida65d3aa11ac366471e7e5cbc053108b376c67d8
Reviewed-on: https://webrtc-review.googlesource.com/96460
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Alex Loiko <aleloi@webrtc.org>
Reviewed-by: Fredrik Solenberg <solenberg@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24504}
This commit is contained in:
parent
fc173d00ec
commit
cc22f51988
33 changed files with 17 additions and 1723 deletions
|
@ -44,7 +44,6 @@ struct AudioOptions {
|
|||
SetFrom(&extended_filter_aec, change.extended_filter_aec);
|
||||
SetFrom(&delay_agnostic_aec, change.delay_agnostic_aec);
|
||||
SetFrom(&experimental_ns, change.experimental_ns);
|
||||
SetFrom(&intelligibility_enhancer, change.intelligibility_enhancer);
|
||||
SetFrom(&residual_echo_detector, change.residual_echo_detector);
|
||||
SetFrom(&tx_agc_target_dbov, change.tx_agc_target_dbov);
|
||||
SetFrom(&tx_agc_digital_compression_gain,
|
||||
|
@ -74,7 +73,6 @@ struct AudioOptions {
|
|||
extended_filter_aec == o.extended_filter_aec &&
|
||||
delay_agnostic_aec == o.delay_agnostic_aec &&
|
||||
experimental_ns == o.experimental_ns &&
|
||||
intelligibility_enhancer == o.intelligibility_enhancer &&
|
||||
residual_echo_detector == o.residual_echo_detector &&
|
||||
tx_agc_target_dbov == o.tx_agc_target_dbov &&
|
||||
tx_agc_digital_compression_gain ==
|
||||
|
@ -108,7 +106,6 @@ struct AudioOptions {
|
|||
ost << ToStringIfSet("extended_filter_aec", extended_filter_aec);
|
||||
ost << ToStringIfSet("delay_agnostic_aec", delay_agnostic_aec);
|
||||
ost << ToStringIfSet("experimental_ns", experimental_ns);
|
||||
ost << ToStringIfSet("intelligibility_enhancer", intelligibility_enhancer);
|
||||
ost << ToStringIfSet("residual_echo_detector", residual_echo_detector);
|
||||
ost << ToStringIfSet("tx_agc_target_dbov", tx_agc_target_dbov);
|
||||
ost << ToStringIfSet("tx_agc_digital_compression_gain",
|
||||
|
@ -153,7 +150,6 @@ struct AudioOptions {
|
|||
absl::optional<bool> extended_filter_aec;
|
||||
absl::optional<bool> delay_agnostic_aec;
|
||||
absl::optional<bool> experimental_ns;
|
||||
absl::optional<bool> intelligibility_enhancer;
|
||||
// Note that tx_agc_* only applies to non-experimental AGC.
|
||||
absl::optional<bool> residual_echo_detector;
|
||||
absl::optional<uint16_t> tx_agc_target_dbov;
|
||||
|
|
|
@ -104,8 +104,6 @@ const char MediaConstraintsInterface::kNoiseSuppression[] =
|
|||
"googNoiseSuppression";
|
||||
const char MediaConstraintsInterface::kExperimentalNoiseSuppression[] =
|
||||
"googNoiseSuppression2";
|
||||
const char MediaConstraintsInterface::kIntelligibilityEnhancer[] =
|
||||
"intelligibilityEnhancer";
|
||||
const char MediaConstraintsInterface::kHighpassFilter[] = "googHighpassFilter";
|
||||
const char MediaConstraintsInterface::kTypingNoiseDetection[] =
|
||||
"googTypingNoiseDetection";
|
||||
|
@ -241,9 +239,6 @@ void CopyConstraintsIntoAudioOptions(
|
|||
ConstraintToOptional<bool>(
|
||||
constraints, MediaConstraintsInterface::kExperimentalNoiseSuppression,
|
||||
&options->experimental_ns);
|
||||
ConstraintToOptional<bool>(
|
||||
constraints, MediaConstraintsInterface::kIntelligibilityEnhancer,
|
||||
&options->intelligibility_enhancer);
|
||||
ConstraintToOptional<bool>(constraints,
|
||||
MediaConstraintsInterface::kHighpassFilter,
|
||||
&options->highpass_filter);
|
||||
|
|
|
@ -73,7 +73,6 @@ class MediaConstraintsInterface {
|
|||
static const char kExperimentalAutoGainControl[]; // googAutoGainControl2
|
||||
static const char kNoiseSuppression[]; // googNoiseSuppression
|
||||
static const char kExperimentalNoiseSuppression[]; // googNoiseSuppression2
|
||||
static const char kIntelligibilityEnhancer[]; // intelligibilityEnhancer
|
||||
static const char kHighpassFilter[]; // googHighpassFilter
|
||||
static const char kTypingNoiseDetection[]; // googTypingNoiseDetection
|
||||
static const char kAudioMirroring[]; // googAudioMirroring
|
||||
|
|
|
@ -273,12 +273,6 @@ rtc_static_library("rtc_audio_video") {
|
|||
suppressed_configs += [ "//build/config/clang:find_bad_constructs" ]
|
||||
}
|
||||
|
||||
if (rtc_enable_intelligibility_enhancer) {
|
||||
defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ]
|
||||
} else {
|
||||
defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ]
|
||||
}
|
||||
|
||||
if (rtc_opus_support_120ms_ptime) {
|
||||
defines += [ "WEBRTC_OPUS_SUPPORT_120MS_PTIME=1" ]
|
||||
} else {
|
||||
|
|
|
@ -53,14 +53,6 @@ constexpr size_t kMaxUnsignaledRecvStreams = 4;
|
|||
|
||||
constexpr int kNackRtpHistoryMs = 5000;
|
||||
|
||||
// Check to verify that the define for the intelligibility enhancer is properly
|
||||
// set.
|
||||
#if !defined(WEBRTC_INTELLIGIBILITY_ENHANCER) || \
|
||||
(WEBRTC_INTELLIGIBILITY_ENHANCER != 0 && \
|
||||
WEBRTC_INTELLIGIBILITY_ENHANCER != 1)
|
||||
#error "Set WEBRTC_INTELLIGIBILITY_ENHANCER to either 0 or 1"
|
||||
#endif
|
||||
|
||||
// For SendSideBwe, Opus bitrate should be in the range between 6000 and 32000.
|
||||
const int kOpusMinBitrateBps = 6000;
|
||||
const int kOpusBitrateFbBps = 32000;
|
||||
|
@ -296,7 +288,6 @@ void WebRtcVoiceEngine::Init() {
|
|||
options.extended_filter_aec = false;
|
||||
options.delay_agnostic_aec = false;
|
||||
options.experimental_ns = false;
|
||||
options.intelligibility_enhancer = false;
|
||||
options.residual_echo_detector = true;
|
||||
bool error = ApplyOptions(options);
|
||||
RTC_DCHECK(error);
|
||||
|
@ -410,11 +401,6 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) {
|
|||
}
|
||||
#endif
|
||||
|
||||
#if (WEBRTC_INTELLIGIBILITY_ENHANCER == 0)
|
||||
// Hardcode the intelligibility enhancer to be off.
|
||||
options.intelligibility_enhancer = false;
|
||||
#endif
|
||||
|
||||
if (options.echo_cancellation) {
|
||||
// Check if platform supports built-in EC. Currently only supported on
|
||||
// Android and in combination with Java based audio layer.
|
||||
|
@ -479,19 +465,9 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) {
|
|||
webrtc::apm_helpers::SetAgcConfig(apm(), default_agc_config_);
|
||||
}
|
||||
|
||||
if (options.intelligibility_enhancer) {
|
||||
intelligibility_enhancer_ = options.intelligibility_enhancer;
|
||||
}
|
||||
if (intelligibility_enhancer_ && *intelligibility_enhancer_) {
|
||||
RTC_LOG(LS_INFO) << "Enabling NS when Intelligibility Enhancer is active.";
|
||||
options.noise_suppression = intelligibility_enhancer_;
|
||||
}
|
||||
|
||||
if (options.noise_suppression) {
|
||||
if (adm()->BuiltInNSIsAvailable()) {
|
||||
bool builtin_ns =
|
||||
*options.noise_suppression &&
|
||||
!(intelligibility_enhancer_ && *intelligibility_enhancer_);
|
||||
bool builtin_ns = *options.noise_suppression;
|
||||
if (adm()->EnableBuiltInNS(builtin_ns) == 0 && builtin_ns) {
|
||||
// Disable internal software NS if built-in NS is enabled,
|
||||
// i.e., replace the software NS with the built-in NS.
|
||||
|
@ -558,13 +534,6 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) {
|
|||
new webrtc::ExperimentalNs(*experimental_ns_));
|
||||
}
|
||||
|
||||
if (intelligibility_enhancer_) {
|
||||
RTC_LOG(LS_INFO) << "Intelligibility Enhancer is enabled? "
|
||||
<< *intelligibility_enhancer_;
|
||||
config.Set<webrtc::Intelligibility>(
|
||||
new webrtc::Intelligibility(*intelligibility_enhancer_));
|
||||
}
|
||||
|
||||
webrtc::AudioProcessing::Config apm_config = apm()->GetConfig();
|
||||
|
||||
if (options.highpass_filter) {
|
||||
|
|
|
@ -120,15 +120,13 @@ class WebRtcVoiceEngine final {
|
|||
bool initialized_ = false;
|
||||
|
||||
webrtc::AgcConfig default_agc_config_;
|
||||
// Cache received extended_filter_aec, delay_agnostic_aec, experimental_ns
|
||||
// and intelligibility_enhancer values, and apply them
|
||||
// in case they are missing in the audio options. We need to do this because
|
||||
// SetExtraOptions() will revert to defaults for options which are not
|
||||
// provided.
|
||||
// Cache received extended_filter_aec, delay_agnostic_aec and experimental_ns
|
||||
// values, and apply them in case they are missing in the audio options.
|
||||
// We need to do this because SetExtraOptions() will revert to defaults for
|
||||
// options which are not provided.
|
||||
absl::optional<bool> extended_filter_aec_;
|
||||
absl::optional<bool> delay_agnostic_aec_;
|
||||
absl::optional<bool> experimental_ns_;
|
||||
absl::optional<bool> intelligibility_enhancer_;
|
||||
// Jitter buffer settings for new streams.
|
||||
size_t audio_jitter_buffer_max_packets_ = 50;
|
||||
bool audio_jitter_buffer_fast_accelerate_ = false;
|
||||
|
|
|
@ -138,18 +138,6 @@ rtc_static_library("audio_processing") {
|
|||
defines += [ "WEBRTC_UNTRUSTED_DELAY" ]
|
||||
}
|
||||
|
||||
if (rtc_enable_intelligibility_enhancer) {
|
||||
defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ]
|
||||
sources += [
|
||||
"intelligibility/intelligibility_enhancer.cc",
|
||||
"intelligibility/intelligibility_enhancer.h",
|
||||
"intelligibility/intelligibility_utils.cc",
|
||||
"intelligibility/intelligibility_utils.h",
|
||||
]
|
||||
} else {
|
||||
defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ]
|
||||
}
|
||||
|
||||
if (rtc_prefer_fixed_point) {
|
||||
defines += [ "WEBRTC_NS_FIXED" ]
|
||||
} else {
|
||||
|
@ -337,10 +325,6 @@ if (rtc_include_tests) {
|
|||
":transient_suppression_test",
|
||||
]
|
||||
|
||||
if (rtc_enable_intelligibility_enhancer) {
|
||||
deps += [ ":intelligibility_proc" ]
|
||||
}
|
||||
|
||||
if (rtc_enable_protobuf) {
|
||||
deps += [
|
||||
":audioproc_f",
|
||||
|
@ -422,16 +406,6 @@ if (rtc_include_tests) {
|
|||
|
||||
defines = []
|
||||
|
||||
if (rtc_enable_intelligibility_enhancer) {
|
||||
defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ]
|
||||
sources += [
|
||||
"intelligibility/intelligibility_enhancer_unittest.cc",
|
||||
"intelligibility/intelligibility_utils_unittest.cc",
|
||||
]
|
||||
} else {
|
||||
defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ]
|
||||
}
|
||||
|
||||
if (rtc_prefer_fixed_point) {
|
||||
defines += [ "WEBRTC_AUDIOPROC_FIXED_PROFILE" ]
|
||||
} else {
|
||||
|
@ -500,12 +474,6 @@ if (rtc_include_tests) {
|
|||
"../../test:perf_test",
|
||||
"../../test:test_support",
|
||||
]
|
||||
|
||||
if (rtc_enable_intelligibility_enhancer) {
|
||||
defines = [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ]
|
||||
} else {
|
||||
defines = [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ]
|
||||
}
|
||||
}
|
||||
|
||||
rtc_source_set("file_audio_generator_unittests") {
|
||||
|
@ -666,24 +634,6 @@ if (rtc_include_tests) {
|
|||
]
|
||||
}
|
||||
|
||||
if (rtc_enable_intelligibility_enhancer) {
|
||||
rtc_executable("intelligibility_proc") {
|
||||
testonly = true
|
||||
sources = [
|
||||
"intelligibility/test/intelligibility_proc.cc",
|
||||
]
|
||||
deps = [
|
||||
":audio_processing",
|
||||
":audioproc_test_utils",
|
||||
"../../common_audio",
|
||||
"../../rtc_base:rtc_base_approved",
|
||||
"../../system_wrappers:metrics_default",
|
||||
"../../test:test_support",
|
||||
"//testing/gtest",
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
if (rtc_enable_protobuf) {
|
||||
proto_library("audioproc_unittest_proto") {
|
||||
sources = [
|
||||
|
|
|
@ -45,8 +45,6 @@ void CopyFromConfigToEvent(const webrtc::InternalAPMConfig& config,
|
|||
|
||||
pb_cfg->set_transient_suppression_enabled(
|
||||
config.transient_suppression_enabled);
|
||||
pb_cfg->set_intelligibility_enhancer_enabled(
|
||||
config.intelligibility_enhancer_enabled);
|
||||
|
||||
pb_cfg->set_pre_amplifier_enabled(config.pre_amplifier_enabled);
|
||||
pb_cfg->set_pre_amplifier_fixed_gain_factor(
|
||||
|
|
|
@ -38,9 +38,6 @@
|
|||
#include "rtc_base/system/arch.h"
|
||||
#include "rtc_base/timeutils.h"
|
||||
#include "rtc_base/trace_event.h"
|
||||
#if WEBRTC_INTELLIGIBILITY_ENHANCER
|
||||
#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
|
||||
#endif
|
||||
#include "modules/audio_processing/level_estimator_impl.h"
|
||||
#include "modules/audio_processing/low_cut_filter.h"
|
||||
#include "modules/audio_processing/noise_suppression_impl.h"
|
||||
|
@ -50,14 +47,6 @@
|
|||
#include "rtc_base/atomicops.h"
|
||||
#include "system_wrappers/include/metrics.h"
|
||||
|
||||
// Check to verify that the define for the intelligibility enhancer is properly
|
||||
// set.
|
||||
#if !defined(WEBRTC_INTELLIGIBILITY_ENHANCER) || \
|
||||
(WEBRTC_INTELLIGIBILITY_ENHANCER != 0 && \
|
||||
WEBRTC_INTELLIGIBILITY_ENHANCER != 1)
|
||||
#error "Set WEBRTC_INTELLIGIBILITY_ENHANCER to either 0 or 1"
|
||||
#endif
|
||||
|
||||
#define RETURN_ON_ERR(expr) \
|
||||
do { \
|
||||
int err = (expr); \
|
||||
|
@ -170,7 +159,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
|
|||
bool mobile_echo_controller_enabled,
|
||||
bool residual_echo_detector_enabled,
|
||||
bool noise_suppressor_enabled,
|
||||
bool intelligibility_enhancer_enabled,
|
||||
bool adaptive_gain_controller_enabled,
|
||||
bool gain_controller2_enabled,
|
||||
bool pre_amplifier_enabled,
|
||||
|
@ -186,8 +174,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
|
|||
changed |=
|
||||
(residual_echo_detector_enabled != residual_echo_detector_enabled_);
|
||||
changed |= (noise_suppressor_enabled != noise_suppressor_enabled_);
|
||||
changed |=
|
||||
(intelligibility_enhancer_enabled != intelligibility_enhancer_enabled_);
|
||||
changed |=
|
||||
(adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_);
|
||||
changed |=
|
||||
|
@ -204,7 +190,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
|
|||
mobile_echo_controller_enabled_ = mobile_echo_controller_enabled;
|
||||
residual_echo_detector_enabled_ = residual_echo_detector_enabled;
|
||||
noise_suppressor_enabled_ = noise_suppressor_enabled;
|
||||
intelligibility_enhancer_enabled_ = intelligibility_enhancer_enabled;
|
||||
adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled;
|
||||
gain_controller2_enabled_ = gain_controller2_enabled;
|
||||
pre_amplifier_enabled_ = pre_amplifier_enabled;
|
||||
|
@ -221,12 +206,7 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
|
|||
|
||||
bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandSubModulesActive()
|
||||
const {
|
||||
#if WEBRTC_INTELLIGIBILITY_ENHANCER
|
||||
return CaptureMultiBandProcessingActive() ||
|
||||
intelligibility_enhancer_enabled_ || voice_activity_detector_enabled_;
|
||||
#else
|
||||
return CaptureMultiBandProcessingActive() || voice_activity_detector_enabled_;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandProcessingActive()
|
||||
|
@ -260,11 +240,7 @@ bool AudioProcessingImpl::ApmSubmoduleStates::RenderFullBandProcessingActive()
|
|||
|
||||
bool AudioProcessingImpl::ApmSubmoduleStates::RenderMultiBandProcessingActive()
|
||||
const {
|
||||
#if WEBRTC_INTELLIGIBILITY_ENHANCER
|
||||
return intelligibility_enhancer_enabled_;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
struct AudioProcessingImpl::ApmPublicSubmodules {
|
||||
|
@ -283,9 +259,6 @@ struct AudioProcessingImpl::ApmPublicSubmodules {
|
|||
|
||||
// Accessed internally from both render and capture.
|
||||
std::unique_ptr<TransientSuppressor> transient_suppressor;
|
||||
#if WEBRTC_INTELLIGIBILITY_ENHANCER
|
||||
std::unique_ptr<IntelligibilityEnhancer> intelligibility_enhancer;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct AudioProcessingImpl::ApmPrivateSubmodules {
|
||||
|
@ -405,7 +378,7 @@ AudioProcessingImpl::AudioProcessingImpl(
|
|||
#else
|
||||
capture_(config.Get<ExperimentalNs>().enabled),
|
||||
#endif
|
||||
capture_nonlocked_(config.Get<Intelligibility>().enabled) {
|
||||
capture_nonlocked_() {
|
||||
{
|
||||
rtc::CritScope cs_render(&crit_render_);
|
||||
rtc::CritScope cs_capture(&crit_capture_);
|
||||
|
@ -589,9 +562,6 @@ int AudioProcessingImpl::InitializeLocked() {
|
|||
public_submodules_->gain_control_for_experimental_agc->Initialize();
|
||||
}
|
||||
InitializeTransient();
|
||||
#if WEBRTC_INTELLIGIBILITY_ENHANCER
|
||||
InitializeIntelligibility();
|
||||
#endif
|
||||
InitializeLowCutFilter();
|
||||
public_submodules_->noise_suppression->Initialize(num_proc_channels(),
|
||||
proc_sample_rate_hz());
|
||||
|
@ -742,15 +712,6 @@ void AudioProcessingImpl::SetExtraOptions(const webrtc::Config& config) {
|
|||
config.Get<ExperimentalNs>().enabled;
|
||||
InitializeTransient();
|
||||
}
|
||||
|
||||
#if WEBRTC_INTELLIGIBILITY_ENHANCER
|
||||
if (capture_nonlocked_.intelligibility_enabled !=
|
||||
config.Get<Intelligibility>().enabled) {
|
||||
capture_nonlocked_.intelligibility_enabled =
|
||||
config.Get<Intelligibility>().enabled;
|
||||
InitializeIntelligibility();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
int AudioProcessingImpl::proc_sample_rate_hz() const {
|
||||
|
@ -1306,18 +1267,6 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
|
|||
capture_buffer->CopyLowPassToReference();
|
||||
}
|
||||
public_submodules_->noise_suppression->ProcessCaptureAudio(capture_buffer);
|
||||
#if WEBRTC_INTELLIGIBILITY_ENHANCER
|
||||
if (capture_nonlocked_.intelligibility_enabled) {
|
||||
RTC_DCHECK(public_submodules_->noise_suppression->is_enabled());
|
||||
const int gain_db =
|
||||
public_submodules_->gain_control->is_enabled()
|
||||
? public_submodules_->gain_control->compression_gain_db()
|
||||
: 0;
|
||||
const float gain = DbToRatio(gain_db);
|
||||
public_submodules_->intelligibility_enhancer->SetCaptureNoiseEstimate(
|
||||
public_submodules_->noise_suppression->NoiseEstimate(), gain);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Ensure that the stream delay was set before the call to the
|
||||
// AECM ProcessCaptureAudio function.
|
||||
|
@ -1540,13 +1489,6 @@ int AudioProcessingImpl::ProcessRenderStreamLocked() {
|
|||
render_buffer->SplitIntoFrequencyBands();
|
||||
}
|
||||
|
||||
#if WEBRTC_INTELLIGIBILITY_ENHANCER
|
||||
if (capture_nonlocked_.intelligibility_enabled) {
|
||||
public_submodules_->intelligibility_enhancer->ProcessRenderAudio(
|
||||
render_buffer);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (submodule_states_.RenderMultiBandSubModulesActive()) {
|
||||
QueueBandedRenderAudio(render_buffer);
|
||||
}
|
||||
|
@ -1809,7 +1751,6 @@ bool AudioProcessingImpl::UpdateActiveSubmoduleStates() {
|
|||
public_submodules_->echo_control_mobile->is_enabled(),
|
||||
config_.residual_echo_detector.enabled,
|
||||
public_submodules_->noise_suppression->is_enabled(),
|
||||
capture_nonlocked_.intelligibility_enabled,
|
||||
public_submodules_->gain_control->is_enabled(),
|
||||
config_.gain_controller2.enabled, config_.pre_amplifier.enabled,
|
||||
capture_nonlocked_.echo_controller_enabled,
|
||||
|
@ -1830,18 +1771,6 @@ void AudioProcessingImpl::InitializeTransient() {
|
|||
}
|
||||
}
|
||||
|
||||
void AudioProcessingImpl::InitializeIntelligibility() {
|
||||
#if WEBRTC_INTELLIGIBILITY_ENHANCER
|
||||
if (capture_nonlocked_.intelligibility_enabled) {
|
||||
public_submodules_->intelligibility_enhancer.reset(
|
||||
new IntelligibilityEnhancer(capture_nonlocked_.split_rate,
|
||||
render_.render_audio->num_channels(),
|
||||
render_.render_audio->num_bands(),
|
||||
NoiseSuppressionImpl::num_noise_bins()));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void AudioProcessingImpl::InitializeLowCutFilter() {
|
||||
if (config_.high_pass_filter.enabled) {
|
||||
private_submodules_->low_cut_filter.reset(
|
||||
|
@ -2029,8 +1958,6 @@ void AudioProcessingImpl::WriteAecDumpConfigMessage(bool forced) {
|
|||
|
||||
apm_config.transient_suppression_enabled =
|
||||
capture_.transient_suppressor_enabled;
|
||||
apm_config.intelligibility_enhancer_enabled =
|
||||
capture_nonlocked_.intelligibility_enabled;
|
||||
apm_config.experiments_description = experiments_description;
|
||||
apm_config.pre_amplifier_enabled = config_.pre_amplifier.enabled;
|
||||
apm_config.pre_amplifier_fixed_gain_factor =
|
||||
|
|
|
@ -183,7 +183,6 @@ class AudioProcessingImpl : public AudioProcessing {
|
|||
bool mobile_echo_controller_enabled,
|
||||
bool residual_echo_detector_enabled,
|
||||
bool noise_suppressor_enabled,
|
||||
bool intelligibility_enhancer_enabled,
|
||||
bool adaptive_gain_controller_enabled,
|
||||
bool gain_controller2_enabled,
|
||||
bool pre_amplifier_enabled,
|
||||
|
@ -208,7 +207,6 @@ class AudioProcessingImpl : public AudioProcessing {
|
|||
bool mobile_echo_controller_enabled_ = false;
|
||||
bool residual_echo_detector_enabled_ = false;
|
||||
bool noise_suppressor_enabled_ = false;
|
||||
bool intelligibility_enhancer_enabled_ = false;
|
||||
bool adaptive_gain_controller_enabled_ = false;
|
||||
bool gain_controller2_enabled_ = false;
|
||||
bool pre_amplifier_enabled_ = false;
|
||||
|
@ -245,8 +243,6 @@ class AudioProcessingImpl : public AudioProcessing {
|
|||
// acquired.
|
||||
void InitializeTransient()
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
|
||||
void InitializeIntelligibility()
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
|
||||
int InitializeLocked(const ProcessingConfig& config)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
|
||||
void InitializeResidualEchoDetector()
|
||||
|
@ -399,18 +395,16 @@ class AudioProcessingImpl : public AudioProcessing {
|
|||
} capture_ RTC_GUARDED_BY(crit_capture_);
|
||||
|
||||
struct ApmCaptureNonLockedState {
|
||||
ApmCaptureNonLockedState(bool intelligibility_enabled)
|
||||
ApmCaptureNonLockedState()
|
||||
: capture_processing_format(kSampleRate16kHz),
|
||||
split_rate(kSampleRate16kHz),
|
||||
stream_delay_ms(0),
|
||||
intelligibility_enabled(intelligibility_enabled) {}
|
||||
stream_delay_ms(0) {}
|
||||
// Only the rate and samples fields of capture_processing_format_ are used
|
||||
// because the forward processing number of channels is mutable and is
|
||||
// tracked by the capture_audio_.
|
||||
StreamConfig capture_processing_format;
|
||||
int split_rate;
|
||||
int stream_delay_ms;
|
||||
bool intelligibility_enabled;
|
||||
bool echo_controller_enabled = false;
|
||||
} capture_nonlocked_;
|
||||
|
||||
|
|
|
@ -26,14 +26,6 @@
|
|||
#include "test/gtest.h"
|
||||
#include "test/testsupport/perf_test.h"
|
||||
|
||||
// Check to verify that the define for the intelligibility enhancer is properly
|
||||
// set.
|
||||
#if !defined(WEBRTC_INTELLIGIBILITY_ENHANCER) || \
|
||||
(WEBRTC_INTELLIGIBILITY_ENHANCER != 0 && \
|
||||
WEBRTC_INTELLIGIBILITY_ENHANCER != 1)
|
||||
#error "Set WEBRTC_INTELLIGIBILITY_ENHANCER to either 0 or 1"
|
||||
#endif
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
@ -49,7 +41,6 @@ enum class ProcessorType { kRender, kCapture };
|
|||
enum class SettingsType {
|
||||
kDefaultApmDesktop,
|
||||
kDefaultApmMobile,
|
||||
kDefaultApmDesktopAndIntelligibilityEnhancer,
|
||||
kAllSubmodulesTurnedOff,
|
||||
kDefaultApmDesktopWithoutDelayAgnostic,
|
||||
kDefaultApmDesktopWithoutExtendedFilter
|
||||
|
@ -99,20 +90,6 @@ struct SimulationConfig {
|
|||
simulation_configs.push_back(SimulationConfig(sample_rate, settings));
|
||||
}
|
||||
}
|
||||
|
||||
#if WEBRTC_INTELLIGIBILITY_ENHANCER == 1
|
||||
const SettingsType intelligibility_enhancer_settings[] = {
|
||||
SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer};
|
||||
|
||||
const int intelligibility_enhancer_sample_rates[] = {8000, 16000, 32000,
|
||||
48000};
|
||||
|
||||
for (auto sample_rate : intelligibility_enhancer_sample_rates) {
|
||||
for (auto settings : intelligibility_enhancer_settings) {
|
||||
simulation_configs.push_back(SimulationConfig(sample_rate, settings));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
const SettingsType mobile_settings[] = {SettingsType::kDefaultApmMobile};
|
||||
|
@ -137,9 +114,6 @@ struct SimulationConfig {
|
|||
case SettingsType::kDefaultApmDesktop:
|
||||
description = "DefaultApmDesktop";
|
||||
break;
|
||||
case SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer:
|
||||
description = "DefaultApmDesktopAndIntelligibilityEnhancer";
|
||||
break;
|
||||
case SettingsType::kAllSubmodulesTurnedOff:
|
||||
description = "AllSubmodulesOff";
|
||||
break;
|
||||
|
@ -538,16 +512,6 @@ class CallSimulator : public ::testing::TestWithParam<SimulationConfig> {
|
|||
apm_->SetExtraOptions(config);
|
||||
break;
|
||||
}
|
||||
case SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer: {
|
||||
Config config;
|
||||
config.Set<Intelligibility>(new Intelligibility(true));
|
||||
add_default_desktop_config(&config);
|
||||
apm_.reset(AudioProcessingBuilder().Create(config));
|
||||
ASSERT_TRUE(!!apm_);
|
||||
set_default_desktop_apm_runtime_settings(apm_.get());
|
||||
apm_->SetExtraOptions(config);
|
||||
break;
|
||||
}
|
||||
case SettingsType::kAllSubmodulesTurnedOff: {
|
||||
apm_.reset(AudioProcessingBuilder().Create());
|
||||
ASSERT_TRUE(!!apm_);
|
||||
|
|
|
@ -47,7 +47,6 @@ message Stream {
|
|||
// Contains the configurations of various APM component. A Config message is
|
||||
// added when any of the fields are changed.
|
||||
message Config {
|
||||
// Next field number 19.
|
||||
// Acoustic echo canceler.
|
||||
optional bool aec_enabled = 1;
|
||||
optional bool aec_delay_agnostic_enabled = 2;
|
||||
|
@ -73,11 +72,12 @@ message Config {
|
|||
// Semicolon-separated string containing experimental feature
|
||||
// descriptions.
|
||||
optional string experiments_description = 17;
|
||||
// Intelligibility Enhancer.
|
||||
optional bool intelligibility_enhancer_enabled = 18;
|
||||
reserved 18; // Intelligibility enhancer enabled (deprecated).
|
||||
// Pre amplifier.
|
||||
optional bool pre_amplifier_enabled = 19;
|
||||
optional float pre_amplifier_fixed_gain_factor = 20;
|
||||
|
||||
// Next field number 21.
|
||||
}
|
||||
|
||||
message Event {
|
||||
|
|
|
@ -32,8 +32,6 @@ bool InternalAPMConfig::operator==(const InternalAPMConfig& other) {
|
|||
hpf_enabled == other.hpf_enabled && ns_enabled == other.ns_enabled &&
|
||||
ns_level == other.ns_level &&
|
||||
transient_suppression_enabled == other.transient_suppression_enabled &&
|
||||
intelligibility_enhancer_enabled ==
|
||||
other.intelligibility_enhancer_enabled &&
|
||||
noise_robust_agc_enabled == other.noise_robust_agc_enabled &&
|
||||
pre_amplifier_enabled == other.pre_amplifier_enabled &&
|
||||
pre_amplifier_fixed_gain_factor ==
|
||||
|
|
|
@ -49,7 +49,6 @@ struct InternalAPMConfig {
|
|||
bool ns_enabled = false;
|
||||
int ns_level = 0;
|
||||
bool transient_suppression_enabled = false;
|
||||
bool intelligibility_enhancer_enabled = false;
|
||||
bool noise_robust_agc_enabled = false;
|
||||
bool pre_amplifier_enabled = false;
|
||||
float pre_amplifier_fixed_gain_factor = 1.f;
|
||||
|
|
|
@ -151,17 +151,6 @@ struct ExperimentalNs {
|
|||
bool enabled;
|
||||
};
|
||||
|
||||
// Use to enable intelligibility enhancer in audio processing.
|
||||
//
|
||||
// Note: If enabled and the reverse stream has more than one output channel,
|
||||
// the reverse stream will become an upmixed mono signal.
|
||||
struct Intelligibility {
|
||||
Intelligibility() : enabled(false) {}
|
||||
explicit Intelligibility(bool enabled) : enabled(enabled) {}
|
||||
static const ConfigOptionID identifier = ConfigOptionID::kIntelligibility;
|
||||
bool enabled;
|
||||
};
|
||||
|
||||
// The Audio Processing Module (APM) provides a collection of voice processing
|
||||
// components designed for real-time communications software.
|
||||
//
|
||||
|
|
|
@ -31,7 +31,7 @@ enum class ConfigOptionID {
|
|||
kExperimentalAgc,
|
||||
kExperimentalNs,
|
||||
kBeamforming, // Deprecated
|
||||
kIntelligibility,
|
||||
kIntelligibility, // Deprecated
|
||||
kEchoCanceller3, // Deprecated
|
||||
kAecRefinedAdaptiveFilter,
|
||||
kLevelControl // Deprecated
|
||||
|
|
|
@ -1,390 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "common_audio/window_generator.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
const size_t kErbResolution = 2;
|
||||
const int kWindowSizeMs = 16;
|
||||
const int kChunkSizeMs = 10; // Size provided by APM.
|
||||
const float kClipFreqKhz = 0.2f;
|
||||
const float kKbdAlpha = 1.5f;
|
||||
const float kLambdaBot = -1.f; // Extreme values in bisection
|
||||
const float kLambdaTop = -1e-5f; // search for lambda.
|
||||
const float kVoiceProbabilityThreshold = 0.5f;
|
||||
// Number of chunks after voice activity which is still considered speech.
|
||||
const size_t kSpeechOffsetDelay = 10;
|
||||
const float kDecayRate = 0.995f; // Power estimation decay rate.
|
||||
const float kMaxRelativeGainChange = 0.005f;
|
||||
const float kRho = 0.0004f; // Default production and interpretation SNR.
|
||||
const float kPowerNormalizationFactor = 1.f / (1 << 30);
|
||||
const float kMaxActiveSNR = 128.f; // 21dB
|
||||
const float kMinInactiveSNR = 32.f; // 15dB
|
||||
const size_t kGainUpdatePeriod = 10u;
|
||||
|
||||
// Returns dot product of vectors |a| and |b| with size |length|.
|
||||
float DotProduct(const float* a, const float* b, size_t length) {
|
||||
float ret = 0.f;
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
ret += a[i] * b[i];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Computes the power across ERB bands from the power spectral density |pow|.
|
||||
// Stores it in |result|.
|
||||
void MapToErbBands(const float* pow,
|
||||
const std::vector<std::vector<float>>& filter_bank,
|
||||
float* result) {
|
||||
for (size_t i = 0; i < filter_bank.size(); ++i) {
|
||||
RTC_DCHECK_GT(filter_bank[i].size(), 0);
|
||||
result[i] = kPowerNormalizationFactor *
|
||||
DotProduct(filter_bank[i].data(), pow, filter_bank[i].size());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Sets up the enhancer for |num_render_channels| channels of render audio at
// |sample_rate_hz|, split into |num_bands| frequency bands, fed with capture
// noise estimates of |num_noise_bins| bins.
//
// The initializer list follows the member declaration order. |bank_size_|,
// |sample_rate_hz_| and |center_freqs_| have to be initialized before the two
// CreateErbBank() calls because that function reads and writes them.
IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
                                                 size_t num_render_channels,
                                                 size_t num_bands,
                                                 size_t num_noise_bins)
    : freqs_(RealFourier::ComplexLength(
          RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),
      num_noise_bins_(num_noise_bins),
      chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)),
      bank_size_(GetBankSize(sample_rate_hz, kErbResolution)),
      sample_rate_hz_(sample_rate_hz),
      num_render_channels_(num_render_channels),
      clear_power_estimator_(freqs_, kDecayRate),
      noise_power_estimator_(num_noise_bins, kDecayRate),
      filtered_clear_pow_(bank_size_, 0.f),
      // NOTE(review): MapToErbBands() writes one value per ERB band
      // (|bank_size_| values) into this buffer, so it is only large enough
      // when num_noise_bins >= bank_size_ - confirm callers guarantee that.
      filtered_noise_pow_(num_noise_bins, 0.f),
      center_freqs_(bank_size_),
      capture_filter_bank_(CreateErbBank(num_noise_bins)),
      render_filter_bank_(CreateErbBank(freqs_)),
      gains_eq_(bank_size_),
      gain_applier_(freqs_, kMaxRelativeGainChange),
      audio_s16_(chunk_length_),
      chunks_since_voice_(kSpeechOffsetDelay),
      is_speech_(false),
      snr_(kMaxActiveSNR),
      is_active_(false),
      num_chunks_(0u),
      num_active_chunks_(0u),
      noise_estimation_buffer_(num_noise_bins),
      noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,
                              std::vector<float>(num_noise_bins),
                              RenderQueueItemVerifier<float>(num_noise_bins)) {
  RTC_DCHECK_LE(kRho, 1.f);

  // ERB-scale position of the clipping frequency. Bands below |start_freq_|
  // keep unity gain in SolveForGainsGivenLambda().
  const size_t erb_index = static_cast<size_t>(
      ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +
            43.f));
  start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);

  // Analysis/synthesis window for the lapped transform; the shift is half the
  // window length.
  size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_);
  std::vector<float> kbd_window(window_size);
  WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size,
                                       kbd_window.data());
  render_mangler_.reset(new LappedTransform(
      num_render_channels_, num_render_channels_, chunk_length_,
      kbd_window.data(), window_size, window_size / 2, this));

  // One delay line per band above the lowest one. Counting from 1 (instead of
  // comparing against |num_bands - 1|) avoids the unsigned wrap-around that
  // would otherwise make this loop run almost forever when |num_bands| is 0.
  const size_t initial_delay = render_mangler_->initial_delay();
  for (size_t band = 1u; band < num_bands; ++band) {
    high_bands_buffers_.push_back(std::unique_ptr<intelligibility::DelayBuffer>(
        new intelligibility::DelayBuffer(initial_delay, num_render_channels_)));
  }
}
|
||||
|
||||
// Logs how large a share of the processed chunks had the enhancer active.
IntelligibilityEnhancer::~IntelligibilityEnhancer() {
  // Don't rely on this log, since the destructor isn't called when the
  // app/tab is killed.
  if (num_chunks_ == 0) {
    RTC_LOG(LS_INFO) << "Intelligibility Enhancer processed no chunk.";
    return;
  }
  const float active_percent =
      100.f * static_cast<float>(num_active_chunks_) / num_chunks_;
  RTC_LOG(LS_INFO) << "Intelligibility Enhancer was active for "
                   << active_percent << "% of the call.";
}
|
||||
|
||||
void IntelligibilityEnhancer::SetCaptureNoiseEstimate(std::vector<float> noise,
|
||||
float gain) {
|
||||
RTC_DCHECK_EQ(noise.size(), num_noise_bins_);
|
||||
for (auto& bin : noise) {
|
||||
bin *= gain;
|
||||
}
|
||||
// Disregarding return value since buffer overflow is acceptable, because it
|
||||
// is not critical to get each noise estimate.
|
||||
if (noise_estimation_queue_.Insert(&noise)) {
|
||||
};
|
||||
}
|
||||
|
||||
// Processes one chunk of render audio in place: consumes all queued noise
// estimates, runs speech detection on the first channel of the lowest band,
// passes that band through the lapped transform and delays the other bands.
void IntelligibilityEnhancer::ProcessRenderAudio(AudioBuffer* audio) {
  RTC_DCHECK_EQ(num_render_channels_, audio->num_channels());

  // Feed every pending capture noise estimate into the noise power estimator.
  while (true) {
    if (!noise_estimation_queue_.Remove(&noise_estimation_buffer_)) {
      break;
    }
    noise_power_estimator_.Step(noise_estimation_buffer_.data());
  }

  float* const* band0_channels = audio->split_channels_f(kBand0To8kHz);
  // The speech decision must be taken before the transform runs, because
  // ProcessAudioBlock() reads |is_speech_|.
  is_speech_ = IsSpeech(band0_channels[0]);
  render_mangler_->ProcessChunk(band0_channels, band0_channels);
  DelayHighBands(audio);
}
|
||||
|
||||
void IntelligibilityEnhancer::ProcessAudioBlock(
|
||||
const std::complex<float>* const* in_block,
|
||||
size_t in_channels,
|
||||
size_t frames,
|
||||
size_t /* out_channels */,
|
||||
std::complex<float>* const* out_block) {
|
||||
RTC_DCHECK_EQ(freqs_, frames);
|
||||
if (is_speech_) {
|
||||
clear_power_estimator_.Step(in_block[0]);
|
||||
}
|
||||
SnrBasedEffectActivation();
|
||||
++num_chunks_;
|
||||
if (is_active_) {
|
||||
++num_active_chunks_;
|
||||
if (num_chunks_ % kGainUpdatePeriod == 0) {
|
||||
MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,
|
||||
filtered_clear_pow_.data());
|
||||
MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,
|
||||
filtered_noise_pow_.data());
|
||||
SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
|
||||
const float power_target =
|
||||
std::accumulate(filtered_clear_pow_.data(),
|
||||
filtered_clear_pow_.data() + bank_size_, 0.f);
|
||||
const float power_top =
|
||||
DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
|
||||
SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
|
||||
const float power_bot =
|
||||
DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
|
||||
if (power_target >= power_bot && power_target <= power_top) {
|
||||
SolveForLambda(power_target);
|
||||
UpdateErbGains();
|
||||
} // Else experiencing power underflow, so do nothing.
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < in_channels; ++i) {
|
||||
gain_applier_.Apply(in_block[i], out_block[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void IntelligibilityEnhancer::SnrBasedEffectActivation() {
|
||||
const float* clear_psd = clear_power_estimator_.power().data();
|
||||
const float* noise_psd = noise_power_estimator_.power().data();
|
||||
const float clear_power = std::accumulate(clear_psd, clear_psd + freqs_, 0.f);
|
||||
const float noise_power = std::accumulate(noise_psd, noise_psd + freqs_, 0.f);
|
||||
snr_ = kDecayRate * snr_ +
|
||||
(1.f - kDecayRate) * clear_power /
|
||||
(noise_power + std::numeric_limits<float>::epsilon());
|
||||
if (is_active_) {
|
||||
if (snr_ > kMaxActiveSNR) {
|
||||
RTC_LOG(LS_INFO) << "Intelligibility Enhancer was deactivated at chunk "
|
||||
<< num_chunks_;
|
||||
is_active_ = false;
|
||||
// Set the target gains to unity.
|
||||
float* gains = gain_applier_.target();
|
||||
for (size_t i = 0; i < freqs_; ++i) {
|
||||
gains[i] = 1.f;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (snr_ < kMinInactiveSNR) {
|
||||
RTC_LOG(LS_INFO) << "Intelligibility Enhancer was activated at chunk "
|
||||
<< num_chunks_;
|
||||
is_active_ = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void IntelligibilityEnhancer::SolveForLambda(float power_target) {
|
||||
const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values
|
||||
const int kMaxIters = 100; // for these, based on experiments.
|
||||
|
||||
const float reciprocal_power_target =
|
||||
1.f / (power_target + std::numeric_limits<float>::epsilon());
|
||||
float lambda_bot = kLambdaBot;
|
||||
float lambda_top = kLambdaTop;
|
||||
float power_ratio = 2.f; // Ratio of achieved power to target power.
|
||||
int iters = 0;
|
||||
while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) {
|
||||
const float lambda = (lambda_bot + lambda_top) / 2.f;
|
||||
SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.data());
|
||||
const float power =
|
||||
DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
|
||||
if (power < power_target) {
|
||||
lambda_bot = lambda;
|
||||
} else {
|
||||
lambda_top = lambda;
|
||||
}
|
||||
power_ratio = std::fabs(power * reciprocal_power_target);
|
||||
++iters;
|
||||
}
|
||||
}
|
||||
|
||||
void IntelligibilityEnhancer::UpdateErbGains() {
|
||||
// (ERB gain) = filterbank' * (freq gain)
|
||||
float* gains = gain_applier_.target();
|
||||
for (size_t i = 0; i < freqs_; ++i) {
|
||||
gains[i] = 0.f;
|
||||
for (size_t j = 0; j < bank_size_; ++j) {
|
||||
gains[i] += render_filter_bank_[j][i] * gains_eq_[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the number of ERB filters for |sample_rate|: the ERB-scale position
// of half the sample rate (expressed in kHz), rounded up, times
// |erb_resolution|.
size_t IntelligibilityEnhancer::GetBankSize(int sample_rate,
                                            size_t erb_resolution) {
  const float half_rate_khz = sample_rate / 2000.f;
  const float erb_position =
      11.17f * logf((half_rate_khz + 0.312f) / (half_rate_khz + 14.6575f)) +
      43.f;
  const size_t erb_scale = static_cast<size_t>(ceilf(erb_position));
  return erb_scale * erb_resolution;
}
|
||||
|
||||
std::vector<std::vector<float>> IntelligibilityEnhancer::CreateErbBank(
|
||||
size_t num_freqs) {
|
||||
std::vector<std::vector<float>> filter_bank(bank_size_);
|
||||
size_t lf = 1, rf = 4;
|
||||
|
||||
for (size_t i = 0; i < bank_size_; ++i) {
|
||||
float abs_temp = fabsf((i + 1.f) / static_cast<float>(kErbResolution));
|
||||
center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp));
|
||||
center_freqs_[i] -= 14678.49f;
|
||||
}
|
||||
float last_center_freq = center_freqs_[bank_size_ - 1];
|
||||
for (size_t i = 0; i < bank_size_; ++i) {
|
||||
center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < bank_size_; ++i) {
|
||||
filter_bank[i].resize(num_freqs);
|
||||
}
|
||||
|
||||
for (size_t i = 1; i <= bank_size_; ++i) {
|
||||
size_t lll = static_cast<size_t>(
|
||||
round(center_freqs_[rtc::SafeMax<size_t>(1, i - lf) - 1] * num_freqs /
|
||||
(0.5f * sample_rate_hz_)));
|
||||
size_t ll = static_cast<size_t>(
|
||||
round(center_freqs_[rtc::SafeMax<size_t>(1, i) - 1] * num_freqs /
|
||||
(0.5f * sample_rate_hz_)));
|
||||
lll = rtc::SafeClamp<size_t>(lll, 1, num_freqs) - 1;
|
||||
ll = rtc::SafeClamp<size_t>(ll, 1, num_freqs) - 1;
|
||||
|
||||
size_t rrr = static_cast<size_t>(
|
||||
round(center_freqs_[rtc::SafeMin<size_t>(bank_size_, i + rf) - 1] *
|
||||
num_freqs / (0.5f * sample_rate_hz_)));
|
||||
size_t rr = static_cast<size_t>(
|
||||
round(center_freqs_[rtc::SafeMin<size_t>(bank_size_, i + 1) - 1] *
|
||||
num_freqs / (0.5f * sample_rate_hz_)));
|
||||
rrr = rtc::SafeClamp<size_t>(rrr, 1, num_freqs) - 1;
|
||||
rr = rtc::SafeClamp<size_t>(rr, 1, num_freqs) - 1;
|
||||
|
||||
float step = ll == lll ? 0.f : 1.f / (ll - lll);
|
||||
float element = 0.f;
|
||||
for (size_t j = lll; j <= ll; ++j) {
|
||||
filter_bank[i - 1][j] = element;
|
||||
element += step;
|
||||
}
|
||||
step = rr == rrr ? 0.f : 1.f / (rrr - rr);
|
||||
element = 1.f;
|
||||
for (size_t j = rr; j <= rrr; ++j) {
|
||||
filter_bank[i - 1][j] = element;
|
||||
element -= step;
|
||||
}
|
||||
for (size_t j = ll; j <= rr; ++j) {
|
||||
filter_bank[i - 1][j] = 1.f;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < num_freqs; ++i) {
|
||||
float sum = 0.f;
|
||||
for (size_t j = 0; j < bank_size_; ++j) {
|
||||
sum += filter_bank[j][i];
|
||||
}
|
||||
for (size_t j = 0; j < bank_size_; ++j) {
|
||||
filter_bank[j][i] /= sum;
|
||||
}
|
||||
}
|
||||
return filter_bank;
|
||||
}
|
||||
|
||||
void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
|
||||
size_t start_freq,
|
||||
float* sols) {
|
||||
const float kMinPower = 1e-5f;
|
||||
|
||||
const float* pow_x0 = filtered_clear_pow_.data();
|
||||
const float* pow_n0 = filtered_noise_pow_.data();
|
||||
|
||||
for (size_t n = 0; n < start_freq; ++n) {
|
||||
sols[n] = 1.f;
|
||||
}
|
||||
|
||||
// Analytic solution for optimal gains. See paper for derivation.
|
||||
for (size_t n = start_freq; n < bank_size_; ++n) {
|
||||
if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) {
|
||||
sols[n] = 1.f;
|
||||
} else {
|
||||
const float gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] +
|
||||
lambda * pow_x0[n] * pow_n0[n] * pow_n0[n];
|
||||
const float beta0 =
|
||||
lambda * pow_x0[n] * (2.f - kRho) * pow_x0[n] * pow_n0[n];
|
||||
const float alpha0 =
|
||||
lambda * pow_x0[n] * (1.f - kRho) * pow_x0[n] * pow_x0[n];
|
||||
RTC_DCHECK_LT(alpha0, 0.f);
|
||||
// The quadratic equation should always have real roots, but to guard
|
||||
// against numerical errors we limit it to a minimum of zero.
|
||||
sols[n] = std::max(
|
||||
0.f, (-beta0 - std::sqrt(std::max(
|
||||
0.f, beta0 * beta0 - 4.f * alpha0 * gamma0))) /
|
||||
(2.f * alpha0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool IntelligibilityEnhancer::IsSpeech(const float* audio) {
|
||||
FloatToS16(audio, chunk_length_, audio_s16_.data());
|
||||
vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);
|
||||
if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {
|
||||
chunks_since_voice_ = 0;
|
||||
} else if (chunks_since_voice_ < kSpeechOffsetDelay) {
|
||||
++chunks_since_voice_;
|
||||
}
|
||||
return chunks_since_voice_ < kSpeechOffsetDelay;
|
||||
}
|
||||
|
||||
// Passes every band above the lowest one through its own delay buffer, one
// chunk at a time. |audio| must have exactly one band more than there are
// delay buffers.
void IntelligibilityEnhancer::DelayHighBands(AudioBuffer* audio) {
  const size_t num_high_bands = high_bands_buffers_.size();
  RTC_DCHECK_EQ(audio->num_bands(), num_high_bands + 1);
  for (size_t buffer_index = 0u; buffer_index < num_high_bands;
       ++buffer_index) {
    // Buffer 0 belongs to band 1; band 0 is never delayed here.
    const Band band = static_cast<Band>(buffer_index + 1);
    high_bands_buffers_[buffer_index]->Delay(audio->split_channels_f(band),
                                             chunk_length_);
  }
}
|
||||
|
||||
} // namespace webrtc
|
|
@ -1,137 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
|
||||
|
||||
#include <complex>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "common_audio/channel_buffer.h"
|
||||
#include "common_audio/lapped_transform.h"
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "modules/audio_processing/intelligibility/intelligibility_utils.h"
|
||||
#include "modules/audio_processing/render_queue_item_verifier.h"
|
||||
#include "modules/audio_processing/vad/voice_activity_detector.h"
|
||||
#include "rtc_base/swap_queue.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Speech intelligibility enhancement module. Reads render and capture
|
||||
// audio streams and modifies the render stream with a set of gains per
|
||||
// frequency bin to enhance speech against the noise background.
|
||||
// Details of the model and algorithm can be found in the original paper:
|
||||
// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788
|
||||
class IntelligibilityEnhancer : public LappedTransform::Callback {
|
||||
public:
|
||||
IntelligibilityEnhancer(int sample_rate_hz,
|
||||
size_t num_render_channels,
|
||||
size_t num_bands,
|
||||
size_t num_noise_bins);
|
||||
|
||||
~IntelligibilityEnhancer() override;
|
||||
|
||||
// Sets the capture noise magnitude spectrum estimate.
|
||||
void SetCaptureNoiseEstimate(std::vector<float> noise, float gain);
|
||||
|
||||
// Reads chunk of speech in time domain and updates with modified signal.
|
||||
void ProcessRenderAudio(AudioBuffer* audio);
|
||||
bool active() const;
|
||||
|
||||
protected:
|
||||
// All in frequency domain, receives input |in_block|, applies
|
||||
// intelligibility enhancement, and writes result to |out_block|.
|
||||
void ProcessAudioBlock(const std::complex<float>* const* in_block,
|
||||
size_t in_channels,
|
||||
size_t frames,
|
||||
size_t out_channels,
|
||||
std::complex<float>* const* out_block) override;
|
||||
|
||||
private:
|
||||
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestRenderUpdate);
|
||||
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);
|
||||
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);
|
||||
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest,
|
||||
TestNoiseGainHasExpectedResult);
|
||||
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest,
|
||||
TestAllBandsHaveSameDelay);
|
||||
|
||||
// Updates the SNR estimation and enables or disables this component using a
|
||||
// hysteresis.
|
||||
void SnrBasedEffectActivation();
|
||||
|
||||
// Bisection search for optimal |lambda|.
|
||||
void SolveForLambda(float power_target);
|
||||
|
||||
// Transforms freq gains to ERB gains.
|
||||
void UpdateErbGains();
|
||||
|
||||
// Returns number of ERB filters.
|
||||
static size_t GetBankSize(int sample_rate, size_t erb_resolution);
|
||||
|
||||
// Initializes ERB filterbank.
|
||||
std::vector<std::vector<float>> CreateErbBank(size_t num_freqs);
|
||||
|
||||
// Analytically solves quadratic for optimal gains given |lambda|.
|
||||
// Negative gains are set to 0. Stores the results in |sols|.
|
||||
void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);
|
||||
|
||||
// Returns true if the audio is speech.
|
||||
bool IsSpeech(const float* audio);
|
||||
|
||||
// Delays the high bands to compensate for the processing delay in the low
|
||||
// band.
|
||||
void DelayHighBands(AudioBuffer* audio);
|
||||
|
||||
static const size_t kMaxNumNoiseEstimatesToBuffer = 5;
|
||||
|
||||
const size_t freqs_; // Num frequencies in frequency domain.
|
||||
const size_t num_noise_bins_;
|
||||
const size_t chunk_length_; // Chunk size in samples.
|
||||
const size_t bank_size_; // Num ERB filters.
|
||||
const int sample_rate_hz_;
|
||||
const size_t num_render_channels_;
|
||||
|
||||
intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_;
|
||||
intelligibility::PowerEstimator<float> noise_power_estimator_;
|
||||
std::vector<float> filtered_clear_pow_;
|
||||
std::vector<float> filtered_noise_pow_;
|
||||
std::vector<float> center_freqs_;
|
||||
std::vector<std::vector<float>> capture_filter_bank_;
|
||||
std::vector<std::vector<float>> render_filter_bank_;
|
||||
size_t start_freq_;
|
||||
|
||||
std::vector<float> gains_eq_; // Pre-filter modified gains.
|
||||
intelligibility::GainApplier gain_applier_;
|
||||
|
||||
std::unique_ptr<LappedTransform> render_mangler_;
|
||||
|
||||
VoiceActivityDetector vad_;
|
||||
std::vector<int16_t> audio_s16_;
|
||||
size_t chunks_since_voice_;
|
||||
bool is_speech_;
|
||||
float snr_;
|
||||
bool is_active_;
|
||||
|
||||
unsigned long int num_chunks_;
|
||||
unsigned long int num_active_chunks_;
|
||||
|
||||
std::vector<float> noise_estimation_buffer_;
|
||||
SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>
|
||||
noise_estimation_queue_;
|
||||
|
||||
std::vector<std::unique_ptr<intelligibility::DelayBuffer>>
|
||||
high_bands_buffers_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
|
|
@ -1,536 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
|
||||
#include "modules/audio_processing/noise_suppression_impl.h"
|
||||
#include "modules/audio_processing/test/audio_buffer_tools.h"
|
||||
#include "modules/audio_processing/test/bitexactness_tools.h"
|
||||
#include "rtc_base/arraysize.h"
|
||||
#include "test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Reference ERB center frequencies for the ERB creation test, one per filter.
// Generated with matlab.
constexpr float kTestCenterFreqs[] = {
    14.5213f, 29.735f,  45.6781f, 62.3884f, 79.9058f, 98.2691f,
    117.521f, 137.708f, 158.879f, 181.084f, 204.378f, 228.816f,
    254.459f, 281.371f, 309.618f, 339.273f, 370.411f, 403.115f,
    437.469f, 473.564f, 511.497f, 551.371f, 593.293f, 637.386f,
    683.77f,  732.581f, 783.96f,  838.06f,  895.046f, 955.09f,
    1018.38f, 1085.13f, 1155.54f, 1229.85f, 1308.32f, 1391.22f,
    1478.83f, 1571.5f,  1669.55f, 1773.37f, 1883.37f, 2000.f};
|
||||
const float kTestFilterBank[][33] = {
|
||||
{0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.2f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.25f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.25f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.25f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.157895f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.210526f, 0.117647f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.315789f, 0.176471f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.315789f, 0.352941f, 0.142857f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.352941f, 0.285714f,
|
||||
0.157895f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f,
|
||||
0.210526f, 0.111111f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.285714f, 0.315789f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.315789f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f,
|
||||
0.108108f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f,
|
||||
0.243243f, 0.153846f, 0.0833333f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f,
|
||||
0.324324f, 0.230769f, 0.166667f, 0.0909091f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.324324f, 0.307692f, 0.25f, 0.181818f, 0.0833333f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.307692f, 0.333333f,
|
||||
0.363636f, 0.25f, 0.151515f, 0.0793651f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.166667f, 0.363636f, 0.333333f, 0.242424f,
|
||||
0.190476f, 0.133333f, 0.0689655f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.333333f, 0.30303f, 0.253968f, 0.2f, 0.137931f,
|
||||
0.0714286f, 0.f, 0.f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.30303f, 0.31746f, 0.333333f, 0.275862f, 0.214286f,
|
||||
0.125f, 0.0655738f, 0.f, 0.f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.15873f, 0.333333f, 0.344828f, 0.357143f,
|
||||
0.25f, 0.196721f, 0.137931f, 0.0816327f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.172414f, 0.357143f,
|
||||
0.3125f, 0.245902f, 0.172414f, 0.102041f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.3125f, 0.327869f, 0.344828f, 0.204082f, 0.f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.163934f, 0.344828f, 0.408163f, 0.5f},
|
||||
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.204082f, 0.5f}};
|
||||
static_assert(arraysize(kTestCenterFreqs) == arraysize(kTestFilterBank),
|
||||
"Test filterbank badly initialized.");
|
||||
|
||||
// Target output for gain solving test. Generated with matlab.
|
||||
const size_t kTestStartFreq = 12; // Lowest integral frequency for ERBs.
|
||||
const float kTestZeroVar = 1.f;
|
||||
const float kTestNonZeroVarLambdaTop[] = {
|
||||
1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
|
||||
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
|
||||
static_assert(arraysize(kTestCenterFreqs) ==
|
||||
arraysize(kTestNonZeroVarLambdaTop),
|
||||
"Power test data badly initialized.");
|
||||
const float kMaxTestError = 0.005f;
|
||||
|
||||
// Enhancer initialization parameters.
constexpr int kSamples = 10000;
constexpr int kSampleRate = 4000;
constexpr int kNumChannels = 1;
constexpr int kFragmentSize = kSampleRate / 100;  // Samples per fragment.
constexpr size_t kNumNoiseBins = 129;
constexpr size_t kNumBands = 1;

// Number of frames to process in the bitexactness tests.
constexpr size_t kNumFramesToProcess = 1000;
|
||||
|
||||
int IntelligibilityEnhancerSampleRate(int sample_rate_hz) {
|
||||
return (sample_rate_hz > AudioProcessing::kSampleRate16kHz
|
||||
? AudioProcessing::kSampleRate16kHz
|
||||
: sample_rate_hz);
|
||||
}
|
||||
|
||||
// Process one frame of data and produce the output.
|
||||
void ProcessOneFrame(int sample_rate_hz,
|
||||
AudioBuffer* render_audio_buffer,
|
||||
AudioBuffer* capture_audio_buffer,
|
||||
NoiseSuppressionImpl* noise_suppressor,
|
||||
IntelligibilityEnhancer* intelligibility_enhancer) {
|
||||
if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
|
||||
render_audio_buffer->SplitIntoFrequencyBands();
|
||||
capture_audio_buffer->SplitIntoFrequencyBands();
|
||||
}
|
||||
|
||||
intelligibility_enhancer->ProcessRenderAudio(render_audio_buffer);
|
||||
|
||||
noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer);
|
||||
noise_suppressor->ProcessCaptureAudio(capture_audio_buffer);
|
||||
|
||||
intelligibility_enhancer->SetCaptureNoiseEstimate(
|
||||
noise_suppressor->NoiseEstimate(), 0);
|
||||
|
||||
if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
|
||||
render_audio_buffer->MergeFrequencyBands();
|
||||
}
|
||||
}
|
||||
|
||||
// Processes a specified amount of frames, verifies the results and reports
|
||||
// any errors.
|
||||
void RunBitexactnessTest(int sample_rate_hz,
|
||||
size_t num_channels,
|
||||
rtc::ArrayView<const float> output_reference) {
|
||||
const StreamConfig render_config(sample_rate_hz, num_channels, false);
|
||||
AudioBuffer render_buffer(
|
||||
render_config.num_frames(), render_config.num_channels(),
|
||||
render_config.num_frames(), render_config.num_channels(),
|
||||
render_config.num_frames());
|
||||
test::InputAudioFile render_file(
|
||||
test::GetApmRenderTestVectorFileName(sample_rate_hz));
|
||||
std::vector<float> render_input(render_buffer.num_frames() *
|
||||
render_buffer.num_channels());
|
||||
|
||||
const StreamConfig capture_config(sample_rate_hz, num_channels, false);
|
||||
AudioBuffer capture_buffer(
|
||||
capture_config.num_frames(), capture_config.num_channels(),
|
||||
capture_config.num_frames(), capture_config.num_channels(),
|
||||
capture_config.num_frames());
|
||||
test::InputAudioFile capture_file(
|
||||
test::GetApmCaptureTestVectorFileName(sample_rate_hz));
|
||||
std::vector<float> capture_input(render_buffer.num_frames() *
|
||||
capture_buffer.num_channels());
|
||||
|
||||
rtc::CriticalSection crit_capture;
|
||||
NoiseSuppressionImpl noise_suppressor(&crit_capture);
|
||||
noise_suppressor.Initialize(capture_config.num_channels(), sample_rate_hz);
|
||||
noise_suppressor.Enable(true);
|
||||
|
||||
IntelligibilityEnhancer intelligibility_enhancer(
|
||||
IntelligibilityEnhancerSampleRate(sample_rate_hz),
|
||||
render_config.num_channels(), kNumBands,
|
||||
NoiseSuppressionImpl::num_noise_bins());
|
||||
|
||||
for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) {
|
||||
ReadFloatSamplesFromStereoFile(render_buffer.num_frames(),
|
||||
render_buffer.num_channels(), &render_file,
|
||||
render_input);
|
||||
ReadFloatSamplesFromStereoFile(capture_buffer.num_frames(),
|
||||
capture_buffer.num_channels(), &capture_file,
|
||||
capture_input);
|
||||
|
||||
test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer);
|
||||
test::CopyVectorToAudioBuffer(capture_config, capture_input,
|
||||
&capture_buffer);
|
||||
|
||||
ProcessOneFrame(sample_rate_hz, &render_buffer, &capture_buffer,
|
||||
&noise_suppressor, &intelligibility_enhancer);
|
||||
}
|
||||
|
||||
// Extract and verify the test results.
|
||||
std::vector<float> render_output;
|
||||
test::ExtractVectorFromAudioBuffer(render_config, &render_buffer,
|
||||
&render_output);
|
||||
|
||||
const float kElementErrorBound = 1.f / static_cast<float>(1 << 15);
|
||||
|
||||
// Compare the output with the reference. Only the first values of the output
|
||||
// from last frame processed are compared in order not having to specify all
|
||||
// preceding frames as testvectors. As the algorithm being tested has a
|
||||
// memory, testing only the last frame implicitly also tests the preceding
|
||||
// frames.
|
||||
EXPECT_TRUE(test::VerifyDeinterleavedArray(
|
||||
render_buffer.num_frames(), render_config.num_channels(),
|
||||
output_reference, render_output, kElementErrorBound));
|
||||
}
|
||||
|
||||
// Maps the next std::rand() value linearly onto the interval [-1, 1].
float float_rand() {
  const float doubled = std::rand() * 2.f;
  const float unit_scaled = doubled / RAND_MAX;
  return unit_scaled - 1;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
class IntelligibilityEnhancerTest : public ::testing::Test {
|
||||
protected:
|
||||
IntelligibilityEnhancerTest()
|
||||
: clear_buffer_(kFragmentSize,
|
||||
kNumChannels,
|
||||
kFragmentSize,
|
||||
kNumChannels,
|
||||
kFragmentSize),
|
||||
stream_config_(kSampleRate, kNumChannels),
|
||||
clear_data_(kSamples),
|
||||
noise_data_(kNumNoiseBins),
|
||||
orig_data_(kSamples) {
|
||||
std::srand(1);
|
||||
enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands,
|
||||
kNumNoiseBins));
|
||||
}
|
||||
|
||||
bool CheckUpdate() {
|
||||
enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands,
|
||||
kNumNoiseBins));
|
||||
float* clear_cursor = clear_data_.data();
|
||||
for (int i = 0; i < kSamples; i += kFragmentSize) {
|
||||
enh_->SetCaptureNoiseEstimate(noise_data_, 1);
|
||||
clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
|
||||
enh_->ProcessRenderAudio(&clear_buffer_);
|
||||
clear_buffer_.CopyTo(stream_config_, &clear_cursor);
|
||||
clear_cursor += kFragmentSize;
|
||||
}
|
||||
for (int i = initial_delay_; i < kSamples; i++) {
|
||||
if (std::fabs(clear_data_[i] - orig_data_[i - initial_delay_]) >
|
||||
kMaxTestError) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
std::unique_ptr<IntelligibilityEnhancer> enh_;
|
||||
// Render clean speech buffer.
|
||||
AudioBuffer clear_buffer_;
|
||||
StreamConfig stream_config_;
|
||||
std::vector<float> clear_data_;
|
||||
std::vector<float> noise_data_;
|
||||
std::vector<float> orig_data_;
|
||||
size_t initial_delay_;
|
||||
};
|
||||
|
||||
// For each class of generated data, tests that render stream is updated when
|
||||
// it should be.
|
||||
TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) {
|
||||
initial_delay_ = enh_->render_mangler_->initial_delay();
|
||||
std::fill(noise_data_.begin(), noise_data_.end(), 0.f);
|
||||
std::fill(orig_data_.begin(), orig_data_.end(), 0.f);
|
||||
std::fill(clear_data_.begin(), clear_data_.end(), 0.f);
|
||||
EXPECT_FALSE(CheckUpdate());
|
||||
std::generate(clear_data_.begin(), clear_data_.end(), float_rand);
|
||||
orig_data_ = clear_data_;
|
||||
EXPECT_FALSE(CheckUpdate());
|
||||
std::generate(clear_data_.begin(), clear_data_.end(), float_rand);
|
||||
orig_data_ = clear_data_;
|
||||
std::generate(noise_data_.begin(), noise_data_.end(), float_rand);
|
||||
FloatToFloatS16(noise_data_.data(), noise_data_.size(), noise_data_.data());
|
||||
EXPECT_TRUE(CheckUpdate());
|
||||
}
|
||||
|
||||
// Tests ERB bank creation, comparing against matlab output.
|
||||
TEST_F(IntelligibilityEnhancerTest, TestErbCreation) {
|
||||
ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_);
|
||||
for (size_t i = 0; i < enh_->bank_size_; ++i) {
|
||||
EXPECT_NEAR(kTestCenterFreqs[i], enh_->center_freqs_[i], kMaxTestError);
|
||||
ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_);
|
||||
for (size_t j = 0; j < enh_->freqs_; ++j) {
|
||||
EXPECT_NEAR(kTestFilterBank[i][j], enh_->render_filter_bank_[i][j],
|
||||
kMaxTestError);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Tests analytic solution for optimal gains, comparing
|
||||
// against matlab output.
|
||||
TEST_F(IntelligibilityEnhancerTest, TestSolveForGains) {
|
||||
ASSERT_EQ(kTestStartFreq, enh_->start_freq_);
|
||||
std::vector<float> sols(enh_->bank_size_);
|
||||
float lambda = -0.001f;
|
||||
for (size_t i = 0; i < enh_->bank_size_; i++) {
|
||||
enh_->filtered_clear_pow_[i] = 0.f;
|
||||
enh_->filtered_noise_pow_[i] = 0.f;
|
||||
}
|
||||
enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data());
|
||||
for (size_t i = 0; i < enh_->bank_size_; i++) {
|
||||
EXPECT_NEAR(kTestZeroVar, sols[i], kMaxTestError);
|
||||
}
|
||||
for (size_t i = 0; i < enh_->bank_size_; i++) {
|
||||
enh_->filtered_clear_pow_[i] = static_cast<float>(i + 1);
|
||||
enh_->filtered_noise_pow_[i] = static_cast<float>(enh_->bank_size_ - i);
|
||||
}
|
||||
enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data());
|
||||
for (size_t i = 0; i < enh_->bank_size_; i++) {
|
||||
EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError);
|
||||
}
|
||||
lambda = -1.f;
|
||||
enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data());
|
||||
for (size_t i = 0; i < enh_->bank_size_; i++) {
|
||||
EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError);
|
||||
}
|
||||
}
|
||||
|
||||
// Feeds a fixed noise estimate with gain kGain for kNumFramesToProcess frames
// and checks that the enhancer's noise power estimate ends up within
// kTolerance (relative) of kGain^2 * noise^2 in every bin.
TEST_F(IntelligibilityEnhancerTest, TestNoiseGainHasExpectedResult) {
  const float kGain = 2.f;
  const float kTolerance = 0.007f;
  std::vector<float> noise(kNumNoiseBins);
  std::vector<float> expected_psd(kNumNoiseBins);
  std::generate(noise.begin(), noise.end(), float_rand);
  for (size_t bin = 0; bin < kNumNoiseBins; ++bin) {
    expected_psd[bin] = kGain * kGain * noise[bin] * noise[bin];
  }

  // Every frame reuses the first fragment of |clear_data_|; the cursor is not
  // advanced.
  float* clear_cursor = clear_data_.data();
  for (size_t frame = 0; frame < kNumFramesToProcess; ++frame) {
    enh_->SetCaptureNoiseEstimate(noise, kGain);
    clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
    enh_->ProcessRenderAudio(&clear_buffer_);
  }

  const std::vector<float>& estimated_psd =
      enh_->noise_power_estimator_.power();
  for (size_t bin = 0; bin < kNumNoiseBins; ++bin) {
    EXPECT_LT(
        std::abs(estimated_psd[bin] - expected_psd[bin]) / expected_psd[bin],
        kTolerance);
  }
}
|
||||
|
||||
// Processes one 32 kHz frame split into 16 kHz bands and checks, band by
// band, that the output equals the input shifted by the render mangler's
// initial delay (within kMaxTestError).
TEST_F(IntelligibilityEnhancerTest, TestAllBandsHaveSameDelay) {
  const int kTestSampleRate = AudioProcessing::kSampleRate32kHz;
  const int kTestSplitRate = AudioProcessing::kSampleRate16kHz;
  const size_t kTestNumBands =
      rtc::CheckedDivExact(kTestSampleRate, kTestSplitRate);
  const size_t kTestFragmentSize = rtc::CheckedDivExact(kTestSampleRate, 100);
  const size_t kTestSplitFragmentSize =
      rtc::CheckedDivExact(kTestSplitRate, 100);

  enh_.reset(new IntelligibilityEnhancer(kTestSplitRate, kNumChannels,
                                         kTestNumBands, kNumNoiseBins));
  size_t initial_delay = enh_->render_mangler_->initial_delay();

  std::vector<float> random_frame(kTestFragmentSize);
  AudioBuffer original_buffer(kTestFragmentSize, kNumChannels,
                              kTestFragmentSize, kNumChannels,
                              kTestFragmentSize);
  AudioBuffer audio_buffer(kTestFragmentSize, kNumChannels, kTestFragmentSize,
                           kNumChannels, kTestFragmentSize);
  // Both buffers receive identical random split-band data.
  for (size_t band = 0u; band < kTestNumBands; ++band) {
    std::generate(random_frame.begin(), random_frame.end(), float_rand);
    original_buffer.split_data_f()->SetDataForTesting(random_frame.data(),
                                                      random_frame.size());
    audio_buffer.split_data_f()->SetDataForTesting(random_frame.data(),
                                                   random_frame.size());
  }

  enh_->ProcessRenderAudio(&audio_buffer);

  for (size_t band = 0u; band < kTestNumBands; ++band) {
    const float* original_ptr = original_buffer.split_bands_const_f(0)[band];
    const float* audio_ptr = audio_buffer.split_bands_const_f(0)[band];
    for (size_t n = initial_delay; n < kTestSplitFragmentSize; ++n) {
      EXPECT_LT(std::fabs(original_ptr[n - initial_delay] - audio_ptr[n]),
                kMaxTestError);
    }
  }
}
|
||||
|
||||
// Bit-exactness cases. Each one passes a sample rate, a channel count and the
// reference values for that configuration to RunBitexactnessTest. All of them
// carry the DISABLED_ prefix.

// Mono, 8 kHz.
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) {
  const float kOutputReference[] = {-0.001892f, -0.003296f, -0.001953f};

  RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 1, kOutputReference);
}

// Mono, 16 kHz.
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono16kHz) {
  const float kOutputReference[] = {-0.000977f, -0.003296f, -0.002441f};

  RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 1, kOutputReference);
}

// Mono, 32 kHz.
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono32kHz) {
  const float kOutputReference[] = {0.003021f, -0.011780f, -0.008209f};

  RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 1, kOutputReference);
}

// Mono, 48 kHz.
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono48kHz) {
  const float kOutputReference[] = {-0.027696f, -0.026253f, -0.018001f};

  RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1, kOutputReference);
}

// Stereo, 8 kHz.
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo8kHz) {
  const float kOutputReference[] = {0.021454f,  0.035919f, 0.026428f,
                                    -0.000641f, 0.000366f, 0.000641f};

  RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 2, kOutputReference);
}

// Stereo, 16 kHz.
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo16kHz) {
  const float kOutputReference[] = {0.021362f,  0.035736f,  0.023895f,
                                    -0.001404f, -0.001465f, 0.000549f};

  RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 2, kOutputReference);
}

// Stereo, 32 kHz.
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo32kHz) {
  const float kOutputReference[] = {0.030641f,  0.027406f,  0.028321f,
                                    -0.001343f, -0.004578f, 0.000977f};

  RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 2, kOutputReference);
}

// Stereo, 48 kHz.
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) {
  const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f,
                                    -0.012975f, -0.015940f, -0.017820f};

  RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference);
}
|
||||
|
||||
} // namespace webrtc
|
|
@ -1,94 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/intelligibility/intelligibility_utils.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace intelligibility {
|
||||
|
||||
namespace {
|
||||
|
||||
const float kMinFactor = 0.01f;
|
||||
const float kMaxFactor = 100.f;
|
||||
|
||||
// Return |current| changed towards |target|, with the relative change being at
|
||||
// most |limit|.
|
||||
float UpdateFactor(float target, float current, float limit) {
|
||||
const float gain = target / (current + std::numeric_limits<float>::epsilon());
|
||||
const float clamped_gain = rtc::SafeClamp(gain, 1 - limit, 1 + limit);
|
||||
return rtc::SafeClamp(current * clamped_gain, kMinFactor, kMaxFactor);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
template <typename T>
|
||||
PowerEstimator<T>::PowerEstimator(size_t num_freqs, float decay)
|
||||
: power_(num_freqs, 0.f), decay_(decay) {}
|
||||
|
||||
template <typename T>
|
||||
void PowerEstimator<T>::Step(const T* data) {
|
||||
for (size_t i = 0; i < power_.size(); ++i) {
|
||||
power_[i] = decay_ * power_[i] +
|
||||
(1.f - decay_) * std::abs(data[i]) * std::abs(data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template class PowerEstimator<float>;
|
||||
template class PowerEstimator<std::complex<float>>;
|
||||
|
||||
GainApplier::GainApplier(size_t freqs, float relative_change_limit)
|
||||
: num_freqs_(freqs),
|
||||
relative_change_limit_(relative_change_limit),
|
||||
target_(freqs, 1.f),
|
||||
current_(freqs, 1.f) {}
|
||||
|
||||
GainApplier::~GainApplier() {}
|
||||
|
||||
void GainApplier::Apply(const std::complex<float>* in_block,
|
||||
std::complex<float>* out_block) {
|
||||
for (size_t i = 0; i < num_freqs_; ++i) {
|
||||
current_[i] = UpdateFactor(target_[i], current_[i], relative_change_limit_);
|
||||
out_block[i] = sqrtf(fabsf(current_[i])) * in_block[i];
|
||||
}
|
||||
}
|
||||
|
||||
DelayBuffer::DelayBuffer(size_t delay, size_t num_channels)
|
||||
: buffer_(num_channels, std::vector<float>(delay, 0.f)), read_index_(0u) {}
|
||||
|
||||
DelayBuffer::~DelayBuffer() {}
|
||||
|
||||
void DelayBuffer::Delay(float* const* data, size_t length) {
|
||||
size_t sample_index = read_index_;
|
||||
for (size_t i = 0u; i < buffer_.size(); ++i) {
|
||||
sample_index = read_index_;
|
||||
for (size_t j = 0u; j < length; ++j) {
|
||||
float swap = data[i][j];
|
||||
data[i][j] = buffer_[i][sample_index];
|
||||
buffer_[i][sample_index] = swap;
|
||||
if (++sample_index == buffer_.size()) {
|
||||
sample_index = 0u;
|
||||
}
|
||||
}
|
||||
}
|
||||
read_index_ = sample_index;
|
||||
}
|
||||
|
||||
} // namespace intelligibility
|
||||
|
||||
} // namespace webrtc
|
|
@ -1,86 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
|
||||
#define MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
|
||||
|
||||
#include <complex>
|
||||
#include <vector>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace intelligibility {
|
||||
|
||||
// Internal helper for computing the power of a stream of arrays.
// The result is an array of power per position: the i-th power is the power of
// the stream of data on the i-th positions in the input arrays.
template <typename T>
class PowerEstimator {
 public:
  // Construct an instance for the given input array length (|freqs|), with the
  // appropriate parameters. |decay| is the forgetting factor.
  PowerEstimator(size_t freqs, float decay);

  // Add a new data point to the series.
  void Step(const T* data);

  // The current power array. Const-qualified so it can also be read through a
  // const reference to the estimator.
  const std::vector<float>& power() const { return power_; }

 private:
  // The current power array.
  std::vector<float> power_;

  // Forgetting factor passed to the constructor.
  const float decay_;
};
|
||||
|
||||
// Smooths gain changes: every Apply() call moves the gains in use towards a
// caller-settable set of target gains, with the relative change per call
// bounded by a limit.
class GainApplier {
 public:
  GainApplier(size_t freqs, float relative_change_limit);

  ~GainApplier();

  // Writes |in_block| multiplied by the current gains to |out_block| and
  // advances the current gains towards the targets.
  void Apply(const std::complex<float>* in_block,
             std::complex<float>* out_block);

  // Mutable access to the target gains; write into the returned array to set
  // new targets.
  float* target() { return target_.data(); }

 private:
  const size_t num_freqs_;
  const float relative_change_limit_;
  std::vector<float> target_;
  std::vector<float> current_;
};
|
||||
|
||||
// Delays a multi-channel signal by a whole number of samples.
class DelayBuffer {
 public:
  // |delay| is the delay in samples, |num_channels| the channel count.
  DelayBuffer(size_t delay, size_t num_channels);

  ~DelayBuffer();

  // Delays |length| samples of every channel of |data| in place.
  void Delay(float* const* data, size_t length);

 private:
  // One vector of stored samples per channel.
  std::vector<std::vector<float>> buffer_;
  // Position in the per-channel storage at which the next Delay() call starts.
  size_t read_index_;
};
|
||||
|
||||
} // namespace intelligibility
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
|
|
@ -1,79 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
#include <vector>
|
||||
|
||||
#include "modules/audio_processing/intelligibility/intelligibility_utils.h"
|
||||
#include "rtc_base/arraysize.h"
|
||||
#include "test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace intelligibility {
|
||||
|
||||
// Builds |samples| rows of |freqs| complex values. Entry (i, j) has real and
// imaginary parts both equal to 0.99 / ((i + 1) * (j + 1)).
std::vector<std::vector<std::complex<float>>> GenerateTestData(size_t freqs,
                                                               size_t samples) {
  std::vector<std::vector<std::complex<float>>> data(samples);
  size_t row_number = 1;
  for (auto& row : data) {
    for (size_t j = 0; j < freqs; ++j) {
      const size_t divisor = row_number * (j + 1);
      const float val = 0.99f / divisor;
      row.push_back(std::complex<float>(val, val));
    }
    ++row_number;
  }
  return data;
}
|
||||
|
||||
// Tests PowerEstimator, for all power step types.
|
||||
TEST(IntelligibilityUtilsTest, TestPowerEstimator) {
|
||||
const size_t kFreqs = 10;
|
||||
const size_t kSamples = 100;
|
||||
const float kDecay = 0.5f;
|
||||
const std::vector<std::vector<std::complex<float>>> test_data(
|
||||
GenerateTestData(kFreqs, kSamples));
|
||||
PowerEstimator<std::complex<float>> power_estimator(kFreqs, kDecay);
|
||||
EXPECT_EQ(0, power_estimator.power()[0]);
|
||||
|
||||
// Makes sure Step is doing something.
|
||||
power_estimator.Step(test_data[0].data());
|
||||
for (size_t i = 1; i < kSamples; ++i) {
|
||||
power_estimator.Step(test_data[i].data());
|
||||
for (size_t j = 0; j < kFreqs; ++j) {
|
||||
EXPECT_GE(power_estimator.power()[j], 0.f);
|
||||
EXPECT_LE(power_estimator.power()[j], 1.f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Tests gain applier.
|
||||
TEST(IntelligibilityUtilsTest, TestGainApplier) {
|
||||
const size_t kFreqs = 10;
|
||||
const size_t kSamples = 100;
|
||||
const float kChangeLimit = 0.1f;
|
||||
GainApplier gain_applier(kFreqs, kChangeLimit);
|
||||
const std::vector<std::vector<std::complex<float>>> in_data(
|
||||
GenerateTestData(kFreqs, kSamples));
|
||||
std::vector<std::vector<std::complex<float>>> out_data(
|
||||
GenerateTestData(kFreqs, kSamples));
|
||||
for (size_t i = 0; i < kSamples; ++i) {
|
||||
gain_applier.Apply(in_data[i].data(), out_data[i].data());
|
||||
for (size_t j = 0; j < kFreqs; ++j) {
|
||||
EXPECT_GT(out_data[i][j].real(), 0.f);
|
||||
EXPECT_LT(out_data[i][j].real(), 1.f);
|
||||
EXPECT_GT(out_data[i][j].imag(), 0.f);
|
||||
EXPECT_LT(out_data[i][j].imag(), 1.f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace intelligibility
|
||||
|
||||
} // namespace webrtc
|
|
@ -1,96 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "common_audio/channel_buffer.h"
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "common_audio/wav_file.h"
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
|
||||
#include "modules/audio_processing/noise_suppression_impl.h"
|
||||
#include "rtc_base/criticalsection.h"
|
||||
#include "rtc_base/flags.h"
|
||||
|
||||
using std::complex;
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
DEFINE_string(clear_file, "speech.wav", "Input file with clear speech.");
|
||||
DEFINE_string(noise_file, "noise.wav", "Input file with noise data.");
|
||||
DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file.");
|
||||
DEFINE_bool(help, false, "Print this message.");
|
||||
|
||||
int int_main(int argc, char* argv[]) {
|
||||
if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true)) {
|
||||
return 1;
|
||||
}
|
||||
if (FLAG_help) {
|
||||
rtc::FlagList::Print(nullptr, false);
|
||||
return 0;
|
||||
}
|
||||
if (argc != 1) {
|
||||
printf("\n\nInput files must be little-endian 16-bit signed raw PCM.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
WavReader in_file(FLAG_clear_file);
|
||||
WavReader noise_file(FLAG_noise_file);
|
||||
WavWriter out_file(FLAG_out_file, in_file.sample_rate(),
|
||||
in_file.num_channels());
|
||||
rtc::CriticalSection crit;
|
||||
NoiseSuppressionImpl ns(&crit);
|
||||
IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels(), 1u,
|
||||
NoiseSuppressionImpl::num_noise_bins());
|
||||
ns.Initialize(noise_file.num_channels(), noise_file.sample_rate());
|
||||
ns.Enable(true);
|
||||
const size_t in_samples = noise_file.sample_rate() / 100;
|
||||
const size_t noise_samples = noise_file.sample_rate() / 100;
|
||||
std::vector<float> in(in_samples * in_file.num_channels());
|
||||
std::vector<float> noise(noise_samples * noise_file.num_channels());
|
||||
ChannelBuffer<float> in_buf(in_samples, in_file.num_channels());
|
||||
ChannelBuffer<float> noise_buf(noise_samples, noise_file.num_channels());
|
||||
AudioBuffer capture_audio(noise_samples, noise_file.num_channels(),
|
||||
noise_samples, noise_file.num_channels(),
|
||||
noise_samples);
|
||||
AudioBuffer render_audio(in_samples, in_file.num_channels(), in_samples,
|
||||
in_file.num_channels(), in_samples);
|
||||
StreamConfig noise_config(noise_file.sample_rate(),
|
||||
noise_file.num_channels());
|
||||
StreamConfig in_config(in_file.sample_rate(), in_file.num_channels());
|
||||
while (in_file.ReadSamples(in.size(), in.data()) == in.size() &&
|
||||
noise_file.ReadSamples(noise.size(), noise.data()) == noise.size()) {
|
||||
FloatS16ToFloat(noise.data(), noise.size(), noise.data());
|
||||
FloatS16ToFloat(in.data(), in.size(), in.data());
|
||||
Deinterleave(in.data(), in_buf.num_frames(), in_buf.num_channels(),
|
||||
in_buf.channels());
|
||||
Deinterleave(noise.data(), noise_buf.num_frames(), noise_buf.num_channels(),
|
||||
noise_buf.channels());
|
||||
capture_audio.CopyFrom(noise_buf.channels(), noise_config);
|
||||
render_audio.CopyFrom(in_buf.channels(), in_config);
|
||||
ns.AnalyzeCaptureAudio(&capture_audio);
|
||||
ns.ProcessCaptureAudio(&capture_audio);
|
||||
enh.SetCaptureNoiseEstimate(ns.NoiseEstimate(), 1);
|
||||
enh.ProcessRenderAudio(&render_audio);
|
||||
render_audio.CopyTo(in_config, in_buf.channels());
|
||||
Interleave(in_buf.channels(), in_buf.num_frames(), in_buf.num_channels(),
|
||||
in.data());
|
||||
FloatToFloatS16(in.data(), in.size(), in.data());
|
||||
out_file.WriteSamples(in.data(), in.size());
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace webrtc
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
return webrtc::int_main(argc, argv);
|
||||
}
|
|
@ -427,16 +427,6 @@ void AecDumpBasedSimulator::HandleMessage(
|
|||
}
|
||||
}
|
||||
|
||||
if (msg.has_intelligibility_enhancer_enabled() || settings_.use_ie) {
|
||||
bool enable = settings_.use_ie ? *settings_.use_ie
|
||||
: msg.intelligibility_enhancer_enabled();
|
||||
config.Set<Intelligibility>(new Intelligibility(enable));
|
||||
if (settings_.use_verbose_logging) {
|
||||
std::cout << " intelligibility_enhancer_enabled: "
|
||||
<< (enable ? "true" : "false") << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if (msg.has_hpf_enabled() || settings_.use_hpf) {
|
||||
bool enable = settings_.use_hpf ? *settings_.use_hpf : msg.hpf_enabled();
|
||||
apm_config.high_pass_filter.enabled = enable;
|
||||
|
|
|
@ -678,9 +678,6 @@ void AudioProcessingSimulator::CreateAudioProcessor() {
|
|||
if (settings_.use_ts) {
|
||||
config.Set<ExperimentalNs>(new ExperimentalNs(*settings_.use_ts));
|
||||
}
|
||||
if (settings_.use_ie) {
|
||||
config.Set<Intelligibility>(new Intelligibility(*settings_.use_ie));
|
||||
}
|
||||
if (settings_.use_agc2) {
|
||||
apm_config.gain_controller2.enabled = *settings_.use_agc2;
|
||||
apm_config.gain_controller2.fixed_gain_db = settings_.agc2_fixed_gain_db;
|
||||
|
|
|
@ -87,9 +87,6 @@ DEFINE_int(ns,
|
|||
DEFINE_int(ts,
|
||||
kParameterNotSpecifiedValue,
|
||||
"Activate (1) or deactivate(0) the transient suppressor");
|
||||
DEFINE_int(ie,
|
||||
kParameterNotSpecifiedValue,
|
||||
"Activate (1) or deactivate(0) the intelligibility enhancer");
|
||||
DEFINE_int(vad,
|
||||
kParameterNotSpecifiedValue,
|
||||
"Activate (1) or deactivate(0) the voice activity detector");
|
||||
|
@ -247,7 +244,6 @@ SimulationSettings CreateSettings() {
|
|||
SetSettingIfFlagSet(FLAG_hpf, &settings.use_hpf);
|
||||
SetSettingIfFlagSet(FLAG_ns, &settings.use_ns);
|
||||
SetSettingIfFlagSet(FLAG_ts, &settings.use_ts);
|
||||
SetSettingIfFlagSet(FLAG_ie, &settings.use_ie);
|
||||
SetSettingIfFlagSet(FLAG_vad, &settings.use_vad);
|
||||
SetSettingIfFlagSet(FLAG_le, &settings.use_le);
|
||||
SetSettingIfSpecified(FLAG_aec_suppression_level,
|
||||
|
|
|
@ -186,10 +186,6 @@ void DebugDumpReplayer::MaybeRecreateApm(const audioproc::Config& msg) {
|
|||
config.Set<ExtendedFilter>(
|
||||
new ExtendedFilter(msg.aec_extended_filter_enabled()));
|
||||
|
||||
RTC_CHECK(msg.has_intelligibility_enhancer_enabled());
|
||||
config.Set<Intelligibility>(
|
||||
new Intelligibility(msg.intelligibility_enhancer_enabled()));
|
||||
|
||||
// We only create APM once, since changes on these fields should not
|
||||
// happen in current implementation.
|
||||
if (!apm_.get()) {
|
||||
|
|
|
@ -95,7 +95,7 @@ helps with that, producing plots similar to [this
|
|||
one](https://matplotlib.org/mpl_examples/pylab_examples/boxplot_demo_06.png).
|
||||
|
||||
Suppose some scores come from running the APM simulator `audioproc_f` with
|
||||
or without the intelligibility enhancer: `--ie=1` or `--ie=0`. Then two boxplots
|
||||
or without the level controller: `--lc=1` or `--lc=0`. Then two boxplots
|
||||
side by side can be generated with
|
||||
|
||||
```
|
||||
|
@ -103,7 +103,7 @@ $ ./apm_quality_assessment_boxplot.py \
|
|||
-o /path/to/output
|
||||
-v <score_name>
|
||||
-n /path/to/dir/with/apm_configs
|
||||
-z ie
|
||||
-z lc
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
|
|
@ -36,7 +36,6 @@ def _GenerateDefaultOverridden(config_override):
|
|||
settings.use_bf = rtc::Optional<bool>(false);
|
||||
settings.use_ed = rtc::Optional<bool>(false);
|
||||
settings.use_hpf = rtc::Optional<bool>(true);
|
||||
settings.use_ie = rtc::Optional<bool>(false);
|
||||
settings.use_le = rtc::Optional<bool>(true);
|
||||
settings.use_ns = rtc::Optional<bool>(true);
|
||||
settings.use_ts = rtc::Optional<bool>(true);
|
||||
|
@ -83,7 +82,6 @@ def _GenerateAllDefaultPlusOne():
|
|||
'with_drift_compensation': {'-drift_compensation': 1,},
|
||||
'with_residual_echo_detector': {'-ed': 1,},
|
||||
'with_AEC_extended_filter': {'-extended_filter': 1,},
|
||||
'with_intelligibility_enhancer': {'-ie': 1,},
|
||||
'with_LC': {'-lc': 1,},
|
||||
'with_refined_adaptive_filter': {'-refined_adaptive_filter': 1,},
|
||||
}
|
||||
|
|
|
@ -289,7 +289,6 @@ int do_main(int argc, char* argv[]) {
|
|||
PRINT_CONFIG(ns_enabled);
|
||||
PRINT_CONFIG(ns_level);
|
||||
PRINT_CONFIG(transient_suppression_enabled);
|
||||
PRINT_CONFIG(intelligibility_enhancer_enabled);
|
||||
PRINT_CONFIG(pre_amplifier_enabled);
|
||||
PRINT_CONFIG_FLOAT(pre_amplifier_fixed_gain_factor);
|
||||
|
||||
|
|
|
@ -63,7 +63,7 @@ std::unique_ptr<AudioProcessing> CreateApm(test::FuzzDataHelper* fuzz_data,
|
|||
bool ef = fuzz_data->ReadOrDefaultValue(true);
|
||||
bool raf = fuzz_data->ReadOrDefaultValue(true);
|
||||
static_cast<void>(fuzz_data->ReadOrDefaultValue(true));
|
||||
bool ie = fuzz_data->ReadOrDefaultValue(true);
|
||||
static_cast<void>(fuzz_data->ReadOrDefaultValue(true));
|
||||
bool red = fuzz_data->ReadOrDefaultValue(true);
|
||||
bool hpf = fuzz_data->ReadOrDefaultValue(true);
|
||||
bool aec3 = fuzz_data->ReadOrDefaultValue(true);
|
||||
|
@ -123,7 +123,6 @@ std::unique_ptr<AudioProcessing> CreateApm(test::FuzzDataHelper* fuzz_data,
|
|||
config.Set<ExtendedFilter>(new ExtendedFilter(ef));
|
||||
config.Set<RefinedAdaptiveFilter>(new RefinedAdaptiveFilter(raf));
|
||||
config.Set<DelayAgnostic>(new DelayAgnostic(true));
|
||||
config.Set<Intelligibility>(new Intelligibility(ie));
|
||||
|
||||
std::unique_ptr<AudioProcessing> apm(
|
||||
AudioProcessingBuilder()
|
||||
|
|
|
@ -53,8 +53,6 @@
|
|||
# "More configs" bots will build all the following configs in sequence.
|
||||
# This is using MB's "phases" feature.
|
||||
'Linux (more configs)': {
|
||||
'intelligibility_enhancer_no_include_tests':
|
||||
'intelligibility_enhancer_no_include_tests_x64',
|
||||
'bwe_test_logging':
|
||||
'bwe_test_logging_x64',
|
||||
'dummy_audio_file_devices_no_protobuf':
|
||||
|
@ -74,8 +72,6 @@
|
|||
'Android32 Builder x86 (dbg)': 'android_debug_static_bot_x86',
|
||||
'Android64 Builder x64 (dbg)': 'android_debug_static_bot_x64',
|
||||
'Android32 (more configs)': {
|
||||
'intelligibility_enhancer_no_include_tests':
|
||||
'intelligibility_enhancer_no_include_tests_android_arm',
|
||||
'bwe_test_logging':
|
||||
'bwe_test_logging_android_arm',
|
||||
'dummy_audio_file_devices_no_protobuf':
|
||||
|
@ -95,8 +91,6 @@
|
|||
'Win64 Release (Clang)': 'win_clang_release_bot_x64',
|
||||
'Win32 ASan': 'win_asan_clang_release_bot_x86',
|
||||
'Win (more configs)': {
|
||||
'intelligibility_enhancer_no_include_tests':
|
||||
'intelligibility_enhancer_no_include_tests_x86',
|
||||
'bwe_test_logging':
|
||||
'bwe_test_logging_x86',
|
||||
'dummy_audio_file_devices_no_protobuf':
|
||||
|
@ -186,8 +180,6 @@
|
|||
'linux_experimental': 'release_bot_x64',
|
||||
'linux_libfuzzer_rel': 'libfuzzer_asan_release_bot_x64',
|
||||
'linux_more_configs': {
|
||||
'intelligibility_enhancer_no_include_tests':
|
||||
'intelligibility_enhancer_no_include_tests_x64',
|
||||
'bwe_test_logging':
|
||||
'bwe_test_logging_x64',
|
||||
'dummy_audio_file_devices_no_protobuf':
|
||||
|
@ -209,8 +201,6 @@
|
|||
'android_arm64_rel': 'android_release_bot_arm64',
|
||||
'android_experimental': 'android_release_bot_arm',
|
||||
'android_more_configs': {
|
||||
'intelligibility_enhancer_no_include_tests':
|
||||
'intelligibility_enhancer_no_include_tests_android_arm',
|
||||
'bwe_test_logging':
|
||||
'bwe_test_logging_android_arm',
|
||||
'dummy_audio_file_devices_no_protobuf':
|
||||
|
@ -237,8 +227,6 @@
|
|||
'win_x64_win8': 'debug_bot_x64',
|
||||
'win_x64_win10': 'debug_bot_x64',
|
||||
'win_more_configs': {
|
||||
'intelligibility_enhancer_no_include_tests':
|
||||
'intelligibility_enhancer_no_include_tests_x86',
|
||||
'bwe_test_logging':
|
||||
'bwe_test_logging_x86',
|
||||
'dummy_audio_file_devices_no_protobuf':
|
||||
|
@ -402,9 +390,6 @@
|
|||
],
|
||||
|
||||
# More configs
|
||||
'intelligibility_enhancer_no_include_tests_x64': [
|
||||
'debug_bot', 'x64', 'intelligibility_enhancer', 'no_include_tests'
|
||||
],
|
||||
'bwe_test_logging_x64': [
|
||||
'debug_bot', 'x64', 'bwe_test_logging'
|
||||
],
|
||||
|
@ -415,9 +400,6 @@
|
|||
'debug_bot', 'x64', 'rtti', 'no_sctp'
|
||||
],
|
||||
|
||||
'intelligibility_enhancer_no_include_tests_x86': [
|
||||
'debug_bot', 'x86', 'intelligibility_enhancer', 'no_include_tests'
|
||||
],
|
||||
'bwe_test_logging_x86': [
|
||||
'debug_bot', 'x86', 'bwe_test_logging'
|
||||
],
|
||||
|
@ -428,10 +410,6 @@
|
|||
'debug_bot', 'x86', 'rtti', 'no_sctp'
|
||||
],
|
||||
|
||||
'intelligibility_enhancer_no_include_tests_android_arm': [
|
||||
'android', 'debug_static_bot', 'arm',
|
||||
'intelligibility_enhancer', 'no_include_tests'
|
||||
],
|
||||
'bwe_test_logging_android_arm': [
|
||||
'android', 'debug_static_bot', 'arm', 'bwe_test_logging'
|
||||
],
|
||||
|
@ -586,14 +564,6 @@
|
|||
'gn_args': 'target_cpu="x86"',
|
||||
},
|
||||
|
||||
'intelligibility_enhancer': {
|
||||
'gn_args': 'rtc_enable_intelligibility_enhancer=true',
|
||||
},
|
||||
|
||||
'no_include_tests': {
|
||||
'gn_args': 'rtc_include_tests=false',
|
||||
},
|
||||
|
||||
'bwe_test_logging': {
|
||||
'gn_args': 'rtc_enable_bwe_test_logging=true',
|
||||
},
|
||||
|
|
|
@ -75,9 +75,6 @@ declare_args() {
|
|||
# Selects fixed-point code where possible.
|
||||
rtc_prefer_fixed_point = false
|
||||
|
||||
# Disable the code for the intelligibility enhancer by default.
|
||||
rtc_enable_intelligibility_enhancer = false
|
||||
|
||||
# Enable when an external authentication mechanism is used for performing
|
||||
# packet authentication for RTP packets instead of libsrtp.
|
||||
rtc_enable_external_auth = build_with_chromium
|
||||
|
|
Loading…
Reference in a new issue