Removing the intelligibility enhancer.

The intelligibility enhancer is always disabled and it is the only non-test
target using the lapped transform in common_audio (which we planned to remove).

Bug: webrtc:9689, webrtc:5298
Change-Id: Ida65d3aa11ac366471e7e5cbc053108b376c67d8
Reviewed-on: https://webrtc-review.googlesource.com/96460
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Alex Loiko <aleloi@webrtc.org>
Reviewed-by: Fredrik Solenberg <solenberg@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24504}
This commit is contained in:
Alessio Bazzica 2018-08-30 13:01:34 +02:00 committed by Commit Bot
parent fc173d00ec
commit cc22f51988
33 changed files with 17 additions and 1723 deletions

View file

@ -44,7 +44,6 @@ struct AudioOptions {
SetFrom(&extended_filter_aec, change.extended_filter_aec);
SetFrom(&delay_agnostic_aec, change.delay_agnostic_aec);
SetFrom(&experimental_ns, change.experimental_ns);
SetFrom(&intelligibility_enhancer, change.intelligibility_enhancer);
SetFrom(&residual_echo_detector, change.residual_echo_detector);
SetFrom(&tx_agc_target_dbov, change.tx_agc_target_dbov);
SetFrom(&tx_agc_digital_compression_gain,
@ -74,7 +73,6 @@ struct AudioOptions {
extended_filter_aec == o.extended_filter_aec &&
delay_agnostic_aec == o.delay_agnostic_aec &&
experimental_ns == o.experimental_ns &&
intelligibility_enhancer == o.intelligibility_enhancer &&
residual_echo_detector == o.residual_echo_detector &&
tx_agc_target_dbov == o.tx_agc_target_dbov &&
tx_agc_digital_compression_gain ==
@ -108,7 +106,6 @@ struct AudioOptions {
ost << ToStringIfSet("extended_filter_aec", extended_filter_aec);
ost << ToStringIfSet("delay_agnostic_aec", delay_agnostic_aec);
ost << ToStringIfSet("experimental_ns", experimental_ns);
ost << ToStringIfSet("intelligibility_enhancer", intelligibility_enhancer);
ost << ToStringIfSet("residual_echo_detector", residual_echo_detector);
ost << ToStringIfSet("tx_agc_target_dbov", tx_agc_target_dbov);
ost << ToStringIfSet("tx_agc_digital_compression_gain",
@ -153,7 +150,6 @@ struct AudioOptions {
absl::optional<bool> extended_filter_aec;
absl::optional<bool> delay_agnostic_aec;
absl::optional<bool> experimental_ns;
absl::optional<bool> intelligibility_enhancer;
// Note that tx_agc_* only applies to non-experimental AGC.
absl::optional<bool> residual_echo_detector;
absl::optional<uint16_t> tx_agc_target_dbov;

View file

@ -104,8 +104,6 @@ const char MediaConstraintsInterface::kNoiseSuppression[] =
"googNoiseSuppression";
const char MediaConstraintsInterface::kExperimentalNoiseSuppression[] =
"googNoiseSuppression2";
const char MediaConstraintsInterface::kIntelligibilityEnhancer[] =
"intelligibilityEnhancer";
const char MediaConstraintsInterface::kHighpassFilter[] = "googHighpassFilter";
const char MediaConstraintsInterface::kTypingNoiseDetection[] =
"googTypingNoiseDetection";
@ -241,9 +239,6 @@ void CopyConstraintsIntoAudioOptions(
ConstraintToOptional<bool>(
constraints, MediaConstraintsInterface::kExperimentalNoiseSuppression,
&options->experimental_ns);
ConstraintToOptional<bool>(
constraints, MediaConstraintsInterface::kIntelligibilityEnhancer,
&options->intelligibility_enhancer);
ConstraintToOptional<bool>(constraints,
MediaConstraintsInterface::kHighpassFilter,
&options->highpass_filter);

View file

@ -73,7 +73,6 @@ class MediaConstraintsInterface {
static const char kExperimentalAutoGainControl[]; // googAutoGainControl2
static const char kNoiseSuppression[]; // googNoiseSuppression
static const char kExperimentalNoiseSuppression[]; // googNoiseSuppression2
static const char kIntelligibilityEnhancer[]; // intelligibilityEnhancer
static const char kHighpassFilter[]; // googHighpassFilter
static const char kTypingNoiseDetection[]; // googTypingNoiseDetection
static const char kAudioMirroring[]; // googAudioMirroring

View file

@ -273,12 +273,6 @@ rtc_static_library("rtc_audio_video") {
suppressed_configs += [ "//build/config/clang:find_bad_constructs" ]
}
if (rtc_enable_intelligibility_enhancer) {
defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ]
} else {
defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ]
}
if (rtc_opus_support_120ms_ptime) {
defines += [ "WEBRTC_OPUS_SUPPORT_120MS_PTIME=1" ]
} else {

View file

@ -53,14 +53,6 @@ constexpr size_t kMaxUnsignaledRecvStreams = 4;
constexpr int kNackRtpHistoryMs = 5000;
// Check to verify that the define for the intelligibility enhancer is properly
// set.
#if !defined(WEBRTC_INTELLIGIBILITY_ENHANCER) || \
(WEBRTC_INTELLIGIBILITY_ENHANCER != 0 && \
WEBRTC_INTELLIGIBILITY_ENHANCER != 1)
#error "Set WEBRTC_INTELLIGIBILITY_ENHANCER to either 0 or 1"
#endif
// For SendSideBwe, Opus bitrate should be in the range between 6000 and 32000.
const int kOpusMinBitrateBps = 6000;
const int kOpusBitrateFbBps = 32000;
@ -296,7 +288,6 @@ void WebRtcVoiceEngine::Init() {
options.extended_filter_aec = false;
options.delay_agnostic_aec = false;
options.experimental_ns = false;
options.intelligibility_enhancer = false;
options.residual_echo_detector = true;
bool error = ApplyOptions(options);
RTC_DCHECK(error);
@ -410,11 +401,6 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) {
}
#endif
#if (WEBRTC_INTELLIGIBILITY_ENHANCER == 0)
// Hardcode the intelligibility enhancer to be off.
options.intelligibility_enhancer = false;
#endif
if (options.echo_cancellation) {
// Check if platform supports built-in EC. Currently only supported on
// Android and in combination with Java based audio layer.
@ -479,19 +465,9 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) {
webrtc::apm_helpers::SetAgcConfig(apm(), default_agc_config_);
}
if (options.intelligibility_enhancer) {
intelligibility_enhancer_ = options.intelligibility_enhancer;
}
if (intelligibility_enhancer_ && *intelligibility_enhancer_) {
RTC_LOG(LS_INFO) << "Enabling NS when Intelligibility Enhancer is active.";
options.noise_suppression = intelligibility_enhancer_;
}
if (options.noise_suppression) {
if (adm()->BuiltInNSIsAvailable()) {
bool builtin_ns =
*options.noise_suppression &&
!(intelligibility_enhancer_ && *intelligibility_enhancer_);
bool builtin_ns = *options.noise_suppression;
if (adm()->EnableBuiltInNS(builtin_ns) == 0 && builtin_ns) {
// Disable internal software NS if built-in NS is enabled,
// i.e., replace the software NS with the built-in NS.
@ -558,13 +534,6 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) {
new webrtc::ExperimentalNs(*experimental_ns_));
}
if (intelligibility_enhancer_) {
RTC_LOG(LS_INFO) << "Intelligibility Enhancer is enabled? "
<< *intelligibility_enhancer_;
config.Set<webrtc::Intelligibility>(
new webrtc::Intelligibility(*intelligibility_enhancer_));
}
webrtc::AudioProcessing::Config apm_config = apm()->GetConfig();
if (options.highpass_filter) {

View file

@ -120,15 +120,13 @@ class WebRtcVoiceEngine final {
bool initialized_ = false;
webrtc::AgcConfig default_agc_config_;
// Cache received extended_filter_aec, delay_agnostic_aec, experimental_ns
// and intelligibility_enhancer values, and apply them
// in case they are missing in the audio options. We need to do this because
// SetExtraOptions() will revert to defaults for options which are not
// provided.
// Cache received extended_filter_aec, delay_agnostic_aec and experimental_ns
// values, and apply them in case they are missing in the audio options.
// We need to do this because SetExtraOptions() will revert to defaults for
// options which are not provided.
absl::optional<bool> extended_filter_aec_;
absl::optional<bool> delay_agnostic_aec_;
absl::optional<bool> experimental_ns_;
absl::optional<bool> intelligibility_enhancer_;
// Jitter buffer settings for new streams.
size_t audio_jitter_buffer_max_packets_ = 50;
bool audio_jitter_buffer_fast_accelerate_ = false;

View file

@ -138,18 +138,6 @@ rtc_static_library("audio_processing") {
defines += [ "WEBRTC_UNTRUSTED_DELAY" ]
}
if (rtc_enable_intelligibility_enhancer) {
defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ]
sources += [
"intelligibility/intelligibility_enhancer.cc",
"intelligibility/intelligibility_enhancer.h",
"intelligibility/intelligibility_utils.cc",
"intelligibility/intelligibility_utils.h",
]
} else {
defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ]
}
if (rtc_prefer_fixed_point) {
defines += [ "WEBRTC_NS_FIXED" ]
} else {
@ -337,10 +325,6 @@ if (rtc_include_tests) {
":transient_suppression_test",
]
if (rtc_enable_intelligibility_enhancer) {
deps += [ ":intelligibility_proc" ]
}
if (rtc_enable_protobuf) {
deps += [
":audioproc_f",
@ -422,16 +406,6 @@ if (rtc_include_tests) {
defines = []
if (rtc_enable_intelligibility_enhancer) {
defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ]
sources += [
"intelligibility/intelligibility_enhancer_unittest.cc",
"intelligibility/intelligibility_utils_unittest.cc",
]
} else {
defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ]
}
if (rtc_prefer_fixed_point) {
defines += [ "WEBRTC_AUDIOPROC_FIXED_PROFILE" ]
} else {
@ -500,12 +474,6 @@ if (rtc_include_tests) {
"../../test:perf_test",
"../../test:test_support",
]
if (rtc_enable_intelligibility_enhancer) {
defines = [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ]
} else {
defines = [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ]
}
}
rtc_source_set("file_audio_generator_unittests") {
@ -666,24 +634,6 @@ if (rtc_include_tests) {
]
}
if (rtc_enable_intelligibility_enhancer) {
rtc_executable("intelligibility_proc") {
testonly = true
sources = [
"intelligibility/test/intelligibility_proc.cc",
]
deps = [
":audio_processing",
":audioproc_test_utils",
"../../common_audio",
"../../rtc_base:rtc_base_approved",
"../../system_wrappers:metrics_default",
"../../test:test_support",
"//testing/gtest",
]
}
}
if (rtc_enable_protobuf) {
proto_library("audioproc_unittest_proto") {
sources = [

View file

@ -45,8 +45,6 @@ void CopyFromConfigToEvent(const webrtc::InternalAPMConfig& config,
pb_cfg->set_transient_suppression_enabled(
config.transient_suppression_enabled);
pb_cfg->set_intelligibility_enhancer_enabled(
config.intelligibility_enhancer_enabled);
pb_cfg->set_pre_amplifier_enabled(config.pre_amplifier_enabled);
pb_cfg->set_pre_amplifier_fixed_gain_factor(

View file

@ -38,9 +38,6 @@
#include "rtc_base/system/arch.h"
#include "rtc_base/timeutils.h"
#include "rtc_base/trace_event.h"
#if WEBRTC_INTELLIGIBILITY_ENHANCER
#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
#endif
#include "modules/audio_processing/level_estimator_impl.h"
#include "modules/audio_processing/low_cut_filter.h"
#include "modules/audio_processing/noise_suppression_impl.h"
@ -50,14 +47,6 @@
#include "rtc_base/atomicops.h"
#include "system_wrappers/include/metrics.h"
// Check to verify that the define for the intelligibility enhancer is properly
// set.
#if !defined(WEBRTC_INTELLIGIBILITY_ENHANCER) || \
(WEBRTC_INTELLIGIBILITY_ENHANCER != 0 && \
WEBRTC_INTELLIGIBILITY_ENHANCER != 1)
#error "Set WEBRTC_INTELLIGIBILITY_ENHANCER to either 0 or 1"
#endif
#define RETURN_ON_ERR(expr) \
do { \
int err = (expr); \
@ -170,7 +159,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
bool mobile_echo_controller_enabled,
bool residual_echo_detector_enabled,
bool noise_suppressor_enabled,
bool intelligibility_enhancer_enabled,
bool adaptive_gain_controller_enabled,
bool gain_controller2_enabled,
bool pre_amplifier_enabled,
@ -186,8 +174,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
changed |=
(residual_echo_detector_enabled != residual_echo_detector_enabled_);
changed |= (noise_suppressor_enabled != noise_suppressor_enabled_);
changed |=
(intelligibility_enhancer_enabled != intelligibility_enhancer_enabled_);
changed |=
(adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_);
changed |=
@ -204,7 +190,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
mobile_echo_controller_enabled_ = mobile_echo_controller_enabled;
residual_echo_detector_enabled_ = residual_echo_detector_enabled;
noise_suppressor_enabled_ = noise_suppressor_enabled;
intelligibility_enhancer_enabled_ = intelligibility_enhancer_enabled;
adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled;
gain_controller2_enabled_ = gain_controller2_enabled;
pre_amplifier_enabled_ = pre_amplifier_enabled;
@ -221,12 +206,7 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandSubModulesActive()
const {
#if WEBRTC_INTELLIGIBILITY_ENHANCER
return CaptureMultiBandProcessingActive() ||
intelligibility_enhancer_enabled_ || voice_activity_detector_enabled_;
#else
return CaptureMultiBandProcessingActive() || voice_activity_detector_enabled_;
#endif
}
bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandProcessingActive()
@ -260,11 +240,7 @@ bool AudioProcessingImpl::ApmSubmoduleStates::RenderFullBandProcessingActive()
bool AudioProcessingImpl::ApmSubmoduleStates::RenderMultiBandProcessingActive()
const {
#if WEBRTC_INTELLIGIBILITY_ENHANCER
return intelligibility_enhancer_enabled_;
#else
return false;
#endif
}
struct AudioProcessingImpl::ApmPublicSubmodules {
@ -283,9 +259,6 @@ struct AudioProcessingImpl::ApmPublicSubmodules {
// Accessed internally from both render and capture.
std::unique_ptr<TransientSuppressor> transient_suppressor;
#if WEBRTC_INTELLIGIBILITY_ENHANCER
std::unique_ptr<IntelligibilityEnhancer> intelligibility_enhancer;
#endif
};
struct AudioProcessingImpl::ApmPrivateSubmodules {
@ -405,7 +378,7 @@ AudioProcessingImpl::AudioProcessingImpl(
#else
capture_(config.Get<ExperimentalNs>().enabled),
#endif
capture_nonlocked_(config.Get<Intelligibility>().enabled) {
capture_nonlocked_() {
{
rtc::CritScope cs_render(&crit_render_);
rtc::CritScope cs_capture(&crit_capture_);
@ -589,9 +562,6 @@ int AudioProcessingImpl::InitializeLocked() {
public_submodules_->gain_control_for_experimental_agc->Initialize();
}
InitializeTransient();
#if WEBRTC_INTELLIGIBILITY_ENHANCER
InitializeIntelligibility();
#endif
InitializeLowCutFilter();
public_submodules_->noise_suppression->Initialize(num_proc_channels(),
proc_sample_rate_hz());
@ -742,15 +712,6 @@ void AudioProcessingImpl::SetExtraOptions(const webrtc::Config& config) {
config.Get<ExperimentalNs>().enabled;
InitializeTransient();
}
#if WEBRTC_INTELLIGIBILITY_ENHANCER
if (capture_nonlocked_.intelligibility_enabled !=
config.Get<Intelligibility>().enabled) {
capture_nonlocked_.intelligibility_enabled =
config.Get<Intelligibility>().enabled;
InitializeIntelligibility();
}
#endif
}
int AudioProcessingImpl::proc_sample_rate_hz() const {
@ -1306,18 +1267,6 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
capture_buffer->CopyLowPassToReference();
}
public_submodules_->noise_suppression->ProcessCaptureAudio(capture_buffer);
#if WEBRTC_INTELLIGIBILITY_ENHANCER
if (capture_nonlocked_.intelligibility_enabled) {
RTC_DCHECK(public_submodules_->noise_suppression->is_enabled());
const int gain_db =
public_submodules_->gain_control->is_enabled()
? public_submodules_->gain_control->compression_gain_db()
: 0;
const float gain = DbToRatio(gain_db);
public_submodules_->intelligibility_enhancer->SetCaptureNoiseEstimate(
public_submodules_->noise_suppression->NoiseEstimate(), gain);
}
#endif
// Ensure that the stream delay was set before the call to the
// AECM ProcessCaptureAudio function.
@ -1540,13 +1489,6 @@ int AudioProcessingImpl::ProcessRenderStreamLocked() {
render_buffer->SplitIntoFrequencyBands();
}
#if WEBRTC_INTELLIGIBILITY_ENHANCER
if (capture_nonlocked_.intelligibility_enabled) {
public_submodules_->intelligibility_enhancer->ProcessRenderAudio(
render_buffer);
}
#endif
if (submodule_states_.RenderMultiBandSubModulesActive()) {
QueueBandedRenderAudio(render_buffer);
}
@ -1809,7 +1751,6 @@ bool AudioProcessingImpl::UpdateActiveSubmoduleStates() {
public_submodules_->echo_control_mobile->is_enabled(),
config_.residual_echo_detector.enabled,
public_submodules_->noise_suppression->is_enabled(),
capture_nonlocked_.intelligibility_enabled,
public_submodules_->gain_control->is_enabled(),
config_.gain_controller2.enabled, config_.pre_amplifier.enabled,
capture_nonlocked_.echo_controller_enabled,
@ -1830,18 +1771,6 @@ void AudioProcessingImpl::InitializeTransient() {
}
}
void AudioProcessingImpl::InitializeIntelligibility() {
#if WEBRTC_INTELLIGIBILITY_ENHANCER
if (capture_nonlocked_.intelligibility_enabled) {
public_submodules_->intelligibility_enhancer.reset(
new IntelligibilityEnhancer(capture_nonlocked_.split_rate,
render_.render_audio->num_channels(),
render_.render_audio->num_bands(),
NoiseSuppressionImpl::num_noise_bins()));
}
#endif
}
void AudioProcessingImpl::InitializeLowCutFilter() {
if (config_.high_pass_filter.enabled) {
private_submodules_->low_cut_filter.reset(
@ -2029,8 +1958,6 @@ void AudioProcessingImpl::WriteAecDumpConfigMessage(bool forced) {
apm_config.transient_suppression_enabled =
capture_.transient_suppressor_enabled;
apm_config.intelligibility_enhancer_enabled =
capture_nonlocked_.intelligibility_enabled;
apm_config.experiments_description = experiments_description;
apm_config.pre_amplifier_enabled = config_.pre_amplifier.enabled;
apm_config.pre_amplifier_fixed_gain_factor =

View file

@ -183,7 +183,6 @@ class AudioProcessingImpl : public AudioProcessing {
bool mobile_echo_controller_enabled,
bool residual_echo_detector_enabled,
bool noise_suppressor_enabled,
bool intelligibility_enhancer_enabled,
bool adaptive_gain_controller_enabled,
bool gain_controller2_enabled,
bool pre_amplifier_enabled,
@ -208,7 +207,6 @@ class AudioProcessingImpl : public AudioProcessing {
bool mobile_echo_controller_enabled_ = false;
bool residual_echo_detector_enabled_ = false;
bool noise_suppressor_enabled_ = false;
bool intelligibility_enhancer_enabled_ = false;
bool adaptive_gain_controller_enabled_ = false;
bool gain_controller2_enabled_ = false;
bool pre_amplifier_enabled_ = false;
@ -245,8 +243,6 @@ class AudioProcessingImpl : public AudioProcessing {
// acquired.
void InitializeTransient()
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
void InitializeIntelligibility()
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
int InitializeLocked(const ProcessingConfig& config)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
void InitializeResidualEchoDetector()
@ -399,18 +395,16 @@ class AudioProcessingImpl : public AudioProcessing {
} capture_ RTC_GUARDED_BY(crit_capture_);
struct ApmCaptureNonLockedState {
ApmCaptureNonLockedState(bool intelligibility_enabled)
ApmCaptureNonLockedState()
: capture_processing_format(kSampleRate16kHz),
split_rate(kSampleRate16kHz),
stream_delay_ms(0),
intelligibility_enabled(intelligibility_enabled) {}
stream_delay_ms(0) {}
// Only the rate and samples fields of capture_processing_format_ are used
// because the forward processing number of channels is mutable and is
// tracked by the capture_audio_.
StreamConfig capture_processing_format;
int split_rate;
int stream_delay_ms;
bool intelligibility_enabled;
bool echo_controller_enabled = false;
} capture_nonlocked_;

View file

@ -26,14 +26,6 @@
#include "test/gtest.h"
#include "test/testsupport/perf_test.h"
// Check to verify that the define for the intelligibility enhancer is properly
// set.
#if !defined(WEBRTC_INTELLIGIBILITY_ENHANCER) || \
(WEBRTC_INTELLIGIBILITY_ENHANCER != 0 && \
WEBRTC_INTELLIGIBILITY_ENHANCER != 1)
#error "Set WEBRTC_INTELLIGIBILITY_ENHANCER to either 0 or 1"
#endif
namespace webrtc {
namespace {
@ -49,7 +41,6 @@ enum class ProcessorType { kRender, kCapture };
enum class SettingsType {
kDefaultApmDesktop,
kDefaultApmMobile,
kDefaultApmDesktopAndIntelligibilityEnhancer,
kAllSubmodulesTurnedOff,
kDefaultApmDesktopWithoutDelayAgnostic,
kDefaultApmDesktopWithoutExtendedFilter
@ -99,20 +90,6 @@ struct SimulationConfig {
simulation_configs.push_back(SimulationConfig(sample_rate, settings));
}
}
#if WEBRTC_INTELLIGIBILITY_ENHANCER == 1
const SettingsType intelligibility_enhancer_settings[] = {
SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer};
const int intelligibility_enhancer_sample_rates[] = {8000, 16000, 32000,
48000};
for (auto sample_rate : intelligibility_enhancer_sample_rates) {
for (auto settings : intelligibility_enhancer_settings) {
simulation_configs.push_back(SimulationConfig(sample_rate, settings));
}
}
#endif
#endif
const SettingsType mobile_settings[] = {SettingsType::kDefaultApmMobile};
@ -137,9 +114,6 @@ struct SimulationConfig {
case SettingsType::kDefaultApmDesktop:
description = "DefaultApmDesktop";
break;
case SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer:
description = "DefaultApmDesktopAndIntelligibilityEnhancer";
break;
case SettingsType::kAllSubmodulesTurnedOff:
description = "AllSubmodulesOff";
break;
@ -538,16 +512,6 @@ class CallSimulator : public ::testing::TestWithParam<SimulationConfig> {
apm_->SetExtraOptions(config);
break;
}
case SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer: {
Config config;
config.Set<Intelligibility>(new Intelligibility(true));
add_default_desktop_config(&config);
apm_.reset(AudioProcessingBuilder().Create(config));
ASSERT_TRUE(!!apm_);
set_default_desktop_apm_runtime_settings(apm_.get());
apm_->SetExtraOptions(config);
break;
}
case SettingsType::kAllSubmodulesTurnedOff: {
apm_.reset(AudioProcessingBuilder().Create());
ASSERT_TRUE(!!apm_);

View file

@ -47,7 +47,6 @@ message Stream {
// Contains the configurations of various APM component. A Config message is
// added when any of the fields are changed.
message Config {
// Next field number 19.
// Acoustic echo canceler.
optional bool aec_enabled = 1;
optional bool aec_delay_agnostic_enabled = 2;
@ -73,11 +72,12 @@ message Config {
// Semicolon-separated string containing experimental feature
// descriptions.
optional string experiments_description = 17;
// Intelligibility Enhancer.
optional bool intelligibility_enhancer_enabled = 18;
reserved 18; // Intelligibility enhancer enabled (deprecated).
// Pre amplifier.
optional bool pre_amplifier_enabled = 19;
optional float pre_amplifier_fixed_gain_factor = 20;
// Next field number 21.
}
message Event {

View file

@ -32,8 +32,6 @@ bool InternalAPMConfig::operator==(const InternalAPMConfig& other) {
hpf_enabled == other.hpf_enabled && ns_enabled == other.ns_enabled &&
ns_level == other.ns_level &&
transient_suppression_enabled == other.transient_suppression_enabled &&
intelligibility_enhancer_enabled ==
other.intelligibility_enhancer_enabled &&
noise_robust_agc_enabled == other.noise_robust_agc_enabled &&
pre_amplifier_enabled == other.pre_amplifier_enabled &&
pre_amplifier_fixed_gain_factor ==

View file

@ -49,7 +49,6 @@ struct InternalAPMConfig {
bool ns_enabled = false;
int ns_level = 0;
bool transient_suppression_enabled = false;
bool intelligibility_enhancer_enabled = false;
bool noise_robust_agc_enabled = false;
bool pre_amplifier_enabled = false;
float pre_amplifier_fixed_gain_factor = 1.f;

View file

@ -151,17 +151,6 @@ struct ExperimentalNs {
bool enabled;
};
// Use to enable intelligibility enhancer in audio processing.
//
// Note: If enabled and the reverse stream has more than one output channel,
// the reverse stream will become an upmixed mono signal.
struct Intelligibility {
Intelligibility() : enabled(false) {}
explicit Intelligibility(bool enabled) : enabled(enabled) {}
static const ConfigOptionID identifier = ConfigOptionID::kIntelligibility;
bool enabled;
};
// The Audio Processing Module (APM) provides a collection of voice processing
// components designed for real-time communications software.
//

View file

@ -31,7 +31,7 @@ enum class ConfigOptionID {
kExperimentalAgc,
kExperimentalNs,
kBeamforming, // Deprecated
kIntelligibility,
kIntelligibility, // Deprecated
kEchoCanceller3, // Deprecated
kAecRefinedAdaptiveFilter,
kLevelControl // Deprecated

View file

@ -1,390 +0,0 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
#include <math.h>
#include <stdlib.h>
#include <algorithm>
#include <limits>
#include <numeric>
#include "common_audio/include/audio_util.h"
#include "common_audio/window_generator.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_minmax.h"
namespace webrtc {
namespace {
// Tuning constants for the intelligibility enhancer.

// Multiplier applied to the ERB scale value when sizing the filter bank
// (GetBankSize) and when deriving the first processed band (constructor).
const size_t kErbResolution = 2;
// Analysis window length in milliseconds; together with the sample rate it
// fixes the FFT order used in the constructor.
const int kWindowSizeMs = 16;
const int kChunkSizeMs = 10; // Size provided by APM.
// Frequency (kHz) from which the constructor derives |start_freq_|.
const float kClipFreqKhz = 0.2f;
// Alpha parameter passed to WindowGenerator::KaiserBesselDerived().
const float kKbdAlpha = 1.5f;
const float kLambdaBot = -1.f; // Extreme values in bisection
const float kLambdaTop = -1e-5f; // search for lambda.
const float kVoiceProbabilityThreshold = 0.5f;
// Number of chunks after voice activity which is still considered speech.
const size_t kSpeechOffsetDelay = 10;
const float kDecayRate = 0.995f; // Power estimation decay rate.
// Passed to the gain applier in the constructor.
const float kMaxRelativeGainChange = 0.005f;
const float kRho = 0.0004f; // Default production and interpretation SNR.
// Scale applied to every band power computed by MapToErbBands().
const float kPowerNormalizationFactor = 1.f / (1 << 30);
// SNR thresholds used by SnrBasedEffectActivation(): the effect is switched
// off once the smoothed SNR exceeds kMaxActiveSNR and switched on once it
// drops below kMinInactiveSNR.
const float kMaxActiveSNR = 128.f; // 21dB
const float kMinInactiveSNR = 32.f; // 15dB
// While active, ProcessAudioBlock() recomputes the gains only on chunks whose
// running count is a multiple of this period.
const size_t kGainUpdatePeriod = 10u;
// Inner product of the first |length| entries of |a| and |b|, accumulated in
// single precision from index 0 upwards. Yields 0 when |length| is 0.
float DotProduct(const float* a, const float* b, size_t length) {
  return std::inner_product(a, a + length, b, 0.f);
}
// Projects the power spectral density |pow| onto every filter of
// |filter_bank| and writes one normalized band power per filter to |result|,
// in filter order. |result| must have room for filter_bank.size() values and
// |pow| must hold at least as many entries as the longest filter.
void MapToErbBands(const float* pow,
                   const std::vector<std::vector<float>>& filter_bank,
                   float* result) {
  float* out = result;
  for (const std::vector<float>& band_filter : filter_bank) {
    RTC_DCHECK_GT(band_filter.size(), 0);
    const float band_power =
        DotProduct(band_filter.data(), pow, band_filter.size());
    *out = kPowerNormalizationFactor * band_power;
    ++out;
  }
}
} // namespace
// Constructs an enhancer for render audio sampled at |sample_rate_hz| with
// |num_render_channels| channels. |num_noise_bins| is the length of every
// noise estimate later passed to SetCaptureNoiseEstimate(); |num_bands|
// determines how many high-band delay buffers are created (num_bands - 1).
//
// NOTE(review): the initializer list calls GetBankSize() and CreateErbBank()
// and reads members such as |freqs_| and |bank_size_|, so it relies on the
// member declaration order in the header (not visible here) — confirm before
// reordering anything.
IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
size_t num_render_channels,
size_t num_bands,
size_t num_noise_bins)
// Bin count reported by RealFourier for a kWindowSizeMs window at this rate.
: freqs_(RealFourier::ComplexLength(
RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),
num_noise_bins_(num_noise_bins),
// Samples per kChunkSizeMs chunk.
chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)),
bank_size_(GetBankSize(sample_rate_hz, kErbResolution)),
sample_rate_hz_(sample_rate_hz),
num_render_channels_(num_render_channels),
// The clear (render) estimator tracks |freqs_| bins, the noise estimator
// tracks |num_noise_bins| bins.
clear_power_estimator_(freqs_, kDecayRate),
noise_power_estimator_(num_noise_bins, kDecayRate),
filtered_clear_pow_(bank_size_, 0.f),
filtered_noise_pow_(num_noise_bins, 0.f),
center_freqs_(bank_size_),
// Separate filter banks because capture and render use different bin counts.
capture_filter_bank_(CreateErbBank(num_noise_bins)),
render_filter_bank_(CreateErbBank(freqs_)),
gains_eq_(bank_size_),
gain_applier_(freqs_, kMaxRelativeGainChange),
audio_s16_(chunk_length_),
chunks_since_voice_(kSpeechOffsetDelay),
is_speech_(false),
// Starts at the deactivation threshold with the effect inactive.
snr_(kMaxActiveSNR),
is_active_(false),
num_chunks_(0u),
num_active_chunks_(0u),
noise_estimation_buffer_(num_noise_bins),
noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,
std::vector<float>(num_noise_bins),
RenderQueueItemVerifier<float>(num_noise_bins)) {
RTC_DCHECK_LE(kRho, 1.f);
// Same ERB-scale expression as GetBankSize(), evaluated at kClipFreqKhz.
const size_t erb_index = static_cast<size_t>(
ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +
43.f));
// First band index handed to SolveForGainsGivenLambda(); never below 1.
start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);
// Kaiser-Bessel-derived window of 2^FftOrder(freqs_) samples.
size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_);
std::vector<float> kbd_window(window_size);
WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size,
kbd_window.data());
// |this| is handed to the transform — presumably as the per-block callback
// (ProcessAudioBlock), with window_size / 2 as the shift between blocks;
// confirm against LappedTransform.
render_mangler_.reset(new LappedTransform(
num_render_channels_, num_render_channels_, chunk_length_,
kbd_window.data(), window_size, window_size / 2, this));
const size_t initial_delay = render_mangler_->initial_delay();
// One delay buffer per band above the first, each sized with the transform's
// initial delay.
// NOTE(review): num_bands - 1 is unsigned, so num_bands == 0 would wrap around
// — presumably callers always pass at least 1; confirm.
for (size_t i = 0u; i < num_bands - 1; ++i) {
high_bands_buffers_.push_back(std::unique_ptr<intelligibility::DelayBuffer>(
new intelligibility::DelayBuffer(initial_delay, num_render_channels_)));
}
}
// Logs the share of processed chunks during which the effect was active.
IntelligibilityEnhancer::~IntelligibilityEnhancer() {
  // Best-effort statistics only: this destructor does not run when the
  // app/tab is killed, so the log line may never be emitted.
  const bool processed_any = num_chunks_ > 0;
  if (!processed_any) {
    RTC_LOG(LS_INFO) << "Intelligibility Enhancer processed no chunk.";
    return;
  }
  const float active_percent =
      100.f * static_cast<float>(num_active_chunks_) / num_chunks_;
  RTC_LOG(LS_INFO) << "Intelligibility Enhancer was active for "
                   << active_percent << "% of the call.";
}
// Scales the capture-side noise estimate |noise| by |gain| and queues it for
// consumption by ProcessRenderAudio(). |noise| must contain exactly
// |num_noise_bins_| values.
void IntelligibilityEnhancer::SetCaptureNoiseEstimate(std::vector<float> noise,
                                                      float gain) {
  RTC_DCHECK_EQ(noise.size(), num_noise_bins_);
  // |noise| is a by-value copy, so it can be scaled in place.
  std::transform(noise.begin(), noise.end(), noise.begin(),
                 [gain](float bin) { return bin * gain; });
  if (!noise_estimation_queue_.Insert(&noise)) {
    // Deliberately ignored: a failed insert only loses this one estimate,
    // and it is not critical to get each noise estimate.
  }
}
// Processes one render chunk in place: consumes all queued noise estimates,
// runs the lapped transform on the kBand0To8kHz band and delays the other
// bands. |audio| must have |num_render_channels_| channels.
void IntelligibilityEnhancer::ProcessRenderAudio(AudioBuffer* audio) {
  RTC_DCHECK_EQ(num_render_channels_, audio->num_channels());
  // Feed every pending noise estimate to the noise power estimator.
  for (;;) {
    const bool got_estimate =
        noise_estimation_queue_.Remove(&noise_estimation_buffer_);
    if (!got_estimate) {
      break;
    }
    noise_power_estimator_.Step(noise_estimation_buffer_.data());
  }
  float* const* const band0_channels = audio->split_channels_f(kBand0To8kHz);
  // Speech detection looks at the first channel only.
  is_speech_ = IsSpeech(band0_channels[0]);
  // Input and output are the same buffers, i.e. the band is modified in place.
  render_mangler_->ProcessChunk(band0_channels, band0_channels);
  DelayHighBands(audio);
}
void IntelligibilityEnhancer::ProcessAudioBlock(
const std::complex<float>* const* in_block,
size_t in_channels,
size_t frames,
size_t /* out_channels */,
std::complex<float>* const* out_block) {
RTC_DCHECK_EQ(freqs_, frames);
if (is_speech_) {
clear_power_estimator_.Step(in_block[0]);
}
SnrBasedEffectActivation();
++num_chunks_;
if (is_active_) {
++num_active_chunks_;
if (num_chunks_ % kGainUpdatePeriod == 0) {
MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,
filtered_clear_pow_.data());
MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,
filtered_noise_pow_.data());
SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
const float power_target =
std::accumulate(filtered_clear_pow_.data(),
filtered_clear_pow_.data() + bank_size_, 0.f);
const float power_top =
DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
const float power_bot =
DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
if (power_target >= power_bot && power_target <= power_top) {
SolveForLambda(power_target);
UpdateErbGains();
} // Else experiencing power underflow, so do nothing.
}
}
for (size_t i = 0; i < in_channels; ++i) {
gain_applier_.Apply(in_block[i], out_block[i]);
}
}
// Updates the smoothed clear-to-noise power ratio |snr_| and switches the
// effect on or off with hysteresis: it is deactivated once |snr_| exceeds
// kMaxActiveSNR and activated once |snr_| falls below kMinInactiveSNR.
// Deactivation also resets all target gains to unity.
void IntelligibilityEnhancer::SnrBasedEffectActivation() {
  const auto& clear_psd = clear_power_estimator_.power();
  const auto& noise_psd = noise_power_estimator_.power();
  // Sum at most |freqs_| bins, but never more than an estimator actually
  // holds: the noise estimator is sized by the constructor's num_noise_bins
  // argument, which is independent of |freqs_|, so summing a fixed |freqs_|
  // entries could read past the end of its buffer.
  const size_t clear_bins = std::min<size_t>(freqs_, clear_psd.size());
  const size_t noise_bins = std::min<size_t>(freqs_, noise_psd.size());
  const float clear_power =
      std::accumulate(clear_psd.data(), clear_psd.data() + clear_bins, 0.f);
  const float noise_power =
      std::accumulate(noise_psd.data(), noise_psd.data() + noise_bins, 0.f);
  // Exponential smoothing; epsilon guards against division by zero.
  snr_ = kDecayRate * snr_ +
         (1.f - kDecayRate) * clear_power /
             (noise_power + std::numeric_limits<float>::epsilon());

  if (is_active_ && snr_ > kMaxActiveSNR) {
    RTC_LOG(LS_INFO) << "Intelligibility Enhancer was deactivated at chunk "
                     << num_chunks_;
    is_active_ = false;
    // Set the target gains to unity.
    std::fill_n(gain_applier_.target(), freqs_, 1.f);
  } else if (!is_active_ && snr_ < kMinInactiveSNR) {
    RTC_LOG(LS_INFO) << "Intelligibility Enhancer was activated at chunk "
                     << num_chunks_;
    is_active_ = true;
  }
}
void IntelligibilityEnhancer::SolveForLambda(float power_target) {
const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values
const int kMaxIters = 100; // for these, based on experiments.
const float reciprocal_power_target =
1.f / (power_target + std::numeric_limits<float>::epsilon());
float lambda_bot = kLambdaBot;
float lambda_top = kLambdaTop;
float power_ratio = 2.f; // Ratio of achieved power to target power.
int iters = 0;
while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) {
const float lambda = (lambda_bot + lambda_top) / 2.f;
SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.data());
const float power =
DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
if (power < power_target) {
lambda_bot = lambda;
} else {
lambda_top = lambda;
}
power_ratio = std::fabs(power * reciprocal_power_target);
++iters;
}
}
// Writes the gain applier's per-frequency-bin target gains. Each bin's target
// is the sum over all filter bank bands of that band's render filter weight
// at the bin multiplied by the band's gain in |gains_eq_|, i.e.
// target = render_filter_bank' * gains_eq.
void IntelligibilityEnhancer::UpdateErbGains() {
  float* const bin_gains = gain_applier_.target();
  for (size_t bin = 0; bin < freqs_; ++bin) {
    float weighted_sum = 0.f;
    for (size_t band = 0; band < bank_size_; ++band) {
      weighted_sum += render_filter_bank_[band][bin] * gains_eq_[band];
    }
    bin_gains[bin] = weighted_sum;
  }
}
// Returns the number of filters in the bank: the ERB-scale value evaluated at
// |sample_rate| / 2000, rounded up, multiplied by |erb_resolution|.
size_t IntelligibilityEnhancer::GetBankSize(int sample_rate,
                                            size_t erb_resolution) {
  const float upper_freq = sample_rate / 2000.f;
  const float erb_value =
      11.17f * logf((upper_freq + 0.312f) / (upper_freq + 14.6575f)) + 43.f;
  const size_t whole_erbs = static_cast<size_t>(ceilf(erb_value));
  return whole_erbs * erb_resolution;
}
// Fills |center_freqs_| with the band centre frequencies (rescaled so that the
// last band sits at half the sample rate) and returns a bank of |bank_size_|
// filters over |num_freqs| bins. Each filter ramps up from the previous band's
// bin, is flat up to the next band's bin and ramps down over the following
// bands. Finally the weights of every bin are normalized to sum to one across
// the bands.
std::vector<std::vector<float>> IntelligibilityEnhancer::CreateErbBank(
    size_t num_freqs) {
  // How many bands to the left/right the ramps of a filter reach.
  const size_t kLeftReach = 1;
  const size_t kRightReach = 4;
  const float half_sample_rate = 0.5f * sample_rate_hz_;

  // Unscaled centre frequency of every band.
  for (size_t band = 0; band < bank_size_; ++band) {
    const float erb_number =
        fabsf((band + 1.f) / static_cast<float>(kErbResolution));
    const float unshifted =
        676170.4f / (47.06538f - expf(0.08950404f * erb_number));
    center_freqs_[band] = unshifted - 14678.49f;
  }

  // Rescale so that the highest band lands exactly on half the sample rate.
  // The factor is taken before the loop overwrites the last element.
  const float rescale = half_sample_rate / center_freqs_[bank_size_ - 1];
  for (size_t band = 0; band < bank_size_; ++band) {
    center_freqs_[band] *= rescale;
  }

  std::vector<std::vector<float>> filter_bank(bank_size_,
                                              std::vector<float>(num_freqs));

  // Maps a 1-based band number to the zero-based bin nearest to its centre
  // frequency, clamped to the valid bin range.
  const auto bin_for_band = [&](size_t band_number) {
    const size_t one_based_bin = static_cast<size_t>(
        round(center_freqs_[band_number - 1] * num_freqs / half_sample_rate));
    return rtc::SafeClamp<size_t>(one_based_bin, 1, num_freqs) - 1;
  };

  for (size_t band_number = 1; band_number <= bank_size_; ++band_number) {
    const size_t rise_begin =
        bin_for_band(rtc::SafeMax<size_t>(1, band_number - kLeftReach));
    const size_t rise_end = bin_for_band(rtc::SafeMax<size_t>(1, band_number));
    const size_t fall_end =
        bin_for_band(rtc::SafeMin<size_t>(bank_size_, band_number + kRightReach));
    const size_t fall_begin =
        bin_for_band(rtc::SafeMin<size_t>(bank_size_, band_number + 1));

    std::vector<float>& filter = filter_bank[band_number - 1];

    // Rising ramp from 0.
    float slope = rise_end == rise_begin ? 0.f : 1.f / (rise_end - rise_begin);
    float weight = 0.f;
    for (size_t bin = rise_begin; bin <= rise_end; ++bin) {
      filter[bin] = weight;
      weight += slope;
    }

    // Falling ramp from 1.
    slope = fall_begin == fall_end ? 0.f : 1.f / (fall_end - fall_begin);
    weight = 1.f;
    for (size_t bin = fall_begin; bin <= fall_end; ++bin) {
      filter[bin] = weight;
      weight -= slope;
    }

    // Flat top; written last so it overrides the ramp end points.
    for (size_t bin = rise_end; bin <= fall_begin; ++bin) {
      filter[bin] = 1.f;
    }
  }

  // Normalize every bin so its weights sum to one across all bands.
  for (size_t bin = 0; bin < num_freqs; ++bin) {
    float column_sum = 0.f;
    for (const std::vector<float>& filter : filter_bank) {
      column_sum += filter[bin];
    }
    for (std::vector<float>& filter : filter_bank) {
      filter[bin] /= column_sum;
    }
  }
  return filter_bank;
}
// Computes one gain per filter bank band for the given |lambda| and stores
// them in |sols| (which must hold at least |bank_size_| values). Bands below
// |start_freq|, and bands whose clear or noise power is below kMinPower, get
// unity gain. All other bands get the smaller root of the quadratic
// alpha0 * g^2 + beta0 * g + gamma0 = 0, floored at zero.
void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
                                                       size_t start_freq,
                                                       float* sols) {
  const float kMinPower = 1e-5f;
  const float* const clear_pow = filtered_clear_pow_.data();
  const float* const noise_pow = filtered_noise_pow_.data();

  std::fill_n(sols, start_freq, 1.f);

  // Analytic solution for optimal gains. See paper for derivation.
  for (size_t band = start_freq; band < bank_size_; ++band) {
    const float x = clear_pow[band];
    const float v = noise_pow[band];
    if (x < kMinPower || v < kMinPower) {
      sols[band] = 1.f;
      continue;
    }
    const float gamma0 = 0.5f * kRho * x * v + lambda * x * v * v;
    const float beta0 = lambda * x * (2.f - kRho) * x * v;
    const float alpha0 = lambda * x * (1.f - kRho) * x * x;
    RTC_DCHECK_LT(alpha0, 0.f);
    // The quadratic equation should always have real roots, but to guard
    // against numerical errors the discriminant and the gain are both limited
    // to a minimum of zero.
    const float discriminant =
        std::max(0.f, beta0 * beta0 - 4.f * alpha0 * gamma0);
    sols[band] =
        std::max(0.f, (-beta0 - std::sqrt(discriminant)) / (2.f * alpha0));
  }
}
// Feeds one chunk of |audio| (|chunk_length_| samples) to the VAD and reports
// whether it counts as speech. A chunk counts as speech while fewer than
// kSpeechOffsetDelay chunks have passed since the voice probability last
// exceeded kVoiceProbabilityThreshold.
bool IntelligibilityEnhancer::IsSpeech(const float* audio) {
  FloatToS16(audio, chunk_length_, audio_s16_.data());
  vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);

  const bool voice_detected =
      vad_.last_voice_probability() > kVoiceProbabilityThreshold;
  if (voice_detected) {
    chunks_since_voice_ = 0;
  } else if (chunks_since_voice_ < kSpeechOffsetDelay) {
    // Saturating counter: stops growing once the hangover has expired.
    ++chunks_since_voice_;
  }
  return chunks_since_voice_ < kSpeechOffsetDelay;
}
// Runs every band above the lowest one through its own delay buffer, one
// chunk at a time. |high_bands_buffers_[k]| handles band k + 1 of |audio|.
void IntelligibilityEnhancer::DelayHighBands(AudioBuffer* audio) {
  RTC_DCHECK_EQ(audio->num_bands(), high_bands_buffers_.size() + 1);
  size_t band_number = 1u;  // Band 0 is not delayed here.
  for (const auto& delay_buffer : high_bands_buffers_) {
    const Band band = static_cast<Band>(band_number);
    delay_buffer->Delay(audio->split_channels_f(band), chunk_length_);
    ++band_number;
  }
}
} // namespace webrtc

View file

@ -1,137 +0,0 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
#define MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
#include <complex>
#include <memory>
#include <vector>
#include "common_audio/channel_buffer.h"
#include "common_audio/lapped_transform.h"
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/intelligibility/intelligibility_utils.h"
#include "modules/audio_processing/render_queue_item_verifier.h"
#include "modules/audio_processing/vad/voice_activity_detector.h"
#include "rtc_base/swap_queue.h"
namespace webrtc {
// Speech intelligibility enhancement module. Reads render and capture
// audio streams and modifies the render stream with a set of gains per
// frequency bin to enhance speech against the noise background.
// Details of the model and algorithm can be found in the original paper:
// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788
//
// The class is a LappedTransform::Callback: the frequency-domain work happens
// in ProcessAudioBlock().
class IntelligibilityEnhancer : public LappedTransform::Callback {
public:
// |sample_rate_hz| is the rate of the band that is processed,
// |num_render_channels| the number of render channels, |num_bands| the number
// of frequency bands in the render AudioBuffer and |num_noise_bins| the length
// of the noise estimates passed to SetCaptureNoiseEstimate().
// NOTE(review): parameter meanings are inferred from names and test usage;
// the constructor body is not visible here - confirm.
IntelligibilityEnhancer(int sample_rate_hz,
size_t num_render_channels,
size_t num_bands,
size_t num_noise_bins);
~IntelligibilityEnhancer() override;
// Sets the capture noise magnitude spectrum estimate.
void SetCaptureNoiseEstimate(std::vector<float> noise, float gain);
// Reads chunk of speech in time domain and updates with modified signal.
void ProcessRenderAudio(AudioBuffer* audio);
// NOTE(review): presumably reports |is_active_|; the definition is not
// visible here - confirm.
bool active() const;
protected:
// All in frequency domain, receives input |in_block|, applies
// intelligibility enhancement, and writes result to |out_block|.
void ProcessAudioBlock(const std::complex<float>* const* in_block,
size_t in_channels,
size_t frames,
size_t out_channels,
std::complex<float>* const* out_block) override;
private:
// The unit tests below read and write private state directly.
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestRenderUpdate);
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest,
TestNoiseGainHasExpectedResult);
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest,
TestAllBandsHaveSameDelay);
// Updates the SNR estimation and enables or disables this component using a
// hysteresis.
void SnrBasedEffectActivation();
// Bisection search for optimal |lambda|. Leaves the gains for the last
// evaluated |lambda| in |gains_eq_|.
void SolveForLambda(float power_target);
// Maps the per-band gains in |gains_eq_| through |render_filter_bank_| to the
// per-frequency-bin target gains of |gain_applier_|.
void UpdateErbGains();
// Returns number of ERB filters.
static size_t GetBankSize(int sample_rate, size_t erb_resolution);
// Initializes ERB filterbank. Also fills |center_freqs_|.
std::vector<std::vector<float>> CreateErbBank(size_t num_freqs);
// Analytically solves quadratic for optimal gains given |lambda|.
// Negative gains are set to 0. Stores the results in |sols|. Bands below
// |start_freq| get unity gain.
void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);
// Returns true if the audio is speech.
bool IsSpeech(const float* audio);
// Delays the high bands to compensate for the processing delay in the low
// band.
void DelayHighBands(AudioBuffer* audio);
static const size_t kMaxNumNoiseEstimatesToBuffer = 5;
const size_t freqs_; // Num frequencies in frequency domain.
const size_t num_noise_bins_;
const size_t chunk_length_; // Chunk size in samples.
const size_t bank_size_; // Num ERB filters.
const int sample_rate_hz_;
const size_t num_render_channels_;
// Running power estimates of the render ("clear") spectrum and of the capture
// noise estimate.
intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_;
intelligibility::PowerEstimator<float> noise_power_estimator_;
// Per-band powers read by SolveForGainsGivenLambda().
std::vector<float> filtered_clear_pow_;
std::vector<float> filtered_noise_pow_;
// Band centre frequencies, scaled by CreateErbBank() so that the last one
// equals half the sample rate.
std::vector<float> center_freqs_;
std::vector<std::vector<float>> capture_filter_bank_;
std::vector<std::vector<float>> render_filter_bank_;
// First band for which gains are solved; lower bands keep unity gain.
size_t start_freq_;
std::vector<float> gains_eq_; // Pre-filter modified gains.
intelligibility::GainApplier gain_applier_;
std::unique_ptr<LappedTransform> render_mangler_;
VoiceActivityDetector vad_;
// Scratch buffer holding the current chunk converted to int16 for the VAD.
std::vector<int16_t> audio_s16_;
// Chunks since the VAD last reported voice; saturates in IsSpeech().
size_t chunks_since_voice_;
bool is_speech_;
// Smoothed ratio of clear power to noise power, see
// SnrBasedEffectActivation().
float snr_;
bool is_active_;
unsigned long int num_chunks_;
unsigned long int num_active_chunks_;
std::vector<float> noise_estimation_buffer_;
SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>
noise_estimation_queue_;
// One delay buffer per band above the lowest band, see DelayHighBands().
std::vector<std::unique_ptr<intelligibility::DelayBuffer>>
high_bands_buffers_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

View file

@ -1,536 +0,0 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include <stdlib.h>
#include <algorithm>
#include <memory>
#include <vector>
#include "api/array_view.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
#include "modules/audio_processing/noise_suppression_impl.h"
#include "modules/audio_processing/test/audio_buffer_tools.h"
#include "modules/audio_processing/test/bitexactness_tools.h"
#include "rtc_base/arraysize.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
// Target output for ERB create test. Generated with matlab.
// Expected band centre frequencies for the fixture's enhancer (sample rate
// kSampleRate); TestErbCreation compares them against |center_freqs_|. The
// last entry equals kSampleRate / 2.
const float kTestCenterFreqs[] = {
14.5213f, 29.735f, 45.6781f, 62.3884f, 79.9058f, 98.2691f, 117.521f,
137.708f, 158.879f, 181.084f, 204.378f, 228.816f, 254.459f, 281.371f,
309.618f, 339.273f, 370.411f, 403.115f, 437.469f, 473.564f, 511.497f,
551.371f, 593.293f, 637.386f, 683.77f, 732.581f, 783.96f, 838.06f,
895.046f, 955.09f, 1018.38f, 1085.13f, 1155.54f, 1229.85f, 1308.32f,
1391.22f, 1478.83f, 1571.5f, 1669.55f, 1773.37f, 1883.37f, 2000.f};
const float kTestFilterBank[][33] = {
{0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.2f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.25f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.25f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.25f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.157895f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.210526f, 0.117647f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.315789f, 0.176471f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.315789f, 0.352941f, 0.142857f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.352941f, 0.285714f,
0.157895f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f,
0.210526f, 0.111111f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.285714f, 0.315789f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.315789f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f,
0.108108f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f,
0.243243f, 0.153846f, 0.0833333f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f,
0.324324f, 0.230769f, 0.166667f, 0.0909091f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.324324f, 0.307692f, 0.25f, 0.181818f, 0.0833333f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.307692f, 0.333333f,
0.363636f, 0.25f, 0.151515f, 0.0793651f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.166667f, 0.363636f, 0.333333f, 0.242424f,
0.190476f, 0.133333f, 0.0689655f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.333333f, 0.30303f, 0.253968f, 0.2f, 0.137931f,
0.0714286f, 0.f, 0.f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.30303f, 0.31746f, 0.333333f, 0.275862f, 0.214286f,
0.125f, 0.0655738f, 0.f, 0.f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.15873f, 0.333333f, 0.344828f, 0.357143f,
0.25f, 0.196721f, 0.137931f, 0.0816327f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.172414f, 0.357143f,
0.3125f, 0.245902f, 0.172414f, 0.102041f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.3125f, 0.327869f, 0.344828f, 0.204082f, 0.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.163934f, 0.344828f, 0.408163f, 0.5f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.204082f, 0.5f}};
static_assert(arraysize(kTestCenterFreqs) == arraysize(kTestFilterBank),
"Test filterbank badly initialized.");
// Target output for gain solving test. Generated with matlab.
// Expected value of the enhancer's |start_freq_|; asserted by
// TestSolveForGains.
const size_t kTestStartFreq = 12; // Lowest integral frequency for ERBs.
// Expected gain in every band when all band powers are zero.
const float kTestZeroVar = 1.f;
// Expected gains for the non-zero power case: unity for the kTestStartFreq
// lowest bands, zero for all remaining bands.
const float kTestNonZeroVarLambdaTop[] = {
1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
// One expected gain per band, i.e. as many entries as kTestCenterFreqs.
static_assert(arraysize(kTestCenterFreqs) ==
arraysize(kTestNonZeroVarLambdaTop),
"Power test data badly initialized.");
// Absolute tolerance used when comparing against the reference data.
const float kMaxTestError = 0.005f;
// Enhancer initialization parameters.
// Length of the render test signal, in samples.
const int kSamples = 10000;
const int kSampleRate = 4000;
const int kNumChannels = 1;
// Samples per processed chunk: one hundredth of a second at kSampleRate.
const int kFragmentSize = kSampleRate / 100;
const size_t kNumNoiseBins = 129;
const size_t kNumBands = 1;
// Number of frames to process in the bitexactness tests.
const size_t kNumFramesToProcess = 1000;
// Returns the rate the enhancer is created with for a stream running at
// |sample_rate_hz|: the stream rate itself, capped at 16 kHz.
int IntelligibilityEnhancerSampleRate(int sample_rate_hz) {
  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
    return AudioProcessing::kSampleRate16kHz;
  }
  return sample_rate_hz;
}
// Pushes a single frame through the chain under test: the render frame goes
// through the intelligibility enhancer, the capture frame through the noise
// suppressor, and the suppressor's noise estimate is then handed to the
// enhancer. Above 16 kHz the buffers are split into frequency bands first and
// the render buffer is merged again afterwards.
void ProcessOneFrame(int sample_rate_hz,
                     AudioBuffer* render_audio_buffer,
                     AudioBuffer* capture_audio_buffer,
                     NoiseSuppressionImpl* noise_suppressor,
                     IntelligibilityEnhancer* intelligibility_enhancer) {
  const bool use_band_split =
      sample_rate_hz > AudioProcessing::kSampleRate16kHz;

  if (use_band_split) {
    render_audio_buffer->SplitIntoFrequencyBands();
    capture_audio_buffer->SplitIntoFrequencyBands();
  }

  intelligibility_enhancer->ProcessRenderAudio(render_audio_buffer);

  noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer);
  noise_suppressor->ProcessCaptureAudio(capture_audio_buffer);
  intelligibility_enhancer->SetCaptureNoiseEstimate(
      noise_suppressor->NoiseEstimate(), 0);

  if (use_band_split) {
    render_audio_buffer->MergeFrequencyBands();
  }
}
// Processes kNumFramesToProcess frames of the APM render/capture test vectors
// for |sample_rate_hz| and |num_channels| through ProcessOneFrame(), then
// verifies that the beginning of the last render output frame matches
// |output_reference| to within one 16-bit quantization step.
void RunBitexactnessTest(int sample_rate_hz,
                         size_t num_channels,
                         rtc::ArrayView<const float> output_reference) {
  // Render side: buffer, input file and interleaved scratch vector.
  const StreamConfig render_config(sample_rate_hz, num_channels, false);
  AudioBuffer render_buffer(
      render_config.num_frames(), render_config.num_channels(),
      render_config.num_frames(), render_config.num_channels(),
      render_config.num_frames());
  test::InputAudioFile render_file(
      test::GetApmRenderTestVectorFileName(sample_rate_hz));
  std::vector<float> render_input(render_buffer.num_frames() *
                                  render_buffer.num_channels());

  // Capture side: buffer, input file and interleaved scratch vector.
  const StreamConfig capture_config(sample_rate_hz, num_channels, false);
  AudioBuffer capture_buffer(
      capture_config.num_frames(), capture_config.num_channels(),
      capture_config.num_frames(), capture_config.num_channels(),
      capture_config.num_frames());
  test::InputAudioFile capture_file(
      test::GetApmCaptureTestVectorFileName(sample_rate_hz));
  // Sized from the capture buffer itself rather than the render buffer; both
  // configs are currently identical, so the size does not change.
  std::vector<float> capture_input(capture_buffer.num_frames() *
                                   capture_buffer.num_channels());

  rtc::CriticalSection crit_capture;
  NoiseSuppressionImpl noise_suppressor(&crit_capture);
  noise_suppressor.Initialize(capture_config.num_channels(), sample_rate_hz);
  noise_suppressor.Enable(true);

  IntelligibilityEnhancer intelligibility_enhancer(
      IntelligibilityEnhancerSampleRate(sample_rate_hz),
      render_config.num_channels(), kNumBands,
      NoiseSuppressionImpl::num_noise_bins());

  for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) {
    ReadFloatSamplesFromStereoFile(render_buffer.num_frames(),
                                   render_buffer.num_channels(), &render_file,
                                   render_input);
    ReadFloatSamplesFromStereoFile(capture_buffer.num_frames(),
                                   capture_buffer.num_channels(), &capture_file,
                                   capture_input);

    test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer);
    test::CopyVectorToAudioBuffer(capture_config, capture_input,
                                  &capture_buffer);

    ProcessOneFrame(sample_rate_hz, &render_buffer, &capture_buffer,
                    &noise_suppressor, &intelligibility_enhancer);
  }

  // Extract and verify the test results.
  std::vector<float> render_output;
  test::ExtractVectorFromAudioBuffer(render_config, &render_buffer,
                                     &render_output);

  const float kElementErrorBound = 1.f / static_cast<float>(1 << 15);

  // Compare the output with the reference. Only the first values of the output
  // from the last frame processed are compared, so that the preceding frames
  // do not all have to be specified as test vectors. As the algorithm being
  // tested has memory, testing only the last frame implicitly also tests the
  // preceding frames.
  EXPECT_TRUE(test::VerifyDeinterleavedArray(
      render_buffer.num_frames(), render_config.num_channels(),
      output_reference, render_output, kElementErrorBound));
}
// Returns a pseudo-random value in [-1, 1] drawn from std::rand().
float float_rand() {
  const float zero_to_two = std::rand() * 2.f / RAND_MAX;
  return zero_to_two - 1;
}
} // namespace
// Fixture owning an enhancer configured with the k* test parameters, a
// single-fragment render AudioBuffer and the signal vectors the tests fill in.
class IntelligibilityEnhancerTest : public ::testing::Test {
 protected:
  IntelligibilityEnhancerTest()
      : clear_buffer_(kFragmentSize,
                      kNumChannels,
                      kFragmentSize,
                      kNumChannels,
                      kFragmentSize),
        stream_config_(kSampleRate, kNumChannels),
        clear_data_(kSamples),
        noise_data_(kNumNoiseBins),
        orig_data_(kSamples),
        // Tests overwrite this with the transform's real delay; start from a
        // defined value so CheckUpdate() never reads an indeterminate one.
        initial_delay_(0u) {
    // Fixed seed so the pseudo-random test signals are reproducible.
    std::srand(1);
    enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands,
                                           kNumNoiseBins));
  }

  // Recreates the enhancer, runs |clear_data_| through it in place one
  // fragment at a time (supplying |noise_data_| as the noise estimate before
  // every fragment), and returns true if any processed sample differs from the
  // correspondingly delayed sample of |orig_data_| by more than kMaxTestError.
  bool CheckUpdate() {
    enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands,
                                           kNumNoiseBins));
    float* clear_cursor = clear_data_.data();
    for (int i = 0; i < kSamples; i += kFragmentSize) {
      enh_->SetCaptureNoiseEstimate(noise_data_, 1);
      clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
      enh_->ProcessRenderAudio(&clear_buffer_);
      clear_buffer_.CopyTo(stream_config_, &clear_cursor);
      clear_cursor += kFragmentSize;
    }
    // The output lags the input by |initial_delay_| samples.
    const size_t num_samples = static_cast<size_t>(kSamples);
    for (size_t i = initial_delay_; i < num_samples; ++i) {
      if (std::fabs(clear_data_[i] - orig_data_[i - initial_delay_]) >
          kMaxTestError) {
        return true;
      }
    }
    return false;
  }

  std::unique_ptr<IntelligibilityEnhancer> enh_;
  // Render clean speech buffer.
  AudioBuffer clear_buffer_;
  StreamConfig stream_config_;
  // Render signal; processed in place by CheckUpdate().
  std::vector<float> clear_data_;
  // Noise estimate handed to the enhancer.
  std::vector<float> noise_data_;
  // Unprocessed copy of the render signal used for comparison.
  std::vector<float> orig_data_;
  // Delay, in samples, between |orig_data_| and the processed |clear_data_|.
  size_t initial_delay_;
};
// Checks, for three kinds of input, whether the enhancer modifies the render
// stream: silent render with zero noise and random render with zero noise must
// both pass through unchanged, while random render with random noise must be
// modified.
TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) {
  initial_delay_ = enh_->render_mangler_->initial_delay();

  // Draws a fresh random render signal and remembers the unprocessed copy.
  const auto randomize_render_signal = [this]() {
    std::generate(clear_data_.begin(), clear_data_.end(), float_rand);
    orig_data_ = clear_data_;
  };

  // Silent render signal, zero noise estimate.
  std::fill(clear_data_.begin(), clear_data_.end(), 0.f);
  std::fill(orig_data_.begin(), orig_data_.end(), 0.f);
  std::fill(noise_data_.begin(), noise_data_.end(), 0.f);
  EXPECT_FALSE(CheckUpdate());

  // Random render signal, noise estimate still zero.
  randomize_render_signal();
  EXPECT_FALSE(CheckUpdate());

  // Random render signal and a random noise estimate scaled to the S16 range.
  randomize_render_signal();
  std::generate(noise_data_.begin(), noise_data_.end(), float_rand);
  FloatToFloatS16(noise_data_.data(), noise_data_.size(), noise_data_.data());
  EXPECT_TRUE(CheckUpdate());
}
// Tests ERB bank creation, comparing against matlab output.
TEST_F(IntelligibilityEnhancerTest, TestErbCreation) {
  // Both dimensions are checked once, before any reference data is indexed.
  ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_);
  ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_);
  for (size_t band = 0; band < enh_->bank_size_; ++band) {
    EXPECT_NEAR(kTestCenterFreqs[band], enh_->center_freqs_[band],
                kMaxTestError);
    for (size_t bin = 0; bin < enh_->freqs_; ++bin) {
      EXPECT_NEAR(kTestFilterBank[band][bin],
                  enh_->render_filter_bank_[band][bin], kMaxTestError);
    }
  }
}
// Tests the analytic gain solution against matlab output: zero band powers
// must give unity gain everywhere, and the ramp-shaped powers must give the
// kTestNonZeroVarLambdaTop gains for both tested lambdas.
TEST_F(IntelligibilityEnhancerTest, TestSolveForGains) {
  ASSERT_EQ(kTestStartFreq, enh_->start_freq_);
  const size_t num_bands = enh_->bank_size_;
  std::vector<float> gains(num_bands);

  // Zero clear and noise power in every band.
  for (size_t band = 0; band < num_bands; ++band) {
    enh_->filtered_noise_pow_[band] = 0.f;
    enh_->filtered_clear_pow_[band] = 0.f;
  }
  enh_->SolveForGainsGivenLambda(-0.001f, enh_->start_freq_, gains.data());
  for (size_t band = 0; band < num_bands; ++band) {
    EXPECT_NEAR(kTestZeroVar, gains[band], kMaxTestError);
  }

  // Clear power rising and noise power falling with the band index.
  for (size_t band = 0; band < num_bands; ++band) {
    enh_->filtered_clear_pow_[band] = static_cast<float>(band + 1);
    enh_->filtered_noise_pow_[band] = static_cast<float>(num_bands - band);
  }
  const float kLambdas[] = {-0.001f, -1.f};
  for (const float lambda : kLambdas) {
    enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, gains.data());
    for (size_t band = 0; band < num_bands; ++band) {
      EXPECT_NEAR(kTestNonZeroVarLambdaTop[band], gains[band], kMaxTestError);
    }
  }
}
// Feeds the same random noise estimate with gain kGain for
// kNumFramesToProcess frames and checks that the enhancer's noise power
// estimate ends up within a relative kTolerance of (kGain * noise)^2 per bin.
TEST_F(IntelligibilityEnhancerTest, TestNoiseGainHasExpectedResult) {
  const float kGain = 2.f;
  const float kTolerance = 0.007f;

  std::vector<float> noise(kNumNoiseBins);
  std::generate(noise.begin(), noise.end(), float_rand);

  // Power expected once the estimator has settled.
  std::vector<float> expected_psd;
  expected_psd.reserve(kNumNoiseBins);
  for (const float magnitude : noise) {
    expected_psd.push_back(kGain * kGain * magnitude * magnitude);
  }

  // The render input is the same first fragment of |clear_data_| every time.
  float* clear_cursor = clear_data_.data();
  for (size_t frame = 0; frame < kNumFramesToProcess; ++frame) {
    enh_->SetCaptureNoiseEstimate(noise, kGain);
    clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
    enh_->ProcessRenderAudio(&clear_buffer_);
  }

  const std::vector<float>& actual_psd = enh_->noise_power_estimator_.power();
  for (size_t bin = 0; bin < kNumNoiseBins; ++bin) {
    EXPECT_LT(std::abs(actual_psd[bin] - expected_psd[bin]) / expected_psd[bin],
              kTolerance);
  }
}
// Processes one 32 kHz frame split into 16 kHz bands, with the enhancer left
// in its initial state, and checks that every band of the output equals the
// input delayed by the transform's initial delay.
TEST_F(IntelligibilityEnhancerTest, TestAllBandsHaveSameDelay) {
  const int kFullRate = AudioProcessing::kSampleRate32kHz;
  const int kBandRate = AudioProcessing::kSampleRate16kHz;
  const size_t kBandCount = rtc::CheckedDivExact(kFullRate, kBandRate);
  const size_t kFullFrameLength = rtc::CheckedDivExact(kFullRate, 100);
  const size_t kBandFrameLength = rtc::CheckedDivExact(kBandRate, 100);

  enh_.reset(new IntelligibilityEnhancer(kBandRate, kNumChannels, kBandCount,
                                         kNumNoiseBins));
  const size_t delay = enh_->render_mangler_->initial_delay();

  std::vector<float> random_samples(kFullFrameLength);
  AudioBuffer reference_buffer(kFullFrameLength, kNumChannels,
                               kFullFrameLength, kNumChannels,
                               kFullFrameLength);
  AudioBuffer processed_buffer(kFullFrameLength, kNumChannels,
                               kFullFrameLength, kNumChannels,
                               kFullFrameLength);
  // Both buffers receive identical random split-band data.
  for (size_t band = 0u; band < kBandCount; ++band) {
    std::generate(random_samples.begin(), random_samples.end(), float_rand);
    reference_buffer.split_data_f()->SetDataForTesting(random_samples.data(),
                                                       random_samples.size());
    processed_buffer.split_data_f()->SetDataForTesting(random_samples.data(),
                                                       random_samples.size());
  }

  enh_->ProcessRenderAudio(&processed_buffer);

  for (size_t band = 0u; band < kBandCount; ++band) {
    const float* expected = reference_buffer.split_bands_const_f(0)[band];
    const float* actual = processed_buffer.split_bands_const_f(0)[band];
    for (size_t sample = delay; sample < kBandFrameLength; ++sample) {
      EXPECT_LT(std::fabs(expected[sample - delay] - actual[sample]),
                kMaxTestError);
    }
  }
}
// Bit-exactness tests. Each one runs RunBitexactnessTest() for one sample rate
// and channel count and compares the start of the last render output frame
// with |kOutputReference| (three values per channel). The DISABLED_ name
// prefix keeps googletest from running them by default.

// Mono, 8 kHz.
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) {
const float kOutputReference[] = {-0.001892f, -0.003296f, -0.001953f};
RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 1, kOutputReference);
}
// Mono, 16 kHz.
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono16kHz) {
const float kOutputReference[] = {-0.000977f, -0.003296f, -0.002441f};
RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 1, kOutputReference);
}
// Mono, 32 kHz.
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono32kHz) {
const float kOutputReference[] = {0.003021f, -0.011780f, -0.008209f};
RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 1, kOutputReference);
}
// Mono, 48 kHz.
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono48kHz) {
const float kOutputReference[] = {-0.027696f, -0.026253f, -0.018001f};
RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1, kOutputReference);
}
// Stereo, 8 kHz.
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo8kHz) {
const float kOutputReference[] = {0.021454f, 0.035919f, 0.026428f,
-0.000641f, 0.000366f, 0.000641f};
RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 2, kOutputReference);
}
// Stereo, 16 kHz.
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo16kHz) {
const float kOutputReference[] = {0.021362f, 0.035736f, 0.023895f,
-0.001404f, -0.001465f, 0.000549f};
RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 2, kOutputReference);
}
// Stereo, 32 kHz.
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo32kHz) {
const float kOutputReference[] = {0.030641f, 0.027406f, 0.028321f,
-0.001343f, -0.004578f, 0.000977f};
RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 2, kOutputReference);
}
// Stereo, 48 kHz.
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) {
const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f,
-0.012975f, -0.015940f, -0.017820f};
RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference);
}
} // namespace webrtc

View file

@ -1,94 +0,0 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/intelligibility/intelligibility_utils.h"
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <limits>
#include "rtc_base/numerics/safe_minmax.h"
namespace webrtc {
namespace intelligibility {
namespace {
const float kMinFactor = 0.01f;
const float kMaxFactor = 100.f;
// Moves |current| one multiplicative step towards |target| and returns the
// result. The step ratio is confined to [1 - limit, 1 + limit], and the value
// returned is confined to [kMinFactor, kMaxFactor].
float UpdateFactor(float target, float current, float limit) {
  // The added epsilon keeps the denominator non-zero when |current| is 0.
  const float denominator = current + std::numeric_limits<float>::epsilon();
  const float smallest_step = 1 - limit;
  const float largest_step = 1 + limit;
  const float step =
      rtc::SafeClamp(target / denominator, smallest_step, largest_step);
  const float updated = current * step;
  return rtc::SafeClamp(updated, kMinFactor, kMaxFactor);
}
} // namespace
// Creates an estimator holding |num_freqs| power values, all initialised to
// zero. |decay| is the weight kept from the previous estimate on each Step().
template <typename T>
PowerEstimator<T>::PowerEstimator(size_t num_freqs, float decay)
    : power_(num_freqs, 0.f), decay_(decay) {}

// Folds one new array of values into the running estimate. For every position
// the new estimate is
//   decay * previous + (1 - decay) * |data[position]|^2.
// |data| must hold at least as many elements as the estimator has positions.
template <typename T>
void PowerEstimator<T>::Step(const T* data) {
  const float new_weight = 1.f - decay_;
  size_t position = 0;
  for (float& estimate : power_) {
    // std::abs yields the magnitude for both real and complex inputs.
    const auto magnitude = std::abs(data[position]);
    estimate = decay_ * estimate + new_weight * magnitude * magnitude;
    ++position;
  }
}
template class PowerEstimator<float>;
template class PowerEstimator<std::complex<float>>;
// Sets up |freqs| gains. Both the target and the currently applied gains start
// at 1; |relative_change_limit| is forwarded to UpdateFactor() on each Apply().
GainApplier::GainApplier(size_t freqs, float relative_change_limit)
    : num_freqs_(freqs),
      relative_change_limit_(relative_change_limit),
      target_(freqs, 1.f),
      current_(freqs, 1.f) {}

GainApplier::~GainApplier() = default;

// For each of the |num_freqs_| positions: first steps the current gain towards
// its target, then writes the input value scaled by the square root of the
// gain's absolute value to |out_block|.
void GainApplier::Apply(const std::complex<float>* in_block,
                        std::complex<float>* out_block) {
  for (size_t bin = 0; bin < num_freqs_; ++bin) {
    float& gain = current_[bin];
    gain = UpdateFactor(target_[bin], gain, relative_change_limit_);
    // NOTE(review): the square root suggests the gains are power-domain
    // factors applied to amplitudes - confirm.
    const float scale = sqrtf(fabsf(gain));
    out_block[bin] = scale * in_block[bin];
  }
}
// Allocates one zero-filled history of |delay| samples for each of the
// |num_channels| channels. Reading starts at the beginning of the history.
DelayBuffer::DelayBuffer(size_t delay, size_t num_channels)
    : buffer_(num_channels, std::vector<float>(delay, 0.f)), read_index_(0u) {}

DelayBuffer::~DelayBuffer() {}

// Delays |data| in place. |data| is indexed as data[channel][sample] and must
// provide |length| samples for every channel the buffer was created with.
// Each sample is swapped with the oldest stored sample of its channel, so the
// output is the input delayed by the history length given at construction.
void DelayBuffer::Delay(float* const* data, size_t length) {
  // Without channels, or with a zero-sample delay, there is no history to
  // swap with: the signal passes through untouched.
  if (buffer_.empty() || buffer_[0].empty()) {
    return;
  }
  // Every channel history has the same length (see the constructor).
  const size_t delay = buffer_[0].size();
  size_t sample_index = read_index_;
  for (size_t i = 0u; i < buffer_.size(); ++i) {
    std::vector<float>& history = buffer_[i];
    // All channels start from the same position in their circular history.
    sample_index = read_index_;
    for (size_t j = 0u; j < length; ++j) {
      const float swap = data[i][j];
      data[i][j] = history[sample_index];
      history[sample_index] = swap;
      // Wrap at the history length (the delay), not at the channel count.
      if (++sample_index == delay) {
        sample_index = 0u;
      }
    }
  }
  // The position is identical for every channel after the loop; keep it for
  // the next call.
  read_index_ = sample_index;
}
} // namespace intelligibility
} // namespace webrtc

View file

@ -1,86 +0,0 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
#define MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
#include <complex>
#include <vector>
namespace webrtc {
namespace intelligibility {
// Internal helper for computing the power of a stream of arrays.
// The result is an array of power per position: the i-th power is the power of
// the stream of data on the i-th positions in the input arrays.
template <typename T>
class PowerEstimator {
 public:
  // Construct an instance for the given input array length (|freqs|), with the
  // appropriate parameters. |decay| is the forgetting factor.
  PowerEstimator(size_t freqs, float decay);

  // Add a new data point to the series. |data| must point to at least |freqs|
  // elements.
  void Step(const T* data);

  // The current power array. Const-qualified so that it can also be read
  // through a const reference to the estimator.
  const std::vector<float>& power() const { return power_; }

 private:
  // The current power array.
  std::vector<float> power_;
  // Weight of the previous estimate when a new data point is added.
  const float decay_;
};
// Helper class for smoothing gain changes. On each application step, the
// currently used gains are changed towards a set of settable target gains,
// constrained by a limit on the relative changes.
class GainApplier {
public:
// Creates an applier for |freqs| gains. |relative_change_limit| bounds how far
// a gain may move, relative to its current value, in one Apply() call.
GainApplier(size_t freqs, float relative_change_limit);
~GainApplier();
// Copy |in_block| to |out_block|, multiplied by the current set of gains,
// and step the current set of gains towards the target set. Both blocks must
// hold at least |freqs| elements.
void Apply(const std::complex<float>* in_block,
std::complex<float>* out_block);
// Return the current target gain set. Modify this array to set the targets.
// The array has |freqs| elements.
float* target() { return target_.data(); }
private:
// Number of gains handled per block.
const size_t num_freqs_;
// Maximum relative change of a gain per Apply() call.
const float relative_change_limit_;
// Gains that Apply() moves towards.
std::vector<float> target_;
// Gains used by the most recent Apply() call.
std::vector<float> current_;
};
// Helper class to delay a signal by an integer number of samples.
class DelayBuffer {
public:
// |delay| is the number of samples stored per channel; |num_channels| is the
// number of channels handled by Delay().
DelayBuffer(size_t delay, size_t num_channels);
~DelayBuffer();
// Delays |data| in place. |data| is indexed as data[channel][sample] and
// |length| is the number of samples per channel.
void Delay(float* const* data, size_t length);
private:
// Stored samples, one vector per channel.
std::vector<std::vector<float>> buffer_;
// Position in the stored samples at which the next Delay() call starts.
size_t read_index_;
};
} // namespace intelligibility
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_

View file

@ -1,79 +0,0 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <cmath>
#include <complex>
#include <vector>
#include "modules/audio_processing/intelligibility/intelligibility_utils.h"
#include "rtc_base/arraysize.h"
#include "test/gtest.h"
namespace webrtc {
namespace intelligibility {
// Builds |samples| rows of |freqs| complex values each. The value in row r and
// column c (both zero-based) has equal real and imaginary parts of
// 0.99 / ((r + 1) * (c + 1)), so every component lies in (0, 1).
std::vector<std::vector<std::complex<float>>> GenerateTestData(size_t freqs,
                                                               size_t samples) {
  std::vector<std::vector<std::complex<float>>> data;
  data.reserve(samples);
  for (size_t row_number = 1; row_number <= samples; ++row_number) {
    data.emplace_back();
    std::vector<std::complex<float>>& row = data.back();
    row.reserve(freqs);
    for (size_t column_number = 1; column_number <= freqs; ++column_number) {
      const float val = 0.99f / (row_number * column_number);
      row.emplace_back(val, val);
    }
  }
  return data;
}
// Feeds generated complex frames through PowerEstimator and checks that each
// per-frequency estimate stays within [0, 1].
TEST(IntelligibilityUtilsTest, TestPowerEstimator) {
  const size_t kFreqs = 10;
  const size_t kSamples = 100;
  const float kDecay = 0.5f;
  using Frame = std::vector<std::complex<float>>;
  const std::vector<Frame> test_data = GenerateTestData(kFreqs, kSamples);

  PowerEstimator<std::complex<float>> power_estimator(kFreqs, kDecay);
  // Nothing has been fed in yet, so the estimate is still zero.
  EXPECT_EQ(0, power_estimator.power()[0]);

  // Makes sure Step is doing something: the first frame is consumed without
  // checks, every later frame is followed by a range check on all estimates.
  power_estimator.Step(test_data[0].data());
  for (size_t i = 1; i < kSamples; ++i) {
    const Frame& frame = test_data[i];
    power_estimator.Step(frame.data());
    for (size_t j = 0; j < kFreqs; ++j) {
      EXPECT_GE(power_estimator.power()[j], 0.f);
      EXPECT_LE(power_estimator.power()[j], 1.f);
    }
  }
}
// Runs GainApplier over generated frames without touching its target gains and
// checks that both components of every output value stay strictly in (0, 1).
TEST(IntelligibilityUtilsTest, TestGainApplier) {
  const size_t kFreqs = 10;
  const size_t kSamples = 100;
  const float kChangeLimit = 0.1f;
  using Frame = std::vector<std::complex<float>>;

  GainApplier gain_applier(kFreqs, kChangeLimit);
  const std::vector<Frame> in_data = GenerateTestData(kFreqs, kSamples);
  // The output starts as a second, independent copy of the generated data so
  // that it has the right dimensions; Apply() overwrites every element.
  std::vector<Frame> out_data = GenerateTestData(kFreqs, kSamples);

  for (size_t i = 0; i < kSamples; ++i) {
    const std::complex<float>* source = in_data[i].data();
    std::complex<float>* destination = out_data[i].data();
    gain_applier.Apply(source, destination);
    for (size_t j = 0; j < kFreqs; ++j) {
      EXPECT_GT(out_data[i][j].real(), 0.f);
      EXPECT_LT(out_data[i][j].real(), 1.f);
      EXPECT_GT(out_data[i][j].imag(), 0.f);
      EXPECT_LT(out_data[i][j].imag(), 1.f);
    }
  }
}
} // namespace intelligibility
} // namespace webrtc

View file

@ -1,96 +0,0 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/channel_buffer.h"
#include "common_audio/include/audio_util.h"
#include "common_audio/wav_file.h"
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
#include "modules/audio_processing/noise_suppression_impl.h"
#include "rtc_base/criticalsection.h"
#include "rtc_base/flags.h"
using std::complex;
namespace webrtc {
namespace {
// Command-line flags of the tool. Each definition gives the flag name, its
// default value and a help string; int_main() reads the values through the
// corresponding FLAG_<name> variables.
DEFINE_string(clear_file, "speech.wav", "Input file with clear speech.");
DEFINE_string(noise_file, "noise.wav", "Input file with noise data.");
DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file.");
DEFINE_bool(help, false, "Print this message.");
// Offline driver for the intelligibility enhancer.
//
// Reads the speech file (--clear_file) and the noise file (--noise_file) in
// 10 ms frames, runs the noise suppressor on the noise frames to obtain a
// noise estimate, hands that estimate to the enhancer, processes the speech
// frames with the enhancer and writes them to --out_file. Processing stops as
// soon as either input cannot supply a full frame.
//
// Returns 1 if the command line cannot be parsed and 0 otherwise.
int int_main(int argc, char* argv[]) {
  if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true)) {
    return 1;
  }
  if (FLAG_help) {
    rtc::FlagList::Print(nullptr, false);
    return 0;
  }
  // NOTE(review): presumably the recognised flags have been removed from argv
  // at this point, so any remaining argument is unexpected - confirm. The
  // message mentions raw PCM although the files are opened with WavReader.
  if (argc != 1) {
    printf("\n\nInput files must be little-endian 16-bit signed raw PCM.\n");
    return 0;
  }

  WavReader in_file(FLAG_clear_file);
  WavReader noise_file(FLAG_noise_file);
  // The output keeps the format of the speech input.
  WavWriter out_file(FLAG_out_file, in_file.sample_rate(),
                     in_file.num_channels());

  rtc::CriticalSection crit;
  NoiseSuppressionImpl ns(&crit);
  IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels(), 1u,
                              NoiseSuppressionImpl::num_noise_bins());
  ns.Initialize(noise_file.num_channels(), noise_file.sample_rate());
  ns.Enable(true);

  // Samples per channel in one 10 ms frame. Each stream uses its own sample
  // rate; the speech frame size was previously derived from the noise file,
  // which mis-sized the speech buffers whenever the two rates differed.
  const size_t in_samples = in_file.sample_rate() / 100;
  const size_t noise_samples = noise_file.sample_rate() / 100;

  // Interleaved frames as read from / written to the files.
  std::vector<float> in(in_samples * in_file.num_channels());
  std::vector<float> noise(noise_samples * noise_file.num_channels());
  // Deinterleaved (per-channel) views of the same frames.
  ChannelBuffer<float> in_buf(in_samples, in_file.num_channels());
  ChannelBuffer<float> noise_buf(noise_samples, noise_file.num_channels());
  AudioBuffer capture_audio(noise_samples, noise_file.num_channels(),
                            noise_samples, noise_file.num_channels(),
                            noise_samples);
  AudioBuffer render_audio(in_samples, in_file.num_channels(), in_samples,
                           in_file.num_channels(), in_samples);
  StreamConfig noise_config(noise_file.sample_rate(),
                            noise_file.num_channels());
  StreamConfig in_config(in_file.sample_rate(), in_file.num_channels());

  while (in_file.ReadSamples(in.size(), in.data()) == in.size() &&
         noise_file.ReadSamples(noise.size(), noise.data()) == noise.size()) {
    // Convert both frames in place and split them into channels.
    FloatS16ToFloat(noise.data(), noise.size(), noise.data());
    FloatS16ToFloat(in.data(), in.size(), in.data());
    Deinterleave(in.data(), in_buf.num_frames(), in_buf.num_channels(),
                 in_buf.channels());
    Deinterleave(noise.data(), noise_buf.num_frames(), noise_buf.num_channels(),
                 noise_buf.channels());
    capture_audio.CopyFrom(noise_buf.channels(), noise_config);
    render_audio.CopyFrom(in_buf.channels(), in_config);

    // The noise stream only serves to produce the noise estimate that is
    // passed on to the enhancer.
    ns.AnalyzeCaptureAudio(&capture_audio);
    ns.ProcessCaptureAudio(&capture_audio);
    enh.SetCaptureNoiseEstimate(ns.NoiseEstimate(), 1);
    enh.ProcessRenderAudio(&render_audio);

    // Convert the processed speech back to the interleaved file format.
    render_audio.CopyTo(in_config, in_buf.channels());
    Interleave(in_buf.channels(), in_buf.num_frames(), in_buf.num_channels(),
               in.data());
    FloatToFloatS16(in.data(), in.size(), in.data());
    out_file.WriteSamples(in.data(), in.size());
  }
  return 0;
}
} // namespace
} // namespace webrtc
// Program entry point: forwards the command line to webrtc::int_main() and
// uses its return value as the process exit status.
int main(int argc, char* argv[]) {
  const int exit_code = webrtc::int_main(argc, argv);
  return exit_code;
}

View file

@ -427,16 +427,6 @@ void AecDumpBasedSimulator::HandleMessage(
}
}
if (msg.has_intelligibility_enhancer_enabled() || settings_.use_ie) {
bool enable = settings_.use_ie ? *settings_.use_ie
: msg.intelligibility_enhancer_enabled();
config.Set<Intelligibility>(new Intelligibility(enable));
if (settings_.use_verbose_logging) {
std::cout << " intelligibility_enhancer_enabled: "
<< (enable ? "true" : "false") << std::endl;
}
}
if (msg.has_hpf_enabled() || settings_.use_hpf) {
bool enable = settings_.use_hpf ? *settings_.use_hpf : msg.hpf_enabled();
apm_config.high_pass_filter.enabled = enable;

View file

@ -678,9 +678,6 @@ void AudioProcessingSimulator::CreateAudioProcessor() {
if (settings_.use_ts) {
config.Set<ExperimentalNs>(new ExperimentalNs(*settings_.use_ts));
}
if (settings_.use_ie) {
config.Set<Intelligibility>(new Intelligibility(*settings_.use_ie));
}
if (settings_.use_agc2) {
apm_config.gain_controller2.enabled = *settings_.use_agc2;
apm_config.gain_controller2.fixed_gain_db = settings_.agc2_fixed_gain_db;

View file

@ -87,9 +87,6 @@ DEFINE_int(ns,
DEFINE_int(ts,
kParameterNotSpecifiedValue,
"Activate (1) or deactivate(0) the transient suppressor");
DEFINE_int(ie,
kParameterNotSpecifiedValue,
"Activate (1) or deactivate(0) the intelligibility enhancer");
DEFINE_int(vad,
kParameterNotSpecifiedValue,
"Activate (1) or deactivate(0) the voice activity detector");
@ -247,7 +244,6 @@ SimulationSettings CreateSettings() {
SetSettingIfFlagSet(FLAG_hpf, &settings.use_hpf);
SetSettingIfFlagSet(FLAG_ns, &settings.use_ns);
SetSettingIfFlagSet(FLAG_ts, &settings.use_ts);
SetSettingIfFlagSet(FLAG_ie, &settings.use_ie);
SetSettingIfFlagSet(FLAG_vad, &settings.use_vad);
SetSettingIfFlagSet(FLAG_le, &settings.use_le);
SetSettingIfSpecified(FLAG_aec_suppression_level,

View file

@ -186,10 +186,6 @@ void DebugDumpReplayer::MaybeRecreateApm(const audioproc::Config& msg) {
config.Set<ExtendedFilter>(
new ExtendedFilter(msg.aec_extended_filter_enabled()));
RTC_CHECK(msg.has_intelligibility_enhancer_enabled());
config.Set<Intelligibility>(
new Intelligibility(msg.intelligibility_enhancer_enabled()));
// We only create APM once, since changes on these fields should not
// happen in current implementation.
if (!apm_.get()) {

View file

@ -95,7 +95,7 @@ helps with that, producing plots similar to [this
one](https://matplotlib.org/mpl_examples/pylab_examples/boxplot_demo_06.png).
Suppose some scores come from running the APM simulator `audioproc_f` with
or without the intelligibility enhancer: `--ie=1` or `--ie=0`. Then two boxplots
or without the level controller: `--lc=1` or `--lc=0`. Then two boxplots
side by side can be generated with
```
@ -103,7 +103,7 @@ $ ./apm_quality_assessment_boxplot.py \
-o /path/to/output
-v <score_name>
-n /path/to/dir/with/apm_configs
-z ie
-z lc
```
## Troubleshooting

View file

@ -36,7 +36,6 @@ def _GenerateDefaultOverridden(config_override):
settings.use_bf = rtc::Optional<bool>(false);
settings.use_ed = rtc::Optional<bool>(false);
settings.use_hpf = rtc::Optional<bool>(true);
settings.use_ie = rtc::Optional<bool>(false);
settings.use_le = rtc::Optional<bool>(true);
settings.use_ns = rtc::Optional<bool>(true);
settings.use_ts = rtc::Optional<bool>(true);
@ -83,7 +82,6 @@ def _GenerateAllDefaultPlusOne():
'with_drift_compensation': {'-drift_compensation': 1,},
'with_residual_echo_detector': {'-ed': 1,},
'with_AEC_extended_filter': {'-extended_filter': 1,},
'with_intelligibility_enhancer': {'-ie': 1,},
'with_LC': {'-lc': 1,},
'with_refined_adaptive_filter': {'-refined_adaptive_filter': 1,},
}

View file

@ -289,7 +289,6 @@ int do_main(int argc, char* argv[]) {
PRINT_CONFIG(ns_enabled);
PRINT_CONFIG(ns_level);
PRINT_CONFIG(transient_suppression_enabled);
PRINT_CONFIG(intelligibility_enhancer_enabled);
PRINT_CONFIG(pre_amplifier_enabled);
PRINT_CONFIG_FLOAT(pre_amplifier_fixed_gain_factor);

View file

@ -63,7 +63,7 @@ std::unique_ptr<AudioProcessing> CreateApm(test::FuzzDataHelper* fuzz_data,
bool ef = fuzz_data->ReadOrDefaultValue(true);
bool raf = fuzz_data->ReadOrDefaultValue(true);
static_cast<void>(fuzz_data->ReadOrDefaultValue(true));
bool ie = fuzz_data->ReadOrDefaultValue(true);
static_cast<void>(fuzz_data->ReadOrDefaultValue(true));
bool red = fuzz_data->ReadOrDefaultValue(true);
bool hpf = fuzz_data->ReadOrDefaultValue(true);
bool aec3 = fuzz_data->ReadOrDefaultValue(true);
@ -123,7 +123,6 @@ std::unique_ptr<AudioProcessing> CreateApm(test::FuzzDataHelper* fuzz_data,
config.Set<ExtendedFilter>(new ExtendedFilter(ef));
config.Set<RefinedAdaptiveFilter>(new RefinedAdaptiveFilter(raf));
config.Set<DelayAgnostic>(new DelayAgnostic(true));
config.Set<Intelligibility>(new Intelligibility(ie));
std::unique_ptr<AudioProcessing> apm(
AudioProcessingBuilder()

View file

@ -53,8 +53,6 @@
# "More configs" bots will build all the following configs in sequence.
# This is using MB's "phases" feature.
'Linux (more configs)': {
'intelligibility_enhancer_no_include_tests':
'intelligibility_enhancer_no_include_tests_x64',
'bwe_test_logging':
'bwe_test_logging_x64',
'dummy_audio_file_devices_no_protobuf':
@ -74,8 +72,6 @@
'Android32 Builder x86 (dbg)': 'android_debug_static_bot_x86',
'Android64 Builder x64 (dbg)': 'android_debug_static_bot_x64',
'Android32 (more configs)': {
'intelligibility_enhancer_no_include_tests':
'intelligibility_enhancer_no_include_tests_android_arm',
'bwe_test_logging':
'bwe_test_logging_android_arm',
'dummy_audio_file_devices_no_protobuf':
@ -95,8 +91,6 @@
'Win64 Release (Clang)': 'win_clang_release_bot_x64',
'Win32 ASan': 'win_asan_clang_release_bot_x86',
'Win (more configs)': {
'intelligibility_enhancer_no_include_tests':
'intelligibility_enhancer_no_include_tests_x86',
'bwe_test_logging':
'bwe_test_logging_x86',
'dummy_audio_file_devices_no_protobuf':
@ -186,8 +180,6 @@
'linux_experimental': 'release_bot_x64',
'linux_libfuzzer_rel': 'libfuzzer_asan_release_bot_x64',
'linux_more_configs': {
'intelligibility_enhancer_no_include_tests':
'intelligibility_enhancer_no_include_tests_x64',
'bwe_test_logging':
'bwe_test_logging_x64',
'dummy_audio_file_devices_no_protobuf':
@ -209,8 +201,6 @@
'android_arm64_rel': 'android_release_bot_arm64',
'android_experimental': 'android_release_bot_arm',
'android_more_configs': {
'intelligibility_enhancer_no_include_tests':
'intelligibility_enhancer_no_include_tests_android_arm',
'bwe_test_logging':
'bwe_test_logging_android_arm',
'dummy_audio_file_devices_no_protobuf':
@ -237,8 +227,6 @@
'win_x64_win8': 'debug_bot_x64',
'win_x64_win10': 'debug_bot_x64',
'win_more_configs': {
'intelligibility_enhancer_no_include_tests':
'intelligibility_enhancer_no_include_tests_x86',
'bwe_test_logging':
'bwe_test_logging_x86',
'dummy_audio_file_devices_no_protobuf':
@ -402,9 +390,6 @@
],
# More configs
'intelligibility_enhancer_no_include_tests_x64': [
'debug_bot', 'x64', 'intelligibility_enhancer', 'no_include_tests'
],
'bwe_test_logging_x64': [
'debug_bot', 'x64', 'bwe_test_logging'
],
@ -415,9 +400,6 @@
'debug_bot', 'x64', 'rtti', 'no_sctp'
],
'intelligibility_enhancer_no_include_tests_x86': [
'debug_bot', 'x86', 'intelligibility_enhancer', 'no_include_tests'
],
'bwe_test_logging_x86': [
'debug_bot', 'x86', 'bwe_test_logging'
],
@ -428,10 +410,6 @@
'debug_bot', 'x86', 'rtti', 'no_sctp'
],
'intelligibility_enhancer_no_include_tests_android_arm': [
'android', 'debug_static_bot', 'arm',
'intelligibility_enhancer', 'no_include_tests'
],
'bwe_test_logging_android_arm': [
'android', 'debug_static_bot', 'arm', 'bwe_test_logging'
],
@ -586,14 +564,6 @@
'gn_args': 'target_cpu="x86"',
},
'intelligibility_enhancer': {
'gn_args': 'rtc_enable_intelligibility_enhancer=true',
},
'no_include_tests': {
'gn_args': 'rtc_include_tests=false',
},
'bwe_test_logging': {
'gn_args': 'rtc_enable_bwe_test_logging=true',
},

View file

@ -75,9 +75,6 @@ declare_args() {
# Selects fixed-point code where possible.
rtc_prefer_fixed_point = false
# Disable the code for the intelligibility enhancer by default.
rtc_enable_intelligibility_enhancer = false
# Enable when an external authentication mechanism is used for performing
# packet authentication for RTP packets instead of libsrtp.
rtc_enable_external_auth = build_with_chromium