Don't always downsample to 16kHz in the reverse stream in APM

TBR=tina.legrand@webrtc.org

Review URL: https://codereview.webrtc.org/1773173002

Cr-Commit-Position: refs/heads/master@{#12024}
This commit is contained in:
aluebs 2016-03-16 18:26:35 -07:00 committed by Commit bot
parent 2bb3afa054
commit df6416aa50
5 changed files with 71 additions and 68 deletions

View file

@ -58,6 +58,21 @@
} while (0) } while (0)
namespace webrtc { namespace webrtc {
// Sample rates at which processing can run natively. Going by the constant
// names, the entries are listed 8, 16, 32 and 48 kHz; builds that define
// WEBRTC_ARCH_ARM_FAMILY leave out the 48 kHz entry.
const int AudioProcessing::kNativeSampleRatesHz[] = {
AudioProcessing::kSampleRate8kHz,
AudioProcessing::kSampleRate16kHz,
#ifdef WEBRTC_ARCH_ARM_FAMILY
AudioProcessing::kSampleRate32kHz};
#else
AudioProcessing::kSampleRate32kHz,
AudioProcessing::kSampleRate48kHz};
#endif // WEBRTC_ARCH_ARM_FAMILY
// Number of entries in kNativeSampleRatesHz.
const size_t AudioProcessing::kNumNativeSampleRates =
arraysize(AudioProcessing::kNativeSampleRatesHz);
// The last entry of kNativeSampleRatesHz.
const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing::
kNativeSampleRatesHz[AudioProcessing::kNumNativeSampleRates - 1];
namespace { namespace {
static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) { static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
@ -73,6 +88,21 @@ static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
assert(false); assert(false);
return false; return false;
} }
// Reports whether |sample_rate_hz| is one of the two rates (32 kHz or 48 kHz)
// for which the callers in this file split audio into frequency bands.
bool is_multi_band(int sample_rate_hz) {
  if (sample_rate_hz == AudioProcessing::kSampleRate32kHz) {
    return true;
  }
  return sample_rate_hz == AudioProcessing::kSampleRate48kHz;
}
// Returns the first entry of AudioProcessing::kNativeSampleRatesHz that is at
// least |min_proc_rate|. When no entry is large enough, falls back to
// AudioProcessing::kMaxNativeSampleRateHz.
int ClosestNativeRate(int min_proc_rate) {
  for (const int candidate_rate : AudioProcessing::kNativeSampleRatesHz) {
    if (candidate_rate < min_proc_rate) {
      // Below the requested minimum; keep looking.
      continue;
    }
    return candidate_rate;
  }
  return AudioProcessing::kMaxNativeSampleRateHz;
}
} // namespace } // namespace
// Throughout webrtc, it's assumed that success is represented by zero. // Throughout webrtc, it's assumed that success is represented by zero.
@ -104,20 +134,6 @@ struct AudioProcessingImpl::ApmPrivateSubmodules {
std::unique_ptr<AgcManagerDirect> agc_manager; std::unique_ptr<AgcManagerDirect> agc_manager;
}; };
// Sample rates at which processing can run natively. Going by the constant
// names, the entries are listed 8, 16, 32 and 48 kHz; builds that define
// WEBRTC_ARCH_ARM_FAMILY leave out the 48 kHz entry.
const int AudioProcessing::kNativeSampleRatesHz[] = {
AudioProcessing::kSampleRate8kHz,
AudioProcessing::kSampleRate16kHz,
#ifdef WEBRTC_ARCH_ARM_FAMILY
AudioProcessing::kSampleRate32kHz};
#else
AudioProcessing::kSampleRate32kHz,
AudioProcessing::kSampleRate48kHz};
#endif // WEBRTC_ARCH_ARM_FAMILY
// Number of entries in kNativeSampleRatesHz.
const size_t AudioProcessing::kNumNativeSampleRates =
arraysize(AudioProcessing::kNativeSampleRatesHz);
// The last entry of kNativeSampleRatesHz.
const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing::
kNativeSampleRatesHz[AudioProcessing::kNumNativeSampleRates - 1];
AudioProcessing* AudioProcessing::Create() { AudioProcessing* AudioProcessing::Create() {
Config config; Config config;
return Create(config, nullptr); return Create(config, nullptr);
@ -346,32 +362,19 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
formats_.api_format = config; formats_.api_format = config;
// We process at the closest native rate >= min(input rate, output rate). capture_nonlocked_.fwd_proc_format = StreamConfig(ClosestNativeRate(std::min(
const int min_proc_rate = formats_.api_format.input_stream().sample_rate_hz(),
std::min(formats_.api_format.input_stream().sample_rate_hz(), formats_.api_format.output_stream().sample_rate_hz())));
formats_.api_format.output_stream().sample_rate_hz());
int fwd_proc_rate;
for (size_t i = 0; i < kNumNativeSampleRates; ++i) {
fwd_proc_rate = kNativeSampleRatesHz[i];
if (fwd_proc_rate >= min_proc_rate) {
break;
}
}
capture_nonlocked_.fwd_proc_format = StreamConfig(fwd_proc_rate); int rev_proc_rate = ClosestNativeRate(std::min(
formats_.api_format.reverse_input_stream().sample_rate_hz(),
// We normally process the reverse stream at 16 kHz. Unless... formats_.api_format.reverse_output_stream().sample_rate_hz()));
int rev_proc_rate = kSampleRate16kHz; // If the forward sample rate is 8 kHz, the reverse stream is also processed
// at this rate.
if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate8kHz) { if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate8kHz) {
// ...the forward stream is at 8 kHz.
rev_proc_rate = kSampleRate8kHz; rev_proc_rate = kSampleRate8kHz;
} else { } else {
if (formats_.api_format.reverse_input_stream().sample_rate_hz() == rev_proc_rate = std::max(rev_proc_rate, static_cast<int>(kSampleRate16kHz));
kSampleRate32kHz) {
// ...or the input is at 32 kHz, in which case we use the splitting
// filter rather than the resampler.
rev_proc_rate = kSampleRate32kHz;
}
} }
// Always downmix the reverse stream to mono for analysis. This has been // Always downmix the reverse stream to mono for analysis. This has been
@ -627,8 +630,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
capture_.capture_audio->DeinterleaveFrom(frame); capture_.capture_audio->DeinterleaveFrom(frame);
RETURN_ON_ERR(ProcessStreamLocked()); RETURN_ON_ERR(ProcessStreamLocked());
capture_.capture_audio->InterleaveTo(frame, capture_.capture_audio->InterleaveTo(frame, output_copy_needed());
output_copy_needed(is_data_processed()));
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_dump_.debug_file->Open()) { if (debug_dump_.debug_file->Open()) {
@ -674,8 +676,7 @@ int AudioProcessingImpl::ProcessStreamLocked() {
capture_nonlocked_.fwd_proc_format.num_frames()); capture_nonlocked_.fwd_proc_format.num_frames());
} }
bool data_processed = is_data_processed(); if (fwd_analysis_needed()) {
if (analysis_needed(data_processed)) {
ca->SplitIntoFrequencyBands(); ca->SplitIntoFrequencyBands();
} }
@ -733,7 +734,7 @@ int AudioProcessingImpl::ProcessStreamLocked() {
RETURN_ON_ERR(public_submodules_->gain_control->ProcessCaptureAudio( RETURN_ON_ERR(public_submodules_->gain_control->ProcessCaptureAudio(
ca, echo_cancellation()->stream_has_echo())); ca, echo_cancellation()->stream_has_echo()));
if (synthesis_needed(data_processed)) { if (fwd_synthesis_needed()) {
ca->MergeFrequencyBands(); ca->MergeFrequencyBands();
} }
@ -903,7 +904,7 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
int AudioProcessingImpl::ProcessReverseStreamLocked() { int AudioProcessingImpl::ProcessReverseStreamLocked() {
AudioBuffer* ra = render_.render_audio.get(); // For brevity. AudioBuffer* ra = render_.render_audio.get(); // For brevity.
if (formats_.rev_proc_format.sample_rate_hz() == kSampleRate32kHz) { if (rev_analysis_needed()) {
ra->SplitIntoFrequencyBands(); ra->SplitIntoFrequencyBands();
} }
@ -920,8 +921,7 @@ int AudioProcessingImpl::ProcessReverseStreamLocked() {
RETURN_ON_ERR(public_submodules_->gain_control->ProcessRenderAudio(ra)); RETURN_ON_ERR(public_submodules_->gain_control->ProcessRenderAudio(ra));
} }
if (formats_.rev_proc_format.sample_rate_hz() == kSampleRate32kHz && if (rev_synthesis_needed()) {
is_rev_processed()) {
ra->MergeFrequencyBands(); ra->MergeFrequencyBands();
} }
@ -1128,31 +1128,26 @@ bool AudioProcessingImpl::is_data_processed() const {
return false; return false;
} }
bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const { bool AudioProcessingImpl::output_copy_needed() const {
// Check if we've upmixed or downmixed the audio. // Check if we've upmixed or downmixed the audio.
return ((formats_.api_format.output_stream().num_channels() != return ((formats_.api_format.output_stream().num_channels() !=
formats_.api_format.input_stream().num_channels()) || formats_.api_format.input_stream().num_channels()) ||
is_data_processed || capture_.transient_suppressor_enabled); is_data_processed() || capture_.transient_suppressor_enabled);
} }
bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const { bool AudioProcessingImpl::fwd_synthesis_needed() const {
return (is_data_processed && return (is_data_processed() &&
(capture_nonlocked_.fwd_proc_format.sample_rate_hz() == is_multi_band(capture_nonlocked_.fwd_proc_format.sample_rate_hz()));
kSampleRate32kHz ||
capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
kSampleRate48kHz));
} }
bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const { bool AudioProcessingImpl::fwd_analysis_needed() const {
if (!is_data_processed && if (!is_data_processed() &&
!public_submodules_->voice_detection->is_enabled() && !public_submodules_->voice_detection->is_enabled() &&
!capture_.transient_suppressor_enabled) { !capture_.transient_suppressor_enabled) {
// Only public_submodules_->level_estimator is enabled. // Only public_submodules_->level_estimator is enabled.
return false; return false;
} else if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == } else if (is_multi_band(
kSampleRate32kHz || capture_nonlocked_.fwd_proc_format.sample_rate_hz())) {
capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
kSampleRate48kHz) {
// Something besides public_submodules_->level_estimator is enabled, and we // Something besides public_submodules_->level_estimator is enabled, and we
// have super-wb. // have super-wb.
return true; return true;
@ -1164,6 +1159,15 @@ bool AudioProcessingImpl::is_rev_processed() const {
return constants_.intelligibility_enabled; return constants_.intelligibility_enabled;
} }
// Reports whether the reverse stream's frequency bands have to be merged back
// together: only when is_rev_processed() is true and the reverse processing
// rate is a multi-band rate.
bool AudioProcessingImpl::rev_synthesis_needed() const {
  if (!is_rev_processed()) {
    return false;
  }
  const int rev_rate_hz = formats_.rev_proc_format.sample_rate_hz();
  return is_multi_band(rev_rate_hz);
}
// Reports whether the reverse stream has to be split into frequency bands,
// i.e. whether the reverse processing rate is a multi-band rate.
bool AudioProcessingImpl::rev_analysis_needed() const {
  const int rev_rate_hz = formats_.rev_proc_format.sample_rate_hz();
  return is_multi_band(rev_rate_hz);
}
bool AudioProcessingImpl::render_check_rev_conversion_needed() const { bool AudioProcessingImpl::render_check_rev_conversion_needed() const {
return rev_conversion_needed(); return rev_conversion_needed();
} }

View file

@ -208,13 +208,10 @@ class AudioProcessingImpl : public AudioProcessing {
// Capture-side exclusive methods possibly running APM in a multi-threaded // Capture-side exclusive methods possibly running APM in a multi-threaded
// manner that are called with the render lock already acquired. // manner that are called with the render lock already acquired.
int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
bool output_copy_needed(bool is_data_processed) const bool output_copy_needed() const EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
bool is_data_processed() const EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); bool is_data_processed() const EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
bool synthesis_needed(bool is_data_processed) const bool fwd_synthesis_needed() const EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); bool fwd_analysis_needed() const EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
bool analysis_needed(bool is_data_processed) const
EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
void MaybeUpdateHistograms() EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); void MaybeUpdateHistograms() EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
// Render-side exclusive methods possibly running APM in a multi-threaded // Render-side exclusive methods possibly running APM in a multi-threaded
@ -225,6 +222,8 @@ class AudioProcessingImpl : public AudioProcessing {
const StreamConfig& output_config) const StreamConfig& output_config)
EXCLUSIVE_LOCKS_REQUIRED(crit_render_); EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
bool is_rev_processed() const EXCLUSIVE_LOCKS_REQUIRED(crit_render_); bool is_rev_processed() const EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
bool rev_synthesis_needed() const EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
bool rev_analysis_needed() const EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
int ProcessReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_render_); int ProcessReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
// Debug dump methods that are internal and called without locks. // Debug dump methods that are internal and called without locks.

View file

@ -2649,8 +2649,8 @@ INSTANTIATE_TEST_CASE_P(
CommonFormats, CommonFormats,
AudioProcessingTest, AudioProcessingTest,
testing::Values(std::tr1::make_tuple(48000, 48000, 48000, 48000, 0, 0), testing::Values(std::tr1::make_tuple(48000, 48000, 48000, 48000, 0, 0),
std::tr1::make_tuple(48000, 48000, 32000, 48000, 40, 30), std::tr1::make_tuple(48000, 48000, 32000, 48000, 35, 30),
std::tr1::make_tuple(48000, 48000, 16000, 48000, 40, 20), std::tr1::make_tuple(48000, 48000, 16000, 48000, 35, 20),
std::tr1::make_tuple(48000, 44100, 48000, 44100, 20, 20), std::tr1::make_tuple(48000, 44100, 48000, 44100, 20, 20),
std::tr1::make_tuple(48000, 44100, 32000, 44100, 20, 15), std::tr1::make_tuple(48000, 44100, 32000, 44100, 20, 15),
std::tr1::make_tuple(48000, 44100, 16000, 44100, 20, 15), std::tr1::make_tuple(48000, 44100, 16000, 44100, 20, 15),
@ -2697,7 +2697,7 @@ INSTANTIATE_TEST_CASE_P(
std::tr1::make_tuple(16000, 32000, 32000, 32000, 25, 0), std::tr1::make_tuple(16000, 32000, 32000, 32000, 25, 0),
std::tr1::make_tuple(16000, 32000, 16000, 32000, 25, 20), std::tr1::make_tuple(16000, 32000, 16000, 32000, 25, 20),
std::tr1::make_tuple(16000, 16000, 48000, 16000, 40, 20), std::tr1::make_tuple(16000, 16000, 48000, 16000, 40, 20),
std::tr1::make_tuple(16000, 16000, 32000, 16000, 50, 20), std::tr1::make_tuple(16000, 16000, 32000, 16000, 40, 20),
std::tr1::make_tuple(16000, 16000, 16000, 16000, 0, 0))); std::tr1::make_tuple(16000, 16000, 16000, 16000, 0, 0)));
#elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) #elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
@ -2753,7 +2753,7 @@ INSTANTIATE_TEST_CASE_P(
std::tr1::make_tuple(16000, 32000, 32000, 32000, 25, 0), std::tr1::make_tuple(16000, 32000, 32000, 32000, 25, 0),
std::tr1::make_tuple(16000, 32000, 16000, 32000, 25, 20), std::tr1::make_tuple(16000, 32000, 16000, 32000, 25, 20),
std::tr1::make_tuple(16000, 16000, 48000, 16000, 35, 20), std::tr1::make_tuple(16000, 16000, 48000, 16000, 35, 20),
std::tr1::make_tuple(16000, 16000, 32000, 16000, 40, 20), std::tr1::make_tuple(16000, 16000, 32000, 16000, 35, 20),
std::tr1::make_tuple(16000, 16000, 16000, 16000, 0, 0))); std::tr1::make_tuple(16000, 16000, 16000, 16000, 0, 0)));
#endif #endif