mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-13 05:40:42 +01:00
Don't always downsample to 16kHz in the reverse stream in APM
TBR=tina.legrand@webrtc.org
Review URL: https://codereview.webrtc.org/1773173002
Cr-Commit-Position: refs/heads/master@{#12024}
This commit is contained in:
parent
2bb3afa054
commit
df6416aa50
5 changed files with 71 additions and 68 deletions
Binary file not shown.
Binary file not shown.
|
@ -58,6 +58,21 @@
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
namespace webrtc {
|
namespace webrtc {
|
||||||
|
|
||||||
|
const int AudioProcessing::kNativeSampleRatesHz[] = {
|
||||||
|
AudioProcessing::kSampleRate8kHz,
|
||||||
|
AudioProcessing::kSampleRate16kHz,
|
||||||
|
#ifdef WEBRTC_ARCH_ARM_FAMILY
|
||||||
|
AudioProcessing::kSampleRate32kHz};
|
||||||
|
#else
|
||||||
|
AudioProcessing::kSampleRate32kHz,
|
||||||
|
AudioProcessing::kSampleRate48kHz};
|
||||||
|
#endif // WEBRTC_ARCH_ARM_FAMILY
|
||||||
|
const size_t AudioProcessing::kNumNativeSampleRates =
|
||||||
|
arraysize(AudioProcessing::kNativeSampleRatesHz);
|
||||||
|
const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing::
|
||||||
|
kNativeSampleRatesHz[AudioProcessing::kNumNativeSampleRates - 1];
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
|
static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
|
||||||
|
@ -73,6 +88,21 @@ static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
|
||||||
assert(false);
|
assert(false);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool is_multi_band(int sample_rate_hz) {
|
||||||
|
return sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
|
||||||
|
sample_rate_hz == AudioProcessing::kSampleRate48kHz;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ClosestNativeRate(int min_proc_rate) {
|
||||||
|
for (int rate : AudioProcessing::kNativeSampleRatesHz) {
|
||||||
|
if (rate >= min_proc_rate) {
|
||||||
|
return rate;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return AudioProcessing::kMaxNativeSampleRateHz;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
// Throughout webrtc, it's assumed that success is represented by zero.
|
// Throughout webrtc, it's assumed that success is represented by zero.
|
||||||
|
@ -104,20 +134,6 @@ struct AudioProcessingImpl::ApmPrivateSubmodules {
|
||||||
std::unique_ptr<AgcManagerDirect> agc_manager;
|
std::unique_ptr<AgcManagerDirect> agc_manager;
|
||||||
};
|
};
|
||||||
|
|
||||||
const int AudioProcessing::kNativeSampleRatesHz[] = {
|
|
||||||
AudioProcessing::kSampleRate8kHz,
|
|
||||||
AudioProcessing::kSampleRate16kHz,
|
|
||||||
#ifdef WEBRTC_ARCH_ARM_FAMILY
|
|
||||||
AudioProcessing::kSampleRate32kHz};
|
|
||||||
#else
|
|
||||||
AudioProcessing::kSampleRate32kHz,
|
|
||||||
AudioProcessing::kSampleRate48kHz};
|
|
||||||
#endif // WEBRTC_ARCH_ARM_FAMILY
|
|
||||||
const size_t AudioProcessing::kNumNativeSampleRates =
|
|
||||||
arraysize(AudioProcessing::kNativeSampleRatesHz);
|
|
||||||
const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing::
|
|
||||||
kNativeSampleRatesHz[AudioProcessing::kNumNativeSampleRates - 1];
|
|
||||||
|
|
||||||
AudioProcessing* AudioProcessing::Create() {
|
AudioProcessing* AudioProcessing::Create() {
|
||||||
Config config;
|
Config config;
|
||||||
return Create(config, nullptr);
|
return Create(config, nullptr);
|
||||||
|
@ -346,32 +362,19 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
|
||||||
|
|
||||||
formats_.api_format = config;
|
formats_.api_format = config;
|
||||||
|
|
||||||
// We process at the closest native rate >= min(input rate, output rate).
|
capture_nonlocked_.fwd_proc_format = StreamConfig(ClosestNativeRate(std::min(
|
||||||
const int min_proc_rate =
|
formats_.api_format.input_stream().sample_rate_hz(),
|
||||||
std::min(formats_.api_format.input_stream().sample_rate_hz(),
|
formats_.api_format.output_stream().sample_rate_hz())));
|
||||||
formats_.api_format.output_stream().sample_rate_hz());
|
|
||||||
int fwd_proc_rate;
|
|
||||||
for (size_t i = 0; i < kNumNativeSampleRates; ++i) {
|
|
||||||
fwd_proc_rate = kNativeSampleRatesHz[i];
|
|
||||||
if (fwd_proc_rate >= min_proc_rate) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
capture_nonlocked_.fwd_proc_format = StreamConfig(fwd_proc_rate);
|
int rev_proc_rate = ClosestNativeRate(std::min(
|
||||||
|
formats_.api_format.reverse_input_stream().sample_rate_hz(),
|
||||||
// We normally process the reverse stream at 16 kHz. Unless...
|
formats_.api_format.reverse_output_stream().sample_rate_hz()));
|
||||||
int rev_proc_rate = kSampleRate16kHz;
|
// If the forward sample rate is 8 kHz, the reverse stream is also processed
|
||||||
|
// at this rate.
|
||||||
if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate8kHz) {
|
if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate8kHz) {
|
||||||
// ...the forward stream is at 8 kHz.
|
|
||||||
rev_proc_rate = kSampleRate8kHz;
|
rev_proc_rate = kSampleRate8kHz;
|
||||||
} else {
|
} else {
|
||||||
if (formats_.api_format.reverse_input_stream().sample_rate_hz() ==
|
rev_proc_rate = std::max(rev_proc_rate, static_cast<int>(kSampleRate16kHz));
|
||||||
kSampleRate32kHz) {
|
|
||||||
// ...or the input is at 32 kHz, in which case we use the splitting
|
|
||||||
// filter rather than the resampler.
|
|
||||||
rev_proc_rate = kSampleRate32kHz;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Always downmix the reverse stream to mono for analysis. This has been
|
// Always downmix the reverse stream to mono for analysis. This has been
|
||||||
|
@ -627,8 +630,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
|
||||||
|
|
||||||
capture_.capture_audio->DeinterleaveFrom(frame);
|
capture_.capture_audio->DeinterleaveFrom(frame);
|
||||||
RETURN_ON_ERR(ProcessStreamLocked());
|
RETURN_ON_ERR(ProcessStreamLocked());
|
||||||
capture_.capture_audio->InterleaveTo(frame,
|
capture_.capture_audio->InterleaveTo(frame, output_copy_needed());
|
||||||
output_copy_needed(is_data_processed()));
|
|
||||||
|
|
||||||
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
|
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
|
||||||
if (debug_dump_.debug_file->Open()) {
|
if (debug_dump_.debug_file->Open()) {
|
||||||
|
@ -674,8 +676,7 @@ int AudioProcessingImpl::ProcessStreamLocked() {
|
||||||
capture_nonlocked_.fwd_proc_format.num_frames());
|
capture_nonlocked_.fwd_proc_format.num_frames());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool data_processed = is_data_processed();
|
if (fwd_analysis_needed()) {
|
||||||
if (analysis_needed(data_processed)) {
|
|
||||||
ca->SplitIntoFrequencyBands();
|
ca->SplitIntoFrequencyBands();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -733,7 +734,7 @@ int AudioProcessingImpl::ProcessStreamLocked() {
|
||||||
RETURN_ON_ERR(public_submodules_->gain_control->ProcessCaptureAudio(
|
RETURN_ON_ERR(public_submodules_->gain_control->ProcessCaptureAudio(
|
||||||
ca, echo_cancellation()->stream_has_echo()));
|
ca, echo_cancellation()->stream_has_echo()));
|
||||||
|
|
||||||
if (synthesis_needed(data_processed)) {
|
if (fwd_synthesis_needed()) {
|
||||||
ca->MergeFrequencyBands();
|
ca->MergeFrequencyBands();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -903,7 +904,7 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
|
||||||
|
|
||||||
int AudioProcessingImpl::ProcessReverseStreamLocked() {
|
int AudioProcessingImpl::ProcessReverseStreamLocked() {
|
||||||
AudioBuffer* ra = render_.render_audio.get(); // For brevity.
|
AudioBuffer* ra = render_.render_audio.get(); // For brevity.
|
||||||
if (formats_.rev_proc_format.sample_rate_hz() == kSampleRate32kHz) {
|
if (rev_analysis_needed()) {
|
||||||
ra->SplitIntoFrequencyBands();
|
ra->SplitIntoFrequencyBands();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -920,8 +921,7 @@ int AudioProcessingImpl::ProcessReverseStreamLocked() {
|
||||||
RETURN_ON_ERR(public_submodules_->gain_control->ProcessRenderAudio(ra));
|
RETURN_ON_ERR(public_submodules_->gain_control->ProcessRenderAudio(ra));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (formats_.rev_proc_format.sample_rate_hz() == kSampleRate32kHz &&
|
if (rev_synthesis_needed()) {
|
||||||
is_rev_processed()) {
|
|
||||||
ra->MergeFrequencyBands();
|
ra->MergeFrequencyBands();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1128,31 +1128,26 @@ bool AudioProcessingImpl::is_data_processed() const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
|
bool AudioProcessingImpl::output_copy_needed() const {
|
||||||
// Check if we've upmixed or downmixed the audio.
|
// Check if we've upmixed or downmixed the audio.
|
||||||
return ((formats_.api_format.output_stream().num_channels() !=
|
return ((formats_.api_format.output_stream().num_channels() !=
|
||||||
formats_.api_format.input_stream().num_channels()) ||
|
formats_.api_format.input_stream().num_channels()) ||
|
||||||
is_data_processed || capture_.transient_suppressor_enabled);
|
is_data_processed() || capture_.transient_suppressor_enabled);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const {
|
bool AudioProcessingImpl::fwd_synthesis_needed() const {
|
||||||
return (is_data_processed &&
|
return (is_data_processed() &&
|
||||||
(capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
|
is_multi_band(capture_nonlocked_.fwd_proc_format.sample_rate_hz()));
|
||||||
kSampleRate32kHz ||
|
|
||||||
capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
|
|
||||||
kSampleRate48kHz));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
|
bool AudioProcessingImpl::fwd_analysis_needed() const {
|
||||||
if (!is_data_processed &&
|
if (!is_data_processed() &&
|
||||||
!public_submodules_->voice_detection->is_enabled() &&
|
!public_submodules_->voice_detection->is_enabled() &&
|
||||||
!capture_.transient_suppressor_enabled) {
|
!capture_.transient_suppressor_enabled) {
|
||||||
// Only public_submodules_->level_estimator is enabled.
|
// Only public_submodules_->level_estimator is enabled.
|
||||||
return false;
|
return false;
|
||||||
} else if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
|
} else if (is_multi_band(
|
||||||
kSampleRate32kHz ||
|
capture_nonlocked_.fwd_proc_format.sample_rate_hz())) {
|
||||||
capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
|
|
||||||
kSampleRate48kHz) {
|
|
||||||
// Something besides public_submodules_->level_estimator is enabled, and we
|
// Something besides public_submodules_->level_estimator is enabled, and we
|
||||||
// have super-wb.
|
// have super-wb.
|
||||||
return true;
|
return true;
|
||||||
|
@ -1164,6 +1159,15 @@ bool AudioProcessingImpl::is_rev_processed() const {
|
||||||
return constants_.intelligibility_enabled;
|
return constants_.intelligibility_enabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool AudioProcessingImpl::rev_synthesis_needed() const {
|
||||||
|
return (is_rev_processed() &&
|
||||||
|
is_multi_band(formats_.rev_proc_format.sample_rate_hz()));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AudioProcessingImpl::rev_analysis_needed() const {
|
||||||
|
return is_multi_band(formats_.rev_proc_format.sample_rate_hz());
|
||||||
|
}
|
||||||
|
|
||||||
bool AudioProcessingImpl::render_check_rev_conversion_needed() const {
|
bool AudioProcessingImpl::render_check_rev_conversion_needed() const {
|
||||||
return rev_conversion_needed();
|
return rev_conversion_needed();
|
||||||
}
|
}
|
||||||
|
|
|
@ -208,13 +208,10 @@ class AudioProcessingImpl : public AudioProcessing {
|
||||||
// Capture-side exclusive methods possibly running APM in a multi-threaded
|
// Capture-side exclusive methods possibly running APM in a multi-threaded
|
||||||
// manner that are called with the render lock already acquired.
|
// manner that are called with the render lock already acquired.
|
||||||
int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
||||||
bool output_copy_needed(bool is_data_processed) const
|
bool output_copy_needed() const EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
||||||
EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
|
||||||
bool is_data_processed() const EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
bool is_data_processed() const EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
||||||
bool synthesis_needed(bool is_data_processed) const
|
bool fwd_synthesis_needed() const EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
||||||
EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
bool fwd_analysis_needed() const EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
||||||
bool analysis_needed(bool is_data_processed) const
|
|
||||||
EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
|
||||||
void MaybeUpdateHistograms() EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
void MaybeUpdateHistograms() EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
||||||
|
|
||||||
// Render-side exclusive methods possibly running APM in a multi-threaded
|
// Render-side exclusive methods possibly running APM in a multi-threaded
|
||||||
|
@ -225,6 +222,8 @@ class AudioProcessingImpl : public AudioProcessing {
|
||||||
const StreamConfig& output_config)
|
const StreamConfig& output_config)
|
||||||
EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
|
EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
|
||||||
bool is_rev_processed() const EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
|
bool is_rev_processed() const EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
|
||||||
|
bool rev_synthesis_needed() const EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
|
||||||
|
bool rev_analysis_needed() const EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
|
||||||
int ProcessReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
|
int ProcessReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
|
||||||
|
|
||||||
// Debug dump methods that are internal and called without locks.
|
// Debug dump methods that are internal and called without locks.
|
||||||
|
|
|
@ -2649,8 +2649,8 @@ INSTANTIATE_TEST_CASE_P(
|
||||||
CommonFormats,
|
CommonFormats,
|
||||||
AudioProcessingTest,
|
AudioProcessingTest,
|
||||||
testing::Values(std::tr1::make_tuple(48000, 48000, 48000, 48000, 0, 0),
|
testing::Values(std::tr1::make_tuple(48000, 48000, 48000, 48000, 0, 0),
|
||||||
std::tr1::make_tuple(48000, 48000, 32000, 48000, 40, 30),
|
std::tr1::make_tuple(48000, 48000, 32000, 48000, 35, 30),
|
||||||
std::tr1::make_tuple(48000, 48000, 16000, 48000, 40, 20),
|
std::tr1::make_tuple(48000, 48000, 16000, 48000, 35, 20),
|
||||||
std::tr1::make_tuple(48000, 44100, 48000, 44100, 20, 20),
|
std::tr1::make_tuple(48000, 44100, 48000, 44100, 20, 20),
|
||||||
std::tr1::make_tuple(48000, 44100, 32000, 44100, 20, 15),
|
std::tr1::make_tuple(48000, 44100, 32000, 44100, 20, 15),
|
||||||
std::tr1::make_tuple(48000, 44100, 16000, 44100, 20, 15),
|
std::tr1::make_tuple(48000, 44100, 16000, 44100, 20, 15),
|
||||||
|
@ -2697,7 +2697,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||||
std::tr1::make_tuple(16000, 32000, 32000, 32000, 25, 0),
|
std::tr1::make_tuple(16000, 32000, 32000, 32000, 25, 0),
|
||||||
std::tr1::make_tuple(16000, 32000, 16000, 32000, 25, 20),
|
std::tr1::make_tuple(16000, 32000, 16000, 32000, 25, 20),
|
||||||
std::tr1::make_tuple(16000, 16000, 48000, 16000, 40, 20),
|
std::tr1::make_tuple(16000, 16000, 48000, 16000, 40, 20),
|
||||||
std::tr1::make_tuple(16000, 16000, 32000, 16000, 50, 20),
|
std::tr1::make_tuple(16000, 16000, 32000, 16000, 40, 20),
|
||||||
std::tr1::make_tuple(16000, 16000, 16000, 16000, 0, 0)));
|
std::tr1::make_tuple(16000, 16000, 16000, 16000, 0, 0)));
|
||||||
|
|
||||||
#elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
|
#elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
|
||||||
|
@ -2753,7 +2753,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||||
std::tr1::make_tuple(16000, 32000, 32000, 32000, 25, 0),
|
std::tr1::make_tuple(16000, 32000, 32000, 32000, 25, 0),
|
||||||
std::tr1::make_tuple(16000, 32000, 16000, 32000, 25, 20),
|
std::tr1::make_tuple(16000, 32000, 16000, 32000, 25, 20),
|
||||||
std::tr1::make_tuple(16000, 16000, 48000, 16000, 35, 20),
|
std::tr1::make_tuple(16000, 16000, 48000, 16000, 35, 20),
|
||||||
std::tr1::make_tuple(16000, 16000, 32000, 16000, 40, 20),
|
std::tr1::make_tuple(16000, 16000, 32000, 16000, 35, 20),
|
||||||
std::tr1::make_tuple(16000, 16000, 16000, 16000, 0, 0)));
|
std::tr1::make_tuple(16000, 16000, 16000, 16000, 0, 0)));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue