From 1883d3e2313f3b3b21c076f6ce65f36fabb26bfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Per=20=C3=85hgren?= Date: Fri, 21 Feb 2020 13:31:07 +0100 Subject: [PATCH] Optimizations and refactoring of the APM 3-band split filter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL refactors and optimizes the 3-band split-filter in APM, which is a very computationally complex component. Beyond optimizing the code, the filter coefficients are also quantized to avoid denormals. The changes reduce the complexity of the split filter by about 30-50%. The CL has been tested for bitexactness on a number of aecdump recordings. (the CL also removes the now unused code for the sparse_fir_filter) Bug: webrtc:6181 Change-Id: If45f8d1f189c6812ccb03721156c77eb68181211 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/168189 Reviewed-by: Sam Zackrisson Reviewed-by: Karl Wiberg Commit-Queue: Per Åhgren Cr-Commit-Position: refs/heads/master@{#30592} --- common_audio/BUILD.gn | 4 +- common_audio/channel_buffer.h | 71 ++-- common_audio/sparse_fir_filter.cc | 60 ---- common_audio/sparse_fir_filter.h | 53 --- common_audio/sparse_fir_filter_unittest.cc | 219 ------------- modules/audio_processing/BUILD.gn | 1 + modules/audio_processing/splitting_filter.cc | 47 ++- modules/audio_processing/splitting_filter.h | 2 +- .../three_band_filter_bank.cc | 303 +++++++++++------- .../audio_processing/three_band_filter_bank.h | 56 ++-- 10 files changed, 303 insertions(+), 513 deletions(-) delete mode 100644 common_audio/sparse_fir_filter.cc delete mode 100644 common_audio/sparse_fir_filter.h delete mode 100644 common_audio/sparse_fir_filter_unittest.cc diff --git a/common_audio/BUILD.gn b/common_audio/BUILD.gn index 48bd9068fe..6c89bf257f 100644 --- a/common_audio/BUILD.gn +++ b/common_audio/BUILD.gn @@ -32,8 +32,6 @@ rtc_library("common_audio") { "resampler/sinc_resampler.cc", "smoothing_filter.cc", "smoothing_filter.h", - "sparse_fir_filter.cc", - 
"sparse_fir_filter.h", "vad/include/vad.h", "vad/vad.cc", "wav_file.cc", @@ -47,6 +45,7 @@ rtc_library("common_audio") { deps = [ ":common_audio_c", ":sinc_resampler", + "../api:array_view", "../rtc_base:checks", "../rtc_base:gtest_prod", "../rtc_base:rtc_base_approved", @@ -331,7 +330,6 @@ if (rtc_include_tests) { "signal_processing/real_fft_unittest.cc", "signal_processing/signal_processing_unittest.cc", "smoothing_filter_unittest.cc", - "sparse_fir_filter_unittest.cc", "vad/vad_core_unittest.cc", "vad/vad_filterbank_unittest.cc", "vad/vad_gmm_unittest.cc", diff --git a/common_audio/channel_buffer.h b/common_audio/channel_buffer.h index dc44369be6..f0270803f5 100644 --- a/common_audio/channel_buffer.h +++ b/common_audio/channel_buffer.h @@ -14,7 +14,9 @@ #include #include +#include +#include "api/array_view.h" #include "common_audio/include/audio_util.h" #include "rtc_base/checks.h" #include "rtc_base/gtest_prod_util.h" @@ -48,40 +50,60 @@ class ChannelBuffer { num_frames_per_band_(num_frames / num_bands), num_allocated_channels_(num_channels), num_channels_(num_channels), - num_bands_(num_bands) { - for (size_t i = 0; i < num_allocated_channels_; ++i) { - for (size_t j = 0; j < num_bands_; ++j) { - channels_[j * num_allocated_channels_ + i] = - &data_[i * num_frames_ + j * num_frames_per_band_]; - bands_[i * num_bands_ + j] = channels_[j * num_allocated_channels_ + i]; + num_bands_(num_bands), + bands_view_(num_allocated_channels_, + std::vector>(num_bands_)), + channels_view_( + num_bands_, + std::vector>(num_allocated_channels_)) { + // Temporarily cast away const_ness to allow populating the array views. 
+ auto* bands_view = + const_cast>>*>(&bands_view_); + auto* channels_view = + const_cast>>*>( + &channels_view_); + + for (size_t ch = 0; ch < num_allocated_channels_; ++ch) { + for (size_t band = 0; band < num_bands_; ++band) { + (*channels_view)[band][ch] = rtc::ArrayView( + &data_[ch * num_frames_ + band * num_frames_per_band_], + num_frames_per_band_); + (*bands_view)[ch][band] = channels_view_[band][ch]; + channels_[band * num_allocated_channels_ + ch] = + channels_view_[band][ch].data(); + bands_[ch * num_bands_ + band] = + channels_[band * num_allocated_channels_ + ch]; } } } - // Returns a pointer array to the full-band channels (or lower band channels). - // Usage: - // channels()[channel][sample]. - // Where: - // 0 <= channel < |num_allocated_channels_| - // 0 <= sample < |num_frames_| - T* const* channels() { return channels(0); } - const T* const* channels() const { return channels(0); } - - // Returns a pointer array to the channels for a specific band. - // Usage: - // channels(band)[channel][sample]. + // Returns a pointer array to the channels. + // If band is explicitly specificed, the channels for a specific band are + // returned and the usage becomes: channels(band)[channel][sample]. // Where: // 0 <= band < |num_bands_| // 0 <= channel < |num_allocated_channels_| // 0 <= sample < |num_frames_per_band_| - const T* const* channels(size_t band) const { + + // If band is not explicitly specified, the full-band channels (or lower band + // channels) are returned and the usage becomes: channels()[channel][sample]. 
+ // Where: + // 0 <= channel < |num_allocated_channels_| + // 0 <= sample < |num_frames_| + const T* const* channels(size_t band = 0) const { RTC_DCHECK_LT(band, num_bands_); return &channels_[band * num_allocated_channels_]; } - T* const* channels(size_t band) { + T* const* channels(size_t band = 0) { const ChannelBuffer* t = this; return const_cast(t->channels(band)); } + rtc::ArrayView> channels_view(size_t band = 0) { + return channels_view_[band]; + } + rtc::ArrayView> channels_view(size_t band = 0) const { + return channels_view_[band]; + } // Returns a pointer array to the bands for a specific channel. // Usage: @@ -100,6 +122,13 @@ class ChannelBuffer { return const_cast(t->bands(channel)); } + rtc::ArrayView> bands_view(size_t channel) { + return bands_view_[channel]; + } + rtc::ArrayView> bands_view(size_t channel) const { + return bands_view_[channel]; + } + // Sets the |slice| pointers to the |start_frame| position for each channel. // Returns |slice| for convenience. const T* const* Slice(T** slice, size_t start_frame) const { @@ -140,6 +169,8 @@ class ChannelBuffer { // Number of channels the user sees. size_t num_channels_; const size_t num_bands_; + const std::vector>> bands_view_; + const std::vector>> channels_view_; }; // One int16_t and one float ChannelBuffer that are kept in sync. The sync is diff --git a/common_audio/sparse_fir_filter.cc b/common_audio/sparse_fir_filter.cc deleted file mode 100644 index 772eb82e47..0000000000 --- a/common_audio/sparse_fir_filter.cc +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "common_audio/sparse_fir_filter.h" - -#include "rtc_base/checks.h" - -namespace webrtc { - -SparseFIRFilter::SparseFIRFilter(const float* nonzero_coeffs, - size_t num_nonzero_coeffs, - size_t sparsity, - size_t offset) - : sparsity_(sparsity), - offset_(offset), - nonzero_coeffs_(nonzero_coeffs, nonzero_coeffs + num_nonzero_coeffs), - state_(sparsity_ * (num_nonzero_coeffs - 1) + offset_, 0.f) { - RTC_CHECK_GE(num_nonzero_coeffs, 1); - RTC_CHECK_GE(sparsity, 1); -} - -SparseFIRFilter::~SparseFIRFilter() = default; - -void SparseFIRFilter::Filter(const float* in, size_t length, float* out) { - // Convolves the input signal |in| with the filter kernel |nonzero_coeffs_| - // taking into account the previous state. - for (size_t i = 0; i < length; ++i) { - out[i] = 0.f; - size_t j; - for (j = 0; i >= j * sparsity_ + offset_ && j < nonzero_coeffs_.size(); - ++j) { - out[i] += in[i - j * sparsity_ - offset_] * nonzero_coeffs_[j]; - } - for (; j < nonzero_coeffs_.size(); ++j) { - out[i] += state_[i + (nonzero_coeffs_.size() - j - 1) * sparsity_] * - nonzero_coeffs_[j]; - } - } - - // Update current state. - if (!state_.empty()) { - if (length >= state_.size()) { - std::memcpy(&state_[0], &in[length - state_.size()], - state_.size() * sizeof(*in)); - } else { - std::memmove(&state_[0], &state_[length], - (state_.size() - length) * sizeof(state_[0])); - std::memcpy(&state_[state_.size() - length], in, length * sizeof(*in)); - } - } -} - -} // namespace webrtc diff --git a/common_audio/sparse_fir_filter.h b/common_audio/sparse_fir_filter.h deleted file mode 100644 index 5197a8e28c..0000000000 --- a/common_audio/sparse_fir_filter.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef COMMON_AUDIO_SPARSE_FIR_FILTER_H_ -#define COMMON_AUDIO_SPARSE_FIR_FILTER_H_ - -#include -#include - -#include "rtc_base/constructor_magic.h" - -namespace webrtc { - -// A Finite Impulse Response filter implementation which takes advantage of a -// sparse structure with uniformly distributed non-zero coefficients. -class SparseFIRFilter final { - public: - // |num_nonzero_coeffs| is the number of non-zero coefficients, - // |nonzero_coeffs|. They are assumed to be uniformly distributed every - // |sparsity| samples and with an initial |offset|. The rest of the filter - // coefficients will be assumed zeros. For example, with sparsity = 3, and - // offset = 1 the filter coefficients will be: - // B = [0 coeffs[0] 0 0 coeffs[1] 0 0 coeffs[2] ... ] - // All initial state values will be zeros. - SparseFIRFilter(const float* nonzero_coeffs, - size_t num_nonzero_coeffs, - size_t sparsity, - size_t offset); - ~SparseFIRFilter(); - - // Filters the |in| data supplied. - // |out| must be previously allocated and it must be at least of |length|. - void Filter(const float* in, size_t length, float* out); - - private: - const size_t sparsity_; - const size_t offset_; - const std::vector nonzero_coeffs_; - std::vector state_; - - RTC_DISALLOW_COPY_AND_ASSIGN(SparseFIRFilter); -}; - -} // namespace webrtc - -#endif // COMMON_AUDIO_SPARSE_FIR_FILTER_H_ diff --git a/common_audio/sparse_fir_filter_unittest.cc b/common_audio/sparse_fir_filter_unittest.cc deleted file mode 100644 index 5dc7b6dc6f..0000000000 --- a/common_audio/sparse_fir_filter_unittest.cc +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "common_audio/sparse_fir_filter.h" - -#include - -#include "common_audio/fir_filter.h" -#include "common_audio/fir_filter_factory.h" -#include "rtc_base/arraysize.h" -#include "test/gtest.h" - -namespace webrtc { -namespace { - -static const float kCoeffs[] = {0.2f, 0.3f, 0.5f, 0.7f, 0.11f}; -static const float kInput[] = {1.f, 2.f, 3.f, 4.f, 5.f, - 6.f, 7.f, 8.f, 9.f, 10.f}; - -template -void VerifyOutput(const float (&expected_output)[N], const float (&output)[N]) { - EXPECT_EQ(0, memcmp(expected_output, output, sizeof(output))); -} - -} // namespace - -TEST(SparseFIRFilterTest, FilterAsIdentity) { - const float kCoeff = 1.f; - const size_t kNumCoeff = 1; - const size_t kSparsity = 3; - const size_t kOffset = 0; - float output[arraysize(kInput)]; - SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset); - filter.Filter(kInput, arraysize(kInput), output); - VerifyOutput(kInput, output); -} - -TEST(SparseFIRFilterTest, SameOutputForScalarCoefficientAndDifferentSparsity) { - const float kCoeff = 2.f; - const size_t kNumCoeff = 1; - const size_t kLowSparsity = 1; - const size_t kHighSparsity = 7; - const size_t kOffset = 0; - float low_sparsity_output[arraysize(kInput)]; - float high_sparsity_output[arraysize(kInput)]; - SparseFIRFilter low_sparsity_filter(&kCoeff, kNumCoeff, kLowSparsity, - kOffset); - SparseFIRFilter high_sparsity_filter(&kCoeff, kNumCoeff, kHighSparsity, - kOffset); - low_sparsity_filter.Filter(kInput, arraysize(kInput), low_sparsity_output); - high_sparsity_filter.Filter(kInput, arraysize(kInput), high_sparsity_output); - VerifyOutput(low_sparsity_output, high_sparsity_output); -} - 
-TEST(SparseFIRFilterTest, FilterUsedAsScalarMultiplication) { - const float kCoeff = 5.f; - const size_t kNumCoeff = 1; - const size_t kSparsity = 5; - const size_t kOffset = 0; - float output[arraysize(kInput)]; - SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset); - filter.Filter(kInput, arraysize(kInput), output); - EXPECT_FLOAT_EQ(5.f, output[0]); - EXPECT_FLOAT_EQ(20.f, output[3]); - EXPECT_FLOAT_EQ(25.f, output[4]); - EXPECT_FLOAT_EQ(50.f, output[arraysize(kInput) - 1]); -} - -TEST(SparseFIRFilterTest, FilterUsedAsInputShifting) { - const float kCoeff = 1.f; - const size_t kNumCoeff = 1; - const size_t kSparsity = 1; - const size_t kOffset = 4; - float output[arraysize(kInput)]; - SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset); - filter.Filter(kInput, arraysize(kInput), output); - EXPECT_FLOAT_EQ(0.f, output[0]); - EXPECT_FLOAT_EQ(0.f, output[3]); - EXPECT_FLOAT_EQ(1.f, output[4]); - EXPECT_FLOAT_EQ(2.f, output[5]); - EXPECT_FLOAT_EQ(6.f, output[arraysize(kInput) - 1]); -} - -TEST(SparseFIRFilterTest, FilterUsedAsArbitraryWeighting) { - const size_t kSparsity = 2; - const size_t kOffset = 1; - float output[arraysize(kInput)]; - SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset); - filter.Filter(kInput, arraysize(kInput), output); - EXPECT_FLOAT_EQ(0.f, output[0]); - EXPECT_FLOAT_EQ(0.9f, output[3]); - EXPECT_FLOAT_EQ(1.4f, output[4]); - EXPECT_FLOAT_EQ(2.4f, output[5]); - EXPECT_FLOAT_EQ(8.61f, output[arraysize(kInput) - 1]); -} - -TEST(SparseFIRFilterTest, FilterInLengthLesserOrEqualToCoefficientsLength) { - const size_t kSparsity = 1; - const size_t kOffset = 0; - float output[arraysize(kInput)]; - SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset); - filter.Filter(kInput, 2, output); - EXPECT_FLOAT_EQ(0.2f, output[0]); - EXPECT_FLOAT_EQ(0.7f, output[1]); -} - -TEST(SparseFIRFilterTest, MultipleFilterCalls) { - const size_t kSparsity = 1; - const size_t kOffset = 0; - float 
output[arraysize(kInput)]; - SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset); - filter.Filter(kInput, 2, output); - EXPECT_FLOAT_EQ(0.2f, output[0]); - EXPECT_FLOAT_EQ(0.7f, output[1]); - filter.Filter(kInput, 2, output); - EXPECT_FLOAT_EQ(1.3f, output[0]); - EXPECT_FLOAT_EQ(2.4f, output[1]); - filter.Filter(kInput, 2, output); - EXPECT_FLOAT_EQ(2.81f, output[0]); - EXPECT_FLOAT_EQ(2.62f, output[1]); - filter.Filter(kInput, 2, output); - EXPECT_FLOAT_EQ(2.81f, output[0]); - EXPECT_FLOAT_EQ(2.62f, output[1]); - filter.Filter(&kInput[3], 3, output); - EXPECT_FLOAT_EQ(3.41f, output[0]); - EXPECT_FLOAT_EQ(4.12f, output[1]); - EXPECT_FLOAT_EQ(6.21f, output[2]); - filter.Filter(&kInput[3], 3, output); - EXPECT_FLOAT_EQ(8.12f, output[0]); - EXPECT_FLOAT_EQ(9.14f, output[1]); - EXPECT_FLOAT_EQ(9.45f, output[2]); -} - -TEST(SparseFIRFilterTest, VerifySampleBasedVsBlockBasedFiltering) { - const size_t kSparsity = 3; - const size_t kOffset = 1; - float output_block_based[arraysize(kInput)]; - SparseFIRFilter filter_block(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset); - filter_block.Filter(kInput, arraysize(kInput), output_block_based); - float output_sample_based[arraysize(kInput)]; - SparseFIRFilter filter_sample(kCoeffs, arraysize(kCoeffs), kSparsity, - kOffset); - for (size_t i = 0; i < arraysize(kInput); ++i) - filter_sample.Filter(&kInput[i], 1, &output_sample_based[i]); - VerifyOutput(output_block_based, output_sample_based); -} - -TEST(SparseFIRFilterTest, SimpleHighPassFilter) { - const size_t kSparsity = 2; - const size_t kOffset = 2; - const float kHPCoeffs[] = {1.f, -1.f}; - const float kConstantInput[] = {1.f, 1.f, 1.f, 1.f, 1.f, - 1.f, 1.f, 1.f, 1.f, 1.f}; - float output[arraysize(kConstantInput)]; - SparseFIRFilter filter(kHPCoeffs, arraysize(kHPCoeffs), kSparsity, kOffset); - filter.Filter(kConstantInput, arraysize(kConstantInput), output); - EXPECT_FLOAT_EQ(0.f, output[0]); - EXPECT_FLOAT_EQ(0.f, output[1]); - EXPECT_FLOAT_EQ(1.f, 
output[2]); - EXPECT_FLOAT_EQ(1.f, output[3]); - for (size_t i = kSparsity + kOffset; i < arraysize(kConstantInput); ++i) - EXPECT_FLOAT_EQ(0.f, output[i]); -} - -TEST(SparseFIRFilterTest, SimpleLowPassFilter) { - const size_t kSparsity = 2; - const size_t kOffset = 2; - const float kLPCoeffs[] = {1.f, 1.f}; - const float kHighFrequencyInput[] = {1.f, 1.f, -1.f, -1.f, 1.f, - 1.f, -1.f, -1.f, 1.f, 1.f}; - float output[arraysize(kHighFrequencyInput)]; - SparseFIRFilter filter(kLPCoeffs, arraysize(kLPCoeffs), kSparsity, kOffset); - filter.Filter(kHighFrequencyInput, arraysize(kHighFrequencyInput), output); - EXPECT_FLOAT_EQ(0.f, output[0]); - EXPECT_FLOAT_EQ(0.f, output[1]); - EXPECT_FLOAT_EQ(1.f, output[2]); - EXPECT_FLOAT_EQ(1.f, output[3]); - for (size_t i = kSparsity + kOffset; i < arraysize(kHighFrequencyInput); ++i) - EXPECT_FLOAT_EQ(0.f, output[i]); -} - -TEST(SparseFIRFilterTest, SameOutputWhenSwappedCoefficientsAndInput) { - const size_t kSparsity = 1; - const size_t kOffset = 0; - float output[arraysize(kCoeffs)]; - float output_swapped[arraysize(kCoeffs)]; - SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset); - // Use arraysize(kCoeffs) for in_length to get same-length outputs. 
- filter.Filter(kInput, arraysize(kCoeffs), output); - SparseFIRFilter filter_swapped(kInput, arraysize(kCoeffs), kSparsity, - kOffset); - filter_swapped.Filter(kCoeffs, arraysize(kCoeffs), output_swapped); - VerifyOutput(output, output_swapped); -} - -TEST(SparseFIRFilterTest, SameOutputAsFIRFilterWhenSparsityOneAndOffsetZero) { - const size_t kSparsity = 1; - const size_t kOffset = 0; - float output[arraysize(kInput)]; - float sparse_output[arraysize(kInput)]; - std::unique_ptr filter( - CreateFirFilter(kCoeffs, arraysize(kCoeffs), arraysize(kInput))); - SparseFIRFilter sparse_filter(kCoeffs, arraysize(kCoeffs), kSparsity, - kOffset); - filter->Filter(kInput, arraysize(kInput), output); - sparse_filter.Filter(kInput, arraysize(kInput), sparse_output); - for (size_t i = 0; i < arraysize(kInput); ++i) { - EXPECT_FLOAT_EQ(output[i], sparse_output[i]); - } -} - -} // namespace webrtc diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn index 6215ab5b26..185e4f7fb1 100644 --- a/modules/audio_processing/BUILD.gn +++ b/modules/audio_processing/BUILD.gn @@ -73,6 +73,7 @@ rtc_library("audio_buffer") { deps = [ ":api", + "../../api:array_view", "../../api/audio:audio_frame_api", "../../common_audio", "../../common_audio:common_audio_c", diff --git a/modules/audio_processing/splitting_filter.cc b/modules/audio_processing/splitting_filter.cc index 62896280ed..d47090bc03 100644 --- a/modules/audio_processing/splitting_filter.cc +++ b/modules/audio_processing/splitting_filter.cc @@ -12,6 +12,7 @@ #include +#include "api/array_view.h" #include "common_audio/channel_buffer.h" #include "common_audio/signal_processing/include/signal_processing_library.h" #include "rtc_base/checks.h" @@ -27,16 +28,10 @@ constexpr size_t kTwoBandFilterSamplesPerFrame = 320; SplittingFilter::SplittingFilter(size_t num_channels, size_t num_bands, size_t num_frames) - : num_bands_(num_bands) { + : num_bands_(num_bands), + two_bands_states_(num_bands_ == 2 ? 
num_channels : 0), + three_band_filter_banks_(num_bands_ == 3 ? num_channels : 0) { RTC_CHECK(num_bands_ == 2 || num_bands_ == 3); - if (num_bands_ == 2) { - two_bands_states_.resize(num_channels); - } else if (num_bands_ == 3) { - for (size_t i = 0; i < num_channels; ++i) { - three_band_filter_banks_.push_back(std::unique_ptr( - new ThreeBandFilterBank(num_frames))); - } - } } SplittingFilter::~SplittingFilter() = default; @@ -105,18 +100,44 @@ void SplittingFilter::TwoBandsSynthesis(const ChannelBuffer* bands, void SplittingFilter::ThreeBandsAnalysis(const ChannelBuffer* data, ChannelBuffer* bands) { RTC_DCHECK_EQ(three_band_filter_banks_.size(), data->num_channels()); + RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size()); + RTC_DCHECK_LE(data->num_channels(), bands->num_channels()); + RTC_DCHECK_EQ(data->num_frames(), ThreeBandFilterBank::kFullBandSize); + RTC_DCHECK_EQ(bands->num_frames(), ThreeBandFilterBank::kFullBandSize); + RTC_DCHECK_EQ(bands->num_bands(), ThreeBandFilterBank::kNumBands); + RTC_DCHECK_EQ(bands->num_frames_per_band(), + ThreeBandFilterBank::kSplitBandSize); + for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) { - three_band_filter_banks_[i]->Analysis(data->channels()[i], - data->num_frames(), bands->bands(i)); + three_band_filter_banks_[i].Analysis( + rtc::ArrayView( + data->channels_view()[i].data(), + ThreeBandFilterBank::kFullBandSize), + rtc::ArrayView, + ThreeBandFilterBank::kNumBands>( + bands->bands_view(i).data(), ThreeBandFilterBank::kNumBands)); } } void SplittingFilter::ThreeBandsSynthesis(const ChannelBuffer* bands, ChannelBuffer* data) { RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size()); + RTC_DCHECK_LE(data->num_channels(), bands->num_channels()); + RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size()); + RTC_DCHECK_EQ(data->num_frames(), ThreeBandFilterBank::kFullBandSize); + RTC_DCHECK_EQ(bands->num_frames(), ThreeBandFilterBank::kFullBandSize); + 
RTC_DCHECK_EQ(bands->num_bands(), ThreeBandFilterBank::kNumBands); + RTC_DCHECK_EQ(bands->num_frames_per_band(), + ThreeBandFilterBank::kSplitBandSize); + for (size_t i = 0; i < data->num_channels(); ++i) { - three_band_filter_banks_[i]->Synthesis( - bands->bands(i), bands->num_frames_per_band(), data->channels()[i]); + three_band_filter_banks_[i].Synthesis( + rtc::ArrayView, + ThreeBandFilterBank::kNumBands>( + bands->bands_view(i).data(), ThreeBandFilterBank::kNumBands), + rtc::ArrayView( + data->channels_view()[i].data(), + ThreeBandFilterBank::kFullBandSize)); } } diff --git a/modules/audio_processing/splitting_filter.h b/modules/audio_processing/splitting_filter.h index 3b33c35aec..e578dd07c1 100644 --- a/modules/audio_processing/splitting_filter.h +++ b/modules/audio_processing/splitting_filter.h @@ -64,7 +64,7 @@ class SplittingFilter { const size_t num_bands_; std::vector two_bands_states_; - std::vector> three_band_filter_banks_; + std::vector three_band_filter_banks_; }; } // namespace webrtc diff --git a/modules/audio_processing/three_band_filter_bank.cc b/modules/audio_processing/three_band_filter_bank.cc index dbbfc283c5..2a7d272e60 100644 --- a/modules/audio_processing/three_band_filter_bank.cc +++ b/modules/audio_processing/three_band_filter_bank.cc @@ -30,37 +30,33 @@ // // A similar logic can be applied to the synthesis stage. -// MSVC++ requires this to be set before any other includes to get M_PI. -#define _USE_MATH_DEFINES - #include "modules/audio_processing/three_band_filter_bank.h" -#include +#include #include "rtc_base/checks.h" namespace webrtc { namespace { -const size_t kNumBands = 3; -const size_t kSparsity = 4; - -// Factors to take into account when choosing |kNumCoeffs|: -// 1. Higher |kNumCoeffs|, means faster transition, which ensures less +// Factors to take into account when choosing |kFilterSize|: +// 1. Higher |kFilterSize|, means faster transition, which ensures less // aliasing. 
This is especially important when there is non-linear // processing between the splitting and merging. // 2. The delay that this filter bank introduces is -// |kNumBands| * |kSparsity| * |kNumCoeffs| / 2, so it increases linearly -// with |kNumCoeffs|. -// 3. The computation complexity also increases linearly with |kNumCoeffs|. -const size_t kNumCoeffs = 4; +// |kNumBands| * |kSparsity| * |kFilterSize| / 2, so it increases linearly +// with |kFilterSize|. +// 3. The computation complexity also increases linearly with |kFilterSize|. -// The Matlab code to generate these |kLowpassCoeffs| is: +// The Matlab code to generate these |kFilterCoeffs| is: // -// N = kNumBands * kSparsity * kNumCoeffs - 1; +// N = kNumBands * kSparsity * kFilterSize - 1; // h = fir1(N, 1 / (2 * kNumBands), kaiser(N + 1, 3.5)); -// reshape(h, kNumBands * kSparsity, kNumCoeffs); +// reshape(h, kNumBands * kSparsity, kFilterSize); // +// The code below uses the values of kFilterSize, kNumBands and kSparsity +// specified in the header. + // Because the total bandwidth of the lower and higher band is double the middle // one (because of the spectrum parity), the low-pass prototype is half the // bandwidth of 1 / (2 * |kNumBands|) and is then shifted with cosine modulation @@ -68,39 +64,84 @@ const size_t kNumCoeffs = 4; // A Kaiser window is used because of its flexibility and the alpha is set to // 3.5, since that sets a stop band attenuation of 40dB ensuring a fast // transition. 
-const float kLowpassCoeffs[kNumBands * kSparsity][kNumCoeffs] = { - {-0.00047749f, -0.00496888f, +0.16547118f, +0.00425496f}, - {-0.00173287f, -0.01585778f, +0.14989004f, +0.00994113f}, - {-0.00304815f, -0.02536082f, +0.12154542f, +0.01157993f}, - {-0.00383509f, -0.02982767f, +0.08543175f, +0.00983212f}, - {-0.00346946f, -0.02587886f, +0.04760441f, +0.00607594f}, - {-0.00154717f, -0.01136076f, +0.01387458f, +0.00186353f}, - {+0.00186353f, +0.01387458f, -0.01136076f, -0.00154717f}, - {+0.00607594f, +0.04760441f, -0.02587886f, -0.00346946f}, - {+0.00983212f, +0.08543175f, -0.02982767f, -0.00383509f}, - {+0.01157993f, +0.12154542f, -0.02536082f, -0.00304815f}, - {+0.00994113f, +0.14989004f, -0.01585778f, -0.00173287f}, - {+0.00425496f, +0.16547118f, -0.00496888f, -0.00047749f}}; -// Downsamples |in| into |out|, taking one every |kNumbands| starting from -// |offset|. |split_length| is the |out| length. |in| has to be at least -// |kNumBands| * |split_length| long. -void Downsample(const float* in, - size_t split_length, - size_t offset, - float* out) { - for (size_t i = 0; i < split_length; ++i) { - out[i] = in[kNumBands * i + offset]; - } -} +constexpr int kSubSampling = ThreeBandFilterBank::kNumBands; +constexpr int kDctSize = ThreeBandFilterBank::kNumBands; +static_assert(ThreeBandFilterBank::kNumBands * + ThreeBandFilterBank::kSplitBandSize == + ThreeBandFilterBank::kFullBandSize, + "The full band must be split in equally sized subbands"); -// Upsamples |in| into |out|, scaling by |kNumBands| and accumulating it every -// |kNumBands| starting from |offset|. |split_length| is the |in| length. |out| -// has to be at least |kNumBands| * |split_length| long. 
-void Upsample(const float* in, size_t split_length, size_t offset, float* out) { - for (size_t i = 0; i < split_length; ++i) { - out[kNumBands * i + offset] += kNumBands * in[i]; +const float + kFilterCoeffs[ThreeBandFilterBank::kNumNonZeroFilters][kFilterSize] = { + {-0.00047749f, -0.00496888f, +0.16547118f, +0.00425496f}, + {-0.00173287f, -0.01585778f, +0.14989004f, +0.00994113f}, + {-0.00304815f, -0.02536082f, +0.12154542f, +0.01157993f}, + {-0.00346946f, -0.02587886f, +0.04760441f, +0.00607594f}, + {-0.00154717f, -0.01136076f, +0.01387458f, +0.00186353f}, + {+0.00186353f, +0.01387458f, -0.01136076f, -0.00154717f}, + {+0.00607594f, +0.04760441f, -0.02587886f, -0.00346946f}, + {+0.00983212f, +0.08543175f, -0.02982767f, -0.00383509f}, + {+0.00994113f, +0.14989004f, -0.01585778f, -0.00173287f}, + {+0.00425496f, +0.16547118f, -0.00496888f, -0.00047749f}}; + +constexpr int kZeroFilterIndex1 = 3; +constexpr int kZeroFilterIndex2 = 9; + +const float kDctModulation[ThreeBandFilterBank::kNumNonZeroFilters][kDctSize] = + {{2.f, 2.f, 2.f}, + {1.73205077f, 0.f, -1.73205077f}, + {1.f, -2.f, 1.f}, + {-1.f, 2.f, -1.f}, + {-1.73205077f, 0.f, 1.73205077f}, + {-2.f, -2.f, -2.f}, + {-1.73205077f, 0.f, 1.73205077f}, + {-1.f, 2.f, -1.f}, + {1.f, -2.f, 1.f}, + {1.73205077f, 0.f, -1.73205077f}}; + +// Filters the input signal |in| with the filter |filter| using a shift by +// |in_shift|, taking into account the previous state. 
+void FilterCore( + rtc::ArrayView filter, + rtc::ArrayView in, + const int in_shift, + rtc::ArrayView out, + rtc::ArrayView state) { + constexpr int kMaxInShift = (kStride - 1); + RTC_DCHECK_GE(in_shift, 0); + RTC_DCHECK_LE(in_shift, kMaxInShift); + std::fill(out.begin(), out.end(), 0.f); + + for (int k = 0; k < in_shift; ++k) { + for (int i = 0, j = kMemorySize + k - in_shift; i < kFilterSize; + ++i, j -= kStride) { + out[k] += state[j] * filter[i]; + } } + + for (int k = in_shift, shift = 0; k < kFilterSize * kStride; ++k, ++shift) { + RTC_DCHECK_GE(shift, 0); + const int loop_limit = std::min(kFilterSize, 1 + (shift >> kStrideLog2)); + for (int i = 0, j = shift; i < loop_limit; ++i, j -= kStride) { + out[k] += in[j] * filter[i]; + } + for (int i = loop_limit, j = kMemorySize + shift - loop_limit * kStride; + i < kFilterSize; ++i, j -= kStride) { + out[k] += state[j] * filter[i]; + } + } + + for (int k = kFilterSize * kStride, shift = kFilterSize * kStride - in_shift; + k < ThreeBandFilterBank::kSplitBandSize; ++k, ++shift) { + for (int i = 0, j = shift; i < kFilterSize; ++i, j -= kStride) { + out[k] += in[j] * filter[i]; + } + } + + // Update current state. + std::copy(in.begin() + ThreeBandFilterBank::kSplitBandSize - kMemorySize, + in.end(), state.begin()); } } // namespace @@ -108,26 +149,15 @@ void Upsample(const float* in, size_t split_length, size_t offset, float* out) { // Because the low-pass filter prototype has half bandwidth it is possible to // use a DCT to shift it in both directions at the same time, to the center // frequencies [1 / 12, 3 / 12, 5 / 12]. 
-ThreeBandFilterBank::ThreeBandFilterBank(size_t length) - : in_buffer_(rtc::CheckedDivExact(length, kNumBands)), - out_buffer_(in_buffer_.size()) { - for (size_t i = 0; i < kSparsity; ++i) { - for (size_t j = 0; j < kNumBands; ++j) { - analysis_filters_.push_back( - std::unique_ptr(new SparseFIRFilter( - kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i))); - synthesis_filters_.push_back( - std::unique_ptr(new SparseFIRFilter( - kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i))); - } - } - dct_modulation_.resize(kNumBands * kSparsity); - for (size_t i = 0; i < dct_modulation_.size(); ++i) { - dct_modulation_[i].resize(kNumBands); - for (size_t j = 0; j < kNumBands; ++j) { - dct_modulation_[i][j] = - 2.f * cos(2.f * M_PI * i * (2.f * j + 1.f) / dct_modulation_.size()); - } +ThreeBandFilterBank::ThreeBandFilterBank() { + RTC_DCHECK_EQ(state_analysis_.size(), kNumNonZeroFilters); + RTC_DCHECK_EQ(state_synthesis_.size(), kNumNonZeroFilters); + for (int k = 0; k < kNumNonZeroFilters; ++k) { + RTC_DCHECK_EQ(state_analysis_[k].size(), kMemorySize); + RTC_DCHECK_EQ(state_synthesis_[k].size(), kMemorySize); + + state_analysis_[k].fill(0.f); + state_synthesis_[k].fill(0.f); } } @@ -139,20 +169,52 @@ ThreeBandFilterBank::~ThreeBandFilterBank() = default; // decomposition of the low-pass prototype filter and upsampled by a factor // of |kSparsity|. // 3. Modulating with cosines and accumulating to get the desired band. -void ThreeBandFilterBank::Analysis(const float* in, - size_t length, - float* const* out) { - RTC_CHECK_EQ(in_buffer_.size(), rtc::CheckedDivExact(length, kNumBands)); - for (size_t i = 0; i < kNumBands; ++i) { - memset(out[i], 0, in_buffer_.size() * sizeof(*out[i])); +void ThreeBandFilterBank::Analysis( + rtc::ArrayView in, + rtc::ArrayView, ThreeBandFilterBank::kNumBands> + out) { + // Initialize the output to zero. 
+  for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) {
+    RTC_DCHECK_EQ(out[band].size(), kSplitBandSize);
+    std::fill(out[band].begin(), out[band].end(), 0);
   }
-  for (size_t i = 0; i < kNumBands; ++i) {
-    Downsample(in, in_buffer_.size(), kNumBands - i - 1, &in_buffer_[0]);
-    for (size_t j = 0; j < kSparsity; ++j) {
-      const size_t offset = i + j * kNumBands;
-      analysis_filters_[offset]->Filter(&in_buffer_[0], in_buffer_.size(),
-                                        &out_buffer_[0]);
-      DownModulate(&out_buffer_[0], out_buffer_.size(), offset, out);
+
+  for (int downsampling_index = 0; downsampling_index < kSubSampling;
+       ++downsampling_index) {
+    // Downsample to form the filter input.
+    std::array<float, kSplitBandSize> in_subsampled;
+    for (int k = 0; k < kSplitBandSize; ++k) {
+      in_subsampled[k] =
+          in[(kSubSampling - 1) - downsampling_index + kSubSampling * k];
+    }
+
+    for (int in_shift = 0; in_shift < kStride; ++in_shift) {
+      // Choose filter, skip zero filters.
+      const int index = downsampling_index + in_shift * kSubSampling;
+      if (index == kZeroFilterIndex1 || index == kZeroFilterIndex2) {
+        continue;
+      }
+      const int filter_index =
+          index < kZeroFilterIndex1
+              ? index
+              : (index < kZeroFilterIndex2 ? index - 1 : index - 2);
+
+      rtc::ArrayView<const float, kFilterSize> filter(
+          kFilterCoeffs[filter_index]);
+      rtc::ArrayView<const float, kDctSize> dct_modulation(
+          kDctModulation[filter_index]);
+      rtc::ArrayView<float, kMemorySize> state(state_analysis_[filter_index]);
+
+      // Filter.
+      std::array<float, kSplitBandSize> out_subsampled;
+      FilterCore(filter, in_subsampled, in_shift, out_subsampled, state);
+
+      // Band and modulate the output.
+      for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) {
+        for (int n = 0; n < kSplitBandSize; ++n) {
+          out[band][n] += dct_modulation[band] * out_subsampled[n];
+        }
+      }
     }
   }
 }
@@ -163,49 +225,50 @@ void ThreeBandFilterBank::Analysis(const float* in,
 //      prototype filter upsampled by a factor of |kSparsity| and accumulating
 //      |kSparsity| signals with different delays.
 //   3. Parallel to serial upsampling by a factor of |kNumBands|.
-void ThreeBandFilterBank::Synthesis(const float* const* in,
-                                    size_t split_length,
-                                    float* out) {
-  RTC_CHECK_EQ(in_buffer_.size(), split_length);
-  memset(out, 0, kNumBands * in_buffer_.size() * sizeof(*out));
-  for (size_t i = 0; i < kNumBands; ++i) {
-    for (size_t j = 0; j < kSparsity; ++j) {
-      const size_t offset = i + j * kNumBands;
-      UpModulate(in, in_buffer_.size(), offset, &in_buffer_[0]);
-      synthesis_filters_[offset]->Filter(&in_buffer_[0], in_buffer_.size(),
-                                         &out_buffer_[0]);
-      Upsample(&out_buffer_[0], out_buffer_.size(), i, out);
-    }
-  }
-}
+void ThreeBandFilterBank::Synthesis(
+    rtc::ArrayView<const rtc::ArrayView<float>, ThreeBandFilterBank::kNumBands>
+        in,
+    rtc::ArrayView<float, kFullBandSize> out) {
+  std::fill(out.begin(), out.end(), 0);
+  for (int upsampling_index = 0; upsampling_index < kSubSampling;
+       ++upsampling_index) {
+    for (int in_shift = 0; in_shift < kStride; ++in_shift) {
+      // Choose filter, skip zero filters.
+      const int index = upsampling_index + in_shift * kSubSampling;
+      if (index == kZeroFilterIndex1 || index == kZeroFilterIndex2) {
+        continue;
+      }
+      const int filter_index =
+          index < kZeroFilterIndex1
+              ? index
+              : (index < kZeroFilterIndex2 ? index - 1 : index - 2);
 
-// Modulates |in| by |dct_modulation_| and accumulates it in each of the
-// |kNumBands| bands of |out|. |offset| is the index in the period of the
-// cosines used for modulation. |split_length| is the length of |in| and each
-// band of |out|.
-void ThreeBandFilterBank::DownModulate(const float* in,
-                                       size_t split_length,
-                                       size_t offset,
-                                       float* const* out) {
-  for (size_t i = 0; i < kNumBands; ++i) {
-    for (size_t j = 0; j < split_length; ++j) {
-      out[i][j] += dct_modulation_[offset][i] * in[j];
-    }
-  }
-}
+      rtc::ArrayView<const float, kFilterSize> filter(
+          kFilterCoeffs[filter_index]);
+      rtc::ArrayView<const float, kDctSize> dct_modulation(
+          kDctModulation[filter_index]);
+      rtc::ArrayView<float, kMemorySize> state(state_synthesis_[filter_index]);
 
-// Modulates each of the |kNumBands| bands of |in| by |dct_modulation_| and
-// accumulates them in |out|. |out| is cleared before starting to accumulate.
-// |offset| is the index in the period of the cosines used for modulation.
-// |split_length| is the length of each band of |in| and |out|.
-void ThreeBandFilterBank::UpModulate(const float* const* in,
-                                     size_t split_length,
-                                     size_t offset,
-                                     float* out) {
-  memset(out, 0, split_length * sizeof(*out));
-  for (size_t i = 0; i < kNumBands; ++i) {
-    for (size_t j = 0; j < split_length; ++j) {
-      out[j] += dct_modulation_[offset][i] * in[i][j];
+      // Prepare filter input by modulating the banded input.
+      std::array<float, kSplitBandSize> in_subsampled;
+      std::fill(in_subsampled.begin(), in_subsampled.end(), 0.f);
+      for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) {
+        RTC_DCHECK_EQ(in[band].size(), kSplitBandSize);
+        for (int n = 0; n < kSplitBandSize; ++n) {
+          in_subsampled[n] += dct_modulation[band] * in[band][n];
+        }
+      }
+
+      // Filter.
+      std::array<float, kSplitBandSize> out_subsampled;
+      FilterCore(filter, in_subsampled, in_shift, out_subsampled, state);
+
+      // Upsample.
+      constexpr float kUpsamplingScaling = kSubSampling;
+      for (int k = 0; k < kSplitBandSize; ++k) {
+        out[upsampling_index + kSubSampling * k] +=
+            kUpsamplingScaling * out_subsampled[k];
+      }
     }
   }
 }
diff --git a/modules/audio_processing/three_band_filter_bank.h b/modules/audio_processing/three_band_filter_bank.h
index ccbf2ddf97..e6346dec44 100644
--- a/modules/audio_processing/three_band_filter_bank.h
+++ b/modules/audio_processing/three_band_filter_bank.h
@@ -11,14 +11,25 @@
 #ifndef MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
 #define MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
 
+#include <array>
 #include <cstring>
 #include <memory>
 #include <vector>
 
-#include "common_audio/sparse_fir_filter.h"
+#include "api/array_view.h"
 
 namespace webrtc {
 
+constexpr int kSparsity = 4;
+constexpr int kStrideLog2 = 2;
+constexpr int kStride = 1 << kStrideLog2;
+constexpr int kNumZeroFilters = 2;
+constexpr int kFilterSize = 4;
+constexpr int kMemorySize = kFilterSize * kStride - 1;
+static_assert(kMemorySize == 15,
+              "The memory size must be sufficient to provide memory for the "
+              "shifted filters");
+
 // An implementation of a 3-band FIR filter-bank with DCT modulation, similar to
 // the proposed in "Multirate Signal Processing for Communication Systems" by
 // Fredric J Harris.
@@ -34,34 +45,31 @@ namespace webrtc {
 // depending on the input signal after compensating for the delay.
 class ThreeBandFilterBank final {
  public:
-  explicit ThreeBandFilterBank(size_t length);
+  static const int kNumBands = 3;
+  static const int kFullBandSize = 480;
+  static const int kSplitBandSize =
+      ThreeBandFilterBank::kFullBandSize / ThreeBandFilterBank::kNumBands;
+  static const int kNumNonZeroFilters =
+      kSparsity * ThreeBandFilterBank::kNumBands - kNumZeroFilters;
+
+  ThreeBandFilterBank();
   ~ThreeBandFilterBank();
 
-  // Splits |in| into 3 downsampled frequency bands in |out|.
-  // |length| is the |in| length. Each of the 3 bands of |out| has to have a
-  // length of |length| / 3.
-  void Analysis(const float* in, size_t length, float* const* out);
+  // Splits |in| of size kFullBandSize into 3 downsampled frequency bands in
+  // |out|, each of size 160.
+  void Analysis(rtc::ArrayView<const float, kFullBandSize> in,
+                rtc::ArrayView<const rtc::ArrayView<float>, kNumBands> out);
 
-  // Merges the 3 downsampled frequency bands in |in| into |out|.
-  // |split_length| is the length of each band of |in|. |out| has to have at
-  // least a length of 3 * |split_length|.
-  void Synthesis(const float* const* in, size_t split_length, float* out);
+  // Merges the 3 downsampled frequency bands in |in|, each of size 160, into
+  // |out|, which is of size kFullBandSize.
+  void Synthesis(rtc::ArrayView<const rtc::ArrayView<float>, kNumBands> in,
+                 rtc::ArrayView<float, kFullBandSize> out);
 
  private:
-  void DownModulate(const float* in,
-                    size_t split_length,
-                    size_t offset,
-                    float* const* out);
-  void UpModulate(const float* const* in,
-                  size_t split_length,
-                  size_t offset,
-                  float* out);
-
-  std::vector<float> in_buffer_;
-  std::vector<float> out_buffer_;
-  std::vector<std::unique_ptr<SparseFIRFilter>> analysis_filters_;
-  std::vector<std::unique_ptr<SparseFIRFilter>> synthesis_filters_;
-  std::vector<std::vector<float>> dct_modulation_;
+  std::array<std::array<float, kMemorySize>, kNumNonZeroFilters>
+      state_analysis_;
+  std::array<std::array<float, kMemorySize>, kNumNonZeroFilters>
+      state_synthesis_;
 };
 
 }  // namespace webrtc