mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-12 21:30:45 +01:00
Optimizations and refactoring of the APM 3-band split filter
This CL refactors and optimizes the 3-band split-filter in APM, which is a very computationally complex component. Beyond optimizing the code, the filter coefficients are also quantized to avoid denormals. The changes reduce the complexity of the split filter by about 30-50%. The CL has been tested for bitexactness on a number of aecdump recordings. (The CL also removes the now unused code for the sparse_fir_filter.) Bug: webrtc:6181 Change-Id: If45f8d1f189c6812ccb03721156c77eb68181211 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/168189 Reviewed-by: Sam Zackrisson <saza@webrtc.org> Reviewed-by: Karl Wiberg <kwiberg@webrtc.org> Commit-Queue: Per Åhgren <peah@webrtc.org> Cr-Commit-Position: refs/heads/master@{#30592}
This commit is contained in:
parent
0e089db913
commit
1883d3e231
10 changed files with 303 additions and 513 deletions
|
@ -32,8 +32,6 @@ rtc_library("common_audio") {
|
|||
"resampler/sinc_resampler.cc",
|
||||
"smoothing_filter.cc",
|
||||
"smoothing_filter.h",
|
||||
"sparse_fir_filter.cc",
|
||||
"sparse_fir_filter.h",
|
||||
"vad/include/vad.h",
|
||||
"vad/vad.cc",
|
||||
"wav_file.cc",
|
||||
|
@ -47,6 +45,7 @@ rtc_library("common_audio") {
|
|||
deps = [
|
||||
":common_audio_c",
|
||||
":sinc_resampler",
|
||||
"../api:array_view",
|
||||
"../rtc_base:checks",
|
||||
"../rtc_base:gtest_prod",
|
||||
"../rtc_base:rtc_base_approved",
|
||||
|
@ -331,7 +330,6 @@ if (rtc_include_tests) {
|
|||
"signal_processing/real_fft_unittest.cc",
|
||||
"signal_processing/signal_processing_unittest.cc",
|
||||
"smoothing_filter_unittest.cc",
|
||||
"sparse_fir_filter_unittest.cc",
|
||||
"vad/vad_core_unittest.cc",
|
||||
"vad/vad_filterbank_unittest.cc",
|
||||
"vad/vad_gmm_unittest.cc",
|
||||
|
|
|
@ -14,7 +14,9 @@
|
|||
#include <string.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/gtest_prod_util.h"
|
||||
|
@ -48,40 +50,60 @@ class ChannelBuffer {
|
|||
num_frames_per_band_(num_frames / num_bands),
|
||||
num_allocated_channels_(num_channels),
|
||||
num_channels_(num_channels),
|
||||
num_bands_(num_bands) {
|
||||
for (size_t i = 0; i < num_allocated_channels_; ++i) {
|
||||
for (size_t j = 0; j < num_bands_; ++j) {
|
||||
channels_[j * num_allocated_channels_ + i] =
|
||||
&data_[i * num_frames_ + j * num_frames_per_band_];
|
||||
bands_[i * num_bands_ + j] = channels_[j * num_allocated_channels_ + i];
|
||||
num_bands_(num_bands),
|
||||
bands_view_(num_allocated_channels_,
|
||||
std::vector<rtc::ArrayView<T>>(num_bands_)),
|
||||
channels_view_(
|
||||
num_bands_,
|
||||
std::vector<rtc::ArrayView<T>>(num_allocated_channels_)) {
|
||||
// Temporarily cast away const_ness to allow populating the array views.
|
||||
auto* bands_view =
|
||||
const_cast<std::vector<std::vector<rtc::ArrayView<T>>>*>(&bands_view_);
|
||||
auto* channels_view =
|
||||
const_cast<std::vector<std::vector<rtc::ArrayView<T>>>*>(
|
||||
&channels_view_);
|
||||
|
||||
for (size_t ch = 0; ch < num_allocated_channels_; ++ch) {
|
||||
for (size_t band = 0; band < num_bands_; ++band) {
|
||||
(*channels_view)[band][ch] = rtc::ArrayView<T>(
|
||||
&data_[ch * num_frames_ + band * num_frames_per_band_],
|
||||
num_frames_per_band_);
|
||||
(*bands_view)[ch][band] = channels_view_[band][ch];
|
||||
channels_[band * num_allocated_channels_ + ch] =
|
||||
channels_view_[band][ch].data();
|
||||
bands_[ch * num_bands_ + band] =
|
||||
channels_[band * num_allocated_channels_ + ch];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a pointer array to the full-band channels (or lower band channels).
|
||||
// Usage:
|
||||
// channels()[channel][sample].
|
||||
// Where:
|
||||
// 0 <= channel < |num_allocated_channels_|
|
||||
// 0 <= sample < |num_frames_|
|
||||
T* const* channels() { return channels(0); }
|
||||
const T* const* channels() const { return channels(0); }
|
||||
|
||||
// Returns a pointer array to the channels for a specific band.
|
||||
// Usage:
|
||||
// channels(band)[channel][sample].
|
||||
// Returns a pointer array to the channels.
|
||||
// If band is explicitly specified, the channels for a specific band are
|
||||
// returned and the usage becomes: channels(band)[channel][sample].
|
||||
// Where:
|
||||
// 0 <= band < |num_bands_|
|
||||
// 0 <= channel < |num_allocated_channels_|
|
||||
// 0 <= sample < |num_frames_per_band_|
|
||||
const T* const* channels(size_t band) const {
|
||||
|
||||
// If band is not explicitly specified, the full-band channels (or lower band
|
||||
// channels) are returned and the usage becomes: channels()[channel][sample].
|
||||
// Where:
|
||||
// 0 <= channel < |num_allocated_channels_|
|
||||
// 0 <= sample < |num_frames_|
|
||||
const T* const* channels(size_t band = 0) const {
|
||||
RTC_DCHECK_LT(band, num_bands_);
|
||||
return &channels_[band * num_allocated_channels_];
|
||||
}
|
||||
T* const* channels(size_t band) {
|
||||
T* const* channels(size_t band = 0) {
|
||||
const ChannelBuffer<T>* t = this;
|
||||
return const_cast<T* const*>(t->channels(band));
|
||||
}
|
||||
rtc::ArrayView<const rtc::ArrayView<T>> channels_view(size_t band = 0) {
|
||||
return channels_view_[band];
|
||||
}
|
||||
rtc::ArrayView<const rtc::ArrayView<T>> channels_view(size_t band = 0) const {
|
||||
return channels_view_[band];
|
||||
}
|
||||
|
||||
// Returns a pointer array to the bands for a specific channel.
|
||||
// Usage:
|
||||
|
@ -100,6 +122,13 @@ class ChannelBuffer {
|
|||
return const_cast<T* const*>(t->bands(channel));
|
||||
}
|
||||
|
||||
rtc::ArrayView<const rtc::ArrayView<T>> bands_view(size_t channel) {
|
||||
return bands_view_[channel];
|
||||
}
|
||||
rtc::ArrayView<const rtc::ArrayView<T>> bands_view(size_t channel) const {
|
||||
return bands_view_[channel];
|
||||
}
|
||||
|
||||
// Sets the |slice| pointers to the |start_frame| position for each channel.
|
||||
// Returns |slice| for convenience.
|
||||
const T* const* Slice(T** slice, size_t start_frame) const {
|
||||
|
@ -140,6 +169,8 @@ class ChannelBuffer {
|
|||
// Number of channels the user sees.
|
||||
size_t num_channels_;
|
||||
const size_t num_bands_;
|
||||
const std::vector<std::vector<rtc::ArrayView<T>>> bands_view_;
|
||||
const std::vector<std::vector<rtc::ArrayView<T>>> channels_view_;
|
||||
};
|
||||
|
||||
// One int16_t and one float ChannelBuffer that are kept in sync. The sync is
|
||||
|
|
|
@ -1,60 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "common_audio/sparse_fir_filter.h"
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
SparseFIRFilter::SparseFIRFilter(const float* nonzero_coeffs,
|
||||
size_t num_nonzero_coeffs,
|
||||
size_t sparsity,
|
||||
size_t offset)
|
||||
: sparsity_(sparsity),
|
||||
offset_(offset),
|
||||
nonzero_coeffs_(nonzero_coeffs, nonzero_coeffs + num_nonzero_coeffs),
|
||||
state_(sparsity_ * (num_nonzero_coeffs - 1) + offset_, 0.f) {
|
||||
RTC_CHECK_GE(num_nonzero_coeffs, 1);
|
||||
RTC_CHECK_GE(sparsity, 1);
|
||||
}
|
||||
|
||||
SparseFIRFilter::~SparseFIRFilter() = default;
|
||||
|
||||
void SparseFIRFilter::Filter(const float* in, size_t length, float* out) {
|
||||
// Convolves the input signal |in| with the filter kernel |nonzero_coeffs_|
|
||||
// taking into account the previous state.
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
out[i] = 0.f;
|
||||
size_t j;
|
||||
for (j = 0; i >= j * sparsity_ + offset_ && j < nonzero_coeffs_.size();
|
||||
++j) {
|
||||
out[i] += in[i - j * sparsity_ - offset_] * nonzero_coeffs_[j];
|
||||
}
|
||||
for (; j < nonzero_coeffs_.size(); ++j) {
|
||||
out[i] += state_[i + (nonzero_coeffs_.size() - j - 1) * sparsity_] *
|
||||
nonzero_coeffs_[j];
|
||||
}
|
||||
}
|
||||
|
||||
// Update current state.
|
||||
if (!state_.empty()) {
|
||||
if (length >= state_.size()) {
|
||||
std::memcpy(&state_[0], &in[length - state_.size()],
|
||||
state_.size() * sizeof(*in));
|
||||
} else {
|
||||
std::memmove(&state_[0], &state_[length],
|
||||
(state_.size() - length) * sizeof(state_[0]));
|
||||
std::memcpy(&state_[state_.size() - length], in, length * sizeof(*in));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
|
@ -1,53 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef COMMON_AUDIO_SPARSE_FIR_FILTER_H_
|
||||
#define COMMON_AUDIO_SPARSE_FIR_FILTER_H_
|
||||
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// A Finite Impulse Response filter implementation which takes advantage of a
|
||||
// sparse structure with uniformly distributed non-zero coefficients.
|
||||
class SparseFIRFilter final {
|
||||
public:
|
||||
// |num_nonzero_coeffs| is the number of non-zero coefficients,
|
||||
// |nonzero_coeffs|. They are assumed to be uniformly distributed every
|
||||
// |sparsity| samples and with an initial |offset|. The rest of the filter
|
||||
// coefficients will be assumed zeros. For example, with sparsity = 3, and
|
||||
// offset = 1 the filter coefficients will be:
|
||||
// B = [0 coeffs[0] 0 0 coeffs[1] 0 0 coeffs[2] ... ]
|
||||
// All initial state values will be zeros.
|
||||
SparseFIRFilter(const float* nonzero_coeffs,
|
||||
size_t num_nonzero_coeffs,
|
||||
size_t sparsity,
|
||||
size_t offset);
|
||||
~SparseFIRFilter();
|
||||
|
||||
// Filters the |in| data supplied.
|
||||
// |out| must be previously allocated and it must be at least of |length|.
|
||||
void Filter(const float* in, size_t length, float* out);
|
||||
|
||||
private:
|
||||
const size_t sparsity_;
|
||||
const size_t offset_;
|
||||
const std::vector<float> nonzero_coeffs_;
|
||||
std::vector<float> state_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(SparseFIRFilter);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // COMMON_AUDIO_SPARSE_FIR_FILTER_H_
|
|
@ -1,219 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "common_audio/sparse_fir_filter.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "common_audio/fir_filter.h"
|
||||
#include "common_audio/fir_filter_factory.h"
|
||||
#include "rtc_base/arraysize.h"
|
||||
#include "test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
static const float kCoeffs[] = {0.2f, 0.3f, 0.5f, 0.7f, 0.11f};
|
||||
static const float kInput[] = {1.f, 2.f, 3.f, 4.f, 5.f,
|
||||
6.f, 7.f, 8.f, 9.f, 10.f};
|
||||
|
||||
template <size_t N>
|
||||
void VerifyOutput(const float (&expected_output)[N], const float (&output)[N]) {
|
||||
EXPECT_EQ(0, memcmp(expected_output, output, sizeof(output)));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
TEST(SparseFIRFilterTest, FilterAsIdentity) {
|
||||
const float kCoeff = 1.f;
|
||||
const size_t kNumCoeff = 1;
|
||||
const size_t kSparsity = 3;
|
||||
const size_t kOffset = 0;
|
||||
float output[arraysize(kInput)];
|
||||
SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset);
|
||||
filter.Filter(kInput, arraysize(kInput), output);
|
||||
VerifyOutput(kInput, output);
|
||||
}
|
||||
|
||||
TEST(SparseFIRFilterTest, SameOutputForScalarCoefficientAndDifferentSparsity) {
|
||||
const float kCoeff = 2.f;
|
||||
const size_t kNumCoeff = 1;
|
||||
const size_t kLowSparsity = 1;
|
||||
const size_t kHighSparsity = 7;
|
||||
const size_t kOffset = 0;
|
||||
float low_sparsity_output[arraysize(kInput)];
|
||||
float high_sparsity_output[arraysize(kInput)];
|
||||
SparseFIRFilter low_sparsity_filter(&kCoeff, kNumCoeff, kLowSparsity,
|
||||
kOffset);
|
||||
SparseFIRFilter high_sparsity_filter(&kCoeff, kNumCoeff, kHighSparsity,
|
||||
kOffset);
|
||||
low_sparsity_filter.Filter(kInput, arraysize(kInput), low_sparsity_output);
|
||||
high_sparsity_filter.Filter(kInput, arraysize(kInput), high_sparsity_output);
|
||||
VerifyOutput(low_sparsity_output, high_sparsity_output);
|
||||
}
|
||||
|
||||
TEST(SparseFIRFilterTest, FilterUsedAsScalarMultiplication) {
|
||||
const float kCoeff = 5.f;
|
||||
const size_t kNumCoeff = 1;
|
||||
const size_t kSparsity = 5;
|
||||
const size_t kOffset = 0;
|
||||
float output[arraysize(kInput)];
|
||||
SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset);
|
||||
filter.Filter(kInput, arraysize(kInput), output);
|
||||
EXPECT_FLOAT_EQ(5.f, output[0]);
|
||||
EXPECT_FLOAT_EQ(20.f, output[3]);
|
||||
EXPECT_FLOAT_EQ(25.f, output[4]);
|
||||
EXPECT_FLOAT_EQ(50.f, output[arraysize(kInput) - 1]);
|
||||
}
|
||||
|
||||
TEST(SparseFIRFilterTest, FilterUsedAsInputShifting) {
|
||||
const float kCoeff = 1.f;
|
||||
const size_t kNumCoeff = 1;
|
||||
const size_t kSparsity = 1;
|
||||
const size_t kOffset = 4;
|
||||
float output[arraysize(kInput)];
|
||||
SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset);
|
||||
filter.Filter(kInput, arraysize(kInput), output);
|
||||
EXPECT_FLOAT_EQ(0.f, output[0]);
|
||||
EXPECT_FLOAT_EQ(0.f, output[3]);
|
||||
EXPECT_FLOAT_EQ(1.f, output[4]);
|
||||
EXPECT_FLOAT_EQ(2.f, output[5]);
|
||||
EXPECT_FLOAT_EQ(6.f, output[arraysize(kInput) - 1]);
|
||||
}
|
||||
|
||||
TEST(SparseFIRFilterTest, FilterUsedAsArbitraryWeighting) {
|
||||
const size_t kSparsity = 2;
|
||||
const size_t kOffset = 1;
|
||||
float output[arraysize(kInput)];
|
||||
SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset);
|
||||
filter.Filter(kInput, arraysize(kInput), output);
|
||||
EXPECT_FLOAT_EQ(0.f, output[0]);
|
||||
EXPECT_FLOAT_EQ(0.9f, output[3]);
|
||||
EXPECT_FLOAT_EQ(1.4f, output[4]);
|
||||
EXPECT_FLOAT_EQ(2.4f, output[5]);
|
||||
EXPECT_FLOAT_EQ(8.61f, output[arraysize(kInput) - 1]);
|
||||
}
|
||||
|
||||
TEST(SparseFIRFilterTest, FilterInLengthLesserOrEqualToCoefficientsLength) {
|
||||
const size_t kSparsity = 1;
|
||||
const size_t kOffset = 0;
|
||||
float output[arraysize(kInput)];
|
||||
SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset);
|
||||
filter.Filter(kInput, 2, output);
|
||||
EXPECT_FLOAT_EQ(0.2f, output[0]);
|
||||
EXPECT_FLOAT_EQ(0.7f, output[1]);
|
||||
}
|
||||
|
||||
TEST(SparseFIRFilterTest, MultipleFilterCalls) {
|
||||
const size_t kSparsity = 1;
|
||||
const size_t kOffset = 0;
|
||||
float output[arraysize(kInput)];
|
||||
SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset);
|
||||
filter.Filter(kInput, 2, output);
|
||||
EXPECT_FLOAT_EQ(0.2f, output[0]);
|
||||
EXPECT_FLOAT_EQ(0.7f, output[1]);
|
||||
filter.Filter(kInput, 2, output);
|
||||
EXPECT_FLOAT_EQ(1.3f, output[0]);
|
||||
EXPECT_FLOAT_EQ(2.4f, output[1]);
|
||||
filter.Filter(kInput, 2, output);
|
||||
EXPECT_FLOAT_EQ(2.81f, output[0]);
|
||||
EXPECT_FLOAT_EQ(2.62f, output[1]);
|
||||
filter.Filter(kInput, 2, output);
|
||||
EXPECT_FLOAT_EQ(2.81f, output[0]);
|
||||
EXPECT_FLOAT_EQ(2.62f, output[1]);
|
||||
filter.Filter(&kInput[3], 3, output);
|
||||
EXPECT_FLOAT_EQ(3.41f, output[0]);
|
||||
EXPECT_FLOAT_EQ(4.12f, output[1]);
|
||||
EXPECT_FLOAT_EQ(6.21f, output[2]);
|
||||
filter.Filter(&kInput[3], 3, output);
|
||||
EXPECT_FLOAT_EQ(8.12f, output[0]);
|
||||
EXPECT_FLOAT_EQ(9.14f, output[1]);
|
||||
EXPECT_FLOAT_EQ(9.45f, output[2]);
|
||||
}
|
||||
|
||||
TEST(SparseFIRFilterTest, VerifySampleBasedVsBlockBasedFiltering) {
|
||||
const size_t kSparsity = 3;
|
||||
const size_t kOffset = 1;
|
||||
float output_block_based[arraysize(kInput)];
|
||||
SparseFIRFilter filter_block(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset);
|
||||
filter_block.Filter(kInput, arraysize(kInput), output_block_based);
|
||||
float output_sample_based[arraysize(kInput)];
|
||||
SparseFIRFilter filter_sample(kCoeffs, arraysize(kCoeffs), kSparsity,
|
||||
kOffset);
|
||||
for (size_t i = 0; i < arraysize(kInput); ++i)
|
||||
filter_sample.Filter(&kInput[i], 1, &output_sample_based[i]);
|
||||
VerifyOutput(output_block_based, output_sample_based);
|
||||
}
|
||||
|
||||
TEST(SparseFIRFilterTest, SimpleHighPassFilter) {
|
||||
const size_t kSparsity = 2;
|
||||
const size_t kOffset = 2;
|
||||
const float kHPCoeffs[] = {1.f, -1.f};
|
||||
const float kConstantInput[] = {1.f, 1.f, 1.f, 1.f, 1.f,
|
||||
1.f, 1.f, 1.f, 1.f, 1.f};
|
||||
float output[arraysize(kConstantInput)];
|
||||
SparseFIRFilter filter(kHPCoeffs, arraysize(kHPCoeffs), kSparsity, kOffset);
|
||||
filter.Filter(kConstantInput, arraysize(kConstantInput), output);
|
||||
EXPECT_FLOAT_EQ(0.f, output[0]);
|
||||
EXPECT_FLOAT_EQ(0.f, output[1]);
|
||||
EXPECT_FLOAT_EQ(1.f, output[2]);
|
||||
EXPECT_FLOAT_EQ(1.f, output[3]);
|
||||
for (size_t i = kSparsity + kOffset; i < arraysize(kConstantInput); ++i)
|
||||
EXPECT_FLOAT_EQ(0.f, output[i]);
|
||||
}
|
||||
|
||||
TEST(SparseFIRFilterTest, SimpleLowPassFilter) {
|
||||
const size_t kSparsity = 2;
|
||||
const size_t kOffset = 2;
|
||||
const float kLPCoeffs[] = {1.f, 1.f};
|
||||
const float kHighFrequencyInput[] = {1.f, 1.f, -1.f, -1.f, 1.f,
|
||||
1.f, -1.f, -1.f, 1.f, 1.f};
|
||||
float output[arraysize(kHighFrequencyInput)];
|
||||
SparseFIRFilter filter(kLPCoeffs, arraysize(kLPCoeffs), kSparsity, kOffset);
|
||||
filter.Filter(kHighFrequencyInput, arraysize(kHighFrequencyInput), output);
|
||||
EXPECT_FLOAT_EQ(0.f, output[0]);
|
||||
EXPECT_FLOAT_EQ(0.f, output[1]);
|
||||
EXPECT_FLOAT_EQ(1.f, output[2]);
|
||||
EXPECT_FLOAT_EQ(1.f, output[3]);
|
||||
for (size_t i = kSparsity + kOffset; i < arraysize(kHighFrequencyInput); ++i)
|
||||
EXPECT_FLOAT_EQ(0.f, output[i]);
|
||||
}
|
||||
|
||||
TEST(SparseFIRFilterTest, SameOutputWhenSwappedCoefficientsAndInput) {
|
||||
const size_t kSparsity = 1;
|
||||
const size_t kOffset = 0;
|
||||
float output[arraysize(kCoeffs)];
|
||||
float output_swapped[arraysize(kCoeffs)];
|
||||
SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset);
|
||||
// Use arraysize(kCoeffs) for in_length to get same-length outputs.
|
||||
filter.Filter(kInput, arraysize(kCoeffs), output);
|
||||
SparseFIRFilter filter_swapped(kInput, arraysize(kCoeffs), kSparsity,
|
||||
kOffset);
|
||||
filter_swapped.Filter(kCoeffs, arraysize(kCoeffs), output_swapped);
|
||||
VerifyOutput(output, output_swapped);
|
||||
}
|
||||
|
||||
TEST(SparseFIRFilterTest, SameOutputAsFIRFilterWhenSparsityOneAndOffsetZero) {
|
||||
const size_t kSparsity = 1;
|
||||
const size_t kOffset = 0;
|
||||
float output[arraysize(kInput)];
|
||||
float sparse_output[arraysize(kInput)];
|
||||
std::unique_ptr<FIRFilter> filter(
|
||||
CreateFirFilter(kCoeffs, arraysize(kCoeffs), arraysize(kInput)));
|
||||
SparseFIRFilter sparse_filter(kCoeffs, arraysize(kCoeffs), kSparsity,
|
||||
kOffset);
|
||||
filter->Filter(kInput, arraysize(kInput), output);
|
||||
sparse_filter.Filter(kInput, arraysize(kInput), sparse_output);
|
||||
for (size_t i = 0; i < arraysize(kInput); ++i) {
|
||||
EXPECT_FLOAT_EQ(output[i], sparse_output[i]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
|
@ -73,6 +73,7 @@ rtc_library("audio_buffer") {
|
|||
|
||||
deps = [
|
||||
":api",
|
||||
"../../api:array_view",
|
||||
"../../api/audio:audio_frame_api",
|
||||
"../../common_audio",
|
||||
"../../common_audio:common_audio_c",
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include <array>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "common_audio/channel_buffer.h"
|
||||
#include "common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
@ -27,16 +28,10 @@ constexpr size_t kTwoBandFilterSamplesPerFrame = 320;
|
|||
SplittingFilter::SplittingFilter(size_t num_channels,
|
||||
size_t num_bands,
|
||||
size_t num_frames)
|
||||
: num_bands_(num_bands) {
|
||||
: num_bands_(num_bands),
|
||||
two_bands_states_(num_bands_ == 2 ? num_channels : 0),
|
||||
three_band_filter_banks_(num_bands_ == 3 ? num_channels : 0) {
|
||||
RTC_CHECK(num_bands_ == 2 || num_bands_ == 3);
|
||||
if (num_bands_ == 2) {
|
||||
two_bands_states_.resize(num_channels);
|
||||
} else if (num_bands_ == 3) {
|
||||
for (size_t i = 0; i < num_channels; ++i) {
|
||||
three_band_filter_banks_.push_back(std::unique_ptr<ThreeBandFilterBank>(
|
||||
new ThreeBandFilterBank(num_frames)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SplittingFilter::~SplittingFilter() = default;
|
||||
|
@ -105,18 +100,44 @@ void SplittingFilter::TwoBandsSynthesis(const ChannelBuffer<float>* bands,
|
|||
void SplittingFilter::ThreeBandsAnalysis(const ChannelBuffer<float>* data,
|
||||
ChannelBuffer<float>* bands) {
|
||||
RTC_DCHECK_EQ(three_band_filter_banks_.size(), data->num_channels());
|
||||
RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size());
|
||||
RTC_DCHECK_LE(data->num_channels(), bands->num_channels());
|
||||
RTC_DCHECK_EQ(data->num_frames(), ThreeBandFilterBank::kFullBandSize);
|
||||
RTC_DCHECK_EQ(bands->num_frames(), ThreeBandFilterBank::kFullBandSize);
|
||||
RTC_DCHECK_EQ(bands->num_bands(), ThreeBandFilterBank::kNumBands);
|
||||
RTC_DCHECK_EQ(bands->num_frames_per_band(),
|
||||
ThreeBandFilterBank::kSplitBandSize);
|
||||
|
||||
for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) {
|
||||
three_band_filter_banks_[i]->Analysis(data->channels()[i],
|
||||
data->num_frames(), bands->bands(i));
|
||||
three_band_filter_banks_[i].Analysis(
|
||||
rtc::ArrayView<const float, ThreeBandFilterBank::kFullBandSize>(
|
||||
data->channels_view()[i].data(),
|
||||
ThreeBandFilterBank::kFullBandSize),
|
||||
rtc::ArrayView<const rtc::ArrayView<float>,
|
||||
ThreeBandFilterBank::kNumBands>(
|
||||
bands->bands_view(i).data(), ThreeBandFilterBank::kNumBands));
|
||||
}
|
||||
}
|
||||
|
||||
void SplittingFilter::ThreeBandsSynthesis(const ChannelBuffer<float>* bands,
|
||||
ChannelBuffer<float>* data) {
|
||||
RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size());
|
||||
RTC_DCHECK_LE(data->num_channels(), bands->num_channels());
|
||||
RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size());
|
||||
RTC_DCHECK_EQ(data->num_frames(), ThreeBandFilterBank::kFullBandSize);
|
||||
RTC_DCHECK_EQ(bands->num_frames(), ThreeBandFilterBank::kFullBandSize);
|
||||
RTC_DCHECK_EQ(bands->num_bands(), ThreeBandFilterBank::kNumBands);
|
||||
RTC_DCHECK_EQ(bands->num_frames_per_band(),
|
||||
ThreeBandFilterBank::kSplitBandSize);
|
||||
|
||||
for (size_t i = 0; i < data->num_channels(); ++i) {
|
||||
three_band_filter_banks_[i]->Synthesis(
|
||||
bands->bands(i), bands->num_frames_per_band(), data->channels()[i]);
|
||||
three_band_filter_banks_[i].Synthesis(
|
||||
rtc::ArrayView<const rtc::ArrayView<float>,
|
||||
ThreeBandFilterBank::kNumBands>(
|
||||
bands->bands_view(i).data(), ThreeBandFilterBank::kNumBands),
|
||||
rtc::ArrayView<float, ThreeBandFilterBank::kFullBandSize>(
|
||||
data->channels_view()[i].data(),
|
||||
ThreeBandFilterBank::kFullBandSize));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -64,7 +64,7 @@ class SplittingFilter {
|
|||
|
||||
const size_t num_bands_;
|
||||
std::vector<TwoBandsStates> two_bands_states_;
|
||||
std::vector<std::unique_ptr<ThreeBandFilterBank>> three_band_filter_banks_;
|
||||
std::vector<ThreeBandFilterBank> three_band_filter_banks_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
|
@ -30,37 +30,33 @@
|
|||
//
|
||||
// A similar logic can be applied to the synthesis stage.
|
||||
|
||||
// MSVC++ requires this to be set before any other includes to get M_PI.
|
||||
#define _USE_MATH_DEFINES
|
||||
|
||||
#include "modules/audio_processing/three_band_filter_bank.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <array>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
const size_t kNumBands = 3;
|
||||
const size_t kSparsity = 4;
|
||||
|
||||
// Factors to take into account when choosing |kNumCoeffs|:
|
||||
// 1. Higher |kNumCoeffs|, means faster transition, which ensures less
|
||||
// Factors to take into account when choosing |kFilterSize|:
|
||||
// 1. Higher |kFilterSize|, means faster transition, which ensures less
|
||||
// aliasing. This is especially important when there is non-linear
|
||||
// processing between the splitting and merging.
|
||||
// 2. The delay that this filter bank introduces is
|
||||
// |kNumBands| * |kSparsity| * |kNumCoeffs| / 2, so it increases linearly
|
||||
// with |kNumCoeffs|.
|
||||
// 3. The computation complexity also increases linearly with |kNumCoeffs|.
|
||||
const size_t kNumCoeffs = 4;
|
||||
// |kNumBands| * |kSparsity| * |kFilterSize| / 2, so it increases linearly
|
||||
// with |kFilterSize|.
|
||||
// 3. The computation complexity also increases linearly with |kFilterSize|.
|
||||
|
||||
// The Matlab code to generate these |kLowpassCoeffs| is:
|
||||
// The Matlab code to generate these |kFilterCoeffs| is:
|
||||
//
|
||||
// N = kNumBands * kSparsity * kNumCoeffs - 1;
|
||||
// N = kNumBands * kSparsity * kFilterSize - 1;
|
||||
// h = fir1(N, 1 / (2 * kNumBands), kaiser(N + 1, 3.5));
|
||||
// reshape(h, kNumBands * kSparsity, kNumCoeffs);
|
||||
// reshape(h, kNumBands * kSparsity, kFilterSize);
|
||||
//
|
||||
// The code below uses the values of kFilterSize, kNumBands and kSparsity
|
||||
// specified in the header.
|
||||
|
||||
// Because the total bandwidth of the lower and higher band is double the middle
|
||||
// one (because of the spectrum parity), the low-pass prototype is half the
|
||||
// bandwidth of 1 / (2 * |kNumBands|) and is then shifted with cosine modulation
|
||||
|
@ -68,39 +64,84 @@ const size_t kNumCoeffs = 4;
|
|||
// A Kaiser window is used because of its flexibility and the alpha is set to
|
||||
// 3.5, since that sets a stop band attenuation of 40dB ensuring a fast
|
||||
// transition.
|
||||
const float kLowpassCoeffs[kNumBands * kSparsity][kNumCoeffs] = {
|
||||
{-0.00047749f, -0.00496888f, +0.16547118f, +0.00425496f},
|
||||
{-0.00173287f, -0.01585778f, +0.14989004f, +0.00994113f},
|
||||
{-0.00304815f, -0.02536082f, +0.12154542f, +0.01157993f},
|
||||
{-0.00383509f, -0.02982767f, +0.08543175f, +0.00983212f},
|
||||
{-0.00346946f, -0.02587886f, +0.04760441f, +0.00607594f},
|
||||
{-0.00154717f, -0.01136076f, +0.01387458f, +0.00186353f},
|
||||
{+0.00186353f, +0.01387458f, -0.01136076f, -0.00154717f},
|
||||
{+0.00607594f, +0.04760441f, -0.02587886f, -0.00346946f},
|
||||
{+0.00983212f, +0.08543175f, -0.02982767f, -0.00383509f},
|
||||
{+0.01157993f, +0.12154542f, -0.02536082f, -0.00304815f},
|
||||
{+0.00994113f, +0.14989004f, -0.01585778f, -0.00173287f},
|
||||
{+0.00425496f, +0.16547118f, -0.00496888f, -0.00047749f}};
|
||||
|
||||
// Downsamples |in| into |out|, taking one every |kNumBands| starting from
|
||||
// |offset|. |split_length| is the |out| length. |in| has to be at least
|
||||
// |kNumBands| * |split_length| long.
|
||||
void Downsample(const float* in,
|
||||
size_t split_length,
|
||||
size_t offset,
|
||||
float* out) {
|
||||
for (size_t i = 0; i < split_length; ++i) {
|
||||
out[i] = in[kNumBands * i + offset];
|
||||
}
|
||||
}
|
||||
constexpr int kSubSampling = ThreeBandFilterBank::kNumBands;
|
||||
constexpr int kDctSize = ThreeBandFilterBank::kNumBands;
|
||||
static_assert(ThreeBandFilterBank::kNumBands *
|
||||
ThreeBandFilterBank::kSplitBandSize ==
|
||||
ThreeBandFilterBank::kFullBandSize,
|
||||
"The full band must be split in equally sized subbands");
|
||||
|
||||
// Upsamples |in| into |out|, scaling by |kNumBands| and accumulating it every
|
||||
// |kNumBands| starting from |offset|. |split_length| is the |in| length. |out|
|
||||
// has to be at least |kNumBands| * |split_length| long.
|
||||
void Upsample(const float* in, size_t split_length, size_t offset, float* out) {
|
||||
for (size_t i = 0; i < split_length; ++i) {
|
||||
out[kNumBands * i + offset] += kNumBands * in[i];
|
||||
const float
|
||||
kFilterCoeffs[ThreeBandFilterBank::kNumNonZeroFilters][kFilterSize] = {
|
||||
{-0.00047749f, -0.00496888f, +0.16547118f, +0.00425496f},
|
||||
{-0.00173287f, -0.01585778f, +0.14989004f, +0.00994113f},
|
||||
{-0.00304815f, -0.02536082f, +0.12154542f, +0.01157993f},
|
||||
{-0.00346946f, -0.02587886f, +0.04760441f, +0.00607594f},
|
||||
{-0.00154717f, -0.01136076f, +0.01387458f, +0.00186353f},
|
||||
{+0.00186353f, +0.01387458f, -0.01136076f, -0.00154717f},
|
||||
{+0.00607594f, +0.04760441f, -0.02587886f, -0.00346946f},
|
||||
{+0.00983212f, +0.08543175f, -0.02982767f, -0.00383509f},
|
||||
{+0.00994113f, +0.14989004f, -0.01585778f, -0.00173287f},
|
||||
{+0.00425496f, +0.16547118f, -0.00496888f, -0.00047749f}};
|
||||
|
||||
constexpr int kZeroFilterIndex1 = 3;
|
||||
constexpr int kZeroFilterIndex2 = 9;
|
||||
|
||||
const float kDctModulation[ThreeBandFilterBank::kNumNonZeroFilters][kDctSize] =
|
||||
{{2.f, 2.f, 2.f},
|
||||
{1.73205077f, 0.f, -1.73205077f},
|
||||
{1.f, -2.f, 1.f},
|
||||
{-1.f, 2.f, -1.f},
|
||||
{-1.73205077f, 0.f, 1.73205077f},
|
||||
{-2.f, -2.f, -2.f},
|
||||
{-1.73205077f, 0.f, 1.73205077f},
|
||||
{-1.f, 2.f, -1.f},
|
||||
{1.f, -2.f, 1.f},
|
||||
{1.73205077f, 0.f, -1.73205077f}};
|
||||
|
||||
// Filters the input signal |in| with the filter |filter| using a shift by
|
||||
// |in_shift|, taking into account the previous state.
|
||||
void FilterCore(
|
||||
rtc::ArrayView<const float, kFilterSize> filter,
|
||||
rtc::ArrayView<const float, ThreeBandFilterBank::kSplitBandSize> in,
|
||||
const int in_shift,
|
||||
rtc::ArrayView<float, ThreeBandFilterBank::kSplitBandSize> out,
|
||||
rtc::ArrayView<float, kMemorySize> state) {
|
||||
constexpr int kMaxInShift = (kStride - 1);
|
||||
RTC_DCHECK_GE(in_shift, 0);
|
||||
RTC_DCHECK_LE(in_shift, kMaxInShift);
|
||||
std::fill(out.begin(), out.end(), 0.f);
|
||||
|
||||
for (int k = 0; k < in_shift; ++k) {
|
||||
for (int i = 0, j = kMemorySize + k - in_shift; i < kFilterSize;
|
||||
++i, j -= kStride) {
|
||||
out[k] += state[j] * filter[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (int k = in_shift, shift = 0; k < kFilterSize * kStride; ++k, ++shift) {
|
||||
RTC_DCHECK_GE(shift, 0);
|
||||
const int loop_limit = std::min(kFilterSize, 1 + (shift >> kStrideLog2));
|
||||
for (int i = 0, j = shift; i < loop_limit; ++i, j -= kStride) {
|
||||
out[k] += in[j] * filter[i];
|
||||
}
|
||||
for (int i = loop_limit, j = kMemorySize + shift - loop_limit * kStride;
|
||||
i < kFilterSize; ++i, j -= kStride) {
|
||||
out[k] += state[j] * filter[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (int k = kFilterSize * kStride, shift = kFilterSize * kStride - in_shift;
|
||||
k < ThreeBandFilterBank::kSplitBandSize; ++k, ++shift) {
|
||||
for (int i = 0, j = shift; i < kFilterSize; ++i, j -= kStride) {
|
||||
out[k] += in[j] * filter[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Update current state.
|
||||
std::copy(in.begin() + ThreeBandFilterBank::kSplitBandSize - kMemorySize,
|
||||
in.end(), state.begin());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -108,26 +149,15 @@ void Upsample(const float* in, size_t split_length, size_t offset, float* out) {
|
|||
// Because the low-pass filter prototype has half bandwidth it is possible to
|
||||
// use a DCT to shift it in both directions at the same time, to the center
|
||||
// frequencies [1 / 12, 3 / 12, 5 / 12].
|
||||
ThreeBandFilterBank::ThreeBandFilterBank(size_t length)
|
||||
: in_buffer_(rtc::CheckedDivExact(length, kNumBands)),
|
||||
out_buffer_(in_buffer_.size()) {
|
||||
for (size_t i = 0; i < kSparsity; ++i) {
|
||||
for (size_t j = 0; j < kNumBands; ++j) {
|
||||
analysis_filters_.push_back(
|
||||
std::unique_ptr<SparseFIRFilter>(new SparseFIRFilter(
|
||||
kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i)));
|
||||
synthesis_filters_.push_back(
|
||||
std::unique_ptr<SparseFIRFilter>(new SparseFIRFilter(
|
||||
kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i)));
|
||||
}
|
||||
}
|
||||
dct_modulation_.resize(kNumBands * kSparsity);
|
||||
for (size_t i = 0; i < dct_modulation_.size(); ++i) {
|
||||
dct_modulation_[i].resize(kNumBands);
|
||||
for (size_t j = 0; j < kNumBands; ++j) {
|
||||
dct_modulation_[i][j] =
|
||||
2.f * cos(2.f * M_PI * i * (2.f * j + 1.f) / dct_modulation_.size());
|
||||
}
|
||||
// Creates a filter bank whose analysis and synthesis filter memories are all
// zero.
ThreeBandFilterBank::ThreeBandFilterBank() {
  // There must be exactly one state buffer per non-zero filter.
  RTC_DCHECK_EQ(state_analysis_.size(), kNumNonZeroFilters);
  RTC_DCHECK_EQ(state_synthesis_.size(), kNumNonZeroFilters);

  // Clear the analysis filter memories.
  for (auto& memory : state_analysis_) {
    RTC_DCHECK_EQ(memory.size(), kMemorySize);
    memory.fill(0.f);
  }

  // Clear the synthesis filter memories.
  for (auto& memory : state_synthesis_) {
    RTC_DCHECK_EQ(memory.size(), kMemorySize);
    memory.fill(0.f);
  }
}
|
||||
|
||||
|
@ -139,20 +169,52 @@ ThreeBandFilterBank::~ThreeBandFilterBank() = default;
|
|||
// decomposition of the low-pass prototype filter and upsampled by a factor
|
||||
// of |kSparsity|.
|
||||
// 3. Modulating with cosines and accumulating to get the desired band.
|
||||
void ThreeBandFilterBank::Analysis(const float* in,
|
||||
size_t length,
|
||||
float* const* out) {
|
||||
RTC_CHECK_EQ(in_buffer_.size(), rtc::CheckedDivExact(length, kNumBands));
|
||||
for (size_t i = 0; i < kNumBands; ++i) {
|
||||
memset(out[i], 0, in_buffer_.size() * sizeof(*out[i]));
|
||||
void ThreeBandFilterBank::Analysis(
|
||||
rtc::ArrayView<const float, kFullBandSize> in,
|
||||
rtc::ArrayView<const rtc::ArrayView<float>, ThreeBandFilterBank::kNumBands>
|
||||
out) {
|
||||
// Initialize the output to zero.
|
||||
for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) {
|
||||
RTC_DCHECK_EQ(out[band].size(), kSplitBandSize);
|
||||
std::fill(out[band].begin(), out[band].end(), 0);
|
||||
}
|
||||
for (size_t i = 0; i < kNumBands; ++i) {
|
||||
Downsample(in, in_buffer_.size(), kNumBands - i - 1, &in_buffer_[0]);
|
||||
for (size_t j = 0; j < kSparsity; ++j) {
|
||||
const size_t offset = i + j * kNumBands;
|
||||
analysis_filters_[offset]->Filter(&in_buffer_[0], in_buffer_.size(),
|
||||
&out_buffer_[0]);
|
||||
DownModulate(&out_buffer_[0], out_buffer_.size(), offset, out);
|
||||
|
||||
for (int downsampling_index = 0; downsampling_index < kSubSampling;
|
||||
++downsampling_index) {
|
||||
// Downsample to form the filter input.
|
||||
std::array<float, kSplitBandSize> in_subsampled;
|
||||
for (int k = 0; k < kSplitBandSize; ++k) {
|
||||
in_subsampled[k] =
|
||||
in[(kSubSampling - 1) - downsampling_index + kSubSampling * k];
|
||||
}
|
||||
|
||||
for (int in_shift = 0; in_shift < kStride; ++in_shift) {
|
||||
// Choose filter, skip zero filters.
|
||||
const int index = downsampling_index + in_shift * kSubSampling;
|
||||
if (index == kZeroFilterIndex1 || index == kZeroFilterIndex2) {
|
||||
continue;
|
||||
}
|
||||
const int filter_index =
|
||||
index < kZeroFilterIndex1
|
||||
? index
|
||||
: (index < kZeroFilterIndex2 ? index - 1 : index - 2);
|
||||
|
||||
rtc::ArrayView<const float, kFilterSize> filter(
|
||||
kFilterCoeffs[filter_index]);
|
||||
rtc::ArrayView<const float, kDctSize> dct_modulation(
|
||||
kDctModulation[filter_index]);
|
||||
rtc::ArrayView<float, kMemorySize> state(state_analysis_[filter_index]);
|
||||
|
||||
// Filter.
|
||||
std::array<float, kSplitBandSize> out_subsampled;
|
||||
FilterCore(filter, in_subsampled, in_shift, out_subsampled, state);
|
||||
|
||||
// Band and modulate the output.
|
||||
for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) {
|
||||
for (int n = 0; n < kSplitBandSize; ++n) {
|
||||
out[band][n] += dct_modulation[band] * out_subsampled[n];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -163,49 +225,50 @@ void ThreeBandFilterBank::Analysis(const float* in,
|
|||
// prototype filter upsampled by a factor of |kSparsity| and accumulating
|
||||
// |kSparsity| signals with different delays.
|
||||
// 3. Parallel to serial upsampling by a factor of |kNumBands|.
|
||||
void ThreeBandFilterBank::Synthesis(const float* const* in,
|
||||
size_t split_length,
|
||||
float* out) {
|
||||
RTC_CHECK_EQ(in_buffer_.size(), split_length);
|
||||
memset(out, 0, kNumBands * in_buffer_.size() * sizeof(*out));
|
||||
for (size_t i = 0; i < kNumBands; ++i) {
|
||||
for (size_t j = 0; j < kSparsity; ++j) {
|
||||
const size_t offset = i + j * kNumBands;
|
||||
UpModulate(in, in_buffer_.size(), offset, &in_buffer_[0]);
|
||||
synthesis_filters_[offset]->Filter(&in_buffer_[0], in_buffer_.size(),
|
||||
&out_buffer_[0]);
|
||||
Upsample(&out_buffer_[0], out_buffer_.size(), i, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
void ThreeBandFilterBank::Synthesis(
|
||||
rtc::ArrayView<const rtc::ArrayView<float>, ThreeBandFilterBank::kNumBands>
|
||||
in,
|
||||
rtc::ArrayView<float, kFullBandSize> out) {
|
||||
std::fill(out.begin(), out.end(), 0);
|
||||
for (int upsampling_index = 0; upsampling_index < kSubSampling;
|
||||
++upsampling_index) {
|
||||
for (int in_shift = 0; in_shift < kStride; ++in_shift) {
|
||||
// Choose filter, skip zero filters.
|
||||
const int index = upsampling_index + in_shift * kSubSampling;
|
||||
if (index == kZeroFilterIndex1 || index == kZeroFilterIndex2) {
|
||||
continue;
|
||||
}
|
||||
const int filter_index =
|
||||
index < kZeroFilterIndex1
|
||||
? index
|
||||
: (index < kZeroFilterIndex2 ? index - 1 : index - 2);
|
||||
|
||||
// Modulates |in| by |dct_modulation_| and accumulates it in each of the
|
||||
// |kNumBands| bands of |out|. |offset| is the index in the period of the
|
||||
// cosines used for modulation. |split_length| is the length of |in| and each
|
||||
// band of |out|.
|
||||
void ThreeBandFilterBank::DownModulate(const float* in,
|
||||
size_t split_length,
|
||||
size_t offset,
|
||||
float* const* out) {
|
||||
for (size_t i = 0; i < kNumBands; ++i) {
|
||||
for (size_t j = 0; j < split_length; ++j) {
|
||||
out[i][j] += dct_modulation_[offset][i] * in[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
rtc::ArrayView<const float, kFilterSize> filter(
|
||||
kFilterCoeffs[filter_index]);
|
||||
rtc::ArrayView<const float, kDctSize> dct_modulation(
|
||||
kDctModulation[filter_index]);
|
||||
rtc::ArrayView<float, kMemorySize> state(state_synthesis_[filter_index]);
|
||||
|
||||
// Modulates each of the |kNumBands| bands of |in| by |dct_modulation_| and
|
||||
// accumulates them in |out|. |out| is cleared before starting to accumulate.
|
||||
// |offset| is the index in the period of the cosines used for modulation.
|
||||
// |split_length| is the length of each band of |in| and |out|.
|
||||
void ThreeBandFilterBank::UpModulate(const float* const* in,
|
||||
size_t split_length,
|
||||
size_t offset,
|
||||
float* out) {
|
||||
memset(out, 0, split_length * sizeof(*out));
|
||||
for (size_t i = 0; i < kNumBands; ++i) {
|
||||
for (size_t j = 0; j < split_length; ++j) {
|
||||
out[j] += dct_modulation_[offset][i] * in[i][j];
|
||||
// Prepare filter input by modulating the banded input.
|
||||
std::array<float, kSplitBandSize> in_subsampled;
|
||||
std::fill(in_subsampled.begin(), in_subsampled.end(), 0.f);
|
||||
for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) {
|
||||
RTC_DCHECK_EQ(in[band].size(), kSplitBandSize);
|
||||
for (int n = 0; n < kSplitBandSize; ++n) {
|
||||
in_subsampled[n] += dct_modulation[band] * in[band][n];
|
||||
}
|
||||
}
|
||||
|
||||
// Filter.
|
||||
std::array<float, kSplitBandSize> out_subsampled;
|
||||
FilterCore(filter, in_subsampled, in_shift, out_subsampled, state);
|
||||
|
||||
// Upsample.
|
||||
constexpr float kUpsamplingScaling = kSubSampling;
|
||||
for (int k = 0; k < kSplitBandSize; ++k) {
|
||||
out[upsampling_index + kSubSampling * k] +=
|
||||
kUpsamplingScaling * out_subsampled[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,14 +11,25 @@
|
|||
#ifndef MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
|
||||
#define MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "common_audio/sparse_fir_filter.h"
|
||||
#include "api/array_view.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Spacing, in samples, between consecutive taps of each sparse filter. Kept as
// a power of two so that divisions by the stride can be done with a shift.
constexpr int kStrideLog2 = 2;
constexpr int kStride = 1 << kStrideLog2;

// Number of taps in each sparse filter.
constexpr int kFilterSize = 4;

// Number of past input samples each filter has to remember between frames:
// the oldest tap lies (kFilterSize - 1) * kStride samples back and the input
// may additionally be shifted by up to kStride - 1 samples.
constexpr int kMemorySize = kFilterSize * kStride - 1;
static_assert(kMemorySize == 15,
              "The memory size must be sufficient to provide memory for the "
              "shifted filters");

// Together with the number of bands, these determine how many filters carry
// state: kSparsity * kNumBands - kNumZeroFilters.
constexpr int kSparsity = 4;
constexpr int kNumZeroFilters = 2;
|
||||
|
||||
// An implementation of a 3-band FIR filter-bank with DCT modulation, similar to
|
||||
// the one proposed in "Multirate Signal Processing for Communication Systems" by
|
||||
// Fredric J Harris.
|
||||
|
@ -34,34 +45,31 @@ namespace webrtc {
|
|||
// depending on the input signal after compensating for the delay.
|
||||
class ThreeBandFilterBank final {
|
||||
public:
|
||||
explicit ThreeBandFilterBank(size_t length);
|
||||
static const int kNumBands = 3;
|
||||
static const int kFullBandSize = 480;
|
||||
static const int kSplitBandSize =
|
||||
ThreeBandFilterBank::kFullBandSize / ThreeBandFilterBank::kNumBands;
|
||||
static const int kNumNonZeroFilters =
|
||||
kSparsity * ThreeBandFilterBank::kNumBands - kNumZeroFilters;
|
||||
|
||||
ThreeBandFilterBank();
|
||||
~ThreeBandFilterBank();
|
||||
|
||||
// Splits |in| into 3 downsampled frequency bands in |out|.
|
||||
// |length| is the |in| length. Each of the 3 bands of |out| has to have a
|
||||
// length of |length| / 3.
|
||||
void Analysis(const float* in, size_t length, float* const* out);
|
||||
// Splits |in| of size kFullBandSize into 3 downsampled frequency bands in
|
||||
// |out|, each of size 160.
|
||||
void Analysis(rtc::ArrayView<const float, kFullBandSize> in,
|
||||
rtc::ArrayView<const rtc::ArrayView<float>, kNumBands> out);
|
||||
|
||||
// Merges the 3 downsampled frequency bands in |in| into |out|.
|
||||
// |split_length| is the length of each band of |in|. |out| has to have at
|
||||
// least a length of 3 * |split_length|.
|
||||
void Synthesis(const float* const* in, size_t split_length, float* out);
|
||||
// Merges the 3 downsampled frequency bands in |in|, each of size 160, into
|
||||
// |out|, which is of size kFullBandSize.
|
||||
void Synthesis(rtc::ArrayView<const rtc::ArrayView<float>, kNumBands> in,
|
||||
rtc::ArrayView<float, kFullBandSize> out);
|
||||
|
||||
private:
|
||||
void DownModulate(const float* in,
|
||||
size_t split_length,
|
||||
size_t offset,
|
||||
float* const* out);
|
||||
void UpModulate(const float* const* in,
|
||||
size_t split_length,
|
||||
size_t offset,
|
||||
float* out);
|
||||
|
||||
std::vector<float> in_buffer_;
|
||||
std::vector<float> out_buffer_;
|
||||
std::vector<std::unique_ptr<SparseFIRFilter>> analysis_filters_;
|
||||
std::vector<std::unique_ptr<SparseFIRFilter>> synthesis_filters_;
|
||||
std::vector<std::vector<float>> dct_modulation_;
|
||||
std::array<std::array<float, kMemorySize>, kNumNonZeroFilters>
|
||||
state_analysis_;
|
||||
std::array<std::array<float, kMemorySize>, kNumNonZeroFilters>
|
||||
state_synthesis_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
Loading…
Reference in a new issue