mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-14 06:10:40 +01:00

Bug: webrtc:8671 Change-Id: I2431b1dd8dbe35fc8742c0640c3b35166e8ef6b7 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/171480 Reviewed-by: Sam Zackrisson <saza@webrtc.org> Commit-Queue: Per Åhgren <peah@webrtc.org> Cr-Commit-Position: refs/heads/master@{#30876}
186 lines
6.6 KiB
C++
186 lines
6.6 KiB
C++
/*
 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
|
|
|
|
#include "modules/audio_processing/aec3/comfort_noise_generator.h"

// Defines WEBRTC_ARCH_X86_FAMILY, used below.
#include "rtc_base/system/arch.h"

#if defined(WEBRTC_ARCH_X86_FAMILY)
#include <emmintrin.h>
#endif
#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <numeric>
#include <vector>

#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_processing/aec3/vector_math.h"
#include "rtc_base/checks.h"
|
|
|
|
namespace webrtc {
|
|
|
|
namespace {
|
|
|
|
// Computes the noise floor value that matches a WGN input of noise_floor_dbfs.
//
// noise_floor_dbfs: noise level in dBFS.
// Returns 64 * 10^((noise_floor_dbfs + 20*log10(32768)) / 10), i.e. the level
// shifted by the normalization constant, converted from dB to a linear value
// and scaled by a fixed factor of 64.
float GetNoiseFloorFactor(float noise_floor_dbfs) {
  // kdBfsNormalization = 20.f*log10(32768.f).
  constexpr float kdBfsNormalization = 90.30899869919436f;
  // Both arguments are float, so the single-precision overload is selected.
  return 64.f * std::pow(10.f, (kdBfsNormalization + noise_floor_dbfs) * 0.1f);
}
|
|
|
|
// Table of sqrt(2) * sin(2*pi*i/32) for i = 0..31.
// Reading the table at (i + 8) & 31, a quarter period later, gives the
// matching sqrt(2) * cos(2*pi*i/32) value.
constexpr float kSqrt2Sin[32] = {
    +0.0000000f, +0.2758994f, +0.5411961f, +0.7856950f, +1.0000000f,
    +1.1758756f, +1.3065630f, +1.3870398f, +1.4142136f, +1.3870398f,
    +1.3065630f, +1.1758756f, +1.0000000f, +0.7856950f, +0.5411961f,
    +0.2758994f, +0.0000000f, -0.2758994f, -0.5411961f, -0.7856950f,
    -1.0000000f, -1.1758756f, -1.3065630f, -1.3870398f, -1.4142136f,
    -1.3870398f, -1.3065630f, -1.1758756f, -1.0000000f, -0.7856950f,
    -0.5411961f, -0.2758994f};
|
|
|
|
// Generates the comfort noise spectra for a single channel.
//
// optimization: selects the aec3::VectorMath implementation used for the
//     element-wise square root.
// N2: noise spectrum estimate; its element-wise square root shapes the
//     lower-band noise.
// seed: in/out state of the pseudo-random generator. It is advanced once per
//     generated bin and must not be null.
// lower_band_noise: output. Bin k receives sqrt(N2[k]) multiplied by a random
//     phasor of magnitude sqrt(2).
// upper_band_noise: output. Every bin receives the same flat level (computed
//     from the upper part of sqrt(N2)) multiplied by the phasor that was used
//     for the lower band at that bin.
//
// Only re[0] and re[kFftLengthBy2] (both set to zero) and the re/im values of
// bins 1..kFftLengthBy2-1 are written; im[0] and im[kFftLengthBy2] are left
// untouched.
void GenerateComfortNoise(Aec3Optimization optimization,
                          const std::array<float, kFftLengthBy2Plus1>& N2,
                          uint32_t* seed,
                          FftData* lower_band_noise,
                          FftData* upper_band_noise) {
  FftData* N_low = lower_band_noise;
  FftData* N_high = upper_band_noise;

  // Compute square root spectrum.
  std::array<float, kFftLengthBy2Plus1> N;
  std::copy(N2.begin(), N2.end(), N.begin());
  aec3::VectorMath(optimization).Sqrt(N);

  // Compute the noise level for the upper bands as the average of the
  // square-root spectrum from bin kFftLengthBy2Plus1 / 2 up to the last bin.
  constexpr float kOneByNumBands = 1.f / (kFftLengthBy2Plus1 / 2 + 1);
  constexpr int kFftLengthBy2Plus1By2 = kFftLengthBy2Plus1 / 2;
  const float high_band_noise_level =
      std::accumulate(N.begin() + kFftLengthBy2Plus1By2, N.end(), 0.f) *
      kOneByNumBands;

  // The analysis and synthesis windowing cause loss of power when
  // cross-fading the noise where frames are completely uncorrelated
  // (generated with random phase), hence the factor sqrt(2).
  // This is not the case for the speech signal where the input is overlapping
  // (strong correlation).
  N_low->re[0] = N_low->re[kFftLengthBy2] = N_high->re[0] =
      N_high->re[kFftLengthBy2] = 0.f;
  for (size_t k = 1; k < kFftLengthBy2; k++) {
    constexpr int kIndexMask = 32 - 1;
    // Generate a random 31-bit integer.
    seed[0] = (seed[0] * 69069 + 1) & (0x80000000 - 1);
    // Convert to a 5-bit index (the top 5 of the 31 bits).
    const int i = seed[0] >> 26;

    // x = sqrt(2) * sin(a)
    const float x = kSqrt2Sin[i];
    // y = sqrt(2) * cos(a) = sqrt(2) * sin(a + pi/2)
    const float y = kSqrt2Sin[(i + 8) & kIndexMask];

    // Form low-frequency noise via spectral shaping.
    N_low->re[k] = N[k] * x;
    N_low->im[k] = N[k] * y;

    // Form the high-frequency noise via simple levelling.
    N_high->re[k] = high_band_noise_level * x;
    N_high->im[k] = high_band_noise_level * y;
  }
}
|
|
|
|
} // namespace
|
|
|
|
// Sets up the per-channel noise estimation state.
//
// config: supplies comfort_noise.noise_floor_dbfs, which is converted once
//     into the lower bound that Compute() applies to the noise estimates.
// optimization: stored and later passed on to the noise generation.
// num_capture_channels: number of capture channels; sizes all per-channel
//     state.
ComfortNoiseGenerator::ComfortNoiseGenerator(const EchoCanceller3Config& config,
                                             Aec3Optimization optimization,
                                             size_t num_capture_channels)
    : optimization_(optimization),
      // Fixed seed for the pseudo-random phase generator.
      seed_(42),
      num_capture_channels_(num_capture_channels),
      noise_floor_(GetNoiseFloorFactor(config.comfort_noise.noise_floor_dbfs)),
      N2_initial_(
          std::make_unique<std::vector<std::array<float, kFftLengthBy2Plus1>>>(
              num_capture_channels_)),
      Y2_smoothed_(num_capture_channels_),
      N2_(num_capture_channels_) {
  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
    (*N2_initial_)[ch].fill(0.f);
    Y2_smoothed_[ch].fill(0.f);
    // Start from a large value; Compute() lowers the estimate whenever the
    // smoothed capture spectrum falls below it.
    N2_[ch].fill(1.0e6f);
  }
}
|
|
|
|
// Out-of-line defaulted destructor; members are released by their own
// destructors.
ComfortNoiseGenerator::~ComfortNoiseGenerator() = default;
|
|
|
|
void ComfortNoiseGenerator::Compute(
|
|
bool saturated_capture,
|
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
|
|
capture_spectrum,
|
|
rtc::ArrayView<FftData> lower_band_noise,
|
|
rtc::ArrayView<FftData> upper_band_noise) {
|
|
const auto& Y2 = capture_spectrum;
|
|
|
|
if (!saturated_capture) {
|
|
// Smooth Y2.
|
|
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
|
std::transform(Y2_smoothed_[ch].begin(), Y2_smoothed_[ch].end(),
|
|
Y2[ch].begin(), Y2_smoothed_[ch].begin(),
|
|
[](float a, float b) { return a + 0.1f * (b - a); });
|
|
}
|
|
|
|
if (N2_counter_ > 50) {
|
|
// Update N2 from Y2_smoothed.
|
|
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
|
std::transform(N2_[ch].begin(), N2_[ch].end(), Y2_smoothed_[ch].begin(),
|
|
N2_[ch].begin(), [](float a, float b) {
|
|
return b < a ? (0.9f * b + 0.1f * a) * 1.0002f
|
|
: a * 1.0002f;
|
|
});
|
|
}
|
|
}
|
|
|
|
if (N2_initial_) {
|
|
if (++N2_counter_ == 1000) {
|
|
N2_initial_.reset();
|
|
} else {
|
|
// Compute the N2_initial from N2.
|
|
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
|
std::transform(N2_[ch].begin(), N2_[ch].end(),
|
|
(*N2_initial_)[ch].begin(), (*N2_initial_)[ch].begin(),
|
|
[](float a, float b) {
|
|
return a > b ? b + 0.001f * (a - b) : a;
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
|
for (auto& n : N2_[ch]) {
|
|
n = std::max(n, noise_floor_);
|
|
}
|
|
if (N2_initial_) {
|
|
for (auto& n : (*N2_initial_)[ch]) {
|
|
n = std::max(n, noise_floor_);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Choose N2 estimate to use.
|
|
const auto& N2 = N2_initial_ ? (*N2_initial_) : N2_;
|
|
|
|
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
|
GenerateComfortNoise(optimization_, N2[ch], &seed_, &lower_band_noise[ch],
|
|
&upper_band_noise[ch]);
|
|
}
|
|
}
|
|
|
|
} // namespace webrtc
|