mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-12 21:30:45 +01:00
Removing all external access to the integer sample data in AudioBuffer
This CL removes all external access to the integer sample data in the AudioBuffer class. It also removes the API in AudioBuffer that provides this. The purpose of this is to pave the way for removing the sample duplicating and implicit conversions between integer and floating point sample formats which is done inside the AudioBuffer. Bug: webrtc:10882 Change-Id: I1438b691bcef98278aef8e3c63624c367c2d12e9 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/149162 Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org> Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org> Commit-Queue: Per Åhgren <peah@webrtc.org> Cr-Commit-Position: refs/heads/master@{#28912}
This commit is contained in:
parent
93d4c10ffc
commit
928146f546
20 changed files with 298 additions and 124 deletions
|
@ -12,11 +12,21 @@
|
|||
|
||||
namespace webrtc {
|
||||
|
||||
void FloatToS16(const float* src, size_t size, int16_t* dest) {
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
dest[i] = FloatToS16(src[i]);
|
||||
}
|
||||
|
||||
void S16ToFloat(const int16_t* src, size_t size, float* dest) {
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
dest[i] = S16ToFloat(src[i]);
|
||||
}
|
||||
|
||||
// Copies |size| int16 samples from |src| into the float array |dest|. The
// values are only widened to float; no rescaling is applied.
void S16ToFloatS16(const int16_t* src, size_t size, float* dest) {
  size_t remaining = size;
  while (remaining-- > 0) {
    *dest++ = static_cast<float>(*src++);
  }
}
|
||||
|
||||
void FloatS16ToS16(const float* src, size_t size, int16_t* dest) {
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
dest[i] = FloatS16ToS16(src[i]);
|
||||
|
|
|
@ -43,6 +43,13 @@ static inline int16_t FloatS16ToS16(float v) {
|
|||
return static_cast<int16_t>(v + std::copysign(0.5f, v));
|
||||
}
|
||||
|
||||
// Converts one float sample to int16: the input is scaled by 32768, saturated
// to [-32768, 32767] and then rounded half away from zero.
static inline int16_t FloatToS16(float v) {
  const float scaled = v * 32768.f;
  const float saturated = std::max(std::min(scaled, 32767.f), -32768.f);
  // Adding +/-0.5 ahead of the truncating cast yields round-to-nearest.
  return static_cast<int16_t>(saturated + std::copysign(0.5f, saturated));
}
|
||||
|
||||
static inline float FloatToFloatS16(float v) {
|
||||
v = std::min(v, 1.f);
|
||||
v = std::max(v, -1.f);
|
||||
|
@ -56,7 +63,9 @@ static inline float FloatS16ToFloat(float v) {
|
|||
return v * kScaling;
|
||||
}
|
||||
|
||||
// Array overloads of the sample-format conversions. Each one reads |size|
// samples from |src| and writes |size| samples to |dest|.

// Applies the scalar FloatToS16() to every sample.
void FloatToS16(const float* src, size_t size, int16_t* dest);
|
||||
// Applies the scalar S16ToFloat() to every sample.
void S16ToFloat(const int16_t* src, size_t size, float* dest);
|
||||
// Copies every int16 sample into a float without rescaling.
void S16ToFloatS16(const int16_t* src, size_t size, float* dest);
|
||||
// Applies the scalar FloatS16ToS16() to every sample.
void FloatS16ToS16(const float* src, size_t size, int16_t* dest);
|
||||
// NOTE(review): definition not visible here; presumably applies the scalar
// FloatToFloatS16() to every sample - confirm.
void FloatToFloatS16(const float* src, size_t size, float* dest);
|
||||
// NOTE(review): definition not visible here; presumably applies the scalar
// FloatS16ToFloat() to every sample - confirm.
void FloatS16ToFloat(const float* src, size_t size, float* dest);
|
||||
|
|
|
@ -33,18 +33,6 @@ T DBFS(T x) {
|
|||
return 20 * std::log10(x);
|
||||
}
|
||||
|
||||
void FloatToS16(const float* src, size_t size, int16_t* dest) {
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
RTC_DCHECK_GE(32767.f, src[i]);
|
||||
RTC_DCHECK_LE(-32768.f, src[i]);
|
||||
if (src[i] >= 1.f)
|
||||
dest[i] = 32767;
|
||||
if (src[i] <= -1.f)
|
||||
dest[i] = -32768;
|
||||
dest[i] = static_cast<int16_t>(src[i] * 32767.5f);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
class PushSincResamplerTest : public ::testing::TestWithParam<
|
||||
|
|
|
@ -19,6 +19,8 @@ rtc_source_set("agc") {
|
|||
":level_estimation",
|
||||
"..:apm_logging",
|
||||
"..:gain_control_interface",
|
||||
"../../../common_audio",
|
||||
"../../../common_audio:common_audio_c",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:gtest_prod",
|
||||
"../../../rtc_base:logging",
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include <cstdio>
|
||||
#endif
|
||||
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "modules/audio_processing/agc/gain_map_internal.h"
|
||||
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h"
|
||||
#include "modules/audio_processing/include/gain_control.h"
|
||||
|
@ -59,6 +60,10 @@ const int kMaxResidualGainChange = 15;
|
|||
// restrictions from clipping events.
|
||||
const int kSurplusCompressionGain = 6;
|
||||
|
||||
// Maximum number of channels and number of samples per channel supported.
|
||||
constexpr size_t kMaxNumSamplesPerChannel = 1920;
|
||||
constexpr size_t kMaxNumChannels = 4;
|
||||
|
||||
int ClampLevel(int mic_level) {
|
||||
return rtc::SafeClamp(mic_level, kMinMicLevel, kMaxMicLevel);
|
||||
}
|
||||
|
@ -220,7 +225,7 @@ int AgcManagerDirect::Initialize() {
|
|||
return InitializeGainControl(gctrl_, disable_digital_adaptive_);
|
||||
}
|
||||
|
||||
void AgcManagerDirect::AnalyzePreProcess(int16_t* audio,
|
||||
void AgcManagerDirect::AnalyzePreProcess(float* audio,
|
||||
int num_channels,
|
||||
size_t samples_per_channel) {
|
||||
size_t length = num_channels * samples_per_channel;
|
||||
|
@ -228,7 +233,19 @@ void AgcManagerDirect::AnalyzePreProcess(int16_t* audio,
|
|||
return;
|
||||
}
|
||||
|
||||
file_preproc_->Write(audio, length);
|
||||
std::array<int16_t, kMaxNumSamplesPerChannel * kMaxNumChannels> audio_data;
|
||||
int16_t* audio_fix;
|
||||
size_t safe_length;
|
||||
if (audio) {
|
||||
audio_fix = audio_data.data();
|
||||
safe_length = std::min(audio_data.size(), length);
|
||||
FloatS16ToS16(audio, length, audio_fix);
|
||||
} else {
|
||||
audio_fix = nullptr;
|
||||
safe_length = length;
|
||||
}
|
||||
|
||||
file_preproc_->Write(audio_fix, safe_length);
|
||||
|
||||
if (frames_since_clipped_ < kClippedWaitFrames) {
|
||||
++frames_since_clipped_;
|
||||
|
@ -244,7 +261,7 @@ void AgcManagerDirect::AnalyzePreProcess(int16_t* audio,
|
|||
// maximum. This harsh treatment is an effort to avoid repeated clipped echo
|
||||
// events. As compensation for this restriction, the maximum compression
|
||||
// gain is increased, through SetMaxLevel().
|
||||
float clipped_ratio = agc_->AnalyzePreproc(audio, length);
|
||||
float clipped_ratio = agc_->AnalyzePreproc(audio_fix, safe_length);
|
||||
if (clipped_ratio > kClippedRatioThreshold) {
|
||||
RTC_DLOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
|
||||
<< clipped_ratio;
|
||||
|
@ -263,15 +280,31 @@ void AgcManagerDirect::AnalyzePreProcess(int16_t* audio,
|
|||
}
|
||||
frames_since_clipped_ = 0;
|
||||
}
|
||||
|
||||
if (audio) {
|
||||
S16ToFloatS16(audio_fix, safe_length, audio);
|
||||
}
|
||||
}
|
||||
|
||||
void AgcManagerDirect::Process(const int16_t* audio,
|
||||
void AgcManagerDirect::Process(const float* audio,
|
||||
size_t length,
|
||||
int sample_rate_hz) {
|
||||
if (capture_muted_) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::array<int16_t, kMaxNumSamplesPerChannel * kMaxNumChannels> audio_data;
|
||||
const int16_t* audio_fix;
|
||||
size_t safe_length;
|
||||
if (audio) {
|
||||
audio_fix = audio_data.data();
|
||||
safe_length = std::min(audio_data.size(), length);
|
||||
FloatS16ToS16(audio, length, audio_data.data());
|
||||
} else {
|
||||
audio_fix = nullptr;
|
||||
safe_length = length;
|
||||
}
|
||||
|
||||
if (check_volume_on_next_process_) {
|
||||
check_volume_on_next_process_ = false;
|
||||
// We have to wait until the first process call to check the volume,
|
||||
|
@ -279,14 +312,14 @@ void AgcManagerDirect::Process(const int16_t* audio,
|
|||
CheckVolumeAndReset();
|
||||
}
|
||||
|
||||
agc_->Process(audio, length, sample_rate_hz);
|
||||
agc_->Process(audio_fix, safe_length, sample_rate_hz);
|
||||
|
||||
UpdateGain();
|
||||
if (!disable_digital_adaptive_) {
|
||||
UpdateCompressor();
|
||||
}
|
||||
|
||||
file_postproc_->Write(audio, length);
|
||||
file_postproc_->Write(audio_fix, safe_length);
|
||||
|
||||
data_dumper_->DumpRaw("experimental_gain_control_compression_gain_db", 1,
|
||||
&compression_);
|
||||
|
|
|
@ -56,10 +56,10 @@ class AgcManagerDirect final {
|
|||
~AgcManagerDirect();
|
||||
|
||||
int Initialize();
|
||||
void AnalyzePreProcess(int16_t* audio,
|
||||
void AnalyzePreProcess(float* audio,
|
||||
int num_channels,
|
||||
size_t samples_per_channel);
|
||||
void Process(const int16_t* audio, size_t length, int sample_rate_hz);
|
||||
void Process(const float* audio, size_t length, int sample_rate_hz);
|
||||
|
||||
// Call when the capture stream has been muted/unmuted. This causes the
|
||||
// manager to disregard all incoming audio; chances are good it's background
|
||||
|
|
|
@ -169,29 +169,11 @@ void AudioBuffer::InitForNewData() {
|
|||
}
|
||||
}
|
||||
|
||||
// Returns the read-only channel pointer array of the integer buffer that
// |data_| exposes through ibuf_const().
const int16_t* const* AudioBuffer::channels_const() const {
|
||||
return data_->ibuf_const()->channels();
|
||||
}
|
||||
|
||||
// Returns the writable channel pointer array of the integer buffer that
// |data_| exposes through ibuf().
int16_t* const* AudioBuffer::channels() {
|
||||
return data_->ibuf()->channels();
|
||||
}
|
||||
|
||||
const int16_t* const* AudioBuffer::split_bands_const(size_t channel) const {
|
||||
return split_data_.get() ? split_data_->ibuf_const()->bands(channel)
|
||||
: data_->ibuf_const()->bands(channel);
|
||||
}
|
||||
|
||||
int16_t* const* AudioBuffer::split_bands(size_t channel) {
|
||||
return split_data_.get() ? split_data_->ibuf()->bands(channel)
|
||||
: data_->ibuf()->bands(channel);
|
||||
}
|
||||
|
||||
const int16_t* const* AudioBuffer::split_channels_const(Band band) const {
|
||||
const float* const* AudioBuffer::split_channels_const_f(Band band) const {
|
||||
if (split_data_.get()) {
|
||||
return split_data_->ibuf_const()->channels(band);
|
||||
return split_data_->fbuf_const()->channels(band);
|
||||
} else {
|
||||
return band == kBand0To8kHz ? data_->ibuf_const()->channels() : nullptr;
|
||||
return band == kBand0To8kHz ? data_->fbuf_const()->channels() : nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -308,4 +290,29 @@ void AudioBuffer::MergeFrequencyBands() {
|
|||
splitting_filter_->Synthesis(split_data_.get(), data_.get());
|
||||
}
|
||||
|
||||
void AudioBuffer::CopySplitChannelDataTo(size_t channel,
|
||||
int16_t* const* split_band_data) {
|
||||
for (size_t k = 0; k < num_bands(); ++k) {
|
||||
const float* band_data = split_bands_f(channel)[k];
|
||||
RTC_DCHECK(split_band_data[k]);
|
||||
RTC_DCHECK(band_data);
|
||||
for (size_t i = 0; i < num_frames_per_band(); ++i) {
|
||||
split_band_data[k][i] = FloatS16ToS16(band_data[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AudioBuffer::CopySplitChannelDataFrom(
|
||||
size_t channel,
|
||||
const int16_t* const* split_band_data) {
|
||||
for (size_t k = 0; k < num_bands(); ++k) {
|
||||
float* band_data = split_bands_f(channel)[k];
|
||||
RTC_DCHECK(split_band_data[k]);
|
||||
RTC_DCHECK(band_data);
|
||||
for (size_t i = 0; i < num_frames_per_band(); ++i) {
|
||||
band_data[i] = split_band_data[k][i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
|
@ -52,8 +52,6 @@ class AudioBuffer {
|
|||
// Where:
|
||||
// 0 <= channel < |num_proc_channels_|
|
||||
// 0 <= sample < |proc_num_frames_|
|
||||
int16_t* const* channels();
|
||||
const int16_t* const* channels_const() const;
|
||||
float* const* channels_f();
|
||||
const float* const* channels_const_f() const;
|
||||
|
||||
|
@ -64,8 +62,6 @@ class AudioBuffer {
|
|||
// 0 <= channel < |num_proc_channels_|
|
||||
// 0 <= band < |num_bands_|
|
||||
// 0 <= sample < |num_split_frames_|
|
||||
int16_t* const* split_bands(size_t channel);
|
||||
const int16_t* const* split_bands_const(size_t channel) const;
|
||||
float* const* split_bands_f(size_t channel);
|
||||
const float* const* split_bands_const_f(size_t channel) const;
|
||||
|
||||
|
@ -76,7 +72,7 @@ class AudioBuffer {
|
|||
// 0 <= band < |num_bands_|
|
||||
// 0 <= channel < |num_proc_channels_|
|
||||
// 0 <= sample < |num_split_frames_|
|
||||
const int16_t* const* split_channels_const(Band band) const;
|
||||
const float* const* split_channels_const_f(Band band) const;
|
||||
|
||||
// Use for int16 interleaved data.
|
||||
void DeinterleaveFrom(const AudioFrame* audioFrame);
|
||||
|
@ -93,6 +89,17 @@ class AudioBuffer {
|
|||
// Recombine the different bands into one signal.
|
||||
void MergeFrequencyBands();
|
||||
|
||||
// Copies the split bands data into the integer two-dimensional array.
|
||||
void CopySplitChannelDataTo(size_t channel, int16_t* const* split_band_data);
|
||||
|
||||
// Copies the data in the integer two-dimensional array into the split_bands
|
||||
// data.
|
||||
void CopySplitChannelDataFrom(size_t channel,
|
||||
const int16_t* const* split_band_data);
|
||||
|
||||
static const size_t kMaxSplitFrameLength = 160;
|
||||
static const size_t kMaxNumBands = 3;
|
||||
|
||||
private:
|
||||
FRIEND_TEST_ALL_PREFIXES(AudioBufferTest,
|
||||
SetNumChannelsSetsChannelBuffersNumChannels);
|
||||
|
|
|
@ -43,9 +43,9 @@ TEST(AudioFrameTest, ConstructFromAudioBuffer) {
|
|||
// But not the other way. The following will fail:
|
||||
// non_const_view = other_const_view;
|
||||
|
||||
AudioFrameView<int16_t> non_const_int16_view(
|
||||
buffer.channels(), buffer.num_channels(), buffer.num_frames());
|
||||
non_const_int16_view.channel(0)[0] = kIntConstant;
|
||||
EXPECT_EQ(buffer.channels()[0][0], kIntConstant);
|
||||
AudioFrameView<float> non_const_float_view(
|
||||
buffer.channels_f(), buffer.num_channels(), buffer.num_frames());
|
||||
non_const_float_view.channel(0)[0] = kIntConstant;
|
||||
EXPECT_EQ(buffer.channels_f()[0][0], kIntConstant);
|
||||
}
|
||||
} // namespace webrtc
|
||||
|
|
|
@ -1279,8 +1279,8 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
|
|||
capture_buffer->num_frames()));
|
||||
}
|
||||
|
||||
capture_input_rms_.Analyze(rtc::ArrayView<const int16_t>(
|
||||
capture_buffer->channels_const()[0],
|
||||
capture_input_rms_.Analyze(rtc::ArrayView<const float>(
|
||||
capture_buffer->channels_const_f()[0],
|
||||
capture_nonlocked_.capture_processing_format.num_frames()));
|
||||
const bool log_rms = ++capture_rms_interval_counter_ >= 1000;
|
||||
if (log_rms) {
|
||||
|
@ -1323,12 +1323,12 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
|
|||
if (constants_.use_experimental_agc &&
|
||||
public_submodules_->gain_control->is_enabled()) {
|
||||
private_submodules_->agc_manager->AnalyzePreProcess(
|
||||
capture_buffer->channels()[0], capture_buffer->num_channels(),
|
||||
capture_buffer->channels_f()[0], capture_buffer->num_channels(),
|
||||
capture_nonlocked_.capture_processing_format.num_frames());
|
||||
|
||||
if (constants_.use_experimental_agc_process_before_aec) {
|
||||
private_submodules_->agc_manager->Process(
|
||||
capture_buffer->channels()[0],
|
||||
capture_buffer->channels_const_f()[0],
|
||||
capture_nonlocked_.capture_processing_format.num_frames(),
|
||||
capture_nonlocked_.capture_processing_format.sample_rate_hz());
|
||||
}
|
||||
|
@ -1419,7 +1419,7 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
|
|||
public_submodules_->gain_control->is_enabled() &&
|
||||
!constants_.use_experimental_agc_process_before_aec) {
|
||||
private_submodules_->agc_manager->Process(
|
||||
capture_buffer->split_bands_const(0)[kBand0To8kHz],
|
||||
capture_buffer->split_bands_const_f(0)[kBand0To8kHz],
|
||||
capture_buffer->num_frames_per_band(), capture_nonlocked_.split_rate);
|
||||
}
|
||||
// TODO(peah): Add reporting from AEC3 whether there is echo.
|
||||
|
@ -1484,8 +1484,8 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
|
|||
capture_.stats.output_rms_dbfs = absl::nullopt;
|
||||
}
|
||||
|
||||
capture_output_rms_.Analyze(rtc::ArrayView<const int16_t>(
|
||||
capture_buffer->channels_const()[0],
|
||||
capture_output_rms_.Analyze(rtc::ArrayView<const float>(
|
||||
capture_buffer->channels_const_f()[0],
|
||||
capture_nonlocked_.capture_processing_format.num_frames()));
|
||||
if (log_rms) {
|
||||
RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();
|
||||
|
|
|
@ -56,6 +56,7 @@ AudioProcessing::Error MapError(int err) {
|
|||
return AudioProcessing::kUnspecifiedError;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
struct EchoControlMobileImpl::StreamProperties {
|
||||
|
@ -131,7 +132,8 @@ void EchoControlMobileImpl::PackRenderAudioBuffer(
|
|||
size_t num_output_channels,
|
||||
size_t num_channels,
|
||||
std::vector<int16_t>* packed_buffer) {
|
||||
RTC_DCHECK_GE(160, audio->num_frames_per_band());
|
||||
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
|
||||
audio->num_frames_per_band());
|
||||
RTC_DCHECK_EQ(num_channels, audio->num_channels());
|
||||
|
||||
// The ordering convention must be followed to pass to the correct AECM.
|
||||
|
@ -139,12 +141,14 @@ void EchoControlMobileImpl::PackRenderAudioBuffer(
|
|||
int render_channel = 0;
|
||||
for (size_t i = 0; i < num_output_channels; i++) {
|
||||
for (size_t j = 0; j < audio->num_channels(); j++) {
|
||||
std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> data_to_buffer;
|
||||
FloatS16ToS16(audio->split_bands_const_f(render_channel)[kBand0To8kHz],
|
||||
audio->num_frames_per_band(), data_to_buffer.data());
|
||||
|
||||
// Buffer the samples in the render queue.
|
||||
packed_buffer->insert(
|
||||
packed_buffer->end(),
|
||||
audio->split_bands_const(render_channel)[kBand0To8kHz],
|
||||
(audio->split_bands_const(render_channel)[kBand0To8kHz] +
|
||||
audio->num_frames_per_band()));
|
||||
packed_buffer->end(), data_to_buffer.data(),
|
||||
data_to_buffer.data() + audio->num_frames_per_band());
|
||||
render_channel = (render_channel + 1) % audio->num_channels();
|
||||
}
|
||||
}
|
||||
|
@ -174,7 +178,21 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio,
|
|||
RTC_DCHECK_LT(capture, low_pass_reference_.size());
|
||||
const int16_t* noisy =
|
||||
reference_copied_ ? low_pass_reference_[capture].data() : nullptr;
|
||||
const int16_t* clean = audio->split_bands_const(capture)[kBand0To8kHz];
|
||||
|
||||
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
|
||||
audio->num_frames_per_band());
|
||||
|
||||
std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> split_bands_data;
|
||||
int16_t* split_bands = split_bands_data.data();
|
||||
const int16_t* clean = split_bands_data.data();
|
||||
if (audio->split_bands_f(capture)[kBand0To8kHz]) {
|
||||
FloatS16ToS16(audio->split_bands_f(capture)[kBand0To8kHz],
|
||||
audio->num_frames_per_band(), split_bands_data.data());
|
||||
} else {
|
||||
clean = nullptr;
|
||||
split_bands = nullptr;
|
||||
}
|
||||
|
||||
if (noisy == NULL) {
|
||||
noisy = clean;
|
||||
clean = NULL;
|
||||
|
@ -182,8 +200,13 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio,
|
|||
for (size_t render = 0; render < stream_properties_->num_reverse_channels;
|
||||
++render) {
|
||||
err = WebRtcAecm_Process(cancellers_[handle_index]->state(), noisy, clean,
|
||||
audio->split_bands(capture)[kBand0To8kHz],
|
||||
audio->num_frames_per_band(), stream_delay_ms);
|
||||
split_bands, audio->num_frames_per_band(),
|
||||
stream_delay_ms);
|
||||
|
||||
if (split_bands) {
|
||||
S16ToFloatS16(split_bands, audio->num_frames_per_band(),
|
||||
audio->split_bands_f(capture)[kBand0To8kHz]);
|
||||
}
|
||||
|
||||
if (err != AudioProcessing::kNoError) {
|
||||
return MapError(err);
|
||||
|
@ -192,9 +215,9 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio,
|
|||
++handle_index;
|
||||
}
|
||||
for (size_t band = 1u; band < audio->num_bands(); ++band) {
|
||||
memset(audio->split_bands(capture)[band], 0,
|
||||
memset(audio->split_bands_f(capture)[band], 0,
|
||||
audio->num_frames_per_band() *
|
||||
sizeof(audio->split_bands(capture)[band][0]));
|
||||
sizeof(audio->split_bands_f(capture)[band][0]));
|
||||
}
|
||||
}
|
||||
return AudioProcessing::kNoError;
|
||||
|
@ -204,9 +227,9 @@ void EchoControlMobileImpl::CopyLowPassReference(AudioBuffer* audio) {
|
|||
RTC_DCHECK_LE(audio->num_channels(), low_pass_reference_.size());
|
||||
reference_copied_ = true;
|
||||
for (size_t capture = 0; capture < audio->num_channels(); ++capture) {
|
||||
memcpy(low_pass_reference_[capture].data(),
|
||||
audio->split_bands_const(capture)[kBand0To8kHz],
|
||||
audio->num_frames_per_band() * sizeof(int16_t));
|
||||
FloatS16ToS16(audio->split_bands_const_f(capture)[kBand0To8kHz],
|
||||
audio->num_frames_per_band(),
|
||||
low_pass_reference_[capture].data());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -118,25 +118,25 @@ void GainControlImpl::ProcessRenderAudio(
|
|||
void GainControlImpl::PackRenderAudioBuffer(
|
||||
AudioBuffer* audio,
|
||||
std::vector<int16_t>* packed_buffer) {
|
||||
RTC_DCHECK_GE(160, audio->num_frames_per_band());
|
||||
|
||||
std::array<int16_t, 160> mixed_low_pass_data;
|
||||
rtc::ArrayView<const int16_t> mixed_low_pass;
|
||||
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
|
||||
audio->num_frames_per_band());
|
||||
std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> mixed_low_pass_data;
|
||||
rtc::ArrayView<const int16_t> mixed_low_pass(mixed_low_pass_data.data(),
|
||||
audio->num_frames_per_band());
|
||||
if (audio->num_proc_channels() == 1) {
|
||||
mixed_low_pass =
|
||||
rtc::ArrayView<const int16_t>(audio->split_bands_const(0)[kBand0To8kHz],
|
||||
audio->num_frames_per_band());
|
||||
FloatS16ToS16(audio->split_bands_const_f(0)[kBand0To8kHz],
|
||||
audio->num_frames_per_band(), mixed_low_pass_data.data());
|
||||
} else {
|
||||
const int num_channels = static_cast<int>(audio->num_channels());
|
||||
for (size_t i = 0; i < audio->num_frames_per_band(); ++i) {
|
||||
int32_t value = audio->split_channels_const(kBand0To8kHz)[0][i];
|
||||
int32_t value =
|
||||
FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[0][i]);
|
||||
for (int j = 1; j < num_channels; ++j) {
|
||||
value += audio->split_channels_const(kBand0To8kHz)[j][i];
|
||||
value +=
|
||||
FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[j][i]);
|
||||
}
|
||||
mixed_low_pass_data[i] = value / num_channels;
|
||||
}
|
||||
mixed_low_pass = rtc::ArrayView<const int16_t>(
|
||||
mixed_low_pass_data.data(), audio->num_frames_per_band());
|
||||
}
|
||||
|
||||
packed_buffer->clear();
|
||||
|
@ -150,17 +150,28 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
|
|||
}
|
||||
|
||||
RTC_DCHECK(num_proc_channels_);
|
||||
RTC_DCHECK_GE(160, audio->num_frames_per_band());
|
||||
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
|
||||
audio->num_frames_per_band());
|
||||
RTC_DCHECK_EQ(audio->num_channels(), *num_proc_channels_);
|
||||
RTC_DCHECK_LE(*num_proc_channels_, gain_controllers_.size());
|
||||
|
||||
int16_t split_band_data[AudioBuffer::kMaxNumBands]
|
||||
[AudioBuffer::kMaxSplitFrameLength];
|
||||
int16_t* split_bands[AudioBuffer::kMaxNumBands] = {
|
||||
split_band_data[0], split_band_data[1], split_band_data[2]};
|
||||
|
||||
if (mode_ == kAdaptiveAnalog) {
|
||||
int capture_channel = 0;
|
||||
for (auto& gain_controller : gain_controllers_) {
|
||||
gain_controller->set_capture_level(analog_capture_level_);
|
||||
int err = WebRtcAgc_AddMic(
|
||||
gain_controller->state(), audio->split_bands(capture_channel),
|
||||
audio->num_bands(), audio->num_frames_per_band());
|
||||
|
||||
audio->CopySplitChannelDataTo(capture_channel, split_bands);
|
||||
|
||||
int err =
|
||||
WebRtcAgc_AddMic(gain_controller->state(), split_bands,
|
||||
audio->num_bands(), audio->num_frames_per_band());
|
||||
|
||||
audio->CopySplitChannelDataFrom(capture_channel, split_bands);
|
||||
|
||||
if (err != AudioProcessing::kNoError) {
|
||||
return AudioProcessing::kUnspecifiedError;
|
||||
|
@ -171,10 +182,15 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
|
|||
int capture_channel = 0;
|
||||
for (auto& gain_controller : gain_controllers_) {
|
||||
int32_t capture_level_out = 0;
|
||||
int err = WebRtcAgc_VirtualMic(
|
||||
gain_controller->state(), audio->split_bands(capture_channel),
|
||||
audio->num_bands(), audio->num_frames_per_band(),
|
||||
analog_capture_level_, &capture_level_out);
|
||||
|
||||
audio->CopySplitChannelDataTo(capture_channel, split_bands);
|
||||
|
||||
int err =
|
||||
WebRtcAgc_VirtualMic(gain_controller->state(), split_bands,
|
||||
audio->num_bands(), audio->num_frames_per_band(),
|
||||
analog_capture_level_, &capture_level_out);
|
||||
|
||||
audio->CopySplitChannelDataFrom(capture_channel, split_bands);
|
||||
|
||||
gain_controller->set_capture_level(capture_level_out);
|
||||
|
||||
|
@ -199,7 +215,8 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio,
|
|||
}
|
||||
|
||||
RTC_DCHECK(num_proc_channels_);
|
||||
RTC_DCHECK_GE(160, audio->num_frames_per_band());
|
||||
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
|
||||
audio->num_frames_per_band());
|
||||
RTC_DCHECK_EQ(audio->num_channels(), *num_proc_channels_);
|
||||
|
||||
stream_is_saturated_ = false;
|
||||
|
@ -208,15 +225,22 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio,
|
|||
int32_t capture_level_out = 0;
|
||||
uint8_t saturation_warning = 0;
|
||||
|
||||
int16_t split_band_data[AudioBuffer::kMaxNumBands]
|
||||
[AudioBuffer::kMaxSplitFrameLength];
|
||||
int16_t* split_bands[AudioBuffer::kMaxNumBands] = {
|
||||
split_band_data[0], split_band_data[1], split_band_data[2]};
|
||||
audio->CopySplitChannelDataTo(capture_channel, split_bands);
|
||||
|
||||
// The call to stream_has_echo() is ok from a deadlock perspective
|
||||
// as the capture lock is already held.
|
||||
int err = WebRtcAgc_Process(
|
||||
gain_controller->state(), audio->split_bands_const(capture_channel),
|
||||
audio->num_bands(), audio->num_frames_per_band(),
|
||||
audio->split_bands(capture_channel),
|
||||
gain_controller->state(), split_bands, audio->num_bands(),
|
||||
audio->num_frames_per_band(), split_bands,
|
||||
gain_controller->get_capture_level(), &capture_level_out,
|
||||
stream_has_echo, &saturation_warning);
|
||||
|
||||
audio->CopySplitChannelDataFrom(capture_channel, split_bands);
|
||||
|
||||
if (err != AudioProcessing::kNoError) {
|
||||
return AudioProcessing::kUnspecifiedError;
|
||||
}
|
||||
|
|
|
@ -40,8 +40,8 @@ void LevelEstimatorImpl::ProcessStream(AudioBuffer* audio) {
|
|||
}
|
||||
|
||||
for (size_t i = 0; i < audio->num_channels(); i++) {
|
||||
rms_->Analyze(rtc::ArrayView<const int16_t>(audio->channels_const()[i],
|
||||
audio->num_frames()));
|
||||
rms_->Analyze(rtc::ArrayView<const float>(audio->channels_const_f()[i],
|
||||
audio->num_frames()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -96,11 +96,18 @@ LowCutFilter::~LowCutFilter() {}
|
|||
|
||||
void LowCutFilter::Process(AudioBuffer* audio) {
|
||||
RTC_DCHECK(audio);
|
||||
RTC_DCHECK_GE(160, audio->num_frames_per_band());
|
||||
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
|
||||
audio->num_frames_per_band());
|
||||
RTC_DCHECK_EQ(filters_.size(), audio->num_channels());
|
||||
for (size_t i = 0; i < filters_.size(); i++) {
|
||||
filters_[i]->Process(audio->split_bands(i)[kBand0To8kHz],
|
||||
audio->num_frames_per_band());
|
||||
std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> samples_fixed;
|
||||
FloatS16ToS16(audio->split_bands_f(i)[kBand0To8kHz],
|
||||
audio->num_frames_per_band(), samples_fixed.data());
|
||||
|
||||
filters_[i]->Process(samples_fixed.data(), audio->num_frames_per_band());
|
||||
|
||||
S16ToFloatS16(samples_fixed.data(), audio->num_frames_per_band(),
|
||||
audio->split_bands_f(i)[kBand0To8kHz]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -101,8 +101,16 @@ void NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
|
|||
WebRtcNs_Process(suppressors_[i]->state(), audio->split_bands_const_f(i),
|
||||
audio->num_bands(), audio->split_bands_f(i));
|
||||
#elif defined(WEBRTC_NS_FIXED)
|
||||
WebRtcNsx_Process(suppressors_[i]->state(), audio->split_bands_const(i),
|
||||
audio->num_bands(), audio->split_bands(i));
|
||||
int16_t split_band_data[AudioBuffer::kMaxNumBands]
|
||||
[AudioBuffer::kMaxSplitFrameLength];
|
||||
int16_t* split_bands[AudioBuffer::kMaxNumBands] = {
|
||||
split_band_data[0], split_band_data[1], split_band_data[2]};
|
||||
audio->CopySplitChannelDataTo(i, split_bands);
|
||||
|
||||
WebRtcNsx_Process(suppressors_[i]->state(), split_bands, audio->num_bands(),
|
||||
split_bands);
|
||||
|
||||
audio->CopySplitChannelDataFrom(i, split_bands);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
|
@ -74,6 +74,27 @@ void RmsLevel::Analyze(rtc::ArrayView<const int16_t> data) {
|
|||
max_sum_square_ = std::max(max_sum_square_, sum_square);
|
||||
}
|
||||
|
||||
// Accumulates the energy of one block of float samples. Each sample is
// saturated to [-32768, 32767] and truncated to int16 before being squared.
// An empty block is ignored.
void RmsLevel::Analyze(rtc::ArrayView<const float> data) {
  if (!data.empty()) {
    CheckBlockSize(data.size());

    float sum_square = 0.f;
    for (const float data_k : data) {
      const float floored = std::max(data_k, -32768.f);
      const float saturated = std::min(floored, 32767.f);
      const int16_t tmp = static_cast<int16_t>(saturated);
      sum_square += tmp * tmp;
    }
    RTC_DCHECK_GE(sum_square, 0.f);

    sum_square_ += sum_square;
    sample_count_ += data.size();

    // Remember the largest per-block sum of squares seen so far.
    max_sum_square_ = std::max(max_sum_square_, sum_square);
  }
}
|
||||
|
||||
void RmsLevel::AnalyzeMuted(size_t length) {
|
||||
CheckBlockSize(length);
|
||||
sample_count_ += length;
|
||||
|
|
|
@ -45,6 +45,7 @@ class RmsLevel {
|
|||
|
||||
// Pass each chunk of audio to Analyze() to accumulate the level.
|
||||
void Analyze(rtc::ArrayView<const int16_t> data);
|
||||
void Analyze(rtc::ArrayView<const float> data);
|
||||
|
||||
// If all samples with the given |length| have a magnitude of zero, this is
|
||||
// a shortcut to avoid some computation.
|
||||
|
|
|
@ -34,9 +34,18 @@ std::unique_ptr<RmsLevel> RunTest(rtc::ArrayView<const int16_t> input) {
|
|||
return level;
|
||||
}
|
||||
|
||||
std::vector<int16_t> CreateSinusoid(int frequency_hz,
|
||||
int amplitude,
|
||||
size_t num_samples) {
|
||||
// Feeds |input| to a freshly created RmsLevel in consecutive blocks of
// kBlockSizeSamples and returns it. A trailing partial block is not analyzed.
std::unique_ptr<RmsLevel> RunTest(rtc::ArrayView<const float> input) {
  std::unique_ptr<RmsLevel> level(new RmsLevel);
  size_t n = 0;
  while (n + kBlockSizeSamples <= input.size()) {
    level->Analyze(input.subview(n, kBlockSizeSamples));
    n += kBlockSizeSamples;
  }
  return level;
}
|
||||
|
||||
std::vector<int16_t> CreateInt16Sinusoid(int frequency_hz,
|
||||
int amplitude,
|
||||
size_t num_samples) {
|
||||
std::vector<int16_t> x(num_samples);
|
||||
for (size_t n = 0; n < num_samples; ++n) {
|
||||
x[n] = rtc::saturated_cast<int16_t>(
|
||||
|
@ -44,16 +53,40 @@ std::vector<int16_t> CreateSinusoid(int frequency_hz,
|
|||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
std::vector<float> CreateFloatSinusoid(int frequency_hz,
|
||||
int amplitude,
|
||||
size_t num_samples) {
|
||||
std::vector<int16_t> x16 =
|
||||
CreateInt16Sinusoid(frequency_hz, amplitude, num_samples);
|
||||
std::vector<float> x(x16.size());
|
||||
for (size_t n = 0; n < x.size(); ++n) {
|
||||
x[n] = x16[n];
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Verifies that the float and the int16 overloads of RmsLevel::Analyze()
// report the same average level for the same full-scale 1000 Hz sinusoid.
TEST(RmsLevelTest, VerifyIndentityBetweenFloatAndFix) {
  auto x_f = CreateFloatSinusoid(1000, INT16_MAX, kSampleRateHz);
  // The fixed-point reference has to be int16 data so that RunTest() resolves
  // to the int16 overload; building it with CreateFloatSinusoid() would make
  // the test compare the float path with itself.
  auto x_i = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
  auto level_f = RunTest(x_f);
  auto level_i = RunTest(x_i);
  int avg_i = level_i->Average();
  int avg_f = level_f->Average();
  EXPECT_EQ(3, avg_i);  // -3 dBFS
  EXPECT_EQ(avg_f, avg_i);
}
|
||||
|
||||
TEST(RmsLevelTest, Run1000HzFullScale) {
|
||||
auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
|
||||
auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
|
||||
auto level = RunTest(x);
|
||||
EXPECT_EQ(3, level->Average()); // -3 dBFS
|
||||
}
|
||||
|
||||
TEST(RmsLevelTest, Run1000HzFullScaleAverageAndPeak) {
|
||||
auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
|
||||
auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
|
||||
auto level = RunTest(x);
|
||||
auto stats = level->AverageAndPeak();
|
||||
EXPECT_EQ(3, stats.average); // -3 dBFS
|
||||
|
@ -61,7 +94,7 @@ TEST(RmsLevelTest, Run1000HzFullScaleAverageAndPeak) {
|
|||
}
|
||||
|
||||
// Runs a half-scale 1 kHz int16 sinusoid through RunTest() and checks the
// reported average.
TEST(RmsLevelTest, Run1000HzHalfScale) {
  // Only the CreateInt16Sinusoid() declaration is kept; the earlier
  // CreateSinusoid() line declared `x` a second time under the old helper name.
  auto x = CreateInt16Sinusoid(1000, INT16_MAX / 2, kSampleRateHz);
  auto level = RunTest(x);
  EXPECT_EQ(9, level->Average());  // -9 dBFS
}
|
||||
|
@ -93,14 +126,14 @@ TEST(RmsLevelTest, NoSamplesAverageAndPeak) {
|
|||
}
|
||||
|
||||
// Reads the average twice in a row; the second read is expected to return 127
// because the first read resets the statistics.
TEST(RmsLevelTest, PollTwice) {
  // Only the CreateInt16Sinusoid() declaration is kept; the earlier
  // CreateSinusoid() line declared `x` a second time under the old helper name.
  auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
  auto level = RunTest(x);
  level->Average();
  EXPECT_EQ(127, level->Average());  // Stats should be reset at this point.
}
|
||||
|
||||
TEST(RmsLevelTest, Reset) {
|
||||
auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
|
||||
auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
|
||||
auto level = RunTest(x);
|
||||
level->Reset();
|
||||
EXPECT_EQ(127, level->Average()); // Stats should be reset at this point.
|
||||
|
@ -108,7 +141,7 @@ TEST(RmsLevelTest, Reset) {
|
|||
|
||||
// Inserts 1 second of full-scale sinusoid, followed by 1 second of muted.
|
||||
TEST(RmsLevelTest, ProcessMuted) {
|
||||
auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
|
||||
auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
|
||||
auto level = RunTest(x);
|
||||
const size_t kBlocksPerSecond = rtc::CheckedDivExact(
|
||||
static_cast<size_t>(kSampleRateHz), kBlockSizeSamples);
|
||||
|
@ -123,8 +156,8 @@ TEST(RmsLevelTest, ProcessMuted) {
|
|||
// to the vast majority of the signal being half-scale, and the peak to be
|
||||
// -3 dBFS.
|
||||
TEST(RmsLevelTest, RunHalfScaleAndInsertFullScale) {
|
||||
auto half_scale = CreateSinusoid(1000, INT16_MAX / 2, kSampleRateHz);
|
||||
auto full_scale = CreateSinusoid(1000, INT16_MAX, kSampleRateHz / 100);
|
||||
auto half_scale = CreateInt16Sinusoid(1000, INT16_MAX / 2, kSampleRateHz);
|
||||
auto full_scale = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz / 100);
|
||||
auto x = half_scale;
|
||||
x.insert(x.end(), full_scale.begin(), full_scale.end());
|
||||
x.insert(x.end(), half_scale.begin(), half_scale.end());
|
||||
|
@ -137,10 +170,10 @@ TEST(RmsLevelTest, RunHalfScaleAndInsertFullScale) {
|
|||
}
|
||||
|
||||
TEST(RmsLevelTest, ResetOnBlockSizeChange) {
|
||||
auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
|
||||
auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
|
||||
auto level = RunTest(x);
|
||||
// Create a new signal with half amplitude, but double block length.
|
||||
auto y = CreateSinusoid(1000, INT16_MAX / 2, kBlockSizeSamples * 2);
|
||||
auto y = CreateInt16Sinusoid(1000, INT16_MAX / 2, kBlockSizeSamples * 2);
|
||||
level->Analyze(y);
|
||||
auto stats = level->AverageAndPeak();
|
||||
// Expect all stats to only be influenced by the last signal (y), since the
|
||||
|
|
|
@ -58,24 +58,25 @@ bool VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
|
|||
rtc::CritScope cs(crit_);
|
||||
RTC_DCHECK(enabled_);
|
||||
|
||||
RTC_DCHECK_GE(160, audio->num_frames_per_band());
|
||||
std::array<int16_t, 160> mixed_low_pass_data;
|
||||
rtc::ArrayView<const int16_t> mixed_low_pass;
|
||||
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
|
||||
audio->num_frames_per_band());
|
||||
std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> mixed_low_pass_data;
|
||||
rtc::ArrayView<const int16_t> mixed_low_pass(mixed_low_pass_data.data(),
|
||||
audio->num_frames_per_band());
|
||||
if (audio->num_proc_channels() == 1) {
|
||||
mixed_low_pass =
|
||||
rtc::ArrayView<const int16_t>(audio->split_bands_const(0)[kBand0To8kHz],
|
||||
audio->num_frames_per_band());
|
||||
FloatS16ToS16(audio->split_bands_const_f(0)[kBand0To8kHz],
|
||||
audio->num_frames_per_band(), mixed_low_pass_data.data());
|
||||
} else {
|
||||
const int num_channels = static_cast<int>(audio->num_channels());
|
||||
for (size_t i = 0; i < audio->num_frames_per_band(); ++i) {
|
||||
int32_t value = audio->split_channels_const(kBand0To8kHz)[0][i];
|
||||
int32_t value =
|
||||
FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[0][i]);
|
||||
for (int j = 1; j < num_channels; ++j) {
|
||||
value += audio->split_channels_const(kBand0To8kHz)[j][i];
|
||||
value +=
|
||||
FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[j][i]);
|
||||
}
|
||||
mixed_low_pass_data[i] = value / num_channels;
|
||||
}
|
||||
mixed_low_pass = rtc::ArrayView<const int16_t>(
|
||||
mixed_low_pass_data.data(), audio->num_frames_per_band());
|
||||
}
|
||||
|
||||
int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
|
||||
|
|
|
@ -1 +1 @@
|
|||
bc19d9e9fd9503cad02f3b0c21cbd63ed3c5f22c
|
||||
d67b879f3b4a31b3c4f3587bd4418be5f9df5105
|
Loading…
Reference in a new issue