From 928146f546193cfd56256aa828e2e19a013012cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Per=20=C3=85hgren?= Date: Tue, 20 Aug 2019 09:19:21 +0200 Subject: [PATCH] Removing all external access to the integer sample data in AudioBuffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL removes all external access to the integer sample data in the AudioBuffer class. It also removes the API in AudioBuffer that provides this. The purpose of this is to pave the way for removing the sample duplicating and implicit conversions between integer and floating point sample formats which is done inside the AudioBuffer. Bug: webrtc:10882 Change-Id: I1438b691bcef98278aef8e3c63624c367c2d12e9 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/149162 Reviewed-by: Gustaf Ullberg Reviewed-by: Henrik Lundin Commit-Queue: Per Åhgren Cr-Commit-Position: refs/heads/master@{#28912} --- common_audio/audio_util.cc | 10 +++ common_audio/include/audio_util.h | 9 +++ .../resampler/push_sinc_resampler_unittest.cc | 12 ---- modules/audio_processing/agc/BUILD.gn | 2 + .../agc/agc_manager_direct.cc | 45 ++++++++++-- .../audio_processing/agc/agc_manager_direct.h | 4 +- modules/audio_processing/audio_buffer.cc | 49 +++++++------ modules/audio_processing/audio_buffer.h | 17 +++-- .../audio_frame_view_unittest.cc | 8 +-- .../audio_processing/audio_processing_impl.cc | 14 ++-- .../echo_control_mobile_impl.cc | 49 +++++++++---- modules/audio_processing/gain_control_impl.cc | 70 +++++++++++++------ .../audio_processing/level_estimator_impl.cc | 4 +- modules/audio_processing/low_cut_filter.cc | 13 +++- .../noise_suppression_impl.cc | 12 +++- modules/audio_processing/rms_level.cc | 21 ++++++ modules/audio_processing/rms_level.h | 1 + .../audio_processing/rms_level_unittest.cc | 59 ++++++++++++---- .../audio_processing/voice_detection_impl.cc | 21 +++--- .../output_data_float.pb.sha1 | 2 +- 20 files changed, 298 insertions(+), 124 deletions(-) diff --git 
a/common_audio/audio_util.cc b/common_audio/audio_util.cc index eb132ca633..b1e4d9ac3c 100644 --- a/common_audio/audio_util.cc +++ b/common_audio/audio_util.cc @@ -12,11 +12,21 @@ namespace webrtc { +void FloatToS16(const float* src, size_t size, int16_t* dest) { + for (size_t i = 0; i < size; ++i) + dest[i] = FloatToS16(src[i]); +} + void S16ToFloat(const int16_t* src, size_t size, float* dest) { for (size_t i = 0; i < size; ++i) dest[i] = S16ToFloat(src[i]); } +void S16ToFloatS16(const int16_t* src, size_t size, float* dest) { + for (size_t i = 0; i < size; ++i) + dest[i] = src[i]; +} + void FloatS16ToS16(const float* src, size_t size, int16_t* dest) { for (size_t i = 0; i < size; ++i) dest[i] = FloatS16ToS16(src[i]); diff --git a/common_audio/include/audio_util.h b/common_audio/include/audio_util.h index 9925e54b02..f6b6bfdcd6 100644 --- a/common_audio/include/audio_util.h +++ b/common_audio/include/audio_util.h @@ -43,6 +43,13 @@ static inline int16_t FloatS16ToS16(float v) { return static_cast(v + std::copysign(0.5f, v)); } +static inline int16_t FloatToS16(float v) { + v *= 32768.f; + v = std::min(v, 32767.f); + v = std::max(v, -32768.f); + return static_cast(v + std::copysign(0.5f, v)); +} + static inline float FloatToFloatS16(float v) { v = std::min(v, 1.f); v = std::max(v, -1.f); @@ -56,7 +63,9 @@ static inline float FloatS16ToFloat(float v) { return v * kScaling; } +void FloatToS16(const float* src, size_t size, int16_t* dest); void S16ToFloat(const int16_t* src, size_t size, float* dest); +void S16ToFloatS16(const int16_t* src, size_t size, float* dest); void FloatS16ToS16(const float* src, size_t size, int16_t* dest); void FloatToFloatS16(const float* src, size_t size, float* dest); void FloatS16ToFloat(const float* src, size_t size, float* dest); diff --git a/common_audio/resampler/push_sinc_resampler_unittest.cc b/common_audio/resampler/push_sinc_resampler_unittest.cc index f9943b3cc8..dc7cdec141 100644 --- 
a/common_audio/resampler/push_sinc_resampler_unittest.cc +++ b/common_audio/resampler/push_sinc_resampler_unittest.cc @@ -33,18 +33,6 @@ T DBFS(T x) { return 20 * std::log10(x); } -void FloatToS16(const float* src, size_t size, int16_t* dest) { - for (size_t i = 0; i < size; ++i) { - RTC_DCHECK_GE(32767.f, src[i]); - RTC_DCHECK_LE(-32768.f, src[i]); - if (src[i] >= 1.f) - dest[i] = 32767; - if (src[i] <= -1.f) - dest[i] = -32768; - dest[i] = static_cast(src[i] * 32767.5f); - } -} - } // namespace class PushSincResamplerTest : public ::testing::TestWithParam< diff --git a/modules/audio_processing/agc/BUILD.gn b/modules/audio_processing/agc/BUILD.gn index 69db73dbdf..09781b2428 100644 --- a/modules/audio_processing/agc/BUILD.gn +++ b/modules/audio_processing/agc/BUILD.gn @@ -19,6 +19,8 @@ rtc_source_set("agc") { ":level_estimation", "..:apm_logging", "..:gain_control_interface", + "../../../common_audio", + "../../../common_audio:common_audio_c", "../../../rtc_base:checks", "../../../rtc_base:gtest_prod", "../../../rtc_base:logging", diff --git a/modules/audio_processing/agc/agc_manager_direct.cc b/modules/audio_processing/agc/agc_manager_direct.cc index 5c4deeccbf..4b0de78cb5 100644 --- a/modules/audio_processing/agc/agc_manager_direct.cc +++ b/modules/audio_processing/agc/agc_manager_direct.cc @@ -17,6 +17,7 @@ #include #endif +#include "common_audio/include/audio_util.h" #include "modules/audio_processing/agc/gain_map_internal.h" #include "modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h" #include "modules/audio_processing/include/gain_control.h" @@ -59,6 +60,10 @@ const int kMaxResidualGainChange = 15; // restrictions from clipping events. const int kSurplusCompressionGain = 6; +// Maximum number of channels and number of samples per channel supported. 
+constexpr size_t kMaxNumSamplesPerChannel = 1920; +constexpr size_t kMaxNumChannels = 4; + int ClampLevel(int mic_level) { return rtc::SafeClamp(mic_level, kMinMicLevel, kMaxMicLevel); } @@ -220,7 +225,7 @@ int AgcManagerDirect::Initialize() { return InitializeGainControl(gctrl_, disable_digital_adaptive_); } -void AgcManagerDirect::AnalyzePreProcess(int16_t* audio, +void AgcManagerDirect::AnalyzePreProcess(float* audio, int num_channels, size_t samples_per_channel) { size_t length = num_channels * samples_per_channel; @@ -228,7 +233,19 @@ void AgcManagerDirect::AnalyzePreProcess(int16_t* audio, return; } - file_preproc_->Write(audio, length); + std::array audio_data; + int16_t* audio_fix; + size_t safe_length; + if (audio) { + audio_fix = audio_data.data(); + safe_length = std::min(audio_data.size(), length); + FloatS16ToS16(audio, length, audio_fix); + } else { + audio_fix = nullptr; + safe_length = length; + } + + file_preproc_->Write(audio_fix, safe_length); if (frames_since_clipped_ < kClippedWaitFrames) { ++frames_since_clipped_; @@ -244,7 +261,7 @@ void AgcManagerDirect::AnalyzePreProcess(int16_t* audio, // maximum. This harsh treatment is an effort to avoid repeated clipped echo // events. As compensation for this restriction, the maximum compression // gain is increased, through SetMaxLevel(). - float clipped_ratio = agc_->AnalyzePreproc(audio, length); + float clipped_ratio = agc_->AnalyzePreproc(audio_fix, safe_length); if (clipped_ratio > kClippedRatioThreshold) { RTC_DLOG(LS_INFO) << "[agc] Clipping detected. 
clipped_ratio=" << clipped_ratio; @@ -263,15 +280,31 @@ void AgcManagerDirect::AnalyzePreProcess(int16_t* audio, } frames_since_clipped_ = 0; } + + if (audio) { + S16ToFloatS16(audio_fix, safe_length, audio); + } } -void AgcManagerDirect::Process(const int16_t* audio, +void AgcManagerDirect::Process(const float* audio, size_t length, int sample_rate_hz) { if (capture_muted_) { return; } + std::array audio_data; + const int16_t* audio_fix; + size_t safe_length; + if (audio) { + audio_fix = audio_data.data(); + safe_length = std::min(audio_data.size(), length); + FloatS16ToS16(audio, length, audio_data.data()); + } else { + audio_fix = nullptr; + safe_length = length; + } + if (check_volume_on_next_process_) { check_volume_on_next_process_ = false; // We have to wait until the first process call to check the volume, @@ -279,14 +312,14 @@ void AgcManagerDirect::Process(const int16_t* audio, CheckVolumeAndReset(); } - agc_->Process(audio, length, sample_rate_hz); + agc_->Process(audio_fix, safe_length, sample_rate_hz); UpdateGain(); if (!disable_digital_adaptive_) { UpdateCompressor(); } - file_postproc_->Write(audio, length); + file_postproc_->Write(audio_fix, safe_length); data_dumper_->DumpRaw("experimental_gain_control_compression_gain_db", 1, &compression_); diff --git a/modules/audio_processing/agc/agc_manager_direct.h b/modules/audio_processing/agc/agc_manager_direct.h index 8c9fc4da9c..a637fca9a5 100644 --- a/modules/audio_processing/agc/agc_manager_direct.h +++ b/modules/audio_processing/agc/agc_manager_direct.h @@ -56,10 +56,10 @@ class AgcManagerDirect final { ~AgcManagerDirect(); int Initialize(); - void AnalyzePreProcess(int16_t* audio, + void AnalyzePreProcess(float* audio, int num_channels, size_t samples_per_channel); - void Process(const int16_t* audio, size_t length, int sample_rate_hz); + void Process(const float* audio, size_t length, int sample_rate_hz); // Call when the capture stream has been muted/unmuted. 
This causes the // manager to disregard all incoming audio; chances are good it's background diff --git a/modules/audio_processing/audio_buffer.cc b/modules/audio_processing/audio_buffer.cc index 584111c29a..32668fa079 100644 --- a/modules/audio_processing/audio_buffer.cc +++ b/modules/audio_processing/audio_buffer.cc @@ -169,29 +169,11 @@ void AudioBuffer::InitForNewData() { } } -const int16_t* const* AudioBuffer::channels_const() const { - return data_->ibuf_const()->channels(); -} - -int16_t* const* AudioBuffer::channels() { - return data_->ibuf()->channels(); -} - -const int16_t* const* AudioBuffer::split_bands_const(size_t channel) const { - return split_data_.get() ? split_data_->ibuf_const()->bands(channel) - : data_->ibuf_const()->bands(channel); -} - -int16_t* const* AudioBuffer::split_bands(size_t channel) { - return split_data_.get() ? split_data_->ibuf()->bands(channel) - : data_->ibuf()->bands(channel); -} - -const int16_t* const* AudioBuffer::split_channels_const(Band band) const { +const float* const* AudioBuffer::split_channels_const_f(Band band) const { if (split_data_.get()) { - return split_data_->ibuf_const()->channels(band); + return split_data_->fbuf_const()->channels(band); } else { - return band == kBand0To8kHz ? data_->ibuf_const()->channels() : nullptr; + return band == kBand0To8kHz ? 
data_->fbuf_const()->channels() : nullptr; } } @@ -308,4 +290,29 @@ void AudioBuffer::MergeFrequencyBands() { splitting_filter_->Synthesis(split_data_.get(), data_.get()); } +void AudioBuffer::CopySplitChannelDataTo(size_t channel, + int16_t* const* split_band_data) { + for (size_t k = 0; k < num_bands(); ++k) { + const float* band_data = split_bands_f(channel)[k]; + RTC_DCHECK(split_band_data[k]); + RTC_DCHECK(band_data); + for (size_t i = 0; i < num_frames_per_band(); ++i) { + split_band_data[k][i] = FloatS16ToS16(band_data[i]); + } + } +} + +void AudioBuffer::CopySplitChannelDataFrom( + size_t channel, + const int16_t* const* split_band_data) { + for (size_t k = 0; k < num_bands(); ++k) { + float* band_data = split_bands_f(channel)[k]; + RTC_DCHECK(split_band_data[k]); + RTC_DCHECK(band_data); + for (size_t i = 0; i < num_frames_per_band(); ++i) { + band_data[i] = split_band_data[k][i]; + } + } +} + } // namespace webrtc diff --git a/modules/audio_processing/audio_buffer.h b/modules/audio_processing/audio_buffer.h index c1bfb63673..16d5616a46 100644 --- a/modules/audio_processing/audio_buffer.h +++ b/modules/audio_processing/audio_buffer.h @@ -52,8 +52,6 @@ class AudioBuffer { // Where: // 0 <= channel < |num_proc_channels_| // 0 <= sample < |proc_num_frames_| - int16_t* const* channels(); - const int16_t* const* channels_const() const; float* const* channels_f(); const float* const* channels_const_f() const; @@ -64,8 +62,6 @@ class AudioBuffer { // 0 <= channel < |num_proc_channels_| // 0 <= band < |num_bands_| // 0 <= sample < |num_split_frames_| - int16_t* const* split_bands(size_t channel); - const int16_t* const* split_bands_const(size_t channel) const; float* const* split_bands_f(size_t channel); const float* const* split_bands_const_f(size_t channel) const; @@ -76,7 +72,7 @@ class AudioBuffer { // 0 <= band < |num_bands_| // 0 <= channel < |num_proc_channels_| // 0 <= sample < |num_split_frames_| - const int16_t* const* split_channels_const(Band band) 
const; + const float* const* split_channels_const_f(Band band) const; // Use for int16 interleaved data. void DeinterleaveFrom(const AudioFrame* audioFrame); @@ -93,6 +89,17 @@ class AudioBuffer { // Recombine the different bands into one signal. void MergeFrequencyBands(); + // Copies the split bands data into the integer two-dimensional array. + void CopySplitChannelDataTo(size_t channel, int16_t* const* split_band_data); + + // Copies the data in the integer two-dimensional array into the split_bands + // data. + void CopySplitChannelDataFrom(size_t channel, + const int16_t* const* split_band_data); + + static const size_t kMaxSplitFrameLength = 160; + static const size_t kMaxNumBands = 3; + private: FRIEND_TEST_ALL_PREFIXES(AudioBufferTest, SetNumChannelsSetsChannelBuffersNumChannels); diff --git a/modules/audio_processing/audio_frame_view_unittest.cc b/modules/audio_processing/audio_frame_view_unittest.cc index 7a9d126103..70b63b1cb3 100644 --- a/modules/audio_processing/audio_frame_view_unittest.cc +++ b/modules/audio_processing/audio_frame_view_unittest.cc @@ -43,9 +43,9 @@ TEST(AudioFrameTest, ConstructFromAudioBuffer) { // But not the other way. 
The following will fail: // non_const_view = other_const_view; - AudioFrameView non_const_int16_view( - buffer.channels(), buffer.num_channels(), buffer.num_frames()); - non_const_int16_view.channel(0)[0] = kIntConstant; - EXPECT_EQ(buffer.channels()[0][0], kIntConstant); + AudioFrameView non_const_float_view( + buffer.channels_f(), buffer.num_channels(), buffer.num_frames()); + non_const_float_view.channel(0)[0] = kIntConstant; + EXPECT_EQ(buffer.channels_f()[0][0], kIntConstant); } } // namespace webrtc diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index 804802f7a7..fc463c1dbb 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -1279,8 +1279,8 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { capture_buffer->num_frames())); } - capture_input_rms_.Analyze(rtc::ArrayView( - capture_buffer->channels_const()[0], + capture_input_rms_.Analyze(rtc::ArrayView( + capture_buffer->channels_const_f()[0], capture_nonlocked_.capture_processing_format.num_frames())); const bool log_rms = ++capture_rms_interval_counter_ >= 1000; if (log_rms) { @@ -1323,12 +1323,12 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { if (constants_.use_experimental_agc && public_submodules_->gain_control->is_enabled()) { private_submodules_->agc_manager->AnalyzePreProcess( - capture_buffer->channels()[0], capture_buffer->num_channels(), + capture_buffer->channels_f()[0], capture_buffer->num_channels(), capture_nonlocked_.capture_processing_format.num_frames()); if (constants_.use_experimental_agc_process_before_aec) { private_submodules_->agc_manager->Process( - capture_buffer->channels()[0], + capture_buffer->channels_const_f()[0], capture_nonlocked_.capture_processing_format.num_frames(), capture_nonlocked_.capture_processing_format.sample_rate_hz()); } @@ -1419,7 +1419,7 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { 
public_submodules_->gain_control->is_enabled() && !constants_.use_experimental_agc_process_before_aec) { private_submodules_->agc_manager->Process( - capture_buffer->split_bands_const(0)[kBand0To8kHz], + capture_buffer->split_bands_const_f(0)[kBand0To8kHz], capture_buffer->num_frames_per_band(), capture_nonlocked_.split_rate); } // TODO(peah): Add reporting from AEC3 whether there is echo. @@ -1484,8 +1484,8 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { capture_.stats.output_rms_dbfs = absl::nullopt; } - capture_output_rms_.Analyze(rtc::ArrayView( - capture_buffer->channels_const()[0], + capture_output_rms_.Analyze(rtc::ArrayView( + capture_buffer->channels_const_f()[0], capture_nonlocked_.capture_processing_format.num_frames())); if (log_rms) { RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak(); diff --git a/modules/audio_processing/echo_control_mobile_impl.cc b/modules/audio_processing/echo_control_mobile_impl.cc index c8084ea39b..982287b0e7 100644 --- a/modules/audio_processing/echo_control_mobile_impl.cc +++ b/modules/audio_processing/echo_control_mobile_impl.cc @@ -56,6 +56,7 @@ AudioProcessing::Error MapError(int err) { return AudioProcessing::kUnspecifiedError; } } + } // namespace struct EchoControlMobileImpl::StreamProperties { @@ -131,7 +132,8 @@ void EchoControlMobileImpl::PackRenderAudioBuffer( size_t num_output_channels, size_t num_channels, std::vector* packed_buffer) { - RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); RTC_DCHECK_EQ(num_channels, audio->num_channels()); // The ordering convention must be followed to pass to the correct AECM. 
@@ -139,12 +141,14 @@ void EchoControlMobileImpl::PackRenderAudioBuffer( int render_channel = 0; for (size_t i = 0; i < num_output_channels; i++) { for (size_t j = 0; j < audio->num_channels(); j++) { + std::array data_to_buffer; + FloatS16ToS16(audio->split_bands_const_f(render_channel)[kBand0To8kHz], + audio->num_frames_per_band(), data_to_buffer.data()); + // Buffer the samples in the render queue. packed_buffer->insert( - packed_buffer->end(), - audio->split_bands_const(render_channel)[kBand0To8kHz], - (audio->split_bands_const(render_channel)[kBand0To8kHz] + - audio->num_frames_per_band())); + packed_buffer->end(), data_to_buffer.data(), + data_to_buffer.data() + audio->num_frames_per_band()); render_channel = (render_channel + 1) % audio->num_channels(); } } @@ -174,7 +178,21 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio, RTC_DCHECK_LT(capture, low_pass_reference_.size()); const int16_t* noisy = reference_copied_ ? low_pass_reference_[capture].data() : nullptr; - const int16_t* clean = audio->split_bands_const(capture)[kBand0To8kHz]; + + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); + + std::array split_bands_data; + int16_t* split_bands = split_bands_data.data(); + const int16_t* clean = split_bands_data.data(); + if (audio->split_bands_f(capture)[kBand0To8kHz]) { + FloatS16ToS16(audio->split_bands_f(capture)[kBand0To8kHz], + audio->num_frames_per_band(), split_bands_data.data()); + } else { + clean = nullptr; + split_bands = nullptr; + } + if (noisy == NULL) { noisy = clean; clean = NULL; @@ -182,8 +200,13 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio, for (size_t render = 0; render < stream_properties_->num_reverse_channels; ++render) { err = WebRtcAecm_Process(cancellers_[handle_index]->state(), noisy, clean, - audio->split_bands(capture)[kBand0To8kHz], - audio->num_frames_per_band(), stream_delay_ms); + split_bands, audio->num_frames_per_band(), + stream_delay_ms); + + if 
(split_bands) { + S16ToFloatS16(split_bands, audio->num_frames_per_band(), + audio->split_bands_f(capture)[kBand0To8kHz]); + } if (err != AudioProcessing::kNoError) { return MapError(err); @@ -192,9 +215,9 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio, ++handle_index; } for (size_t band = 1u; band < audio->num_bands(); ++band) { - memset(audio->split_bands(capture)[band], 0, + memset(audio->split_bands_f(capture)[band], 0, audio->num_frames_per_band() * - sizeof(audio->split_bands(capture)[band][0])); + sizeof(audio->split_bands_f(capture)[band][0])); } } return AudioProcessing::kNoError; @@ -204,9 +227,9 @@ void EchoControlMobileImpl::CopyLowPassReference(AudioBuffer* audio) { RTC_DCHECK_LE(audio->num_channels(), low_pass_reference_.size()); reference_copied_ = true; for (size_t capture = 0; capture < audio->num_channels(); ++capture) { - memcpy(low_pass_reference_[capture].data(), - audio->split_bands_const(capture)[kBand0To8kHz], - audio->num_frames_per_band() * sizeof(int16_t)); + FloatS16ToS16(audio->split_bands_const_f(capture)[kBand0To8kHz], + audio->num_frames_per_band(), + low_pass_reference_[capture].data()); } } diff --git a/modules/audio_processing/gain_control_impl.cc b/modules/audio_processing/gain_control_impl.cc index 58559430a1..2fb8a18333 100644 --- a/modules/audio_processing/gain_control_impl.cc +++ b/modules/audio_processing/gain_control_impl.cc @@ -118,25 +118,25 @@ void GainControlImpl::ProcessRenderAudio( void GainControlImpl::PackRenderAudioBuffer( AudioBuffer* audio, std::vector* packed_buffer) { - RTC_DCHECK_GE(160, audio->num_frames_per_band()); - - std::array mixed_low_pass_data; - rtc::ArrayView mixed_low_pass; + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); + std::array mixed_low_pass_data; + rtc::ArrayView mixed_low_pass(mixed_low_pass_data.data(), + audio->num_frames_per_band()); if (audio->num_proc_channels() == 1) { - mixed_low_pass = - 
rtc::ArrayView(audio->split_bands_const(0)[kBand0To8kHz], - audio->num_frames_per_band()); + FloatS16ToS16(audio->split_bands_const_f(0)[kBand0To8kHz], + audio->num_frames_per_band(), mixed_low_pass_data.data()); } else { const int num_channels = static_cast(audio->num_channels()); for (size_t i = 0; i < audio->num_frames_per_band(); ++i) { - int32_t value = audio->split_channels_const(kBand0To8kHz)[0][i]; + int32_t value = + FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[0][i]); for (int j = 1; j < num_channels; ++j) { - value += audio->split_channels_const(kBand0To8kHz)[j][i]; + value += + FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[j][i]); } mixed_low_pass_data[i] = value / num_channels; } - mixed_low_pass = rtc::ArrayView( - mixed_low_pass_data.data(), audio->num_frames_per_band()); } packed_buffer->clear(); @@ -150,17 +150,28 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) { } RTC_DCHECK(num_proc_channels_); - RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); RTC_DCHECK_EQ(audio->num_channels(), *num_proc_channels_); RTC_DCHECK_LE(*num_proc_channels_, gain_controllers_.size()); + int16_t split_band_data[AudioBuffer::kMaxNumBands] + [AudioBuffer::kMaxSplitFrameLength]; + int16_t* split_bands[AudioBuffer::kMaxNumBands] = { + split_band_data[0], split_band_data[1], split_band_data[2]}; + if (mode_ == kAdaptiveAnalog) { int capture_channel = 0; for (auto& gain_controller : gain_controllers_) { gain_controller->set_capture_level(analog_capture_level_); - int err = WebRtcAgc_AddMic( - gain_controller->state(), audio->split_bands(capture_channel), - audio->num_bands(), audio->num_frames_per_band()); + + audio->CopySplitChannelDataTo(capture_channel, split_bands); + + int err = + WebRtcAgc_AddMic(gain_controller->state(), split_bands, + audio->num_bands(), audio->num_frames_per_band()); + + audio->CopySplitChannelDataFrom(capture_channel, 
split_bands); if (err != AudioProcessing::kNoError) { return AudioProcessing::kUnspecifiedError; @@ -171,10 +182,15 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) { int capture_channel = 0; for (auto& gain_controller : gain_controllers_) { int32_t capture_level_out = 0; - int err = WebRtcAgc_VirtualMic( - gain_controller->state(), audio->split_bands(capture_channel), - audio->num_bands(), audio->num_frames_per_band(), - analog_capture_level_, &capture_level_out); + + audio->CopySplitChannelDataTo(capture_channel, split_bands); + + int err = + WebRtcAgc_VirtualMic(gain_controller->state(), split_bands, + audio->num_bands(), audio->num_frames_per_band(), + analog_capture_level_, &capture_level_out); + + audio->CopySplitChannelDataFrom(capture_channel, split_bands); gain_controller->set_capture_level(capture_level_out); @@ -199,7 +215,8 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio, } RTC_DCHECK(num_proc_channels_); - RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); RTC_DCHECK_EQ(audio->num_channels(), *num_proc_channels_); stream_is_saturated_ = false; @@ -208,15 +225,22 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio, int32_t capture_level_out = 0; uint8_t saturation_warning = 0; + int16_t split_band_data[AudioBuffer::kMaxNumBands] + [AudioBuffer::kMaxSplitFrameLength]; + int16_t* split_bands[AudioBuffer::kMaxNumBands] = { + split_band_data[0], split_band_data[1], split_band_data[2]}; + audio->CopySplitChannelDataTo(capture_channel, split_bands); + // The call to stream_has_echo() is ok from a deadlock perspective // as the capture lock is allready held. 
int err = WebRtcAgc_Process( - gain_controller->state(), audio->split_bands_const(capture_channel), - audio->num_bands(), audio->num_frames_per_band(), - audio->split_bands(capture_channel), + gain_controller->state(), split_bands, audio->num_bands(), + audio->num_frames_per_band(), split_bands, gain_controller->get_capture_level(), &capture_level_out, stream_has_echo, &saturation_warning); + audio->CopySplitChannelDataFrom(capture_channel, split_bands); + if (err != AudioProcessing::kNoError) { return AudioProcessing::kUnspecifiedError; } diff --git a/modules/audio_processing/level_estimator_impl.cc b/modules/audio_processing/level_estimator_impl.cc index 5b49b35fdc..8adbf19bde 100644 --- a/modules/audio_processing/level_estimator_impl.cc +++ b/modules/audio_processing/level_estimator_impl.cc @@ -40,8 +40,8 @@ void LevelEstimatorImpl::ProcessStream(AudioBuffer* audio) { } for (size_t i = 0; i < audio->num_channels(); i++) { - rms_->Analyze(rtc::ArrayView(audio->channels_const()[i], - audio->num_frames())); + rms_->Analyze(rtc::ArrayView(audio->channels_const_f()[i], + audio->num_frames())); } } diff --git a/modules/audio_processing/low_cut_filter.cc b/modules/audio_processing/low_cut_filter.cc index 1ee955d7d7..7398481a6c 100644 --- a/modules/audio_processing/low_cut_filter.cc +++ b/modules/audio_processing/low_cut_filter.cc @@ -96,11 +96,18 @@ LowCutFilter::~LowCutFilter() {} void LowCutFilter::Process(AudioBuffer* audio) { RTC_DCHECK(audio); - RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); RTC_DCHECK_EQ(filters_.size(), audio->num_channels()); for (size_t i = 0; i < filters_.size(); i++) { - filters_[i]->Process(audio->split_bands(i)[kBand0To8kHz], - audio->num_frames_per_band()); + std::array samples_fixed; + FloatS16ToS16(audio->split_bands_f(i)[kBand0To8kHz], + audio->num_frames_per_band(), samples_fixed.data()); + + filters_[i]->Process(samples_fixed.data(), 
audio->num_frames_per_band()); + + S16ToFloatS16(samples_fixed.data(), audio->num_frames_per_band(), + audio->split_bands_f(i)[kBand0To8kHz]); } } diff --git a/modules/audio_processing/noise_suppression_impl.cc b/modules/audio_processing/noise_suppression_impl.cc index bfaddd989e..c83471750c 100644 --- a/modules/audio_processing/noise_suppression_impl.cc +++ b/modules/audio_processing/noise_suppression_impl.cc @@ -101,8 +101,16 @@ void NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) { WebRtcNs_Process(suppressors_[i]->state(), audio->split_bands_const_f(i), audio->num_bands(), audio->split_bands_f(i)); #elif defined(WEBRTC_NS_FIXED) - WebRtcNsx_Process(suppressors_[i]->state(), audio->split_bands_const(i), - audio->num_bands(), audio->split_bands(i)); + int16_t split_band_data[AudioBuffer::kMaxNumBands] + [AudioBuffer::kMaxSplitFrameLength]; + int16_t* split_bands[AudioBuffer::kMaxNumBands] = { + split_band_data[0], split_band_data[1], split_band_data[2]}; + audio->CopySplitChannelDataTo(i, split_bands); + + WebRtcNsx_Process(suppressors_[i]->state(), split_bands, audio->num_bands(), + split_bands); + + audio->CopySplitChannelDataFrom(i, split_bands); #endif } } diff --git a/modules/audio_processing/rms_level.cc b/modules/audio_processing/rms_level.cc index 63280d1e67..6992a15194 100644 --- a/modules/audio_processing/rms_level.cc +++ b/modules/audio_processing/rms_level.cc @@ -74,6 +74,27 @@ void RmsLevel::Analyze(rtc::ArrayView data) { max_sum_square_ = std::max(max_sum_square_, sum_square); } +void RmsLevel::Analyze(rtc::ArrayView data) { + if (data.empty()) { + return; + } + + CheckBlockSize(data.size()); + + float sum_square = 0.f; + + for (float data_k : data) { + int16_t tmp = + static_cast(std::min(std::max(data_k, -32768.f), 32767.f)); + sum_square += tmp * tmp; + } + RTC_DCHECK_GE(sum_square, 0.f); + sum_square_ += sum_square; + sample_count_ += data.size(); + + max_sum_square_ = std::max(max_sum_square_, sum_square); +} + void 
RmsLevel::AnalyzeMuted(size_t length) { CheckBlockSize(length); sample_count_ += length; diff --git a/modules/audio_processing/rms_level.h b/modules/audio_processing/rms_level.h index e6b5849ead..8b52b6cebe 100644 --- a/modules/audio_processing/rms_level.h +++ b/modules/audio_processing/rms_level.h @@ -45,6 +45,7 @@ class RmsLevel { // Pass each chunk of audio to Analyze() to accumulate the level. void Analyze(rtc::ArrayView data); + void Analyze(rtc::ArrayView data); // If all samples with the given |length| have a magnitude of zero, this is // a shortcut to avoid some computation. diff --git a/modules/audio_processing/rms_level_unittest.cc b/modules/audio_processing/rms_level_unittest.cc index a1ceaad7fb..daf355d074 100644 --- a/modules/audio_processing/rms_level_unittest.cc +++ b/modules/audio_processing/rms_level_unittest.cc @@ -34,9 +34,18 @@ std::unique_ptr RunTest(rtc::ArrayView input) { return level; } -std::vector CreateSinusoid(int frequency_hz, - int amplitude, - size_t num_samples) { +std::unique_ptr RunTest(rtc::ArrayView input) { + std::unique_ptr level(new RmsLevel); + for (size_t n = 0; n + kBlockSizeSamples <= input.size(); + n += kBlockSizeSamples) { + level->Analyze(input.subview(n, kBlockSizeSamples)); + } + return level; +} + +std::vector CreateInt16Sinusoid(int frequency_hz, + int amplitude, + size_t num_samples) { std::vector x(num_samples); for (size_t n = 0; n < num_samples; ++n) { x[n] = rtc::saturated_cast( @@ -44,16 +53,40 @@ std::vector CreateSinusoid(int frequency_hz, } return x; } + +std::vector CreateFloatSinusoid(int frequency_hz, + int amplitude, + size_t num_samples) { + std::vector x16 = + CreateInt16Sinusoid(frequency_hz, amplitude, num_samples); + std::vector x(x16.size()); + for (size_t n = 0; n < x.size(); ++n) { + x[n] = x16[n]; + } + return x; +} + } // namespace +TEST(RmsLevelTest, VerifyIndentityBetweenFloatAndFix) { + auto x_f = CreateFloatSinusoid(1000, INT16_MAX, kSampleRateHz); + auto x_i = CreateFloatSinusoid(1000, 
INT16_MAX, kSampleRateHz); + auto level_f = RunTest(x_f); + auto level_i = RunTest(x_i); + int avg_i = level_i->Average(); + int avg_f = level_f->Average(); + EXPECT_EQ(3, avg_i); // -3 dBFS + EXPECT_EQ(avg_f, avg_i); +} + TEST(RmsLevelTest, Run1000HzFullScale) { - auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz); + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); auto level = RunTest(x); EXPECT_EQ(3, level->Average()); // -3 dBFS } TEST(RmsLevelTest, Run1000HzFullScaleAverageAndPeak) { - auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz); + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); auto level = RunTest(x); auto stats = level->AverageAndPeak(); EXPECT_EQ(3, stats.average); // -3 dBFS @@ -61,7 +94,7 @@ TEST(RmsLevelTest, Run1000HzFullScaleAverageAndPeak) { } TEST(RmsLevelTest, Run1000HzHalfScale) { - auto x = CreateSinusoid(1000, INT16_MAX / 2, kSampleRateHz); + auto x = CreateInt16Sinusoid(1000, INT16_MAX / 2, kSampleRateHz); auto level = RunTest(x); EXPECT_EQ(9, level->Average()); // -9 dBFS } @@ -93,14 +126,14 @@ TEST(RmsLevelTest, NoSamplesAverageAndPeak) { } TEST(RmsLevelTest, PollTwice) { - auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz); + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); auto level = RunTest(x); level->Average(); EXPECT_EQ(127, level->Average()); // Stats should be reset at this point. } TEST(RmsLevelTest, Reset) { - auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz); + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); auto level = RunTest(x); level->Reset(); EXPECT_EQ(127, level->Average()); // Stats should be reset at this point. @@ -108,7 +141,7 @@ TEST(RmsLevelTest, Reset) { // Inserts 1 second of full-scale sinusoid, followed by 1 second of muted. 
TEST(RmsLevelTest, ProcessMuted) { - auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz); + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); auto level = RunTest(x); const size_t kBlocksPerSecond = rtc::CheckedDivExact( static_cast<size_t>(kSampleRateHz), kBlockSizeSamples); @@ -123,8 +156,8 @@ // to the vast majority of the signal being half-scale, and the peak to be // -3 dBFS. TEST(RmsLevelTest, RunHalfScaleAndInsertFullScale) { - auto half_scale = CreateSinusoid(1000, INT16_MAX / 2, kSampleRateHz); - auto full_scale = CreateSinusoid(1000, INT16_MAX, kSampleRateHz / 100); + auto half_scale = CreateInt16Sinusoid(1000, INT16_MAX / 2, kSampleRateHz); + auto full_scale = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz / 100); auto x = half_scale; x.insert(x.end(), full_scale.begin(), full_scale.end()); x.insert(x.end(), half_scale.begin(), half_scale.end()); @@ -137,10 +170,10 @@ } TEST(RmsLevelTest, ResetOnBlockSizeChange) { - auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz); + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); auto level = RunTest(x); // Create a new signal with half amplitude, but double block length. 
- auto y = CreateSinusoid(1000, INT16_MAX / 2, kBlockSizeSamples * 2); + auto y = CreateInt16Sinusoid(1000, INT16_MAX / 2, kBlockSizeSamples * 2); level->Analyze(y); auto stats = level->AverageAndPeak(); // Expect all stats to only be influenced by the last signal (y), since the diff --git a/modules/audio_processing/voice_detection_impl.cc b/modules/audio_processing/voice_detection_impl.cc index 0263de4651..3b0eb7c7ca 100644 --- a/modules/audio_processing/voice_detection_impl.cc +++ b/modules/audio_processing/voice_detection_impl.cc @@ -58,24 +58,25 @@ bool VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { rtc::CritScope cs(crit_); RTC_DCHECK(enabled_); - RTC_DCHECK_GE(160, audio->num_frames_per_band()); - std::array<int16_t, 160> mixed_low_pass_data; - rtc::ArrayView<const int16_t> mixed_low_pass; + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); + std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> mixed_low_pass_data; + rtc::ArrayView<const int16_t> mixed_low_pass(mixed_low_pass_data.data(), + audio->num_frames_per_band()); if (audio->num_proc_channels() == 1) { - mixed_low_pass = - rtc::ArrayView<const int16_t>(audio->split_bands_const(0)[kBand0To8kHz], - audio->num_frames_per_band()); + FloatS16ToS16(audio->split_bands_const_f(0)[kBand0To8kHz], + audio->num_frames_per_band(), mixed_low_pass_data.data()); } else { const int num_channels = static_cast<int>(audio->num_channels()); for (size_t i = 0; i < audio->num_frames_per_band(); ++i) { - int32_t value = audio->split_channels_const(kBand0To8kHz)[0][i]; + int32_t value = + FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[0][i]); for (int j = 1; j < num_channels; ++j) { - value += audio->split_channels_const(kBand0To8kHz)[j][i]; + value += + FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[j][i]); } mixed_low_pass_data[i] = value / num_channels; } - mixed_low_pass = rtc::ArrayView<const int16_t>( - mixed_low_pass_data.data(), audio->num_frames_per_band()); } int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_, diff --git 
a/resources/audio_processing/output_data_float.pb.sha1 b/resources/audio_processing/output_data_float.pb.sha1 index e9c065b7e2..c1b6f1a984 100644 --- a/resources/audio_processing/output_data_float.pb.sha1 +++ b/resources/audio_processing/output_data_float.pb.sha1 @@ -1 +1 @@ -bc19d9e9fd9503cad02f3b0c21cbd63ed3c5f22c \ No newline at end of file +d67b879f3b4a31b3c4f3587bd4418be5f9df5105 \ No newline at end of file