diff --git a/common_audio/audio_util.cc b/common_audio/audio_util.cc index eb132ca633..b1e4d9ac3c 100644 --- a/common_audio/audio_util.cc +++ b/common_audio/audio_util.cc @@ -12,11 +12,21 @@ namespace webrtc { +void FloatToS16(const float* src, size_t size, int16_t* dest) { + for (size_t i = 0; i < size; ++i) + dest[i] = FloatToS16(src[i]); +} + void S16ToFloat(const int16_t* src, size_t size, float* dest) { for (size_t i = 0; i < size; ++i) dest[i] = S16ToFloat(src[i]); } +void S16ToFloatS16(const int16_t* src, size_t size, float* dest) { + for (size_t i = 0; i < size; ++i) + dest[i] = src[i]; +} + void FloatS16ToS16(const float* src, size_t size, int16_t* dest) { for (size_t i = 0; i < size; ++i) dest[i] = FloatS16ToS16(src[i]); diff --git a/common_audio/include/audio_util.h b/common_audio/include/audio_util.h index 9925e54b02..f6b6bfdcd6 100644 --- a/common_audio/include/audio_util.h +++ b/common_audio/include/audio_util.h @@ -43,6 +43,13 @@ static inline int16_t FloatS16ToS16(float v) { return static_cast<int16_t>(v + std::copysign(0.5f, v)); } +static inline int16_t FloatToS16(float v) { + v *= 32768.f; + v = std::min(v, 32767.f); + v = std::max(v, -32768.f); + return static_cast<int16_t>(v + std::copysign(0.5f, v)); +} + static inline float FloatToFloatS16(float v) { v = std::min(v, 1.f); v = std::max(v, -1.f); @@ -56,7 +63,9 @@ static inline float FloatS16ToFloat(float v) { return v * kScaling; } +void FloatToS16(const float* src, size_t size, int16_t* dest); void S16ToFloat(const int16_t* src, size_t size, float* dest); +void S16ToFloatS16(const int16_t* src, size_t size, float* dest); void FloatS16ToS16(const float* src, size_t size, int16_t* dest); void FloatToFloatS16(const float* src, size_t size, float* dest); void FloatS16ToFloat(const float* src, size_t size, float* dest); diff --git a/common_audio/resampler/push_sinc_resampler_unittest.cc b/common_audio/resampler/push_sinc_resampler_unittest.cc index f9943b3cc8..dc7cdec141 100644 ---
a/common_audio/resampler/push_sinc_resampler_unittest.cc +++ b/common_audio/resampler/push_sinc_resampler_unittest.cc @@ -33,18 +33,6 @@ T DBFS(T x) { return 20 * std::log10(x); } -void FloatToS16(const float* src, size_t size, int16_t* dest) { - for (size_t i = 0; i < size; ++i) { - RTC_DCHECK_GE(32767.f, src[i]); - RTC_DCHECK_LE(-32768.f, src[i]); - if (src[i] >= 1.f) - dest[i] = 32767; - if (src[i] <= -1.f) - dest[i] = -32768; - dest[i] = static_cast<int16_t>(src[i] * 32767.5f); - } -} - } // namespace class PushSincResamplerTest : public ::testing::TestWithParam< diff --git a/modules/audio_processing/agc/BUILD.gn b/modules/audio_processing/agc/BUILD.gn index 69db73dbdf..09781b2428 100644 --- a/modules/audio_processing/agc/BUILD.gn +++ b/modules/audio_processing/agc/BUILD.gn @@ -19,6 +19,8 @@ rtc_source_set("agc") { ":level_estimation", "..:apm_logging", "..:gain_control_interface", + "../../../common_audio", + "../../../common_audio:common_audio_c", "../../../rtc_base:checks", "../../../rtc_base:gtest_prod", "../../../rtc_base:logging", diff --git a/modules/audio_processing/agc/agc_manager_direct.cc b/modules/audio_processing/agc/agc_manager_direct.cc index 5c4deeccbf..4b0de78cb5 100644 --- a/modules/audio_processing/agc/agc_manager_direct.cc +++ b/modules/audio_processing/agc/agc_manager_direct.cc @@ -17,6 +17,7 @@ #include #endif +#include "common_audio/include/audio_util.h" #include "modules/audio_processing/agc/gain_map_internal.h" #include "modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h" #include "modules/audio_processing/include/gain_control.h" @@ -59,6 +60,10 @@ const int kMaxResidualGainChange = 15; // restrictions from clipping events. const int kSurplusCompressionGain = 6; +// Maximum number of channels and number of samples per channel supported.
+constexpr size_t kMaxNumSamplesPerChannel = 1920; +constexpr size_t kMaxNumChannels = 4; + int ClampLevel(int mic_level) { return rtc::SafeClamp(mic_level, kMinMicLevel, kMaxMicLevel); } @@ -220,7 +225,7 @@ int AgcManagerDirect::Initialize() { return InitializeGainControl(gctrl_, disable_digital_adaptive_); } -void AgcManagerDirect::AnalyzePreProcess(int16_t* audio, +void AgcManagerDirect::AnalyzePreProcess(float* audio, int num_channels, size_t samples_per_channel) { size_t length = num_channels * samples_per_channel; @@ -228,7 +233,19 @@ void AgcManagerDirect::AnalyzePreProcess(int16_t* audio, return; } - file_preproc_->Write(audio, length); + std::array<int16_t, kMaxNumChannels * kMaxNumSamplesPerChannel> audio_data; + int16_t* audio_fix; + size_t safe_length; + if (audio) { + audio_fix = audio_data.data(); + safe_length = std::min(audio_data.size(), length); + FloatS16ToS16(audio, length, audio_fix); + } else { + audio_fix = nullptr; + safe_length = length; + } + + file_preproc_->Write(audio_fix, safe_length); if (frames_since_clipped_ < kClippedWaitFrames) { ++frames_since_clipped_; @@ -244,7 +261,7 @@ void AgcManagerDirect::AnalyzePreProcess(int16_t* audio, // maximum. This harsh treatment is an effort to avoid repeated clipped echo // events. As compensation for this restriction, the maximum compression // gain is increased, through SetMaxLevel(). - float clipped_ratio = agc_->AnalyzePreproc(audio, length); + float clipped_ratio = agc_->AnalyzePreproc(audio_fix, safe_length); if (clipped_ratio > kClippedRatioThreshold) { RTC_DLOG(LS_INFO) << "[agc] Clipping detected. 
clipped_ratio=" << clipped_ratio; @@ -263,15 +280,31 @@ } frames_since_clipped_ = 0; } + + if (audio) { + S16ToFloatS16(audio_fix, safe_length, audio); + } } -void AgcManagerDirect::Process(const int16_t* audio, +void AgcManagerDirect::Process(const float* audio, size_t length, int sample_rate_hz) { if (capture_muted_) { return; } + std::array<int16_t, kMaxNumChannels * kMaxNumSamplesPerChannel> audio_data; + const int16_t* audio_fix; + size_t safe_length; + if (audio) { + audio_fix = audio_data.data(); + safe_length = std::min(audio_data.size(), length); + FloatS16ToS16(audio, length, audio_data.data()); + } else { + audio_fix = nullptr; + safe_length = length; + } + if (check_volume_on_next_process_) { check_volume_on_next_process_ = false; // We have to wait until the first process call to check the volume, @@ -279,14 +312,14 @@ CheckVolumeAndReset(); } - agc_->Process(audio, length, sample_rate_hz); + agc_->Process(audio_fix, safe_length, sample_rate_hz); UpdateGain(); if (!disable_digital_adaptive_) { UpdateCompressor(); } - file_postproc_->Write(audio, length); + file_postproc_->Write(audio_fix, safe_length); data_dumper_->DumpRaw("experimental_gain_control_compression_gain_db", 1, &compression_); diff --git a/modules/audio_processing/agc/agc_manager_direct.h b/modules/audio_processing/agc/agc_manager_direct.h index 8c9fc4da9c..a637fca9a5 100644 --- a/modules/audio_processing/agc/agc_manager_direct.h +++ b/modules/audio_processing/agc/agc_manager_direct.h @@ -56,10 +56,10 @@ class AgcManagerDirect final { ~AgcManagerDirect(); int Initialize(); - void AnalyzePreProcess(int16_t* audio, + void AnalyzePreProcess(float* audio, int num_channels, size_t samples_per_channel); - void Process(const int16_t* audio, size_t length, int sample_rate_hz); + void Process(const float* audio, size_t length, int sample_rate_hz); // Call when the capture stream has been muted/unmuted.
This causes the // manager to disregard all incoming audio; chances are good it's background diff --git a/modules/audio_processing/audio_buffer.cc b/modules/audio_processing/audio_buffer.cc index 584111c29a..32668fa079 100644 --- a/modules/audio_processing/audio_buffer.cc +++ b/modules/audio_processing/audio_buffer.cc @@ -169,29 +169,11 @@ void AudioBuffer::InitForNewData() { } } -const int16_t* const* AudioBuffer::channels_const() const { - return data_->ibuf_const()->channels(); -} - -int16_t* const* AudioBuffer::channels() { - return data_->ibuf()->channels(); -} - -const int16_t* const* AudioBuffer::split_bands_const(size_t channel) const { - return split_data_.get() ? split_data_->ibuf_const()->bands(channel) - : data_->ibuf_const()->bands(channel); -} - -int16_t* const* AudioBuffer::split_bands(size_t channel) { - return split_data_.get() ? split_data_->ibuf()->bands(channel) - : data_->ibuf()->bands(channel); -} - -const int16_t* const* AudioBuffer::split_channels_const(Band band) const { +const float* const* AudioBuffer::split_channels_const_f(Band band) const { if (split_data_.get()) { - return split_data_->ibuf_const()->channels(band); + return split_data_->fbuf_const()->channels(band); } else { - return band == kBand0To8kHz ? data_->ibuf_const()->channels() : nullptr; + return band == kBand0To8kHz ? 
data_->fbuf_const()->channels() : nullptr; } } @@ -308,4 +290,29 @@ void AudioBuffer::MergeFrequencyBands() { splitting_filter_->Synthesis(split_data_.get(), data_.get()); } +void AudioBuffer::CopySplitChannelDataTo(size_t channel, + int16_t* const* split_band_data) { + for (size_t k = 0; k < num_bands(); ++k) { + const float* band_data = split_bands_f(channel)[k]; + RTC_DCHECK(split_band_data[k]); + RTC_DCHECK(band_data); + for (size_t i = 0; i < num_frames_per_band(); ++i) { + split_band_data[k][i] = FloatS16ToS16(band_data[i]); + } + } +} + +void AudioBuffer::CopySplitChannelDataFrom( + size_t channel, + const int16_t* const* split_band_data) { + for (size_t k = 0; k < num_bands(); ++k) { + float* band_data = split_bands_f(channel)[k]; + RTC_DCHECK(split_band_data[k]); + RTC_DCHECK(band_data); + for (size_t i = 0; i < num_frames_per_band(); ++i) { + band_data[i] = split_band_data[k][i]; + } + } +} + } // namespace webrtc diff --git a/modules/audio_processing/audio_buffer.h b/modules/audio_processing/audio_buffer.h index c1bfb63673..16d5616a46 100644 --- a/modules/audio_processing/audio_buffer.h +++ b/modules/audio_processing/audio_buffer.h @@ -52,8 +52,6 @@ class AudioBuffer { // Where: // 0 <= channel < |num_proc_channels_| // 0 <= sample < |proc_num_frames_| - int16_t* const* channels(); - const int16_t* const* channels_const() const; float* const* channels_f(); const float* const* channels_const_f() const; @@ -64,8 +62,6 @@ class AudioBuffer { // 0 <= channel < |num_proc_channels_| // 0 <= band < |num_bands_| // 0 <= sample < |num_split_frames_| - int16_t* const* split_bands(size_t channel); - const int16_t* const* split_bands_const(size_t channel) const; float* const* split_bands_f(size_t channel); const float* const* split_bands_const_f(size_t channel) const; @@ -76,7 +72,7 @@ class AudioBuffer { // 0 <= band < |num_bands_| // 0 <= channel < |num_proc_channels_| // 0 <= sample < |num_split_frames_| - const int16_t* const* split_channels_const(Band band) 
const; + const float* const* split_channels_const_f(Band band) const; // Use for int16 interleaved data. void DeinterleaveFrom(const AudioFrame* audioFrame); @@ -93,6 +89,17 @@ class AudioBuffer { // Recombine the different bands into one signal. void MergeFrequencyBands(); + // Copies the split bands data into the integer two-dimensional array. + void CopySplitChannelDataTo(size_t channel, int16_t* const* split_band_data); + + // Copies the data in the integer two-dimensional array into the split_bands + // data. + void CopySplitChannelDataFrom(size_t channel, + const int16_t* const* split_band_data); + + static const size_t kMaxSplitFrameLength = 160; + static const size_t kMaxNumBands = 3; + private: FRIEND_TEST_ALL_PREFIXES(AudioBufferTest, SetNumChannelsSetsChannelBuffersNumChannels); diff --git a/modules/audio_processing/audio_frame_view_unittest.cc b/modules/audio_processing/audio_frame_view_unittest.cc index 7a9d126103..70b63b1cb3 100644 --- a/modules/audio_processing/audio_frame_view_unittest.cc +++ b/modules/audio_processing/audio_frame_view_unittest.cc @@ -43,9 +43,9 @@ TEST(AudioFrameTest, ConstructFromAudioBuffer) { // But not the other way. 
The following will fail: // non_const_view = other_const_view; - AudioFrameView non_const_int16_view( - buffer.channels(), buffer.num_channels(), buffer.num_frames()); - non_const_int16_view.channel(0)[0] = kIntConstant; - EXPECT_EQ(buffer.channels()[0][0], kIntConstant); + AudioFrameView non_const_float_view( + buffer.channels_f(), buffer.num_channels(), buffer.num_frames()); + non_const_float_view.channel(0)[0] = kIntConstant; + EXPECT_EQ(buffer.channels_f()[0][0], kIntConstant); } } // namespace webrtc diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index 804802f7a7..fc463c1dbb 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -1279,8 +1279,8 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { capture_buffer->num_frames())); } - capture_input_rms_.Analyze(rtc::ArrayView( - capture_buffer->channels_const()[0], + capture_input_rms_.Analyze(rtc::ArrayView( + capture_buffer->channels_const_f()[0], capture_nonlocked_.capture_processing_format.num_frames())); const bool log_rms = ++capture_rms_interval_counter_ >= 1000; if (log_rms) { @@ -1323,12 +1323,12 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { if (constants_.use_experimental_agc && public_submodules_->gain_control->is_enabled()) { private_submodules_->agc_manager->AnalyzePreProcess( - capture_buffer->channels()[0], capture_buffer->num_channels(), + capture_buffer->channels_f()[0], capture_buffer->num_channels(), capture_nonlocked_.capture_processing_format.num_frames()); if (constants_.use_experimental_agc_process_before_aec) { private_submodules_->agc_manager->Process( - capture_buffer->channels()[0], + capture_buffer->channels_const_f()[0], capture_nonlocked_.capture_processing_format.num_frames(), capture_nonlocked_.capture_processing_format.sample_rate_hz()); } @@ -1419,7 +1419,7 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { 
public_submodules_->gain_control->is_enabled() && !constants_.use_experimental_agc_process_before_aec) { private_submodules_->agc_manager->Process( - capture_buffer->split_bands_const(0)[kBand0To8kHz], + capture_buffer->split_bands_const_f(0)[kBand0To8kHz], capture_buffer->num_frames_per_band(), capture_nonlocked_.split_rate); } // TODO(peah): Add reporting from AEC3 whether there is echo. @@ -1484,8 +1484,8 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { capture_.stats.output_rms_dbfs = absl::nullopt; } - capture_output_rms_.Analyze(rtc::ArrayView( - capture_buffer->channels_const()[0], + capture_output_rms_.Analyze(rtc::ArrayView( + capture_buffer->channels_const_f()[0], capture_nonlocked_.capture_processing_format.num_frames())); if (log_rms) { RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak(); diff --git a/modules/audio_processing/echo_control_mobile_impl.cc b/modules/audio_processing/echo_control_mobile_impl.cc index c8084ea39b..982287b0e7 100644 --- a/modules/audio_processing/echo_control_mobile_impl.cc +++ b/modules/audio_processing/echo_control_mobile_impl.cc @@ -56,6 +56,7 @@ AudioProcessing::Error MapError(int err) { return AudioProcessing::kUnspecifiedError; } } + } // namespace struct EchoControlMobileImpl::StreamProperties { @@ -131,7 +132,8 @@ void EchoControlMobileImpl::PackRenderAudioBuffer( size_t num_output_channels, size_t num_channels, std::vector* packed_buffer) { - RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); RTC_DCHECK_EQ(num_channels, audio->num_channels()); // The ordering convention must be followed to pass to the correct AECM. 
@@ -139,12 +141,14 @@ void EchoControlMobileImpl::PackRenderAudioBuffer( int render_channel = 0; for (size_t i = 0; i < num_output_channels; i++) { for (size_t j = 0; j < audio->num_channels(); j++) { + std::array data_to_buffer; + FloatS16ToS16(audio->split_bands_const_f(render_channel)[kBand0To8kHz], + audio->num_frames_per_band(), data_to_buffer.data()); + // Buffer the samples in the render queue. packed_buffer->insert( - packed_buffer->end(), - audio->split_bands_const(render_channel)[kBand0To8kHz], - (audio->split_bands_const(render_channel)[kBand0To8kHz] + - audio->num_frames_per_band())); + packed_buffer->end(), data_to_buffer.data(), + data_to_buffer.data() + audio->num_frames_per_band()); render_channel = (render_channel + 1) % audio->num_channels(); } } @@ -174,7 +178,21 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio, RTC_DCHECK_LT(capture, low_pass_reference_.size()); const int16_t* noisy = reference_copied_ ? low_pass_reference_[capture].data() : nullptr; - const int16_t* clean = audio->split_bands_const(capture)[kBand0To8kHz]; + + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); + + std::array split_bands_data; + int16_t* split_bands = split_bands_data.data(); + const int16_t* clean = split_bands_data.data(); + if (audio->split_bands_f(capture)[kBand0To8kHz]) { + FloatS16ToS16(audio->split_bands_f(capture)[kBand0To8kHz], + audio->num_frames_per_band(), split_bands_data.data()); + } else { + clean = nullptr; + split_bands = nullptr; + } + if (noisy == NULL) { noisy = clean; clean = NULL; @@ -182,8 +200,13 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio, for (size_t render = 0; render < stream_properties_->num_reverse_channels; ++render) { err = WebRtcAecm_Process(cancellers_[handle_index]->state(), noisy, clean, - audio->split_bands(capture)[kBand0To8kHz], - audio->num_frames_per_band(), stream_delay_ms); + split_bands, audio->num_frames_per_band(), + stream_delay_ms); + + if 
(split_bands) { + S16ToFloatS16(split_bands, audio->num_frames_per_band(), + audio->split_bands_f(capture)[kBand0To8kHz]); + } if (err != AudioProcessing::kNoError) { return MapError(err); @@ -192,9 +215,9 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio, ++handle_index; } for (size_t band = 1u; band < audio->num_bands(); ++band) { - memset(audio->split_bands(capture)[band], 0, + memset(audio->split_bands_f(capture)[band], 0, audio->num_frames_per_band() * - sizeof(audio->split_bands(capture)[band][0])); + sizeof(audio->split_bands_f(capture)[band][0])); } } return AudioProcessing::kNoError; @@ -204,9 +227,9 @@ void EchoControlMobileImpl::CopyLowPassReference(AudioBuffer* audio) { RTC_DCHECK_LE(audio->num_channels(), low_pass_reference_.size()); reference_copied_ = true; for (size_t capture = 0; capture < audio->num_channels(); ++capture) { - memcpy(low_pass_reference_[capture].data(), - audio->split_bands_const(capture)[kBand0To8kHz], - audio->num_frames_per_band() * sizeof(int16_t)); + FloatS16ToS16(audio->split_bands_const_f(capture)[kBand0To8kHz], + audio->num_frames_per_band(), + low_pass_reference_[capture].data()); } } diff --git a/modules/audio_processing/gain_control_impl.cc b/modules/audio_processing/gain_control_impl.cc index 58559430a1..2fb8a18333 100644 --- a/modules/audio_processing/gain_control_impl.cc +++ b/modules/audio_processing/gain_control_impl.cc @@ -118,25 +118,25 @@ void GainControlImpl::ProcessRenderAudio( void GainControlImpl::PackRenderAudioBuffer( AudioBuffer* audio, std::vector* packed_buffer) { - RTC_DCHECK_GE(160, audio->num_frames_per_band()); - - std::array mixed_low_pass_data; - rtc::ArrayView mixed_low_pass; + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); + std::array mixed_low_pass_data; + rtc::ArrayView mixed_low_pass(mixed_low_pass_data.data(), + audio->num_frames_per_band()); if (audio->num_proc_channels() == 1) { - mixed_low_pass = - 
rtc::ArrayView(audio->split_bands_const(0)[kBand0To8kHz], - audio->num_frames_per_band()); + FloatS16ToS16(audio->split_bands_const_f(0)[kBand0To8kHz], + audio->num_frames_per_band(), mixed_low_pass_data.data()); } else { const int num_channels = static_cast(audio->num_channels()); for (size_t i = 0; i < audio->num_frames_per_band(); ++i) { - int32_t value = audio->split_channels_const(kBand0To8kHz)[0][i]; + int32_t value = + FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[0][i]); for (int j = 1; j < num_channels; ++j) { - value += audio->split_channels_const(kBand0To8kHz)[j][i]; + value += + FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[j][i]); } mixed_low_pass_data[i] = value / num_channels; } - mixed_low_pass = rtc::ArrayView( - mixed_low_pass_data.data(), audio->num_frames_per_band()); } packed_buffer->clear(); @@ -150,17 +150,28 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) { } RTC_DCHECK(num_proc_channels_); - RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); RTC_DCHECK_EQ(audio->num_channels(), *num_proc_channels_); RTC_DCHECK_LE(*num_proc_channels_, gain_controllers_.size()); + int16_t split_band_data[AudioBuffer::kMaxNumBands] + [AudioBuffer::kMaxSplitFrameLength]; + int16_t* split_bands[AudioBuffer::kMaxNumBands] = { + split_band_data[0], split_band_data[1], split_band_data[2]}; + if (mode_ == kAdaptiveAnalog) { int capture_channel = 0; for (auto& gain_controller : gain_controllers_) { gain_controller->set_capture_level(analog_capture_level_); - int err = WebRtcAgc_AddMic( - gain_controller->state(), audio->split_bands(capture_channel), - audio->num_bands(), audio->num_frames_per_band()); + + audio->CopySplitChannelDataTo(capture_channel, split_bands); + + int err = + WebRtcAgc_AddMic(gain_controller->state(), split_bands, + audio->num_bands(), audio->num_frames_per_band()); + + audio->CopySplitChannelDataFrom(capture_channel, 
split_bands); if (err != AudioProcessing::kNoError) { return AudioProcessing::kUnspecifiedError; @@ -171,10 +182,15 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) { int capture_channel = 0; for (auto& gain_controller : gain_controllers_) { int32_t capture_level_out = 0; - int err = WebRtcAgc_VirtualMic( - gain_controller->state(), audio->split_bands(capture_channel), - audio->num_bands(), audio->num_frames_per_band(), - analog_capture_level_, &capture_level_out); + + audio->CopySplitChannelDataTo(capture_channel, split_bands); + + int err = + WebRtcAgc_VirtualMic(gain_controller->state(), split_bands, + audio->num_bands(), audio->num_frames_per_band(), + analog_capture_level_, &capture_level_out); + + audio->CopySplitChannelDataFrom(capture_channel, split_bands); gain_controller->set_capture_level(capture_level_out); @@ -199,7 +215,8 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio, } RTC_DCHECK(num_proc_channels_); - RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); RTC_DCHECK_EQ(audio->num_channels(), *num_proc_channels_); stream_is_saturated_ = false; @@ -208,15 +225,22 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio, int32_t capture_level_out = 0; uint8_t saturation_warning = 0; + int16_t split_band_data[AudioBuffer::kMaxNumBands] + [AudioBuffer::kMaxSplitFrameLength]; + int16_t* split_bands[AudioBuffer::kMaxNumBands] = { + split_band_data[0], split_band_data[1], split_band_data[2]}; + audio->CopySplitChannelDataTo(capture_channel, split_bands); + // The call to stream_has_echo() is ok from a deadlock perspective // as the capture lock is allready held. 
int err = WebRtcAgc_Process( - gain_controller->state(), audio->split_bands_const(capture_channel), - audio->num_bands(), audio->num_frames_per_band(), - audio->split_bands(capture_channel), + gain_controller->state(), split_bands, audio->num_bands(), + audio->num_frames_per_band(), split_bands, gain_controller->get_capture_level(), &capture_level_out, stream_has_echo, &saturation_warning); + audio->CopySplitChannelDataFrom(capture_channel, split_bands); + if (err != AudioProcessing::kNoError) { return AudioProcessing::kUnspecifiedError; } diff --git a/modules/audio_processing/level_estimator_impl.cc b/modules/audio_processing/level_estimator_impl.cc index 5b49b35fdc..8adbf19bde 100644 --- a/modules/audio_processing/level_estimator_impl.cc +++ b/modules/audio_processing/level_estimator_impl.cc @@ -40,8 +40,8 @@ void LevelEstimatorImpl::ProcessStream(AudioBuffer* audio) { } for (size_t i = 0; i < audio->num_channels(); i++) { - rms_->Analyze(rtc::ArrayView(audio->channels_const()[i], - audio->num_frames())); + rms_->Analyze(rtc::ArrayView(audio->channels_const_f()[i], + audio->num_frames())); } } diff --git a/modules/audio_processing/low_cut_filter.cc b/modules/audio_processing/low_cut_filter.cc index 1ee955d7d7..7398481a6c 100644 --- a/modules/audio_processing/low_cut_filter.cc +++ b/modules/audio_processing/low_cut_filter.cc @@ -96,11 +96,18 @@ LowCutFilter::~LowCutFilter() {} void LowCutFilter::Process(AudioBuffer* audio) { RTC_DCHECK(audio); - RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); RTC_DCHECK_EQ(filters_.size(), audio->num_channels()); for (size_t i = 0; i < filters_.size(); i++) { - filters_[i]->Process(audio->split_bands(i)[kBand0To8kHz], - audio->num_frames_per_band()); + std::array samples_fixed; + FloatS16ToS16(audio->split_bands_f(i)[kBand0To8kHz], + audio->num_frames_per_band(), samples_fixed.data()); + + filters_[i]->Process(samples_fixed.data(), 
audio->num_frames_per_band()); + + S16ToFloatS16(samples_fixed.data(), audio->num_frames_per_band(), + audio->split_bands_f(i)[kBand0To8kHz]); } } diff --git a/modules/audio_processing/noise_suppression_impl.cc b/modules/audio_processing/noise_suppression_impl.cc index bfaddd989e..c83471750c 100644 --- a/modules/audio_processing/noise_suppression_impl.cc +++ b/modules/audio_processing/noise_suppression_impl.cc @@ -101,8 +101,16 @@ void NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) { WebRtcNs_Process(suppressors_[i]->state(), audio->split_bands_const_f(i), audio->num_bands(), audio->split_bands_f(i)); #elif defined(WEBRTC_NS_FIXED) - WebRtcNsx_Process(suppressors_[i]->state(), audio->split_bands_const(i), - audio->num_bands(), audio->split_bands(i)); + int16_t split_band_data[AudioBuffer::kMaxNumBands] + [AudioBuffer::kMaxSplitFrameLength]; + int16_t* split_bands[AudioBuffer::kMaxNumBands] = { + split_band_data[0], split_band_data[1], split_band_data[2]}; + audio->CopySplitChannelDataTo(i, split_bands); + + WebRtcNsx_Process(suppressors_[i]->state(), split_bands, audio->num_bands(), + split_bands); + + audio->CopySplitChannelDataFrom(i, split_bands); #endif } } diff --git a/modules/audio_processing/rms_level.cc b/modules/audio_processing/rms_level.cc index 63280d1e67..6992a15194 100644 --- a/modules/audio_processing/rms_level.cc +++ b/modules/audio_processing/rms_level.cc @@ -74,6 +74,27 @@ void RmsLevel::Analyze(rtc::ArrayView data) { max_sum_square_ = std::max(max_sum_square_, sum_square); } +void RmsLevel::Analyze(rtc::ArrayView data) { + if (data.empty()) { + return; + } + + CheckBlockSize(data.size()); + + float sum_square = 0.f; + + for (float data_k : data) { + int16_t tmp = + static_cast(std::min(std::max(data_k, -32768.f), 32767.f)); + sum_square += tmp * tmp; + } + RTC_DCHECK_GE(sum_square, 0.f); + sum_square_ += sum_square; + sample_count_ += data.size(); + + max_sum_square_ = std::max(max_sum_square_, sum_square); +} + void 
RmsLevel::AnalyzeMuted(size_t length) { CheckBlockSize(length); sample_count_ += length; diff --git a/modules/audio_processing/rms_level.h b/modules/audio_processing/rms_level.h index e6b5849ead..8b52b6cebe 100644 --- a/modules/audio_processing/rms_level.h +++ b/modules/audio_processing/rms_level.h @@ -45,6 +45,7 @@ class RmsLevel { // Pass each chunk of audio to Analyze() to accumulate the level. void Analyze(rtc::ArrayView data); + void Analyze(rtc::ArrayView data); // If all samples with the given |length| have a magnitude of zero, this is // a shortcut to avoid some computation. diff --git a/modules/audio_processing/rms_level_unittest.cc b/modules/audio_processing/rms_level_unittest.cc index a1ceaad7fb..daf355d074 100644 --- a/modules/audio_processing/rms_level_unittest.cc +++ b/modules/audio_processing/rms_level_unittest.cc @@ -34,9 +34,18 @@ std::unique_ptr RunTest(rtc::ArrayView input) { return level; } -std::vector CreateSinusoid(int frequency_hz, - int amplitude, - size_t num_samples) { +std::unique_ptr RunTest(rtc::ArrayView input) { + std::unique_ptr level(new RmsLevel); + for (size_t n = 0; n + kBlockSizeSamples <= input.size(); + n += kBlockSizeSamples) { + level->Analyze(input.subview(n, kBlockSizeSamples)); + } + return level; +} + +std::vector CreateInt16Sinusoid(int frequency_hz, + int amplitude, + size_t num_samples) { std::vector x(num_samples); for (size_t n = 0; n < num_samples; ++n) { x[n] = rtc::saturated_cast( @@ -44,16 +53,40 @@ std::vector CreateSinusoid(int frequency_hz, } return x; } + +std::vector CreateFloatSinusoid(int frequency_hz, + int amplitude, + size_t num_samples) { + std::vector x16 = + CreateInt16Sinusoid(frequency_hz, amplitude, num_samples); + std::vector x(x16.size()); + for (size_t n = 0; n < x.size(); ++n) { + x[n] = x16[n]; + } + return x; +} + } // namespace +TEST(RmsLevelTest, VerifyIndentityBetweenFloatAndFix) { + auto x_f = CreateFloatSinusoid(1000, INT16_MAX, kSampleRateHz); + auto x_i = CreateFloatSinusoid(1000, 
INT16_MAX, kSampleRateHz); + auto level_f = RunTest(x_f); + auto level_i = RunTest(x_i); + int avg_i = level_i->Average(); + int avg_f = level_f->Average(); + EXPECT_EQ(3, avg_i); // -3 dBFS + EXPECT_EQ(avg_f, avg_i); +} + TEST(RmsLevelTest, Run1000HzFullScale) { - auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz); + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); auto level = RunTest(x); EXPECT_EQ(3, level->Average()); // -3 dBFS } TEST(RmsLevelTest, Run1000HzFullScaleAverageAndPeak) { - auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz); + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); auto level = RunTest(x); auto stats = level->AverageAndPeak(); EXPECT_EQ(3, stats.average); // -3 dBFS @@ -61,7 +94,7 @@ TEST(RmsLevelTest, Run1000HzFullScaleAverageAndPeak) { } TEST(RmsLevelTest, Run1000HzHalfScale) { - auto x = CreateSinusoid(1000, INT16_MAX / 2, kSampleRateHz); + auto x = CreateInt16Sinusoid(1000, INT16_MAX / 2, kSampleRateHz); auto level = RunTest(x); EXPECT_EQ(9, level->Average()); // -9 dBFS } @@ -93,14 +126,14 @@ TEST(RmsLevelTest, NoSamplesAverageAndPeak) { } TEST(RmsLevelTest, PollTwice) { - auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz); + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); auto level = RunTest(x); level->Average(); EXPECT_EQ(127, level->Average()); // Stats should be reset at this point. } TEST(RmsLevelTest, Reset) { - auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz); + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); auto level = RunTest(x); level->Reset(); EXPECT_EQ(127, level->Average()); // Stats should be reset at this point. @@ -108,7 +141,7 @@ TEST(RmsLevelTest, Reset) { // Inserts 1 second of full-scale sinusoid, followed by 1 second of muted. 
TEST(RmsLevelTest, ProcessMuted) { - auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz); + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); auto level = RunTest(x); const size_t kBlocksPerSecond = rtc::CheckedDivExact( static_cast(kSampleRateHz), kBlockSizeSamples); @@ -123,8 +156,8 @@ TEST(RmsLevelTest, ProcessMuted) { // to the vast majority of the signal being half-scale, and the peak to be // -3 dBFS. TEST(RmsLevelTest, RunHalfScaleAndInsertFullScale) { - auto half_scale = CreateSinusoid(1000, INT16_MAX / 2, kSampleRateHz); - auto full_scale = CreateSinusoid(1000, INT16_MAX, kSampleRateHz / 100); + auto half_scale = CreateInt16Sinusoid(1000, INT16_MAX / 2, kSampleRateHz); + auto full_scale = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz / 100); auto x = half_scale; x.insert(x.end(), full_scale.begin(), full_scale.end()); x.insert(x.end(), half_scale.begin(), half_scale.end()); @@ -137,10 +170,10 @@ TEST(RmsLevelTest, RunHalfScaleAndInsertFullScale) { } TEST(RmsLevelTest, ResetOnBlockSizeChange) { - auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz); + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); auto level = RunTest(x); // Create a new signal with half amplitude, but double block length. 
- auto y = CreateSinusoid(1000, INT16_MAX / 2, kBlockSizeSamples * 2); + auto y = CreateInt16Sinusoid(1000, INT16_MAX / 2, kBlockSizeSamples * 2); level->Analyze(y); auto stats = level->AverageAndPeak(); // Expect all stats to only be influenced by the last signal (y), since the diff --git a/modules/audio_processing/voice_detection_impl.cc b/modules/audio_processing/voice_detection_impl.cc index 0263de4651..3b0eb7c7ca 100644 --- a/modules/audio_processing/voice_detection_impl.cc +++ b/modules/audio_processing/voice_detection_impl.cc @@ -58,24 +58,25 @@ bool VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { rtc::CritScope cs(crit_); RTC_DCHECK(enabled_); - RTC_DCHECK_GE(160, audio->num_frames_per_band()); - std::array mixed_low_pass_data; - rtc::ArrayView mixed_low_pass; + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); + std::array mixed_low_pass_data; + rtc::ArrayView mixed_low_pass(mixed_low_pass_data.data(), + audio->num_frames_per_band()); if (audio->num_proc_channels() == 1) { - mixed_low_pass = - rtc::ArrayView(audio->split_bands_const(0)[kBand0To8kHz], - audio->num_frames_per_band()); + FloatS16ToS16(audio->split_bands_const_f(0)[kBand0To8kHz], + audio->num_frames_per_band(), mixed_low_pass_data.data()); } else { const int num_channels = static_cast(audio->num_channels()); for (size_t i = 0; i < audio->num_frames_per_band(); ++i) { - int32_t value = audio->split_channels_const(kBand0To8kHz)[0][i]; + int32_t value = + FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[0][i]); for (int j = 1; j < num_channels; ++j) { - value += audio->split_channels_const(kBand0To8kHz)[j][i]; + value += + FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[j][i]); } mixed_low_pass_data[i] = value / num_channels; } - mixed_low_pass = rtc::ArrayView( - mixed_low_pass_data.data(), audio->num_frames_per_band()); } int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_, diff --git 
a/resources/audio_processing/output_data_float.pb.sha1 b/resources/audio_processing/output_data_float.pb.sha1 index e9c065b7e2..c1b6f1a984 100644 --- a/resources/audio_processing/output_data_float.pb.sha1 +++ b/resources/audio_processing/output_data_float.pb.sha1 @@ -1 +1 @@ -bc19d9e9fd9503cad02f3b0c21cbd63ed3c5f22c \ No newline at end of file +d67b879f3b4a31b3c4f3587bd4418be5f9df5105 \ No newline at end of file