Removing all external access to the integer sample data in AudioBuffer

This CL removes all external access to the integer sample data in the
AudioBuffer class. It also removes the API in AudioBuffer that provides this.

The purpose of this is to pave the way for removing the sample
duplicating and implicit conversions between integer and floating point
sample formats which is done inside the AudioBuffer.

Bug: webrtc:10882
Change-Id: I1438b691bcef98278aef8e3c63624c367c2d12e9
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/149162
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#28912}
This commit is contained in:
Per Åhgren 2019-08-20 09:19:21 +02:00 committed by Commit Bot
parent 93d4c10ffc
commit 928146f546
20 changed files with 298 additions and 124 deletions

View file

@ -12,11 +12,21 @@
namespace webrtc {
// Converts |size| samples from [-1, 1] float format to int16 by applying
// the scalar FloatToS16() conversion element-wise.
void FloatToS16(const float* src, size_t size, int16_t* dest) {
  for (size_t k = 0; k < size; ++k) {
    dest[k] = FloatToS16(src[k]);
  }
}
// Converts |size| int16 samples to [-1, 1] float format by applying the
// scalar S16ToFloat() conversion element-wise.
void S16ToFloat(const int16_t* src, size_t size, float* dest) {
  for (size_t k = 0; k < size; ++k) {
    dest[k] = S16ToFloat(src[k]);
  }
}
// Widening copy: each int16 sample becomes a float carrying the same value
// (floatS16 scale); no rescaling is applied.
void S16ToFloatS16(const int16_t* src, size_t size, float* dest) {
  const int16_t* const end = src + size;
  while (src != end) {
    *dest++ = *src++;
  }
}
void FloatS16ToS16(const float* src, size_t size, int16_t* dest) {
for (size_t i = 0; i < size; ++i)
dest[i] = FloatS16ToS16(src[i]);

View file

@ -43,6 +43,13 @@ static inline int16_t FloatS16ToS16(float v) {
return static_cast<int16_t>(v + std::copysign(0.5f, v));
}
// Maps a [-1, 1] float sample to int16: scale by 32768, saturate to the
// int16 range, then round half away from zero via copysign.
static inline int16_t FloatToS16(float v) {
  float scaled = v * 32768.f;
  if (scaled > 32767.f) {
    scaled = 32767.f;
  } else if (scaled < -32768.f) {
    scaled = -32768.f;
  }
  return static_cast<int16_t>(scaled + std::copysign(0.5f, scaled));
}
static inline float FloatToFloatS16(float v) {
v = std::min(v, 1.f);
v = std::max(v, -1.f);
@ -56,7 +63,9 @@ static inline float FloatS16ToFloat(float v) {
return v * kScaling;
}
void FloatToS16(const float* src, size_t size, int16_t* dest);
void S16ToFloat(const int16_t* src, size_t size, float* dest);
void S16ToFloatS16(const int16_t* src, size_t size, float* dest);
void FloatS16ToS16(const float* src, size_t size, int16_t* dest);
void FloatToFloatS16(const float* src, size_t size, float* dest);
void FloatS16ToFloat(const float* src, size_t size, float* dest);

View file

@ -33,18 +33,6 @@ T DBFS(T x) {
return 20 * std::log10(x);
}
// Converts |size| samples in nominal [-1, 1] float format to int16,
// saturating out-of-range values at the int16 limits.
// NOTE(review): the DCHECK bounds (+/-32768) are far wider than the nominal
// [-1, 1] input range — confirm the intended input contract.
void FloatToS16(const float* src, size_t size, int16_t* dest) {
  for (size_t i = 0; i < size; ++i) {
    RTC_DCHECK_GE(32767.f, src[i]);
    RTC_DCHECK_LE(-32768.f, src[i]);
    if (src[i] >= 1.f) {
      dest[i] = 32767;  // Saturate at the positive limit.
    } else if (src[i] <= -1.f) {
      dest[i] = -32768;  // Saturate at the negative limit.
    } else {
      // Fix: this cast previously executed unconditionally, clobbering the
      // saturated values above and overflowing for |src[i]| > 1. In-range
      // samples map into (-32767.5, 32767.5) and truncate safely.
      dest[i] = static_cast<int16_t>(src[i] * 32767.5f);
    }
  }
}
} // namespace
class PushSincResamplerTest : public ::testing::TestWithParam<

View file

@ -19,6 +19,8 @@ rtc_source_set("agc") {
":level_estimation",
"..:apm_logging",
"..:gain_control_interface",
"../../../common_audio",
"../../../common_audio:common_audio_c",
"../../../rtc_base:checks",
"../../../rtc_base:gtest_prod",
"../../../rtc_base:logging",

View file

@ -17,6 +17,7 @@
#include <cstdio>
#endif
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc/gain_map_internal.h"
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h"
#include "modules/audio_processing/include/gain_control.h"
@ -59,6 +60,10 @@ const int kMaxResidualGainChange = 15;
// restrictions from clipping events.
const int kSurplusCompressionGain = 6;
// Maximum number of channels and number of samples per channel supported.
constexpr size_t kMaxNumSamplesPerChannel = 1920;
constexpr size_t kMaxNumChannels = 4;
int ClampLevel(int mic_level) {
return rtc::SafeClamp(mic_level, kMinMicLevel, kMaxMicLevel);
}
@ -220,7 +225,7 @@ int AgcManagerDirect::Initialize() {
return InitializeGainControl(gctrl_, disable_digital_adaptive_);
}
void AgcManagerDirect::AnalyzePreProcess(int16_t* audio,
void AgcManagerDirect::AnalyzePreProcess(float* audio,
int num_channels,
size_t samples_per_channel) {
size_t length = num_channels * samples_per_channel;
@ -228,7 +233,19 @@ void AgcManagerDirect::AnalyzePreProcess(int16_t* audio,
return;
}
file_preproc_->Write(audio, length);
std::array<int16_t, kMaxNumSamplesPerChannel * kMaxNumChannels> audio_data;
int16_t* audio_fix;
size_t safe_length;
if (audio) {
audio_fix = audio_data.data();
safe_length = std::min(audio_data.size(), length);
FloatS16ToS16(audio, length, audio_fix);
} else {
audio_fix = nullptr;
safe_length = length;
}
file_preproc_->Write(audio_fix, safe_length);
if (frames_since_clipped_ < kClippedWaitFrames) {
++frames_since_clipped_;
@ -244,7 +261,7 @@ void AgcManagerDirect::AnalyzePreProcess(int16_t* audio,
// maximum. This harsh treatment is an effort to avoid repeated clipped echo
// events. As compensation for this restriction, the maximum compression
// gain is increased, through SetMaxLevel().
float clipped_ratio = agc_->AnalyzePreproc(audio, length);
float clipped_ratio = agc_->AnalyzePreproc(audio_fix, safe_length);
if (clipped_ratio > kClippedRatioThreshold) {
RTC_DLOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
<< clipped_ratio;
@ -263,15 +280,31 @@ void AgcManagerDirect::AnalyzePreProcess(int16_t* audio,
}
frames_since_clipped_ = 0;
}
if (audio) {
S16ToFloatS16(audio_fix, safe_length, audio);
}
}
void AgcManagerDirect::Process(const int16_t* audio,
void AgcManagerDirect::Process(const float* audio,
size_t length,
int sample_rate_hz) {
if (capture_muted_) {
return;
}
std::array<int16_t, kMaxNumSamplesPerChannel * kMaxNumChannels> audio_data;
const int16_t* audio_fix;
size_t safe_length;
if (audio) {
audio_fix = audio_data.data();
safe_length = std::min(audio_data.size(), length);
FloatS16ToS16(audio, length, audio_data.data());
} else {
audio_fix = nullptr;
safe_length = length;
}
if (check_volume_on_next_process_) {
check_volume_on_next_process_ = false;
// We have to wait until the first process call to check the volume,
@ -279,14 +312,14 @@ void AgcManagerDirect::Process(const int16_t* audio,
CheckVolumeAndReset();
}
agc_->Process(audio, length, sample_rate_hz);
agc_->Process(audio_fix, safe_length, sample_rate_hz);
UpdateGain();
if (!disable_digital_adaptive_) {
UpdateCompressor();
}
file_postproc_->Write(audio, length);
file_postproc_->Write(audio_fix, safe_length);
data_dumper_->DumpRaw("experimental_gain_control_compression_gain_db", 1,
&compression_);

View file

@ -56,10 +56,10 @@ class AgcManagerDirect final {
~AgcManagerDirect();
int Initialize();
void AnalyzePreProcess(int16_t* audio,
void AnalyzePreProcess(float* audio,
int num_channels,
size_t samples_per_channel);
void Process(const int16_t* audio, size_t length, int sample_rate_hz);
void Process(const float* audio, size_t length, int sample_rate_hz);
// Call when the capture stream has been muted/unmuted. This causes the
// manager to disregard all incoming audio; chances are good it's background

View file

@ -169,29 +169,11 @@ void AudioBuffer::InitForNewData() {
}
}
const int16_t* const* AudioBuffer::channels_const() const {
return data_->ibuf_const()->channels();
}
int16_t* const* AudioBuffer::channels() {
return data_->ibuf()->channels();
}
const int16_t* const* AudioBuffer::split_bands_const(size_t channel) const {
return split_data_.get() ? split_data_->ibuf_const()->bands(channel)
: data_->ibuf_const()->bands(channel);
}
int16_t* const* AudioBuffer::split_bands(size_t channel) {
return split_data_.get() ? split_data_->ibuf()->bands(channel)
: data_->ibuf()->bands(channel);
}
const int16_t* const* AudioBuffer::split_channels_const(Band band) const {
const float* const* AudioBuffer::split_channels_const_f(Band band) const {
if (split_data_.get()) {
return split_data_->ibuf_const()->channels(band);
return split_data_->fbuf_const()->channels(band);
} else {
return band == kBand0To8kHz ? data_->ibuf_const()->channels() : nullptr;
return band == kBand0To8kHz ? data_->fbuf_const()->channels() : nullptr;
}
}
@ -308,4 +290,29 @@ void AudioBuffer::MergeFrequencyBands() {
splitting_filter_->Synthesis(split_data_.get(), data_.get());
}
// Copies the split-band data of |channel| into the caller-provided int16
// two-dimensional array, converting each floatS16 sample to int16.
void AudioBuffer::CopySplitChannelDataTo(size_t channel,
                                         int16_t* const* split_band_data) {
  const size_t bands = num_bands();
  const size_t frames = num_frames_per_band();
  for (size_t band = 0; band < bands; ++band) {
    int16_t* destination = split_band_data[band];
    const float* source = split_bands_f(channel)[band];
    RTC_DCHECK(destination);
    RTC_DCHECK(source);
    for (size_t frame = 0; frame < frames; ++frame) {
      destination[frame] = FloatS16ToS16(source[frame]);
    }
  }
}
// Copies the caller-provided int16 two-dimensional array into the
// split-band data of |channel|, widening each sample to floatS16 scale.
void AudioBuffer::CopySplitChannelDataFrom(
    size_t channel,
    const int16_t* const* split_band_data) {
  const size_t bands = num_bands();
  const size_t frames = num_frames_per_band();
  for (size_t band = 0; band < bands; ++band) {
    const int16_t* source = split_band_data[band];
    float* destination = split_bands_f(channel)[band];
    RTC_DCHECK(source);
    RTC_DCHECK(destination);
    for (size_t frame = 0; frame < frames; ++frame) {
      destination[frame] = source[frame];
    }
  }
}
} // namespace webrtc

View file

@ -52,8 +52,6 @@ class AudioBuffer {
// Where:
// 0 <= channel < |num_proc_channels_|
// 0 <= sample < |proc_num_frames_|
int16_t* const* channels();
const int16_t* const* channels_const() const;
float* const* channels_f();
const float* const* channels_const_f() const;
@ -64,8 +62,6 @@ class AudioBuffer {
// 0 <= channel < |num_proc_channels_|
// 0 <= band < |num_bands_|
// 0 <= sample < |num_split_frames_|
int16_t* const* split_bands(size_t channel);
const int16_t* const* split_bands_const(size_t channel) const;
float* const* split_bands_f(size_t channel);
const float* const* split_bands_const_f(size_t channel) const;
@ -76,7 +72,7 @@ class AudioBuffer {
// 0 <= band < |num_bands_|
// 0 <= channel < |num_proc_channels_|
// 0 <= sample < |num_split_frames_|
const int16_t* const* split_channels_const(Band band) const;
const float* const* split_channels_const_f(Band band) const;
// Use for int16 interleaved data.
void DeinterleaveFrom(const AudioFrame* audioFrame);
@ -93,6 +89,17 @@ class AudioBuffer {
// Recombine the different bands into one signal.
void MergeFrequencyBands();
// Copies the split bands data into the integer two-dimensional array.
void CopySplitChannelDataTo(size_t channel, int16_t* const* split_band_data);
// Copies the data in the integer two-dimensional array into the split_bands
// data.
void CopySplitChannelDataFrom(size_t channel,
const int16_t* const* split_band_data);
static const size_t kMaxSplitFrameLength = 160;
static const size_t kMaxNumBands = 3;
private:
FRIEND_TEST_ALL_PREFIXES(AudioBufferTest,
SetNumChannelsSetsChannelBuffersNumChannels);

View file

@ -43,9 +43,9 @@ TEST(AudioFrameTest, ConstructFromAudioBuffer) {
// But not the other way. The following will fail:
// non_const_view = other_const_view;
AudioFrameView<int16_t> non_const_int16_view(
buffer.channels(), buffer.num_channels(), buffer.num_frames());
non_const_int16_view.channel(0)[0] = kIntConstant;
EXPECT_EQ(buffer.channels()[0][0], kIntConstant);
AudioFrameView<float> non_const_float_view(
buffer.channels_f(), buffer.num_channels(), buffer.num_frames());
non_const_float_view.channel(0)[0] = kIntConstant;
EXPECT_EQ(buffer.channels_f()[0][0], kIntConstant);
}
} // namespace webrtc

View file

@ -1279,8 +1279,8 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
capture_buffer->num_frames()));
}
capture_input_rms_.Analyze(rtc::ArrayView<const int16_t>(
capture_buffer->channels_const()[0],
capture_input_rms_.Analyze(rtc::ArrayView<const float>(
capture_buffer->channels_const_f()[0],
capture_nonlocked_.capture_processing_format.num_frames()));
const bool log_rms = ++capture_rms_interval_counter_ >= 1000;
if (log_rms) {
@ -1323,12 +1323,12 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
if (constants_.use_experimental_agc &&
public_submodules_->gain_control->is_enabled()) {
private_submodules_->agc_manager->AnalyzePreProcess(
capture_buffer->channels()[0], capture_buffer->num_channels(),
capture_buffer->channels_f()[0], capture_buffer->num_channels(),
capture_nonlocked_.capture_processing_format.num_frames());
if (constants_.use_experimental_agc_process_before_aec) {
private_submodules_->agc_manager->Process(
capture_buffer->channels()[0],
capture_buffer->channels_const_f()[0],
capture_nonlocked_.capture_processing_format.num_frames(),
capture_nonlocked_.capture_processing_format.sample_rate_hz());
}
@ -1419,7 +1419,7 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
public_submodules_->gain_control->is_enabled() &&
!constants_.use_experimental_agc_process_before_aec) {
private_submodules_->agc_manager->Process(
capture_buffer->split_bands_const(0)[kBand0To8kHz],
capture_buffer->split_bands_const_f(0)[kBand0To8kHz],
capture_buffer->num_frames_per_band(), capture_nonlocked_.split_rate);
}
// TODO(peah): Add reporting from AEC3 whether there is echo.
@ -1484,8 +1484,8 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
capture_.stats.output_rms_dbfs = absl::nullopt;
}
capture_output_rms_.Analyze(rtc::ArrayView<const int16_t>(
capture_buffer->channels_const()[0],
capture_output_rms_.Analyze(rtc::ArrayView<const float>(
capture_buffer->channels_const_f()[0],
capture_nonlocked_.capture_processing_format.num_frames()));
if (log_rms) {
RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();

View file

@ -56,6 +56,7 @@ AudioProcessing::Error MapError(int err) {
return AudioProcessing::kUnspecifiedError;
}
}
} // namespace
struct EchoControlMobileImpl::StreamProperties {
@ -131,7 +132,8 @@ void EchoControlMobileImpl::PackRenderAudioBuffer(
size_t num_output_channels,
size_t num_channels,
std::vector<int16_t>* packed_buffer) {
RTC_DCHECK_GE(160, audio->num_frames_per_band());
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
audio->num_frames_per_band());
RTC_DCHECK_EQ(num_channels, audio->num_channels());
// The ordering convention must be followed to pass to the correct AECM.
@ -139,12 +141,14 @@ void EchoControlMobileImpl::PackRenderAudioBuffer(
int render_channel = 0;
for (size_t i = 0; i < num_output_channels; i++) {
for (size_t j = 0; j < audio->num_channels(); j++) {
std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> data_to_buffer;
FloatS16ToS16(audio->split_bands_const_f(render_channel)[kBand0To8kHz],
audio->num_frames_per_band(), data_to_buffer.data());
// Buffer the samples in the render queue.
packed_buffer->insert(
packed_buffer->end(),
audio->split_bands_const(render_channel)[kBand0To8kHz],
(audio->split_bands_const(render_channel)[kBand0To8kHz] +
audio->num_frames_per_band()));
packed_buffer->end(), data_to_buffer.data(),
data_to_buffer.data() + audio->num_frames_per_band());
render_channel = (render_channel + 1) % audio->num_channels();
}
}
@ -174,7 +178,21 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio,
RTC_DCHECK_LT(capture, low_pass_reference_.size());
const int16_t* noisy =
reference_copied_ ? low_pass_reference_[capture].data() : nullptr;
const int16_t* clean = audio->split_bands_const(capture)[kBand0To8kHz];
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
audio->num_frames_per_band());
std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> split_bands_data;
int16_t* split_bands = split_bands_data.data();
const int16_t* clean = split_bands_data.data();
if (audio->split_bands_f(capture)[kBand0To8kHz]) {
FloatS16ToS16(audio->split_bands_f(capture)[kBand0To8kHz],
audio->num_frames_per_band(), split_bands_data.data());
} else {
clean = nullptr;
split_bands = nullptr;
}
if (noisy == NULL) {
noisy = clean;
clean = NULL;
@ -182,8 +200,13 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio,
for (size_t render = 0; render < stream_properties_->num_reverse_channels;
++render) {
err = WebRtcAecm_Process(cancellers_[handle_index]->state(), noisy, clean,
audio->split_bands(capture)[kBand0To8kHz],
audio->num_frames_per_band(), stream_delay_ms);
split_bands, audio->num_frames_per_band(),
stream_delay_ms);
if (split_bands) {
S16ToFloatS16(split_bands, audio->num_frames_per_band(),
audio->split_bands_f(capture)[kBand0To8kHz]);
}
if (err != AudioProcessing::kNoError) {
return MapError(err);
@ -192,9 +215,9 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio,
++handle_index;
}
for (size_t band = 1u; band < audio->num_bands(); ++band) {
memset(audio->split_bands(capture)[band], 0,
memset(audio->split_bands_f(capture)[band], 0,
audio->num_frames_per_band() *
sizeof(audio->split_bands(capture)[band][0]));
sizeof(audio->split_bands_f(capture)[band][0]));
}
}
return AudioProcessing::kNoError;
@ -204,9 +227,9 @@ void EchoControlMobileImpl::CopyLowPassReference(AudioBuffer* audio) {
RTC_DCHECK_LE(audio->num_channels(), low_pass_reference_.size());
reference_copied_ = true;
for (size_t capture = 0; capture < audio->num_channels(); ++capture) {
memcpy(low_pass_reference_[capture].data(),
audio->split_bands_const(capture)[kBand0To8kHz],
audio->num_frames_per_band() * sizeof(int16_t));
FloatS16ToS16(audio->split_bands_const_f(capture)[kBand0To8kHz],
audio->num_frames_per_band(),
low_pass_reference_[capture].data());
}
}

View file

@ -118,25 +118,25 @@ void GainControlImpl::ProcessRenderAudio(
void GainControlImpl::PackRenderAudioBuffer(
AudioBuffer* audio,
std::vector<int16_t>* packed_buffer) {
RTC_DCHECK_GE(160, audio->num_frames_per_band());
std::array<int16_t, 160> mixed_low_pass_data;
rtc::ArrayView<const int16_t> mixed_low_pass;
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
audio->num_frames_per_band());
std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> mixed_low_pass_data;
rtc::ArrayView<const int16_t> mixed_low_pass(mixed_low_pass_data.data(),
audio->num_frames_per_band());
if (audio->num_proc_channels() == 1) {
mixed_low_pass =
rtc::ArrayView<const int16_t>(audio->split_bands_const(0)[kBand0To8kHz],
audio->num_frames_per_band());
FloatS16ToS16(audio->split_bands_const_f(0)[kBand0To8kHz],
audio->num_frames_per_band(), mixed_low_pass_data.data());
} else {
const int num_channels = static_cast<int>(audio->num_channels());
for (size_t i = 0; i < audio->num_frames_per_band(); ++i) {
int32_t value = audio->split_channels_const(kBand0To8kHz)[0][i];
int32_t value =
FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[0][i]);
for (int j = 1; j < num_channels; ++j) {
value += audio->split_channels_const(kBand0To8kHz)[j][i];
value +=
FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[j][i]);
}
mixed_low_pass_data[i] = value / num_channels;
}
mixed_low_pass = rtc::ArrayView<const int16_t>(
mixed_low_pass_data.data(), audio->num_frames_per_band());
}
packed_buffer->clear();
@ -150,17 +150,28 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
}
RTC_DCHECK(num_proc_channels_);
RTC_DCHECK_GE(160, audio->num_frames_per_band());
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
audio->num_frames_per_band());
RTC_DCHECK_EQ(audio->num_channels(), *num_proc_channels_);
RTC_DCHECK_LE(*num_proc_channels_, gain_controllers_.size());
int16_t split_band_data[AudioBuffer::kMaxNumBands]
[AudioBuffer::kMaxSplitFrameLength];
int16_t* split_bands[AudioBuffer::kMaxNumBands] = {
split_band_data[0], split_band_data[1], split_band_data[2]};
if (mode_ == kAdaptiveAnalog) {
int capture_channel = 0;
for (auto& gain_controller : gain_controllers_) {
gain_controller->set_capture_level(analog_capture_level_);
int err = WebRtcAgc_AddMic(
gain_controller->state(), audio->split_bands(capture_channel),
audio->num_bands(), audio->num_frames_per_band());
audio->CopySplitChannelDataTo(capture_channel, split_bands);
int err =
WebRtcAgc_AddMic(gain_controller->state(), split_bands,
audio->num_bands(), audio->num_frames_per_band());
audio->CopySplitChannelDataFrom(capture_channel, split_bands);
if (err != AudioProcessing::kNoError) {
return AudioProcessing::kUnspecifiedError;
@ -171,10 +182,15 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
int capture_channel = 0;
for (auto& gain_controller : gain_controllers_) {
int32_t capture_level_out = 0;
int err = WebRtcAgc_VirtualMic(
gain_controller->state(), audio->split_bands(capture_channel),
audio->num_bands(), audio->num_frames_per_band(),
analog_capture_level_, &capture_level_out);
audio->CopySplitChannelDataTo(capture_channel, split_bands);
int err =
WebRtcAgc_VirtualMic(gain_controller->state(), split_bands,
audio->num_bands(), audio->num_frames_per_band(),
analog_capture_level_, &capture_level_out);
audio->CopySplitChannelDataFrom(capture_channel, split_bands);
gain_controller->set_capture_level(capture_level_out);
@ -199,7 +215,8 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio,
}
RTC_DCHECK(num_proc_channels_);
RTC_DCHECK_GE(160, audio->num_frames_per_band());
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
audio->num_frames_per_band());
RTC_DCHECK_EQ(audio->num_channels(), *num_proc_channels_);
stream_is_saturated_ = false;
@ -208,15 +225,22 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio,
int32_t capture_level_out = 0;
uint8_t saturation_warning = 0;
int16_t split_band_data[AudioBuffer::kMaxNumBands]
[AudioBuffer::kMaxSplitFrameLength];
int16_t* split_bands[AudioBuffer::kMaxNumBands] = {
split_band_data[0], split_band_data[1], split_band_data[2]};
audio->CopySplitChannelDataTo(capture_channel, split_bands);
// The call to stream_has_echo() is ok from a deadlock perspective
// as the capture lock is allready held.
int err = WebRtcAgc_Process(
gain_controller->state(), audio->split_bands_const(capture_channel),
audio->num_bands(), audio->num_frames_per_band(),
audio->split_bands(capture_channel),
gain_controller->state(), split_bands, audio->num_bands(),
audio->num_frames_per_band(), split_bands,
gain_controller->get_capture_level(), &capture_level_out,
stream_has_echo, &saturation_warning);
audio->CopySplitChannelDataFrom(capture_channel, split_bands);
if (err != AudioProcessing::kNoError) {
return AudioProcessing::kUnspecifiedError;
}

View file

@ -40,8 +40,8 @@ void LevelEstimatorImpl::ProcessStream(AudioBuffer* audio) {
}
for (size_t i = 0; i < audio->num_channels(); i++) {
rms_->Analyze(rtc::ArrayView<const int16_t>(audio->channels_const()[i],
audio->num_frames()));
rms_->Analyze(rtc::ArrayView<const float>(audio->channels_const_f()[i],
audio->num_frames()));
}
}

View file

@ -96,11 +96,18 @@ LowCutFilter::~LowCutFilter() {}
void LowCutFilter::Process(AudioBuffer* audio) {
RTC_DCHECK(audio);
RTC_DCHECK_GE(160, audio->num_frames_per_band());
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
audio->num_frames_per_band());
RTC_DCHECK_EQ(filters_.size(), audio->num_channels());
for (size_t i = 0; i < filters_.size(); i++) {
filters_[i]->Process(audio->split_bands(i)[kBand0To8kHz],
audio->num_frames_per_band());
std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> samples_fixed;
FloatS16ToS16(audio->split_bands_f(i)[kBand0To8kHz],
audio->num_frames_per_band(), samples_fixed.data());
filters_[i]->Process(samples_fixed.data(), audio->num_frames_per_band());
S16ToFloatS16(samples_fixed.data(), audio->num_frames_per_band(),
audio->split_bands_f(i)[kBand0To8kHz]);
}
}

View file

@ -101,8 +101,16 @@ void NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
WebRtcNs_Process(suppressors_[i]->state(), audio->split_bands_const_f(i),
audio->num_bands(), audio->split_bands_f(i));
#elif defined(WEBRTC_NS_FIXED)
WebRtcNsx_Process(suppressors_[i]->state(), audio->split_bands_const(i),
audio->num_bands(), audio->split_bands(i));
int16_t split_band_data[AudioBuffer::kMaxNumBands]
[AudioBuffer::kMaxSplitFrameLength];
int16_t* split_bands[AudioBuffer::kMaxNumBands] = {
split_band_data[0], split_band_data[1], split_band_data[2]};
audio->CopySplitChannelDataTo(i, split_bands);
WebRtcNsx_Process(suppressors_[i]->state(), split_bands, audio->num_bands(),
split_bands);
audio->CopySplitChannelDataFrom(i, split_bands);
#endif
}
}

View file

@ -74,6 +74,27 @@ void RmsLevel::Analyze(rtc::ArrayView<const int16_t> data) {
max_sum_square_ = std::max(max_sum_square_, sum_square);
}
// Accumulates the energy of |data| (floatS16 scale) into the running level
// statistics. Samples are clamped to the int16 range and truncated so the
// float and int16 overloads accumulate over identical quantized values.
void RmsLevel::Analyze(rtc::ArrayView<const float> data) {
  if (data.empty()) {
    return;
  }
  CheckBlockSize(data.size());
  float block_energy = 0.f;
  for (const float sample : data) {
    // NOTE(review): truncation (not rounding) is used here — presumably
    // intentional for parity with prior behavior; confirm.
    const int16_t quantized = static_cast<int16_t>(
        std::min(std::max(sample, -32768.f), 32767.f));
    block_energy += quantized * quantized;
  }
  RTC_DCHECK_GE(block_energy, 0.f);
  sum_square_ += block_energy;
  sample_count_ += data.size();
  max_sum_square_ = std::max(max_sum_square_, block_energy);
}
void RmsLevel::AnalyzeMuted(size_t length) {
CheckBlockSize(length);
sample_count_ += length;

View file

@ -45,6 +45,7 @@ class RmsLevel {
// Pass each chunk of audio to Analyze() to accumulate the level.
void Analyze(rtc::ArrayView<const int16_t> data);
void Analyze(rtc::ArrayView<const float> data);
// If all samples with the given |length| have a magnitude of zero, this is
// a shortcut to avoid some computation.

View file

@ -34,9 +34,18 @@ std::unique_ptr<RmsLevel> RunTest(rtc::ArrayView<const int16_t> input) {
return level;
}
std::vector<int16_t> CreateSinusoid(int frequency_hz,
int amplitude,
size_t num_samples) {
// Feeds |input| to a fresh RmsLevel in fixed-size blocks and returns the
// analyzer; any trailing partial block is ignored, matching the int16
// overload of RunTest.
std::unique_ptr<RmsLevel> RunTest(rtc::ArrayView<const float> input) {
  std::unique_ptr<RmsLevel> level(new RmsLevel);
  size_t offset = 0;
  while (offset + kBlockSizeSamples <= input.size()) {
    level->Analyze(input.subview(offset, kBlockSizeSamples));
    offset += kBlockSizeSamples;
  }
  return level;
}
std::vector<int16_t> CreateInt16Sinusoid(int frequency_hz,
int amplitude,
size_t num_samples) {
std::vector<int16_t> x(num_samples);
for (size_t n = 0; n < num_samples; ++n) {
x[n] = rtc::saturated_cast<int16_t>(
@ -44,16 +53,40 @@ std::vector<int16_t> CreateSinusoid(int frequency_hz,
}
return x;
}
// Builds a float sinusoid by generating the int16 reference signal and
// widening each sample to float (no rescaling), so float and fixed-point
// tests see numerically identical sample values.
std::vector<float> CreateFloatSinusoid(int frequency_hz,
                                       int amplitude,
                                       size_t num_samples) {
  const std::vector<int16_t> fixed_point =
      CreateInt16Sinusoid(frequency_hz, amplitude, num_samples);
  return std::vector<float>(fixed_point.begin(), fixed_point.end());
}
} // namespace
// Verifies that the float and fixed-point analysis paths of RmsLevel yield
// identical statistics for the same sinusoid.
// NOTE(review): the test name contains a typo ("Indentity"); kept unchanged
// here to avoid altering the gtest filter name.
TEST(RmsLevelTest, VerifyIndentityBetweenFloatAndFix) {
  auto x_f = CreateFloatSinusoid(1000, INT16_MAX, kSampleRateHz);
  // Fix: the fixed-point signal must come from CreateInt16Sinusoid.
  // Previously both signals used CreateFloatSinusoid, so only the float
  // path was exercised and the cross-path comparison was vacuous.
  auto x_i = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
  auto level_f = RunTest(x_f);
  auto level_i = RunTest(x_i);
  int avg_i = level_i->Average();
  int avg_f = level_f->Average();
  EXPECT_EQ(3, avg_i);  // -3 dBFS
  EXPECT_EQ(avg_f, avg_i);
}
TEST(RmsLevelTest, Run1000HzFullScale) {
auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
auto level = RunTest(x);
EXPECT_EQ(3, level->Average()); // -3 dBFS
}
TEST(RmsLevelTest, Run1000HzFullScaleAverageAndPeak) {
auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
auto level = RunTest(x);
auto stats = level->AverageAndPeak();
EXPECT_EQ(3, stats.average); // -3 dBFS
@ -61,7 +94,7 @@ TEST(RmsLevelTest, Run1000HzFullScaleAverageAndPeak) {
}
TEST(RmsLevelTest, Run1000HzHalfScale) {
auto x = CreateSinusoid(1000, INT16_MAX / 2, kSampleRateHz);
auto x = CreateInt16Sinusoid(1000, INT16_MAX / 2, kSampleRateHz);
auto level = RunTest(x);
EXPECT_EQ(9, level->Average()); // -9 dBFS
}
@ -93,14 +126,14 @@ TEST(RmsLevelTest, NoSamplesAverageAndPeak) {
}
TEST(RmsLevelTest, PollTwice) {
auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
auto level = RunTest(x);
level->Average();
EXPECT_EQ(127, level->Average()); // Stats should be reset at this point.
}
TEST(RmsLevelTest, Reset) {
auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
auto level = RunTest(x);
level->Reset();
EXPECT_EQ(127, level->Average()); // Stats should be reset at this point.
@ -108,7 +141,7 @@ TEST(RmsLevelTest, Reset) {
// Inserts 1 second of full-scale sinusoid, followed by 1 second of muted.
TEST(RmsLevelTest, ProcessMuted) {
auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
auto level = RunTest(x);
const size_t kBlocksPerSecond = rtc::CheckedDivExact(
static_cast<size_t>(kSampleRateHz), kBlockSizeSamples);
@ -123,8 +156,8 @@ TEST(RmsLevelTest, ProcessMuted) {
// to the vast majority of the signal being half-scale, and the peak to be
// -3 dBFS.
TEST(RmsLevelTest, RunHalfScaleAndInsertFullScale) {
auto half_scale = CreateSinusoid(1000, INT16_MAX / 2, kSampleRateHz);
auto full_scale = CreateSinusoid(1000, INT16_MAX, kSampleRateHz / 100);
auto half_scale = CreateInt16Sinusoid(1000, INT16_MAX / 2, kSampleRateHz);
auto full_scale = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz / 100);
auto x = half_scale;
x.insert(x.end(), full_scale.begin(), full_scale.end());
x.insert(x.end(), half_scale.begin(), half_scale.end());
@ -137,10 +170,10 @@ TEST(RmsLevelTest, RunHalfScaleAndInsertFullScale) {
}
TEST(RmsLevelTest, ResetOnBlockSizeChange) {
auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz);
auto level = RunTest(x);
// Create a new signal with half amplitude, but double block length.
auto y = CreateSinusoid(1000, INT16_MAX / 2, kBlockSizeSamples * 2);
auto y = CreateInt16Sinusoid(1000, INT16_MAX / 2, kBlockSizeSamples * 2);
level->Analyze(y);
auto stats = level->AverageAndPeak();
// Expect all stats to only be influenced by the last signal (y), since the

View file

@ -58,24 +58,25 @@ bool VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
rtc::CritScope cs(crit_);
RTC_DCHECK(enabled_);
RTC_DCHECK_GE(160, audio->num_frames_per_band());
std::array<int16_t, 160> mixed_low_pass_data;
rtc::ArrayView<const int16_t> mixed_low_pass;
RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
audio->num_frames_per_band());
std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> mixed_low_pass_data;
rtc::ArrayView<const int16_t> mixed_low_pass(mixed_low_pass_data.data(),
audio->num_frames_per_band());
if (audio->num_proc_channels() == 1) {
mixed_low_pass =
rtc::ArrayView<const int16_t>(audio->split_bands_const(0)[kBand0To8kHz],
audio->num_frames_per_band());
FloatS16ToS16(audio->split_bands_const_f(0)[kBand0To8kHz],
audio->num_frames_per_band(), mixed_low_pass_data.data());
} else {
const int num_channels = static_cast<int>(audio->num_channels());
for (size_t i = 0; i < audio->num_frames_per_band(); ++i) {
int32_t value = audio->split_channels_const(kBand0To8kHz)[0][i];
int32_t value =
FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[0][i]);
for (int j = 1; j < num_channels; ++j) {
value += audio->split_channels_const(kBand0To8kHz)[j][i];
value +=
FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[j][i]);
}
mixed_low_pass_data[i] = value / num_channels;
}
mixed_low_pass = rtc::ArrayView<const int16_t>(
mixed_low_pass_data.data(), audio->num_frames_per_band());
}
int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,

View file

@ -1 +1 @@
bc19d9e9fd9503cad02f3b0c21cbd63ed3c5f22c
d67b879f3b4a31b3c4f3587bd4418be5f9df5105