diff --git a/data/audio_processing/output_data_fixed.pb b/data/audio_processing/output_data_fixed.pb
index eb525c32a6..6e36d5594a 100644
Binary files a/data/audio_processing/output_data_fixed.pb and b/data/audio_processing/output_data_fixed.pb differ
diff --git a/data/audio_processing/output_data_float.pb b/data/audio_processing/output_data_float.pb
index 79619e7fda..0fd2fe8cd8 100644
Binary files a/data/audio_processing/output_data_float.pb and b/data/audio_processing/output_data_float.pb differ
diff --git a/webrtc/common_audio/include/audio_util.h b/webrtc/common_audio/include/audio_util.h
index 5a4e815131..767b21c5aa 100644
--- a/webrtc/common_audio/include/audio_util.h
+++ b/webrtc/common_audio/include/audio_util.h
@@ -49,7 +49,7 @@ static inline int16_t FloatS16ToS16(float v) {
 }
 
 static inline float FloatToFloatS16(float v) {
-  return v > 0 ? v * limits_int16::max() : -v * limits_int16::min();
+  return v * (v > 0 ? limits_int16::max() : -limits_int16::min());
 }
 
 static inline float FloatS16ToFloat(float v) {
diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc
index cd8d93aa70..63d69cfb30 100644
--- a/webrtc/modules/audio_processing/audio_buffer.cc
+++ b/webrtc/modules/audio_processing/audio_buffer.cc
@@ -51,18 +51,11 @@ int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
   return -1;
 }
 
-void StereoToMono(const float* left, const float* right, float* out,
+template <typename T>
+void StereoToMono(const T* left, const T* right, T* out,
                   int samples_per_channel) {
-  for (int i = 0; i < samples_per_channel; ++i) {
+  for (int i = 0; i < samples_per_channel; ++i)
     out[i] = (left[i] + right[i]) / 2;
-  }
-}
-
-void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
-                  int samples_per_channel) {
-  for (int i = 0; i < samples_per_channel; ++i) {
-    out[i] = (left[i] + right[i]) >> 1;
-  }
 }
 
 }  // namespace
@@ -114,13 +107,7 @@ class IFChannelBuffer {
   void RefreshI() {
     if (!ivalid_) {
       assert(fvalid_);
-      const float* const float_data = fbuf_.data();
-      int16_t* const int_data = ibuf_.data();
-      const int length = ibuf_.length();
-      for (int i = 0; i < length; ++i)
-        int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(),
-                                     float_data[i],
-                                     std::numeric_limits<int16_t>::min());
+      FloatS16ToS16(fbuf_.data(), ibuf_.length(), ibuf_.data());
       ivalid_ = true;
     }
   }
@@ -228,10 +215,10 @@ void AudioBuffer::CopyFrom(const float* const* data,
     data_ptr = process_buffer_->channels();
   }
 
-  // Convert to int16.
+  // Convert to the S16 range.
   for (int i = 0; i < num_proc_channels_; ++i) {
-    FloatToS16(data_ptr[i], proc_samples_per_channel_,
-               channels_->ibuf()->channel(i));
+    FloatToFloatS16(data_ptr[i], proc_samples_per_channel_,
+                    channels_->fbuf()->channel(i));
   }
 }
 
@@ -241,16 +228,15 @@ void AudioBuffer::CopyTo(int samples_per_channel,
   assert(samples_per_channel == output_samples_per_channel_);
   assert(ChannelsFromLayout(layout) == num_proc_channels_);
 
-  // Convert to float.
+  // Convert to the float range.
   float* const* data_ptr = data;
   if (output_samples_per_channel_ != proc_samples_per_channel_) {
     // Convert to an intermediate buffer for subsequent resampling.
     data_ptr = process_buffer_->channels();
   }
   for (int i = 0; i < num_proc_channels_; ++i) {
-    S16ToFloat(channels_->ibuf()->channel(i),
-               proc_samples_per_channel_,
-               data_ptr[i]);
+    FloatS16ToFloat(channels_->fbuf()->channel(i), proc_samples_per_channel_,
+                    data_ptr[i]);
   }
 
   // Resample.
@@ -449,12 +435,7 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
     // Downmix directly; no explicit deinterleaving needed.
     int16_t* downmixed = channels_->ibuf()->channel(0);
     for (int i = 0; i < input_samples_per_channel_; ++i) {
-      // HACK(ajm): The downmixing in the int16_t path is in practice never
-      // called from production code. We do this weird scaling to and from float
-      // to satisfy tests checking for bit-exactness with the float path.
-      float downmix_float = (S16ToFloat(frame->data_[i * 2]) +
-                             S16ToFloat(frame->data_[i * 2 + 1])) / 2;
-      downmixed[i] = FloatToS16(downmix_float);
+      downmixed[i] = (frame->data_[i * 2] + frame->data_[i * 2 + 1]) / 2;
     }
   } else {
     assert(num_proc_channels_ == num_input_channels_);
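
Side note on the audio_util.h change: the rewrite is purely algebraic, since both branches multiply v by a positive scale factor (32767 for positive input, 32768 for negative). Below is a minimal standalone sketch verifying the equivalence; the names are local to the sketch, not taken from the WebRTC headers.

// Standalone check that the refactored FloatToFloatS16 matches the old
// two-branch form: positive input scales by max() (32767), negative input
// by -min() (32768). Local re-definitions, not the WebRTC headers.
#include <cassert>
#include <cstdint>
#include <limits>

typedef std::numeric_limits<int16_t> limits_int16;

static float OldFloatToFloatS16(float v) {
  return v > 0 ? v * limits_int16::max() : -v * limits_int16::min();
}

static float NewFloatToFloatS16(float v) {
  return v * (v > 0 ? limits_int16::max() : -limits_int16::min());
}

int main() {
  // Steps of 1/8 are exact in binary floating point, so the comparison is
  // free of rounding noise.
  for (float v = -1.0f; v <= 1.0f; v += 0.125f)
    assert(OldFloatToFloatS16(v) == NewFloatToFloatS16(v));
  assert(NewFloatToFloatS16(1.0f) == 32767.0f);
  assert(NewFloatToFloatS16(-1.0f) == -32768.0f);
  return 0;
}

Folding the sign test into the scale factor keeps one multiply per sample and makes the S16 full-scale asymmetry (32767 up, 32768 down) explicit.
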
diff --git a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
index 282da94782..401391aa2b 100644
--- a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
+++ b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
@@ -96,14 +96,13 @@ int TruncateToMultipleOf10(int value) {
 
 void MixStereoToMono(const float* stereo, float* mono,
                      int samples_per_channel) {
-  for (int i = 0; i < samples_per_channel; ++i) {
+  for (int i = 0; i < samples_per_channel; ++i)
     mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) / 2;
-  }
 }
 
 void MixStereoToMono(const int16_t* stereo, int16_t* mono,
                      int samples_per_channel) {
-  for (int i = 0; i < samples_per_channel; i++)
+  for (int i = 0; i < samples_per_channel; ++i)
     mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) >> 1;
 }
 
@@ -1650,7 +1649,7 @@ TEST_F(ApmTest, DebugDumpFromFileHandle) {
 #endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
 }
 
-TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
+TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) {
   audioproc::OutputData ref_data;
   OpenFileAndReadMessage(ref_filename_, &ref_data);
 
@@ -1679,7 +1678,8 @@
     Init(fapm.get());
 
     ChannelBuffer<int16_t> output_cb(samples_per_channel, num_input_channels);
-    scoped_ptr<int16_t[]> output_int16(new int16_t[output_length]);
+    ChannelBuffer<int16_t> output_int16(samples_per_channel,
+                                        num_input_channels);
 
     int analog_level = 127;
     while (ReadFrame(far_file_, revframe_, revfloat_cb_.get()) &&
@@ -1701,7 +1701,9 @@
      EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level));
 
      EXPECT_NOERR(apm_->ProcessStream(frame_));
-      // TODO(ajm): Update to support different output rates.
+      Deinterleave(frame_->data_, samples_per_channel, num_output_channels,
+                   output_int16.channels());
+
      EXPECT_NOERR(fapm->ProcessStream(
          float_cb_->channels(),
          samples_per_channel,
@@ -1711,24 +1713,34 @@
          LayoutFromChannels(num_output_channels),
          float_cb_->channels()));
 
-      // Convert to interleaved int16.
       FloatToS16(float_cb_->data(), output_length, output_cb.data());
-      Interleave(output_cb.channels(),
-                 samples_per_channel,
-                 num_output_channels,
-                 output_int16.get());
-      // Verify float and int16 paths produce identical output.
-      EXPECT_EQ(0, memcmp(frame_->data_, output_int16.get(), output_length));
+      for (int j = 0; j < num_output_channels; ++j) {
+        float variance = 0;
+        float snr = ComputeSNR(output_int16.channel(j), output_cb.channel(j),
+                               samples_per_channel, &variance);
+#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
+        // There are a few chunks in the fixed-point profile that give low SNR.
+        // Listening confirmed the difference is acceptable.
+        const float kVarianceThreshold = 150;
+        const float kSNRThreshold = 10;
+#else
+        const float kVarianceThreshold = 20;
+        const float kSNRThreshold = 20;
+#endif
+        // Skip frames with low energy.
+        if (sqrt(variance) > kVarianceThreshold) {
+          EXPECT_LT(kSNRThreshold, snr);
+        }
+      }
 
       analog_level = fapm->gain_control()->stream_analog_level();
       EXPECT_EQ(apm_->gain_control()->stream_analog_level(),
                 fapm->gain_control()->stream_analog_level());
       EXPECT_EQ(apm_->echo_cancellation()->stream_has_echo(),
                 fapm->echo_cancellation()->stream_has_echo());
-      EXPECT_EQ(apm_->voice_detection()->stream_has_voice(),
-                fapm->voice_detection()->stream_has_voice());
-      EXPECT_EQ(apm_->noise_suppression()->speech_probability(),
-                fapm->noise_suppression()->speech_probability());
+      EXPECT_NEAR(apm_->noise_suppression()->speech_probability(),
+                  fapm->noise_suppression()->speech_probability(),
+                  0.0005);
 
       // Reset in case of downmixing.
       frame_->num_channels_ = test->num_input_channels();
@@ -2109,7 +2121,9 @@ class AudioProcessingTest
                             int num_output_channels,
                             int num_reverse_channels,
                             std::string output_file_prefix) {
-    scoped_ptr<AudioProcessing> ap(AudioProcessing::Create());
+    Config config;
+    config.Set<ExperimentalAgc>(new ExperimentalAgc(false));
+    scoped_ptr<AudioProcessing> ap(AudioProcessing::Create(config));
     EnableAllAPComponents(ap.get());
     ap->Initialize(input_rate,
                    output_rate,
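
The test now compares the two paths channel by channel: the int16 path's interleaved frame_->data_ is split into output_int16 via Deinterleave() before the per-channel SNR check. Here is a self-contained sketch of the deinterleave semantics the test relies on; it is illustrative only, not WebRTC's actual Deinterleave from webrtc/common_audio/include/audio_util.h.

// Illustrative only: splits interleaved samples [L0 R0 L1 R1 ...] into
// per-channel buffers {L0 L1 ...}, {R0 R1 ...}, matching what the test
// expects when it deinterleaves frame_->data_ into output_int16.
#include <cassert>
#include <cstdint>

template <typename T>
void DeinterleaveSketch(const T* interleaved, int samples_per_channel,
                        int num_channels, T* const* deinterleaved) {
  for (int ch = 0; ch < num_channels; ++ch) {
    T* channel = deinterleaved[ch];
    for (int i = 0; i < samples_per_channel; ++i)
      channel[i] = interleaved[i * num_channels + ch];
  }
}

int main() {
  const int16_t interleaved[] = {10, -10, 20, -20, 30, -30};
  int16_t left[3];
  int16_t right[3];
  int16_t* const channels[] = {left, right};
  DeinterleaveSketch(interleaved, 3, 2, channels);
  assert(left[0] == 10 && left[1] == 20 && left[2] == 30);
  assert(right[0] == -10 && right[1] == -20 && right[2] == -30);
  return 0;
}
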
diff --git a/webrtc/modules/audio_processing/test/test_utils.h b/webrtc/modules/audio_processing/test/test_utils.h
index 61edd8f35b..a99f3427de 100644
--- a/webrtc/modules/audio_processing/test/test_utils.h
+++ b/webrtc/modules/audio_processing/test/test_utils.h
@@ -8,6 +8,7 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <math.h>
 #include <stdio.h>
 
 #include "webrtc/audio_processing/debug.pb.h"
@@ -153,4 +154,26 @@ static inline bool ReadMessageFromFile(FILE* file,
   return msg->ParseFromArray(bytes.get(), size);
 }
 
+template <typename T>
+float ComputeSNR(const T* ref, const T* test, int length, float* variance) {
+  float mse = 0;
+  float mean = 0;
+  *variance = 0;
+  for (int i = 0; i < length; ++i) {
+    T error = ref[i] - test[i];
+    mse += error * error;
+    *variance += ref[i] * ref[i];
+    mean += ref[i];
+  }
+  mse /= length;
+  *variance /= length;
+  mean /= length;
+  *variance -= mean * mean;
+
+  float snr = 100;  // We assign 100 dB to the zero-error case.
+  if (mse > 0)
+    snr = 10 * log10(*variance / mse);
+  return snr;
+}
+
 }  // namespace webrtc
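
A brief usage sketch for the new helper, assuming a WebRTC checkout where test_utils.h and its generated debug.pb.h dependency are on the include path (otherwise the template above can be pasted directly). The out-parameter is the population variance of ref, i.e. E[x^2] minus the squared mean, and a bit-exact match reports the 100 dB ceiling rather than an infinite SNR.

// Hypothetical usage of webrtc::ComputeSNR from the hunk above. A zero-mean
// +/-16384 square wave compared against itself: mse is 0, so the helper
// reports the 100 dB zero-error ceiling, and variance equals 16384^2.
#include <cassert>
#include <cstdint>

#include "webrtc/modules/audio_processing/test/test_utils.h"

int main() {
  int16_t signal[32];
  for (int i = 0; i < 32; ++i)
    signal[i] = (i % 2) ? 16384 : -16384;  // Zero mean by construction.
  float variance = 0;
  float snr = webrtc::ComputeSNR(signal, signal, 32, &variance);
  assert(snr == 100);                     // Zero-error convention.
  assert(variance == 16384.f * 16384.f);  // E[x^2] - mean^2, mean == 0.
  return 0;
}
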