From 8328e7c44d59bb9fcbc7f8a033beb3d073929518 Mon Sep 17 00:00:00 2001
From: "andrew@webrtc.org"
Date: Fri, 31 Oct 2014 04:58:14 +0000
Subject: [PATCH] Revert "Revert part of r7561, "Refactor audio conversion functions.""

This restores the conversion changes to AudioProcessing originally added in
r7561, with minor alterations to ensure it passes all tests.

TBR=kwiberg

Review URL: https://webrtc-codereview.appspot.com/28899004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7574 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 data/audio_processing/output_data_fixed.pb   | Bin 188 -> 188 bytes
 data/audio_processing/output_data_float.pb   | Bin 1404 -> 1404 bytes
 webrtc/common_audio/include/audio_util.h     |  2 +-
 .../modules/audio_processing/audio_buffer.cc | 41 ++++----------
 .../test/audio_processing_unittest.cc        | 50 +++++++++++-------
 .../audio_processing/test/test_utils.h       | 23 ++++++++
 6 files changed, 67 insertions(+), 49 deletions(-)

diff --git a/data/audio_processing/output_data_fixed.pb b/data/audio_processing/output_data_fixed.pb
index eb525c32a615eba13566863e13c6f72a3ca5f1d9..6e36d5594ab36bdfa11952265f54d682afa4b494 100644
GIT binary patch
delta 90
delta 81

diff --git a/data/audio_processing/output_data_float.pb b/data/audio_processing/output_data_float.pb
index 79619e7fda15cd321254c39c1007bd1dbd295ca7..0fd2fe8cd845015c59aa78b735bc2f0004b06a75 100644
GIT binary patch
delta 295
delta 304

diff --git a/webrtc/common_audio/include/audio_util.h b/webrtc/common_audio/include/audio_util.h
index 5a4e815131..767b21c5aa 100644
--- a/webrtc/common_audio/include/audio_util.h
+++ b/webrtc/common_audio/include/audio_util.h
@@ -49,7 +49,7 @@ static inline int16_t FloatS16ToS16(float v) {
 }
 
 static inline float FloatToFloatS16(float v) {
-  return v > 0 ? v * limits_int16::max() : -v * limits_int16::min();
+  return v * (v > 0 ? limits_int16::max() : -limits_int16::min());
 }
 
 static inline float FloatS16ToFloat(float v) {
diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc
index cd8d93aa70..63d69cfb30 100644
--- a/webrtc/modules/audio_processing/audio_buffer.cc
+++ b/webrtc/modules/audio_processing/audio_buffer.cc
@@ -51,18 +51,11 @@ int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
   return -1;
 }
 
-void StereoToMono(const float* left, const float* right, float* out,
+template <typename T>
+void StereoToMono(const T* left, const T* right, T* out,
                   int samples_per_channel) {
-  for (int i = 0; i < samples_per_channel; ++i) {
+  for (int i = 0; i < samples_per_channel; ++i)
     out[i] = (left[i] + right[i]) / 2;
-  }
-}
-
-void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
-                  int samples_per_channel) {
-  for (int i = 0; i < samples_per_channel; ++i) {
-    out[i] = (left[i] + right[i]) >> 1;
-  }
 }
 
 }  // namespace
@@ -114,13 +107,7 @@ class IFChannelBuffer {
   void RefreshI() {
     if (!ivalid_) {
       assert(fvalid_);
-      const float* const float_data = fbuf_.data();
-      int16_t* const int_data = ibuf_.data();
-      const int length = ibuf_.length();
-      for (int i = 0; i < length; ++i)
-        int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(),
-                                     float_data[i],
-                                     std::numeric_limits<int16_t>::min());
+      FloatS16ToS16(fbuf_.data(), ibuf_.length(), ibuf_.data());
       ivalid_ = true;
     }
   }
@@ -228,10 +215,10 @@ void AudioBuffer::CopyFrom(const float* const* data,
     data_ptr = process_buffer_->channels();
   }
 
-  // Convert to int16.
+  // Convert to the S16 range.
   for (int i = 0; i < num_proc_channels_; ++i) {
-    FloatToS16(data_ptr[i], proc_samples_per_channel_,
-               channels_->ibuf()->channel(i));
+    FloatToFloatS16(data_ptr[i], proc_samples_per_channel_,
+                    channels_->fbuf()->channel(i));
   }
 }
 
@@ -241,16 +228,15 @@ void AudioBuffer::CopyTo(int samples_per_channel,
   assert(samples_per_channel == output_samples_per_channel_);
   assert(ChannelsFromLayout(layout) == num_proc_channels_);
 
-  // Convert to float.
+  // Convert to the float range.
   float* const* data_ptr = data;
   if (output_samples_per_channel_ != proc_samples_per_channel_) {
     // Convert to an intermediate buffer for subsequent resampling.
    data_ptr = process_buffer_->channels();
   }
   for (int i = 0; i < num_proc_channels_; ++i) {
-    S16ToFloat(channels_->ibuf()->channel(i),
-               proc_samples_per_channel_,
-               data_ptr[i]);
+    FloatS16ToFloat(channels_->fbuf()->channel(i), proc_samples_per_channel_,
+                    data_ptr[i]);
   }
 
   // Resample.
@@ -449,12 +435,7 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
     // Downmix directly; no explicit deinterleaving needed.
     int16_t* downmixed = channels_->ibuf()->channel(0);
     for (int i = 0; i < input_samples_per_channel_; ++i) {
-      // HACK(ajm): The downmixing in the int16_t path is in practice never
-      // called from production code. We do this weird scaling to and from float
-      // to satisfy tests checking for bit-exactness with the float path.
-      float downmix_float = (S16ToFloat(frame->data_[i * 2]) +
-                             S16ToFloat(frame->data_[i * 2 + 1])) / 2;
-      downmixed[i] = FloatToS16(downmix_float);
+      downmixed[i] = (frame->data_[i * 2] + frame->data_[i * 2 + 1]) / 2;
     }
   } else {
     assert(num_proc_channels_ == num_input_channels_);
diff --git a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
index 282da94782..401391aa2b 100644
--- a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
+++ b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
@@ -96,14 +96,13 @@ int TruncateToMultipleOf10(int value) {
 
 void MixStereoToMono(const float* stereo, float* mono,
                      int samples_per_channel) {
-  for (int i = 0; i < samples_per_channel; ++i) {
+  for (int i = 0; i < samples_per_channel; ++i)
     mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) / 2;
-  }
 }
 
 void MixStereoToMono(const int16_t* stereo, int16_t* mono,
                      int samples_per_channel) {
-  for (int i = 0; i < samples_per_channel; i++)
+  for (int i = 0; i < samples_per_channel; ++i)
     mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) >> 1;
 }
 
@@ -1650,7 +1649,7 @@ TEST_F(ApmTest, DebugDumpFromFileHandle) {
 #endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
 }
 
-TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
+TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) {
   audioproc::OutputData ref_data;
   OpenFileAndReadMessage(ref_filename_, &ref_data);
 
@@ -1679,7 +1678,8 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
     Init(fapm.get());
 
     ChannelBuffer<int16_t> output_cb(samples_per_channel, num_input_channels);
-    scoped_ptr<int16_t[]> output_int16(new int16_t[output_length]);
+    ChannelBuffer<int16_t> output_int16(samples_per_channel,
+                                        num_input_channels);
 
     int analog_level = 127;
     while (ReadFrame(far_file_, revframe_, revfloat_cb_.get()) &&
@@ -1701,7 +1701,9 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
       EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level));
 
       EXPECT_NOERR(apm_->ProcessStream(frame_));
-      // TODO(ajm): Update to support different output rates.
+      Deinterleave(frame_->data_, samples_per_channel, num_output_channels,
+                   output_int16.channels());
+
       EXPECT_NOERR(fapm->ProcessStream(
           float_cb_->channels(),
           samples_per_channel,
@@ -1711,24 +1713,34 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
           LayoutFromChannels(num_output_channels),
          float_cb_->channels()));
 
-      // Convert to interleaved int16.
       FloatToS16(float_cb_->data(), output_length, output_cb.data());
-      Interleave(output_cb.channels(),
-                 samples_per_channel,
-                 num_output_channels,
-                 output_int16.get());
-      // Verify float and int16 paths produce identical output.
-      EXPECT_EQ(0, memcmp(frame_->data_, output_int16.get(), output_length));
+      for (int j = 0; j < num_output_channels; ++j) {
+        float variance = 0;
+        float snr = ComputeSNR(output_int16.channel(j), output_cb.channel(j),
+                               samples_per_channel, &variance);
+#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
+        // There are a few chunks in the fixed-point profile that give low SNR.
+        // Listening confirmed the difference is acceptable.
+        const float kVarianceThreshold = 150;
+        const float kSNRThreshold = 10;
+#else
+        const float kVarianceThreshold = 20;
+        const float kSNRThreshold = 20;
+#endif
+        // Skip frames with low energy.
+        if (sqrt(variance) > kVarianceThreshold) {
+          EXPECT_LT(kSNRThreshold, snr);
+        }
+      }
 
       analog_level = fapm->gain_control()->stream_analog_level();
       EXPECT_EQ(apm_->gain_control()->stream_analog_level(),
                 fapm->gain_control()->stream_analog_level());
       EXPECT_EQ(apm_->echo_cancellation()->stream_has_echo(),
                 fapm->echo_cancellation()->stream_has_echo());
-      EXPECT_EQ(apm_->voice_detection()->stream_has_voice(),
-                fapm->voice_detection()->stream_has_voice());
-      EXPECT_EQ(apm_->noise_suppression()->speech_probability(),
-                fapm->noise_suppression()->speech_probability());
+      EXPECT_NEAR(apm_->noise_suppression()->speech_probability(),
+                  fapm->noise_suppression()->speech_probability(),
+                  0.0005);
 
       // Reset in case of downmixing.
       frame_->num_channels_ = test->num_input_channels();
@@ -2109,7 +2121,9 @@ class AudioProcessingTest
                             int num_output_channels,
                             int num_reverse_channels,
                             std::string output_file_prefix) {
-    scoped_ptr<AudioProcessing> ap(AudioProcessing::Create());
+    Config config;
+    config.Set<ExperimentalAgc>(new ExperimentalAgc(false));
+    scoped_ptr<AudioProcessing> ap(AudioProcessing::Create(config));
     EnableAllAPComponents(ap.get());
     ap->Initialize(input_rate,
                    output_rate,
diff --git a/webrtc/modules/audio_processing/test/test_utils.h b/webrtc/modules/audio_processing/test/test_utils.h
index 61edd8f35b..a99f3427de 100644
--- a/webrtc/modules/audio_processing/test/test_utils.h
+++ b/webrtc/modules/audio_processing/test/test_utils.h
@@ -8,6 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <math.h>
 #include <limits>
 
 #include "webrtc/audio_processing/debug.pb.h"
@@ -153,4 +154,26 @@ static inline bool ReadMessageFromFile(FILE* file,
   return msg->ParseFromArray(bytes.get(), size);
 }
 
+template <typename T>
+float ComputeSNR(const T* ref, const T* test, int length, float* variance) {
+  float mse = 0;
+  float mean = 0;
+  *variance = 0;
+  for (int i = 0; i < length; ++i) {
+    T error = ref[i] - test[i];
+    mse += error * error;
+    *variance += ref[i] * ref[i];
+    mean += ref[i];
+  }
+  mse /= length;
+  *variance /= length;
+  mean /= length;
+  *variance -= mean * mean;
+
+  float snr = 100;  // We assign 100 dB to the zero-error case.
+  if (mse > 0)
+    snr = 10 * log10(*variance / mse);
+  return snr;
+}
+
 }  // namespace webrtc
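Note for readers outside the WebRTC tree: the conversions this patch routes everything through use three ranges, plain float in [-1, 1], "float S16" (float samples scaled to the int16_t range), and saturated int16_t. The standalone sketch below illustrates those conventions. FloatToFloatS16 follows the patched line in audio_util.h above; FloatS16ToFloat and FloatS16ToS16 are paraphrased to match their documented semantics and may differ in detail from the tree, and the sample values in main() are invented for illustration.

// Sketch of the float / float-S16 / int16_t scaling conventions, assuming the
// semantics of the audio_util.h helpers; not the WebRTC implementation itself.
#include <stdint.h>
#include <stdio.h>
#include <limits>

typedef std::numeric_limits<int16_t> limits_int16;

static inline float FloatToFloatS16(float v) {
  // Positive values scale by 32767, non-positive by 32768 (the patched form).
  return v * (v > 0 ? limits_int16::max() : -limits_int16::min());
}

static inline float FloatS16ToFloat(float v) {
  // Inverse mapping back to the nominal [-1, 1] range.
  return v > 0 ? v / limits_int16::max() : v / -limits_int16::min();
}

static inline int16_t FloatS16ToS16(float v) {
  // Round to nearest and saturate to the int16_t range.
  if (v > 0)
    return v >= limits_int16::max() - 0.5f
               ? limits_int16::max()
               : static_cast<int16_t>(v + 0.5f);
  return v <= limits_int16::min() + 0.5f
             ? limits_int16::min()
             : static_cast<int16_t>(v - 0.5f);
}

int main() {
  // Illustrative inputs only; walk each value through the three ranges.
  const float kInputs[] = {-1.0f, -0.5f, 0.0f, 0.25f, 1.0f};
  for (float x : kInputs) {
    float s16_range = FloatToFloatS16(x);
    printf("%+.3f -> FloatS16 %+9.1f -> int16 %+6d -> back to float %+.3f\n",
           x, s16_range, FloatS16ToS16(s16_range), FloatS16ToFloat(s16_range));
  }
  return 0;
}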
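The unittest change above replaces the bit-exact memcmp check with an SNR comparison gated on chunk energy. Below is a minimal usage sketch of the ComputeSNR helper added to test_utils.h, applied to made-up int16_t buffers; the sample data and the printf reporting are illustrative only, while the 20/20 thresholds are the ones used in the test's non-fixed-point branch.

// Toy demonstration of the SNR-based comparison used by the test. ComputeSNR
// matches the helper the patch adds to test_utils.h; the data is invented.
#include <math.h>
#include <stdint.h>
#include <stdio.h>

template <typename T>
float ComputeSNR(const T* ref, const T* test, int length, float* variance) {
  float mse = 0;
  float mean = 0;
  *variance = 0;
  for (int i = 0; i < length; ++i) {
    T error = ref[i] - test[i];
    mse += error * error;
    *variance += ref[i] * ref[i];
    mean += ref[i];
  }
  mse /= length;
  *variance /= length;
  mean /= length;
  *variance -= mean * mean;

  float snr = 100;  // We assign 100 dB to the zero-error case.
  if (mse > 0)
    snr = 10 * log10(*variance / mse);
  return snr;
}

int main() {
  // A "reference" channel and a copy with a small error, standing in for one
  // audio chunk produced by the int16 and float processing paths.
  const int kLength = 8;
  int16_t ref[kLength]  = {1000, -2000, 3000, -4000, 5000, -6000, 7000, -8000};
  int16_t test[kLength] = {1001, -1999, 3002, -4001, 4999, -6002, 7001, -7999};

  float variance = 0;
  float snr = ComputeSNR(ref, test, kLength, &variance);

  // Mimic the test's gating: only check SNR when the chunk has enough energy.
  const float kVarianceThreshold = 20;
  const float kSNRThreshold = 20;
  if (sqrt(variance) > kVarianceThreshold)
    printf("snr = %.1f dB (%s threshold of %.0f dB)\n", snr,
           snr > kSNRThreshold ? "above" : "below", kSNRThreshold);
  return 0;
}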