ACM: Adding unittests for the remixing functionality

On top of adding unittests for the remixing, the CL moves the code tested to a separate file in order to allow it to be tested. Bug: webrtc:11007 Change-Id: I531736517bbcc715b3c1bf3a4256c42208c5b778 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/155740 Commit-Queue: Per Åhgren <peah@webrtc.org> Reviewed-by: Karl Wiberg <kwiberg@webrtc.org> Cr-Commit-Position: refs/heads/master@{#29839}
2025-05-13 05:40:42 +01:00 · 2019-11-19 21:00:59 +01:00 · 2019-11-19 21:00:59 +01:00 · 4dd56a3830
commit 4dd56a3830
parent 0e3198e434
6 changed files with 354 additions and 112 deletions
--- a/modules/audio_coding/BUILD.gn
+++ b/modules/audio_coding/BUILD.gn
@ -29,6 +29,8 @@ rtc_library("audio_coding") {
  sources = [
    "acm2/acm_receiver.cc",
    "acm2/acm_receiver.h",
+    "acm2/acm_remixing.cc",
+    "acm2/acm_remixing.h",
    "acm2/acm_resampler.cc",
    "acm2/acm_resampler.h",
    "acm2/audio_coding_module.cc",
@ -1972,6 +1974,7 @@ if (rtc_include_tests) {

    sources = [
      "acm2/acm_receiver_unittest.cc",
+      "acm2/acm_remixing_unittest.cc",
      "acm2/audio_coding_module_unittest.cc",
      "acm2/call_statistics_unittest.cc",
      "audio_network_adaptor/audio_network_adaptor_impl_unittest.cc",
--- a/modules/audio_coding/acm2/acm_remixing.cc
+++ b/modules/audio_coding/acm2/acm_remixing.cc
@ -0,0 +1,114 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/acm2/acm_remixing.h"
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+void DownMixFrame(const AudioFrame& input, rtc::ArrayView<int16_t> output) {
+  RTC_DCHECK_EQ(input.num_channels_, 2);
+  RTC_DCHECK_EQ(output.size(), input.samples_per_channel_);
+
+  const size_t num_frames = input.samples_per_channel_;
+
+  // Muted input produces an all-zero output.
+  if (input.muted()) {
+    std::fill(output.begin(), output.begin() + num_frames, 0);
+    return;
+  }
+
+  // Average each interleaved left/right pair. The sum is formed in 32 bits so
+  // that it cannot overflow before the halving shift.
+  const int16_t* const interleaved = input.data();
+  for (size_t i = 0; i < num_frames; ++i) {
+    const int32_t left = interleaved[2 * i];
+    const int32_t right = interleaved[2 * i + 1];
+    output[i] = rtc::dchecked_cast<int16_t>((left + right) >> 1);
+  }
+}
+
+void ReMixFrame(const AudioFrame& input,
+                size_t num_output_channels,
+                std::vector<int16_t>* output) {
+  const size_t output_size = num_output_channels * input.samples_per_channel_;
+  // An input without channels cannot populate a non-empty output.
+  RTC_DCHECK(!(input.num_channels_ == 0 && num_output_channels > 0 &&
+               input.samples_per_channel_ > 0));
+
+  if (output->size() != output_size) {
+    output->resize(output_size);
+  }
+
+  // For muted frames, fill the frame with zeros.
+  if (input.muted()) {
+    std::fill(output->begin(), output->end(), 0);
+    return;
+  }
+
+  // Ensure that the special case of zero input channels is handled correctly
+  // (zero samples per channel is already handled correctly in the code below).
+  if (input.num_channels_ == 0) {
+    return;
+  }
+
+  const int16_t* const input_data = input.data();
+  size_t out_index = 0;
+
+  // When upmixing is needed and the input is mono copy the left channel
+  // into the left and right channels, and set any remaining channels to zero.
+  if (input.num_channels_ == 1 && input.num_channels_ < num_output_channels) {
+    for (size_t k = 0; k < input.samples_per_channel_; ++k) {
+      (*output)[out_index++] = input_data[k];
+      (*output)[out_index++] = input_data[k];
+      for (size_t j = 2; j < num_output_channels; ++j) {
+        (*output)[out_index++] = 0;
+      }
+      RTC_DCHECK_EQ(out_index, (k + 1) * num_output_channels);
+    }
+    RTC_DCHECK_EQ(out_index, input.samples_per_channel_ * num_output_channels);
+    return;
+  }
+
+  size_t in_index = 0;
+
+  // When upmixing is needed and the output is surround, copy the available
+  // channels directly, and set the remaining channels to zero.
+  if (input.num_channels_ < num_output_channels) {
+    for (size_t k = 0; k < input.samples_per_channel_; ++k) {
+      for (size_t j = 0; j < input.num_channels_; ++j) {
+        (*output)[out_index++] = input_data[in_index++];
+      }
+      for (size_t j = input.num_channels_; j < num_output_channels; ++j) {
+        (*output)[out_index++] = 0;
+      }
+      RTC_DCHECK_EQ(in_index, (k + 1) * input.num_channels_);
+      RTC_DCHECK_EQ(out_index, (k + 1) * num_output_channels);
+    }
+    RTC_DCHECK_EQ(in_index, input.samples_per_channel_ * input.num_channels_);
+    RTC_DCHECK_EQ(out_index, input.samples_per_channel_ * num_output_channels);
+
+    return;
+  }
+
+  // When downmixing stereo to mono, average the channels. The output channel
+  // count is checked explicitly: this loop writes exactly
+  // samples_per_channel_ samples, which only matches the output size for a
+  // mono output. Stereo input with any other output channel count (2 -> 2 or
+  // 2 -> 0) is handled by the generic loop below instead.
+  if (input.num_channels_ == 2 && num_output_channels == 1) {
+    for (size_t n = 0; n < input.samples_per_channel_; ++n) {
+      (*output)[n] = rtc::dchecked_cast<int16_t>(
+          (int32_t{input_data[2 * n]} + int32_t{input_data[2 * n + 1]}) >> 1);
+    }
+    return;
+  }
+
+  // Otherwise the output has at most as many channels as the input: copy the
+  // first num_output_channels channels of every sample and drop the surplus
+  // channels (none are dropped when the channel counts are equal).
+  const size_t num_channels_to_drop = input.num_channels_ - num_output_channels;
+  for (size_t k = 0; k < input.samples_per_channel_; ++k) {
+    for (size_t j = 0; j < num_output_channels; ++j) {
+      (*output)[out_index++] = input_data[in_index++];
+    }
+    in_index += num_channels_to_drop;
+  }
+}
+
+}  // namespace webrtc
--- a/modules/audio_coding/acm2/acm_remixing.h
+++ b/modules/audio_coding/acm2/acm_remixing.h
@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_ACM2_ACM_REMIXING_H_
+#define MODULES_AUDIO_CODING_ACM2_ACM_REMIXING_H_
+
+#include <vector>
+
+#include "api/audio/audio_frame.h"
+
+namespace webrtc {
+
+// Stereo-to-mono downmixing. The length of the output must be equal to the
+// number of samples per channel in the input.
+void DownMixFrame(const AudioFrame& input, rtc::ArrayView<int16_t> output);
+
+// Remixes the interleaved input frame to an interleaved output data vector. The
+// remixed data replaces the data in the output vector which is resized if
+// needed. The remixing supports any combination of input and output channels,
+// as well as any number of samples per channel.
+void ReMixFrame(const AudioFrame& input,
+                size_t num_output_channels,
+                std::vector<int16_t>* output);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_ACM2_ACM_REMIXING_H_
--- a/modules/audio_coding/acm2/acm_remixing_unittest.cc
+++ b/modules/audio_coding/acm2/acm_remixing_unittest.cc
@ -0,0 +1,191 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/acm2/acm_remixing.h"
+
+#include <vector>
+
+#include "api/audio/audio_frame.h"
+#include "system_wrappers/include/clock.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+using ::testing::AllOf;
+using ::testing::Each;
+using ::testing::ElementsAreArray;
+using ::testing::SizeIs;
+
+namespace webrtc {
+
+// Verifies that stereo-to-mono downmixing averages the two channels.
+TEST(AcmRemixing, DownMixFrame) {
+  constexpr size_t kSamplesPerChannel = 480;
+
+  AudioFrame in;
+  in.num_channels_ = 2;
+  in.samples_per_channel_ = kSamplesPerChannel;
+
+  // Interleaved stereo input with left = 2 and right = 0, so that the
+  // expected average is 1.
+  int16_t* const samples = in.mutable_data();
+  for (size_t i = 0; i < kSamplesPerChannel; ++i) {
+    samples[2 * i] = 2;
+    samples[2 * i + 1] = 0;
+  }
+
+  std::vector<int16_t> out(kSamplesPerChannel, 0);
+  DownMixFrame(in, out);
+
+  EXPECT_THAT(out, AllOf(SizeIs(kSamplesPerChannel), Each(1)));
+}
+
+// Verifies that downmixing a muted stereo frame produces only zeros.
+TEST(AcmRemixing, DownMixMutedFrame) {
+  constexpr size_t kSamplesPerChannel = 480;
+
+  AudioFrame in;
+  in.num_channels_ = 2;
+  in.samples_per_channel_ = kSamplesPerChannel;
+
+  // Write non-zero samples before muting; the expectation below requires that
+  // they do not appear in the output.
+  int16_t* const samples = in.mutable_data();
+  for (size_t i = 0; i < kSamplesPerChannel; ++i) {
+    samples[2 * i] = 2;
+    samples[2 * i + 1] = 0;
+  }
+
+  in.Mute();
+
+  std::vector<int16_t> out(kSamplesPerChannel, 0);
+  DownMixFrame(in, out);
+
+  EXPECT_THAT(out, AllOf(SizeIs(kSamplesPerChannel), Each(0)));
+}
+
+// Verifies that remixing a muted stereo frame to 6 channels yields a
+// correctly sized, all-zero output.
+TEST(AcmRemixing, RemixMutedStereoFrameTo6Channels) {
+  constexpr size_t kSamplesPerChannel = 480;
+  constexpr size_t kNumOutputChannels = 6;
+
+  AudioFrame in;
+  in.num_channels_ = 2;
+  in.samples_per_channel_ = kSamplesPerChannel;
+
+  // Write non-zero samples before muting; the expectations below require that
+  // they do not appear in the output.
+  int16_t* const samples = in.mutable_data();
+  for (size_t i = 0; i < kSamplesPerChannel; ++i) {
+    samples[2 * i] = 1;
+    samples[2 * i + 1] = 2;
+  }
+  in.Mute();
+
+  // The output starts out smaller than required so that resizing is exercised.
+  std::vector<int16_t> out(kSamplesPerChannel, 0);
+  ReMixFrame(in, kNumOutputChannels, &out);
+  EXPECT_EQ(kNumOutputChannels * kSamplesPerChannel, out.size());
+
+  EXPECT_THAT(out,
+              AllOf(SizeIs(kSamplesPerChannel * kNumOutputChannels), Each(0)));
+}
+
+// Verifies that upmixing stereo to 6 channels copies the two input channels
+// and sets the remaining four channels to zero.
+TEST(AcmRemixing, RemixStereoFrameTo6Channels) {
+  constexpr size_t kSamplesPerChannel = 480;
+  constexpr size_t kNumOutputChannels = 6;
+
+  AudioFrame in;
+  in.num_channels_ = 2;
+  in.samples_per_channel_ = kSamplesPerChannel;
+
+  int16_t* const samples = in.mutable_data();
+  for (size_t i = 0; i < kSamplesPerChannel; ++i) {
+    samples[2 * i] = 1;
+    samples[2 * i + 1] = 2;
+  }
+
+  // The output starts out smaller than required so that resizing is exercised.
+  std::vector<int16_t> out(kSamplesPerChannel, 0);
+  ReMixFrame(in, kNumOutputChannels, &out);
+  EXPECT_EQ(kNumOutputChannels * kSamplesPerChannel, out.size());
+
+  // The vector is value-initialized to zero, so only the first two channels
+  // of every sample need to be set.
+  std::vector<int16_t> expected(kSamplesPerChannel * kNumOutputChannels);
+  for (size_t i = 0; i < kSamplesPerChannel; ++i) {
+    expected[kNumOutputChannels * i] = 1;
+    expected[kNumOutputChannels * i + 1] = 2;
+  }
+
+  EXPECT_THAT(out, ElementsAreArray(expected));
+}
+
+// Verifies that upmixing mono to 6 channels duplicates the input into the
+// first two channels and sets the remaining four channels to zero.
+TEST(AcmRemixing, RemixMonoFrameTo6Channels) {
+  constexpr size_t kSamplesPerChannel = 480;
+  constexpr size_t kNumOutputChannels = 6;
+
+  AudioFrame in;
+  in.num_channels_ = 1;
+  in.samples_per_channel_ = kSamplesPerChannel;
+
+  int16_t* const samples = in.mutable_data();
+  for (size_t i = 0; i < kSamplesPerChannel; ++i) {
+    samples[i] = 1;
+  }
+
+  // The output starts out smaller than required so that resizing is exercised.
+  std::vector<int16_t> out(kSamplesPerChannel, 0);
+  ReMixFrame(in, kNumOutputChannels, &out);
+  EXPECT_EQ(kNumOutputChannels * kSamplesPerChannel, out.size());
+
+  std::vector<int16_t> expected(kSamplesPerChannel * kNumOutputChannels, 0);
+  for (size_t i = 0; i < kSamplesPerChannel; ++i) {
+    expected[kNumOutputChannels * i] = 1;
+    expected[kNumOutputChannels * i + 1] = 1;
+  }
+
+  EXPECT_THAT(out, ElementsAreArray(expected));
+}
+
+// Verifies that remixing stereo to mono averages the two channels.
+TEST(AcmRemixing, RemixStereoFrameToMono) {
+  constexpr size_t kSamplesPerChannel = 480;
+
+  AudioFrame in;
+  in.num_channels_ = 2;
+  in.samples_per_channel_ = kSamplesPerChannel;
+
+  // Interleaved stereo input with left = 2 and right = 0, so that the
+  // expected average is 1.
+  int16_t* const samples = in.mutable_data();
+  for (size_t i = 0; i < kSamplesPerChannel; ++i) {
+    samples[2 * i] = 2;
+    samples[2 * i + 1] = 0;
+  }
+
+  std::vector<int16_t> out(kSamplesPerChannel, 0);
+  ReMixFrame(in, 1, &out);
+  EXPECT_EQ(kSamplesPerChannel, out.size());
+
+  EXPECT_THAT(out, AllOf(SizeIs(kSamplesPerChannel), Each(1)));
+}
+
+// Verifies that remixing mono to stereo duplicates the input into both
+// output channels.
+TEST(AcmRemixing, RemixMonoFrameToStereo) {
+  constexpr size_t kSamplesPerChannel = 480;
+
+  AudioFrame in;
+  in.num_channels_ = 1;
+  in.samples_per_channel_ = kSamplesPerChannel;
+
+  int16_t* const samples = in.mutable_data();
+  for (size_t i = 0; i < kSamplesPerChannel; ++i) {
+    samples[i] = 1;
+  }
+
+  // The output starts out smaller than required so that resizing is exercised.
+  std::vector<int16_t> out(kSamplesPerChannel, 0);
+  ReMixFrame(in, 2, &out);
+  EXPECT_EQ(2 * kSamplesPerChannel, out.size());
+
+  EXPECT_THAT(out, AllOf(SizeIs(2 * kSamplesPerChannel), Each(1)));
+}
+
+// Verifies that downmixing 3 channels to stereo keeps the first two channels
+// of every sample and drops the third.
+TEST(AcmRemixing, Remix3ChannelFrameToStereo) {
+  constexpr size_t kSamplesPerChannel = 480;
+  constexpr size_t kNumInputChannels = 3;
+  constexpr size_t kNumOutputChannels = 2;
+
+  std::vector<int16_t> out(kSamplesPerChannel, 0);
+  AudioFrame in;
+  in.num_channels_ = kNumInputChannels;
+  in.samples_per_channel_ = kSamplesPerChannel;
+
+  // Each sample holds its own channel index, which makes it possible to tell
+  // which input channels ended up in the output.
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < kSamplesPerChannel; ++k) {
+    for (size_t j = 0; j < kNumInputChannels; ++j) {
+      // Explicit cast: the channel index is a size_t but the samples are
+      // 16 bits wide.
+      in_data[kNumInputChannels * k + j] = static_cast<int16_t>(j);
+    }
+  }
+
+  ReMixFrame(in, kNumOutputChannels, &out);
+  EXPECT_EQ(kNumOutputChannels * kSamplesPerChannel, out.size());
+
+  std::vector<int16_t> expected_output(kSamplesPerChannel * kNumOutputChannels);
+  for (size_t k = 0; k < kSamplesPerChannel; ++k) {
+    for (size_t j = 0; j < kNumOutputChannels; ++j) {
+      expected_output[kNumOutputChannels * k + j] = static_cast<int16_t>(j);
+    }
+  }
+
+  EXPECT_THAT(out, ElementsAreArray(expected_output));
+}
+
+}  // namespace webrtc
--- a/modules/audio_coding/acm2/audio_coding_module.cc
+++ b/modules/audio_coding/acm2/audio_coding_module.cc
@ -18,6 +18,7 @@
 #include "absl/strings/match.h"
 #include "api/array_view.h"
 #include "modules/audio_coding/acm2/acm_receiver.h"
+#include "modules/audio_coding/acm2/acm_remixing.h"
 #include "modules/audio_coding/acm2/acm_resampler.h"
 #include "modules/include/module_common_types.h"
 #include "modules/include/module_common_types_public.h"
@ -199,110 +200,6 @@ void UpdateCodecTypeHistogram(size_t codec_type) {
          webrtc::AudioEncoder::CodecType::kMaxLoggedAudioCodecTypes));
 }

-// Stereo-to-mono can be used as in-place.
-void DownMix(const AudioFrame& frame,
-             size_t length_out_buff,
-             int16_t* out_buff) {
-  RTC_DCHECK_EQ(frame.num_channels_, 2);
-  RTC_DCHECK_GE(length_out_buff, frame.samples_per_channel_);
-
-  if (!frame.muted()) {
-    const int16_t* frame_data = frame.data();
-    for (size_t n = 0; n < frame.samples_per_channel_; ++n) {
-      out_buff[n] =
-          static_cast<int16_t>((static_cast<int32_t>(frame_data[2 * n]) +
-                                static_cast<int32_t>(frame_data[2 * n + 1])) >>
-                               1);
-    }
-  } else {
-    std::fill(out_buff, out_buff + frame.samples_per_channel_, 0);
-  }
-}
-
-// Remixes the input frame to an output data vector. The output vector is
-// resized if needed.
-void ReMix(const AudioFrame& input,
-           size_t num_output_channels,
-           std::vector<int16_t>* output) {
-  const size_t output_size = num_output_channels * input.samples_per_channel_;
-
-  if (output->size() != output_size) {
-    output->resize(output_size);
-  }
-
-  // For muted frames, fill the frame with zeros.
-  if (input.muted()) {
-    std::fill(output->begin(), output->end(), 0);
-    return;
-  }
-
-  // Ensure that the special case of zero input channels is handled correctly
-  // (zero samples per channel is already handled correctly in the code below).
-  if (input.num_channels_ == 0) {
-    return;
-  }
-
-  const int16_t* input_data = input.data();
-  size_t out_index = 0;
-
-  // When upmixing is needed and the input is mono copy the left channel
-  // into the left and right channels, and set any remaining channels to zero.
-  if (input.num_channels_ == 1 && input.num_channels_ < num_output_channels) {
-    for (size_t k = 0; k < input.samples_per_channel_; ++k) {
-      (*output)[out_index++] = input_data[k];
-      (*output)[out_index++] = input_data[k];
-      for (size_t j = 2; j < num_output_channels; ++j) {
-        (*output)[out_index++] = 0;
-      }
-      RTC_DCHECK_EQ(out_index, (k + 1) * num_output_channels);
-    }
-    RTC_DCHECK_EQ(out_index, input.samples_per_channel_ * num_output_channels);
-    return;
-  }
-
-  size_t in_index = 0;
-
-  // When upmixing is needed and the output is surround, copy the available
-  // channels directly, and set the remaining channels to zero.
-  if (input.num_channels_ < num_output_channels) {
-    for (size_t k = 0; k < input.samples_per_channel_; ++k) {
-      for (size_t j = 0; j < input.num_channels_; ++j) {
-        (*output)[out_index++] = input_data[in_index++];
-      }
-      for (size_t j = input.num_channels_; j < num_output_channels; ++j) {
-        (*output)[out_index++] = 0;
-      }
-      RTC_DCHECK_EQ(in_index, (k + 1) * input.num_channels_);
-      RTC_DCHECK_EQ(out_index, (k + 1) * num_output_channels);
-    }
-    RTC_DCHECK_EQ(in_index, input.samples_per_channel_ * input.num_channels_);
-    RTC_DCHECK_EQ(out_index, input.samples_per_channel_ * num_output_channels);
-
-    return;
-  }
-
-  // When downmixing is needed, and the input is stereo, average the channels.
-  if (input.num_channels_ == 2) {
-    for (size_t n = 0; n < input.samples_per_channel_; ++n) {
-      (*output)[n] =
-          static_cast<int16_t>((static_cast<int32_t>(input_data[2 * n]) +
-                                static_cast<int32_t>(input_data[2 * n + 1])) >>
-                               1);
-    }
-    return;
-  }
-
-  // When downmixing is needed, and the input is multichannel, drop the surplus
-  // channels.
-  const size_t num_channels_to_drop = input.num_channels_ - num_output_channels;
-  for (size_t k = 0; k < input.samples_per_channel_; ++k) {
-    for (size_t j = 0; j < num_output_channels; ++j) {
-      (*output)[out_index++] = input_data[in_index++];
-    }
-    in_index += num_channels_to_drop;
-  }
-}
-
 void AudioCodingModuleImpl::ChangeLogger::MaybeLog(int value) {
  if (value != last_value_ || first_time_) {
    first_time_ = false;
@ -499,7 +396,7 @@ int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,
  if (!same_num_channels) {
    // Remixes the input frame to the output data and in the process resize the
    // output data if needed.
-    ReMix(*ptr_frame, current_num_channels, &input_data->buffer);
+    ReMixFrame(*ptr_frame, current_num_channels, &input_data->buffer);

    // For pushing data to primary, point the |ptr_audio| to correct buffer.
    input_data->audio = input_data->buffer.data();
@ -567,21 +464,24 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,

  *ptr_out = &preprocess_frame_;
  preprocess_frame_.num_channels_ = in_frame.num_channels_;
-  int16_t audio[WEBRTC_10MS_PCM_AUDIO];
+  preprocess_frame_.samples_per_channel_ = in_frame.samples_per_channel_;
+  std::array<int16_t, WEBRTC_10MS_PCM_AUDIO> audio;
  const int16_t* src_ptr_audio = in_frame.data();
  if (down_mix) {
    // If a resampling is required the output of a down-mix is written into a
    // local buffer, otherwise, it will be written to the output frame.
    int16_t* dest_ptr_audio =
-        resample ? audio : preprocess_frame_.mutable_data();
-    DownMix(in_frame, WEBRTC_10MS_PCM_AUDIO, dest_ptr_audio);
+        resample ? audio.data() : preprocess_frame_.mutable_data();
+    RTC_DCHECK_GE(audio.size(), in_frame.samples_per_channel_);
+    DownMixFrame(in_frame,
+                 rtc::ArrayView<int16_t>(
+                     dest_ptr_audio, preprocess_frame_.samples_per_channel_));
    preprocess_frame_.num_channels_ = 1;
    // Set the input of the resampler is the down-mixed signal.
-    src_ptr_audio = audio;
+    src_ptr_audio = audio.data();
  }

  preprocess_frame_.timestamp_ = expected_codec_ts_;
-  preprocess_frame_.samples_per_channel_ = in_frame.samples_per_channel_;
  preprocess_frame_.sample_rate_hz_ = in_frame.sample_rate_hz_;
  // If it is required, we have to do a resampling.
  if (resample) {
--- a/modules/audio_coding/acm2/audio_coding_module_unittest.cc
+++ b/modules/audio_coding/acm2/audio_coding_module_unittest.cc
@ -1638,7 +1638,7 @@ TEST_F(AcmSetBitRateNewApi, OpusFromFormat_48khz_20ms_50kbps) {
 // send surround audio.
 TEST_F(AudioCodingModuleTestOldApi, SendingMultiChannelForMonoInput) {
  constexpr int kSampleRateHz = 48000;
-  constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
+  constexpr int kSamplesPerChannel = kSampleRateHz * 10 / 1000;

  audio_format_ = SdpAudioFormat({"multiopus",
                                  kSampleRateHz,
@ -1692,7 +1692,7 @@ TEST_F(AudioCodingModuleTestOldApi, SendingStereoForMonoInput) {
  constexpr int kSampleRateHz = 48000;
  constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;

-  audio_format_ = SdpAudioFormat("opus", kSampleRateHz, 2);
+  audio_format_ = SdpAudioFormat("L16", kSampleRateHz, 2);

  RegisterCodec();