webrtc/modules/audio_mixer/frame_combiner_unittest.cc
Doudou Kisabaka fe6595f006 Include all RTP packet infos from the mix list when updating the audio frame for mixing.
Users of the mixer can use this information to determine which sources were included in the frame.

Bug: webrtc:12745
Change-Id: I11a8e3b1f4e8f95eb870336cad8dd082330bdf02
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/217768
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Reviewed-by: Minyue Li <minyue@webrtc.org>
Reviewed-by: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Chen Xing <chxg@google.com>
Commit-Queue: Doudou Kisabaka <doudouk@google.com>
Cr-Commit-Position: refs/heads/master@{#34035}
2021-05-18 11:05:37 +00:00

341 lines
13 KiB
C++

/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_mixer/frame_combiner.h"
#include <cstdint>
#include <initializer_list>
#include <numeric>
#include <string>
#include <type_traits>
#include <vector>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/rtp_packet_info.h"
#include "api/rtp_packet_infos.h"
#include "audio/utility/audio_frame_operations.h"
#include "modules/audio_mixer/gain_change_calculator.h"
#include "modules/audio_mixer/sine_wave_generator.h"
#include "rtc_base/checks.h"
#include "rtc_base/strings/string_builder.h"
#include "test/gmock.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
using ::testing::ElementsAreArray;
using ::testing::IsEmpty;
using ::testing::UnorderedElementsAreArray;
using LimiterType = FrameCombiner::LimiterType;
// One parameter set for the gain-smoothness test. Instances are
// aggregate-initialized positionally, so the field order must not change.
struct FrameCombinerConfig {
// Forwarded to the FrameCombiner constructor; reported as "limiter active".
bool use_limiter;
// Sample rate handed to SetUpFrames() and FrameCombiner::Combine().
int sample_rate_hz;
// Channel count handed to SetUpFrames() and FrameCombiner::Combine().
int number_of_channels;
// First argument of the SineWaveGenerator (presumably in Hz - TODO confirm).
float wave_frequency;
};
// Builds a one-line description of a test parameter combination. The tests
// pass the result to SCOPED_TRACE so that a failure names the sample rate,
// channel count and source count that triggered it.
std::string ProduceDebugText(int sample_rate_hz,
                             int number_of_channels,
                             int number_of_sources) {
  rtc::StringBuilder ss;
  // Fields are separated by ", " (comma followed by a space).
  ss << "Sample rate: " << sample_rate_hz << ", ";
  ss << "number of channels: " << number_of_channels << ", ";
  ss << "number of sources: " << number_of_sources;
  return ss.Release();
}
// Builds a one-line description of a FrameCombinerConfig. The gain-smoothness
// test passes the result to SCOPED_TRACE so that a failure names the
// configuration that triggered it.
std::string ProduceDebugText(const FrameCombinerConfig& config) {
  rtc::StringBuilder ss;
  // Fields are separated by ", "; no separator follows the last field.
  ss << "Sample rate: " << config.sample_rate_hz << ", ";
  ss << "number of channels: " << config.number_of_channels << ", ";
  ss << "limiter active: " << (config.use_limiter ? "on" : "off") << ", ";
  ss << "wave frequency: " << config.wave_frequency;
  return ss.Release();
}
// Input frames shared by all tests in this file. SetUpFrames() re-initializes
// both of them; tests pass their addresses to FrameCombiner::Combine().
AudioFrame frame1;
AudioFrame frame2;
void SetUpFrames(int sample_rate_hz, int number_of_channels) {
RtpPacketInfo packet_info1(
/*ssrc=*/1001, /*csrcs=*/{}, /*rtp_timestamp=*/1000,
/*audio_level=*/absl::nullopt, /*absolute_capture_time=*/absl::nullopt,
/*receive_time_ms=*/1);
RtpPacketInfo packet_info2(
/*ssrc=*/4004, /*csrcs=*/{}, /*rtp_timestamp=*/1234,
/*audio_level=*/absl::nullopt, /*absolute_capture_time=*/absl::nullopt,
/*receive_time_ms=*/2);
RtpPacketInfo packet_info3(
/*ssrc=*/7007, /*csrcs=*/{}, /*rtp_timestamp=*/1333,
/*audio_level=*/absl::nullopt, /*absolute_capture_time=*/absl::nullopt,
/*receive_time_ms=*/2);
frame1.packet_infos_ = RtpPacketInfos({packet_info1});
frame2.packet_infos_ = RtpPacketInfos({packet_info2, packet_info3});
for (auto* frame : {&frame1, &frame2}) {
frame->UpdateFrame(0, nullptr, rtc::CheckedDivExact(sample_rate_hz, 100),
sample_rate_hz, AudioFrame::kNormalSpeech,
AudioFrame::kVadActive, number_of_channels);
}
}
} // namespace
// Smoke test with the limiter enabled: Combine() is called for every
// combination of rate, channel count and number of input frames below.
// The limiter requires the sample rate to be divisible by 2000.
TEST(FrameCombiner, BasicApiCallsLimiter) {
  FrameCombiner combiner(/*use_limiter=*/true);
  const std::vector<AudioFrame*> available_frames = {&frame1, &frame2};

  for (const int sample_rate_hz : {8000, 18000, 34000, 48000}) {
    for (const int num_channels : {1, 2, 4, 8}) {
      SetUpFrames(sample_rate_hz, num_channels);

      for (const int num_sources : {0, 1, 2}) {
        SCOPED_TRACE(
            ProduceDebugText(sample_rate_hz, num_channels, num_sources));

        // Mix only the first `num_sources` of the available frames.
        const std::vector<AudioFrame*> mix_list(
            available_frames.begin(), available_frames.begin() + num_sources);
        AudioFrame mixed_frame;
        combiner.Combine(mix_list, num_channels, sample_rate_hz,
                         mix_list.size(), &mixed_frame);
      }
    }
  }
}
// The packet infos of the mixed frame must be the union of the packet infos
// of the frames that were passed to Combine(), in any order.
TEST(FrameCombiner, ContainsAllRtpPacketInfos) {
  static constexpr int kSampleRateHz = 48000;
  static constexpr int kNumChannels = 1;

  FrameCombiner combiner(/*use_limiter=*/true);
  const std::vector<AudioFrame*> available_frames = {&frame1, &frame2};
  SetUpFrames(kSampleRateHz, kNumChannels);

  for (const int num_sources : {0, 1, 2}) {
    SCOPED_TRACE(ProduceDebugText(kSampleRateHz, kNumChannels, num_sources));

    const std::vector<AudioFrame*> mix_list(
        available_frames.begin(), available_frames.begin() + num_sources);

    // Collect the packet infos of every input frame as the expectation.
    std::vector<RtpPacketInfo> expected_packet_infos;
    for (const AudioFrame* source : mix_list) {
      for (const RtpPacketInfo& info : source->packet_infos_) {
        expected_packet_infos.push_back(info);
      }
    }

    AudioFrame mixed_frame;
    combiner.Combine(mix_list, kNumChannels, kSampleRateHz, mix_list.size(),
                     &mixed_frame);

    EXPECT_THAT(mixed_frame.packet_infos_,
                UnorderedElementsAreArray(expected_packet_infos));
  }
}
// There are DCHECKs in place to check for invalid parameters; here the
// channel counts are the invalid part.
//
// - DCHECKs on and death tests usable (not Android): Combine() must die.
// - DCHECKs off: Combine() is simply called and must return.
// - DCHECKs on but no usable death tests: Combine() is not called at all.
TEST(FrameCombinerDeathTest, DebugBuildCrashesWithManyChannels) {
  FrameCombiner combiner(/*use_limiter=*/true);
  const std::vector<AudioFrame*> available_frames = {&frame1, &frame2};
  constexpr int kNumSources = 2;

  for (const int sample_rate_hz : {8000, 18000, 34000, 48000}) {
    for (const int num_channels : {10, 20, 21}) {
      // Skip combinations whose 10 ms sample count exceeds
      // AudioFrame::kMaxDataSizeSamples.
      const size_t samples_per_frame =
          static_cast<size_t>(sample_rate_hz / 100 * num_channels);
      if (samples_per_frame > AudioFrame::kMaxDataSizeSamples) {
        continue;
      }

      SetUpFrames(sample_rate_hz, num_channels);
      SCOPED_TRACE(ProduceDebugText(sample_rate_hz, num_channels, kNumSources));

      const std::vector<AudioFrame*> mix_list(
          available_frames.begin(), available_frames.begin() + kNumSources);
      AudioFrame mixed_frame;
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
      EXPECT_DEATH(combiner.Combine(mix_list, num_channels, sample_rate_hz,
                                    mix_list.size(), &mixed_frame),
                   "");
#elif !RTC_DCHECK_IS_ON
      combiner.Combine(mix_list, num_channels, sample_rate_hz, mix_list.size(),
                       &mixed_frame);
#endif
    }
  }
}
// Same structure as the many-channels death test, but with the sample rates
// as the invalid parameter.
//
// - DCHECKs on and death tests usable (not Android): Combine() must die.
// - DCHECKs off: Combine() is simply called and must return.
// - DCHECKs on but no usable death tests: Combine() is not called at all.
TEST(FrameCombinerDeathTest, DebugBuildCrashesWithHighRate) {
  FrameCombiner combiner(/*use_limiter=*/true);
  const std::vector<AudioFrame*> available_frames = {&frame1, &frame2};
  constexpr int kNumSources = 2;

  for (const int sample_rate_hz : {50000, 96000, 128000, 196000}) {
    for (const int num_channels : {1, 2, 3}) {
      // Skip combinations whose 10 ms sample count exceeds
      // AudioFrame::kMaxDataSizeSamples.
      const size_t samples_per_frame =
          static_cast<size_t>(sample_rate_hz / 100 * num_channels);
      if (samples_per_frame > AudioFrame::kMaxDataSizeSamples) {
        continue;
      }

      SetUpFrames(sample_rate_hz, num_channels);
      SCOPED_TRACE(ProduceDebugText(sample_rate_hz, num_channels, kNumSources));

      const std::vector<AudioFrame*> mix_list(
          available_frames.begin(), available_frames.begin() + kNumSources);
      AudioFrame mixed_frame;
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
      EXPECT_DEATH(combiner.Combine(mix_list, num_channels, sample_rate_hz,
                                    mix_list.size(), &mixed_frame),
                   "");
#elif !RTC_DCHECK_IS_ON
      combiner.Combine(mix_list, num_channels, sample_rate_hz, mix_list.size(),
                       &mixed_frame);
#endif
    }
  }
}
// Smoke test with the limiter disabled: Combine() is called for every
// combination of rate, channel count and number of input frames below.
// Without the limiter the rate only has to be divisible by 100, since
// 10 ms frames are used.
TEST(FrameCombiner, BasicApiCallsNoLimiter) {
  FrameCombiner combiner(/*use_limiter=*/false);
  const std::vector<AudioFrame*> available_frames = {&frame1, &frame2};

  for (const int sample_rate_hz : {8000, 10000, 11000, 32000, 44100}) {
    for (const int num_channels : {1, 2, 4, 8}) {
      SetUpFrames(sample_rate_hz, num_channels);

      for (const int num_sources : {0, 1, 2}) {
        SCOPED_TRACE(
            ProduceDebugText(sample_rate_hz, num_channels, num_sources));

        // Mix only the first `num_sources` of the available frames.
        const std::vector<AudioFrame*> mix_list(
            available_frames.begin(), available_frames.begin() + num_sources);
        AudioFrame mixed_frame;
        combiner.Combine(mix_list, num_channels, sample_rate_hz,
                         mix_list.size(), &mixed_frame);
      }
    }
  }
}
// Combining an empty mix list must yield an all-zero frame that carries no
// RTP packet infos.
TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) {
  FrameCombiner combiner(/*use_limiter=*/false);

  for (const int sample_rate_hz : {8000, 10000, 11000, 32000, 44100}) {
    for (const int num_channels : {1, 2}) {
      SCOPED_TRACE(ProduceDebugText(sample_rate_hz, num_channels, 0));

      AudioFrame mixed_frame;
      const std::vector<AudioFrame*> empty_mix_list;
      combiner.Combine(empty_mix_list, num_channels, sample_rate_hz,
                       empty_mix_list.size(), &mixed_frame);

      // Total number of interleaved samples in one 10 ms frame.
      const int num_samples = num_channels * sample_rate_hz / 100;
      const int16_t* const output = mixed_frame.data();
      const std::vector<int16_t> mixed_data(output, output + num_samples);
      const std::vector<int16_t> expected(num_samples, 0);

      EXPECT_EQ(mixed_data, expected);
      EXPECT_THAT(mixed_frame.packet_infos_, IsEmpty());
    }
  }
}
// Combining a single frame (without the limiter) must reproduce its samples
// and its RTP packet infos unchanged.
TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) {
  FrameCombiner combiner(/*use_limiter=*/false);

  for (const int sample_rate_hz : {8000, 10000, 11000, 32000, 44100}) {
    for (const int num_channels : {1, 2, 4, 8, 10}) {
      SCOPED_TRACE(ProduceDebugText(sample_rate_hz, num_channels, 1));

      AudioFrame mixed_frame;
      SetUpFrames(sample_rate_hz, num_channels);

      // Total number of interleaved samples in one 10 ms frame.
      const int num_samples = num_channels * sample_rate_hz / 100;

      // Fill the input with the ramp 0, 1, 2, ... so that any change to a
      // sample is detectable.
      int16_t* const input = frame1.mutable_data();
      std::iota(input, input + num_samples, 0);

      const std::vector<AudioFrame*> mix_list = {&frame1};
      combiner.Combine(mix_list, num_channels, sample_rate_hz, mix_list.size(),
                       &mixed_frame);

      const int16_t* const output = mixed_frame.data();
      const std::vector<int16_t> mixed_data(output, output + num_samples);

      std::vector<int16_t> expected(num_samples);
      std::iota(expected.begin(), expected.end(), 0);

      EXPECT_EQ(mixed_data, expected);
      EXPECT_THAT(mixed_frame.packet_infos_,
                  ElementsAreArray(frame1.packet_infos_));
    }
  }
}
// Feeds a sine wave through the FrameCombiner while the number of mixed
// frames alternates between one and two, and checks that the gain between
// input and output varies smoothly and stays inside reasonable bounds.
// This is to catch issues like chromium:695993 and chromium:816875.
TEST(FrameCombiner, GainCurveIsSmoothForAlternatingNumberOfStreams) {
  // Rates are divisible by 2000 when limiter is active.
  const std::vector<FrameCombinerConfig> configs = {
      {false, 30100, 2, 50.f}, {false, 16500, 1, 3200.f},
      {true, 8000, 1, 3200.f}, {true, 16000, 1, 50.f},
      {true, 18000, 8, 3200.f}, {true, 10000, 2, 50.f},
  };

  for (const FrameCombinerConfig& config : configs) {
    SCOPED_TRACE(ProduceDebugText(config));

    FrameCombiner combiner(config.use_limiter);

    constexpr int16_t kWaveAmplitude = 30000;
    SineWaveGenerator wave_generator(config.wave_frequency, kWaveAmplitude);

    GainChangeCalculator change_calculator;
    float cumulative_change = 0.f;

    constexpr size_t kIterations = 100;
    for (size_t iteration = 0; iteration < kIterations; ++iteration) {
      // frame1 carries the sine wave, frame2 is muted.
      SetUpFrames(config.sample_rate_hz, config.number_of_channels);
      wave_generator.GenerateNextFrame(&frame1);
      AudioFrameOperations::Mute(&frame2);

      // The muted frame is mixed in on even iterations only.
      std::vector<AudioFrame*> mix_list = {&frame1};
      const bool include_muted_frame = (iteration % 2 == 0);
      if (include_muted_frame) {
        mix_list.push_back(&frame2);
      }

      const size_t num_samples =
          frame1.samples_per_channel_ * config.number_of_channels;

      // Ensures limiter is on if 'use_limiter'.
      constexpr size_t kNumberOfStreams = 2;
      AudioFrame mixed_frame;
      combiner.Combine(mix_list, config.number_of_channels,
                       config.sample_rate_hz, kNumberOfStreams, &mixed_frame);

      const rtc::ArrayView<const int16_t> input_samples(frame1.data(),
                                                        num_samples);
      const rtc::ArrayView<const int16_t> output_samples(mixed_frame.data(),
                                                         num_samples);
      cumulative_change +=
          change_calculator.CalculateGainChange(input_samples, output_samples);
    }

    // Check that the gain doesn't vary too much.
    EXPECT_LT(cumulative_change, 10);

    // Check that the latest gain is within reasonable bounds. It
    // should be slightly less than 1.
    EXPECT_LT(0.9f, change_calculator.LatestGain());
    EXPECT_LT(change_calculator.LatestGain(), 1.01f);
  }
}
} // namespace webrtc