NetEq: Create an audio interruption metric

This CL adds a new metric to NetEq, which logs whenever a loss
concealment event has lasted at least 150 ms (an "interruption").
The number of such events, as well as the sum length of them, is kept
in a SampleCounter, which can be queried at any time.

Any initial PLC at the beginning of a call, before the first packet is
decoded, is ignored.

Unit tests and piping to neteq_rtpplay are included.

Bug: webrtc:10549
Change-Id: I8a224a34254c47c74317617f420f6de997232d88
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/132796
Reviewed-by: Minyue Li <minyue@webrtc.org>
Commit-Queue: Henrik Lundin <henrik.lundin@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#27781}
This commit is contained in:
Henrik Lundin 2019-04-26 09:47:07 +02:00 committed by Commit Bot
parent 42fa30fdac
commit 2a8bd090a3
8 changed files with 164 additions and 0 deletions

View file

@ -324,12 +324,14 @@ void Expand::SetParametersForNormalAfterExpand() {
lag_index_direction_ = 0;
stop_muting_ = true; // Do not mute signal any more.
statistics_->LogDelayedPacketOutageEvent(expand_duration_samples_, fs_hz_);
statistics_->EndExpandEvent(fs_hz_);
}
// Sets up the expand state for a merge operation that follows an expansion, and
// reports the end of the expand event to the statistics.
void Expand::SetParametersForMergeAfterExpand() {
  // Do not mute the signal any more.
  stop_muting_ = true;
  // Start outside the 3 possible lag indices and step in the direction that
  // makes sure we get the "optimal" lag.
  lag_index_direction_ = 1;
  current_lag_index_ = -1;
  statistics_->EndExpandEvent(fs_hz_);
}
bool Expand::Muted() const {

View file

@ -87,6 +87,11 @@ struct NetEqLifetimeStatistics {
// packet can be made dynamic.
uint64_t relative_packet_arrival_delay_ms = 0;
uint64_t jitter_buffer_packets_received = 0;
// An interruption is a loss-concealment event lasting at least 150 ms. The
// two stats below count the number of such events and the total duration of
// these events.
uint64_t interruption_count = 0;
uint64_t total_interruption_duration_ms = 0;
};
// Metrics that describe the operations performed in NetEq, and the internal

View file

@ -822,6 +822,9 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
switch (operation) {
case kNormal: {
DoNormal(decoded_buffer_.get(), length, speech_type, play_dtmf);
if (length > 0) {
stats_->DecodedOutputPlayed();
}
break;
}
case kMerge: {

View file

@ -689,6 +689,65 @@ TEST_F(NetEqImplTest, FirstPacketUnknown) {
}
}
// This test verifies that the initial PLC period, before the first packet has
// been decoded, is not logged as an audio interruption.
// TODO(henrik.lundin) Maybe move this test to neteq_network_stats_unittest.cc.
TEST_F(NetEqImplTest, NoAudioInterruptionLoggedBeforeFirstDecode) {
  UseNoMocks();
  CreateInstance();

  constexpr int kSampleRateHz = 8000;
  constexpr uint8_t kPayloadType = 17;   // Just an arbitrary number.
  constexpr uint32_t kReceiveTime = 17;  // Value doesn't matter for this test.
  // Every packet carries 10 ms of 16-bit audio.
  constexpr size_t kPayloadLengthSamples =
      static_cast<size_t>(10 * kSampleRateHz / 1000);
  constexpr size_t kPayloadLengthBytes = 2 * kPayloadLengthSamples;
  // Every pulled frame is expected to hold 10 ms of audio.
  constexpr size_t kMaxOutputSize =
      static_cast<size_t>(10 * kSampleRateHz / 1000);
  constexpr int kNumInitialPulls = 100;
  constexpr size_t kNumPackets = 10;
  constexpr size_t kNumDecodedPulls = 3;

  uint8_t payload[kPayloadLengthBytes] = {0};

  RTPHeader rtp_header;
  rtp_header.ssrc = 0x87654321;
  rtp_header.timestamp = 0x12345678;
  rtp_header.sequenceNumber = 0x1234;
  rtp_header.payloadType = kPayloadType;

  // Register the payload type.
  EXPECT_TRUE(neteq_->RegisterPayloadType(
      kPayloadType, SdpAudioFormat("l16", kSampleRateHz, 1)));

  AudioFrame output;
  bool muted;

  // Pull audio several times before any packet has been inserted. None of the
  // produced frames may be reported as normal speech.
  for (int pull = 0; pull < kNumInitialPulls; ++pull) {
    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
    ASSERT_LE(output.samples_per_channel_, kMaxOutputSize);
    EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
    EXPECT_EQ(1u, output.num_channels_);
    EXPECT_NE(AudioFrame::kNormalSpeech, output.speech_type_);
  }

  // Insert 10 packets, checking that the buffer grows by one for each of them.
  for (size_t num_inserted = 1; num_inserted <= kNumPackets; ++num_inserted) {
    rtp_header.sequenceNumber++;
    rtp_header.timestamp += kPayloadLengthSamples;
    EXPECT_EQ(NetEq::kOK,
              neteq_->InsertPacket(rtp_header, payload, kReceiveTime));
    EXPECT_EQ(num_inserted, packet_buffer_->NumPacketsInBuffer());
  }

  // Pull audio repeatedly and make sure we get normal output, that is not PLC.
  for (size_t pull = 0; pull < kNumDecodedPulls; ++pull) {
    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
    ASSERT_LE(output.samples_per_channel_, kMaxOutputSize);
    EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
    EXPECT_EQ(1u, output.num_channels_);
    EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_)
        << "NetEq did not decode the packets as expected.";
  }

  // The concealment that preceded the first decode must not have been counted.
  const auto lifetime_stats = neteq_->GetLifetimeStatistics();
  EXPECT_EQ(0u, lifetime_stats.interruption_count);
}
// This test verifies that NetEq can handle comfort noise and enters/quits codec
// internal CNG mode properly.
TEST_F(NetEqImplTest, CodecInternalCng) {

View file

@ -29,6 +29,8 @@ size_t AddIntToSizeTWithLowerCap(int a, size_t b) {
"int must not be wider than size_t for this to work");
return (a < 0 && ret > b) ? 0 : ret;
}
// Minimum duration, in ms, of an expand (loss-concealment) event for it to be
// logged as an interruption.
constexpr int kInterruptionLenMs = 150;
} // namespace
// Allocating the static const so that it can be passed by reference to
@ -176,6 +178,24 @@ void StatisticsCalculator::ExpandedNoiseSamplesCorrection(int num_samples) {
ConcealedSamplesCorrection(num_samples, false);
}
// Records that decoded output has been played. Until this has been called,
// EndExpandEvent() does not log any interruption.
void StatisticsCalculator::DecodedOutputPlayed() {
decoded_output_played_ = true;
}
void StatisticsCalculator::EndExpandEvent(int fs_hz) {
RTC_DCHECK_GE(lifetime_stats_.concealed_samples,
concealed_samples_at_event_end_);
const int event_duration_ms =
1000 *
(lifetime_stats_.concealed_samples - concealed_samples_at_event_end_) /
fs_hz;
if (event_duration_ms >= kInterruptionLenMs && decoded_output_played_) {
lifetime_stats_.interruption_count++;
lifetime_stats_.total_interruption_duration_ms += event_duration_ms;
}
concealed_samples_at_event_end_ = lifetime_stats_.concealed_samples;
}
void StatisticsCalculator::ConcealedSamplesCorrection(int num_samples,
bool is_voice) {
if (num_samples < 0) {

View file

@ -50,6 +50,11 @@ class StatisticsCalculator {
// Same as ExpandedVoiceSamplesCorrection but for noise samples.
void ExpandedNoiseSamplesCorrection(int num_samples);
// Reports that decoded output has been played. Expand events that end before
// the first such report are not counted as interruptions.
void DecodedOutputPlayed();
// Mark end of expand event; triggers some stats to be reported.
void EndExpandEvent(int fs_hz);
// Reports that |num_samples| samples were produced through preemptive
// expansion.
void PreemptiveExpandedSamples(size_t num_samples);
@ -200,6 +205,7 @@ class StatisticsCalculator {
size_t added_zero_samples_;
size_t expanded_speech_samples_;
size_t expanded_noise_samples_;
size_t concealed_samples_at_event_end_ = 0;
size_t discarded_packets_;
size_t lost_timestamps_;
uint32_t timestamps_since_last_report_;
@ -209,6 +215,7 @@ class StatisticsCalculator {
PeriodicUmaCount delayed_packet_outage_counter_;
PeriodicUmaAverage excess_buffer_delay_;
PeriodicUmaCount buffer_full_counter_;
bool decoded_output_played_ = false;
RTC_DISALLOW_COPY_AND_ASSIGN(StatisticsCalculator);
};

View file

@ -128,4 +128,60 @@ TEST(StatisticsCalculator, ReceivedPacket) {
EXPECT_EQ(2u, stats_output.jitter_buffer_packets_received);
}
// Verifies that expand events are counted as interruptions, and their durations
// accumulated, only when they reach the 150 ms threshold.
TEST(StatisticsCalculator, InterruptionCounter) {
  constexpr int kSamplesPerMs = 48;
  constexpr int kSampleRateHz = kSamplesPerMs * 1000;
  StatisticsCalculator calculator;
  calculator.DecodedOutputPlayed();

  // Ending an event without any concealed samples must not log anything.
  calculator.EndExpandEvent(kSampleRateHz);
  {
    const auto snapshot = calculator.GetLifetimeStatistics();
    EXPECT_EQ(0u, snapshot.interruption_count);
    EXPECT_EQ(0u, snapshot.total_interruption_duration_ms);
  }

  // An event of 10 + 139 = 149 ms is shorter than 150 ms. Should not be logged.
  calculator.ExpandedVoiceSamples(10 * kSamplesPerMs, false);
  calculator.ExpandedNoiseSamples(139 * kSamplesPerMs, false);
  calculator.EndExpandEvent(kSampleRateHz);
  {
    const auto snapshot = calculator.GetLifetimeStatistics();
    EXPECT_EQ(0u, snapshot.interruption_count);
  }

  // An event of 140 + 11 = 151 ms is longer than 150 ms. Should be logged.
  calculator.ExpandedVoiceSamples(140 * kSamplesPerMs, false);
  calculator.ExpandedNoiseSamples(11 * kSamplesPerMs, false);
  calculator.EndExpandEvent(kSampleRateHz);
  {
    const auto snapshot = calculator.GetLifetimeStatistics();
    EXPECT_EQ(1u, snapshot.interruption_count);
    EXPECT_EQ(151u, snapshot.total_interruption_duration_ms);
  }

  // One more long event of 100 + 5000 = 5100 ms adds to both counters.
  calculator.ExpandedVoiceSamples(100 * kSamplesPerMs, false);
  calculator.ExpandedNoiseSamples(5000 * kSamplesPerMs, false);
  calculator.EndExpandEvent(kSampleRateHz);
  {
    const auto snapshot = calculator.GetLifetimeStatistics();
    EXPECT_EQ(2u, snapshot.interruption_count);
    EXPECT_EQ(5100u + 151u, snapshot.total_interruption_duration_ms);
  }
}
// Verifies that long expand events are only counted as interruptions once
// DecodedOutputPlayed() has been called.
TEST(StatisticsCalculator, InterruptionCounterDoNotLogBeforeDecoding) {
  constexpr int kSamplesPerMs = 48;
  constexpr int kSampleRateHz = kSamplesPerMs * 1000;
  StatisticsCalculator calculator;

  // A 151 ms event would normally be logged, but DecodedOutputPlayed() has not
  // been called yet, so this one must be ignored.
  calculator.ExpandedVoiceSamples(151 * kSamplesPerMs, false);
  calculator.EndExpandEvent(kSampleRateHz);
  {
    const auto snapshot = calculator.GetLifetimeStatistics();
    EXPECT_EQ(0u, snapshot.interruption_count);
  }

  // Logging should happen after this call.
  calculator.DecodedOutputPlayed();

  // A second 151 ms event is now counted.
  calculator.ExpandedVoiceSamples(151 * kSamplesPerMs, false);
  calculator.EndExpandEvent(kSampleRateHz);
  {
    const auto snapshot = calculator.GetLifetimeStatistics();
    EXPECT_EQ(1u, snapshot.interruption_count);
  }
}
} // namespace webrtc

View file

@ -75,6 +75,18 @@ void NetEqStatsPlotter::SimulationEnded(int64_t simulation_time_ms) {
printf("%s\n", concealment_event.ToString().c_str());
printf(" end of concealment_events_ms\n");
}
// Print the interruption metrics taken from the last lifetime-stats entry, if
// there is one.
const auto lifetime_stats_vector = stats_getter_->lifetime_stats();
if (!lifetime_stats_vector->empty()) {
  auto lifetime_stats = lifetime_stats_vector->back().second;
  // Both counters are uint64_t, so they take the unsigned PRIu64 conversion
  // rather than the signed PRId64.
  printf(" num_interruptions: %" PRIu64 "\n",
         lifetime_stats.interruption_count);
  printf(" sum_interruption_length_ms: %" PRIu64 " ms\n",
         lifetime_stats.total_interruption_duration_ms);
  // Share of the simulated time that was spent in interruptions, in percent.
  printf(" interruption ratio: %f%%\n",
         100.0 * lifetime_stats.total_interruption_duration_ms /
             simulation_time_ms);
}
}
} // namespace test