NetEq: Create an audio interruption metric

This CL adds a new metric to NetEq, which logs whenever a loss concealment event has lasted longer than 150 ms (an "interruption"). The number of such events, as well as the sum length of them, is kept in a SampleCounter, which can be queried at any time. Any initial PLC at the beginning of a call, before the first packet is decoded, is ignored. Unit tests and piping to neteq_rtpplay are included. Bug: webrtc:10549 Change-Id: I8a224a34254c47c74317617f420f6de997232d88 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/132796 Reviewed-by: Minyue Li <minyue@webrtc.org> Commit-Queue: Henrik Lundin <henrik.lundin@webrtc.org> Cr-Commit-Position: refs/heads/master@{#27781}
2025-05-13 22:00:47 +01:00 · 2019-04-26 09:47:07 +02:00 · 2019-04-26 09:47:07 +02:00 · 2a8bd090a3
commit 2a8bd090a3
parent 42fa30fdac
8 changed files with 164 additions and 0 deletions
--- a/modules/audio_coding/neteq/expand.cc
+++ b/modules/audio_coding/neteq/expand.cc
@ -324,12 +324,14 @@ void Expand::SetParametersForNormalAfterExpand() {
  lag_index_direction_ = 0;
  stop_muting_ = true;  // Do not mute signal any more.
  statistics_->LogDelayedPacketOutageEvent(expand_duration_samples_, fs_hz_);
+  statistics_->EndExpandEvent(fs_hz_);
 }

 void Expand::SetParametersForMergeAfterExpand() {
  current_lag_index_ = -1;  /* out of the 3 possible ones */
  lag_index_direction_ = 1; /* make sure we get the "optimal" lag */
  stop_muting_ = true;
+  statistics_->EndExpandEvent(fs_hz_);
 }

 bool Expand::Muted() const {
--- a/modules/audio_coding/neteq/include/neteq.h
+++ b/modules/audio_coding/neteq/include/neteq.h
@ -87,6 +87,11 @@ struct NetEqLifetimeStatistics {
  // packet can be made dynamic.
  uint64_t relative_packet_arrival_delay_ms = 0;
  uint64_t jitter_buffer_packets_received = 0;
+  // An interruption is a loss-concealment event lasting at least 150 ms. The
+  // two stats below count the number of such events and the total duration of
+  // these events.
+  uint64_t interruption_count = 0;
+  uint64_t total_interruption_duration_ms = 0;
 };

 // Metrics that describe the operations performed in NetEq, and the internal
--- a/modules/audio_coding/neteq/neteq_impl.cc
+++ b/modules/audio_coding/neteq/neteq_impl.cc
@ -822,6 +822,9 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
  switch (operation) {
    case kNormal: {
      DoNormal(decoded_buffer_.get(), length, speech_type, play_dtmf);
+      if (length > 0) {
+        stats_->DecodedOutputPlayed();
+      }
      break;
    }
    case kMerge: {
--- a/modules/audio_coding/neteq/neteq_impl_unittest.cc
+++ b/modules/audio_coding/neteq/neteq_impl_unittest.cc
@ -689,6 +689,65 @@ TEST_F(NetEqImplTest, FirstPacketUnknown) {
  }
 }

+// This test verifies that audio interruption is not logged for the initial
+// PLC period before the first packet is decoded.
+// TODO(henrik.lundin) Maybe move this test to neteq_network_stats_unittest.cc.
+TEST_F(NetEqImplTest, NoAudioInterruptionLoggedBeforeFirstDecode) {
+  UseNoMocks();
+  CreateInstance();
+
+  const uint8_t kPayloadType = 17;   // Just an arbitrary number.
+  const uint32_t kReceiveTime = 17;  // Value doesn't matter for this test.
+  const int kSampleRateHz = 8000;
+  const size_t kPayloadLengthSamples =
+      static_cast<size_t>(10 * kSampleRateHz / 1000);  // 10 ms.
+  const size_t kPayloadLengthBytes = kPayloadLengthSamples * 2;
+  uint8_t payload[kPayloadLengthBytes] = {0};
+  RTPHeader rtp_header;
+  rtp_header.payloadType = kPayloadType;
+  rtp_header.sequenceNumber = 0x1234;
+  rtp_header.timestamp = 0x12345678;
+  rtp_header.ssrc = 0x87654321;
+
+  // Register the payload type.
+  EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType,
+                                          SdpAudioFormat("l16", 8000, 1)));
+
+  // Pull audio several times. No packets have been inserted yet.
+  const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000);
+  AudioFrame output;
+  bool muted;
+  for (int i = 0; i < 100; ++i) {
+    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+    ASSERT_LE(output.samples_per_channel_, kMaxOutputSize);
+    EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
+    EXPECT_EQ(1u, output.num_channels_);
+    EXPECT_NE(AudioFrame::kNormalSpeech, output.speech_type_);
+  }
+
+  // Insert 10 packets.
+  for (size_t i = 0; i < 10; ++i) {
+    rtp_header.sequenceNumber++;
+    rtp_header.timestamp += kPayloadLengthSamples;
+    EXPECT_EQ(NetEq::kOK,
+              neteq_->InsertPacket(rtp_header, payload, kReceiveTime));
+    EXPECT_EQ(i + 1, packet_buffer_->NumPacketsInBuffer());
+  }
+
+  // Pull audio repeatedly and make sure we get normal output, that is not PLC.
+  for (size_t i = 0; i < 3; ++i) {
+    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
+    ASSERT_LE(output.samples_per_channel_, kMaxOutputSize);
+    EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
+    EXPECT_EQ(1u, output.num_channels_);
+    EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_)
+        << "NetEq did not decode the packets as expected.";
+  }
+
+  auto lifetime_stats = neteq_->GetLifetimeStatistics();
+  EXPECT_EQ(0u, lifetime_stats.interruption_count);
+}
+
 // This test verifies that NetEq can handle comfort noise and enters/quits codec
 // internal CNG mode properly.
 TEST_F(NetEqImplTest, CodecInternalCng) {
--- a/modules/audio_coding/neteq/statistics_calculator.cc
+++ b/modules/audio_coding/neteq/statistics_calculator.cc
@ -29,6 +29,8 @@ size_t AddIntToSizeTWithLowerCap(int a, size_t b) {
                "int must not be wider than size_t for this to work");
  return (a < 0 && ret > b) ? 0 : ret;
 }
+
+constexpr int kInterruptionLenMs = 150;
 }  // namespace

 // Allocating the static const so that it can be passed by reference to
@ -176,6 +178,24 @@ void StatisticsCalculator::ExpandedNoiseSamplesCorrection(int num_samples) {
  ConcealedSamplesCorrection(num_samples, false);
 }

+void StatisticsCalculator::DecodedOutputPlayed() {
+  decoded_output_played_ = true;
+}
+
+void StatisticsCalculator::EndExpandEvent(int fs_hz) {
+  RTC_DCHECK_GE(lifetime_stats_.concealed_samples,
+                concealed_samples_at_event_end_);
+  const int event_duration_ms =
+      1000 *
+      (lifetime_stats_.concealed_samples - concealed_samples_at_event_end_) /
+      fs_hz;
+  if (event_duration_ms >= kInterruptionLenMs && decoded_output_played_) {
+    lifetime_stats_.interruption_count++;
+    lifetime_stats_.total_interruption_duration_ms += event_duration_ms;
+  }
+  concealed_samples_at_event_end_ = lifetime_stats_.concealed_samples;
+}
+
 void StatisticsCalculator::ConcealedSamplesCorrection(int num_samples,
                                                      bool is_voice) {
  if (num_samples < 0) {
--- a/modules/audio_coding/neteq/statistics_calculator.h
+++ b/modules/audio_coding/neteq/statistics_calculator.h
@ -50,6 +50,11 @@ class StatisticsCalculator {
  // Same as ExpandedVoiceSamplesCorrection but for noise samples.
  void ExpandedNoiseSamplesCorrection(int num_samples);

+  void DecodedOutputPlayed();
+
+  // Mark end of expand event; triggers some stats to be reported.
+  void EndExpandEvent(int fs_hz);
+
  // Reports that |num_samples| samples were produced through preemptive
  // expansion.
  void PreemptiveExpandedSamples(size_t num_samples);
@ -200,6 +205,7 @@ class StatisticsCalculator {
  size_t added_zero_samples_;
  size_t expanded_speech_samples_;
  size_t expanded_noise_samples_;
+  size_t concealed_samples_at_event_end_ = 0;
  size_t discarded_packets_;
  size_t lost_timestamps_;
  uint32_t timestamps_since_last_report_;
@ -209,6 +215,7 @@ class StatisticsCalculator {
  PeriodicUmaCount delayed_packet_outage_counter_;
  PeriodicUmaAverage excess_buffer_delay_;
  PeriodicUmaCount buffer_full_counter_;
+  bool decoded_output_played_ = false;

  RTC_DISALLOW_COPY_AND_ASSIGN(StatisticsCalculator);
 };
--- a/modules/audio_coding/neteq/statistics_calculator_unittest.cc
+++ b/modules/audio_coding/neteq/statistics_calculator_unittest.cc
@ -128,4 +128,60 @@ TEST(StatisticsCalculator, ReceivedPacket) {
  EXPECT_EQ(2u, stats_output.jitter_buffer_packets_received);
 }

+TEST(StatisticsCalculator, InterruptionCounter) {
+  constexpr int fs_khz = 48;
+  constexpr int fs_hz = fs_khz * 1000;
+  StatisticsCalculator stats;
+  stats.DecodedOutputPlayed();
+  stats.EndExpandEvent(fs_hz);
+  auto lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(0u, lts.interruption_count);
+  EXPECT_EQ(0u, lts.total_interruption_duration_ms);
+
+  // Add an event that is shorter than 150 ms. Should not be logged.
+  stats.ExpandedVoiceSamples(10 * fs_khz, false);   // 10 ms.
+  stats.ExpandedNoiseSamples(139 * fs_khz, false);  // 139 ms.
+  stats.EndExpandEvent(fs_hz);
+  lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(0u, lts.interruption_count);
+
+  // Add an event that is longer than 150 ms. Should be logged.
+  stats.ExpandedVoiceSamples(140 * fs_khz, false);  // 140 ms.
+  stats.ExpandedNoiseSamples(11 * fs_khz, false);   // 11 ms.
+  stats.EndExpandEvent(fs_hz);
+  lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(1u, lts.interruption_count);
+  EXPECT_EQ(151u, lts.total_interruption_duration_ms);
+
+  // Add one more long event.
+  stats.ExpandedVoiceSamples(100 * fs_khz, false);   // 100 ms.
+  stats.ExpandedNoiseSamples(5000 * fs_khz, false);  // 5000 ms.
+  stats.EndExpandEvent(fs_hz);
+  lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(2u, lts.interruption_count);
+  EXPECT_EQ(5100u + 151u, lts.total_interruption_duration_ms);
+}
+
+TEST(StatisticsCalculator, InterruptionCounterDoNotLogBeforeDecoding) {
+  constexpr int fs_khz = 48;
+  constexpr int fs_hz = fs_khz * 1000;
+  StatisticsCalculator stats;
+
+  // Add an event that is longer than 150 ms. Should normally be logged, but we
+  // have not called DecodedOutputPlayed() yet, so it shouldn't this time.
+  stats.ExpandedVoiceSamples(151 * fs_khz, false);  // 151 ms.
+  stats.EndExpandEvent(fs_hz);
+  auto lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(0u, lts.interruption_count);
+
+  // Call DecodedOutputPlayed(). Logging should happen after this.
+  stats.DecodedOutputPlayed();
+
+  // Add one more long event.
+  stats.ExpandedVoiceSamples(151 * fs_khz, false);  // 151 ms.
+  stats.EndExpandEvent(fs_hz);
+  lts = stats.GetLifetimeStatistics();
+  EXPECT_EQ(1u, lts.interruption_count);
+}
+
 }  // namespace webrtc
--- a/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc
+++ b/modules/audio_coding/neteq/tools/neteq_stats_plotter.cc
@ -75,6 +75,18 @@ void NetEqStatsPlotter::SimulationEnded(int64_t simulation_time_ms) {
      printf("%s\n", concealment_event.ToString().c_str());
    printf(" end of concealment_events_ms\n");
  }
+
+  const auto lifetime_stats_vector = stats_getter_->lifetime_stats();
+  if (!lifetime_stats_vector->empty()) {
+    auto lifetime_stats = lifetime_stats_vector->back().second;
+    printf("  num_interruptions: %" PRId64 "\n",
+           lifetime_stats.interruption_count);
+    printf("  sum_interruption_length_ms: %" PRId64 " ms\n",
+           lifetime_stats.total_interruption_duration_ms);
+    printf("  interruption ratio: %f%%\n",
+           100.0 * lifetime_stats.total_interruption_duration_ms /
+               simulation_time_ms);
+  }
 }

 }  // namespace test