mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-13 22:00:47 +01:00
NetEq: Create an audio interruption metric
This CL adds a new metric to NetEq, which logs whenever a loss concealment event has lasted longer than 150 ms (an "interruption"). The number of such events, as well as the sum length of them, is kept in a SampleCounter, which can be queried at any time. Any initial PLC at the beginning of a call, before the first packet is decoded, is ignored. Unit tests and piping to neteq_rtpplay are included. Bug: webrtc:10549 Change-Id: I8a224a34254c47c74317617f420f6de997232d88 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/132796 Reviewed-by: Minyue Li <minyue@webrtc.org> Commit-Queue: Henrik Lundin <henrik.lundin@webrtc.org> Cr-Commit-Position: refs/heads/master@{#27781}
This commit is contained in:
parent
42fa30fdac
commit
2a8bd090a3
8 changed files with 164 additions and 0 deletions
|
@ -324,12 +324,14 @@ void Expand::SetParametersForNormalAfterExpand() {
|
|||
lag_index_direction_ = 0;
|
||||
stop_muting_ = true; // Do not mute signal any more.
|
||||
statistics_->LogDelayedPacketOutageEvent(expand_duration_samples_, fs_hz_);
|
||||
statistics_->EndExpandEvent(fs_hz_);
|
||||
}
|
||||
|
||||
// Resets the lag selection state, stops muting, and reports the end of the
// current expand event to the statistics calculator.
void Expand::SetParametersForMergeAfterExpand() {
  // Signal the statistics first; it only needs the sample rate.
  statistics_->EndExpandEvent(fs_hz_);

  // No further muting of the signal.
  stop_muting_ = true;

  // -1 lies outside of the 3 possible lag indices.
  current_lag_index_ = -1;
  // A direction of 1 makes sure we get the "optimal" lag.
  lag_index_direction_ = 1;
}
|
||||
|
||||
bool Expand::Muted() const {
|
||||
|
|
|
@ -87,6 +87,11 @@ struct NetEqLifetimeStatistics {
|
|||
// packet can be made dynamic.
|
||||
uint64_t relative_packet_arrival_delay_ms = 0;
|
||||
uint64_t jitter_buffer_packets_received = 0;
|
||||
// An interruption is a loss-concealment event lasting at least 150 ms. The
|
||||
// two stats below count the number of such events and the total duration of
|
||||
// these events.
|
||||
uint64_t interruption_count = 0;
|
||||
uint64_t total_interruption_duration_ms = 0;
|
||||
};
|
||||
|
||||
// Metrics that describe the operations performed in NetEq, and the internal
|
||||
|
|
|
@ -822,6 +822,9 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
|
|||
switch (operation) {
|
||||
case kNormal: {
|
||||
DoNormal(decoded_buffer_.get(), length, speech_type, play_dtmf);
|
||||
if (length > 0) {
|
||||
stats_->DecodedOutputPlayed();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kMerge: {
|
||||
|
|
|
@ -689,6 +689,65 @@ TEST_F(NetEqImplTest, FirstPacketUnknown) {
|
|||
}
|
||||
}
|
||||
|
||||
// This test verifies that audio interruption is not logged for the initial
|
||||
// PLC period before the first packet is decoded.
|
||||
// TODO(henrik.lundin) Maybe move this test to neteq_network_stats_unittest.cc.
|
||||
TEST_F(NetEqImplTest, NoAudioInterruptionLoggedBeforeFirstDecode) {
  UseNoMocks();
  CreateInstance();

  // Stream parameters. The payload type and receive time are arbitrary; their
  // values do not matter for this test.
  const int kSampleRateHz = 8000;
  const uint8_t kPayloadType = 17;
  const uint32_t kReceiveTime = 17;
  // Each packet, and each pulled output frame, covers 10 ms of audio.
  const size_t kPayloadLengthSamples =
      static_cast<size_t>(10 * kSampleRateHz / 1000);
  const size_t kPayloadLengthBytes = kPayloadLengthSamples * 2;
  const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000);

  uint8_t payload[kPayloadLengthBytes] = {0};
  RTPHeader rtp_header;
  rtp_header.ssrc = 0x87654321;
  rtp_header.timestamp = 0x12345678;
  rtp_header.sequenceNumber = 0x1234;
  rtp_header.payloadType = kPayloadType;

  // The payload type must be registered before packets can be inserted.
  EXPECT_TRUE(neteq_->RegisterPayloadType(
      kPayloadType, SdpAudioFormat("l16", kSampleRateHz, 1)));

  AudioFrame output;
  bool muted;

  // Phase 1: pull 100 frames with nothing inserted. None of them may be
  // reported as normal speech.
  for (int pull = 0; pull < 100; ++pull) {
    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
    ASSERT_LE(output.samples_per_channel_, kMaxOutputSize);
    EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
    EXPECT_EQ(1u, output.num_channels_);
    EXPECT_NE(AudioFrame::kNormalSpeech, output.speech_type_);
  }

  // Phase 2: insert 10 consecutive packets and check that the buffer grows by
  // one packet per insertion.
  for (size_t packets_inserted = 1; packets_inserted <= 10;
       ++packets_inserted) {
    rtp_header.sequenceNumber++;
    rtp_header.timestamp += kPayloadLengthSamples;
    EXPECT_EQ(NetEq::kOK,
              neteq_->InsertPacket(rtp_header, payload, kReceiveTime));
    EXPECT_EQ(packets_inserted, packet_buffer_->NumPacketsInBuffer());
  }

  // Phase 3: pull a few frames; these must now be normal speech, not PLC.
  for (size_t pull = 0; pull < 3; ++pull) {
    EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
    ASSERT_LE(output.samples_per_channel_, kMaxOutputSize);
    EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
    EXPECT_EQ(1u, output.num_channels_);
    EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_)
        << "NetEq did not decode the packets as expected.";
  }

  // The concealment in phase 1 must not have been counted as an interruption.
  EXPECT_EQ(0u, neteq_->GetLifetimeStatistics().interruption_count);
}
|
||||
|
||||
// This test verifies that NetEq can handle comfort noise and enters/quits codec
|
||||
// internal CNG mode properly.
|
||||
TEST_F(NetEqImplTest, CodecInternalCng) {
|
||||
|
|
|
@ -29,6 +29,8 @@ size_t AddIntToSizeTWithLowerCap(int a, size_t b) {
|
|||
"int must not be wider than size_t for this to work");
|
||||
return (a < 0 && ret > b) ? 0 : ret;
|
||||
}
|
||||
|
||||
constexpr int kInterruptionLenMs = 150;
|
||||
} // namespace
|
||||
|
||||
// Allocating the static const so that it can be passed by reference to
|
||||
|
@ -176,6 +178,24 @@ void StatisticsCalculator::ExpandedNoiseSamplesCorrection(int num_samples) {
|
|||
ConcealedSamplesCorrection(num_samples, false);
|
||||
}
|
||||
|
||||
// Raises the |decoded_output_played_| flag. EndExpandEvent() only counts
// interruptions while this flag is set.
void StatisticsCalculator::DecodedOutputPlayed() {
  decoded_output_played_ = true;
}
|
||||
|
||||
void StatisticsCalculator::EndExpandEvent(int fs_hz) {
|
||||
RTC_DCHECK_GE(lifetime_stats_.concealed_samples,
|
||||
concealed_samples_at_event_end_);
|
||||
const int event_duration_ms =
|
||||
1000 *
|
||||
(lifetime_stats_.concealed_samples - concealed_samples_at_event_end_) /
|
||||
fs_hz;
|
||||
if (event_duration_ms >= kInterruptionLenMs && decoded_output_played_) {
|
||||
lifetime_stats_.interruption_count++;
|
||||
lifetime_stats_.total_interruption_duration_ms += event_duration_ms;
|
||||
}
|
||||
concealed_samples_at_event_end_ = lifetime_stats_.concealed_samples;
|
||||
}
|
||||
|
||||
void StatisticsCalculator::ConcealedSamplesCorrection(int num_samples,
|
||||
bool is_voice) {
|
||||
if (num_samples < 0) {
|
||||
|
|
|
@ -50,6 +50,11 @@ class StatisticsCalculator {
|
|||
// Same as ExpandedVoiceSamplesCorrection but for noise samples.
|
||||
void ExpandedNoiseSamplesCorrection(int num_samples);
|
||||
|
||||
void DecodedOutputPlayed();
|
||||
|
||||
// Mark end of expand event; triggers some stats to be reported.
|
||||
void EndExpandEvent(int fs_hz);
|
||||
|
||||
// Reports that |num_samples| samples were produced through preemptive
|
||||
// expansion.
|
||||
void PreemptiveExpandedSamples(size_t num_samples);
|
||||
|
@ -200,6 +205,7 @@ class StatisticsCalculator {
|
|||
size_t added_zero_samples_;
|
||||
size_t expanded_speech_samples_;
|
||||
size_t expanded_noise_samples_;
|
||||
size_t concealed_samples_at_event_end_ = 0;
|
||||
size_t discarded_packets_;
|
||||
size_t lost_timestamps_;
|
||||
uint32_t timestamps_since_last_report_;
|
||||
|
@ -209,6 +215,7 @@ class StatisticsCalculator {
|
|||
PeriodicUmaCount delayed_packet_outage_counter_;
|
||||
PeriodicUmaAverage excess_buffer_delay_;
|
||||
PeriodicUmaCount buffer_full_counter_;
|
||||
bool decoded_output_played_ = false;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(StatisticsCalculator);
|
||||
};
|
||||
|
|
|
@ -128,4 +128,60 @@ TEST(StatisticsCalculator, ReceivedPacket) {
|
|||
EXPECT_EQ(2u, stats_output.jitter_buffer_packets_received);
|
||||
}
|
||||
|
||||
// Checks that only expand events of at least 150 ms are counted as
// interruptions, and that the durations of counted events are summed.
TEST(StatisticsCalculator, InterruptionCounter) {
  constexpr int kSamplesPerMs = 48;
  constexpr int kSampleRateHz = kSamplesPerMs * 1000;

  StatisticsCalculator calculator;
  calculator.DecodedOutputPlayed();

  // Ending an event without any expanded samples must not log anything.
  calculator.EndExpandEvent(kSampleRateHz);
  auto lifetime = calculator.GetLifetimeStatistics();
  EXPECT_EQ(0u, lifetime.interruption_count);
  EXPECT_EQ(0u, lifetime.total_interruption_duration_ms);

  // 10 ms of voice + 139 ms of noise = 149 ms: below the limit, not logged.
  calculator.ExpandedVoiceSamples(10 * kSamplesPerMs, false);
  calculator.ExpandedNoiseSamples(139 * kSamplesPerMs, false);
  calculator.EndExpandEvent(kSampleRateHz);
  lifetime = calculator.GetLifetimeStatistics();
  EXPECT_EQ(0u, lifetime.interruption_count);

  // 140 ms of voice + 11 ms of noise = 151 ms: above the limit, logged.
  calculator.ExpandedVoiceSamples(140 * kSamplesPerMs, false);
  calculator.ExpandedNoiseSamples(11 * kSamplesPerMs, false);
  calculator.EndExpandEvent(kSampleRateHz);
  lifetime = calculator.GetLifetimeStatistics();
  EXPECT_EQ(1u, lifetime.interruption_count);
  EXPECT_EQ(151u, lifetime.total_interruption_duration_ms);

  // 100 ms of voice + 5000 ms of noise: a second, much longer interruption
  // whose duration is added to the running total.
  calculator.ExpandedVoiceSamples(100 * kSamplesPerMs, false);
  calculator.ExpandedNoiseSamples(5000 * kSamplesPerMs, false);
  calculator.EndExpandEvent(kSampleRateHz);
  lifetime = calculator.GetLifetimeStatistics();
  EXPECT_EQ(2u, lifetime.interruption_count);
  EXPECT_EQ(5100u + 151u, lifetime.total_interruption_duration_ms);
}
|
||||
|
||||
// Checks that a long expand event is not counted as an interruption until
// DecodedOutputPlayed() has been called on the calculator.
TEST(StatisticsCalculator, InterruptionCounterDoNotLogBeforeDecoding) {
  constexpr int kSamplesPerMs = 48;
  constexpr int kSampleRateHz = kSamplesPerMs * 1000;

  StatisticsCalculator calculator;

  // A 151 ms event would normally be logged, but DecodedOutputPlayed() has not
  // been called yet, so it must be ignored.
  calculator.ExpandedVoiceSamples(151 * kSamplesPerMs, false);
  calculator.EndExpandEvent(kSampleRateHz);
  EXPECT_EQ(0u, calculator.GetLifetimeStatistics().interruption_count);

  // From here on, logging is enabled.
  calculator.DecodedOutputPlayed();

  // A second 151 ms event is now counted.
  calculator.ExpandedVoiceSamples(151 * kSamplesPerMs, false);
  calculator.EndExpandEvent(kSampleRateHz);
  EXPECT_EQ(1u, calculator.GetLifetimeStatistics().interruption_count);
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
|
|
@ -75,6 +75,18 @@ void NetEqStatsPlotter::SimulationEnded(int64_t simulation_time_ms) {
|
|||
printf("%s\n", concealment_event.ToString().c_str());
|
||||
printf(" end of concealment_events_ms\n");
|
||||
}
|
||||
|
||||
const auto lifetime_stats_vector = stats_getter_->lifetime_stats();
|
||||
if (!lifetime_stats_vector->empty()) {
|
||||
auto lifetime_stats = lifetime_stats_vector->back().second;
|
||||
printf(" num_interruptions: %" PRId64 "\n",
|
||||
lifetime_stats.interruption_count);
|
||||
printf(" sum_interruption_length_ms: %" PRId64 " ms\n",
|
||||
lifetime_stats.total_interruption_duration_ms);
|
||||
printf(" interruption ratio: %f%%\n",
|
||||
100.0 * lifetime_stats.total_interruption_duration_ms /
|
||||
simulation_time_ms);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
|
|
Loading…
Reference in a new issue