Adding a delay line to NetEq's output

This change adds an optional extra delay to NetEq's output. Note that this is
not equivalent to increasing the jitter buffer by the same amount: the delay is
applied after the jitter buffer, so packet-loss and time-stretching behavior
are unaffected.

Bug: b/156734419
Change-Id: I8b70b6b3bffcfd3da296ccf29853864baa03d6bb
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/175110
Commit-Queue: Henrik Lundin <henrik.lundin@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Ivo Creusen <ivoc@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31343}
Henrik Lundin, 2020-05-25 11:26:15 +02:00, committed by Commit Bot
parent 848ea9f0d3
commit c49e9c253f
12 changed files with 306 additions and 14 deletions


@@ -11,6 +11,8 @@
#include "api/audio/audio_frame.h"
#include <string.h>
#include <algorithm>
#include <utility>
#include "rtc_base/checks.h"
#include "rtc_base/time_utils.h"
@@ -22,6 +24,28 @@ AudioFrame::AudioFrame() {
static_assert(sizeof(data_) == kMaxDataSizeBytes, "kMaxDataSizeBytes");
}
void swap(AudioFrame& a, AudioFrame& b) {
using std::swap;
swap(a.timestamp_, b.timestamp_);
swap(a.elapsed_time_ms_, b.elapsed_time_ms_);
swap(a.ntp_time_ms_, b.ntp_time_ms_);
swap(a.samples_per_channel_, b.samples_per_channel_);
swap(a.sample_rate_hz_, b.sample_rate_hz_);
swap(a.num_channels_, b.num_channels_);
swap(a.channel_layout_, b.channel_layout_);
swap(a.speech_type_, b.speech_type_);
swap(a.vad_activity_, b.vad_activity_);
swap(a.profile_timestamp_ms_, b.profile_timestamp_ms_);
swap(a.packet_infos_, b.packet_infos_);
const size_t length_a = a.samples_per_channel_ * a.num_channels_;
const size_t length_b = b.samples_per_channel_ * b.num_channels_;
RTC_DCHECK_LE(length_a, AudioFrame::kMaxDataSizeSamples);
RTC_DCHECK_LE(length_b, AudioFrame::kMaxDataSizeSamples);
std::swap_ranges(a.data_, a.data_ + std::max(length_a, length_b), b.data_);
swap(a.muted_, b.muted_);
swap(a.absolute_capture_timestamp_ms_, b.absolute_capture_timestamp_ms_);
}
void AudioFrame::Reset() {
ResetWithoutMuting();
muted_ = true;
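
To illustrate the new free function, here is a small usage sketch (the HandOff
helper is hypothetical; only swap(AudioFrame&, AudioFrame&) comes from this
change). It shows the intended ADL pattern: bring std::swap into scope and call
swap unqualified, so the AudioFrame overload above is selected and the two
frames' contents, sample data included, are exchanged in place.

#include <utility>

#include "api/audio/audio_frame.h"

// Hypothetical helper: exchange a freshly produced frame with a storage slot
// in place, sample data included.
void HandOff(webrtc::AudioFrame& produced, webrtc::AudioFrame& slot) {
  using std::swap;       // Fallback for types without a dedicated swap.
  swap(slot, produced);  // ADL picks webrtc::swap(AudioFrame&, AudioFrame&).
}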


@@ -14,6 +14,8 @@
#include <stddef.h>
#include <stdint.h>
#include <utility>
#include "api/audio/channel_layout.h"
#include "api/rtp_packet_infos.h"
#include "rtc_base/constructor_magic.h"
@@ -58,6 +60,8 @@ class AudioFrame {
AudioFrame();
friend void swap(AudioFrame& a, AudioFrame& b);
// Resets all members to their default state.
void Reset();
// Same as Reset(), but leaves mute state unchanged. Muting a frame requires


@@ -133,4 +133,54 @@ TEST(AudioFrameTest, CopyFrom) {
EXPECT_EQ(0, memcmp(frame2.data(), frame1.data(), sizeof(samples)));
}
TEST(AudioFrameTest, SwapFrames) {
AudioFrame frame1, frame2;
int16_t samples1[kNumChannelsMono * kSamplesPerChannel];
for (size_t i = 0; i < kNumChannelsMono * kSamplesPerChannel; ++i) {
samples1[i] = i;
}
frame1.UpdateFrame(kTimestamp, samples1, kSamplesPerChannel, kSampleRateHz,
AudioFrame::kPLC, AudioFrame::kVadActive,
kNumChannelsMono);
frame1.set_absolute_capture_timestamp_ms(12345678);
const auto frame1_channel_layout = frame1.channel_layout();
int16_t samples2[(kNumChannelsMono + 1) * (kSamplesPerChannel + 1)];
for (size_t i = 0; i < (kNumChannelsMono + 1) * (kSamplesPerChannel + 1);
++i) {
samples2[i] = 1000 + i;
}
frame2.UpdateFrame(kTimestamp + 1, samples2, kSamplesPerChannel + 1,
kSampleRateHz + 1, AudioFrame::kNormalSpeech,
AudioFrame::kVadPassive, kNumChannelsMono + 1);
const auto frame2_channel_layout = frame2.channel_layout();
swap(frame1, frame2);
EXPECT_EQ(kTimestamp + 1, frame1.timestamp_);
ASSERT_EQ(kSamplesPerChannel + 1, frame1.samples_per_channel_);
EXPECT_EQ(kSampleRateHz + 1, frame1.sample_rate_hz_);
EXPECT_EQ(AudioFrame::kNormalSpeech, frame1.speech_type_);
EXPECT_EQ(AudioFrame::kVadPassive, frame1.vad_activity_);
ASSERT_EQ(kNumChannelsMono + 1, frame1.num_channels_);
for (size_t i = 0; i < (kNumChannelsMono + 1) * (kSamplesPerChannel + 1);
++i) {
EXPECT_EQ(samples2[i], frame1.data()[i]);
}
EXPECT_FALSE(frame1.absolute_capture_timestamp_ms());
EXPECT_EQ(frame2_channel_layout, frame1.channel_layout());
EXPECT_EQ(kTimestamp, frame2.timestamp_);
ASSERT_EQ(kSamplesPerChannel, frame2.samples_per_channel_);
EXPECT_EQ(kSampleRateHz, frame2.sample_rate_hz_);
EXPECT_EQ(AudioFrame::kPLC, frame2.speech_type_);
EXPECT_EQ(AudioFrame::kVadActive, frame2.vad_activity_);
ASSERT_EQ(kNumChannelsMono, frame2.num_channels_);
for (size_t i = 0; i < kNumChannelsMono * kSamplesPerChannel; ++i) {
EXPECT_EQ(samples1[i], frame2.data()[i]);
}
EXPECT_EQ(12345678, frame2.absolute_capture_timestamp_ms());
EXPECT_EQ(frame1_channel_layout, frame2.channel_layout());
}
} // namespace webrtc


@@ -30,7 +30,8 @@ std::string NetEq::Config::ToString() const {
<< ", min_delay_ms=" << min_delay_ms << ", enable_fast_accelerate="
<< (enable_fast_accelerate ? "true" : "false")
<< ", enable_muted_state=" << (enable_muted_state ? "true" : "false")
<< ", enable_rtx_handling=" << (enable_rtx_handling ? "true" : "false");
<< ", enable_rtx_handling=" << (enable_rtx_handling ? "true" : "false")
<< ", extra_output_delay_ms=" << extra_output_delay_ms;
return ss.str();
}


@@ -138,6 +138,10 @@ class NetEq {
bool enable_rtx_handling = false;
absl::optional<AudioCodecPairId> codec_pair_id;
bool for_test_no_time_stretching = false; // Use only for testing.
// Adds extra delay to the output of NetEq, without affecting jitter or
// loss behavior. This is mainly for testing. Value must be a non-negative
// multiple of 10 ms.
int extra_output_delay_ms = 0;
};
enum ReturnCodes { kOK = 0, kFail = -1 };
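
A minimal sketch of how the new field could be set (the header path is as in
this change; how the config is then handed to a NetEq factory is left out and
assumed to follow the existing API):

#include "api/neteq/neteq.h"

// Sketch: request 100 ms of extra output delay. NetEqImpl divides the value
// by 10 with rtc::CheckedDivExact, so it must be a non-negative multiple of
// 10 ms, as the comment above states.
webrtc::NetEq::Config MakeDelayedOutputConfig() {
  webrtc::NetEq::Config config;
  config.extra_output_delay_ms = 100;
  return config;
}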


@@ -140,7 +140,10 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config,
10, // Report once every 10 s.
tick_timer_.get()),
no_time_stretching_(config.for_test_no_time_stretching),
enable_rtx_handling_(config.enable_rtx_handling) {
enable_rtx_handling_(config.enable_rtx_handling),
output_delay_chain_(
rtc::CheckedDivExact(config.extra_output_delay_ms, 10)),
output_delay_chain_ms_(config.extra_output_delay_ms) {
RTC_LOG(LS_INFO) << "NetEq config: " << config.ToString();
int fs = config.sample_rate_hz;
if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) {
@@ -255,6 +258,25 @@ int NetEqImpl::GetAudio(AudioFrame* audio_frame,
last_output_sample_rate_hz_ == 32000 ||
last_output_sample_rate_hz_ == 48000)
<< "Unexpected sample rate " << last_output_sample_rate_hz_;
if (!output_delay_chain_.empty()) {
if (output_delay_chain_empty_) {
for (auto& f : output_delay_chain_) {
f.CopyFrom(*audio_frame);
}
output_delay_chain_empty_ = false;
delayed_last_output_sample_rate_hz_ = last_output_sample_rate_hz_;
} else {
RTC_DCHECK_GE(output_delay_chain_ix_, 0);
RTC_DCHECK_LT(output_delay_chain_ix_, output_delay_chain_.size());
swap(output_delay_chain_[output_delay_chain_ix_], *audio_frame);
*muted = audio_frame->muted();
output_delay_chain_ix_ =
(output_delay_chain_ix_ + 1) % output_delay_chain_.size();
delayed_last_output_sample_rate_hz_ = audio_frame->sample_rate_hz();
}
}
return kOK;
}
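The block above implements the extra delay as a circular buffer of AudioFrames:
on the first call the chain is pre-filled with copies of the current frame, and
on every later call the outgoing frame is swapped with the oldest stored frame.
A standalone, generic sketch of the same mechanism (not the NetEq code itself;
Frame stands in for AudioFrame):

#include <cstddef>
#include <utility>
#include <vector>

// Generic sketch of the delay chain above: Process() stores the incoming
// frame and hands back the frame stored chain_.size() calls earlier. Unlike
// the NetEq code, it does not pre-fill the chain on the first call, so the
// first chain_.size() outputs are default-constructed frames.
template <typename Frame>
class DelayLine {
 public:
  explicit DelayLine(size_t length) : chain_(length) {}

  void Process(Frame& frame) {
    if (chain_.empty())  // A zero-length chain means no extra delay.
      return;
    using std::swap;
    swap(chain_[ix_], frame);         // `frame` now holds the delayed audio.
    ix_ = (ix_ + 1) % chain_.size();  // Advance the circular index.
  }

 private:
  std::vector<Frame> chain_;
  size_t ix_ = 0;
};
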
@@ -297,7 +319,8 @@ bool NetEqImpl::SetMinimumDelay(int delay_ms) {
rtc::CritScope lock(&crit_sect_);
if (delay_ms >= 0 && delay_ms <= 10000) {
assert(controller_.get());
return controller_->SetMinimumDelay(delay_ms);
return controller_->SetMinimumDelay(
std::max(delay_ms - output_delay_chain_ms_, 0));
}
return false;
}
@@ -306,7 +329,8 @@ bool NetEqImpl::SetMaximumDelay(int delay_ms) {
rtc::CritScope lock(&crit_sect_);
if (delay_ms >= 0 && delay_ms <= 10000) {
assert(controller_.get());
return controller_->SetMaximumDelay(delay_ms);
return controller_->SetMaximumDelay(
std::max(delay_ms - output_delay_chain_ms_, 0));
}
return false;
}
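The two changes above subtract the fixed output delay from the application's
requested minimum and maximum delays, so the total delay observed by the caller
still matches the request where possible. A hedged sketch of the same
arithmetic (the helper name is hypothetical):

#include <algorithm>

// Hypothetical mirror of the adjustment above: the jitter-buffer controller
// only needs to supply whatever part of the requested delay the fixed output
// chain does not already provide.
int ControllerDelayMs(int requested_delay_ms, int extra_output_delay_ms) {
  return std::max(requested_delay_ms - extra_output_delay_ms, 0);
}

With extra_output_delay_ms = 100, for example, SetMinimumDelay(80) forwards
0 ms to the controller, because the delay chain alone already exceeds the
request.
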
@@ -327,7 +351,7 @@ int NetEqImpl::GetBaseMinimumDelayMs() const {
int NetEqImpl::TargetDelayMs() const {
rtc::CritScope lock(&crit_sect_);
RTC_DCHECK(controller_.get());
return controller_->TargetLevelMs();
return controller_->TargetLevelMs() + output_delay_chain_ms_;
}
int NetEqImpl::FilteredCurrentDelayMs() const {
@@ -337,7 +361,8 @@ int NetEqImpl::FilteredCurrentDelayMs() const {
const int delay_samples =
controller_->GetFilteredBufferLevel() + sync_buffer_->FutureLength();
// The division below will truncate. The return value is in ms.
return delay_samples / rtc::CheckedDivExact(fs_hz_, 1000);
return delay_samples / rtc::CheckedDivExact(fs_hz_, 1000) +
output_delay_chain_ms_;
}
int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) {
@@ -351,6 +376,13 @@ int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) {
stats->jitter_peaks_found = controller_->PeakFound();
stats_->GetNetworkStatistics(fs_hz_, total_samples_in_buffers,
decoder_frame_length_, stats);
// Compensate for output delay chain.
stats->current_buffer_size_ms += output_delay_chain_ms_;
stats->preferred_buffer_size_ms += output_delay_chain_ms_;
stats->mean_waiting_time_ms += output_delay_chain_ms_;
stats->median_waiting_time_ms += output_delay_chain_ms_;
stats->min_waiting_time_ms += output_delay_chain_ms_;
stats->max_waiting_time_ms += output_delay_chain_ms_;
return 0;
}
@@ -394,12 +426,19 @@ absl::optional<uint32_t> NetEqImpl::GetPlayoutTimestamp() const {
// which is indicated by returning an empty value.
return absl::nullopt;
}
return timestamp_scaler_->ToExternal(playout_timestamp_);
size_t sum_samples_in_output_delay_chain = 0;
for (const auto& audio_frame : output_delay_chain_) {
sum_samples_in_output_delay_chain += audio_frame.samples_per_channel();
}
return timestamp_scaler_->ToExternal(
playout_timestamp_ -
static_cast<uint32_t>(sum_samples_in_output_delay_chain));
}
int NetEqImpl::last_output_sample_rate_hz() const {
rtc::CritScope lock(&crit_sect_);
return last_output_sample_rate_hz_;
return delayed_last_output_sample_rate_hz_.value_or(
last_output_sample_rate_hz_);
}
absl::optional<NetEq::DecoderFormat> NetEqImpl::GetDecoderFormat(
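GetPlayoutTimestamp() now reports the timestamp of the audio actually leaving
the delay chain, so the samples still buffered in the chain are subtracted from
the internal playout timestamp. A hedged arithmetic sketch (the helper is
hypothetical):

#include <cstddef>
#include <cstdint>

// Hypothetical helper mirroring the adjustment above. RTP timestamps count
// samples, so with 100 ms of extra delay at 16 kHz mono the chain holds
// 10 frames * 160 samples = 1600 samples, and the reported timestamp lags
// the internal one by 1600. Unsigned arithmetic keeps the subtraction well
// defined across timestamp wrap-around.
uint32_t DelayedPlayoutTimestamp(uint32_t internal_playout_timestamp,
                                 size_t delayed_samples) {
  return internal_playout_timestamp - static_cast<uint32_t>(delayed_samples);
}
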
@@ -1988,8 +2027,9 @@ int NetEqImpl::ExtractPackets(size_t required_samples,
extracted_samples = packet->timestamp - first_timestamp + packet_duration;
RTC_DCHECK(controller_);
stats_->JitterBufferDelay(packet_duration, waiting_time_ms,
controller_->TargetLevelMs());
stats_->JitterBufferDelay(
packet_duration, waiting_time_ms + output_delay_chain_ms_,
controller_->TargetLevelMs() + output_delay_chain_ms_);
packet_list->push_back(std::move(*packet)); // Store packet in list.
packet = absl::nullopt; // Ensure it's never used after the move.


@@ -402,6 +402,22 @@ class NetEqImpl : public webrtc::NetEq {
bool no_time_stretching_ RTC_GUARDED_BY(crit_sect_); // Only used for test.
rtc::BufferT<int16_t> concealment_audio_ RTC_GUARDED_BY(crit_sect_);
const bool enable_rtx_handling_ RTC_GUARDED_BY(crit_sect_);
// Data members used for adding extra delay to the output of NetEq.
// Vector of AudioFrames which contains the delayed audio. Accessed as a
// circular buffer.
std::vector<AudioFrame> output_delay_chain_ RTC_GUARDED_BY(crit_sect_);
// Index into output_delay_chain_.
size_t output_delay_chain_ix_ RTC_GUARDED_BY(crit_sect_) = 0;
// The delay in ms (which is 10 times the number of elements in
// output_delay_chain_).
const int output_delay_chain_ms_ RTC_GUARDED_BY(crit_sect_);
// Did output_delay_chain_ get populated yet?
bool output_delay_chain_empty_ RTC_GUARDED_BY(crit_sect_) = true;
// Contains the sample rate of the AudioFrame last emitted from the delay
// chain. If the extra output delay chain is not used, or if no audio has been
// emitted yet, the variable is empty.
absl::optional<int> delayed_last_output_sample_rate_hz_
RTC_GUARDED_BY(crit_sect_);
private:
RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);


@@ -1102,5 +1102,156 @@ TEST(NetEqNoTimeStretchingMode, RunTest) {
EXPECT_EQ(0, stats.preemptive_rate);
}
namespace {
// Helper classes, data types, and functions for NetEqOutputDelayTest.
class VectorAudioSink : public AudioSink {
public:
// Does not take ownership of the vector.
VectorAudioSink(std::vector<int16_t>* output_vector) : v_(output_vector) {}
virtual ~VectorAudioSink() = default;
bool WriteArray(const int16_t* audio, size_t num_samples) override {
v_->reserve(v_->size() + num_samples);
for (size_t i = 0; i < num_samples; ++i) {
v_->push_back(audio[i]);
}
return true;
}
private:
std::vector<int16_t>* const v_;
};
struct TestResult {
NetEqLifetimeStatistics lifetime_stats;
NetEqNetworkStatistics network_stats;
absl::optional<uint32_t> playout_timestamp;
int target_delay_ms;
int filtered_current_delay_ms;
int sample_rate_hz;
};
// This class is used as a callback object to NetEqTest to collect some stats
// at the end of the simulation.
class SimEndStatsCollector : public NetEqSimulationEndedCallback {
public:
SimEndStatsCollector(TestResult& result) : result_(result) {}
void SimulationEnded(int64_t /*simulation_time_ms*/, NetEq* neteq) override {
result_.playout_timestamp = neteq->GetPlayoutTimestamp();
result_.target_delay_ms = neteq->TargetDelayMs();
result_.filtered_current_delay_ms = neteq->FilteredCurrentDelayMs();
result_.sample_rate_hz = neteq->last_output_sample_rate_hz();
}
private:
TestResult& result_;
};
TestResult DelayLineNetEqTest(int delay_ms,
std::vector<int16_t>* output_vector) {
NetEq::Config config;
config.for_test_no_time_stretching = true;
config.extra_output_delay_ms = delay_ms;
auto codecs = NetEqTest::StandardDecoderMap();
NetEqPacketSourceInput::RtpHeaderExtensionMap rtp_ext_map = {
{1, kRtpExtensionAudioLevel},
{3, kRtpExtensionAbsoluteSendTime},
{5, kRtpExtensionTransportSequenceNumber},
{7, kRtpExtensionVideoContentType},
{8, kRtpExtensionVideoTiming}};
std::unique_ptr<NetEqInput> input = std::make_unique<NetEqRtpDumpInput>(
webrtc::test::ResourcePath("audio_coding/neteq_universal_new", "rtp"),
rtp_ext_map, absl::nullopt /*No SSRC filter*/);
std::unique_ptr<TimeLimitedNetEqInput> input_time_limit(
new TimeLimitedNetEqInput(std::move(input), 10000));
std::unique_ptr<AudioSink> output =
std::make_unique<VectorAudioSink>(output_vector);
TestResult result;
SimEndStatsCollector stats_collector(result);
NetEqTest::Callbacks callbacks;
callbacks.simulation_ended_callback = &stats_collector;
NetEqTest test(config, CreateBuiltinAudioDecoderFactory(), codecs,
/*text_log=*/nullptr, /*neteq_factory=*/nullptr,
/*input=*/std::move(input_time_limit), std::move(output),
callbacks);
test.Run();
result.lifetime_stats = test.LifetimeStats();
result.network_stats = test.SimulationStats();
return result;
}
} // namespace
// Tests the extra output delay functionality of NetEq.
TEST(NetEqOutputDelayTest, RunTest) {
std::vector<int16_t> output;
const auto result_no_delay = DelayLineNetEqTest(0, &output);
std::vector<int16_t> output_delayed;
constexpr int kDelayMs = 100;
const auto result_delay = DelayLineNetEqTest(kDelayMs, &output_delayed);
// Verify that the loss concealment remains unchanged. The point of the delay
// is to not affect the jitter buffering behavior.
// First verify that there are concealments in the test.
EXPECT_GT(result_no_delay.lifetime_stats.concealed_samples, 0u);
// And that not all of the output is concealment.
EXPECT_GT(result_no_delay.lifetime_stats.total_samples_received,
result_no_delay.lifetime_stats.concealed_samples);
// Now verify that they remain unchanged by the delay.
EXPECT_EQ(result_no_delay.lifetime_stats.concealed_samples,
result_delay.lifetime_stats.concealed_samples);
// Accelerate and pre-emptive expand should also be unchanged.
EXPECT_EQ(result_no_delay.lifetime_stats.inserted_samples_for_deceleration,
result_delay.lifetime_stats.inserted_samples_for_deceleration);
EXPECT_EQ(result_no_delay.lifetime_stats.removed_samples_for_acceleration,
result_delay.lifetime_stats.removed_samples_for_acceleration);
// Verify that delay stats are increased with the delay chain.
EXPECT_EQ(
result_no_delay.lifetime_stats.jitter_buffer_delay_ms +
kDelayMs * result_no_delay.lifetime_stats.jitter_buffer_emitted_count,
result_delay.lifetime_stats.jitter_buffer_delay_ms);
EXPECT_EQ(
result_no_delay.lifetime_stats.jitter_buffer_target_delay_ms +
kDelayMs * result_no_delay.lifetime_stats.jitter_buffer_emitted_count,
result_delay.lifetime_stats.jitter_buffer_target_delay_ms);
EXPECT_EQ(result_no_delay.network_stats.current_buffer_size_ms + kDelayMs,
result_delay.network_stats.current_buffer_size_ms);
EXPECT_EQ(result_no_delay.network_stats.preferred_buffer_size_ms + kDelayMs,
result_delay.network_stats.preferred_buffer_size_ms);
EXPECT_EQ(result_no_delay.network_stats.mean_waiting_time_ms + kDelayMs,
result_delay.network_stats.mean_waiting_time_ms);
EXPECT_EQ(result_no_delay.network_stats.median_waiting_time_ms + kDelayMs,
result_delay.network_stats.median_waiting_time_ms);
EXPECT_EQ(result_no_delay.network_stats.min_waiting_time_ms + kDelayMs,
result_delay.network_stats.min_waiting_time_ms);
EXPECT_EQ(result_no_delay.network_stats.max_waiting_time_ms + kDelayMs,
result_delay.network_stats.max_waiting_time_ms);
ASSERT_TRUE(result_no_delay.playout_timestamp);
ASSERT_TRUE(result_delay.playout_timestamp);
EXPECT_EQ(*result_no_delay.playout_timestamp -
static_cast<uint32_t>(
kDelayMs *
rtc::CheckedDivExact(result_no_delay.sample_rate_hz, 1000)),
*result_delay.playout_timestamp);
EXPECT_EQ(result_no_delay.target_delay_ms + kDelayMs,
result_delay.target_delay_ms);
EXPECT_EQ(result_no_delay.filtered_current_delay_ms + kDelayMs,
result_delay.filtered_current_delay_ms);
// Verify expected delay in decoded signal. The test vector uses 8 kHz sample
// rate, so the delay will be 8 times the delay in ms.
constexpr size_t kExpectedDelaySamples = kDelayMs * 8;
for (size_t i = 0;
i < output.size() && i + kExpectedDelaySamples < output_delayed.size();
++i) {
EXPECT_EQ(output[i], output_delayed[i + kExpectedDelaySamples]);
}
}
} // namespace test
} // namespace webrtc


@@ -33,7 +33,8 @@ NetEqStatsPlotter::NetEqStatsPlotter(bool make_matlab_plot,
stats_getter_.reset(new NetEqStatsGetter(std::move(delay_analyzer)));
}
void NetEqStatsPlotter::SimulationEnded(int64_t simulation_time_ms) {
void NetEqStatsPlotter::SimulationEnded(int64_t simulation_time_ms,
NetEq* /*neteq*/) {
if (make_matlab_plot_) {
auto matlab_script_name = base_file_name_;
std::replace(matlab_script_name.begin(), matlab_script_name.end(), '.',


@@ -28,7 +28,7 @@ class NetEqStatsPlotter : public NetEqSimulationEndedCallback {
bool show_concealment_events,
std::string base_file_name);
void SimulationEnded(int64_t simulation_time_ms) override;
void SimulationEnded(int64_t simulation_time_ms, NetEq* neteq) override;
NetEqStatsGetter* stats_getter() { return stats_getter_.get(); }


@@ -91,7 +91,8 @@ int64_t NetEqTest::Run() {
simulation_time += step_result.simulation_step_ms;
} while (!step_result.is_simulation_finished);
if (callbacks_.simulation_ended_callback) {
callbacks_.simulation_ended_callback->SimulationEnded(simulation_time);
callbacks_.simulation_ended_callback->SimulationEnded(simulation_time,
neteq_.get());
}
return simulation_time;
}
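The call above now passes the NetEq instance to the simulation-ended callback
(the widened virtual is shown in neteq_test.h below). A hedged sketch of an
implementation against the new signature (class name, include paths, and
namespace are assumptions; SimEndStatsCollector in the unit test above is the
in-tree example):

#include <cstdint>

#include "modules/audio_coding/neteq/tools/neteq_test.h"
#include "rtc_base/logging.h"

// Hypothetical callback: the new NetEq* argument gives simulation-end code
// access to the live instance, e.g. to read the target delay.
class DelayReportingCallback
    : public webrtc::test::NetEqSimulationEndedCallback {
 public:
  void SimulationEnded(int64_t simulation_time_ms,
                       webrtc::NetEq* neteq) override {
    RTC_LOG(LS_INFO) << "Simulation ended after " << simulation_time_ms
                     << " ms; target delay is " << neteq->TargetDelayMs()
                     << " ms.";
  }
};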


@@ -61,7 +61,7 @@ class NetEqGetAudioCallback {
class NetEqSimulationEndedCallback {
public:
virtual ~NetEqSimulationEndedCallback() = default;
virtual void SimulationEnded(int64_t simulation_time_ms) = 0;
virtual void SimulationEnded(int64_t simulation_time_ms, NetEq* neteq) = 0;
};
// Class that provides an input--output test for NetEq. The input (both packets