mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-19 08:37:54 +01:00

This is a slight change in behavior that fixes a bug where not all expansions were counted because more than 10 ms was available in the sync buffer, which can happen after repeated expansions. The counter should also be updated when in muted mode.

Bug: webrtc:13322
Change-Id: I067689ee251d3d1ae990a27cdd271f718b0d6f2f
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/257360
Reviewed-by: Ivo Creusen <ivoc@webrtc.org>
Commit-Queue: Jakob Ivarsson <jakobi@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#36483}
420 lines
16 KiB
C++
420 lines
16 KiB
C++
/*
|
|
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "modules/audio_coding/neteq/decision_logic.h"
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string>
|
|
|
|
#include "absl/types/optional.h"
|
|
#include "modules/audio_coding/neteq/packet_buffer.h"
|
|
#include "rtc_base/checks.h"
|
|
#include "rtc_base/experiments/field_trial_parser.h"
|
|
#include "rtc_base/logging.h"
|
|
#include "rtc_base/numerics/safe_conversions.h"
|
|
#include "system_wrappers/include/field_trial.h"
|
|
|
|
namespace webrtc {
|
|
|
|
namespace {
|
|
|
|
// Percentage of the target level that the packet buffer span is compared
// against when deciding whether to keep expanding after a muted expansion.
constexpr int kPostponeDecodingLevel = 50;
// Default value of the "target_level_window" field trial parameter, in ms.
constexpr int kDefaultTargetLevelWindowMs = 100;
// The lower time-stretching limit is never placed more than this many
// milliseconds below the target level.
constexpr int kDecelerationTargetLevelOffsetMs = 85;
|
|
|
|
std::unique_ptr<DelayManager> CreateDelayManager(
|
|
const NetEqController::Config& neteq_config) {
|
|
DelayManager::Config config;
|
|
config.max_packets_in_buffer = neteq_config.max_packets_in_buffer;
|
|
config.base_minimum_delay_ms = neteq_config.base_min_delay_ms;
|
|
config.Log();
|
|
return std::make_unique<DelayManager>(config, neteq_config.tick_timer);
|
|
}
|
|
|
|
// Returns true when `mode` is kExpand or kCodecPlc, and false for every other
// mode.
bool IsExpand(NetEq::Mode mode) {
  switch (mode) {
    case NetEq::Mode::kExpand:
    case NetEq::Mode::kCodecPlc:
      return true;
    default:
      return false;
  }
}
|
|
|
|
} // namespace
|
|
|
|
// Convenience constructor: delegates to the three-argument constructor with a
// DelayManager derived from `config` and a default-constructed
// BufferLevelFilter.
DecisionLogic::DecisionLogic(NetEqController::Config config)
    : DecisionLogic(config,
                    /*delay_manager=*/CreateDelayManager(config),
                    /*buffer_level_filter=*/
                    std::make_unique<BufferLevelFilter>()) {}
|
|
|
|
// Main constructor. Takes ownership of `delay_manager` and
// `buffer_level_filter`, picks up the tick timer and the time-stretching
// permission from `config`, and then lets the
// "WebRTC-Audio-NetEqDecisionLogicSettings" field trial override the default
// target level window.
DecisionLogic::DecisionLogic(
    NetEqController::Config config,
    std::unique_ptr<DelayManager> delay_manager,
    std::unique_ptr<BufferLevelFilter> buffer_level_filter)
    : delay_manager_(std::move(delay_manager)),
      buffer_level_filter_(std::move(buffer_level_filter)),
      tick_timer_(config.tick_timer),
      disallow_time_stretching_(!config.allow_time_stretching),
      timescale_countdown_(
          tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)),
      target_level_window_ms_("target_level_window",
                              kDefaultTargetLevelWindowMs,
                              0,
                              absl::nullopt) {
  // Parse the field trial string (if any) straight into the parameter object.
  ParseFieldTrial(
      {&target_level_window_ms_},
      field_trial::FindFullName("WebRTC-Audio-NetEqDecisionLogicSettings"));

  RTC_LOG(LS_INFO) << "NetEq decision logic settings:"
                   << " target_level_window_ms=" << target_level_window_ms_;
}
|
|
|
|
// Destructor is defaulted here, in the implementation file.
DecisionLogic::~DecisionLogic() = default;
|
|
|
|
// Restores the decision state kept directly in this object to its initial
// values. Unlike SoftReset(), this clears the CNG state and the
// consecutive-expand counter, drops the time-scale countdown instead of
// restarting it, and does not reset the delay manager or the buffer level
// filter.
void DecisionLogic::Reset() {
  // Comfort noise bookkeeping.
  cng_state_ = kCngOff;
  noise_fast_forward_ = 0;
  time_stretched_cn_samples_ = 0;
  last_pack_cng_or_dtmf_ = true;

  // Packet and time-stretching bookkeeping.
  packet_length_samples_ = 0;
  sample_memory_ = 0;
  prev_time_scale_ = false;
  timescale_countdown_.reset();

  // Expansion bookkeeping.
  num_consecutive_expands_ = 0;
}
|
|
|
|
// Partial reset: clears the packet and time-stretching bookkeeping, restarts
// the time-scale countdown, and resets the delay manager and the buffer level
// filter. The CNG state, the noise fast-forward amount and the
// consecutive-expand counter are left as they are.
void DecisionLogic::SoftReset() {
  last_pack_cng_or_dtmf_ = true;
  prev_time_scale_ = false;
  sample_memory_ = 0;
  packet_length_samples_ = 0;
  time_stretched_cn_samples_ = 0;

  timescale_countdown_ =
      tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);

  delay_manager_->Reset();
  buffer_level_filter_->Reset();
}
|
|
|
|
// Stores the sample rate (Hz) and the output block size (samples) used by the
// decision arithmetic. In debug-check builds, `fs_hz` must be one of 8000,
// 16000, 32000 or 48000.
void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
  // TODO(hlundin): Change to an enumerator and skip assert.
  RTC_DCHECK(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 ||
             fs_hz == 48000);
  output_size_samples_ = output_size_samples;
  sample_rate_ = fs_hz;
}
|
|
|
|
// Picks the next NetEq operation from the current `status`.
//
// Besides returning the operation, this updates the remembered CNG state, the
// consecutive-expand counter and `prev_time_scale_`, and feeds the buffer
// level filter unless the last mode was comfort noise. `*reset_decoder` is
// written (set to true) only when the expand period has exceeded
// kReinitAfterExpands; it is left untouched otherwise.
NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status,
                                            bool* reset_decoder) {
  const NetEq::Mode last_mode = status.last_mode;

  // Remember that comfort noise is on when the last mode was CNG. This is
  // needed if comfort noise is interrupted by DTMF.
  switch (last_mode) {
    case NetEq::Mode::kRfc3389Cng:
      cng_state_ = kCngRfc3389On;
      break;
    case NetEq::Mode::kCodecInternalCng:
      cng_state_ = kCngInternalOn;
      break;
    default:
      break;
  }

  // Count back-to-back concealment rounds; any other mode restarts the count.
  const bool last_was_expand = IsExpand(last_mode);
  num_consecutive_expands_ =
      last_was_expand ? num_consecutive_expands_ + 1 : 0;

  // A pending time-scale flag only survives if the last mode was one of the
  // accelerate / preemptive-expand outcomes.
  const bool last_was_time_stretch =
      last_mode == NetEq::Mode::kAccelerateSuccess ||
      last_mode == NetEq::Mode::kAccelerateLowEnergy ||
      last_mode == NetEq::Mode::kPreemptiveExpandSuccess ||
      last_mode == NetEq::Mode::kPreemptiveExpandLowEnergy;
  prev_time_scale_ = prev_time_scale_ && last_was_time_stretch;

  // Do not update buffer history if currently playing CNG since it will bias
  // the filtered buffer level.
  const bool last_was_cng = last_mode == NetEq::Mode::kRfc3389Cng ||
                            last_mode == NetEq::Mode::kCodecInternalCng;
  if (!last_was_cng) {
    FilterBufferLevel(status.packet_buffer_info.span_samples);
  }

  // Guard for errors, to avoid getting stuck in error mode. When a packet is
  // available, kUndefined is used to flag for a reset.
  if (last_mode == NetEq::Mode::kError) {
    return status.next_packet ? NetEq::Operation::kUndefined
                              : NetEq::Operation::kExpand;
  }

  // Handle the case with no packet at all available (except maybe DTMF).
  if (!status.next_packet) {
    return NoPacket(status.play_dtmf);
  }
  const auto& next_packet = *status.next_packet;

  if (next_packet.is_cng) {
    return CngOperation(last_mode, status.target_timestamp,
                        next_packet.timestamp, status.generated_noise_samples);
  }

  // If the expand period was very long, reset NetEQ since it is likely that the
  // sender was restarted.
  if (num_consecutive_expands_ > kReinitAfterExpands) {
    *reset_decoder = true;
    return NetEq::Operation::kNormal;
  }

  // Make sure we don't restart audio too soon after an expansion, to avoid
  // running out of data right away again. We only wait if there are no DTX or
  // CNG packets in the buffer (their exact duration is unknown, so we should
  // just play out what we have), and if the mute factor is low enough
  // (otherwise the expansion was short enough to not be noticeable).
  // Note that the mute factor is in Q14, so a value of 16384 corresponds to 1.
  const int target_level_samples =
      delay_manager_->TargetDelayMs() * sample_rate_ / 1000;
  const size_t postpone_level_samples =
      static_cast<size_t>(target_level_samples * kPostponeDecodingLevel / 100);
  const bool expansion_was_muted =
      last_was_expand && status.expand_mutefactor < 16384 / 2;
  const bool buffer_below_postpone_level =
      status.packet_buffer_info.span_samples < postpone_level_samples;
  if (expansion_was_muted && buffer_below_postpone_level &&
      !status.packet_buffer_info.dtx_or_cng) {
    return NetEq::Operation::kExpand;
  }

  // The required packet is available.
  if (status.target_timestamp == next_packet.timestamp) {
    return ExpectedPacketAvailable(last_mode, status.play_dtmf);
  }

  const uint32_t five_seconds_samples = static_cast<uint32_t>(5 * sample_rate_);
  if (PacketBuffer::IsObsoleteTimestamp(next_packet.timestamp,
                                        status.target_timestamp,
                                        five_seconds_samples)) {
    // This implies that available_timestamp < target_timestamp, which can
    // happen when a new stream or codec is received. Signal for a reset.
    return NetEq::Operation::kUndefined;
  }

  // The next packet lies in the future relative to the target timestamp.
  return FuturePacketAvailable(
      status.last_packet_samples, last_mode, status.target_timestamp,
      next_packet.timestamp, status.play_dtmf, status.generated_noise_samples,
      status.packet_buffer_info.span_samples,
      status.packet_buffer_info.num_packets);
}
|
|
|
|
// Counts one more consecutive expand. GetDecision() increments the same
// counter when the last mode was an expand mode.
void DecisionLogic::NotifyMutedState() {
  num_consecutive_expands_ += 1;
}
|
|
|
|
// Records the arrival of a packet described by `info`.
//
// Returns whatever the delay manager's Update() reports for this packet, or
// absl::nullopt when the packet is CNG/DTMF or when `should_update_stats` is
// false. A buffer flush reported in `info` is latched in `buffer_flush_`
// before either early return.
absl::optional<int> DecisionLogic::PacketArrived(
    int fs_hz,
    bool should_update_stats,
    const PacketArrivedInfo& info) {
  if (info.buffer_flush) {
    buffer_flush_ = true;
  }

  if (info.is_cng_or_dtmf) {
    // Remember it so that the next Update() call is made with reset=true.
    last_pack_cng_or_dtmf_ = true;
    return absl::nullopt;
  }

  if (!should_update_stats) {
    return absl::nullopt;
  }

  // Tell the delay manager about a new packet duration. `fs_hz` must be
  // positive because it is used as a divisor below.
  const bool packet_length_changed =
      info.packet_length_samples > 0 && fs_hz > 0 &&
      info.packet_length_samples != packet_length_samples_;
  if (packet_length_changed) {
    packet_length_samples_ = info.packet_length_samples;
    delay_manager_->SetPacketAudioLength(packet_length_samples_ * 1000 / fs_hz);
  }

  auto delay_update = delay_manager_->Update(
      info.main_timestamp, fs_hz, /*reset=*/last_pack_cng_or_dtmf_);
  last_pack_cng_or_dtmf_ = false;
  return delay_update;
}
|
|
|
|
void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) {
|
|
buffer_level_filter_->SetTargetBufferLevel(delay_manager_->TargetDelayMs());
|
|
|
|
int time_stretched_samples = time_stretched_cn_samples_;
|
|
if (prev_time_scale_) {
|
|
time_stretched_samples += sample_memory_;
|
|
timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval);
|
|
}
|
|
|
|
if (buffer_flush_) {
|
|
buffer_level_filter_->SetFilteredBufferLevel(buffer_size_samples);
|
|
buffer_flush_ = false;
|
|
} else {
|
|
buffer_level_filter_->Update(buffer_size_samples, time_stretched_samples);
|
|
}
|
|
prev_time_scale_ = false;
|
|
time_stretched_cn_samples_ = 0;
|
|
}
|
|
|
|
// Decides what to do when the next packet in the buffer is a CNG packet.
//
// Returns kRfc3389CngNoPacket when it is not yet time for the packet and the
// previous mode was RFC 3389 CNG; otherwise returns kRfc3389Cng and clears
// `noise_fast_forward_`.
NetEq::Operation DecisionLogic::CngOperation(NetEq::Mode prev_mode,
                                             uint32_t target_timestamp,
                                             uint32_t available_timestamp,
                                             size_t generated_noise_samples) {
  // Signed difference between target and available timestamp.
  int32_t timestamp_diff = static_cast<int32_t>(
      static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
      available_timestamp);
  int optimal_level_samp =
      delay_manager_->TargetDelayMs() * sample_rate_ / 1000;
  const int64_t excess_waiting_time_samp =
      -static_cast<int64_t>(timestamp_diff) - optimal_level_samp;

  const bool waiting_too_long =
      excess_waiting_time_samp > optimal_level_samp / 2;
  if (waiting_too_long) {
    // The waiting time for this packet will be longer than 1.5 times the
    // wanted buffer delay. Apply fast-forward to cut the waiting time down to
    // the optimal.
    noise_fast_forward_ = rtc::saturated_cast<size_t>(noise_fast_forward_ +
                                                      excess_waiting_time_samp);
    timestamp_diff =
        rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp);
  }

  const bool keep_previous_cng =
      timestamp_diff < 0 && prev_mode == NetEq::Mode::kRfc3389Cng;
  if (keep_previous_cng) {
    // Not time to play this packet yet. Wait another round before using this
    // packet. Keep on playing CNG from previous CNG parameters.
    return NetEq::Operation::kRfc3389CngNoPacket;
  }

  // Otherwise, go for the CNG packet now.
  noise_fast_forward_ = 0;
  return NetEq::Operation::kRfc3389Cng;
}
|
|
|
|
// Chooses the operation when no packet is available: continue whichever
// comfort noise is currently on, else play DTMF if requested, else expand.
NetEq::Operation DecisionLogic::NoPacket(bool play_dtmf) {
  switch (cng_state_) {
    case kCngRfc3389On:
      // Keep on playing comfort noise.
      return NetEq::Operation::kRfc3389CngNoPacket;
    case kCngInternalOn:
      // Keep on playing codec internal comfort noise.
      return NetEq::Operation::kCodecInternalCng;
    default:
      break;
  }
  // Without DTMF there is nothing to play, so expand.
  return play_dtmf ? NetEq::Operation::kDtmf : NetEq::Operation::kExpand;
}
|
|
|
|
// Chooses the operation when the packet matching the target timestamp is
// available. Returns kNormal unless time stretching is allowed, the previous
// mode was not kExpand, no DTMF is playing, and the filtered buffer level
// falls outside the [low_limit, high_limit) band around the target level.
NetEq::Operation DecisionLogic::ExpectedPacketAvailable(NetEq::Mode prev_mode,
                                                        bool play_dtmf) {
  if (disallow_time_stretching_ || prev_mode == NetEq::Mode::kExpand ||
      play_dtmf) {
    return NetEq::Operation::kNormal;
  }

  const int samples_per_ms = sample_rate_ / 1000;
  const int target_level_samples =
      delay_manager_->TargetDelayMs() * samples_per_ms;
  // `low_limit` is 3/4 of the target level, but never more than
  // kDecelerationTargetLevelOffsetMs below it.
  const int low_limit = std::max(
      target_level_samples * 3 / 4,
      target_level_samples - kDecelerationTargetLevelOffsetMs * samples_per_ms);
  // `high_limit` is equal to the target level, but should at least be 20 ms
  // higher than `low_limit`.
  const int high_limit =
      std::max(target_level_samples, low_limit + 20 * samples_per_ms);
  // Four times `high_limit` triggers the fast variant, regardless of whether
  // time scaling is currently allowed.
  const int fast_accelerate_limit = high_limit << 2;

  const int buffer_level_samples =
      buffer_level_filter_->filtered_current_level();
  if (buffer_level_samples >= fast_accelerate_limit) {
    return NetEq::Operation::kFastAccelerate;
  }
  if (!TimescaleAllowed()) {
    return NetEq::Operation::kNormal;
  }
  if (buffer_level_samples >= high_limit) {
    return NetEq::Operation::kAccelerate;
  }
  if (buffer_level_samples < low_limit) {
    return NetEq::Operation::kPreemptiveExpand;
  }
  return NetEq::Operation::kNormal;
}
|
|
|
|
// Chooses the operation when the required packet is missing but a later one
// is available in the buffer.
//
// `decoder_frame_length` and `num_packets_in_packet_buffer` are not used by
// this implementation. May set `time_stretched_cn_samples_` when leaving
// comfort noise.
NetEq::Operation DecisionLogic::FuturePacketAvailable(
    size_t decoder_frame_length,
    NetEq::Mode prev_mode,
    uint32_t target_timestamp,
    uint32_t available_timestamp,
    bool play_dtmf,
    size_t generated_noise_samples,
    size_t span_samples_in_packet_buffer,
    size_t num_packets_in_packet_buffer) {
  const uint32_t timestamp_leap = available_timestamp - target_timestamp;

  // Continue an ongoing expand when the new packet is too far into the future
  // and none of the give-up conditions apply.
  const bool keep_waiting =
      IsExpand(prev_mode) && !ReinitAfterExpands(timestamp_leap) &&
      !MaxWaitForPacket() && PacketTooEarly(timestamp_leap) &&
      UnderTargetLevel();
  if (keep_waiting) {
    // With DTMF still to play, do that instead of expanding.
    return play_dtmf ? NetEq::Operation::kDtmf : NetEq::Operation::kExpand;
  }

  if (prev_mode == NetEq::Mode::kCodecPlc) {
    return NetEq::Operation::kNormal;
  }

  // If previous was comfort noise, then no merge is needed.
  const bool prev_was_rfc3389_cng = prev_mode == NetEq::Mode::kRfc3389Cng;
  const bool prev_was_codec_cng = prev_mode == NetEq::Mode::kCodecInternalCng;
  if (prev_was_rfc3389_cng || prev_was_codec_cng) {
    const size_t target_level_samples =
        delay_manager_->TargetDelayMs() * sample_rate_ / 1000;
    const size_t target_threshold_samples =
        target_level_window_ms_ / 2 * (sample_rate_ / 1000);
    const bool generated_enough_noise =
        static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
        available_timestamp;
    const bool above_target_window =
        span_samples_in_packet_buffer >
        target_level_samples + target_threshold_samples;
    // The first clause keeps the unsigned subtraction from wrapping.
    const bool below_target_window =
        target_level_samples > target_threshold_samples &&
        span_samples_in_packet_buffer <
            target_level_samples - target_threshold_samples;

    // Keep the delay same as before CNG, but make sure that it is within the
    // target window.
    const bool play_packet_now =
        above_target_window ||
        (generated_enough_noise && !below_target_window);
    if (play_packet_now) {
      time_stretched_cn_samples_ = timestamp_leap - generated_noise_samples;
      return NetEq::Operation::kNormal;
    }

    // Too early to play this new packet; keep on playing the same kind of
    // comfort noise as before.
    return prev_was_rfc3389_cng ? NetEq::Operation::kRfc3389CngNoPacket
                                : NetEq::Operation::kCodecInternalCng;
  }

  // Do not merge unless we have done an expand before.
  if (prev_mode == NetEq::Mode::kExpand) {
    return NetEq::Operation::kMerge;
  }
  // Play DTMF instead of expand when there is DTMF to play.
  return play_dtmf ? NetEq::Operation::kDtmf : NetEq::Operation::kExpand;
}
|
|
|
|
bool DecisionLogic::UnderTargetLevel() const {
|
|
return buffer_level_filter_->filtered_current_level() <
|
|
delay_manager_->TargetDelayMs() * sample_rate_ / 1000;
|
|
}
|
|
|
|
bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const {
|
|
return timestamp_leap >=
|
|
static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands);
|
|
}
|
|
|
|
bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const {
|
|
return timestamp_leap >
|
|
static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
|
|
}
|
|
|
|
bool DecisionLogic::MaxWaitForPacket() const {
|
|
return num_consecutive_expands_ >= kMaxWaitForPacket;
|
|
}
|
|
|
|
} // namespace webrtc
|