webrtc/modules/audio_coding/neteq/decision_logic.cc

/*
 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_coding/neteq/decision_logic.h"

#include <stdio.h>

#include <string>

#include "absl/types/optional.h"
#include "modules/audio_coding/neteq/packet_buffer.h"
#include "rtc_base/checks.h"
#include "rtc_base/experiments/field_trial_parser.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "system_wrappers/include/field_trial.h"

namespace webrtc {

namespace {

constexpr int kPostponeDecodingLevel = 50;
constexpr int kDefaultTargetLevelWindowMs = 100;
constexpr int kDecelerationTargetLevelOffsetMs = 85;

std::unique_ptr<DelayManager> CreateDelayManager(
    const NetEqController::Config& neteq_config) {
  DelayManager::Config config;
  config.max_packets_in_buffer = neteq_config.max_packets_in_buffer;
  config.base_minimum_delay_ms = neteq_config.base_min_delay_ms;
  config.Log();
  return std::make_unique<DelayManager>(config, neteq_config.tick_timer);
}

bool IsExpand(NetEq::Mode mode) {
  return mode == NetEq::Mode::kExpand || mode == NetEq::Mode::kCodecPlc;
}

}  // namespace

DecisionLogic::DecisionLogic(NetEqController::Config config)
    : DecisionLogic(config,
                    CreateDelayManager(config),
                    std::make_unique<BufferLevelFilter>()) {}

DecisionLogic::DecisionLogic(
    NetEqController::Config config,
    std::unique_ptr<DelayManager> delay_manager,
    std::unique_ptr<BufferLevelFilter> buffer_level_filter)
    : delay_manager_(std::move(delay_manager)),
      buffer_level_filter_(std::move(buffer_level_filter)),
      tick_timer_(config.tick_timer),
      disallow_time_stretching_(!config.allow_time_stretching),
      timescale_countdown_(
          tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)),
      target_level_window_ms_("target_level_window",
                              kDefaultTargetLevelWindowMs,
                              0,
                              absl::nullopt) {
  const std::string field_trial_name =
      field_trial::FindFullName("WebRTC-Audio-NetEqDecisionLogicSettings");
  ParseFieldTrial({&target_level_window_ms_}, field_trial_name);
  RTC_LOG(LS_INFO) << "NetEq decision logic settings:"
                   << " target_level_window_ms=" << target_level_window_ms_;
}

DecisionLogic::~DecisionLogic() = default;

void DecisionLogic::Reset() {
  cng_state_ = kCngOff;
  noise_fast_forward_ = 0;
  packet_length_samples_ = 0;
  sample_memory_ = 0;
  prev_time_scale_ = false;
  last_pack_cng_or_dtmf_ = true;
  timescale_countdown_.reset();
  num_consecutive_expands_ = 0;
  time_stretched_cn_samples_ = 0;
}

void DecisionLogic::SoftReset() {
  packet_length_samples_ = 0;
  sample_memory_ = 0;
  prev_time_scale_ = false;
  last_pack_cng_or_dtmf_ = true;
  timescale_countdown_ =
      tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
  time_stretched_cn_samples_ = 0;
  delay_manager_->Reset();
  buffer_level_filter_->Reset();
}

void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
  // TODO(hlundin): Change to an enumerator and skip assert.
  RTC_DCHECK(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 ||
             fs_hz == 48000);
  sample_rate_ = fs_hz;
  output_size_samples_ = output_size_samples;
}

NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status,
                                            bool* reset_decoder) {
  // If last mode was CNG (or Expand, since this could be covering up for
  // a lost CNG packet), remember that CNG is on. This is needed if comfort
  // noise is interrupted by DTMF.
  if (status.last_mode == NetEq::Mode::kRfc3389Cng) {
    cng_state_ = kCngRfc3389On;
  } else if (status.last_mode == NetEq::Mode::kCodecInternalCng) {
    cng_state_ = kCngInternalOn;
  }

  if (IsExpand(status.last_mode)) {
    ++num_consecutive_expands_;
  } else {
    num_consecutive_expands_ = 0;
  }

  prev_time_scale_ =
      prev_time_scale_ &&
      (status.last_mode == NetEq::Mode::kAccelerateSuccess ||
       status.last_mode == NetEq::Mode::kAccelerateLowEnergy ||
       status.last_mode == NetEq::Mode::kPreemptiveExpandSuccess ||
       status.last_mode == NetEq::Mode::kPreemptiveExpandLowEnergy);

  // Do not update buffer history if currently playing CNG since it will bias
  // the filtered buffer level.
  if (status.last_mode != NetEq::Mode::kRfc3389Cng &&
      status.last_mode != NetEq::Mode::kCodecInternalCng) {
    FilterBufferLevel(status.packet_buffer_info.span_samples);
  }

  // Guard for errors, to avoid getting stuck in error mode.
  if (status.last_mode == NetEq::Mode::kError) {
    if (!status.next_packet) {
      return NetEq::Operation::kExpand;
    } else {
      // Use kUndefined to flag for a reset.
      return NetEq::Operation::kUndefined;
    }
  }

  if (status.next_packet && status.next_packet->is_cng) {
    return CngOperation(status.last_mode, status.target_timestamp,
                        status.next_packet->timestamp,
                        status.generated_noise_samples);
  }

  // Handle the case with no packet at all available (except maybe DTMF).
  if (!status.next_packet) {
    return NoPacket(status.play_dtmf);
  }

  // If the expand period was very long, reset NetEQ since it is likely that the
  // sender was restarted.
  if (num_consecutive_expands_ > kReinitAfterExpands) {
    *reset_decoder = true;
    return NetEq::Operation::kNormal;
  }

  // Make sure we don't restart audio too soon after an expansion to avoid
  // running out of data right away again. We should only wait if there are no
  // DTX or CNG packets in the buffer (otherwise we should just play out what we
  // have, since we cannot know the exact duration of DTX or CNG packets), and
  // if the mute factor is low enough (otherwise the expansion was short enough
  // to not be noticable).
  // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
  const int target_level_samples =
      delay_manager_->TargetDelayMs() * sample_rate_ / 1000;
  if (IsExpand(status.last_mode) && status.expand_mutefactor < 16384 / 2 &&
      status.packet_buffer_info.span_samples <
          static_cast<size_t>(target_level_samples * kPostponeDecodingLevel /
                              100) &&
      !status.packet_buffer_info.dtx_or_cng) {
    return NetEq::Operation::kExpand;
  }

  const uint32_t five_seconds_samples = static_cast<uint32_t>(5 * sample_rate_);
  // Check if the required packet is available.
  if (status.target_timestamp == status.next_packet->timestamp) {
    return ExpectedPacketAvailable(status.last_mode, status.play_dtmf);
  } else if (!PacketBuffer::IsObsoleteTimestamp(status.next_packet->timestamp,
                                                status.target_timestamp,
                                                five_seconds_samples)) {
    return FuturePacketAvailable(
        status.last_packet_samples, status.last_mode, status.target_timestamp,
        status.next_packet->timestamp, status.play_dtmf,
        status.generated_noise_samples, status.packet_buffer_info.span_samples,
        status.packet_buffer_info.num_packets);
  } else {
    // This implies that available_timestamp < target_timestamp, which can
    // happen when a new stream or codec is received. Signal for a reset.
    return NetEq::Operation::kUndefined;
  }
}

void DecisionLogic::NotifyMutedState() {
  ++num_consecutive_expands_;
}

absl::optional<int> DecisionLogic::PacketArrived(
    int fs_hz,
    bool should_update_stats,
    const PacketArrivedInfo& info) {
  buffer_flush_ = buffer_flush_ || info.buffer_flush;
  if (info.is_cng_or_dtmf) {
    last_pack_cng_or_dtmf_ = true;
    return absl::nullopt;
  }
  if (!should_update_stats) {
    return absl::nullopt;
  }
  if (info.packet_length_samples > 0 && fs_hz > 0 &&
      info.packet_length_samples != packet_length_samples_) {
    packet_length_samples_ = info.packet_length_samples;
    delay_manager_->SetPacketAudioLength(packet_length_samples_ * 1000 / fs_hz);
  }
  auto relative_delay = delay_manager_->Update(
      info.main_timestamp, fs_hz, /*reset=*/last_pack_cng_or_dtmf_);
  last_pack_cng_or_dtmf_ = false;
  return relative_delay;
}

void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) {
  buffer_level_filter_->SetTargetBufferLevel(delay_manager_->TargetDelayMs());

  int time_stretched_samples = time_stretched_cn_samples_;
  if (prev_time_scale_) {
    time_stretched_samples += sample_memory_;
    timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval);
  }

  if (buffer_flush_) {
    buffer_level_filter_->SetFilteredBufferLevel(buffer_size_samples);
    buffer_flush_ = false;
  } else {
    buffer_level_filter_->Update(buffer_size_samples, time_stretched_samples);
  }
  prev_time_scale_ = false;
  time_stretched_cn_samples_ = 0;
}

NetEq::Operation DecisionLogic::CngOperation(NetEq::Mode prev_mode,
                                             uint32_t target_timestamp,
                                             uint32_t available_timestamp,
                                             size_t generated_noise_samples) {
  // Signed difference between target and available timestamp.
  int32_t timestamp_diff = static_cast<int32_t>(
      static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
      available_timestamp);
  int optimal_level_samp =
      delay_manager_->TargetDelayMs() * sample_rate_ / 1000;
  const int64_t excess_waiting_time_samp =
      -static_cast<int64_t>(timestamp_diff) - optimal_level_samp;

  if (excess_waiting_time_samp > optimal_level_samp / 2) {
    // The waiting time for this packet will be longer than 1.5
    // times the wanted buffer delay. Apply fast-forward to cut the
    // waiting time down to the optimal.
    noise_fast_forward_ = rtc::saturated_cast<size_t>(noise_fast_forward_ +
                                                      excess_waiting_time_samp);
    timestamp_diff =
        rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp);
  }

  if (timestamp_diff < 0 && prev_mode == NetEq::Mode::kRfc3389Cng) {
    // Not time to play this packet yet. Wait another round before using this
    // packet. Keep on playing CNG from previous CNG parameters.
    return NetEq::Operation::kRfc3389CngNoPacket;
  } else {
    // Otherwise, go for the CNG packet now.
    noise_fast_forward_ = 0;
    return NetEq::Operation::kRfc3389Cng;
  }
}

NetEq::Operation DecisionLogic::NoPacket(bool play_dtmf) {
  if (cng_state_ == kCngRfc3389On) {
    // Keep on playing comfort noise.
    return NetEq::Operation::kRfc3389CngNoPacket;
  } else if (cng_state_ == kCngInternalOn) {
    // Keep on playing codec internal comfort noise.
    return NetEq::Operation::kCodecInternalCng;
  } else if (play_dtmf) {
    return NetEq::Operation::kDtmf;
  } else {
    // Nothing to play, do expand.
    return NetEq::Operation::kExpand;
  }
}

NetEq::Operation DecisionLogic::ExpectedPacketAvailable(NetEq::Mode prev_mode,
                                                        bool play_dtmf) {
  if (!disallow_time_stretching_ && prev_mode != NetEq::Mode::kExpand &&
      !play_dtmf) {
    const int samples_per_ms = sample_rate_ / 1000;
    const int target_level_samples =
        delay_manager_->TargetDelayMs() * samples_per_ms;
    const int low_limit =
        std::max(target_level_samples * 3 / 4,
                 target_level_samples -
                     kDecelerationTargetLevelOffsetMs * samples_per_ms);
    // `higher_limit` is equal to `target_level`, but should at
    // least be 20 ms higher than `lower_limit`.
    const int high_limit =
        std::max(target_level_samples, low_limit + 20 * samples_per_ms);

    const int buffer_level_samples =
        buffer_level_filter_->filtered_current_level();
    if (buffer_level_samples >= high_limit << 2)
      return NetEq::Operation::kFastAccelerate;
    if (TimescaleAllowed()) {
      if (buffer_level_samples >= high_limit)
        return NetEq::Operation::kAccelerate;
      if (buffer_level_samples < low_limit)
        return NetEq::Operation::kPreemptiveExpand;
    }
  }
  return NetEq::Operation::kNormal;
}

NetEq::Operation DecisionLogic::FuturePacketAvailable(
    size_t decoder_frame_length,
    NetEq::Mode prev_mode,
    uint32_t target_timestamp,
    uint32_t available_timestamp,
    bool play_dtmf,
    size_t generated_noise_samples,
    size_t span_samples_in_packet_buffer,
    size_t num_packets_in_packet_buffer) {
  // Required packet is not available, but a future packet is.
  // Check if we should continue with an ongoing expand because the new packet
  // is too far into the future.
  uint32_t timestamp_leap = available_timestamp - target_timestamp;
  if (IsExpand(prev_mode) && !ReinitAfterExpands(timestamp_leap) &&
      !MaxWaitForPacket() && PacketTooEarly(timestamp_leap) &&
      UnderTargetLevel()) {
    if (play_dtmf) {
      // Still have DTMF to play, so do not do expand.
      return NetEq::Operation::kDtmf;
    } else {
      // Nothing to play.
      return NetEq::Operation::kExpand;
    }
  }

  if (prev_mode == NetEq::Mode::kCodecPlc) {
    return NetEq::Operation::kNormal;
  }

  // If previous was comfort noise, then no merge is needed.
  if (prev_mode == NetEq::Mode::kRfc3389Cng ||
      prev_mode == NetEq::Mode::kCodecInternalCng) {
    const size_t target_level_samples =
        delay_manager_->TargetDelayMs() * sample_rate_ / 1000;
    const bool generated_enough_noise =
        static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
        available_timestamp;
    const size_t target_threshold_samples =
        target_level_window_ms_ / 2 * (sample_rate_ / 1000);
    const bool above_target_window =
        span_samples_in_packet_buffer >
        target_level_samples + target_threshold_samples;
    const bool below_target_window =
        target_level_samples > target_threshold_samples &&
        span_samples_in_packet_buffer <
            target_level_samples - target_threshold_samples;
    // Keep the delay same as before CNG, but make sure that it is within the
    // target window.
    if ((generated_enough_noise && !below_target_window) ||
        above_target_window) {
      time_stretched_cn_samples_ = timestamp_leap - generated_noise_samples;
      return NetEq::Operation::kNormal;
    }

    // Too early to play this new packet; keep on playing comfort noise.
    if (prev_mode == NetEq::Mode::kRfc3389Cng) {
      return NetEq::Operation::kRfc3389CngNoPacket;
    }
    // prevPlayMode == kModeCodecInternalCng.
    return NetEq::Operation::kCodecInternalCng;
  }

  // Do not merge unless we have done an expand before.
  if (prev_mode == NetEq::Mode::kExpand) {
    return NetEq::Operation::kMerge;
  } else if (play_dtmf) {
    // Play DTMF instead of expand.
    return NetEq::Operation::kDtmf;
  } else {
    return NetEq::Operation::kExpand;
  }
}

bool DecisionLogic::UnderTargetLevel() const {
  return buffer_level_filter_->filtered_current_level() <
         delay_manager_->TargetDelayMs() * sample_rate_ / 1000;
}

bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const {
  return timestamp_leap >=
         static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands);
}

bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const {
  return timestamp_leap >
         static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
}

bool DecisionLogic::MaxWaitForPacket() const {
  return num_consecutive_expands_ >= kMaxWaitForPacket;
}

}  // namespace webrtc