webrtc/modules/video_coding/jitter_estimator.cc

/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/video_coding/jitter_estimator.h"

#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <string>

#include "absl/types/optional.h"
#include "modules/video_coding/internal_defines.h"
#include "modules/video_coding/rtt_filter.h"
#include "rtc_base/experiments/jitter_upper_bound_experiment.h"
#include "system_wrappers/include/clock.h"
#include "system_wrappers/include/field_trial.h"

namespace webrtc {
namespace {
static constexpr uint32_t kStartupDelaySamples = 30;
static constexpr int64_t kFsAccuStartupSamples = 5;
static constexpr double kMaxFramerateEstimate = 200.0;
static constexpr int64_t kNackCountTimeoutMs = 60000;
static constexpr double kDefaultMaxTimestampDeviationInSigmas = 3.5;
}  // namespace

VCMJitterEstimator::VCMJitterEstimator(const Clock* clock,
                                       int32_t vcmId,
                                       int32_t receiverId)
    : _vcmId(vcmId),
      _receiverId(receiverId),
      _phi(0.97),
      _psi(0.9999),
      _alphaCountMax(400),
      _thetaLow(0.000001),
      _nackLimit(3),
      _numStdDevDelayOutlier(15),
      _numStdDevFrameSizeOutlier(3),
      _noiseStdDevs(2.33),       // ~Less than 1% chance
                                 // (look up in normal distribution table)...
      _noiseStdDevOffset(30.0),  // ...of getting 30 ms freezes
      _rttFilter(),
      fps_counter_(30),  // TODO(sprang): Use an estimator with limit based on
                         // time, rather than number of samples.
      time_deviation_upper_bound_(
          JitterUpperBoundExperiment::GetUpperBoundSigmas().value_or(
              kDefaultMaxTimestampDeviationInSigmas)),
      clock_(clock) {
  Reset();
}

VCMJitterEstimator::~VCMJitterEstimator() {}

VCMJitterEstimator& VCMJitterEstimator::operator=(
    const VCMJitterEstimator& rhs) {
  if (this != &rhs) {
    memcpy(_thetaCov, rhs._thetaCov, sizeof(_thetaCov));
    memcpy(_Qcov, rhs._Qcov, sizeof(_Qcov));

    _vcmId = rhs._vcmId;
    _receiverId = rhs._receiverId;
    _avgFrameSize = rhs._avgFrameSize;
    _varFrameSize = rhs._varFrameSize;
    _maxFrameSize = rhs._maxFrameSize;
    _fsSum = rhs._fsSum;
    _fsCount = rhs._fsCount;
    _lastUpdateT = rhs._lastUpdateT;
    _prevEstimate = rhs._prevEstimate;
    _prevFrameSize = rhs._prevFrameSize;
    _avgNoise = rhs._avgNoise;
    _alphaCount = rhs._alphaCount;
    _filterJitterEstimate = rhs._filterJitterEstimate;
    _startupCount = rhs._startupCount;
    _latestNackTimestamp = rhs._latestNackTimestamp;
    _nackCount = rhs._nackCount;
    _rttFilter = rhs._rttFilter;
    clock_ = rhs.clock_;
  }
  return *this;
}

// Resets the JitterEstimate
void VCMJitterEstimator::Reset() {
  _theta[0] = 1 / (512e3 / 8);
  _theta[1] = 0;
  _varNoise = 4.0;

  _thetaCov[0][0] = 1e-4;
  _thetaCov[1][1] = 1e2;
  _thetaCov[0][1] = _thetaCov[1][0] = 0;
  _Qcov[0][0] = 2.5e-10;
  _Qcov[1][1] = 1e-10;
  _Qcov[0][1] = _Qcov[1][0] = 0;
  _avgFrameSize = 500;
  _maxFrameSize = 500;
  _varFrameSize = 100;
  _lastUpdateT = -1;
  _prevEstimate = -1.0;
  _prevFrameSize = 0;
  _avgNoise = 0.0;
  _alphaCount = 1;
  _filterJitterEstimate = 0.0;
  _latestNackTimestamp = 0;
  _nackCount = 0;
  _latestNackTimestamp = 0;
  _fsSum = 0;
  _fsCount = 0;
  _startupCount = 0;
  _rttFilter.Reset();
  fps_counter_.Reset();
}

void VCMJitterEstimator::ResetNackCount() {
  _nackCount = 0;
}

// Updates the estimates with the new measurements
void VCMJitterEstimator::UpdateEstimate(int64_t frameDelayMS,
                                        uint32_t frameSizeBytes,
                                        bool incompleteFrame /* = false */) {
  if (frameSizeBytes == 0) {
    return;
  }
  int deltaFS = frameSizeBytes - _prevFrameSize;
  if (_fsCount < kFsAccuStartupSamples) {
    _fsSum += frameSizeBytes;
    _fsCount++;
  } else if (_fsCount == kFsAccuStartupSamples) {
    // Give the frame size filter
    _avgFrameSize = static_cast<double>(_fsSum) / static_cast<double>(_fsCount);
    _fsCount++;
  }
  if (!incompleteFrame || frameSizeBytes > _avgFrameSize) {
    double avgFrameSize = _phi * _avgFrameSize + (1 - _phi) * frameSizeBytes;
    if (frameSizeBytes < _avgFrameSize + 2 * sqrt(_varFrameSize)) {
      // Only update the average frame size if this sample wasn't a
      // key frame
      _avgFrameSize = avgFrameSize;
    }
    // Update the variance anyway since we want to capture cases where we only
    // get
    // key frames.
    _varFrameSize = VCM_MAX(
        _phi * _varFrameSize + (1 - _phi) * (frameSizeBytes - avgFrameSize) *
                                   (frameSizeBytes - avgFrameSize),
        1.0);
  }

  // Update max frameSize estimate
  _maxFrameSize =
      VCM_MAX(_psi * _maxFrameSize, static_cast<double>(frameSizeBytes));

  if (_prevFrameSize == 0) {
    _prevFrameSize = frameSizeBytes;
    return;
  }
  _prevFrameSize = frameSizeBytes;

  // Cap frameDelayMS based on the current time deviation noise.
  int64_t max_time_deviation_ms =
      static_cast<int64_t>(time_deviation_upper_bound_ * sqrt(_varNoise) + 0.5);
  frameDelayMS = std::max(std::min(frameDelayMS, max_time_deviation_ms),
                          -max_time_deviation_ms);

  // Only update the Kalman filter if the sample is not considered
  // an extreme outlier. Even if it is an extreme outlier from a
  // delay point of view, if the frame size also is large the
  // deviation is probably due to an incorrect line slope.
  double deviation = DeviationFromExpectedDelay(frameDelayMS, deltaFS);

  if (fabs(deviation) < _numStdDevDelayOutlier * sqrt(_varNoise) ||
      frameSizeBytes >
          _avgFrameSize + _numStdDevFrameSizeOutlier * sqrt(_varFrameSize)) {
    // Update the variance of the deviation from the
    // line given by the Kalman filter
    EstimateRandomJitter(deviation, incompleteFrame);
    // Prevent updating with frames which have been congested by a large
    // frame, and therefore arrives almost at the same time as that frame.
    // This can occur when we receive a large frame (key frame) which
    // has been delayed. The next frame is of normal size (delta frame),
    // and thus deltaFS will be << 0. This removes all frame samples
    // which arrives after a key frame.
    if ((!incompleteFrame || deviation >= 0.0) &&
        static_cast<double>(deltaFS) > -0.25 * _maxFrameSize) {
      // Update the Kalman filter with the new data
      KalmanEstimateChannel(frameDelayMS, deltaFS);
    }
  } else {
    int nStdDev =
        (deviation >= 0) ? _numStdDevDelayOutlier : -_numStdDevDelayOutlier;
    EstimateRandomJitter(nStdDev * sqrt(_varNoise), incompleteFrame);
  }
  // Post process the total estimated jitter
  if (_startupCount >= kStartupDelaySamples) {
    PostProcessEstimate();
  } else {
    _startupCount++;
  }
}

// Updates the nack/packet ratio
void VCMJitterEstimator::FrameNacked() {
  if (_nackCount < _nackLimit) {
    _nackCount++;
  }
  _latestNackTimestamp = clock_->TimeInMicroseconds();
}

// Updates Kalman estimate of the channel
// The caller is expected to sanity check the inputs.
void VCMJitterEstimator::KalmanEstimateChannel(int64_t frameDelayMS,
                                               int32_t deltaFSBytes) {
  double Mh[2];
  double hMh_sigma;
  double kalmanGain[2];
  double measureRes;
  double t00, t01;

  // Kalman filtering

  // Prediction
  // M = M + Q
  _thetaCov[0][0] += _Qcov[0][0];
  _thetaCov[0][1] += _Qcov[0][1];
  _thetaCov[1][0] += _Qcov[1][0];
  _thetaCov[1][1] += _Qcov[1][1];

  // Kalman gain
  // K = M*h'/(sigma2n + h*M*h') = M*h'/(1 + h*M*h')
  // h = [dFS 1]
  // Mh = M*h'
  // hMh_sigma = h*M*h' + R
  Mh[0] = _thetaCov[0][0] * deltaFSBytes + _thetaCov[0][1];
  Mh[1] = _thetaCov[1][0] * deltaFSBytes + _thetaCov[1][1];
  // sigma weights measurements with a small deltaFS as noisy and
  // measurements with large deltaFS as good
  if (_maxFrameSize < 1.0) {
    return;
  }
  double sigma = (300.0 * exp(-fabs(static_cast<double>(deltaFSBytes)) /
                              (1e0 * _maxFrameSize)) +
                  1) *
                 sqrt(_varNoise);
  if (sigma < 1.0) {
    sigma = 1.0;
  }
  hMh_sigma = deltaFSBytes * Mh[0] + Mh[1] + sigma;
  if ((hMh_sigma < 1e-9 && hMh_sigma >= 0) ||
      (hMh_sigma > -1e-9 && hMh_sigma <= 0)) {
    assert(false);
    return;
  }
  kalmanGain[0] = Mh[0] / hMh_sigma;
  kalmanGain[1] = Mh[1] / hMh_sigma;

  // Correction
  // theta = theta + K*(dT - h*theta)
  measureRes = frameDelayMS - (deltaFSBytes * _theta[0] + _theta[1]);
  _theta[0] += kalmanGain[0] * measureRes;
  _theta[1] += kalmanGain[1] * measureRes;

  if (_theta[0] < _thetaLow) {
    _theta[0] = _thetaLow;
  }

  // M = (I - K*h)*M
  t00 = _thetaCov[0][0];
  t01 = _thetaCov[0][1];
  _thetaCov[0][0] = (1 - kalmanGain[0] * deltaFSBytes) * t00 -
                    kalmanGain[0] * _thetaCov[1][0];
  _thetaCov[0][1] = (1 - kalmanGain[0] * deltaFSBytes) * t01 -
                    kalmanGain[0] * _thetaCov[1][1];
  _thetaCov[1][0] = _thetaCov[1][0] * (1 - kalmanGain[1]) -
                    kalmanGain[1] * deltaFSBytes * t00;
  _thetaCov[1][1] = _thetaCov[1][1] * (1 - kalmanGain[1]) -
                    kalmanGain[1] * deltaFSBytes * t01;

  // Covariance matrix, must be positive semi-definite
  assert(_thetaCov[0][0] + _thetaCov[1][1] >= 0 &&
         _thetaCov[0][0] * _thetaCov[1][1] -
                 _thetaCov[0][1] * _thetaCov[1][0] >=
             0 &&
         _thetaCov[0][0] >= 0);
}

// Calculate difference in delay between a sample and the
// expected delay estimated by the Kalman filter
double VCMJitterEstimator::DeviationFromExpectedDelay(
    int64_t frameDelayMS,
    int32_t deltaFSBytes) const {
  return frameDelayMS - (_theta[0] * deltaFSBytes + _theta[1]);
}

// Estimates the random jitter by calculating the variance of the
// sample distance from the line given by theta.
void VCMJitterEstimator::EstimateRandomJitter(double d_dT,
                                              bool incompleteFrame) {
  uint64_t now = clock_->TimeInMicroseconds();
  if (_lastUpdateT != -1) {
    fps_counter_.AddSample(now - _lastUpdateT);
  }
  _lastUpdateT = now;

  if (_alphaCount == 0) {
    assert(false);
    return;
  }
  double alpha =
      static_cast<double>(_alphaCount - 1) / static_cast<double>(_alphaCount);
  _alphaCount++;
  if (_alphaCount > _alphaCountMax)
    _alphaCount = _alphaCountMax;

  // In order to avoid a low frame rate stream to react slower to changes,
  // scale the alpha weight relative a 30 fps stream.
  double fps = GetFrameRate();
  if (fps > 0.0) {
    double rate_scale = 30.0 / fps;
    // At startup, there can be a lot of noise in the fps estimate.
    // Interpolate rate_scale linearly, from 1.0 at sample #1, to 30.0 / fps
    // at sample #kStartupDelaySamples.
    if (_alphaCount < kStartupDelaySamples) {
      rate_scale =
          (_alphaCount * rate_scale + (kStartupDelaySamples - _alphaCount)) /
          kStartupDelaySamples;
    }
    alpha = pow(alpha, rate_scale);
  }

  double avgNoise = alpha * _avgNoise + (1 - alpha) * d_dT;
  double varNoise =
      alpha * _varNoise + (1 - alpha) * (d_dT - _avgNoise) * (d_dT - _avgNoise);
  if (!incompleteFrame || varNoise > _varNoise) {
    _avgNoise = avgNoise;
    _varNoise = varNoise;
  }
  if (_varNoise < 1.0) {
    // The variance should never be zero, since we might get
    // stuck and consider all samples as outliers.
    _varNoise = 1.0;
  }
}

double VCMJitterEstimator::NoiseThreshold() const {
  double noiseThreshold = _noiseStdDevs * sqrt(_varNoise) - _noiseStdDevOffset;
  if (noiseThreshold < 1.0) {
    noiseThreshold = 1.0;
  }
  return noiseThreshold;
}

// Calculates the current jitter estimate from the filtered estimates
double VCMJitterEstimator::CalculateEstimate() {
  double ret = _theta[0] * (_maxFrameSize - _avgFrameSize) + NoiseThreshold();

  // A very low estimate (or negative) is neglected
  if (ret < 1.0) {
    if (_prevEstimate <= 0.01) {
      ret = 1.0;
    } else {
      ret = _prevEstimate;
    }
  }
  if (ret > 10000.0) {  // Sanity
    ret = 10000.0;
  }
  _prevEstimate = ret;
  return ret;
}

void VCMJitterEstimator::PostProcessEstimate() {
  _filterJitterEstimate = CalculateEstimate();
}

void VCMJitterEstimator::UpdateRtt(int64_t rttMs) {
  _rttFilter.Update(rttMs);
}

void VCMJitterEstimator::UpdateMaxFrameSize(uint32_t frameSizeBytes) {
  if (_maxFrameSize < frameSizeBytes) {
    _maxFrameSize = frameSizeBytes;
  }
}

// Returns the current filtered estimate if available,
// otherwise tries to calculate an estimate.
int VCMJitterEstimator::GetJitterEstimate(double rttMultiplier) {
  double jitterMS = CalculateEstimate() + OPERATING_SYSTEM_JITTER;
  uint64_t now = clock_->TimeInMicroseconds();

  if (now - _latestNackTimestamp > kNackCountTimeoutMs * 1000)
    _nackCount = 0;

  if (_filterJitterEstimate > jitterMS)
    jitterMS = _filterJitterEstimate;
  if (_nackCount >= _nackLimit)
    jitterMS += _rttFilter.RttMs() * rttMultiplier;

  static const double kJitterScaleLowThreshold = 5.0;
  static const double kJitterScaleHighThreshold = 10.0;
  double fps = GetFrameRate();
  // Ignore jitter for very low fps streams.
  if (fps < kJitterScaleLowThreshold) {
    if (fps == 0.0) {
      return rtc::checked_cast<int>(std::max(0.0, jitterMS) + 0.5);
    }
    return 0;
  }

  // Semi-low frame rate; scale by factor linearly interpolated from 0.0 at
  // kJitterScaleLowThreshold to 1.0 at kJitterScaleHighThreshold.
  if (fps < kJitterScaleHighThreshold) {
    jitterMS = (1.0 / (kJitterScaleHighThreshold - kJitterScaleLowThreshold)) *
               (fps - kJitterScaleLowThreshold) * jitterMS;
  }

  return rtc::checked_cast<int>(std::max(0.0, jitterMS) + 0.5);
}

double VCMJitterEstimator::GetFrameRate() const {
  if (fps_counter_.ComputeMean() == 0.0)
    return 0;

  double fps = 1000000.0 / fps_counter_.ComputeMean();
  // Sanity check.
  assert(fps >= 0.0);
  if (fps > kMaxFramerateEstimate) {
    fps = kMaxFramerateEstimate;
  }
  return fps;
}
}  // namespace webrtc