Use robust variance computation in RollingAccumulator.

The previous implementation was failing the NumericStabilityForVariance test.

Bug: webrtc:10412
Change-Id: I97ba321743ebb8cc0923ae13a17c9997779fc305
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/133029
Commit-Queue: Yves Gerey <yvesg@google.com>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#27931}
This commit is contained in:
Yves Gerey 2019-05-13 17:01:32 +02:00 committed by Commit Bot
parent 2ebf523978
commit 3dfb68082e
5 changed files with 151 additions and 54 deletions

View file

@ -912,6 +912,8 @@ rtc_static_library("rtc_base") {
"ssl_roots.h",
]
deps += [ ":rtc_numerics" ]
if (is_win) {
sources += [ "win32_socket_init.h" ]
if (current_os != "winuwp") {

View file

@ -17,6 +17,7 @@
#include "absl/types/optional.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/math_utils.h"
namespace webrtc {
@ -28,6 +29,10 @@ namespace webrtc {
// min, max, mean, variance and standard deviation.
// If you need to get percentiles, please use webrtc::SamplesStatsCounter.
//
// Please note RemoveSample() won't affect min and max.
// If you want a full-fledged moving window over N last samples,
// please use webrtc::RollingAccumulator.
//
// The measures return absl::nullopt if no samples were fed (Size() == 0),
// otherwise the returned optional is guaranteed to contain a value.
//
@ -54,6 +59,24 @@ class RunningStatistics {
cumul_ += delta * delta2;
}
// Remove a previously added value in O(1) time.
// Nb: This doesn't affect min or max.
// Calling RemoveSample when Size()==0 is incorrect.
void RemoveSample(T sample) {
  RTC_DCHECK_GT(Size(), 0);
  // In production, just saturate at 0.
  if (Size() == 0) {
    return;
  }
  // Since samples order doesn't matter, this is the
  // exact reciprocal of Welford's incremental update.
  --size_;
  if (size_ == 0) {
    // Removing the last sample: reset to the empty state explicitly.
    // The general formula below would divide by zero (delta / size_),
    // poisoning mean_ with inf/NaN and corrupting every later update.
    mean_ = 0;
    cumul_ = 0;
    return;
  }
  const double delta = sample - mean_;
  mean_ -= delta / size_;
  const double delta2 = sample - mean_;
  cumul_ -= delta * delta2;
}
// Merge other stats, as if samples were added one by one, but in O(1).
void MergeStatistics(const RunningStatistics<T>& other) {
if (other.size_ == 0) {
@ -78,11 +101,12 @@ class RunningStatistics {
// Get Measures ////////////////////////////////////////////
// Returns number of samples involved,
// that is number of times AddSample() was called.
// Returns number of samples involved via AddSample() or MergeStatistics(),
// minus number of times RemoveSample() was called.
int64_t Size() const { return size_; }
// Returns min in O(1) time.
// Returns minimum among all seen samples, in O(1) time.
// This isn't affected by RemoveSample().
absl::optional<T> GetMin() const {
if (size_ == 0) {
return absl::nullopt;
@ -90,7 +114,8 @@ class RunningStatistics {
return min_;
}
// Returns max in O(1) time.
// Returns maximum among all seen samples, in O(1) time.
// This isn't affected by RemoveSample().
absl::optional<T> GetMax() const {
if (size_ == 0) {
return absl::nullopt;

View file

@ -56,7 +56,7 @@ constexpr int SIZE_FOR_MERGE = 5;
} // namespace
TEST(RunningStatisticsTest, FullSimpleTest) {
TEST(RunningStatistics, FullSimpleTest) {
auto stats = CreateStatsFilledWithIntsFrom1ToN(100);
EXPECT_DOUBLE_EQ(*stats.GetMin(), 1.0);
@ -76,7 +76,49 @@ TEST(RunningStatistics, VarianceAndDeviation) {
EXPECT_DOUBLE_EQ(*stats.GetStandardDeviation(), sqrt(4.5));
}
TEST(RunningStatisticsTest, VarianceFromUniformDistribution) {
TEST(RunningStatistics, RemoveSample) {
  // Adding a sample and then removing it again must leave the stats
  // (almost) untouched, up to accumulated floating-point error.
  RunningStatistics<int> stats;
  for (const int sample : {2, 2, -1, 5}) {
    stats.AddSample(sample);
  }

  constexpr int kIterations = 1e5;
  for (int i = 0; i < kIterations; ++i) {
    stats.AddSample(i);
    stats.RemoveSample(i);

    EXPECT_NEAR(*stats.GetMean(), 2.0, 1e-8);
    EXPECT_NEAR(*stats.GetVariance(), 4.5, 1e-3);
    EXPECT_NEAR(*stats.GetStandardDeviation(), sqrt(4.5), 1e-4);
  }
}
TEST(RunningStatistics, RemoveSamplesSequence) {
  // Pushing a whole sequence of samples and then removing the same
  // sequence must be a near no-op, up to loss of precision.
  RunningStatistics<int> stats;
  for (const int sample : {2, 2, -1, 5}) {
    stats.AddSample(sample);
  }

  constexpr int kIterations = 1e4;
  for (int i = 0; i < kIterations; ++i) {
    stats.AddSample(i);
  }
  for (int i = 0; i < kIterations; ++i) {
    stats.RemoveSample(i);
  }

  EXPECT_NEAR(*stats.GetMean(), 2.0, 1e-7);
  EXPECT_NEAR(*stats.GetVariance(), 4.5, 1e-3);
  EXPECT_NEAR(*stats.GetStandardDeviation(), sqrt(4.5), 1e-4);
}
TEST(RunningStatistics, VarianceFromUniformDistribution) {
// Check variance converge to 1/12 for [0;1) uniform distribution.
// Acts as a sanity check for NumericStabilityForVariance test.
auto stats = CreateStatsFromUniformDistribution(1e6, 0, 1);
@ -84,7 +126,7 @@ TEST(RunningStatisticsTest, VarianceFromUniformDistribution) {
EXPECT_NEAR(*stats.GetVariance(), 1. / 12, 1e-3);
}
TEST(RunningStatisticsTest, NumericStabilityForVariance) {
TEST(RunningStatistics, NumericStabilityForVariance) {
// Same test as VarianceFromUniformDistribution,
// except the range is shifted to [1e9;1e9+1).
// Variance should also converge to 1/12.
@ -96,6 +138,26 @@ TEST(RunningStatisticsTest, NumericStabilityForVariance) {
EXPECT_NEAR(*stats.GetVariance(), 1. / 12, 1e-3);
}
TEST(RunningStatistics, MinRemainsUnchangedAfterRemove) {
  // The min is deliberately not recomputed on removal (that is
  // RollingAccumulator's role), so the all-time min is still reported.
  RunningStatistics<int> stats;
  for (const int sample : {1, 2}) {
    stats.AddSample(sample);
  }
  stats.RemoveSample(1);
  EXPECT_EQ(stats.GetMin(), 1);
}
TEST(RunningStatistics, MaxRemainsUnchangedAfterRemove) {
  // The max is deliberately not recomputed on removal (that is
  // RollingAccumulator's role), so the all-time max is still reported.
  RunningStatistics<int> stats;
  for (const int sample : {1, 2}) {
    stats.AddSample(sample);
  }
  stats.RemoveSample(2);
  EXPECT_EQ(stats.GetMax(), 2);
}
TEST_P(RunningStatisticsTest, MergeStatistics) {
int data[SIZE_FOR_MERGE] = {2, 2, -1, 5, 10};
// Split the data in different partitions.

View file

@ -17,6 +17,7 @@
#include "rtc_base/checks.h"
#include "rtc_base/constructor_magic.h"
#include "rtc_base/numerics/running_statistics.h"
namespace rtc {
@ -28,19 +29,18 @@ template <typename T>
class RollingAccumulator {
public:
explicit RollingAccumulator(size_t max_count) : samples_(max_count) {
RTC_DCHECK(max_count > 0);
Reset();
}
~RollingAccumulator() {}
size_t max_count() const { return samples_.size(); }
size_t count() const { return count_; }
size_t count() const { return static_cast<size_t>(stats_.Size()); }
void Reset() {
count_ = 0U;
stats_ = webrtc::RunningStatistics<T>();
next_index_ = 0U;
sum_ = 0.0;
sum_2_ = 0.0;
max_ = T();
max_stale_ = false;
min_ = T();
@ -48,52 +48,40 @@ class RollingAccumulator {
}
void AddSample(T sample) {
if (count_ == max_count()) {
if (count() == max_count()) {
// Remove oldest sample.
T sample_to_remove = samples_[next_index_];
sum_ -= sample_to_remove;
sum_2_ -= static_cast<double>(sample_to_remove) * sample_to_remove;
stats_.RemoveSample(sample_to_remove);
if (sample_to_remove >= max_) {
max_stale_ = true;
}
if (sample_to_remove <= min_) {
min_stale_ = true;
}
} else {
// Increase count of samples.
++count_;
}
// Add new sample.
samples_[next_index_] = sample;
sum_ += sample;
sum_2_ += static_cast<double>(sample) * sample;
if (count_ == 1 || sample >= max_) {
if (count() == 0 || sample >= max_) {
max_ = sample;
max_stale_ = false;
}
if (count_ == 1 || sample <= min_) {
if (count() == 0 || sample <= min_) {
min_ = sample;
min_stale_ = false;
}
stats_.AddSample(sample);
// Update next_index_.
next_index_ = (next_index_ + 1) % max_count();
}
T ComputeSum() const { return static_cast<T>(sum_); }
double ComputeMean() const {
if (count_ == 0) {
return 0.0;
}
return sum_ / count_;
}
double ComputeMean() const { return stats_.GetMean().value_or(0); }
T ComputeMax() const {
if (max_stale_) {
RTC_DCHECK(count_ > 0)
<< "It shouldn't be possible for max_stale_ && count_ == 0";
RTC_DCHECK(count() > 0)
<< "It shouldn't be possible for max_stale_ && count() == 0";
max_ = samples_[next_index_];
for (size_t i = 1u; i < count_; i++) {
for (size_t i = 1u; i < count(); i++) {
max_ = std::max(max_, samples_[(next_index_ + i) % max_count()]);
}
max_stale_ = false;
@ -103,10 +91,10 @@ class RollingAccumulator {
T ComputeMin() const {
if (min_stale_) {
RTC_DCHECK(count_ > 0)
<< "It shouldn't be possible for min_stale_ && count_ == 0";
RTC_DCHECK(count() > 0)
<< "It shouldn't be possible for min_stale_ && count() == 0";
min_ = samples_[next_index_];
for (size_t i = 1u; i < count_; i++) {
for (size_t i = 1u; i < count(); i++) {
min_ = std::min(min_, samples_[(next_index_ + i) % max_count()]);
}
min_stale_ = false;
@ -118,14 +106,14 @@ class RollingAccumulator {
// Weights nth sample with weight (learning_rate)^n. Learning_rate should be
// between (0.0, 1.0], otherwise the non-weighted mean is returned.
double ComputeWeightedMean(double learning_rate) const {
if (count_ < 1 || learning_rate <= 0.0 || learning_rate >= 1.0) {
if (count() < 1 || learning_rate <= 0.0 || learning_rate >= 1.0) {
return ComputeMean();
}
double weighted_mean = 0.0;
double current_weight = 1.0;
double weight_sum = 0.0;
const size_t max_size = max_count();
for (size_t i = 0; i < count_; ++i) {
for (size_t i = 0; i < count(); ++i) {
current_weight *= learning_rate;
weight_sum += current_weight;
// Add max_size to prevent underflow.
@ -137,22 +125,11 @@ class RollingAccumulator {
// Compute estimated variance. Estimation is more accurate
// as the number of samples grows.
double ComputeVariance() const {
if (count_ == 0) {
return 0.0;
}
// Var = E[x^2] - (E[x])^2
double count_inv = 1.0 / count_;
double mean_2 = sum_2_ * count_inv;
double mean = sum_ * count_inv;
return mean_2 - (mean * mean);
}
double ComputeVariance() const { return stats_.GetVariance().value_or(0); }
private:
size_t count_;
webrtc::RunningStatistics<T> stats_;
size_t next_index_;
double sum_; // Sum(x) - double to avoid overflow
double sum_2_; // Sum(x*x) - double to avoid overflow
mutable T max_;
mutable bool max_stale_;
mutable T min_;

View file

@ -8,6 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <random>
#include "rtc_base/rolling_accumulator.h"
#include "test/gtest.h"
@ -18,6 +20,18 @@ namespace {
const double kLearningRate = 0.5;
// Feed |n| samples into |stats|, drawn from the uniform real
// distribution over the half-open interval [a;b).
void FillStatsFromUniformDistribution(RollingAccumulator<double>& stats,
                                      int n,
                                      double a,
                                      double b) {
  // Non-deterministic seed; these tests only check statistical limits.
  std::mt19937 rng{std::random_device()()};
  std::uniform_real_distribution<> uniform(a, b);
  for (int added = 0; added < n; ++added) {
    stats.AddSample(uniform(rng));
  }
}
} // namespace
TEST(RollingAccumulatorTest, ZeroSamples) {
@ -37,7 +51,6 @@ TEST(RollingAccumulatorTest, SomeSamples) {
}
EXPECT_EQ(4U, accum.count());
EXPECT_EQ(6, accum.ComputeSum());
EXPECT_DOUBLE_EQ(1.5, accum.ComputeMean());
EXPECT_NEAR(2.26666, accum.ComputeWeightedMean(kLearningRate), 0.01);
EXPECT_DOUBLE_EQ(1.25, accum.ComputeVariance());
@ -52,7 +65,6 @@ TEST(RollingAccumulatorTest, RollingSamples) {
}
EXPECT_EQ(10U, accum.count());
EXPECT_EQ(65, accum.ComputeSum());
EXPECT_DOUBLE_EQ(6.5, accum.ComputeMean());
EXPECT_NEAR(10.0, accum.ComputeWeightedMean(kLearningRate), 0.01);
EXPECT_NEAR(9.0, accum.ComputeVariance(), 1.0);
@ -79,7 +91,6 @@ TEST(RollingAccumulatorTest, ResetSamples) {
}
EXPECT_EQ(5U, accum.count());
EXPECT_EQ(10, accum.ComputeSum());
EXPECT_DOUBLE_EQ(2.0, accum.ComputeMean());
EXPECT_EQ(0, accum.ComputeMin());
EXPECT_EQ(4, accum.ComputeMax());
@ -92,7 +103,6 @@ TEST(RollingAccumulatorTest, RollingSamplesDouble) {
}
EXPECT_EQ(10u, accum.count());
EXPECT_DOUBLE_EQ(875.0, accum.ComputeSum());
EXPECT_DOUBLE_EQ(87.5, accum.ComputeMean());
EXPECT_NEAR(105.049, accum.ComputeWeightedMean(kLearningRate), 0.1);
EXPECT_NEAR(229.166667, accum.ComputeVariance(), 25);
@ -116,4 +126,25 @@ TEST(RollingAccumulatorTest, ComputeWeightedMeanCornerCases) {
EXPECT_NEAR(6.0, accum.ComputeWeightedMean(kLearningRate), 0.1);
}
TEST(RollingAccumulatorTest, VarianceFromUniformDistribution) {
  // The variance of U[0;1) converges to 1/12; this acts as a sanity
  // check for the NumericStabilityForVariance test below.
  RollingAccumulator<double> accum(/*max_count=*/500000);
  FillStatsFromUniformDistribution(accum, 1000000, 0.0, 1.0);
  EXPECT_NEAR(accum.ComputeVariance(), 1. / 12, 1e-3);
}
TEST(RollingAccumulatorTest, NumericStabilityForVariance) {
  // Same as VarianceFromUniformDistribution, with the range shifted to
  // [1e9;1e9+1). The variance should still converge to 1/12.
  // NB: Although the samples themselves lose precision, the fractional
  // part still enjoys 22 bits of mantissa and errors should even out,
  // so that couldn't explain a mismatch.
  RollingAccumulator<double> accum(/*max_count=*/500000);
  FillStatsFromUniformDistribution(accum, 1000000, 1e9, 1e9 + 1);
  EXPECT_NEAR(accum.ComputeVariance(), 1. / 12, 1e-3);
}
} // namespace rtc