Revert "Remove post-decode VAD"

This reverts commit 89cf26f1e0.

Reason for revert: breaking upstream projects

Original change's description:
> Remove post-decode VAD
>
> Bug: webrtc:15806
> Change-Id: I6acf8734a70703085cfc1ccf82a79ee0931f59a4
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/336460
> Reviewed-by: Sam Zackrisson <saza@webrtc.org>
> Commit-Queue: Tomas Lundqvist <tomasl@google.com>
> Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
> Cr-Commit-Position: refs/heads/main@{#41653}

Bug: webrtc:15806
Change-Id: I20e383a6b6d625d86830ecec1be01b42b22e86a2
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/337420
Bot-Commit: rubber-stamper@appspot.gserviceaccount.com <rubber-stamper@appspot.gserviceaccount.com>
Owners-Override: Jeremy Leconte <jleconte@google.com>
Commit-Queue: Jeremy Leconte <jleconte@google.com>
Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#41657}
Jeremy Leconte 2024-02-01 15:11:31 +00:00 committed by WebRTC LUCI CQ
parent 53e41a2bc6
commit 687ef0a136
13 changed files with 358 additions and 23 deletions
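
For context, a minimal sketch of the kind of downstream consumer this revert protects (hypothetical usage, not part of the commit): with NetEq::Config::enable_post_decode_vad set, or after EnableVad(), AudioFrame::vad_activity_ carries a real decision instead of kVadUnknown, which is what mixer-style consumers read. Function name and call pattern are assumptions for illustration.

#include "api/audio/audio_frame.h"
#include "api/neteq/neteq.h"

// Hypothetical downstream helper; relies on post-decode VAD being enabled.
bool NextFrameHasSpeech(webrtc::NetEq& neteq, webrtc::AudioFrame& frame) {
  bool muted = false;
  // With post-decode VAD enabled, GetAudio() fills vad_activity_ with
  // kVadActive/kVadPassive instead of kVadUnknown.
  if (neteq.GetAudio(&frame, &muted) != 0) {
    return false;
  }
  return frame.vad_activity_ == webrtc::AudioFrame::kVadActive;
}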

@ -24,7 +24,8 @@ NetEq::Config& NetEq::Config::operator=(Config&&) = default;
std::string NetEq::Config::ToString() const {
char buf[1024];
rtc::SimpleStringBuilder ss(buf);
- ss << "sample_rate_hz=" << sample_rate_hz
+ ss << "sample_rate_hz=" << sample_rate_hz << ", enable_post_decode_vad="
+    << (enable_post_decode_vad ? "true" : "false")
<< ", max_packets_in_buffer=" << max_packets_in_buffer
<< ", min_delay_ms=" << min_delay_ms << ", enable_fast_accelerate="
<< (enable_fast_accelerate ? "true" : "false")

@ -130,6 +130,7 @@ class NetEq {
std::string ToString() const;
int sample_rate_hz = 48000; // Initial value. Will change with input data.
bool enable_post_decode_vad = false;
size_t max_packets_in_buffer = 200;
int max_delay_ms = 0;
int min_delay_ms = 0;
@ -277,6 +278,13 @@ class NetEq {
// statistics are never reset.
virtual NetEqOperationsAndState GetOperationsAndState() const = 0;
// Enables post-decode VAD. When enabled, GetAudio() will return
// kOutputVADPassive when the signal contains no speech.
virtual void EnableVad() = 0;
// Disables post-decode VAD.
virtual void DisableVad() = 0;
// Returns the RTP timestamp for the last sample delivered by GetAudio().
// The return value will be empty if no valid timestamp is available.
virtual absl::optional<uint32_t> GetPlayoutTimestamp() const = 0;
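
A minimal sketch (assumed usage, helper name hypothetical) of the runtime toggle declared above; EnableVad()/DisableVad() control the same post-decode VAD that the enable_post_decode_vad config flag turns on at construction.

#include "api/neteq/neteq.h"

// Toggles receive-side VAD on an already constructed NetEq instance.
void SetReceiveVad(webrtc::NetEq& neteq, bool enabled) {
  if (enabled) {
    neteq.EnableVad();   // Subsequent GetAudio() calls can report passive VAD.
  } else {
    neteq.DisableVad();  // vad_activity_ falls back to kVadUnknown.
  }
}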

@ -689,6 +689,8 @@ rtc_library("neteq") {
"neteq/packet_arrival_history.h",
"neteq/packet_buffer.cc",
"neteq/packet_buffer.h",
"neteq/post_decode_vad.cc",
"neteq/post_decode_vad.h",
"neteq/preemptive_expand.cc",
"neteq/preemptive_expand.h",
"neteq/random_vector.cc",
@ -1653,6 +1655,7 @@ if (rtc_include_tests) {
"neteq/normal_unittest.cc",
"neteq/packet_arrival_history_unittest.cc",
"neteq/packet_buffer_unittest.cc",
"neteq/post_decode_vad_unittest.cc",
"neteq/random_vector_unittest.cc",
"neteq/red_payload_splitter_unittest.cc",
"neteq/reorder_optimizer_unittest.cc",

@ -50,7 +50,11 @@ std::unique_ptr<NetEq> CreateNetEq(
AcmReceiver::Config::Config(
rtc::scoped_refptr<AudioDecoderFactory> decoder_factory)
- : clock(*Clock::GetRealTimeClock()), decoder_factory(decoder_factory) {}
+ : clock(*Clock::GetRealTimeClock()), decoder_factory(decoder_factory) {
+   // Post-decode VAD is disabled by default in NetEq, however, Audio
+   // Conference Mixer relies on VAD decisions and fails without them.
+   neteq_config.enable_post_decode_vad = true;
+ }
AcmReceiver::Config::Config(const Config&) = default;
AcmReceiver::Config::~Config() = default;

@ -190,6 +190,9 @@ class AcmReceiverTestFaxModeOldApi : public AcmReceiverTestOldApi {
const size_t output_channels = info.num_channels;
const size_t samples_per_ms = rtc::checked_cast<size_t>(
rtc::CheckedDivExact(output_sample_rate_hz, 1000));
const AudioFrame::VADActivity expected_vad_activity =
output_sample_rate_hz > 16000 ? AudioFrame::kVadActive
: AudioFrame::kVadPassive;
// Expect the first output timestamp to be 5*fs/8000 samples before the
// first inserted timestamp (because of NetEq's look-ahead). (This value is
@ -214,6 +217,7 @@ class AcmReceiverTestFaxModeOldApi : public AcmReceiverTestOldApi {
EXPECT_EQ(output_sample_rate_hz, frame.sample_rate_hz_);
EXPECT_EQ(output_channels, frame.num_channels_);
EXPECT_EQ(AudioFrame::kNormalSpeech, frame.speech_type_);
EXPECT_EQ(expected_vad_activity, frame.vad_activity_);
EXPECT_FALSE(muted);
}
}
@ -238,6 +242,61 @@ TEST_F(AcmReceiverTestFaxModeOldApi, MAYBE_VerifyAudioFrameOpus) {
RunVerifyAudioFrame({"opus", 48000, 2});
}
#if defined(WEBRTC_ANDROID)
#define MAYBE_PostdecodingVad DISABLED_PostdecodingVad
#else
#define MAYBE_PostdecodingVad PostdecodingVad
#endif
TEST_F(AcmReceiverTestOldApi, MAYBE_PostdecodingVad) {
EXPECT_TRUE(config_.neteq_config.enable_post_decode_vad);
constexpr int payload_type = 34;
const SdpAudioFormat codec = {"L16", 16000, 1};
const AudioCodecInfo info = SetEncoder(payload_type, codec);
receiver_->SetCodecs({{payload_type, codec}});
constexpr int kNumPackets = 5;
AudioFrame frame;
for (int n = 0; n < kNumPackets; ++n) {
const int num_10ms_frames = InsertOnePacketOfSilence(info);
for (int k = 0; k < num_10ms_frames; ++k) {
bool muted;
ASSERT_EQ(0, receiver_->GetAudio(info.sample_rate_hz, &frame, &muted));
}
}
EXPECT_EQ(AudioFrame::kVadPassive, frame.vad_activity_);
}
class AcmReceiverTestPostDecodeVadPassiveOldApi : public AcmReceiverTestOldApi {
protected:
AcmReceiverTestPostDecodeVadPassiveOldApi() {
config_.neteq_config.enable_post_decode_vad = false;
}
};
#if defined(WEBRTC_ANDROID)
#define MAYBE_PostdecodingVad DISABLED_PostdecodingVad
#else
#define MAYBE_PostdecodingVad PostdecodingVad
#endif
TEST_F(AcmReceiverTestPostDecodeVadPassiveOldApi, MAYBE_PostdecodingVad) {
EXPECT_FALSE(config_.neteq_config.enable_post_decode_vad);
constexpr int payload_type = 34;
const SdpAudioFormat codec = {"L16", 16000, 1};
const AudioCodecInfo info = SetEncoder(payload_type, codec);
auto const value = encoder_factory_->QueryAudioEncoder(codec);
ASSERT_TRUE(value.has_value());
receiver_->SetCodecs({{payload_type, codec}});
const int kNumPackets = 5;
AudioFrame frame;
for (int n = 0; n < kNumPackets; ++n) {
const int num_10ms_frames = InsertOnePacketOfSilence(info);
for (int k = 0; k < num_10ms_frames; ++k) {
bool muted;
ASSERT_EQ(0, receiver_->GetAudio(info.sample_rate_hz, &frame, &muted));
}
}
EXPECT_EQ(AudioFrame::kVadUnknown, frame.vad_activity_);
}
#if defined(WEBRTC_ANDROID)
#define MAYBE_LastAudioCodec DISABLED_LastAudioCodec
#else

@ -17,6 +17,7 @@
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "modules/audio_coding/neteq/cross_correlation.h"
#include "modules/audio_coding/neteq/post_decode_vad.h"
namespace webrtc {
namespace {
@ -43,11 +44,17 @@ void BackgroundNoise::Reset() {
}
}
- bool BackgroundNoise::Update(const AudioMultiVector& sync_buffer) {
+ bool BackgroundNoise::Update(const AudioMultiVector& input,
+                              const PostDecodeVad& vad) {
bool filter_params_saved = false;
if (vad.running() && vad.active_speech()) {
// Do not update the background noise parameters if we know that the signal
// is active speech.
return filter_params_saved;
}
int32_t auto_correlation[kMaxLpcOrder + 1];
- int16_t filter_output[kMaxLpcOrder + kResidualLength];
+ int16_t fiter_output[kMaxLpcOrder + kResidualLength];
int16_t reflection_coefficients[kMaxLpcOrder];
int16_t lpc_coefficients[kMaxLpcOrder + 1];
@ -55,13 +62,14 @@ bool BackgroundNoise::Update(const AudioMultiVector& sync_buffer) {
ChannelParameters& parameters = channel_parameters_[channel_ix];
int16_t temp_signal_array[kVecLen + kMaxLpcOrder] = {0};
int16_t* temp_signal = &temp_signal_array[kMaxLpcOrder];
- RTC_DCHECK_GE(sync_buffer.Size(), kVecLen);
- sync_buffer[channel_ix].CopyTo(kVecLen, sync_buffer.Size() - kVecLen,
-                                temp_signal);
+ RTC_DCHECK_GE(input.Size(), kVecLen);
+ input[channel_ix].CopyTo(kVecLen, input.Size() - kVecLen, temp_signal);
int32_t sample_energy =
CalculateAutoCorrelation(temp_signal, kVecLen, auto_correlation);
- if (sample_energy < parameters.energy_update_threshold) {
+ if ((!vad.running() &&
+      sample_energy < parameters.energy_update_threshold) ||
+     (vad.running() && !vad.active_speech())) {
// Generate LPC coefficients.
if (auto_correlation[0] <= 0) {
// Center value in auto-correlation is not positive. Do not update.
@ -87,10 +95,10 @@ bool BackgroundNoise::Update(const AudioMultiVector& sync_buffer) {
// Generate the CNG gain factor by looking at the energy of the residual.
WebRtcSpl_FilterMAFastQ12(temp_signal + kVecLen - kResidualLength,
- filter_output, lpc_coefficients,
+ fiter_output, lpc_coefficients,
kMaxLpcOrder + 1, kResidualLength);
int32_t residual_energy = WebRtcSpl_DotProductWithScale(
- filter_output, filter_output, kResidualLength, 0);
+ fiter_output, fiter_output, kResidualLength, 0);
// Check spectral flatness.
// Comparing the residual variance with the input signal variance tells
@ -109,8 +117,9 @@ bool BackgroundNoise::Update(const AudioMultiVector& sync_buffer) {
filter_params_saved = true;
}
} else {
- // Will only happen if `sample_energy` is not low enough. Increase the
- // threshold for update so that it increases by a factor 4 in 4 seconds.
+ // Will only happen if post-decode VAD is disabled and `sample_energy` is
+ // not low enough. Increase the threshold for update so that it increases
+ // by a factor 4 in 4 seconds.
IncrementEnergyThreshold(channel_ix, sample_energy);
}
}
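
An illustrative restatement (not from the CL, helper name hypothetical) of the update gate restored above: with the VAD running, background-noise adaptation follows the VAD decision; otherwise the energy threshold is the only criterion.

#include <cstdint>

bool ShouldUpdateBackgroundNoise(bool vad_running, bool active_speech,
                                 int32_t sample_energy,
                                 int32_t energy_update_threshold) {
  if (vad_running) {
    return !active_speech;  // Adapt only on frames classified as non-speech.
  }
  return sample_energy < energy_update_threshold;  // Energy-based fallback.
}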

@ -39,9 +39,9 @@ class BackgroundNoise {
void Reset();
// Updates the parameter estimates based on the signal currently in the
- // `sync_buffer`.
+ // `sync_buffer`, and on the latest decision in `vad` if it is running.
// Returns true if the filter parameters are updated.
- bool Update(const AudioMultiVector& sync_buffer);
+ bool Update(const AudioMultiVector& sync_buffer, const PostDecodeVad& vad);
// Generates background noise given a random vector and writes the output to
// `buffer`.

@ -36,6 +36,7 @@
#include "modules/audio_coding/neteq/normal.h"
#include "modules/audio_coding/neteq/packet.h"
#include "modules/audio_coding/neteq/packet_buffer.h"
#include "modules/audio_coding/neteq/post_decode_vad.h"
#include "modules/audio_coding/neteq/preemptive_expand.h"
#include "modules/audio_coding/neteq/red_payload_splitter.h"
#include "modules/audio_coding/neteq/statistics_calculator.h"
@ -69,26 +70,49 @@ std::unique_ptr<NetEqController> CreateNetEqController(
return controller_factory.CreateNetEqController(config);
}
- AudioFrame::SpeechType ToSpeechType(NetEqImpl::OutputType type) {
+ void SetAudioFrameActivityAndType(bool vad_enabled,
+                                   NetEqImpl::OutputType type,
+                                   AudioFrame::VADActivity last_vad_activity,
+                                   AudioFrame* audio_frame) {
switch (type) {
case NetEqImpl::OutputType::kNormalSpeech: {
- return AudioFrame::kNormalSpeech;
+ audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
+ audio_frame->vad_activity_ = AudioFrame::kVadActive;
+ break;
}
case NetEqImpl::OutputType::kVadPassive: {
// This should only be reached if the VAD is enabled.
RTC_DCHECK(vad_enabled);
audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
audio_frame->vad_activity_ = AudioFrame::kVadPassive;
break;
}
case NetEqImpl::OutputType::kCNG: {
- return AudioFrame::kCNG;
+ audio_frame->speech_type_ = AudioFrame::kCNG;
+ audio_frame->vad_activity_ = AudioFrame::kVadPassive;
+ break;
}
case NetEqImpl::OutputType::kPLC: {
- return AudioFrame::kPLC;
+ audio_frame->speech_type_ = AudioFrame::kPLC;
+ audio_frame->vad_activity_ = last_vad_activity;
+ break;
}
case NetEqImpl::OutputType::kPLCCNG: {
- return AudioFrame::kPLCCNG;
+ audio_frame->speech_type_ = AudioFrame::kPLCCNG;
+ audio_frame->vad_activity_ = AudioFrame::kVadPassive;
+ break;
}
case NetEqImpl::OutputType::kCodecPLC: {
- return AudioFrame::kCodecPLC;
+ audio_frame->speech_type_ = AudioFrame::kCodecPLC;
+ audio_frame->vad_activity_ = last_vad_activity;
+ break;
}
default:
RTC_DCHECK_NOTREACHED();
- return AudioFrame::kUndefined;
}
if (!vad_enabled) {
// Always set kVadUnknown when receive VAD is inactive.
audio_frame->vad_activity_ = AudioFrame::kVadUnknown;
}
}
@ -145,6 +169,7 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config,
packet_buffer_(std::move(deps.packet_buffer)),
red_payload_splitter_(std::move(deps.red_payload_splitter)),
timestamp_scaler_(std::move(deps.timestamp_scaler)),
vad_(new PostDecodeVad()),
expand_factory_(std::move(deps.expand_factory)),
accelerate_factory_(std::move(deps.accelerate_factory)),
preemptive_expand_factory_(std::move(deps.preemptive_expand_factory)),
@ -186,6 +211,10 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config,
if (create_components) {
SetSampleRateAndChannels(fs, 1); // Default is 1 channel.
}
RTC_DCHECK(!vad_->enabled());
if (config.enable_post_decode_vad) {
vad_->Enable();
}
}
NetEqImpl::~NetEqImpl() = default;
@ -223,7 +252,9 @@ int NetEqImpl::GetAudio(AudioFrame* audio_frame,
audio_frame->sample_rate_hz_,
rtc::dchecked_cast<int>(audio_frame->samples_per_channel_ * 100));
RTC_DCHECK_EQ(*muted, audio_frame->muted());
- audio_frame->speech_type_ = ToSpeechType(LastOutputType());
+ SetAudioFrameActivityAndType(vad_->enabled(), LastOutputType(),
+                              last_vad_activity_, audio_frame);
+ last_vad_activity_ = audio_frame->vad_activity_;
last_output_sample_rate_hz_ = audio_frame->sample_rate_hz_;
RTC_DCHECK(last_output_sample_rate_hz_ == 8000 ||
last_output_sample_rate_hz_ == 16000 ||
@ -367,6 +398,18 @@ NetEqOperationsAndState NetEqImpl::GetOperationsAndState() const {
return result;
}
void NetEqImpl::EnableVad() {
MutexLock lock(&mutex_);
RTC_DCHECK(vad_.get());
vad_->Enable();
}
void NetEqImpl::DisableVad() {
MutexLock lock(&mutex_);
RTC_DCHECK(vad_.get());
vad_->Disable();
}
absl::optional<uint32_t> NetEqImpl::GetPlayoutTimestamp() const {
MutexLock lock(&mutex_);
if (first_packet_ || last_mode_ == Mode::kRfc3389Cng ||
@ -815,8 +858,11 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
last_decoded_type_ = speech_type;
}
RTC_DCHECK(vad_.get());
bool sid_frame_available =
(operation == Operation::kRfc3389Cng && !packet_list.empty());
vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type,
sid_frame_available, fs_hz_);
// This is the criterion that we did decode some data through the speech
// decoder, and the operation resulted in comfort noise.
@ -966,7 +1012,7 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
(last_mode_ == Mode::kPreemptiveExpandFail) ||
(last_mode_ == Mode::kRfc3389Cng) ||
(last_mode_ == Mode::kCodecInternalCng)) {
- background_noise_->Update(*sync_buffer_);
+ background_noise_->Update(*sync_buffer_, *vad_.get());
}
if (operation == Operation::kDtmf) {
@ -2042,6 +2088,10 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
if (cng_decoder)
cng_decoder->Reset();
// Reinit post-decode VAD with new sample rate.
RTC_DCHECK(vad_.get()); // Cannot be NULL here.
vad_->Init();
// Delete algorithm buffer and create a new one.
algorithm_buffer_.reset(new AudioMultiVector(channels));
@ -2082,6 +2132,7 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
}
NetEqImpl::OutputType NetEqImpl::LastOutputType() {
RTC_DCHECK(vad_.get());
RTC_DCHECK(expand_.get());
if (last_mode_ == Mode::kCodecInternalCng ||
last_mode_ == Mode::kRfc3389Cng) {
@ -2091,6 +2142,8 @@ NetEqImpl::OutputType NetEqImpl::LastOutputType() {
return OutputType::kPLCCNG;
} else if (last_mode_ == Mode::kExpand) {
return OutputType::kPLC;
} else if (vad_->running() && !vad_->active_speech()) {
return OutputType::kVadPassive;
} else if (last_mode_ == Mode::kCodecPlc) {
return OutputType::kCodecPLC;
} else {

@ -48,6 +48,7 @@ class Merge;
class NackTracker;
class Normal;
class RedPayloadSplitter;
class PostDecodeVad;
class PreemptiveExpand;
class RandomVector;
class SyncBuffer;
@ -170,6 +171,13 @@ class NetEqImpl : public webrtc::NetEq {
NetEqOperationsAndState GetOperationsAndState() const override;
// Enables post-decode VAD. When enabled, GetAudio() will return
// kOutputVADPassive when the signal contains no speech.
void EnableVad() override;
// Disables post-decode VAD.
void DisableVad() override;
absl::optional<uint32_t> GetPlayoutTimestamp() const override;
int last_output_sample_rate_hz() const override;
@ -348,6 +356,7 @@ class NetEqImpl : public webrtc::NetEq {
RTC_GUARDED_BY(mutex_);
const std::unique_ptr<TimestampScaler> timestamp_scaler_
RTC_GUARDED_BY(mutex_);
const std::unique_ptr<PostDecodeVad> vad_ RTC_GUARDED_BY(mutex_);
const std::unique_ptr<ExpandFactory> expand_factory_ RTC_GUARDED_BY(mutex_);
const std::unique_ptr<AccelerateFactory> accelerate_factory_
RTC_GUARDED_BY(mutex_);
@ -388,6 +397,8 @@ class NetEqImpl : public webrtc::NetEq {
std::unique_ptr<NackTracker> nack_ RTC_GUARDED_BY(mutex_);
bool nack_enabled_ RTC_GUARDED_BY(mutex_);
const bool enable_muted_state_ RTC_GUARDED_BY(mutex_);
AudioFrame::VADActivity last_vad_activity_ RTC_GUARDED_BY(mutex_) =
AudioFrame::kVadPassive;
std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
RTC_GUARDED_BY(mutex_);
std::vector<RtpPacketInfo> last_decoded_packet_infos_ RTC_GUARDED_BY(mutex_);

@ -0,0 +1,90 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/post_decode_vad.h"
namespace webrtc {
PostDecodeVad::~PostDecodeVad() {
if (vad_instance_)
WebRtcVad_Free(vad_instance_);
}
void PostDecodeVad::Enable() {
if (!vad_instance_) {
// Create the instance.
vad_instance_ = WebRtcVad_Create();
if (vad_instance_ == nullptr) {
// Failed to create instance.
Disable();
return;
}
}
Init();
enabled_ = true;
}
void PostDecodeVad::Disable() {
enabled_ = false;
running_ = false;
}
void PostDecodeVad::Init() {
running_ = false;
if (vad_instance_) {
WebRtcVad_Init(vad_instance_);
WebRtcVad_set_mode(vad_instance_, kVadMode);
running_ = true;
}
}
void PostDecodeVad::Update(int16_t* signal,
size_t length,
AudioDecoder::SpeechType speech_type,
bool sid_frame,
int fs_hz) {
if (!vad_instance_ || !enabled_) {
return;
}
if (speech_type == AudioDecoder::kComfortNoise || sid_frame ||
fs_hz > 16000) {
// TODO(hlundin): Remove restriction on fs_hz.
running_ = false;
active_speech_ = true;
sid_interval_counter_ = 0;
} else if (!running_) {
++sid_interval_counter_;
}
if (sid_interval_counter_ >= kVadAutoEnable) {
Init();
}
if (length > 0 && running_) {
size_t vad_sample_index = 0;
active_speech_ = false;
// Loop through frame sizes 30, 20, and 10 ms.
for (int vad_frame_size_ms = 30; vad_frame_size_ms >= 10;
vad_frame_size_ms -= 10) {
size_t vad_frame_size_samples =
static_cast<size_t>(vad_frame_size_ms * fs_hz / 1000);
while (length - vad_sample_index >= vad_frame_size_samples) {
int vad_return =
WebRtcVad_Process(vad_instance_, fs_hz, &signal[vad_sample_index],
vad_frame_size_samples);
active_speech_ |= (vad_return == 1);
vad_sample_index += vad_frame_size_samples;
}
}
}
}
} // namespace webrtc

@ -0,0 +1,71 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_
#define MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_
#include <stddef.h>
#include <stdint.h>
#include "api/audio_codecs/audio_decoder.h"
#include "common_audio/vad/include/webrtc_vad.h"
namespace webrtc {
class PostDecodeVad {
public:
PostDecodeVad()
: enabled_(false),
running_(false),
active_speech_(true),
sid_interval_counter_(0),
vad_instance_(NULL) {}
virtual ~PostDecodeVad();
PostDecodeVad(const PostDecodeVad&) = delete;
PostDecodeVad& operator=(const PostDecodeVad&) = delete;
// Enables post-decode VAD.
void Enable();
// Disables post-decode VAD.
void Disable();
// Initializes post-decode VAD.
void Init();
// Updates post-decode VAD with the audio data in `signal` having `length`
// samples. The data is of type `speech_type`, at the sample rate `fs_hz`.
void Update(int16_t* signal,
size_t length,
AudioDecoder::SpeechType speech_type,
bool sid_frame,
int fs_hz);
// Accessors.
bool enabled() const { return enabled_; }
bool running() const { return running_; }
bool active_speech() const { return active_speech_; }
private:
static const int kVadMode = 0; // Sets aggressiveness to "Normal".
// Number of Update() calls without CNG/SID before re-enabling VAD.
static const int kVadAutoEnable = 3000;
bool enabled_;
bool running_;
bool active_speech_;
int sid_interval_counter_;
::VadInst* vad_instance_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_
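
A minimal lifecycle sketch (assumed usage based on the interface above; the helper name is hypothetical): enable once, feed each decoded block, then read the decision.

#include <cstdint>
#include <vector>

#include "api/audio_codecs/audio_decoder.h"
#include "modules/audio_coding/neteq/post_decode_vad.h"

bool DecodedBlockHasSpeech(webrtc::PostDecodeVad& vad,
                           std::vector<int16_t>& decoded,
                           int sample_rate_hz) {
  if (!vad.enabled()) {
    vad.Enable();  // Lazily creates the underlying WebRtcVad instance.
  }
  vad.Update(decoded.data(), decoded.size(), webrtc::AudioDecoder::kSpeech,
             /*sid_frame=*/false, sample_rate_hz);
  // active_speech() defaults to true while the VAD is not running (e.g. for
  // rates above 16 kHz), so only trust it together with running().
  return vad.running() && vad.active_speech();
}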

@ -0,0 +1,25 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for PostDecodeVad class.
#include "modules/audio_coding/neteq/post_decode_vad.h"
#include "test/gtest.h"
namespace webrtc {
TEST(PostDecodeVad, CreateAndDestroy) {
PostDecodeVad vad;
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

@ -179,6 +179,7 @@ void FuzzOneInputTest(const uint8_t* data, size_t size) {
// Configure NetEq and the NetEqTest object.
NetEqTest::Callbacks callbacks;
NetEq::Config config;
config.enable_post_decode_vad = true;
config.enable_fast_accelerate = true;
auto codecs = NetEqTest::StandardDecoderMap();
// rate_types contains the payload types that will be used for encoding.