Revert "Remove post-decode VAD"

This reverts commit 89cf26f1e0.

Reason for revert: breaking upstream projects

Original change's description:
> Remove post-decode VAD
>
> Bug: webrtc:15806
> Change-Id: I6acf8734a70703085cfc1ccf82a79ee0931f59a4
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/336460
> Reviewed-by: Sam Zackrisson <saza@webrtc.org>
> Commit-Queue: Tomas Lundqvist <tomasl@google.com>
> Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
> Cr-Commit-Position: refs/heads/main@{#41653}

Bug: webrtc:15806
Change-Id: I20e383a6b6d625d86830ecec1be01b42b22e86a2
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/337420
Bot-Commit: rubber-stamper@appspot.gserviceaccount.com <rubber-stamper@appspot.gserviceaccount.com>
Owners-Override: Jeremy Leconte <jleconte@google.com>
Commit-Queue: Jeremy Leconte <jleconte@google.com>
Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#41657}
Jeremy Leconte 2024-02-01 15:11:31 +00:00 committed by WebRTC LUCI CQ
parent 53e41a2bc6
commit 687ef0a136
13 changed files with 358 additions and 23 deletions
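
For context, a minimal sketch of the kind of downstream consumer this revert protects (hypothetical usage, not part of the commit): with NetEq::Config::enable_post_decode_vad set, or after EnableVad(), AudioFrame::vad_activity_ carries a real decision instead of kVadUnknown, which is what mixer-style consumers read. Function name and call pattern are assumptions for illustration.

#include "api/audio/audio_frame.h"
#include "api/neteq/neteq.h"

// Hypothetical downstream helper; relies on post-decode VAD being enabled.
bool NextFrameHasSpeech(webrtc::NetEq& neteq, webrtc::AudioFrame& frame) {
  bool muted = false;
  // With post-decode VAD enabled, GetAudio() fills vad_activity_ with
  // kVadActive/kVadPassive instead of kVadUnknown.
  if (neteq.GetAudio(&frame, &muted) != 0) {
    return false;
  }
  return frame.vad_activity_ == webrtc::AudioFrame::kVadActive;
}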

@ -24,7 +24,8 @@ NetEq::Config& NetEq::Config::operator=(Config&&) = default;
std::string NetEq::Config::ToString() const {
char buf[1024];
rtc::SimpleStringBuilder ss(buf);
- ss << "sample_rate_hz=" << sample_rate_hz
+ ss << "sample_rate_hz=" << sample_rate_hz << ", enable_post_decode_vad="
+    << (enable_post_decode_vad ? "true" : "false")
<< ", max_packets_in_buffer=" << max_packets_in_buffer
<< ", min_delay_ms=" << min_delay_ms << ", enable_fast_accelerate="
<< (enable_fast_accelerate ? "true" : "false")

@ -130,6 +130,7 @@ class NetEq {
std::string ToString() const;
int sample_rate_hz = 48000; // Initial value. Will change with input data.
bool enable_post_decode_vad = false;
size_t max_packets_in_buffer = 200;
int max_delay_ms = 0;
int min_delay_ms = 0;
@ -277,6 +278,13 @@ class NetEq {
// statistics are never reset.
virtual NetEqOperationsAndState GetOperationsAndState() const = 0;
// Enables post-decode VAD. When enabled, GetAudio() will return
// kOutputVADPassive when the signal contains no speech.
virtual void EnableVad() = 0;
// Disables post-decode VAD.
virtual void DisableVad() = 0;
// Returns the RTP timestamp for the last sample delivered by GetAudio().
// The return value will be empty if no valid timestamp is available.
virtual absl::optional<uint32_t> GetPlayoutTimestamp() const = 0;
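
A minimal sketch (assumed usage, helper name hypothetical) of the runtime toggle declared above; EnableVad()/DisableVad() control the same post-decode VAD that the enable_post_decode_vad config flag turns on at construction.

#include "api/neteq/neteq.h"

// Toggles receive-side VAD on an already constructed NetEq instance.
void SetReceiveVad(webrtc::NetEq& neteq, bool enabled) {
  if (enabled) {
    neteq.EnableVad();   // Subsequent GetAudio() calls can report passive VAD.
  } else {
    neteq.DisableVad();  // vad_activity_ falls back to kVadUnknown.
  }
}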

@ -689,6 +689,8 @@ rtc_library("neteq") {
"neteq/packet_arrival_history.h",
"neteq/packet_buffer.cc",
"neteq/packet_buffer.h",
"neteq/post_decode_vad.cc",
"neteq/post_decode_vad.h",
"neteq/preemptive_expand.cc",
"neteq/preemptive_expand.h",
"neteq/random_vector.cc",
@ -1653,6 +1655,7 @@ if (rtc_include_tests) {
"neteq/normal_unittest.cc",
"neteq/packet_arrival_history_unittest.cc",
"neteq/packet_buffer_unittest.cc",
"neteq/post_decode_vad_unittest.cc",
"neteq/random_vector_unittest.cc",
"neteq/red_payload_splitter_unittest.cc",
"neteq/reorder_optimizer_unittest.cc",

@ -50,7 +50,11 @@ std::unique_ptr<NetEq> CreateNetEq(
AcmReceiver::Config::Config(
rtc::scoped_refptr<AudioDecoderFactory> decoder_factory)
- : clock(*Clock::GetRealTimeClock()), decoder_factory(decoder_factory) {}
+ : clock(*Clock::GetRealTimeClock()), decoder_factory(decoder_factory) {
+   // Post-decode VAD is disabled by default in NetEq, however, Audio
+   // Conference Mixer relies on VAD decisions and fails without them.
+   neteq_config.enable_post_decode_vad = true;
+ }
AcmReceiver::Config::Config(const Config&) = default;
AcmReceiver::Config::~Config() = default;

@ -190,6 +190,9 @@ class AcmReceiverTestFaxModeOldApi : public AcmReceiverTestOldApi {
const size_t output_channels = info.num_channels;
const size_t samples_per_ms = rtc::checked_cast<size_t>(
rtc::CheckedDivExact(output_sample_rate_hz, 1000));
const AudioFrame::VADActivity expected_vad_activity =
output_sample_rate_hz > 16000 ? AudioFrame::kVadActive
: AudioFrame::kVadPassive;
// Expect the first output timestamp to be 5*fs/8000 samples before the
// first inserted timestamp (because of NetEq's look-ahead). (This value is
@ -214,6 +217,7 @@ class AcmReceiverTestFaxModeOldApi : public AcmReceiverTestOldApi {
EXPECT_EQ(output_sample_rate_hz, frame.sample_rate_hz_);
EXPECT_EQ(output_channels, frame.num_channels_);
EXPECT_EQ(AudioFrame::kNormalSpeech, frame.speech_type_);
EXPECT_EQ(expected_vad_activity, frame.vad_activity_);
EXPECT_FALSE(muted);
}
}
@ -238,6 +242,61 @@ TEST_F(AcmReceiverTestFaxModeOldApi, MAYBE_VerifyAudioFrameOpus) {
RunVerifyAudioFrame({"opus", 48000, 2});
}
#if defined(WEBRTC_ANDROID)
#define MAYBE_PostdecodingVad DISABLED_PostdecodingVad
#else
#define MAYBE_PostdecodingVad PostdecodingVad
#endif
TEST_F(AcmReceiverTestOldApi, MAYBE_PostdecodingVad) {
EXPECT_TRUE(config_.neteq_config.enable_post_decode_vad);
constexpr int payload_type = 34;
const SdpAudioFormat codec = {"L16", 16000, 1};
const AudioCodecInfo info = SetEncoder(payload_type, codec);
receiver_->SetCodecs({{payload_type, codec}});
constexpr int kNumPackets = 5;
AudioFrame frame;
for (int n = 0; n < kNumPackets; ++n) {
const int num_10ms_frames = InsertOnePacketOfSilence(info);
for (int k = 0; k < num_10ms_frames; ++k) {
bool muted;
ASSERT_EQ(0, receiver_->GetAudio(info.sample_rate_hz, &frame, &muted));
}
}
EXPECT_EQ(AudioFrame::kVadPassive, frame.vad_activity_);
}
class AcmReceiverTestPostDecodeVadPassiveOldApi : public AcmReceiverTestOldApi {
protected:
AcmReceiverTestPostDecodeVadPassiveOldApi() {
config_.neteq_config.enable_post_decode_vad = false;
}
};
#if defined(WEBRTC_ANDROID)
#define MAYBE_PostdecodingVad DISABLED_PostdecodingVad
#else
#define MAYBE_PostdecodingVad PostdecodingVad
#endif
TEST_F(AcmReceiverTestPostDecodeVadPassiveOldApi, MAYBE_PostdecodingVad) {
EXPECT_FALSE(config_.neteq_config.enable_post_decode_vad);
constexpr int payload_type = 34;
const SdpAudioFormat codec = {"L16", 16000, 1};
const AudioCodecInfo info = SetEncoder(payload_type, codec);
auto const value = encoder_factory_->QueryAudioEncoder(codec);
ASSERT_TRUE(value.has_value());
receiver_->SetCodecs({{payload_type, codec}});
const int kNumPackets = 5;
AudioFrame frame;
for (int n = 0; n < kNumPackets; ++n) {
const int num_10ms_frames = InsertOnePacketOfSilence(info);
for (int k = 0; k < num_10ms_frames; ++k) {
bool muted;
ASSERT_EQ(0, receiver_->GetAudio(info.sample_rate_hz, &frame, &muted));
}
}
EXPECT_EQ(AudioFrame::kVadUnknown, frame.vad_activity_);
}
#if defined(WEBRTC_ANDROID)
#define MAYBE_LastAudioCodec DISABLED_LastAudioCodec
#else

@ -17,6 +17,7 @@
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "modules/audio_coding/neteq/cross_correlation.h"
#include "modules/audio_coding/neteq/post_decode_vad.h"
namespace webrtc {
namespace {
@ -43,11 +44,17 @@ void BackgroundNoise::Reset() {
}
}
- bool BackgroundNoise::Update(const AudioMultiVector& sync_buffer) {
+ bool BackgroundNoise::Update(const AudioMultiVector& input,
+                              const PostDecodeVad& vad) {
bool filter_params_saved = false;
if (vad.running() && vad.active_speech()) {
// Do not update the background noise parameters if we know that the signal
// is active speech.
return filter_params_saved;
}
int32_t auto_correlation[kMaxLpcOrder + 1];
- int16_t filter_output[kMaxLpcOrder + kResidualLength];
+ int16_t fiter_output[kMaxLpcOrder + kResidualLength];
int16_t reflection_coefficients[kMaxLpcOrder];
int16_t lpc_coefficients[kMaxLpcOrder + 1];
@ -55,13 +62,14 @@ bool BackgroundNoise::Update(const AudioMultiVector& sync_buffer) {
ChannelParameters& parameters = channel_parameters_[channel_ix];
int16_t temp_signal_array[kVecLen + kMaxLpcOrder] = {0};
int16_t* temp_signal = &temp_signal_array[kMaxLpcOrder];
- RTC_DCHECK_GE(sync_buffer.Size(), kVecLen);
- sync_buffer[channel_ix].CopyTo(kVecLen, sync_buffer.Size() - kVecLen,
-                                temp_signal);
+ RTC_DCHECK_GE(input.Size(), kVecLen);
+ input[channel_ix].CopyTo(kVecLen, input.Size() - kVecLen, temp_signal);
int32_t sample_energy =
CalculateAutoCorrelation(temp_signal, kVecLen, auto_correlation);
- if (sample_energy < parameters.energy_update_threshold) {
+ if ((!vad.running() &&
+      sample_energy < parameters.energy_update_threshold) ||
+     (vad.running() && !vad.active_speech())) {
// Generate LPC coefficients.
if (auto_correlation[0] <= 0) {
// Center value in auto-correlation is not positive. Do not update.
@ -87,10 +95,10 @@ bool BackgroundNoise::Update(const AudioMultiVector& sync_buffer) {
// Generate the CNG gain factor by looking at the energy of the residual.
WebRtcSpl_FilterMAFastQ12(temp_signal + kVecLen - kResidualLength,
- filter_output, lpc_coefficients,
+ fiter_output, lpc_coefficients,
kMaxLpcOrder + 1, kResidualLength);
int32_t residual_energy = WebRtcSpl_DotProductWithScale(
- filter_output, filter_output, kResidualLength, 0);
+ fiter_output, fiter_output, kResidualLength, 0);
// Check spectral flatness.
// Comparing the residual variance with the input signal variance tells
@ -109,8 +117,9 @@ bool BackgroundNoise::Update(const AudioMultiVector& sync_buffer) {
filter_params_saved = true;
}
} else {
- // Will only happen if `sample_energy` is not low enough. Increase the
- // threshold for update so that it increases by a factor 4 in 4 seconds.
+ // Will only happen if post-decode VAD is disabled and `sample_energy` is
+ // not low enough. Increase the threshold for update so that it increases
+ // by a factor 4 in 4 seconds.
IncrementEnergyThreshold(channel_ix, sample_energy);
}
}
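
An illustrative restatement (not from the CL, helper name hypothetical) of the update gate restored above: with the VAD running, background-noise adaptation follows the VAD decision; otherwise the energy threshold is the only criterion.

#include <cstdint>

bool ShouldUpdateBackgroundNoise(bool vad_running, bool active_speech,
                                 int32_t sample_energy,
                                 int32_t energy_update_threshold) {
  if (vad_running) {
    return !active_speech;  // Adapt only on frames classified as non-speech.
  }
  return sample_energy < energy_update_threshold;  // Energy-based fallback.
}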

@ -39,9 +39,9 @@ class BackgroundNoise {
void Reset();
// Updates the parameter estimates based on the signal currently in the
- // `sync_buffer`.
+ // `sync_buffer`, and on the latest decision in `vad` if it is running.
// Returns true if the filter parameters are updated.
- bool Update(const AudioMultiVector& sync_buffer);
+ bool Update(const AudioMultiVector& sync_buffer, const PostDecodeVad& vad);
// Generates background noise given a random vector and writes the output to
// `buffer`.

@ -36,6 +36,7 @@
#include "modules/audio_coding/neteq/normal.h"
#include "modules/audio_coding/neteq/packet.h"
#include "modules/audio_coding/neteq/packet_buffer.h"
#include "modules/audio_coding/neteq/post_decode_vad.h"
#include "modules/audio_coding/neteq/preemptive_expand.h"
#include "modules/audio_coding/neteq/red_payload_splitter.h"
#include "modules/audio_coding/neteq/statistics_calculator.h"
@ -69,26 +70,49 @@ std::unique_ptr<NetEqController> CreateNetEqController(
return controller_factory.CreateNetEqController(config);
}
- AudioFrame::SpeechType ToSpeechType(NetEqImpl::OutputType type) {
+ void SetAudioFrameActivityAndType(bool vad_enabled,
+                                   NetEqImpl::OutputType type,
+                                   AudioFrame::VADActivity last_vad_activity,
+                                   AudioFrame* audio_frame) {
switch (type) {
case NetEqImpl::OutputType::kNormalSpeech: {
- return AudioFrame::kNormalSpeech;
+ audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
+ audio_frame->vad_activity_ = AudioFrame::kVadActive;
+ break;
}
case NetEqImpl::OutputType::kVadPassive: {
// This should only be reached if the VAD is enabled.
RTC_DCHECK(vad_enabled);
audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
audio_frame->vad_activity_ = AudioFrame::kVadPassive;
break;
}
case NetEqImpl::OutputType::kCNG: {
- return AudioFrame::kCNG;
+ audio_frame->speech_type_ = AudioFrame::kCNG;
+ audio_frame->vad_activity_ = AudioFrame::kVadPassive;
+ break;
}
case NetEqImpl::OutputType::kPLC: {
- return AudioFrame::kPLC;
+ audio_frame->speech_type_ = AudioFrame::kPLC;
+ audio_frame->vad_activity_ = last_vad_activity;
+ break;
}
case NetEqImpl::OutputType::kPLCCNG: {
- return AudioFrame::kPLCCNG;
+ audio_frame->speech_type_ = AudioFrame::kPLCCNG;
+ audio_frame->vad_activity_ = AudioFrame::kVadPassive;
+ break;
}
case NetEqImpl::OutputType::kCodecPLC: {
- return AudioFrame::kCodecPLC;
+ audio_frame->speech_type_ = AudioFrame::kCodecPLC;
+ audio_frame->vad_activity_ = last_vad_activity;
+ break;
}
default:
RTC_DCHECK_NOTREACHED();
- return AudioFrame::kUndefined;
}
if (!vad_enabled) {
// Always set kVadUnknown when receive VAD is inactive.
audio_frame->vad_activity_ = AudioFrame::kVadUnknown;
}
}
@ -145,6 +169,7 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config,
packet_buffer_(std::move(deps.packet_buffer)),
red_payload_splitter_(std::move(deps.red_payload_splitter)),
timestamp_scaler_(std::move(deps.timestamp_scaler)),
vad_(new PostDecodeVad()),
expand_factory_(std::move(deps.expand_factory)),
accelerate_factory_(std::move(deps.accelerate_factory)),
preemptive_expand_factory_(std::move(deps.preemptive_expand_factory)),
@ -186,6 +211,10 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config,
if (create_components) {
SetSampleRateAndChannels(fs, 1); // Default is 1 channel.
}
RTC_DCHECK(!vad_->enabled());
if (config.enable_post_decode_vad) {
vad_->Enable();
}
}
NetEqImpl::~NetEqImpl() = default;
@ -223,7 +252,9 @@ int NetEqImpl::GetAudio(AudioFrame* audio_frame,
audio_frame->sample_rate_hz_,
rtc::dchecked_cast<int>(audio_frame->samples_per_channel_ * 100));
RTC_DCHECK_EQ(*muted, audio_frame->muted());
- audio_frame->speech_type_ = ToSpeechType(LastOutputType());
+ SetAudioFrameActivityAndType(vad_->enabled(), LastOutputType(),
+                              last_vad_activity_, audio_frame);
+ last_vad_activity_ = audio_frame->vad_activity_;
last_output_sample_rate_hz_ = audio_frame->sample_rate_hz_;
RTC_DCHECK(last_output_sample_rate_hz_ == 8000 ||
last_output_sample_rate_hz_ == 16000 ||
@ -367,6 +398,18 @@ NetEqOperationsAndState NetEqImpl::GetOperationsAndState() const {
return result;
}
void NetEqImpl::EnableVad() {
MutexLock lock(&mutex_);
RTC_DCHECK(vad_.get());
vad_->Enable();
}
void NetEqImpl::DisableVad() {
MutexLock lock(&mutex_);
RTC_DCHECK(vad_.get());
vad_->Disable();
}
absl::optional<uint32_t> NetEqImpl::GetPlayoutTimestamp() const {
MutexLock lock(&mutex_);
if (first_packet_ || last_mode_ == Mode::kRfc3389Cng ||
@ -815,8 +858,11 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
last_decoded_type_ = speech_type;
}
RTC_DCHECK(vad_.get());
bool sid_frame_available =
(operation == Operation::kRfc3389Cng && !packet_list.empty());
vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type,
sid_frame_available, fs_hz_);
// This is the criterion that we did decode some data through the speech
// decoder, and the operation resulted in comfort noise.
@ -966,7 +1012,7 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
(last_mode_ == Mode::kPreemptiveExpandFail) ||
(last_mode_ == Mode::kRfc3389Cng) ||
(last_mode_ == Mode::kCodecInternalCng)) {
- background_noise_->Update(*sync_buffer_);
+ background_noise_->Update(*sync_buffer_, *vad_.get());
}
if (operation == Operation::kDtmf) {
@ -2042,6 +2088,10 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
if (cng_decoder)
cng_decoder->Reset();
// Reinit post-decode VAD with new sample rate.
RTC_DCHECK(vad_.get()); // Cannot be NULL here.
vad_->Init();
// Delete algorithm buffer and create a new one.
algorithm_buffer_.reset(new AudioMultiVector(channels));
@ -2082,6 +2132,7 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
}
NetEqImpl::OutputType NetEqImpl::LastOutputType() {
RTC_DCHECK(vad_.get());
RTC_DCHECK(expand_.get());
if (last_mode_ == Mode::kCodecInternalCng ||
last_mode_ == Mode::kRfc3389Cng) {
@ -2091,6 +2142,8 @@ NetEqImpl::OutputType NetEqImpl::LastOutputType() {
return OutputType::kPLCCNG;
} else if (last_mode_ == Mode::kExpand) {
return OutputType::kPLC;
} else if (vad_->running() && !vad_->active_speech()) {
return OutputType::kVadPassive;
} else if (last_mode_ == Mode::kCodecPlc) {
return OutputType::kCodecPLC;
} else {

@ -48,6 +48,7 @@ class Merge;
class NackTracker;
class Normal;
class RedPayloadSplitter;
class PostDecodeVad;
class PreemptiveExpand;
class RandomVector;
class SyncBuffer;
@ -170,6 +171,13 @@ class NetEqImpl : public webrtc::NetEq {
NetEqOperationsAndState GetOperationsAndState() const override;
// Enables post-decode VAD. When enabled, GetAudio() will return
// kOutputVADPassive when the signal contains no speech.
void EnableVad() override;
// Disables post-decode VAD.
void DisableVad() override;
absl::optional<uint32_t> GetPlayoutTimestamp() const override;
int last_output_sample_rate_hz() const override;
@ -348,6 +356,7 @@ class NetEqImpl : public webrtc::NetEq {
RTC_GUARDED_BY(mutex_);
const std::unique_ptr<TimestampScaler> timestamp_scaler_
RTC_GUARDED_BY(mutex_);
const std::unique_ptr<PostDecodeVad> vad_ RTC_GUARDED_BY(mutex_);
const std::unique_ptr<ExpandFactory> expand_factory_ RTC_GUARDED_BY(mutex_);
const std::unique_ptr<AccelerateFactory> accelerate_factory_
RTC_GUARDED_BY(mutex_);
@ -388,6 +397,8 @@ class NetEqImpl : public webrtc::NetEq {
std::unique_ptr<NackTracker> nack_ RTC_GUARDED_BY(mutex_);
bool nack_enabled_ RTC_GUARDED_BY(mutex_);
const bool enable_muted_state_ RTC_GUARDED_BY(mutex_);
AudioFrame::VADActivity last_vad_activity_ RTC_GUARDED_BY(mutex_) =
AudioFrame::kVadPassive;
std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
RTC_GUARDED_BY(mutex_);
std::vector<RtpPacketInfo> last_decoded_packet_infos_ RTC_GUARDED_BY(mutex_);

@ -0,0 +1,90 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/post_decode_vad.h"
namespace webrtc {
PostDecodeVad::~PostDecodeVad() {
if (vad_instance_)
WebRtcVad_Free(vad_instance_);
}
void PostDecodeVad::Enable() {
if (!vad_instance_) {
// Create the instance.
vad_instance_ = WebRtcVad_Create();
if (vad_instance_ == nullptr) {
// Failed to create instance.
Disable();
return;
}
}
Init();
enabled_ = true;
}
void PostDecodeVad::Disable() {
enabled_ = false;
running_ = false;
}
void PostDecodeVad::Init() {
running_ = false;
if (vad_instance_) {
WebRtcVad_Init(vad_instance_);
WebRtcVad_set_mode(vad_instance_, kVadMode);
running_ = true;
}
}
void PostDecodeVad::Update(int16_t* signal,
size_t length,
AudioDecoder::SpeechType speech_type,
bool sid_frame,
int fs_hz) {
if (!vad_instance_ || !enabled_) {
return;
}
if (speech_type == AudioDecoder::kComfortNoise || sid_frame ||
fs_hz > 16000) {
// TODO(hlundin): Remove restriction on fs_hz.
running_ = false;
active_speech_ = true;
sid_interval_counter_ = 0;
} else if (!running_) {
++sid_interval_counter_;
}
if (sid_interval_counter_ >= kVadAutoEnable) {
Init();
}
if (length > 0 && running_) {
size_t vad_sample_index = 0;
active_speech_ = false;
// Loop through frame sizes 30, 20, and 10 ms.
for (int vad_frame_size_ms = 30; vad_frame_size_ms >= 10;
vad_frame_size_ms -= 10) {
size_t vad_frame_size_samples =
static_cast<size_t>(vad_frame_size_ms * fs_hz / 1000);
while (length - vad_sample_index >= vad_frame_size_samples) {
int vad_return =
WebRtcVad_Process(vad_instance_, fs_hz, &signal[vad_sample_index],
vad_frame_size_samples);
active_speech_ |= (vad_return == 1);
vad_sample_index += vad_frame_size_samples;
}
}
}
}
} // namespace webrtc

@ -0,0 +1,71 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_
#define MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_
#include <stddef.h>
#include <stdint.h>
#include "api/audio_codecs/audio_decoder.h"
#include "common_audio/vad/include/webrtc_vad.h"
namespace webrtc {
class PostDecodeVad {
public:
PostDecodeVad()
: enabled_(false),
running_(false),
active_speech_(true),
sid_interval_counter_(0),
vad_instance_(NULL) {}
virtual ~PostDecodeVad();
PostDecodeVad(const PostDecodeVad&) = delete;
PostDecodeVad& operator=(const PostDecodeVad&) = delete;
// Enables post-decode VAD.
void Enable();
// Disables post-decode VAD.
void Disable();
// Initializes post-decode VAD.
void Init();
// Updates post-decode VAD with the audio data in `signal` having `length`
// samples. The data is of type `speech_type`, at the sample rate `fs_hz`.
void Update(int16_t* signal,
size_t length,
AudioDecoder::SpeechType speech_type,
bool sid_frame,
int fs_hz);
// Accessors.
bool enabled() const { return enabled_; }
bool running() const { return running_; }
bool active_speech() const { return active_speech_; }
private:
static const int kVadMode = 0; // Sets aggressiveness to "Normal".
// Number of Update() calls without CNG/SID before re-enabling VAD.
static const int kVadAutoEnable = 3000;
bool enabled_;
bool running_;
bool active_speech_;
int sid_interval_counter_;
::VadInst* vad_instance_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_
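
A minimal lifecycle sketch (assumed usage based on the interface above; the helper name is hypothetical): enable once, feed each decoded block, then read the decision.

#include <cstdint>
#include <vector>

#include "api/audio_codecs/audio_decoder.h"
#include "modules/audio_coding/neteq/post_decode_vad.h"

bool DecodedBlockHasSpeech(webrtc::PostDecodeVad& vad,
                           std::vector<int16_t>& decoded,
                           int sample_rate_hz) {
  if (!vad.enabled()) {
    vad.Enable();  // Lazily creates the underlying WebRtcVad instance.
  }
  vad.Update(decoded.data(), decoded.size(), webrtc::AudioDecoder::kSpeech,
             /*sid_frame=*/false, sample_rate_hz);
  // active_speech() defaults to true while the VAD is not running (e.g. for
  // rates above 16 kHz), so only trust it together with running().
  return vad.running() && vad.active_speech();
}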

@ -0,0 +1,25 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for PostDecodeVad class.
#include "modules/audio_coding/neteq/post_decode_vad.h"
#include "test/gtest.h"
namespace webrtc {
TEST(PostDecodeVad, CreateAndDestroy) {
PostDecodeVad vad;
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

@ -179,6 +179,7 @@ void FuzzOneInputTest(const uint8_t* data, size_t size) {
// Configure NetEq and the NetEqTest object.
NetEqTest::Callbacks callbacks;
NetEq::Config config;
config.enable_post_decode_vad = true;
config.enable_fast_accelerate = true;
auto codecs = NetEqTest::StandardDecoderMap();
// rate_types contains the payload types that will be used for encoding.