Reland "Remove post-decode VAD"

This is a reland of commit 89cf26f1e0

Original change's description:
> Remove post-decode VAD
>
> Bug: webrtc:15806
> Change-Id: I6acf8734a70703085cfc1ccf82a79ee0931f59a4
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/336460
> Reviewed-by: Sam Zackrisson <saza@webrtc.org>
> Commit-Queue: Tomas Lundqvist <tomasl@google.com>
> Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
> Cr-Commit-Position: refs/heads/main@{#41653}

Bug: webrtc:15806
Change-Id: I1c2c0ce568c3c1817ff5c65bee91b9f961d46559
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/337442
Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
Commit-Queue: Tomas Lundqvist <tomasl@google.com>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#41688}
This commit is contained in:
Tomas Lundqvist 2024-02-07 15:17:39 +00:00 committed by WebRTC LUCI CQ
parent 39ac25d6ec
commit aaa123debb
13 changed files with 34 additions and 370 deletions

View file

@ -24,8 +24,7 @@ NetEq::Config& NetEq::Config::operator=(Config&&) = default;
std::string NetEq::Config::ToString() const { std::string NetEq::Config::ToString() const {
char buf[1024]; char buf[1024];
rtc::SimpleStringBuilder ss(buf); rtc::SimpleStringBuilder ss(buf);
ss << "sample_rate_hz=" << sample_rate_hz << ", enable_post_decode_vad=" ss << "sample_rate_hz=" << sample_rate_hz
<< (enable_post_decode_vad ? "true" : "false")
<< ", max_packets_in_buffer=" << max_packets_in_buffer << ", max_packets_in_buffer=" << max_packets_in_buffer
<< ", min_delay_ms=" << min_delay_ms << ", enable_fast_accelerate=" << ", min_delay_ms=" << min_delay_ms << ", enable_fast_accelerate="
<< (enable_fast_accelerate ? "true" : "false") << (enable_fast_accelerate ? "true" : "false")

View file

@ -130,7 +130,6 @@ class NetEq {
std::string ToString() const; std::string ToString() const;
int sample_rate_hz = 48000; // Initial value. Will change with input data. int sample_rate_hz = 48000; // Initial value. Will change with input data.
bool enable_post_decode_vad = false;
size_t max_packets_in_buffer = 200; size_t max_packets_in_buffer = 200;
int max_delay_ms = 0; int max_delay_ms = 0;
int min_delay_ms = 0; int min_delay_ms = 0;
@ -197,18 +196,17 @@ class NetEq {
// Instructs NetEq to deliver 10 ms of audio data. The data is written to // Instructs NetEq to deliver 10 ms of audio data. The data is written to
// `audio_frame`. All data in `audio_frame` is wiped; `data_`, `speech_type_`, // `audio_frame`. All data in `audio_frame` is wiped; `data_`, `speech_type_`,
// `num_channels_`, `sample_rate_hz_`, `samples_per_channel_`, and // `num_channels_`, `sample_rate_hz_` and `samples_per_channel_` are updated
// `vad_activity_` are updated upon success. If an error is returned, some // upon success. If an error is returned, some fields may not have been
// fields may not have been updated, or may contain inconsistent values. // updated, or may contain inconsistent values. If muted state is enabled
// If muted state is enabled (through Config::enable_muted_state), `muted` // (through Config::enable_muted_state), `muted` may be set to true after a
// may be set to true after a prolonged expand period. When this happens, the // prolonged expand period. When this happens, the `data_` in `audio_frame`
// `data_` in `audio_frame` is not written, but should be interpreted as being // is not written, but should be interpreted as being all zeros. For testing
// all zeros. For testing purposes, an override can be supplied in the // purposes, an override can be supplied in the `action_override` argument,
// `action_override` argument, which will cause NetEq to take this action // which will cause NetEq to take this action next, instead of the action it
// next, instead of the action it would normally choose. An optional output // would normally choose. An optional output argument for fetching the current
// argument for fetching the current sample rate can be provided, which // sample rate can be provided, which will return the same value as
// will return the same value as last_output_sample_rate_hz() but will avoid // last_output_sample_rate_hz() but will avoid additional synchronization.
// additional synchronization.
// Returns kOK on success, or kFail in case of an error. // Returns kOK on success, or kFail in case of an error.
virtual int GetAudio( virtual int GetAudio(
AudioFrame* audio_frame, AudioFrame* audio_frame,
@ -278,13 +276,6 @@ class NetEq {
// statistics are never reset. // statistics are never reset.
virtual NetEqOperationsAndState GetOperationsAndState() const = 0; virtual NetEqOperationsAndState GetOperationsAndState() const = 0;
// Enables post-decode VAD. When enabled, GetAudio() will return
// kOutputVADPassive when the signal contains no speech.
virtual void EnableVad() {}
// Disables post-decode VAD.
virtual void DisableVad() {}
// Returns the RTP timestamp for the last sample delivered by GetAudio(). // Returns the RTP timestamp for the last sample delivered by GetAudio().
// The return value will be empty if no valid timestamp is available. // The return value will be empty if no valid timestamp is available.
virtual absl::optional<uint32_t> GetPlayoutTimestamp() const = 0; virtual absl::optional<uint32_t> GetPlayoutTimestamp() const = 0;

View file

@ -689,8 +689,6 @@ rtc_library("neteq") {
"neteq/packet_arrival_history.h", "neteq/packet_arrival_history.h",
"neteq/packet_buffer.cc", "neteq/packet_buffer.cc",
"neteq/packet_buffer.h", "neteq/packet_buffer.h",
"neteq/post_decode_vad.cc",
"neteq/post_decode_vad.h",
"neteq/preemptive_expand.cc", "neteq/preemptive_expand.cc",
"neteq/preemptive_expand.h", "neteq/preemptive_expand.h",
"neteq/random_vector.cc", "neteq/random_vector.cc",
@ -1655,7 +1653,6 @@ if (rtc_include_tests) {
"neteq/normal_unittest.cc", "neteq/normal_unittest.cc",
"neteq/packet_arrival_history_unittest.cc", "neteq/packet_arrival_history_unittest.cc",
"neteq/packet_buffer_unittest.cc", "neteq/packet_buffer_unittest.cc",
"neteq/post_decode_vad_unittest.cc",
"neteq/random_vector_unittest.cc", "neteq/random_vector_unittest.cc",
"neteq/red_payload_splitter_unittest.cc", "neteq/red_payload_splitter_unittest.cc",
"neteq/reorder_optimizer_unittest.cc", "neteq/reorder_optimizer_unittest.cc",

View file

@ -50,11 +50,7 @@ std::unique_ptr<NetEq> CreateNetEq(
AcmReceiver::Config::Config( AcmReceiver::Config::Config(
rtc::scoped_refptr<AudioDecoderFactory> decoder_factory) rtc::scoped_refptr<AudioDecoderFactory> decoder_factory)
: clock(*Clock::GetRealTimeClock()), decoder_factory(decoder_factory) { : clock(*Clock::GetRealTimeClock()), decoder_factory(decoder_factory) {}
// Post-decode VAD is disabled by default in NetEq, however, Audio
// Conference Mixer relies on VAD decisions and fails without them.
neteq_config.enable_post_decode_vad = true;
}
AcmReceiver::Config::Config(const Config&) = default; AcmReceiver::Config::Config(const Config&) = default;
AcmReceiver::Config::~Config() = default; AcmReceiver::Config::~Config() = default;

View file

@ -190,9 +190,6 @@ class AcmReceiverTestFaxModeOldApi : public AcmReceiverTestOldApi {
const size_t output_channels = info.num_channels; const size_t output_channels = info.num_channels;
const size_t samples_per_ms = rtc::checked_cast<size_t>( const size_t samples_per_ms = rtc::checked_cast<size_t>(
rtc::CheckedDivExact(output_sample_rate_hz, 1000)); rtc::CheckedDivExact(output_sample_rate_hz, 1000));
const AudioFrame::VADActivity expected_vad_activity =
output_sample_rate_hz > 16000 ? AudioFrame::kVadActive
: AudioFrame::kVadPassive;
// Expect the first output timestamp to be 5*fs/8000 samples before the // Expect the first output timestamp to be 5*fs/8000 samples before the
// first inserted timestamp (because of NetEq's look-ahead). (This value is // first inserted timestamp (because of NetEq's look-ahead). (This value is
@ -217,7 +214,6 @@ class AcmReceiverTestFaxModeOldApi : public AcmReceiverTestOldApi {
EXPECT_EQ(output_sample_rate_hz, frame.sample_rate_hz_); EXPECT_EQ(output_sample_rate_hz, frame.sample_rate_hz_);
EXPECT_EQ(output_channels, frame.num_channels_); EXPECT_EQ(output_channels, frame.num_channels_);
EXPECT_EQ(AudioFrame::kNormalSpeech, frame.speech_type_); EXPECT_EQ(AudioFrame::kNormalSpeech, frame.speech_type_);
EXPECT_EQ(expected_vad_activity, frame.vad_activity_);
EXPECT_FALSE(muted); EXPECT_FALSE(muted);
} }
} }
@ -242,61 +238,6 @@ TEST_F(AcmReceiverTestFaxModeOldApi, MAYBE_VerifyAudioFrameOpus) {
RunVerifyAudioFrame({"opus", 48000, 2}); RunVerifyAudioFrame({"opus", 48000, 2});
} }
#if defined(WEBRTC_ANDROID)
#define MAYBE_PostdecodingVad DISABLED_PostdecodingVad
#else
#define MAYBE_PostdecodingVad PostdecodingVad
#endif
// Checks that, with post-decode VAD enabled in `config_`, the last frame
// pulled after inserting several packets via InsertOnePacketOfSilence() is
// reported as kVadPassive.
TEST_F(AcmReceiverTestOldApi, MAYBE_PostdecodingVad) {
  EXPECT_TRUE(config_.neteq_config.enable_post_decode_vad);

  constexpr int payload_type = 34;
  const SdpAudioFormat codec = {"L16", 16000, 1};
  const AudioCodecInfo info = SetEncoder(payload_type, codec);
  receiver_->SetCodecs({{payload_type, codec}});

  constexpr int kNumPackets = 5;
  AudioFrame frame;
  for (int packet = 0; packet < kNumPackets; ++packet) {
    // Pull every 10 ms frame that the packet just inserted accounts for.
    int frames_left = InsertOnePacketOfSilence(info);
    while (frames_left-- > 0) {
      bool muted;
      ASSERT_EQ(0, receiver_->GetAudio(info.sample_rate_hz, &frame, &muted));
    }
  }

  // `frame` still holds the most recently delivered audio.
  EXPECT_EQ(AudioFrame::kVadPassive, frame.vad_activity_);
}
// Test fixture derived from AcmReceiverTestOldApi whose constructor turns
// post-decode VAD off in the NetEq part of `config_`.
class AcmReceiverTestPostDecodeVadPassiveOldApi : public AcmReceiverTestOldApi {
protected:
AcmReceiverTestPostDecodeVadPassiveOldApi() {
// Override the value inherited from the base fixture's config.
config_.neteq_config.enable_post_decode_vad = false;
}
};
#if defined(WEBRTC_ANDROID)
#define MAYBE_PostdecodingVad DISABLED_PostdecodingVad
#else
#define MAYBE_PostdecodingVad PostdecodingVad
#endif
// Checks that, with post-decode VAD disabled in `config_`, the last frame
// pulled after inserting several packets via InsertOnePacketOfSilence() is
// reported as kVadUnknown.
TEST_F(AcmReceiverTestPostDecodeVadPassiveOldApi, MAYBE_PostdecodingVad) {
  EXPECT_FALSE(config_.neteq_config.enable_post_decode_vad);

  constexpr int payload_type = 34;
  const SdpAudioFormat codec = {"L16", 16000, 1};
  const AudioCodecInfo info = SetEncoder(payload_type, codec);

  // The encoder factory must recognise the format used by this test.
  auto const value = encoder_factory_->QueryAudioEncoder(codec);
  ASSERT_TRUE(value.has_value());
  receiver_->SetCodecs({{payload_type, codec}});

  const int kNumPackets = 5;
  AudioFrame frame;
  for (int packet = 0; packet < kNumPackets; ++packet) {
    // Pull every 10 ms frame that the packet just inserted accounts for.
    int frames_left = InsertOnePacketOfSilence(info);
    while (frames_left-- > 0) {
      bool muted;
      ASSERT_EQ(0, receiver_->GetAudio(info.sample_rate_hz, &frame, &muted));
    }
  }

  // `frame` still holds the most recently delivered audio.
  EXPECT_EQ(AudioFrame::kVadUnknown, frame.vad_activity_);
}
#if defined(WEBRTC_ANDROID) #if defined(WEBRTC_ANDROID)
#define MAYBE_LastAudioCodec DISABLED_LastAudioCodec #define MAYBE_LastAudioCodec DISABLED_LastAudioCodec
#else #else

View file

@ -17,7 +17,6 @@
#include "common_audio/signal_processing/include/signal_processing_library.h" #include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h" #include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "modules/audio_coding/neteq/cross_correlation.h" #include "modules/audio_coding/neteq/cross_correlation.h"
#include "modules/audio_coding/neteq/post_decode_vad.h"
namespace webrtc { namespace webrtc {
namespace { namespace {
@ -44,17 +43,11 @@ void BackgroundNoise::Reset() {
} }
} }
bool BackgroundNoise::Update(const AudioMultiVector& input, bool BackgroundNoise::Update(const AudioMultiVector& sync_buffer) {
const PostDecodeVad& vad) {
bool filter_params_saved = false; bool filter_params_saved = false;
if (vad.running() && vad.active_speech()) {
// Do not update the background noise parameters if we know that the signal
// is active speech.
return filter_params_saved;
}
int32_t auto_correlation[kMaxLpcOrder + 1]; int32_t auto_correlation[kMaxLpcOrder + 1];
int16_t fiter_output[kMaxLpcOrder + kResidualLength]; int16_t filter_output[kMaxLpcOrder + kResidualLength];
int16_t reflection_coefficients[kMaxLpcOrder]; int16_t reflection_coefficients[kMaxLpcOrder];
int16_t lpc_coefficients[kMaxLpcOrder + 1]; int16_t lpc_coefficients[kMaxLpcOrder + 1];
@ -62,14 +55,13 @@ bool BackgroundNoise::Update(const AudioMultiVector& input,
ChannelParameters& parameters = channel_parameters_[channel_ix]; ChannelParameters& parameters = channel_parameters_[channel_ix];
int16_t temp_signal_array[kVecLen + kMaxLpcOrder] = {0}; int16_t temp_signal_array[kVecLen + kMaxLpcOrder] = {0};
int16_t* temp_signal = &temp_signal_array[kMaxLpcOrder]; int16_t* temp_signal = &temp_signal_array[kMaxLpcOrder];
RTC_DCHECK_GE(input.Size(), kVecLen); RTC_DCHECK_GE(sync_buffer.Size(), kVecLen);
input[channel_ix].CopyTo(kVecLen, input.Size() - kVecLen, temp_signal); sync_buffer[channel_ix].CopyTo(kVecLen, sync_buffer.Size() - kVecLen,
temp_signal);
int32_t sample_energy = int32_t sample_energy =
CalculateAutoCorrelation(temp_signal, kVecLen, auto_correlation); CalculateAutoCorrelation(temp_signal, kVecLen, auto_correlation);
if ((!vad.running() && if (sample_energy < parameters.energy_update_threshold) {
sample_energy < parameters.energy_update_threshold) ||
(vad.running() && !vad.active_speech())) {
// Generate LPC coefficients. // Generate LPC coefficients.
if (auto_correlation[0] <= 0) { if (auto_correlation[0] <= 0) {
// Center value in auto-correlation is not positive. Do not update. // Center value in auto-correlation is not positive. Do not update.
@ -95,10 +87,10 @@ bool BackgroundNoise::Update(const AudioMultiVector& input,
// Generate the CNG gain factor by looking at the energy of the residual. // Generate the CNG gain factor by looking at the energy of the residual.
WebRtcSpl_FilterMAFastQ12(temp_signal + kVecLen - kResidualLength, WebRtcSpl_FilterMAFastQ12(temp_signal + kVecLen - kResidualLength,
fiter_output, lpc_coefficients, filter_output, lpc_coefficients,
kMaxLpcOrder + 1, kResidualLength); kMaxLpcOrder + 1, kResidualLength);
int32_t residual_energy = WebRtcSpl_DotProductWithScale( int32_t residual_energy = WebRtcSpl_DotProductWithScale(
fiter_output, fiter_output, kResidualLength, 0); filter_output, filter_output, kResidualLength, 0);
// Check spectral flatness. // Check spectral flatness.
// Comparing the residual variance with the input signal variance tells // Comparing the residual variance with the input signal variance tells
@ -117,9 +109,8 @@ bool BackgroundNoise::Update(const AudioMultiVector& input,
filter_params_saved = true; filter_params_saved = true;
} }
} else { } else {
// Will only happen if post-decode VAD is disabled and `sample_energy` is // Will only happen if `sample_energy` is not low enough. Increase the
// not low enough. Increase the threshold for update so that it increases // threshold for update so that it increases by a factor 4 in 4 seconds.
// by a factor 4 in 4 seconds.
IncrementEnergyThreshold(channel_ix, sample_energy); IncrementEnergyThreshold(channel_ix, sample_energy);
} }
} }

View file

@ -39,9 +39,9 @@ class BackgroundNoise {
void Reset(); void Reset();
// Updates the parameter estimates based on the signal currently in the // Updates the parameter estimates based on the signal currently in the
// `sync_buffer`, and on the latest decision in `vad` if it is running. // `sync_buffer`.
// Returns true if the filter parameters are updated. // Returns true if the filter parameters are updated.
bool Update(const AudioMultiVector& sync_buffer, const PostDecodeVad& vad); bool Update(const AudioMultiVector& sync_buffer);
// Generates background noise given a random vector and writes the output to // Generates background noise given a random vector and writes the output to
// `buffer`. // `buffer`.

View file

@ -37,7 +37,6 @@
#include "modules/audio_coding/neteq/normal.h" #include "modules/audio_coding/neteq/normal.h"
#include "modules/audio_coding/neteq/packet.h" #include "modules/audio_coding/neteq/packet.h"
#include "modules/audio_coding/neteq/packet_buffer.h" #include "modules/audio_coding/neteq/packet_buffer.h"
#include "modules/audio_coding/neteq/post_decode_vad.h"
#include "modules/audio_coding/neteq/preemptive_expand.h" #include "modules/audio_coding/neteq/preemptive_expand.h"
#include "modules/audio_coding/neteq/red_payload_splitter.h" #include "modules/audio_coding/neteq/red_payload_splitter.h"
#include "modules/audio_coding/neteq/statistics_calculator.h" #include "modules/audio_coding/neteq/statistics_calculator.h"
@ -72,49 +71,26 @@ std::unique_ptr<NetEqController> CreateNetEqController(
return controller_factory.CreateNetEqController(config); return controller_factory.CreateNetEqController(config);
} }
void SetAudioFrameActivityAndType(bool vad_enabled, AudioFrame::SpeechType ToSpeechType(NetEqImpl::OutputType type) {
NetEqImpl::OutputType type,
AudioFrame::VADActivity last_vad_activity,
AudioFrame* audio_frame) {
switch (type) { switch (type) {
case NetEqImpl::OutputType::kNormalSpeech: { case NetEqImpl::OutputType::kNormalSpeech: {
audio_frame->speech_type_ = AudioFrame::kNormalSpeech; return AudioFrame::kNormalSpeech;
audio_frame->vad_activity_ = AudioFrame::kVadActive;
break;
}
case NetEqImpl::OutputType::kVadPassive: {
// This should only be reached if the VAD is enabled.
RTC_DCHECK(vad_enabled);
audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
audio_frame->vad_activity_ = AudioFrame::kVadPassive;
break;
} }
case NetEqImpl::OutputType::kCNG: { case NetEqImpl::OutputType::kCNG: {
audio_frame->speech_type_ = AudioFrame::kCNG; return AudioFrame::kCNG;
audio_frame->vad_activity_ = AudioFrame::kVadPassive;
break;
} }
case NetEqImpl::OutputType::kPLC: { case NetEqImpl::OutputType::kPLC: {
audio_frame->speech_type_ = AudioFrame::kPLC; return AudioFrame::kPLC;
audio_frame->vad_activity_ = last_vad_activity;
break;
} }
case NetEqImpl::OutputType::kPLCCNG: { case NetEqImpl::OutputType::kPLCCNG: {
audio_frame->speech_type_ = AudioFrame::kPLCCNG; return AudioFrame::kPLCCNG;
audio_frame->vad_activity_ = AudioFrame::kVadPassive;
break;
} }
case NetEqImpl::OutputType::kCodecPLC: { case NetEqImpl::OutputType::kCodecPLC: {
audio_frame->speech_type_ = AudioFrame::kCodecPLC; return AudioFrame::kCodecPLC;
audio_frame->vad_activity_ = last_vad_activity;
break;
} }
default: default:
RTC_DCHECK_NOTREACHED(); RTC_DCHECK_NOTREACHED();
} return AudioFrame::kUndefined;
if (!vad_enabled) {
// Always set kVadUnknown when receive VAD is inactive.
audio_frame->vad_activity_ = AudioFrame::kVadUnknown;
} }
} }
@ -171,7 +147,6 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config,
packet_buffer_(std::move(deps.packet_buffer)), packet_buffer_(std::move(deps.packet_buffer)),
red_payload_splitter_(std::move(deps.red_payload_splitter)), red_payload_splitter_(std::move(deps.red_payload_splitter)),
timestamp_scaler_(std::move(deps.timestamp_scaler)), timestamp_scaler_(std::move(deps.timestamp_scaler)),
vad_(new PostDecodeVad()),
expand_factory_(std::move(deps.expand_factory)), expand_factory_(std::move(deps.expand_factory)),
accelerate_factory_(std::move(deps.accelerate_factory)), accelerate_factory_(std::move(deps.accelerate_factory)),
preemptive_expand_factory_(std::move(deps.preemptive_expand_factory)), preemptive_expand_factory_(std::move(deps.preemptive_expand_factory)),
@ -215,10 +190,6 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config,
if (create_components) { if (create_components) {
SetSampleRateAndChannels(fs, 1); // Default is 1 channel. SetSampleRateAndChannels(fs, 1); // Default is 1 channel.
} }
RTC_DCHECK(!vad_->enabled());
if (config.enable_post_decode_vad) {
vad_->Enable();
}
} }
NetEqImpl::~NetEqImpl() = default; NetEqImpl::~NetEqImpl() = default;
@ -256,9 +227,7 @@ int NetEqImpl::GetAudio(AudioFrame* audio_frame,
audio_frame->sample_rate_hz_, audio_frame->sample_rate_hz_,
rtc::dchecked_cast<int>(audio_frame->samples_per_channel_ * 100)); rtc::dchecked_cast<int>(audio_frame->samples_per_channel_ * 100));
RTC_DCHECK_EQ(*muted, audio_frame->muted()); RTC_DCHECK_EQ(*muted, audio_frame->muted());
SetAudioFrameActivityAndType(vad_->enabled(), LastOutputType(), audio_frame->speech_type_ = ToSpeechType(LastOutputType());
last_vad_activity_, audio_frame);
last_vad_activity_ = audio_frame->vad_activity_;
last_output_sample_rate_hz_ = audio_frame->sample_rate_hz_; last_output_sample_rate_hz_ = audio_frame->sample_rate_hz_;
RTC_DCHECK(last_output_sample_rate_hz_ == 8000 || RTC_DCHECK(last_output_sample_rate_hz_ == 8000 ||
last_output_sample_rate_hz_ == 16000 || last_output_sample_rate_hz_ == 16000 ||
@ -402,18 +371,6 @@ NetEqOperationsAndState NetEqImpl::GetOperationsAndState() const {
return result; return result;
} }
// Turns the post-decode VAD on. Takes `mutex_` for the duration of the call.
void NetEqImpl::EnableVad() {
  MutexLock lock(&mutex_);
  RTC_DCHECK(vad_ != nullptr);
  vad_->Enable();
}
// Turns the post-decode VAD off. Takes `mutex_` for the duration of the call.
void NetEqImpl::DisableVad() {
  MutexLock lock(&mutex_);
  RTC_DCHECK(vad_ != nullptr);
  vad_->Disable();
}
absl::optional<uint32_t> NetEqImpl::GetPlayoutTimestamp() const { absl::optional<uint32_t> NetEqImpl::GetPlayoutTimestamp() const {
MutexLock lock(&mutex_); MutexLock lock(&mutex_);
if (first_packet_ || last_mode_ == Mode::kRfc3389Cng || if (first_packet_ || last_mode_ == Mode::kRfc3389Cng ||
@ -874,11 +831,8 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
last_decoded_type_ = speech_type; last_decoded_type_ = speech_type;
} }
RTC_DCHECK(vad_.get());
bool sid_frame_available = bool sid_frame_available =
(operation == Operation::kRfc3389Cng && !packet_list.empty()); (operation == Operation::kRfc3389Cng && !packet_list.empty());
vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type,
sid_frame_available, fs_hz_);
// This is the criterion that we did decode some data through the speech // This is the criterion that we did decode some data through the speech
// decoder, and the operation resulted in comfort noise. // decoder, and the operation resulted in comfort noise.
@ -1028,7 +982,7 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
(last_mode_ == Mode::kPreemptiveExpandFail) || (last_mode_ == Mode::kPreemptiveExpandFail) ||
(last_mode_ == Mode::kRfc3389Cng) || (last_mode_ == Mode::kRfc3389Cng) ||
(last_mode_ == Mode::kCodecInternalCng)) { (last_mode_ == Mode::kCodecInternalCng)) {
background_noise_->Update(*sync_buffer_, *vad_.get()); background_noise_->Update(*sync_buffer_);
} }
if (operation == Operation::kDtmf) { if (operation == Operation::kDtmf) {
@ -2104,10 +2058,6 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
if (cng_decoder) if (cng_decoder)
cng_decoder->Reset(); cng_decoder->Reset();
// Reinit post-decode VAD with new sample rate.
RTC_DCHECK(vad_.get()); // Cannot be NULL here.
vad_->Init();
// Delete algorithm buffer and create a new one. // Delete algorithm buffer and create a new one.
algorithm_buffer_.reset(new AudioMultiVector(channels)); algorithm_buffer_.reset(new AudioMultiVector(channels));
@ -2148,7 +2098,6 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
} }
NetEqImpl::OutputType NetEqImpl::LastOutputType() { NetEqImpl::OutputType NetEqImpl::LastOutputType() {
RTC_DCHECK(vad_.get());
RTC_DCHECK(expand_.get()); RTC_DCHECK(expand_.get());
if (last_mode_ == Mode::kCodecInternalCng || if (last_mode_ == Mode::kCodecInternalCng ||
last_mode_ == Mode::kRfc3389Cng) { last_mode_ == Mode::kRfc3389Cng) {
@ -2158,8 +2107,6 @@ NetEqImpl::OutputType NetEqImpl::LastOutputType() {
return OutputType::kPLCCNG; return OutputType::kPLCCNG;
} else if (last_mode_ == Mode::kExpand) { } else if (last_mode_ == Mode::kExpand) {
return OutputType::kPLC; return OutputType::kPLC;
} else if (vad_->running() && !vad_->active_speech()) {
return OutputType::kVadPassive;
} else if (last_mode_ == Mode::kCodecPlc) { } else if (last_mode_ == Mode::kCodecPlc) {
return OutputType::kCodecPLC; return OutputType::kCodecPLC;
} else { } else {

View file

@ -48,7 +48,6 @@ class Merge;
class NackTracker; class NackTracker;
class Normal; class Normal;
class RedPayloadSplitter; class RedPayloadSplitter;
class PostDecodeVad;
class PreemptiveExpand; class PreemptiveExpand;
class RandomVector; class RandomVector;
class SyncBuffer; class SyncBuffer;
@ -171,13 +170,6 @@ class NetEqImpl : public webrtc::NetEq {
NetEqOperationsAndState GetOperationsAndState() const override; NetEqOperationsAndState GetOperationsAndState() const override;
// Enables post-decode VAD. When enabled, GetAudio() will return
// kOutputVADPassive when the signal contains no speech.
void EnableVad() override;
// Disables post-decode VAD.
void DisableVad() override;
absl::optional<uint32_t> GetPlayoutTimestamp() const override; absl::optional<uint32_t> GetPlayoutTimestamp() const override;
int last_output_sample_rate_hz() const override; int last_output_sample_rate_hz() const override;
@ -359,7 +351,6 @@ class NetEqImpl : public webrtc::NetEq {
RTC_GUARDED_BY(mutex_); RTC_GUARDED_BY(mutex_);
const std::unique_ptr<TimestampScaler> timestamp_scaler_ const std::unique_ptr<TimestampScaler> timestamp_scaler_
RTC_GUARDED_BY(mutex_); RTC_GUARDED_BY(mutex_);
const std::unique_ptr<PostDecodeVad> vad_ RTC_GUARDED_BY(mutex_);
const std::unique_ptr<ExpandFactory> expand_factory_ RTC_GUARDED_BY(mutex_); const std::unique_ptr<ExpandFactory> expand_factory_ RTC_GUARDED_BY(mutex_);
const std::unique_ptr<AccelerateFactory> accelerate_factory_ const std::unique_ptr<AccelerateFactory> accelerate_factory_
RTC_GUARDED_BY(mutex_); RTC_GUARDED_BY(mutex_);
@ -401,8 +392,6 @@ class NetEqImpl : public webrtc::NetEq {
std::unique_ptr<NackTracker> nack_ RTC_GUARDED_BY(mutex_); std::unique_ptr<NackTracker> nack_ RTC_GUARDED_BY(mutex_);
bool nack_enabled_ RTC_GUARDED_BY(mutex_); bool nack_enabled_ RTC_GUARDED_BY(mutex_);
const bool enable_muted_state_ RTC_GUARDED_BY(mutex_); const bool enable_muted_state_ RTC_GUARDED_BY(mutex_);
AudioFrame::VADActivity last_vad_activity_ RTC_GUARDED_BY(mutex_) =
AudioFrame::kVadPassive;
std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_ std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
RTC_GUARDED_BY(mutex_); RTC_GUARDED_BY(mutex_);
std::vector<RtpPacketInfo> last_decoded_packet_infos_ RTC_GUARDED_BY(mutex_); std::vector<RtpPacketInfo> last_decoded_packet_infos_ RTC_GUARDED_BY(mutex_);

View file

@ -1,90 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/neteq/post_decode_vad.h"
namespace webrtc {
// Frees the VAD instance if one was ever created by Enable().
PostDecodeVad::~PostDecodeVad() {
  if (vad_instance_ != nullptr) {
    WebRtcVad_Free(vad_instance_);
  }
}
// Enables the VAD, creating the underlying instance on first use. If the
// instance cannot be created the VAD is left disabled.
void PostDecodeVad::Enable() {
  if (vad_instance_ == nullptr) {
    // Lazily create the instance.
    vad_instance_ = WebRtcVad_Create();
  }
  if (vad_instance_ == nullptr) {
    // Creation failed; make sure both flags read as "off".
    Disable();
    return;
  }
  Init();
  enabled_ = true;
}
// Marks the VAD as neither running nor enabled. The instance, if any, is
// kept and is only freed by the destructor.
void PostDecodeVad::Disable() {
  running_ = false;
  enabled_ = false;
}
// (Re)initializes the VAD instance and applies `kVadMode`. `running_` ends up
// true only when an instance exists.
void PostDecodeVad::Init() {
  if (vad_instance_ == nullptr) {
    running_ = false;
    return;
  }
  WebRtcVad_Init(vad_instance_);
  WebRtcVad_set_mode(vad_instance_, kVadMode);
  running_ = true;
}
// Feeds one chunk of decoded audio to the VAD and refreshes the
// `active_speech_` decision.
//
// `signal` holds `length` samples at `fs_hz`. `speech_type` is the decoder's
// classification of the chunk and `sid_frame` flags a SID frame. The call is
// a no-op unless the VAD is enabled and an instance exists.
void PostDecodeVad::Update(int16_t* signal,
                           size_t length,
                           AudioDecoder::SpeechType speech_type,
                           bool sid_frame,
                           int fs_hz) {
  if (!vad_instance_ || !enabled_) {
    return;
  }

  if (speech_type == AudioDecoder::kComfortNoise || sid_frame ||
      fs_hz > 16000) {
    // TODO(hlundin): Remove restriction on fs_hz.
    // Pause the VAD; while paused the signal is reported as active speech.
    running_ = false;
    active_speech_ = true;
    sid_interval_counter_ = 0;
  } else if (!running_) {
    ++sid_interval_counter_;
  }

  // Restart the VAD after `kVadAutoEnable` updates without CNG/SID. The
  // counter is only cleared by the pause branch above, so without the
  // `!running_` check the instance would be re-initialized (and its state
  // discarded) on every later call.
  if (!running_ && sid_interval_counter_ >= kVadAutoEnable) {
    Init();
  }

  if (length > 0 && running_) {
    size_t vad_sample_index = 0;
    active_speech_ = false;
    // Loop through frame sizes 30, 20, and 10 ms.
    for (int vad_frame_size_ms = 30; vad_frame_size_ms >= 10;
         vad_frame_size_ms -= 10) {
      // `running_` implies fs_hz <= 16000 here, so the product cannot
      // overflow an int.
      const size_t vad_frame_size_samples =
          static_cast<size_t>(vad_frame_size_ms * fs_hz / 1000);
      if (vad_frame_size_samples == 0) {
        // A zero-sample frame would never advance `vad_sample_index` and the
        // loop below would not terminate.
        continue;
      }
      while (length - vad_sample_index >= vad_frame_size_samples) {
        const int vad_return =
            WebRtcVad_Process(vad_instance_, fs_hz, &signal[vad_sample_index],
                              vad_frame_size_samples);
        // Any frame classified as speech marks the whole chunk as active.
        active_speech_ |= (vad_return == 1);
        vad_sample_index += vad_frame_size_samples;
      }
    }
  }
}
} // namespace webrtc

View file

@ -1,71 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_
#define MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_
#include <stddef.h>
#include <stdint.h>
#include "api/audio_codecs/audio_decoder.h"
#include "common_audio/vad/include/webrtc_vad.h"
namespace webrtc {
// Voice activity detector applied to decoded audio. Owns a WebRtcVad
// instance that is created by Enable() and freed by the destructor. A new
// object starts disabled, not running, and reporting active speech.
class PostDecodeVad {
 public:
  PostDecodeVad() = default;

  virtual ~PostDecodeVad();

  PostDecodeVad(const PostDecodeVad&) = delete;
  PostDecodeVad& operator=(const PostDecodeVad&) = delete;

  // Enables post-decode VAD.
  void Enable();

  // Disables post-decode VAD.
  void Disable();

  // Initializes post-decode VAD.
  void Init();

  // Updates post-decode VAD with the audio data in `signal` having `length`
  // samples. The data is of type `speech_type`, at the sample rate `fs_hz`.
  void Update(int16_t* signal,
              size_t length,
              AudioDecoder::SpeechType speech_type,
              bool sid_frame,
              int fs_hz);

  // Accessors.
  bool enabled() const { return enabled_; }
  bool running() const { return running_; }
  bool active_speech() const { return active_speech_; }

 private:
  static const int kVadMode = 0;  // Sets aggressiveness to "Normal".
  // Number of Update() calls without CNG/SID before re-enabling VAD.
  static const int kVadAutoEnable = 3000;

  bool enabled_ = false;
  bool running_ = false;
  bool active_speech_ = true;
  int sid_interval_counter_ = 0;
  ::VadInst* vad_instance_ = nullptr;
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_

View file

@ -1,25 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for PostDecodeVad class.
#include "modules/audio_coding/neteq/post_decode_vad.h"
#include "test/gtest.h"
namespace webrtc {
// Smoke test: a PostDecodeVad can be constructed and destroyed without
// Enable() ever being called.
TEST(PostDecodeVad, CreateAndDestroy) {
PostDecodeVad vad;
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View file

@ -179,7 +179,6 @@ void FuzzOneInputTest(const uint8_t* data, size_t size) {
// Configure NetEq and the NetEqTest object. // Configure NetEq and the NetEqTest object.
NetEqTest::Callbacks callbacks; NetEqTest::Callbacks callbacks;
NetEq::Config config; NetEq::Config config;
config.enable_post_decode_vad = true;
config.enable_fast_accelerate = true; config.enable_fast_accelerate = true;
auto codecs = NetEqTest::StandardDecoderMap(); auto codecs = NetEqTest::StandardDecoderMap();
// rate_types contains the payload types that will be used for encoding. // rate_types contains the payload types that will be used for encoding.