webrtc/modules/audio_processing/vad/voice_activity_detector.h
Yves Gerey 988cc0870b [Cleanup] Add missing #include. Remove useless ones.
This CL is the result of running include-what-you-use tool on part
of the code base (audio target and dependencies) plus manual fixes.

bug: webrtc:8311
Change-Id: I277d281ce943c3ecc1bd45fd8d83055931743604
Reviewed-on: https://webrtc-review.googlesource.com/c/106280
Commit-Queue: Yves Gerey <yvesg@google.com>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Patrik Höglund <phoglund@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#25311}
2018-10-23 11:32:56 +00:00

71 lines
2.4 KiB
C++

/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
#define MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include <vector>
#include "common_audio/resampler/include/resampler.h"
#include "modules/audio_processing/vad/common.h"
#include "modules/audio_processing/vad/pitch_based_vad.h"
#include "modules/audio_processing/vad/standalone_vad.h"
#include "modules/audio_processing/vad/vad_audio_proc.h"
namespace webrtc {
// Voice Activity Detector (VAD) that merges the voice probabilities produced
// by StandaloneVad and PitchBasedVad into a single, more robust estimate.
class VoiceActivityDetector {
 public:
  VoiceActivityDetector();
  ~VoiceActivityDetector();

  // Feeds one audio chunk (|length| values starting at |audio|) to the
  // detector and estimates its voice probability. |sample_rate_hz| is
  // presumably the sampling rate of |audio|; see the implementation for the
  // supported rates and chunk sizes.
  void ProcessChunk(const int16_t* audio, size_t length, int sample_rate_hz);

  // Per-chunk voice probabilities. The vector may be empty after some chunks;
  // the detector catches up later by reporting several values at once.
  const std::vector<double>& chunkwise_voice_probabilities() const {
    return chunkwise_voice_probabilities_;
  }

  // Per-chunk RMS values. Always the same length as
  // chunkwise_voice_probabilities().
  const std::vector<double>& chunkwise_rms() const {
    return chunkwise_rms_;
  }

  // Most recent voice probability, independent of the internal
  // implementation. The value lags the input by a few chunks.
  float last_voice_probability() const {
    return last_voice_probability_;
  }

 private:
  // Storage behind chunkwise_voice_probabilities() and chunkwise_rms().
  // TODO(aluebs): Change these to float.
  std::vector<double> chunkwise_voice_probabilities_;
  std::vector<double> chunkwise_rms_;

  // Value handed out by last_voice_probability().
  float last_voice_probability_;

  // Processing components. NOTE(review): how they are chained together is
  // defined in the .cc file; keep the declaration order below unchanged, since
  // it determines the member initialization order.
  Resampler resampler_;
  VadAudioProc audio_processing_;
  std::unique_ptr<StandaloneVad> standalone_vad_;
  PitchBasedVad pitch_based_vad_;

  // Scratch buffer of kLength10Ms samples; presumably receives the output of
  // |resampler_| -- confirm against the implementation.
  int16_t resampled_[kLength10Ms];
  AudioFeatures features_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_