webrtc/modules/audio_processing/vad/voice_activity_detector.h
Mirko Bonadei 92ea95e34a Fixing WebRTC after moving from src/webrtc to src/
In https://webrtc-review.googlesource.com/c/src/+/1560 we moved WebRTC
from src/webrtc to src/ (in order to preserve a healthy git history).
This CL takes care of fixing header guards, #include paths, etc...

NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
TBR=tommi@webrtc.org


Bug: chromium:611808
Change-Id: Iea91618212bee0af16aa3f05071eab8f93706578
Reviewed-on: https://webrtc-review.googlesource.com/1561
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Henrik Kjellander <kjellander@webrtc.org>
Commit-Queue: Mirko Bonadei <mbonadei@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#19846}
2017-09-15 05:02:56 +00:00

71 lines
2.5 KiB
C++

/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
#define MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
#include <memory>
#include <vector>
#include "common_audio/resampler/include/resampler.h"
#include "modules/audio_processing/vad/vad_audio_proc.h"
#include "modules/audio_processing/vad/common.h"
#include "modules/audio_processing/vad/pitch_based_vad.h"
#include "modules/audio_processing/vad/standalone_vad.h"
namespace webrtc {
// Voice Activity Detector (VAD). Merges the voice probabilities reported by
// StandaloneVad and PitchBasedVad so that the resulting estimate is more
// robust than either one alone.
class VoiceActivityDetector {
 public:
  VoiceActivityDetector();
  ~VoiceActivityDetector();

  // Consumes one chunk of audio and estimates its voice probability.
  // Sample rates above 32kHz are not supported.
  void ProcessChunk(const int16_t* audio, size_t length, int sample_rate_hz);

  // Voice probabilities, one entry per chunk. The vector may be empty for
  // some chunks; the detector catches up later by returning several values
  // at once.
  const std::vector<double>& chunkwise_voice_probabilities() const {
    return chunkwise_voice_probabilities_;
  }

  // RMS values, one entry per chunk. Always the same length as
  // chunkwise_voice_probabilities().
  const std::vector<double>& chunkwise_rms() const {
    return chunkwise_rms_;
  }

  // Most recent voice probability, independent of the internal
  // implementation. Note that it lags by a few chunks.
  float last_voice_probability() const {
    return last_voice_probability_;
  }

 private:
  // Storage behind chunkwise_voice_probabilities() and chunkwise_rms().
  // TODO(aluebs): Change these to float.
  std::vector<double> chunkwise_voice_probabilities_;
  std::vector<double> chunkwise_rms_;

  // Value handed out by last_voice_probability().
  float last_voice_probability_;

  Resampler resampler_;
  VadAudioProc audio_processing_;

  std::unique_ptr<StandaloneVad> standalone_vad_;
  PitchBasedVad pitch_based_vad_;

  int16_t resampled_[kLength10Ms];
  AudioFeatures features_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_