webrtc/modules/audio_processing/agc2/vad_wrapper.h
Tommi d6ef33e59b Remove PushResampler<T>::InitializeIfNeeded
This switches from accepting a sample rate and convert to channel
size over to accepting the channel size.

Instead of InitializeIfNeeded:

* Offer a way to explicitly initialize PushResampler via the ctor
  (needed for VoiceActivityDetectorWrapper)
* Implicitly check for the right configuration from within Resample().
  (All calls to Resample() were preceded by a call to Initialize)

As part of this, refactor VoiceActivityDetectorWrapper (VADW):
* VADW is now initialized in the constructor and more const.
* Remove VADW::Initialize() and instead reconstruct VADW if needed.

Add constants for max sample rate and num channels to audio_util.h
In many cases the numbers for these values are embedded in the code
which has led to some inconsistency.

Bug: chromium:335805780
Change-Id: Iead0d52eb1b261a8d64e93f51401147c8fba32f0
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/353360
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Commit-Queue: Tomas Gunnarsson <tommi@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42587}
2024-07-04 10:33:21 +00:00

79 lines
3 KiB
C++

/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_
#define MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_
#include <memory>
#include <vector>
#include "api/array_view.h"
#include "common_audio/resampler/include/push_resampler.h"
#include "modules/audio_processing/agc2/cpu_features.h"
#include "modules/audio_processing/include/audio_frame_view.h"
namespace webrtc {
// Wraps a single-channel Voice Activity Detector (VAD) which is used to analyze
// the first channel of the input audio frames. Takes care of resampling the
// input frames to match the sample rate of the wrapped VAD and periodically
// resets the VAD.
class VoiceActivityDetectorWrapper {
public:
// Single channel VAD interface.
class MonoVad {
public:
virtual ~MonoVad() = default;
// Returns the sample rate (Hz) required for the input frames analyzed by
// `ComputeProbability`.
virtual int SampleRateHz() const = 0;
// Resets the internal state.
virtual void Reset() = 0;
// Analyzes an audio frame and returns the speech probability.
virtual float Analyze(rtc::ArrayView<const float> frame) = 0;
};
// Ctor. Uses `cpu_features` to instantiate the default VAD.
VoiceActivityDetectorWrapper(const AvailableCpuFeatures& cpu_features,
int sample_rate_hz);
// Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call
// `MonoVad::Reset()`; it must be equal to or greater than the duration of two
// frames. Uses `cpu_features` to instantiate the default VAD.
VoiceActivityDetectorWrapper(int vad_reset_period_ms,
const AvailableCpuFeatures& cpu_features,
int sample_rate_hz);
// Ctor. Uses a custom `vad`.
VoiceActivityDetectorWrapper(int vad_reset_period_ms,
std::unique_ptr<MonoVad> vad,
int sample_rate_hz);
VoiceActivityDetectorWrapper(const VoiceActivityDetectorWrapper&) = delete;
VoiceActivityDetectorWrapper& operator=(const VoiceActivityDetectorWrapper&) =
delete;
~VoiceActivityDetectorWrapper();
// Analyzes the first channel of `frame` and returns the speech probability.
// `frame` must be a 10 ms frame with the sample rate specified in the last
// `Initialize()` call.
float Analyze(AudioFrameView<const float> frame);
private:
const int vad_reset_period_frames_;
const int frame_size_;
int time_to_vad_reset_;
std::unique_ptr<MonoVad> vad_;
std::vector<float> resampled_buffer_;
PushResampler<float> resampler_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_