mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-13 13:50:40 +01:00

This CL moves the implementation of the AudioFrame support from the implementation of AudioProcessing to proxy methods that map the call to the integer stream interfaces (added in another CL). The CL also changes the WebRTC code using the AudioFrame interfaces to instead use the proxy methods. This CL will be followed by one more CL that removes the usage of the AudioFrame class from the rest of APM (apart from the AudioProcessing API). Bug: webrtc:5298 Change-Id: Iecb72e9fa896ebea3ac30e558489c1bac88f5891 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/170110 Reviewed-by: Karl Wiberg <kwiberg@webrtc.org> Reviewed-by: Sam Zackrisson <saza@webrtc.org> Commit-Queue: Per Åhgren <peah@webrtc.org> Cr-Commit-Position: refs/heads/master@{#30812}
92 lines
2.9 KiB
C++
92 lines
2.9 KiB
C++
/*
|
|
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "modules/audio_processing/voice_detection.h"
|
|
|
|
#include "common_audio/vad/include/webrtc_vad.h"
|
|
#include "modules/audio_processing/audio_buffer.h"
|
|
#include "rtc_base/checks.h"
|
|
|
|
namespace webrtc {
|
|
// Owning wrapper around a VadInst handle: the handle is obtained from
// WebRtcVad_Create() and initialized with WebRtcVad_Init() on construction,
// and handed to WebRtcVad_Free() on destruction.
class VoiceDetection::Vad {
 public:
  // Creates and initializes the underlying handle. A null handle aborts via
  // RTC_CHECK; a non-zero init result is only caught by RTC_DCHECK_EQ.
  Vad() : state_(WebRtcVad_Create()) {
    RTC_CHECK(state_);
    const int error = WebRtcVad_Init(state_);
    RTC_DCHECK_EQ(0, error);
  }

  ~Vad() { WebRtcVad_Free(state_); }

  // Non-copyable: the destructor frees `state_`, so a copy would free the
  // same handle twice.
  Vad(const Vad&) = delete;
  Vad& operator=(const Vad&) = delete;

  // Returns the raw handle; ownership stays with this object.
  VadInst* state() { return state_; }

 private:
  VadInst* state_ = nullptr;
};
|
|
|
|
// Stores the sample rate and likelihood, derives the frame size as
// sample_rate_hz / 100 samples (i.e. 10 ms of audio), creates the Vad wrapper
// and configures its mode from `likelihood`.
VoiceDetection::VoiceDetection(int sample_rate_hz, Likelihood likelihood)
    : sample_rate_hz_(sample_rate_hz),
      frame_size_samples_(static_cast<size_t>(sample_rate_hz_ / 100)),
      likelihood_(likelihood),
      vad_(new Vad()) {
  // Lower likelihood settings map to higher mode numbers. An unexpected enum
  // value trips RTC_NOTREACHED() and otherwise falls back to mode 2.
  const int vad_mode = [likelihood]() -> int {
    switch (likelihood) {
      case VoiceDetection::kVeryLowLikelihood:
        return 3;
      case VoiceDetection::kLowLikelihood:
        return 2;
      case VoiceDetection::kModerateLikelihood:
        return 1;
      case VoiceDetection::kHighLikelihood:
        return 0;
      default:
        RTC_NOTREACHED();
        return 2;
    }
  }();

  const int set_mode_result = WebRtcVad_set_mode(vad_->state(), vad_mode);
  RTC_DCHECK_EQ(0, set_mode_result);
}
|
|
|
|
// Defined in this translation unit, after the definition of Vad.
// NOTE(review): presumably so the member holding the Vad can destroy a
// complete type — confirm against the header.
VoiceDetection::~VoiceDetection() = default;
|
|
|
|
// Runs voice activity detection on the kBand0To8kHz band of `audio`.
//
// For a single channel the band is converted to int16 with FloatS16ToS16.
// For multiple channels each sample is converted per channel, summed in
// 32 bits and divided by the channel count to form a mono mix.
//
// Returns true when WebRtcVad_Process() reports anything other than 0. The
// result is only checked to be 0 or 1 by RTC_DCHECK, so where that check is
// compiled out an unexpected return value also yields true.
bool VoiceDetection::ProcessCaptureAudio(AudioBuffer* audio) {
  const size_t num_frames = audio->num_frames_per_band();
  RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, num_frames);

  std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> mixed_low_pass_data;

  if (audio->num_channels() == 1) {
    FloatS16ToS16(audio->split_bands_const(0)[kBand0To8kHz], num_frames,
                  mixed_low_pass_data.data());
  } else {
    const int num_channels = static_cast<int>(audio->num_channels());
    // Fetch the per-channel band pointers once instead of once per sample.
    const auto low_band_channels = audio->split_channels_const(kBand0To8kHz);
    for (size_t i = 0; i < num_frames; ++i) {
      int32_t sum = 0;
      for (int ch = 0; ch < num_channels; ++ch) {
        sum += FloatS16ToS16(low_band_channels[ch][i]);
      }
      mixed_low_pass_data[i] = sum / num_channels;
    }
  }

  // NOTE(review): `frame_size_samples_` samples are read here while only
  // `num_frames` were written above; this assumes the two are equal — confirm
  // against how callers pick the constructor's sample rate.
  const int vad_ret =
      WebRtcVad_Process(vad_->state(), sample_rate_hz_,
                        mixed_low_pass_data.data(), frame_size_samples_);
  RTC_DCHECK(vad_ret == 0 || vad_ret == 1);
  return vad_ret != 0;
}
|
|
} // namespace webrtc
|