mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-13 13:50:40 +01:00

This reverts commit 81c0cf287c.
Reason for revert: internal test failures
Original change's description:
> Simplification and refactoring of the AudioBuffer code
>
> This CL performs a major refactoring and simplification
> of the AudioBuffer code that.
> -Removes 7 of the 9 internal buffers of the AudioBuffer.
> -Avoids the implicit copying required to keep the
> internal buffers in sync.
> -Removes all code relating to handling of fixed-point
> sample data in the AudioBuffer.
> -Changes the naming of the class methods to reflect
> that only floating point is handled.
> -Corrects some bugs in the code.
> -Extends the handling of internal downmixing to be
> more generic.
>
> Bug: webrtc:10882
> Change-Id: I12c8af156fbe366b154744a0a1b3d926bf7be572
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/149828
> Commit-Queue: Per Åhgren <peah@webrtc.org>
> Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#28928}
TBR=gustaf@webrtc.org,peah@webrtc.org
Change-Id: I2729e3ad24b3a9b40b368b84cb565c859e79b51e
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: webrtc:10882
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/150084
Reviewed-by: Steve Anton <steveanton@webrtc.org>
Commit-Queue: Steve Anton <steveanton@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#28931}
169 lines
4.8 KiB
C++
169 lines
4.8 KiB
C++
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
|
|
|
|
#include "modules/audio_processing/voice_detection_impl.h"
|
|
|
|
#include "api/audio/audio_frame.h"
|
|
#include "common_audio/vad/include/webrtc_vad.h"
|
|
#include "modules/audio_processing/audio_buffer.h"
|
|
#include "rtc_base/checks.h"
|
|
#include "rtc_base/constructor_magic.h"
|
|
|
|
namespace webrtc {
|
|
// Owning wrapper around a VadInst handle. The handle is obtained from
// WebRtcVad_Create() and passed to WebRtcVad_Init() on construction, and is
// handed to WebRtcVad_Free() on destruction. Instances cannot be copied.
class VoiceDetectionImpl::Vad {
 public:
  Vad() : handle_(WebRtcVad_Create()) {
    RTC_CHECK(handle_);
    const int init_status = WebRtcVad_Init(handle_);
    RTC_DCHECK_EQ(0, init_status);
  }

  Vad(const Vad&) = delete;
  Vad& operator=(const Vad&) = delete;

  ~Vad() { WebRtcVad_Free(handle_); }

  // Returns the wrapped handle; ownership stays with this object.
  VadInst* state() { return handle_; }

 private:
  VadInst* handle_;
};
|
|
|
|
// Stores the caller-provided critical section that every other method of
// this class locks. `crit` is expected to be non-null (checked with
// RTC_DCHECK).
VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit)
    : crit_(crit) {
  RTC_DCHECK(crit);
}
|
|
|
|
// Nothing to release by hand; members clean up after themselves.
VoiceDetectionImpl::~VoiceDetectionImpl() = default;
|
|
|
|
void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
|
|
rtc::CritScope cs(crit_);
|
|
sample_rate_hz_ = sample_rate_hz;
|
|
std::unique_ptr<Vad> new_vad;
|
|
if (enabled_) {
|
|
new_vad.reset(new Vad());
|
|
}
|
|
vad_.swap(new_vad);
|
|
using_external_vad_ = false;
|
|
frame_size_samples_ =
|
|
static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000;
|
|
set_likelihood(likelihood_);
|
|
}
|
|
|
|
// Runs the VAD on the kBand0To8kHz band of `audio`, stores the decision in
// stream_has_voice_ and returns it.
//
// The float band data is converted to int16 first. When `audio` reports a
// single processing channel the band is converted directly; otherwise the
// per-sample integer average over the channels is used as VAD input.
// The component must be enabled (checked with RTC_DCHECK).
bool VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
  rtc::CritScope lock(crit_);
  RTC_DCHECK(enabled_);

  RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
                audio->num_frames_per_band());
  std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> vad_input;
  rtc::ArrayView<const int16_t> vad_input_view(vad_input.data(),
                                               audio->num_frames_per_band());

  if (audio->num_proc_channels() != 1) {
    // Downmix: sum the converted samples of all channels, then divide.
    // NOTE(review): the branch tests num_proc_channels() while the average
    // uses num_channels() -- confirm the two always agree here.
    const int channel_count = static_cast<int>(audio->num_channels());
    for (size_t sample = 0; sample < audio->num_frames_per_band(); ++sample) {
      int32_t sum = FloatS16ToS16(
          audio->split_channels_const_f(kBand0To8kHz)[0][sample]);
      for (int ch = 1; ch < channel_count; ++ch) {
        sum += FloatS16ToS16(
            audio->split_channels_const_f(kBand0To8kHz)[ch][sample]);
      }
      vad_input[sample] = sum / channel_count;
    }
  } else {
    FloatS16ToS16(audio->split_bands_const_f(0)[kBand0To8kHz],
                  audio->num_frames_per_band(), vad_input.data());
  }

  const int vad_result =
      WebRtcVad_Process(vad_->state(), sample_rate_hz_, vad_input_view.data(),
                        frame_size_samples_);
  switch (vad_result) {
    case 0:
      stream_has_voice_ = false;
      break;
    case 1:
      stream_has_voice_ = true;
      break;
    default:
      // Any other value is unexpected; the previous decision is left as is.
      RTC_NOTREACHED();
      break;
  }

  return stream_has_voice_;
}
|
|
|
|
int VoiceDetectionImpl::Enable(bool enable) {
|
|
rtc::CritScope cs(crit_);
|
|
if (enabled_ != enable) {
|
|
enabled_ = enable;
|
|
Initialize(sample_rate_hz_);
|
|
}
|
|
return AudioProcessing::kNoError;
|
|
}
|
|
|
|
bool VoiceDetectionImpl::is_enabled() const {
|
|
rtc::CritScope cs(crit_);
|
|
return enabled_;
|
|
}
|
|
|
|
int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
|
|
rtc::CritScope cs(crit_);
|
|
using_external_vad_ = true;
|
|
stream_has_voice_ = has_voice;
|
|
return AudioProcessing::kNoError;
|
|
}
|
|
|
|
bool VoiceDetectionImpl::stream_has_voice() const {
|
|
rtc::CritScope cs(crit_);
|
|
// TODO(ajm): enable this assertion?
|
|
// RTC_DCHECK(using_external_vad_ || is_component_enabled());
|
|
return stream_has_voice_;
|
|
}
|
|
|
|
// Records `likelihood` and, when the component is enabled, pushes the
// corresponding mode to the VAD instance via WebRtcVad_set_mode().
//
// Mapping used: kVeryLowLikelihood -> 3, kLowLikelihood -> 2,
// kModerateLikelihood -> 1, kHighLikelihood -> 0. Any other value trips
// RTC_NOTREACHED() and falls back to mode 2.
// Always returns AudioProcessing::kNoError.
int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
  rtc::CritScope lock(crit_);
  likelihood_ = likelihood;

  // While disabled only the setting is remembered; the VAD is not touched.
  if (!enabled_) {
    return AudioProcessing::kNoError;
  }

  int vad_mode = 2;
  switch (likelihood) {
    case VoiceDetection::kVeryLowLikelihood:
      vad_mode = 3;
      break;
    case VoiceDetection::kLowLikelihood:
      vad_mode = 2;
      break;
    case VoiceDetection::kModerateLikelihood:
      vad_mode = 1;
      break;
    case VoiceDetection::kHighLikelihood:
      vad_mode = 0;
      break;
    default:
      RTC_NOTREACHED();
      break;
  }

  const int set_mode_status = WebRtcVad_set_mode(vad_->state(), vad_mode);
  RTC_DCHECK_EQ(0, set_mode_status);
  return AudioProcessing::kNoError;
}
|
|
|
|
// Returns the likelihood setting last stored by set_likelihood(), read under
// crit_.
VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
  rtc::CritScope lock(crit_);
  return likelihood_;
}
|
|
|
|
int VoiceDetectionImpl::set_frame_size_ms(int size) {
|
|
rtc::CritScope cs(crit_);
|
|
RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported.
|
|
frame_size_ms_ = size;
|
|
Initialize(sample_rate_hz_);
|
|
return AudioProcessing::kNoError;
|
|
}
|
|
|
|
int VoiceDetectionImpl::frame_size_ms() const {
|
|
rtc::CritScope cs(crit_);
|
|
return frame_size_ms_;
|
|
}
|
|
} // namespace webrtc
|