mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-14 06:10:40 +01:00

Implements RTCAudioSourceStats members: - audioLevel - totalAudioEnergy - totalSamplesDuration In this CL description these are collectively referred to as the audio levels. The audio levels are removed from sending "track" stats (in Chrome, these are now reported as undefined instead of 0). Background: For sending tracks, audio levels were always reported as 0 in Chrome (https://crbug.com/736403), while audio levels were correctly reported for receiving tracks. This problem affected the standard getStats() but not the legacy getStats(), blocking some people from migrating. This was likely not a problem in native third_party/webrtc code because the delivery of audio frames from device to send-stream uses a different code path outside of chromium. A recent PR (https://github.com/w3c/webrtc-stats/pull/451) moved the send-side audio levels to the RTCAudioSourceStats, while keeping the receive-side audio levels on the "track" stats. This allows an implementation to report the audio levels even if samples are not sent onto the network (such as if an ICE connection has not been established yet), reflecting some of the current implementation. Changes: 1. Audio levels are added to RTCAudioSourceStats. Send-side audio "track" stats are left undefined. Receive-side audio "track" stats are not changed in this CL and continue to work. 2. Audio level computation is moved from the AudioState and AudioTransportImpl to the AudioSendStream. This is because a) the AudioTransportImpl::RecordedDataIsAvailable() code path is not exercised in chromium, and b) audio levels should, per-spec, not be calculated on a per-call basis, for which the AudioState is defined. 3. The audio level computation is now performed in AudioSendStream::SendAudioData(), a code path used by both native and chromium code. 4. Comments are added to document behavior of existing code, such as AudioLevel and AudioSendStream::SendAudioData(). 
Note: In this CL, just like before this CL, audio level is only calculated after an AudioSendStream has been created. This means that before an O/A negotiation, audio levels are unavailable. According to spec, if we have an audio source, we should have audio levels. An immediate solution to this would have been to calculate the audio level at pc/rtp_sender.cc. The problem is that the LocalAudioSinkAdapter::OnData() code path, while exercised in chromium, is not exercised in native code. The issue of calculating audio levels on a per-source basis rather than on a per-send stream basis is left to https://crbug.com/webrtc/10771, an existing "media-source" bug. This CL can be verified manually in Chrome at: https://codepen.io/anon/pen/vqRGyq Bug: chromium:736403, webrtc:10771 Change-Id: I8036cd9984f3b187c3177470a8c0d6670a201a5a Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/143789 Reviewed-by: Oskar Sundbom <ossu@webrtc.org> Reviewed-by: Stefan Holmer <stefan@webrtc.org> Commit-Queue: Henrik Boström <hbos@webrtc.org> Cr-Commit-Position: refs/heads/master@{#28480}
249 lines
10 KiB
C++
249 lines
10 KiB
C++
/*
|
|
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "audio/audio_transport_impl.h"
|
|
|
|
#include <algorithm>
|
|
#include <memory>
|
|
#include <utility>
|
|
|
|
#include "audio/remix_resample.h"
|
|
#include "audio/utility/audio_frame_operations.h"
|
|
#include "call/audio_send_stream.h"
|
|
#include "rtc_base/checks.h"
|
|
|
|
namespace webrtc {
|
|
|
|
namespace {
|
|
|
|
// We want to process at the lowest sample rate and channel count possible
|
|
// without losing information. Choose the lowest native rate at least equal to
|
|
// the minimum of input and codec rates, choose lowest channel count, and
|
|
// configure the audio frame.
|
|
void InitializeCaptureFrame(int input_sample_rate,
|
|
int send_sample_rate_hz,
|
|
size_t input_num_channels,
|
|
size_t send_num_channels,
|
|
AudioFrame* audio_frame) {
|
|
RTC_DCHECK(audio_frame);
|
|
int min_processing_rate_hz = std::min(input_sample_rate, send_sample_rate_hz);
|
|
for (int native_rate_hz : AudioProcessing::kNativeSampleRatesHz) {
|
|
audio_frame->sample_rate_hz_ = native_rate_hz;
|
|
if (audio_frame->sample_rate_hz_ >= min_processing_rate_hz) {
|
|
break;
|
|
}
|
|
}
|
|
audio_frame->num_channels_ = std::min(input_num_channels, send_num_channels);
|
|
}
|
|
|
|
void ProcessCaptureFrame(uint32_t delay_ms,
|
|
bool key_pressed,
|
|
bool swap_stereo_channels,
|
|
AudioProcessing* audio_processing,
|
|
AudioFrame* audio_frame) {
|
|
RTC_DCHECK(audio_processing);
|
|
RTC_DCHECK(audio_frame);
|
|
audio_processing->set_stream_delay_ms(delay_ms);
|
|
audio_processing->set_stream_key_pressed(key_pressed);
|
|
int error = audio_processing->ProcessStream(audio_frame);
|
|
RTC_DCHECK_EQ(0, error) << "ProcessStream() error: " << error;
|
|
if (swap_stereo_channels) {
|
|
AudioFrameOperations::SwapStereoChannels(audio_frame);
|
|
}
|
|
}
|
|
|
|
// Resample audio in |frame| to given sample rate preserving the
|
|
// channel count and place the result in |destination|.
|
|
int Resample(const AudioFrame& frame,
|
|
const int destination_sample_rate,
|
|
PushResampler<int16_t>* resampler,
|
|
int16_t* destination) {
|
|
const int number_of_channels = static_cast<int>(frame.num_channels_);
|
|
const int target_number_of_samples_per_channel =
|
|
destination_sample_rate / 100;
|
|
resampler->InitializeIfNeeded(frame.sample_rate_hz_, destination_sample_rate,
|
|
number_of_channels);
|
|
|
|
// TODO(yujo): make resampler take an AudioFrame, and add special case
|
|
// handling of muted frames.
|
|
return resampler->Resample(
|
|
frame.data(), frame.samples_per_channel_ * number_of_channels,
|
|
destination, number_of_channels * target_number_of_samples_per_channel);
|
|
}
|
|
} // namespace
|
|
|
|
// Stores the mixer and the audio processing module that the transport callbacks
// operate on. Both arguments are required to be non-null (DCHECKed).
AudioTransportImpl::AudioTransportImpl(AudioMixer* mixer,
                                       AudioProcessing* audio_processing)
    : audio_processing_(audio_processing),
      mixer_(mixer) {
  RTC_DCHECK(mixer);
  RTC_DCHECK(audio_processing);
}
|
|
|
|
// Nothing to release explicitly; members clean up after themselves.
AudioTransportImpl::~AudioTransportImpl() = default;
|
|
|
|
// Not used in Chromium. Process captured audio and distribute to all sending
|
|
// streams, and try to do this at the lowest possible sample rate.
|
|
int32_t AudioTransportImpl::RecordedDataIsAvailable(
|
|
const void* audio_data,
|
|
const size_t number_of_frames,
|
|
const size_t bytes_per_sample,
|
|
const size_t number_of_channels,
|
|
const uint32_t sample_rate,
|
|
const uint32_t audio_delay_milliseconds,
|
|
const int32_t /*clock_drift*/,
|
|
const uint32_t /*volume*/,
|
|
const bool key_pressed,
|
|
uint32_t& /*new_mic_volume*/) { // NOLINT: to avoid changing APIs
|
|
RTC_DCHECK(audio_data);
|
|
RTC_DCHECK_GE(number_of_channels, 1);
|
|
RTC_DCHECK_LE(number_of_channels, 2);
|
|
RTC_DCHECK_EQ(2 * number_of_channels, bytes_per_sample);
|
|
RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz);
|
|
// 100 = 1 second / data duration (10 ms).
|
|
RTC_DCHECK_EQ(number_of_frames * 100, sample_rate);
|
|
RTC_DCHECK_LE(bytes_per_sample * number_of_frames * number_of_channels,
|
|
AudioFrame::kMaxDataSizeBytes);
|
|
|
|
int send_sample_rate_hz = 0;
|
|
size_t send_num_channels = 0;
|
|
bool swap_stereo_channels = false;
|
|
{
|
|
rtc::CritScope lock(&capture_lock_);
|
|
send_sample_rate_hz = send_sample_rate_hz_;
|
|
send_num_channels = send_num_channels_;
|
|
swap_stereo_channels = swap_stereo_channels_;
|
|
}
|
|
|
|
std::unique_ptr<AudioFrame> audio_frame(new AudioFrame());
|
|
InitializeCaptureFrame(sample_rate, send_sample_rate_hz, number_of_channels,
|
|
send_num_channels, audio_frame.get());
|
|
voe::RemixAndResample(static_cast<const int16_t*>(audio_data),
|
|
number_of_frames, number_of_channels, sample_rate,
|
|
&capture_resampler_, audio_frame.get());
|
|
ProcessCaptureFrame(audio_delay_milliseconds, key_pressed,
|
|
swap_stereo_channels, audio_processing_,
|
|
audio_frame.get());
|
|
|
|
// Typing detection (utilizes the APM/VAD decision). We let the VAD determine
|
|
// if we're using this feature or not.
|
|
// TODO(solenberg): GetConfig() takes a lock. Work around that.
|
|
bool typing_detected = false;
|
|
if (audio_processing_->GetConfig().voice_detection.enabled) {
|
|
if (audio_frame->vad_activity_ != AudioFrame::kVadUnknown) {
|
|
bool vad_active = audio_frame->vad_activity_ == AudioFrame::kVadActive;
|
|
typing_detected = typing_detection_.Process(key_pressed, vad_active);
|
|
}
|
|
}
|
|
|
|
// Copy frame and push to each sending stream. The copy is required since an
|
|
// encoding task will be posted internally to each stream.
|
|
{
|
|
rtc::CritScope lock(&capture_lock_);
|
|
typing_noise_detected_ = typing_detected;
|
|
|
|
RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0);
|
|
if (!sending_streams_.empty()) {
|
|
auto it = sending_streams_.begin();
|
|
while (++it != sending_streams_.end()) {
|
|
std::unique_ptr<AudioFrame> audio_frame_copy(new AudioFrame());
|
|
audio_frame_copy->CopyFrom(*audio_frame);
|
|
(*it)->SendAudioData(std::move(audio_frame_copy));
|
|
}
|
|
// Send the original frame to the first stream w/o copying.
|
|
(*sending_streams_.begin())->SendAudioData(std::move(audio_frame));
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Mix all received streams, feed the result to the AudioProcessing module, then
|
|
// resample the result to the requested output rate.
|
|
int32_t AudioTransportImpl::NeedMorePlayData(const size_t nSamples,
|
|
const size_t nBytesPerSample,
|
|
const size_t nChannels,
|
|
const uint32_t samplesPerSec,
|
|
void* audioSamples,
|
|
size_t& nSamplesOut,
|
|
int64_t* elapsed_time_ms,
|
|
int64_t* ntp_time_ms) {
|
|
RTC_DCHECK_EQ(sizeof(int16_t) * nChannels, nBytesPerSample);
|
|
RTC_DCHECK_GE(nChannels, 1);
|
|
RTC_DCHECK_LE(nChannels, 2);
|
|
RTC_DCHECK_GE(
|
|
samplesPerSec,
|
|
static_cast<uint32_t>(AudioProcessing::NativeRate::kSampleRate8kHz));
|
|
|
|
// 100 = 1 second / data duration (10 ms).
|
|
RTC_DCHECK_EQ(nSamples * 100, samplesPerSec);
|
|
RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels,
|
|
AudioFrame::kMaxDataSizeBytes);
|
|
|
|
mixer_->Mix(nChannels, &mixed_frame_);
|
|
*elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
|
|
*ntp_time_ms = mixed_frame_.ntp_time_ms_;
|
|
|
|
const auto error = audio_processing_->ProcessReverseStream(&mixed_frame_);
|
|
RTC_DCHECK_EQ(error, AudioProcessing::kNoError);
|
|
|
|
nSamplesOut = Resample(mixed_frame_, samplesPerSec, &render_resampler_,
|
|
static_cast<int16_t*>(audioSamples));
|
|
RTC_DCHECK_EQ(nSamplesOut, nChannels * nSamples);
|
|
return 0;
|
|
}
|
|
|
|
// Used by Chromium - same as NeedMorePlayData() but because Chrome has its
|
|
// own APM instance, does not call audio_processing_->ProcessReverseStream().
|
|
void AudioTransportImpl::PullRenderData(int bits_per_sample,
|
|
int sample_rate,
|
|
size_t number_of_channels,
|
|
size_t number_of_frames,
|
|
void* audio_data,
|
|
int64_t* elapsed_time_ms,
|
|
int64_t* ntp_time_ms) {
|
|
RTC_DCHECK_EQ(bits_per_sample, 16);
|
|
RTC_DCHECK_GE(number_of_channels, 1);
|
|
RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz);
|
|
|
|
// 100 = 1 second / data duration (10 ms).
|
|
RTC_DCHECK_EQ(number_of_frames * 100, sample_rate);
|
|
|
|
// 8 = bits per byte.
|
|
RTC_DCHECK_LE(bits_per_sample / 8 * number_of_frames * number_of_channels,
|
|
AudioFrame::kMaxDataSizeBytes);
|
|
mixer_->Mix(number_of_channels, &mixed_frame_);
|
|
*elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
|
|
*ntp_time_ms = mixed_frame_.ntp_time_ms_;
|
|
|
|
auto output_samples = Resample(mixed_frame_, sample_rate, &render_resampler_,
|
|
static_cast<int16_t*>(audio_data));
|
|
RTC_DCHECK_EQ(output_samples, number_of_channels * number_of_frames);
|
|
}
|
|
|
|
void AudioTransportImpl::UpdateSendingStreams(
|
|
std::vector<AudioSendStream*> streams,
|
|
int send_sample_rate_hz,
|
|
size_t send_num_channels) {
|
|
rtc::CritScope lock(&capture_lock_);
|
|
sending_streams_ = std::move(streams);
|
|
send_sample_rate_hz_ = send_sample_rate_hz;
|
|
send_num_channels_ = send_num_channels;
|
|
}
|
|
|
|
// Turns swapping of the stereo channels of captured audio on or off. The flag
// is written while holding |capture_lock_|.
void AudioTransportImpl::SetStereoChannelSwapping(bool enable) {
  rtc::CritScope lock(&capture_lock_);
  swap_stereo_channels_ = enable;
}
|
|
|
|
bool AudioTransportImpl::typing_noise_detected() const {
|
|
rtc::CritScope lock(&capture_lock_);
|
|
return typing_noise_detected_;
|
|
}
|
|
} // namespace webrtc
|