Mirror of https://github.com/mollyim/webrtc.git (synced 2025-05-12 21:30:45 +01:00)

This reverts commit 3f87250a4f.

Reason for revert: Downstream is fixed

Original change's description:
> Revert "Remove RTC_DISALLOW_COPY_AND_ASSIGN usages completely"
>
> This reverts commit 5f0eb93d2a.
>
> Reason for revert: Breaks downstream project. I'm going to fix that one and
> create a reland of this CL after.
>
> Original change's description:
> > Remove RTC_DISALLOW_COPY_AND_ASSIGN usages completely
> >
> > Bug: webrtc:13555, webrtc:13082
> > Change-Id: Iff2cda6f516739419e97e975e03f77a98f74be03
> > Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/249260
> > Reviewed-by: Harald Alvestrand <hta@webrtc.org>
> > Reviewed-by: Artem Titov <titovartem@webrtc.org>
> > Commit-Queue: (Daniel.L) Byoungchan Lee <daniel.l@hpcnt.com>
> > Cr-Commit-Position: refs/heads/main@{#35805}
>
> TBR=hta@webrtc.org,titovartem@webrtc.org,daniel.l@hpcnt.com,webrtc-scoped@luci-project-accounts.iam.gserviceaccount.com
>
> Change-Id: I33d497f1132adfe6d151023195a388d9b7d548f9
> No-Presubmit: true
> No-Tree-Checks: true
> No-Try: true
> Bug: webrtc:13555, webrtc:13082
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/249364
> Reviewed-by: Artem Titov <titovartem@webrtc.org>
> Owners-Override: Artem Titov <titovartem@webrtc.org>
> Reviewed-by: Andrey Logvin <landrey@webrtc.org>
> Reviewed-by: Björn Terelius <terelius@webrtc.org>
> Commit-Queue: Artem Titov <titovartem@webrtc.org>
> Cr-Commit-Position: refs/heads/main@{#35807}

# Not skipping CQ checks because this is a reland.

Bug: webrtc:13555, webrtc:13082
Change-Id: I7ef1ef3b6e3c41b1a96014aa75f003c0fcf33949
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/249365
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Commit-Queue: Artem Titov <titovartem@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#35814}
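For context, RTC_DISALLOW_COPY_AND_ASSIGN is the WebRTC convenience macro that declares a class's copy constructor and copy-assignment operator as deleted. Removing its usages, as this reland does, amounts to spelling those deleted declarations out by hand. A minimal sketch of the mechanical change, using a hypothetical class name not taken from this CL:

// Before: copyability suppressed via the macro.
class Widget {
 public:
  Widget() = default;

 private:
  RTC_DISALLOW_COPY_AND_ASSIGN(Widget);
};

// After: the deleted copy operations are written out explicitly.
class Widget {
 public:
  Widget() = default;

  Widget(const Widget&) = delete;
  Widget& operator=(const Widget&) = delete;
};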
/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/test/conversational_speech/simulator.h"
|
|
|
|
#include <math.h>
|
|
|
|
#include <algorithm>
|
|
#include <memory>
|
|
#include <set>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include "api/array_view.h"
|
|
#include "common_audio/include/audio_util.h"
|
|
#include "common_audio/wav_file.h"
|
|
#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
|
|
#include "rtc_base/logging.h"
|
|
#include "rtc_base/numerics/safe_conversions.h"
|
|
#include "test/testsupport/file_utils.h"
|
|
|
|
namespace webrtc {
namespace test {
namespace {

using conversational_speech::MultiEndCall;
using conversational_speech::SpeakerOutputFilePaths;
using conversational_speech::WavReaderInterface;

// Combines output path and speaker names to define the output file paths for
// the near-end and far-end audio tracks.
std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>>
InitSpeakerOutputFilePaths(const std::set<std::string>& speaker_names,
                           const std::string& output_path) {
  // Create map.
  auto speaker_output_file_paths_map =
      std::make_unique<std::map<std::string, SpeakerOutputFilePaths>>();

  // Add near-end and far-end output paths into the map.
  for (const auto& speaker_name : speaker_names) {
    const std::string near_end_path =
        test::JoinFilename(output_path, "s_" + speaker_name + "-near_end.wav");
    RTC_LOG(LS_VERBOSE) << "The near-end audio track will be created in "
                        << near_end_path << ".";

    const std::string far_end_path =
        test::JoinFilename(output_path, "s_" + speaker_name + "-far_end.wav");
    RTC_LOG(LS_VERBOSE) << "The far-end audio track will be created in "
                        << far_end_path << ".";

    // Add to map.
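    // std::piecewise_construct + std::forward_as_tuple constructs the
    // SpeakerOutputFilePaths value in place from (near_end_path, far_end_path),
    // with no temporary object to copy or move.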
    speaker_output_file_paths_map->emplace(
        std::piecewise_construct, std::forward_as_tuple(speaker_name),
        std::forward_as_tuple(near_end_path, far_end_path));
  }

  return speaker_output_file_paths_map;
}

// Class that provides one WavWriter for the near-end and one for the far-end
// output track of a speaker.
class SpeakerWavWriters {
 public:
  SpeakerWavWriters(const SpeakerOutputFilePaths& output_file_paths,
                    int sample_rate)
      : near_end_wav_writer_(output_file_paths.near_end, sample_rate, 1u),
        far_end_wav_writer_(output_file_paths.far_end, sample_rate, 1u) {}
  WavWriter* near_end_wav_writer() { return &near_end_wav_writer_; }
  WavWriter* far_end_wav_writer() { return &far_end_wav_writer_; }

 private:
  WavWriter near_end_wav_writer_;
  WavWriter far_end_wav_writer_;
};

// Initializes a SpeakerWavWriters instance for each speaker, covering both the
// near-end and the far-end output tracks.
std::unique_ptr<std::map<std::string, SpeakerWavWriters>>
InitSpeakersWavWriters(const std::map<std::string, SpeakerOutputFilePaths>&
                           speaker_output_file_paths,
                       int sample_rate) {
  // Create map.
  auto speaker_wav_writers_map =
      std::make_unique<std::map<std::string, SpeakerWavWriters>>();

  // Add a SpeakerWavWriters instance for each speaker into the map.
  for (auto it = speaker_output_file_paths.begin();
       it != speaker_output_file_paths.end(); ++it) {
    speaker_wav_writers_map->emplace(
        std::piecewise_construct, std::forward_as_tuple(it->first),
        std::forward_as_tuple(it->second, sample_rate));
  }

  return speaker_wav_writers_map;
}

// Reads all the samples for each audio track.
std::unique_ptr<std::map<std::string, std::vector<int16_t>>> PreloadAudioTracks(
    const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
        audiotrack_readers) {
  // Create map.
  auto audiotracks_map =
      std::make_unique<std::map<std::string, std::vector<int16_t>>>();

  // Add audio track vectors.
  for (auto it = audiotrack_readers.begin(); it != audiotrack_readers.end();
       ++it) {
    // Add map entry.
    audiotracks_map->emplace(std::piecewise_construct,
                             std::forward_as_tuple(it->first),
                             std::forward_as_tuple(it->second->NumSamples()));

    // Read samples.
    it->second->ReadInt16Samples(audiotracks_map->at(it->first));
  }

  return audiotracks_map;
}

// Writes all the values in `source_samples` via `wav_writer`. If the number of
// previously written samples in `wav_writer` is less than `interval_begin`, it
// adds zeros as left padding. The padding corresponds to intervals during which
// a speaker is not active.
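// For example, if `wav_writer` has already written 100 samples and
// `interval_begin` is 160, then 60 zero samples are written before
// `source_samples`.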
void PadLeftWriteChunk(rtc::ArrayView<const int16_t> source_samples,
                       size_t interval_begin,
                       WavWriter* wav_writer) {
  // Add left padding.
  RTC_CHECK(wav_writer);
  RTC_CHECK_GE(interval_begin, wav_writer->num_samples());
  size_t padding_size = interval_begin - wav_writer->num_samples();
  if (padding_size != 0) {
    const std::vector<int16_t> padding(padding_size, 0);
    wav_writer->WriteSamples(padding.data(), padding_size);
  }

  // Write source samples.
  wav_writer->WriteSamples(source_samples.data(), source_samples.size());
}

// Appends zeros via `wav_writer`. The number of zeros is always non-negative
// and equal to the difference between `pad_samples` and the number of
// previously written samples.
void PadRightWrite(WavWriter* wav_writer, size_t pad_samples) {
  RTC_CHECK(wav_writer);
  RTC_CHECK_GE(pad_samples, wav_writer->num_samples());
  size_t padding_size = pad_samples - wav_writer->num_samples();
  if (padding_size != 0) {
    const std::vector<int16_t> padding(padding_size, 0);
    wav_writer->WriteSamples(padding.data(), padding_size);
  }
}

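// Scales `source_samples` by `gain` (in dB) into `output_samples`, saturating
// to the int16 range. DbToRatio converts dB to an amplitude ratio (10^(dB/20)),
// so e.g. a gain of -6 dB roughly halves each sample.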
void ScaleSignal(rtc::ArrayView<const int16_t> source_samples,
                 int gain,
                 rtc::ArrayView<int16_t> output_samples) {
  const float gain_linear = DbToRatio(gain);
  RTC_DCHECK_EQ(source_samples.size(), output_samples.size());
  std::transform(source_samples.begin(), source_samples.end(),
                 output_samples.begin(), [gain_linear](int16_t x) -> int16_t {
                   return rtc::saturated_cast<int16_t>(x * gain_linear);
                 });
}

}  // namespace

namespace conversational_speech {

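// Generates the near-end and far-end output tracks for every speaker in
// `multiend_call` and writes them as mono WAV files under `output_path`.
// Returns the map of output file paths keyed by speaker name.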
std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> Simulate(
    const MultiEndCall& multiend_call,
    const std::string& output_path) {
  // Set output file paths and initialize wav writers.
  const auto& speaker_names = multiend_call.speaker_names();
  auto speaker_output_file_paths =
      InitSpeakerOutputFilePaths(speaker_names, output_path);
  auto speakers_wav_writers = InitSpeakersWavWriters(
      *speaker_output_file_paths, multiend_call.sample_rate());

  // Preload all the input audio tracks.
  const auto& audiotrack_readers = multiend_call.audiotrack_readers();
  auto audiotracks = PreloadAudioTracks(audiotrack_readers);

  // TODO(alessiob): When speaker_names.size() == 2, the near-end and far-end
  // tracks across the 2 speakers are symmetric; hence, the code below could be
  // replaced by only creating the near-end or the far-end. However, this would
  // require splitting the unit tests and documenting the behavior in README.md.
  // In practice, it should not be an issue since the files are not expected to
  // be significant in size.

  // Write near-end and far-end output tracks.
  for (const auto& speaking_turn : multiend_call.speaking_turns()) {
    const std::string& active_speaker_name = speaking_turn.speaker_name;
    const auto source_audiotrack =
        audiotracks->at(speaking_turn.audiotrack_file_name);
    std::vector<int16_t> scaled_audiotrack(source_audiotrack.size());
    ScaleSignal(source_audiotrack, speaking_turn.gain, scaled_audiotrack);

    // Write active speaker's chunk to active speaker's near-end.
    PadLeftWriteChunk(
        scaled_audiotrack, speaking_turn.begin,
        speakers_wav_writers->at(active_speaker_name).near_end_wav_writer());

    // Write active speaker's chunk to other participants' far-ends.
    for (const std::string& speaker_name : speaker_names) {
      if (speaker_name == active_speaker_name)
        continue;
      PadLeftWriteChunk(
          scaled_audiotrack, speaking_turn.begin,
          speakers_wav_writers->at(speaker_name).far_end_wav_writer());
    }
  }

  // Finalize all the output tracks with right padding. This is required to
  // make all the output tracks have the same duration.
  size_t duration_samples = multiend_call.total_duration_samples();
  for (const std::string& speaker_name : speaker_names) {
    PadRightWrite(speakers_wav_writers->at(speaker_name).near_end_wav_writer(),
                  duration_samples);
    PadRightWrite(speakers_wav_writers->at(speaker_name).far_end_wav_writer(),
                  duration_samples);
  }

  return speaker_output_file_paths;
}

}  // namespace conversational_speech
}  // namespace test
}  // namespace webrtc