webrtc/modules/audio_processing/test/audio_processing_simulator.cc

/*
 *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/test/audio_processing_simulator.h"

#include <algorithm>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "api/audio/echo_canceller3_config_json.h"
#include "api/audio/echo_canceller3_factory.h"
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/aec_dump/aec_dump_factory.h"
#include "modules/audio_processing/echo_control_mobile_impl.h"
#include "modules/audio_processing/include/audio_processing.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "modules/audio_processing/test/fake_recording_device.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/strings/json.h"
#include "rtc_base/strings/string_builder.h"

namespace webrtc {
namespace test {
namespace {
// Helper for reading JSON from a file and parsing it to an AEC3 configuration.
EchoCanceller3Config ReadAec3ConfigFromJsonFile(const std::string& filename) {
  std::string json_string;
  std::string s;
  std::ifstream f(filename.c_str());
  if (f.fail()) {
    std::cout << "Failed to open the file " << filename << std::endl;
    RTC_CHECK(false);
  }
  while (std::getline(f, s)) {
    json_string += s;
  }

  bool parsing_successful;
  EchoCanceller3Config cfg;
  Aec3ConfigFromJsonString(json_string, &cfg, &parsing_successful);
  if (!parsing_successful) {
    std::cout << "Parsing of json string failed: " << std::endl
              << json_string << std::endl;
    RTC_CHECK(false);
  }
  RTC_CHECK(EchoCanceller3Config::Validate(&cfg));

  return cfg;
}

void CopyFromAudioFrame(const AudioFrame& src, ChannelBuffer<float>* dest) {
  RTC_CHECK_EQ(src.num_channels_, dest->num_channels());
  RTC_CHECK_EQ(src.samples_per_channel_, dest->num_frames());
  // Copy the data from the input buffer.
  std::vector<float> tmp(src.samples_per_channel_ * src.num_channels_);
  S16ToFloat(src.data(), tmp.size(), tmp.data());
  Deinterleave(tmp.data(), src.samples_per_channel_, src.num_channels_,
               dest->channels());
}

std::string GetIndexedOutputWavFilename(const std::string& wav_name,
                                        int counter) {
  rtc::StringBuilder ss;
  ss << wav_name.substr(0, wav_name.size() - 4) << "_" << counter
     << wav_name.substr(wav_name.size() - 4);
  return ss.Release();
}

void WriteEchoLikelihoodGraphFileHeader(std::ofstream* output_file) {
  (*output_file) << "import numpy as np" << std::endl
                 << "import matplotlib.pyplot as plt" << std::endl
                 << "y = np.array([";
}

void WriteEchoLikelihoodGraphFileFooter(std::ofstream* output_file) {
  (*output_file) << "])" << std::endl
                 << "if __name__ == '__main__':" << std::endl
                 << "  x = np.arange(len(y))*.01" << std::endl
                 << "  plt.plot(x, y)" << std::endl
                 << "  plt.ylabel('Echo likelihood')" << std::endl
                 << "  plt.xlabel('Time (s)')" << std::endl
                 << "  plt.show()" << std::endl;
}

// RAII class for execution time measurement. Updates the provided
// ApiCallStatistics based on the time between ScopedTimer creation and
// leaving the enclosing scope.
class ScopedTimer {
 public:
  ScopedTimer(ApiCallStatistics* api_call_statistics_,
              ApiCallStatistics::CallType call_type)
      : start_time_(rtc::TimeNanos()),
        call_type_(call_type),
        api_call_statistics_(api_call_statistics_) {}

  ~ScopedTimer() {
    api_call_statistics_->Add(rtc::TimeNanos() - start_time_, call_type_);
  }

 private:
  const int64_t start_time_;
  const ApiCallStatistics::CallType call_type_;
  ApiCallStatistics* const api_call_statistics_;
};

}  // namespace

SimulationSettings::SimulationSettings() = default;
SimulationSettings::SimulationSettings(const SimulationSettings&) = default;
SimulationSettings::~SimulationSettings() = default;

void CopyToAudioFrame(const ChannelBuffer<float>& src, AudioFrame* dest) {
  RTC_CHECK_EQ(src.num_channels(), dest->num_channels_);
  RTC_CHECK_EQ(src.num_frames(), dest->samples_per_channel_);
  int16_t* dest_data = dest->mutable_data();
  for (size_t ch = 0; ch < dest->num_channels_; ++ch) {
    for (size_t sample = 0; sample < dest->samples_per_channel_; ++sample) {
      dest_data[sample * dest->num_channels_ + ch] =
          src.channels()[ch][sample] * 32767;
    }
  }
}

AudioProcessingSimulator::AudioProcessingSimulator(
    const SimulationSettings& settings,
    std::unique_ptr<AudioProcessingBuilder> ap_builder)
    : settings_(settings),
      ap_builder_(ap_builder ? std::move(ap_builder)
                             : std::make_unique<AudioProcessingBuilder>()),
      analog_mic_level_(settings.initial_mic_level),
      fake_recording_device_(
          settings.initial_mic_level,
          settings_.simulate_mic_gain ? *settings.simulated_mic_kind : 0),
      worker_queue_("file_writer_task_queue") {
  RTC_CHECK(!settings_.dump_internal_data || WEBRTC_APM_DEBUG_DUMP == 1);
  ApmDataDumper::SetActivated(settings_.dump_internal_data);
  if (settings_.dump_internal_data_output_dir.has_value()) {
    ApmDataDumper::SetOutputDirectory(
        settings_.dump_internal_data_output_dir.value());
  }

  if (settings_.ed_graph_output_filename &&
      !settings_.ed_graph_output_filename->empty()) {
    residual_echo_likelihood_graph_writer_.open(
        *settings_.ed_graph_output_filename);
    RTC_CHECK(residual_echo_likelihood_graph_writer_.is_open());
    WriteEchoLikelihoodGraphFileHeader(&residual_echo_likelihood_graph_writer_);
  }

  if (settings_.simulate_mic_gain)
    RTC_LOG(LS_VERBOSE) << "Simulating analog mic gain";
}

AudioProcessingSimulator::~AudioProcessingSimulator() {
  if (residual_echo_likelihood_graph_writer_.is_open()) {
    WriteEchoLikelihoodGraphFileFooter(&residual_echo_likelihood_graph_writer_);
    residual_echo_likelihood_graph_writer_.close();
  }
}

void AudioProcessingSimulator::ProcessStream(bool fixed_interface) {
  // Optionally use the fake recording device to simulate analog gain.
  if (settings_.simulate_mic_gain) {
    if (settings_.aec_dump_input_filename) {
      // When the analog gain is simulated and an AEC dump is used as input, set
      // the undo level to |aec_dump_mic_level_| to virtually restore the
      // unmodified microphone signal level.
      fake_recording_device_.SetUndoMicLevel(aec_dump_mic_level_);
    }

    if (fixed_interface) {
      fake_recording_device_.SimulateAnalogGain(&fwd_frame_);
    } else {
      fake_recording_device_.SimulateAnalogGain(in_buf_.get());
    }

    // Notify the current mic level to AGC.
    ap_->set_stream_analog_level(fake_recording_device_.MicLevel());
  } else {
    // Notify the current mic level to AGC.
    ap_->set_stream_analog_level(settings_.aec_dump_input_filename
                                     ? aec_dump_mic_level_
                                     : analog_mic_level_);
  }

  // Process the current audio frame.
  if (fixed_interface) {
    {
      const auto st = ScopedTimer(&api_call_statistics_,
                                  ApiCallStatistics::CallType::kCapture);
      RTC_CHECK_EQ(AudioProcessing::kNoError, ap_->ProcessStream(&fwd_frame_));
    }
    CopyFromAudioFrame(fwd_frame_, out_buf_.get());
  } else {
    const auto st = ScopedTimer(&api_call_statistics_,
                                ApiCallStatistics::CallType::kCapture);
    RTC_CHECK_EQ(AudioProcessing::kNoError,
                 ap_->ProcessStream(in_buf_->channels(), in_config_,
                                    out_config_, out_buf_->channels()));
  }

  // Store the mic level suggested by AGC.
  // Note that when the analog gain is simulated and an AEC dump is used as
  // input, |analog_mic_level_| will not be used with set_stream_analog_level().
  analog_mic_level_ = ap_->recommended_stream_analog_level();
  if (settings_.simulate_mic_gain) {
    fake_recording_device_.SetMicLevel(analog_mic_level_);
  }
  if (buffer_memory_writer_) {
    RTC_CHECK(!buffer_file_writer_);
    buffer_memory_writer_->Write(*out_buf_);
  } else if (buffer_file_writer_) {
    RTC_CHECK(!buffer_memory_writer_);
    buffer_file_writer_->Write(*out_buf_);
  }

  if (linear_aec_output_file_writer_) {
    bool output_available = ap_->GetLinearAecOutput(linear_aec_output_buf_);
    RTC_CHECK(output_available);
    RTC_CHECK_GT(linear_aec_output_buf_.size(), 0);
    RTC_CHECK_EQ(linear_aec_output_buf_[0].size(), 160);
    for (size_t k = 0; k < linear_aec_output_buf_[0].size(); ++k) {
      for (size_t ch = 0; ch < linear_aec_output_buf_.size(); ++ch) {
        RTC_CHECK_EQ(linear_aec_output_buf_[ch].size(), 160);
        linear_aec_output_file_writer_->WriteSamples(
            &linear_aec_output_buf_[ch][k], 1);
      }
    }
  }

  if (residual_echo_likelihood_graph_writer_.is_open()) {
    auto stats = ap_->GetStatistics(true /*has_remote_tracks*/);
    residual_echo_likelihood_graph_writer_
        << stats.residual_echo_likelihood.value_or(-1.f) << ", ";
  }

  ++num_process_stream_calls_;
}

void AudioProcessingSimulator::ProcessReverseStream(bool fixed_interface) {
  if (fixed_interface) {
    {
      const auto st = ScopedTimer(&api_call_statistics_,
                                  ApiCallStatistics::CallType::kRender);
      RTC_CHECK_EQ(AudioProcessing::kNoError,
                   ap_->ProcessReverseStream(&rev_frame_));
    }
    CopyFromAudioFrame(rev_frame_, reverse_out_buf_.get());
  } else {
    const auto st = ScopedTimer(&api_call_statistics_,
                                ApiCallStatistics::CallType::kRender);
    RTC_CHECK_EQ(AudioProcessing::kNoError,
                 ap_->ProcessReverseStream(
                     reverse_in_buf_->channels(), reverse_in_config_,
                     reverse_out_config_, reverse_out_buf_->channels()));
  }

  if (reverse_buffer_file_writer_) {
    reverse_buffer_file_writer_->Write(*reverse_out_buf_);
  }

  ++num_reverse_process_stream_calls_;
}

void AudioProcessingSimulator::SetupBuffersConfigsOutputs(
    int input_sample_rate_hz,
    int output_sample_rate_hz,
    int reverse_input_sample_rate_hz,
    int reverse_output_sample_rate_hz,
    int input_num_channels,
    int output_num_channels,
    int reverse_input_num_channels,
    int reverse_output_num_channels) {
  in_config_ = StreamConfig(input_sample_rate_hz, input_num_channels);
  in_buf_.reset(new ChannelBuffer<float>(
      rtc::CheckedDivExact(input_sample_rate_hz, kChunksPerSecond),
      input_num_channels));

  reverse_in_config_ =
      StreamConfig(reverse_input_sample_rate_hz, reverse_input_num_channels);
  reverse_in_buf_.reset(new ChannelBuffer<float>(
      rtc::CheckedDivExact(reverse_input_sample_rate_hz, kChunksPerSecond),
      reverse_input_num_channels));

  out_config_ = StreamConfig(output_sample_rate_hz, output_num_channels);
  out_buf_.reset(new ChannelBuffer<float>(
      rtc::CheckedDivExact(output_sample_rate_hz, kChunksPerSecond),
      output_num_channels));

  reverse_out_config_ =
      StreamConfig(reverse_output_sample_rate_hz, reverse_output_num_channels);
  reverse_out_buf_.reset(new ChannelBuffer<float>(
      rtc::CheckedDivExact(reverse_output_sample_rate_hz, kChunksPerSecond),
      reverse_output_num_channels));

  fwd_frame_.sample_rate_hz_ = input_sample_rate_hz;
  fwd_frame_.samples_per_channel_ =
      rtc::CheckedDivExact(fwd_frame_.sample_rate_hz_, kChunksPerSecond);
  fwd_frame_.num_channels_ = input_num_channels;

  rev_frame_.sample_rate_hz_ = reverse_input_sample_rate_hz;
  rev_frame_.samples_per_channel_ =
      rtc::CheckedDivExact(rev_frame_.sample_rate_hz_, kChunksPerSecond);
  rev_frame_.num_channels_ = reverse_input_num_channels;

  if (settings_.use_verbose_logging) {
    rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);

    std::cout << "Sample rates:" << std::endl;
    std::cout << " Forward input: " << input_sample_rate_hz << std::endl;
    std::cout << " Forward output: " << output_sample_rate_hz << std::endl;
    std::cout << " Reverse input: " << reverse_input_sample_rate_hz
              << std::endl;
    std::cout << " Reverse output: " << reverse_output_sample_rate_hz
              << std::endl;
    std::cout << "Number of channels: " << std::endl;
    std::cout << " Forward input: " << input_num_channels << std::endl;
    std::cout << " Forward output: " << output_num_channels << std::endl;
    std::cout << " Reverse input: " << reverse_input_num_channels << std::endl;
    std::cout << " Reverse output: " << reverse_output_num_channels
              << std::endl;
  }

  SetupOutput();
}

void AudioProcessingSimulator::SetupOutput() {
  if (settings_.output_filename) {
    std::string filename;
    if (settings_.store_intermediate_output) {
      filename = GetIndexedOutputWavFilename(*settings_.output_filename,
                                             output_reset_counter_);
    } else {
      filename = *settings_.output_filename;
    }

    std::unique_ptr<WavWriter> out_file(
        new WavWriter(filename, out_config_.sample_rate_hz(),
                      static_cast<size_t>(out_config_.num_channels())));
    buffer_file_writer_.reset(new ChannelBufferWavWriter(std::move(out_file)));
  } else if (settings_.aec_dump_input_string.has_value()) {
    buffer_memory_writer_ = std::make_unique<ChannelBufferVectorWriter>(
        settings_.processed_capture_samples);
  }

  if (settings_.linear_aec_output_filename) {
    std::string filename;
    if (settings_.store_intermediate_output) {
      filename = GetIndexedOutputWavFilename(
          *settings_.linear_aec_output_filename, output_reset_counter_);
    } else {
      filename = *settings_.linear_aec_output_filename;
    }

    linear_aec_output_file_writer_.reset(
        new WavWriter(filename, 16000, out_config_.num_channels()));

    linear_aec_output_buf_.resize(out_config_.num_channels());
  }

  if (settings_.reverse_output_filename) {
    std::string filename;
    if (settings_.store_intermediate_output) {
      filename = GetIndexedOutputWavFilename(*settings_.reverse_output_filename,
                                             output_reset_counter_);
    } else {
      filename = *settings_.reverse_output_filename;
    }

    std::unique_ptr<WavWriter> reverse_out_file(
        new WavWriter(filename, reverse_out_config_.sample_rate_hz(),
                      static_cast<size_t>(reverse_out_config_.num_channels())));
    reverse_buffer_file_writer_.reset(
        new ChannelBufferWavWriter(std::move(reverse_out_file)));
  }

  ++output_reset_counter_;
}

void AudioProcessingSimulator::DestroyAudioProcessor() {
  if (settings_.aec_dump_output_filename) {
    ap_->DetachAecDump();
  }
}

void AudioProcessingSimulator::CreateAudioProcessor() {
  Config config;
  AudioProcessing::Config apm_config;
  std::unique_ptr<EchoControlFactory> echo_control_factory;
  if (settings_.use_ts) {
    config.Set<ExperimentalNs>(new ExperimentalNs(*settings_.use_ts));
  }
  if (settings_.multi_channel_render) {
    apm_config.pipeline.multi_channel_render = *settings_.multi_channel_render;
  }

  if (settings_.multi_channel_capture) {
    apm_config.pipeline.multi_channel_capture =
        *settings_.multi_channel_capture;
  }

  if (settings_.use_agc2) {
    apm_config.gain_controller2.enabled = *settings_.use_agc2;
    if (settings_.agc2_fixed_gain_db) {
      apm_config.gain_controller2.fixed_digital.gain_db =
          *settings_.agc2_fixed_gain_db;
    }
    if (settings_.agc2_use_adaptive_gain) {
      apm_config.gain_controller2.adaptive_digital.enabled =
          *settings_.agc2_use_adaptive_gain;
      apm_config.gain_controller2.adaptive_digital.level_estimator =
          settings_.agc2_adaptive_level_estimator;
    }
  }
  if (settings_.use_pre_amplifier) {
    apm_config.pre_amplifier.enabled = *settings_.use_pre_amplifier;
    if (settings_.pre_amplifier_gain_factor) {
      apm_config.pre_amplifier.fixed_gain_factor =
          *settings_.pre_amplifier_gain_factor;
    }
  }

  const bool use_aec = settings_.use_aec && *settings_.use_aec;
  const bool use_aecm = settings_.use_aecm && *settings_.use_aecm;
  if (use_aec || use_aecm) {
    apm_config.echo_canceller.enabled = true;
    apm_config.echo_canceller.mobile_mode = use_aecm;
    apm_config.echo_canceller.use_legacy_aec = false;
  }
  apm_config.echo_canceller.export_linear_aec_output =
      !!settings_.linear_aec_output_filename;

  if (use_aec) {
    EchoCanceller3Config cfg;
    if (settings_.aec_settings_filename) {
      if (settings_.use_verbose_logging) {
        std::cout << "Reading AEC Parameters from JSON input." << std::endl;
      }
      cfg = ReadAec3ConfigFromJsonFile(*settings_.aec_settings_filename);
    }

    if (settings_.linear_aec_output_filename) {
      cfg.filter.export_linear_aec_output = true;
    }

    echo_control_factory.reset(new EchoCanceller3Factory(cfg));

    if (settings_.print_aec_parameter_values) {
      if (!settings_.use_quiet_output) {
        std::cout << "AEC settings:" << std::endl;
      }
      std::cout << Aec3ConfigToJsonString(cfg) << std::endl;
    }
  }

  if (settings_.use_hpf) {
    apm_config.high_pass_filter.enabled = *settings_.use_hpf;
  }

  if (settings_.use_le) {
    apm_config.level_estimation.enabled = *settings_.use_le;
  }

  if (settings_.use_vad) {
    apm_config.voice_detection.enabled = *settings_.use_vad;
  }

  if (settings_.use_agc) {
    apm_config.gain_controller1.enabled = *settings_.use_agc;
  }
  if (settings_.agc_mode) {
    apm_config.gain_controller1.mode =
        static_cast<webrtc::AudioProcessing::Config::GainController1::Mode>(
            *settings_.agc_mode);
  }
  if (settings_.use_agc_limiter) {
    apm_config.gain_controller1.enable_limiter = *settings_.use_agc_limiter;
  }
  if (settings_.agc_target_level) {
    apm_config.gain_controller1.target_level_dbfs = *settings_.agc_target_level;
  }
  if (settings_.agc_compression_gain) {
    apm_config.gain_controller1.compression_gain_db =
        *settings_.agc_compression_gain;
  }

  config.Set<ExperimentalAgc>(new ExperimentalAgc(
      !settings_.use_experimental_agc || *settings_.use_experimental_agc,
      !!settings_.use_experimental_agc_agc2_level_estimator &&
          *settings_.use_experimental_agc_agc2_level_estimator,
      !!settings_.experimental_agc_disable_digital_adaptive &&
          *settings_.experimental_agc_disable_digital_adaptive,
      !!settings_.experimental_agc_analyze_before_aec &&
          *settings_.experimental_agc_analyze_before_aec));
  if (settings_.use_ed) {
    apm_config.residual_echo_detector.enabled = *settings_.use_ed;
  }

  if (settings_.maximum_internal_processing_rate) {
    apm_config.pipeline.maximum_internal_processing_rate =
        *settings_.maximum_internal_processing_rate;
  }

  const bool use_legacy_ns =
      settings_.use_legacy_ns && *settings_.use_legacy_ns;
  if (use_legacy_ns) {
    apm_config.noise_suppression.use_legacy_ns = use_legacy_ns;
  }

  if (settings_.use_ns) {
    apm_config.noise_suppression.enabled = *settings_.use_ns;
  }
  if (settings_.ns_level) {
    const int level = *settings_.ns_level;
    RTC_CHECK_GE(level, 0);
    RTC_CHECK_LE(level, 3);
    apm_config.noise_suppression.level =
        static_cast<AudioProcessing::Config::NoiseSuppression::Level>(level);
  }

  RTC_CHECK(ap_builder_);
  if (echo_control_factory) {
    ap_builder_->SetEchoControlFactory(std::move(echo_control_factory));
  }
  ap_.reset((*ap_builder_).Create(config));

  RTC_CHECK(ap_);

  ap_->ApplyConfig(apm_config);

  if (settings_.use_ts) {
    ap_->set_stream_key_pressed(*settings_.use_ts);
  }

  if (settings_.aec_dump_output_filename) {
    ap_->AttachAecDump(AecDumpFactory::Create(
        *settings_.aec_dump_output_filename, -1, &worker_queue_));
  }
}

}  // namespace test
}  // namespace webrtc