Add low bitrate redundancy support

2025-05-13 05:40:42 +01:00 · 2023-10-31 13:14:36 -07:00 · 2023-10-31 13:14:36 -07:00 · 62d543d814
commit 62d543d814
parent c1124288a3
7 changed files with 229 additions and 9 deletions
--- a/api/audio_codecs/opus/audio_encoder_opus_config.h
+++ b/api/audio_codecs/opus/audio_encoder_opus_config.h
@ -21,7 +21,8 @@
 namespace webrtc {

 struct RTC_EXPORT AudioEncoderOpusConfig {
-  static constexpr int kDefaultFrameSizeMs = 20;
+  // RingRTC change to ensure that opus encoders start with 60ms frame size
+  static constexpr int kDefaultFrameSizeMs = 60;

  // Opus API allows a min bitrate of 500bps, but Opus documentation suggests
  // bitrate should be in the range of 6000 to 510000, inclusive.
--- a/modules/audio_coding/codecs/opus/audio_encoder_opus.cc
+++ b/modules/audio_coding/codecs/opus/audio_encoder_opus.cc
@ -1001,7 +1001,7 @@ bool AudioEncoderOpusImpl::Configure(const webrtc::AudioEncoder::Config& config)
  config_.fec_enabled = config.enable_fec;
  config_.cbr_enabled = config.enable_cbr;
  config_.complexity = config.complexity;
-  config_.low_rate_complexity = config_.low_rate_complexity;
+  config_.low_rate_complexity = config.complexity;
  config_.dtx_enabled = config.enable_dtx;

  if (config.adaptation > 0) {
--- a/modules/audio_coding/codecs/opus/audio_encoder_opus.h
+++ b/modules/audio_coding/codecs/opus/audio_encoder_opus.h
@ -106,6 +106,9 @@ class AudioEncoderOpusImpl final : public AudioEncoder {
  // RingRTC change to configure opus
  bool Configure(const webrtc::AudioEncoder::Config& config) override;

+  // RingRTC change to add low bitrate redundancy: lets an owner discard audio queued for the next encode.
+  void Clear() { input_buffer_.clear(); }  // Only input_buffer_ is cleared; no other member is touched here.
+
  // Getters for testing.
  float packet_loss_rate() const { return packet_loss_rate_; }
  AudioEncoderOpusConfig::ApplicationMode application() const {
--- a/modules/audio_coding/codecs/opus/opus_interface.cc
+++ b/modules/audio_coding/codecs/opus/opus_interface.cc
@ -15,6 +15,8 @@

 #include "api/array_view.h"
 #include "rtc_base/checks.h"
+// RingRTC change to log opus setters
+#include "rtc_base/logging.h"
 #include "system_wrappers/include/field_trial.h"

 enum {
@ -269,6 +271,8 @@ int WebRtcOpus_Encode(OpusEncInst* inst,
       : opus_multistream_encoder_ctl(inst->multistream_encoder, vargs))

 int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
+  // RingRTC change to log opus setters
+  RTC_LOG(LS_INFO) << "WebRtcOpus_SetBitRate " << rate;
  if (inst) {
    return ENCODER_CTL(inst, OPUS_SET_BITRATE(rate));
  } else {
@ -277,6 +281,8 @@ int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
 }

 int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) {
+  // RingRTC change to log opus setters
+  RTC_LOG(LS_WARNING) << "WebRtcOpus_SetPacketLossRate " << loss_rate;
  if (inst) {
    return ENCODER_CTL(inst, OPUS_SET_PACKET_LOSS_PERC(loss_rate));
  } else {
@ -287,6 +293,9 @@ int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) {
 int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) {
  opus_int32 set_bandwidth;

+  // RingRTC change to log opus setters
+  RTC_LOG(LS_INFO) << "WebRtcOpus_SetMaxPlaybackRate " << frequency_hz;
+
  if (!inst)
    return -1;

@ -344,6 +353,8 @@ int16_t WebRtcOpus_GetMaxPlaybackRate(OpusEncInst* const inst,
 }

 int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) {
+  // RingRTC change to log opus setters
+  RTC_LOG(LS_INFO) << "WebRtcOpus_EnableFec";
  if (inst) {
    return ENCODER_CTL(inst, OPUS_SET_INBAND_FEC(1));
  } else {
@ -352,6 +363,8 @@ int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) {
 }

 int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
+  // RingRTC change to log opus setters
+  RTC_LOG(LS_INFO) << "WebRtcOpus_DisableFec";
  if (inst) {
    return ENCODER_CTL(inst, OPUS_SET_INBAND_FEC(0));
  } else {
@ -360,6 +373,8 @@ int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
 }

 int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) {
+  // RingRTC change to log opus setters
+  RTC_LOG(LS_INFO) << "WebRtcOpus_EnableDtx";
  if (inst) {
    if (webrtc::field_trial::IsEnabled(kSetSignalVoiceWithDtxFieldTrial)) {
      int ret = ENCODER_CTL(inst, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
@ -374,6 +389,8 @@ int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) {
 }

 int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) {
+  // RingRTC change to log opus setters
+  RTC_LOG(LS_INFO) << "WebRtcOpus_DisableDtx";
  if (inst) {
    if (webrtc::field_trial::IsEnabled(kSetSignalVoiceWithDtxFieldTrial)) {
      int ret = ENCODER_CTL(inst, OPUS_SET_SIGNAL(OPUS_AUTO));
@ -398,6 +415,8 @@ int16_t WebRtcOpus_GetUseDtx(OpusEncInst* inst) {
 }

 int16_t WebRtcOpus_EnableCbr(OpusEncInst* inst) {
+  // RingRTC change to log opus setters
+  RTC_LOG(LS_INFO) << "WebRtcOpus_EnableCbr";
  if (inst) {
    return ENCODER_CTL(inst, OPUS_SET_VBR(0));
  } else {
@ -406,6 +425,8 @@ int16_t WebRtcOpus_EnableCbr(OpusEncInst* inst) {
 }

 int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst) {
+  // RingRTC change to log opus setters
+  RTC_LOG(LS_INFO) << "WebRtcOpus_DisableCbr";
  if (inst) {
    return ENCODER_CTL(inst, OPUS_SET_VBR(1));
  } else {
@ -414,6 +435,8 @@ int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst) {
 }

 int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) {
+  // RingRTC change to log opus setters
+  RTC_LOG(LS_INFO) << "WebRtcOpus_SetComplexity " << complexity;
  if (inst) {
    return ENCODER_CTL(inst, OPUS_SET_COMPLEXITY(complexity));
  } else {
@ -434,6 +457,8 @@ int32_t WebRtcOpus_GetBandwidth(OpusEncInst* inst) {
 }

 int16_t WebRtcOpus_SetBandwidth(OpusEncInst* inst, int32_t bandwidth) {
+  // RingRTC change to log opus setters
+  RTC_LOG(LS_INFO) << "WebRtcOpus_SetBandwidth " << bandwidth;
  if (inst) {
    return ENCODER_CTL(inst, OPUS_SET_BANDWIDTH(bandwidth));
  } else {
--- a/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc
+++ b/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc
@ -20,6 +20,10 @@
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"

+// RingRTC change to add low bitrate redundancy
+#include "rtc_base/experiments/field_trial_parser.h"
+#include "system_wrappers/include/field_trial.h"
+
 namespace webrtc {
 static constexpr const int kRedMaxPacketSize =
    1 << 10;  // RED packets must be less than 1024 bytes to fit the 10 bit
@ -56,7 +60,12 @@ AudioEncoderCopyRed::AudioEncoderCopyRed(Config&& config,
    : speech_encoder_(std::move(config.speech_encoder)),
      primary_encoded_(0, kAudioMaxRtpPacketLen),
      max_packet_length_(kAudioMaxRtpPacketLen),
-      red_payload_type_(config.payload_type) {
+      red_payload_type_(config.payload_type),
+      // RingRTC change to add low bitrate redundancy
+      use_lbred_(false),
+      use_loss_primary_(true),
+      use_loss_secondary_(false),
+      secondary_encoded_(0, kAudioMaxRtpPacketLen) {
  RTC_CHECK(speech_encoder_) << "Speech encoder not provided.";

  auto number_of_redundant_encodings =
@ -66,10 +75,79 @@ AudioEncoderCopyRed::AudioEncoderCopyRed(Config&& config,
    redundant.second.EnsureCapacity(kAudioMaxRtpPacketLen);
    redundant_encodings_.push_front(std::move(redundant));
  }
+
+  // RingRTC change to add low bitrate redundancy
+  ConfigureLBRedExperiment();
 }

 AudioEncoderCopyRed::~AudioEncoderCopyRed() = default;

+// RingRTC change to add low bitrate redundancy. If the field trial named below is enabled, parses its parameters, stores the LBRED settings and creates/configures the secondary Opus encoder; otherwise no member is modified.
+void AudioEncoderCopyRed::ConfigureLBRedExperiment() {
+  constexpr char kFieldTrialName[] = "RingRTC-Audio-LBRed-For-Opus";
+
+  if (field_trial::IsEnabled(kFieldTrialName)) {
+    FieldTrialFlag enabled("Enabled", false);  // Handed to the parser below but never read afterwards in this function.
+
+    // Default values are from the best results during testing.
+    FieldTrialParameter<bool> cbr("cbr", true);
+    FieldTrialParameter<bool> dtx("dtx", false);
+    FieldTrialConstrained<int> complexity("complexity", 4, 0, 10);  // presumably (key, default, min, max) - confirm against field_trial_parser.h
+    FieldTrialConstrained<int> bandwidth("bandwidth", 1103, -1000, 1105);  // NOTE(review): values look like Opus OPUS_BANDWIDTH_* / OPUS_AUTO constants - confirm
+    FieldTrialConstrained<int> bitrate("bitrate", 10000, 6000, 40000);  // Bitrate applied to the secondary encoder (see config_secondary below).
+    FieldTrialConstrained<int> ptime("ptime", 60, 20, 120);  // Packet size in ms applied to the secondary encoder.
+    FieldTrialParameter<bool> loss_pri("loss_pri", true);
+    FieldTrialParameter<bool> loss_sec("loss_sec", false);
+    FieldTrialConstrained<int> bitrate_pri("bitrate_pri", 22000, 6000, 40000);  // Stored in bitrate_primary_ further down.
+
+    ParseFieldTrial(
+        {&enabled,&cbr,&dtx,&complexity,&bandwidth,
+         &bitrate,&ptime,&loss_pri,&loss_sec,&bitrate_pri},
+        field_trial::FindFullName(kFieldTrialName));
+
+    RTC_LOG(LS_WARNING) << "ConfigureLBRedExperiment:"
+                        << " cbr: " << cbr.Get()
+                        << ", dtx: " << dtx.Get()
+                        << ", complexity: " << complexity.Get()
+                        << ", bandwidth: " << bandwidth.Get()
+                        << ", bitrate: " << bitrate.Get()
+                        << ", ptime: " << ptime.Get()
+                        << ", loss_pri: " << loss_pri.Get()
+                        << ", loss_sec: " << loss_sec.Get()
+                        << ", bitrate_pri: " << bitrate_pri.Get();
+
+    use_lbred_ = true;  // Set whenever the trial is enabled; the parsed "Enabled" flag is not consulted.
+    use_loss_primary_ = loss_pri.Get();
+    use_loss_secondary_ = loss_sec.Get();
+    bitrate_primary_ = bitrate_pri.Get();
+
+    AudioEncoderOpusConfig config;
+    constexpr int opus_payload_type = 102;  // NOTE(review): hard-coded; presumably must match the negotiated Opus payload type - confirm.
+
+    speech_encoder_secondary_ = std::make_unique<AudioEncoderOpusImpl>(config, opus_payload_type);
+
+    webrtc::AudioEncoder::Config config_secondary;
+    config_secondary.enable_cbr = cbr.Get();
+    config_secondary.enable_dtx = dtx.Get();
+    config_secondary.complexity = complexity.Get();
+    config_secondary.bandwidth = bandwidth.Get();
+    config_secondary.initial_bitrate_bps = bitrate.Get();
+    config_secondary.initial_packet_size_ms = ptime.Get();
+
+    // Fields that don't change for redundancy: min and max are set equal to the initial values, FEC is off and adaptation is 0.
+    config_secondary.min_bitrate_bps = config_secondary.initial_bitrate_bps;
+    config_secondary.max_bitrate_bps = config_secondary.initial_bitrate_bps;
+    config_secondary.min_packet_size_ms = config_secondary.initial_packet_size_ms;
+    config_secondary.max_packet_size_ms = config_secondary.initial_packet_size_ms;
+    config_secondary.enable_fec = false;
+    config_secondary.adaptation = 0;
+
+    speech_encoder_secondary_->Configure(config_secondary);  // NOTE(review): the bool result of Configure() is ignored here.
+
+    last_packet_speech_ = false;
+  }
+}
+
 int AudioEncoderCopyRed::SampleRateHz() const {
  return speech_encoder_->SampleRateHz();
 }
@ -104,6 +182,63 @@ AudioEncoder::EncodedInfo AudioEncoderCopyRed::EncodeImpl(
  RTC_CHECK(info.redundant.empty()) << "Cannot use nested redundant encoders.";
  RTC_DCHECK_EQ(primary_encoded_.size(), info.encoded_bytes);

+  // RingRTC change to add low bitrate redundancy
+  bool use_secondary = false;
+
+  if (info.send_even_if_empty) {
+    RTC_LOG(LS_VERBOSE) << "info encoded_bytes: " << info.encoded_bytes
+                        << ", encoded_timestamp: " << info.encoded_timestamp
+                        << ", payload_type: " << info.payload_type
+                        << ", speech: " << info.speech
+                        << ", encoder_type: " << info.encoder_type;
+  }
+
+  // We will pre-fill the buffers of the secondary encoder every time. This
+  // function is called every 10ms, so the encoder needs to be ready for the
+  // actual encoding when a complete packet is collected. If it turns out
+  // that the primary did not encode speech, the secondary encoder will be
+  // cleared.
+
+  EncodedInfo info_secondary;
+
+  if (use_lbred_) {
+    // The secondary encoder is enabled.
+    secondary_encoded_.Clear();
+
+    if (info.send_even_if_empty) {
+      // The primary encoder has completed an encoding (N * 10ms).
+
+      // We only want to encode with the secondary when the primary encoder
+      // detects speech OR the last packet was speech and the current primary
+      // encoding includes at least _some_ speech.
+      if (info.speech || (last_packet_speech_ && info.encoded_bytes > 2)) {
+        // We have the final primary encoding AND it is speech.
+        info_secondary = speech_encoder_secondary_->Encode(rtp_timestamp, audio, &secondary_encoded_);
+        if (info.send_even_if_empty != info_secondary.send_even_if_empty) {
+          // This should currently be impossible, but check for now.
+          RTC_LOG(LS_ERROR) << "Primary and secondary encoders are NOT IN SYNC!";
+        } else {
+          use_secondary = true;
+
+          RTC_LOG(LS_VERBOSE) << "info_secondary encoded_bytes: " << info_secondary.encoded_bytes
+                              << ", encoded_timestamp: " << info_secondary.encoded_timestamp
+                              << ", payload_type: " << info_secondary.payload_type
+                              << ", speech: " << info_secondary.speech
+                              << ", encoder_type: " << info_secondary.encoder_type;
+        }
+      } else {
+        // We have the final primary encoding AND it is NOT speech. Clear the
+        // secondary encoder's input buffer to be ready for the next packet.
+        speech_encoder_secondary_->Clear();
+      }
+
+      last_packet_speech_ = info.speech;
+    } else {
+      // Pre-fill the secondary encoder's buffer to be ready for encoding.
+      info_secondary = speech_encoder_secondary_->Encode(rtp_timestamp, audio, &secondary_encoded_);
+    }
+  }
+
  if (info.encoded_bytes == 0 || info.encoded_bytes >= kRedMaxPacketSize) {
    return info;
  }
@ -170,10 +305,23 @@ AudioEncoder::EncodedInfo AudioEncoderCopyRed::EncodeImpl(
    rit->second.SetData(next->second);
  }
  it = redundant_encodings_.begin();
+
+  // RingRTC change to add low bitrate redundancy
+  if (use_lbred_) {
+    if (use_secondary) {
+      // Store the secondary encoder's result as redundant data.
+      if (it != redundant_encodings_.end()) {
+        it->first = info_secondary;
+        it->second.SetData(secondary_encoded_);
+      }
+    }
+  } else {
+    // Store the primary encoder's result as redundant data.
    if (it != redundant_encodings_.end()) {
      it->first = info;
      it->second.SetData(primary_encoded_);
    }
+  }

  // Update main EncodedInfo.
  info.payload_type = red_payload_type_;
@ -183,6 +331,10 @@ AudioEncoder::EncodedInfo AudioEncoderCopyRed::EncodeImpl(

 void AudioEncoderCopyRed::Reset() {
  speech_encoder_->Reset();
+  // RingRTC change to add low bitrate redundancy
+  if (use_lbred_) {
+    speech_encoder_secondary_->Reset();
+  }
  auto number_of_redundant_encodings = redundant_encodings_.size();
  redundant_encodings_.clear();
  for (size_t i = 0; i < number_of_redundant_encodings; i++) {
@ -224,9 +376,16 @@ void AudioEncoderCopyRed::DisableAudioNetworkAdaptor() {

 void AudioEncoderCopyRed::OnReceivedUplinkPacketLossFraction(
    float uplink_packet_loss_fraction) {
+  // RingRTC change to add low bitrate redundancy: use_loss_primary_ / use_loss_secondary_ gate which encoder(s) receive the loss report.
+  if (use_loss_primary_) {
    speech_encoder_->OnReceivedUplinkPacketLossFraction(
        uplink_packet_loss_fraction);
  }
+  if (use_loss_secondary_) {
+    speech_encoder_secondary_->OnReceivedUplinkPacketLossFraction(
+        uplink_packet_loss_fraction);
+  }
+}

 void AudioEncoderCopyRed::OnReceivedUplinkBandwidth(
    int target_audio_bitrate_bps,
@ -271,7 +430,19 @@ AudioEncoderCopyRed::ReclaimContainedEncoders() {

 // RingRTC change to configure opus (the only codec we use RED with)
 bool AudioEncoderCopyRed::Configure(const webrtc::AudioEncoder::Config& config) {
+  if (use_lbred_) {
+    webrtc::AudioEncoder::Config new_config = config;
+
+    // Override some configuration parameters if using LBRED: pin the primary encoder to bitrate_primary_ (initial == min == max) and turn FEC off. Only speech_encoder_ is reconfigured here.
+    new_config.initial_bitrate_bps = bitrate_primary_;
+    new_config.min_bitrate_bps = bitrate_primary_;
+    new_config.max_bitrate_bps = bitrate_primary_;
+    new_config.enable_fec = false;
+
+    return speech_encoder_->Configure(new_config);
+  } else {
    return speech_encoder_->Configure(config);
  }
+}

 }  // namespace webrtc
--- a/modules/audio_coding/codecs/red/audio_encoder_copy_red.h
+++ b/modules/audio_coding/codecs/red/audio_encoder_copy_red.h
@ -25,6 +25,9 @@
 #include "api/units/time_delta.h"
 #include "rtc_base/buffer.h"

+// RingRTC change to add low bitrate redundancy
+#include "modules/audio_coding/codecs/opus/audio_encoder_opus.h"
+
 namespace webrtc {

 // This class implements redundant audio coding as described in
@ -98,6 +101,16 @@ class AudioEncoderCopyRed final : public AudioEncoder {
  size_t max_packet_length_;
  int red_payload_type_;
  std::list<std::pair<EncodedInfo, rtc::Buffer>> redundant_encodings_;
+
+  // RingRTC change to add low bitrate redundancy (LBRED state; populated by ConfigureLBRedExperiment()).
+  void ConfigureLBRedExperiment();
+  bool use_lbred_;  // True when the LBRED field trial is enabled.
+  bool use_loss_primary_;  // Forward uplink loss reports to the primary encoder.
+  bool use_loss_secondary_;  // Forward uplink loss reports to the secondary encoder.
+  int bitrate_primary_ = 0;  // Default-initialized: otherwise indeterminate when the field trial is disabled.
+  std::unique_ptr<AudioEncoderOpusImpl> speech_encoder_secondary_;  // Created only when the field trial is enabled.
+  rtc::Buffer secondary_encoded_;  // Output buffer for the secondary encoder.
+  bool last_packet_speech_ = false;  // Default-initialized: otherwise indeterminate when the field trial is disabled.
 };

 }  // namespace webrtc
--- a/ringrtc/rffi/src/peer_connection.cc
+++ b/ringrtc/rffi/src/peer_connection.cc
@ -21,6 +21,7 @@
 #include "rtc_base/message_digest.h"
 #include "rtc_base/string_encode.h"
 #include "rtc_base/third_party/base64/base64.h"
+#include "system_wrappers/include/field_trial.h"

 #include <algorithm>
 #include <string>
@ -296,6 +297,12 @@ Rust_sessionDescriptionFromV4(bool offer,
  auto opus_red = cricket::CreateAudioCodec(OPUS_RED_PT, cricket::kRedCodecName, 48000, 2);
  opus_red.SetParam("", std::to_string(OPUS_PT) + "/" + std::to_string(OPUS_PT));

+  // If the LBRED field trial is enabled, force RED.
+  constexpr char kFieldTrialName[] = "RingRTC-Audio-LBRed-For-Opus";
+  if (field_trial::IsEnabled(kFieldTrialName)) {
+    enable_red_audio = true;
+  }
+
  if (enable_red_audio) {
    // Add RED before Opus to use it by default when sending.
    audio->AddCodec(opus_red);