Add more audio control and safe defaults

2025-05-12 21:30:45 +01:00 · 2023-08-23 10:42:30 -07:00 · 2023-08-23 10:42:30 -07:00 · 7da0a87124
commit 7da0a87124
parent 968d756463
15 changed files with 113 additions and 38 deletions
--- a/.gitignore
+++ b/.gitignore
@ -71,4 +71,5 @@
 /x86-generic_out/
 /xcodebuild
 /.vscode
+/.idea
 !webrtc/*
--- a/api/audio_codecs/audio_encoder.h
+++ b/api/audio_codecs/audio_encoder.h
@ -119,8 +119,17 @@ class AudioEncoder {
  // Very OPUS-specific
  struct Config {
    // AKA ptime or frame size
-    // One of 10, 20, 40, 60, 120
-    uint32_t packet_size_ms = 20;
+    // One of 10, 20, 40, 60, 80, 100, 120
+    int32_t initial_packet_size_ms = 60;
+    int32_t min_packet_size_ms = 60;
+    int32_t max_packet_size_ms = 60;
+
+    // 500 to 192000
+    // Start at initial_bitrate_bps, and let the BWE and bitrate allocator
+    // move up to max_bitrate_bps or down to min_bitrate_bps.
+    int32_t initial_bitrate_bps = 32000;
+    int32_t min_bitrate_bps = 32000;
+    int32_t max_bitrate_bps = 32000;

    // 1101 = OPUS_BANDWIDTH_NARROWBAND
    // 1102 = OPUS_BADWIDTH_MEDIUMBAND
@ -128,20 +137,21 @@ class AudioEncoder {
    // 1104 = OPUS_BANDWIDTH_SUPERWIDEBAND
    // 1105 = OPUS_BANDWIDTH_FULLBAND
    int32_t bandwidth = -1000;  // OPUS_AUTO
-    // 500 to 192000
-    // Start at start_bitrate_bps, and let the BWE and bitrate allocator
-    // move up to max_bitrate_bps or down to min_bitrate_bps.
-    int32_t start_bitrate_bps = 40000;
-    int32_t min_bitrate_bps = 16000;
-    int32_t max_bitrate_bps = 40000;
+
    // 0 (least complex) to 9 (most complex)
    int32_t complexity = 9;
-    // 0 = CBR; 1 = VBR
-    int32_t enable_vbr = 0;
-    // 0 = disable; 1 = enable
-    int32_t enable_dtx = 0;
-    // 0 = disable; 1 = enable
-    int32_t enable_fec = 1;
+
+    // Adaptation method to use, 0 to disable
+    int32_t adaptation = 0;
+
+    // CBR is used by default
+    bool enable_cbr = true;
+
+    // DTX is enabled by default
+    bool enable_dtx = true;
+
+    // In-band FEC is enabled by default
+    bool enable_fec = true;
  };

  virtual ~AudioEncoder() = default;
--- a/audio/audio_receive_stream.cc
+++ b/audio/audio_receive_stream.cc
@ -71,6 +71,8 @@ std::unique_ptr<voe::ChannelReceiveInterface> CreateChannelReceive(
      config.rtcp_send_transport, event_log, config.rtp.local_ssrc,
      config.rtp.remote_ssrc, config.jitter_buffer_max_packets,
      config.jitter_buffer_fast_accelerate, config.jitter_buffer_min_delay_ms,
+      // RingRTC change to configure the RTCP report interval.
+      config.rtcp_report_interval_ms,
      config.enable_non_sender_rtt, config.decoder_factory,
      config.codec_pair_id, std::move(config.frame_decryptor),
      config.crypto_options, std::move(config.frame_transformer));
--- a/audio/audio_send_stream.cc
+++ b/audio/audio_send_stream.cc
@ -922,9 +922,10 @@ void AudioSendStream::ConfigureEncoder(const webrtc::AudioEncoder::Config& confi

  // This makes it so that if BWE changes cause us to change the bitrate,
  // it doesn't actually change.
-  config_.min_bitrate_bps = config.min_bitrate_bps;
-  config_.max_bitrate_bps = config.max_bitrate_bps;
-  frame_length_range_ = {{TimeDelta::Millis(config.packet_size_ms), TimeDelta::Millis(config.packet_size_ms)}};
+  config_.min_bitrate_bps = config.initial_bitrate_bps;
+  config_.max_bitrate_bps = config.initial_bitrate_bps;
+  frame_length_range_ = {{TimeDelta::Millis(config.initial_packet_size_ms),
+                          TimeDelta::Millis(config.initial_packet_size_ms)}};
  channel_send_->CallEncoder([&](AudioEncoder* encoder) {
    if (!encoder->Configure(config)) {
      RTC_LOG(LS_INFO) << "Failed to configure audio send stream";
--- a/audio/channel_receive.cc
+++ b/audio/channel_receive.cc
@ -97,6 +97,8 @@ class ChannelReceive : public ChannelReceiveInterface,
      size_t jitter_buffer_max_packets,
      bool jitter_buffer_fast_playout,
      int jitter_buffer_min_delay_ms,
+      // RingRTC change to configure the RTCP report interval.
+      int rtcp_report_interval_ms,
      bool enable_non_sender_rtt,
      rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
      absl::optional<AudioCodecPairId> codec_pair_id,
@ -535,6 +537,8 @@ ChannelReceive::ChannelReceive(
    size_t jitter_buffer_max_packets,
    bool jitter_buffer_fast_playout,
    int jitter_buffer_min_delay_ms,
+    // RingRTC change to configure the RTCP report interval.
+    int rtcp_report_interval_ms,
    bool enable_non_sender_rtt,
    rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
    absl::optional<AudioCodecPairId> codec_pair_id,
@ -586,6 +590,8 @@ ChannelReceive::ChannelReceive(
  configuration.local_media_ssrc = local_ssrc;
  configuration.rtcp_packet_type_counter_observer = this;
  configuration.non_sender_rtt_measurement = enable_non_sender_rtt;
+  // RingRTC change to configure the RTCP report interval.
+  configuration.rtcp_report_interval_ms = rtcp_report_interval_ms;

  if (frame_transformer)
    InitFrameTransformerDelegate(std::move(frame_transformer));
@ -1108,6 +1114,8 @@ std::unique_ptr<ChannelReceiveInterface> CreateChannelReceive(
    size_t jitter_buffer_max_packets,
    bool jitter_buffer_fast_playout,
    int jitter_buffer_min_delay_ms,
+    // RingRTC change to configure the RTCP report interval.
+    int rtcp_report_interval_ms,
    bool enable_non_sender_rtt,
    rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
    absl::optional<AudioCodecPairId> codec_pair_id,
@ -1118,6 +1126,8 @@ std::unique_ptr<ChannelReceiveInterface> CreateChannelReceive(
      clock, neteq_factory, audio_device_module, rtcp_send_transport,
      rtc_event_log, local_ssrc, remote_ssrc, jitter_buffer_max_packets,
      jitter_buffer_fast_playout, jitter_buffer_min_delay_ms,
+      // RingRTC change to configure the RTCP report interval.
+      rtcp_report_interval_ms,
      enable_non_sender_rtt, decoder_factory, codec_pair_id,
      std::move(frame_decryptor), crypto_options, std::move(frame_transformer));
 }
--- a/audio/channel_receive.h
+++ b/audio/channel_receive.h
@ -181,6 +181,8 @@ std::unique_ptr<ChannelReceiveInterface> CreateChannelReceive(
    size_t jitter_buffer_max_packets,
    bool jitter_buffer_fast_playout,
    int jitter_buffer_min_delay_ms,
+    // RingRTC change to configure the RTCP report interval.
+    int rtcp_report_interval_ms,
    bool enable_non_sender_rtt,
    rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
    absl::optional<AudioCodecPairId> codec_pair_id,
--- a/call/audio_receive_stream.h
+++ b/call/audio_receive_stream.h
@ -130,6 +130,9 @@ class AudioReceiveStreamInterface : public MediaReceiveStreamInterface {
    bool jitter_buffer_fast_accelerate = false;
    int jitter_buffer_min_delay_ms = 0;

+    // RingRTC change to configure the RTCP report interval.
+    int rtcp_report_interval_ms = 5000;
+
    // Identifier for an A/V synchronization group. Empty string to disable.
    // TODO(pbos): Synchronize streams in a sync group, not just one video
    // stream to one audio stream. Tracked by issue webrtc:4762.
--- a/media/engine/webrtc_voice_engine.cc
+++ b/media/engine/webrtc_voice_engine.cc
@ -279,6 +279,8 @@ webrtc::AudioReceiveStreamInterface::Config BuildReceiveStreamConfig(
    size_t jitter_buffer_max_packets,
    bool jitter_buffer_fast_accelerate,
    int jitter_buffer_min_delay_ms,
+    // RingRTC change to configure the RTCP report interval.
+    int rtcp_report_interval_ms,
    rtc::scoped_refptr<webrtc::FrameDecryptorInterface> frame_decryptor,
    const webrtc::CryptoOptions& crypto_options,
    rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer) {
@ -297,6 +299,8 @@ webrtc::AudioReceiveStreamInterface::Config BuildReceiveStreamConfig(
  config.jitter_buffer_max_packets = jitter_buffer_max_packets;
  config.jitter_buffer_fast_accelerate = jitter_buffer_fast_accelerate;
  config.jitter_buffer_min_delay_ms = jitter_buffer_min_delay_ms;
+  // RingRTC change to configure the RTCP report interval.
+  config.rtcp_report_interval_ms = rtcp_report_interval_ms;
  config.frame_decryptor = std::move(frame_decryptor);
  config.crypto_options = crypto_options;
  config.frame_transformer = std::move(frame_transformer);
@ -2307,7 +2311,9 @@ bool WebRtcVoiceReceiveChannel::AddRecvStream(const StreamParams& sp) {
      sp.stream_ids(), recv_rtp_extensions_, this, engine()->decoder_factory_,
      decoder_map_, codec_pair_id_, engine()->audio_jitter_buffer_max_packets_,
      engine()->audio_jitter_buffer_fast_accelerate_,
-      engine()->audio_jitter_buffer_min_delay_ms_, unsignaled_frame_decryptor_,
+      engine()->audio_jitter_buffer_min_delay_ms_,
+      // RingRTC change to configure the RTCP report interval.
+      audio_config_.rtcp_report_interval_ms, unsignaled_frame_decryptor_,
      crypto_options_, unsignaled_frame_transformer_);

  recv_streams_.insert(std::make_pair(
--- a/modules/audio_coding/codecs/opus/audio_encoder_opus.cc
+++ b/modules/audio_coding/codecs/opus/audio_encoder_opus.cc
@ -994,15 +994,20 @@ bool AudioEncoderOpusImpl::Configure(const webrtc::AudioEncoder::Config& config)
  // This sets next_frame_length_ms_ until the next time audio is sampled,
  // and then it sets config_.frame_size_ms as well.
  // It needs to be delayed to avoid a CHECK in Encode.
-  SetFrameLength(config.packet_size_ms);
+  SetFrameLength(config.initial_packet_size_ms);

  // I don't think any of the below are necessary, but the above is, so we might as well set these.
-  config_.bitrate_bps = config.start_bitrate_bps;
-  config_.fec_enabled = config.enable_fec == 1;
-  config_.cbr_enabled = config.enable_vbr == 0;
+  config_.bitrate_bps = config.initial_bitrate_bps;
+  config_.fec_enabled = config.enable_fec;
+  config_.cbr_enabled = config.enable_cbr;
  config_.complexity = config.complexity;
  config_.low_rate_complexity = config_.low_rate_complexity;
-  config_.dtx_enabled = config.enable_dtx == 1;
+  config_.dtx_enabled = config.enable_dtx;
+
+  if (config.adaptation > 0) {
+    RTC_LOG(LS_WARNING) << "ringrtc_adapt!,audio,0," << config.initial_bitrate_bps
+                        << "," << config.initial_packet_size_ms;
+  }

  if (WebRtcOpus_SetBandwidth(inst_, config.bandwidth) == -1) {
    RTC_LOG(LS_WARNING) << "Failed to configure OPUS to bandwidth=" << config.bandwidth;
@ -1010,11 +1015,11 @@ bool AudioEncoderOpusImpl::Configure(const webrtc::AudioEncoder::Config& config)
  }
  RTC_LOG(LS_INFO) << "Successfully configured OPUS to bandwidth=" << config.bandwidth;

-  if (WebRtcOpus_SetBitRate(inst_, config.start_bitrate_bps) == -1) {
-    RTC_LOG(LS_WARNING) << "Failed to configure OPUS to bitrate_bps=" << config.start_bitrate_bps;
+  if (WebRtcOpus_SetBitRate(inst_, config.initial_bitrate_bps) == -1) {
+    RTC_LOG(LS_WARNING) << "Failed to configure OPUS to bitrate_bps=" << config.initial_bitrate_bps;
    return false;
  }
-  RTC_LOG(LS_INFO) << "Successfully configured OPUS to bitrate_bps=" << config.start_bitrate_bps;
+  RTC_LOG(LS_INFO) << "Successfully configured OPUS to bitrate_bps=" << config.initial_bitrate_bps;

  if (WebRtcOpus_SetComplexity(inst_, config.complexity) == -1) {
    RTC_LOG(LS_WARNING) << "Failed to configure OPUS to complexity=" << config.complexity;
@ -1022,7 +1027,7 @@ bool AudioEncoderOpusImpl::Configure(const webrtc::AudioEncoder::Config& config)
  }
  RTC_LOG(LS_INFO) << "Successfully configured OPUS to complexity=" << config.complexity;

-  if (config.enable_fec == 1) {
+  if (config.enable_fec) {
    if (WebRtcOpus_EnableFec(inst_) == -1) {
      RTC_LOG(LS_WARNING) << "Failed to configure OPUS to enable_fec=" << config.enable_fec;
      return false;
@ -1035,7 +1040,7 @@ bool AudioEncoderOpusImpl::Configure(const webrtc::AudioEncoder::Config& config)
  }
  RTC_LOG(LS_INFO) << "Successfully configured OPUS to enable_fec=" << config.enable_fec;

-  if (config.enable_dtx == 1) {
+  if (config.enable_dtx) {
    if (WebRtcOpus_EnableDtx(inst_) == -1) {
      RTC_LOG(LS_WARNING) << "Failed to configure OPUS to enable_dtx=" << config.enable_dtx;
      return false;
@ -1048,14 +1053,14 @@ bool AudioEncoderOpusImpl::Configure(const webrtc::AudioEncoder::Config& config)
  }
  RTC_LOG(LS_INFO) << "Successfully configured OPUS to enable_dtx=" << config.enable_dtx;

-  if (config.enable_vbr == 0) {
+  if (config.enable_cbr) {
    if (WebRtcOpus_EnableCbr(inst_) == -1) {
-      RTC_LOG(LS_WARNING) << "Failed to configure OPUS to enable_vbr=" << config.enable_vbr;
+      RTC_LOG(LS_WARNING) << "Failed to configure OPUS to enable_cbr=" << config.enable_cbr;
      return false;
    }
  } else {
    if (WebRtcOpus_DisableCbr(inst_) == -1) {
-      RTC_LOG(LS_WARNING) << "Failed to configure OPUS to enable_vbr=" << config.enable_vbr;
+      RTC_LOG(LS_WARNING) << "Failed to configure OPUS to enable_cbr=" << config.enable_cbr;
      return false;
    }
  }
@ -1063,5 +1068,4 @@ bool AudioEncoderOpusImpl::Configure(const webrtc::AudioEncoder::Config& config)
  return true;
 }

-
 }  // namespace webrtc
--- a/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc
+++ b/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc
@ -269,4 +269,9 @@ AudioEncoderCopyRed::ReclaimContainedEncoders() {
  return rtc::ArrayView<std::unique_ptr<AudioEncoder>>(&speech_encoder_, 1);
 }

+// RingRTC change to configure opus (the only codec we use RED with)
+bool AudioEncoderCopyRed::Configure(const webrtc::AudioEncoder::Config& config) {
+  return speech_encoder_->Configure(config);
+}
+
 }  // namespace webrtc
--- a/modules/audio_coding/codecs/red/audio_encoder_copy_red.h
+++ b/modules/audio_coding/codecs/red/audio_encoder_copy_red.h
@ -84,6 +84,9 @@ class AudioEncoderCopyRed final : public AudioEncoder {
  rtc::ArrayView<std::unique_ptr<AudioEncoder>> ReclaimContainedEncoders()
      override;

+  // RingRTC change to configure opus (the only codec we use RED with)
+  bool Configure(const webrtc::AudioEncoder::Config& config) override;
+
 protected:
  EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
                         rtc::ArrayView<const int16_t> audio,
--- a/ringrtc/rffi/api/peer_connection_factory.h
+++ b/ringrtc/rffi/api/peer_connection_factory.h
@ -109,6 +109,7 @@ RUSTEXPORT webrtc::PeerConnectionInterface* Rust_createPeerConnection(
  webrtc::rffi::PeerConnectionObserverRffi* observer_borrowed,
  RffiPeerConnectionKind kind,
  int audio_jitter_buffer_max_packets,
+  int audio_rtcp_report_interval_ms,
  RffiIceServer ice_server,
  webrtc::AudioTrackInterface* outgoing_audio_track_borrowed_rc,
  webrtc::VideoTrackInterface* outgoing_video_track_borrowed_rc);
--- a/ringrtc/rffi/api/peer_connection_intf.h
+++ b/ringrtc/rffi/api/peer_connection_intf.h
@ -100,6 +100,7 @@ RUSTEXPORT webrtc::SessionDescriptionInterface*
 Rust_sessionDescriptionFromV4(bool offer,
                              const RffiConnectionParametersV4* v4_borrowed,
                              bool enable_tcc_audio,
+                              bool enable_red_audio,
                              bool enable_vp9);

 RUSTEXPORT void
--- a/ringrtc/rffi/src/peer_connection.cc
+++ b/ringrtc/rffi/src/peer_connection.cc
@ -39,6 +39,7 @@ int TX_TIME_OFFSET_EXT_ID = 13;
 // 101 used by connection.rs
 int DATA_PT = 101;
 int OPUS_PT = 102;
+int OPUS_RED_PT = 105;
 int VP8_PT = 108;
 int VP8_RTX_PT = 118;
 int VP9_PT = 109;
@ -246,12 +247,14 @@ RUSTEXPORT webrtc::SessionDescriptionInterface*
 Rust_sessionDescriptionFromV4(bool offer,
                              const RffiConnectionParametersV4* v4_borrowed,
                              bool enable_tcc_audio,
+                              bool enable_red_audio,
                              bool enable_vp9) {
  // Major changes from the default WebRTC behavior:
  // 1. We remove all codecs except Opus, VP8, and VP9
  // 2. We remove all header extensions except for transport-cc, video orientation,
  //    and abs send time.
  // 3. Opus CBR and DTX is enabled.
+  // 4. RED is always offered for audio; it is preferred for sending only if enable_red_audio is set.

  // For some reason, WebRTC insists that the video SSRCs for one side don't 
  // overlap with SSRCs from the other side.  To avoid potential problems, we'll give the
@ -289,13 +292,22 @@ Rust_sessionDescriptionFromV4(bool offer,
  auto video = std::make_unique<cricket::VideoContentDescription>();
  set_rtp_params(video.get());

+  // Turn on the RED "meta codec" for Opus redundancy.
+  auto opus_red = cricket::CreateAudioCodec(OPUS_RED_PT, cricket::kRedCodecName, 48000, 2);
+  opus_red.SetParam("", std::to_string(OPUS_PT) + "/" + std::to_string(OPUS_PT));
+
+  if (enable_red_audio) {
+    // Add RED before Opus to use it by default when sending.
+    audio->AddCodec(opus_red);
+  }
+
  auto opus = cricket::CreateAudioCodec(OPUS_PT, cricket::kOpusCodecName, 48000, 2);
  // These are the current defaults for WebRTC
  // We set them explicitly to avoid having the defaults change on us.
  opus.SetParam("stereo", "0");  // "1" would cause non-VOIP mode to be used
-  opus.SetParam("ptime", "20");
-  opus.SetParam("minptime", "10");
-  opus.SetParam("maxptime", "120");
+  opus.SetParam("ptime", "60");
+  opus.SetParam("minptime", "60");
+  opus.SetParam("maxptime", "60");
  opus.SetParam("useinbandfec", "1");
  // This is not a default. We enable this to help reduce bandwidth because we
  // are using CBR.
@ -306,6 +318,11 @@ Rust_sessionDescriptionFromV4(bool offer,
  opus.AddFeedbackParam(cricket::FeedbackParam(cricket::kRtcpFbParamTransportCc, cricket::kParamValueEmpty));
  audio->AddCodec(opus);

+  if (!enable_red_audio) {
+    // Add RED after Opus so that RED packets can at least be decoded properly if received.
+    audio->AddCodec(opus_red);
+  }
+
  auto add_video_feedback_params = [] (cricket::VideoCodec* video_codec) {
    video_codec->AddFeedbackParam(cricket::FeedbackParam(cricket::kRtcpFbParamTransportCc, cricket::kParamValueEmpty));
    video_codec->AddFeedbackParam(cricket::FeedbackParam(cricket::kRtcpFbParamCcm, cricket::kRtcpFbCcmParamFir));
@ -477,9 +494,9 @@ CreateSessionDescriptionForGroupCall(bool local,
  // These are the current defaults for WebRTC
  // We set them explicitly to avoid having the defaults change on us.
  opus.SetParam("stereo", "0");  // "1" would cause non-VOIP mode to be used
-  opus.SetParam("ptime", "20");
-  opus.SetParam("minptime", "10");
-  opus.SetParam("maxptime", "120");
+  opus.SetParam("ptime", "60");
+  opus.SetParam("minptime", "60");
+  opus.SetParam("maxptime", "60");
  opus.SetParam("useinbandfec", "1");
  // This is not a default. We enable this to help reduce bandwidth because we
  // are using CBR.
@ -490,6 +507,13 @@ CreateSessionDescriptionForGroupCall(bool local,
  opus.AddFeedbackParam(cricket::FeedbackParam(cricket::kRtcpFbParamTransportCc, cricket::kParamValueEmpty));
  audio->AddCodec(opus);

+  // Turn on the RED "meta codec" for Opus redundancy.
+  auto opus_red = cricket::CreateAudioCodec(OPUS_RED_PT, cricket::kRedCodecName, 48000, 2);
+  opus_red.SetParam("", std::to_string(OPUS_PT) + "/" + std::to_string(OPUS_PT));
+
+  // Add RED after Opus so that RED packets can at least be decoded properly if received.
+  audio->AddCodec(opus_red);
+
  auto add_video_feedback_params = [] (cricket::VideoCodec* video_codec) {
    video_codec->AddFeedbackParam(cricket::FeedbackParam(cricket::kRtcpFbParamTransportCc, cricket::kParamValueEmpty));
    video_codec->AddFeedbackParam(cricket::FeedbackParam(cricket::kRtcpFbParamCcm, cricket::kRtcpFbCcmParamFir));
--- a/ringrtc/rffi/src/peer_connection_factory.cc
+++ b/ringrtc/rffi/src/peer_connection_factory.cc
@ -365,6 +365,7 @@ RUSTEXPORT PeerConnectionInterface* Rust_createPeerConnection(
    PeerConnectionObserverRffi* observer_borrowed,
    RffiPeerConnectionKind kind,
    int audio_jitter_buffer_max_packets,
+    int audio_rtcp_report_interval_ms,
    RffiIceServer ice_server,
    webrtc::AudioTrackInterface* outgoing_audio_track_borrowed_rc,
    webrtc::VideoTrackInterface* outgoing_video_track_borrowed_rc) {
@ -380,6 +381,7 @@ RUSTEXPORT PeerConnectionInterface* Rust_createPeerConnection(
    config.tcp_candidate_policy = PeerConnectionInterface::kTcpCandidatePolicyEnabled;
  }
  config.audio_jitter_buffer_max_packets = audio_jitter_buffer_max_packets;
+  config.set_audio_rtcp_report_interval_ms(audio_rtcp_report_interval_ms);
  config.sdp_semantics = SdpSemantics::kPlanB_DEPRECATED;
  if (ice_server.urls_size > 0) {
    webrtc::PeerConnectionInterface::IceServer rtc_ice_server;