Use unified plan for group calls

This commit is contained in:
Rashad Sookram 2023-12-19 15:26:26 -05:00 committed by GitHub
parent 4ba92580ff
commit ba173479ef
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 401 additions and 175 deletions

View file

@ -934,10 +934,7 @@ class VoiceMediaReceiveChannelInterface : public MediaReceiveChannelInterface {
virtual void SetIncomingAudioMuted(uint32_t ssrc, bool muted) = 0;
// RingRTC change to get audio levels
virtual void GetReceivedAudioLevels(
cricket::ReceivedAudioLevel* received_out,
size_t received_out_size,
size_t* received_size_out) = 0;
virtual absl::optional<cricket::ReceivedAudioLevel> GetReceivedAudioLevel() = 0;
virtual bool GetStats(VoiceMediaReceiveInfo* stats, bool reset_legacy) = 0;
virtual void SetReceiveNackEnabled(bool enabled) = 0;

View file

@ -2790,22 +2790,20 @@ void WebRtcVoiceReceiveChannel::SetIncomingAudioMuted(uint32_t ssrc, bool muted)
}
// RingRTC change to get audio levels
void WebRtcVoiceReceiveChannel::GetReceivedAudioLevels(
cricket::ReceivedAudioLevel* received_out,
size_t received_out_size,
size_t* received_size_out) {
size_t received_size = 0;
for (const auto& kv : recv_streams_) {
if (received_size >= received_out_size) {
break;
}
received_out[received_size++] = cricket::ReceivedAudioLevel {
kv.first,
kv.second->GetAudioLevel()
};
absl::optional<cricket::ReceivedAudioLevel> WebRtcVoiceReceiveChannel::GetReceivedAudioLevel() {
RTC_DCHECK_RUN_ON(worker_thread_);
if (recv_streams_.empty()) {
RTC_LOG(LS_WARNING)
<< "Attempting to GetReceivedAudioLevel for channel with no receiving streams."
<< " mid_=" << mid_;
return absl::nullopt;
}
*received_size_out = received_size;
auto kv = recv_streams_.begin();
return cricket::ReceivedAudioLevel {
kv->first,
kv->second->GetAudioLevel()
};
}
} // namespace cricket

View file

@ -442,10 +442,7 @@ class WebRtcVoiceReceiveChannel final
void SetIncomingAudioMuted(uint32_t ssrc, bool muted) override;
// RingRTC change to get audio levels
void GetReceivedAudioLevels(
cricket::ReceivedAudioLevel* received_out,
size_t received_out_size,
size_t* received_size_out) override;
absl::optional<cricket::ReceivedAudioLevel> GetReceivedAudioLevel() override;
private:
bool SetOptions(const AudioOptions& options);

View file

@ -971,17 +971,17 @@ void VoiceChannel::ConfigureEncoders(const webrtc::AudioEncoder::Config& config)
}
// RingRTC change to get audio levels
void VoiceChannel::GetAudioLevels(
cricket::AudioLevel* captured_out,
cricket::ReceivedAudioLevel* received_out,
size_t received_out_size,
size_t* received_size_out) {
worker_thread()->BlockingCall([this, captured_out, received_out, received_out_size, received_size_out] {
void VoiceChannel::GetCapturedAudioLevel(cricket::AudioLevel* captured_out) {
worker_thread()->BlockingCall([this, captured_out] {
voice_media_send_channel()->GetCapturedAudioLevel(captured_out);
voice_media_receive_channel()->GetReceivedAudioLevels(received_out, received_out_size, received_size_out);
});
}
// RingRTC change to get audio levels
// Forwards to the media receive channel. Returns absl::nullopt when no level
// is available (the receive channel reports none for a channel with no
// receiving streams). NOTE(review): unlike GetCapturedAudioLevel above, this
// does not hop to the worker thread itself — the caller is expected to invoke
// it on the worker thread.
absl::optional<cricket::ReceivedAudioLevel> VoiceChannel::GetReceivedAudioLevel() {
  auto* receive_channel = voice_media_receive_channel();
  return receive_channel->GetReceivedAudioLevel();
}
// RingRTC change to disable CNG for muted incoming streams.
void VoiceChannel::SetIncomingAudioMuted(uint32_t ssrc, bool muted) {
worker_thread()->BlockingCall([this, ssrc, muted] {

View file

@ -420,11 +420,8 @@ class VoiceChannel : public BaseChannel {
void SetIncomingAudioMuted(uint32_t ssrc, bool muted);
// RingRTC change to get audio levels
void GetAudioLevels(
cricket::AudioLevel* captured_out,
cricket::ReceivedAudioLevel* received_out,
size_t received_out_size,
size_t* received_size_out);
void GetCapturedAudioLevel(cricket::AudioLevel* captured_out);
absl::optional<cricket::ReceivedAudioLevel> GetReceivedAudioLevel();
private:
void InitCallback();

View file

@ -1758,10 +1758,17 @@ void PeerConnection::SetAudioRecording(bool recording) {
// RingRTC change to disable CNG for muted incoming streams.
void PeerConnection::SetIncomingAudioMuted(uint32_t ssrc, bool muted) {
auto* voice_channel = static_cast<cricket::VoiceChannel*>(rtp_manager()->GetAudioTransceiver()->internal()->channel());
if (voice_channel) {
auto ssrc_str = rtc::ToString(ssrc);
for (auto transceiver : rtp_manager()->transceivers()->List()) {
if (transceiver->media_type() != cricket::MEDIA_TYPE_AUDIO || transceiver->direction() != RtpTransceiverDirection::kRecvOnly) {
continue;
}
auto* voice_channel = static_cast<cricket::VoiceChannel*>(transceiver->internal()->channel());
if (voice_channel && transceiver->receiver()->stream_ids()[0] == ssrc_str) {
voice_channel->SetIncomingAudioMuted(ssrc, muted);
}
}
}
void PeerConnection::AddAdaptationResource(
@ -3096,7 +3103,12 @@ bool PeerConnection::ReceiveRtp(uint8_t pt, bool enable_incoming) {
void PeerConnection::ConfigureAudioEncoders(const webrtc::AudioEncoder::Config& config) {
int count = 0;
for (const auto& transceiver : rtp_manager()->transceivers()->List()) {
if (transceiver->media_type() == cricket::MEDIA_TYPE_AUDIO) {
if (transceiver->media_type() != cricket::MEDIA_TYPE_AUDIO) {
continue;
}
if (transceiver->direction() == webrtc::RtpTransceiverDirection::kSendRecv ||
transceiver->direction() == webrtc::RtpTransceiverDirection::kSendOnly) {
cricket::VoiceChannel* voice_channel = static_cast<cricket::VoiceChannel*>(transceiver->internal()->channel());
voice_channel->ConfigureEncoders(config);
count++;
@ -3113,13 +3125,47 @@ void PeerConnection::GetAudioLevels(cricket::AudioLevel* captured_out,
cricket::ReceivedAudioLevel* received_out,
size_t received_out_size,
size_t* received_size_out) {
auto* voice_channel = static_cast<cricket::VoiceChannel*>(rtp_manager()->GetAudioTransceiver()->internal()->channel());
if (voice_channel) {
voice_channel->GetAudioLevels(captured_out, received_out, received_out_size, received_size_out);
} else {
*captured_out = 0;
*received_size_out = 0;
std::vector<cricket::VoiceChannel*> receiving_voice_channels;
auto transceivers = rtp_manager()->transceivers()->List();
for (auto transceiver : transceivers) {
if (transceiver->media_type() != cricket::MEDIA_TYPE_AUDIO) {
continue;
}
auto is_send_recv = transceiver->direction() == RtpTransceiverDirection::kSendRecv;
if (is_send_recv || transceiver->direction() == RtpTransceiverDirection::kSendOnly) {
auto* voice_channel = static_cast<cricket::VoiceChannel*>(transceiver->internal()->channel());
if (voice_channel) {
voice_channel->GetCapturedAudioLevel(captured_out);
}
}
if (is_send_recv || transceiver->direction() == RtpTransceiverDirection::kRecvOnly) {
auto* voice_channel = static_cast<cricket::VoiceChannel*>(transceiver->internal()->channel());
if (voice_channel) {
receiving_voice_channels.push_back(voice_channel);
}
}
}
*received_size_out = worker_thread()->BlockingCall([received_out, received_out_size, receiving_voice_channels] {
size_t received_size = 0;
for (auto voice_channel : receiving_voice_channels) {
if (received_size == received_out_size) {
break;
}
auto audio_level = voice_channel->GetReceivedAudioLevel();
if (audio_level) {
received_out[received_size++] = *audio_level;
}
}
return received_size;
});
}
// RingRTC change to get upload bandwidth estimate

View file

@ -44,10 +44,6 @@ class VideoSource : public rtc::AdaptedVideoTrackSource {
} // namespace rffi
} // namespace webrtc
// Parses track->id()
// Returns 0 upon failure
RUSTEXPORT uint32_t Rust_getTrackIdAsUint32(webrtc::MediaStreamTrackInterface* track_borrowed_rc);
// Same as AudioTrack::set_enabled
RUSTEXPORT void Rust_setAudioTrackEnabled(webrtc::AudioTrackInterface* track_borrowed_rc, bool);

View file

@ -14,6 +14,11 @@
// TODO: Consider removing all these duplicative declarations.
// It compiles without it.
RUSTEXPORT bool
Rust_updateTransceivers(webrtc::PeerConnectionInterface* peer_connection_borrowed_rc,
uint32_t* remote_demux_ids_data_borrowed,
size_t length);
/**
* Rust friendly wrapper around some webrtc::PeerConnectionInterface
* methods

View file

@ -44,7 +44,7 @@ typedef struct {
// Media events
void (*onAddStream)(void* observer_borrowed, webrtc::MediaStreamInterface* stream_owned_rc);
void (*onAddAudioRtpReceiver)(void* observer_borrowed, webrtc::MediaStreamTrackInterface* track_owned_rc);
void (*onAddVideoRtpReceiver)(void* observer_borrowed, webrtc::MediaStreamTrackInterface* track_owned_rc);
void (*onAddVideoRtpReceiver)(void* observer_borrowed, webrtc::MediaStreamTrackInterface* track_owned_rc, uint32_t demux_id);
void (*onVideoFrame)(void* observer_borrowed, uint32_t track_id, RffiVideoFrameMetadata metadata, webrtc::VideoFrameBuffer* frame_buffer_owned_rc);
// RTP data events

View file

@ -78,13 +78,6 @@ absl::optional<bool> VideoSource::needs_denoising() const {
return absl::nullopt;
}
// Returns 0 upon failure
RUSTEXPORT uint32_t Rust_getTrackIdAsUint32(webrtc::MediaStreamTrackInterface* track_borrowed_rc) {
uint32_t id = 0;
rtc::FromString(track_borrowed_rc->id(), &id);
return id;
}
RUSTEXPORT void Rust_setAudioTrackEnabled(
webrtc::AudioTrackInterface* track_borrowed_rc, bool enabled) {
track_borrowed_rc->set_enabled(enabled);

View file

@ -53,6 +53,77 @@ int RED_PT = 120;
int RED_RTX_PT = 121;
int ULPFEC_PT = 122;
const uint32_t DISABLED_DEMUX_ID = 0;
// RingRTC change: reconcile this PeerConnection's transceivers with the given
// list of remote demux IDs. Each remote demux ID gets one recv-only audio and
// one recv-only video transceiver (audio first); existing receive transceivers
// are reused/redirected where possible and new ones are created for any
// remaining IDs. An entry equal to DISABLED_DEMUX_ID marks its transceiver
// pair inactive.
//
// remote_demux_ids_data_borrowed: borrowed array of `length` demux IDs;
// the contents are copied, so the caller keeps ownership.
// Returns false only if creating a new transceiver fails.
RUSTEXPORT bool
Rust_updateTransceivers(webrtc::PeerConnectionInterface* peer_connection_borrowed_rc,
                        uint32_t* remote_demux_ids_data_borrowed,
                        size_t length) {
  std::vector<uint32_t> remote_demux_ids;
  remote_demux_ids.assign(remote_demux_ids_data_borrowed,
                          remote_demux_ids_data_borrowed + length);

  auto transceivers = peer_connection_borrowed_rc->GetTransceivers();

  // There should be at most 2 transceivers for each remote demux ID (there can
  // be fewer if new transceivers are about to be created), excluding the 2
  // transceivers for the local device's audio and video.
  // NOTE: compare with `+ 2` on the left instead of `transceivers.size() - 2`
  // on the right so the unsigned subtraction can't wrap around when there are
  // fewer than 2 transceivers, which would trigger a spurious warning.
  if (remote_demux_ids.size() * 2 + 2 < transceivers.size()) {
    RTC_LOG(LS_WARNING) << "Mismatched remote_demux_ids and transceivers count:"
                        << " remote_demux_ids.size()=" << remote_demux_ids.size()
                        << ", transceivers.size()=" << transceivers.size();
  }

  size_t remote_demux_ids_i = 0;
  for (auto transceiver : transceivers) {
    auto direction = transceiver->direction();
    if (direction != RtpTransceiverDirection::kInactive &&
        direction != RtpTransceiverDirection::kRecvOnly) {
      // This is a transceiver used by the local device to send media.
      continue;
    }

    auto ids = transceiver->receiver()->stream_ids();
    if (remote_demux_ids_i < remote_demux_ids.size()) {
      auto desired_demux_id = remote_demux_ids[remote_demux_ids_i];
      if (desired_demux_id == DISABLED_DEMUX_ID) {
        transceiver->SetDirectionWithError(RtpTransceiverDirection::kInactive);
      } else if (ids.empty() || ids[0] != rtc::ToString(desired_demux_id)) {
        // This transceiver is being reused for a different remote device;
        // (re-)enable receiving on it.
        transceiver->SetDirectionWithError(RtpTransceiverDirection::kRecvOnly);
      }
    }

    // The same demux ID is used for both the audio and video transceiver, and
    // audio is added first. So only advance to the next demux ID after seeing
    // a video transceiver.
    if (transceiver->media_type() == cricket::MEDIA_TYPE_VIDEO) {
      remote_demux_ids_i++;
    }
  }

  // Create transceivers for the remaining remote_demux_ids.
  for (auto i = remote_demux_ids_i; i < remote_demux_ids.size(); i++) {
    auto remote_demux_id = remote_demux_ids[i];

    RtpTransceiverInit init;
    init.direction = RtpTransceiverDirection::kRecvOnly;
    // The demux ID (as a string) is used as the stream ID so later code can
    // map a receiver back to its remote device.
    init.stream_ids = {rtc::ToString(remote_demux_id)};

    auto result = peer_connection_borrowed_rc->AddTransceiver(cricket::MEDIA_TYPE_AUDIO, init);
    if (!result.ok()) {
      RTC_LOG(LS_ERROR) << "Failed to PeerConnection::AddTransceiver(audio)";
      return false;
    }

    result = peer_connection_borrowed_rc->AddTransceiver(cricket::MEDIA_TYPE_VIDEO, init);
    if (!result.ok()) {
      RTC_LOG(LS_ERROR) << "Failed to PeerConnection::AddTransceiver(video)";
      return false;
    }
  }

  return true;
}
// Borrows the observer until the result is given to the observer,
// so the observer must stay alive until it's given a result.
RUSTEXPORT void
@ -447,14 +518,13 @@ Rust_sessionDescriptionFromV4(bool offer,
return new webrtc::JsepSessionDescription(typ, std::move(session), "1", "1");
}
const uint32_t INVALID_DEMUX_ID = 0;
webrtc::JsepSessionDescription*
CreateSessionDescriptionForGroupCall(bool local,
const std::string& ice_ufrag,
const std::string& ice_pwd,
RffiSrtpKey srtp_key,
std::vector<uint32_t> rtp_demux_ids) {
uint32_t local_demux_id,
std::vector<uint32_t> remote_demux_ids) {
// Major changes from the default WebRTC behavior:
// 1. We remove all codecs except Opus and VP8.
// 2. We remove all header extensions except for transport-cc, video orientation,
@ -486,17 +556,49 @@ CreateSessionDescriptionForGroupCall(bool local,
auto set_rtp_params = [crypto_params] (cricket::MediaContentDescription* media) {
media->set_protocol(cricket::kMediaProtocolSavpf);
media->set_rtcp_mux(true);
media->set_direction(webrtc::RtpTransceiverDirection::kSendRecv);
std::vector<cricket::CryptoParams> cryptos;
cryptos.push_back(crypto_params);
media->set_cryptos(cryptos);
};
auto audio = std::make_unique<cricket::AudioContentDescription>();
set_rtp_params(audio.get());
auto video = std::make_unique<cricket::VideoContentDescription>();
set_rtp_params(video.get());
auto local_direction = local ? RtpTransceiverDirection::kSendOnly : RtpTransceiverDirection::kRecvOnly;
auto local_audio = std::make_unique<cricket::AudioContentDescription>();
set_rtp_params(local_audio.get());
local_audio.get()->set_direction(local_direction);
auto local_video = std::make_unique<cricket::VideoContentDescription>();
set_rtp_params(local_video.get());
local_video.get()->set_direction(local_direction);
auto remote_direction = local ? RtpTransceiverDirection::kRecvOnly : RtpTransceiverDirection::kSendOnly;
std::vector<std::unique_ptr<cricket::AudioContentDescription>> remote_audios;
for (auto demux_id : remote_demux_ids) {
auto remote_audio = std::make_unique<cricket::AudioContentDescription>();
set_rtp_params(remote_audio.get());
if (demux_id == DISABLED_DEMUX_ID) {
remote_audio.get()->set_direction(RtpTransceiverDirection::kInactive);
} else {
remote_audio.get()->set_direction(remote_direction);
}
remote_audios.push_back(std::move(remote_audio));
}
std::vector<std::unique_ptr<cricket::VideoContentDescription>> remote_videos;
for (auto demux_id : remote_demux_ids) {
auto remote_video = std::make_unique<cricket::VideoContentDescription>();
set_rtp_params(remote_video.get());
if (demux_id == DISABLED_DEMUX_ID) {
remote_video.get()->set_direction(RtpTransceiverDirection::kInactive);
} else {
remote_video.get()->set_direction(remote_direction);
}
remote_videos.push_back(std::move(remote_video));
}
auto opus = cricket::CreateAudioCodec(OPUS_PT, cricket::kOpusCodecName, 48000, 2);
// These are the current defaults for WebRTC
@ -513,14 +615,18 @@ CreateSessionDescriptionForGroupCall(bool local,
// This is not a default. We enable this for privacy.
opus.SetParam("cbr", "1");
opus.AddFeedbackParam(cricket::FeedbackParam(cricket::kRtcpFbParamTransportCc, cricket::kParamValueEmpty));
audio->AddCodec(opus);
// Turn on the RED "meta codec" for Opus redundancy.
auto opus_red = cricket::CreateAudioCodec(OPUS_RED_PT, cricket::kRedCodecName, 48000, 2);
opus_red.SetParam("", std::to_string(OPUS_PT) + "/" + std::to_string(OPUS_PT));
// Add RED after Opus so that RED packets can at least be decoded properly if received.
audio->AddCodec(opus_red);
local_audio->AddCodec(opus);
local_audio->AddCodec(opus_red);
for (auto& remote_audio : remote_audios) {
remote_audio->AddCodec(opus);
remote_audio->AddCodec(opus_red);
}
auto add_video_feedback_params = [] (cricket::VideoCodec* video_codec) {
video_codec->AddFeedbackParam(cricket::FeedbackParam(cricket::kRtcpFbParamTransportCc, cricket::kParamValueEmpty));
@ -534,16 +640,32 @@ CreateSessionDescriptionForGroupCall(bool local,
auto vp8_rtx = cricket::CreateVideoRtxCodec(VP8_RTX_PT, VP8_PT);
add_video_feedback_params(&vp8);
video->AddCodec(vp8);
video->AddCodec(vp8_rtx);
// These are "meta codecs" for redundancy and FEC.
// They are enabled by default currently with WebRTC.
auto red = cricket::CreateVideoCodec(RED_PT, cricket::kRedCodecName);
auto red_rtx = cricket::CreateVideoRtxCodec(RED_RTX_PT, RED_PT);
video->AddCodec(red);
video->AddCodec(red_rtx);
local_video->AddCodec(vp8);
local_video->AddCodec(vp8_rtx);
local_video->AddCodec(red);
local_video->AddCodec(red_rtx);
for (auto& remote_video : remote_videos) {
remote_video->AddCodec(vp8);
remote_video->AddCodec(vp8_rtx);
remote_video->AddCodec(red);
remote_video->AddCodec(red_rtx);
}
auto audio_level = webrtc::RtpExtension(webrtc::AudioLevel::Uri(), AUDIO_LEVEL_EXT_ID);
// Note: Do not add transport-cc for audio. Using transport-cc with audio is still experimental in WebRTC.
// And don't add abs_send_time because it's only used for video.
local_audio->AddRtpHeaderExtension(audio_level);
for (auto& remote_audio : remote_audios) {
remote_audio->AddRtpHeaderExtension(audio_level);
}
auto transport_cc1 = webrtc::RtpExtension(webrtc::TransportSequenceNumber::Uri(), TRANSPORT_CC1_EXT_ID);
// TransportCC V2 is now enabled by default, but the difference is that V2 doesn't send periodic updates
@ -551,51 +673,45 @@ CreateSessionDescriptionForGroupCall(bool local,
// we can't enable V2. We'd have to add it to the SFU to move from V1 to V2.
// auto transport_cc2 = webrtc::RtpExtension(webrtc::TransportSequenceNumberV2::Uri(), TRANSPORT_CC2_EXT_ID);
auto video_orientation = webrtc::RtpExtension(webrtc::VideoOrientation::Uri(), VIDEO_ORIENTATION_EXT_ID);
auto audio_level = webrtc::RtpExtension(webrtc::AudioLevel::Uri(), AUDIO_LEVEL_EXT_ID);
// abs_send_time and tx_time_offset are used for more accurate REMB messages from the receiver,
// but the SFU doesn't process REMB messages anyway, nor does it send or receive these header extensions.
// So, don't waste bytes on them.
// auto abs_send_time = webrtc::RtpExtension(webrtc::AbsoluteSendTime::Uri(), ABS_SEND_TIME_EXT_ID);
// auto tx_time_offset = webrtc::RtpExtension(webrtc::TransmissionOffset::Uri(), TX_TIME_OFFSET_EXT_ID);
// Note: Do not add transport-cc for audio. Using transport-cc with audio is still experimental in WebRTC.
// And don't add abs_send_time because it's only used for video.
audio->AddRtpHeaderExtension(audio_level);
video->AddRtpHeaderExtension(transport_cc1);
video->AddRtpHeaderExtension(video_orientation);
for (uint32_t rtp_demux_id : rtp_demux_ids) {
if (rtp_demux_id == INVALID_DEMUX_ID) {
RTC_LOG(LS_WARNING) << "Ignoring demux ID of 0";
continue;
local_video->AddRtpHeaderExtension(transport_cc1);
local_video->AddRtpHeaderExtension(video_orientation);
for (auto& remote_video : remote_videos) {
remote_video->AddRtpHeaderExtension(transport_cc1);
remote_video->AddRtpHeaderExtension(video_orientation);
}
uint32_t audio_ssrc = rtp_demux_id + 0;
auto setup_streams = [local, &LOCAL_AUDIO_TRACK_ID, &LOCAL_VIDEO_TRACK_ID] (cricket::MediaContentDescription* audio,
cricket::MediaContentDescription* video,
uint32_t demux_id) {
uint32_t audio_ssrc = demux_id + 0;
// Leave room for audio RTX
uint32_t video1_ssrc = rtp_demux_id + 2;
uint32_t video1_rtx_ssrc = rtp_demux_id + 3;
uint32_t video2_ssrc = rtp_demux_id + 4;
uint32_t video2_rtx_ssrc = rtp_demux_id + 5;
uint32_t video3_ssrc = rtp_demux_id + 6;
uint32_t video3_rtx_ssrc = rtp_demux_id + 7;
uint32_t video1_ssrc = demux_id + 2;
uint32_t video1_rtx_ssrc = demux_id + 3;
uint32_t video2_ssrc = demux_id + 4;
uint32_t video2_rtx_ssrc = demux_id + 5;
uint32_t video3_ssrc = demux_id + 6;
uint32_t video3_rtx_ssrc = demux_id + 7;
// Leave room for some more video layers or FEC
// uint32_t data_ssrc = rtp_demux_id + 0xD; Used by group_call.rs
// uint32_t data_ssrc = demux_id + 0xD; Used by group_call.rs
auto audio_stream = cricket::StreamParams();
// We will use the string version of the demux ID to know which
// track is for which remote device.
std::string rtp_demux_id_str = rtc::ToString(rtp_demux_id);
// transceiver is for which remote device.
std::string demux_id_str = rtc::ToString(demux_id);
// For local, this should stay in sync with PeerConnectionFactory.createAudioTrack
// For remote, this will result in the remote audio track/receiver's ID,
audio_stream.id = local ? LOCAL_AUDIO_TRACK_ID : rtp_demux_id_str;
audio_stream.id = local ? LOCAL_AUDIO_TRACK_ID : demux_id_str;
audio_stream.add_ssrc(audio_ssrc);
auto video_stream = cricket::StreamParams();
// For local, this should stay in sync with PeerConnectionFactory.createVideoSource
// For remote, this will result in the remote video track/receiver's ID,
video_stream.id = local ? LOCAL_VIDEO_TRACK_ID : rtp_demux_id_str;
video_stream.id = local ? LOCAL_VIDEO_TRACK_ID : demux_id_str;
video_stream.add_ssrc(video1_ssrc);
if (local) {
// Don't add simulcast for remote descriptions
@ -621,38 +737,84 @@ CreateSessionDescriptionForGroupCall(bool local,
// The value doesn't seem to be used for anything else.
// We'll set it around just in case.
// But everything seems to work fine without it.
stream->cname = rtp_demux_id_str;
stream->cname = demux_id_str;
stream->set_stream_ids({demux_id_str});
}
audio->AddStream(audio_stream);
video->AddStream(video_stream);
};
// Set up local_demux_id
setup_streams(local_audio.get(), local_video.get(), local_demux_id);
// Set up remote_demux_ids
for (size_t i = 0; i < remote_demux_ids.size(); i++) {
auto remote_audio = &remote_audios[i];
auto remote_video = &remote_videos[i];
uint32_t rtp_demux_id = remote_demux_ids[i];
if (rtp_demux_id == DISABLED_DEMUX_ID) {
continue;
}
setup_streams(remote_audio->get(), remote_video->get(), rtp_demux_id);
}
// TODO: Why is this only for video by default in WebRTC? Should we enable it for all of them?
video->set_rtcp_reduced_size(true);
local_video->set_rtcp_reduced_size(true);
for (auto& remote_video : remote_videos) {
remote_video->set_rtcp_reduced_size(true);
}
// We don't set the crypto keys here.
// We expect that will be done later by Rust_disableDtlsAndSetSrtpKey.
// Keep the order as the WebRTC default: (audio, video).
auto audio_content_name = "audio";
auto video_content_name = "video";
auto local_audio_content_name = "local-audio0";
auto local_video_content_name = "local-video0";
auto session = std::make_unique<cricket::SessionDescription>();
session->AddTransportInfo(cricket::TransportInfo(audio_content_name, transport));
session->AddTransportInfo(cricket::TransportInfo(video_content_name, transport));
bool stopped = false;
session->AddContent(audio_content_name, cricket::MediaProtocolType::kRtp, stopped, std::move(audio));
session->AddContent(video_content_name, cricket::MediaProtocolType::kRtp, stopped, std::move(video));
auto remote_audio_content_name = "remote-audio";
auto remote_video_content_name = "remote-video";
auto bundle = cricket::ContentGroup(cricket::GROUP_TYPE_BUNDLE);
bundle.AddContentName(audio_content_name);
bundle.AddContentName(video_content_name);
bundle.AddContentName(local_audio_content_name);
bundle.AddContentName(local_video_content_name);
auto session = std::make_unique<cricket::SessionDescription>();
session->AddTransportInfo(cricket::TransportInfo(local_audio_content_name, transport));
session->AddTransportInfo(cricket::TransportInfo(local_video_content_name, transport));
bool stopped = false;
session->AddContent(local_audio_content_name, cricket::MediaProtocolType::kRtp, stopped, std::move(local_audio));
session->AddContent(local_video_content_name, cricket::MediaProtocolType::kRtp, stopped, std::move(local_video));
auto audio_it = remote_audios.begin();
auto video_it = remote_videos.begin();
for (auto i = 0; audio_it != remote_audios.end() && video_it != remote_videos.end(); i++) {
auto remote_audio = std::move(*audio_it);
audio_it = remote_audios.erase(audio_it);
std::string audio_name = remote_audio_content_name;
audio_name += std::to_string(i);
session->AddTransportInfo(cricket::TransportInfo(audio_name, transport));
session->AddContent(audio_name, cricket::MediaProtocolType::kRtp, stopped, std::move(remote_audio));
bundle.AddContentName(audio_name);
auto remote_video = std::move(*video_it);
video_it = remote_videos.erase(video_it);
std::string video_name = remote_video_content_name;
video_name += std::to_string(i);
session->AddTransportInfo(cricket::TransportInfo(video_name, transport));
session->AddContent(video_name, cricket::MediaProtocolType::kRtp, stopped, std::move(remote_video));
bundle.AddContentName(video_name);
}
session->AddGroup(bundle);
// This is the default and used for "Plan B" SDP, which is what we use in V1, V2, and V3.
session->set_msid_signaling(cricket::kMsidSignalingSsrcAttribute);
session->set_msid_signaling(cricket::kMsidSignalingMediaSection);
auto typ = local ? SdpType::kOffer : SdpType::kAnswer;
// The session ID and session version (both "1" here) go into SDP, but are not used at all.
@ -664,14 +826,13 @@ RUSTEXPORT webrtc::SessionDescriptionInterface*
Rust_localDescriptionForGroupCall(const char* ice_ufrag_borrowed,
const char* ice_pwd_borrowed,
RffiSrtpKey client_srtp_key,
uint32_t rtp_demux_id) {
std::vector<uint32_t> rtp_demux_ids;
// A 0 demux_id means we don't know the demux ID yet and shouldn't include one.
if (rtp_demux_id > 0) {
rtp_demux_ids.push_back(rtp_demux_id);
}
uint32_t local_demux_id,
uint32_t* remote_demux_ids_borrowed,
size_t remote_demux_ids_len) {
std::vector<uint32_t> remote_demux_ids;
remote_demux_ids.assign(remote_demux_ids_borrowed, remote_demux_ids_borrowed + remote_demux_ids_len);
return CreateSessionDescriptionForGroupCall(
true /* local */, std::string(ice_ufrag_borrowed), std::string(ice_pwd_borrowed), client_srtp_key, rtp_demux_ids);
true /* local */, std::string(ice_ufrag_borrowed), std::string(ice_pwd_borrowed), client_srtp_key, local_demux_id, remote_demux_ids);
}
// Returns an owned pointer.
@ -679,12 +840,13 @@ RUSTEXPORT webrtc::SessionDescriptionInterface*
Rust_remoteDescriptionForGroupCall(const char* ice_ufrag_borrowed,
const char* ice_pwd_borrowed,
RffiSrtpKey server_srtp_key,
uint32_t* rtp_demux_ids_borrowed,
size_t rtp_demux_ids_len) {
std::vector<uint32_t> rtp_demux_ids;
rtp_demux_ids.assign(rtp_demux_ids_borrowed, rtp_demux_ids_borrowed + rtp_demux_ids_len);
uint32_t local_demux_id,
uint32_t* remote_demux_ids_borrowed,
size_t remote_demux_ids_len) {
std::vector<uint32_t> remote_demux_ids;
remote_demux_ids.assign(remote_demux_ids_borrowed, remote_demux_ids_borrowed + remote_demux_ids_len);
return CreateSessionDescriptionForGroupCall(
false /* local */, std::string(ice_ufrag_borrowed), std::string(ice_pwd_borrowed), server_srtp_key, rtp_demux_ids);
false /* local */, std::string(ice_ufrag_borrowed), std::string(ice_pwd_borrowed), server_srtp_key, local_demux_id, remote_demux_ids);
}
RUSTEXPORT void

View file

@ -383,11 +383,7 @@ RUSTEXPORT PeerConnectionInterface* Rust_createPeerConnection(
config.audio_jitter_buffer_max_packets = audio_jitter_buffer_max_packets;
config.set_audio_jitter_buffer_max_target_delay_ms(audio_jitter_buffer_max_target_delay_ms);
config.set_audio_rtcp_report_interval_ms(audio_rtcp_report_interval_ms);
if (kind == RffiPeerConnectionKind::kGroupCall) {
config.sdp_semantics = SdpSemantics::kPlanB_DEPRECATED;
} else {
config.sdp_semantics = SdpSemantics::kUnifiedPlan;
}
if (ice_server.urls_size > 0) {
webrtc::PeerConnectionInterface::IceServer rtc_ice_server;
rtc_ice_server.username = std::string(ice_server.username_borrowed);
@ -425,6 +421,22 @@ RUSTEXPORT PeerConnectionInterface* Rust_createPeerConnection(
stream_ids.push_back(stream_id);
if (outgoing_audio_track_borrowed_rc) {
if (kind == RffiPeerConnectionKind::kGroupCall) {
RtpTransceiverInit init;
init.direction = RtpTransceiverDirection::kSendOnly;
init.stream_ids = stream_ids;
auto result = pc->AddTransceiver(inc_rc(outgoing_audio_track_borrowed_rc), init);
if (result.ok()) {
if (observer_borrowed->enable_frame_encryption()) {
auto rtp_sender = result.MoveValue()->sender();
rtp_sender->SetFrameEncryptor(observer_borrowed->CreateEncryptor());
}
} else {
RTC_LOG(LS_ERROR) << "Failed to PeerConnection::AddTransceiver(audio)";
}
} else {
auto result = pc->AddTrack(inc_rc(outgoing_audio_track_borrowed_rc), stream_ids);
if (result.ok()) {
if (observer_borrowed->enable_frame_encryption()) {
@ -435,12 +447,31 @@ RUSTEXPORT PeerConnectionInterface* Rust_createPeerConnection(
RTC_LOG(LS_ERROR) << "Failed to PeerConnection::AddTrack(audio)";
}
}
}
if (outgoing_video_track_borrowed_rc) {
std::vector<webrtc::RtpEncodingParameters> rtp_parameters = {{}};
if (kind == RffiPeerConnectionKind::kGroupCall) {
rtp_parameters[0].max_bitrate_bps = 100000;
}
if (kind == RffiPeerConnectionKind::kGroupCall) {
RtpTransceiverInit init;
init.direction = RtpTransceiverDirection::kSendOnly;
init.stream_ids = stream_ids;
init.send_encodings = rtp_parameters;
auto result = pc->AddTransceiver(inc_rc(outgoing_video_track_borrowed_rc), init);
if (result.ok()) {
if (observer_borrowed->enable_frame_encryption()) {
auto rtp_sender = result.MoveValue()->sender();
rtp_sender->SetFrameEncryptor(observer_borrowed->CreateEncryptor());
}
} else {
RTC_LOG(LS_ERROR) << "Failed to PeerConnection::AddTransceiver(video)";
}
} else {
auto result = pc->AddTrack(inc_rc(outgoing_video_track_borrowed_rc), stream_ids, rtp_parameters);
if (result.ok()) {
if (observer_borrowed->enable_frame_encryption()) {
@ -451,6 +482,7 @@ RUSTEXPORT PeerConnectionInterface* Rust_createPeerConnection(
RTC_LOG(LS_ERROR) << "Failed to PeerConnection::AddTrack(video)";
}
}
}
return take_rc(pc);
}

View file

@ -119,10 +119,6 @@ void PeerConnectionObserverRffi::OnAddStream(
rtc::scoped_refptr<MediaStreamInterface> stream) {
RTC_LOG(LS_INFO) << "OnAddStream()";
auto video_tracks = stream->GetVideoTracks();
if (!video_tracks.empty()) {
AddVideoSink(video_tracks[0].get());
}
callbacks_.onAddStream(observer_, take_rc(stream));
}
@ -149,13 +145,23 @@ void PeerConnectionObserverRffi::OnRenegotiationNeeded() {
void PeerConnectionObserverRffi::OnAddTrack(
rtc::scoped_refptr<RtpReceiverInterface> receiver,
const std::vector<rtc::scoped_refptr<MediaStreamInterface>>& streams) {
// TODO: Define FFI for an RtpReceiver and pass that here instead.
RTC_LOG(LS_INFO) << "OnAddTrack()";
}
void PeerConnectionObserverRffi::OnTrack(
rtc::scoped_refptr<RtpTransceiverInterface> transceiver) {
auto receiver = transceiver->receiver();
auto streams = receiver->streams();
// Ownership is transferred to the rust call back
// handler. Someone must call RefCountInterface::Release()
// eventually.
if (receiver->media_type() == cricket::MEDIA_TYPE_AUDIO) {
if (enable_frame_encryption_) {
uint32_t id = Rust_getTrackIdAsUint32(receiver->track().get());
uint32_t id = 0;
if (receiver->stream_ids().size() > 0) {
rtc::FromString(receiver->stream_ids()[0], &id);
}
if (id != 0) {
receiver->SetFrameDecryptor(CreateDecryptor(id));
callbacks_.onAddAudioRtpReceiver(observer_, take_rc(receiver->track()));
@ -167,26 +173,24 @@ void PeerConnectionObserverRffi::OnAddTrack(
}
} else if (receiver->media_type() == cricket::MEDIA_TYPE_VIDEO) {
if (enable_frame_encryption_) {
uint32_t id = Rust_getTrackIdAsUint32(receiver->track().get());
uint32_t id = 0;
if (receiver->stream_ids().size() > 0) {
rtc::FromString(receiver->stream_ids()[0], &id);
}
if (id != 0) {
receiver->SetFrameDecryptor(CreateDecryptor(id));
AddVideoSink(static_cast<webrtc::VideoTrackInterface*>(receiver->track().get()));
callbacks_.onAddVideoRtpReceiver(observer_, take_rc(receiver->track()));
AddVideoSink(static_cast<webrtc::VideoTrackInterface*>(receiver->track().get()), id);
callbacks_.onAddVideoRtpReceiver(observer_, take_rc(receiver->track()), id);
} else {
RTC_LOG(LS_WARNING) << "Not sending decryptor for RtpReceiver with strange ID: " << receiver->track()->id();
}
} else {
AddVideoSink(static_cast<webrtc::VideoTrackInterface*>(receiver->track().get()));
callbacks_.onAddVideoRtpReceiver(observer_, take_rc(receiver->track()));
AddVideoSink(static_cast<webrtc::VideoTrackInterface*>(receiver->track().get()), 0);
callbacks_.onAddVideoRtpReceiver(observer_, take_rc(receiver->track()), 0);
}
}
}
void PeerConnectionObserverRffi::OnTrack(
rtc::scoped_refptr<RtpTransceiverInterface> transceiver) {
RTC_LOG(LS_INFO) << "OnTrack()";
}
class Encryptor : public webrtc::FrameEncryptorInterface {
public:
// Passed-in observer must live at least as long as the Encryptor,
@ -238,13 +242,12 @@ rtc::scoped_refptr<FrameEncryptorInterface> PeerConnectionObserverRffi::CreateEn
return rtc::make_ref_counted<Encryptor>(observer_, &callbacks_);
}
void PeerConnectionObserverRffi::AddVideoSink(VideoTrackInterface* track) {
void PeerConnectionObserverRffi::AddVideoSink(VideoTrackInterface* track, uint32_t demux_id) {
if (!enable_video_frame_event_ || !track) {
return;
}
uint32_t track_id = Rust_getTrackIdAsUint32(track);
auto sink = std::make_unique<VideoSink>(track_id, this);
auto sink = std::make_unique<VideoSink>(demux_id, this);
rtc::VideoSinkWants wants;
// Note: this causes frames to be dropped, not rotated.
@ -258,15 +261,15 @@ void PeerConnectionObserverRffi::AddVideoSink(VideoTrackInterface* track) {
video_sinks_.push_back(std::move(sink));
}
VideoSink::VideoSink(uint32_t track_id, PeerConnectionObserverRffi* pc_observer)
: track_id_(track_id), pc_observer_(pc_observer) {
VideoSink::VideoSink(uint32_t demux_id, PeerConnectionObserverRffi* pc_observer)
: demux_id_(demux_id), pc_observer_(pc_observer) {
}
void VideoSink::OnFrame(const webrtc::VideoFrame& frame) {
pc_observer_->OnVideoFrame(track_id_, frame);
pc_observer_->OnVideoFrame(demux_id_, frame);
}
void PeerConnectionObserverRffi::OnVideoFrame(uint32_t track_id, const webrtc::VideoFrame& frame) {
void PeerConnectionObserverRffi::OnVideoFrame(uint32_t demux_id, const webrtc::VideoFrame& frame) {
RffiVideoFrameMetadata metadata = {};
metadata.width = frame.width();
metadata.height = frame.height();
@ -284,7 +287,7 @@ void PeerConnectionObserverRffi::OnVideoFrame(uint32_t track_id, const webrtc::V
}
metadata.rotation = kVideoRotation_0;
callbacks_.onVideoFrame(observer_, track_id, metadata, buffer_owned_rc);
callbacks_.onVideoFrame(observer_, demux_id, metadata, buffer_owned_rc);
}
class Decryptor : public webrtc::FrameDecryptorInterface {

View file

@ -75,12 +75,12 @@ class PeerConnectionObserverRffi : public PeerConnectionObserver {
rtc::scoped_refptr<RtpTransceiverInterface> transceiver) override;
// Called by the VideoSinks in video_sinks_.
void OnVideoFrame(uint32_t track_id, const webrtc::VideoFrame& frame);
void OnVideoFrame(uint32_t demux_id, const webrtc::VideoFrame& frame);
private:
// Add a VideoSink to the video_sinks_ for ownership and pass
// a borrowed pointer to the track.
void AddVideoSink(VideoTrackInterface* track);
void AddVideoSink(VideoTrackInterface* track, uint32_t demux_id);
void* observer_;
PeerConnectionObserverCallbacks callbacks_;
@ -91,16 +91,16 @@ class PeerConnectionObserverRffi : public PeerConnectionObserver {
};
// A simple implementation of a VideoSinkInterface which passes video frames
// back to the PeerConnectionObserver with a track_id.
// back to the PeerConnectionObserver with a demux_id.
class VideoSink : public rtc::VideoSinkInterface<webrtc::VideoFrame> {
public:
VideoSink(uint32_t track_id, PeerConnectionObserverRffi*);
VideoSink(uint32_t demux_id, PeerConnectionObserverRffi*);
~VideoSink() override = default;
void OnFrame(const webrtc::VideoFrame& frame) override;
private:
uint32_t track_id_;
uint32_t demux_id_;
PeerConnectionObserverRffi* pc_observer_;
};

View file

@ -35,7 +35,7 @@ void StatsObserverRffi::OnStatsDelivered(const rtc::scoped_refptr<const RTCStats
auto candidate_pair_stats = report->GetStatsOfType<RTCIceCandidatePairStats>();
for (const auto& stat : outbound_stream_stats) {
if (*stat->kind == "audio") {
if (*stat->kind == "audio" && (*stat->mid == "audio" || absl::StartsWith(*stat->mid, "local-audio"))) {
AudioSenderStatistics audio_sender = {0};
audio_sender.ssrc = stat->ssrc.ValueOrDefault(0);
@ -59,7 +59,7 @@ void StatsObserverRffi::OnStatsDelivered(const rtc::scoped_refptr<const RTCStats
}
this->audio_sender_statistics_.push_back(audio_sender);
} else if (*stat->kind == "video") {
} else if (*stat->kind == "video" && (*stat->mid == "video" || absl::StartsWith(*stat->mid, "local-video"))) {
VideoSenderStatistics video_sender = {0};
video_sender.ssrc = stat->ssrc.ValueOrDefault(0);
@ -101,7 +101,7 @@ void StatsObserverRffi::OnStatsDelivered(const rtc::scoped_refptr<const RTCStats
}
for (const auto& stat : inbound_stream_stats) {
if (*stat->kind == "audio") {
if (*stat->kind == "audio" && (*stat->mid == "audio" || absl::StartsWith(*stat->mid, "remote-audio"))) {
AudioReceiverStatistics audio_receiver = {0};
audio_receiver.ssrc = stat->ssrc.ValueOrDefault(0);
@ -114,7 +114,7 @@ void StatsObserverRffi::OnStatsDelivered(const rtc::scoped_refptr<const RTCStats
audio_receiver.jitter_buffer_emitted_count = stat->jitter_buffer_emitted_count.ValueOrDefault(0);
this->audio_receiver_statistics_.push_back(audio_receiver);
} else if (*stat->kind == "video") {
} else if (*stat->kind == "video" && (*stat->mid == "video" || absl::StartsWith(*stat->mid, "remote-video"))) {
VideoReceiverStatistics video_receiver = {0};
video_receiver.ssrc = stat->ssrc.ValueOrDefault(0);