From 3e9af7fe059af739d11bf8693669ff48d50efcfb Mon Sep 17 00:00:00 2001 From: Marina Ciocea Date: Wed, 1 Apr 2020 07:46:16 +0200 Subject: [PATCH] Insert audio frame transformer between depacketizer and decoder. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The frame transformer is passed from RTPReceiverInterface through the library to be eventually set in ChannelReceive, where the frame transformation will occur in the follow-up CL. Insertable Streams Web API explainer: https://github.com/alvestrand/webrtc-media-streams/blob/master/explainer.md Design doc for WebRTC library changes: http://doc/1eiLkjNUkRy2FssCPLUp6eH08BZuXXoHfbbBP1ZN7EVk Bug: webrtc:11380 Change-Id: I5af06d1431047ef50d00e304cf95e92a832b4220 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/171872 Reviewed-by: Magnus Flodman Reviewed-by: Tommi Reviewed-by: Per Åhgren Commit-Queue: Marina Ciocea Cr-Commit-Position: refs/heads/master@{#30956} --- audio/audio_receive_stream.cc | 9 +++- audio/audio_receive_stream_unittest.cc | 2 + audio/channel_receive.cc | 61 +++++++++++++++++--------- audio/channel_receive.h | 10 ++++- audio/mock_voe_channel_proxy.h | 3 ++ call/audio_receive_stream.h | 5 +++ media/engine/webrtc_voice_engine.cc | 27 +++++++++++- media/engine/webrtc_voice_engine.h | 4 ++ pc/audio_rtp_receiver.cc | 24 ++++++++++ pc/audio_rtp_receiver.h | 5 +++ 10 files changed, 126 insertions(+), 24 deletions(-) diff --git a/audio/audio_receive_stream.cc b/audio/audio_receive_stream.cc index ce1b344129..6bc0d4137e 100644 --- a/audio/audio_receive_stream.cc +++ b/audio/audio_receive_stream.cc @@ -82,7 +82,8 @@ std::unique_ptr CreateChannelReceive( config.jitter_buffer_max_packets, config.jitter_buffer_fast_accelerate, config.jitter_buffer_min_delay_ms, config.jitter_buffer_enable_rtx_handling, config.decoder_factory, - config.codec_pair_id, config.frame_decryptor, 
config.crypto_options, + std::move(config.frame_transformer)); } } // namespace @@ -409,6 +410,12 @@ void AudioReceiveStream::ConfigureStream(AudioReceiveStream* stream, channel_receive->SetReceiveCodecs(new_config.decoder_map); } + if (first_time || + old_config.frame_transformer != new_config.frame_transformer) { + channel_receive->SetDepacketizerToDecoderFrameTransformer( + new_config.frame_transformer); + } + stream->config_ = new_config; } } // namespace internal diff --git a/audio/audio_receive_stream_unittest.cc b/audio/audio_receive_stream_unittest.cc index 0b2cae507e..186eb1c67d 100644 --- a/audio/audio_receive_stream_unittest.cc +++ b/audio/audio_receive_stream_unittest.cc @@ -100,6 +100,8 @@ struct ConfigHelper { .WillRepeatedly(Invoke([](const std::map& codecs) { EXPECT_THAT(codecs, ::testing::IsEmpty()); })); + EXPECT_CALL(*channel_receive_, SetDepacketizerToDecoderFrameTransformer(_)) + .Times(1); stream_config_.rtp.local_ssrc = kLocalSsrc; stream_config_.rtp.remote_ssrc = kRemoteSsrc; diff --git a/audio/channel_receive.cc b/audio/channel_receive.cc index dfc849394f..1c884215ba 100644 --- a/audio/channel_receive.cc +++ b/audio/channel_receive.cc @@ -20,6 +20,7 @@ #include #include "api/crypto/frame_decryptor_interface.h" +#include "api/frame_transformer_interface.h" #include "api/rtc_event_log/rtc_event_log.h" #include "audio/audio_level.h" #include "audio/channel_send.h" @@ -79,22 +80,24 @@ AudioCodingModule::Config AcmConfig( class ChannelReceive : public ChannelReceiveInterface { public: // Used for receive streams. 
- ChannelReceive(Clock* clock, - ProcessThread* module_process_thread, - NetEqFactory* neteq_factory, - AudioDeviceModule* audio_device_module, - Transport* rtcp_send_transport, - RtcEventLog* rtc_event_log, - uint32_t local_ssrc, - uint32_t remote_ssrc, - size_t jitter_buffer_max_packets, - bool jitter_buffer_fast_playout, - int jitter_buffer_min_delay_ms, - bool jitter_buffer_enable_rtx_handling, - rtc::scoped_refptr decoder_factory, - absl::optional codec_pair_id, - rtc::scoped_refptr frame_decryptor, - const webrtc::CryptoOptions& crypto_options); + ChannelReceive( + Clock* clock, + ProcessThread* module_process_thread, + NetEqFactory* neteq_factory, + AudioDeviceModule* audio_device_module, + Transport* rtcp_send_transport, + RtcEventLog* rtc_event_log, + uint32_t local_ssrc, + uint32_t remote_ssrc, + size_t jitter_buffer_max_packets, + bool jitter_buffer_fast_playout, + int jitter_buffer_min_delay_ms, + bool jitter_buffer_enable_rtx_handling, + rtc::scoped_refptr decoder_factory, + absl::optional codec_pair_id, + rtc::scoped_refptr frame_decryptor, + const webrtc::CryptoOptions& crypto_options, + rtc::scoped_refptr frame_transformer); ~ChannelReceive() override; void SetSink(AudioSinkInterface* sink) override; @@ -161,6 +164,12 @@ class ChannelReceive : public ChannelReceiveInterface { // Used for obtaining RTT for a receive-only channel. void SetAssociatedSendChannel(const ChannelSendInterface* channel) override; + // Sets a frame transformer between the depacketizer and the decoder, to + // transform the received frames before decoding them. 
+ void SetDepacketizerToDecoderFrameTransformer( + rtc::scoped_refptr frame_transformer) + override; + private: void ReceivePacket(const uint8_t* packet, size_t packet_length, @@ -262,6 +271,8 @@ class ChannelReceive : public ChannelReceiveInterface { webrtc::CryptoOptions crypto_options_; webrtc::AbsoluteCaptureTimeReceiver absolute_capture_time_receiver_; + + rtc::scoped_refptr frame_transformer_; }; void ChannelReceive::OnReceivedPayloadData( @@ -422,7 +433,8 @@ ChannelReceive::ChannelReceive( rtc::scoped_refptr decoder_factory, absl::optional codec_pair_id, rtc::scoped_refptr frame_decryptor, - const webrtc::CryptoOptions& crypto_options) + const webrtc::CryptoOptions& crypto_options, + rtc::scoped_refptr frame_transformer) : event_log_(rtc_event_log), rtp_receive_statistics_(ReceiveStatistics::Create(clock)), remote_ssrc_(remote_ssrc), @@ -444,7 +456,8 @@ ChannelReceive::ChannelReceive( associated_send_channel_(nullptr), frame_decryptor_(frame_decryptor), crypto_options_(crypto_options), - absolute_capture_time_receiver_(clock) { + absolute_capture_time_receiver_(clock), + frame_transformer_(std::move(frame_transformer)) { // TODO(nisse): Use _moduleProcessThreadPtr instead? 
module_process_thread_checker_.Detach(); @@ -742,6 +755,12 @@ void ChannelReceive::SetAssociatedSendChannel( associated_send_channel_ = channel; } +void ChannelReceive::SetDepacketizerToDecoderFrameTransformer( + rtc::scoped_refptr frame_transformer) { + RTC_DCHECK(worker_thread_checker_.IsCurrent()); + frame_transformer_ = std::move(frame_transformer); +} + NetworkStatistics ChannelReceive::GetNetworkStatistics() const { RTC_DCHECK(worker_thread_checker_.IsCurrent()); NetworkStatistics stats; @@ -927,13 +946,15 @@ std::unique_ptr CreateChannelReceive( rtc::scoped_refptr decoder_factory, absl::optional codec_pair_id, rtc::scoped_refptr frame_decryptor, - const webrtc::CryptoOptions& crypto_options) { + const webrtc::CryptoOptions& crypto_options, + rtc::scoped_refptr frame_transformer) { return std::make_unique( clock, module_process_thread, neteq_factory, audio_device_module, rtcp_send_transport, rtc_event_log, local_ssrc, remote_ssrc, jitter_buffer_max_packets, jitter_buffer_fast_playout, jitter_buffer_min_delay_ms, jitter_buffer_enable_rtx_handling, - decoder_factory, codec_pair_id, frame_decryptor, crypto_options); + decoder_factory, codec_pair_id, frame_decryptor, crypto_options, + std::move(frame_transformer)); } } // namespace voe diff --git a/audio/channel_receive.h b/audio/channel_receive.h index 034ac7b059..bc02ff3023 100644 --- a/audio/channel_receive.h +++ b/audio/channel_receive.h @@ -22,6 +22,7 @@ #include "api/call/audio_sink.h" #include "api/call/transport.h" #include "api/crypto/crypto_options.h" +#include "api/frame_transformer_interface.h" #include "api/neteq/neteq_factory.h" #include "api/transport/rtp/rtp_source.h" #include "call/rtp_packet_sink_interface.h" @@ -137,6 +138,12 @@ class ChannelReceiveInterface : public RtpPacketSinkInterface { // Used for obtaining RTT for a receive-only channel. 
virtual void SetAssociatedSendChannel( const ChannelSendInterface* channel) = 0; + + // Sets a frame transformer between the depacketizer and the decoder, to + // transform the received frames before decoding them. + virtual void SetDepacketizerToDecoderFrameTransformer( + rtc::scoped_refptr + frame_transformer) = 0; }; std::unique_ptr CreateChannelReceive( @@ -155,7 +162,8 @@ std::unique_ptr CreateChannelReceive( rtc::scoped_refptr decoder_factory, absl::optional codec_pair_id, rtc::scoped_refptr frame_decryptor, - const webrtc::CryptoOptions& crypto_options); + const webrtc::CryptoOptions& crypto_options, + rtc::scoped_refptr frame_transformer); } // namespace voe } // namespace webrtc diff --git a/audio/mock_voe_channel_proxy.h b/audio/mock_voe_channel_proxy.h index 9a013ff03f..38ad208e1a 100644 --- a/audio/mock_voe_channel_proxy.h +++ b/audio/mock_voe_channel_proxy.h @@ -66,6 +66,9 @@ class MockChannelReceive : public voe::ChannelReceiveInterface { MOCK_CONST_METHOD0(GetSources, std::vector()); MOCK_METHOD0(StartPlayout, void()); MOCK_METHOD0(StopPlayout, void()); + MOCK_METHOD1(SetDepacketizerToDecoderFrameTransformer, + void(rtc::scoped_refptr + frame_transformer)); }; class MockChannelSend : public voe::ChannelSendInterface { diff --git a/call/audio_receive_stream.h b/call/audio_receive_stream.h index 4a50cdbc46..d4012bf7e3 100644 --- a/call/audio_receive_stream.h +++ b/call/audio_receive_stream.h @@ -21,6 +21,7 @@ #include "api/call/transport.h" #include "api/crypto/crypto_options.h" #include "api/crypto/frame_decryptor_interface.h" +#include "api/frame_transformer_interface.h" #include "api/rtp_parameters.h" #include "api/scoped_refptr.h" #include "api/transport/rtp/rtp_source.h" @@ -150,6 +151,10 @@ class AudioReceiveStream { // decrypted in whatever way the caller choses. This is not required by // default. rtc::scoped_refptr frame_decryptor; + + // An optional frame transformer used by insertable streams to transform + // encoded frames. 
+ rtc::scoped_refptr frame_transformer; }; // Reconfigure the stream according to the Configuration. diff --git a/media/engine/webrtc_voice_engine.cc b/media/engine/webrtc_voice_engine.cc index 972a7ac837..42109e86a8 100644 --- a/media/engine/webrtc_voice_engine.cc +++ b/media/engine/webrtc_voice_engine.cc @@ -1063,7 +1063,8 @@ class WebRtcVoiceMediaChannel::WebRtcAudioReceiveStream { int jitter_buffer_min_delay_ms, bool jitter_buffer_enable_rtx_handling, rtc::scoped_refptr frame_decryptor, - const webrtc::CryptoOptions& crypto_options) + const webrtc::CryptoOptions& crypto_options, + rtc::scoped_refptr frame_transformer) : call_(call), config_() { RTC_DCHECK(call); config_.rtp.remote_ssrc = remote_ssrc; @@ -1085,6 +1086,7 @@ class WebRtcVoiceMediaChannel::WebRtcAudioReceiveStream { config_.codec_pair_id = codec_pair_id; config_.frame_decryptor = frame_decryptor; config_.crypto_options = crypto_options; + config_.frame_transformer = std::move(frame_transformer); RecreateAudioReceiveStream(); } @@ -1215,6 +1217,13 @@ class WebRtcVoiceMediaChannel::WebRtcAudioReceiveStream { return rtp_parameters; } + void SetDepacketizerToDecoderFrameTransformer( + rtc::scoped_refptr frame_transformer) { + RTC_DCHECK(worker_thread_checker_.IsCurrent()); + config_.frame_transformer = std::move(frame_transformer); + ReconfigureAudioReceiveStream(); + } + private: void RecreateAudioReceiveStream() { RTC_DCHECK(worker_thread_checker_.IsCurrent()); @@ -1868,7 +1877,7 @@ bool WebRtcVoiceMediaChannel::AddRecvStream(const StreamParams& sp) { engine()->audio_jitter_buffer_fast_accelerate_, engine()->audio_jitter_buffer_min_delay_ms_, engine()->audio_jitter_buffer_enable_rtx_handling_, - unsignaled_frame_decryptor_, crypto_options_))); + unsignaled_frame_decryptor_, crypto_options_, nullptr))); recv_streams_[ssrc]->SetPlayout(playout_); return true; @@ -2337,6 +2346,20 @@ void WebRtcVoiceMediaChannel::SetEncoderToPacketizerFrameTransformer( std::move(frame_transformer)); } +void 
WebRtcVoiceMediaChannel::SetDepacketizerToDecoderFrameTransformer( + uint32_t ssrc, + rtc::scoped_refptr frame_transformer) { + RTC_DCHECK(worker_thread_checker_.IsCurrent()); + auto matching_stream = recv_streams_.find(ssrc); + if (matching_stream == recv_streams_.end()) { + RTC_LOG(LS_INFO) << "Attempting to set frame transformer for SSRC:" << ssrc + << " which doesn't exist."; + return; + } + matching_stream->second->SetDepacketizerToDecoderFrameTransformer( + std::move(frame_transformer)); +} + bool WebRtcVoiceMediaChannel::MaybeDeregisterUnsignaledRecvStream( uint32_t ssrc) { RTC_DCHECK(worker_thread_checker_.IsCurrent()); diff --git a/media/engine/webrtc_voice_engine.h b/media/engine/webrtc_voice_engine.h index a1f8ff5b04..86a7a495fe 100644 --- a/media/engine/webrtc_voice_engine.h +++ b/media/engine/webrtc_voice_engine.h @@ -215,6 +215,10 @@ class WebRtcVoiceMediaChannel final : public VoiceMediaChannel, uint32_t ssrc, rtc::scoped_refptr frame_transformer) override; + void SetDepacketizerToDecoderFrameTransformer( + uint32_t ssrc, + rtc::scoped_refptr frame_transformer) + override; // implements Transport interface bool SendRtp(const uint8_t* data, diff --git a/pc/audio_rtp_receiver.cc b/pc/audio_rtp_receiver.cc index e83e558c7b..69e8e7b9d8 100644 --- a/pc/audio_rtp_receiver.cc +++ b/pc/audio_rtp_receiver.cc @@ -225,6 +225,20 @@ std::vector AudioRtpReceiver::GetSources() const { RTC_FROM_HERE, [&] { return media_channel_->GetSources(*ssrc_); }); } +void AudioRtpReceiver::SetDepacketizerToDecoderFrameTransformer( + rtc::scoped_refptr frame_transformer) { + if (media_channel_ && ssrc_.has_value() && !stopped_) { + worker_thread_->Invoke( + RTC_FROM_HERE, + [this, frame_transformer = std::move(frame_transformer)] { + RTC_DCHECK_RUN_ON(worker_thread_); + frame_transformer_ = frame_transformer; + media_channel_->SetDepacketizerToDecoderFrameTransformer( + *ssrc_, frame_transformer); + }); + } +} + void AudioRtpReceiver::Reconfigure() { if (!media_channel_ || 
stopped_) { RTC_LOG(LS_ERROR) @@ -237,6 +251,16 @@ void AudioRtpReceiver::Reconfigure() { // Reattach the frame decryptor if we were reconfigured. MaybeAttachFrameDecryptorToMediaChannel( ssrc_, worker_thread_, frame_decryptor_, media_channel_, stopped_); + + if (media_channel_ && ssrc_.has_value() && !stopped_) { + worker_thread_->Invoke(RTC_FROM_HERE, [this] { + RTC_DCHECK_RUN_ON(worker_thread_); + if (!frame_transformer_) + return; + media_channel_->SetDepacketizerToDecoderFrameTransformer( + *ssrc_, frame_transformer_); + }); + } } void AudioRtpReceiver::SetObserver(RtpReceiverObserverInterface* observer) { diff --git a/pc/audio_rtp_receiver.h b/pc/audio_rtp_receiver.h index 908cb647fd..88b16ee682 100644 --- a/pc/audio_rtp_receiver.h +++ b/pc/audio_rtp_receiver.h @@ -104,6 +104,9 @@ class AudioRtpReceiver : public ObserverInterface, std::vector GetSources() const override; int AttachmentId() const override { return attachment_id_; } + void SetDepacketizerToDecoderFrameTransformer( + rtc::scoped_refptr frame_transformer) + override; private: void RestartMediaChannel(absl::optional ssrc); @@ -128,6 +131,8 @@ class AudioRtpReceiver : public ObserverInterface, // Allows to thread safely change playout delay. Handles caching cases if // |SetJitterBufferMinimumDelay| is called before start. rtc::scoped_refptr delay_; + rtc::scoped_refptr frame_transformer_ + RTC_GUARDED_BY(worker_thread_); }; } // namespace webrtc