From 64437e8cc0db5f104e2ec1d79b02871cebcb10cc Mon Sep 17 00:00:00 2001 From: Tony Herre Date: Mon, 29 Apr 2024 15:13:48 +0200 Subject: [PATCH] Calculate the audio level of audio packets before encoded transforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calculate the RMS audio level of audio packets being sent before invoking an encoded frame transform, and pass it with the encoded frame object. Before this, the audio level was calculated at send time by having rms_level_ look at all audio samples encoded since the last send. This is fine without a transform, as this is done synchronously after encoding, but with an async transform which might take arbitrarily long, we could end up marking older audio packets with newer audio levels, or not at all. This also makes things work correctly if external encoded frames are injected from elsewhere to be sent, and exposes the AudioLevel on the TransformableFrame interface. Bug: chromium:337193823, webrtc:42226202 Change-Id: If55d2c1d30dc03408ca9fb0193d791db44428316 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/349263 Reviewed-by: Jakob Ivarsson‎ Reviewed-by: Harald Alvestrand Commit-Queue: Tony Herre Cr-Commit-Position: refs/heads/main@{#42193} --- api/frame_transformer_interface.h | 5 + api/test/mock_transformable_audio_frame.h | 1 + ...nnel_receive_frame_transformer_delegate.cc | 7 + ...ive_frame_transformer_delegate_unittest.cc | 71 ++++++++++ audio/channel_send.cc | 28 ++-- ...channel_send_frame_transformer_delegate.cc | 24 +++- .../channel_send_frame_transformer_delegate.h | 6 +- ...end_frame_transformer_delegate_unittest.cc | 42 ++++-- audio/channel_send_unittest.cc | 125 +++++++++++++++++- 9 files changed, 275 insertions(+), 34 deletions(-) diff --git a/api/frame_transformer_interface.h b/api/frame_transformer_interface.h index d3d15418af..89356df383 100644 --- a/api/frame_transformer_interface.h +++ b/api/frame_transformer_interface.h @@ -82,6 +82,11 @@ 
class TransformableAudioFrameInterface : public TransformableFrameInterface { // TODO(crbug.com/1456628): Change this to pure virtual after it // is implemented everywhere. virtual FrameType Type() const { return FrameType::kEmptyFrame; } + + // Audio level in -dBov. Values range from 0 to 127, representing 0 to -127 + // dBov. 127 represents digital silence. Only present on remote frames if + // the audio level header extension was included. + virtual absl::optional AudioLevel() const = 0; }; // Objects implement this interface to be notified with the transformed frame. diff --git a/api/test/mock_transformable_audio_frame.h b/api/test/mock_transformable_audio_frame.h index 584c77fa54..f243e388b1 100644 --- a/api/test/mock_transformable_audio_frame.h +++ b/api/test/mock_transformable_audio_frame.h @@ -47,6 +47,7 @@ class MockTransformableAudioFrame : public TransformableAudioFrameInterface { Type, (), (const, override)); + MOCK_METHOD(absl::optional, AudioLevel, (), (const, override)); }; } // namespace webrtc diff --git a/audio/channel_receive_frame_transformer_delegate.cc b/audio/channel_receive_frame_transformer_delegate.cc index dbced0216f..953e27aa70 100644 --- a/audio/channel_receive_frame_transformer_delegate.cc +++ b/audio/channel_receive_frame_transformer_delegate.cc @@ -70,6 +70,13 @@ class TransformableIncomingAudioFrame : FrameType::kAudioFrameCN; } + absl::optional AudioLevel() const override { + if (header_.extension.audio_level()) { + return header_.extension.audio_level()->level(); + } + return absl::nullopt; + } + private: rtc::Buffer payload_; RTPHeader header_; diff --git a/audio/channel_receive_frame_transformer_delegate_unittest.cc b/audio/channel_receive_frame_transformer_delegate_unittest.cc index a206a09f99..8b819f1a9a 100644 --- a/audio/channel_receive_frame_transformer_delegate_unittest.cc +++ b/audio/channel_receive_frame_transformer_delegate_unittest.cc @@ -174,5 +174,76 @@ TEST(ChannelReceiveFrameTransformerDelegateTest, 
delegate->Transform(packet, header, /*ssrc=*/1111, /*mimeType=*/"audio/opus"); } +TEST(ChannelReceiveFrameTransformerDelegateTest, + AudioLevelAbsentWithoutExtension) { + rtc::AutoThread main_thread; + rtc::scoped_refptr mock_frame_transformer = + rtc::make_ref_counted>(); + rtc::scoped_refptr delegate = + rtc::make_ref_counted( + /*receive_frame_callback=*/nullptr, mock_frame_transformer, + rtc::Thread::Current()); + rtc::scoped_refptr callback; + EXPECT_CALL(*mock_frame_transformer, RegisterTransformedFrameCallback) + .WillOnce(SaveArg<0>(&callback)); + delegate->Init(); + ASSERT_TRUE(callback); + + const uint8_t data[] = {1, 2, 3, 4}; + rtc::ArrayView packet(data, sizeof(data)); + RTPHeader header; + std::unique_ptr frame; + ON_CALL(*mock_frame_transformer, Transform) + .WillByDefault( + [&](std::unique_ptr transform_frame) { + frame = std::move(transform_frame); + }); + delegate->Transform(packet, header, /*ssrc=*/1111, /*mimeType=*/"audio/opus"); + + EXPECT_TRUE(frame); + auto* audio_frame = + static_cast(frame.get()); + EXPECT_FALSE(audio_frame->AudioLevel()); + EXPECT_EQ(audio_frame->Type(), + TransformableAudioFrameInterface::FrameType::kAudioFrameCN); +} + +TEST(ChannelReceiveFrameTransformerDelegateTest, + AudioLevelPresentWithExtension) { + rtc::AutoThread main_thread; + rtc::scoped_refptr mock_frame_transformer = + rtc::make_ref_counted>(); + rtc::scoped_refptr delegate = + rtc::make_ref_counted( + /*receive_frame_callback=*/nullptr, mock_frame_transformer, + rtc::Thread::Current()); + rtc::scoped_refptr callback; + EXPECT_CALL(*mock_frame_transformer, RegisterTransformedFrameCallback) + .WillOnce(SaveArg<0>(&callback)); + delegate->Init(); + ASSERT_TRUE(callback); + + const uint8_t data[] = {1, 2, 3, 4}; + rtc::ArrayView packet(data, sizeof(data)); + RTPHeader header; + uint8_t audio_level_dbov = 67; + AudioLevel audio_level(/*voice_activity=*/true, audio_level_dbov); + header.extension.set_audio_level(audio_level); + std::unique_ptr frame; + 
ON_CALL(*mock_frame_transformer, Transform) + .WillByDefault( + [&](std::unique_ptr transform_frame) { + frame = std::move(transform_frame); + }); + delegate->Transform(packet, header, /*ssrc=*/1111, /*mimeType=*/"audio/opus"); + + EXPECT_TRUE(frame); + auto* audio_frame = + static_cast(frame.get()); + EXPECT_EQ(*audio_frame->AudioLevel(), audio_level_dbov); + EXPECT_EQ(audio_frame->Type(), + TransformableAudioFrameInterface::FrameType::kAudioFrameSpeech); +} + } // namespace } // namespace webrtc diff --git a/audio/channel_send.cc b/audio/channel_send.cc index e8eaa3111e..1e211ab1c6 100644 --- a/audio/channel_send.cc +++ b/audio/channel_send.cc @@ -170,7 +170,8 @@ class ChannelSend : public ChannelSendInterface, uint32_t rtp_timestamp_without_offset, rtc::ArrayView payload, int64_t absolute_capture_timestamp_ms, - rtc::ArrayView csrcs) + rtc::ArrayView csrcs, + absl::optional audio_level_dbov) RTC_RUN_ON(encoder_queue_checker_); void OnReceivedRtt(int64_t rtt_ms); @@ -280,6 +281,14 @@ int32_t ChannelSend::SendData(AudioFrameType frameType, int64_t absolute_capture_timestamp_ms) { RTC_DCHECK_RUN_ON(&encoder_queue_checker_); rtc::ArrayView payload(payloadData, payloadSize); + + absl::optional audio_level_dbov; + if (include_audio_level_indication_.load()) { + // Take the averaged audio levels from rms_level_ and reset it before + // invoking any async transformer. + audio_level_dbov = rms_level_.Average(); + } + if (frame_transformer_delegate_) { // Asynchronously transform the payload before sending it. After the payload // is transformed, the delegate will call SendRtpAudio to send it. 
@@ -290,11 +299,12 @@ int32_t ChannelSend::SendData(AudioFrameType frameType, frame_transformer_delegate_->Transform( frameType, payloadType, rtp_timestamp + rtp_rtcp_->StartTimestamp(), payloadData, payloadSize, absolute_capture_timestamp_ms, - rtp_rtcp_->SSRC(), mime_type.str()); + rtp_rtcp_->SSRC(), mime_type.str(), audio_level_dbov); return 0; } return SendRtpAudio(frameType, payloadType, rtp_timestamp, payload, - absolute_capture_timestamp_ms, /*csrcs=*/{}); + absolute_capture_timestamp_ms, /*csrcs=*/{}, + audio_level_dbov); } int32_t ChannelSend::SendRtpAudio(AudioFrameType frameType, @@ -302,7 +312,8 @@ int32_t ChannelSend::SendRtpAudio(AudioFrameType frameType, uint32_t rtp_timestamp_without_offset, rtc::ArrayView payload, int64_t absolute_capture_timestamp_ms, - rtc::ArrayView csrcs) { + rtc::ArrayView csrcs, + absl::optional audio_level_dbov) { // E2EE Custom Audio Frame Encryption (This is optional). // Keep this buffer around for the lifetime of the send call. rtc::Buffer encrypted_audio_payload; @@ -369,8 +380,8 @@ int32_t ChannelSend::SendRtpAudio(AudioFrameType frameType, if (absolute_capture_timestamp_ms > 0) { frame.capture_time = Timestamp::Millis(absolute_capture_timestamp_ms); } - if (include_audio_level_indication_.load()) { - frame.audio_level_dbov = rms_level_.Average(); + if (include_audio_level_indication_.load() && audio_level_dbov) { + frame.audio_level_dbov = *audio_level_dbov; } if (!rtp_sender_audio_->SendAudio(frame)) { RTC_DLOG(LS_ERROR) @@ -866,12 +877,13 @@ void ChannelSend::InitFrameTransformerDelegate( uint32_t rtp_timestamp_with_offset, rtc::ArrayView payload, int64_t absolute_capture_timestamp_ms, - rtc::ArrayView csrcs) { + rtc::ArrayView csrcs, + absl::optional audio_level_dbov) { RTC_DCHECK_RUN_ON(&encoder_queue_checker_); return SendRtpAudio( frameType, payloadType, rtp_timestamp_with_offset - rtp_rtcp_->StartTimestamp(), payload, - absolute_capture_timestamp_ms, csrcs); + absolute_capture_timestamp_ms, csrcs, 
audio_level_dbov); }; frame_transformer_delegate_ = rtc::make_ref_counted( diff --git a/audio/channel_send_frame_transformer_delegate.cc b/audio/channel_send_frame_transformer_delegate.cc index 6d3c011862..8bf19637ab 100644 --- a/audio/channel_send_frame_transformer_delegate.cc +++ b/audio/channel_send_frame_transformer_delegate.cc @@ -59,7 +59,8 @@ class TransformableOutgoingAudioFrame uint32_t ssrc, std::vector csrcs, const std::string& codec_mime_type, - absl::optional sequence_number) + absl::optional sequence_number, + absl::optional audio_level_dbov) : frame_type_(frame_type), payload_type_(payload_type), rtp_timestamp_with_offset_(rtp_timestamp_with_offset), @@ -68,7 +69,8 @@ class TransformableOutgoingAudioFrame ssrc_(ssrc), csrcs_(std::move(csrcs)), codec_mime_type_(codec_mime_type), - sequence_number_(sequence_number) {} + sequence_number_(sequence_number), + audio_level_dbov_(audio_level_dbov) {} ~TransformableOutgoingAudioFrame() override = default; rtc::ArrayView GetData() const override { return payload_; } void SetData(rtc::ArrayView data) override { @@ -101,6 +103,10 @@ class TransformableOutgoingAudioFrame return absolute_capture_timestamp_ms_; } + absl::optional AudioLevel() const override { + return audio_level_dbov_; + } + private: AudioFrameType frame_type_; uint8_t payload_type_; @@ -111,6 +117,7 @@ class TransformableOutgoingAudioFrame std::vector csrcs_; std::string codec_mime_type_; absl::optional sequence_number_; + absl::optional audio_level_dbov_; }; } // namespace @@ -143,14 +150,15 @@ void ChannelSendFrameTransformerDelegate::Transform( size_t payload_size, int64_t absolute_capture_timestamp_ms, uint32_t ssrc, - const std::string& codec_mimetype) { + const std::string& codec_mimetype, + absl::optional audio_level_dbov) { { MutexLock lock(&send_lock_); if (short_circuit_) { send_frame_callback_( frame_type, payload_type, rtp_timestamp, rtc::ArrayView(payload_data, payload_size), - absolute_capture_timestamp_ms, /*csrcs=*/{}); + 
absolute_capture_timestamp_ms, /*csrcs=*/{}, audio_level_dbov); return; } } @@ -159,7 +167,7 @@ void ChannelSendFrameTransformerDelegate::Transform( frame_type, payload_type, rtp_timestamp, payload_data, payload_size, absolute_capture_timestamp_ms, ssrc, /*csrcs=*/std::vector(), codec_mimetype, - /*sequence_number=*/absl::nullopt)); + /*sequence_number=*/absl::nullopt, audio_level_dbov)); } void ChannelSendFrameTransformerDelegate::OnTransformedFrame( @@ -194,7 +202,8 @@ void ChannelSendFrameTransformerDelegate::SendFrame( transformed_frame->AbsoluteCaptureTimestamp() ? *transformed_frame->AbsoluteCaptureTimestamp() : 0, - transformed_frame->GetContributingSources()); + transformed_frame->GetContributingSources(), + transformed_frame->AudioLevel()); } std::unique_ptr CloneSenderAudioFrame( @@ -207,7 +216,8 @@ std::unique_ptr CloneSenderAudioFrame( original->GetPayloadType(), original->GetTimestamp(), original->GetData().data(), original->GetData().size(), original->AbsoluteCaptureTimestamp(), original->GetSsrc(), - std::move(csrcs), original->GetMimeType(), original->SequenceNumber()); + std::move(csrcs), original->GetMimeType(), original->SequenceNumber(), + original->AudioLevel()); } } // namespace webrtc diff --git a/audio/channel_send_frame_transformer_delegate.h b/audio/channel_send_frame_transformer_delegate.h index 30e63ff98b..5573052ded 100644 --- a/audio/channel_send_frame_transformer_delegate.h +++ b/audio/channel_send_frame_transformer_delegate.h @@ -36,7 +36,8 @@ class ChannelSendFrameTransformerDelegate : public TransformedFrameCallback { uint32_t rtp_timestamp_with_offset, rtc::ArrayView payload, int64_t absolute_capture_timestamp_ms, - rtc::ArrayView csrcs)>; + rtc::ArrayView csrcs, + absl::optional audio_level_dbov)>; ChannelSendFrameTransformerDelegate( SendFrameCallback send_frame_callback, rtc::scoped_refptr frame_transformer, @@ -60,7 +61,8 @@ class ChannelSendFrameTransformerDelegate : public TransformedFrameCallback { size_t payload_size, 
int64_t absolute_capture_timestamp_ms, uint32_t ssrc, - const std::string& codec_mime_type); + const std::string& codec_mime_type, + absl::optional audio_level_dbov); // Implements TransformedFrameCallback. Can be called on any thread. void OnTransformedFrame( diff --git a/audio/channel_send_frame_transformer_delegate_unittest.cc b/audio/channel_send_frame_transformer_delegate_unittest.cc index 5c025bb345..e8b7aef29d 100644 --- a/audio/channel_send_frame_transformer_delegate_unittest.cc +++ b/audio/channel_send_frame_transformer_delegate_unittest.cc @@ -28,6 +28,7 @@ using ::testing::_; using ::testing::ElementsAre; using ::testing::ElementsAreArray; using ::testing::NiceMock; +using ::testing::Optional; using ::testing::Return; using ::testing::SaveArg; @@ -45,21 +46,24 @@ class MockChannelSend { uint32_t rtp_timestamp, rtc::ArrayView payload, int64_t absolute_capture_timestamp_ms, - rtc::ArrayView csrcs)); + rtc::ArrayView csrcs, + absl::optional audio_level_dbov)); ChannelSendFrameTransformerDelegate::SendFrameCallback callback() { return [this](AudioFrameType frameType, uint8_t payloadType, uint32_t rtp_timestamp, rtc::ArrayView payload, int64_t absolute_capture_timestamp_ms, - rtc::ArrayView csrcs) { + rtc::ArrayView csrcs, + absl::optional audio_level_dbov) { return SendFrame(frameType, payloadType, rtp_timestamp, payload, - absolute_capture_timestamp_ms, csrcs); + absolute_capture_timestamp_ms, csrcs, audio_level_dbov); }; } }; std::unique_ptr CreateMockReceiverFrame( - const std::vector& csrcs) { + const std::vector& csrcs, + absl::optional audio_level_dbov) { std::unique_ptr mock_frame = std::make_unique>(); rtc::ArrayView payload(mock_data); @@ -69,6 +73,7 @@ std::unique_ptr CreateMockReceiverFrame( .WillByDefault(Return(TransformableFrameInterface::Direction::kReceiver)); ON_CALL(*mock_frame, GetContributingSources).WillByDefault(Return(csrcs)); ON_CALL(*mock_frame, SequenceNumber).WillByDefault(Return(987654321)); + ON_CALL(*mock_frame, 
AudioLevel).WillByDefault(Return(audio_level_dbov)); return mock_frame; } @@ -88,9 +93,9 @@ std::unique_ptr CreateFrame() { std::unique_ptr transform_frame) { frame = std::move(transform_frame); }); - delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, mock_data, - sizeof(mock_data), 0, - /*ssrc=*/0, /*mimeType=*/"audio/opus"); + delegate->Transform( + AudioFrameType::kEmptyFrame, 0, 0, mock_data, sizeof(mock_data), 0, + /*ssrc=*/0, /*mimeType=*/"audio/opus", /*audio_level_dbov=*/123); return absl::WrapUnique( static_cast(frame.release())); } @@ -147,7 +152,8 @@ TEST(ChannelSendFrameTransformerDelegateTest, callback->OnTransformedFrame(std::move(frame)); }); delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, data, sizeof(data), 0, - /*ssrc=*/0, /*mimeType=*/"audio/opus"); + /*ssrc=*/0, /*mimeType=*/"audio/opus", + /*audio_level_dbov=*/31); channel_queue.WaitForPreviouslyPostedTasks(); } @@ -169,16 +175,20 @@ TEST(ChannelSendFrameTransformerDelegateTest, ASSERT_TRUE(callback); const std::vector csrcs = {123, 234, 345, 456}; + const uint8_t audio_level_dbov = 17; EXPECT_CALL(mock_channel, SendFrame).Times(0); - EXPECT_CALL(mock_channel, SendFrame(_, 0, 0, ElementsAreArray(mock_data), _, - ElementsAreArray(csrcs))); + EXPECT_CALL(mock_channel, + SendFrame(_, 0, 0, ElementsAreArray(mock_data), _, + ElementsAreArray(csrcs), Optional(audio_level_dbov))); ON_CALL(*mock_frame_transformer, Transform) .WillByDefault([&](std::unique_ptr frame) { - callback->OnTransformedFrame(CreateMockReceiverFrame(csrcs)); + callback->OnTransformedFrame(CreateMockReceiverFrame( + csrcs, absl::optional(audio_level_dbov))); }); delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, mock_data, sizeof(mock_data), 0, - /*ssrc=*/0, /*mimeType=*/"audio/opus"); + /*ssrc=*/0, /*mimeType=*/"audio/opus", + /*audio_level_dbov=*/absl::nullopt); channel_queue.WaitForPreviouslyPostedTasks(); } @@ -218,7 +228,8 @@ TEST(ChannelSendFrameTransformerDelegateTest, ShortCircuitingSkipsTransform) { 
EXPECT_CALL(mock_channel, SendFrame); const uint8_t data[] = {1, 2, 3, 4}; delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, data, sizeof(data), 0, - /*ssrc=*/0, /*mimeType=*/"audio/opus"); + /*ssrc=*/0, /*mimeType=*/"audio/opus", + /*audio_level_dbov=*/absl::nullopt); } TEST(ChannelSendFrameTransformerDelegateTest, @@ -234,11 +245,13 @@ TEST(ChannelSendFrameTransformerDelegateTest, EXPECT_EQ(cloned_frame->GetMimeType(), frame->GetMimeType()); EXPECT_THAT(cloned_frame->GetContributingSources(), ElementsAreArray(frame->GetContributingSources())); + EXPECT_EQ(cloned_frame->AudioLevel(), frame->AudioLevel()); } TEST(ChannelSendFrameTransformerDelegateTest, CloningReceiverFrameWithCsrcs) { std::unique_ptr frame = - CreateMockReceiverFrame(/*csrcs=*/{123, 234, 345}); + CreateMockReceiverFrame(/*csrcs=*/{123, 234, 345}, + absl::optional(72)); std::unique_ptr cloned_frame = CloneSenderAudioFrame(frame.get()); @@ -254,6 +267,7 @@ TEST(ChannelSendFrameTransformerDelegateTest, CloningReceiverFrameWithCsrcs) { EXPECT_THAT(cloned_frame->GetContributingSources(), ElementsAreArray(frame->GetContributingSources())); EXPECT_EQ(cloned_frame->SequenceNumber(), frame->SequenceNumber()); + EXPECT_EQ(cloned_frame->AudioLevel(), frame->AudioLevel()); } } // namespace diff --git a/audio/channel_send_unittest.cc b/audio/channel_send_unittest.cc index 77d8479519..523408ec19 100644 --- a/audio/channel_send_unittest.cc +++ b/audio/channel_send_unittest.cc @@ -18,6 +18,7 @@ #include "api/environment/environment_factory.h" #include "api/scoped_refptr.h" #include "api/test/mock_frame_transformer.h" +#include "api/test/mock_transformable_audio_frame.h" #include "api/units/time_delta.h" #include "api/units/timestamp.h" #include "call/rtp_transport_controller_send.h" @@ -76,22 +77,29 @@ class ChannelSendTest : public ::testing::Test { ON_CALL(transport_, SendRtp).WillByDefault(Return(true)); } - std::unique_ptr CreateAudioFrame() { + std::unique_ptr CreateAudioFrame(uint8_t data_init_value = 
0) { auto frame = std::make_unique(); frame->sample_rate_hz_ = kSampleRateHz; frame->samples_per_channel_ = kSampleRateHz / 100; frame->num_channels_ = 1; frame->set_absolute_capture_timestamp_ms( time_controller_.GetClock()->TimeInMilliseconds()); + int16_t* dest = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; + i++, dest++) { + *dest = data_init_value; + } return frame; } - void ProcessNextFrame() { - channel_->ProcessAndEncodeAudio(CreateAudioFrame()); + void ProcessNextFrame(std::unique_ptr audio_frame) { + channel_->ProcessAndEncodeAudio(std::move(audio_frame)); // Advance time to process the task queue. time_controller_.AdvanceTime(TimeDelta::Millis(10)); } + void ProcessNextFrame() { ProcessNextFrame(CreateAudioFrame()); } + GlobalSimulatedTimeController time_controller_; webrtc::test::ScopedKeyValueConfig field_trials_; Environment env_; @@ -189,6 +197,117 @@ TEST_F(ChannelSendTest, FrameTransformerGetsCorrectTimestamp) { EXPECT_TRUE_WAIT(sent_timestamp, 1000); EXPECT_EQ(*sent_timestamp, transformable_frame_timestamp); } + +// Ensure that AudioLevel calculations are performed correctly per-packet even +// if there's an async Encoded Frame Transform happening. 
+TEST_F(ChannelSendTest, AudioLevelsAttachedToCorrectTransformedFrame) { + channel_->SetSendAudioLevelIndicationStatus(true, /*id=*/1); + RtpPacketReceived::ExtensionManager extension_manager; + extension_manager.RegisterByType(1, kRtpExtensionAudioLevel); + + rtc::scoped_refptr mock_frame_transformer = + rtc::make_ref_counted(); + channel_->SetEncoderToPacketizerFrameTransformer(mock_frame_transformer); + rtc::scoped_refptr callback; + EXPECT_CALL(*mock_frame_transformer, RegisterTransformedFrameCallback) + .WillOnce(SaveArg<0>(&callback)); + EXPECT_CALL(*mock_frame_transformer, UnregisterTransformedFrameCallback); + + std::vector sent_audio_levels; + auto send_rtp = [&](rtc::ArrayView data, + const PacketOptions& options) { + RtpPacketReceived packet(&extension_manager); + packet.Parse(data); + RTPHeader header; + packet.GetHeader(&header); + sent_audio_levels.push_back(header.extension.audio_level()->level()); + return true; + }; + EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(send_rtp)); + + channel_->StartSend(); + std::vector> frames; + EXPECT_CALL(*mock_frame_transformer, Transform) + .Times(2) + .WillRepeatedly([&](std::unique_ptr frame) { + frames.push_back(std::move(frame)); + }); + + // Insert two frames of 7s which should trigger a new packet. + ProcessNextFrame(CreateAudioFrame(/*data_init_value=*/7)); + ProcessNextFrame(CreateAudioFrame(/*data_init_value=*/7)); + + // Insert two more frames of 3s, meaning a second packet is + // prepared and sent to the transform before the first packet has + // been sent. + ProcessNextFrame(CreateAudioFrame(/*data_init_value=*/3)); + ProcessNextFrame(CreateAudioFrame(/*data_init_value=*/3)); + + // Wait for both packets to be encoded and sent to the transform. 
+ EXPECT_EQ_WAIT(frames.size(), 2ul, 1000); + // Complete the transforms on both frames at the same time + callback->OnTransformedFrame(std::move(frames[0])); + callback->OnTransformedFrame(std::move(frames[1])); + + // Allow things posted back to the encoder queue to run. + time_controller_.AdvanceTime(TimeDelta::Millis(10)); + + // Ensure the audio levels on both sent packets are present and + // match their contents. + EXPECT_EQ_WAIT(sent_audio_levels.size(), 2ul, 1000); + // rms dbov of the packet with raw audio of 7s is 73. + EXPECT_EQ(sent_audio_levels[0], 73); + // rms dbov of the second packet with raw audio of 3s is 81. + EXPECT_EQ(sent_audio_levels[1], 81); +} + +// Ensure that AudioLevels are attached to frames injected into the +// Encoded Frame transform. +TEST_F(ChannelSendTest, AudioLevelsAttachedToInsertedTransformedFrame) { + channel_->SetSendAudioLevelIndicationStatus(true, /*id=*/1); + RtpPacketReceived::ExtensionManager extension_manager; + extension_manager.RegisterByType(1, kRtpExtensionAudioLevel); + + rtc::scoped_refptr mock_frame_transformer = + rtc::make_ref_counted(); + channel_->SetEncoderToPacketizerFrameTransformer(mock_frame_transformer); + rtc::scoped_refptr callback; + EXPECT_CALL(*mock_frame_transformer, RegisterTransformedFrameCallback) + .WillOnce(SaveArg<0>(&callback)); + EXPECT_CALL(*mock_frame_transformer, UnregisterTransformedFrameCallback); + + std::optional sent_audio_level; + auto send_rtp = [&](rtc::ArrayView data, + const PacketOptions& options) { + RtpPacketReceived packet(&extension_manager); + packet.Parse(data); + RTPHeader header; + packet.GetHeader(&header); + sent_audio_level = header.extension.audio_level()->level(); + return true; + }; + EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(send_rtp)); + + channel_->StartSend(); + + time_controller_.AdvanceTime(TimeDelta::Millis(10)); + // Inject a frame encoded elsewhere. 
+ auto mock_frame = std::make_unique>(); + uint8_t audio_level = 67; + ON_CALL(*mock_frame, AudioLevel()).WillByDefault(Return(audio_level)); + uint8_t payload[10]; + ON_CALL(*mock_frame, GetData()) + .WillByDefault(Return(rtc::ArrayView(&payload[0], 10))); + EXPECT_TRUE_WAIT(callback, 1000); + callback->OnTransformedFrame(std::move(mock_frame)); + + // Allow things posted back to the encoder queue to run. + time_controller_.AdvanceTime(TimeDelta::Millis(10)); + + // Ensure the audio level is set on the sent packet. + EXPECT_TRUE_WAIT(sent_audio_level, 1000); + EXPECT_EQ(*sent_audio_level, audio_level); +} } // namespace } // namespace voe } // namespace webrtc