Calculate the audio level of audio packets before encoded transforms

Calculate the RMS audio level of audio packets being sent before
invoking an encoded frame transform, and pass them with the encoded frame
object.

Before this, the audio level was calculated at send time by having rms_level_ look at all audio samples encoded since the last send. This
is fine without a transform, as this is done synchronously after
encoding, but with an async transform which might take arbitrarily long,
we could end up marking older audio packets with newer audio levels, or
not marking them at all.

This also makes things work correctly if external encoded frames are
injected from elsewhere to be sent, and exposes the AudioLevel on the
TransformableFrame interface.

Bug: chromium:337193823, webrtc:42226202
Change-Id: If55d2c1d30dc03408ca9fb0193d791db44428316
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/349263
Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Commit-Queue: Tony Herre <herre@google.com>
Cr-Commit-Position: refs/heads/main@{#42193}
This commit is contained in:
Tony Herre 2024-04-29 15:13:48 +02:00 committed by WebRTC LUCI CQ
parent 047238ebda
commit 64437e8cc0
9 changed files with 275 additions and 34 deletions

View file

@ -82,6 +82,11 @@ class TransformableAudioFrameInterface : public TransformableFrameInterface {
// TODO(crbug.com/1456628): Change this to pure virtual after it // TODO(crbug.com/1456628): Change this to pure virtual after it
// is implemented everywhere. // is implemented everywhere.
virtual FrameType Type() const { return FrameType::kEmptyFrame; } virtual FrameType Type() const { return FrameType::kEmptyFrame; }
// Audio level in -dBov. Values range from 0 to 127, representing 0 to -127
// dBov. 127 represents digital silence. Only present on remote frames if
// the audio level header extension was included.
virtual absl::optional<uint8_t> AudioLevel() const = 0;
}; };
// Objects implement this interface to be notified with the transformed frame. // Objects implement this interface to be notified with the transformed frame.

View file

@ -47,6 +47,7 @@ class MockTransformableAudioFrame : public TransformableAudioFrameInterface {
Type, Type,
(), (),
(const, override)); (const, override));
MOCK_METHOD(absl::optional<uint8_t>, AudioLevel, (), (const, override));
}; };
} // namespace webrtc } // namespace webrtc

View file

@ -70,6 +70,13 @@ class TransformableIncomingAudioFrame
: FrameType::kAudioFrameCN; : FrameType::kAudioFrameCN;
} }
absl::optional<uint8_t> AudioLevel() const override {
if (header_.extension.audio_level()) {
return header_.extension.audio_level()->level();
}
return absl::nullopt;
}
private: private:
rtc::Buffer payload_; rtc::Buffer payload_;
RTPHeader header_; RTPHeader header_;

View file

@ -174,5 +174,76 @@ TEST(ChannelReceiveFrameTransformerDelegateTest,
delegate->Transform(packet, header, /*ssrc=*/1111, /*mimeType=*/"audio/opus"); delegate->Transform(packet, header, /*ssrc=*/1111, /*mimeType=*/"audio/opus");
} }
TEST(ChannelReceiveFrameTransformerDelegateTest,
AudioLevelAbsentWithoutExtension) {
rtc::AutoThread main_thread;
rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer =
rtc::make_ref_counted<NiceMock<MockFrameTransformer>>();
rtc::scoped_refptr<ChannelReceiveFrameTransformerDelegate> delegate =
rtc::make_ref_counted<ChannelReceiveFrameTransformerDelegate>(
/*receive_frame_callback=*/nullptr, mock_frame_transformer,
rtc::Thread::Current());
rtc::scoped_refptr<TransformedFrameCallback> callback;
EXPECT_CALL(*mock_frame_transformer, RegisterTransformedFrameCallback)
.WillOnce(SaveArg<0>(&callback));
delegate->Init();
ASSERT_TRUE(callback);
const uint8_t data[] = {1, 2, 3, 4};
rtc::ArrayView<const uint8_t> packet(data, sizeof(data));
RTPHeader header;
std::unique_ptr<TransformableFrameInterface> frame;
ON_CALL(*mock_frame_transformer, Transform)
.WillByDefault(
[&](std::unique_ptr<TransformableFrameInterface> transform_frame) {
frame = std::move(transform_frame);
});
delegate->Transform(packet, header, /*ssrc=*/1111, /*mimeType=*/"audio/opus");
EXPECT_TRUE(frame);
auto* audio_frame =
static_cast<TransformableAudioFrameInterface*>(frame.get());
EXPECT_FALSE(audio_frame->AudioLevel());
EXPECT_EQ(audio_frame->Type(),
TransformableAudioFrameInterface::FrameType::kAudioFrameCN);
}
TEST(ChannelReceiveFrameTransformerDelegateTest,
AudioLevelPresentWithExtension) {
rtc::AutoThread main_thread;
rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer =
rtc::make_ref_counted<NiceMock<MockFrameTransformer>>();
rtc::scoped_refptr<ChannelReceiveFrameTransformerDelegate> delegate =
rtc::make_ref_counted<ChannelReceiveFrameTransformerDelegate>(
/*receive_frame_callback=*/nullptr, mock_frame_transformer,
rtc::Thread::Current());
rtc::scoped_refptr<TransformedFrameCallback> callback;
EXPECT_CALL(*mock_frame_transformer, RegisterTransformedFrameCallback)
.WillOnce(SaveArg<0>(&callback));
delegate->Init();
ASSERT_TRUE(callback);
const uint8_t data[] = {1, 2, 3, 4};
rtc::ArrayView<const uint8_t> packet(data, sizeof(data));
RTPHeader header;
uint8_t audio_level_dbov = 67;
AudioLevel audio_level(/*voice_activity=*/true, audio_level_dbov);
header.extension.set_audio_level(audio_level);
std::unique_ptr<TransformableFrameInterface> frame;
ON_CALL(*mock_frame_transformer, Transform)
.WillByDefault(
[&](std::unique_ptr<TransformableFrameInterface> transform_frame) {
frame = std::move(transform_frame);
});
delegate->Transform(packet, header, /*ssrc=*/1111, /*mimeType=*/"audio/opus");
EXPECT_TRUE(frame);
auto* audio_frame =
static_cast<TransformableAudioFrameInterface*>(frame.get());
EXPECT_EQ(*audio_frame->AudioLevel(), audio_level_dbov);
EXPECT_EQ(audio_frame->Type(),
TransformableAudioFrameInterface::FrameType::kAudioFrameSpeech);
}
} // namespace } // namespace
} // namespace webrtc } // namespace webrtc

View file

@ -170,7 +170,8 @@ class ChannelSend : public ChannelSendInterface,
uint32_t rtp_timestamp_without_offset, uint32_t rtp_timestamp_without_offset,
rtc::ArrayView<const uint8_t> payload, rtc::ArrayView<const uint8_t> payload,
int64_t absolute_capture_timestamp_ms, int64_t absolute_capture_timestamp_ms,
rtc::ArrayView<const uint32_t> csrcs) rtc::ArrayView<const uint32_t> csrcs,
absl::optional<uint8_t> audio_level_dbov)
RTC_RUN_ON(encoder_queue_checker_); RTC_RUN_ON(encoder_queue_checker_);
void OnReceivedRtt(int64_t rtt_ms); void OnReceivedRtt(int64_t rtt_ms);
@ -280,6 +281,14 @@ int32_t ChannelSend::SendData(AudioFrameType frameType,
int64_t absolute_capture_timestamp_ms) { int64_t absolute_capture_timestamp_ms) {
RTC_DCHECK_RUN_ON(&encoder_queue_checker_); RTC_DCHECK_RUN_ON(&encoder_queue_checker_);
rtc::ArrayView<const uint8_t> payload(payloadData, payloadSize); rtc::ArrayView<const uint8_t> payload(payloadData, payloadSize);
absl::optional<uint8_t> audio_level_dbov;
if (include_audio_level_indication_.load()) {
// Take the averaged audio levels from rms_level_ and reset it before
// invoking any async transformer.
audio_level_dbov = rms_level_.Average();
}
if (frame_transformer_delegate_) { if (frame_transformer_delegate_) {
// Asynchronously transform the payload before sending it. After the payload // Asynchronously transform the payload before sending it. After the payload
// is transformed, the delegate will call SendRtpAudio to send it. // is transformed, the delegate will call SendRtpAudio to send it.
@ -290,11 +299,12 @@ int32_t ChannelSend::SendData(AudioFrameType frameType,
frame_transformer_delegate_->Transform( frame_transformer_delegate_->Transform(
frameType, payloadType, rtp_timestamp + rtp_rtcp_->StartTimestamp(), frameType, payloadType, rtp_timestamp + rtp_rtcp_->StartTimestamp(),
payloadData, payloadSize, absolute_capture_timestamp_ms, payloadData, payloadSize, absolute_capture_timestamp_ms,
rtp_rtcp_->SSRC(), mime_type.str()); rtp_rtcp_->SSRC(), mime_type.str(), audio_level_dbov);
return 0; return 0;
} }
return SendRtpAudio(frameType, payloadType, rtp_timestamp, payload, return SendRtpAudio(frameType, payloadType, rtp_timestamp, payload,
absolute_capture_timestamp_ms, /*csrcs=*/{}); absolute_capture_timestamp_ms, /*csrcs=*/{},
audio_level_dbov);
} }
int32_t ChannelSend::SendRtpAudio(AudioFrameType frameType, int32_t ChannelSend::SendRtpAudio(AudioFrameType frameType,
@ -302,7 +312,8 @@ int32_t ChannelSend::SendRtpAudio(AudioFrameType frameType,
uint32_t rtp_timestamp_without_offset, uint32_t rtp_timestamp_without_offset,
rtc::ArrayView<const uint8_t> payload, rtc::ArrayView<const uint8_t> payload,
int64_t absolute_capture_timestamp_ms, int64_t absolute_capture_timestamp_ms,
rtc::ArrayView<const uint32_t> csrcs) { rtc::ArrayView<const uint32_t> csrcs,
absl::optional<uint8_t> audio_level_dbov) {
// E2EE Custom Audio Frame Encryption (This is optional). // E2EE Custom Audio Frame Encryption (This is optional).
// Keep this buffer around for the lifetime of the send call. // Keep this buffer around for the lifetime of the send call.
rtc::Buffer encrypted_audio_payload; rtc::Buffer encrypted_audio_payload;
@ -369,8 +380,8 @@ int32_t ChannelSend::SendRtpAudio(AudioFrameType frameType,
if (absolute_capture_timestamp_ms > 0) { if (absolute_capture_timestamp_ms > 0) {
frame.capture_time = Timestamp::Millis(absolute_capture_timestamp_ms); frame.capture_time = Timestamp::Millis(absolute_capture_timestamp_ms);
} }
if (include_audio_level_indication_.load()) { if (include_audio_level_indication_.load() && audio_level_dbov) {
frame.audio_level_dbov = rms_level_.Average(); frame.audio_level_dbov = *audio_level_dbov;
} }
if (!rtp_sender_audio_->SendAudio(frame)) { if (!rtp_sender_audio_->SendAudio(frame)) {
RTC_DLOG(LS_ERROR) RTC_DLOG(LS_ERROR)
@ -866,12 +877,13 @@ void ChannelSend::InitFrameTransformerDelegate(
uint32_t rtp_timestamp_with_offset, uint32_t rtp_timestamp_with_offset,
rtc::ArrayView<const uint8_t> payload, rtc::ArrayView<const uint8_t> payload,
int64_t absolute_capture_timestamp_ms, int64_t absolute_capture_timestamp_ms,
rtc::ArrayView<const uint32_t> csrcs) { rtc::ArrayView<const uint32_t> csrcs,
absl::optional<uint8_t> audio_level_dbov) {
RTC_DCHECK_RUN_ON(&encoder_queue_checker_); RTC_DCHECK_RUN_ON(&encoder_queue_checker_);
return SendRtpAudio( return SendRtpAudio(
frameType, payloadType, frameType, payloadType,
rtp_timestamp_with_offset - rtp_rtcp_->StartTimestamp(), payload, rtp_timestamp_with_offset - rtp_rtcp_->StartTimestamp(), payload,
absolute_capture_timestamp_ms, csrcs); absolute_capture_timestamp_ms, csrcs, audio_level_dbov);
}; };
frame_transformer_delegate_ = frame_transformer_delegate_ =
rtc::make_ref_counted<ChannelSendFrameTransformerDelegate>( rtc::make_ref_counted<ChannelSendFrameTransformerDelegate>(

View file

@ -59,7 +59,8 @@ class TransformableOutgoingAudioFrame
uint32_t ssrc, uint32_t ssrc,
std::vector<uint32_t> csrcs, std::vector<uint32_t> csrcs,
const std::string& codec_mime_type, const std::string& codec_mime_type,
absl::optional<uint16_t> sequence_number) absl::optional<uint16_t> sequence_number,
absl::optional<uint8_t> audio_level_dbov)
: frame_type_(frame_type), : frame_type_(frame_type),
payload_type_(payload_type), payload_type_(payload_type),
rtp_timestamp_with_offset_(rtp_timestamp_with_offset), rtp_timestamp_with_offset_(rtp_timestamp_with_offset),
@ -68,7 +69,8 @@ class TransformableOutgoingAudioFrame
ssrc_(ssrc), ssrc_(ssrc),
csrcs_(std::move(csrcs)), csrcs_(std::move(csrcs)),
codec_mime_type_(codec_mime_type), codec_mime_type_(codec_mime_type),
sequence_number_(sequence_number) {} sequence_number_(sequence_number),
audio_level_dbov_(audio_level_dbov) {}
~TransformableOutgoingAudioFrame() override = default; ~TransformableOutgoingAudioFrame() override = default;
rtc::ArrayView<const uint8_t> GetData() const override { return payload_; } rtc::ArrayView<const uint8_t> GetData() const override { return payload_; }
void SetData(rtc::ArrayView<const uint8_t> data) override { void SetData(rtc::ArrayView<const uint8_t> data) override {
@ -101,6 +103,10 @@ class TransformableOutgoingAudioFrame
return absolute_capture_timestamp_ms_; return absolute_capture_timestamp_ms_;
} }
absl::optional<uint8_t> AudioLevel() const override {
return audio_level_dbov_;
}
private: private:
AudioFrameType frame_type_; AudioFrameType frame_type_;
uint8_t payload_type_; uint8_t payload_type_;
@ -111,6 +117,7 @@ class TransformableOutgoingAudioFrame
std::vector<uint32_t> csrcs_; std::vector<uint32_t> csrcs_;
std::string codec_mime_type_; std::string codec_mime_type_;
absl::optional<uint16_t> sequence_number_; absl::optional<uint16_t> sequence_number_;
absl::optional<uint8_t> audio_level_dbov_;
}; };
} // namespace } // namespace
@ -143,14 +150,15 @@ void ChannelSendFrameTransformerDelegate::Transform(
size_t payload_size, size_t payload_size,
int64_t absolute_capture_timestamp_ms, int64_t absolute_capture_timestamp_ms,
uint32_t ssrc, uint32_t ssrc,
const std::string& codec_mimetype) { const std::string& codec_mimetype,
absl::optional<uint8_t> audio_level_dbov) {
{ {
MutexLock lock(&send_lock_); MutexLock lock(&send_lock_);
if (short_circuit_) { if (short_circuit_) {
send_frame_callback_( send_frame_callback_(
frame_type, payload_type, rtp_timestamp, frame_type, payload_type, rtp_timestamp,
rtc::ArrayView<const uint8_t>(payload_data, payload_size), rtc::ArrayView<const uint8_t>(payload_data, payload_size),
absolute_capture_timestamp_ms, /*csrcs=*/{}); absolute_capture_timestamp_ms, /*csrcs=*/{}, audio_level_dbov);
return; return;
} }
} }
@ -159,7 +167,7 @@ void ChannelSendFrameTransformerDelegate::Transform(
frame_type, payload_type, rtp_timestamp, payload_data, payload_size, frame_type, payload_type, rtp_timestamp, payload_data, payload_size,
absolute_capture_timestamp_ms, ssrc, absolute_capture_timestamp_ms, ssrc,
/*csrcs=*/std::vector<uint32_t>(), codec_mimetype, /*csrcs=*/std::vector<uint32_t>(), codec_mimetype,
/*sequence_number=*/absl::nullopt)); /*sequence_number=*/absl::nullopt, audio_level_dbov));
} }
void ChannelSendFrameTransformerDelegate::OnTransformedFrame( void ChannelSendFrameTransformerDelegate::OnTransformedFrame(
@ -194,7 +202,8 @@ void ChannelSendFrameTransformerDelegate::SendFrame(
transformed_frame->AbsoluteCaptureTimestamp() transformed_frame->AbsoluteCaptureTimestamp()
? *transformed_frame->AbsoluteCaptureTimestamp() ? *transformed_frame->AbsoluteCaptureTimestamp()
: 0, : 0,
transformed_frame->GetContributingSources()); transformed_frame->GetContributingSources(),
transformed_frame->AudioLevel());
} }
std::unique_ptr<TransformableAudioFrameInterface> CloneSenderAudioFrame( std::unique_ptr<TransformableAudioFrameInterface> CloneSenderAudioFrame(
@ -207,7 +216,8 @@ std::unique_ptr<TransformableAudioFrameInterface> CloneSenderAudioFrame(
original->GetPayloadType(), original->GetTimestamp(), original->GetPayloadType(), original->GetTimestamp(),
original->GetData().data(), original->GetData().size(), original->GetData().data(), original->GetData().size(),
original->AbsoluteCaptureTimestamp(), original->GetSsrc(), original->AbsoluteCaptureTimestamp(), original->GetSsrc(),
std::move(csrcs), original->GetMimeType(), original->SequenceNumber()); std::move(csrcs), original->GetMimeType(), original->SequenceNumber(),
original->AudioLevel());
} }
} // namespace webrtc } // namespace webrtc

View file

@ -36,7 +36,8 @@ class ChannelSendFrameTransformerDelegate : public TransformedFrameCallback {
uint32_t rtp_timestamp_with_offset, uint32_t rtp_timestamp_with_offset,
rtc::ArrayView<const uint8_t> payload, rtc::ArrayView<const uint8_t> payload,
int64_t absolute_capture_timestamp_ms, int64_t absolute_capture_timestamp_ms,
rtc::ArrayView<const uint32_t> csrcs)>; rtc::ArrayView<const uint32_t> csrcs,
absl::optional<uint8_t> audio_level_dbov)>;
ChannelSendFrameTransformerDelegate( ChannelSendFrameTransformerDelegate(
SendFrameCallback send_frame_callback, SendFrameCallback send_frame_callback,
rtc::scoped_refptr<FrameTransformerInterface> frame_transformer, rtc::scoped_refptr<FrameTransformerInterface> frame_transformer,
@ -60,7 +61,8 @@ class ChannelSendFrameTransformerDelegate : public TransformedFrameCallback {
size_t payload_size, size_t payload_size,
int64_t absolute_capture_timestamp_ms, int64_t absolute_capture_timestamp_ms,
uint32_t ssrc, uint32_t ssrc,
const std::string& codec_mime_type); const std::string& codec_mime_type,
absl::optional<uint8_t> audio_level_dbov);
// Implements TransformedFrameCallback. Can be called on any thread. // Implements TransformedFrameCallback. Can be called on any thread.
void OnTransformedFrame( void OnTransformedFrame(

View file

@ -28,6 +28,7 @@ using ::testing::_;
using ::testing::ElementsAre; using ::testing::ElementsAre;
using ::testing::ElementsAreArray; using ::testing::ElementsAreArray;
using ::testing::NiceMock; using ::testing::NiceMock;
using ::testing::Optional;
using ::testing::Return; using ::testing::Return;
using ::testing::SaveArg; using ::testing::SaveArg;
@ -45,21 +46,24 @@ class MockChannelSend {
uint32_t rtp_timestamp, uint32_t rtp_timestamp,
rtc::ArrayView<const uint8_t> payload, rtc::ArrayView<const uint8_t> payload,
int64_t absolute_capture_timestamp_ms, int64_t absolute_capture_timestamp_ms,
rtc::ArrayView<const uint32_t> csrcs)); rtc::ArrayView<const uint32_t> csrcs,
absl::optional<uint8_t> audio_level_dbov));
ChannelSendFrameTransformerDelegate::SendFrameCallback callback() { ChannelSendFrameTransformerDelegate::SendFrameCallback callback() {
return [this](AudioFrameType frameType, uint8_t payloadType, return [this](AudioFrameType frameType, uint8_t payloadType,
uint32_t rtp_timestamp, rtc::ArrayView<const uint8_t> payload, uint32_t rtp_timestamp, rtc::ArrayView<const uint8_t> payload,
int64_t absolute_capture_timestamp_ms, int64_t absolute_capture_timestamp_ms,
rtc::ArrayView<const uint32_t> csrcs) { rtc::ArrayView<const uint32_t> csrcs,
absl::optional<uint8_t> audio_level_dbov) {
return SendFrame(frameType, payloadType, rtp_timestamp, payload, return SendFrame(frameType, payloadType, rtp_timestamp, payload,
absolute_capture_timestamp_ms, csrcs); absolute_capture_timestamp_ms, csrcs, audio_level_dbov);
}; };
} }
}; };
std::unique_ptr<TransformableAudioFrameInterface> CreateMockReceiverFrame( std::unique_ptr<TransformableAudioFrameInterface> CreateMockReceiverFrame(
const std::vector<uint32_t>& csrcs) { const std::vector<uint32_t>& csrcs,
absl::optional<uint8_t> audio_level_dbov) {
std::unique_ptr<MockTransformableAudioFrame> mock_frame = std::unique_ptr<MockTransformableAudioFrame> mock_frame =
std::make_unique<NiceMock<MockTransformableAudioFrame>>(); std::make_unique<NiceMock<MockTransformableAudioFrame>>();
rtc::ArrayView<const uint8_t> payload(mock_data); rtc::ArrayView<const uint8_t> payload(mock_data);
@ -69,6 +73,7 @@ std::unique_ptr<TransformableAudioFrameInterface> CreateMockReceiverFrame(
.WillByDefault(Return(TransformableFrameInterface::Direction::kReceiver)); .WillByDefault(Return(TransformableFrameInterface::Direction::kReceiver));
ON_CALL(*mock_frame, GetContributingSources).WillByDefault(Return(csrcs)); ON_CALL(*mock_frame, GetContributingSources).WillByDefault(Return(csrcs));
ON_CALL(*mock_frame, SequenceNumber).WillByDefault(Return(987654321)); ON_CALL(*mock_frame, SequenceNumber).WillByDefault(Return(987654321));
ON_CALL(*mock_frame, AudioLevel).WillByDefault(Return(audio_level_dbov));
return mock_frame; return mock_frame;
} }
@ -88,9 +93,9 @@ std::unique_ptr<TransformableAudioFrameInterface> CreateFrame() {
std::unique_ptr<TransformableFrameInterface> transform_frame) { std::unique_ptr<TransformableFrameInterface> transform_frame) {
frame = std::move(transform_frame); frame = std::move(transform_frame);
}); });
delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, mock_data, delegate->Transform(
sizeof(mock_data), 0, AudioFrameType::kEmptyFrame, 0, 0, mock_data, sizeof(mock_data), 0,
/*ssrc=*/0, /*mimeType=*/"audio/opus"); /*ssrc=*/0, /*mimeType=*/"audio/opus", /*audio_level_dbov=*/123);
return absl::WrapUnique( return absl::WrapUnique(
static_cast<webrtc::TransformableAudioFrameInterface*>(frame.release())); static_cast<webrtc::TransformableAudioFrameInterface*>(frame.release()));
} }
@ -147,7 +152,8 @@ TEST(ChannelSendFrameTransformerDelegateTest,
callback->OnTransformedFrame(std::move(frame)); callback->OnTransformedFrame(std::move(frame));
}); });
delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, data, sizeof(data), 0, delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, data, sizeof(data), 0,
/*ssrc=*/0, /*mimeType=*/"audio/opus"); /*ssrc=*/0, /*mimeType=*/"audio/opus",
/*audio_level_dbov=*/31);
channel_queue.WaitForPreviouslyPostedTasks(); channel_queue.WaitForPreviouslyPostedTasks();
} }
@ -169,16 +175,20 @@ TEST(ChannelSendFrameTransformerDelegateTest,
ASSERT_TRUE(callback); ASSERT_TRUE(callback);
const std::vector<uint32_t> csrcs = {123, 234, 345, 456}; const std::vector<uint32_t> csrcs = {123, 234, 345, 456};
const uint8_t audio_level_dbov = 17;
EXPECT_CALL(mock_channel, SendFrame).Times(0); EXPECT_CALL(mock_channel, SendFrame).Times(0);
EXPECT_CALL(mock_channel, SendFrame(_, 0, 0, ElementsAreArray(mock_data), _, EXPECT_CALL(mock_channel,
ElementsAreArray(csrcs))); SendFrame(_, 0, 0, ElementsAreArray(mock_data), _,
ElementsAreArray(csrcs), Optional(audio_level_dbov)));
ON_CALL(*mock_frame_transformer, Transform) ON_CALL(*mock_frame_transformer, Transform)
.WillByDefault([&](std::unique_ptr<TransformableFrameInterface> frame) { .WillByDefault([&](std::unique_ptr<TransformableFrameInterface> frame) {
callback->OnTransformedFrame(CreateMockReceiverFrame(csrcs)); callback->OnTransformedFrame(CreateMockReceiverFrame(
csrcs, absl::optional<uint8_t>(audio_level_dbov)));
}); });
delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, mock_data, delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, mock_data,
sizeof(mock_data), 0, sizeof(mock_data), 0,
/*ssrc=*/0, /*mimeType=*/"audio/opus"); /*ssrc=*/0, /*mimeType=*/"audio/opus",
/*audio_level_dbov=*/absl::nullopt);
channel_queue.WaitForPreviouslyPostedTasks(); channel_queue.WaitForPreviouslyPostedTasks();
} }
@ -218,7 +228,8 @@ TEST(ChannelSendFrameTransformerDelegateTest, ShortCircuitingSkipsTransform) {
EXPECT_CALL(mock_channel, SendFrame); EXPECT_CALL(mock_channel, SendFrame);
const uint8_t data[] = {1, 2, 3, 4}; const uint8_t data[] = {1, 2, 3, 4};
delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, data, sizeof(data), 0, delegate->Transform(AudioFrameType::kEmptyFrame, 0, 0, data, sizeof(data), 0,
/*ssrc=*/0, /*mimeType=*/"audio/opus"); /*ssrc=*/0, /*mimeType=*/"audio/opus",
/*audio_level_dbov=*/absl::nullopt);
} }
TEST(ChannelSendFrameTransformerDelegateTest, TEST(ChannelSendFrameTransformerDelegateTest,
@ -234,11 +245,13 @@ TEST(ChannelSendFrameTransformerDelegateTest,
EXPECT_EQ(cloned_frame->GetMimeType(), frame->GetMimeType()); EXPECT_EQ(cloned_frame->GetMimeType(), frame->GetMimeType());
EXPECT_THAT(cloned_frame->GetContributingSources(), EXPECT_THAT(cloned_frame->GetContributingSources(),
ElementsAreArray(frame->GetContributingSources())); ElementsAreArray(frame->GetContributingSources()));
EXPECT_EQ(cloned_frame->AudioLevel(), frame->AudioLevel());
} }
TEST(ChannelSendFrameTransformerDelegateTest, CloningReceiverFrameWithCsrcs) { TEST(ChannelSendFrameTransformerDelegateTest, CloningReceiverFrameWithCsrcs) {
std::unique_ptr<TransformableAudioFrameInterface> frame = std::unique_ptr<TransformableAudioFrameInterface> frame =
CreateMockReceiverFrame(/*csrcs=*/{123, 234, 345}); CreateMockReceiverFrame(/*csrcs=*/{123, 234, 345},
absl::optional<uint8_t>(72));
std::unique_ptr<TransformableAudioFrameInterface> cloned_frame = std::unique_ptr<TransformableAudioFrameInterface> cloned_frame =
CloneSenderAudioFrame(frame.get()); CloneSenderAudioFrame(frame.get());
@ -254,6 +267,7 @@ TEST(ChannelSendFrameTransformerDelegateTest, CloningReceiverFrameWithCsrcs) {
EXPECT_THAT(cloned_frame->GetContributingSources(), EXPECT_THAT(cloned_frame->GetContributingSources(),
ElementsAreArray(frame->GetContributingSources())); ElementsAreArray(frame->GetContributingSources()));
EXPECT_EQ(cloned_frame->SequenceNumber(), frame->SequenceNumber()); EXPECT_EQ(cloned_frame->SequenceNumber(), frame->SequenceNumber());
EXPECT_EQ(cloned_frame->AudioLevel(), frame->AudioLevel());
} }
} // namespace } // namespace

View file

@ -18,6 +18,7 @@
#include "api/environment/environment_factory.h" #include "api/environment/environment_factory.h"
#include "api/scoped_refptr.h" #include "api/scoped_refptr.h"
#include "api/test/mock_frame_transformer.h" #include "api/test/mock_frame_transformer.h"
#include "api/test/mock_transformable_audio_frame.h"
#include "api/units/time_delta.h" #include "api/units/time_delta.h"
#include "api/units/timestamp.h" #include "api/units/timestamp.h"
#include "call/rtp_transport_controller_send.h" #include "call/rtp_transport_controller_send.h"
@ -76,22 +77,29 @@ class ChannelSendTest : public ::testing::Test {
ON_CALL(transport_, SendRtp).WillByDefault(Return(true)); ON_CALL(transport_, SendRtp).WillByDefault(Return(true));
} }
std::unique_ptr<AudioFrame> CreateAudioFrame() { std::unique_ptr<AudioFrame> CreateAudioFrame(uint8_t data_init_value = 0) {
auto frame = std::make_unique<AudioFrame>(); auto frame = std::make_unique<AudioFrame>();
frame->sample_rate_hz_ = kSampleRateHz; frame->sample_rate_hz_ = kSampleRateHz;
frame->samples_per_channel_ = kSampleRateHz / 100; frame->samples_per_channel_ = kSampleRateHz / 100;
frame->num_channels_ = 1; frame->num_channels_ = 1;
frame->set_absolute_capture_timestamp_ms( frame->set_absolute_capture_timestamp_ms(
time_controller_.GetClock()->TimeInMilliseconds()); time_controller_.GetClock()->TimeInMilliseconds());
int16_t* dest = frame->mutable_data();
for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
i++, dest++) {
*dest = data_init_value;
}
return frame; return frame;
} }
void ProcessNextFrame() { void ProcessNextFrame(std::unique_ptr<AudioFrame> audio_frame) {
channel_->ProcessAndEncodeAudio(CreateAudioFrame()); channel_->ProcessAndEncodeAudio(std::move(audio_frame));
// Advance time to process the task queue. // Advance time to process the task queue.
time_controller_.AdvanceTime(TimeDelta::Millis(10)); time_controller_.AdvanceTime(TimeDelta::Millis(10));
} }
void ProcessNextFrame() { ProcessNextFrame(CreateAudioFrame()); }
GlobalSimulatedTimeController time_controller_; GlobalSimulatedTimeController time_controller_;
webrtc::test::ScopedKeyValueConfig field_trials_; webrtc::test::ScopedKeyValueConfig field_trials_;
Environment env_; Environment env_;
@ -189,6 +197,117 @@ TEST_F(ChannelSendTest, FrameTransformerGetsCorrectTimestamp) {
EXPECT_TRUE_WAIT(sent_timestamp, 1000); EXPECT_TRUE_WAIT(sent_timestamp, 1000);
EXPECT_EQ(*sent_timestamp, transformable_frame_timestamp); EXPECT_EQ(*sent_timestamp, transformable_frame_timestamp);
} }
// Ensure that AudioLevel calculations are performed correctly per-packet even
// if there's an async Encoded Frame Transform happening.
TEST_F(ChannelSendTest, AudioLevelsAttachedToCorrectTransformedFrame) {
channel_->SetSendAudioLevelIndicationStatus(true, /*id=*/1);
RtpPacketReceived::ExtensionManager extension_manager;
extension_manager.RegisterByType(1, kRtpExtensionAudioLevel);
rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer =
rtc::make_ref_counted<MockFrameTransformer>();
channel_->SetEncoderToPacketizerFrameTransformer(mock_frame_transformer);
rtc::scoped_refptr<TransformedFrameCallback> callback;
EXPECT_CALL(*mock_frame_transformer, RegisterTransformedFrameCallback)
.WillOnce(SaveArg<0>(&callback));
EXPECT_CALL(*mock_frame_transformer, UnregisterTransformedFrameCallback);
std::vector<uint8_t> sent_audio_levels;
auto send_rtp = [&](rtc::ArrayView<const uint8_t> data,
const PacketOptions& options) {
RtpPacketReceived packet(&extension_manager);
packet.Parse(data);
RTPHeader header;
packet.GetHeader(&header);
sent_audio_levels.push_back(header.extension.audio_level()->level());
return true;
};
EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(send_rtp));
channel_->StartSend();
std::vector<std::unique_ptr<TransformableFrameInterface>> frames;
EXPECT_CALL(*mock_frame_transformer, Transform)
.Times(2)
.WillRepeatedly([&](std::unique_ptr<TransformableFrameInterface> frame) {
frames.push_back(std::move(frame));
});
// Insert two frames of 7s which should trigger a new packet.
ProcessNextFrame(CreateAudioFrame(/*data_init_value=*/7));
ProcessNextFrame(CreateAudioFrame(/*data_init_value=*/7));
// Insert two more frames of 3s, meaning a second packet is
// prepared and sent to the transform before the first packet has
// been sent.
ProcessNextFrame(CreateAudioFrame(/*data_init_value=*/3));
ProcessNextFrame(CreateAudioFrame(/*data_init_value=*/3));
// Wait for both packets to be encoded and sent to the transform.
EXPECT_EQ_WAIT(frames.size(), 2ul, 1000);
// Complete the transforms on both frames at the same time
callback->OnTransformedFrame(std::move(frames[0]));
callback->OnTransformedFrame(std::move(frames[1]));
// Allow things posted back to the encoder queue to run.
time_controller_.AdvanceTime(TimeDelta::Millis(10));
// Ensure the audio levels on both sent packets is present and
// matches their contents.
EXPECT_EQ_WAIT(sent_audio_levels.size(), 2ul, 1000);
// rms dbov of the packet with raw audio of 7s is 73.
EXPECT_EQ(sent_audio_levels[0], 73);
// rms dbov of the second packet with raw audio of 3s is 81.
EXPECT_EQ(sent_audio_levels[1], 81);
}
// Ensure that AudioLevels are attached to frames injected into the
// Encoded Frame transform.
TEST_F(ChannelSendTest, AudioLevelsAttachedToInsertedTransformedFrame) {
channel_->SetSendAudioLevelIndicationStatus(true, /*id=*/1);
RtpPacketReceived::ExtensionManager extension_manager;
extension_manager.RegisterByType(1, kRtpExtensionAudioLevel);
rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer =
rtc::make_ref_counted<MockFrameTransformer>();
channel_->SetEncoderToPacketizerFrameTransformer(mock_frame_transformer);
rtc::scoped_refptr<TransformedFrameCallback> callback;
EXPECT_CALL(*mock_frame_transformer, RegisterTransformedFrameCallback)
.WillOnce(SaveArg<0>(&callback));
EXPECT_CALL(*mock_frame_transformer, UnregisterTransformedFrameCallback);
std::optional<uint8_t> sent_audio_level;
auto send_rtp = [&](rtc::ArrayView<const uint8_t> data,
const PacketOptions& options) {
RtpPacketReceived packet(&extension_manager);
packet.Parse(data);
RTPHeader header;
packet.GetHeader(&header);
sent_audio_level = header.extension.audio_level()->level();
return true;
};
EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(send_rtp));
channel_->StartSend();
time_controller_.AdvanceTime(TimeDelta::Millis(10));
// Inject a frame encoded elsewhere.
auto mock_frame = std::make_unique<NiceMock<MockTransformableAudioFrame>>();
uint8_t audio_level = 67;
ON_CALL(*mock_frame, AudioLevel()).WillByDefault(Return(audio_level));
uint8_t payload[10];
ON_CALL(*mock_frame, GetData())
.WillByDefault(Return(rtc::ArrayView<uint8_t>(&payload[0], 10)));
EXPECT_TRUE_WAIT(callback, 1000);
callback->OnTransformedFrame(std::move(mock_frame));
// Allow things posted back to the encoder queue to run.
time_controller_.AdvanceTime(TimeDelta::Millis(10));
// Ensure the audio levels is set on the sent packet.
EXPECT_TRUE_WAIT(sent_audio_level, 1000);
EXPECT_EQ(*sent_audio_level, audio_level);
}
} // namespace } // namespace
} // namespace voe } // namespace voe
} // namespace webrtc } // namespace webrtc