From 5075cb4a60d78d7093f5f60f87236872cdaef190 Mon Sep 17 00:00:00 2001 From: Joachim Reiersen Date: Thu, 21 Mar 2024 18:08:54 -0700 Subject: [PATCH] Expose AudioLevel as an absl::optional struct in api/rtp_headers.h Start migrating away from `hasAudioLevel`, `voiceActivity`, `audioLevel` fields in RTPHeaderExtension and switch usages to a more modern absl::optional accessor instead. The old fields are preserved for compatibility with downstream projects, but will be removed in the future. Bug: webrtc:15788 Change-Id: I76599124fd68dd4d449f850df3b9814d6a002f5d Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/336303 Reviewed-by: Harald Alvestrand Reviewed-by: Danil Chapovalov Commit-Queue: Danil Chapovalov Cr-Commit-Position: refs/heads/main@{#41947} --- api/rtp_headers.cc | 26 ++++++++ api/rtp_headers.h | 29 +++++++++ api/rtp_packet_info.cc | 4 +- audio/audio_send_stream_tests.cc | 8 +-- ...nnel_receive_frame_transformer_delegate.cc | 9 ++- .../rtc_event_log_encoder_new_format.cc | 36 +++++------ .../rtc_event_log/rtc_event_log2rtp_dump.cc | 5 +- logging/rtc_event_log/rtc_event_log_parser.cc | 21 +++---- .../rtc_event_log_unittest_helper.cc | 19 +++--- media/engine/webrtc_voice_engine_unittest.cc | 10 ++-- .../audio_coding/neteq/neteq_impl_unittest.cc | 19 +++--- .../audio_coding/neteq/tools/rtp_analyze.cc | 7 ++- .../rtp_rtcp/source/rtp_header_extensions.cc | 35 ++++++++--- .../rtp_rtcp/source/rtp_header_extensions.h | 23 +++++--- .../rtp_rtcp/source/rtp_packet_received.cc | 3 +- .../rtp_rtcp/source/rtp_packet_unittest.cc | 59 +++++++++---------- modules/rtp_rtcp/source/rtp_sender_audio.cc | 4 +- .../source/rtp_sender_audio_unittest.cc | 9 ++- rtc_tools/rtc_event_log_to_text/converter.cc | 14 +++-- .../rtc_event_log_visualizer/analyzer.cc | 5 +- test/fuzzers/rtp_packet_fuzzer.cc | 8 +-- 21 files changed, 212 insertions(+), 141 deletions(-) diff --git a/api/rtp_headers.cc b/api/rtp_headers.cc index 0573e54684..b45bf40e54 100644 --- a/api/rtp_headers.cc +++ b/api/rtp_headers.cc @@ -12,6 +12,14 @@ namespace webrtc { +AudioLevel::AudioLevel() : voice_activity_(false), audio_level_(0) {} + +AudioLevel::AudioLevel(bool voice_activity, int audio_level) + : voice_activity_(voice_activity), audio_level_(audio_level) { + RTC_CHECK_GE(audio_level, 0); + RTC_CHECK_LE(audio_level, 127); +} + RTPHeaderExtension::RTPHeaderExtension() : hasTransmissionTimeOffset(false), transmissionTimeOffset(0), @@ -34,6 +42,24 @@ RTPHeaderExtension::RTPHeaderExtension(const RTPHeaderExtension& other) = RTPHeaderExtension& RTPHeaderExtension::operator=( const RTPHeaderExtension& other) = default; +absl::optional RTPHeaderExtension::audio_level() const { + if (!hasAudioLevel) { + return absl::nullopt; + } + return AudioLevel(voiceActivity, audioLevel); +} + +void RTPHeaderExtension::set_audio_level( + absl::optional audio_level) { + if (audio_level) { + hasAudioLevel = true; + voiceActivity = audio_level->voice_activity(); + audioLevel = audio_level->level(); + } else { + hasAudioLevel = false; + } +} + RTPHeader::RTPHeader() : markerBit(false), payloadType(0), diff --git a/api/rtp_headers.h b/api/rtp_headers.h index 5d4d4190d5..3ea643d37e 100644 --- a/api/rtp_headers.h +++ b/api/rtp_headers.h @@ -77,6 +77,29 @@ struct AbsoluteCaptureTime { absl::optional estimated_capture_clock_offset; }; +// The audio level extension is used to indicate the voice activity and the +// audio level of the payload in the RTP stream. See: +// https://tools.ietf.org/html/rfc6464#section-3. +class AudioLevel { + public: + AudioLevel(); + AudioLevel(bool voice_activity, int audio_level); + AudioLevel(const AudioLevel& other) = default; + AudioLevel& operator=(const AudioLevel& other) = default; + + // Flag indicating whether the encoder believes the audio packet contains + // voice activity. + bool voice_activity() const { return voice_activity_; } + + // Audio level in -dBov. Values range from 0 to 127, representing 0 to -127 + // dBov. 127 represents digital silence. + int level() const { return audio_level_; } + + private: + bool voice_activity_; + int audio_level_; +}; + inline bool operator==(const AbsoluteCaptureTime& lhs, const AbsoluteCaptureTime& rhs) { return (lhs.absolute_capture_timestamp == rhs.absolute_capture_timestamp) && @@ -114,6 +137,12 @@ struct RTPHeaderExtension { // Audio Level includes both level in dBov and voiced/unvoiced bit. See: // https://tools.ietf.org/html/rfc6464#section-3 + absl::optional audio_level() const; + + void set_audio_level(absl::optional audio_level); + + // Direct use of the following members is discouraged and will be removed + // once downstream projects have been updated. bool hasAudioLevel; bool voiceActivity; uint8_t audioLevel; diff --git a/api/rtp_packet_info.cc b/api/rtp_packet_info.cc index cba274ec38..cfe6a24a0c 100644 --- a/api/rtp_packet_info.cc +++ b/api/rtp_packet_info.cc @@ -37,8 +37,8 @@ RtpPacketInfo::RtpPacketInfo(const RTPHeader& rtp_header, csrcs_.assign(&rtp_header.arrOfCSRCs[0], &rtp_header.arrOfCSRCs[csrcs_count]); - if (extension.hasAudioLevel) { - audio_level_ = extension.audioLevel; + if (extension.audio_level()) { + audio_level_ = extension.audio_level()->level(); } absolute_capture_time_ = extension.absolute_capture_time; diff --git a/audio/audio_send_stream_tests.cc b/audio/audio_send_stream_tests.cc index 436e752e3a..6e394a7673 100644 --- a/audio/audio_send_stream_tests.cc +++ b/audio/audio_send_stream_tests.cc @@ -116,11 +116,9 @@ TEST_F(AudioSendStreamCallTest, SupportsAudioLevel) { RtpPacket rtp_packet(&extensions_); EXPECT_TRUE(rtp_packet.Parse(packet)); - uint8_t audio_level = 0; - bool voice = false; - EXPECT_TRUE( - rtp_packet.GetExtension(&voice, &audio_level)); - if (audio_level != 0) { + AudioLevel audio_level; + EXPECT_TRUE(rtp_packet.GetExtension(&audio_level)); + if (audio_level.level() != 0) { // Wait for at least one packet with a non-zero level. observation_complete_.Set(); } else { diff --git a/audio/channel_receive_frame_transformer_delegate.cc b/audio/channel_receive_frame_transformer_delegate.cc index 8a7dda826e..dbced0216f 100644 --- a/audio/channel_receive_frame_transformer_delegate.cc +++ b/audio/channel_receive_frame_transformer_delegate.cc @@ -61,8 +61,13 @@ class TransformableIncomingAudioFrame const RTPHeader& Header() const { return header_; } FrameType Type() const override { - return header_.extension.voiceActivity ? FrameType::kAudioFrameSpeech - : FrameType::kAudioFrameCN; + if (!header_.extension.audio_level()) { + // Audio level extension not set. + return FrameType::kAudioFrameCN; + } + return header_.extension.audio_level()->voice_activity() + ? FrameType::kAudioFrameSpeech + : FrameType::kAudioFrameCN; } private: diff --git a/logging/rtc_event_log/encoder/rtc_event_log_encoder_new_format.cc b/logging/rtc_event_log/encoder/rtc_event_log_encoder_new_format.cc index 3faebb3d7b..6c9581ebcb 100644 --- a/logging/rtc_event_log/encoder/rtc_event_log_encoder_new_format.cc +++ b/logging/rtc_event_log/encoder/rtc_event_log_encoder_new_format.cc @@ -444,16 +444,14 @@ void RtcEventLogEncoderNewFormat::EncodeRtpPacket(const Batch& batch, absl::optional base_audio_level; absl::optional base_voice_activity; { - bool voice_activity; - uint8_t audio_level; - if (base_event->template GetExtension(&voice_activity, - &audio_level)) { - RTC_DCHECK_LE(audio_level, 0x7Fu); - base_audio_level = audio_level; - proto_batch->set_audio_level(audio_level); + AudioLevel audio_level; + if (base_event->template GetExtension(&audio_level)) { + RTC_DCHECK_LE(audio_level.level(), 0x7Fu); + base_audio_level = audio_level.level(); + proto_batch->set_audio_level(audio_level.level()); - base_voice_activity = voice_activity; - proto_batch->set_voice_activity(voice_activity); + base_voice_activity = audio_level.voice_activity(); + proto_batch->set_voice_activity(audio_level.voice_activity()); } } @@ -641,12 +639,10 @@ void RtcEventLogEncoderNewFormat::EncodeRtpPacket(const Batch& batch, // audio_level (RTP extension) for (size_t i = 0; i < values.size(); ++i) { const EventType* event = batch[i + 1]; - bool voice_activity; - uint8_t audio_level; - if (event->template GetExtension(&voice_activity, - &audio_level)) { - RTC_DCHECK_LE(audio_level, 0x7Fu); - values[i] = audio_level; + AudioLevel audio_level; + if (event->template GetExtension(&audio_level)) { + RTC_DCHECK_LE(audio_level.level(), 0x7F); + values[i] = audio_level.level(); } else { values[i].reset(); } @@ -659,12 +655,10 @@ void RtcEventLogEncoderNewFormat::EncodeRtpPacket(const Batch& batch, // voice_activity (RTP extension) for (size_t i = 0; i < values.size(); ++i) { const EventType* event = batch[i + 1]; - bool voice_activity; - uint8_t audio_level; - if (event->template GetExtension(&voice_activity, - &audio_level)) { - RTC_DCHECK_LE(audio_level, 0x7Fu); - values[i] = voice_activity; + AudioLevel audio_level; + if (event->template GetExtension(&audio_level)) { + RTC_DCHECK_LE(audio_level.level(), 0x7F); + values[i] = audio_level.voice_activity(); } else { values[i].reset(); } diff --git a/logging/rtc_event_log/rtc_event_log2rtp_dump.cc b/logging/rtc_event_log/rtc_event_log2rtp_dump.cc index 250aaf59d7..a845fcb0c2 100644 --- a/logging/rtc_event_log/rtc_event_log2rtp_dump.cc +++ b/logging/rtc_event_log/rtc_event_log2rtp_dump.cc @@ -135,10 +135,9 @@ void ConvertRtpPacket( if (incoming.rtp.header.extension.hasTransportSequenceNumber) reconstructed_packet.SetExtension( incoming.rtp.header.extension.transportSequenceNumber); - if (incoming.rtp.header.extension.hasAudioLevel) + if (incoming.rtp.header.extension.audio_level()) reconstructed_packet.SetExtension( - incoming.rtp.header.extension.voiceActivity, - incoming.rtp.header.extension.audioLevel); + *incoming.rtp.header.extension.audio_level()); if (incoming.rtp.header.extension.hasVideoRotation) reconstructed_packet.SetExtension( incoming.rtp.header.extension.videoRotation); diff --git a/logging/rtc_event_log/rtc_event_log_parser.cc b/logging/rtc_event_log/rtc_event_log_parser.cc index 589850218b..7f1f7e18e6 100644 --- a/logging/rtc_event_log/rtc_event_log_parser.cc +++ b/logging/rtc_event_log/rtc_event_log_parser.cc @@ -358,13 +358,10 @@ ParsedRtcEventLog::ParseStatus StoreRtpPackets( } if (proto.has_audio_level()) { RTC_PARSE_CHECK_OR_RETURN(proto.has_voice_activity()); - header.extension.hasAudioLevel = true; - header.extension.voiceActivity = - rtc::checked_cast(proto.voice_activity()); - const uint8_t audio_level = - rtc::checked_cast(proto.audio_level()); - RTC_PARSE_CHECK_OR_RETURN_LE(audio_level, 0x7Fu); - header.extension.audioLevel = audio_level; + bool voice_activity = rtc::checked_cast(proto.voice_activity()); + int audio_level = rtc::checked_cast(proto.audio_level()); + RTC_PARSE_CHECK_OR_RETURN_LE(audio_level, 0x7F); + header.extension.set_audio_level(AudioLevel(voice_activity, audio_level)); } else { RTC_PARSE_CHECK_OR_RETURN(!proto.has_voice_activity()); } @@ -562,13 +559,11 @@ ParsedRtcEventLog::ParseStatus StoreRtpPackets( if (audio_level_values.size() > i && audio_level_values[i].has_value()) { RTC_PARSE_CHECK_OR_RETURN(voice_activity_values.size() > i && voice_activity_values[i].has_value()); - header.extension.hasAudioLevel = true; - header.extension.voiceActivity = + bool voice_activity = rtc::checked_cast(voice_activity_values[i].value()); - const uint8_t audio_level = - rtc::checked_cast(audio_level_values[i].value()); - RTC_PARSE_CHECK_OR_RETURN_LE(audio_level, 0x7Fu); - header.extension.audioLevel = audio_level; + int audio_level = rtc::checked_cast(audio_level_values[i].value()); + RTC_PARSE_CHECK_OR_RETURN_LE(audio_level, 0x7F); + header.extension.set_audio_level(AudioLevel(voice_activity, audio_level)); } else { RTC_PARSE_CHECK_OR_RETURN(voice_activity_values.size() <= i || !voice_activity_values[i].has_value()); diff --git a/logging/rtc_event_log/rtc_event_log_unittest_helper.cc b/logging/rtc_event_log/rtc_event_log_unittest_helper.cc index bfc10e5915..643ca74c0a 100644 --- a/logging/rtc_event_log/rtc_event_log_unittest_helper.cc +++ b/logging/rtc_event_log/rtc_event_log_unittest_helper.cc @@ -618,8 +618,8 @@ void EventGenerator::RandomizeRtpPacket( if (extension_map.IsRegistered(AudioLevelExtension::kId) && (all_configured_exts || prng_.Rand())) { - rtp_packet->SetExtension(prng_.Rand(), - prng_.Rand(127)); + rtp_packet->SetExtension( + AudioLevel(prng_.Rand(), prng_.Rand(127))); } if (extension_map.IsRegistered(AbsoluteSendTime::kId) && @@ -1029,14 +1029,15 @@ void VerifyLoggedRtpHeader(const Event& original_header, // AudioLevel header extension. ASSERT_EQ(original_header.template HasExtension(), - logged_header.extension.hasAudioLevel); - if (logged_header.extension.hasAudioLevel) { - bool voice_activity; - uint8_t audio_level; + logged_header.extension.audio_level().has_value()); + if (logged_header.extension.audio_level()) { + AudioLevel audio_level; ASSERT_TRUE(original_header.template GetExtension( - &voice_activity, &audio_level)); - EXPECT_EQ(voice_activity, logged_header.extension.voiceActivity); - EXPECT_EQ(audio_level, logged_header.extension.audioLevel); + &audio_level)); + EXPECT_EQ(audio_level.voice_activity(), + logged_header.extension.audio_level()->voice_activity()); + EXPECT_EQ(audio_level.level(), + logged_header.extension.audio_level()->level()); } // VideoOrientation header extension. diff --git a/media/engine/webrtc_voice_engine_unittest.cc b/media/engine/webrtc_voice_engine_unittest.cc index 777d3a0dea..d5efbca9d9 100644 --- a/media/engine/webrtc_voice_engine_unittest.cc +++ b/media/engine/webrtc_voice_engine_unittest.cc @@ -1561,7 +1561,7 @@ TEST_P(WebRtcVoiceEngineTestFake, OnPacketReceivedIdentifiesExtensions) { webrtc::RtpPacketReceived reference_packet(&extension_map); constexpr uint8_t kAudioLevel = 123; reference_packet.SetExtension( - /*voice_activity=*/true, kAudioLevel); + webrtc::AudioLevel(/*voice_activity=*/true, kAudioLevel)); // Create a packet without the extension map but with the same content. webrtc::RtpPacketReceived received_packet; ASSERT_TRUE(received_packet.Parse(reference_packet.Buffer())); @@ -1569,12 +1569,10 @@ TEST_P(WebRtcVoiceEngineTestFake, OnPacketReceivedIdentifiesExtensions) { receive_channel_->OnPacketReceived(received_packet); rtc::Thread::Current()->ProcessMessages(0); - bool voice_activity; - uint8_t audio_level; + webrtc::AudioLevel audio_level; EXPECT_TRUE(call_.last_received_rtp_packet() - .GetExtension(&voice_activity, - &audio_level)); - EXPECT_EQ(audio_level, kAudioLevel); + .GetExtension(&audio_level)); + EXPECT_EQ(audio_level.level(), kAudioLevel); } // Test that we apply codecs properly. diff --git a/modules/audio_coding/neteq/neteq_impl_unittest.cc b/modules/audio_coding/neteq/neteq_impl_unittest.cc index 8309dafb58..07c924c2f2 100644 --- a/modules/audio_coding/neteq/neteq_impl_unittest.cc +++ b/modules/audio_coding/neteq/neteq_impl_unittest.cc @@ -564,8 +564,8 @@ TEST_F(NetEqImplTest, ReorderedPacket) { rtp_header.sequenceNumber = 0x1234; rtp_header.timestamp = 0x12345678; rtp_header.ssrc = 0x87654321; - rtp_header.extension.hasAudioLevel = true; - rtp_header.extension.audioLevel = 42; + rtp_header.extension.set_audio_level( + AudioLevel(/*voice_activity=*/false, 42)); EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return()); EXPECT_CALL(mock_decoder, SampleRateHz()) @@ -606,7 +606,8 @@ TEST_F(NetEqImplTest, ReorderedPacket) { EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc); EXPECT_THAT(packet_info.csrcs(), IsEmpty()); EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp); - EXPECT_EQ(packet_info.audio_level(), rtp_header.extension.audioLevel); + EXPECT_EQ(packet_info.audio_level(), + rtp_header.extension.audio_level()->level()); EXPECT_EQ(packet_info.receive_time(), expected_receive_time); } @@ -614,13 +615,13 @@ TEST_F(NetEqImplTest, ReorderedPacket) { // old, the second one is the expected next packet. rtp_header.sequenceNumber -= 1; rtp_header.timestamp -= kPayloadLengthSamples; - rtp_header.extension.audioLevel = 1; + rtp_header.extension.set_audio_level(AudioLevel(/*voice_activity=*/false, 1)); payload[0] = 1; clock_.AdvanceTimeMilliseconds(1000); EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); rtp_header.sequenceNumber += 2; rtp_header.timestamp += 2 * kPayloadLengthSamples; - rtp_header.extension.audioLevel = 2; + rtp_header.extension.set_audio_level(AudioLevel(/*voice_activity=*/false, 2)); payload[0] = 2; clock_.AdvanceTimeMilliseconds(2000); expected_receive_time = clock_.CurrentTime(); @@ -655,7 +656,8 @@ TEST_F(NetEqImplTest, ReorderedPacket) { EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc); EXPECT_THAT(packet_info.csrcs(), IsEmpty()); EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp); - EXPECT_EQ(packet_info.audio_level(), rtp_header.extension.audioLevel); + EXPECT_EQ(packet_info.audio_level(), + rtp_header.extension.audio_level()->level()); EXPECT_EQ(packet_info.receive_time(), expected_receive_time); } @@ -772,8 +774,7 @@ TEST_F(NetEqImplTest, InsertRedPayload) { AbsoluteCaptureTime capture_time; capture_time.absolute_capture_timestamp = 1234; header.extension.absolute_capture_time = capture_time; - header.extension.audioLevel = 12; - header.extension.hasAudioLevel = true; + header.extension.set_audio_level(AudioLevel(/*voice_activity=*/false, 12)); header.numCSRCs = 1; header.arrOfCSRCs[0] = 123; neteq_->InsertPacket(header, payload); @@ -795,7 +796,7 @@ TEST_F(NetEqImplTest, InsertRedPayload) { EXPECT_EQ(frame.packet_infos_.size(), 1u); EXPECT_EQ(frame.packet_infos_.front().absolute_capture_time(), capture_time); EXPECT_EQ(frame.packet_infos_.front().audio_level(), - header.extension.audioLevel); + header.extension.audio_level()->level()); EXPECT_EQ(frame.packet_infos_.front().csrcs()[0], header.arrOfCSRCs[0]); } diff --git a/modules/audio_coding/neteq/tools/rtp_analyze.cc b/modules/audio_coding/neteq/tools/rtp_analyze.cc index 7ecf925ebb..8e94cbdc05 100644 --- a/modules/audio_coding/neteq/tools/rtp_analyze.cc +++ b/modules/audio_coding/neteq/tools/rtp_analyze.cc @@ -110,9 +110,10 @@ int main(int argc, char* argv[]) { static_cast(packet->virtual_packet_length_bytes()), packet->header().payloadType, packet->header().markerBit, packet->header().ssrc); - if (print_audio_level && packet->header().extension.hasAudioLevel) { - fprintf(out_file, " %5u (%1i)", packet->header().extension.audioLevel, - packet->header().extension.voiceActivity); + if (print_audio_level && packet->header().extension.audio_level()) { + fprintf(out_file, " %5d (%1i)", + packet->header().extension.audio_level()->level(), + packet->header().extension.audio_level()->voice_activity()); } if (print_abs_send_time && packet->header().extension.hasAbsoluteSendTime) { if (cycles == -1) { diff --git a/modules/rtp_rtcp/source/rtp_header_extensions.cc b/modules/rtp_rtcp/source/rtp_header_extensions.cc index 9f0374b299..b77c293d6e 100644 --- a/modules/rtp_rtcp/source/rtp_header_extensions.cc +++ b/modules/rtp_rtcp/source/rtp_header_extensions.cc @@ -161,24 +161,41 @@ bool AbsoluteCaptureTimeExtension::Write(rtc::ArrayView data, // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // Sample Audio Level Encoding Using the Two-Byte Header Format bool AudioLevelExtension::Parse(rtc::ArrayView data, - bool* voice_activity, - uint8_t* audio_level) { + AudioLevel* extension) { // One-byte and two-byte format share the same data definition. if (data.size() != 1) return false; - *voice_activity = (data[0] & 0x80) != 0; - *audio_level = data[0] & 0x7F; + bool voice_activity = (data[0] & 0x80) != 0; + int audio_level = data[0] & 0x7F; + *extension = AudioLevel(voice_activity, audio_level); + return true; +} + +bool AudioLevelExtension::Write(rtc::ArrayView data, + const AudioLevel& extension) { + // One-byte and two-byte format share the same data definition. + RTC_DCHECK_EQ(data.size(), 1); + RTC_CHECK_GE(extension.level(), 0); + RTC_CHECK_LE(extension.level(), 0x7f); + data[0] = (extension.voice_activity() ? 0x80 : 0x00) | extension.level(); + return true; +} + +bool AudioLevelExtension::Parse(rtc::ArrayView data, + bool* voice_activity, + uint8_t* audio_level) { + AudioLevel extension; + Parse(data, &extension); + *voice_activity = extension.voice_activity(); + *audio_level = extension.level(); return true; } bool AudioLevelExtension::Write(rtc::ArrayView data, bool voice_activity, uint8_t audio_level) { - // One-byte and two-byte format share the same data definition. - RTC_DCHECK_EQ(data.size(), 1); - RTC_CHECK_LE(audio_level, 0x7f); - data[0] = (voice_activity ? 0x80 : 0x00) | audio_level; - return true; + AudioLevel extension(voice_activity, audio_level); + return Write(data, extension); } // An RTP Header Extension for Mixer-to-Client Audio Level Indication diff --git a/modules/rtp_rtcp/source/rtp_header_extensions.h b/modules/rtp_rtcp/source/rtp_header_extensions.h index 7f039ec8ac..941b79a536 100644 --- a/modules/rtp_rtcp/source/rtp_header_extensions.h +++ b/modules/rtp_rtcp/source/rtp_header_extensions.h @@ -84,21 +84,30 @@ class AbsoluteCaptureTimeExtension { class AudioLevelExtension { public: + using value_type = AudioLevel; static constexpr RTPExtensionType kId = kRtpExtensionAudioLevel; static constexpr uint8_t kValueSizeBytes = 1; static constexpr absl::string_view Uri() { return RtpExtension::kAudioLevelUri; } - static bool Parse(rtc::ArrayView data, - bool* voice_activity, - uint8_t* audio_level); - static size_t ValueSize(bool voice_activity, uint8_t audio_level) { + static bool Parse(rtc::ArrayView data, AudioLevel* extension); + static size_t ValueSize(const AudioLevel& extension) { return kValueSizeBytes; } - static bool Write(rtc::ArrayView data, - bool voice_activity, - uint8_t audio_level); + static bool Write(rtc::ArrayView data, const AudioLevel& extension); + + [[deprecated("Use AudioLevel struct")]] static bool Parse( + rtc::ArrayView data, + bool* voice_activity, + uint8_t* audio_level); + [[deprecated("Use AudioLevel struct")]] static size_t ValueSize( + bool voice_activity, + uint8_t audio_level) { + return kValueSizeBytes; + } + [[deprecated("Use AudioLevel struct")]] static bool + Write(rtc::ArrayView data, bool voice_activity, uint8_t audio_level); }; class CsrcAudioLevel { diff --git a/modules/rtp_rtcp/source/rtp_packet_received.cc b/modules/rtp_rtcp/source/rtp_packet_received.cc index bdd1bb559f..f8b3e25f2a 100644 --- a/modules/rtp_rtcp/source/rtp_packet_received.cc +++ b/modules/rtp_rtcp/source/rtp_packet_received.cc @@ -61,8 +61,7 @@ void RtpPacketReceived::GetHeader(RTPHeader* header) const { &header->extension.feedback_request) || GetExtension( &header->extension.transportSequenceNumber); - header->extension.hasAudioLevel = GetExtension( - &header->extension.voiceActivity, &header->extension.audioLevel); + header->extension.set_audio_level(GetExtension()); header->extension.hasVideoRotation = GetExtension(&header->extension.videoRotation); header->extension.hasVideoContentType = diff --git a/modules/rtp_rtcp/source/rtp_packet_unittest.cc b/modules/rtp_rtcp/source/rtp_packet_unittest.cc index 922a4b15f4..ccdb5f8a75 100644 --- a/modules/rtp_rtcp/source/rtp_packet_unittest.cc +++ b/modules/rtp_rtcp/source/rtp_packet_unittest.cc @@ -237,7 +237,8 @@ TEST(RtpPacketTest, CreateWith2Extensions) { packet.SetTimestamp(kTimestamp); packet.SetSsrc(kSsrc); packet.SetExtension(kTimeOffset); - packet.SetExtension(kVoiceActive, kAudioLevel); + packet.SetExtension( + AudioLevel(kVoiceActive, kAudioLevel)); EXPECT_THAT(kPacketWithTOAndAL, ElementsAreArray(packet.data(), packet.size())); } @@ -257,7 +258,8 @@ TEST(RtpPacketTest, CreateWithTwoByteHeaderExtensionFirst) { TimeDelta::Millis(340)); ASSERT_TRUE(packet.SetExtension(playout_delay)); packet.SetExtension(kTimeOffset); - packet.SetExtension(kVoiceActive, kAudioLevel); + packet.SetExtension( + AudioLevel(kVoiceActive, kAudioLevel)); EXPECT_THAT(kPacketWithTwoByteExtensionIdFirst, ElementsAreArray(packet.data(), packet.size())); } @@ -274,7 +276,8 @@ TEST(RtpPacketTest, CreateWithTwoByteHeaderExtensionLast) { packet.SetTimestamp(kTimestamp); packet.SetSsrc(kSsrc); packet.SetExtension(kTimeOffset); - packet.SetExtension(kVoiceActive, kAudioLevel); + packet.SetExtension( + AudioLevel(kVoiceActive, kAudioLevel)); EXPECT_THAT(kPacketWithTOAndAL, ElementsAreArray(packet.data(), packet.size())); // Set extension that requires two-byte header. @@ -334,8 +337,8 @@ TEST(RtpPacketTest, TryToCreateTwoByteHeaderNotSupported) { extensions.Register(kTwoByteExtensionId); RtpPacketToSend packet(&extensions); // Set extension that requires two-byte header. - EXPECT_FALSE( - packet.SetExtension(kVoiceActive, kAudioLevel)); + EXPECT_FALSE(packet.SetExtension( + AudioLevel(kVoiceActive, kAudioLevel))); } TEST(RtpPacketTest, CreateTwoByteHeaderSupportedIfExtmapAllowMixed) { @@ -343,8 +346,8 @@ TEST(RtpPacketTest, CreateTwoByteHeaderSupportedIfExtmapAllowMixed) { extensions.Register(kTwoByteExtensionId); RtpPacketToSend packet(&extensions); // Set extension that requires two-byte header. - EXPECT_TRUE( - packet.SetExtension(kVoiceActive, kAudioLevel)); + EXPECT_TRUE(packet.SetExtension( + AudioLevel(kVoiceActive, kAudioLevel))); } TEST(RtpPacketTest, CreateWithMaxSizeHeaderExtension) { @@ -409,8 +412,8 @@ TEST(RtpPacketTest, SetReservedExtensionsAfterPayload) { EXPECT_TRUE(packet.ReserveExtension()); packet.SetPayloadSize(kPayloadSize); // Can't set extension after payload. - EXPECT_FALSE( - packet.SetExtension(kVoiceActive, kAudioLevel)); + EXPECT_FALSE(packet.SetExtension( + AudioLevel(kVoiceActive, kAudioLevel))); // Unless reserved. EXPECT_TRUE(packet.SetExtension(kTimeOffset)); } @@ -688,12 +691,10 @@ TEST(RtpPacketTest, ParseWith2Extensions) { int32_t time_offset; EXPECT_TRUE(packet.GetExtension(&time_offset)); EXPECT_EQ(kTimeOffset, time_offset); - bool voice_active; - uint8_t audio_level; - EXPECT_TRUE( - packet.GetExtension(&voice_active, &audio_level)); - EXPECT_EQ(kVoiceActive, voice_active); - EXPECT_EQ(kAudioLevel, audio_level); + AudioLevel audio_level; + EXPECT_TRUE(packet.GetExtension(&audio_level)); + EXPECT_EQ(kVoiceActive, audio_level.voice_activity()); + EXPECT_EQ(kAudioLevel, audio_level.level()); } TEST(RtpPacketTest, ParseSecondPacketWithFewerExtensions) { @@ -721,10 +722,8 @@ TEST(RtpPacketTest, ParseWith2ExtensionsInvalidPadding) { int32_t time_offset; EXPECT_TRUE(packet.GetExtension(&time_offset)); EXPECT_EQ(kTimeOffset, time_offset); - bool voice_active; - uint8_t audio_level; - EXPECT_FALSE( - packet.GetExtension(&voice_active, &audio_level)); + AudioLevel audio_level; + EXPECT_FALSE(packet.GetExtension(&audio_level)); } TEST(RtpPacketTest, ParseWith2ExtensionsReservedExtensionId) { @@ -737,10 +736,8 @@ TEST(RtpPacketTest, ParseWith2ExtensionsReservedExtensionId) { int32_t time_offset; EXPECT_TRUE(packet.GetExtension(&time_offset)); EXPECT_EQ(kTimeOffset, time_offset); - bool voice_active; - uint8_t audio_level; - EXPECT_FALSE( - packet.GetExtension(&voice_active, &audio_level)); + AudioLevel audio_level; + EXPECT_FALSE(packet.GetExtension(&audio_level)); } TEST(RtpPacketTest, ParseWithAllFeatures) { @@ -792,12 +789,10 @@ TEST(RtpPacketTest, ParseTwoByteHeaderExtensionWithPadding) { int32_t time_offset; EXPECT_TRUE(packet.GetExtension(&time_offset)); EXPECT_EQ(kTimeOffset, time_offset); - bool voice_active; - uint8_t audio_level; - EXPECT_TRUE( - packet.GetExtension(&voice_active, &audio_level)); - EXPECT_EQ(kVoiceActive, voice_active); - EXPECT_EQ(kAudioLevel, audio_level); + AudioLevel audio_level; + EXPECT_TRUE(packet.GetExtension(&audio_level)); + EXPECT_EQ(kVoiceActive, audio_level.voice_activity()); + EXPECT_EQ(kAudioLevel, audio_level.level()); } TEST(RtpPacketTest, ParseWithExtensionDelayed) { @@ -1221,7 +1216,8 @@ TEST(RtpPacketTest, RemoveMultipleExtensions) { packet.SetTimestamp(kTimestamp); packet.SetSsrc(kSsrc); packet.SetExtension(kTimeOffset); - packet.SetExtension(kVoiceActive, kAudioLevel); + packet.SetExtension( + AudioLevel(kVoiceActive, kAudioLevel)); EXPECT_THAT(kPacketWithTOAndAL, ElementsAreArray(packet.data(), packet.size())); @@ -1249,7 +1245,8 @@ TEST(RtpPacketTest, RemoveExtensionPreservesOtherUnregisteredExtensions) { packet.SetTimestamp(kTimestamp); packet.SetSsrc(kSsrc); packet.SetExtension(kTimeOffset); - packet.SetExtension(kVoiceActive, kAudioLevel); + packet.SetExtension( + AudioLevel(kVoiceActive, kAudioLevel)); EXPECT_THAT(kPacketWithTOAndAL, ElementsAreArray(packet.data(), packet.size())); diff --git a/modules/rtp_rtcp/source/rtp_sender_audio.cc b/modules/rtp_rtcp/source/rtp_sender_audio.cc index 57fb4f41af..883803fa71 100644 --- a/modules/rtp_rtcp/source/rtp_sender_audio.cc +++ b/modules/rtp_rtcp/source/rtp_sender_audio.cc @@ -244,8 +244,8 @@ bool RTPSenderAudio::SendAudio(const RtpAudioFrame& frame) { packet->set_capture_time(clock_->CurrentTime()); // Set audio level extension, if included. packet->SetExtension( - frame.type == AudioFrameType::kAudioFrameSpeech, - frame.audio_level_dbov.value_or(127)); + AudioLevel(frame.type == AudioFrameType::kAudioFrameSpeech, + frame.audio_level_dbov.value_or(127))); if (absolute_capture_time.has_value()) { // It also checks that extension was registered during SDP negotiation. If diff --git a/modules/rtp_rtcp/source/rtp_sender_audio_unittest.cc b/modules/rtp_rtcp/source/rtp_sender_audio_unittest.cc index 2b7938b85a..c1666b4a1b 100644 --- a/modules/rtp_rtcp/source/rtp_sender_audio_unittest.cc +++ b/modules/rtp_rtcp/source/rtp_sender_audio_unittest.cc @@ -121,12 +121,11 @@ TEST_F(RtpSenderAudioTest, SendAudioWithAudioLevelExtension) { auto sent_payload = transport_.last_sent_packet().payload(); EXPECT_THAT(sent_payload, ElementsAreArray(payload)); // Verify AudioLevel extension. - bool voice_activity; - uint8_t audio_level; + AudioLevel audio_level; EXPECT_TRUE(transport_.last_sent_packet().GetExtension( - &voice_activity, &audio_level)); - EXPECT_EQ(kAudioLevel, audio_level); - EXPECT_FALSE(voice_activity); + &audio_level)); + EXPECT_EQ(kAudioLevel, audio_level.level()); + EXPECT_FALSE(audio_level.voice_activity()); } TEST_F(RtpSenderAudioTest, SendAudioWithoutAbsoluteCaptureTime) { diff --git a/rtc_tools/rtc_event_log_to_text/converter.cc b/rtc_tools/rtc_event_log_to_text/converter.cc index b4e93be519..28ffb3bc87 100644 --- a/rtc_tools/rtc_event_log_to_text/converter.cc +++ b/rtc_tools/rtc_event_log_to_text/converter.cc @@ -333,10 +333,11 @@ bool Convert(std::string inputfile, fprintf(output, " transmission_offset=%d", event.rtp.header.extension.transmissionTimeOffset); } - if (event.rtp.header.extension.hasAudioLevel) { + if (event.rtp.header.extension.audio_level()) { fprintf(output, " voice_activity=%d", - event.rtp.header.extension.voiceActivity); - fprintf(output, " audio_level=%u", event.rtp.header.extension.audioLevel); + event.rtp.header.extension.audio_level()->voice_activity()); + fprintf(output, " audio_level=%u", + event.rtp.header.extension.audio_level()->level()); } if (event.rtp.header.extension.hasVideoRotation) { fprintf(output, " video_rotation=%d", @@ -367,10 +368,11 @@ bool Convert(std::string inputfile, fprintf(output, " transmission_offset=%d", event.rtp.header.extension.transmissionTimeOffset); } - if (event.rtp.header.extension.hasAudioLevel) { + if (event.rtp.header.extension.audio_level()) { fprintf(output, " voice_activity=%d", - event.rtp.header.extension.voiceActivity); - fprintf(output, " audio_level=%u", event.rtp.header.extension.audioLevel); + event.rtp.header.extension.audio_level()->voice_activity()); + fprintf(output, " audio_level=%u", + event.rtp.header.extension.audio_level()->level()); } if (event.rtp.header.extension.hasVideoRotation) { fprintf(output, " video_rotation=%d", diff --git a/rtc_tools/rtc_event_log_visualizer/analyzer.cc b/rtc_tools/rtc_event_log_visualizer/analyzer.cc index 0c7775e39f..7b8586a8d1 100644 --- a/rtc_tools/rtc_event_log_visualizer/analyzer.cc +++ b/rtc_tools/rtc_event_log_visualizer/analyzer.cc @@ -740,11 +740,12 @@ void EventLogAnalyzer::CreateAudioLevelGraph(PacketDirection direction, TimeSeries time_series(GetStreamName(parsed_log_, direction, stream.ssrc), LineStyle::kLine); for (auto& packet : stream.packet_view) { - if (packet.header.extension.hasAudioLevel) { + if (packet.header.extension.audio_level()) { float x = config_.GetCallTimeSec(packet.log_time()); // The audio level is stored in -dBov (so e.g. -10 dBov is stored as 10) // Here we convert it to dBov. - float y = static_cast(-packet.header.extension.audioLevel); + float y = + static_cast(-packet.header.extension.audio_level()->level()); time_series.points.emplace_back(TimeSeriesPoint(x, y)); } } diff --git a/test/fuzzers/rtp_packet_fuzzer.cc b/test/fuzzers/rtp_packet_fuzzer.cc index 02fc64a2dc..be4e9a3e85 100644 --- a/test/fuzzers/rtp_packet_fuzzer.cc +++ b/test/fuzzers/rtp_packet_fuzzer.cc @@ -72,11 +72,11 @@ void FuzzOneInput(const uint8_t* data, size_t size) { int32_t offset; packet.GetExtension(&offset); break; - case kRtpExtensionAudioLevel: - bool voice_activity; - uint8_t audio_level; - packet.GetExtension(&voice_activity, &audio_level); + case kRtpExtensionAudioLevel: { + AudioLevel audio_level; + packet.GetExtension(&audio_level); break; + } case kRtpExtensionCsrcAudioLevel: { std::vector audio_levels; packet.GetExtension(&audio_levels);