Expose AudioLevel as an absl::optional struct in api/rtp_headers.h

Start migrating away from `hasAudioLevel`, `voiceActivity`, `audioLevel` fields in RTPHeaderExtension and switch usages to a more modern absl::optional<AudioLevel> accessor instead.

The old fields are preserved for compatibility with downstream projects, but will be removed in the future.

Bug: webrtc:15788
Change-Id: I76599124fd68dd4d449f850df3b9814d6a002f5d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/336303
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#41947}
This commit is contained in:
Joachim Reiersen 2024-03-21 18:08:54 -07:00 committed by WebRTC LUCI CQ
parent 28efb5acb4
commit 5075cb4a60
21 changed files with 212 additions and 141 deletions

View file

@ -12,6 +12,14 @@
namespace webrtc { namespace webrtc {
// Default state: no voice activity and a level of 0. Delegates to the
// checked constructor so both constructors share one initialization path.
AudioLevel::AudioLevel() : AudioLevel(/*voice_activity=*/false, 0) {}

// Stores the voice-activity flag together with `audio_level`, which has to
// lie within [0, 127]; anything outside that range is a fatal check failure.
AudioLevel::AudioLevel(bool voice_activity, int audio_level)
    : voice_activity_(voice_activity), audio_level_(audio_level) {
  RTC_CHECK_LE(audio_level, 127);
  RTC_CHECK_GE(audio_level, 0);
}
RTPHeaderExtension::RTPHeaderExtension() RTPHeaderExtension::RTPHeaderExtension()
: hasTransmissionTimeOffset(false), : hasTransmissionTimeOffset(false),
transmissionTimeOffset(0), transmissionTimeOffset(0),
@ -34,6 +42,24 @@ RTPHeaderExtension::RTPHeaderExtension(const RTPHeaderExtension& other) =
RTPHeaderExtension& RTPHeaderExtension::operator=( RTPHeaderExtension& RTPHeaderExtension::operator=(
const RTPHeaderExtension& other) = default; const RTPHeaderExtension& other) = default;
// Optional view over the legacy `hasAudioLevel` / `voiceActivity` /
// `audioLevel` fields: empty when `hasAudioLevel` is false, otherwise an
// AudioLevel built from the other two fields.
absl::optional<AudioLevel> RTPHeaderExtension::audio_level() const {
  absl::optional<AudioLevel> result;
  if (hasAudioLevel) {
    // AudioLevel's constructor range-checks the level, so a legacy writer
    // that stored a value above 127 in `audioLevel` trips the check here.
    result.emplace(voiceActivity, audioLevel);
  }
  return result;
}
// Writes `audio_level` through to the legacy fields. An empty optional only
// clears `hasAudioLevel`; `voiceActivity` and `audioLevel` keep whatever
// values they held before the call.
void RTPHeaderExtension::set_audio_level(
    absl::optional<AudioLevel> audio_level) {
  hasAudioLevel = audio_level.has_value();
  if (!hasAudioLevel) {
    return;
  }
  voiceActivity = audio_level->voice_activity();
  audioLevel = audio_level->level();
}
RTPHeader::RTPHeader() RTPHeader::RTPHeader()
: markerBit(false), : markerBit(false),
payloadType(0), payloadType(0),

View file

@ -77,6 +77,29 @@ struct AbsoluteCaptureTime {
absl::optional<int64_t> estimated_capture_clock_offset; absl::optional<int64_t> estimated_capture_clock_offset;
}; };
// The audio level extension is used to indicate the voice activity and the
// audio level of the payload in the RTP stream. See:
// https://tools.ietf.org/html/rfc6464#section-3.
//
// Small copyable value type that pairs the voice-activity flag with the level.
class AudioLevel {
public:
// Creates a value with the voice-activity flag cleared and a level of 0.
// Note that, per the level() comment below, 0 means 0 dBov rather than
// digital silence.
AudioLevel();
// `audio_level` must be within [0, 127]; the out-of-line definition enforces
// this with RTC_CHECK, so an out-of-range value is a fatal error.
AudioLevel(bool voice_activity, int audio_level);
// Copying is member-wise.
AudioLevel(const AudioLevel& other) = default;
AudioLevel& operator=(const AudioLevel& other) = default;
// Flag indicating whether the encoder believes the audio packet contains
// voice activity.
bool voice_activity() const { return voice_activity_; }
// Audio level in -dBov. Values range from 0 to 127, representing 0 to -127
// dBov. 127 represents digital silence.
int level() const { return audio_level_; }
private:
// Backing storage for voice_activity().
bool voice_activity_;
// Backing storage for level(); range-checked by the two-argument constructor.
int audio_level_;
};
inline bool operator==(const AbsoluteCaptureTime& lhs, inline bool operator==(const AbsoluteCaptureTime& lhs,
const AbsoluteCaptureTime& rhs) { const AbsoluteCaptureTime& rhs) {
return (lhs.absolute_capture_timestamp == rhs.absolute_capture_timestamp) && return (lhs.absolute_capture_timestamp == rhs.absolute_capture_timestamp) &&
@ -114,6 +137,12 @@ struct RTPHeaderExtension {
// Audio Level includes both level in dBov and voiced/unvoiced bit. See: // Audio Level includes both level in dBov and voiced/unvoiced bit. See:
// https://tools.ietf.org/html/rfc6464#section-3 // https://tools.ietf.org/html/rfc6464#section-3
absl::optional<AudioLevel> audio_level() const;
void set_audio_level(absl::optional<AudioLevel> audio_level);
// Direct use of the following members is discouraged and will be removed
// once downstream projects have been updated.
bool hasAudioLevel; bool hasAudioLevel;
bool voiceActivity; bool voiceActivity;
uint8_t audioLevel; uint8_t audioLevel;

View file

@ -37,8 +37,8 @@ RtpPacketInfo::RtpPacketInfo(const RTPHeader& rtp_header,
csrcs_.assign(&rtp_header.arrOfCSRCs[0], &rtp_header.arrOfCSRCs[csrcs_count]); csrcs_.assign(&rtp_header.arrOfCSRCs[0], &rtp_header.arrOfCSRCs[csrcs_count]);
if (extension.hasAudioLevel) { if (extension.audio_level()) {
audio_level_ = extension.audioLevel; audio_level_ = extension.audio_level()->level();
} }
absolute_capture_time_ = extension.absolute_capture_time; absolute_capture_time_ = extension.absolute_capture_time;

View file

@ -116,11 +116,9 @@ TEST_F(AudioSendStreamCallTest, SupportsAudioLevel) {
RtpPacket rtp_packet(&extensions_); RtpPacket rtp_packet(&extensions_);
EXPECT_TRUE(rtp_packet.Parse(packet)); EXPECT_TRUE(rtp_packet.Parse(packet));
uint8_t audio_level = 0; AudioLevel audio_level;
bool voice = false; EXPECT_TRUE(rtp_packet.GetExtension<AudioLevelExtension>(&audio_level));
EXPECT_TRUE( if (audio_level.level() != 0) {
rtp_packet.GetExtension<AudioLevelExtension>(&voice, &audio_level));
if (audio_level != 0) {
// Wait for at least one packet with a non-zero level. // Wait for at least one packet with a non-zero level.
observation_complete_.Set(); observation_complete_.Set();
} else { } else {

View file

@ -61,7 +61,12 @@ class TransformableIncomingAudioFrame
const RTPHeader& Header() const { return header_; } const RTPHeader& Header() const { return header_; }
FrameType Type() const override { FrameType Type() const override {
return header_.extension.voiceActivity ? FrameType::kAudioFrameSpeech if (!header_.extension.audio_level()) {
// Audio level extension not set.
return FrameType::kAudioFrameCN;
}
return header_.extension.audio_level()->voice_activity()
? FrameType::kAudioFrameSpeech
: FrameType::kAudioFrameCN; : FrameType::kAudioFrameCN;
} }

View file

@ -444,16 +444,14 @@ void RtcEventLogEncoderNewFormat::EncodeRtpPacket(const Batch& batch,
absl::optional<uint64_t> base_audio_level; absl::optional<uint64_t> base_audio_level;
absl::optional<uint64_t> base_voice_activity; absl::optional<uint64_t> base_voice_activity;
{ {
bool voice_activity; AudioLevel audio_level;
uint8_t audio_level; if (base_event->template GetExtension<AudioLevelExtension>(&audio_level)) {
if (base_event->template GetExtension<AudioLevelExtension>(&voice_activity, RTC_DCHECK_LE(audio_level.level(), 0x7Fu);
&audio_level)) { base_audio_level = audio_level.level();
RTC_DCHECK_LE(audio_level, 0x7Fu); proto_batch->set_audio_level(audio_level.level());
base_audio_level = audio_level;
proto_batch->set_audio_level(audio_level);
base_voice_activity = voice_activity; base_voice_activity = audio_level.voice_activity();
proto_batch->set_voice_activity(voice_activity); proto_batch->set_voice_activity(audio_level.voice_activity());
} }
} }
@ -641,12 +639,10 @@ void RtcEventLogEncoderNewFormat::EncodeRtpPacket(const Batch& batch,
// audio_level (RTP extension) // audio_level (RTP extension)
for (size_t i = 0; i < values.size(); ++i) { for (size_t i = 0; i < values.size(); ++i) {
const EventType* event = batch[i + 1]; const EventType* event = batch[i + 1];
bool voice_activity; AudioLevel audio_level;
uint8_t audio_level; if (event->template GetExtension<AudioLevelExtension>(&audio_level)) {
if (event->template GetExtension<AudioLevelExtension>(&voice_activity, RTC_DCHECK_LE(audio_level.level(), 0x7F);
&audio_level)) { values[i] = audio_level.level();
RTC_DCHECK_LE(audio_level, 0x7Fu);
values[i] = audio_level;
} else { } else {
values[i].reset(); values[i].reset();
} }
@ -659,12 +655,10 @@ void RtcEventLogEncoderNewFormat::EncodeRtpPacket(const Batch& batch,
// voice_activity (RTP extension) // voice_activity (RTP extension)
for (size_t i = 0; i < values.size(); ++i) { for (size_t i = 0; i < values.size(); ++i) {
const EventType* event = batch[i + 1]; const EventType* event = batch[i + 1];
bool voice_activity; AudioLevel audio_level;
uint8_t audio_level; if (event->template GetExtension<AudioLevelExtension>(&audio_level)) {
if (event->template GetExtension<AudioLevelExtension>(&voice_activity, RTC_DCHECK_LE(audio_level.level(), 0x7F);
&audio_level)) { values[i] = audio_level.voice_activity();
RTC_DCHECK_LE(audio_level, 0x7Fu);
values[i] = voice_activity;
} else { } else {
values[i].reset(); values[i].reset();
} }

View file

@ -135,10 +135,9 @@ void ConvertRtpPacket(
if (incoming.rtp.header.extension.hasTransportSequenceNumber) if (incoming.rtp.header.extension.hasTransportSequenceNumber)
reconstructed_packet.SetExtension<webrtc::TransportSequenceNumber>( reconstructed_packet.SetExtension<webrtc::TransportSequenceNumber>(
incoming.rtp.header.extension.transportSequenceNumber); incoming.rtp.header.extension.transportSequenceNumber);
if (incoming.rtp.header.extension.hasAudioLevel) if (incoming.rtp.header.extension.audio_level())
reconstructed_packet.SetExtension<webrtc::AudioLevelExtension>( reconstructed_packet.SetExtension<webrtc::AudioLevelExtension>(
incoming.rtp.header.extension.voiceActivity, *incoming.rtp.header.extension.audio_level());
incoming.rtp.header.extension.audioLevel);
if (incoming.rtp.header.extension.hasVideoRotation) if (incoming.rtp.header.extension.hasVideoRotation)
reconstructed_packet.SetExtension<webrtc::VideoOrientation>( reconstructed_packet.SetExtension<webrtc::VideoOrientation>(
incoming.rtp.header.extension.videoRotation); incoming.rtp.header.extension.videoRotation);

View file

@ -358,13 +358,10 @@ ParsedRtcEventLog::ParseStatus StoreRtpPackets(
} }
if (proto.has_audio_level()) { if (proto.has_audio_level()) {
RTC_PARSE_CHECK_OR_RETURN(proto.has_voice_activity()); RTC_PARSE_CHECK_OR_RETURN(proto.has_voice_activity());
header.extension.hasAudioLevel = true; bool voice_activity = rtc::checked_cast<bool>(proto.voice_activity());
header.extension.voiceActivity = int audio_level = rtc::checked_cast<int>(proto.audio_level());
rtc::checked_cast<bool>(proto.voice_activity()); RTC_PARSE_CHECK_OR_RETURN_LE(audio_level, 0x7F);
const uint8_t audio_level = header.extension.set_audio_level(AudioLevel(voice_activity, audio_level));
rtc::checked_cast<uint8_t>(proto.audio_level());
RTC_PARSE_CHECK_OR_RETURN_LE(audio_level, 0x7Fu);
header.extension.audioLevel = audio_level;
} else { } else {
RTC_PARSE_CHECK_OR_RETURN(!proto.has_voice_activity()); RTC_PARSE_CHECK_OR_RETURN(!proto.has_voice_activity());
} }
@ -562,13 +559,11 @@ ParsedRtcEventLog::ParseStatus StoreRtpPackets(
if (audio_level_values.size() > i && audio_level_values[i].has_value()) { if (audio_level_values.size() > i && audio_level_values[i].has_value()) {
RTC_PARSE_CHECK_OR_RETURN(voice_activity_values.size() > i && RTC_PARSE_CHECK_OR_RETURN(voice_activity_values.size() > i &&
voice_activity_values[i].has_value()); voice_activity_values[i].has_value());
header.extension.hasAudioLevel = true; bool voice_activity =
header.extension.voiceActivity =
rtc::checked_cast<bool>(voice_activity_values[i].value()); rtc::checked_cast<bool>(voice_activity_values[i].value());
const uint8_t audio_level = int audio_level = rtc::checked_cast<int>(audio_level_values[i].value());
rtc::checked_cast<uint8_t>(audio_level_values[i].value()); RTC_PARSE_CHECK_OR_RETURN_LE(audio_level, 0x7F);
RTC_PARSE_CHECK_OR_RETURN_LE(audio_level, 0x7Fu); header.extension.set_audio_level(AudioLevel(voice_activity, audio_level));
header.extension.audioLevel = audio_level;
} else { } else {
RTC_PARSE_CHECK_OR_RETURN(voice_activity_values.size() <= i || RTC_PARSE_CHECK_OR_RETURN(voice_activity_values.size() <= i ||
!voice_activity_values[i].has_value()); !voice_activity_values[i].has_value());

View file

@ -618,8 +618,8 @@ void EventGenerator::RandomizeRtpPacket(
if (extension_map.IsRegistered(AudioLevelExtension::kId) && if (extension_map.IsRegistered(AudioLevelExtension::kId) &&
(all_configured_exts || prng_.Rand<bool>())) { (all_configured_exts || prng_.Rand<bool>())) {
rtp_packet->SetExtension<AudioLevelExtension>(prng_.Rand<bool>(), rtp_packet->SetExtension<AudioLevelExtension>(
prng_.Rand(127)); AudioLevel(prng_.Rand<bool>(), prng_.Rand(127)));
} }
if (extension_map.IsRegistered(AbsoluteSendTime::kId) && if (extension_map.IsRegistered(AbsoluteSendTime::kId) &&
@ -1029,14 +1029,15 @@ void VerifyLoggedRtpHeader(const Event& original_header,
// AudioLevel header extension. // AudioLevel header extension.
ASSERT_EQ(original_header.template HasExtension<AudioLevelExtension>(), ASSERT_EQ(original_header.template HasExtension<AudioLevelExtension>(),
logged_header.extension.hasAudioLevel); logged_header.extension.audio_level().has_value());
if (logged_header.extension.hasAudioLevel) { if (logged_header.extension.audio_level()) {
bool voice_activity; AudioLevel audio_level;
uint8_t audio_level;
ASSERT_TRUE(original_header.template GetExtension<AudioLevelExtension>( ASSERT_TRUE(original_header.template GetExtension<AudioLevelExtension>(
&voice_activity, &audio_level)); &audio_level));
EXPECT_EQ(voice_activity, logged_header.extension.voiceActivity); EXPECT_EQ(audio_level.voice_activity(),
EXPECT_EQ(audio_level, logged_header.extension.audioLevel); logged_header.extension.audio_level()->voice_activity());
EXPECT_EQ(audio_level.level(),
logged_header.extension.audio_level()->level());
} }
// VideoOrientation header extension. // VideoOrientation header extension.

View file

@ -1561,7 +1561,7 @@ TEST_P(WebRtcVoiceEngineTestFake, OnPacketReceivedIdentifiesExtensions) {
webrtc::RtpPacketReceived reference_packet(&extension_map); webrtc::RtpPacketReceived reference_packet(&extension_map);
constexpr uint8_t kAudioLevel = 123; constexpr uint8_t kAudioLevel = 123;
reference_packet.SetExtension<webrtc::AudioLevelExtension>( reference_packet.SetExtension<webrtc::AudioLevelExtension>(
/*voice_activity=*/true, kAudioLevel); webrtc::AudioLevel(/*voice_activity=*/true, kAudioLevel));
// Create a packet without the extension map but with the same content. // Create a packet without the extension map but with the same content.
webrtc::RtpPacketReceived received_packet; webrtc::RtpPacketReceived received_packet;
ASSERT_TRUE(received_packet.Parse(reference_packet.Buffer())); ASSERT_TRUE(received_packet.Parse(reference_packet.Buffer()));
@ -1569,12 +1569,10 @@ TEST_P(WebRtcVoiceEngineTestFake, OnPacketReceivedIdentifiesExtensions) {
receive_channel_->OnPacketReceived(received_packet); receive_channel_->OnPacketReceived(received_packet);
rtc::Thread::Current()->ProcessMessages(0); rtc::Thread::Current()->ProcessMessages(0);
bool voice_activity; webrtc::AudioLevel audio_level;
uint8_t audio_level;
EXPECT_TRUE(call_.last_received_rtp_packet() EXPECT_TRUE(call_.last_received_rtp_packet()
.GetExtension<webrtc::AudioLevelExtension>(&voice_activity, .GetExtension<webrtc::AudioLevelExtension>(&audio_level));
&audio_level)); EXPECT_EQ(audio_level.level(), kAudioLevel);
EXPECT_EQ(audio_level, kAudioLevel);
} }
// Test that we apply codecs properly. // Test that we apply codecs properly.

View file

@ -564,8 +564,8 @@ TEST_F(NetEqImplTest, ReorderedPacket) {
rtp_header.sequenceNumber = 0x1234; rtp_header.sequenceNumber = 0x1234;
rtp_header.timestamp = 0x12345678; rtp_header.timestamp = 0x12345678;
rtp_header.ssrc = 0x87654321; rtp_header.ssrc = 0x87654321;
rtp_header.extension.hasAudioLevel = true; rtp_header.extension.set_audio_level(
rtp_header.extension.audioLevel = 42; AudioLevel(/*voice_activity=*/false, 42));
EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return()); EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return());
EXPECT_CALL(mock_decoder, SampleRateHz()) EXPECT_CALL(mock_decoder, SampleRateHz())
@ -606,7 +606,8 @@ TEST_F(NetEqImplTest, ReorderedPacket) {
EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc); EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc);
EXPECT_THAT(packet_info.csrcs(), IsEmpty()); EXPECT_THAT(packet_info.csrcs(), IsEmpty());
EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp); EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp);
EXPECT_EQ(packet_info.audio_level(), rtp_header.extension.audioLevel); EXPECT_EQ(packet_info.audio_level(),
rtp_header.extension.audio_level()->level());
EXPECT_EQ(packet_info.receive_time(), expected_receive_time); EXPECT_EQ(packet_info.receive_time(), expected_receive_time);
} }
@ -614,13 +615,13 @@ TEST_F(NetEqImplTest, ReorderedPacket) {
// old, the second one is the expected next packet. // old, the second one is the expected next packet.
rtp_header.sequenceNumber -= 1; rtp_header.sequenceNumber -= 1;
rtp_header.timestamp -= kPayloadLengthSamples; rtp_header.timestamp -= kPayloadLengthSamples;
rtp_header.extension.audioLevel = 1; rtp_header.extension.set_audio_level(AudioLevel(/*voice_activity=*/false, 1));
payload[0] = 1; payload[0] = 1;
clock_.AdvanceTimeMilliseconds(1000); clock_.AdvanceTimeMilliseconds(1000);
EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload)); EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload));
rtp_header.sequenceNumber += 2; rtp_header.sequenceNumber += 2;
rtp_header.timestamp += 2 * kPayloadLengthSamples; rtp_header.timestamp += 2 * kPayloadLengthSamples;
rtp_header.extension.audioLevel = 2; rtp_header.extension.set_audio_level(AudioLevel(/*voice_activity=*/false, 2));
payload[0] = 2; payload[0] = 2;
clock_.AdvanceTimeMilliseconds(2000); clock_.AdvanceTimeMilliseconds(2000);
expected_receive_time = clock_.CurrentTime(); expected_receive_time = clock_.CurrentTime();
@ -655,7 +656,8 @@ TEST_F(NetEqImplTest, ReorderedPacket) {
EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc); EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc);
EXPECT_THAT(packet_info.csrcs(), IsEmpty()); EXPECT_THAT(packet_info.csrcs(), IsEmpty());
EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp); EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp);
EXPECT_EQ(packet_info.audio_level(), rtp_header.extension.audioLevel); EXPECT_EQ(packet_info.audio_level(),
rtp_header.extension.audio_level()->level());
EXPECT_EQ(packet_info.receive_time(), expected_receive_time); EXPECT_EQ(packet_info.receive_time(), expected_receive_time);
} }
@ -772,8 +774,7 @@ TEST_F(NetEqImplTest, InsertRedPayload) {
AbsoluteCaptureTime capture_time; AbsoluteCaptureTime capture_time;
capture_time.absolute_capture_timestamp = 1234; capture_time.absolute_capture_timestamp = 1234;
header.extension.absolute_capture_time = capture_time; header.extension.absolute_capture_time = capture_time;
header.extension.audioLevel = 12; header.extension.set_audio_level(AudioLevel(/*voice_activity=*/false, 12));
header.extension.hasAudioLevel = true;
header.numCSRCs = 1; header.numCSRCs = 1;
header.arrOfCSRCs[0] = 123; header.arrOfCSRCs[0] = 123;
neteq_->InsertPacket(header, payload); neteq_->InsertPacket(header, payload);
@ -795,7 +796,7 @@ TEST_F(NetEqImplTest, InsertRedPayload) {
EXPECT_EQ(frame.packet_infos_.size(), 1u); EXPECT_EQ(frame.packet_infos_.size(), 1u);
EXPECT_EQ(frame.packet_infos_.front().absolute_capture_time(), capture_time); EXPECT_EQ(frame.packet_infos_.front().absolute_capture_time(), capture_time);
EXPECT_EQ(frame.packet_infos_.front().audio_level(), EXPECT_EQ(frame.packet_infos_.front().audio_level(),
header.extension.audioLevel); header.extension.audio_level()->level());
EXPECT_EQ(frame.packet_infos_.front().csrcs()[0], header.arrOfCSRCs[0]); EXPECT_EQ(frame.packet_infos_.front().csrcs()[0], header.arrOfCSRCs[0]);
} }

View file

@ -110,9 +110,10 @@ int main(int argc, char* argv[]) {
static_cast<int>(packet->virtual_packet_length_bytes()), static_cast<int>(packet->virtual_packet_length_bytes()),
packet->header().payloadType, packet->header().markerBit, packet->header().payloadType, packet->header().markerBit,
packet->header().ssrc); packet->header().ssrc);
if (print_audio_level && packet->header().extension.hasAudioLevel) { if (print_audio_level && packet->header().extension.audio_level()) {
fprintf(out_file, " %5u (%1i)", packet->header().extension.audioLevel, fprintf(out_file, " %5d (%1i)",
packet->header().extension.voiceActivity); packet->header().extension.audio_level()->level(),
packet->header().extension.audio_level()->voice_activity());
} }
if (print_abs_send_time && packet->header().extension.hasAbsoluteSendTime) { if (print_abs_send_time && packet->header().extension.hasAbsoluteSendTime) {
if (cycles == -1) { if (cycles == -1) {

View file

@ -161,24 +161,41 @@ bool AbsoluteCaptureTimeExtension::Write(rtc::ArrayView<uint8_t> data,
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// Sample Audio Level Encoding Using the Two-Byte Header Format // Sample Audio Level Encoding Using the Two-Byte Header Format
bool AudioLevelExtension::Parse(rtc::ArrayView<const uint8_t> data, bool AudioLevelExtension::Parse(rtc::ArrayView<const uint8_t> data,
bool* voice_activity, AudioLevel* extension) {
uint8_t* audio_level) {
// One-byte and two-byte format share the same data definition. // One-byte and two-byte format share the same data definition.
if (data.size() != 1) if (data.size() != 1)
return false; return false;
*voice_activity = (data[0] & 0x80) != 0; bool voice_activity = (data[0] & 0x80) != 0;
*audio_level = data[0] & 0x7F; int audio_level = data[0] & 0x7F;
*extension = AudioLevel(voice_activity, audio_level);
return true;
}
// Serializes `extension` into the single payload byte of the audio level
// header extension: the top bit carries the voice-activity flag and the low
// seven bits carry the level. Always returns true; a wrong buffer size or an
// out-of-range level is caught by the checks below.
bool AudioLevelExtension::Write(rtc::ArrayView<uint8_t> data,
                                const AudioLevel& extension) {
  // One-byte and two-byte format share the same data definition.
  RTC_DCHECK_EQ(data.size(), 1);
  RTC_CHECK_GE(extension.level(), 0);
  RTC_CHECK_LE(extension.level(), 0x7f);

  // The checks above guarantee the level fits in seven bits.
  uint8_t encoded = static_cast<uint8_t>(extension.level());
  if (extension.voice_activity()) {
    encoded |= 0x80;
  }
  data[0] = encoded;
  return true;
}
// Legacy overload kept for callers that have not yet moved to AudioLevel.
// Parses `data` through the AudioLevel overload and copies the result into
// the two out-parameters.
//
// Returns true on success. Returns false, leaving `*voice_activity` and
// `*audio_level` untouched, when the AudioLevel overload rejects `data`.
bool AudioLevelExtension::Parse(rtc::ArrayView<const uint8_t> data,
                                bool* voice_activity,
                                uint8_t* audio_level) {
  AudioLevel extension;
  // Propagate a parse failure instead of reporting success with the values
  // of a default-constructed AudioLevel.
  if (!Parse(data, &extension)) {
    return false;
  }
  *voice_activity = extension.voice_activity();
  // A successfully parsed level has been masked to seven bits, so the
  // narrowing conversion is lossless.
  *audio_level = static_cast<uint8_t>(extension.level());
  return true;
}
bool AudioLevelExtension::Write(rtc::ArrayView<uint8_t> data, bool AudioLevelExtension::Write(rtc::ArrayView<uint8_t> data,
bool voice_activity, bool voice_activity,
uint8_t audio_level) { uint8_t audio_level) {
// One-byte and two-byte format share the same data definition. AudioLevel extension(voice_activity, audio_level);
RTC_DCHECK_EQ(data.size(), 1); return Write(data, extension);
RTC_CHECK_LE(audio_level, 0x7f);
data[0] = (voice_activity ? 0x80 : 0x00) | audio_level;
return true;
} }
// An RTP Header Extension for Mixer-to-Client Audio Level Indication // An RTP Header Extension for Mixer-to-Client Audio Level Indication

View file

@ -84,21 +84,30 @@ class AbsoluteCaptureTimeExtension {
class AudioLevelExtension { class AudioLevelExtension {
public: public:
using value_type = AudioLevel;
static constexpr RTPExtensionType kId = kRtpExtensionAudioLevel; static constexpr RTPExtensionType kId = kRtpExtensionAudioLevel;
static constexpr uint8_t kValueSizeBytes = 1; static constexpr uint8_t kValueSizeBytes = 1;
static constexpr absl::string_view Uri() { static constexpr absl::string_view Uri() {
return RtpExtension::kAudioLevelUri; return RtpExtension::kAudioLevelUri;
} }
static bool Parse(rtc::ArrayView<const uint8_t> data, static bool Parse(rtc::ArrayView<const uint8_t> data, AudioLevel* extension);
bool* voice_activity, static size_t ValueSize(const AudioLevel& extension) {
uint8_t* audio_level);
static size_t ValueSize(bool voice_activity, uint8_t audio_level) {
return kValueSizeBytes; return kValueSizeBytes;
} }
static bool Write(rtc::ArrayView<uint8_t> data, static bool Write(rtc::ArrayView<uint8_t> data, const AudioLevel& extension);
[[deprecated("Use AudioLevel struct")]] static bool Parse(
rtc::ArrayView<const uint8_t> data,
bool* voice_activity,
uint8_t* audio_level);
[[deprecated("Use AudioLevel struct")]] static size_t ValueSize(
bool voice_activity, bool voice_activity,
uint8_t audio_level); uint8_t audio_level) {
return kValueSizeBytes;
}
[[deprecated("Use AudioLevel struct")]] static bool
Write(rtc::ArrayView<uint8_t> data, bool voice_activity, uint8_t audio_level);
}; };
class CsrcAudioLevel { class CsrcAudioLevel {

View file

@ -61,8 +61,7 @@ void RtpPacketReceived::GetHeader(RTPHeader* header) const {
&header->extension.feedback_request) || &header->extension.feedback_request) ||
GetExtension<TransportSequenceNumber>( GetExtension<TransportSequenceNumber>(
&header->extension.transportSequenceNumber); &header->extension.transportSequenceNumber);
header->extension.hasAudioLevel = GetExtension<AudioLevelExtension>( header->extension.set_audio_level(GetExtension<AudioLevelExtension>());
&header->extension.voiceActivity, &header->extension.audioLevel);
header->extension.hasVideoRotation = header->extension.hasVideoRotation =
GetExtension<VideoOrientation>(&header->extension.videoRotation); GetExtension<VideoOrientation>(&header->extension.videoRotation);
header->extension.hasVideoContentType = header->extension.hasVideoContentType =

View file

@ -237,7 +237,8 @@ TEST(RtpPacketTest, CreateWith2Extensions) {
packet.SetTimestamp(kTimestamp); packet.SetTimestamp(kTimestamp);
packet.SetSsrc(kSsrc); packet.SetSsrc(kSsrc);
packet.SetExtension<TransmissionOffset>(kTimeOffset); packet.SetExtension<TransmissionOffset>(kTimeOffset);
packet.SetExtension<AudioLevelExtension>(kVoiceActive, kAudioLevel); packet.SetExtension<AudioLevelExtension>(
AudioLevel(kVoiceActive, kAudioLevel));
EXPECT_THAT(kPacketWithTOAndAL, EXPECT_THAT(kPacketWithTOAndAL,
ElementsAreArray(packet.data(), packet.size())); ElementsAreArray(packet.data(), packet.size()));
} }
@ -257,7 +258,8 @@ TEST(RtpPacketTest, CreateWithTwoByteHeaderExtensionFirst) {
TimeDelta::Millis(340)); TimeDelta::Millis(340));
ASSERT_TRUE(packet.SetExtension<PlayoutDelayLimits>(playout_delay)); ASSERT_TRUE(packet.SetExtension<PlayoutDelayLimits>(playout_delay));
packet.SetExtension<TransmissionOffset>(kTimeOffset); packet.SetExtension<TransmissionOffset>(kTimeOffset);
packet.SetExtension<AudioLevelExtension>(kVoiceActive, kAudioLevel); packet.SetExtension<AudioLevelExtension>(
AudioLevel(kVoiceActive, kAudioLevel));
EXPECT_THAT(kPacketWithTwoByteExtensionIdFirst, EXPECT_THAT(kPacketWithTwoByteExtensionIdFirst,
ElementsAreArray(packet.data(), packet.size())); ElementsAreArray(packet.data(), packet.size()));
} }
@ -274,7 +276,8 @@ TEST(RtpPacketTest, CreateWithTwoByteHeaderExtensionLast) {
packet.SetTimestamp(kTimestamp); packet.SetTimestamp(kTimestamp);
packet.SetSsrc(kSsrc); packet.SetSsrc(kSsrc);
packet.SetExtension<TransmissionOffset>(kTimeOffset); packet.SetExtension<TransmissionOffset>(kTimeOffset);
packet.SetExtension<AudioLevelExtension>(kVoiceActive, kAudioLevel); packet.SetExtension<AudioLevelExtension>(
AudioLevel(kVoiceActive, kAudioLevel));
EXPECT_THAT(kPacketWithTOAndAL, EXPECT_THAT(kPacketWithTOAndAL,
ElementsAreArray(packet.data(), packet.size())); ElementsAreArray(packet.data(), packet.size()));
// Set extension that requires two-byte header. // Set extension that requires two-byte header.
@ -334,8 +337,8 @@ TEST(RtpPacketTest, TryToCreateTwoByteHeaderNotSupported) {
extensions.Register<AudioLevelExtension>(kTwoByteExtensionId); extensions.Register<AudioLevelExtension>(kTwoByteExtensionId);
RtpPacketToSend packet(&extensions); RtpPacketToSend packet(&extensions);
// Set extension that requires two-byte header. // Set extension that requires two-byte header.
EXPECT_FALSE( EXPECT_FALSE(packet.SetExtension<AudioLevelExtension>(
packet.SetExtension<AudioLevelExtension>(kVoiceActive, kAudioLevel)); AudioLevel(kVoiceActive, kAudioLevel)));
} }
TEST(RtpPacketTest, CreateTwoByteHeaderSupportedIfExtmapAllowMixed) { TEST(RtpPacketTest, CreateTwoByteHeaderSupportedIfExtmapAllowMixed) {
@ -343,8 +346,8 @@ TEST(RtpPacketTest, CreateTwoByteHeaderSupportedIfExtmapAllowMixed) {
extensions.Register<AudioLevelExtension>(kTwoByteExtensionId); extensions.Register<AudioLevelExtension>(kTwoByteExtensionId);
RtpPacketToSend packet(&extensions); RtpPacketToSend packet(&extensions);
// Set extension that requires two-byte header. // Set extension that requires two-byte header.
EXPECT_TRUE( EXPECT_TRUE(packet.SetExtension<AudioLevelExtension>(
packet.SetExtension<AudioLevelExtension>(kVoiceActive, kAudioLevel)); AudioLevel(kVoiceActive, kAudioLevel)));
} }
TEST(RtpPacketTest, CreateWithMaxSizeHeaderExtension) { TEST(RtpPacketTest, CreateWithMaxSizeHeaderExtension) {
@ -409,8 +412,8 @@ TEST(RtpPacketTest, SetReservedExtensionsAfterPayload) {
EXPECT_TRUE(packet.ReserveExtension<TransmissionOffset>()); EXPECT_TRUE(packet.ReserveExtension<TransmissionOffset>());
packet.SetPayloadSize(kPayloadSize); packet.SetPayloadSize(kPayloadSize);
// Can't set extension after payload. // Can't set extension after payload.
EXPECT_FALSE( EXPECT_FALSE(packet.SetExtension<AudioLevelExtension>(
packet.SetExtension<AudioLevelExtension>(kVoiceActive, kAudioLevel)); AudioLevel(kVoiceActive, kAudioLevel)));
// Unless reserved. // Unless reserved.
EXPECT_TRUE(packet.SetExtension<TransmissionOffset>(kTimeOffset)); EXPECT_TRUE(packet.SetExtension<TransmissionOffset>(kTimeOffset));
} }
@ -688,12 +691,10 @@ TEST(RtpPacketTest, ParseWith2Extensions) {
int32_t time_offset; int32_t time_offset;
EXPECT_TRUE(packet.GetExtension<TransmissionOffset>(&time_offset)); EXPECT_TRUE(packet.GetExtension<TransmissionOffset>(&time_offset));
EXPECT_EQ(kTimeOffset, time_offset); EXPECT_EQ(kTimeOffset, time_offset);
bool voice_active; AudioLevel audio_level;
uint8_t audio_level; EXPECT_TRUE(packet.GetExtension<AudioLevelExtension>(&audio_level));
EXPECT_TRUE( EXPECT_EQ(kVoiceActive, audio_level.voice_activity());
packet.GetExtension<AudioLevelExtension>(&voice_active, &audio_level)); EXPECT_EQ(kAudioLevel, audio_level.level());
EXPECT_EQ(kVoiceActive, voice_active);
EXPECT_EQ(kAudioLevel, audio_level);
} }
TEST(RtpPacketTest, ParseSecondPacketWithFewerExtensions) { TEST(RtpPacketTest, ParseSecondPacketWithFewerExtensions) {
@ -721,10 +722,8 @@ TEST(RtpPacketTest, ParseWith2ExtensionsInvalidPadding) {
int32_t time_offset; int32_t time_offset;
EXPECT_TRUE(packet.GetExtension<TransmissionOffset>(&time_offset)); EXPECT_TRUE(packet.GetExtension<TransmissionOffset>(&time_offset));
EXPECT_EQ(kTimeOffset, time_offset); EXPECT_EQ(kTimeOffset, time_offset);
bool voice_active; AudioLevel audio_level;
uint8_t audio_level; EXPECT_FALSE(packet.GetExtension<AudioLevelExtension>(&audio_level));
EXPECT_FALSE(
packet.GetExtension<AudioLevelExtension>(&voice_active, &audio_level));
} }
TEST(RtpPacketTest, ParseWith2ExtensionsReservedExtensionId) { TEST(RtpPacketTest, ParseWith2ExtensionsReservedExtensionId) {
@ -737,10 +736,8 @@ TEST(RtpPacketTest, ParseWith2ExtensionsReservedExtensionId) {
int32_t time_offset; int32_t time_offset;
EXPECT_TRUE(packet.GetExtension<TransmissionOffset>(&time_offset)); EXPECT_TRUE(packet.GetExtension<TransmissionOffset>(&time_offset));
EXPECT_EQ(kTimeOffset, time_offset); EXPECT_EQ(kTimeOffset, time_offset);
bool voice_active; AudioLevel audio_level;
uint8_t audio_level; EXPECT_FALSE(packet.GetExtension<AudioLevelExtension>(&audio_level));
EXPECT_FALSE(
packet.GetExtension<AudioLevelExtension>(&voice_active, &audio_level));
} }
TEST(RtpPacketTest, ParseWithAllFeatures) { TEST(RtpPacketTest, ParseWithAllFeatures) {
@ -792,12 +789,10 @@ TEST(RtpPacketTest, ParseTwoByteHeaderExtensionWithPadding) {
int32_t time_offset; int32_t time_offset;
EXPECT_TRUE(packet.GetExtension<TransmissionOffset>(&time_offset)); EXPECT_TRUE(packet.GetExtension<TransmissionOffset>(&time_offset));
EXPECT_EQ(kTimeOffset, time_offset); EXPECT_EQ(kTimeOffset, time_offset);
bool voice_active; AudioLevel audio_level;
uint8_t audio_level; EXPECT_TRUE(packet.GetExtension<AudioLevelExtension>(&audio_level));
EXPECT_TRUE( EXPECT_EQ(kVoiceActive, audio_level.voice_activity());
packet.GetExtension<AudioLevelExtension>(&voice_active, &audio_level)); EXPECT_EQ(kAudioLevel, audio_level.level());
EXPECT_EQ(kVoiceActive, voice_active);
EXPECT_EQ(kAudioLevel, audio_level);
} }
TEST(RtpPacketTest, ParseWithExtensionDelayed) { TEST(RtpPacketTest, ParseWithExtensionDelayed) {
@ -1221,7 +1216,8 @@ TEST(RtpPacketTest, RemoveMultipleExtensions) {
packet.SetTimestamp(kTimestamp); packet.SetTimestamp(kTimestamp);
packet.SetSsrc(kSsrc); packet.SetSsrc(kSsrc);
packet.SetExtension<TransmissionOffset>(kTimeOffset); packet.SetExtension<TransmissionOffset>(kTimeOffset);
packet.SetExtension<AudioLevelExtension>(kVoiceActive, kAudioLevel); packet.SetExtension<AudioLevelExtension>(
AudioLevel(kVoiceActive, kAudioLevel));
EXPECT_THAT(kPacketWithTOAndAL, EXPECT_THAT(kPacketWithTOAndAL,
ElementsAreArray(packet.data(), packet.size())); ElementsAreArray(packet.data(), packet.size()));
@ -1249,7 +1245,8 @@ TEST(RtpPacketTest, RemoveExtensionPreservesOtherUnregisteredExtensions) {
packet.SetTimestamp(kTimestamp); packet.SetTimestamp(kTimestamp);
packet.SetSsrc(kSsrc); packet.SetSsrc(kSsrc);
packet.SetExtension<TransmissionOffset>(kTimeOffset); packet.SetExtension<TransmissionOffset>(kTimeOffset);
packet.SetExtension<AudioLevelExtension>(kVoiceActive, kAudioLevel); packet.SetExtension<AudioLevelExtension>(
AudioLevel(kVoiceActive, kAudioLevel));
EXPECT_THAT(kPacketWithTOAndAL, EXPECT_THAT(kPacketWithTOAndAL,
ElementsAreArray(packet.data(), packet.size())); ElementsAreArray(packet.data(), packet.size()));

View file

@ -244,8 +244,8 @@ bool RTPSenderAudio::SendAudio(const RtpAudioFrame& frame) {
packet->set_capture_time(clock_->CurrentTime()); packet->set_capture_time(clock_->CurrentTime());
// Set audio level extension, if included. // Set audio level extension, if included.
packet->SetExtension<AudioLevelExtension>( packet->SetExtension<AudioLevelExtension>(
frame.type == AudioFrameType::kAudioFrameSpeech, AudioLevel(frame.type == AudioFrameType::kAudioFrameSpeech,
frame.audio_level_dbov.value_or(127)); frame.audio_level_dbov.value_or(127)));
if (absolute_capture_time.has_value()) { if (absolute_capture_time.has_value()) {
// It also checks that extension was registered during SDP negotiation. If // It also checks that extension was registered during SDP negotiation. If

View file

@ -121,12 +121,11 @@ TEST_F(RtpSenderAudioTest, SendAudioWithAudioLevelExtension) {
auto sent_payload = transport_.last_sent_packet().payload(); auto sent_payload = transport_.last_sent_packet().payload();
EXPECT_THAT(sent_payload, ElementsAreArray(payload)); EXPECT_THAT(sent_payload, ElementsAreArray(payload));
// Verify AudioLevel extension. // Verify AudioLevel extension.
bool voice_activity; AudioLevel audio_level;
uint8_t audio_level;
EXPECT_TRUE(transport_.last_sent_packet().GetExtension<AudioLevelExtension>( EXPECT_TRUE(transport_.last_sent_packet().GetExtension<AudioLevelExtension>(
&voice_activity, &audio_level)); &audio_level));
EXPECT_EQ(kAudioLevel, audio_level); EXPECT_EQ(kAudioLevel, audio_level.level());
EXPECT_FALSE(voice_activity); EXPECT_FALSE(audio_level.voice_activity());
} }
TEST_F(RtpSenderAudioTest, SendAudioWithoutAbsoluteCaptureTime) { TEST_F(RtpSenderAudioTest, SendAudioWithoutAbsoluteCaptureTime) {

View file

@ -333,10 +333,11 @@ bool Convert(std::string inputfile,
fprintf(output, " transmission_offset=%d", fprintf(output, " transmission_offset=%d",
event.rtp.header.extension.transmissionTimeOffset); event.rtp.header.extension.transmissionTimeOffset);
} }
if (event.rtp.header.extension.hasAudioLevel) { if (event.rtp.header.extension.audio_level()) {
fprintf(output, " voice_activity=%d", fprintf(output, " voice_activity=%d",
event.rtp.header.extension.voiceActivity); event.rtp.header.extension.audio_level()->voice_activity());
fprintf(output, " audio_level=%u", event.rtp.header.extension.audioLevel); fprintf(output, " audio_level=%u",
event.rtp.header.extension.audio_level()->level());
} }
if (event.rtp.header.extension.hasVideoRotation) { if (event.rtp.header.extension.hasVideoRotation) {
fprintf(output, " video_rotation=%d", fprintf(output, " video_rotation=%d",
@ -367,10 +368,11 @@ bool Convert(std::string inputfile,
fprintf(output, " transmission_offset=%d", fprintf(output, " transmission_offset=%d",
event.rtp.header.extension.transmissionTimeOffset); event.rtp.header.extension.transmissionTimeOffset);
} }
if (event.rtp.header.extension.hasAudioLevel) { if (event.rtp.header.extension.audio_level()) {
fprintf(output, " voice_activity=%d", fprintf(output, " voice_activity=%d",
event.rtp.header.extension.voiceActivity); event.rtp.header.extension.audio_level()->voice_activity());
fprintf(output, " audio_level=%u", event.rtp.header.extension.audioLevel); fprintf(output, " audio_level=%u",
event.rtp.header.extension.audio_level()->level());
} }
if (event.rtp.header.extension.hasVideoRotation) { if (event.rtp.header.extension.hasVideoRotation) {
fprintf(output, " video_rotation=%d", fprintf(output, " video_rotation=%d",

View file

@ -740,11 +740,12 @@ void EventLogAnalyzer::CreateAudioLevelGraph(PacketDirection direction,
TimeSeries time_series(GetStreamName(parsed_log_, direction, stream.ssrc), TimeSeries time_series(GetStreamName(parsed_log_, direction, stream.ssrc),
LineStyle::kLine); LineStyle::kLine);
for (auto& packet : stream.packet_view) { for (auto& packet : stream.packet_view) {
if (packet.header.extension.hasAudioLevel) { if (packet.header.extension.audio_level()) {
float x = config_.GetCallTimeSec(packet.log_time()); float x = config_.GetCallTimeSec(packet.log_time());
// The audio level is stored in -dBov (so e.g. -10 dBov is stored as 10) // The audio level is stored in -dBov (so e.g. -10 dBov is stored as 10)
// Here we convert it to dBov. // Here we convert it to dBov.
float y = static_cast<float>(-packet.header.extension.audioLevel); float y =
static_cast<float>(-packet.header.extension.audio_level()->level());
time_series.points.emplace_back(TimeSeriesPoint(x, y)); time_series.points.emplace_back(TimeSeriesPoint(x, y));
} }
} }

View file

@ -72,11 +72,11 @@ void FuzzOneInput(const uint8_t* data, size_t size) {
int32_t offset; int32_t offset;
packet.GetExtension<TransmissionOffset>(&offset); packet.GetExtension<TransmissionOffset>(&offset);
break; break;
case kRtpExtensionAudioLevel: case kRtpExtensionAudioLevel: {
bool voice_activity; AudioLevel audio_level;
uint8_t audio_level; packet.GetExtension<AudioLevelExtension>(&audio_level);
packet.GetExtension<AudioLevelExtension>(&voice_activity, &audio_level);
break; break;
}
case kRtpExtensionCsrcAudioLevel: { case kRtpExtensionCsrcAudioLevel: {
std::vector<uint8_t> audio_levels; std::vector<uint8_t> audio_levels;
packet.GetExtension<CsrcAudioLevel>(&audio_levels); packet.GetExtension<CsrcAudioLevel>(&audio_levels);