Move RtpDepacketizerH264 into own files

Bug: webrtc:11152
Change-Id: Iab4975e9f378b177a2abf34559f9b74752e69843
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/165582
Reviewed-by: Markus Handell <handellm@webrtc.org>
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30212}
This commit is contained in:
Danil Chapovalov 2020-01-09 18:55:02 +01:00 committed by Commit Bot
parent 539f9b376e
commit b42aeaa3fb
8 changed files with 806 additions and 682 deletions

View file

@ -215,6 +215,8 @@ rtc_library("rtp_rtcp") {
"source/video_rtp_depacketizer.h", "source/video_rtp_depacketizer.h",
"source/video_rtp_depacketizer_generic.cc", "source/video_rtp_depacketizer_generic.cc",
"source/video_rtp_depacketizer_generic.h", "source/video_rtp_depacketizer_generic.h",
"source/video_rtp_depacketizer_h264.cc",
"source/video_rtp_depacketizer_h264.h",
"source/video_rtp_depacketizer_raw.cc", "source/video_rtp_depacketizer_raw.cc",
"source/video_rtp_depacketizer_raw.h", "source/video_rtp_depacketizer_raw.h",
"source/video_rtp_depacketizer_vp8.cc", "source/video_rtp_depacketizer_vp8.cc",
@ -486,6 +488,7 @@ if (rtc_include_tests) {
"source/ulpfec_header_reader_writer_unittest.cc", "source/ulpfec_header_reader_writer_unittest.cc",
"source/ulpfec_receiver_unittest.cc", "source/ulpfec_receiver_unittest.cc",
"source/video_rtp_depacketizer_generic_unittest.cc", "source/video_rtp_depacketizer_generic_unittest.cc",
"source/video_rtp_depacketizer_h264_unittest.cc",
"source/video_rtp_depacketizer_raw_unittest.cc", "source/video_rtp_depacketizer_raw_unittest.cc",
"source/video_rtp_depacketizer_vp8_unittest.cc", "source/video_rtp_depacketizer_vp8_unittest.cc",
"source/video_rtp_depacketizer_vp9_unittest.cc", "source/video_rtp_depacketizer_vp9_unittest.cc",

View file

@ -15,9 +15,9 @@
#include "absl/memory/memory.h" #include "absl/memory/memory.h"
#include "absl/types/optional.h" #include "absl/types/optional.h"
#include "modules/rtp_rtcp/source/rtp_depacketizer_av1.h" #include "modules/rtp_rtcp/source/rtp_depacketizer_av1.h"
#include "modules/rtp_rtcp/source/rtp_format_h264.h"
#include "modules/rtp_rtcp/source/video_rtp_depacketizer.h" #include "modules/rtp_rtcp/source/video_rtp_depacketizer.h"
#include "modules/rtp_rtcp/source/video_rtp_depacketizer_generic.h" #include "modules/rtp_rtcp/source/video_rtp_depacketizer_generic.h"
#include "modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h"
#include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp8.h" #include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp8.h"
#include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.h" #include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.h"
#include "rtc_base/checks.h" #include "rtc_base/checks.h"

View file

@ -38,7 +38,6 @@ namespace {
static const size_t kNalHeaderSize = 1; static const size_t kNalHeaderSize = 1;
static const size_t kFuAHeaderSize = 2; static const size_t kFuAHeaderSize = 2;
static const size_t kLengthFieldSize = 2; static const size_t kLengthFieldSize = 2;
static const size_t kStapAHeaderSize = kNalHeaderSize + kLengthFieldSize;
// Bit masks for FU (A and B) indicators. // Bit masks for FU (A and B) indicators.
enum NalDefs : uint8_t { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F }; enum NalDefs : uint8_t { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F };
@ -46,29 +45,6 @@ enum NalDefs : uint8_t { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F };
// Bit masks for FU (A and B) headers. // Bit masks for FU (A and B) headers.
enum FuDefs : uint8_t { kSBit = 0x80, kEBit = 0x40, kRBit = 0x20 }; enum FuDefs : uint8_t { kSBit = 0x80, kEBit = 0x40, kRBit = 0x20 };
// TODO(pbos): Avoid parsing this here as well as inside the jitter buffer.
bool ParseStapAStartOffsets(const uint8_t* nalu_ptr,
size_t length_remaining,
std::vector<size_t>* offsets) {
size_t offset = 0;
while (length_remaining > 0) {
// Buffer doesn't contain room for additional nalu length.
if (length_remaining < sizeof(uint16_t))
return false;
uint16_t nalu_size = ByteReader<uint16_t>::ReadBigEndian(nalu_ptr);
nalu_ptr += sizeof(uint16_t);
length_remaining -= sizeof(uint16_t);
if (nalu_size > length_remaining)
return false;
nalu_ptr += nalu_size;
length_remaining -= nalu_size;
offsets->push_back(offset + kStapAHeaderSize);
offset += kLengthFieldSize + nalu_size;
}
return true;
}
} // namespace } // namespace
RtpPacketizerH264::RtpPacketizerH264( RtpPacketizerH264::RtpPacketizerH264(
@ -342,263 +318,4 @@ void RtpPacketizerH264::NextFragmentPacket(RtpPacketToSend* rtp_packet) {
packets_.pop(); packets_.pop();
} }
RtpDepacketizerH264::RtpDepacketizerH264() : offset_(0), length_(0) {}
RtpDepacketizerH264::~RtpDepacketizerH264() {}
bool RtpDepacketizerH264::Parse(ParsedPayload* parsed_payload,
const uint8_t* payload_data,
size_t payload_data_length) {
RTC_CHECK(parsed_payload != nullptr);
if (payload_data_length == 0) {
RTC_LOG(LS_ERROR) << "Empty payload.";
return false;
}
offset_ = 0;
length_ = payload_data_length;
modified_buffer_.reset();
uint8_t nal_type = payload_data[0] & kTypeMask;
parsed_payload->video_header()
.video_type_header.emplace<RTPVideoHeaderH264>();
if (nal_type == H264::NaluType::kFuA) {
// Fragmented NAL units (FU-A).
if (!ParseFuaNalu(parsed_payload, payload_data))
return false;
} else {
// We handle STAP-A and single NALU's the same way here. The jitter buffer
// will depacketize the STAP-A into NAL units later.
// TODO(sprang): Parse STAP-A offsets here and store in fragmentation vec.
if (!ProcessStapAOrSingleNalu(parsed_payload, payload_data))
return false;
}
const uint8_t* payload =
modified_buffer_ ? modified_buffer_->data() : payload_data;
parsed_payload->payload = payload + offset_;
parsed_payload->payload_length = length_;
return true;
}
bool RtpDepacketizerH264::ProcessStapAOrSingleNalu(
ParsedPayload* parsed_payload,
const uint8_t* payload_data) {
parsed_payload->video_header().width = 0;
parsed_payload->video_header().height = 0;
parsed_payload->video_header().codec = kVideoCodecH264;
parsed_payload->video_header().simulcastIdx = 0;
parsed_payload->video_header().is_first_packet_in_frame = true;
auto& h264_header = absl::get<RTPVideoHeaderH264>(
parsed_payload->video_header().video_type_header);
const uint8_t* nalu_start = payload_data + kNalHeaderSize;
const size_t nalu_length = length_ - kNalHeaderSize;
uint8_t nal_type = payload_data[0] & kTypeMask;
std::vector<size_t> nalu_start_offsets;
if (nal_type == H264::NaluType::kStapA) {
// Skip the StapA header (StapA NAL type + length).
if (length_ <= kStapAHeaderSize) {
RTC_LOG(LS_ERROR) << "StapA header truncated.";
return false;
}
if (!ParseStapAStartOffsets(nalu_start, nalu_length, &nalu_start_offsets)) {
RTC_LOG(LS_ERROR) << "StapA packet with incorrect NALU packet lengths.";
return false;
}
h264_header.packetization_type = kH264StapA;
nal_type = payload_data[kStapAHeaderSize] & kTypeMask;
} else {
h264_header.packetization_type = kH264SingleNalu;
nalu_start_offsets.push_back(0);
}
h264_header.nalu_type = nal_type;
parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameDelta;
nalu_start_offsets.push_back(length_ + kLengthFieldSize); // End offset.
for (size_t i = 0; i < nalu_start_offsets.size() - 1; ++i) {
size_t start_offset = nalu_start_offsets[i];
// End offset is actually start offset for next unit, excluding length field
// so remove that from this units length.
size_t end_offset = nalu_start_offsets[i + 1] - kLengthFieldSize;
if (end_offset - start_offset < H264::kNaluTypeSize) {
RTC_LOG(LS_ERROR) << "STAP-A packet too short";
return false;
}
NaluInfo nalu;
nalu.type = payload_data[start_offset] & kTypeMask;
nalu.sps_id = -1;
nalu.pps_id = -1;
start_offset += H264::kNaluTypeSize;
switch (nalu.type) {
case H264::NaluType::kSps: {
// Check if VUI is present in SPS and if it needs to be modified to
// avoid
// excessive decoder latency.
// Copy any previous data first (likely just the first header).
std::unique_ptr<rtc::Buffer> output_buffer(new rtc::Buffer());
if (start_offset)
output_buffer->AppendData(payload_data, start_offset);
absl::optional<SpsParser::SpsState> sps;
SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps(
&payload_data[start_offset], end_offset - start_offset, &sps,
nullptr, output_buffer.get(), SpsVuiRewriter::Direction::kIncoming);
if (result == SpsVuiRewriter::ParseResult::kVuiRewritten) {
if (modified_buffer_) {
RTC_LOG(LS_WARNING)
<< "More than one H264 SPS NAL units needing "
"rewriting found within a single STAP-A packet. "
"Keeping the first and rewriting the last.";
}
// Rewrite length field to new SPS size.
if (h264_header.packetization_type == kH264StapA) {
size_t length_field_offset =
start_offset - (H264::kNaluTypeSize + kLengthFieldSize);
// Stap-A Length includes payload data and type header.
size_t rewritten_size =
output_buffer->size() - start_offset + H264::kNaluTypeSize;
ByteWriter<uint16_t>::WriteBigEndian(
&(*output_buffer)[length_field_offset], rewritten_size);
}
// Append rest of packet.
output_buffer->AppendData(&payload_data[end_offset],
nalu_length + kNalHeaderSize - end_offset);
modified_buffer_ = std::move(output_buffer);
length_ = modified_buffer_->size();
}
if (sps) {
parsed_payload->video_header().width = sps->width;
parsed_payload->video_header().height = sps->height;
nalu.sps_id = sps->id;
} else {
RTC_LOG(LS_WARNING) << "Failed to parse SPS id from SPS slice.";
}
parsed_payload->video_header().frame_type =
VideoFrameType::kVideoFrameKey;
break;
}
case H264::NaluType::kPps: {
uint32_t pps_id;
uint32_t sps_id;
if (PpsParser::ParsePpsIds(&payload_data[start_offset],
end_offset - start_offset, &pps_id,
&sps_id)) {
nalu.pps_id = pps_id;
nalu.sps_id = sps_id;
} else {
RTC_LOG(LS_WARNING)
<< "Failed to parse PPS id and SPS id from PPS slice.";
}
break;
}
case H264::NaluType::kIdr:
parsed_payload->video_header().frame_type =
VideoFrameType::kVideoFrameKey;
RTC_FALLTHROUGH();
case H264::NaluType::kSlice: {
absl::optional<uint32_t> pps_id = PpsParser::ParsePpsIdFromSlice(
&payload_data[start_offset], end_offset - start_offset);
if (pps_id) {
nalu.pps_id = *pps_id;
} else {
RTC_LOG(LS_WARNING) << "Failed to parse PPS id from slice of type: "
<< static_cast<int>(nalu.type);
}
break;
}
// Slices below don't contain SPS or PPS ids.
case H264::NaluType::kAud:
case H264::NaluType::kEndOfSequence:
case H264::NaluType::kEndOfStream:
case H264::NaluType::kFiller:
case H264::NaluType::kSei:
break;
case H264::NaluType::kStapA:
case H264::NaluType::kFuA:
RTC_LOG(LS_WARNING) << "Unexpected STAP-A or FU-A received.";
return false;
}
if (h264_header.nalus_length == kMaxNalusPerPacket) {
RTC_LOG(LS_WARNING)
<< "Received packet containing more than " << kMaxNalusPerPacket
<< " NAL units. Will not keep track sps and pps ids for all of them.";
} else {
h264_header.nalus[h264_header.nalus_length++] = nalu;
}
}
return true;
}
bool RtpDepacketizerH264::ParseFuaNalu(
RtpDepacketizer::ParsedPayload* parsed_payload,
const uint8_t* payload_data) {
if (length_ < kFuAHeaderSize) {
RTC_LOG(LS_ERROR) << "FU-A NAL units truncated.";
return false;
}
uint8_t fnri = payload_data[0] & (kFBit | kNriMask);
uint8_t original_nal_type = payload_data[1] & kTypeMask;
bool first_fragment = (payload_data[1] & kSBit) > 0;
NaluInfo nalu;
nalu.type = original_nal_type;
nalu.sps_id = -1;
nalu.pps_id = -1;
if (first_fragment) {
offset_ = 0;
length_ -= kNalHeaderSize;
absl::optional<uint32_t> pps_id = PpsParser::ParsePpsIdFromSlice(
payload_data + 2 * kNalHeaderSize, length_ - kNalHeaderSize);
if (pps_id) {
nalu.pps_id = *pps_id;
} else {
RTC_LOG(LS_WARNING)
<< "Failed to parse PPS from first fragment of FU-A NAL "
"unit with original type: "
<< static_cast<int>(nalu.type);
}
uint8_t original_nal_header = fnri | original_nal_type;
modified_buffer_.reset(new rtc::Buffer());
modified_buffer_->AppendData(payload_data + kNalHeaderSize, length_);
(*modified_buffer_)[0] = original_nal_header;
} else {
offset_ = kFuAHeaderSize;
length_ -= kFuAHeaderSize;
}
if (original_nal_type == H264::NaluType::kIdr) {
parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameKey;
} else {
parsed_payload->video_header().frame_type =
VideoFrameType::kVideoFrameDelta;
}
parsed_payload->video_header().width = 0;
parsed_payload->video_header().height = 0;
parsed_payload->video_header().codec = kVideoCodecH264;
parsed_payload->video_header().simulcastIdx = 0;
parsed_payload->video_header().is_first_packet_in_frame = first_fragment;
auto& h264_header = absl::get<RTPVideoHeaderH264>(
parsed_payload->video_header().video_type_header);
h264_header.packetization_type = kH264FuA;
h264_header.nalu_type = original_nal_type;
if (first_fragment) {
h264_header.nalus[h264_header.nalus_length] = nalu;
h264_header.nalus_length = 1;
}
return true;
}
} // namespace webrtc } // namespace webrtc

View file

@ -22,6 +22,7 @@
#include "modules/include/module_common_types.h" #include "modules/include/module_common_types.h"
#include "modules/rtp_rtcp/source/rtp_format.h" #include "modules/rtp_rtcp/source/rtp_format.h"
#include "modules/rtp_rtcp/source/rtp_packet_to_send.h" #include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
#include "modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h"
#include "modules/video_coding/codecs/h264/include/h264_globals.h" #include "modules/video_coding/codecs/h264/include/h264_globals.h"
#include "rtc_base/buffer.h" #include "rtc_base/buffer.h"
#include "rtc_base/constructor_magic.h" #include "rtc_base/constructor_magic.h"
@ -87,26 +88,5 @@ class RtpPacketizerH264 : public RtpPacketizer {
RTC_DISALLOW_COPY_AND_ASSIGN(RtpPacketizerH264); RTC_DISALLOW_COPY_AND_ASSIGN(RtpPacketizerH264);
}; };
// Depacketizer for H264.
class RtpDepacketizerH264 : public RtpDepacketizer {
public:
RtpDepacketizerH264();
~RtpDepacketizerH264() override;
bool Parse(ParsedPayload* parsed_payload,
const uint8_t* payload_data,
size_t payload_data_length) override;
private:
bool ParseFuaNalu(RtpDepacketizer::ParsedPayload* parsed_payload,
const uint8_t* payload_data);
bool ProcessStapAOrSingleNalu(RtpDepacketizer::ParsedPayload* parsed_payload,
const uint8_t* payload_data);
size_t offset_;
size_t length_;
std::unique_ptr<rtc::Buffer> modified_buffer_;
};
} // namespace webrtc } // namespace webrtc
#endif // MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H264_H_ #endif // MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H264_H_

View file

@ -497,382 +497,5 @@ TEST(RtpPacketizerH264Test, RejectsOverlongDataInPacketizationMode0) {
EXPECT_THAT(packets, IsEmpty()); EXPECT_THAT(packets, IsEmpty());
} }
const uint8_t kOriginalSps[] = {kSps, 0x00, 0x00, 0x03, 0x03,
0xF4, 0x05, 0x03, 0xC7, 0xC0};
const uint8_t kRewrittenSps[] = {kSps, 0x00, 0x00, 0x03, 0x03, 0xF4, 0x05, 0x03,
0xC7, 0xE0, 0x1B, 0x41, 0x10, 0x8D, 0x00};
const uint8_t kIdrOne[] = {kIdr, 0xFF, 0x00, 0x00, 0x04};
const uint8_t kIdrTwo[] = {kIdr, 0xFF, 0x00, 0x11};
struct H264ParsedPayload : public RtpDepacketizer::ParsedPayload {
RTPVideoHeaderH264& h264() {
return absl::get<RTPVideoHeaderH264>(video.video_type_header);
}
};
class RtpDepacketizerH264Test : public ::testing::Test {
protected:
RtpDepacketizerH264Test()
: depacketizer_(std::make_unique<RtpDepacketizerH264>()) {}
void ExpectPacket(H264ParsedPayload* parsed_payload,
const uint8_t* data,
size_t length) {
ASSERT_TRUE(parsed_payload != NULL);
EXPECT_THAT(std::vector<uint8_t>(
parsed_payload->payload,
parsed_payload->payload + parsed_payload->payload_length),
::testing::ElementsAreArray(data, length));
}
std::unique_ptr<RtpDepacketizer> depacketizer_;
};
TEST_F(RtpDepacketizerH264Test, TestSingleNalu) {
uint8_t packet[2] = {0x05, 0xFF}; // F=0, NRI=0, Type=5 (IDR).
H264ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet)));
ExpectPacket(&payload, packet, sizeof(packet));
EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type);
EXPECT_EQ(kVideoCodecH264, payload.video_header().codec);
EXPECT_TRUE(payload.video_header().is_first_packet_in_frame);
EXPECT_EQ(kH264SingleNalu, payload.h264().packetization_type);
EXPECT_EQ(kIdr, payload.h264().nalu_type);
}
TEST_F(RtpDepacketizerH264Test, TestSingleNaluSpsWithResolution) {
uint8_t packet[] = {kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50,
0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0,
0x00, 0x00, 0x03, 0x2A, 0xE0, 0xF1, 0x83, 0x25};
H264ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet)));
ExpectPacket(&payload, packet, sizeof(packet));
EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type);
EXPECT_EQ(kVideoCodecH264, payload.video_header().codec);
EXPECT_TRUE(payload.video_header().is_first_packet_in_frame);
EXPECT_EQ(kH264SingleNalu, payload.h264().packetization_type);
EXPECT_EQ(1280u, payload.video_header().width);
EXPECT_EQ(720u, payload.video_header().height);
}
TEST_F(RtpDepacketizerH264Test, TestStapAKey) {
// clang-format off
const NaluInfo kExpectedNalus[] = { {H264::kSps, 0, -1},
{H264::kPps, 1, 2},
{H264::kIdr, -1, 0} };
uint8_t packet[] = {kStapA, // F=0, NRI=0, Type=24.
// Length, nal header, payload.
0, 0x18, kExpectedNalus[0].type,
0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05, 0xBA,
0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00, 0x00, 0x03,
0x2A, 0xE0, 0xF1, 0x83, 0x25,
0, 0xD, kExpectedNalus[1].type,
0x69, 0xFC, 0x0, 0x0, 0x3, 0x0, 0x7, 0xFF, 0xFF, 0xFF,
0xF6, 0x40,
0, 0xB, kExpectedNalus[2].type,
0x85, 0xB8, 0x0, 0x4, 0x0, 0x0, 0x13, 0x93, 0x12, 0x0};
// clang-format on
H264ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet)));
ExpectPacket(&payload, packet, sizeof(packet));
EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type);
EXPECT_EQ(kVideoCodecH264, payload.video_header().codec);
EXPECT_TRUE(payload.video_header().is_first_packet_in_frame);
const RTPVideoHeaderH264& h264 = payload.h264();
EXPECT_EQ(kH264StapA, h264.packetization_type);
// NALU type for aggregated packets is the type of the first packet only.
EXPECT_EQ(kSps, h264.nalu_type);
ASSERT_EQ(3u, h264.nalus_length);
for (size_t i = 0; i < h264.nalus_length; ++i) {
EXPECT_EQ(kExpectedNalus[i].type, h264.nalus[i].type)
<< "Failed parsing nalu " << i;
EXPECT_EQ(kExpectedNalus[i].sps_id, h264.nalus[i].sps_id)
<< "Failed parsing nalu " << i;
EXPECT_EQ(kExpectedNalus[i].pps_id, h264.nalus[i].pps_id)
<< "Failed parsing nalu " << i;
}
}
TEST_F(RtpDepacketizerH264Test, TestStapANaluSpsWithResolution) {
uint8_t packet[] = {kStapA, // F=0, NRI=0, Type=24.
// Length (2 bytes), nal header, payload.
0x00, 0x19, kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40,
0x50, 0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0,
0x00, 0x00, 0x03, 0x2A, 0xE0, 0xF1, 0x83, 0x25, 0x80,
0x00, 0x03, kIdr, 0xFF, 0x00, 0x00, 0x04, kIdr, 0xFF,
0x00, 0x11};
H264ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet)));
ExpectPacket(&payload, packet, sizeof(packet));
EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type);
EXPECT_EQ(kVideoCodecH264, payload.video_header().codec);
EXPECT_TRUE(payload.video_header().is_first_packet_in_frame);
EXPECT_EQ(kH264StapA, payload.h264().packetization_type);
EXPECT_EQ(1280u, payload.video_header().width);
EXPECT_EQ(720u, payload.video_header().height);
}
TEST_F(RtpDepacketizerH264Test, TestEmptyStapARejected) {
uint8_t lone_empty_packet[] = {kStapA, 0x00, 0x00};
uint8_t leading_empty_packet[] = {kStapA, 0x00, 0x00, 0x00, 0x04,
kIdr, 0xFF, 0x00, 0x11};
uint8_t middle_empty_packet[] = {kStapA, 0x00, 0x03, kIdr, 0xFF, 0x00, 0x00,
0x00, 0x00, 0x04, kIdr, 0xFF, 0x00, 0x11};
uint8_t trailing_empty_packet[] = {kStapA, 0x00, 0x03, kIdr,
0xFF, 0x00, 0x00, 0x00};
H264ParsedPayload payload;
EXPECT_FALSE(depacketizer_->Parse(&payload, lone_empty_packet,
sizeof(lone_empty_packet)));
EXPECT_FALSE(depacketizer_->Parse(&payload, leading_empty_packet,
sizeof(leading_empty_packet)));
EXPECT_FALSE(depacketizer_->Parse(&payload, middle_empty_packet,
sizeof(middle_empty_packet)));
EXPECT_FALSE(depacketizer_->Parse(&payload, trailing_empty_packet,
sizeof(trailing_empty_packet)));
}
TEST_F(RtpDepacketizerH264Test, DepacketizeWithRewriting) {
rtc::Buffer in_buffer;
rtc::Buffer out_buffer;
uint8_t kHeader[2] = {kStapA};
in_buffer.AppendData(kHeader, 1);
out_buffer.AppendData(kHeader, 1);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kOriginalSps));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kOriginalSps);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kRewrittenSps));
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kRewrittenSps);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kIdrOne));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kIdrOne);
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kIdrOne);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kIdrTwo));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kIdrTwo);
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kIdrTwo);
H264ParsedPayload payload;
EXPECT_TRUE(
depacketizer_->Parse(&payload, in_buffer.data(), in_buffer.size()));
std::vector<uint8_t> expected_packet_payload(
out_buffer.data(), &out_buffer.data()[out_buffer.size()]);
EXPECT_THAT(
expected_packet_payload,
::testing::ElementsAreArray(payload.payload, payload.payload_length));
}
TEST_F(RtpDepacketizerH264Test, DepacketizeWithDoubleRewriting) {
rtc::Buffer in_buffer;
rtc::Buffer out_buffer;
uint8_t kHeader[2] = {kStapA};
in_buffer.AppendData(kHeader, 1);
out_buffer.AppendData(kHeader, 1);
// First SPS will be kept...
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kOriginalSps));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kOriginalSps);
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kOriginalSps);
// ...only the second one will be rewritten.
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kOriginalSps));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kOriginalSps);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kRewrittenSps));
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kRewrittenSps);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kIdrOne));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kIdrOne);
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kIdrOne);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kIdrTwo));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kIdrTwo);
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kIdrTwo);
H264ParsedPayload payload;
EXPECT_TRUE(
depacketizer_->Parse(&payload, in_buffer.data(), in_buffer.size()));
std::vector<uint8_t> expected_packet_payload(
out_buffer.data(), &out_buffer.data()[out_buffer.size()]);
EXPECT_THAT(
expected_packet_payload,
::testing::ElementsAreArray(payload.payload, payload.payload_length));
}
TEST_F(RtpDepacketizerH264Test, TestStapADelta) {
uint8_t packet[16] = {kStapA, // F=0, NRI=0, Type=24.
// Length, nal header, payload.
0, 0x02, kSlice, 0xFF, 0, 0x03, kSlice, 0xFF, 0x00, 0,
0x04, kSlice, 0xFF, 0x00, 0x11};
H264ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet)));
ExpectPacket(&payload, packet, sizeof(packet));
EXPECT_EQ(VideoFrameType::kVideoFrameDelta,
payload.video_header().frame_type);
EXPECT_EQ(kVideoCodecH264, payload.video_header().codec);
EXPECT_TRUE(payload.video_header().is_first_packet_in_frame);
EXPECT_EQ(kH264StapA, payload.h264().packetization_type);
// NALU type for aggregated packets is the type of the first packet only.
EXPECT_EQ(kSlice, payload.h264().nalu_type);
}
TEST_F(RtpDepacketizerH264Test, TestFuA) {
// clang-format off
uint8_t packet1[] = {
kFuA, // F=0, NRI=0, Type=28.
kSBit | kIdr, // FU header.
0x85, 0xB8, 0x0, 0x4, 0x0, 0x0, 0x13, 0x93, 0x12, 0x0 // Payload.
};
// clang-format on
const uint8_t kExpected1[] = {kIdr, 0x85, 0xB8, 0x0, 0x4, 0x0,
0x0, 0x13, 0x93, 0x12, 0x0};
uint8_t packet2[] = {
kFuA, // F=0, NRI=0, Type=28.
kIdr, // FU header.
0x02 // Payload.
};
const uint8_t kExpected2[] = {0x02};
uint8_t packet3[] = {
kFuA, // F=0, NRI=0, Type=28.
kEBit | kIdr, // FU header.
0x03 // Payload.
};
const uint8_t kExpected3[] = {0x03};
H264ParsedPayload payload;
// We expect that the first packet is one byte shorter since the FU-A header
// has been replaced by the original nal header.
ASSERT_TRUE(depacketizer_->Parse(&payload, packet1, sizeof(packet1)));
ExpectPacket(&payload, kExpected1, sizeof(kExpected1));
EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type);
EXPECT_EQ(kVideoCodecH264, payload.video_header().codec);
EXPECT_TRUE(payload.video_header().is_first_packet_in_frame);
const RTPVideoHeaderH264& h264 = payload.h264();
EXPECT_EQ(kH264FuA, h264.packetization_type);
EXPECT_EQ(kIdr, h264.nalu_type);
ASSERT_EQ(1u, h264.nalus_length);
EXPECT_EQ(static_cast<H264::NaluType>(kIdr), h264.nalus[0].type);
EXPECT_EQ(-1, h264.nalus[0].sps_id);
EXPECT_EQ(0, h264.nalus[0].pps_id);
// Following packets will be 2 bytes shorter since they will only be appended
// onto the first packet.
payload = H264ParsedPayload();
ASSERT_TRUE(depacketizer_->Parse(&payload, packet2, sizeof(packet2)));
ExpectPacket(&payload, kExpected2, sizeof(kExpected2));
EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type);
EXPECT_EQ(kVideoCodecH264, payload.video_header().codec);
EXPECT_FALSE(payload.video_header().is_first_packet_in_frame);
{
const RTPVideoHeaderH264& h264 = payload.h264();
EXPECT_EQ(kH264FuA, h264.packetization_type);
EXPECT_EQ(kIdr, h264.nalu_type);
// NALU info is only expected for the first FU-A packet.
EXPECT_EQ(0u, h264.nalus_length);
}
payload = H264ParsedPayload();
ASSERT_TRUE(depacketizer_->Parse(&payload, packet3, sizeof(packet3)));
ExpectPacket(&payload, kExpected3, sizeof(kExpected3));
EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type);
EXPECT_EQ(kVideoCodecH264, payload.video_header().codec);
EXPECT_FALSE(payload.video_header().is_first_packet_in_frame);
{
const RTPVideoHeaderH264& h264 = payload.h264();
EXPECT_EQ(kH264FuA, h264.packetization_type);
EXPECT_EQ(kIdr, h264.nalu_type);
// NALU info is only expected for the first FU-A packet.
ASSERT_EQ(0u, h264.nalus_length);
}
}
TEST_F(RtpDepacketizerH264Test, TestEmptyPayload) {
// Using a wild pointer to crash on accesses from inside the depacketizer.
uint8_t* garbage_ptr = reinterpret_cast<uint8_t*>(0x4711);
H264ParsedPayload payload;
EXPECT_FALSE(depacketizer_->Parse(&payload, garbage_ptr, 0));
}
TEST_F(RtpDepacketizerH264Test, TestTruncatedFuaNalu) {
const uint8_t kPayload[] = {0x9c};
H264ParsedPayload payload;
EXPECT_FALSE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload)));
}
TEST_F(RtpDepacketizerH264Test, TestTruncatedSingleStapANalu) {
const uint8_t kPayload[] = {0xd8, 0x27};
H264ParsedPayload payload;
EXPECT_FALSE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload)));
}
TEST_F(RtpDepacketizerH264Test, TestStapAPacketWithTruncatedNalUnits) {
const uint8_t kPayload[] = {0x58, 0xCB, 0xED, 0xDF};
H264ParsedPayload payload;
EXPECT_FALSE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload)));
}
TEST_F(RtpDepacketizerH264Test, TestTruncationJustAfterSingleStapANalu) {
const uint8_t kPayload[] = {0x38, 0x27, 0x27};
H264ParsedPayload payload;
EXPECT_FALSE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload)));
}
TEST_F(RtpDepacketizerH264Test, TestShortSpsPacket) {
const uint8_t kPayload[] = {0x27, 0x80, 0x00};
H264ParsedPayload payload;
EXPECT_TRUE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload)));
}
TEST_F(RtpDepacketizerH264Test, TestSeiPacket) {
const uint8_t kPayload[] = {
kSei, // F=0, NRI=0, Type=6.
0x03, 0x03, 0x03, 0x03 // Payload.
};
H264ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload)));
const RTPVideoHeaderH264& h264 = payload.h264();
EXPECT_EQ(VideoFrameType::kVideoFrameDelta,
payload.video_header().frame_type);
EXPECT_EQ(kH264SingleNalu, h264.packetization_type);
EXPECT_EQ(kSei, h264.nalu_type);
ASSERT_EQ(1u, h264.nalus_length);
EXPECT_EQ(static_cast<H264::NaluType>(kSei), h264.nalus[0].type);
EXPECT_EQ(-1, h264.nalus[0].sps_id);
EXPECT_EQ(-1, h264.nalus[0].pps_id);
}
} // namespace } // namespace
} // namespace webrtc } // namespace webrtc

View file

@ -0,0 +1,330 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h"
#include <string.h>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>
#include "absl/types/optional.h"
#include "absl/types/variant.h"
#include "common_video/h264/h264_common.h"
#include "common_video/h264/pps_parser.h"
#include "common_video/h264/sps_parser.h"
#include "common_video/h264/sps_vui_rewriter.h"
#include "modules/rtp_rtcp/source/byte_io.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/system/fallthrough.h"
namespace webrtc {
namespace {
static const size_t kNalHeaderSize = 1;
static const size_t kFuAHeaderSize = 2;
static const size_t kLengthFieldSize = 2;
static const size_t kStapAHeaderSize = kNalHeaderSize + kLengthFieldSize;
// Bit masks for FU (A and B) indicators.
enum NalDefs : uint8_t { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F };
// Bit masks for FU (A and B) headers.
enum FuDefs : uint8_t { kSBit = 0x80, kEBit = 0x40, kRBit = 0x20 };
// TODO(pbos): Avoid parsing this here as well as inside the jitter buffer.
bool ParseStapAStartOffsets(const uint8_t* nalu_ptr,
size_t length_remaining,
std::vector<size_t>* offsets) {
size_t offset = 0;
while (length_remaining > 0) {
// Buffer doesn't contain room for additional nalu length.
if (length_remaining < sizeof(uint16_t))
return false;
uint16_t nalu_size = ByteReader<uint16_t>::ReadBigEndian(nalu_ptr);
nalu_ptr += sizeof(uint16_t);
length_remaining -= sizeof(uint16_t);
if (nalu_size > length_remaining)
return false;
nalu_ptr += nalu_size;
length_remaining -= nalu_size;
offsets->push_back(offset + kStapAHeaderSize);
offset += kLengthFieldSize + nalu_size;
}
return true;
}
} // namespace
RtpDepacketizerH264::RtpDepacketizerH264() : offset_(0), length_(0) {}
RtpDepacketizerH264::~RtpDepacketizerH264() {}
bool RtpDepacketizerH264::ProcessStapAOrSingleNalu(
ParsedPayload* parsed_payload,
const uint8_t* payload_data) {
parsed_payload->video_header().width = 0;
parsed_payload->video_header().height = 0;
parsed_payload->video_header().codec = kVideoCodecH264;
parsed_payload->video_header().simulcastIdx = 0;
parsed_payload->video_header().is_first_packet_in_frame = true;
auto& h264_header = absl::get<RTPVideoHeaderH264>(
parsed_payload->video_header().video_type_header);
const uint8_t* nalu_start = payload_data + kNalHeaderSize;
const size_t nalu_length = length_ - kNalHeaderSize;
uint8_t nal_type = payload_data[0] & kTypeMask;
std::vector<size_t> nalu_start_offsets;
if (nal_type == H264::NaluType::kStapA) {
// Skip the StapA header (StapA NAL type + length).
if (length_ <= kStapAHeaderSize) {
RTC_LOG(LS_ERROR) << "StapA header truncated.";
return false;
}
if (!ParseStapAStartOffsets(nalu_start, nalu_length, &nalu_start_offsets)) {
RTC_LOG(LS_ERROR) << "StapA packet with incorrect NALU packet lengths.";
return false;
}
h264_header.packetization_type = kH264StapA;
nal_type = payload_data[kStapAHeaderSize] & kTypeMask;
} else {
h264_header.packetization_type = kH264SingleNalu;
nalu_start_offsets.push_back(0);
}
h264_header.nalu_type = nal_type;
parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameDelta;
nalu_start_offsets.push_back(length_ + kLengthFieldSize); // End offset.
for (size_t i = 0; i < nalu_start_offsets.size() - 1; ++i) {
size_t start_offset = nalu_start_offsets[i];
// End offset is actually start offset for next unit, excluding length field
// so remove that from this units length.
size_t end_offset = nalu_start_offsets[i + 1] - kLengthFieldSize;
if (end_offset - start_offset < H264::kNaluTypeSize) {
RTC_LOG(LS_ERROR) << "STAP-A packet too short";
return false;
}
NaluInfo nalu;
nalu.type = payload_data[start_offset] & kTypeMask;
nalu.sps_id = -1;
nalu.pps_id = -1;
start_offset += H264::kNaluTypeSize;
switch (nalu.type) {
case H264::NaluType::kSps: {
// Check if VUI is present in SPS and if it needs to be modified to
// avoid
// excessive decoder latency.
// Copy any previous data first (likely just the first header).
std::unique_ptr<rtc::Buffer> output_buffer(new rtc::Buffer());
if (start_offset)
output_buffer->AppendData(payload_data, start_offset);
absl::optional<SpsParser::SpsState> sps;
SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps(
&payload_data[start_offset], end_offset - start_offset, &sps,
nullptr, output_buffer.get(), SpsVuiRewriter::Direction::kIncoming);
if (result == SpsVuiRewriter::ParseResult::kVuiRewritten) {
if (modified_buffer_) {
RTC_LOG(LS_WARNING)
<< "More than one H264 SPS NAL units needing "
"rewriting found within a single STAP-A packet. "
"Keeping the first and rewriting the last.";
}
// Rewrite length field to new SPS size.
if (h264_header.packetization_type == kH264StapA) {
size_t length_field_offset =
start_offset - (H264::kNaluTypeSize + kLengthFieldSize);
// Stap-A Length includes payload data and type header.
size_t rewritten_size =
output_buffer->size() - start_offset + H264::kNaluTypeSize;
ByteWriter<uint16_t>::WriteBigEndian(
&(*output_buffer)[length_field_offset], rewritten_size);
}
// Append rest of packet.
output_buffer->AppendData(&payload_data[end_offset],
nalu_length + kNalHeaderSize - end_offset);
modified_buffer_ = std::move(output_buffer);
length_ = modified_buffer_->size();
}
if (sps) {
parsed_payload->video_header().width = sps->width;
parsed_payload->video_header().height = sps->height;
nalu.sps_id = sps->id;
} else {
RTC_LOG(LS_WARNING) << "Failed to parse SPS id from SPS slice.";
}
parsed_payload->video_header().frame_type =
VideoFrameType::kVideoFrameKey;
break;
}
case H264::NaluType::kPps: {
uint32_t pps_id;
uint32_t sps_id;
if (PpsParser::ParsePpsIds(&payload_data[start_offset],
end_offset - start_offset, &pps_id,
&sps_id)) {
nalu.pps_id = pps_id;
nalu.sps_id = sps_id;
} else {
RTC_LOG(LS_WARNING)
<< "Failed to parse PPS id and SPS id from PPS slice.";
}
break;
}
case H264::NaluType::kIdr:
parsed_payload->video_header().frame_type =
VideoFrameType::kVideoFrameKey;
RTC_FALLTHROUGH();
case H264::NaluType::kSlice: {
absl::optional<uint32_t> pps_id = PpsParser::ParsePpsIdFromSlice(
&payload_data[start_offset], end_offset - start_offset);
if (pps_id) {
nalu.pps_id = *pps_id;
} else {
RTC_LOG(LS_WARNING) << "Failed to parse PPS id from slice of type: "
<< static_cast<int>(nalu.type);
}
break;
}
// Slices below don't contain SPS or PPS ids.
case H264::NaluType::kAud:
case H264::NaluType::kEndOfSequence:
case H264::NaluType::kEndOfStream:
case H264::NaluType::kFiller:
case H264::NaluType::kSei:
break;
case H264::NaluType::kStapA:
case H264::NaluType::kFuA:
RTC_LOG(LS_WARNING) << "Unexpected STAP-A or FU-A received.";
return false;
}
if (h264_header.nalus_length == kMaxNalusPerPacket) {
RTC_LOG(LS_WARNING)
<< "Received packet containing more than " << kMaxNalusPerPacket
<< " NAL units. Will not keep track sps and pps ids for all of them.";
} else {
h264_header.nalus[h264_header.nalus_length++] = nalu;
}
}
return true;
}
bool RtpDepacketizerH264::ParseFuaNalu(
RtpDepacketizer::ParsedPayload* parsed_payload,
const uint8_t* payload_data) {
if (length_ < kFuAHeaderSize) {
RTC_LOG(LS_ERROR) << "FU-A NAL units truncated.";
return false;
}
uint8_t fnri = payload_data[0] & (kFBit | kNriMask);
uint8_t original_nal_type = payload_data[1] & kTypeMask;
bool first_fragment = (payload_data[1] & kSBit) > 0;
NaluInfo nalu;
nalu.type = original_nal_type;
nalu.sps_id = -1;
nalu.pps_id = -1;
if (first_fragment) {
offset_ = 0;
length_ -= kNalHeaderSize;
absl::optional<uint32_t> pps_id = PpsParser::ParsePpsIdFromSlice(
payload_data + 2 * kNalHeaderSize, length_ - kNalHeaderSize);
if (pps_id) {
nalu.pps_id = *pps_id;
} else {
RTC_LOG(LS_WARNING)
<< "Failed to parse PPS from first fragment of FU-A NAL "
"unit with original type: "
<< static_cast<int>(nalu.type);
}
uint8_t original_nal_header = fnri | original_nal_type;
modified_buffer_.reset(new rtc::Buffer());
modified_buffer_->AppendData(payload_data + kNalHeaderSize, length_);
(*modified_buffer_)[0] = original_nal_header;
} else {
offset_ = kFuAHeaderSize;
length_ -= kFuAHeaderSize;
}
if (original_nal_type == H264::NaluType::kIdr) {
parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameKey;
} else {
parsed_payload->video_header().frame_type =
VideoFrameType::kVideoFrameDelta;
}
parsed_payload->video_header().width = 0;
parsed_payload->video_header().height = 0;
parsed_payload->video_header().codec = kVideoCodecH264;
parsed_payload->video_header().simulcastIdx = 0;
parsed_payload->video_header().is_first_packet_in_frame = first_fragment;
auto& h264_header = absl::get<RTPVideoHeaderH264>(
parsed_payload->video_header().video_type_header);
h264_header.packetization_type = kH264FuA;
h264_header.nalu_type = original_nal_type;
if (first_fragment) {
h264_header.nalus[h264_header.nalus_length] = nalu;
h264_header.nalus_length = 1;
}
return true;
}
bool RtpDepacketizerH264::Parse(ParsedPayload* parsed_payload,
const uint8_t* payload_data,
size_t payload_data_length) {
RTC_CHECK(parsed_payload != nullptr);
if (payload_data_length == 0) {
RTC_LOG(LS_ERROR) << "Empty payload.";
return false;
}
offset_ = 0;
length_ = payload_data_length;
modified_buffer_.reset();
uint8_t nal_type = payload_data[0] & kTypeMask;
parsed_payload->video_header()
.video_type_header.emplace<RTPVideoHeaderH264>();
if (nal_type == H264::NaluType::kFuA) {
// Fragmented NAL units (FU-A).
if (!ParseFuaNalu(parsed_payload, payload_data))
return false;
} else {
// We handle STAP-A and single NALU's the same way here. The jitter buffer
// will depacketize the STAP-A into NAL units later.
// TODO(sprang): Parse STAP-A offsets here and store in fragmentation vec.
if (!ProcessStapAOrSingleNalu(parsed_payload, payload_data))
return false;
}
const uint8_t* payload =
modified_buffer_ ? modified_buffer_->data() : payload_data;
parsed_payload->payload = payload + offset_;
parsed_payload->payload_length = length_;
return true;
}
} // namespace webrtc

View file

@ -0,0 +1,44 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_RTP_RTCP_SOURCE_VIDEO_RTP_DEPACKETIZER_H264_H_
#define MODULES_RTP_RTCP_SOURCE_VIDEO_RTP_DEPACKETIZER_H264_H_
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include "modules/rtp_rtcp/source/rtp_format.h"
#include "rtc_base/buffer.h"
namespace webrtc {
class RtpDepacketizerH264 : public RtpDepacketizer {
public:
RtpDepacketizerH264();
~RtpDepacketizerH264() override;
bool Parse(ParsedPayload* parsed_payload,
const uint8_t* payload_data,
size_t payload_data_length) override;
private:
bool ParseFuaNalu(RtpDepacketizer::ParsedPayload* parsed_payload,
const uint8_t* payload_data);
bool ProcessStapAOrSingleNalu(RtpDepacketizer::ParsedPayload* parsed_payload,
const uint8_t* payload_data);
size_t offset_;
size_t length_;
std::unique_ptr<rtc::Buffer> modified_buffer_;
};
} // namespace webrtc
#endif // MODULES_RTP_RTCP_SOURCE_VIDEO_RTP_DEPACKETIZER_H264_H_

View file

@ -0,0 +1,427 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h"
#include <memory>
#include <vector>
#include "api/array_view.h"
#include "common_video/h264/h264_common.h"
#include "modules/include/module_common_types.h"
#include "modules/rtp_rtcp/mocks/mock_rtp_rtcp.h"
#include "modules/rtp_rtcp/source/byte_io.h"
#include "test/gmock.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
using ::testing::Each;
using ::testing::ElementsAre;
using ::testing::ElementsAreArray;
using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::SizeIs;
enum Nalu {
kSlice = 1,
kIdr = 5,
kSei = 6,
kSps = 7,
kPps = 8,
kStapA = 24,
kFuA = 28
};
// Bit masks for FU (A and B) indicators.
enum NalDefs { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F };
// Bit masks for FU (A and B) headers.
enum FuDefs { kSBit = 0x80, kEBit = 0x40, kRBit = 0x20 };
const uint8_t kOriginalSps[] = {kSps, 0x00, 0x00, 0x03, 0x03,
0xF4, 0x05, 0x03, 0xC7, 0xC0};
const uint8_t kRewrittenSps[] = {kSps, 0x00, 0x00, 0x03, 0x03, 0xF4, 0x05, 0x03,
0xC7, 0xE0, 0x1B, 0x41, 0x10, 0x8D, 0x00};
const uint8_t kIdrOne[] = {kIdr, 0xFF, 0x00, 0x00, 0x04};
const uint8_t kIdrTwo[] = {kIdr, 0xFF, 0x00, 0x11};
struct H264ParsedPayload : public RtpDepacketizer::ParsedPayload {
RTPVideoHeaderH264& h264() {
return absl::get<RTPVideoHeaderH264>(video.video_type_header);
}
};
class RtpDepacketizerH264Test : public ::testing::Test {
protected:
RtpDepacketizerH264Test()
: depacketizer_(std::make_unique<RtpDepacketizerH264>()) {}
void ExpectPacket(H264ParsedPayload* parsed_payload,
const uint8_t* data,
size_t length) {
ASSERT_TRUE(parsed_payload != NULL);
EXPECT_THAT(std::vector<uint8_t>(
parsed_payload->payload,
parsed_payload->payload + parsed_payload->payload_length),
::testing::ElementsAreArray(data, length));
}
std::unique_ptr<RtpDepacketizer> depacketizer_;
};
TEST_F(RtpDepacketizerH264Test, TestSingleNalu) {
uint8_t packet[2] = {0x05, 0xFF}; // F=0, NRI=0, Type=5 (IDR).
H264ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet)));
ExpectPacket(&payload, packet, sizeof(packet));
EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type);
EXPECT_EQ(kVideoCodecH264, payload.video_header().codec);
EXPECT_TRUE(payload.video_header().is_first_packet_in_frame);
EXPECT_EQ(kH264SingleNalu, payload.h264().packetization_type);
EXPECT_EQ(kIdr, payload.h264().nalu_type);
}
TEST_F(RtpDepacketizerH264Test, TestSingleNaluSpsWithResolution) {
uint8_t packet[] = {kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50,
0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0,
0x00, 0x00, 0x03, 0x2A, 0xE0, 0xF1, 0x83, 0x25};
H264ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet)));
ExpectPacket(&payload, packet, sizeof(packet));
EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type);
EXPECT_EQ(kVideoCodecH264, payload.video_header().codec);
EXPECT_TRUE(payload.video_header().is_first_packet_in_frame);
EXPECT_EQ(kH264SingleNalu, payload.h264().packetization_type);
EXPECT_EQ(1280u, payload.video_header().width);
EXPECT_EQ(720u, payload.video_header().height);
}
TEST_F(RtpDepacketizerH264Test, TestStapAKey) {
// clang-format off
const NaluInfo kExpectedNalus[] = { {H264::kSps, 0, -1},
{H264::kPps, 1, 2},
{H264::kIdr, -1, 0} };
uint8_t packet[] = {kStapA, // F=0, NRI=0, Type=24.
// Length, nal header, payload.
0, 0x18, kExpectedNalus[0].type,
0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05, 0xBA,
0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00, 0x00, 0x03,
0x2A, 0xE0, 0xF1, 0x83, 0x25,
0, 0xD, kExpectedNalus[1].type,
0x69, 0xFC, 0x0, 0x0, 0x3, 0x0, 0x7, 0xFF, 0xFF, 0xFF,
0xF6, 0x40,
0, 0xB, kExpectedNalus[2].type,
0x85, 0xB8, 0x0, 0x4, 0x0, 0x0, 0x13, 0x93, 0x12, 0x0};
// clang-format on
H264ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet)));
ExpectPacket(&payload, packet, sizeof(packet));
EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type);
EXPECT_EQ(kVideoCodecH264, payload.video_header().codec);
EXPECT_TRUE(payload.video_header().is_first_packet_in_frame);
const RTPVideoHeaderH264& h264 = payload.h264();
EXPECT_EQ(kH264StapA, h264.packetization_type);
// NALU type for aggregated packets is the type of the first packet only.
EXPECT_EQ(kSps, h264.nalu_type);
ASSERT_EQ(3u, h264.nalus_length);
for (size_t i = 0; i < h264.nalus_length; ++i) {
EXPECT_EQ(kExpectedNalus[i].type, h264.nalus[i].type)
<< "Failed parsing nalu " << i;
EXPECT_EQ(kExpectedNalus[i].sps_id, h264.nalus[i].sps_id)
<< "Failed parsing nalu " << i;
EXPECT_EQ(kExpectedNalus[i].pps_id, h264.nalus[i].pps_id)
<< "Failed parsing nalu " << i;
}
}
TEST_F(RtpDepacketizerH264Test, TestStapANaluSpsWithResolution) {
uint8_t packet[] = {kStapA, // F=0, NRI=0, Type=24.
// Length (2 bytes), nal header, payload.
0x00, 0x19, kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40,
0x50, 0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0,
0x00, 0x00, 0x03, 0x2A, 0xE0, 0xF1, 0x83, 0x25, 0x80,
0x00, 0x03, kIdr, 0xFF, 0x00, 0x00, 0x04, kIdr, 0xFF,
0x00, 0x11};
H264ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet)));
ExpectPacket(&payload, packet, sizeof(packet));
EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type);
EXPECT_EQ(kVideoCodecH264, payload.video_header().codec);
EXPECT_TRUE(payload.video_header().is_first_packet_in_frame);
EXPECT_EQ(kH264StapA, payload.h264().packetization_type);
EXPECT_EQ(1280u, payload.video_header().width);
EXPECT_EQ(720u, payload.video_header().height);
}
TEST_F(RtpDepacketizerH264Test, TestEmptyStapARejected) {
uint8_t lone_empty_packet[] = {kStapA, 0x00, 0x00};
uint8_t leading_empty_packet[] = {kStapA, 0x00, 0x00, 0x00, 0x04,
kIdr, 0xFF, 0x00, 0x11};
uint8_t middle_empty_packet[] = {kStapA, 0x00, 0x03, kIdr, 0xFF, 0x00, 0x00,
0x00, 0x00, 0x04, kIdr, 0xFF, 0x00, 0x11};
uint8_t trailing_empty_packet[] = {kStapA, 0x00, 0x03, kIdr,
0xFF, 0x00, 0x00, 0x00};
H264ParsedPayload payload;
EXPECT_FALSE(depacketizer_->Parse(&payload, lone_empty_packet,
sizeof(lone_empty_packet)));
EXPECT_FALSE(depacketizer_->Parse(&payload, leading_empty_packet,
sizeof(leading_empty_packet)));
EXPECT_FALSE(depacketizer_->Parse(&payload, middle_empty_packet,
sizeof(middle_empty_packet)));
EXPECT_FALSE(depacketizer_->Parse(&payload, trailing_empty_packet,
sizeof(trailing_empty_packet)));
}
TEST_F(RtpDepacketizerH264Test, DepacketizeWithRewriting) {
rtc::Buffer in_buffer;
rtc::Buffer out_buffer;
uint8_t kHeader[2] = {kStapA};
in_buffer.AppendData(kHeader, 1);
out_buffer.AppendData(kHeader, 1);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kOriginalSps));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kOriginalSps);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kRewrittenSps));
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kRewrittenSps);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kIdrOne));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kIdrOne);
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kIdrOne);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kIdrTwo));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kIdrTwo);
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kIdrTwo);
H264ParsedPayload payload;
EXPECT_TRUE(
depacketizer_->Parse(&payload, in_buffer.data(), in_buffer.size()));
std::vector<uint8_t> expected_packet_payload(
out_buffer.data(), &out_buffer.data()[out_buffer.size()]);
EXPECT_THAT(
expected_packet_payload,
::testing::ElementsAreArray(payload.payload, payload.payload_length));
}
TEST_F(RtpDepacketizerH264Test, DepacketizeWithDoubleRewriting) {
rtc::Buffer in_buffer;
rtc::Buffer out_buffer;
uint8_t kHeader[2] = {kStapA};
in_buffer.AppendData(kHeader, 1);
out_buffer.AppendData(kHeader, 1);
// First SPS will be kept...
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kOriginalSps));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kOriginalSps);
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kOriginalSps);
// ...only the second one will be rewritten.
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kOriginalSps));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kOriginalSps);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kRewrittenSps));
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kRewrittenSps);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kIdrOne));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kIdrOne);
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kIdrOne);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kIdrTwo));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kIdrTwo);
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kIdrTwo);
H264ParsedPayload payload;
EXPECT_TRUE(
depacketizer_->Parse(&payload, in_buffer.data(), in_buffer.size()));
std::vector<uint8_t> expected_packet_payload(
out_buffer.data(), &out_buffer.data()[out_buffer.size()]);
EXPECT_THAT(
expected_packet_payload,
::testing::ElementsAreArray(payload.payload, payload.payload_length));
}
TEST_F(RtpDepacketizerH264Test, TestStapADelta) {
uint8_t packet[16] = {kStapA, // F=0, NRI=0, Type=24.
// Length, nal header, payload.
0, 0x02, kSlice, 0xFF, 0, 0x03, kSlice, 0xFF, 0x00, 0,
0x04, kSlice, 0xFF, 0x00, 0x11};
H264ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet)));
ExpectPacket(&payload, packet, sizeof(packet));
EXPECT_EQ(VideoFrameType::kVideoFrameDelta,
payload.video_header().frame_type);
EXPECT_EQ(kVideoCodecH264, payload.video_header().codec);
EXPECT_TRUE(payload.video_header().is_first_packet_in_frame);
EXPECT_EQ(kH264StapA, payload.h264().packetization_type);
// NALU type for aggregated packets is the type of the first packet only.
EXPECT_EQ(kSlice, payload.h264().nalu_type);
}
TEST_F(RtpDepacketizerH264Test, TestFuA) {
// clang-format off
uint8_t packet1[] = {
kFuA, // F=0, NRI=0, Type=28.
kSBit | kIdr, // FU header.
0x85, 0xB8, 0x0, 0x4, 0x0, 0x0, 0x13, 0x93, 0x12, 0x0 // Payload.
};
// clang-format on
const uint8_t kExpected1[] = {kIdr, 0x85, 0xB8, 0x0, 0x4, 0x0,
0x0, 0x13, 0x93, 0x12, 0x0};
uint8_t packet2[] = {
kFuA, // F=0, NRI=0, Type=28.
kIdr, // FU header.
0x02 // Payload.
};
const uint8_t kExpected2[] = {0x02};
uint8_t packet3[] = {
kFuA, // F=0, NRI=0, Type=28.
kEBit | kIdr, // FU header.
0x03 // Payload.
};
const uint8_t kExpected3[] = {0x03};
H264ParsedPayload payload;
// We expect that the first packet is one byte shorter since the FU-A header
// has been replaced by the original nal header.
ASSERT_TRUE(depacketizer_->Parse(&payload, packet1, sizeof(packet1)));
ExpectPacket(&payload, kExpected1, sizeof(kExpected1));
EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type);
EXPECT_EQ(kVideoCodecH264, payload.video_header().codec);
EXPECT_TRUE(payload.video_header().is_first_packet_in_frame);
const RTPVideoHeaderH264& h264 = payload.h264();
EXPECT_EQ(kH264FuA, h264.packetization_type);
EXPECT_EQ(kIdr, h264.nalu_type);
ASSERT_EQ(1u, h264.nalus_length);
EXPECT_EQ(static_cast<H264::NaluType>(kIdr), h264.nalus[0].type);
EXPECT_EQ(-1, h264.nalus[0].sps_id);
EXPECT_EQ(0, h264.nalus[0].pps_id);
// Following packets will be 2 bytes shorter since they will only be appended
// onto the first packet.
payload = H264ParsedPayload();
ASSERT_TRUE(depacketizer_->Parse(&payload, packet2, sizeof(packet2)));
ExpectPacket(&payload, kExpected2, sizeof(kExpected2));
EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type);
EXPECT_EQ(kVideoCodecH264, payload.video_header().codec);
EXPECT_FALSE(payload.video_header().is_first_packet_in_frame);
{
const RTPVideoHeaderH264& h264 = payload.h264();
EXPECT_EQ(kH264FuA, h264.packetization_type);
EXPECT_EQ(kIdr, h264.nalu_type);
// NALU info is only expected for the first FU-A packet.
EXPECT_EQ(0u, h264.nalus_length);
}
payload = H264ParsedPayload();
ASSERT_TRUE(depacketizer_->Parse(&payload, packet3, sizeof(packet3)));
ExpectPacket(&payload, kExpected3, sizeof(kExpected3));
EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type);
EXPECT_EQ(kVideoCodecH264, payload.video_header().codec);
EXPECT_FALSE(payload.video_header().is_first_packet_in_frame);
{
const RTPVideoHeaderH264& h264 = payload.h264();
EXPECT_EQ(kH264FuA, h264.packetization_type);
EXPECT_EQ(kIdr, h264.nalu_type);
// NALU info is only expected for the first FU-A packet.
ASSERT_EQ(0u, h264.nalus_length);
}
}
TEST_F(RtpDepacketizerH264Test, TestEmptyPayload) {
// Using a wild pointer to crash on accesses from inside the depacketizer.
uint8_t* garbage_ptr = reinterpret_cast<uint8_t*>(0x4711);
H264ParsedPayload payload;
EXPECT_FALSE(depacketizer_->Parse(&payload, garbage_ptr, 0));
}
TEST_F(RtpDepacketizerH264Test, TestTruncatedFuaNalu) {
const uint8_t kPayload[] = {0x9c};
H264ParsedPayload payload;
EXPECT_FALSE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload)));
}
TEST_F(RtpDepacketizerH264Test, TestTruncatedSingleStapANalu) {
const uint8_t kPayload[] = {0xd8, 0x27};
H264ParsedPayload payload;
EXPECT_FALSE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload)));
}
TEST_F(RtpDepacketizerH264Test, TestStapAPacketWithTruncatedNalUnits) {
const uint8_t kPayload[] = {0x58, 0xCB, 0xED, 0xDF};
H264ParsedPayload payload;
EXPECT_FALSE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload)));
}
TEST_F(RtpDepacketizerH264Test, TestTruncationJustAfterSingleStapANalu) {
const uint8_t kPayload[] = {0x38, 0x27, 0x27};
H264ParsedPayload payload;
EXPECT_FALSE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload)));
}
TEST_F(RtpDepacketizerH264Test, TestShortSpsPacket) {
const uint8_t kPayload[] = {0x27, 0x80, 0x00};
H264ParsedPayload payload;
EXPECT_TRUE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload)));
}
TEST_F(RtpDepacketizerH264Test, TestSeiPacket) {
const uint8_t kPayload[] = {
kSei, // F=0, NRI=0, Type=6.
0x03, 0x03, 0x03, 0x03 // Payload.
};
H264ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload)));
const RTPVideoHeaderH264& h264 = payload.h264();
EXPECT_EQ(VideoFrameType::kVideoFrameDelta,
payload.video_header().frame_type);
EXPECT_EQ(kH264SingleNalu, h264.packetization_type);
EXPECT_EQ(kSei, h264.nalu_type);
ASSERT_EQ(1u, h264.nalus_length);
EXPECT_EQ(static_cast<H264::NaluType>(kSei), h264.nalus[0].type);
EXPECT_EQ(-1, h264.nalus[0].sps_id);
EXPECT_EQ(-1, h264.nalus[0].pps_id);
}
} // namespace
} // namespace webrtc