diff --git a/modules/rtp_rtcp/BUILD.gn b/modules/rtp_rtcp/BUILD.gn index 4f082ecf4b..055a361d8a 100644 --- a/modules/rtp_rtcp/BUILD.gn +++ b/modules/rtp_rtcp/BUILD.gn @@ -215,6 +215,8 @@ rtc_library("rtp_rtcp") { "source/video_rtp_depacketizer.h", "source/video_rtp_depacketizer_generic.cc", "source/video_rtp_depacketizer_generic.h", + "source/video_rtp_depacketizer_h264.cc", + "source/video_rtp_depacketizer_h264.h", "source/video_rtp_depacketizer_raw.cc", "source/video_rtp_depacketizer_raw.h", "source/video_rtp_depacketizer_vp8.cc", @@ -486,6 +488,7 @@ if (rtc_include_tests) { "source/ulpfec_header_reader_writer_unittest.cc", "source/ulpfec_receiver_unittest.cc", "source/video_rtp_depacketizer_generic_unittest.cc", + "source/video_rtp_depacketizer_h264_unittest.cc", "source/video_rtp_depacketizer_raw_unittest.cc", "source/video_rtp_depacketizer_vp8_unittest.cc", "source/video_rtp_depacketizer_vp9_unittest.cc", diff --git a/modules/rtp_rtcp/source/create_video_rtp_depacketizer.cc b/modules/rtp_rtcp/source/create_video_rtp_depacketizer.cc index f9d1f2fbf5..af1b50f53b 100644 --- a/modules/rtp_rtcp/source/create_video_rtp_depacketizer.cc +++ b/modules/rtp_rtcp/source/create_video_rtp_depacketizer.cc @@ -15,9 +15,9 @@ #include "absl/memory/memory.h" #include "absl/types/optional.h" #include "modules/rtp_rtcp/source/rtp_depacketizer_av1.h" -#include "modules/rtp_rtcp/source/rtp_format_h264.h" #include "modules/rtp_rtcp/source/video_rtp_depacketizer.h" #include "modules/rtp_rtcp/source/video_rtp_depacketizer_generic.h" +#include "modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h" #include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp8.h" #include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.h" #include "rtc_base/checks.h" diff --git a/modules/rtp_rtcp/source/rtp_format_h264.cc b/modules/rtp_rtcp/source/rtp_format_h264.cc index 85f023b564..394d037f7e 100644 --- a/modules/rtp_rtcp/source/rtp_format_h264.cc +++ b/modules/rtp_rtcp/source/rtp_format_h264.cc @@ -38,7 +38,6 @@ namespace { static const size_t kNalHeaderSize = 1; static const size_t kFuAHeaderSize = 2; static const size_t kLengthFieldSize = 2; -static const size_t kStapAHeaderSize = kNalHeaderSize + kLengthFieldSize; // Bit masks for FU (A and B) indicators. enum NalDefs : uint8_t { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F }; @@ -46,29 +45,6 @@ enum NalDefs : uint8_t { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F }; // Bit masks for FU (A and B) headers. enum FuDefs : uint8_t { kSBit = 0x80, kEBit = 0x40, kRBit = 0x20 }; -// TODO(pbos): Avoid parsing this here as well as inside the jitter buffer. -bool ParseStapAStartOffsets(const uint8_t* nalu_ptr, - size_t length_remaining, - std::vector* offsets) { - size_t offset = 0; - while (length_remaining > 0) { - // Buffer doesn't contain room for additional nalu length. - if (length_remaining < sizeof(uint16_t)) - return false; - uint16_t nalu_size = ByteReader::ReadBigEndian(nalu_ptr); - nalu_ptr += sizeof(uint16_t); - length_remaining -= sizeof(uint16_t); - if (nalu_size > length_remaining) - return false; - nalu_ptr += nalu_size; - length_remaining -= nalu_size; - - offsets->push_back(offset + kStapAHeaderSize); - offset += kLengthFieldSize + nalu_size; - } - return true; -} - } // namespace RtpPacketizerH264::RtpPacketizerH264( @@ -342,263 +318,4 @@ void RtpPacketizerH264::NextFragmentPacket(RtpPacketToSend* rtp_packet) { packets_.pop(); } -RtpDepacketizerH264::RtpDepacketizerH264() : offset_(0), length_(0) {} -RtpDepacketizerH264::~RtpDepacketizerH264() {} - -bool RtpDepacketizerH264::Parse(ParsedPayload* parsed_payload, - const uint8_t* payload_data, - size_t payload_data_length) { - RTC_CHECK(parsed_payload != nullptr); - if (payload_data_length == 0) { - RTC_LOG(LS_ERROR) << "Empty payload."; - return false; - } - - offset_ = 0; - length_ = payload_data_length; - modified_buffer_.reset(); - - uint8_t nal_type = payload_data[0] & kTypeMask; - parsed_payload->video_header() - .video_type_header.emplace(); - if (nal_type == H264::NaluType::kFuA) { - // Fragmented NAL units (FU-A). - if (!ParseFuaNalu(parsed_payload, payload_data)) - return false; - } else { - // We handle STAP-A and single NALU's the same way here. The jitter buffer - // will depacketize the STAP-A into NAL units later. - // TODO(sprang): Parse STAP-A offsets here and store in fragmentation vec. - if (!ProcessStapAOrSingleNalu(parsed_payload, payload_data)) - return false; - } - - const uint8_t* payload = - modified_buffer_ ? modified_buffer_->data() : payload_data; - - parsed_payload->payload = payload + offset_; - parsed_payload->payload_length = length_; - return true; -} - -bool RtpDepacketizerH264::ProcessStapAOrSingleNalu( - ParsedPayload* parsed_payload, - const uint8_t* payload_data) { - parsed_payload->video_header().width = 0; - parsed_payload->video_header().height = 0; - parsed_payload->video_header().codec = kVideoCodecH264; - parsed_payload->video_header().simulcastIdx = 0; - parsed_payload->video_header().is_first_packet_in_frame = true; - auto& h264_header = absl::get( - parsed_payload->video_header().video_type_header); - - const uint8_t* nalu_start = payload_data + kNalHeaderSize; - const size_t nalu_length = length_ - kNalHeaderSize; - uint8_t nal_type = payload_data[0] & kTypeMask; - std::vector nalu_start_offsets; - if (nal_type == H264::NaluType::kStapA) { - // Skip the StapA header (StapA NAL type + length). - if (length_ <= kStapAHeaderSize) { - RTC_LOG(LS_ERROR) << "StapA header truncated."; - return false; - } - - if (!ParseStapAStartOffsets(nalu_start, nalu_length, &nalu_start_offsets)) { - RTC_LOG(LS_ERROR) << "StapA packet with incorrect NALU packet lengths."; - return false; - } - - h264_header.packetization_type = kH264StapA; - nal_type = payload_data[kStapAHeaderSize] & kTypeMask; - } else { - h264_header.packetization_type = kH264SingleNalu; - nalu_start_offsets.push_back(0); - } - h264_header.nalu_type = nal_type; - parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameDelta; - - nalu_start_offsets.push_back(length_ + kLengthFieldSize); // End offset. - for (size_t i = 0; i < nalu_start_offsets.size() - 1; ++i) { - size_t start_offset = nalu_start_offsets[i]; - // End offset is actually start offset for next unit, excluding length field - // so remove that from this units length. - size_t end_offset = nalu_start_offsets[i + 1] - kLengthFieldSize; - if (end_offset - start_offset < H264::kNaluTypeSize) { - RTC_LOG(LS_ERROR) << "STAP-A packet too short"; - return false; - } - - NaluInfo nalu; - nalu.type = payload_data[start_offset] & kTypeMask; - nalu.sps_id = -1; - nalu.pps_id = -1; - start_offset += H264::kNaluTypeSize; - - switch (nalu.type) { - case H264::NaluType::kSps: { - // Check if VUI is present in SPS and if it needs to be modified to - // avoid - // excessive decoder latency. - - // Copy any previous data first (likely just the first header). - std::unique_ptr output_buffer(new rtc::Buffer()); - if (start_offset) - output_buffer->AppendData(payload_data, start_offset); - - absl::optional sps; - - SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps( - &payload_data[start_offset], end_offset - start_offset, &sps, - nullptr, output_buffer.get(), SpsVuiRewriter::Direction::kIncoming); - - if (result == SpsVuiRewriter::ParseResult::kVuiRewritten) { - if (modified_buffer_) { - RTC_LOG(LS_WARNING) - << "More than one H264 SPS NAL units needing " - "rewriting found within a single STAP-A packet. " - "Keeping the first and rewriting the last."; - } - - // Rewrite length field to new SPS size. - if (h264_header.packetization_type == kH264StapA) { - size_t length_field_offset = - start_offset - (H264::kNaluTypeSize + kLengthFieldSize); - // Stap-A Length includes payload data and type header. - size_t rewritten_size = - output_buffer->size() - start_offset + H264::kNaluTypeSize; - ByteWriter::WriteBigEndian( - &(*output_buffer)[length_field_offset], rewritten_size); - } - - // Append rest of packet. - output_buffer->AppendData(&payload_data[end_offset], - nalu_length + kNalHeaderSize - end_offset); - - modified_buffer_ = std::move(output_buffer); - length_ = modified_buffer_->size(); - } - - if (sps) { - parsed_payload->video_header().width = sps->width; - parsed_payload->video_header().height = sps->height; - nalu.sps_id = sps->id; - } else { - RTC_LOG(LS_WARNING) << "Failed to parse SPS id from SPS slice."; - } - parsed_payload->video_header().frame_type = - VideoFrameType::kVideoFrameKey; - break; - } - case H264::NaluType::kPps: { - uint32_t pps_id; - uint32_t sps_id; - if (PpsParser::ParsePpsIds(&payload_data[start_offset], - end_offset - start_offset, &pps_id, - &sps_id)) { - nalu.pps_id = pps_id; - nalu.sps_id = sps_id; - } else { - RTC_LOG(LS_WARNING) - << "Failed to parse PPS id and SPS id from PPS slice."; - } - break; - } - case H264::NaluType::kIdr: - parsed_payload->video_header().frame_type = - VideoFrameType::kVideoFrameKey; - RTC_FALLTHROUGH(); - case H264::NaluType::kSlice: { - absl::optional pps_id = PpsParser::ParsePpsIdFromSlice( - &payload_data[start_offset], end_offset - start_offset); - if (pps_id) { - nalu.pps_id = *pps_id; - } else { - RTC_LOG(LS_WARNING) << "Failed to parse PPS id from slice of type: " - << static_cast(nalu.type); - } - break; - } - // Slices below don't contain SPS or PPS ids. - case H264::NaluType::kAud: - case H264::NaluType::kEndOfSequence: - case H264::NaluType::kEndOfStream: - case H264::NaluType::kFiller: - case H264::NaluType::kSei: - break; - case H264::NaluType::kStapA: - case H264::NaluType::kFuA: - RTC_LOG(LS_WARNING) << "Unexpected STAP-A or FU-A received."; - return false; - } - - if (h264_header.nalus_length == kMaxNalusPerPacket) { - RTC_LOG(LS_WARNING) - << "Received packet containing more than " << kMaxNalusPerPacket - << " NAL units. Will not keep track sps and pps ids for all of them."; - } else { - h264_header.nalus[h264_header.nalus_length++] = nalu; - } - } - - return true; -} - -bool RtpDepacketizerH264::ParseFuaNalu( - RtpDepacketizer::ParsedPayload* parsed_payload, - const uint8_t* payload_data) { - if (length_ < kFuAHeaderSize) { - RTC_LOG(LS_ERROR) << "FU-A NAL units truncated."; - return false; - } - uint8_t fnri = payload_data[0] & (kFBit | kNriMask); - uint8_t original_nal_type = payload_data[1] & kTypeMask; - bool first_fragment = (payload_data[1] & kSBit) > 0; - NaluInfo nalu; - nalu.type = original_nal_type; - nalu.sps_id = -1; - nalu.pps_id = -1; - if (first_fragment) { - offset_ = 0; - length_ -= kNalHeaderSize; - absl::optional pps_id = PpsParser::ParsePpsIdFromSlice( - payload_data + 2 * kNalHeaderSize, length_ - kNalHeaderSize); - if (pps_id) { - nalu.pps_id = *pps_id; - } else { - RTC_LOG(LS_WARNING) - << "Failed to parse PPS from first fragment of FU-A NAL " - "unit with original type: " - << static_cast(nalu.type); - } - uint8_t original_nal_header = fnri | original_nal_type; - modified_buffer_.reset(new rtc::Buffer()); - modified_buffer_->AppendData(payload_data + kNalHeaderSize, length_); - (*modified_buffer_)[0] = original_nal_header; - } else { - offset_ = kFuAHeaderSize; - length_ -= kFuAHeaderSize; - } - - if (original_nal_type == H264::NaluType::kIdr) { - parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameKey; - } else { - parsed_payload->video_header().frame_type = - VideoFrameType::kVideoFrameDelta; - } - parsed_payload->video_header().width = 0; - parsed_payload->video_header().height = 0; - parsed_payload->video_header().codec = kVideoCodecH264; - parsed_payload->video_header().simulcastIdx = 0; - parsed_payload->video_header().is_first_packet_in_frame = first_fragment; - auto& h264_header = absl::get( - parsed_payload->video_header().video_type_header); - h264_header.packetization_type = kH264FuA; - h264_header.nalu_type = original_nal_type; - if (first_fragment) { - h264_header.nalus[h264_header.nalus_length] = nalu; - h264_header.nalus_length = 1; - } - return true; -} - } // namespace webrtc diff --git a/modules/rtp_rtcp/source/rtp_format_h264.h b/modules/rtp_rtcp/source/rtp_format_h264.h index 29d4aa0b73..fdea9a7607 100644 --- a/modules/rtp_rtcp/source/rtp_format_h264.h +++ b/modules/rtp_rtcp/source/rtp_format_h264.h @@ -22,6 +22,7 @@ #include "modules/include/module_common_types.h" #include "modules/rtp_rtcp/source/rtp_format.h" #include "modules/rtp_rtcp/source/rtp_packet_to_send.h" +#include "modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h" #include "modules/video_coding/codecs/h264/include/h264_globals.h" #include "rtc_base/buffer.h" #include "rtc_base/constructor_magic.h" @@ -87,26 +88,5 @@ class RtpPacketizerH264 : public RtpPacketizer { RTC_DISALLOW_COPY_AND_ASSIGN(RtpPacketizerH264); }; - -// Depacketizer for H264. -class RtpDepacketizerH264 : public RtpDepacketizer { - public: - RtpDepacketizerH264(); - ~RtpDepacketizerH264() override; - - bool Parse(ParsedPayload* parsed_payload, - const uint8_t* payload_data, - size_t payload_data_length) override; - - private: - bool ParseFuaNalu(RtpDepacketizer::ParsedPayload* parsed_payload, - const uint8_t* payload_data); - bool ProcessStapAOrSingleNalu(RtpDepacketizer::ParsedPayload* parsed_payload, - const uint8_t* payload_data); - - size_t offset_; - size_t length_; - std::unique_ptr modified_buffer_; -}; } // namespace webrtc #endif // MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H264_H_ diff --git a/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc b/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc index f5b496a2a1..bf9771ab9f 100644 --- a/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc +++ b/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc @@ -497,382 +497,5 @@ TEST(RtpPacketizerH264Test, RejectsOverlongDataInPacketizationMode0) { EXPECT_THAT(packets, IsEmpty()); } - -const uint8_t kOriginalSps[] = {kSps, 0x00, 0x00, 0x03, 0x03, - 0xF4, 0x05, 0x03, 0xC7, 0xC0}; -const uint8_t kRewrittenSps[] = {kSps, 0x00, 0x00, 0x03, 0x03, 0xF4, 0x05, 0x03, - 0xC7, 0xE0, 0x1B, 0x41, 0x10, 0x8D, 0x00}; -const uint8_t kIdrOne[] = {kIdr, 0xFF, 0x00, 0x00, 0x04}; -const uint8_t kIdrTwo[] = {kIdr, 0xFF, 0x00, 0x11}; - -struct H264ParsedPayload : public RtpDepacketizer::ParsedPayload { - RTPVideoHeaderH264& h264() { - return absl::get(video.video_type_header); - } -}; - -class RtpDepacketizerH264Test : public ::testing::Test { - protected: - RtpDepacketizerH264Test() - : depacketizer_(std::make_unique()) {} - - void ExpectPacket(H264ParsedPayload* parsed_payload, - const uint8_t* data, - size_t length) { - ASSERT_TRUE(parsed_payload != NULL); - EXPECT_THAT(std::vector( - parsed_payload->payload, - parsed_payload->payload + parsed_payload->payload_length), - ::testing::ElementsAreArray(data, length)); - } - - std::unique_ptr depacketizer_; -}; - -TEST_F(RtpDepacketizerH264Test, TestSingleNalu) { - uint8_t packet[2] = {0x05, 0xFF}; // F=0, NRI=0, Type=5 (IDR). - H264ParsedPayload payload; - - ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet))); - ExpectPacket(&payload, packet, sizeof(packet)); - EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type); - EXPECT_EQ(kVideoCodecH264, payload.video_header().codec); - EXPECT_TRUE(payload.video_header().is_first_packet_in_frame); - EXPECT_EQ(kH264SingleNalu, payload.h264().packetization_type); - EXPECT_EQ(kIdr, payload.h264().nalu_type); -} - -TEST_F(RtpDepacketizerH264Test, TestSingleNaluSpsWithResolution) { - uint8_t packet[] = {kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, - 0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, - 0x00, 0x00, 0x03, 0x2A, 0xE0, 0xF1, 0x83, 0x25}; - H264ParsedPayload payload; - - ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet))); - ExpectPacket(&payload, packet, sizeof(packet)); - EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type); - EXPECT_EQ(kVideoCodecH264, payload.video_header().codec); - EXPECT_TRUE(payload.video_header().is_first_packet_in_frame); - EXPECT_EQ(kH264SingleNalu, payload.h264().packetization_type); - EXPECT_EQ(1280u, payload.video_header().width); - EXPECT_EQ(720u, payload.video_header().height); -} - -TEST_F(RtpDepacketizerH264Test, TestStapAKey) { - // clang-format off - const NaluInfo kExpectedNalus[] = { {H264::kSps, 0, -1}, - {H264::kPps, 1, 2}, - {H264::kIdr, -1, 0} }; - uint8_t packet[] = {kStapA, // F=0, NRI=0, Type=24. - // Length, nal header, payload. - 0, 0x18, kExpectedNalus[0].type, - 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05, 0xBA, - 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00, 0x00, 0x03, - 0x2A, 0xE0, 0xF1, 0x83, 0x25, - 0, 0xD, kExpectedNalus[1].type, - 0x69, 0xFC, 0x0, 0x0, 0x3, 0x0, 0x7, 0xFF, 0xFF, 0xFF, - 0xF6, 0x40, - 0, 0xB, kExpectedNalus[2].type, - 0x85, 0xB8, 0x0, 0x4, 0x0, 0x0, 0x13, 0x93, 0x12, 0x0}; - // clang-format on - - H264ParsedPayload payload; - ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet))); - ExpectPacket(&payload, packet, sizeof(packet)); - EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type); - EXPECT_EQ(kVideoCodecH264, payload.video_header().codec); - EXPECT_TRUE(payload.video_header().is_first_packet_in_frame); - const RTPVideoHeaderH264& h264 = payload.h264(); - EXPECT_EQ(kH264StapA, h264.packetization_type); - // NALU type for aggregated packets is the type of the first packet only. - EXPECT_EQ(kSps, h264.nalu_type); - ASSERT_EQ(3u, h264.nalus_length); - for (size_t i = 0; i < h264.nalus_length; ++i) { - EXPECT_EQ(kExpectedNalus[i].type, h264.nalus[i].type) - << "Failed parsing nalu " << i; - EXPECT_EQ(kExpectedNalus[i].sps_id, h264.nalus[i].sps_id) - << "Failed parsing nalu " << i; - EXPECT_EQ(kExpectedNalus[i].pps_id, h264.nalus[i].pps_id) - << "Failed parsing nalu " << i; - } -} - -TEST_F(RtpDepacketizerH264Test, TestStapANaluSpsWithResolution) { - uint8_t packet[] = {kStapA, // F=0, NRI=0, Type=24. - // Length (2 bytes), nal header, payload. - 0x00, 0x19, kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, - 0x50, 0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, - 0x00, 0x00, 0x03, 0x2A, 0xE0, 0xF1, 0x83, 0x25, 0x80, - 0x00, 0x03, kIdr, 0xFF, 0x00, 0x00, 0x04, kIdr, 0xFF, - 0x00, 0x11}; - - H264ParsedPayload payload; - - ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet))); - ExpectPacket(&payload, packet, sizeof(packet)); - EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type); - EXPECT_EQ(kVideoCodecH264, payload.video_header().codec); - EXPECT_TRUE(payload.video_header().is_first_packet_in_frame); - EXPECT_EQ(kH264StapA, payload.h264().packetization_type); - EXPECT_EQ(1280u, payload.video_header().width); - EXPECT_EQ(720u, payload.video_header().height); -} - -TEST_F(RtpDepacketizerH264Test, TestEmptyStapARejected) { - uint8_t lone_empty_packet[] = {kStapA, 0x00, 0x00}; - - uint8_t leading_empty_packet[] = {kStapA, 0x00, 0x00, 0x00, 0x04, - kIdr, 0xFF, 0x00, 0x11}; - - uint8_t middle_empty_packet[] = {kStapA, 0x00, 0x03, kIdr, 0xFF, 0x00, 0x00, - 0x00, 0x00, 0x04, kIdr, 0xFF, 0x00, 0x11}; - - uint8_t trailing_empty_packet[] = {kStapA, 0x00, 0x03, kIdr, - 0xFF, 0x00, 0x00, 0x00}; - - H264ParsedPayload payload; - - EXPECT_FALSE(depacketizer_->Parse(&payload, lone_empty_packet, - sizeof(lone_empty_packet))); - EXPECT_FALSE(depacketizer_->Parse(&payload, leading_empty_packet, - sizeof(leading_empty_packet))); - EXPECT_FALSE(depacketizer_->Parse(&payload, middle_empty_packet, - sizeof(middle_empty_packet))); - EXPECT_FALSE(depacketizer_->Parse(&payload, trailing_empty_packet, - sizeof(trailing_empty_packet))); -} - -TEST_F(RtpDepacketizerH264Test, DepacketizeWithRewriting) { - rtc::Buffer in_buffer; - rtc::Buffer out_buffer; - - uint8_t kHeader[2] = {kStapA}; - in_buffer.AppendData(kHeader, 1); - out_buffer.AppendData(kHeader, 1); - - ByteWriter::WriteBigEndian(kHeader, sizeof(kOriginalSps)); - in_buffer.AppendData(kHeader, 2); - in_buffer.AppendData(kOriginalSps); - ByteWriter::WriteBigEndian(kHeader, sizeof(kRewrittenSps)); - out_buffer.AppendData(kHeader, 2); - out_buffer.AppendData(kRewrittenSps); - - ByteWriter::WriteBigEndian(kHeader, sizeof(kIdrOne)); - in_buffer.AppendData(kHeader, 2); - in_buffer.AppendData(kIdrOne); - out_buffer.AppendData(kHeader, 2); - out_buffer.AppendData(kIdrOne); - - ByteWriter::WriteBigEndian(kHeader, sizeof(kIdrTwo)); - in_buffer.AppendData(kHeader, 2); - in_buffer.AppendData(kIdrTwo); - out_buffer.AppendData(kHeader, 2); - out_buffer.AppendData(kIdrTwo); - - H264ParsedPayload payload; - EXPECT_TRUE( - depacketizer_->Parse(&payload, in_buffer.data(), in_buffer.size())); - - std::vector expected_packet_payload( - out_buffer.data(), &out_buffer.data()[out_buffer.size()]); - - EXPECT_THAT( - expected_packet_payload, - ::testing::ElementsAreArray(payload.payload, payload.payload_length)); -} - -TEST_F(RtpDepacketizerH264Test, DepacketizeWithDoubleRewriting) { - rtc::Buffer in_buffer; - rtc::Buffer out_buffer; - - uint8_t kHeader[2] = {kStapA}; - in_buffer.AppendData(kHeader, 1); - out_buffer.AppendData(kHeader, 1); - - // First SPS will be kept... - ByteWriter::WriteBigEndian(kHeader, sizeof(kOriginalSps)); - in_buffer.AppendData(kHeader, 2); - in_buffer.AppendData(kOriginalSps); - out_buffer.AppendData(kHeader, 2); - out_buffer.AppendData(kOriginalSps); - - // ...only the second one will be rewritten. - ByteWriter::WriteBigEndian(kHeader, sizeof(kOriginalSps)); - in_buffer.AppendData(kHeader, 2); - in_buffer.AppendData(kOriginalSps); - ByteWriter::WriteBigEndian(kHeader, sizeof(kRewrittenSps)); - out_buffer.AppendData(kHeader, 2); - out_buffer.AppendData(kRewrittenSps); - - ByteWriter::WriteBigEndian(kHeader, sizeof(kIdrOne)); - in_buffer.AppendData(kHeader, 2); - in_buffer.AppendData(kIdrOne); - out_buffer.AppendData(kHeader, 2); - out_buffer.AppendData(kIdrOne); - - ByteWriter::WriteBigEndian(kHeader, sizeof(kIdrTwo)); - in_buffer.AppendData(kHeader, 2); - in_buffer.AppendData(kIdrTwo); - out_buffer.AppendData(kHeader, 2); - out_buffer.AppendData(kIdrTwo); - - H264ParsedPayload payload; - EXPECT_TRUE( - depacketizer_->Parse(&payload, in_buffer.data(), in_buffer.size())); - - std::vector expected_packet_payload( - out_buffer.data(), &out_buffer.data()[out_buffer.size()]); - - EXPECT_THAT( - expected_packet_payload, - ::testing::ElementsAreArray(payload.payload, payload.payload_length)); -} - -TEST_F(RtpDepacketizerH264Test, TestStapADelta) { - uint8_t packet[16] = {kStapA, // F=0, NRI=0, Type=24. - // Length, nal header, payload. - 0, 0x02, kSlice, 0xFF, 0, 0x03, kSlice, 0xFF, 0x00, 0, - 0x04, kSlice, 0xFF, 0x00, 0x11}; - H264ParsedPayload payload; - - ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet))); - ExpectPacket(&payload, packet, sizeof(packet)); - EXPECT_EQ(VideoFrameType::kVideoFrameDelta, - payload.video_header().frame_type); - EXPECT_EQ(kVideoCodecH264, payload.video_header().codec); - EXPECT_TRUE(payload.video_header().is_first_packet_in_frame); - EXPECT_EQ(kH264StapA, payload.h264().packetization_type); - // NALU type for aggregated packets is the type of the first packet only. - EXPECT_EQ(kSlice, payload.h264().nalu_type); -} - -TEST_F(RtpDepacketizerH264Test, TestFuA) { - // clang-format off - uint8_t packet1[] = { - kFuA, // F=0, NRI=0, Type=28. - kSBit | kIdr, // FU header. - 0x85, 0xB8, 0x0, 0x4, 0x0, 0x0, 0x13, 0x93, 0x12, 0x0 // Payload. - }; - // clang-format on - const uint8_t kExpected1[] = {kIdr, 0x85, 0xB8, 0x0, 0x4, 0x0, - 0x0, 0x13, 0x93, 0x12, 0x0}; - - uint8_t packet2[] = { - kFuA, // F=0, NRI=0, Type=28. - kIdr, // FU header. - 0x02 // Payload. - }; - const uint8_t kExpected2[] = {0x02}; - - uint8_t packet3[] = { - kFuA, // F=0, NRI=0, Type=28. - kEBit | kIdr, // FU header. - 0x03 // Payload. - }; - const uint8_t kExpected3[] = {0x03}; - - H264ParsedPayload payload; - - // We expect that the first packet is one byte shorter since the FU-A header - // has been replaced by the original nal header. - ASSERT_TRUE(depacketizer_->Parse(&payload, packet1, sizeof(packet1))); - ExpectPacket(&payload, kExpected1, sizeof(kExpected1)); - EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type); - EXPECT_EQ(kVideoCodecH264, payload.video_header().codec); - EXPECT_TRUE(payload.video_header().is_first_packet_in_frame); - const RTPVideoHeaderH264& h264 = payload.h264(); - EXPECT_EQ(kH264FuA, h264.packetization_type); - EXPECT_EQ(kIdr, h264.nalu_type); - ASSERT_EQ(1u, h264.nalus_length); - EXPECT_EQ(static_cast(kIdr), h264.nalus[0].type); - EXPECT_EQ(-1, h264.nalus[0].sps_id); - EXPECT_EQ(0, h264.nalus[0].pps_id); - - // Following packets will be 2 bytes shorter since they will only be appended - // onto the first packet. - payload = H264ParsedPayload(); - ASSERT_TRUE(depacketizer_->Parse(&payload, packet2, sizeof(packet2))); - ExpectPacket(&payload, kExpected2, sizeof(kExpected2)); - EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type); - EXPECT_EQ(kVideoCodecH264, payload.video_header().codec); - EXPECT_FALSE(payload.video_header().is_first_packet_in_frame); - { - const RTPVideoHeaderH264& h264 = payload.h264(); - EXPECT_EQ(kH264FuA, h264.packetization_type); - EXPECT_EQ(kIdr, h264.nalu_type); - // NALU info is only expected for the first FU-A packet. - EXPECT_EQ(0u, h264.nalus_length); - } - - payload = H264ParsedPayload(); - ASSERT_TRUE(depacketizer_->Parse(&payload, packet3, sizeof(packet3))); - ExpectPacket(&payload, kExpected3, sizeof(kExpected3)); - EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type); - EXPECT_EQ(kVideoCodecH264, payload.video_header().codec); - EXPECT_FALSE(payload.video_header().is_first_packet_in_frame); - { - const RTPVideoHeaderH264& h264 = payload.h264(); - EXPECT_EQ(kH264FuA, h264.packetization_type); - EXPECT_EQ(kIdr, h264.nalu_type); - // NALU info is only expected for the first FU-A packet. - ASSERT_EQ(0u, h264.nalus_length); - } -} - -TEST_F(RtpDepacketizerH264Test, TestEmptyPayload) { - // Using a wild pointer to crash on accesses from inside the depacketizer. - uint8_t* garbage_ptr = reinterpret_cast(0x4711); - H264ParsedPayload payload; - EXPECT_FALSE(depacketizer_->Parse(&payload, garbage_ptr, 0)); -} - -TEST_F(RtpDepacketizerH264Test, TestTruncatedFuaNalu) { - const uint8_t kPayload[] = {0x9c}; - H264ParsedPayload payload; - EXPECT_FALSE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload))); -} - -TEST_F(RtpDepacketizerH264Test, TestTruncatedSingleStapANalu) { - const uint8_t kPayload[] = {0xd8, 0x27}; - H264ParsedPayload payload; - EXPECT_FALSE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload))); -} - -TEST_F(RtpDepacketizerH264Test, TestStapAPacketWithTruncatedNalUnits) { - const uint8_t kPayload[] = {0x58, 0xCB, 0xED, 0xDF}; - H264ParsedPayload payload; - EXPECT_FALSE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload))); -} - -TEST_F(RtpDepacketizerH264Test, TestTruncationJustAfterSingleStapANalu) { - const uint8_t kPayload[] = {0x38, 0x27, 0x27}; - H264ParsedPayload payload; - EXPECT_FALSE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload))); -} - -TEST_F(RtpDepacketizerH264Test, TestShortSpsPacket) { - const uint8_t kPayload[] = {0x27, 0x80, 0x00}; - H264ParsedPayload payload; - EXPECT_TRUE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload))); -} - -TEST_F(RtpDepacketizerH264Test, TestSeiPacket) { - const uint8_t kPayload[] = { - kSei, // F=0, NRI=0, Type=6. - 0x03, 0x03, 0x03, 0x03 // Payload. - }; - H264ParsedPayload payload; - ASSERT_TRUE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload))); - const RTPVideoHeaderH264& h264 = payload.h264(); - EXPECT_EQ(VideoFrameType::kVideoFrameDelta, - payload.video_header().frame_type); - EXPECT_EQ(kH264SingleNalu, h264.packetization_type); - EXPECT_EQ(kSei, h264.nalu_type); - ASSERT_EQ(1u, h264.nalus_length); - EXPECT_EQ(static_cast(kSei), h264.nalus[0].type); - EXPECT_EQ(-1, h264.nalus[0].sps_id); - EXPECT_EQ(-1, h264.nalus[0].pps_id); -} - } // namespace } // namespace webrtc diff --git a/modules/rtp_rtcp/source/video_rtp_depacketizer_h264.cc b/modules/rtp_rtcp/source/video_rtp_depacketizer_h264.cc new file mode 100644 index 0000000000..9babc67548 --- /dev/null +++ b/modules/rtp_rtcp/source/video_rtp_depacketizer_h264.cc @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h" + +#include + +#include +#include +#include +#include +#include + +#include "absl/types/optional.h" +#include "absl/types/variant.h" +#include "common_video/h264/h264_common.h" +#include "common_video/h264/pps_parser.h" +#include "common_video/h264/sps_parser.h" +#include "common_video/h264/sps_vui_rewriter.h" +#include "modules/rtp_rtcp/source/byte_io.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/system/fallthrough.h" + +namespace webrtc { +namespace { + +static const size_t kNalHeaderSize = 1; +static const size_t kFuAHeaderSize = 2; +static const size_t kLengthFieldSize = 2; +static const size_t kStapAHeaderSize = kNalHeaderSize + kLengthFieldSize; + +// Bit masks for FU (A and B) indicators. +enum NalDefs : uint8_t { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F }; + +// Bit masks for FU (A and B) headers. +enum FuDefs : uint8_t { kSBit = 0x80, kEBit = 0x40, kRBit = 0x20 }; + +// TODO(pbos): Avoid parsing this here as well as inside the jitter buffer. +bool ParseStapAStartOffsets(const uint8_t* nalu_ptr, + size_t length_remaining, + std::vector* offsets) { + size_t offset = 0; + while (length_remaining > 0) { + // Buffer doesn't contain room for additional nalu length. + if (length_remaining < sizeof(uint16_t)) + return false; + uint16_t nalu_size = ByteReader::ReadBigEndian(nalu_ptr); + nalu_ptr += sizeof(uint16_t); + length_remaining -= sizeof(uint16_t); + if (nalu_size > length_remaining) + return false; + nalu_ptr += nalu_size; + length_remaining -= nalu_size; + + offsets->push_back(offset + kStapAHeaderSize); + offset += kLengthFieldSize + nalu_size; + } + return true; +} + +} // namespace + +RtpDepacketizerH264::RtpDepacketizerH264() : offset_(0), length_(0) {} +RtpDepacketizerH264::~RtpDepacketizerH264() {} + +bool RtpDepacketizerH264::ProcessStapAOrSingleNalu( + ParsedPayload* parsed_payload, + const uint8_t* payload_data) { + parsed_payload->video_header().width = 0; + parsed_payload->video_header().height = 0; + parsed_payload->video_header().codec = kVideoCodecH264; + parsed_payload->video_header().simulcastIdx = 0; + parsed_payload->video_header().is_first_packet_in_frame = true; + auto& h264_header = absl::get( + parsed_payload->video_header().video_type_header); + + const uint8_t* nalu_start = payload_data + kNalHeaderSize; + const size_t nalu_length = length_ - kNalHeaderSize; + uint8_t nal_type = payload_data[0] & kTypeMask; + std::vector nalu_start_offsets; + if (nal_type == H264::NaluType::kStapA) { + // Skip the StapA header (StapA NAL type + length). + if (length_ <= kStapAHeaderSize) { + RTC_LOG(LS_ERROR) << "StapA header truncated."; + return false; + } + + if (!ParseStapAStartOffsets(nalu_start, nalu_length, &nalu_start_offsets)) { + RTC_LOG(LS_ERROR) << "StapA packet with incorrect NALU packet lengths."; + return false; + } + + h264_header.packetization_type = kH264StapA; + nal_type = payload_data[kStapAHeaderSize] & kTypeMask; + } else { + h264_header.packetization_type = kH264SingleNalu; + nalu_start_offsets.push_back(0); + } + h264_header.nalu_type = nal_type; + parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameDelta; + + nalu_start_offsets.push_back(length_ + kLengthFieldSize); // End offset. + for (size_t i = 0; i < nalu_start_offsets.size() - 1; ++i) { + size_t start_offset = nalu_start_offsets[i]; + // End offset is actually start offset for next unit, excluding length field + // so remove that from this units length. + size_t end_offset = nalu_start_offsets[i + 1] - kLengthFieldSize; + if (end_offset - start_offset < H264::kNaluTypeSize) { + RTC_LOG(LS_ERROR) << "STAP-A packet too short"; + return false; + } + + NaluInfo nalu; + nalu.type = payload_data[start_offset] & kTypeMask; + nalu.sps_id = -1; + nalu.pps_id = -1; + start_offset += H264::kNaluTypeSize; + + switch (nalu.type) { + case H264::NaluType::kSps: { + // Check if VUI is present in SPS and if it needs to be modified to + // avoid + // excessive decoder latency. + + // Copy any previous data first (likely just the first header). + std::unique_ptr output_buffer(new rtc::Buffer()); + if (start_offset) + output_buffer->AppendData(payload_data, start_offset); + + absl::optional sps; + + SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps( + &payload_data[start_offset], end_offset - start_offset, &sps, + nullptr, output_buffer.get(), SpsVuiRewriter::Direction::kIncoming); + + if (result == SpsVuiRewriter::ParseResult::kVuiRewritten) { + if (modified_buffer_) { + RTC_LOG(LS_WARNING) + << "More than one H264 SPS NAL units needing " + "rewriting found within a single STAP-A packet. " + "Keeping the first and rewriting the last."; + } + + // Rewrite length field to new SPS size. + if (h264_header.packetization_type == kH264StapA) { + size_t length_field_offset = + start_offset - (H264::kNaluTypeSize + kLengthFieldSize); + // Stap-A Length includes payload data and type header. + size_t rewritten_size = + output_buffer->size() - start_offset + H264::kNaluTypeSize; + ByteWriter::WriteBigEndian( + &(*output_buffer)[length_field_offset], rewritten_size); + } + + // Append rest of packet. + output_buffer->AppendData(&payload_data[end_offset], + nalu_length + kNalHeaderSize - end_offset); + + modified_buffer_ = std::move(output_buffer); + length_ = modified_buffer_->size(); + } + + if (sps) { + parsed_payload->video_header().width = sps->width; + parsed_payload->video_header().height = sps->height; + nalu.sps_id = sps->id; + } else { + RTC_LOG(LS_WARNING) << "Failed to parse SPS id from SPS slice."; + } + parsed_payload->video_header().frame_type = + VideoFrameType::kVideoFrameKey; + break; + } + case H264::NaluType::kPps: { + uint32_t pps_id; + uint32_t sps_id; + if (PpsParser::ParsePpsIds(&payload_data[start_offset], + end_offset - start_offset, &pps_id, + &sps_id)) { + nalu.pps_id = pps_id; + nalu.sps_id = sps_id; + } else { + RTC_LOG(LS_WARNING) + << "Failed to parse PPS id and SPS id from PPS slice."; + } + break; + } + case H264::NaluType::kIdr: + parsed_payload->video_header().frame_type = + VideoFrameType::kVideoFrameKey; + RTC_FALLTHROUGH(); + case H264::NaluType::kSlice: { + absl::optional pps_id = PpsParser::ParsePpsIdFromSlice( + &payload_data[start_offset], end_offset - start_offset); + if (pps_id) { + nalu.pps_id = *pps_id; + } else { + RTC_LOG(LS_WARNING) << "Failed to parse PPS id from slice of type: " + << static_cast(nalu.type); + } + break; + } + // Slices below don't contain SPS or PPS ids. + case H264::NaluType::kAud: + case H264::NaluType::kEndOfSequence: + case H264::NaluType::kEndOfStream: + case H264::NaluType::kFiller: + case H264::NaluType::kSei: + break; + case H264::NaluType::kStapA: + case H264::NaluType::kFuA: + RTC_LOG(LS_WARNING) << "Unexpected STAP-A or FU-A received."; + return false; + } + + if (h264_header.nalus_length == kMaxNalusPerPacket) { + RTC_LOG(LS_WARNING) + << "Received packet containing more than " << kMaxNalusPerPacket + << " NAL units. Will not keep track sps and pps ids for all of them."; + } else { + h264_header.nalus[h264_header.nalus_length++] = nalu; + } + } + + return true; +} + +bool RtpDepacketizerH264::ParseFuaNalu( + RtpDepacketizer::ParsedPayload* parsed_payload, + const uint8_t* payload_data) { + if (length_ < kFuAHeaderSize) { + RTC_LOG(LS_ERROR) << "FU-A NAL units truncated."; + return false; + } + uint8_t fnri = payload_data[0] & (kFBit | kNriMask); + uint8_t original_nal_type = payload_data[1] & kTypeMask; + bool first_fragment = (payload_data[1] & kSBit) > 0; + NaluInfo nalu; + nalu.type = original_nal_type; + nalu.sps_id = -1; + nalu.pps_id = -1; + if (first_fragment) { + offset_ = 0; + length_ -= kNalHeaderSize; + absl::optional pps_id = PpsParser::ParsePpsIdFromSlice( + payload_data + 2 * kNalHeaderSize, length_ - kNalHeaderSize); + if (pps_id) { + nalu.pps_id = *pps_id; + } else { + RTC_LOG(LS_WARNING) + << "Failed to parse PPS from first fragment of FU-A NAL " + "unit with original type: " + << static_cast(nalu.type); + } + uint8_t original_nal_header = fnri | original_nal_type; + modified_buffer_.reset(new rtc::Buffer()); + modified_buffer_->AppendData(payload_data + kNalHeaderSize, length_); + (*modified_buffer_)[0] = original_nal_header; + } else { + offset_ = kFuAHeaderSize; + length_ -= kFuAHeaderSize; + } + + if (original_nal_type == H264::NaluType::kIdr) { + parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameKey; + } else { + parsed_payload->video_header().frame_type = + VideoFrameType::kVideoFrameDelta; + } + parsed_payload->video_header().width = 0; + parsed_payload->video_header().height = 0; + parsed_payload->video_header().codec = kVideoCodecH264; + parsed_payload->video_header().simulcastIdx = 0; + parsed_payload->video_header().is_first_packet_in_frame = first_fragment; + auto& h264_header = absl::get( + parsed_payload->video_header().video_type_header); + h264_header.packetization_type = kH264FuA; + h264_header.nalu_type = original_nal_type; + if (first_fragment) { + h264_header.nalus[h264_header.nalus_length] = nalu; + h264_header.nalus_length = 1; + } + return true; +} + +bool RtpDepacketizerH264::Parse(ParsedPayload* parsed_payload, + const uint8_t* payload_data, + size_t payload_data_length) { + RTC_CHECK(parsed_payload != nullptr); + if (payload_data_length == 0) { + RTC_LOG(LS_ERROR) << "Empty payload."; + return false; + } + + offset_ = 0; + length_ = payload_data_length; + modified_buffer_.reset(); + + uint8_t nal_type = payload_data[0] & kTypeMask; + parsed_payload->video_header() + .video_type_header.emplace(); + if (nal_type == H264::NaluType::kFuA) { + // Fragmented NAL units (FU-A). + if (!ParseFuaNalu(parsed_payload, payload_data)) + return false; + } else { + // We handle STAP-A and single NALU's the same way here. The jitter buffer + // will depacketize the STAP-A into NAL units later. + // TODO(sprang): Parse STAP-A offsets here and store in fragmentation vec. + if (!ProcessStapAOrSingleNalu(parsed_payload, payload_data)) + return false; + } + + const uint8_t* payload = + modified_buffer_ ? modified_buffer_->data() : payload_data; + + parsed_payload->payload = payload + offset_; + parsed_payload->payload_length = length_; + return true; +} + +} // namespace webrtc diff --git a/modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h b/modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h new file mode 100644 index 0000000000..044ad035af --- /dev/null +++ b/modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_RTP_RTCP_SOURCE_VIDEO_RTP_DEPACKETIZER_H264_H_ +#define MODULES_RTP_RTCP_SOURCE_VIDEO_RTP_DEPACKETIZER_H264_H_ + +#include +#include + +#include + +#include "modules/rtp_rtcp/source/rtp_format.h" +#include "rtc_base/buffer.h" + +namespace webrtc { +class RtpDepacketizerH264 : public RtpDepacketizer { + public: + RtpDepacketizerH264(); + ~RtpDepacketizerH264() override; + + bool Parse(ParsedPayload* parsed_payload, + const uint8_t* payload_data, + size_t payload_data_length) override; + + private: + bool ParseFuaNalu(RtpDepacketizer::ParsedPayload* parsed_payload, + const uint8_t* payload_data); + bool ProcessStapAOrSingleNalu(RtpDepacketizer::ParsedPayload* parsed_payload, + const uint8_t* payload_data); + + size_t offset_; + size_t length_; + std::unique_ptr modified_buffer_; +}; +} // namespace webrtc + +#endif // MODULES_RTP_RTCP_SOURCE_VIDEO_RTP_DEPACKETIZER_H264_H_ diff --git a/modules/rtp_rtcp/source/video_rtp_depacketizer_h264_unittest.cc b/modules/rtp_rtcp/source/video_rtp_depacketizer_h264_unittest.cc new file mode 100644 index 0000000000..d4467aa279 --- /dev/null +++ b/modules/rtp_rtcp/source/video_rtp_depacketizer_h264_unittest.cc @@ -0,0 +1,427 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h" + +#include +#include + +#include "api/array_view.h" +#include "common_video/h264/h264_common.h" +#include "modules/include/module_common_types.h" +#include "modules/rtp_rtcp/mocks/mock_rtp_rtcp.h" +#include "modules/rtp_rtcp/source/byte_io.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::testing::Each; +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; +using ::testing::Eq; +using ::testing::IsEmpty; +using ::testing::SizeIs; + +enum Nalu { + kSlice = 1, + kIdr = 5, + kSei = 6, + kSps = 7, + kPps = 8, + kStapA = 24, + kFuA = 28 +}; + +// Bit masks for FU (A and B) indicators. +enum NalDefs { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F }; + +// Bit masks for FU (A and B) headers. +enum FuDefs { kSBit = 0x80, kEBit = 0x40, kRBit = 0x20 }; + +const uint8_t kOriginalSps[] = {kSps, 0x00, 0x00, 0x03, 0x03, + 0xF4, 0x05, 0x03, 0xC7, 0xC0}; +const uint8_t kRewrittenSps[] = {kSps, 0x00, 0x00, 0x03, 0x03, 0xF4, 0x05, 0x03, + 0xC7, 0xE0, 0x1B, 0x41, 0x10, 0x8D, 0x00}; +const uint8_t kIdrOne[] = {kIdr, 0xFF, 0x00, 0x00, 0x04}; +const uint8_t kIdrTwo[] = {kIdr, 0xFF, 0x00, 0x11}; + +struct H264ParsedPayload : public RtpDepacketizer::ParsedPayload { + RTPVideoHeaderH264& h264() { + return absl::get(video.video_type_header); + } +}; + +class RtpDepacketizerH264Test : public ::testing::Test { + protected: + RtpDepacketizerH264Test() + : depacketizer_(std::make_unique()) {} + + void ExpectPacket(H264ParsedPayload* parsed_payload, + const uint8_t* data, + size_t length) { + ASSERT_TRUE(parsed_payload != NULL); + EXPECT_THAT(std::vector( + parsed_payload->payload, + parsed_payload->payload + parsed_payload->payload_length), + ::testing::ElementsAreArray(data, length)); + } + + std::unique_ptr depacketizer_; +}; + +TEST_F(RtpDepacketizerH264Test, TestSingleNalu) { + uint8_t packet[2] = {0x05, 0xFF}; // F=0, NRI=0, Type=5 (IDR). + H264ParsedPayload payload; + + ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet))); + ExpectPacket(&payload, packet, sizeof(packet)); + EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type); + EXPECT_EQ(kVideoCodecH264, payload.video_header().codec); + EXPECT_TRUE(payload.video_header().is_first_packet_in_frame); + EXPECT_EQ(kH264SingleNalu, payload.h264().packetization_type); + EXPECT_EQ(kIdr, payload.h264().nalu_type); +} + +TEST_F(RtpDepacketizerH264Test, TestSingleNaluSpsWithResolution) { + uint8_t packet[] = {kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, + 0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, + 0x00, 0x00, 0x03, 0x2A, 0xE0, 0xF1, 0x83, 0x25}; + H264ParsedPayload payload; + + ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet))); + ExpectPacket(&payload, packet, sizeof(packet)); + EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type); + EXPECT_EQ(kVideoCodecH264, payload.video_header().codec); + EXPECT_TRUE(payload.video_header().is_first_packet_in_frame); + EXPECT_EQ(kH264SingleNalu, payload.h264().packetization_type); + EXPECT_EQ(1280u, payload.video_header().width); + EXPECT_EQ(720u, payload.video_header().height); +} + +TEST_F(RtpDepacketizerH264Test, TestStapAKey) { + // clang-format off + const NaluInfo kExpectedNalus[] = { {H264::kSps, 0, -1}, + {H264::kPps, 1, 2}, + {H264::kIdr, -1, 0} }; + uint8_t packet[] = {kStapA, // F=0, NRI=0, Type=24. + // Length, nal header, payload. + 0, 0x18, kExpectedNalus[0].type, + 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05, 0xBA, + 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00, 0x00, 0x03, + 0x2A, 0xE0, 0xF1, 0x83, 0x25, + 0, 0xD, kExpectedNalus[1].type, + 0x69, 0xFC, 0x0, 0x0, 0x3, 0x0, 0x7, 0xFF, 0xFF, 0xFF, + 0xF6, 0x40, + 0, 0xB, kExpectedNalus[2].type, + 0x85, 0xB8, 0x0, 0x4, 0x0, 0x0, 0x13, 0x93, 0x12, 0x0}; + // clang-format on + + H264ParsedPayload payload; + ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet))); + ExpectPacket(&payload, packet, sizeof(packet)); + EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type); + EXPECT_EQ(kVideoCodecH264, payload.video_header().codec); + EXPECT_TRUE(payload.video_header().is_first_packet_in_frame); + const RTPVideoHeaderH264& h264 = payload.h264(); + EXPECT_EQ(kH264StapA, h264.packetization_type); + // NALU type for aggregated packets is the type of the first packet only. + EXPECT_EQ(kSps, h264.nalu_type); + ASSERT_EQ(3u, h264.nalus_length); + for (size_t i = 0; i < h264.nalus_length; ++i) { + EXPECT_EQ(kExpectedNalus[i].type, h264.nalus[i].type) + << "Failed parsing nalu " << i; + EXPECT_EQ(kExpectedNalus[i].sps_id, h264.nalus[i].sps_id) + << "Failed parsing nalu " << i; + EXPECT_EQ(kExpectedNalus[i].pps_id, h264.nalus[i].pps_id) + << "Failed parsing nalu " << i; + } +} + +TEST_F(RtpDepacketizerH264Test, TestStapANaluSpsWithResolution) { + uint8_t packet[] = {kStapA, // F=0, NRI=0, Type=24. + // Length (2 bytes), nal header, payload. + 0x00, 0x19, kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, + 0x50, 0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, + 0x00, 0x00, 0x03, 0x2A, 0xE0, 0xF1, 0x83, 0x25, 0x80, + 0x00, 0x03, kIdr, 0xFF, 0x00, 0x00, 0x04, kIdr, 0xFF, + 0x00, 0x11}; + + H264ParsedPayload payload; + + ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet))); + ExpectPacket(&payload, packet, sizeof(packet)); + EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type); + EXPECT_EQ(kVideoCodecH264, payload.video_header().codec); + EXPECT_TRUE(payload.video_header().is_first_packet_in_frame); + EXPECT_EQ(kH264StapA, payload.h264().packetization_type); + EXPECT_EQ(1280u, payload.video_header().width); + EXPECT_EQ(720u, payload.video_header().height); +} + +TEST_F(RtpDepacketizerH264Test, TestEmptyStapARejected) { + uint8_t lone_empty_packet[] = {kStapA, 0x00, 0x00}; + + uint8_t leading_empty_packet[] = {kStapA, 0x00, 0x00, 0x00, 0x04, + kIdr, 0xFF, 0x00, 0x11}; + + uint8_t middle_empty_packet[] = {kStapA, 0x00, 0x03, kIdr, 0xFF, 0x00, 0x00, + 0x00, 0x00, 0x04, kIdr, 0xFF, 0x00, 0x11}; + + uint8_t trailing_empty_packet[] = {kStapA, 0x00, 0x03, kIdr, + 0xFF, 0x00, 0x00, 0x00}; + + H264ParsedPayload payload; + + EXPECT_FALSE(depacketizer_->Parse(&payload, lone_empty_packet, + sizeof(lone_empty_packet))); + EXPECT_FALSE(depacketizer_->Parse(&payload, leading_empty_packet, + sizeof(leading_empty_packet))); + EXPECT_FALSE(depacketizer_->Parse(&payload, middle_empty_packet, + sizeof(middle_empty_packet))); + EXPECT_FALSE(depacketizer_->Parse(&payload, trailing_empty_packet, + sizeof(trailing_empty_packet))); +} + +TEST_F(RtpDepacketizerH264Test, DepacketizeWithRewriting) { + rtc::Buffer in_buffer; + rtc::Buffer out_buffer; + + uint8_t kHeader[2] = {kStapA}; + in_buffer.AppendData(kHeader, 1); + out_buffer.AppendData(kHeader, 1); + + ByteWriter::WriteBigEndian(kHeader, sizeof(kOriginalSps)); + in_buffer.AppendData(kHeader, 2); + in_buffer.AppendData(kOriginalSps); + ByteWriter::WriteBigEndian(kHeader, sizeof(kRewrittenSps)); + out_buffer.AppendData(kHeader, 2); + out_buffer.AppendData(kRewrittenSps); + + ByteWriter::WriteBigEndian(kHeader, sizeof(kIdrOne)); + in_buffer.AppendData(kHeader, 2); + in_buffer.AppendData(kIdrOne); + out_buffer.AppendData(kHeader, 2); + out_buffer.AppendData(kIdrOne); + + ByteWriter::WriteBigEndian(kHeader, sizeof(kIdrTwo)); + in_buffer.AppendData(kHeader, 2); + in_buffer.AppendData(kIdrTwo); + out_buffer.AppendData(kHeader, 2); + out_buffer.AppendData(kIdrTwo); + + H264ParsedPayload payload; + EXPECT_TRUE( + depacketizer_->Parse(&payload, in_buffer.data(), in_buffer.size())); + + std::vector expected_packet_payload( + out_buffer.data(), &out_buffer.data()[out_buffer.size()]); + + EXPECT_THAT( + expected_packet_payload, + ::testing::ElementsAreArray(payload.payload, payload.payload_length)); +} + +TEST_F(RtpDepacketizerH264Test, DepacketizeWithDoubleRewriting) { + rtc::Buffer in_buffer; + rtc::Buffer out_buffer; + + uint8_t kHeader[2] = {kStapA}; + in_buffer.AppendData(kHeader, 1); + out_buffer.AppendData(kHeader, 1); + + // First SPS will be kept... + ByteWriter::WriteBigEndian(kHeader, sizeof(kOriginalSps)); + in_buffer.AppendData(kHeader, 2); + in_buffer.AppendData(kOriginalSps); + out_buffer.AppendData(kHeader, 2); + out_buffer.AppendData(kOriginalSps); + + // ...only the second one will be rewritten. + ByteWriter::WriteBigEndian(kHeader, sizeof(kOriginalSps)); + in_buffer.AppendData(kHeader, 2); + in_buffer.AppendData(kOriginalSps); + ByteWriter::WriteBigEndian(kHeader, sizeof(kRewrittenSps)); + out_buffer.AppendData(kHeader, 2); + out_buffer.AppendData(kRewrittenSps); + + ByteWriter::WriteBigEndian(kHeader, sizeof(kIdrOne)); + in_buffer.AppendData(kHeader, 2); + in_buffer.AppendData(kIdrOne); + out_buffer.AppendData(kHeader, 2); + out_buffer.AppendData(kIdrOne); + + ByteWriter::WriteBigEndian(kHeader, sizeof(kIdrTwo)); + in_buffer.AppendData(kHeader, 2); + in_buffer.AppendData(kIdrTwo); + out_buffer.AppendData(kHeader, 2); + out_buffer.AppendData(kIdrTwo); + + H264ParsedPayload payload; + EXPECT_TRUE( + depacketizer_->Parse(&payload, in_buffer.data(), in_buffer.size())); + + std::vector expected_packet_payload( + out_buffer.data(), &out_buffer.data()[out_buffer.size()]); + + EXPECT_THAT( + expected_packet_payload, + ::testing::ElementsAreArray(payload.payload, payload.payload_length)); +} + +TEST_F(RtpDepacketizerH264Test, TestStapADelta) { + uint8_t packet[16] = {kStapA, // F=0, NRI=0, Type=24. + // Length, nal header, payload. + 0, 0x02, kSlice, 0xFF, 0, 0x03, kSlice, 0xFF, 0x00, 0, + 0x04, kSlice, 0xFF, 0x00, 0x11}; + H264ParsedPayload payload; + + ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet))); + ExpectPacket(&payload, packet, sizeof(packet)); + EXPECT_EQ(VideoFrameType::kVideoFrameDelta, + payload.video_header().frame_type); + EXPECT_EQ(kVideoCodecH264, payload.video_header().codec); + EXPECT_TRUE(payload.video_header().is_first_packet_in_frame); + EXPECT_EQ(kH264StapA, payload.h264().packetization_type); + // NALU type for aggregated packets is the type of the first packet only. + EXPECT_EQ(kSlice, payload.h264().nalu_type); +} + +TEST_F(RtpDepacketizerH264Test, TestFuA) { + // clang-format off + uint8_t packet1[] = { + kFuA, // F=0, NRI=0, Type=28. + kSBit | kIdr, // FU header. + 0x85, 0xB8, 0x0, 0x4, 0x0, 0x0, 0x13, 0x93, 0x12, 0x0 // Payload. + }; + // clang-format on + const uint8_t kExpected1[] = {kIdr, 0x85, 0xB8, 0x0, 0x4, 0x0, + 0x0, 0x13, 0x93, 0x12, 0x0}; + + uint8_t packet2[] = { + kFuA, // F=0, NRI=0, Type=28. + kIdr, // FU header. + 0x02 // Payload. + }; + const uint8_t kExpected2[] = {0x02}; + + uint8_t packet3[] = { + kFuA, // F=0, NRI=0, Type=28. + kEBit | kIdr, // FU header. + 0x03 // Payload. + }; + const uint8_t kExpected3[] = {0x03}; + + H264ParsedPayload payload; + + // We expect that the first packet is one byte shorter since the FU-A header + // has been replaced by the original nal header. + ASSERT_TRUE(depacketizer_->Parse(&payload, packet1, sizeof(packet1))); + ExpectPacket(&payload, kExpected1, sizeof(kExpected1)); + EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type); + EXPECT_EQ(kVideoCodecH264, payload.video_header().codec); + EXPECT_TRUE(payload.video_header().is_first_packet_in_frame); + const RTPVideoHeaderH264& h264 = payload.h264(); + EXPECT_EQ(kH264FuA, h264.packetization_type); + EXPECT_EQ(kIdr, h264.nalu_type); + ASSERT_EQ(1u, h264.nalus_length); + EXPECT_EQ(static_cast(kIdr), h264.nalus[0].type); + EXPECT_EQ(-1, h264.nalus[0].sps_id); + EXPECT_EQ(0, h264.nalus[0].pps_id); + + // Following packets will be 2 bytes shorter since they will only be appended + // onto the first packet. + payload = H264ParsedPayload(); + ASSERT_TRUE(depacketizer_->Parse(&payload, packet2, sizeof(packet2))); + ExpectPacket(&payload, kExpected2, sizeof(kExpected2)); + EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type); + EXPECT_EQ(kVideoCodecH264, payload.video_header().codec); + EXPECT_FALSE(payload.video_header().is_first_packet_in_frame); + { + const RTPVideoHeaderH264& h264 = payload.h264(); + EXPECT_EQ(kH264FuA, h264.packetization_type); + EXPECT_EQ(kIdr, h264.nalu_type); + // NALU info is only expected for the first FU-A packet. + EXPECT_EQ(0u, h264.nalus_length); + } + + payload = H264ParsedPayload(); + ASSERT_TRUE(depacketizer_->Parse(&payload, packet3, sizeof(packet3))); + ExpectPacket(&payload, kExpected3, sizeof(kExpected3)); + EXPECT_EQ(VideoFrameType::kVideoFrameKey, payload.video_header().frame_type); + EXPECT_EQ(kVideoCodecH264, payload.video_header().codec); + EXPECT_FALSE(payload.video_header().is_first_packet_in_frame); + { + const RTPVideoHeaderH264& h264 = payload.h264(); + EXPECT_EQ(kH264FuA, h264.packetization_type); + EXPECT_EQ(kIdr, h264.nalu_type); + // NALU info is only expected for the first FU-A packet. + ASSERT_EQ(0u, h264.nalus_length); + } +} + +TEST_F(RtpDepacketizerH264Test, TestEmptyPayload) { + // Using a wild pointer to crash on accesses from inside the depacketizer. + uint8_t* garbage_ptr = reinterpret_cast(0x4711); + H264ParsedPayload payload; + EXPECT_FALSE(depacketizer_->Parse(&payload, garbage_ptr, 0)); +} + +TEST_F(RtpDepacketizerH264Test, TestTruncatedFuaNalu) { + const uint8_t kPayload[] = {0x9c}; + H264ParsedPayload payload; + EXPECT_FALSE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload))); +} + +TEST_F(RtpDepacketizerH264Test, TestTruncatedSingleStapANalu) { + const uint8_t kPayload[] = {0xd8, 0x27}; + H264ParsedPayload payload; + EXPECT_FALSE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload))); +} + +TEST_F(RtpDepacketizerH264Test, TestStapAPacketWithTruncatedNalUnits) { + const uint8_t kPayload[] = {0x58, 0xCB, 0xED, 0xDF}; + H264ParsedPayload payload; + EXPECT_FALSE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload))); +} + +TEST_F(RtpDepacketizerH264Test, TestTruncationJustAfterSingleStapANalu) { + const uint8_t kPayload[] = {0x38, 0x27, 0x27}; + H264ParsedPayload payload; + EXPECT_FALSE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload))); +} + +TEST_F(RtpDepacketizerH264Test, TestShortSpsPacket) { + const uint8_t kPayload[] = {0x27, 0x80, 0x00}; + H264ParsedPayload payload; + EXPECT_TRUE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload))); +} + +TEST_F(RtpDepacketizerH264Test, TestSeiPacket) { + const uint8_t kPayload[] = { + kSei, // F=0, NRI=0, Type=6. + 0x03, 0x03, 0x03, 0x03 // Payload. + }; + H264ParsedPayload payload; + ASSERT_TRUE(depacketizer_->Parse(&payload, kPayload, sizeof(kPayload))); + const RTPVideoHeaderH264& h264 = payload.h264(); + EXPECT_EQ(VideoFrameType::kVideoFrameDelta, + payload.video_header().frame_type); + EXPECT_EQ(kH264SingleNalu, h264.packetization_type); + EXPECT_EQ(kSei, h264.nalu_type); + ASSERT_EQ(1u, h264.nalus_length); + EXPECT_EQ(static_cast(kSei), h264.nalus[0].type); + EXPECT_EQ(-1, h264.nalus[0].sps_id); + EXPECT_EQ(-1, h264.nalus[0].pps_id); +} + +} // namespace +} // namespace webrtc