diff --git a/modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.cc b/modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.cc index 41f363d221..a8b04db78d 100644 --- a/modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.cc +++ b/modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.cc @@ -180,9 +180,6 @@ int VideoRtpDepacketizerVp9::ParseRtpPayload( video_header->simulcastIdx = 0; video_header->codec = kVideoCodecVP9; - video_header->frame_type = - p_bit ? VideoFrameType::kVideoFrameDelta : VideoFrameType::kVideoFrameKey; - auto& vp9_header = video_header->video_type_header.emplace(); vp9_header.InitRTPVideoHeaderVP9(); @@ -211,6 +208,9 @@ int VideoRtpDepacketizerVp9::ParseRtpPayload( video_header->height = vp9_header.height[0]; } } + video_header->frame_type = p_bit || vp9_header.inter_layer_predicted + ? VideoFrameType::kVideoFrameDelta + : VideoFrameType::kVideoFrameKey; video_header->is_first_packet_in_frame = b_bit; video_header->is_last_packet_in_frame = e_bit; diff --git a/modules/rtp_rtcp/source/video_rtp_depacketizer_vp9_unittest.cc b/modules/rtp_rtcp/source/video_rtp_depacketizer_vp9_unittest.cc index 36af59a779..7f89812133 100644 --- a/modules/rtp_rtcp/source/video_rtp_depacketizer_vp9_unittest.cc +++ b/modules/rtp_rtcp/source/video_rtp_depacketizer_vp9_unittest.cc @@ -369,5 +369,18 @@ TEST(VideoRtpDepacketizerVp9Test, ReferencesInputCopyOnWriteBuffer) { // Compare pointers to check there was no copy on write buffer unsharing. EXPECT_EQ(parsed->video_payload.cdata(), rtp_payload.cdata() + kHeaderSize); } + +TEST(VideoRtpDepacketizerVp9Test, InterLayerPredOnlyFrameMerkedAsDelta) { + // Set P=0 and D=1 and vefify that the depaketizers marks this packet as a + // part of a delta frame (not a keyframe). + uint8_t packet[13] = {0}; + packet[0] = 0b0010'0000; // I:0 P:0 L:1 F:0 B:0 E:0 V:0 Z:0 + packet[1] = 0b0000'0001; // T:000 U:0 S:000 D:1 + packet[2] = 0; // TL0PICIDX + + RTPVideoHeader video_header; + VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header); + EXPECT_EQ(video_header.frame_type, VideoFrameType::kVideoFrameDelta); +} } // namespace } // namespace webrtc diff --git a/modules/video_coding/rtp_vp9_ref_finder.cc b/modules/video_coding/rtp_vp9_ref_finder.cc index 175ed3464b..f0d51adad8 100644 --- a/modules/video_coding/rtp_vp9_ref_finder.cc +++ b/modules/video_coding/rtp_vp9_ref_finder.cc @@ -132,24 +132,20 @@ RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameGof( FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted); return kHandOff; } - } else if (frame->frame_type() == VideoFrameType::kVideoFrameKey) { - if (frame->SpatialIndex() == 0) { + } else { + if (frame->frame_type() == VideoFrameType::kVideoFrameKey) { RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure"; return kDrop; } - const auto gof_info_it = gof_info_.find(unwrapped_tl0); - if (gof_info_it == gof_info_.end()) - return kStash; - info = &gof_info_it->second; - - frame->num_references = 0; - FrameReceivedVp9(frame->Id(), info); - FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted); - return kHandOff; - } else { - auto gof_info_it = gof_info_.find( - (codec_header.temporal_idx == 0) ? unwrapped_tl0 - 1 : unwrapped_tl0); + // tl0_idx is incremented on temporal_idx=0 frames of the lowest spatial + // layer (which spatial_idx is not necessarily zero). Upper spatial layer + // frames with inter-layer prediction use GOF info of their base spatial + // layer frames. + const bool use_prev_gof = + codec_header.temporal_idx == 0 && !codec_header.inter_layer_predicted; + auto gof_info_it = + gof_info_.find(use_prev_gof ? unwrapped_tl0 - 1 : unwrapped_tl0); // Gof info for this frame is not available yet, stash this frame. if (gof_info_it == gof_info_.end()) @@ -185,29 +181,29 @@ RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameGof( auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id); up_switch_.erase(up_switch_.begin(), up_switch_erase_to); - size_t diff = - ForwardDiff(info->gof->pid_start, frame->Id()); - size_t gof_idx = diff % info->gof->num_frames_in_gof; + if (codec_header.inter_pic_predicted) { + size_t diff = ForwardDiff(info->gof->pid_start, + frame->Id()); + size_t gof_idx = diff % info->gof->num_frames_in_gof; - if (info->gof->num_ref_pics[gof_idx] > EncodedFrame::kMaxFrameReferences) { - return kDrop; - } - // Populate references according to the scalability structure. - frame->num_references = info->gof->num_ref_pics[gof_idx]; - for (size_t i = 0; i < frame->num_references; ++i) { - frame->references[i] = - Subtract(frame->Id(), info->gof->pid_diff[gof_idx][i]); - - // If this is a reference to a frame earlier than the last up switch point, - // then ignore this reference. - if (UpSwitchInIntervalVp9(frame->Id(), codec_header.temporal_idx, - frame->references[i])) { - --frame->num_references; + if (info->gof->num_ref_pics[gof_idx] > EncodedFrame::kMaxFrameReferences) { + return kDrop; } - } - // Override GOF references. - if (!codec_header.inter_pic_predicted) { + // Populate references according to the scalability structure. + frame->num_references = info->gof->num_ref_pics[gof_idx]; + for (size_t i = 0; i < frame->num_references; ++i) { + frame->references[i] = Subtract( + frame->Id(), info->gof->pid_diff[gof_idx][i]); + + // If this is a reference to a frame earlier than the last up switch + // point, then ignore this reference. + if (UpSwitchInIntervalVp9(frame->Id(), codec_header.temporal_idx, + frame->references[i])) { + --frame->num_references; + } + } + } else { frame->num_references = 0; } diff --git a/modules/video_coding/rtp_vp9_ref_finder_unittest.cc b/modules/video_coding/rtp_vp9_ref_finder_unittest.cc index a3cb31ade5..23b6cfca9a 100644 --- a/modules/video_coding/rtp_vp9_ref_finder_unittest.cc +++ b/modules/video_coding/rtp_vp9_ref_finder_unittest.cc @@ -360,6 +360,18 @@ TEST_F(RtpVp9RefFinderTest, GofSkipFramesTemporalLayers_0212) { EXPECT_THAT(frames_, HasFrameWithIdAndRefs(35, {30})); } +TEST_F(RtpVp9RefFinderTest, GofInterLayerPredS0KeyS1Delta) { + GofInfoVP9 ss; + ss.SetGofInfoVP9(kTemporalStructureMode1); + + Insert(Frame().Pid(1).SidAndTid(0, 0).Tl0(0).AsKeyFrame().Gof(&ss)); + Insert(Frame().Pid(1).SidAndTid(1, 0).Tl0(0).AsInterLayer().NotAsInterPic()); + + ASSERT_EQ(2UL, frames_.size()); + EXPECT_THAT(frames_, HasFrameWithIdAndRefs(5, {})); + EXPECT_THAT(frames_, HasFrameWithIdAndRefs(6, {5})); +} + TEST_F(RtpVp9RefFinderTest, GofTemporalLayers_01) { GofInfoVP9 ss; ss.SetGofInfoVP9(kTemporalStructureMode2); // 0101 pattern