/*
 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "call/rtp_payload_params.h"

#include <stddef.h>
#include <algorithm>

#include "absl/container/inlined_vector.h"
#include "absl/types/variant.h"
#include "api/video/video_timing.h"
#include "common_types.h"  // NOLINT(build/include)
#include "modules/video_coding/codecs/h264/include/h264_globals.h"
#include "modules/video_coding/codecs/interface/common_constants.h"
#include "modules/video_coding/codecs/vp8/include/vp8_globals.h"
#include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
#include "rtc_base/arraysize.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/random.h"
#include "rtc_base/time_utils.h"
#include "system_wrappers/include/field_trial.h"

namespace webrtc {

namespace {

// Copies the codec specific fields of |info| into |rtp| and selects the
// alternative stored in |rtp->video_type_header| based on |info.codecType|.
//
// |spatial_index| is used as the simulcast index for VP8, H264 and generic
// codecs (0 when unset). For VP9 it is used as the spatial index, but only
// when more than one spatial layer is signalled; otherwise the spatial index
// is set to kNoSpatialIdx. Multiplex frames are reported as generic. Any other
// codec type leaves |rtp| untouched apart from |rtp->codec|.
void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info,
                                   absl::optional<int> spatial_index,
                                   RTPVideoHeader* rtp) {
  rtp->codec = info.codecType;
  switch (info.codecType) {
    case kVideoCodecVP8: {
      auto& vp8_header = rtp->video_type_header.emplace<RTPVideoHeaderVP8>();
      vp8_header.InitRTPVideoHeaderVP8();
      vp8_header.nonReference = info.codecSpecific.VP8.nonReference;
      vp8_header.temporalIdx = info.codecSpecific.VP8.temporalIdx;
      vp8_header.layerSync = info.codecSpecific.VP8.layerSync;
      vp8_header.keyIdx = info.codecSpecific.VP8.keyIdx;
      rtp->simulcastIdx = spatial_index.value_or(0);
      return;
    }
    case kVideoCodecVP9: {
      auto& vp9_header = rtp->video_type_header.emplace<RTPVideoHeaderVP9>();
      vp9_header.InitRTPVideoHeaderVP9();
      vp9_header.inter_pic_predicted =
          info.codecSpecific.VP9.inter_pic_predicted;
      vp9_header.flexible_mode = info.codecSpecific.VP9.flexible_mode;
      vp9_header.ss_data_available = info.codecSpecific.VP9.ss_data_available;
      vp9_header.non_ref_for_inter_layer_pred =
          info.codecSpecific.VP9.non_ref_for_inter_layer_pred;
      vp9_header.temporal_idx = info.codecSpecific.VP9.temporal_idx;
      vp9_header.temporal_up_switch = info.codecSpecific.VP9.temporal_up_switch;
      vp9_header.inter_layer_predicted =
          info.codecSpecific.VP9.inter_layer_predicted;
      vp9_header.gof_idx = info.codecSpecific.VP9.gof_idx;
      vp9_header.num_spatial_layers =
          info.codecSpecific.VP9.num_spatial_layers;
      // The spatial index is only signalled when there is more than one
      // spatial layer.
      if (vp9_header.num_spatial_layers > 1) {
        vp9_header.spatial_idx = spatial_index.value_or(kNoSpatialIdx);
      } else {
        vp9_header.spatial_idx = kNoSpatialIdx;
      }
      // Scalability structure: per-layer resolutions (optional) and the
      // group-of-frames description.
      if (info.codecSpecific.VP9.ss_data_available) {
        vp9_header.spatial_layer_resolution_present =
            info.codecSpecific.VP9.spatial_layer_resolution_present;
        if (info.codecSpecific.VP9.spatial_layer_resolution_present) {
          for (size_t i = 0; i < info.codecSpecific.VP9.num_spatial_layers;
               ++i) {
            vp9_header.width[i] = info.codecSpecific.VP9.width[i];
            vp9_header.height[i] = info.codecSpecific.VP9.height[i];
          }
        }
        vp9_header.gof.CopyGofInfoVP9(info.codecSpecific.VP9.gof);
      }

      vp9_header.num_ref_pics = info.codecSpecific.VP9.num_ref_pics;
      for (int i = 0; i < info.codecSpecific.VP9.num_ref_pics; ++i) {
        vp9_header.pid_diff[i] = info.codecSpecific.VP9.p_diff[i];
      }
      vp9_header.end_of_picture = info.codecSpecific.VP9.end_of_picture;
      return;
    }
    case kVideoCodecH264: {
      auto& h264_header = rtp->video_type_header.emplace<RTPVideoHeaderH264>();
      h264_header.packetization_mode =
          info.codecSpecific.H264.packetization_mode;
      rtp->simulcastIdx = spatial_index.value_or(0);
      // Frame marking is only filled in when the encoder reports a temporal
      // index; otherwise only |temporal_id| is reset to kNoTemporalIdx.
      rtp->frame_marking.temporal_id = kNoTemporalIdx;
      if (info.codecSpecific.H264.temporal_idx != kNoTemporalIdx) {
        rtp->frame_marking.temporal_id = info.codecSpecific.H264.temporal_idx;
        rtp->frame_marking.layer_id = 0;
        rtp->frame_marking.independent_frame =
            info.codecSpecific.H264.idr_frame;
        rtp->frame_marking.base_layer_sync =
            info.codecSpecific.H264.base_layer_sync;
      }
      return;
    }
    case kVideoCodecMultiplex:
    case kVideoCodecGeneric:
      rtp->codec = kVideoCodecGeneric;
      rtp->simulcastIdx = spatial_index.value_or(0);
      return;
    default:
      return;
  }
}

// Fills |timing| from the timing information carried by |image|.
//
// If the image's timing flags are kInvalid or kNotTriggered, only
// |timing->flags| is written (set to kInvalid). Otherwise the encode start and
// finish deltas are computed relative to the capture time, the remaining
// deltas are zeroed and the flags are copied from the image.
void SetVideoTiming(const EncodedImage& image, VideoSendTiming* timing) {
  if (image.timing_.flags == VideoSendTiming::TimingFrameFlags::kInvalid ||
      image.timing_.flags == VideoSendTiming::TimingFrameFlags::kNotTriggered) {
    timing->flags = VideoSendTiming::TimingFrameFlags::kInvalid;
    return;
  }

  timing->encode_start_delta_ms = VideoSendTiming::GetDeltaCappedMs(
      image.capture_time_ms_, image.timing_.encode_start_ms);
  timing->encode_finish_delta_ms = VideoSendTiming::GetDeltaCappedMs(
      image.capture_time_ms_, image.timing_.encode_finish_ms);
  timing->packetization_finish_delta_ms = 0;
  timing->pacer_exit_delta_ms = 0;
  timing->network_timestamp_delta_ms = 0;
  timing->network2_timestamp_delta_ms = 0;
  timing->flags = image.timing_.flags;
}
}  // namespace

// Creates payload params for |ssrc|. When |state| is non-null the picture id
// and tl0_pic_idx continue from it; otherwise both start from random values
// (the picture id masked to 15 bits). The two experiments are read from field
// trials once, at construction.
RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc,
                                   const RtpPayloadState* state)
    : ssrc_(ssrc),
      generic_picture_id_experiment_(
          field_trial::IsEnabled("WebRTC-GenericPictureId")),
      generic_descriptor_experiment_(
          field_trial::IsEnabled("WebRTC-GenericDescriptor")) {
  // -1 marks "no frame seen yet" in both dependency tracking tables.
  for (auto& spatial_layer : last_shared_frame_id_)
    spatial_layer.fill(-1);

  buffer_id_to_frame_id_.fill(-1);

  Random random(rtc::TimeMicros());
  state_.picture_id =
      state ? state->picture_id : (random.Rand<int16_t>() & 0x7FFF);
  state_.tl0_pic_idx = state ? state->tl0_pic_idx : (random.Rand<uint8_t>());
}

RtpPayloadParams::RtpPayloadParams(const RtpPayloadParams& other) = default;

RtpPayloadParams::~RtpPayloadParams() {}

// Builds the RTP video header for |image|.
//
// Codec specific fields are only populated when |codec_specific_info| is
// non-null. Picture id / tl0_pic_idx state is advanced as a side effect (see
// SetCodecSpecific). The generic descriptor is filled in only when the
// "WebRTC-GenericDescriptor" field trial was enabled at construction.
RTPVideoHeader RtpPayloadParams::GetRtpVideoHeader(
    const EncodedImage& image,
    const CodecSpecificInfo* codec_specific_info,
    int64_t shared_frame_id) {
  RTPVideoHeader rtp_video_header;
  if (codec_specific_info) {
    PopulateRtpWithCodecSpecifics(*codec_specific_info, image.SpatialIndex(),
                                  &rtp_video_header);
  }
  rtp_video_header.rotation = image.rotation_;
  rtp_video_header.content_type = image.content_type_;
  rtp_video_header.playout_delay = image.playout_delay_;
  rtp_video_header.width = image._encodedWidth;
  rtp_video_header.height = image._encodedHeight;
  rtp_video_header.color_space = image.ColorSpace()
                                     ? absl::make_optional(*image.ColorSpace())
                                     : absl::nullopt;
  SetVideoTiming(image, &rtp_video_header.video_timing);

  const bool is_keyframe = image._frameType == kVideoFrameKey;
  // Only VP9 can report several frames per picture; every other codec (and a
  // missing |codec_specific_info|) is treated as first frame in picture.
  const bool first_frame_in_picture =
      (codec_specific_info && codec_specific_info->codecType == kVideoCodecVP9)
          ? codec_specific_info->codecSpecific.VP9.first_frame_in_picture
          : true;

  SetCodecSpecific(&rtp_video_header, first_frame_in_picture);

  if (generic_descriptor_experiment_)
    SetGeneric(codec_specific_info, shared_frame_id, is_keyframe,
               &rtp_video_header);

  return rtp_video_header;
}

// Returns the SSRC given at construction.
uint32_t RtpPayloadParams::ssrc() const {
  return ssrc_;
}

// Returns a copy of the current picture id / tl0_pic_idx state.
RtpPayloadState RtpPayloadParams::state() const {
  return state_;
}

// Advances the picture id / tl0_pic_idx state and writes the resulting values
// into the codec specific part of |rtp_video_header|.
void RtpPayloadParams::SetCodecSpecific(RTPVideoHeader* rtp_video_header,
                                        bool first_frame_in_picture) {
  // Always set picture id. Set tl0_pic_idx iff temporal index is set.
  if (first_frame_in_picture) {
    // The picture id is a 15 bit counter that wraps around.
    state_.picture_id = (static_cast<uint16_t>(state_.picture_id) + 1) & 0x7FFF;
  }
  if (rtp_video_header->codec == kVideoCodecVP8) {
    auto& vp8_header =
        absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
    vp8_header.pictureId = state_.picture_id;

    if (vp8_header.temporalIdx != kNoTemporalIdx) {
      if (vp8_header.temporalIdx == 0) {
        ++state_.tl0_pic_idx;
      }
      vp8_header.tl0PicIdx = state_.tl0_pic_idx;
    }
  }
  if (rtp_video_header->codec == kVideoCodecVP9) {
    auto& vp9_header =
        absl::get<RTPVideoHeaderVP9>(rtp_video_header->video_type_header);
    vp9_header.picture_id = state_.picture_id;

    // Note that in the case that we have no temporal layers but we do have
    // spatial layers, packets will carry layering info with a temporal_idx of
    // zero, and we then have to set and increment tl0_pic_idx.
    if (vp9_header.temporal_idx != kNoTemporalIdx ||
        vp9_header.spatial_idx != kNoSpatialIdx) {
      if (first_frame_in_picture &&
          (vp9_header.temporal_idx == 0 ||
           vp9_header.temporal_idx == kNoTemporalIdx)) {
        ++state_.tl0_pic_idx;
      }
      vp9_header.tl0_pic_idx = state_.tl0_pic_idx;
    }
  }
  if (rtp_video_header->codec == kVideoCodecH264) {
    if (rtp_video_header->frame_marking.temporal_id != kNoTemporalIdx) {
      if (rtp_video_header->frame_marking.temporal_id == 0) {
        ++state_.tl0_pic_idx;
      }
      rtp_video_header->frame_marking.tl0_pic_idx = state_.tl0_pic_idx;
    }
  }
  // There are currently two generic descriptors in WebRTC. The old descriptor
  // can not share a picture id space between simulcast streams, so we use the
  // |picture_id| in this case. We let the |picture_id| tag along in |frame_id|
  // until the old generic format can be removed.
  // TODO(philipel): Remove this when the new generic format has been fully
  //                 implemented.
  if (generic_picture_id_experiment_ &&
      rtp_video_header->codec == kVideoCodecGeneric) {
    rtp_video_header->generic.emplace().frame_id = state_.picture_id;
  }
}

// Fills in the generic frame descriptor of |rtp_video_header| for codecs that
// support it. Currently only VP8 is converted, and only when
// |codec_specific_info| is non-null; all other handled codecs are a no-op.
void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info,
                                  int64_t frame_id,
                                  bool is_keyframe,
                                  RTPVideoHeader* rtp_video_header) {
  switch (rtp_video_header->codec) {
    case VideoCodecType::kVideoCodecGeneric:
      // TODO(philipel): Implement generic codec to new generic descriptor.
      return;
    case VideoCodecType::kVideoCodecVP8:
      if (codec_specific_info) {
        Vp8ToGeneric(codec_specific_info->codecSpecific.VP8, frame_id,
                     is_keyframe, rtp_video_header);
      }
      return;
    case VideoCodecType::kVideoCodecVP9:
      // TODO(philipel): Implement VP9 to new generic descriptor.
      return;
    case VideoCodecType::kVideoCodecH264:
      // TODO(philipel): Implement H264 to new generic descriptor.
    case VideoCodecType::kVideoCodecMultiplex:
      return;
  }
  RTC_NOTREACHED() << "Unsupported codec.";
}

// Translates VP8 layer information into the generic descriptor of
// |rtp_video_header|. VP8 has no spatial layers, so the spatial index is
// always 0; a missing temporal index is treated as temporal layer 0. If an
// index does not fit in the generic descriptor a warning is logged and no
// descriptor is emitted.
void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
                                    int64_t shared_frame_id,
                                    bool is_keyframe,
                                    RTPVideoHeader* rtp_video_header) {
  const auto& vp8_header =
      absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
  const int spatial_index = 0;
  const int temporal_index =
      vp8_header.temporalIdx != kNoTemporalIdx ? vp8_header.temporalIdx : 0;

  if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers ||
      spatial_index >= RtpGenericFrameDescriptor::kMaxSpatialLayers) {
    RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
                           "used with generic frame descriptor.";
    return;
  }

  RTPVideoHeader::GenericDescriptorInfo& generic =
      rtp_video_header->generic.emplace();

  generic.frame_id = shared_frame_id;
  generic.spatial_index = spatial_index;
  generic.temporal_index = temporal_index;

  if (vp8_info.useExplicitDependencies) {
    SetDependenciesVp8New(vp8_info, shared_frame_id, is_keyframe,
                          vp8_header.layerSync, &generic);
  } else {
    SetDependenciesVp8Deprecated(vp8_info, shared_frame_id, is_keyframe,
                                 spatial_index, temporal_index,
                                 vp8_header.layerSync, &generic);
  }
}

// Derives frame dependencies from the temporal layer structure, using the
// last frame id seen on each temporal layer (|last_shared_frame_id_|).
// Must not be mixed with SetDependenciesVp8New() on the same instance; this is
// DCHECKed through |new_version_used_|.
void RtpPayloadParams::SetDependenciesVp8Deprecated(
    const CodecSpecificInfoVP8& vp8_info,
    int64_t shared_frame_id,
    bool is_keyframe,
    int spatial_index,
    int temporal_index,
    bool layer_sync,
    RTPVideoHeader::GenericDescriptorInfo* generic) {
  RTC_DCHECK(!vp8_info.useExplicitDependencies);
  RTC_DCHECK(!new_version_used_.has_value() || !new_version_used_.value());
  new_version_used_ = false;

  // A keyframe has no dependencies and resets the history of every temporal
  // layer of this spatial layer.
  if (is_keyframe) {
    RTC_DCHECK_EQ(temporal_index, 0);
    last_shared_frame_id_[spatial_index].fill(-1);
    last_shared_frame_id_[spatial_index][temporal_index] = shared_frame_id;
    return;
  }

  if (layer_sync) {
    // A layer sync frame depends only on the latest TL0 frame. Forget frames
    // on higher layers that are older than that TL0 frame.
    int64_t tl0_frame_id = last_shared_frame_id_[spatial_index][0];

    for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
      if (last_shared_frame_id_[spatial_index][i] < tl0_frame_id) {
        last_shared_frame_id_[spatial_index][i] = -1;
      }
    }

    RTC_DCHECK_GE(tl0_frame_id, 0);
    RTC_DCHECK_LT(tl0_frame_id, shared_frame_id);
    generic->dependencies.push_back(tl0_frame_id);
  } else {
    // Otherwise depend on the latest known frame of this temporal layer and
    // of every lower one.
    for (int i = 0; i <= temporal_index; ++i) {
      int64_t frame_id = last_shared_frame_id_[spatial_index][i];

      if (frame_id != -1) {
        RTC_DCHECK_LT(frame_id, shared_frame_id);
        generic->dependencies.push_back(frame_id);
      }
    }
  }

  last_shared_frame_id_[spatial_index][temporal_index] = shared_frame_id;
}

// Derives frame dependencies from the buffers the encoder reports as
// referenced, using |buffer_id_to_frame_id_| to map each buffer to the frame
// that last updated it. Must not be mixed with SetDependenciesVp8Deprecated()
// on the same instance; this is DCHECKed through |new_version_used_|.
void RtpPayloadParams::SetDependenciesVp8New(
    const CodecSpecificInfoVP8& vp8_info,
    int64_t shared_frame_id,
    bool is_keyframe,
    bool layer_sync,
    RTPVideoHeader::GenericDescriptorInfo* generic) {
  RTC_DCHECK(vp8_info.useExplicitDependencies);
  RTC_DCHECK(!new_version_used_.has_value() || new_version_used_.value());
  new_version_used_ = true;

  // A keyframe references nothing and is recorded as the content of every
  // buffer.
  if (is_keyframe) {
    RTC_DCHECK_EQ(vp8_info.referencedBuffersCount, 0u);
    buffer_id_to_frame_id_.fill(shared_frame_id);
    return;
  }

  constexpr size_t kBuffersCountVp8 = CodecSpecificInfoVP8::kBuffersCount;

  RTC_DCHECK_GT(vp8_info.referencedBuffersCount, 0u);
  RTC_DCHECK_LE(vp8_info.referencedBuffersCount,
                arraysize(vp8_info.referencedBuffers));

  for (size_t i = 0; i < vp8_info.referencedBuffersCount; ++i) {
    const size_t referenced_buffer = vp8_info.referencedBuffers[i];
    RTC_DCHECK_LT(referenced_buffer, kBuffersCountVp8);
    RTC_DCHECK_LT(referenced_buffer, buffer_id_to_frame_id_.size());

    const int64_t dependency_frame_id =
        buffer_id_to_frame_id_[referenced_buffer];
    RTC_DCHECK_GE(dependency_frame_id, 0);
    RTC_DCHECK_LT(dependency_frame_id, shared_frame_id);

    // Several buffers may hold the same frame; list each dependency once.
    const bool is_new_dependency =
        std::find(generic->dependencies.begin(), generic->dependencies.end(),
                  dependency_frame_id) == generic->dependencies.end();
    if (is_new_dependency) {
      generic->dependencies.push_back(dependency_frame_id);
    }
  }

  RTC_DCHECK_LE(vp8_info.updatedBuffersCount, kBuffersCountVp8);
  for (size_t i = 0; i < vp8_info.updatedBuffersCount; ++i) {
    const size_t updated_id = vp8_info.updatedBuffers[i];
    // Same bounds check as for referenced buffers above, before indexing.
    RTC_DCHECK_LT(updated_id, buffer_id_to_frame_id_.size());
    buffer_id_to_frame_id_[updated_id] = shared_frame_id;
  }

  RTC_DCHECK_LE(buffer_id_to_frame_id_.size(), kBuffersCountVp8);
}

}  // namespace webrtc