Introduce capture_time_identifier in webrtc::EncodedImage

This CL takes the capture_time_identifier introduced in
webrtc::VideoFrame and propagates it to EncodedImage. For use cases
involving EncodedTransforms, the identifier is further propagated to
TransformableVideoSenderFrame.
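
For illustration only (not part of this CL), a send-side frame
transformer can then read the identifier off the transformable frame.
`MyFrameTransformer` below is a hypothetical FrameTransformerInterface
implementation; only GetCaptureTimeIdentifier() comes from this CL:

    // Sketch only: `MyFrameTransformer` is a hypothetical
    // FrameTransformerInterface implementation.
    void MyFrameTransformer::Transform(
        std::unique_ptr<TransformableFrameInterface> transformable_frame) {
      // GetCaptureTimeIdentifier() is the accessor added by this CL.
      absl::optional<Timestamp> capture_time_id =
          transformable_frame->GetCaptureTimeIdentifier();
      // ... transform the payload, then pass the frame on to the sink ...
    }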

The VideoEncoder::Encode function is overridden by each encoder. Each of
these overridden functions needs to be changed so that it handles the
new identifier and propagates its value into the EncodedImage it creates.
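
A minimal sketch of that per-encoder change (`MyEncoder` is a
hypothetical VideoEncoder subclass; the CL itself touches the libaom AV1
and libvpx VP8/VP9 encoders in the diff below):

    // Sketch only: `MyEncoder` stands in for a concrete encoder.
    int32_t MyEncoder::Encode(const VideoFrame& frame,
                              const std::vector<VideoFrameType>* frame_types) {
      EncodedImage encoded_image;
      encoded_image.SetTimestamp(frame.timestamp());
      // New in this CL: forward the identifier from the input frame.
      encoded_image.SetCaptureTimeIdentifier(frame.capture_time_identifier());
      // ... run the codec and deliver the image via EncodedImageCallback ...
      return WEBRTC_VIDEO_CODEC_OK;
    }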

Change-Id: I5bea4c5a3fe714f1198e497a4bcb5fd059afe516
Bug: webrtc:14878
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/291800
Reviewed-by: Tony Herre <herre@google.com>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Commit-Queue: Palak Agarwal <agpalak@google.com>
Cr-Commit-Position: refs/heads/main@{#39374}
Author: Palak Agarwal
Date: 2023-02-22 14:46:23 +01:00 (committed by WebRTC LUCI CQ)
Parent: a087f6f1c8
Commit: a09f21b207

13 changed files with 117 additions and 3 deletions


@@ -36,6 +36,11 @@ class TransformableFrameInterface {
   virtual uint8_t GetPayloadType() const = 0;
   virtual uint32_t GetSsrc() const = 0;
   virtual uint32_t GetTimestamp() const = 0;
+  // TODO(https://bugs.webrtc.org/14878): Change this to pure virtual after it
+  // is implemented everywhere.
+  virtual absl::optional<Timestamp> GetCaptureTimeIdentifier() const {
+    return absl::nullopt;
+  }
 
   enum class Direction {
     kUnknown,


@@ -98,6 +98,14 @@ class RTC_EXPORT EncodedImage {
     simulcast_index_ = simulcast_index;
   }
 
+  const absl::optional<webrtc::Timestamp>& CaptureTimeIdentifier() const {
+    return capture_time_identifier_;
+  }
+  void SetCaptureTimeIdentifier(
+      const absl::optional<webrtc::Timestamp>& capture_time_identifier) {
+    capture_time_identifier_ = capture_time_identifier;
+  }
+
   // Encoded images can have dependencies between spatial and/or temporal
   // layers, depending on the scalability mode used by the encoder. See diagrams
   // at https://w3c.github.io/webrtc-svc/#dependencydiagrams*.

@@ -217,6 +225,7 @@ class RTC_EXPORT EncodedImage {
   size_t size_ = 0;  // Size of encoded frame data.
   uint32_t timestamp_rtp_ = 0;
   absl::optional<int> simulcast_index_;
+  absl::optional<webrtc::Timestamp> capture_time_identifier_;
   absl::optional<int> spatial_index_;
   absl::optional<int> temporal_index_;
   std::map<int, size_t> spatial_layer_frame_size_bytes_;
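
A quick usage sketch of the accessors added above (the value is
arbitrary, for illustration only):

    EncodedImage image;
    image.SetCaptureTimeIdentifier(Timestamp::Micros(123));
    if (image.CaptureTimeIdentifier().has_value()) {
      // The identifier round-trips through the EncodedImage unchanged.
      int64_t capture_time_us = image.CaptureTimeIdentifier()->us();
    }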


@@ -41,6 +41,7 @@ class TransformableVideoSenderFrame : public TransformableVideoFrameInterface {
         codec_type_(codec_type),
         timestamp_(rtp_timestamp),
         capture_time_ms_(encoded_image.capture_time_ms_),
+        capture_time_identifier_(encoded_image.CaptureTimeIdentifier()),
         expected_retransmission_time_ms_(expected_retransmission_time_ms) {
     RTC_DCHECK_GE(payload_type_, 0);
     RTC_DCHECK_LE(payload_type_, 127);

@@ -87,6 +88,9 @@ class TransformableVideoSenderFrame : public TransformableVideoFrameInterface {
   uint8_t GetPayloadType() const override { return payload_type_; }
   absl::optional<VideoCodecType> GetCodecType() const { return codec_type_; }
   int64_t GetCaptureTimeMs() const { return capture_time_ms_; }
+  absl::optional<Timestamp> GetCaptureTimeIdentifier() const override {
+    return capture_time_identifier_;
+  }
 
   const absl::optional<int64_t>& GetExpectedRetransmissionTimeMs() const {
     return expected_retransmission_time_ms_;

@@ -107,6 +111,7 @@ class TransformableVideoSenderFrame : public TransformableVideoFrameInterface {
   const absl::optional<VideoCodecType> codec_type_ = absl::nullopt;
   const uint32_t timestamp_;
   const int64_t capture_time_ms_;
+  const absl::optional<Timestamp> capture_time_identifier_;
   const absl::optional<int64_t> expected_retransmission_time_ms_;
 };
 }  // namespace


@@ -1653,6 +1653,30 @@ TEST_F(RtpSenderVideoWithFrameTransformerTest,
                                      kDefaultExpectedRetransmissionTimeMs);
 }
 
+TEST_F(RtpSenderVideoWithFrameTransformerTest,
+       TransformableFrameHasCorrectCaptureIdentifier) {
+  auto mock_frame_transformer =
+      rtc::make_ref_counted<NiceMock<MockFrameTransformer>>();
+  std::unique_ptr<RTPSenderVideo> rtp_sender_video =
+      CreateSenderWithFrameTransformer(mock_frame_transformer);
+  auto encoded_image = CreateDefaultEncodedImage();
+  encoded_image->SetCaptureTimeIdentifier(Timestamp::Millis(1));
+  RTPVideoHeader video_header;
+
+  EXPECT_CALL(*mock_frame_transformer, Transform)
+      .WillOnce([&encoded_image](std::unique_ptr<TransformableFrameInterface>
+                                     transformable_frame) {
+        auto* frame = static_cast<TransformableVideoFrameInterface*>(
+            transformable_frame.get());
+        ASSERT_TRUE(frame);
+        EXPECT_EQ(frame->GetCaptureTimeIdentifier(),
+                  encoded_image->CaptureTimeIdentifier());
+      });
+  rtp_sender_video->SendEncodedImage(kPayload, kType, kTimestamp,
+                                     *encoded_image, video_header,
+                                     kDefaultExpectedRetransmissionTimeMs);
+}
+
 TEST_F(RtpSenderVideoWithFrameTransformerTest,
        OnTransformedFrameSendsVideoWhenCloned) {
   auto mock_frame_transformer =


@@ -678,6 +678,7 @@ int32_t LibaomAv1Encoder::Encode(
             ? VideoFrameType::kVideoFrameKey
             : VideoFrameType::kVideoFrameDelta;
     encoded_image.SetTimestamp(frame.timestamp());
+    encoded_image.SetCaptureTimeIdentifier(frame.capture_time_identifier());
     encoded_image.capture_time_ms_ = frame.render_time_ms();
     encoded_image.rotation_ = frame.rotation();
     encoded_image.content_type_ = VideoContentType::UNSPECIFIED;


@@ -263,5 +263,37 @@ TEST(LibaomAv1EncoderTest, RtpTimestampWrap) {
               Eq(VideoFrameType::kVideoFrameDelta));
 }
 
+TEST(LibaomAv1EncoderTest, TestCaptureTimeId) {
+  std::unique_ptr<VideoEncoder> encoder = CreateLibaomAv1Encoder();
+  const Timestamp capture_time_id = Timestamp::Micros(2000);
+  VideoCodec codec_settings = DefaultCodecSettings();
+  codec_settings.SetScalabilityMode(ScalabilityMode::kL2T1);
+  ASSERT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()),
+            WEBRTC_VIDEO_CODEC_OK);
+
+  VideoEncoder::RateControlParameters rate_parameters;
+  rate_parameters.framerate_fps = 30;
+  rate_parameters.bitrate.SetBitrate(/*spatial_index=*/0, /*temporal_index=*/0,
+                                     300'000);
+  rate_parameters.bitrate.SetBitrate(/*spatial_index=*/1, /*temporal_index=*/0,
+                                     300'000);
+  encoder->SetRates(rate_parameters);
+
+  std::vector<EncodedVideoFrameProducer::EncodedFrame> encoded_frames =
+      EncodedVideoFrameProducer(*encoder)
+          .SetNumInputFrames(1)
+          .SetCaptureTimeIdentifier(capture_time_id)
+          .Encode();
+  ASSERT_THAT(encoded_frames, SizeIs(2));
+  ASSERT_TRUE(
+      encoded_frames[0].encoded_image.CaptureTimeIdentifier().has_value());
+  ASSERT_TRUE(
+      encoded_frames[1].encoded_image.CaptureTimeIdentifier().has_value());
+  EXPECT_EQ(encoded_frames[0].encoded_image.CaptureTimeIdentifier()->us(),
+            capture_time_id.us());
+  EXPECT_EQ(encoded_frames[1].encoded_image.CaptureTimeIdentifier()->us(),
+            capture_time_id.us());
+}
+
 }  // namespace
 }  // namespace webrtc


@@ -62,6 +62,7 @@ EncodedVideoFrameProducer::Encode() {
         VideoFrame::Builder()
             .set_video_frame_buffer(frame_buffer_generator->NextFrame().buffer)
             .set_timestamp_rtp(rtp_timestamp_)
+            .set_capture_time_identifier(capture_time_identifier_)
             .build();
     rtp_timestamp_ += rtp_tick;
     RTC_CHECK_EQ(encoder_.Encode(frame, &next_frame_type_),


@@ -49,15 +49,18 @@ class EncodedVideoFrameProducer {
   EncodedVideoFrameProducer& SetRtpTimestamp(uint32_t value);
+  EncodedVideoFrameProducer& SetCaptureTimeIdentifier(Timestamp value);
 
-  // Generates input video frames and encodes them with `encoder` provided in
-  // the constructor. Returns frame passed to the `OnEncodedImage` by wraping
-  // `EncodedImageCallback` underneath.
+  // Generates input video frames and encodes them with `encoder` provided
+  // in the constructor. Returns frame passed to the `OnEncodedImage` by
+  // wrapping `EncodedImageCallback` underneath.
   std::vector<EncodedFrame> Encode();
 
  private:
   VideoEncoder& encoder_;
   uint32_t rtp_timestamp_ = 1000;
+  Timestamp capture_time_identifier_ = Timestamp::Micros(1000);
   int num_input_frames_ = 1;
   int framerate_fps_ = 30;
   RenderResolution resolution_ = {320, 180};

@@ -96,5 +99,10 @@ inline EncodedVideoFrameProducer& EncodedVideoFrameProducer::SetRtpTimestamp(
   return *this;
 }
 
+inline EncodedVideoFrameProducer&
+EncodedVideoFrameProducer::SetCaptureTimeIdentifier(Timestamp value) {
+  capture_time_identifier_ = value;
+  return *this;
+}
 }  // namespace webrtc
 #endif  // MODULES_VIDEO_CODING_CODECS_TEST_ENCODED_VIDEO_FRAME_PRODUCER_H_


@@ -1180,6 +1180,8 @@ int LibvpxVp8Encoder::GetEncodedPartitions(const VideoFrame& input_image,
       }
     }
     encoded_images_[encoder_idx].SetTimestamp(input_image.timestamp());
+    encoded_images_[encoder_idx].SetCaptureTimeIdentifier(
+        input_image.capture_time_identifier());
     encoded_images_[encoder_idx].SetColorSpace(input_image.color_space());
     encoded_images_[encoder_idx].SetRetransmissionAllowed(
         retransmission_allowed);


@@ -249,10 +249,12 @@ TEST_F(TestVp8Impl, Configure) {
 }
 
 TEST_F(TestVp8Impl, OnEncodedImageReportsInfo) {
+  constexpr Timestamp kCaptureTimeIdentifier = Timestamp::Micros(1000);
   VideoFrame input_frame = NextInputFrame();
   input_frame.set_timestamp(kInitialTimestampRtp);
   input_frame.set_timestamp_us(kInitialTimestampMs *
                                rtc::kNumMicrosecsPerMillisec);
+  input_frame.set_capture_time_identifier(kCaptureTimeIdentifier);
   EncodedImage encoded_frame;
   CodecSpecificInfo codec_specific_info;
   EncodeAndWaitForFrame(input_frame, &encoded_frame, &codec_specific_info);

@@ -260,6 +262,9 @@ TEST_F(TestVp8Impl, OnEncodedImageReportsInfo) {
   EXPECT_EQ(kInitialTimestampRtp, encoded_frame.Timestamp());
   EXPECT_EQ(kWidth, static_cast<int>(encoded_frame._encodedWidth));
   EXPECT_EQ(kHeight, static_cast<int>(encoded_frame._encodedHeight));
+  ASSERT_TRUE(encoded_frame.CaptureTimeIdentifier().has_value());
+  EXPECT_EQ(kCaptureTimeIdentifier.us(),
+            encoded_frame.CaptureTimeIdentifier()->us());
 }
 
 TEST_F(TestVp8Impl,


@@ -1754,6 +1754,8 @@ void LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
   TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_.size());
   encoded_image_.SetTimestamp(input_image_->timestamp());
+  encoded_image_.SetCaptureTimeIdentifier(
+      input_image_->capture_time_identifier());
   encoded_image_.SetColorSpace(input_image_->color_space());
   encoded_image_._encodedHeight =
       pkt->data.frame.height[layer_id.spatial_layer_id];


@@ -218,6 +218,19 @@ TEST_P(TestVp9ImplForPixelFormat, DecodedQpEqualsEncodedQp) {
   EXPECT_EQ(encoded_frame.qp_, *decoded_qp);
 }
 
+TEST_P(TestVp9ImplForPixelFormat, CheckCaptureTimeID) {
+  constexpr Timestamp kCaptureTimeIdentifier = Timestamp::Micros(1000);
+  VideoFrame input_frame = NextInputFrame();
+  input_frame.set_capture_time_identifier(kCaptureTimeIdentifier);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr));
+  EncodedImage encoded_frame;
+  CodecSpecificInfo codec_specific_info;
+  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+  ASSERT_TRUE(encoded_frame.CaptureTimeIdentifier().has_value());
+  EXPECT_EQ(kCaptureTimeIdentifier.us(),
+            encoded_frame.CaptureTimeIdentifier()->us());
+}
+
 TEST_F(TestVp9Impl, SwitchInputPixelFormatsWithoutReconfigure) {
   EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
   EncodedImage encoded_frame;


@@ -1501,6 +1501,11 @@ void VideoStreamEncoder::OnFrame(Timestamp post_time,
   incoming_frame.set_timestamp(
       kMsToRtpTimestamp * static_cast<uint32_t>(incoming_frame.ntp_time_ms()));
 
+  // Identifier should remain the same for newly produced incoming frame and the
+  // received |video_frame|.
+  incoming_frame.set_capture_time_identifier(
+      video_frame.capture_time_identifier());
+
   if (incoming_frame.ntp_time_ms() <= last_captured_timestamp_) {
     // We don't allow the same capture time for two frames, drop this one.
     RTC_LOG(LS_WARNING) << "Same/old NTP timestamp ("

@@ -1962,6 +1967,8 @@ void VideoStreamEncoder::EncodeVideoFrame(const VideoFrame& video_frame,
     out_frame.set_video_frame_buffer(cropped_buffer);
     out_frame.set_update_rect(update_rect);
     out_frame.set_ntp_time_ms(video_frame.ntp_time_ms());
+    out_frame.set_capture_time_identifier(
+        video_frame.capture_time_identifier());
     // Since accumulated_update_rect_ is constructed before cropping,
     // we can't trust it. If any changes were pending, we invalidate whole
     // frame here.