Introduce capture_time_identifier in webrtc::EncodedImage

This CL takes the capture_time_identifier introduced in
webrtc::VideoFrame and propagates it to EncodedImage. For use cases
involving EncodedTransforms, the identifier is further propagated to
TransformableVideoSenderFrame.
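
For illustration only (not part of this CL), a send-side frame
transformer can then read the identifier off the transformable frame.
`MyFrameTransformer` below is a hypothetical FrameTransformerInterface
implementation; only GetCaptureTimeIdentifier() comes from this CL:

    // Sketch only: `MyFrameTransformer` is a hypothetical
    // FrameTransformerInterface implementation.
    void MyFrameTransformer::Transform(
        std::unique_ptr<TransformableFrameInterface> transformable_frame) {
      // GetCaptureTimeIdentifier() is the accessor added by this CL.
      absl::optional<Timestamp> capture_time_id =
          transformable_frame->GetCaptureTimeIdentifier();
      // ... transform the payload, then pass the frame on to the sink ...
    }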

The VideoEncoder::Encode function is overridden by each encoder. Each of
these overridden functions needs to be changed so that it handles the
new identifier and propagates its value into the EncodedImage it creates.
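
A minimal sketch of that per-encoder change (`MyEncoder` is a
hypothetical VideoEncoder subclass; the CL itself touches the libaom AV1
and libvpx VP8/VP9 encoders in the diff below):

    // Sketch only: `MyEncoder` stands in for a concrete encoder.
    int32_t MyEncoder::Encode(const VideoFrame& frame,
                              const std::vector<VideoFrameType>* frame_types) {
      EncodedImage encoded_image;
      encoded_image.SetTimestamp(frame.timestamp());
      // New in this CL: forward the identifier from the input frame.
      encoded_image.SetCaptureTimeIdentifier(frame.capture_time_identifier());
      // ... run the codec and deliver the image via EncodedImageCallback ...
      return WEBRTC_VIDEO_CODEC_OK;
    }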

Change-Id: I5bea4c5a3fe714f1198e497a4bcb5fd059afe516
Bug: webrtc:14878
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/291800
Reviewed-by: Tony Herre <herre@google.com>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Commit-Queue: Palak Agarwal <agpalak@google.com>
Cr-Commit-Position: refs/heads/main@{#39374}
Author: Palak Agarwal
Date: 2023-02-22 14:46:23 +01:00 (committed by WebRTC LUCI CQ)
Parent: a087f6f1c8
Commit: a09f21b207

13 changed files with 117 additions and 3 deletions


@@ -36,6 +36,11 @@ class TransformableFrameInterface {
   virtual uint8_t GetPayloadType() const = 0;
   virtual uint32_t GetSsrc() const = 0;
   virtual uint32_t GetTimestamp() const = 0;
+  // TODO(https://bugs.webrtc.org/14878): Change this to pure virtual after it
+  // is implemented everywhere.
+  virtual absl::optional<Timestamp> GetCaptureTimeIdentifier() const {
+    return absl::nullopt;
+  }
 
   enum class Direction {
     kUnknown,


@@ -98,6 +98,14 @@ class RTC_EXPORT EncodedImage {
     simulcast_index_ = simulcast_index;
   }
 
+  const absl::optional<webrtc::Timestamp>& CaptureTimeIdentifier() const {
+    return capture_time_identifier_;
+  }
+  void SetCaptureTimeIdentifier(
+      const absl::optional<webrtc::Timestamp>& capture_time_identifier) {
+    capture_time_identifier_ = capture_time_identifier;
+  }
+
   // Encoded images can have dependencies between spatial and/or temporal
   // layers, depending on the scalability mode used by the encoder. See diagrams
   // at https://w3c.github.io/webrtc-svc/#dependencydiagrams*.

@@ -217,6 +225,7 @@ class RTC_EXPORT EncodedImage {
   size_t size_ = 0;  // Size of encoded frame data.
   uint32_t timestamp_rtp_ = 0;
   absl::optional<int> simulcast_index_;
+  absl::optional<webrtc::Timestamp> capture_time_identifier_;
   absl::optional<int> spatial_index_;
   absl::optional<int> temporal_index_;
   std::map<int, size_t> spatial_layer_frame_size_bytes_;
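
A quick usage sketch of the accessors added above (the value is
arbitrary, for illustration only):

    EncodedImage image;
    image.SetCaptureTimeIdentifier(Timestamp::Micros(123));
    if (image.CaptureTimeIdentifier().has_value()) {
      // The identifier round-trips through the EncodedImage unchanged.
      int64_t capture_time_us = image.CaptureTimeIdentifier()->us();
    }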


@@ -41,6 +41,7 @@ class TransformableVideoSenderFrame : public TransformableVideoFrameInterface {
         codec_type_(codec_type),
         timestamp_(rtp_timestamp),
         capture_time_ms_(encoded_image.capture_time_ms_),
+        capture_time_identifier_(encoded_image.CaptureTimeIdentifier()),
         expected_retransmission_time_ms_(expected_retransmission_time_ms) {
     RTC_DCHECK_GE(payload_type_, 0);
     RTC_DCHECK_LE(payload_type_, 127);

@@ -87,6 +88,9 @@ class TransformableVideoSenderFrame : public TransformableVideoFrameInterface {
   uint8_t GetPayloadType() const override { return payload_type_; }
   absl::optional<VideoCodecType> GetCodecType() const { return codec_type_; }
   int64_t GetCaptureTimeMs() const { return capture_time_ms_; }
+  absl::optional<Timestamp> GetCaptureTimeIdentifier() const override {
+    return capture_time_identifier_;
+  }
 
   const absl::optional<int64_t>& GetExpectedRetransmissionTimeMs() const {
     return expected_retransmission_time_ms_;

@@ -107,6 +111,7 @@ class TransformableVideoSenderFrame : public TransformableVideoFrameInterface {
   const absl::optional<VideoCodecType> codec_type_ = absl::nullopt;
   const uint32_t timestamp_;
   const int64_t capture_time_ms_;
+  const absl::optional<Timestamp> capture_time_identifier_;
   const absl::optional<int64_t> expected_retransmission_time_ms_;
 };
 }  // namespace


@@ -1653,6 +1653,30 @@ TEST_F(RtpSenderVideoWithFrameTransformerTest,
                                      kDefaultExpectedRetransmissionTimeMs);
 }
 
+TEST_F(RtpSenderVideoWithFrameTransformerTest,
+       TransformableFrameHasCorrectCaptureIdentifier) {
+  auto mock_frame_transformer =
+      rtc::make_ref_counted<NiceMock<MockFrameTransformer>>();
+  std::unique_ptr<RTPSenderVideo> rtp_sender_video =
+      CreateSenderWithFrameTransformer(mock_frame_transformer);
+  auto encoded_image = CreateDefaultEncodedImage();
+  encoded_image->SetCaptureTimeIdentifier(Timestamp::Millis(1));
+  RTPVideoHeader video_header;
+
+  EXPECT_CALL(*mock_frame_transformer, Transform)
+      .WillOnce([&encoded_image](std::unique_ptr<TransformableFrameInterface>
+                                     transformable_frame) {
+        auto* frame = static_cast<TransformableVideoFrameInterface*>(
+            transformable_frame.get());
+        ASSERT_TRUE(frame);
+        EXPECT_EQ(frame->GetCaptureTimeIdentifier(),
+                  encoded_image->CaptureTimeIdentifier());
+      });
+  rtp_sender_video->SendEncodedImage(kPayload, kType, kTimestamp,
+                                     *encoded_image, video_header,
+                                     kDefaultExpectedRetransmissionTimeMs);
+}
+
 TEST_F(RtpSenderVideoWithFrameTransformerTest,
        OnTransformedFrameSendsVideoWhenCloned) {
   auto mock_frame_transformer =


@@ -678,6 +678,7 @@ int32_t LibaomAv1Encoder::Encode(
             ? VideoFrameType::kVideoFrameKey
             : VideoFrameType::kVideoFrameDelta;
     encoded_image.SetTimestamp(frame.timestamp());
+    encoded_image.SetCaptureTimeIdentifier(frame.capture_time_identifier());
     encoded_image.capture_time_ms_ = frame.render_time_ms();
     encoded_image.rotation_ = frame.rotation();
     encoded_image.content_type_ = VideoContentType::UNSPECIFIED;


@@ -263,5 +263,37 @@ TEST(LibaomAv1EncoderTest, RtpTimestampWrap) {
               Eq(VideoFrameType::kVideoFrameDelta));
 }
 
+TEST(LibaomAv1EncoderTest, TestCaptureTimeId) {
+  std::unique_ptr<VideoEncoder> encoder = CreateLibaomAv1Encoder();
+  const Timestamp capture_time_id = Timestamp::Micros(2000);
+  VideoCodec codec_settings = DefaultCodecSettings();
+  codec_settings.SetScalabilityMode(ScalabilityMode::kL2T1);
+  ASSERT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()),
+            WEBRTC_VIDEO_CODEC_OK);
+
+  VideoEncoder::RateControlParameters rate_parameters;
+  rate_parameters.framerate_fps = 30;
+  rate_parameters.bitrate.SetBitrate(/*spatial_index=*/0, /*temporal_index=*/0,
+                                     300'000);
+  rate_parameters.bitrate.SetBitrate(/*spatial_index=*/1, /*temporal_index=*/0,
+                                     300'000);
+  encoder->SetRates(rate_parameters);
+
+  std::vector<EncodedVideoFrameProducer::EncodedFrame> encoded_frames =
+      EncodedVideoFrameProducer(*encoder)
+          .SetNumInputFrames(1)
+          .SetCaptureTimeIdentifier(capture_time_id)
+          .Encode();
+  ASSERT_THAT(encoded_frames, SizeIs(2));
+  ASSERT_TRUE(
+      encoded_frames[0].encoded_image.CaptureTimeIdentifier().has_value());
+  ASSERT_TRUE(
+      encoded_frames[1].encoded_image.CaptureTimeIdentifier().has_value());
+  EXPECT_EQ(encoded_frames[0].encoded_image.CaptureTimeIdentifier()->us(),
+            capture_time_id.us());
+  EXPECT_EQ(encoded_frames[1].encoded_image.CaptureTimeIdentifier()->us(),
+            capture_time_id.us());
+}
+
 }  // namespace
 }  // namespace webrtc


@@ -62,6 +62,7 @@ EncodedVideoFrameProducer::Encode() {
         VideoFrame::Builder()
             .set_video_frame_buffer(frame_buffer_generator->NextFrame().buffer)
             .set_timestamp_rtp(rtp_timestamp_)
+            .set_capture_time_identifier(capture_time_identifier_)
             .build();
     rtp_timestamp_ += rtp_tick;
     RTC_CHECK_EQ(encoder_.Encode(frame, &next_frame_type_),


@@ -49,15 +49,18 @@ class EncodedVideoFrameProducer {
   EncodedVideoFrameProducer& SetRtpTimestamp(uint32_t value);
+  EncodedVideoFrameProducer& SetCaptureTimeIdentifier(Timestamp value);
 
-  // Generates input video frames and encodes them with `encoder` provided in
-  // the constructor. Returns frame passed to the `OnEncodedImage` by wraping
-  // `EncodedImageCallback` underneath.
+  // Generates input video frames and encodes them with `encoder` provided
+  // in the constructor. Returns frame passed to the `OnEncodedImage` by
+  // wrapping `EncodedImageCallback` underneath.
   std::vector<EncodedFrame> Encode();
 
  private:
   VideoEncoder& encoder_;
   uint32_t rtp_timestamp_ = 1000;
+  Timestamp capture_time_identifier_ = Timestamp::Micros(1000);
   int num_input_frames_ = 1;
   int framerate_fps_ = 30;
   RenderResolution resolution_ = {320, 180};

@@ -96,5 +99,10 @@ inline EncodedVideoFrameProducer& EncodedVideoFrameProducer::SetRtpTimestamp(
   return *this;
 }
 
+inline EncodedVideoFrameProducer&
+EncodedVideoFrameProducer::SetCaptureTimeIdentifier(Timestamp value) {
+  capture_time_identifier_ = value;
+  return *this;
+}
 }  // namespace webrtc
 #endif  // MODULES_VIDEO_CODING_CODECS_TEST_ENCODED_VIDEO_FRAME_PRODUCER_H_


@@ -1180,6 +1180,8 @@ int LibvpxVp8Encoder::GetEncodedPartitions(const VideoFrame& input_image,
       }
     }
     encoded_images_[encoder_idx].SetTimestamp(input_image.timestamp());
+    encoded_images_[encoder_idx].SetCaptureTimeIdentifier(
+        input_image.capture_time_identifier());
     encoded_images_[encoder_idx].SetColorSpace(input_image.color_space());
     encoded_images_[encoder_idx].SetRetransmissionAllowed(
         retransmission_allowed);


@@ -249,10 +249,12 @@ TEST_F(TestVp8Impl, Configure) {
 }
 
 TEST_F(TestVp8Impl, OnEncodedImageReportsInfo) {
+  constexpr Timestamp kCaptureTimeIdentifier = Timestamp::Micros(1000);
   VideoFrame input_frame = NextInputFrame();
   input_frame.set_timestamp(kInitialTimestampRtp);
   input_frame.set_timestamp_us(kInitialTimestampMs *
                                rtc::kNumMicrosecsPerMillisec);
+  input_frame.set_capture_time_identifier(kCaptureTimeIdentifier);
   EncodedImage encoded_frame;
   CodecSpecificInfo codec_specific_info;
   EncodeAndWaitForFrame(input_frame, &encoded_frame, &codec_specific_info);

@@ -260,6 +262,9 @@ TEST_F(TestVp8Impl, OnEncodedImageReportsInfo) {
   EXPECT_EQ(kInitialTimestampRtp, encoded_frame.Timestamp());
   EXPECT_EQ(kWidth, static_cast<int>(encoded_frame._encodedWidth));
   EXPECT_EQ(kHeight, static_cast<int>(encoded_frame._encodedHeight));
+  ASSERT_TRUE(encoded_frame.CaptureTimeIdentifier().has_value());
+  EXPECT_EQ(kCaptureTimeIdentifier.us(),
+            encoded_frame.CaptureTimeIdentifier()->us());
 }
 
 TEST_F(TestVp8Impl,


@@ -1754,6 +1754,8 @@ void LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
   TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_.size());
   encoded_image_.SetTimestamp(input_image_->timestamp());
+  encoded_image_.SetCaptureTimeIdentifier(
+      input_image_->capture_time_identifier());
   encoded_image_.SetColorSpace(input_image_->color_space());
   encoded_image_._encodedHeight =
       pkt->data.frame.height[layer_id.spatial_layer_id];


@@ -218,6 +218,19 @@ TEST_P(TestVp9ImplForPixelFormat, DecodedQpEqualsEncodedQp) {
   EXPECT_EQ(encoded_frame.qp_, *decoded_qp);
 }
 
+TEST_P(TestVp9ImplForPixelFormat, CheckCaptureTimeID) {
+  constexpr Timestamp kCaptureTimeIdentifier = Timestamp::Micros(1000);
+  VideoFrame input_frame = NextInputFrame();
+  input_frame.set_capture_time_identifier(kCaptureTimeIdentifier);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr));
+  EncodedImage encoded_frame;
+  CodecSpecificInfo codec_specific_info;
+  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+  ASSERT_TRUE(encoded_frame.CaptureTimeIdentifier().has_value());
+  EXPECT_EQ(kCaptureTimeIdentifier.us(),
+            encoded_frame.CaptureTimeIdentifier()->us());
+}
+
 TEST_F(TestVp9Impl, SwitchInputPixelFormatsWithoutReconfigure) {
   EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
   EncodedImage encoded_frame;


@@ -1501,6 +1501,11 @@ void VideoStreamEncoder::OnFrame(Timestamp post_time,
   incoming_frame.set_timestamp(
       kMsToRtpTimestamp * static_cast<uint32_t>(incoming_frame.ntp_time_ms()));
 
+  // Identifier should remain the same for newly produced incoming frame and the
+  // received |video_frame|.
+  incoming_frame.set_capture_time_identifier(
+      video_frame.capture_time_identifier());
+
   if (incoming_frame.ntp_time_ms() <= last_captured_timestamp_) {
     // We don't allow the same capture time for two frames, drop this one.
     RTC_LOG(LS_WARNING) << "Same/old NTP timestamp ("

@@ -1962,6 +1967,8 @@ void VideoStreamEncoder::EncodeVideoFrame(const VideoFrame& video_frame,
     out_frame.set_video_frame_buffer(cropped_buffer);
     out_frame.set_update_rect(update_rect);
     out_frame.set_ntp_time_ms(video_frame.ntp_time_ms());
+    out_frame.set_capture_time_identifier(
+        video_frame.capture_time_identifier());
     // Since accumulated_update_rect_ is constructed before cropping,
     // we can't trust it. If any changes were pending, we invalidate whole
     // frame here.