generateKeyframe: pass frame_types in bypass mode

Passes frame_types to the underlying encoder in bypass mode.
For libvpx this has no effect; for H264 it changes the behavior
to allow generating keyframes on a per-layer basis.

BUG=chromium:1354101

Change-Id: I26fc22d9e2ec4681a57ce591e9eafd0b1ec962b0
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/285083
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Commit-Queue: Philipp Hancke <phancke@microsoft.com>
Cr-Commit-Position: refs/heads/main@{#38821}
This commit is contained in:
Philipp Hancke 2022-11-30 18:03:41 +01:00 committed by WebRTC LUCI CQ
parent 898403b0c9
commit 279b4b7d4f
8 changed files with 111 additions and 40 deletions

View file

@ -19,6 +19,7 @@ class SimulcastTestFixture {
virtual ~SimulcastTestFixture() = default;
virtual void TestKeyFrameRequestsOnAllStreams() = 0;
virtual void TestKeyFrameRequestsOnSpecificStreams() = 0;
virtual void TestPaddingAllStreams() = 0;
virtual void TestPaddingTwoStreams() = 0;
virtual void TestPaddingTwoStreamsOneMaxedOut() = 0;

View file

@ -466,7 +466,7 @@ int SimulcastEncoderAdapter::Encode(
if (layer.is_keyframe_needed()) {
// This is legacy behavior, generating a keyframe on all layers
// when generating one for a layer that became active for the first time
// or after being disabled
// or after being disabled.
is_keyframe_needed = true;
break;
}
@ -491,8 +491,11 @@ int SimulcastEncoderAdapter::Encode(
// frame types for all streams should be passed to the encoder unchanged.
// Otherwise a single per-encoder frame type is passed.
std::vector<VideoFrameType> stream_frame_types(
bypass_mode_ ? total_streams_count_ : 1,
bypass_mode_
? std::max<unsigned char>(codec_.numberOfSimulcastStreams, 1)
: 1,
VideoFrameType::kVideoFrameDelta);
bool keyframe_requested = false;
if (is_keyframe_needed) {
std::fill(stream_frame_types.begin(), stream_frame_types.end(),
@ -500,16 +503,13 @@ int SimulcastEncoderAdapter::Encode(
keyframe_requested = true;
} else if (frame_types) {
if (bypass_mode_) {
// In bypass mode, requesting a key frame on any layer triggers a
// key frame request on all layers.
for (const auto& frame_type : *frame_types) {
if (frame_type == VideoFrameType::kVideoFrameKey) {
std::fill(stream_frame_types.begin(), stream_frame_types.end(),
VideoFrameType::kVideoFrameKey);
keyframe_requested = true;
break;
}
}
// In bypass mode, we effectively pass on frame_types.
RTC_DCHECK_EQ(frame_types->size(), stream_frame_types.size());
stream_frame_types = *frame_types;
keyframe_requested =
absl::c_any_of(*frame_types, [](const VideoFrameType frame_type) {
return frame_type == VideoFrameType::kVideoFrameKey;
});
} else {
size_t stream_idx = static_cast<size_t>(layer.stream_idx());
if (frame_types->size() >= stream_idx &&

View file

@ -416,26 +416,17 @@ int32_t H264EncoderImpl::Encode(
RTC_CHECK(frame_buffer->type() == VideoFrameBuffer::Type::kI420 ||
frame_buffer->type() == VideoFrameBuffer::Type::kI420A);
bool send_key_frame = false;
bool is_keyframe_needed = false;
for (size_t i = 0; i < configurations_.size(); ++i) {
if (configurations_[i].key_frame_request && configurations_[i].sending) {
send_key_frame = true;
// This is legacy behavior, generating a keyframe on all layers
// when generating one for a layer that became active for the first time
// or after being disabled.
is_keyframe_needed = true;
break;
}
}
if (!send_key_frame && frame_types) {
for (size_t i = 0; i < configurations_.size(); ++i) {
const size_t simulcast_idx =
static_cast<size_t>(configurations_[i].simulcast_idx);
if (configurations_[i].sending && simulcast_idx < frame_types->size() &&
(*frame_types)[simulcast_idx] == VideoFrameType::kVideoFrameKey) {
send_key_frame = true;
break;
}
}
}
RTC_DCHECK_EQ(configurations_[0].width, frame_buffer->width());
RTC_DCHECK_EQ(configurations_[0].height, frame_buffer->height());
@ -480,12 +471,20 @@ int32_t H264EncoderImpl::Encode(
if (!configurations_[i].sending) {
continue;
}
if (frame_types != nullptr) {
if (frame_types != nullptr && i < frame_types->size()) {
// Skip frame?
if ((*frame_types)[i] == VideoFrameType::kEmptyFrame) {
continue;
}
}
// Send a key frame either when this layer is configured to require one
// or we have explicitly been asked to.
const size_t simulcast_idx =
static_cast<size_t>(configurations_[i].simulcast_idx);
bool send_key_frame =
is_keyframe_needed ||
(frame_types && simulcast_idx < frame_types->size() &&
(*frame_types)[simulcast_idx] == VideoFrameType::kVideoFrameKey);
if (send_key_frame) {
// API doc says ForceIntraFrame(false) does nothing, but calling this
// function forces a key frame regardless of the `bIDR` argument's value.

View file

@ -35,8 +35,12 @@ std::unique_ptr<SimulcastTestFixture> CreateSpecificSimulcastTestFixture() {
} // namespace
TEST(TestH264Simulcast, TestKeyFrameRequestsOnAllStreams) {
GTEST_SKIP() << "Not applicable to H264.";
}
TEST(TestH264Simulcast, TestKeyFrameRequestsOnSpecificStreams) {
auto fixture = CreateSpecificSimulcastTestFixture();
fixture->TestKeyFrameRequestsOnAllStreams();
fixture->TestKeyFrameRequestsOnSpecificStreams();
}
TEST(TestH264Simulcast, TestPaddingAllStreams) {

View file

@ -307,8 +307,10 @@ void VideoProcessor::ProcessFrame() {
// Encode.
const std::vector<VideoFrameType> frame_types =
(frame_number == 0)
? std::vector<VideoFrameType>{VideoFrameType::kVideoFrameKey}
: std::vector<VideoFrameType>{VideoFrameType::kVideoFrameDelta};
? std::vector<VideoFrameType>(num_simulcast_or_spatial_layers_,
VideoFrameType::kVideoFrameKey)
: std::vector<VideoFrameType>(num_simulcast_or_spatial_layers_,
VideoFrameType::kVideoFrameDelta);
const int encode_return_code = encoder_->Encode(input_frame, &frame_types);
for (size_t i = 0; i < num_simulcast_or_spatial_layers_; ++i) {
FrameStatistics* frame_stat = stats_->GetFrame(frame_number, i);

View file

@ -39,6 +39,10 @@ TEST(LibvpxVp8SimulcastTest, TestKeyFrameRequestsOnAllStreams) {
fixture->TestKeyFrameRequestsOnAllStreams();
}
TEST(LibvpxVp8SimulcastTest, TestKeyFrameRequestsOnSpecificStreams) {
GTEST_SKIP() << "Not applicable to VP8.";
}
TEST(LibvpxVp8SimulcastTest, TestPaddingAllStreams) {
auto fixture = CreateSpecificSimulcastTestFixture();
fixture->TestPaddingAllStreams();

View file

@ -44,6 +44,7 @@ const int kMaxBitrates[kNumberOfSimulcastStreams] = {150, 600, 1200};
const int kMinBitrates[kNumberOfSimulcastStreams] = {50, 150, 600};
const int kTargetBitrates[kNumberOfSimulcastStreams] = {100, 450, 1000};
const float kMaxFramerates[kNumberOfSimulcastStreams] = {30, 30, 30};
const int kScaleResolutionDownBy[kNumberOfSimulcastStreams] = {4, 2, 1};
const int kDefaultTemporalLayerProfile[3] = {3, 3, 3};
const int kNoTemporalLayerProfile[3] = {0, 0, 0};
@ -353,14 +354,10 @@ void SimulcastTestFixtureImpl::ExpectStreams(
const std::vector<bool> expected_streams_active) {
ASSERT_EQ(static_cast<int>(expected_streams_active.size()),
kNumberOfSimulcastStreams);
if (expected_streams_active[0]) {
ExpectStream(frame_type, 4);
for (size_t i = 0; i < kNumberOfSimulcastStreams; i++) {
if (expected_streams_active[i]) {
ExpectStream(frame_type, kScaleResolutionDownBy[i]);
}
if (expected_streams_active[1]) {
ExpectStream(frame_type, 2);
}
if (expected_streams_active[2]) {
ExpectStream(frame_type, 1);
}
}
@ -389,8 +386,8 @@ void SimulcastTestFixtureImpl::VerifyTemporalIdxAndSyncForAllSpatialLayers(
}
}
// We currently expect all active streams to generate a key frame even though
// a key frame was only requested for some of them.
// For some codecs (VP8) expect all active streams to generate a key frame even
// though a key frame was only requested for some of them.
void SimulcastTestFixtureImpl::TestKeyFrameRequestsOnAllStreams() {
SetRates(kMaxBitrates[2], 30); // To get all three streams.
std::vector<VideoFrameType> frame_types(kNumberOfSimulcastStreams,
@ -428,6 +425,69 @@ void SimulcastTestFixtureImpl::TestKeyFrameRequestsOnAllStreams() {
EXPECT_EQ(0, encoder_->Encode(*input_frame_, &frame_types));
}
// For some codecs (H264) expect only particular active streams to generate a
// key frame when a key frame was only requested for some of them.
void SimulcastTestFixtureImpl::TestKeyFrameRequestsOnSpecificStreams() {
SetRates(kMaxBitrates[2], 30); // To get all three streams.
std::vector<VideoFrameType> frame_types(kNumberOfSimulcastStreams,
VideoFrameType::kVideoFrameDelta);
ExpectStreams(VideoFrameType::kVideoFrameKey, kNumberOfSimulcastStreams);
EXPECT_EQ(0, encoder_->Encode(*input_frame_, &frame_types));
ExpectStreams(VideoFrameType::kVideoFrameDelta, kNumberOfSimulcastStreams);
input_frame_->set_timestamp(input_frame_->timestamp() + 3000);
EXPECT_EQ(0, encoder_->Encode(*input_frame_, &frame_types));
frame_types[0] = VideoFrameType::kVideoFrameKey;
ExpectStream(VideoFrameType::kVideoFrameKey, kScaleResolutionDownBy[0]);
ExpectStream(VideoFrameType::kVideoFrameDelta, kScaleResolutionDownBy[1]);
ExpectStream(VideoFrameType::kVideoFrameDelta, kScaleResolutionDownBy[2]);
input_frame_->set_timestamp(input_frame_->timestamp() + 3000);
EXPECT_EQ(0, encoder_->Encode(*input_frame_, &frame_types));
std::fill(frame_types.begin(), frame_types.end(),
VideoFrameType::kVideoFrameDelta);
frame_types[1] = VideoFrameType::kVideoFrameKey;
ExpectStream(VideoFrameType::kVideoFrameDelta, kScaleResolutionDownBy[0]);
ExpectStream(VideoFrameType::kVideoFrameKey, kScaleResolutionDownBy[1]);
ExpectStream(VideoFrameType::kVideoFrameDelta, kScaleResolutionDownBy[2]);
input_frame_->set_timestamp(input_frame_->timestamp() + 3000);
EXPECT_EQ(0, encoder_->Encode(*input_frame_, &frame_types));
std::fill(frame_types.begin(), frame_types.end(),
VideoFrameType::kVideoFrameDelta);
frame_types[2] = VideoFrameType::kVideoFrameKey;
ExpectStream(VideoFrameType::kVideoFrameDelta, kScaleResolutionDownBy[0]);
ExpectStream(VideoFrameType::kVideoFrameDelta, kScaleResolutionDownBy[1]);
ExpectStream(VideoFrameType::kVideoFrameKey, kScaleResolutionDownBy[2]);
input_frame_->set_timestamp(input_frame_->timestamp() + 3000);
EXPECT_EQ(0, encoder_->Encode(*input_frame_, &frame_types));
std::fill(frame_types.begin(), frame_types.end(),
VideoFrameType::kVideoFrameDelta);
frame_types[0] = VideoFrameType::kVideoFrameKey;
frame_types[2] = VideoFrameType::kVideoFrameKey;
ExpectStream(VideoFrameType::kVideoFrameKey, kScaleResolutionDownBy[0]);
ExpectStream(VideoFrameType::kVideoFrameDelta, kScaleResolutionDownBy[1]);
ExpectStream(VideoFrameType::kVideoFrameKey, kScaleResolutionDownBy[2]);
input_frame_->set_timestamp(input_frame_->timestamp() + 3000);
EXPECT_EQ(0, encoder_->Encode(*input_frame_, &frame_types));
std::fill(frame_types.begin(), frame_types.end(),
VideoFrameType::kVideoFrameKey);
ExpectStream(VideoFrameType::kVideoFrameKey, kScaleResolutionDownBy[0]);
ExpectStream(VideoFrameType::kVideoFrameKey, kScaleResolutionDownBy[1]);
ExpectStream(VideoFrameType::kVideoFrameKey, kScaleResolutionDownBy[2]);
input_frame_->set_timestamp(input_frame_->timestamp() + 3000);
EXPECT_EQ(0, encoder_->Encode(*input_frame_, &frame_types));
std::fill(frame_types.begin(), frame_types.end(),
VideoFrameType::kVideoFrameDelta);
ExpectStreams(VideoFrameType::kVideoFrameDelta, kNumberOfSimulcastStreams);
input_frame_->set_timestamp(input_frame_->timestamp() + 3000);
EXPECT_EQ(0, encoder_->Encode(*input_frame_, &frame_types));
}
void SimulcastTestFixtureImpl::TestPaddingAllStreams() {
// We should always encode the base layer.
SetRates(kMinBitrates[0] - 1, 30);

View file

@ -35,6 +35,7 @@ class SimulcastTestFixtureImpl final : public SimulcastTestFixture {
// Implements SimulcastTestFixture.
void TestKeyFrameRequestsOnAllStreams() override;
void TestKeyFrameRequestsOnSpecificStreams() override;
void TestPaddingAllStreams() override;
void TestPaddingTwoStreams() override;
void TestPaddingTwoStreamsOneMaxedOut() override;