diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn index 502485e46e..798a1be2eb 100644 --- a/modules/video_coding/BUILD.gn +++ b/modules/video_coding/BUILD.gn @@ -745,9 +745,13 @@ rtc_library("webrtc_vp9_helpers") { "../../api/video:video_codec_constants", "../../api/video_codecs:video_codecs_api", "../../common_video", + "../../media:rtc_media_base", "../../rtc_base:checks", "../../rtc_base:logging", "../../rtc_base/experiments:stable_target_rate_experiment", + "svc:scalability_mode_util", + "svc:scalability_structures", + "svc:scalable_video_controller", ] absl_deps = [ "//third_party/abseil-cpp/absl/container:inlined_vector" ] } diff --git a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc index 0c02b34d1b..b1fbf977dd 100644 --- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc +++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc @@ -499,7 +499,7 @@ void LibvpxVp9Encoder::SetActiveSpatialLayers() { void LibvpxVp9Encoder::SetRates(const RateControlParameters& parameters) { if (!inited_) { - RTC_LOG(LS_WARNING) << "SetRates() calll while uninitialzied."; + RTC_LOG(LS_WARNING) << "SetRates() called while uninitialized."; return; } if (encoder_->err) { @@ -570,14 +570,32 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst, force_key_frame_ = true; pics_since_key_ = 0; - num_spatial_layers_ = inst->VP9().numberOfSpatialLayers; - RTC_DCHECK_GT(num_spatial_layers_, 0); - num_temporal_layers_ = inst->VP9().numberOfTemporalLayers; - if (num_temporal_layers_ == 0) { - num_temporal_layers_ = 1; + absl::optional scalability_mode = inst->GetScalabilityMode(); + if (scalability_mode.has_value()) { + // Use settings from `ScalabilityMode` identifier. 
+ RTC_LOG(LS_INFO) << "Create scalability structure " + << ScalabilityModeToString(*scalability_mode); + svc_controller_ = CreateScalabilityStructure(*scalability_mode); + if (!svc_controller_) { + RTC_LOG(LS_WARNING) << "Failed to create scalability structure."; + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + ScalableVideoController::StreamLayersConfig info = + svc_controller_->StreamConfig(); + num_spatial_layers_ = info.num_spatial_layers; + num_temporal_layers_ = info.num_temporal_layers; + inter_layer_pred_ = ScalabilityModeToInterLayerPredMode(*scalability_mode); + } else { + num_spatial_layers_ = inst->VP9().numberOfSpatialLayers; + RTC_DCHECK_GT(num_spatial_layers_, 0); + num_temporal_layers_ = inst->VP9().numberOfTemporalLayers; + if (num_temporal_layers_ == 0) { + num_temporal_layers_ = 1; + } + inter_layer_pred_ = inst->VP9().interLayerPred; + svc_controller_ = CreateVp9ScalabilityStructure(*inst); } - svc_controller_ = CreateVp9ScalabilityStructure(*inst); framerate_controller_ = std::vector( num_spatial_layers_, FramerateControllerDeprecated(codec_.maxFramerate)); @@ -661,8 +679,6 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst, is_flexible_mode_ = inst->VP9().flexibleMode; - inter_layer_pred_ = inst->VP9().interLayerPred; - if (num_spatial_layers_ > 1 && codec_.mode == VideoCodecMode::kScreensharing && !is_flexible_mode_) { RTC_LOG(LS_ERROR) << "Flexible mode is required for screenshare with " diff --git a/modules/video_coding/codecs/vp9/svc_config.cc b/modules/video_coding/codecs/vp9/svc_config.cc index 92818eb4f9..77eee3dbf5 100644 --- a/modules/video_coding/codecs/vp9/svc_config.cc +++ b/modules/video_coding/codecs/vp9/svc_config.cc @@ -12,9 +12,13 @@ #include #include +#include #include +#include "media/base/video_common.h" #include "modules/video_coding/codecs/vp9/include/vp9_globals.h" +#include "modules/video_coding/svc/create_scalability_structure.h" +#include "modules/video_coding/svc/scalability_mode_util.h" #include 
"rtc_base/checks.h" #include "rtc_base/logging.h" @@ -29,6 +33,19 @@ const size_t kMinScreenSharingLayerBitrateKbps[] = {30, 200, 500}; const size_t kTargetScreenSharingLayerBitrateKbps[] = {150, 350, 950}; const size_t kMaxScreenSharingLayerBitrateKbps[] = {250, 500, 950}; +// Gets limited number of layers for given resolution. +size_t GetLimitedNumSpatialLayers(size_t width, size_t height) { + const bool is_landscape = width >= height; + const size_t min_width = is_landscape ? kMinVp9SpatialLayerLongSideLength + : kMinVp9SpatialLayerShortSideLength; + const size_t min_height = is_landscape ? kMinVp9SpatialLayerShortSideLength + : kMinVp9SpatialLayerLongSideLength; + const size_t num_layers_fit_horz = static_cast( + std::floor(1 + std::max(0.0f, std::log2(1.0f * width / min_width)))); + const size_t num_layers_fit_vert = static_cast( + std::floor(1 + std::max(0.0f, std::log2(1.0f * height / min_height)))); + return std::min(num_layers_fit_horz, num_layers_fit_vert); +} } // namespace std::vector ConfigureSvcScreenSharing(size_t input_width, @@ -59,27 +76,19 @@ std::vector ConfigureSvcScreenSharing(size_t input_width, return spatial_layers; } -std::vector ConfigureSvcNormalVideo(size_t input_width, - size_t input_height, - float max_framerate_fps, - size_t first_active_layer, - size_t num_spatial_layers, - size_t num_temporal_layers) { +std::vector ConfigureSvcNormalVideo( + size_t input_width, + size_t input_height, + float max_framerate_fps, + size_t first_active_layer, + size_t num_spatial_layers, + size_t num_temporal_layers, + absl::optional config) { RTC_DCHECK_LT(first_active_layer, num_spatial_layers); - std::vector spatial_layers; // Limit number of layers for given resolution. - const bool is_landscape = input_width >= input_height; - const size_t min_width = is_landscape ? kMinVp9SpatialLayerLongSideLength - : kMinVp9SpatialLayerShortSideLength; - const size_t min_height = is_landscape ? 
kMinVp9SpatialLayerShortSideLength - : kMinVp9SpatialLayerLongSideLength; - const size_t num_layers_fit_horz = static_cast(std::floor( - 1 + std::max(0.0f, std::log2(1.0f * input_width / min_width)))); - const size_t num_layers_fit_vert = static_cast(std::floor( - 1 + std::max(0.0f, std::log2(1.0f * input_height / min_height)))); - const size_t limited_num_spatial_layers = - std::min(num_layers_fit_horz, num_layers_fit_vert); + size_t limited_num_spatial_layers = + GetLimitedNumSpatialLayers(input_width, input_height); if (limited_num_spatial_layers < num_spatial_layers) { RTC_LOG(LS_WARNING) << "Reducing number of spatial layers from " << num_spatial_layers << " to " @@ -87,14 +96,23 @@ std::vector ConfigureSvcNormalVideo(size_t input_width, << " due to low input resolution."; num_spatial_layers = limited_num_spatial_layers; } + // First active layer must be configured. num_spatial_layers = std::max(num_spatial_layers, first_active_layer + 1); // Ensure top layer is even enough. int required_divisiblity = 1 << (num_spatial_layers - first_active_layer - 1); + if (config) { + required_divisiblity = 1; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + required_divisiblity = cricket::LeastCommonMultiple( + required_divisiblity, config->scaling_factor_den[sl_idx]); + } + } input_width = input_width - input_width % required_divisiblity; input_height = input_height - input_height % required_divisiblity; + std::vector spatial_layers; for (size_t sl_idx = first_active_layer; sl_idx < num_spatial_layers; ++sl_idx) { SpatialLayer spatial_layer = {0}; @@ -104,6 +122,13 @@ std::vector ConfigureSvcNormalVideo(size_t input_width, spatial_layer.numberOfTemporalLayers = num_temporal_layers; spatial_layer.active = true; + if (config) { + spatial_layer.width = input_width * config->scaling_factor_num[sl_idx] / + config->scaling_factor_den[sl_idx]; + spatial_layer.height = input_height * config->scaling_factor_num[sl_idx] / + config->scaling_factor_den[sl_idx]; + } 
+ // minBitrate and maxBitrate formulas were derived from // subjective-quality data to determing bit rates below which video // quality is unacceptable and above which additional bits do not provide @@ -124,7 +149,7 @@ std::vector ConfigureSvcNormalVideo(size_t input_width, spatial_layers.push_back(spatial_layer); } - // A workaround for sitiation when single HD layer is left with minBitrate + // A workaround for situation when single HD layer is left with minBitrate // about 500kbps. This would mean that there will always be at least 500kbps // allocated to video regardless of how low is the actual BWE. // Also, boost maxBitrate for the first layer to account for lost ability to @@ -140,13 +165,58 @@ std::vector ConfigureSvcNormalVideo(size_t input_width, return spatial_layers; } -std::vector GetSvcConfig(size_t input_width, - size_t input_height, - float max_framerate_fps, - size_t first_active_layer, - size_t num_spatial_layers, - size_t num_temporal_layers, - bool is_screen_sharing) { +// Uses scalability mode to configure spatial layers. +std::vector GetVp9SvcConfig(VideoCodec& codec) { + RTC_DCHECK_EQ(codec.codecType, kVideoCodecVP9); + + absl::optional scalability_mode = codec.GetScalabilityMode(); + RTC_DCHECK(scalability_mode.has_value()); + + absl::optional info = + ScalabilityStructureConfig(*scalability_mode); + if (!info.has_value()) { + RTC_LOG(LS_WARNING) << "Failed to create structure " + << ScalabilityModeToString(*scalability_mode); + return {}; + } + + if (static_cast(GetLimitedNumSpatialLayers(codec.width, codec.height)) < + info->num_spatial_layers) { + // Layers will be reduced, do not use scalability mode for now. + // TODO(bugs.webrtc.org/11607): Use a lower scalability mode once all lower + // modes are supported. + codec.UnsetScalabilityMode(); + codec.VP9()->interLayerPred = + ScalabilityModeToInterLayerPredMode(*scalability_mode); + } + + // TODO(bugs.webrtc.org/11607): Add support for screensharing. 
+ std::vector spatial_layers = + GetSvcConfig(codec.width, codec.height, codec.maxFramerate, + /*first_active_layer=*/0, info->num_spatial_layers, + info->num_temporal_layers, /*is_screen_sharing=*/false, + codec.GetScalabilityMode() ? info : absl::nullopt); + RTC_DCHECK(!spatial_layers.empty()); + + // Use codec bitrate limits if spatial layering is not requested. + if (info->num_spatial_layers == 1) { + spatial_layers.back().minBitrate = codec.minBitrate; + spatial_layers.back().targetBitrate = codec.maxBitrate; + spatial_layers.back().maxBitrate = codec.maxBitrate; + } + + return spatial_layers; +} + +std::vector GetSvcConfig( + size_t input_width, + size_t input_height, + float max_framerate_fps, + size_t first_active_layer, + size_t num_spatial_layers, + size_t num_temporal_layers, + bool is_screen_sharing, + absl::optional config) { RTC_DCHECK_GT(input_width, 0); RTC_DCHECK_GT(input_height, 0); RTC_DCHECK_GT(num_spatial_layers, 0); @@ -158,7 +228,7 @@ std::vector GetSvcConfig(size_t input_width, } else { return ConfigureSvcNormalVideo(input_width, input_height, max_framerate_fps, first_active_layer, num_spatial_layers, - num_temporal_layers); + num_temporal_layers, config); } } diff --git a/modules/video_coding/codecs/vp9/svc_config.h b/modules/video_coding/codecs/vp9/svc_config.h index f6b562e189..adeaf0f161 100644 --- a/modules/video_coding/codecs/vp9/svc_config.h +++ b/modules/video_coding/codecs/vp9/svc_config.h @@ -15,16 +15,24 @@ #include #include "api/video_codecs/spatial_layer.h" +#include "api/video_codecs/video_codec.h" +#include "modules/video_coding/svc/scalable_video_controller.h" namespace webrtc { -std::vector GetSvcConfig(size_t input_width, - size_t input_height, - float max_framerate_fps, - size_t first_active_layer, - size_t num_spatial_layers, - size_t num_temporal_layers, - bool is_screen_sharing); +// Uses scalability mode to configure spatial layers. 
+std::vector GetVp9SvcConfig(VideoCodec& video_codec); + +std::vector GetSvcConfig( + size_t input_width, + size_t input_height, + float max_framerate_fps, + size_t first_active_layer, + size_t num_spatial_layers, + size_t num_temporal_layers, + bool is_screen_sharing, + absl::optional config = + absl::nullopt); } // namespace webrtc diff --git a/modules/video_coding/codecs/vp9/svc_config_unittest.cc b/modules/video_coding/codecs/vp9/svc_config_unittest.cc index 77d75ee8bc..4de3c5b2a6 100644 --- a/modules/video_coding/codecs/vp9/svc_config_unittest.cc +++ b/modules/video_coding/codecs/vp9/svc_config_unittest.cc @@ -14,8 +14,12 @@ #include #include "modules/video_coding/codecs/vp9/include/vp9_globals.h" +#include "test/gmock.h" #include "test/gtest.h" +using ::testing::ElementsAre; +using ::testing::Field; + namespace webrtc { TEST(SvcConfig, NumSpatialLayers) { const size_t max_num_spatial_layers = 6; @@ -43,6 +47,92 @@ TEST(SvcConfig, NumSpatialLayersPortrait) { EXPECT_EQ(spatial_layers.size(), num_spatial_layers); } +TEST(SvcConfig, NumSpatialLayersWithScalabilityMode) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 960; + codec.height = 540; + codec.SetScalabilityMode(ScalabilityMode::kL3T3_KEY); + + std::vector spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135), + Field(&SpatialLayer::height, 270), + Field(&SpatialLayer::height, 540))); + EXPECT_THAT(spatial_layers, + ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 3), + Field(&SpatialLayer::numberOfTemporalLayers, 3), + Field(&SpatialLayer::numberOfTemporalLayers, 3))); + EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL3T3_KEY); +} + +TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityMode) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 480; + codec.height = 270; + codec.SetScalabilityMode(ScalabilityMode::kL3T3_KEY); + + // Scalability mode reset, configuration should be 
in accordance to L2T3_KEY. + std::vector spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135), + Field(&SpatialLayer::height, 270))); + EXPECT_THAT(spatial_layers, + ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 3), + Field(&SpatialLayer::numberOfTemporalLayers, 3))); + EXPECT_EQ(codec.VP9()->interLayerPred, InterLayerPredMode::kOnKeyPic); + EXPECT_EQ(codec.GetScalabilityMode(), absl::nullopt); +} + +TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityModePortrait) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 270; + codec.height = 480; + codec.SetScalabilityMode(ScalabilityMode::kL3T1); + + // Scalability mode reset, configuration should be in accordance to L2T1. + std::vector spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 135), + Field(&SpatialLayer::width, 270))); + EXPECT_THAT(spatial_layers, + ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1), + Field(&SpatialLayer::numberOfTemporalLayers, 1))); + EXPECT_EQ(codec.VP9()->interLayerPred, InterLayerPredMode::kOn); + EXPECT_EQ(codec.GetScalabilityMode(), absl::nullopt); +} + +TEST(SvcConfig, NumSpatialLayersWithScalabilityModeResolutionRatio1_5) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 270; + codec.height = 480; + codec.SetScalabilityMode(ScalabilityMode::kL2T1h); // 1.5:1 + + std::vector spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 180), + Field(&SpatialLayer::width, 270))); + EXPECT_THAT(spatial_layers, + ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1), + Field(&SpatialLayer::numberOfTemporalLayers, 1))); + EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL2T1h); +} + +TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityModeResolutionRatio1_5) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 
320; + codec.height = 180; + codec.SetScalabilityMode(ScalabilityMode::kL2T1h); // 1.5:1 + + // Scalability mode reset, configuration should be in accordance to L1T1. + std::vector spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 320))); + EXPECT_THAT(spatial_layers, + ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1))); + EXPECT_EQ(codec.VP9()->interLayerPred, InterLayerPredMode::kOn); + EXPECT_EQ(codec.GetScalabilityMode(), absl::nullopt); +} + TEST(SvcConfig, AlwaysSendsAtLeastOneLayer) { const size_t max_num_spatial_layers = 6; const size_t first_active_layer = 5; @@ -91,6 +181,44 @@ TEST(SvcConfig, EnforcesMinimalRequiredParity) { EXPECT_EQ(spatial_layers.back().width, kOddSize); } +TEST(SvcConfig, EnforcesMinimalRequiredParityWithScalabilityMode) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 1023; + codec.height = 1023; + codec.SetScalabilityMode(ScalabilityMode::kL3T1); + + std::vector spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, // Divisibility by 4 required. + ElementsAre(Field(&SpatialLayer::width, 255), + Field(&SpatialLayer::width, 510), + Field(&SpatialLayer::width, 1020))); + + codec.SetScalabilityMode(ScalabilityMode::kL2T1); + spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, // Divisibility by 2 required. + ElementsAre(Field(&SpatialLayer::width, 511), + Field(&SpatialLayer::width, 1022))); + + codec.SetScalabilityMode(ScalabilityMode::kL1T1); + spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, // Divisibility by 1 required. 
+ ElementsAre(Field(&SpatialLayer::width, 1023))); +} + +TEST(SvcConfig, EnforcesMinimalRequiredParityWithScalabilityModeResRatio1_5) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 1280; + codec.height = 1280; + codec.SetScalabilityMode(ScalabilityMode::kL2T1h); // 1.5:1 + + std::vector spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, // Divisibility by 3 required. + ElementsAre(Field(&SpatialLayer::width, 852), + Field(&SpatialLayer::width, 1278))); +} + TEST(SvcConfig, SkipsInactiveLayers) { const size_t num_spatial_layers = 4; const size_t first_active_layer = 2; @@ -121,6 +249,25 @@ TEST(SvcConfig, BitrateThresholds) { } } +TEST(SvcConfig, BitrateThresholdsWithScalabilityMode) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.width = 960; + codec.height = 540; + codec.SetScalabilityMode(ScalabilityMode::kS3T3); + + std::vector spatial_layers = GetVp9SvcConfig(codec); + EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135), + Field(&SpatialLayer::height, 270), + Field(&SpatialLayer::height, 540))); + + for (const SpatialLayer& layer : spatial_layers) { + EXPECT_LE(layer.minBitrate, layer.maxBitrate); + EXPECT_LE(layer.minBitrate, layer.targetBitrate); + EXPECT_LE(layer.targetBitrate, layer.maxBitrate); + } +} + TEST(SvcConfig, ScreenSharing) { std::vector spatial_layers = GetSvcConfig(1920, 1080, 30, 1, 3, 3, true); diff --git a/modules/video_coding/svc/BUILD.gn b/modules/video_coding/svc/BUILD.gn index f68001ad72..d82f3164a5 100644 --- a/modules/video_coding/svc/BUILD.gn +++ b/modules/video_coding/svc/BUILD.gn @@ -15,6 +15,7 @@ rtc_source_set("scalability_mode_util") { ] deps = [ "../../../api/video_codecs:scalability_mode", + "../../../api/video_codecs:video_codecs_api", "../../../rtc_base:checks", ] absl_deps = [ diff --git a/modules/video_coding/svc/scalability_mode_util.cc b/modules/video_coding/svc/scalability_mode_util.cc index 1cbdeb6cf2..d0a56afebc 100644 --- 
a/modules/video_coding/svc/scalability_mode_util.cc +++ b/modules/video_coding/svc/scalability_mode_util.cc @@ -139,6 +139,52 @@ absl::string_view ScalabilityModeToString(ScalabilityMode scalability_mode) { RTC_CHECK_NOTREACHED(); } +InterLayerPredMode ScalabilityModeToInterLayerPredMode( + ScalabilityMode scalability_mode) { + switch (scalability_mode) { + case ScalabilityMode::kL1T1: + case ScalabilityMode::kL1T2: + case ScalabilityMode::kL1T2h: + case ScalabilityMode::kL1T3: + case ScalabilityMode::kL1T3h: + case ScalabilityMode::kL2T1: + case ScalabilityMode::kL2T1h: + return InterLayerPredMode::kOn; + case ScalabilityMode::kL2T1_KEY: + return InterLayerPredMode::kOnKeyPic; + case ScalabilityMode::kL2T2: + case ScalabilityMode::kL2T2h: + return InterLayerPredMode::kOn; + case ScalabilityMode::kL2T2_KEY: + case ScalabilityMode::kL2T2_KEY_SHIFT: + return InterLayerPredMode::kOnKeyPic; + case ScalabilityMode::kL2T3: + case ScalabilityMode::kL2T3h: + return InterLayerPredMode::kOn; + case ScalabilityMode::kL2T3_KEY: + return InterLayerPredMode::kOnKeyPic; + case ScalabilityMode::kL3T1: + case ScalabilityMode::kL3T1h: + return InterLayerPredMode::kOn; + case ScalabilityMode::kL3T1_KEY: + return InterLayerPredMode::kOnKeyPic; + case ScalabilityMode::kL3T2: + case ScalabilityMode::kL3T2h: + return InterLayerPredMode::kOn; + case ScalabilityMode::kL3T2_KEY: + return InterLayerPredMode::kOnKeyPic; + case ScalabilityMode::kL3T3: + case ScalabilityMode::kL3T3h: + return InterLayerPredMode::kOn; + case ScalabilityMode::kL3T3_KEY: + return InterLayerPredMode::kOnKeyPic; + case ScalabilityMode::kS2T1: + case ScalabilityMode::kS3T3: + return InterLayerPredMode::kOff; + } + RTC_CHECK_NOTREACHED(); +} + int ScalabilityModeToNumSpatialLayers(ScalabilityMode scalability_mode) { switch (scalability_mode) { case ScalabilityMode::kL1T1: diff --git a/modules/video_coding/svc/scalability_mode_util.h b/modules/video_coding/svc/scalability_mode_util.h index faff4cf500..c543c0c15a 100644 
--- a/modules/video_coding/svc/scalability_mode_util.h +++ b/modules/video_coding/svc/scalability_mode_util.h @@ -14,6 +14,7 @@ #include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "api/video_codecs/scalability_mode.h" +#include "api/video_codecs/video_codec.h" namespace webrtc { @@ -22,6 +23,9 @@ absl::optional ScalabilityModeFromString( absl::string_view ScalabilityModeToString(ScalabilityMode scalability_mode); +InterLayerPredMode ScalabilityModeToInterLayerPredMode( + ScalabilityMode scalability_mode); + int ScalabilityModeToNumSpatialLayers(ScalabilityMode scalability_mode); int ScalabilityModeToNumTemporalLayers(ScalabilityMode scalability_mode); diff --git a/modules/video_coding/video_codec_initializer.cc b/modules/video_coding/video_codec_initializer.cc index 03f7ffec37..99eb67cb9b 100644 --- a/modules/video_coding/video_codec_initializer.cc +++ b/modules/video_coding/video_codec_initializer.cc @@ -231,6 +231,11 @@ VideoCodec VideoCodecInitializer::VideoEncoderConfigToVideoCodec( if (!config.spatial_layers.empty()) { // Layering is set explicitly. spatial_layers = config.spatial_layers; + } else if (scalability_mode.has_value()) { + // Layering is set via scalability mode. 
+ spatial_layers = GetVp9SvcConfig(video_codec); + if (spatial_layers.empty()) + break; } else { size_t first_active_layer = 0; for (size_t spatial_idx = 0; diff --git a/test/encoder_settings.cc b/test/encoder_settings.cc index c8251883fd..09cefc8d77 100644 --- a/test/encoder_settings.cc +++ b/test/encoder_settings.cc @@ -88,6 +88,7 @@ std::vector CreateVideoStreams( stream_settings[i].width = width / stream.scale_resolution_down_by; stream_settings[i].height = height / stream.scale_resolution_down_by; } + stream_settings[i].scalability_mode = stream.scalability_mode; stream_settings[i].target_bitrate_bps = target_bitrate_bps; stream_settings[i].max_bitrate_bps = max_bitrate_bps; stream_settings[i].active = diff --git a/video/BUILD.gn b/video/BUILD.gn index 71f4fc9742..980e5b5b54 100644 --- a/video/BUILD.gn +++ b/video/BUILD.gn @@ -926,6 +926,8 @@ if (rtc_include_tests) { "../modules/video_coding:webrtc_vp9_helpers", "../modules/video_coding/codecs/av1:libaom_av1_encoder_if_supported", "../modules/video_coding/svc:scalability_mode_util", + "../modules/video_coding/svc:scalability_structures", + "../modules/video_coding/svc:scalable_video_controller", "../rtc_base", "../rtc_base:byte_buffer", "../rtc_base:checks", diff --git a/video/video_send_stream_tests.cc b/video/video_send_stream_tests.cc index c364d0bda2..02bff7d3d0 100644 --- a/video/video_send_stream_tests.cc +++ b/video/video_send_stream_tests.cc @@ -39,7 +39,9 @@ #include "modules/video_coding/codecs/interface/common_constants.h" #include "modules/video_coding/codecs/vp8/include/vp8.h" #include "modules/video_coding/codecs/vp9/include/vp9.h" +#include "modules/video_coding/svc/create_scalability_structure.h" #include "modules/video_coding/svc/scalability_mode_util.h" +#include "modules/video_coding/svc/scalable_video_controller.h" #include "rtc_base/checks.h" #include "rtc_base/event.h" #include "rtc_base/experiments/alr_experiment.h" @@ -2976,14 +2978,11 @@ TEST_F(VideoSendStreamTest, 
ReportsSentResolution) { #if defined(RTC_ENABLE_VP9) class Vp9HeaderObserver : public test::SendTest { public: - Vp9HeaderObserver() + explicit Vp9HeaderObserver(const Vp9TestParams& params) : SendTest(VideoSendStreamTest::kLongTimeoutMs), encoder_factory_([]() { return VP9Encoder::Create(); }), - vp9_settings_(VideoEncoder::GetDefaultVp9Settings()), - packets_sent_(0), - frames_sent_(0), - expected_width_(0), - expected_height_(0) {} + params_(params), + vp9_settings_(VideoEncoder::GetDefaultVp9Settings()) {} virtual void ModifyVideoConfigsHook( VideoSendStream::Config* send_config, @@ -3008,8 +3007,6 @@ class Vp9HeaderObserver : public test::SendTest { vp9_settings_); EXPECT_EQ(1u, encoder_config->number_of_streams); EXPECT_EQ(1u, encoder_config->simulcast_layers.size()); - encoder_config->simulcast_layers[0].num_temporal_layers = - vp9_settings_.numberOfTemporalLayers; encoder_config_ = encoder_config->Copy(); } @@ -3185,17 +3182,17 @@ class Vp9HeaderObserver : public test::SendTest { EXPECT_NE(kNoPictureId, vp9.picture_id); // I:1 EXPECT_EQ(vp9_settings_.flexibleMode, vp9.flexible_mode); // F - if (vp9_settings_.numberOfSpatialLayers > 1) { - EXPECT_LT(vp9.spatial_idx, vp9_settings_.numberOfSpatialLayers); - } else if (vp9_settings_.numberOfTemporalLayers > 1) { + if (params_.num_spatial_layers > 1) { + EXPECT_LT(vp9.spatial_idx, params_.num_spatial_layers); + } else if (params_.num_temporal_layers > 1) { EXPECT_EQ(vp9.spatial_idx, 0); } else { EXPECT_EQ(vp9.spatial_idx, kNoSpatialIdx); } - if (vp9_settings_.numberOfTemporalLayers > 1) { - EXPECT_LT(vp9.temporal_idx, vp9_settings_.numberOfTemporalLayers); - } else if (vp9_settings_.numberOfSpatialLayers > 1) { + if (params_.num_temporal_layers > 1) { + EXPECT_LT(vp9.temporal_idx, params_.num_temporal_layers); + } else if (params_.num_spatial_layers > 1) { EXPECT_EQ(vp9.temporal_idx, 0); } else { EXPECT_EQ(vp9.temporal_idx, kNoTemporalIdx); @@ -3234,16 +3231,24 @@ class Vp9HeaderObserver : public test::SendTest 
{ // +-+-+-+-+-+-+-+-+ void VerifySsData(const RTPVideoHeaderVP9& vp9) const { EXPECT_TRUE(vp9.ss_data_available); // V - EXPECT_EQ(vp9_settings_.numberOfSpatialLayers, // N_S + 1 + EXPECT_EQ(params_.num_spatial_layers, // N_S + 1 vp9.num_spatial_layers); EXPECT_TRUE(vp9.spatial_layer_resolution_present); // Y:1 - int expected_width = expected_width_; - int expected_height = expected_height_; + + absl::optional info; + absl::optional scalability_mode = + ScalabilityModeFromString(params_.scalability_mode); + if (scalability_mode) { + info = ScalabilityStructureConfig(*scalability_mode); + } + double default_ratio = 1.0; for (int i = static_cast(vp9.num_spatial_layers) - 1; i >= 0; --i) { - EXPECT_EQ(expected_width, vp9.width[i]); // WIDTH - EXPECT_EQ(expected_height, vp9.height[i]); // HEIGHT - expected_width /= 2; - expected_height /= 2; + double ratio = info ? (static_cast(info->scaling_factor_num[i]) / + info->scaling_factor_den[i]) + : default_ratio; + EXPECT_EQ(expected_width_ * ratio, vp9.width[i]); // WIDTH + EXPECT_EQ(expected_height_ * ratio, vp9.height[i]); // HEIGHT + default_ratio /= 2.0; } } @@ -3281,17 +3286,18 @@ class Vp9HeaderObserver : public test::SendTest { } test::FunctionVideoEncoderFactory encoder_factory_; + const Vp9TestParams params_; VideoCodecVP9 vp9_settings_; webrtc::VideoEncoderConfig encoder_config_; bool last_packet_marker_ = false; uint16_t last_packet_sequence_number_ = 0; uint32_t last_packet_timestamp_ = 0; RTPVideoHeaderVP9 last_vp9_; - size_t packets_sent_; + size_t packets_sent_ = 0; Mutex mutex_; - size_t frames_sent_; - int expected_width_; - int expected_height_; + size_t frames_sent_ = 0; + int expected_width_ = 0; + int expected_height_ = 0; }; class Vp9Test : public VideoSendStreamTest, @@ -3319,13 +3325,27 @@ INSTANTIATE_TEST_SUITE_P( {"L2T1_KEY", 2, 1, InterLayerPredMode::kOnKeyPic}, {"L2T2", 2, 2, InterLayerPredMode::kOn}, {"L2T2_KEY", 2, 2, InterLayerPredMode::kOnKeyPic}, - {"L2T3", 2, 3, InterLayerPredMode::kOn}, 
{"L2T3_KEY", 2, 3, InterLayerPredMode::kOnKeyPic}, {"L3T1", 3, 1, InterLayerPredMode::kOn}, {"L3T3", 3, 3, InterLayerPredMode::kOn}, {"L3T3_KEY", 3, 3, InterLayerPredMode::kOnKeyPic}, {"S2T1", 2, 1, InterLayerPredMode::kOff}, {"S3T3", 3, 3, InterLayerPredMode::kOff}}), + ::testing::Values(false, true)), // use_scalability_mode_identifier + [](const ::testing::TestParamInfo& info) { + rtc::StringBuilder sb; + sb << std::get<0>(info.param).scalability_mode << "_" + << (std::get<1>(info.param) ? "WithIdentifier" : "WithoutIdentifier"); + return sb.str(); + }); + +INSTANTIATE_TEST_SUITE_P( + ScalabilityModeOff, + Vp9Test, + ::testing::Combine( + ::testing::ValuesIn( + {{"L2T3", 2, 3, InterLayerPredMode::kOn}, + {"S2T3", 2, 3, InterLayerPredMode::kOff}}), ::testing::Values(false)), // use_scalability_mode_identifier [](const ::testing::TestParamInfo& info) { rtc::StringBuilder sb; @@ -3334,6 +3354,20 @@ INSTANTIATE_TEST_SUITE_P( return sb.str(); }); +INSTANTIATE_TEST_SUITE_P( + ScalabilityModeOn, + Vp9Test, + ::testing::Combine( + ::testing::ValuesIn({{"L2T1h", 2, 1, + InterLayerPredMode::kOn}}), + ::testing::Values(true)), // use_scalability_mode_identifier + [](const ::testing::TestParamInfo& info) { + rtc::StringBuilder sb; + sb << std::get<0>(info.param).scalability_mode << "_" + << (std::get<1>(info.param) ? 
"WithIdentifier" : "WithoutIdentifier"); + return sb.str(); + }); + TEST_P(Vp9Test, NonFlexMode) { TestVp9NonFlexMode(params_, use_scalability_mode_identifier_); } @@ -3353,7 +3387,7 @@ void VideoSendStreamTest::TestVp9NonFlexMode( public: NonFlexibleMode(const Vp9TestParams& params, bool use_scalability_mode_identifier) - : params_(params), + : Vp9HeaderObserver(params), use_scalability_mode_identifier_(use_scalability_mode_identifier), l_field_(params.num_temporal_layers > 1 || params.num_spatial_layers > 1) {} @@ -3372,9 +3406,6 @@ void VideoSendStreamTest::TestVp9NonFlexMode( } encoder_config->max_bitrate_bps = bitrate_bps * 2; - EXPECT_EQ(1u, encoder_config->number_of_streams); - EXPECT_EQ(1u, encoder_config->simulcast_layers.size()); - encoder_config->frame_drop_enabled = false; vp9_settings_.flexibleMode = false; @@ -3385,8 +3416,10 @@ void VideoSendStreamTest::TestVp9NonFlexMode( vp9_settings_.numberOfSpatialLayers = params_.num_spatial_layers; vp9_settings_.interLayerPred = params_.inter_layer_pred; } else { - encoder_config->simulcast_layers[0].scalability_mode = + absl::optional mode = ScalabilityModeFromString(params_.scalability_mode); + encoder_config->simulcast_layers[0].scalability_mode = mode; + EXPECT_TRUE(mode.has_value()); } } @@ -3442,7 +3475,6 @@ void VideoSendStreamTest::TestVp9NonFlexMode( if (frames_sent_ > kNumFramesToSend) observation_complete_.Set(); } - const Vp9TestParams params_; const bool use_scalability_mode_identifier_; const bool l_field_; @@ -3463,16 +3495,20 @@ TEST_F(VideoSendStreamTest, Vp9NonFlexModeSmallResolution) { static const int kWidth = 4; static const int kHeight = 4; class NonFlexibleModeResolution : public Vp9HeaderObserver { + public: + explicit NonFlexibleModeResolution(const Vp9TestParams& params) + : Vp9HeaderObserver(params) {} + + private: void ModifyVideoConfigsHook( VideoSendStream::Config* send_config, std::vector* receive_configs, VideoEncoderConfig* encoder_config) override { encoder_config->codec_type = 
kVideoCodecVP9; vp9_settings_.flexibleMode = false; - vp9_settings_.numberOfTemporalLayers = 1; - vp9_settings_.numberOfSpatialLayers = 1; - - EXPECT_EQ(1u, encoder_config->number_of_streams); + vp9_settings_.numberOfTemporalLayers = params_.num_temporal_layers; + vp9_settings_.numberOfSpatialLayers = params_.num_spatial_layers; + vp9_settings_.interLayerPred = params_.inter_layer_pred; } void InspectHeader(const RTPVideoHeaderVP9& vp9_header) override { @@ -3488,7 +3524,10 @@ TEST_F(VideoSendStreamTest, Vp9NonFlexModeSmallResolution) { *width = kWidth; *height = kHeight; } - } test; + }; + + Vp9TestParams params{"L1T1", 1, 1, InterLayerPredMode::kOn}; + NonFlexibleModeResolution test(params); RunBaseTest(&test); } @@ -3504,6 +3543,11 @@ TEST_F(VideoSendStreamTest, Vp9NonFlexModeSmallResolution) { #endif TEST_F(VideoSendStreamTest, MAYBE_Vp9FlexModeRefCount) { class FlexibleMode : public Vp9HeaderObserver { + public: + explicit FlexibleMode(const Vp9TestParams& params) + : Vp9HeaderObserver(params) {} + + private: void ModifyVideoConfigsHook( VideoSendStream::Config* send_config, std::vector* receive_configs, @@ -3511,8 +3555,9 @@ TEST_F(VideoSendStreamTest, MAYBE_Vp9FlexModeRefCount) { encoder_config->codec_type = kVideoCodecVP9; encoder_config->content_type = VideoEncoderConfig::ContentType::kScreen; vp9_settings_.flexibleMode = true; - vp9_settings_.numberOfTemporalLayers = 1; - vp9_settings_.numberOfSpatialLayers = 2; + vp9_settings_.numberOfTemporalLayers = params_.num_temporal_layers; + vp9_settings_.numberOfSpatialLayers = params_.num_spatial_layers; + vp9_settings_.interLayerPred = params_.inter_layer_pred; } void InspectHeader(const RTPVideoHeaderVP9& vp9_header) override { @@ -3523,7 +3568,10 @@ TEST_F(VideoSendStreamTest, MAYBE_Vp9FlexModeRefCount) { observation_complete_.Set(); } } - } test; + }; + + Vp9TestParams params{"L2T1", 2, 1, InterLayerPredMode::kOn}; + FlexibleMode test(params); RunBaseTest(&test); }