Add support for VP9 configuration through scalability mode.

Bug: webrtc:13960
Change-Id: Ia930647b15f624a4d10d8d335519b69ffdae6636
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/260983
Commit-Queue: Åsa Persson <asapersson@webrtc.org>
Reviewed-by: Niels Moller <nisse@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#36919}
This commit is contained in:
Asa Persson 2022-05-16 22:37:34 +02:00 committed by WebRTC LUCI CQ
parent 15a73be295
commit cde992ddad
12 changed files with 435 additions and 83 deletions

View file

@ -745,9 +745,13 @@ rtc_library("webrtc_vp9_helpers") {
"../../api/video:video_codec_constants",
"../../api/video_codecs:video_codecs_api",
"../../common_video",
"../../media:rtc_media_base",
"../../rtc_base:checks",
"../../rtc_base:logging",
"../../rtc_base/experiments:stable_target_rate_experiment",
"svc:scalability_mode_util",
"svc:scalability_structures",
"svc:scalable_video_controller",
]
absl_deps = [ "//third_party/abseil-cpp/absl/container:inlined_vector" ]
}

View file

@ -499,7 +499,7 @@ void LibvpxVp9Encoder::SetActiveSpatialLayers() {
void LibvpxVp9Encoder::SetRates(const RateControlParameters& parameters) {
if (!inited_) {
RTC_LOG(LS_WARNING) << "SetRates() calll while uninitialzied.";
RTC_LOG(LS_WARNING) << "SetRates() called while uninitialized.";
return;
}
if (encoder_->err) {
@ -570,14 +570,32 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
force_key_frame_ = true;
pics_since_key_ = 0;
num_spatial_layers_ = inst->VP9().numberOfSpatialLayers;
RTC_DCHECK_GT(num_spatial_layers_, 0);
num_temporal_layers_ = inst->VP9().numberOfTemporalLayers;
if (num_temporal_layers_ == 0) {
num_temporal_layers_ = 1;
absl::optional<ScalabilityMode> scalability_mode = inst->GetScalabilityMode();
if (scalability_mode.has_value()) {
// Use settings from `ScalabilityMode` identifier.
RTC_LOG(LS_INFO) << "Create scalability structure "
<< ScalabilityModeToString(*scalability_mode);
svc_controller_ = CreateScalabilityStructure(*scalability_mode);
if (!svc_controller_) {
RTC_LOG(LS_WARNING) << "Failed to create scalability structure.";
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
ScalableVideoController::StreamLayersConfig info =
svc_controller_->StreamConfig();
num_spatial_layers_ = info.num_spatial_layers;
num_temporal_layers_ = info.num_temporal_layers;
inter_layer_pred_ = ScalabilityModeToInterLayerPredMode(*scalability_mode);
} else {
num_spatial_layers_ = inst->VP9().numberOfSpatialLayers;
RTC_DCHECK_GT(num_spatial_layers_, 0);
num_temporal_layers_ = inst->VP9().numberOfTemporalLayers;
if (num_temporal_layers_ == 0) {
num_temporal_layers_ = 1;
}
inter_layer_pred_ = inst->VP9().interLayerPred;
svc_controller_ = CreateVp9ScalabilityStructure(*inst);
}
svc_controller_ = CreateVp9ScalabilityStructure(*inst);
framerate_controller_ = std::vector<FramerateControllerDeprecated>(
num_spatial_layers_, FramerateControllerDeprecated(codec_.maxFramerate));
@ -661,8 +679,6 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
is_flexible_mode_ = inst->VP9().flexibleMode;
inter_layer_pred_ = inst->VP9().interLayerPred;
if (num_spatial_layers_ > 1 &&
codec_.mode == VideoCodecMode::kScreensharing && !is_flexible_mode_) {
RTC_LOG(LS_ERROR) << "Flexible mode is required for screenshare with "

View file

@ -12,9 +12,13 @@
#include <algorithm>
#include <cmath>
#include <memory>
#include <vector>
#include "media/base/video_common.h"
#include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
#include "modules/video_coding/svc/create_scalability_structure.h"
#include "modules/video_coding/svc/scalability_mode_util.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
@ -29,6 +33,19 @@ const size_t kMinScreenSharingLayerBitrateKbps[] = {30, 200, 500};
const size_t kTargetScreenSharingLayerBitrateKbps[] = {150, 350, 950};
const size_t kMaxScreenSharingLayerBitrateKbps[] = {250, 500, 950};
// Returns the number of spatial layers that the given resolution can carry.
// For each dimension the count is 1 + floor(log2(size / minimum size)), never
// less than one; the smaller of the two per-dimension counts is returned.
size_t GetLimitedNumSpatialLayers(size_t width, size_t height) {
  // Landscape (or square) input checks its width against the long-side
  // minimum and its height against the short-side minimum; portrait input
  // swaps the two.
  size_t min_width = kMinVp9SpatialLayerShortSideLength;
  size_t min_height = kMinVp9SpatialLayerLongSideLength;
  if (width >= height) {
    min_width = kMinVp9SpatialLayerLongSideLength;
    min_height = kMinVp9SpatialLayerShortSideLength;
  }

  // Layers that fit along a single dimension. The arithmetic is deliberately
  // kept in single precision.
  const auto layers_fitting = [](size_t length, size_t min_length) {
    const float doublings = std::log2(1.0f * length / min_length);
    return static_cast<size_t>(std::floor(1 + std::max(0.0f, doublings)));
  };

  return std::min(layers_fitting(width, min_width),
                  layers_fitting(height, min_height));
}
} // namespace
std::vector<SpatialLayer> ConfigureSvcScreenSharing(size_t input_width,
@ -59,27 +76,19 @@ std::vector<SpatialLayer> ConfigureSvcScreenSharing(size_t input_width,
return spatial_layers;
}
std::vector<SpatialLayer> ConfigureSvcNormalVideo(size_t input_width,
size_t input_height,
float max_framerate_fps,
size_t first_active_layer,
size_t num_spatial_layers,
size_t num_temporal_layers) {
std::vector<SpatialLayer> ConfigureSvcNormalVideo(
size_t input_width,
size_t input_height,
float max_framerate_fps,
size_t first_active_layer,
size_t num_spatial_layers,
size_t num_temporal_layers,
absl::optional<ScalableVideoController::StreamLayersConfig> config) {
RTC_DCHECK_LT(first_active_layer, num_spatial_layers);
std::vector<SpatialLayer> spatial_layers;
// Limit number of layers for given resolution.
const bool is_landscape = input_width >= input_height;
const size_t min_width = is_landscape ? kMinVp9SpatialLayerLongSideLength
: kMinVp9SpatialLayerShortSideLength;
const size_t min_height = is_landscape ? kMinVp9SpatialLayerShortSideLength
: kMinVp9SpatialLayerLongSideLength;
const size_t num_layers_fit_horz = static_cast<size_t>(std::floor(
1 + std::max(0.0f, std::log2(1.0f * input_width / min_width))));
const size_t num_layers_fit_vert = static_cast<size_t>(std::floor(
1 + std::max(0.0f, std::log2(1.0f * input_height / min_height))));
const size_t limited_num_spatial_layers =
std::min(num_layers_fit_horz, num_layers_fit_vert);
size_t limited_num_spatial_layers =
GetLimitedNumSpatialLayers(input_width, input_height);
if (limited_num_spatial_layers < num_spatial_layers) {
RTC_LOG(LS_WARNING) << "Reducing number of spatial layers from "
<< num_spatial_layers << " to "
@ -87,14 +96,23 @@ std::vector<SpatialLayer> ConfigureSvcNormalVideo(size_t input_width,
<< " due to low input resolution.";
num_spatial_layers = limited_num_spatial_layers;
}
// First active layer must be configured.
num_spatial_layers = std::max(num_spatial_layers, first_active_layer + 1);
// Ensure top layer is even enough.
int required_divisiblity = 1 << (num_spatial_layers - first_active_layer - 1);
if (config) {
required_divisiblity = 1;
for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
required_divisiblity = cricket::LeastCommonMultiple(
required_divisiblity, config->scaling_factor_den[sl_idx]);
}
}
input_width = input_width - input_width % required_divisiblity;
input_height = input_height - input_height % required_divisiblity;
std::vector<SpatialLayer> spatial_layers;
for (size_t sl_idx = first_active_layer; sl_idx < num_spatial_layers;
++sl_idx) {
SpatialLayer spatial_layer = {0};
@ -104,6 +122,13 @@ std::vector<SpatialLayer> ConfigureSvcNormalVideo(size_t input_width,
spatial_layer.numberOfTemporalLayers = num_temporal_layers;
spatial_layer.active = true;
if (config) {
spatial_layer.width = input_width * config->scaling_factor_num[sl_idx] /
config->scaling_factor_den[sl_idx];
spatial_layer.height = input_height * config->scaling_factor_num[sl_idx] /
config->scaling_factor_den[sl_idx];
}
// minBitrate and maxBitrate formulas were derived from
// subjective-quality data to determine bit rates below which video
// quality is unacceptable and above which additional bits do not provide
@ -124,7 +149,7 @@ std::vector<SpatialLayer> ConfigureSvcNormalVideo(size_t input_width,
spatial_layers.push_back(spatial_layer);
}
// A workaround for sitiation when single HD layer is left with minBitrate
// A workaround for situation when single HD layer is left with minBitrate
// about 500kbps. This would mean that there will always be at least 500kbps
// allocated to video regardless of how low is the actual BWE.
// Also, boost maxBitrate for the first layer to account for lost ability to
@ -140,13 +165,58 @@ std::vector<SpatialLayer> ConfigureSvcNormalVideo(size_t input_width,
return spatial_layers;
}
std::vector<SpatialLayer> GetSvcConfig(size_t input_width,
size_t input_height,
float max_framerate_fps,
size_t first_active_layer,
size_t num_spatial_layers,
size_t num_temporal_layers,
bool is_screen_sharing) {
// Builds the VP9 spatial layer list from the scalability mode set on `codec`.
//
// `codec` must be a VP9 codec with a scalability mode set (both are
// DCHECKed). Returns an empty vector when no layer structure config is
// available for that mode. If the input resolution supports fewer spatial
// layers than the mode asks for, the scalability mode is cleared on `codec`
// and only its inter-layer prediction mode is kept in the VP9 settings.
std::vector<SpatialLayer> GetVp9SvcConfig(VideoCodec& codec) {
  RTC_DCHECK_EQ(codec.codecType, kVideoCodecVP9);

  const absl::optional<ScalabilityMode> mode = codec.GetScalabilityMode();
  RTC_DCHECK(mode.has_value());

  const absl::optional<ScalableVideoController::StreamLayersConfig> structure =
      ScalabilityStructureConfig(*mode);
  if (!structure) {
    RTC_LOG(LS_WARNING) << "Failed to create structure "
                        << ScalabilityModeToString(*mode);
    return {};
  }

  const int layers_fitting_resolution =
      static_cast<int>(GetLimitedNumSpatialLayers(codec.width, codec.height));
  if (layers_fitting_resolution < structure->num_spatial_layers) {
    // Layers will be reduced, do not use scalability mode for now.
    // TODO(bugs.webrtc.org/11607): Use a lower scalability mode once all lower
    // modes are supported.
    codec.UnsetScalabilityMode();
    codec.VP9()->interLayerPred = ScalabilityModeToInterLayerPredMode(*mode);
  }

  // The structure's scaling factors are only forwarded while the scalability
  // mode is still set on the codec, i.e. when it was not cleared above.
  absl::optional<ScalableVideoController::StreamLayersConfig> forwarded_config;
  if (codec.GetScalabilityMode().has_value()) {
    forwarded_config = structure;
  }

  // TODO(bugs.webrtc.org/11607): Add support for screensharing.
  std::vector<SpatialLayer> layers = GetSvcConfig(
      codec.width, codec.height, codec.maxFramerate,
      /*first_active_layer=*/0, structure->num_spatial_layers,
      structure->num_temporal_layers, /*is_screen_sharing=*/false,
      forwarded_config);
  RTC_DCHECK(!layers.empty());

  // Use codec bitrate limits if spatial layering is not requested.
  if (structure->num_spatial_layers == 1) {
    SpatialLayer& last_layer = layers.back();
    last_layer.minBitrate = codec.minBitrate;
    last_layer.targetBitrate = codec.maxBitrate;
    last_layer.maxBitrate = codec.maxBitrate;
  }
  return layers;
}
std::vector<SpatialLayer> GetSvcConfig(
size_t input_width,
size_t input_height,
float max_framerate_fps,
size_t first_active_layer,
size_t num_spatial_layers,
size_t num_temporal_layers,
bool is_screen_sharing,
absl::optional<ScalableVideoController::StreamLayersConfig> config) {
RTC_DCHECK_GT(input_width, 0);
RTC_DCHECK_GT(input_height, 0);
RTC_DCHECK_GT(num_spatial_layers, 0);
@ -158,7 +228,7 @@ std::vector<SpatialLayer> GetSvcConfig(size_t input_width,
} else {
return ConfigureSvcNormalVideo(input_width, input_height, max_framerate_fps,
first_active_layer, num_spatial_layers,
num_temporal_layers);
num_temporal_layers, config);
}
}

View file

@ -15,16 +15,24 @@
#include <vector>
#include "api/video_codecs/spatial_layer.h"
#include "api/video_codecs/video_codec.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
namespace webrtc {
std::vector<SpatialLayer> GetSvcConfig(size_t input_width,
size_t input_height,
float max_framerate_fps,
size_t first_active_layer,
size_t num_spatial_layers,
size_t num_temporal_layers,
bool is_screen_sharing);
// Uses scalability mode to configure spatial layers.
std::vector<SpatialLayer> GetVp9SvcConfig(VideoCodec& video_codec);
std::vector<SpatialLayer> GetSvcConfig(
size_t input_width,
size_t input_height,
float max_framerate_fps,
size_t first_active_layer,
size_t num_spatial_layers,
size_t num_temporal_layers,
bool is_screen_sharing,
absl::optional<ScalableVideoController::StreamLayersConfig> config =
absl::nullopt);
} // namespace webrtc

View file

@ -14,8 +14,12 @@
#include <vector>
#include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
#include "test/gmock.h"
#include "test/gtest.h"
using ::testing::ElementsAre;
using ::testing::Field;
namespace webrtc {
TEST(SvcConfig, NumSpatialLayers) {
const size_t max_num_spatial_layers = 6;
@ -43,6 +47,92 @@ TEST(SvcConfig, NumSpatialLayersPortrait) {
EXPECT_EQ(spatial_layers.size(), num_spatial_layers);
}
// 960x540 input with L3T3_KEY: expects three spatial layers (heights
// 135/270/540) with three temporal layers each, and the scalability mode to
// remain set on the codec afterwards.
TEST(SvcConfig, NumSpatialLayersWithScalabilityMode) {
VideoCodec codec;
codec.codecType = kVideoCodecVP9;
codec.width = 960;
codec.height = 540;
codec.SetScalabilityMode(ScalabilityMode::kL3T3_KEY);
std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135),
Field(&SpatialLayer::height, 270),
Field(&SpatialLayer::height, 540)));
EXPECT_THAT(spatial_layers,
ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 3),
Field(&SpatialLayer::numberOfTemporalLayers, 3),
Field(&SpatialLayer::numberOfTemporalLayers, 3)));
EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL3T3_KEY);
}
// 480x270 input with L3T3_KEY: expects only two spatial layers (heights
// 135/270) with three temporal layers each. The scalability mode is expected
// to be cleared from the codec, with kOnKeyPic stored as the VP9 inter-layer
// prediction mode instead.
TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityMode) {
VideoCodec codec;
codec.codecType = kVideoCodecVP9;
codec.width = 480;
codec.height = 270;
codec.SetScalabilityMode(ScalabilityMode::kL3T3_KEY);
// Scalability mode reset, configuration should be in accordance to L2T3_KEY.
std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135),
Field(&SpatialLayer::height, 270)));
EXPECT_THAT(spatial_layers,
ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 3),
Field(&SpatialLayer::numberOfTemporalLayers, 3)));
EXPECT_EQ(codec.VP9()->interLayerPred, InterLayerPredMode::kOnKeyPic);
EXPECT_EQ(codec.GetScalabilityMode(), absl::nullopt);
}
// Portrait variant of the limited case: 270x480 input with L3T1 expects two
// spatial layers (widths 135/270) with one temporal layer each, the
// scalability mode cleared, and kOn stored as the VP9 inter-layer prediction
// mode.
TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityModePortrait) {
VideoCodec codec;
codec.codecType = kVideoCodecVP9;
codec.width = 270;
codec.height = 480;
codec.SetScalabilityMode(ScalabilityMode::kL3T1);
// Scalability mode reset, configuration should be in accordance to L2T1.
std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 135),
Field(&SpatialLayer::width, 270)));
EXPECT_THAT(spatial_layers,
ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1),
Field(&SpatialLayer::numberOfTemporalLayers, 1)));
EXPECT_EQ(codec.VP9()->interLayerPred, InterLayerPredMode::kOn);
EXPECT_EQ(codec.GetScalabilityMode(), absl::nullopt);
}
// 270x480 input with L2T1h (1.5:1 ratio between layers): expects two spatial
// layers of width 180 and 270 with one temporal layer each, and the
// scalability mode to remain set on the codec.
TEST(SvcConfig, NumSpatialLayersWithScalabilityModeResolutionRatio1_5) {
VideoCodec codec;
codec.codecType = kVideoCodecVP9;
codec.width = 270;
codec.height = 480;
codec.SetScalabilityMode(ScalabilityMode::kL2T1h); // 1.5:1
std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 180),
Field(&SpatialLayer::width, 270)));
EXPECT_THAT(spatial_layers,
ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1),
Field(&SpatialLayer::numberOfTemporalLayers, 1)));
EXPECT_EQ(codec.GetScalabilityMode(), ScalabilityMode::kL2T1h);
}
// 320x180 input with L2T1h: expects a single spatial layer of width 320 with
// one temporal layer, the scalability mode cleared, and kOn stored as the VP9
// inter-layer prediction mode.
TEST(SvcConfig, NumSpatialLayersLimitedWithScalabilityModeResolutionRatio1_5) {
VideoCodec codec;
codec.codecType = kVideoCodecVP9;
codec.width = 320;
codec.height = 180;
codec.SetScalabilityMode(ScalabilityMode::kL2T1h); // 1.5:1
// Scalability mode reset, configuration should be in accordance to L1T1.
std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::width, 320)));
EXPECT_THAT(spatial_layers,
ElementsAre(Field(&SpatialLayer::numberOfTemporalLayers, 1)));
EXPECT_EQ(codec.VP9()->interLayerPred, InterLayerPredMode::kOn);
EXPECT_EQ(codec.GetScalabilityMode(), absl::nullopt);
}
TEST(SvcConfig, AlwaysSendsAtLeastOneLayer) {
const size_t max_num_spatial_layers = 6;
const size_t first_active_layer = 5;
@ -91,6 +181,44 @@ TEST(SvcConfig, EnforcesMinimalRequiredParity) {
EXPECT_EQ(spatial_layers.back().width, kOddSize);
}
// Starts from an odd 1023x1023 input and checks the layer widths produced for
// L3T1, L2T1 and L1T1 in turn (reusing the same codec object): the top layer
// is expected to be 1020, 1022 and 1023 wide respectively, with each lower
// layer half the width of the one above it.
TEST(SvcConfig, EnforcesMinimalRequiredParityWithScalabilityMode) {
VideoCodec codec;
codec.codecType = kVideoCodecVP9;
codec.width = 1023;
codec.height = 1023;
codec.SetScalabilityMode(ScalabilityMode::kL3T1);
std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
EXPECT_THAT(spatial_layers, // Divisibility by 4 required.
ElementsAre(Field(&SpatialLayer::width, 255),
Field(&SpatialLayer::width, 510),
Field(&SpatialLayer::width, 1020)));
codec.SetScalabilityMode(ScalabilityMode::kL2T1);
spatial_layers = GetVp9SvcConfig(codec);
EXPECT_THAT(spatial_layers, // Divisibility by 2 required.
ElementsAre(Field(&SpatialLayer::width, 511),
Field(&SpatialLayer::width, 1022)));
codec.SetScalabilityMode(ScalabilityMode::kL1T1);
spatial_layers = GetVp9SvcConfig(codec);
EXPECT_THAT(spatial_layers, // Divisibility by 1 required.
ElementsAre(Field(&SpatialLayer::width, 1023)));
}
// 1280x1280 input with L2T1h (1.5:1 ratio between layers): 1280 is not a
// multiple of 3, so the top layer is expected to be 1278 wide and the lower
// layer 852 wide.
TEST(SvcConfig, EnforcesMinimalRequiredParityWithScalabilityModeResRatio1_5) {
VideoCodec codec;
codec.codecType = kVideoCodecVP9;
codec.width = 1280;
codec.height = 1280;
codec.SetScalabilityMode(ScalabilityMode::kL2T1h); // 1.5:1
std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
EXPECT_THAT(spatial_layers, // Divisibility by 3 required.
ElementsAre(Field(&SpatialLayer::width, 852),
Field(&SpatialLayer::width, 1278)));
}
TEST(SvcConfig, SkipsInactiveLayers) {
const size_t num_spatial_layers = 4;
const size_t first_active_layer = 2;
@ -121,6 +249,25 @@ TEST(SvcConfig, BitrateThresholds) {
}
}
// 960x540 input with S3T3: expects three spatial layers (heights 135/270/540)
// and, for every layer, minBitrate <= targetBitrate <= maxBitrate.
TEST(SvcConfig, BitrateThresholdsWithScalabilityMode) {
VideoCodec codec;
codec.codecType = kVideoCodecVP9;
codec.width = 960;
codec.height = 540;
codec.SetScalabilityMode(ScalabilityMode::kS3T3);
std::vector<SpatialLayer> spatial_layers = GetVp9SvcConfig(codec);
EXPECT_THAT(spatial_layers, ElementsAre(Field(&SpatialLayer::height, 135),
Field(&SpatialLayer::height, 270),
Field(&SpatialLayer::height, 540)));
for (const SpatialLayer& layer : spatial_layers) {
EXPECT_LE(layer.minBitrate, layer.maxBitrate);
EXPECT_LE(layer.minBitrate, layer.targetBitrate);
EXPECT_LE(layer.targetBitrate, layer.maxBitrate);
}
}
TEST(SvcConfig, ScreenSharing) {
std::vector<SpatialLayer> spatial_layers =
GetSvcConfig(1920, 1080, 30, 1, 3, 3, true);

View file

@ -15,6 +15,7 @@ rtc_source_set("scalability_mode_util") {
]
deps = [
"../../../api/video_codecs:scalability_mode",
"../../../api/video_codecs:video_codecs_api",
"../../../rtc_base:checks",
]
absl_deps = [

View file

@ -139,6 +139,52 @@ absl::string_view ScalabilityModeToString(ScalabilityMode scalability_mode) {
RTC_CHECK_NOTREACHED();
}
// Returns the inter-layer prediction mode associated with `scalability_mode`.
// Cases are grouped by result: the L-prefixed modes without a _KEY suffix map
// to kOn, the _KEY (and _KEY_SHIFT) modes map to kOnKeyPic, and the S-prefixed
// modes map to kOff.
InterLayerPredMode ScalabilityModeToInterLayerPredMode(
    ScalabilityMode scalability_mode) {
  switch (scalability_mode) {
    case ScalabilityMode::kL1T1:
    case ScalabilityMode::kL1T2:
    case ScalabilityMode::kL1T2h:
    case ScalabilityMode::kL1T3:
    case ScalabilityMode::kL1T3h:
    case ScalabilityMode::kL2T1:
    case ScalabilityMode::kL2T1h:
    case ScalabilityMode::kL2T2:
    case ScalabilityMode::kL2T2h:
    case ScalabilityMode::kL2T3:
    case ScalabilityMode::kL2T3h:
    case ScalabilityMode::kL3T1:
    case ScalabilityMode::kL3T1h:
    case ScalabilityMode::kL3T2:
    case ScalabilityMode::kL3T2h:
    case ScalabilityMode::kL3T3:
    case ScalabilityMode::kL3T3h:
      return InterLayerPredMode::kOn;

    case ScalabilityMode::kL2T1_KEY:
    case ScalabilityMode::kL2T2_KEY:
    case ScalabilityMode::kL2T2_KEY_SHIFT:
    case ScalabilityMode::kL2T3_KEY:
    case ScalabilityMode::kL3T1_KEY:
    case ScalabilityMode::kL3T2_KEY:
    case ScalabilityMode::kL3T3_KEY:
      return InterLayerPredMode::kOnKeyPic;

    case ScalabilityMode::kS2T1:
    case ScalabilityMode::kS3T3:
      return InterLayerPredMode::kOff;
  }
  // Only reached if `scalability_mode` holds a value not listed above.
  RTC_CHECK_NOTREACHED();
}
int ScalabilityModeToNumSpatialLayers(ScalabilityMode scalability_mode) {
switch (scalability_mode) {
case ScalabilityMode::kL1T1:

View file

@ -14,6 +14,7 @@
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/video_codecs/scalability_mode.h"
#include "api/video_codecs/video_codec.h"
namespace webrtc {
@ -22,6 +23,9 @@ absl::optional<ScalabilityMode> ScalabilityModeFromString(
absl::string_view ScalabilityModeToString(ScalabilityMode scalability_mode);
InterLayerPredMode ScalabilityModeToInterLayerPredMode(
ScalabilityMode scalability_mode);
int ScalabilityModeToNumSpatialLayers(ScalabilityMode scalability_mode);
int ScalabilityModeToNumTemporalLayers(ScalabilityMode scalability_mode);

View file

@ -231,6 +231,11 @@ VideoCodec VideoCodecInitializer::VideoEncoderConfigToVideoCodec(
if (!config.spatial_layers.empty()) {
// Layering is set explicitly.
spatial_layers = config.spatial_layers;
} else if (scalability_mode.has_value()) {
// Layering is set via scalability mode.
spatial_layers = GetVp9SvcConfig(video_codec);
if (spatial_layers.empty())
break;
} else {
size_t first_active_layer = 0;
for (size_t spatial_idx = 0;

View file

@ -88,6 +88,7 @@ std::vector<VideoStream> CreateVideoStreams(
stream_settings[i].width = width / stream.scale_resolution_down_by;
stream_settings[i].height = height / stream.scale_resolution_down_by;
}
stream_settings[i].scalability_mode = stream.scalability_mode;
stream_settings[i].target_bitrate_bps = target_bitrate_bps;
stream_settings[i].max_bitrate_bps = max_bitrate_bps;
stream_settings[i].active =

View file

@ -926,6 +926,8 @@ if (rtc_include_tests) {
"../modules/video_coding:webrtc_vp9_helpers",
"../modules/video_coding/codecs/av1:libaom_av1_encoder_if_supported",
"../modules/video_coding/svc:scalability_mode_util",
"../modules/video_coding/svc:scalability_structures",
"../modules/video_coding/svc:scalable_video_controller",
"../rtc_base",
"../rtc_base:byte_buffer",
"../rtc_base:checks",

View file

@ -39,7 +39,9 @@
#include "modules/video_coding/codecs/interface/common_constants.h"
#include "modules/video_coding/codecs/vp8/include/vp8.h"
#include "modules/video_coding/codecs/vp9/include/vp9.h"
#include "modules/video_coding/svc/create_scalability_structure.h"
#include "modules/video_coding/svc/scalability_mode_util.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
#include "rtc_base/checks.h"
#include "rtc_base/event.h"
#include "rtc_base/experiments/alr_experiment.h"
@ -2976,14 +2978,11 @@ TEST_F(VideoSendStreamTest, ReportsSentResolution) {
#if defined(RTC_ENABLE_VP9)
class Vp9HeaderObserver : public test::SendTest {
public:
Vp9HeaderObserver()
explicit Vp9HeaderObserver(const Vp9TestParams& params)
: SendTest(VideoSendStreamTest::kLongTimeoutMs),
encoder_factory_([]() { return VP9Encoder::Create(); }),
vp9_settings_(VideoEncoder::GetDefaultVp9Settings()),
packets_sent_(0),
frames_sent_(0),
expected_width_(0),
expected_height_(0) {}
params_(params),
vp9_settings_(VideoEncoder::GetDefaultVp9Settings()) {}
virtual void ModifyVideoConfigsHook(
VideoSendStream::Config* send_config,
@ -3008,8 +3007,6 @@ class Vp9HeaderObserver : public test::SendTest {
vp9_settings_);
EXPECT_EQ(1u, encoder_config->number_of_streams);
EXPECT_EQ(1u, encoder_config->simulcast_layers.size());
encoder_config->simulcast_layers[0].num_temporal_layers =
vp9_settings_.numberOfTemporalLayers;
encoder_config_ = encoder_config->Copy();
}
@ -3185,17 +3182,17 @@ class Vp9HeaderObserver : public test::SendTest {
EXPECT_NE(kNoPictureId, vp9.picture_id); // I:1
EXPECT_EQ(vp9_settings_.flexibleMode, vp9.flexible_mode); // F
if (vp9_settings_.numberOfSpatialLayers > 1) {
EXPECT_LT(vp9.spatial_idx, vp9_settings_.numberOfSpatialLayers);
} else if (vp9_settings_.numberOfTemporalLayers > 1) {
if (params_.num_spatial_layers > 1) {
EXPECT_LT(vp9.spatial_idx, params_.num_spatial_layers);
} else if (params_.num_temporal_layers > 1) {
EXPECT_EQ(vp9.spatial_idx, 0);
} else {
EXPECT_EQ(vp9.spatial_idx, kNoSpatialIdx);
}
if (vp9_settings_.numberOfTemporalLayers > 1) {
EXPECT_LT(vp9.temporal_idx, vp9_settings_.numberOfTemporalLayers);
} else if (vp9_settings_.numberOfSpatialLayers > 1) {
if (params_.num_temporal_layers > 1) {
EXPECT_LT(vp9.temporal_idx, params_.num_temporal_layers);
} else if (params_.num_spatial_layers > 1) {
EXPECT_EQ(vp9.temporal_idx, 0);
} else {
EXPECT_EQ(vp9.temporal_idx, kNoTemporalIdx);
@ -3234,16 +3231,24 @@ class Vp9HeaderObserver : public test::SendTest {
// +-+-+-+-+-+-+-+-+
void VerifySsData(const RTPVideoHeaderVP9& vp9) const {
EXPECT_TRUE(vp9.ss_data_available); // V
EXPECT_EQ(vp9_settings_.numberOfSpatialLayers, // N_S + 1
EXPECT_EQ(params_.num_spatial_layers, // N_S + 1
vp9.num_spatial_layers);
EXPECT_TRUE(vp9.spatial_layer_resolution_present); // Y:1
int expected_width = expected_width_;
int expected_height = expected_height_;
absl::optional<ScalableVideoController::StreamLayersConfig> info;
absl::optional<ScalabilityMode> scalability_mode =
ScalabilityModeFromString(params_.scalability_mode);
if (scalability_mode) {
info = ScalabilityStructureConfig(*scalability_mode);
}
double default_ratio = 1.0;
for (int i = static_cast<int>(vp9.num_spatial_layers) - 1; i >= 0; --i) {
EXPECT_EQ(expected_width, vp9.width[i]); // WIDTH
EXPECT_EQ(expected_height, vp9.height[i]); // HEIGHT
expected_width /= 2;
expected_height /= 2;
double ratio = info ? (static_cast<double>(info->scaling_factor_num[i]) /
info->scaling_factor_den[i])
: default_ratio;
EXPECT_EQ(expected_width_ * ratio, vp9.width[i]); // WIDTH
EXPECT_EQ(expected_height_ * ratio, vp9.height[i]); // HEIGHT
default_ratio /= 2.0;
}
}
@ -3281,17 +3286,18 @@ class Vp9HeaderObserver : public test::SendTest {
}
test::FunctionVideoEncoderFactory encoder_factory_;
const Vp9TestParams params_;
VideoCodecVP9 vp9_settings_;
webrtc::VideoEncoderConfig encoder_config_;
bool last_packet_marker_ = false;
uint16_t last_packet_sequence_number_ = 0;
uint32_t last_packet_timestamp_ = 0;
RTPVideoHeaderVP9 last_vp9_;
size_t packets_sent_;
size_t packets_sent_ = 0;
Mutex mutex_;
size_t frames_sent_;
int expected_width_;
int expected_height_;
size_t frames_sent_ = 0;
int expected_width_ = 0;
int expected_height_ = 0;
};
class Vp9Test : public VideoSendStreamTest,
@ -3319,13 +3325,27 @@ INSTANTIATE_TEST_SUITE_P(
{"L2T1_KEY", 2, 1, InterLayerPredMode::kOnKeyPic},
{"L2T2", 2, 2, InterLayerPredMode::kOn},
{"L2T2_KEY", 2, 2, InterLayerPredMode::kOnKeyPic},
{"L2T3", 2, 3, InterLayerPredMode::kOn},
{"L2T3_KEY", 2, 3, InterLayerPredMode::kOnKeyPic},
{"L3T1", 3, 1, InterLayerPredMode::kOn},
{"L3T3", 3, 3, InterLayerPredMode::kOn},
{"L3T3_KEY", 3, 3, InterLayerPredMode::kOnKeyPic},
{"S2T1", 2, 1, InterLayerPredMode::kOff},
{"S3T3", 3, 3, InterLayerPredMode::kOff}}),
::testing::Values(false, true)), // use_scalability_mode_identifier
[](const ::testing::TestParamInfo<Vp9Test::ParamType>& info) {
rtc::StringBuilder sb;
sb << std::get<0>(info.param).scalability_mode << "_"
<< (std::get<1>(info.param) ? "WithIdentifier" : "WithoutIdentifier");
return sb.str();
});
INSTANTIATE_TEST_SUITE_P(
ScalabilityModeOff,
Vp9Test,
::testing::Combine(
::testing::ValuesIn<Vp9TestParams>(
{{"L2T3", 2, 3, InterLayerPredMode::kOn},
{"S2T3", 2, 3, InterLayerPredMode::kOff}}),
::testing::Values(false)), // use_scalability_mode_identifier
[](const ::testing::TestParamInfo<Vp9Test::ParamType>& info) {
rtc::StringBuilder sb;
@ -3334,6 +3354,20 @@ INSTANTIATE_TEST_SUITE_P(
return sb.str();
});
// Instantiates Vp9Test for L2T1h with use_scalability_mode_identifier fixed to
// true. The generated test name is "<scalability mode>_WithIdentifier" or
// "<scalability mode>_WithoutIdentifier" depending on that flag.
// NOTE(review): the numeric fields {2, 1} are presumably the spatial and
// temporal layer counts of Vp9TestParams - confirm against its definition.
INSTANTIATE_TEST_SUITE_P(
ScalabilityModeOn,
Vp9Test,
::testing::Combine(
::testing::ValuesIn<Vp9TestParams>({{"L2T1h", 2, 1,
InterLayerPredMode::kOn}}),
::testing::Values(true)), // use_scalability_mode_identifier
[](const ::testing::TestParamInfo<Vp9Test::ParamType>& info) {
rtc::StringBuilder sb;
sb << std::get<0>(info.param).scalability_mode << "_"
<< (std::get<1>(info.param) ? "WithIdentifier" : "WithoutIdentifier");
return sb.str();
});
// Runs TestVp9NonFlexMode with the fixture's current parameters and
// scalability-mode-identifier flag.
TEST_P(Vp9Test, NonFlexMode) {
TestVp9NonFlexMode(params_, use_scalability_mode_identifier_);
}
@ -3353,7 +3387,7 @@ void VideoSendStreamTest::TestVp9NonFlexMode(
public:
NonFlexibleMode(const Vp9TestParams& params,
bool use_scalability_mode_identifier)
: params_(params),
: Vp9HeaderObserver(params),
use_scalability_mode_identifier_(use_scalability_mode_identifier),
l_field_(params.num_temporal_layers > 1 ||
params.num_spatial_layers > 1) {}
@ -3372,9 +3406,6 @@ void VideoSendStreamTest::TestVp9NonFlexMode(
}
encoder_config->max_bitrate_bps = bitrate_bps * 2;
EXPECT_EQ(1u, encoder_config->number_of_streams);
EXPECT_EQ(1u, encoder_config->simulcast_layers.size());
encoder_config->frame_drop_enabled = false;
vp9_settings_.flexibleMode = false;
@ -3385,8 +3416,10 @@ void VideoSendStreamTest::TestVp9NonFlexMode(
vp9_settings_.numberOfSpatialLayers = params_.num_spatial_layers;
vp9_settings_.interLayerPred = params_.inter_layer_pred;
} else {
encoder_config->simulcast_layers[0].scalability_mode =
absl::optional<ScalabilityMode> mode =
ScalabilityModeFromString(params_.scalability_mode);
encoder_config->simulcast_layers[0].scalability_mode = mode;
EXPECT_TRUE(mode.has_value());
}
}
@ -3442,7 +3475,6 @@ void VideoSendStreamTest::TestVp9NonFlexMode(
if (frames_sent_ > kNumFramesToSend)
observation_complete_.Set();
}
const Vp9TestParams params_;
const bool use_scalability_mode_identifier_;
const bool l_field_;
@ -3463,16 +3495,20 @@ TEST_F(VideoSendStreamTest, Vp9NonFlexModeSmallResolution) {
static const int kWidth = 4;
static const int kHeight = 4;
class NonFlexibleModeResolution : public Vp9HeaderObserver {
public:
explicit NonFlexibleModeResolution(const Vp9TestParams& params)
: Vp9HeaderObserver(params) {}
private:
void ModifyVideoConfigsHook(
VideoSendStream::Config* send_config,
std::vector<VideoReceiveStream::Config>* receive_configs,
VideoEncoderConfig* encoder_config) override {
encoder_config->codec_type = kVideoCodecVP9;
vp9_settings_.flexibleMode = false;
vp9_settings_.numberOfTemporalLayers = 1;
vp9_settings_.numberOfSpatialLayers = 1;
EXPECT_EQ(1u, encoder_config->number_of_streams);
vp9_settings_.numberOfTemporalLayers = params_.num_temporal_layers;
vp9_settings_.numberOfSpatialLayers = params_.num_spatial_layers;
vp9_settings_.interLayerPred = params_.inter_layer_pred;
}
void InspectHeader(const RTPVideoHeaderVP9& vp9_header) override {
@ -3488,7 +3524,10 @@ TEST_F(VideoSendStreamTest, Vp9NonFlexModeSmallResolution) {
*width = kWidth;
*height = kHeight;
}
} test;
};
Vp9TestParams params{"L1T1", 1, 1, InterLayerPredMode::kOn};
NonFlexibleModeResolution test(params);
RunBaseTest(&test);
}
@ -3504,6 +3543,11 @@ TEST_F(VideoSendStreamTest, Vp9NonFlexModeSmallResolution) {
#endif
TEST_F(VideoSendStreamTest, MAYBE_Vp9FlexModeRefCount) {
class FlexibleMode : public Vp9HeaderObserver {
public:
explicit FlexibleMode(const Vp9TestParams& params)
: Vp9HeaderObserver(params) {}
private:
void ModifyVideoConfigsHook(
VideoSendStream::Config* send_config,
std::vector<VideoReceiveStream::Config>* receive_configs,
@ -3511,8 +3555,9 @@ TEST_F(VideoSendStreamTest, MAYBE_Vp9FlexModeRefCount) {
encoder_config->codec_type = kVideoCodecVP9;
encoder_config->content_type = VideoEncoderConfig::ContentType::kScreen;
vp9_settings_.flexibleMode = true;
vp9_settings_.numberOfTemporalLayers = 1;
vp9_settings_.numberOfSpatialLayers = 2;
vp9_settings_.numberOfTemporalLayers = params_.num_temporal_layers;
vp9_settings_.numberOfSpatialLayers = params_.num_spatial_layers;
vp9_settings_.interLayerPred = params_.inter_layer_pred;
}
void InspectHeader(const RTPVideoHeaderVP9& vp9_header) override {
@ -3523,7 +3568,10 @@ TEST_F(VideoSendStreamTest, MAYBE_Vp9FlexModeRefCount) {
observation_complete_.Set();
}
}
} test;
};
Vp9TestParams params{"L2T1", 2, 1, InterLayerPredMode::kOn};
FlexibleMode test(params);
RunBaseTest(&test);
}