/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 *
 */

#ifdef RTC_ENABLE_VP9

#include "modules/video_coding/codecs/vp9/vp9_impl.h"

#include <algorithm>
#include <limits>
#include <vector>

#include "vpx/vp8cx.h"
#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"
#include "vpx/vpx_encoder.h"

#include "absl/memory/memory.h"
#include "api/video/color_space.h"
#include "api/video/i010_buffer.h"
#include "common_video/include/video_frame_buffer.h"
#include "common_video/libyuv/include/webrtc_libyuv.h"
#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "modules/video_coding/codecs/vp9/svc_rate_allocator.h"
#include "rtc_base/checks.h"
#include "rtc_base/experiments/rate_control_settings.h"
#include "rtc_base/keep_ref_until_done.h"
#include "rtc_base/logging.h"
#include "rtc_base/time_utils.h"
#include "rtc_base/trace_event.h"
#include "system_wrappers/include/field_trial.h"

namespace webrtc {

namespace {
// Maps from gof_idx to encoder internal reference frame buffer index. These
// maps work for 1, 2 and 3 temporal layers with GOF length of 1, 2 and 4
// frames.
uint8_t kRefBufIdx[4] = {0, 0, 0, 1};
uint8_t kUpdBufIdx[4] = {0, 0, 1, 0};
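// For example, with three temporal layers (the 0-2-1-2 GOF pattern set up in
// InitEncode() below), the TL0 frame is stored in buffer 0 and referenced by
// the next two frames, while the TL1 frame is stored in buffer 1 and
// referenced by the final TL2 frame of the GOF.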

int kMaxNumTiles4kVideo = 8;

// Maximum allowed PID difference for the different per-layer frame-rate case.
const int kMaxAllowedPidDiff = 30;

constexpr double kLowRateFactor = 1.0;
constexpr double kHighRateFactor = 2.0;

// These settings correspond to the settings in vpx_codec_enc_cfg.
struct Vp9RateSettings {
  uint32_t rc_undershoot_pct;
  uint32_t rc_overshoot_pct;
  uint32_t rc_buf_sz;
  uint32_t rc_buf_optimal_sz;
  uint32_t rc_dropframe_thresh;
};

// Only positive speeds, range for real-time coding currently is: 5 - 8.
// Lower means slower/better quality, higher means faster/lower quality.
int GetCpuSpeed(int width, int height) {
#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || defined(ANDROID)
  return 8;
#else
  // For smaller resolutions, use lower speed setting (get some coding gain at
  // the cost of increased encoding complexity).
  if (width * height <= 352 * 288)
    return 5;
  else
    return 7;
#endif
}
// Helper function for extracting VP9 colorspace.
ColorSpace ExtractVP9ColorSpace(vpx_color_space_t space_t,
                                vpx_color_range_t range_t,
                                unsigned int bit_depth) {
  ColorSpace::PrimaryID primaries = ColorSpace::PrimaryID::kUnspecified;
  ColorSpace::TransferID transfer = ColorSpace::TransferID::kUnspecified;
  ColorSpace::MatrixID matrix = ColorSpace::MatrixID::kUnspecified;
  switch (space_t) {
    case VPX_CS_BT_601:
    case VPX_CS_SMPTE_170:
      primaries = ColorSpace::PrimaryID::kSMPTE170M;
      transfer = ColorSpace::TransferID::kSMPTE170M;
      matrix = ColorSpace::MatrixID::kSMPTE170M;
      break;
    case VPX_CS_SMPTE_240:
      primaries = ColorSpace::PrimaryID::kSMPTE240M;
      transfer = ColorSpace::TransferID::kSMPTE240M;
      matrix = ColorSpace::MatrixID::kSMPTE240M;
      break;
    case VPX_CS_BT_709:
      primaries = ColorSpace::PrimaryID::kBT709;
      transfer = ColorSpace::TransferID::kBT709;
      matrix = ColorSpace::MatrixID::kBT709;
      break;
    case VPX_CS_BT_2020:
      primaries = ColorSpace::PrimaryID::kBT2020;
      switch (bit_depth) {
        case 8:
          transfer = ColorSpace::TransferID::kBT709;
          break;
        case 10:
          transfer = ColorSpace::TransferID::kBT2020_10;
          break;
        default:
          RTC_NOTREACHED();
          break;
      }
      matrix = ColorSpace::MatrixID::kBT2020_NCL;
      break;
    case VPX_CS_SRGB:
      primaries = ColorSpace::PrimaryID::kBT709;
      transfer = ColorSpace::TransferID::kIEC61966_2_1;
      matrix = ColorSpace::MatrixID::kBT709;
      break;
    default:
      break;
  }

  ColorSpace::RangeID range = ColorSpace::RangeID::kInvalid;
  switch (range_t) {
    case VPX_CR_STUDIO_RANGE:
      range = ColorSpace::RangeID::kLimited;
      break;
    case VPX_CR_FULL_RANGE:
      range = ColorSpace::RangeID::kFull;
      break;
    default:
      break;
  }
  return ColorSpace(primaries, transfer, matrix, range);
}

bool MoreLayersEnabled(const VideoBitrateAllocation& first,
                       const VideoBitrateAllocation& second) {
  for (size_t sl_idx = 0; sl_idx < kMaxSpatialLayers; ++sl_idx) {
    if (first.GetSpatialLayerSum(sl_idx) > 0 &&
        second.GetSpatialLayerSum(sl_idx) == 0) {
      return true;
    }
  }
  return false;
}

uint32_t Interpolate(uint32_t low,
                     uint32_t high,
                     double bandwidth_headroom_factor) {
  RTC_DCHECK_GE(bandwidth_headroom_factor, kLowRateFactor);
  RTC_DCHECK_LE(bandwidth_headroom_factor, kHighRateFactor);

  // |factor| is between 0.0 and 1.0.
  const double factor = bandwidth_headroom_factor - kLowRateFactor;

  return static_cast<uint32_t>(((1.0 - factor) * low) + (factor * high) + 0.5);
}
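
// For example, Interpolate(100, 50, 1.5) maps the midpoint factor 0.5 to
// (0.5 * 100) + (0.5 * 50) + 0.5 = 75 after truncation, i.e. halfway between
// the two endpoints with round-to-nearest behavior.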

Vp9RateSettings GetRateSettings(double bandwidth_headroom_factor) {
  static const Vp9RateSettings low_settings{100u, 0u, 100u, 33u, 40u};
  static const Vp9RateSettings high_settings{50u, 50u, 1000u, 700u, 5u};

  if (bandwidth_headroom_factor <= kLowRateFactor) {
    return low_settings;
  } else if (bandwidth_headroom_factor >= kHighRateFactor) {
    return high_settings;
  }

  Vp9RateSettings settings;
  settings.rc_undershoot_pct =
      Interpolate(low_settings.rc_undershoot_pct,
                  high_settings.rc_undershoot_pct, bandwidth_headroom_factor);
  settings.rc_overshoot_pct =
      Interpolate(low_settings.rc_overshoot_pct, high_settings.rc_overshoot_pct,
                  bandwidth_headroom_factor);
  settings.rc_buf_sz =
      Interpolate(low_settings.rc_buf_sz, high_settings.rc_buf_sz,
                  bandwidth_headroom_factor);
  settings.rc_buf_optimal_sz =
      Interpolate(low_settings.rc_buf_optimal_sz,
                  high_settings.rc_buf_optimal_sz, bandwidth_headroom_factor);
  settings.rc_dropframe_thresh =
      Interpolate(low_settings.rc_dropframe_thresh,
                  high_settings.rc_dropframe_thresh, bandwidth_headroom_factor);
  return settings;
}

void UpdateRateSettings(vpx_codec_enc_cfg_t* config,
                        const Vp9RateSettings& new_settings) {
  config->rc_undershoot_pct = new_settings.rc_undershoot_pct;
  config->rc_overshoot_pct = new_settings.rc_overshoot_pct;
  config->rc_buf_sz = new_settings.rc_buf_sz;
  config->rc_buf_optimal_sz = new_settings.rc_buf_optimal_sz;
  config->rc_dropframe_thresh = new_settings.rc_dropframe_thresh;
}

}  // namespace

void VP9EncoderImpl::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
                                                      void* user_data) {
  VP9EncoderImpl* enc = static_cast<VP9EncoderImpl*>(user_data);
  enc->GetEncodedLayerFrame(pkt);
}
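
// This trampoline is registered via VP9E_REGISTER_CX_CALLBACK in
// InitAndSetControlSettings(), so the packet for each spatial layer is handed
// to GetEncodedLayerFrame() as soon as that layer is encoded.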

VP9EncoderImpl::VP9EncoderImpl(const cricket::VideoCodec& codec)
    : encoded_image_(),
      encoded_complete_callback_(nullptr),
      profile_(
          ParseSdpForVP9Profile(codec.params).value_or(VP9Profile::kProfile0)),
      inited_(false),
      timestamp_(0),
      cpu_speed_(3),
      rc_max_intra_target_(0),
      encoder_(nullptr),
      config_(nullptr),
      raw_(nullptr),
      input_image_(nullptr),
      force_key_frame_(true),
      pics_since_key_(0),
      num_temporal_layers_(0),
      num_spatial_layers_(0),
      num_active_spatial_layers_(0),
      layer_deactivation_requires_key_frame_(
          field_trial::IsEnabled("WebRTC-Vp9IssueKeyFrameOnLayerDeactivation")),
      is_svc_(false),
      inter_layer_pred_(InterLayerPredMode::kOn),
      external_ref_control_(false),  // Set in InitEncode because of tests.
      trusted_rate_controller_(RateControlSettings::ParseFromFieldTrials()
                                   .LibvpxVp9TrustedRateController()),
      dynamic_rate_settings_(
          RateControlSettings::ParseFromFieldTrials().Vp9DynamicRateSettings()),
      full_superframe_drop_(true),
      first_frame_in_picture_(true),
      ss_info_needed_(false),
      is_flexible_mode_(false),
      variable_framerate_experiment_(ParseVariableFramerateConfig(
          "WebRTC-VP9VariableFramerateScreenshare")),
      variable_framerate_controller_(
          variable_framerate_experiment_.framerate_limit),
      num_steady_state_frames_(0) {
  codec_ = {};
  memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));
}

VP9EncoderImpl::~VP9EncoderImpl() {
  Release();
}

void VP9EncoderImpl::SetFecControllerOverride(
    FecControllerOverride* fec_controller_override) {
  // Ignored.
}

int VP9EncoderImpl::Release() {
  int ret_val = WEBRTC_VIDEO_CODEC_OK;

  encoded_image_.Allocate(0);
  if (encoder_ != nullptr) {
    if (inited_) {
      if (vpx_codec_destroy(encoder_)) {
        ret_val = WEBRTC_VIDEO_CODEC_MEMORY;
      }
    }
    delete encoder_;
    encoder_ = nullptr;
  }
  if (config_ != nullptr) {
    delete config_;
    config_ = nullptr;
  }
  if (raw_ != nullptr) {
    vpx_img_free(raw_);
    raw_ = nullptr;
  }
  inited_ = false;
  return ret_val;
}

bool VP9EncoderImpl::ExplicitlyConfiguredSpatialLayers() const {
  // We check target_bitrate_bps of the 0th layer to see if the spatial layers
  // (i.e. bitrates) were explicitly configured.
  return codec_.spatialLayers[0].targetBitrate > 0;
}

bool VP9EncoderImpl::SetSvcRates(
    const VideoBitrateAllocation& bitrate_allocation) {
  config_->rc_target_bitrate = bitrate_allocation.get_sum_kbps();

  if (ExplicitlyConfiguredSpatialLayers()) {
    const bool layer_activation_requires_key_frame =
        inter_layer_pred_ == InterLayerPredMode::kOff ||
        inter_layer_pred_ == InterLayerPredMode::kOnKeyPic;

    for (size_t sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) {
      const bool was_layer_active = (config_->ss_target_bitrate[sl_idx] > 0);
      config_->ss_target_bitrate[sl_idx] =
          bitrate_allocation.GetSpatialLayerSum(sl_idx) / 1000;

      for (size_t tl_idx = 0; tl_idx < num_temporal_layers_; ++tl_idx) {
        config_->layer_target_bitrate[sl_idx * num_temporal_layers_ + tl_idx] =
            bitrate_allocation.GetTemporalLayerSum(sl_idx, tl_idx) / 1000;
      }

      const bool is_active_layer = (config_->ss_target_bitrate[sl_idx] > 0);
      if (!was_layer_active && is_active_layer &&
          layer_activation_requires_key_frame) {
        force_key_frame_ = true;
      } else if (was_layer_active && !is_active_layer &&
                 layer_deactivation_requires_key_frame_) {
        force_key_frame_ = true;
      }

      if (!was_layer_active) {
        // Reset frame rate controller if layer is resumed after pause.
        framerate_controller_[sl_idx].Reset();
      }

      framerate_controller_[sl_idx].SetTargetRate(
          codec_.spatialLayers[sl_idx].maxFramerate);
    }
  } else {
    float rate_ratio[VPX_MAX_LAYERS] = {0};
    float total = 0;
    for (int i = 0; i < num_spatial_layers_; ++i) {
      if (svc_params_.scaling_factor_num[i] <= 0 ||
          svc_params_.scaling_factor_den[i] <= 0) {
        RTC_LOG(LS_ERROR) << "Scaling factors not specified!";
        return false;
      }
      rate_ratio[i] = static_cast<float>(svc_params_.scaling_factor_num[i]) /
                      svc_params_.scaling_factor_den[i];
      total += rate_ratio[i];
    }

    for (int i = 0; i < num_spatial_layers_; ++i) {
      RTC_CHECK_GT(total, 0);
      config_->ss_target_bitrate[i] = static_cast<unsigned int>(
          config_->rc_target_bitrate * rate_ratio[i] / total);
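      // libvpx expects cumulative temporal-layer targets: with two layers the
      // base layer carries 2/3 of the spatial layer's bitrate, and with three
      // layers the cumulative targets are 50%, 75% and 100%.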
      if (num_temporal_layers_ == 1) {
        config_->layer_target_bitrate[i] = config_->ss_target_bitrate[i];
      } else if (num_temporal_layers_ == 2) {
        config_->layer_target_bitrate[i * num_temporal_layers_] =
            config_->ss_target_bitrate[i] * 2 / 3;
        config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
            config_->ss_target_bitrate[i];
      } else if (num_temporal_layers_ == 3) {
        config_->layer_target_bitrate[i * num_temporal_layers_] =
            config_->ss_target_bitrate[i] / 2;
        config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
            config_->layer_target_bitrate[i * num_temporal_layers_] +
            (config_->ss_target_bitrate[i] / 4);
        config_->layer_target_bitrate[i * num_temporal_layers_ + 2] =
            config_->ss_target_bitrate[i];
      } else {
        RTC_LOG(LS_ERROR) << "Unsupported number of temporal layers: "
                          << num_temporal_layers_;
        return false;
      }

      framerate_controller_[i].SetTargetRate(codec_.maxFramerate);
    }
  }

  num_active_spatial_layers_ = 0;
  for (int i = 0; i < num_spatial_layers_; ++i) {
    if (config_->ss_target_bitrate[i] > 0) {
      ++num_active_spatial_layers_;
    }
  }
  RTC_DCHECK_GT(num_active_spatial_layers_, 0);

  return true;
}

void VP9EncoderImpl::SetRates(const RateControlParameters& parameters) {
  if (!inited_) {
    RTC_LOG(LS_WARNING) << "SetRates() called while uninitialized.";
    return;
  }
  if (encoder_->err) {
    RTC_LOG(LS_WARNING) << "Encoder in error state: " << encoder_->err;
    return;
  }
  if (parameters.framerate_fps < 1.0) {
    RTC_LOG(LS_WARNING) << "Unsupported framerate: "
                        << parameters.framerate_fps;
    return;
  }
  // Update bit rate.
  if (codec_.maxBitrate > 0 &&
      parameters.bitrate.get_sum_kbps() > codec_.maxBitrate) {
    RTC_LOG(LS_WARNING) << "Target bitrate exceeds maximum: "
                        << parameters.bitrate.get_sum_kbps() << " vs "
                        << codec_.maxBitrate;
    return;
  }

  codec_.maxFramerate = static_cast<uint32_t>(parameters.framerate_fps + 0.5);
  requested_rate_settings_ = parameters;
}

// TODO(eladalon): s/inst/codec_settings/g.
int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
                               const Settings& settings) {
  if (inst == nullptr) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->maxFramerate < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  // Allow zero to represent an unspecified maxBitRate.
  if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->width < 1 || inst->height < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (settings.number_of_cores < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->VP9().numberOfTemporalLayers > 3) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  // libvpx probably does not support more than 3 spatial layers.
  if (inst->VP9().numberOfSpatialLayers > 3) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  int ret_val = Release();
  if (ret_val < 0) {
    return ret_val;
  }
  if (encoder_ == nullptr) {
    encoder_ = new vpx_codec_ctx_t;
  }
  if (config_ == nullptr) {
    config_ = new vpx_codec_enc_cfg_t;
  }
  timestamp_ = 0;
  if (&codec_ != inst) {
    codec_ = *inst;
  }

  force_key_frame_ = true;
  pics_since_key_ = 0;

  num_spatial_layers_ = inst->VP9().numberOfSpatialLayers;
  RTC_DCHECK_GT(num_spatial_layers_, 0);
  num_temporal_layers_ = inst->VP9().numberOfTemporalLayers;
  if (num_temporal_layers_ == 0) {
    num_temporal_layers_ = 1;
  }

  framerate_controller_ = std::vector<FramerateController>(
      num_spatial_layers_, FramerateController(codec_.maxFramerate));

  is_svc_ = (num_spatial_layers_ > 1 || num_temporal_layers_ > 1);

  encoded_image_._completeFrame = true;
  // Populate encoder configuration with default values.
  if (vpx_codec_enc_config_default(vpx_codec_vp9_cx(), config_, 0)) {
    return WEBRTC_VIDEO_CODEC_ERROR;
  }

  vpx_img_fmt img_fmt = VPX_IMG_FMT_NONE;
  unsigned int bits_for_storage = 8;
  switch (profile_) {
    case VP9Profile::kProfile0:
      img_fmt = VPX_IMG_FMT_I420;
      bits_for_storage = 8;
      config_->g_bit_depth = VPX_BITS_8;
      config_->g_profile = 0;
      config_->g_input_bit_depth = 8;
      break;
    case VP9Profile::kProfile2:
      img_fmt = VPX_IMG_FMT_I42016;
      bits_for_storage = 16;
      config_->g_bit_depth = VPX_BITS_10;
      config_->g_profile = 2;
      config_->g_input_bit_depth = 10;
      break;
  }

  // Creating a wrapper to the image - setting image data to nullptr. The
  // actual pointer will be set in Encode. Setting align to 1, as it is
  // meaningless (actual memory is not allocated).
  raw_ =
      vpx_img_wrap(nullptr, img_fmt, codec_.width, codec_.height, 1, nullptr);
  raw_->bit_depth = bits_for_storage;

  config_->g_w = codec_.width;
  config_->g_h = codec_.height;
  config_->rc_target_bitrate = inst->startBitrate;  // in kbit/s
  config_->g_error_resilient = is_svc_ ? VPX_ERROR_RESILIENT_DEFAULT : 0;
  // Setting the time base of the codec.
  config_->g_timebase.num = 1;
  config_->g_timebase.den = 90000;
  config_->g_lag_in_frames = 0;  // 0 - no frame lagging.
  config_->g_threads = 1;
  // Rate control settings.
  config_->rc_dropframe_thresh = inst->VP9().frameDroppingOn ? 30 : 0;
  config_->rc_end_usage = VPX_CBR;
  config_->g_pass = VPX_RC_ONE_PASS;
  config_->rc_min_quantizer =
      codec_.mode == VideoCodecMode::kScreensharing ? 8 : 2;
  config_->rc_max_quantizer = 52;
  config_->rc_undershoot_pct = 50;
  config_->rc_overshoot_pct = 50;
  config_->rc_buf_initial_sz = 500;
  config_->rc_buf_optimal_sz = 600;
  config_->rc_buf_sz = 1000;
  // Set the maximum target size of any key-frame.
  rc_max_intra_target_ = MaxIntraTarget(config_->rc_buf_optimal_sz);
  // Key-frame interval is enforced manually by this wrapper.
  config_->kf_mode = VPX_KF_DISABLED;
  // TODO(webm:1592): work-around for libvpx issue, as it can still
  // put some key-frames at will even in VPX_KF_DISABLED kf_mode.
  config_->kf_max_dist = inst->VP9().keyFrameInterval;
  config_->kf_min_dist = config_->kf_max_dist;
  config_->rc_resize_allowed = inst->VP9().automaticResizeOn ? 1 : 0;
  // Determine number of threads based on the image size and #cores.
  config_->g_threads =
      NumberOfThreads(config_->g_w, config_->g_h, settings.number_of_cores);

  cpu_speed_ = GetCpuSpeed(config_->g_w, config_->g_h);

  is_flexible_mode_ = inst->VP9().flexibleMode;

  inter_layer_pred_ = inst->VP9().interLayerPred;

  if (num_spatial_layers_ > 1 &&
      codec_.mode == VideoCodecMode::kScreensharing && !is_flexible_mode_) {
    RTC_LOG(LS_ERROR) << "Flexible mode is required for screenshare with "
                         "several spatial layers";
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  // External reference control is required for different frame rates on
  // spatial layers because libvpx generates RTP-incompatible references in
  // this case.
  external_ref_control_ = field_trial::IsEnabled("WebRTC-Vp9ExternalRefCtrl") ||
                          (num_spatial_layers_ > 1 &&
                           codec_.mode == VideoCodecMode::kScreensharing) ||
                          inter_layer_pred_ == InterLayerPredMode::kOn;

  if (num_temporal_layers_ == 1) {
    gof_.SetGofInfoVP9(kTemporalStructureMode1);
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;
    config_->ts_number_layers = 1;
    config_->ts_rate_decimator[0] = 1;
    config_->ts_periodicity = 1;
    config_->ts_layer_id[0] = 0;
  } else if (num_temporal_layers_ == 2) {
    gof_.SetGofInfoVP9(kTemporalStructureMode2);
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0101;
    config_->ts_number_layers = 2;
    config_->ts_rate_decimator[0] = 2;
    config_->ts_rate_decimator[1] = 1;
    config_->ts_periodicity = 2;
    config_->ts_layer_id[0] = 0;
    config_->ts_layer_id[1] = 1;
  } else if (num_temporal_layers_ == 3) {
    gof_.SetGofInfoVP9(kTemporalStructureMode3);
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212;
    config_->ts_number_layers = 3;
    config_->ts_rate_decimator[0] = 4;
    config_->ts_rate_decimator[1] = 2;
    config_->ts_rate_decimator[2] = 1;
    config_->ts_periodicity = 4;
    config_->ts_layer_id[0] = 0;
    config_->ts_layer_id[1] = 2;
    config_->ts_layer_id[2] = 1;
    config_->ts_layer_id[3] = 2;
  } else {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
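
  // With the 0-2-1-2 pattern above, the cumulative frame rates given by
  // ts_rate_decimator are 1/4, 1/2 and 1/1 of the input rate for TL0,
  // TL0+TL1 and TL0+TL1+TL2, respectively.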

  if (external_ref_control_) {
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
    if (num_temporal_layers_ > 1 && num_spatial_layers_ > 1 &&
        codec_.mode == VideoCodecMode::kScreensharing) {
      // External reference control for several temporal layers with different
      // frame rates on spatial layers is not implemented yet.
      return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
    }
  }
  ref_buf_.clear();

  return InitAndSetControlSettings(inst);
}

int VP9EncoderImpl::NumberOfThreads(int width,
                                    int height,
                                    int number_of_cores) {
  // Keep the number of encoder threads equal to the possible number of
  // column tiles, which is (1, 2, 4, 8). See comments below for
  // VP9E_SET_TILE_COLUMNS.
  if (width * height >= 1280 * 720 && number_of_cores > 4) {
    return 4;
  } else if (width * height >= 640 * 360 && number_of_cores > 2) {
    return 2;
  } else {
    // Use 2 threads for low res on ARM.
#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || \
    defined(WEBRTC_ANDROID)
    if (width * height >= 320 * 180 && number_of_cores > 2) {
      return 2;
    }
#endif
    // Use 1 thread for resolutions below VGA.
    return 1;
  }
}

int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
  // Set QP-min/max per spatial and temporal layer.
  int tot_num_layers = num_spatial_layers_ * num_temporal_layers_;
  for (int i = 0; i < tot_num_layers; ++i) {
    svc_params_.max_quantizers[i] = config_->rc_max_quantizer;
    svc_params_.min_quantizers[i] = config_->rc_min_quantizer;
  }
  config_->ss_number_layers = num_spatial_layers_;
  if (ExplicitlyConfiguredSpatialLayers()) {
    for (int i = 0; i < num_spatial_layers_; ++i) {
      const auto& layer = codec_.spatialLayers[i];
      RTC_CHECK_GT(layer.width, 0);
      const int scale_factor = codec_.width / layer.width;
      RTC_DCHECK_GT(scale_factor, 0);

      // Ensure the scale factor is an integer.
      if (scale_factor * layer.width != codec_.width) {
        return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
      }

      // Ensure the scale factor is the same in both dimensions.
      if (scale_factor * layer.height != codec_.height) {
        return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
      }

      // Ensure the scale factor is a power of two.
      const bool is_pow_of_two = (scale_factor & (scale_factor - 1)) == 0;
      if (!is_pow_of_two) {
        return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
      }

      svc_params_.scaling_factor_num[i] = 1;
      svc_params_.scaling_factor_den[i] = scale_factor;

      RTC_DCHECK_GT(codec_.spatialLayers[i].maxFramerate, 0);
      RTC_DCHECK_LE(codec_.spatialLayers[i].maxFramerate, codec_.maxFramerate);
      if (i > 0) {
        // The frame rate of a high spatial layer is supposed to be equal to
        // or higher than the frame rate of a low spatial layer.
        RTC_DCHECK_GE(codec_.spatialLayers[i].maxFramerate,
                      codec_.spatialLayers[i - 1].maxFramerate);
      }
    }
  } else {
    int scaling_factor_num = 256;
    for (int i = num_spatial_layers_ - 1; i >= 0; --i) {
      // 1:2 scaling in each dimension.
      svc_params_.scaling_factor_num[i] = scaling_factor_num;
      svc_params_.scaling_factor_den[i] = 256;
      scaling_factor_num /= 2;  // Halve the ratio for the next lower layer.
    }
  }
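
  // For example, with three spatial layers and 1280x720 input, the implicit
  // 1:2 per-layer scaling above yields 320x180, 640x360 and 1280x720.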

  SvcRateAllocator init_allocator(codec_);
  current_bitrate_allocation_ = init_allocator.GetAllocation(
      inst->startBitrate * 1000, inst->maxFramerate);
  if (!SetSvcRates(current_bitrate_allocation_)) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  const vpx_codec_err_t rv = vpx_codec_enc_init(
      encoder_, vpx_codec_vp9_cx(), config_,
      config_->g_bit_depth == VPX_BITS_8 ? 0 : VPX_CODEC_USE_HIGHBITDEPTH);
  if (rv != VPX_CODEC_OK) {
    RTC_LOG(LS_ERROR) << "Init error: " << vpx_codec_err_to_string(rv);
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  vpx_codec_control(encoder_, VP8E_SET_CPUUSED, cpu_speed_);
  vpx_codec_control(encoder_, VP8E_SET_MAX_INTRA_BITRATE_PCT,
                    rc_max_intra_target_);
  vpx_codec_control(encoder_, VP9E_SET_AQ_MODE,
                    inst->VP9().adaptiveQpMode ? 3 : 0);

  vpx_codec_control(encoder_, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
  vpx_codec_control(encoder_, VP9E_SET_SVC_GF_TEMPORAL_REF, 0);

  if (is_svc_) {
    vpx_codec_control(encoder_, VP9E_SET_SVC, 1);
    vpx_codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_);
  }

  if (num_spatial_layers_ > 1) {
    switch (inter_layer_pred_) {
      case InterLayerPredMode::kOn:
        vpx_codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 0);
        break;
      case InterLayerPredMode::kOff:
        vpx_codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 1);
        break;
      case InterLayerPredMode::kOnKeyPic:
        vpx_codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 2);
        break;
      default:
        RTC_NOTREACHED();
    }

    memset(&svc_drop_frame_, 0, sizeof(svc_drop_frame_));
    dropping_only_base_layer_ = inter_layer_pred_ == InterLayerPredMode::kOn &&
                                codec_.mode == VideoCodecMode::kScreensharing &&
                                num_spatial_layers_ > 1;
    if (dropping_only_base_layer_) {
      // Screenshare dropping mode: only the base spatial layer can be
      // dropped, and dropping it doesn't affect the other spatial layers.
      // This mode is preferable because the base layer has low bitrate
      // targets and is more likely to drop frames; it shouldn't reduce the
      // framerate on the other layers.
      svc_drop_frame_.framedrop_mode = LAYER_DROP;
      svc_drop_frame_.max_consec_drop = 5;
      svc_drop_frame_.framedrop_thresh[0] = config_->rc_dropframe_thresh;
      for (size_t i = 1; i < num_spatial_layers_; ++i) {
        svc_drop_frame_.framedrop_thresh[i] = 0;
      }
    } else {
      // Configure the encoder to drop the entire superframe whenever it needs
      // to drop a layer. This mode is preferred over per-layer dropping,
      // which causes quality flickering and is not compatible with RTP
      // non-flexible mode.
      svc_drop_frame_.framedrop_mode =
          full_superframe_drop_ ? FULL_SUPERFRAME_DROP : CONSTRAINED_LAYER_DROP;
      svc_drop_frame_.max_consec_drop = std::numeric_limits<int>::max();
      for (size_t i = 0; i < num_spatial_layers_; ++i) {
        svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
      }
    }
    vpx_codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER,
                      &svc_drop_frame_);
  }

  // Register a callback for getting each spatial layer.
  vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = {
      VP9EncoderImpl::EncoderOutputCodedPacketCallback,
      reinterpret_cast<void*>(this)};
  vpx_codec_control(encoder_, VP9E_REGISTER_CX_CALLBACK,
                    reinterpret_cast<void*>(&cbp));

  // Control function to set the number of column tiles in encoding a frame,
  // in log2 unit: e.g., 0 = 1 tile column, 1 = 2 tile columns, 2 = 4 tile
  // columns. The number of tile columns will be capped by the encoder based
  // on image size (minimum width of a tile column is 256 pixels, maximum is
  // 4096).
  vpx_codec_control(encoder_, VP9E_SET_TILE_COLUMNS, (config_->g_threads >> 1));
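  // E.g. g_threads == 4 passes the log2 value 2, i.e. four tile columns.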

  // Turn on row-based multithreading.
  vpx_codec_control(encoder_, VP9E_SET_ROW_MT, 1);

#if !defined(WEBRTC_ARCH_ARM) && !defined(WEBRTC_ARCH_ARM64) && \
    !defined(ANDROID)
  // Do not enable the denoiser on ARM since optimization is pending.
  // Denoiser is on by default on other platforms.
  vpx_codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
                    inst->VP9().denoisingOn ? 1 : 0);
#endif

  if (codec_.mode == VideoCodecMode::kScreensharing) {
    // Adjust internal parameters to screen content.
    vpx_codec_control(encoder_, VP9E_SET_TUNE_CONTENT, 1);
  }
  // Enable encoder skip of static/low content blocks.
  vpx_codec_control(encoder_, VP8E_SET_STATIC_THRESHOLD, 1);
  inited_ = true;
  return WEBRTC_VIDEO_CODEC_OK;
}

uint32_t VP9EncoderImpl::MaxIntraTarget(uint32_t optimal_buffer_size) {
  // Set max to the optimal buffer level (normalized by target BR),
  // and scaled by a scale_par.
  // Max target size = scale_par * optimal_buffer_size * targetBR[Kbps].
  // This value is presented in percentage of perFrameBw:
  // perFrameBw = targetBR[Kbps] * 1000 / framerate.
  // The target in % is as follows:
  float scale_par = 0.5;
  uint32_t target_pct =
      optimal_buffer_size * scale_par * codec_.maxFramerate / 10;
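  // Example: with rc_buf_optimal_sz = 600 (ms) and 30 fps, target_pct =
  // 600 * 0.5 * 30 / 10 = 900, i.e. a key-frame may spend up to 9x the
  // average per-frame bandwidth.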
  // Don't go below 3 times the per frame bandwidth.
  const uint32_t min_intra_size = 300;
  return (target_pct < min_intra_size) ? min_intra_size : target_pct;
}

int VP9EncoderImpl::Encode(const VideoFrame& input_image,
                           const std::vector<VideoFrameType>* frame_types) {
  if (!inited_) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  if (encoded_complete_callback_ == nullptr) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  if (num_active_spatial_layers_ == 0) {
    // All spatial layers are disabled, return without encoding anything.
    return WEBRTC_VIDEO_CODEC_OK;
  }

  // We only support one stream at the moment.
  if (frame_types && !frame_types->empty()) {
    if ((*frame_types)[0] == VideoFrameType::kVideoFrameKey) {
      force_key_frame_ = true;
    }
  }

  if (pics_since_key_ + 1 ==
      static_cast<size_t>(codec_.VP9()->keyFrameInterval)) {
    force_key_frame_ = true;
  }

  vpx_svc_layer_id_t layer_id = {0};
  if (!force_key_frame_) {
    const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof;
    layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx];

    if (VideoCodecMode::kScreensharing == codec_.mode) {
      const uint32_t frame_timestamp_ms =
          1000 * input_image.timestamp() / kVideoPayloadTypeFrequency;

      // To ensure that several rate-limiters with different limits don't
      // interfere, they must be queried in order of increasing limit.

      bool use_steady_state_limiter =
          variable_framerate_experiment_.enabled &&
          input_image.update_rect().IsEmpty() &&
          num_steady_state_frames_ >=
              variable_framerate_experiment_.frames_before_steady_state;

      for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
        const float layer_fps =
            framerate_controller_[layer_id.spatial_layer_id].GetTargetRate();
        // Use the steady-state rate-limiter at the correct place.
        if (use_steady_state_limiter &&
            layer_fps > variable_framerate_experiment_.framerate_limit - 1e-9) {
          if (variable_framerate_controller_.DropFrame(frame_timestamp_ms)) {
            layer_id.spatial_layer_id = num_active_spatial_layers_;
          }
          // Break always: if the rate limiter triggered a frame drop, there
          // is no need to continue; otherwise, the rate is below the limits
          // of the remaining rate-limiters.
          break;
        }
        if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) {
          ++layer_id.spatial_layer_id;
        } else {
          break;
        }
      }

      if (use_steady_state_limiter &&
          layer_id.spatial_layer_id < num_active_spatial_layers_) {
        variable_framerate_controller_.AddFrame(frame_timestamp_ms);
      }
    }

    RTC_DCHECK_LE(layer_id.spatial_layer_id, num_active_spatial_layers_);
    if (layer_id.spatial_layer_id >= num_active_spatial_layers_) {
      // Drop entire picture.
      return WEBRTC_VIDEO_CODEC_OK;
    }
  }

  for (int sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
    layer_id.temporal_layer_id_per_spatial[sl_idx] = layer_id.temporal_layer_id;
  }

  vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);

  if (requested_rate_settings_) {
    if (dynamic_rate_settings_) {
      // Tweak rate control settings based on available network headroom.
      UpdateRateSettings(
          config_,
          GetRateSettings(
              requested_rate_settings_->bandwidth_allocation.bps<double>() /
              requested_rate_settings_->bitrate.get_sum_bps()));
    }

    bool more_layers_requested = MoreLayersEnabled(
        requested_rate_settings_->bitrate, current_bitrate_allocation_);
    bool less_layers_requested = MoreLayersEnabled(
        current_bitrate_allocation_, requested_rate_settings_->bitrate);
    // In SVC mode, new layers can only be enabled if all lower layers were
    // encoded and we are at the base temporal layer. This delays the rate
    // allocation change until the next frame on the base spatial layer. In
    // KSVC or simulcast modes a key frame is generated for a new layer, so
    // the allocation can be updated at any time.
    bool can_upswitch =
        inter_layer_pred_ != InterLayerPredMode::kOn ||
        (layer_id.spatial_layer_id == 0 && layer_id.temporal_layer_id == 0);
    if (!more_layers_requested || can_upswitch) {
      current_bitrate_allocation_ = requested_rate_settings_->bitrate;
      requested_rate_settings_ = absl::nullopt;
      if (!SetSvcRates(current_bitrate_allocation_)) {
        return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
      }
      if (less_layers_requested || more_layers_requested) {
        ss_info_needed_ = true;
      }
      if (more_layers_requested && !force_key_frame_) {
        // Prohibit drop of all layers for the next frame, so the newly
        // enabled layer has a valid spatial reference.
        for (size_t i = 0; i < num_spatial_layers_; ++i) {
          svc_drop_frame_.framedrop_thresh[i] = 0;
        }
      }
    }
  }

  if (num_spatial_layers_ > 1) {
    // Update frame dropping settings as they may change on a per-frame basis.
    vpx_codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER,
                      &svc_drop_frame_);
  }

  if (vpx_codec_enc_config_set(encoder_, config_)) {
    return WEBRTC_VIDEO_CODEC_ERROR;
  }

  RTC_DCHECK_EQ(input_image.width(), raw_->d_w);
  RTC_DCHECK_EQ(input_image.height(), raw_->d_h);

  // Set input image for use in the callback.
  // This was necessary since you need some information from input_image.
  // You can save only the necessary information (such as timestamp) instead
  // of doing this.
  input_image_ = &input_image;

  // Keep reference to buffer until encode completes.
  rtc::scoped_refptr<I420BufferInterface> i420_buffer;
  const I010BufferInterface* i010_buffer;
  rtc::scoped_refptr<const I010BufferInterface> i010_copy;
  switch (profile_) {
    case VP9Profile::kProfile0: {
      i420_buffer = input_image.video_frame_buffer()->ToI420();
      // Image in vpx_image_t format.
      // Input image is const. VPX's raw image is not defined as const.
      raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(i420_buffer->DataY());
      raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(i420_buffer->DataU());
      raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(i420_buffer->DataV());
      raw_->stride[VPX_PLANE_Y] = i420_buffer->StrideY();
      raw_->stride[VPX_PLANE_U] = i420_buffer->StrideU();
      raw_->stride[VPX_PLANE_V] = i420_buffer->StrideV();
      break;
    }
    case VP9Profile::kProfile2: {
      // We can inject kI010 frames directly for encode. All other formats
      // should be converted to it.
      switch (input_image.video_frame_buffer()->type()) {
        case VideoFrameBuffer::Type::kI010: {
          i010_buffer = input_image.video_frame_buffer()->GetI010();
          break;
        }
        default: {
          i010_copy =
              I010Buffer::Copy(*input_image.video_frame_buffer()->ToI420());
          i010_buffer = i010_copy.get();
        }
      }
      raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(
          reinterpret_cast<const uint8_t*>(i010_buffer->DataY()));
      raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(
          reinterpret_cast<const uint8_t*>(i010_buffer->DataU()));
      raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(
          reinterpret_cast<const uint8_t*>(i010_buffer->DataV()));
      raw_->stride[VPX_PLANE_Y] = i010_buffer->StrideY() * 2;
      raw_->stride[VPX_PLANE_U] = i010_buffer->StrideU() * 2;
      raw_->stride[VPX_PLANE_V] = i010_buffer->StrideV() * 2;
      break;
    }
  }

  vpx_enc_frame_flags_t flags = 0;
  if (force_key_frame_) {
    flags = VPX_EFLAG_FORCE_KF;
  }

  if (external_ref_control_) {
    vpx_svc_ref_frame_config_t ref_config =
        SetReferences(force_key_frame_, layer_id.spatial_layer_id);

    if (VideoCodecMode::kScreensharing == codec_.mode) {
      for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
        ref_config.duration[sl_idx] = static_cast<int64_t>(
            90000 / (std::min(static_cast<float>(codec_.maxFramerate),
                              framerate_controller_[sl_idx].GetTargetRate())));
      }
    }

    vpx_codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_config);
  }

  first_frame_in_picture_ = true;

  // TODO(ssilkin): Frame duration should be specified per spatial layer,
  // since their frame rates can differ. For now, calculate the frame duration
  // based on the target frame rate of the highest spatial layer, whose frame
  // rate is supposed to be equal to or higher than the frame rate of the low
  // spatial layers. Also, the timestamp should represent the actual time
  // passed since the previous frame (not the 'expected' time). Then the rate
  // controller can drain the buffer more accurately.
  RTC_DCHECK_GE(framerate_controller_.size(), num_active_spatial_layers_);
  float target_framerate_fps =
      (codec_.mode == VideoCodecMode::kScreensharing)
          ? std::min(static_cast<float>(codec_.maxFramerate),
                     framerate_controller_[num_active_spatial_layers_ - 1]
                         .GetTargetRate())
          : codec_.maxFramerate;
  uint32_t duration = static_cast<uint32_t>(90000 / target_framerate_fps);
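  // E.g. at 30 fps each frame advances the 90 kHz RTP clock by 3000 ticks.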
  const vpx_codec_err_t rv = vpx_codec_encode(encoder_, raw_, timestamp_,
                                              duration, flags, VPX_DL_REALTIME);
  if (rv != VPX_CODEC_OK) {
    RTC_LOG(LS_ERROR) << "Encoding error: " << vpx_codec_err_to_string(rv)
                      << "\n"
                      << "Details: " << vpx_codec_error(encoder_) << "\n"
                      << vpx_codec_error_detail(encoder_);
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  timestamp_ += duration;

  if (!full_superframe_drop_) {
    const bool end_of_picture = true;
    DeliverBufferedFrame(end_of_picture);
  }

  return WEBRTC_VIDEO_CODEC_OK;
}

void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
                                           absl::optional<int>* spatial_idx,
                                           const vpx_codec_cx_pkt& pkt,
                                           uint32_t timestamp) {
  RTC_CHECK(codec_specific != nullptr);
  codec_specific->codecType = kVideoCodecVP9;
  CodecSpecificInfoVP9* vp9_info = &(codec_specific->codecSpecific.VP9);

  vp9_info->first_frame_in_picture = first_frame_in_picture_;
  vp9_info->flexible_mode = is_flexible_mode_;

  if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
    pics_since_key_ = 0;
  } else if (first_frame_in_picture_) {
    ++pics_since_key_;
  }

  vpx_svc_layer_id_t layer_id = {0};
  vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);

  // Can't have a keyframe with a non-zero temporal layer.
  RTC_DCHECK(pics_since_key_ != 0 || layer_id.temporal_layer_id == 0);

  RTC_CHECK_GT(num_temporal_layers_, 0);
  RTC_CHECK_GT(num_active_spatial_layers_, 0);
  if (num_temporal_layers_ == 1) {
    RTC_CHECK_EQ(layer_id.temporal_layer_id, 0);
    vp9_info->temporal_idx = kNoTemporalIdx;
  } else {
    vp9_info->temporal_idx = layer_id.temporal_layer_id;
  }
  if (num_active_spatial_layers_ == 1) {
    RTC_CHECK_EQ(layer_id.spatial_layer_id, 0);
    *spatial_idx = absl::nullopt;
  } else {
    *spatial_idx = layer_id.spatial_layer_id;
  }

  // TODO(asapersson): this info has to be obtained from the encoder.
  vp9_info->temporal_up_switch = false;

  const bool is_key_pic = (pics_since_key_ == 0);
  const bool is_inter_layer_pred_allowed =
      (inter_layer_pred_ == InterLayerPredMode::kOn ||
       (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic));

  // Always set inter_layer_predicted to true on a high-layer frame if
  // inter-layer prediction (ILP) is allowed, even if the encoder didn't
  // actually use it. Setting inter_layer_predicted to false would allow the
  // receiver to decode the high-layer frame without decoding the low-layer
  // frame. If that happened (e.g. if the low-layer frame was lost), the
  // receiver wouldn't be able to decode the next high-layer frame that
  // uses ILP.
  vp9_info->inter_layer_predicted =
      first_frame_in_picture_ ? false : is_inter_layer_pred_allowed;

  // Mark all low spatial layer frames as references (not just frames of
  // active low spatial layers) if inter-layer prediction is enabled, since
  // these frames are indirect references of the high spatial layer, which can
  // later be enabled without a key frame.
  vp9_info->non_ref_for_inter_layer_pred =
      !is_inter_layer_pred_allowed ||
      layer_id.spatial_layer_id + 1 == num_spatial_layers_;

  // Always populate this, so that the packetizer can properly set the marker
  // bit.
  vp9_info->num_spatial_layers = num_active_spatial_layers_;

  vp9_info->num_ref_pics = 0;
  FillReferenceIndices(pkt, pics_since_key_, vp9_info->inter_layer_predicted,
                       vp9_info);
  if (vp9_info->flexible_mode) {
    vp9_info->gof_idx = kNoGofIdx;
  } else {
    vp9_info->gof_idx =
        static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
    vp9_info->temporal_up_switch = gof_.temporal_up_switch[vp9_info->gof_idx];
    RTC_DCHECK(vp9_info->num_ref_pics == gof_.num_ref_pics[vp9_info->gof_idx] ||
               vp9_info->num_ref_pics == 0);
  }

  vp9_info->inter_pic_predicted = (!is_key_pic && vp9_info->num_ref_pics > 0);

  // Write SS on the key frame of independently coded spatial layers, and on
  // the base temporal/spatial layer frame if the number of layers changed
  // without issuing a key picture (inter-layer prediction is enabled).
  const bool is_key_frame = is_key_pic && !vp9_info->inter_layer_predicted;
  if (is_key_frame || (ss_info_needed_ && layer_id.temporal_layer_id == 0 &&
                       layer_id.spatial_layer_id == 0)) {
    vp9_info->ss_data_available = true;
    vp9_info->spatial_layer_resolution_present = true;
    for (size_t i = 0; i < num_active_spatial_layers_; ++i) {
      vp9_info->width[i] = codec_.width * svc_params_.scaling_factor_num[i] /
                           svc_params_.scaling_factor_den[i];
      vp9_info->height[i] = codec_.height * svc_params_.scaling_factor_num[i] /
                            svc_params_.scaling_factor_den[i];
    }
    if (vp9_info->flexible_mode) {
      vp9_info->gof.num_frames_in_gof = 0;
    } else {
      vp9_info->gof.CopyGofInfoVP9(gof_);
    }

    ss_info_needed_ = false;
  } else {
    vp9_info->ss_data_available = false;
  }

  first_frame_in_picture_ = false;
}

void VP9EncoderImpl::FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
                                          const size_t pic_num,
                                          const bool inter_layer_predicted,
                                          CodecSpecificInfoVP9* vp9_info) {
  vpx_svc_layer_id_t layer_id = {0};
  vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);

  const bool is_key_frame =
      (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;

  std::vector<RefFrameBuffer> ref_buf_list;

  if (is_svc_) {
    vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
    vpx_codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, &enc_layer_conf);
    int ref_buf_flags = 0;

    if (enc_layer_conf.reference_last[layer_id.spatial_layer_id]) {
      const size_t fb_idx =
          enc_layer_conf.lst_fb_idx[layer_id.spatial_layer_id];
      RTC_DCHECK(ref_buf_.find(fb_idx) != ref_buf_.end());
      if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
                    ref_buf_.at(fb_idx)) == ref_buf_list.end()) {
        ref_buf_list.push_back(ref_buf_.at(fb_idx));
        ref_buf_flags |= 1 << fb_idx;
      }
    }

    if (enc_layer_conf.reference_alt_ref[layer_id.spatial_layer_id]) {
      const size_t fb_idx =
          enc_layer_conf.alt_fb_idx[layer_id.spatial_layer_id];
      RTC_DCHECK(ref_buf_.find(fb_idx) != ref_buf_.end());
      if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
                    ref_buf_.at(fb_idx)) == ref_buf_list.end()) {
        ref_buf_list.push_back(ref_buf_.at(fb_idx));
        ref_buf_flags |= 1 << fb_idx;
      }
    }

    if (enc_layer_conf.reference_golden[layer_id.spatial_layer_id]) {
      const size_t fb_idx =
          enc_layer_conf.gld_fb_idx[layer_id.spatial_layer_id];
      RTC_DCHECK(ref_buf_.find(fb_idx) != ref_buf_.end());
      if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
                    ref_buf_.at(fb_idx)) == ref_buf_list.end()) {
        ref_buf_list.push_back(ref_buf_.at(fb_idx));
        ref_buf_flags |= 1 << fb_idx;
      }
    }

    RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl "
                        << layer_id.spatial_layer_id << " tl "
                        << layer_id.temporal_layer_id << " referenced buffers "
                        << (ref_buf_flags & (1 << 0) ? 1 : 0)
                        << (ref_buf_flags & (1 << 1) ? 1 : 0)
                        << (ref_buf_flags & (1 << 2) ? 1 : 0)
                        << (ref_buf_flags & (1 << 3) ? 1 : 0)
                        << (ref_buf_flags & (1 << 4) ? 1 : 0)
                        << (ref_buf_flags & (1 << 5) ? 1 : 0)
                        << (ref_buf_flags & (1 << 6) ? 1 : 0)
                        << (ref_buf_flags & (1 << 7) ? 1 : 0);

  } else if (!is_key_frame) {
    RTC_DCHECK_EQ(num_spatial_layers_, 1);
    RTC_DCHECK_EQ(num_temporal_layers_, 1);
    // In non-SVC mode the encoder doesn't provide a reference list. Assume
    // that each frame references the previous one, which is stored in
    // buffer 0.
    ref_buf_list.push_back(ref_buf_.at(0));
  }

  size_t max_ref_temporal_layer_id = 0;

  std::vector<size_t> ref_pid_list;

  vp9_info->num_ref_pics = 0;
  for (const RefFrameBuffer& ref_buf : ref_buf_list) {
    RTC_DCHECK_LE(ref_buf.pic_num, pic_num);
    if (ref_buf.pic_num < pic_num) {
      if (inter_layer_pred_ != InterLayerPredMode::kOn) {
        // RTP spec limits temporal prediction to the same spatial layer.
        // It is safe to ignore this requirement if inter-layer prediction is
        // enabled for all frames when all base frames are relayed to receiver.
        RTC_DCHECK_EQ(ref_buf.spatial_layer_id, layer_id.spatial_layer_id);
      } else {
        RTC_DCHECK_LE(ref_buf.spatial_layer_id, layer_id.spatial_layer_id);
      }
      RTC_DCHECK_LE(ref_buf.temporal_layer_id, layer_id.temporal_layer_id);

      // The encoder may reference several spatial layers in the same previous
      // frame if some spatial layers were skipped on the current frame.
      // We shouldn't put duplicate references, as that may confuse some old
      // clients and isn't RTP-compatible.
      if (std::find(ref_pid_list.begin(), ref_pid_list.end(),
                    ref_buf.pic_num) != ref_pid_list.end()) {
        continue;
      }
      ref_pid_list.push_back(ref_buf.pic_num);

      const size_t p_diff = pic_num - ref_buf.pic_num;
      RTC_DCHECK_LE(p_diff, 127UL);
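      // The 127 limit comes from P_DIFF being a 7-bit field in the RTP VP9
      // payload descriptor.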

      vp9_info->p_diff[vp9_info->num_ref_pics] = static_cast<uint8_t>(p_diff);
      ++vp9_info->num_ref_pics;

      max_ref_temporal_layer_id =
          std::max(max_ref_temporal_layer_id, ref_buf.temporal_layer_id);
    } else {
      RTC_DCHECK(inter_layer_predicted);
      // RTP spec only allows using the previous spatial layer for inter-layer
      // prediction.
      RTC_DCHECK_EQ(ref_buf.spatial_layer_id + 1, layer_id.spatial_layer_id);
    }
  }

  vp9_info->temporal_up_switch =
      (max_ref_temporal_layer_id <
       static_cast<size_t>(layer_id.temporal_layer_id));
}

void VP9EncoderImpl::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt,
                                            const size_t pic_num) {
  vpx_svc_layer_id_t layer_id = {0};
  vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);

  RefFrameBuffer frame_buf(pic_num, layer_id.spatial_layer_id,
                           layer_id.temporal_layer_id);

  if (is_svc_) {
    vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
    vpx_codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, &enc_layer_conf);
    const int update_buffer_slot =
        enc_layer_conf.update_buffer_slot[layer_id.spatial_layer_id];

    for (size_t i = 0; i < kNumVp9Buffers; ++i) {
      if (update_buffer_slot & (1 << i)) {
        ref_buf_[i] = frame_buf;
      }
    }

    RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl "
                        << layer_id.spatial_layer_id << " tl "
                        << layer_id.temporal_layer_id << " updated buffers "
                        << (update_buffer_slot & (1 << 0) ? 1 : 0)
                        << (update_buffer_slot & (1 << 1) ? 1 : 0)
                        << (update_buffer_slot & (1 << 2) ? 1 : 0)
                        << (update_buffer_slot & (1 << 3) ? 1 : 0)
                        << (update_buffer_slot & (1 << 4) ? 1 : 0)
                        << (update_buffer_slot & (1 << 5) ? 1 : 0)
                        << (update_buffer_slot & (1 << 6) ? 1 : 0)
                        << (update_buffer_slot & (1 << 7) ? 1 : 0);
  } else {
    RTC_DCHECK_EQ(num_spatial_layers_, 1);
    RTC_DCHECK_EQ(num_temporal_layers_, 1);
    // In non-SVC mode the encoder doesn't provide a reference list. Assume
    // that each frame is a reference and is stored in buffer 0.
    ref_buf_[0] = frame_buf;
  }
}

vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences(
    bool is_key_pic,
    size_t first_active_spatial_layer_id) {
  // kRefBufIdx, kUpdBufIdx need to be updated to support longer GOFs.
  RTC_DCHECK_LE(gof_.num_frames_in_gof, 4);

  vpx_svc_ref_frame_config_t ref_config;
  memset(&ref_config, 0, sizeof(ref_config));

  const size_t num_temporal_refs = std::max(1, num_temporal_layers_ - 1);
  const bool is_inter_layer_pred_allowed =
      inter_layer_pred_ == InterLayerPredMode::kOn ||
      (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic);
  absl::optional<int> last_updated_buf_idx;

  // Put the temporal reference in LAST and the spatial reference in GOLDEN.
  // Update the frame buffer (i.e. store the encoded frame) if the current
  // frame is a temporal reference (i.e. it belongs to a low temporal layer)
  // or if it is a spatial reference. In the latter case, always store the
  // spatial reference in the last reference frame buffer.
  // For the case of 3 temporal and 3 spatial layers we need 6 frame buffers
  // for temporal references plus 1 buffer for the spatial reference: 7
  // buffers in total.

  for (size_t sl_idx = first_active_spatial_layer_id;
       sl_idx < num_active_spatial_layers_; ++sl_idx) {
    const size_t curr_pic_num = is_key_pic ? 0 : pics_since_key_ + 1;
    const size_t gof_idx = curr_pic_num % gof_.num_frames_in_gof;

    if (!is_key_pic) {
      // Set up the temporal reference.
      const int buf_idx = sl_idx * num_temporal_refs + kRefBufIdx[gof_idx];

      // The last reference frame buffer is reserved for the spatial
      // reference. It is not supposed to be used for temporal prediction.
      RTC_DCHECK_LT(buf_idx, kNumVp9Buffers - 1);

      // Sanity check that the reference picture number is smaller than the
      // current picture number.
      RTC_DCHECK_LT(ref_buf_[buf_idx].pic_num, curr_pic_num);
      const size_t pid_diff = curr_pic_num - ref_buf_[buf_idx].pic_num;
      // An incorrect spatial layer may be in the buffer due to a key-frame.
      const bool same_spatial_layer =
          ref_buf_[buf_idx].spatial_layer_id == sl_idx;
      bool correct_pid = false;
      if (is_flexible_mode_) {
        correct_pid = pid_diff < kMaxAllowedPidDiff;
      } else {
        // The code below assumes a single temporal reference.
        RTC_DCHECK_EQ(gof_.num_ref_pics[gof_idx], 1);
        correct_pid = pid_diff == gof_.pid_diff[gof_idx][0];
      }

      if (same_spatial_layer && correct_pid) {
        ref_config.lst_fb_idx[sl_idx] = buf_idx;
        ref_config.reference_last[sl_idx] = 1;
      } else {
        // This reference doesn't match the one specified by the GOF. This can
        // only happen if a spatial layer was enabled dynamically without a
        // key frame. Spatial prediction is supposed to be enabled in this
        // case.
        RTC_DCHECK(is_inter_layer_pred_allowed &&
                   sl_idx > first_active_spatial_layer_id);
      }
    }

    if (is_inter_layer_pred_allowed && sl_idx > first_active_spatial_layer_id) {
      // Set up the spatial reference.
      RTC_DCHECK(last_updated_buf_idx);
      ref_config.gld_fb_idx[sl_idx] = *last_updated_buf_idx;
      ref_config.reference_golden[sl_idx] = 1;
    } else {
      RTC_DCHECK(ref_config.reference_last[sl_idx] != 0 ||
                 sl_idx == first_active_spatial_layer_id ||
                 inter_layer_pred_ == InterLayerPredMode::kOff);
    }

    last_updated_buf_idx.reset();

    if (gof_.temporal_idx[gof_idx] < num_temporal_layers_ - 1 ||
        num_temporal_layers_ == 1) {
      last_updated_buf_idx = sl_idx * num_temporal_refs + kUpdBufIdx[gof_idx];

      // Ensure the last frame buffer is not used for temporal prediction (it
      // is reserved for the spatial reference).
      RTC_DCHECK_LT(*last_updated_buf_idx, kNumVp9Buffers - 1);
    } else if (is_inter_layer_pred_allowed) {
      last_updated_buf_idx = kNumVp9Buffers - 1;
    }

    if (last_updated_buf_idx) {
      ref_config.update_buffer_slot[sl_idx] = 1 << *last_updated_buf_idx;
    }
  }

  return ref_config;
}

int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
  RTC_DCHECK_EQ(pkt->kind, VPX_CODEC_CX_FRAME_PKT);

  if (pkt->data.frame.sz == 0) {
    // Ignore dropped frame.
    return WEBRTC_VIDEO_CODEC_OK;
  }

  vpx_svc_layer_id_t layer_id = {0};
  vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);

  if (!full_superframe_drop_) {
    // Deliver buffered low spatial layer frame.
    const bool end_of_picture = false;
    DeliverBufferedFrame(end_of_picture);
  }

  // TODO(nisse): Introduce some buffer cache or buffer pool, to reduce
  // allocations and/or copy operations.
  encoded_image_.SetEncodedData(EncodedImageBuffer::Create(
      static_cast<const uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz));

  const bool is_key_frame =
      (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
  // Ensure the encoder issued a key frame on request.
  RTC_DCHECK(is_key_frame || !force_key_frame_);

  // Check if the encoded frame is a key frame.
  encoded_image_._frameType = VideoFrameType::kVideoFrameDelta;
  if (is_key_frame) {
    encoded_image_._frameType = VideoFrameType::kVideoFrameKey;
    force_key_frame_ = false;
  }
  RTC_DCHECK_LE(encoded_image_.size(), encoded_image_.capacity());

  codec_specific_ = {};
  absl::optional<int> spatial_index;
  PopulateCodecSpecific(&codec_specific_, &spatial_index, *pkt,
                        input_image_->timestamp());
  encoded_image_.SetSpatialIndex(spatial_index);

  UpdateReferenceBuffers(*pkt, pics_since_key_);

  TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_.size());
  encoded_image_.SetTimestamp(input_image_->timestamp());
  encoded_image_._encodedHeight =
      pkt->data.frame.height[layer_id.spatial_layer_id];
  encoded_image_._encodedWidth =
      pkt->data.frame.width[layer_id.spatial_layer_id];
  int qp = -1;
  vpx_codec_control(encoder_, VP8E_GET_LAST_QUANTIZER, &qp);
  encoded_image_.qp_ = qp;

  if (full_superframe_drop_) {
    const bool end_of_picture = encoded_image_.SpatialIndex().value_or(0) + 1 ==
                                num_active_spatial_layers_;
    DeliverBufferedFrame(end_of_picture);
  }

  return WEBRTC_VIDEO_CODEC_OK;
}
void VP9EncoderImpl::DeliverBufferedFrame(bool end_of_picture) {
  if (encoded_image_.size() > 0) {
    if (num_spatial_layers_ > 1) {
      // Restore frame dropping settings, as dropping may be temporarily
      // forbidden due to dynamically enabled layers.
      svc_drop_frame_.framedrop_thresh[0] = config_->rc_dropframe_thresh;
      for (size_t i = 1; i < num_spatial_layers_; ++i) {
        svc_drop_frame_.framedrop_thresh[i] =
            dropping_only_base_layer_ ? 0 : config_->rc_dropframe_thresh;
      }
    }

    codec_specific_.codecSpecific.VP9.end_of_picture = end_of_picture;

    // No data partitioning in VP9, so 1 partition only.
    int part_idx = 0;
    RTPFragmentationHeader frag_info;
    frag_info.VerifyAndAllocateFragmentationHeader(1);
    frag_info.fragmentationOffset[part_idx] = 0;
    frag_info.fragmentationLength[part_idx] = encoded_image_.size();

    encoded_complete_callback_->OnEncodedImage(encoded_image_, &codec_specific_,
                                               &frag_info);

    if (codec_.mode == VideoCodecMode::kScreensharing) {
      const uint8_t spatial_idx = encoded_image_.SpatialIndex().value_or(0);
      const uint32_t frame_timestamp_ms =
          1000 * encoded_image_.Timestamp() / kVideoPayloadTypeFrequency;
      framerate_controller_[spatial_idx].AddFrame(frame_timestamp_ms);

      const size_t steady_state_size = SteadyStateSize(
          spatial_idx, codec_specific_.codecSpecific.VP9.temporal_idx);

      // Only frames on spatial layers that may be frame-rate limited in a
      // steady state are considered for steady-state detection.
      if (framerate_controller_[spatial_idx].GetTargetRate() >
          variable_framerate_experiment_.framerate_limit + 1e-9) {
        if (encoded_image_.qp_ <=
                variable_framerate_experiment_.steady_state_qp &&
            encoded_image_.size() <= steady_state_size) {
          ++num_steady_state_frames_;
        } else {
          num_steady_state_frames_ = 0;
        }
      }
    }
    encoded_image_.set_size(0);
  }
}

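// Illustrative arithmetic for the timestamp conversion in
// DeliverBufferedFrame(), assuming the standard 90 kHz video RTP clock
// (kVideoPayloadTypeFrequency == 90000): an RTP timestamp of 180000 maps to
// 1000 * 180000 / 90000 = 2000 ms.
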
int VP9EncoderImpl::RegisterEncodeCompleteCallback(
    EncodedImageCallback* callback) {
  encoded_complete_callback_ = callback;
  return WEBRTC_VIDEO_CODEC_OK;
}

VideoEncoder::EncoderInfo VP9EncoderImpl::GetEncoderInfo() const {
  EncoderInfo info;
  info.supports_native_handle = false;
  info.implementation_name = "libvpx";
  info.scaling_settings = VideoEncoder::ScalingSettings::kOff;
  info.has_trusted_rate_controller = trusted_rate_controller_;
  info.is_hardware_accelerated = false;
  info.has_internal_source = false;
  for (size_t si = 0; si < num_spatial_layers_; ++si) {
    info.fps_allocation[si].clear();
    if (!codec_.spatialLayers[si].active) {
      continue;
    }
    // This spatial layer may already use a fraction of the total frame rate.
    const float sl_fps_fraction =
        codec_.spatialLayers[si].maxFramerate / codec_.maxFramerate;
    for (size_t ti = 0; ti < num_temporal_layers_; ++ti) {
      const uint32_t decimator =
          num_temporal_layers_ <= 1 ? 1 : config_->ts_rate_decimator[ti];
      RTC_DCHECK_GT(decimator, 0);
      info.fps_allocation[si].push_back(rtc::saturated_cast<uint8_t>(
          EncoderInfo::kMaxFramerateFraction * (sl_fps_fraction / decimator)));
    }
  }
  return info;
}

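// Illustrative fps_allocation example (a sketch, assuming
// EncoderInfo::kMaxFramerateFraction == 255 and three temporal layers with
// |ts_rate_decimator| == {4, 2, 1}): a spatial layer running at the full
// frame rate (sl_fps_fraction == 1.0) reports roughly {63, 127, 255}, i.e.
// 1/4, 1/2 and the full rate for temporal layers 0..2.
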
size_t VP9EncoderImpl::SteadyStateSize(int sid, int tid) {
  const size_t bitrate_bps = current_bitrate_allocation_.GetBitrate(
      sid, tid == kNoTemporalIdx ? 0 : tid);
  const float fps = (codec_.mode == VideoCodecMode::kScreensharing)
                        ? std::min(static_cast<float>(codec_.maxFramerate),
                                   framerate_controller_[sid].GetTargetRate())
                        : codec_.maxFramerate;
  return static_cast<size_t>(
      bitrate_bps / (8 * fps) *
          (100 -
           variable_framerate_experiment_.steady_state_undershoot_percentage) /
          100 +
      0.5);
}

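// Worked example for SteadyStateSize(), for illustration only: with a layer
// bitrate of 300000 bps, 30 fps and an undershoot percentage of 30, the
// threshold is 300000 / (8 * 30) * 70 / 100 + 0.5 = 875 bytes per frame.
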
// static
VP9EncoderImpl::VariableFramerateExperiment
VP9EncoderImpl::ParseVariableFramerateConfig(std::string group_name) {
  FieldTrialFlag enabled = FieldTrialFlag("Enabled");
  FieldTrialParameter<double> framerate_limit("min_fps", 5.0);
  FieldTrialParameter<int> qp("min_qp", 32);
  FieldTrialParameter<int> undershoot_percentage("undershoot", 30);
  FieldTrialParameter<int> frames_before_steady_state(
      "frames_before_steady_state", 5);
  ParseFieldTrial({&enabled, &framerate_limit, &qp, &undershoot_percentage,
                   &frames_before_steady_state},
                  field_trial::FindFullName(group_name));
  VariableFramerateExperiment config;
  config.enabled = enabled.Get();
  config.framerate_limit = framerate_limit.Get();
  config.steady_state_qp = qp.Get();
  config.steady_state_undershoot_percentage = undershoot_percentage.Get();
  config.frames_before_steady_state = frames_before_steady_state.Get();

  return config;
}

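// A field trial string such as "Enabled,min_fps:7.5,min_qp:30,undershoot:25"
// (hypothetical values, shown only to illustrate the comma-separated
// key:value format consumed by ParseFieldTrial()) would enable the experiment
// and override the listed parameters; keys left out keep their defaults.
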
VP9DecoderImpl::VP9DecoderImpl()
    : decode_complete_callback_(nullptr),
      inited_(false),
      decoder_(nullptr),
      key_frame_required_(true) {}

VP9DecoderImpl::~VP9DecoderImpl() {
  inited_ = true;  // Force Release() to perform the actual release below.
  Release();
  int num_buffers_in_use = frame_buffer_pool_.GetNumBuffersInUse();
  if (num_buffers_in_use > 0) {
    // The frame buffers are reference counted and frames are exposed after
    // decoding. There may be valid use cases where previous frames are still
    // referenced after ~VP9DecoderImpl; that is not a leak.
    RTC_LOG(LS_INFO) << num_buffers_in_use << " Vp9FrameBuffers are still "
                     << "referenced during ~VP9DecoderImpl.";
  }
}

int VP9DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) {
  int ret_val = Release();
  if (ret_val < 0) {
    return ret_val;
  }

  if (decoder_ == nullptr) {
    decoder_ = new vpx_codec_ctx_t;
  }
  vpx_codec_dec_cfg_t cfg;
  memset(&cfg, 0, sizeof(cfg));

  // We want to use multithreading when decoding high-resolution videos. But
  // since we don't know the resolution of the input stream at this stage, we
  // always enable it.
  cfg.threads = std::min(number_of_cores, kMaxNumTiles4kVideo);

  vpx_codec_flags_t flags = 0;
  if (vpx_codec_dec_init(decoder_, vpx_codec_vp9_dx(), &cfg, flags)) {
    return WEBRTC_VIDEO_CODEC_MEMORY;
  }

  if (!frame_buffer_pool_.InitializeVpxUsePool(decoder_)) {
    return WEBRTC_VIDEO_CODEC_MEMORY;
  }

  inited_ = true;
  // Always start with a complete key frame.
  key_frame_required_ = true;
  return WEBRTC_VIDEO_CODEC_OK;
}

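// Note: the thread count is capped at kMaxNumTiles4kVideo (8), presumably
// because threads beyond the number of tile columns expected in a 4K VP9
// stream would yield little extra decoding parallelism.
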
int VP9DecoderImpl::Decode(const EncodedImage& input_image,
                           bool missing_frames,
                           int64_t /*render_time_ms*/) {
  if (!inited_) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  if (decode_complete_callback_ == nullptr) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  // Always start with a complete key frame.
  if (key_frame_required_) {
    if (input_image._frameType != VideoFrameType::kVideoFrameKey)
      return WEBRTC_VIDEO_CODEC_ERROR;
    // We have a key frame - is it complete?
    if (input_image._completeFrame) {
      key_frame_required_ = false;
    } else {
      return WEBRTC_VIDEO_CODEC_ERROR;
    }
  }
  vpx_codec_iter_t iter = nullptr;
  vpx_image_t* img;
  const uint8_t* buffer = input_image.data();
  if (input_image.size() == 0) {
    buffer = nullptr;  // Triggers full frame concealment.
  }
  // During decode libvpx may get and release buffers from
  // |frame_buffer_pool_|. In practice libvpx keeps a few (~3-4) buffers alive
  // at a time.
  if (vpx_codec_decode(decoder_, buffer,
                       static_cast<unsigned int>(input_image.size()), 0,
                       VPX_DL_REALTIME)) {
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  // |img->fb_priv| contains the image data, a reference counted
  // Vp9FrameBuffer. It may be released by libvpx during future
  // vpx_codec_decode or vpx_codec_destroy calls.
  img = vpx_codec_get_frame(decoder_, &iter);
  int qp;
  vpx_codec_err_t vpx_ret =
      vpx_codec_control(decoder_, VPXD_GET_LAST_QUANTIZER, &qp);
  RTC_DCHECK_EQ(vpx_ret, VPX_CODEC_OK);
  int ret = ReturnFrame(img, input_image.Timestamp(), qp);
  if (ret != 0) {
    return ret;
  }
  return WEBRTC_VIDEO_CODEC_OK;
}

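// ReturnFrame() below hands the decoded planes to the VideoFrame without
// copying: the frame keeps a reference to the pooled Vp9FrameBuffer (via
// rtc::KeepRefUntilDone), so the memory stays valid until every consumer has
// released the frame.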
int VP9DecoderImpl::ReturnFrame(const vpx_image_t* img,
                                uint32_t timestamp,
                                int qp) {
  if (img == nullptr) {
    // Decoder OK and nullptr image => No show frame.
    return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
  }

  // This buffer contains all of |img|'s image data, a reference counted
  // Vp9FrameBuffer. (libvpx is done with the buffers after a few
  // vpx_codec_decode calls or vpx_codec_destroy).
  Vp9FrameBufferPool::Vp9FrameBuffer* img_buffer =
      static_cast<Vp9FrameBufferPool::Vp9FrameBuffer*>(img->fb_priv);

  // The buffer can be used directly by the VideoFrame (without copy) by
  // using a Wrapped*Buffer.
  rtc::scoped_refptr<VideoFrameBuffer> img_wrapped_buffer;
  switch (img->bit_depth) {
    case 8:
      img_wrapped_buffer = WrapI420Buffer(
          img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
          img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
          img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
          img->stride[VPX_PLANE_V],
          // WrappedI420Buffer's mechanism for allowing the release of its
          // frame buffer is through a callback function. This is where we
          // should release |img_buffer|.
          rtc::KeepRefUntilDone(img_buffer));
      break;
    case 10:
      img_wrapped_buffer = WrapI010Buffer(
          img->d_w, img->d_h,
          reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_Y]),
          img->stride[VPX_PLANE_Y] / 2,
          reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_U]),
          img->stride[VPX_PLANE_U] / 2,
          reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_V]),
          img->stride[VPX_PLANE_V] / 2, rtc::KeepRefUntilDone(img_buffer));
      break;
    default:
      RTC_NOTREACHED();
      return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
  }

  auto builder = VideoFrame::Builder()
                     .set_video_frame_buffer(img_wrapped_buffer)
                     .set_timestamp_rtp(timestamp)
                     .set_color_space(ExtractVP9ColorSpace(img->cs, img->range,
                                                           img->bit_depth));

  VideoFrame decoded_image = builder.build();

  decode_complete_callback_->Decoded(decoded_image, absl::nullopt, qp);
  return WEBRTC_VIDEO_CODEC_OK;
}

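// Note on the 10-bit path in ReturnFrame() above: libvpx reports plane
// strides in bytes, while WrapI010Buffer() expects strides in 16-bit samples,
// hence the division by two.
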
int VP9DecoderImpl::RegisterDecodeCompleteCallback(
    DecodedImageCallback* callback) {
  decode_complete_callback_ = callback;
  return WEBRTC_VIDEO_CODEC_OK;
}

int VP9DecoderImpl::Release() {
  int ret_val = WEBRTC_VIDEO_CODEC_OK;

  if (decoder_ != nullptr) {
    if (inited_) {
      // When a codec is destroyed libvpx will release any buffers of
      // |frame_buffer_pool_| it is currently using.
      if (vpx_codec_destroy(decoder_)) {
        ret_val = WEBRTC_VIDEO_CODEC_MEMORY;
      }
    }
    delete decoder_;
    decoder_ = nullptr;
  }
  // Releases buffers from the pool. Any buffers not in use are deleted.
  // Buffers still referenced externally are deleted once fully released,
  // not returning to the pool.
  frame_buffer_pool_.ClearPool();
  inited_ = false;
  return ret_val;
}

const char* VP9DecoderImpl::ImplementationName() const {
  return "libvpx";
}

}  // namespace webrtc

#endif  // RTC_ENABLE_VP9