/*
 * Copyright (c) 2024 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "api/video_codecs/libaom_av1_encoder_factory.h"

#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/algorithm/container.h"
#include "api/video_codecs/video_encoder_interface.h"
#include "rtc_base/logging.h"
#include "rtc_base/strings/string_builder.h"
#include "third_party/libaom/source/libaom/aom/aom_codec.h"
#include "third_party/libaom/source/libaom/aom/aom_encoder.h"
#include "third_party/libaom/source/libaom/aom/aomcx.h"

#define SET_OR_DO_ERROR_CALLBACK_AND_RETURN(param_id, param_value)    \
  do {                                                                \
    if (!SetEncoderControlParameters(&ctx_, param_id, param_value)) { \
      DoErrorCallback(frame_settings);                                \
      return;                                                         \
    }                                                                 \
  } while (0)

#define SET_OR_RETURN_FALSE(param_id, param_value)                    \
  do {                                                                \
    if (!SetEncoderControlParameters(&ctx_, param_id, param_value)) { \
      return false;                                                   \
    }                                                                 \
  } while (0)

namespace webrtc {

using FrameEncodeSettings = VideoEncoderInterface::FrameEncodeSettings;
using Cbr = FrameEncodeSettings::Cbr;
using Cqp = FrameEncodeSettings::Cqp;
using aom_img_ptr = std::unique_ptr<aom_image_t, decltype(&aom_img_free)>;

namespace {
// MaxQp defined here:
// http://google3/third_party/libaom/git_root/av1/av1_cx_iface.c;l=3510;rcl=527067478
constexpr int kMaxQp = 63;
constexpr int kNumBuffers = 8;
constexpr int kMaxReferences = 3;
constexpr int kMinEffortLevel = -2;
constexpr int kMaxEffortLevel = 2;
constexpr int kMaxSpatialLayersWtf = 4;
constexpr int kMaxTemporalLayers = 4;
constexpr int kRtpTicksPerSecond = 90000;
constexpr std::array<VideoFrameBuffer::Type, 2> kSupportedInputFormats = {
    VideoFrameBuffer::Type::kI420, VideoFrameBuffer::Type::kNV12};

constexpr std::array<Rational, 7> kSupportedScalingFactors = {
    {{8, 1}, {4, 1}, {2, 1}, {1, 1}, {1, 2}, {1, 4}, {1, 8}}};

absl::optional<Rational> GetScalingFactor(const Resolution& from,
                                          const Resolution& to) {
  auto it = absl::c_find_if(kSupportedScalingFactors, [&](const Rational& r) {
    return (from.width * r.numerator / r.denominator) == to.width &&
           (from.height * r.numerator / r.denominator) == to.height;
  });

  if (it != kSupportedScalingFactors.end()) {
    return *it;
  }

  return {};
}
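// Illustration (not part of the upstream file): the lookup is a pure table
// match against `kSupportedScalingFactors`, assuming `Resolution` aggregate-
// initializes as {width, height} as it does elsewhere in this file. For
// example:
//   GetScalingFactor({1280, 720}, {640, 360});  // -> Rational{1, 2}
//   GetScalingFactor({1280, 720}, {854, 480});  // -> absl::nullopt, since a
//                                               //    2:3 ratio is not listed.
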
class LibaomAv1Encoder : public VideoEncoderInterface {
 public:
  LibaomAv1Encoder() = default;
  ~LibaomAv1Encoder() override;

  bool InitEncode(
      const VideoEncoderFactoryInterface::StaticEncoderSettings& settings,
      const std::map<std::string, std::string>& encoder_specific_settings);

  void Encode(rtc::scoped_refptr<webrtc::VideoFrameBuffer> frame_buffer,
              const TemporalUnitSettings& tu_settings,
              std::vector<FrameEncodeSettings> frame_settings) override;

 private:
  aom_img_ptr image_to_encode_ = aom_img_ptr(nullptr, aom_img_free);
  aom_codec_ctx_t ctx_;
  aom_codec_enc_cfg_t cfg_;

  absl::optional<VideoCodecMode> current_content_type_;
  absl::optional<int> current_effort_level_;
  int max_number_of_threads_;
  std::array<absl::optional<Resolution>, 8> last_resolution_in_buffer_;
};

template <typename T>
bool SetEncoderControlParameters(aom_codec_ctx_t* ctx, int id, T value) {
  aom_codec_err_t error_code = aom_codec_control(ctx, id, value);
  if (error_code != AOM_CODEC_OK) {
    RTC_LOG(LS_WARNING) << "aom_codec_control returned " << error_code
                        << " with id: " << id << ".";
  }
  return error_code == AOM_CODEC_OK;
}

LibaomAv1Encoder::~LibaomAv1Encoder() {
  aom_codec_destroy(&ctx_);
}

bool LibaomAv1Encoder::InitEncode(
    const VideoEncoderFactoryInterface::StaticEncoderSettings& settings,
    const std::map<std::string, std::string>& encoder_specific_settings) {
  if (!encoder_specific_settings.empty()) {
    RTC_LOG(LS_ERROR)
        << "libaom av1 encoder accepts no encoder specific settings";
    return false;
  }

  if (aom_codec_err_t ret = aom_codec_enc_config_default(
          aom_codec_av1_cx(), &cfg_, AOM_USAGE_REALTIME);
      ret != AOM_CODEC_OK) {
    RTC_LOG(LS_ERROR) << "aom_codec_enc_config_default returned " << ret;
    return false;
  }

  max_number_of_threads_ = settings.max_number_of_threads;

  // The encode resolution is set dynamically for each call to `Encode`, but
  // for `aom_codec_enc_init` to not fail we set it here as well.
  cfg_.g_w = settings.max_encode_dimensions.width;
  cfg_.g_h = settings.max_encode_dimensions.height;
  cfg_.g_timebase.num = 1;
  // TODO: Does a 90 kHz timebase make sense, or should microseconds be used
  // instead?
  cfg_.g_timebase.den = kRtpTicksPerSecond;
  cfg_.g_input_bit_depth = settings.encoding_format.bit_depth;
  cfg_.kf_mode = AOM_KF_DISABLED;
  // TODO: rc_undershoot_pct and rc_overshoot_pct should probably be removed.
  cfg_.rc_undershoot_pct = 50;
  cfg_.rc_overshoot_pct = 50;
  auto* cbr =
      absl::get_if<VideoEncoderFactoryInterface::StaticEncoderSettings::Cbr>(
          &settings.rc_mode);
  cfg_.rc_buf_initial_sz = cbr ? cbr->target_buffer_size.ms() : 600;
  cfg_.rc_buf_optimal_sz = cbr ? cbr->target_buffer_size.ms() : 600;
  cfg_.rc_buf_sz = cbr ? cbr->max_buffer_size.ms() : 1000;
  cfg_.g_usage = AOM_USAGE_REALTIME;
  cfg_.g_pass = AOM_RC_ONE_PASS;
  cfg_.g_lag_in_frames = 0;
  cfg_.g_error_resilient = 0;
  cfg_.rc_end_usage = cbr ? AOM_CBR : AOM_Q;

  if (aom_codec_err_t ret =
          aom_codec_enc_init(&ctx_, aom_codec_av1_cx(), &cfg_, /*flags=*/0);
      ret != AOM_CODEC_OK) {
    RTC_LOG(LS_ERROR) << "aom_codec_enc_init returned " << ret;
    return false;
  }

  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_CDEF, 1);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_TPL_MODEL, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_DELTAQ_MODE, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_ORDER_HINT, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_AQ_MODE, 3);
  SET_OR_RETURN_FALSE(AOME_SET_MAX_INTRA_BITRATE_PCT, 300);
  SET_OR_RETURN_FALSE(AV1E_SET_COEFF_COST_UPD_FREQ, 3);
  SET_OR_RETURN_FALSE(AV1E_SET_MODE_COST_UPD_FREQ, 3);
  SET_OR_RETURN_FALSE(AV1E_SET_MV_COST_UPD_FREQ, 3);
  SET_OR_RETURN_FALSE(AV1E_SET_ROW_MT, 1);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_OBMC, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_NOISE_SENSITIVITY, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_WARPED_MOTION, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_REF_FRAME_MVS, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_CFL_INTRA, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_SMOOTH_INTRA, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_ANGLE_DELTA, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_FILTER_INTRA, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1);
  SET_OR_RETURN_FALSE(AV1E_SET_DISABLE_TRELLIS_QUANT, 1);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_DIST_WTD_COMP, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_DIFF_WTD_COMP, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_DUAL_FILTER, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_INTERINTRA_COMP, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_INTERINTRA_WEDGE, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_INTRA_EDGE_FILTER, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_INTRABC, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_MASKED_COMP, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_PAETH_INTRA, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_QM, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_RECT_PARTITIONS, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_RESTORATION, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_SMOOTH_INTERINTRA, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_TX64, 0);
  SET_OR_RETURN_FALSE(AV1E_SET_MAX_REFERENCE_FRAMES, 3);

  return true;
}

struct ThreadTilesAndSuperblockSizeInfo {
  int num_threads;
  int exp_tile_rows;
  int exp_tile_colums;
  aom_superblock_size_t superblock_size;
};

ThreadTilesAndSuperblockSizeInfo GetThreadingTilesAndSuperblockSize(
    int width,
    int height,
    int max_number_of_threads) {
  ThreadTilesAndSuperblockSizeInfo res;
  const int num_pixels = width * height;
  if (num_pixels >= 1920 * 1080 && max_number_of_threads > 8) {
    res.num_threads = 8;
    res.exp_tile_rows = 2;
    res.exp_tile_colums = 1;
  } else if (num_pixels >= 640 * 360 && max_number_of_threads > 4) {
    res.num_threads = 4;
    res.exp_tile_rows = 1;
    res.exp_tile_colums = 1;
  } else if (num_pixels >= 320 * 180 && max_number_of_threads > 2) {
    res.num_threads = 2;
    res.exp_tile_rows = 1;
    res.exp_tile_colums = 0;
  } else {
    res.num_threads = 1;
    res.exp_tile_rows = 0;
    res.exp_tile_colums = 0;
  }

  if (res.num_threads > 4 && num_pixels >= 960 * 540) {
    res.superblock_size = AOM_SUPERBLOCK_SIZE_64X64;
  } else {
    res.superblock_size = AOM_SUPERBLOCK_SIZE_DYNAMIC;
  }

  RTC_LOG(LS_WARNING) << __FUNCTION__ << " res.num_threads=" << res.num_threads
                      << " res.exp_tile_rows=" << res.exp_tile_rows
                      << " res.exp_tile_colums=" << res.exp_tile_colums
                      << " res.superblock_size=" << res.superblock_size;

  return res;
}
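// Illustration (not part of the upstream file): a 1280x720 input with
// max_number_of_threads = 8 falls into the second branch (at least 640x360
// pixels and more than 4 threads available), so the encoder gets 4 threads and
// 2^1 x 2^1 = 2x2 tiles, while the superblock size stays
// AOM_SUPERBLOCK_SIZE_DYNAMIC because 64x64 superblocks are only forced when
// more than 4 threads are used.
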
bool ValidateEncodeParams(
    const webrtc::VideoFrameBuffer& frame_buffer,
    const VideoEncoderInterface::TemporalUnitSettings& tu_settings,
    const std::vector<VideoEncoderInterface::FrameEncodeSettings>&
        frame_settings,
    const std::array<absl::optional<Resolution>, 8>& last_resolution_in_buffer,
    aom_rc_mode rc_mode) {
  if (frame_settings.empty()) {
    RTC_LOG(LS_ERROR) << "No frame settings provided.";
    return false;
  }

  auto in_range = [](int low, int high, int val) {
    return low <= val && val < high;
  };

  if (!in_range(kMinEffortLevel, kMaxEffortLevel + 1,
                tu_settings.effort_level)) {
    RTC_LOG(LS_ERROR) << "Unsupported effort level "
                      << tu_settings.effort_level;
    return false;
  }

  for (size_t i = 0; i < frame_settings.size(); ++i) {
    const VideoEncoderInterface::FrameEncodeSettings& settings =
        frame_settings[i];

    if (!settings.result_callback) {
      RTC_LOG(LS_ERROR) << "No result callback function provided.";
      return false;
    }

    if (!in_range(0, kMaxSpatialLayersWtf, settings.spatial_id)) {
      RTC_LOG(LS_ERROR) << "invalid spatial id " << settings.spatial_id;
      return false;
    }

    if (!in_range(0, kMaxTemporalLayers, settings.temporal_id)) {
      RTC_LOG(LS_ERROR) << "invalid temporal id " << settings.temporal_id;
      return false;
    }

    if ((settings.frame_type == FrameType::kKeyframe ||
         settings.frame_type == FrameType::kStartFrame) &&
        !settings.reference_buffers.empty()) {
      RTC_LOG(LS_ERROR) << "Reference buffers can not be used for keyframes.";
      return false;
    }

    if ((settings.frame_type == FrameType::kKeyframe ||
         settings.frame_type == FrameType::kStartFrame) &&
        !settings.update_buffer) {
      RTC_LOG(LS_ERROR)
          << "Buffer to update must be specified for keyframe/startframe";
      return false;
    }

    if (settings.update_buffer &&
        !in_range(0, kNumBuffers, *settings.update_buffer)) {
      RTC_LOG(LS_ERROR) << "Invalid update buffer id.";
      return false;
    }

    if (settings.reference_buffers.size() > kMaxReferences) {
      RTC_LOG(LS_ERROR) << "Too many referenced buffers.";
      return false;
    }

    for (size_t j = 0; j < settings.reference_buffers.size(); ++j) {
      if (!in_range(0, kNumBuffers, settings.reference_buffers[j])) {
        RTC_LOG(LS_ERROR) << "Invalid reference buffer id.";
        return false;
      }

      // Figure out which frame resolution a certain buffer will hold when the
      // frame described by `settings` is encoded.
      absl::optional<Resolution> referenced_resolution;
      bool keyframe_on_previous_layer = false;

      // Will some other frame in this temporal unit update the buffer?
      for (size_t k = 0; k < i; ++k) {
        if (frame_settings[k].frame_type == FrameType::kKeyframe) {
          keyframe_on_previous_layer = true;
          referenced_resolution.reset();
        }
        if (frame_settings[k].update_buffer == settings.reference_buffers[j]) {
          referenced_resolution = frame_settings[k].resolution;
        }
      }

      // Not updated by another frame in the temporal unit, what is the
      // resolution of the last frame stored into that buffer?
      if (!referenced_resolution && !keyframe_on_previous_layer) {
        referenced_resolution =
            last_resolution_in_buffer[settings.reference_buffers[j]];
      }

      if (!referenced_resolution) {
        RTC_LOG(LS_ERROR) << "Referenced buffer holds no frame.";
        return false;
      }

      if (!GetScalingFactor(*referenced_resolution, settings.resolution)) {
        RTC_LOG(LS_ERROR)
            << "Required resolution scaling factor not supported.";
        return false;
      }

      for (size_t l = i + 1; l < settings.reference_buffers.size(); ++l) {
        if (settings.reference_buffers[i] == settings.reference_buffers[l]) {
          RTC_LOG(LS_ERROR) << "Duplicate reference buffer specified.";
          return false;
        }
      }
    }

    if ((rc_mode == AOM_CBR &&
         absl::holds_alternative<Cqp>(settings.rate_options)) ||
        (rc_mode == AOM_Q &&
         absl::holds_alternative<Cbr>(settings.rate_options))) {
      RTC_LOG(LS_ERROR) << "Invalid rate options, encoder configured with "
                        << (rc_mode == AOM_CBR ? "AOM_CBR" : "AOM_Q");
      return false;
    }

    for (size_t j = i + 1; j < frame_settings.size(); ++j) {
      if (settings.spatial_id >= frame_settings[j].spatial_id) {
        RTC_LOG(LS_ERROR) << "Frame spatial id specified out of order.";
        return false;
      }
    }
  }

  return true;
}
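// Illustration (not part of the upstream file): a typical valid temporal unit
// for two spatial layers is one FrameEncodeSettings with spatial_id = 0 that
// references and updates buffer 0, followed by one with spatial_id = 1 that
// references buffer 0 and updates buffer 1. Spatial ids must be strictly
// increasing within the unit, every referenced buffer must already hold a
// frame (or be updated by an earlier frame in the same unit), and the
// resolution change relative to the referenced frame must be one of the
// supported scaling factors.
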
void PrepareInputImage(const VideoFrameBuffer& input_buffer,
                       aom_img_ptr& out_aom_image) {
  aom_img_fmt_t input_format;
  switch (input_buffer.type()) {
    case VideoFrameBuffer::Type::kI420:
      input_format = AOM_IMG_FMT_I420;
      break;
    case VideoFrameBuffer::Type::kNV12:
      input_format = AOM_IMG_FMT_NV12;
      break;
    default:
      RTC_CHECK_NOTREACHED();
      return;
  }

  if (!out_aom_image || out_aom_image->fmt != input_format ||
      static_cast<int>(out_aom_image->w) != input_buffer.width() ||
      static_cast<int>(out_aom_image->h) != input_buffer.height()) {
    out_aom_image.reset(
        aom_img_wrap(/*img=*/nullptr, input_format, input_buffer.width(),
                     input_buffer.height(), /*align=*/1, /*img_data=*/nullptr));

    RTC_LOG(LS_WARNING) << __FUNCTION__ << " input_format=" << input_format
                        << " input_buffer.width()=" << input_buffer.width()
                        << " input_buffer.height()=" << input_buffer.height()
                        << " w=" << out_aom_image->w
                        << " h=" << out_aom_image->h
                        << " d_w=" << out_aom_image->d_w
                        << " d_h=" << out_aom_image->d_h
                        << " r_w=" << out_aom_image->r_w
                        << " r_h=" << out_aom_image->r_h;
  }

  if (input_format == AOM_IMG_FMT_I420) {
    const I420BufferInterface* i420_buffer = input_buffer.GetI420();
    RTC_DCHECK(i420_buffer);
    out_aom_image->planes[AOM_PLANE_Y] =
        const_cast<unsigned char*>(i420_buffer->DataY());
    out_aom_image->planes[AOM_PLANE_U] =
        const_cast<unsigned char*>(i420_buffer->DataU());
    out_aom_image->planes[AOM_PLANE_V] =
        const_cast<unsigned char*>(i420_buffer->DataV());
    out_aom_image->stride[AOM_PLANE_Y] = i420_buffer->StrideY();
    out_aom_image->stride[AOM_PLANE_U] = i420_buffer->StrideU();
    out_aom_image->stride[AOM_PLANE_V] = i420_buffer->StrideV();
  } else {
    const NV12BufferInterface* nv12_buffer = input_buffer.GetNV12();
    RTC_DCHECK(nv12_buffer);
    out_aom_image->planes[AOM_PLANE_Y] =
        const_cast<unsigned char*>(nv12_buffer->DataY());
    out_aom_image->planes[AOM_PLANE_U] =
        const_cast<unsigned char*>(nv12_buffer->DataUV());
    out_aom_image->planes[AOM_PLANE_V] = nullptr;
    out_aom_image->stride[AOM_PLANE_Y] = nv12_buffer->StrideY();
    out_aom_image->stride[AOM_PLANE_U] = nv12_buffer->StrideUV();
    out_aom_image->stride[AOM_PLANE_V] = 0;
  }
}

aom_svc_ref_frame_config_t GetSvcRefFrameConfig(
    const VideoEncoderInterface::FrameEncodeSettings& settings) {
  // Buffer alias to use for each position. In particular, when there are two
  // buffers being used, prefer to alias them as LAST and GOLDEN, since the AV1
  // bitstream format has dedicated fields for them. See last_frame_idx and
  // golden_frame_idx in the AV1 spec
  // https://aomediacodec.github.io/av1-spec/av1-spec.pdf.

  // Libaom is also compiled for RTC, which limits the number of references to
  // at most three, and they must be aliased as LAST, GOLDEN and ALTREF. Also
  // note that libaom favors LAST the most, and GOLDEN second most, so buffers
  // should be specified in order of how useful they are for prediction. Libaom
  // could be updated to make LAST, GOLDEN and ALTREF equivalent, but that is
  // not a priority for now. All aliases can be used to update buffers.
  // TODO: Automatically select LAST, GOLDEN and ALTREF depending on previous
  // buffer usage.
  static constexpr int kPreferedAlias[] = {0,  // LAST
                                           3,  // GOLDEN
                                           6,  // ALTREF
                                           1, 2, 4, 5};

  aom_svc_ref_frame_config_t ref_frame_config = {};

  int alias_index = 0;
  if (!settings.reference_buffers.empty()) {
    for (size_t i = 0; i < settings.reference_buffers.size(); ++i) {
      ref_frame_config.ref_idx[kPreferedAlias[alias_index]] =
          settings.reference_buffers[i];
      ref_frame_config.reference[kPreferedAlias[alias_index]] = 1;
      alias_index++;
    }

    // Delta frames must not alias unused buffers, and since start frames only
    // update some buffers it is not safe to leave unused aliases to simply
    // point to buffer 0.
    for (size_t i = settings.reference_buffers.size();
         i < std::size(ref_frame_config.ref_idx); ++i) {
      ref_frame_config.ref_idx[kPreferedAlias[i]] =
          settings.reference_buffers.back();
    }
  }

  if (settings.update_buffer) {
    if (!absl::c_linear_search(settings.reference_buffers,
                               *settings.update_buffer)) {
      ref_frame_config.ref_idx[kPreferedAlias[alias_index]] =
          *settings.update_buffer;
      alias_index++;
    }
    ref_frame_config.refresh[*settings.update_buffer] = 1;
  }

  char buf[256];
  rtc::SimpleStringBuilder sb(buf);
  sb << " spatial_id=" << settings.spatial_id;
  sb << " ref_idx=[ ";
  for (auto r : ref_frame_config.ref_idx) {
    sb << r << " ";
  }
  sb << "] reference=[ ";
  for (auto r : ref_frame_config.reference) {
    sb << r << " ";
  }
  sb << "] refresh=[ ";
  for (auto r : ref_frame_config.refresh) {
    sb << r << " ";
  }
  sb << "]";

  RTC_LOG(LS_WARNING) << __FUNCTION__ << sb.str();

  return ref_frame_config;
}
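// Illustration (not part of the upstream file): with reference_buffers = {5, 2}
// and update_buffer = 7, buffer 5 is aliased as LAST (ref_idx[0]) and buffer 2
// as GOLDEN (ref_idx[3]), both marked in `reference`; the remaining aliases are
// pointed at buffer 2 (the last listed reference), except ALTREF (ref_idx[6]),
// which is re-pointed at buffer 7 so that refresh[7] = 1 can store the newly
// encoded frame there.
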
aom_svc_params_t GetSvcParams(
    const webrtc::VideoFrameBuffer& frame_buffer,
    const std::vector<VideoEncoderInterface::FrameEncodeSettings>&
        frame_settings) {
  aom_svc_params_t svc_params = {};
  svc_params.number_spatial_layers = frame_settings.back().spatial_id + 1;
  svc_params.number_temporal_layers = kMaxTemporalLayers;

  // TODO: What about svc_params.framerate_factor?
  // If `framerate_factor` is left at 0 then configured bitrate values will
  // not be picked up by libaom.
  for (int tid = 0; tid < svc_params.number_temporal_layers; ++tid) {
    svc_params.framerate_factor[tid] = 1;
  }

  // If the scaling factor is left at zero for unused layers a division by zero
  // will happen inside libaom, so default all layers to one.
  for (int sid = 0; sid < svc_params.number_spatial_layers; ++sid) {
    svc_params.scaling_factor_num[sid] = 1;
    svc_params.scaling_factor_den[sid] = 1;
  }

  for (const VideoEncoderInterface::FrameEncodeSettings& settings :
       frame_settings) {
    absl::optional<Rational> scaling_factor = GetScalingFactor(
        {frame_buffer.width(), frame_buffer.height()}, settings.resolution);
    RTC_CHECK(scaling_factor);
    svc_params.scaling_factor_num[settings.spatial_id] =
        scaling_factor->numerator;
    svc_params.scaling_factor_den[settings.spatial_id] =
        scaling_factor->denominator;

    const int flat_layer_id =
        settings.spatial_id * svc_params.number_temporal_layers +
        settings.temporal_id;

    RTC_LOG(LS_WARNING) << __FUNCTION__ << " flat_layer_id=" << flat_layer_id
                        << " num="
                        << svc_params.scaling_factor_num[settings.spatial_id]
                        << " den="
                        << svc_params.scaling_factor_den[settings.spatial_id];

    absl::visit(
        [&](auto&& arg) {
          using T = std::decay_t<decltype(arg)>;
          if constexpr (std::is_same_v<T, Cbr>) {
            // Libaom calculates the total bitrate across all spatial layers by
            // summing the bitrate of the last temporal layer in each spatial
            // layer. This means the bitrate for the top temporal layer always
            // has to be set, even if that temporal layer is not being encoded.
            const int last_temporal_layer_in_spatial_layer_id =
                settings.spatial_id * svc_params.number_temporal_layers +
                (kMaxTemporalLayers - 1);
            svc_params
                .layer_target_bitrate[last_temporal_layer_in_spatial_layer_id] =
                arg.target_bitrate.kbps();

            svc_params.layer_target_bitrate[flat_layer_id] =
                arg.target_bitrate.kbps();
            // When libaom is configured with `AOM_CBR` it will still limit QP
            // to stay between `min_quantizers` and `max_quantizers`. Set
            // `max_quantizers` to max QP to avoid the encoder overshooting.
            svc_params.max_quantizers[flat_layer_id] = kMaxQp;
            svc_params.min_quantizers[flat_layer_id] = 0;
          } else if constexpr (std::is_same_v<T, Cqp>) {
            // When libaom is configured with `AOM_Q` it will still look at the
            // `layer_target_bitrate` to determine whether the layer is disabled
            // or not. Set `layer_target_bitrate` to 1 so that libaom knows the
            // layer is active.
            svc_params.layer_target_bitrate[flat_layer_id] = 1;
            svc_params.max_quantizers[flat_layer_id] = arg.target_qp;
            svc_params.min_quantizers[flat_layer_id] = arg.target_qp;
            RTC_LOG(LS_WARNING) << __FUNCTION__ << " svc_params.qp["
                                << flat_layer_id << "]=" << arg.target_qp;
            // TODO: Does libaom look at both max and min, or should just one
            // of them be set?
          }
        },
        settings.rate_options);
  }

  char buf[512];
  rtc::SimpleStringBuilder sb(buf);
  sb << "GetSvcParams" << " layer bitrates kbps";
  for (int s = 0; s < svc_params.number_spatial_layers; ++s) {
    sb << " S" << s << "=[ ";
    for (int t = 0; t < svc_params.number_temporal_layers; ++t) {
      int id = s * svc_params.number_temporal_layers + t;
      sb << "T" << t << "=" << svc_params.layer_target_bitrate[id] << " ";
    }
    sb << "]";
  }

  RTC_LOG(LS_WARNING) << sb.str();

  return svc_params;
}
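// Illustration (not part of the upstream file): with number_temporal_layers
// fixed at kMaxTemporalLayers = 4, a CBR frame with spatial_id = 1 and
// temporal_id = 0 writes its bitrate at flat_layer_id = 1 * 4 + 0 = 4 and also
// at index 1 * 4 + 3 = 7, the top temporal layer of that spatial layer, which
// is the entry libaom sums when computing the total target bitrate.
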
void DoErrorCallback(std::vector<FrameEncodeSettings>& frame_settings) {
  for (FrameEncodeSettings& settings : frame_settings) {
    if (settings.result_callback) {
      std::move(settings.result_callback)({});
      // To avoid invoking any callback more than once.
      settings.result_callback = {};
    }
  }
}

void LibaomAv1Encoder::Encode(
    rtc::scoped_refptr<webrtc::VideoFrameBuffer> frame_buffer,
    const TemporalUnitSettings& tu_settings,
    std::vector<FrameEncodeSettings> frame_settings) {
  if (!ValidateEncodeParams(*frame_buffer, tu_settings, frame_settings,
                            last_resolution_in_buffer_, cfg_.rc_end_usage)) {
    DoErrorCallback(frame_settings);
    return;
  }

  if (tu_settings.effort_level != current_effort_level_) {
    // For RTC we use speed levels 6 to 10, with 8 being the default. Note that
    // low effort means higher speed.
    SET_OR_DO_ERROR_CALLBACK_AND_RETURN(AOME_SET_CPUUSED,
                                        8 - tu_settings.effort_level);
    current_effort_level_ = tu_settings.effort_level;
  }

  if (current_content_type_ != tu_settings.content_hint) {
    if (tu_settings.content_hint == VideoCodecMode::kScreensharing) {
      // TODO: Set speed 11?
      SET_OR_DO_ERROR_CALLBACK_AND_RETURN(AV1E_SET_TUNE_CONTENT,
                                          AOM_CONTENT_SCREEN);
      SET_OR_DO_ERROR_CALLBACK_AND_RETURN(AV1E_SET_ENABLE_PALETTE, 1);
    } else {
      SET_OR_DO_ERROR_CALLBACK_AND_RETURN(AV1E_SET_TUNE_CONTENT,
                                          AOM_CONTENT_DEFAULT);
      SET_OR_DO_ERROR_CALLBACK_AND_RETURN(AV1E_SET_ENABLE_PALETTE, 0);
    }
    current_content_type_ = tu_settings.content_hint;
  }

  if (cfg_.rc_end_usage == AOM_CBR) {
    DataRate accum_rate = DataRate::Zero();
    for (const FrameEncodeSettings& settings : frame_settings) {
      accum_rate += absl::get<Cbr>(settings.rate_options).target_bitrate;
    }
    cfg_.rc_target_bitrate = accum_rate.kbps();
    RTC_LOG(LS_WARNING) << __FUNCTION__
                        << " cfg_.rc_target_bitrate=" << cfg_.rc_target_bitrate;
  }

  if (static_cast<int>(cfg_.g_w) != frame_buffer->width() ||
      static_cast<int>(cfg_.g_h) != frame_buffer->height()) {
    RTC_LOG(LS_WARNING) << __FUNCTION__ << " resolution changed from "
                        << cfg_.g_w << "x" << cfg_.g_h << " to "
                        << frame_buffer->width() << "x"
                        << frame_buffer->height();
    ThreadTilesAndSuperblockSizeInfo ttsbi = GetThreadingTilesAndSuperblockSize(
        frame_buffer->width(), frame_buffer->height(), max_number_of_threads_);
    SET_OR_DO_ERROR_CALLBACK_AND_RETURN(AV1E_SET_SUPERBLOCK_SIZE,
                                        ttsbi.superblock_size);
    SET_OR_DO_ERROR_CALLBACK_AND_RETURN(AV1E_SET_TILE_ROWS,
                                        ttsbi.exp_tile_rows);
    SET_OR_DO_ERROR_CALLBACK_AND_RETURN(AV1E_SET_TILE_COLUMNS,
                                        ttsbi.exp_tile_colums);
    cfg_.g_threads = ttsbi.num_threads;
    cfg_.g_w = frame_buffer->width();
    cfg_.g_h = frame_buffer->height();
  }

  PrepareInputImage(*frame_buffer, image_to_encode_);

  // The bitrates calculated internally in libaom when `AV1E_SET_SVC_PARAMS` is
  // called depend on the currently configured `cfg_.rc_target_bitrate`. If the
  // total target bitrate is not updated first, a division by zero could happen.
  if (aom_codec_err_t ret = aom_codec_enc_config_set(&ctx_, &cfg_);
      ret != AOM_CODEC_OK) {
    RTC_LOG(LS_ERROR) << "aom_codec_enc_config_set returned " << ret;
    DoErrorCallback(frame_settings);
    return;
  }
  aom_svc_params_t svc_params = GetSvcParams(*frame_buffer, frame_settings);
  SET_OR_DO_ERROR_CALLBACK_AND_RETURN(AV1E_SET_SVC_PARAMS, &svc_params);

  // The libaom AV1 encoder requires that `aom_codec_encode` is called for
  // every spatial layer, even if no frame should be encoded for that layer.
  std::array<FrameEncodeSettings*, kMaxSpatialLayersWtf>
      settings_for_spatial_id;
  settings_for_spatial_id.fill(nullptr);
  FrameEncodeSettings settings_for_unused_layer;
  for (FrameEncodeSettings& settings : frame_settings) {
    settings_for_spatial_id[settings.spatial_id] = &settings;
  }

  for (int sid = frame_settings[0].spatial_id;
       sid < svc_params.number_spatial_layers; ++sid) {
    const bool layer_enabled = settings_for_spatial_id[sid] != nullptr;
    FrameEncodeSettings& settings = layer_enabled
                                        ? *settings_for_spatial_id[sid]
                                        : settings_for_unused_layer;

    aom_svc_layer_id_t layer_id = {
        .spatial_layer_id = sid,
        .temporal_layer_id = settings.temporal_id,
    };
    SET_OR_DO_ERROR_CALLBACK_AND_RETURN(AV1E_SET_SVC_LAYER_ID, &layer_id);
    aom_svc_ref_frame_config_t ref_config = GetSvcRefFrameConfig(settings);
    SET_OR_DO_ERROR_CALLBACK_AND_RETURN(AV1E_SET_SVC_REF_FRAME_CONFIG,
                                        &ref_config);

    // TODO: Duration can't be zero, but does it matter what it is when the
    // layer is not being encoded?
    TimeDelta duration = TimeDelta::Millis(1);
    if (layer_enabled) {
      if (const Cbr* cbr = absl::get_if<Cbr>(&settings.rate_options)) {
        duration = cbr->duration;
      } else {
        // TODO: What should the duration be when Cqp is used?
        duration = TimeDelta::Millis(1);
      }
    }

    RTC_LOG(LS_WARNING)
        << __FUNCTION__ << " timestamp="
        << (tu_settings.presentation_timestamp.ms() * kRtpTicksPerSecond / 1000)
        << " duration=" << (duration.ms() * kRtpTicksPerSecond / 1000)
        << " type="
        << (settings.frame_type == FrameType::kKeyframe ? "key" : "delta");
    aom_codec_err_t ret = aom_codec_encode(
        &ctx_, &*image_to_encode_, tu_settings.presentation_timestamp.ms() * 90,
        duration.ms() * 90,
        settings.frame_type == FrameType::kKeyframe ? AOM_EFLAG_FORCE_KF : 0);
    if (ret != AOM_CODEC_OK) {
      RTC_LOG(LS_WARNING) << "aom_codec_encode returned " << ret;
      DoErrorCallback(frame_settings);
      return;
    }

    if (!layer_enabled) {
      continue;
    }

    if (settings.frame_type == FrameType::kKeyframe) {
      last_resolution_in_buffer_ = {};
    }

    if (settings.update_buffer) {
      last_resolution_in_buffer_[*settings.update_buffer] = settings.resolution;
    }

    EncodedData result;
    aom_codec_iter_t iter = nullptr;
    while (const aom_codec_cx_pkt_t* pkt =
               aom_codec_get_cx_data(&ctx_, &iter)) {
      if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
        SET_OR_DO_ERROR_CALLBACK_AND_RETURN(AOME_GET_LAST_QUANTIZER_64,
                                            &result.encoded_qp);
        result.frame_type = pkt->data.frame.flags & AOM_EFLAG_FORCE_KF
                                ? FrameType::kKeyframe
                                : FrameType::kDeltaFrame;
        result.bitstream_data = EncodedImageBuffer::Create(
            static_cast<uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz);
        break;
      }
    }

    if (result.bitstream_data == nullptr) {
      DoErrorCallback(frame_settings);
      return;
    } else {
      RTC_CHECK(settings.result_callback);
      std::move(settings.result_callback)(result);
      // To avoid invoking any callback more than once.
      settings.result_callback = {};
    }
  }
}
}  // namespace

std::string LibaomAv1EncoderFactory::CodecName() const {
  return "AV1";
}

std::string LibaomAv1EncoderFactory::ImplementationName() const {
  return "Libaom";
}

std::map<std::string, std::string> LibaomAv1EncoderFactory::CodecSpecifics()
    const {
  return {};
}

VideoEncoderFactoryInterface::Capabilities
LibaomAv1EncoderFactory::GetEncoderCapabilities() const {
  return {
      .prediction_constraints =
          {.num_buffers = kNumBuffers,
           .max_references = kMaxReferences,
           .max_temporal_layers = kMaxTemporalLayers,
           .buffer_space_type = VideoEncoderFactoryInterface::Capabilities::
               PredictionConstraints::BufferSpaceType::kSingleKeyframe,
           .max_spatial_layers = kMaxSpatialLayersWtf,
           .scaling_factors = {kSupportedScalingFactors.begin(),
                               kSupportedScalingFactors.end()},
           .supported_frame_types = {FrameType::kKeyframe,
                                     FrameType::kStartFrame,
                                     FrameType::kDeltaFrame}},
      .input_constraints = {
          .min = {.width = 64, .height = 36},
          .max = {.width = 3840, .height = 2160},
          .pixel_alignment = 1,
          .input_formats = {kSupportedInputFormats.begin(),
                            kSupportedInputFormats.end()},
      },
      .encoding_formats = {{.sub_sampling = EncodingFormat::k420,
                            .bit_depth = 8}},
      .rate_control =
          {.qp_range = {0, kMaxQp},
           .rc_modes = {VideoEncoderFactoryInterface::RateControlMode::kCbr,
                        VideoEncoderFactoryInterface::RateControlMode::kCqp}},
      .performance = {.min_max_effort_level = {kMinEffortLevel,
                                               kMaxEffortLevel}},
  };
}

std::unique_ptr<VideoEncoderInterface> LibaomAv1EncoderFactory::CreateEncoder(
    const StaticEncoderSettings& settings,
    const std::map<std::string, std::string>& encoder_specific_settings) {
  auto encoder = std::make_unique<LibaomAv1Encoder>();
  if (!encoder->InitEncode(settings, encoder_specific_settings)) {
    return nullptr;
  }
  return encoder;
}

}  // namespace webrtc
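
// Usage sketch (not part of the upstream file; the settings value is left
// unspecified): the factory is the public entry point, e.g.
//
//   LibaomAv1EncoderFactory factory;
//   VideoEncoderFactoryInterface::StaticEncoderSettings settings = ...;
//   std::unique_ptr<VideoEncoderInterface> encoder =
//       factory.CreateEncoder(settings, /*encoder_specific_settings=*/{});
//   // `encoder` is null if libaom initialization failed. Otherwise call
//   // encoder->Encode(frame_buffer, tu_settings, frame_settings) once per
//   // temporal unit, with one FrameEncodeSettings entry per encoded spatial
//   // layer.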