diff --git a/api/video_codecs/BUILD.gn b/api/video_codecs/BUILD.gn index 6167c1fafa..a36bf442e6 100644 --- a/api/video_codecs/BUILD.gn +++ b/api/video_codecs/BUILD.gn @@ -342,6 +342,7 @@ rtc_library("simple_encoder_wrapper") { absl_deps = [ "//third_party/abseil-cpp/absl/algorithm:container", "//third_party/abseil-cpp/absl/functional:any_invocable", + "//third_party/abseil-cpp/absl/types:variant", ] } @@ -397,6 +398,8 @@ rtc_library("libaom_av1_encoder_factory_test") { "../../test:test_support", "../../test:video_test_support", ] + + absl_deps = [ "//third_party/abseil-cpp/absl/types:variant" ] } rtc_library("vp8_temporal_layers_factory") { diff --git a/api/video_codecs/libaom_av1_encoder_factory.cc b/api/video_codecs/libaom_av1_encoder_factory.cc index 52bf5d64aa..7e3c583c7f 100644 --- a/api/video_codecs/libaom_av1_encoder_factory.cc +++ b/api/video_codecs/libaom_av1_encoder_factory.cc @@ -27,7 +27,7 @@ #define SET_OR_DO_ERROR_CALLBACK_AND_RETURN(param_id, param_value) \ do { \ if (!SetEncoderControlParameters(&ctx_, param_id, param_value)) { \ - encode_result_callback({}); \ + DoErrorCallback(frame_settings); \ return; \ } \ } while (0) @@ -41,8 +41,9 @@ namespace webrtc { -using Cbr = VideoEncoderInterface::FrameEncodeSettings::Cbr; -using Cqp = VideoEncoderInterface::FrameEncodeSettings::Cqp; +using FrameEncodeSettings = VideoEncoderInterface::FrameEncodeSettings; +using Cbr = FrameEncodeSettings::Cbr; +using Cqp = FrameEncodeSettings::Cqp; using aom_img_ptr = std::unique_ptr; namespace { @@ -87,8 +88,7 @@ class LibaomAv1Encoder : public VideoEncoderInterface { void Encode(rtc::scoped_refptr frame_buffer, const TemporalUnitSettings& tu_settings, - const std::vector& frame_settings, - EncodeResultCallback encode_result_callback) override; + std::vector frame_settings) override; private: aom_img_ptr image_to_encode_ = aom_img_ptr(nullptr, aom_img_free); @@ -276,6 +276,11 @@ bool ValidateEncodeParams( const VideoEncoderInterface::FrameEncodeSettings& settings = 
frame_settings[i]; + if (!settings.result_callback) { + RTC_LOG(LS_ERROR) << "No result callback function provided."; + return false; + } + if (!in_range(0, kMaxSpatialLayersWtf, settings.spatial_id)) { RTC_LOG(LS_ERROR) << "invalid spatial id " << settings.spatial_id; return false; @@ -611,14 +616,23 @@ aom_svc_params_t GetSvcParams( return svc_params; } +void DoErrorCallback(std::vector& frame_settings) { + for (FrameEncodeSettings& settings : frame_settings) { + if (settings.result_callback) { + settings.result_callback({}); + // To avoid invoking any callback more than once. + settings.result_callback = {}; + } + } +} + void LibaomAv1Encoder::Encode( rtc::scoped_refptr frame_buffer, const TemporalUnitSettings& tu_settings, - const std::vector& frame_settings, - EncodeResultCallback encode_result_callback) { + std::vector frame_settings) { if (!ValidateEncodeParams(*frame_buffer, tu_settings, frame_settings, last_resolution_in_buffer_, cfg_.rc_end_usage)) { - encode_result_callback({}); + DoErrorCallback(frame_settings); return; } @@ -681,7 +695,7 @@ void LibaomAv1Encoder::Encode( if (aom_codec_err_t ret = aom_codec_enc_config_set(&ctx_, &cfg_); ret != AOM_CODEC_OK) { RTC_LOG(LS_ERROR) << "aom_codec_enc_config_set returned " << ret; - encode_result_callback({}); + DoErrorCallback(frame_settings); return; } aom_svc_params_t svc_params = GetSvcParams(*frame_buffer, frame_settings); @@ -689,20 +703,20 @@ void LibaomAv1Encoder::Encode( // The libaom AV1 encoder requires that `aom_codec_encode` is called for // every spatial layer, even if no frame should be encoded for that layer. 
- std::array + std::array settings_for_spatial_id; settings_for_spatial_id.fill(nullptr); FrameEncodeSettings settings_for_unused_layer; - for (const FrameEncodeSettings& settings : frame_settings) { + for (FrameEncodeSettings& settings : frame_settings) { settings_for_spatial_id[settings.spatial_id] = &settings; } for (int sid = frame_settings[0].spatial_id; sid < svc_params.number_spatial_layers; ++sid) { const bool layer_enabled = settings_for_spatial_id[sid] != nullptr; - const FrameEncodeSettings& settings = layer_enabled - ? *settings_for_spatial_id[sid] - : settings_for_unused_layer; + FrameEncodeSettings& settings = layer_enabled + ? *settings_for_spatial_id[sid] + : settings_for_unused_layer; aom_svc_layer_id_t layer_id = { .spatial_layer_id = sid, @@ -713,7 +727,6 @@ void LibaomAv1Encoder::Encode( SET_OR_DO_ERROR_CALLBACK_AND_RETURN(AV1E_SET_SVC_REF_FRAME_CONFIG, &ref_config); - // TD: Why does the libaom have both `encode_timestamp_` and `duration`? // TD: Duration can't be zero, what does it matter when the layer is // not being encoded? TimeDelta duration = TimeDelta::Millis(1); @@ -738,7 +751,7 @@ void LibaomAv1Encoder::Encode( settings.frame_type == FrameType::kKeyframe ? AOM_EFLAG_FORCE_KF : 0); if (ret != AOM_CODEC_OK) { RTC_LOG(LS_WARNING) << "aom_codec_encode returned " << ret; - encode_result_callback({}); + DoErrorCallback(frame_settings); return; } @@ -766,17 +779,18 @@ void LibaomAv1Encoder::Encode( : FrameType::kDeltaFrame; result.bitstream_data = EncodedImageBuffer::Create( static_cast(pkt->data.frame.buf), pkt->data.frame.sz); - result.spatial_id = sid; break; } } if (result.bitstream_data == nullptr) { - // TD: How should error callbacks be handled, only call once? - encode_result_callback({}); + DoErrorCallback(frame_settings); return; } else { - encode_result_callback(result); + RTC_CHECK(settings.result_callback); + settings.result_callback(result); + // To avoid invoking any callback more than once. 
+ settings.result_callback = {}; } } } @@ -786,7 +800,6 @@ std::string LibaomAv1EncoderFactory::CodecName() const { return "AV1"; } -// TD: it should also possible to expose SW/HW/driver version. std::string LibaomAv1EncoderFactory::ImplementationName() const { return "Libaom"; } diff --git a/api/video_codecs/libaom_av1_encoder_factory_test.cc b/api/video_codecs/libaom_av1_encoder_factory_test.cc index eacb5a7a85..deb03aad62 100644 --- a/api/video_codecs/libaom_av1_encoder_factory_test.cc +++ b/api/video_codecs/libaom_av1_encoder_factory_test.cc @@ -14,6 +14,7 @@ #include #include +#include "absl/types/variant.h" #include "api/video/i420_buffer.h" #include "api/video_codecs/video_decoder.h" #include "api/video_codecs/video_encoder_interface.h" @@ -59,7 +60,7 @@ std::string OutPath() { class EncodeResults { public: - EncodeResultCallback CallBack() { + EncodeResultCallback Cb() { return [&](const EncodeResult& result) { results_.push_back(result); }; } @@ -68,7 +69,7 @@ class EncodeResults { RTC_CHECK(false); return nullptr; } - return std::get_if(&results_[index]); + return absl::get_if(&results_[index]); } private: @@ -172,8 +173,13 @@ class FrameEncoderSettingsBuilder { return *this; } - VideoEncoderInterface::FrameEncodeSettings Build() { - return frame_encode_settings_; + FrameEncoderSettingsBuilder& Cb(EncodeResultCallback cb) { + frame_encode_settings_.result_callback = std::move(cb); + return *this; + } + + operator VideoEncoderInterface::FrameEncodeSettings&&() { + return std::move(frame_encode_settings_); } private: @@ -182,6 +188,16 @@ class FrameEncoderSettingsBuilder { using Fb = FrameEncoderSettingsBuilder; +// Since FrameEncodeSettings is move-only, initializer-list initialization won't
+template +std::vector ToVec( + VideoEncoderInterface::FrameEncodeSettings (&&settings)[N]) { + return std::vector( + std::make_move_iterator(std::begin(settings)), + std::make_move_iterator(std::end(settings))); +} + // For reasonable debug printout when an EXPECT fail. struct Resolution { explicit Resolution(const VideoFrame& frame) @@ -253,15 +269,14 @@ TEST(LibaomAv1Encoder, KeyframeUpdatesSpecifiedBuffer) { auto raw_delta = frame_reader->PullFrame(); enc->Encode(raw_key, {.presentation_timestamp = Timestamp::Millis(0)}, - {Fb().Rate(kCbr).Res(640, 360).Upd(5).Key().Build()}, - res.CallBack()); + ToVec({Fb().Rate(kCbr).Res(640, 360).Upd(5).Key().Cb(res.Cb())})); ASSERT_THAT(res.FrameAt(0), NotNull()); VideoFrame decoded_key = dec.Decode(*res.FrameAt(0)); EXPECT_THAT(Resolution(decoded_key), ResolutionIs(640, 360)); EXPECT_THAT(Psnr(raw_key, decoded_key), Gt(40)); enc->Encode(raw_delta, {.presentation_timestamp = Timestamp::Millis(100)}, - {Fb().Rate(kCbr).Res(640, 360).Ref({0}).Build()}, res.CallBack()); + ToVec({Fb().Rate(kCbr).Res(640, 360).Ref({0}).Cb(res.Cb())})); ASSERT_THAT(res.FrameAt(1), Eq(nullptr)); } @@ -271,20 +286,20 @@ TEST(LibaomAv1Encoder, MidTemporalUnitKeyframeResetsBuffers) { EncodeResults res; Av1Decoder dec; - enc->Encode(frame_reader->PullFrame(), - {.presentation_timestamp = Timestamp::Millis(0)}, - {Fb().Rate(kCbr).Res(160, 90).S(0).Upd(0).Key().Build(), - Fb().Rate(kCbr).Res(320, 180).S(1).Ref({0}).Build(), - Fb().Rate(kCbr).Res(640, 360).S(2).Ref({0}).Build()}, - res.CallBack()); + enc->Encode( + frame_reader->PullFrame(), + {.presentation_timestamp = Timestamp::Millis(0)}, + ToVec({Fb().Rate(kCbr).Res(160, 90).S(0).Upd(0).Key().Cb(res.Cb()), + Fb().Rate(kCbr).Res(320, 180).S(1).Ref({0}).Cb(res.Cb()), + Fb().Rate(kCbr).Res(640, 360).S(2).Ref({0}).Cb(res.Cb())})); ASSERT_THAT(res.FrameAt(2), NotNull()); - enc->Encode(frame_reader->PullFrame(), - {.presentation_timestamp = Timestamp::Millis(100)}, - {Fb().Rate(kCbr).Res(160, 
90).S(0).Upd(0).Ref({0}).Build(), - Fb().Rate(kCbr).Res(320, 180).S(1).Upd(1).Key().Build(), - Fb().Rate(kCbr).Res(640, 360).S(2).Ref({0}).Build()}, - res.CallBack()); + enc->Encode( + frame_reader->PullFrame(), + {.presentation_timestamp = Timestamp::Millis(100)}, + ToVec({Fb().Rate(kCbr).Res(160, 90).S(0).Upd(0).Ref({0}).Cb(res.Cb()), + Fb().Rate(kCbr).Res(320, 180).S(1).Upd(1).Key().Cb(res.Cb()), + Fb().Rate(kCbr).Res(640, 360).S(2).Ref({0}).Cb(res.Cb())})); ASSERT_THAT(res.FrameAt(3), Eq(nullptr)); } @@ -295,20 +310,15 @@ TEST(LibaomAv1Encoder, ResolutionSwitching) { rtc::scoped_refptr in0 = frame_reader->PullFrame(); enc->Encode(in0, {.presentation_timestamp = Timestamp::Millis(0)}, - {Fb().Rate(kCbr).Res(320, 180).Upd(0).Key().Build()}, - res.CallBack()); + ToVec({Fb().Rate(kCbr).Res(320, 180).Upd(0).Key().Cb(res.Cb())})); rtc::scoped_refptr in1 = frame_reader->PullFrame(); enc->Encode(in1, {.presentation_timestamp = Timestamp::Millis(100)}, - {Fb().Rate(kCbr).Res(640, 360).Ref({0}).Build()}, res.CallBack()); + ToVec({Fb().Rate(kCbr).Res(640, 360).Ref({0}).Cb(res.Cb())})); rtc::scoped_refptr in2 = frame_reader->PullFrame(); enc->Encode(in2, {.presentation_timestamp = Timestamp::Millis(200)}, - {Fb().Rate(kCbr).Res(160, 90).Ref({0}).Build()}, res.CallBack()); - - EXPECT_THAT(res.FrameAt(0), Field(&EncodedData::spatial_id, 0)); - EXPECT_THAT(res.FrameAt(1), Field(&EncodedData::spatial_id, 0)); - EXPECT_THAT(res.FrameAt(2), Field(&EncodedData::spatial_id, 0)); + ToVec({Fb().Rate(kCbr).Res(160, 90).Ref({0}).Cb(res.Cb())})); Av1Decoder dec; VideoFrame f0 = dec.Decode(*res.FrameAt(0)); @@ -333,26 +343,21 @@ TEST(LibaomAv1Encoder, InputResolutionSwitching) { rtc::scoped_refptr in0 = frame_reader->PullFrame(); enc->Encode(in0, {.presentation_timestamp = Timestamp::Millis(0)}, - {Fb().Rate(kCbr).Res(160, 90).Upd(0).Key().Build()}, - res.CallBack()); + ToVec({Fb().Rate(kCbr).Res(160, 90).Upd(0).Key().Cb(res.Cb())})); rtc::scoped_refptr in1 = frame_reader->PullFrame( 
/*frame_num=*/nullptr, /*resolution=*/{320, 180}, /*framerate_scale=*/{1, 1}); enc->Encode(in1, {.presentation_timestamp = Timestamp::Millis(100)}, - {Fb().Rate(kCbr).Res(160, 90).Ref({0}).Build()}, res.CallBack()); + ToVec({Fb().Rate(kCbr).Res(160, 90).Ref({0}).Cb(res.Cb())})); rtc::scoped_refptr in2 = frame_reader->PullFrame( /*frame_num=*/nullptr, /*resolution=*/{160, 90}, /*framerate_scale=*/{1, 1}); enc->Encode(in2, {.presentation_timestamp = Timestamp::Millis(200)}, - {Fb().Rate(kCbr).Res(160, 90).Ref({0}).Build()}, res.CallBack()); - - EXPECT_THAT(res.FrameAt(0), Field(&EncodedData::spatial_id, 0)); - EXPECT_THAT(res.FrameAt(1), Field(&EncodedData::spatial_id, 0)); - EXPECT_THAT(res.FrameAt(2), Field(&EncodedData::spatial_id, 0)); + ToVec({Fb().Rate(kCbr).Res(160, 90).Ref({0}).Cb(res.Cb())})); Av1Decoder dec; VideoFrame f0 = dec.Decode(*res.FrameAt(0)); @@ -380,24 +385,27 @@ TEST(LibaomAv1Encoder, TempoSpatial) { const Cbr k20Fps{.duration = TimeDelta::Millis(50), .target_bitrate = DataRate::KilobitsPerSec(500)}; - enc->Encode(frame_reader->PullFrame(), - {.presentation_timestamp = Timestamp::Millis(0)}, - {Fb().Rate(k10Fps).Res(160, 90).S(0).Upd(0).Key().Build(), - Fb().Rate(k10Fps).Res(320, 180).S(1).Ref({0}).Upd(1).Build(), - Fb().Rate(k20Fps).Res(640, 360).S(2).Ref({1}).Upd(2).Build()}, - res.CallBack()); + enc->Encode( + frame_reader->PullFrame(), + {.presentation_timestamp = Timestamp::Millis(0)}, + ToVec( + {Fb().Rate(k10Fps).Res(160, 90).S(0).Upd(0).Key().Cb(res.Cb()), + Fb().Rate(k10Fps).Res(320, 180).S(1).Ref({0}).Upd(1).Cb(res.Cb()), + Fb().Rate(k20Fps).Res(640, 360).S(2).Ref({1}).Upd(2).Cb(res.Cb())})); enc->Encode(frame_reader->PullFrame(), {.presentation_timestamp = Timestamp::Millis(50)}, - {Fb().Rate(k20Fps).Res(640, 360).S(2).Ref({2}).Upd(2).Build()}, - res.CallBack()); + ToVec({Fb().Rate(k20Fps).Res(640, 360).S(2).Ref({2}).Upd(2).Cb( + res.Cb())})); rtc::scoped_refptr frame = frame_reader->PullFrame(); - enc->Encode(frame, 
{.presentation_timestamp = Timestamp::Millis(100)}, - {Fb().Rate(k10Fps).Res(160, 90).S(0).Ref({0}).Upd(0).Build(), - Fb().Rate(k10Fps).Res(320, 180).S(1).Ref({0, 1}).Upd(1).Build(), - Fb().Rate(k20Fps).Res(640, 360).S(2).Ref({1, 2}).Upd(2).Build()}, - res.CallBack()); + enc->Encode( + frame, {.presentation_timestamp = Timestamp::Millis(100)}, + ToVec( + {Fb().Rate(k10Fps).Res(160, 90).S(0).Ref({0}).Upd(0).Cb(res.Cb()), + Fb().Rate(k10Fps).Res(320, 180).S(1).Ref({0, 1}).Upd(1).Cb(res.Cb()), + Fb().Rate(k20Fps).Res(640, 360).S(2).Ref({1, 2}).Upd(2).Cb( + res.Cb())})); Av1Decoder dec; EXPECT_THAT(Resolution(dec.Decode(*res.FrameAt(0))), ResolutionIs(160, 90)); @@ -418,22 +426,23 @@ TEST(DISABLED_LibaomAv1Encoder, InvertedTempoSpatial) { auto enc = LibaomAv1EncoderFactory().CreateEncoder(kCbrEncoderSettings, {}); EncodeResults res; - enc->Encode(frame_reader->PullFrame(), - {.presentation_timestamp = Timestamp::Millis(0)}, - {Fb().Rate(kCbr).Res(320, 180).S(0).Upd(0).Key().Build(), - Fb().Rate(kCbr).Res(640, 360).S(1).Ref({0}).Upd(1).Build()}, - res.CallBack()); + enc->Encode( + frame_reader->PullFrame(), + {.presentation_timestamp = Timestamp::Millis(0)}, + ToVec({Fb().Rate(kCbr).Res(320, 180).S(0).Upd(0).Key().Cb(res.Cb()), + Fb().Rate(kCbr).Res(640, 360).S(1).Ref({0}).Upd(1).Cb(res.Cb())})); - enc->Encode(frame_reader->PullFrame(), - {.presentation_timestamp = Timestamp::Millis(100)}, - {Fb().Rate(kCbr).Res(320, 180).S(0).Ref({0}).Upd(0).Build()}, - res.CallBack()); + enc->Encode( + frame_reader->PullFrame(), + {.presentation_timestamp = Timestamp::Millis(100)}, + ToVec({Fb().Rate(kCbr).Res(320, 180).S(0).Ref({0}).Upd(0).Cb(res.Cb())})); rtc::scoped_refptr frame = frame_reader->PullFrame(); - enc->Encode(frame, {.presentation_timestamp = Timestamp::Millis(200)}, - {Fb().Rate(kCbr).Res(320, 180).S(0).Ref({0}).Upd(0).Build(), - Fb().Rate(kCbr).Res(640, 360).S(1).Ref({1, 0}).Upd(1).Build()}, - res.CallBack()); + enc->Encode( + frame, {.presentation_timestamp = 
Timestamp::Millis(200)}, + ToVec({Fb().Rate(kCbr).Res(320, 180).S(0).Ref({0}).Upd(0).Cb(res.Cb()), + Fb().Rate(kCbr).Res(640, 360).S(1).Ref({1, 0}).Upd(1).Cb( + res.Cb())})); Av1Decoder dec; EXPECT_THAT(Resolution(dec.Decode(*res.FrameAt(0))), ResolutionIs(320, 180)); @@ -448,25 +457,26 @@ TEST(LibaomAv1Encoder, SkipMidLayer) { auto enc = LibaomAv1EncoderFactory().CreateEncoder(kCbrEncoderSettings, {}); EncodeResults res; - enc->Encode(frame_reader->PullFrame(), - {.presentation_timestamp = Timestamp::Millis(0)}, - {Fb().Rate(kCbr).Res(160, 90).S(0).Upd(0).Key().Build(), - Fb().Rate(kCbr).Res(320, 180).S(1).Ref({0}).Upd(1).Build(), - Fb().Rate(kCbr).Res(640, 360).S(2).Ref({1}).Upd(2).Build()}, - res.CallBack()); + enc->Encode( + frame_reader->PullFrame(), + {.presentation_timestamp = Timestamp::Millis(0)}, + ToVec({Fb().Rate(kCbr).Res(160, 90).S(0).Upd(0).Key().Cb(res.Cb()), + Fb().Rate(kCbr).Res(320, 180).S(1).Ref({0}).Upd(1).Cb(res.Cb()), + Fb().Rate(kCbr).Res(640, 360).S(2).Ref({1}).Upd(2).Cb(res.Cb())})); - enc->Encode(frame_reader->PullFrame(), - {.presentation_timestamp = Timestamp::Millis(100)}, - {Fb().Rate(kCbr).Res(160, 90).S(0).Ref({0}).Upd(0).Build(), - Fb().Rate(kCbr).Res(640, 360).S(2).Ref({2}).Upd(2).Build()}, - res.CallBack()); + enc->Encode( + frame_reader->PullFrame(), + {.presentation_timestamp = Timestamp::Millis(100)}, + ToVec({Fb().Rate(kCbr).Res(160, 90).S(0).Ref({0}).Upd(0).Cb(res.Cb()), + Fb().Rate(kCbr).Res(640, 360).S(2).Ref({2}).Upd(2).Cb(res.Cb())})); rtc::scoped_refptr frame = frame_reader->PullFrame(); - enc->Encode(frame, {.presentation_timestamp = Timestamp::Millis(200)}, - {Fb().Rate(kCbr).Res(160, 90).S(0).Ref({0}).Upd(0).Build(), - Fb().Rate(kCbr).Res(320, 180).S(1).Ref({0, 1}).Upd(1).Build(), - Fb().Rate(kCbr).Res(640, 360).S(2).Ref({1, 2}).Upd(2).Build()}, - res.CallBack()); + enc->Encode( + frame, {.presentation_timestamp = Timestamp::Millis(200)}, + ToVec({Fb().Rate(kCbr).Res(160, 90).S(0).Ref({0}).Upd(0).Cb(res.Cb()), + 
Fb().Rate(kCbr).Res(320, 180).S(1).Ref({0, 1}).Upd(1).Cb(res.Cb()), + Fb().Rate(kCbr).Res(640, 360).S(2).Ref({1, 2}).Upd(2).Cb( + res.Cb())})); Av1Decoder dec; EXPECT_THAT(Resolution(dec.Decode(*res.FrameAt(0))), ResolutionIs(160, 90)); @@ -486,26 +496,26 @@ TEST(LibaomAv1Encoder, L3T1) { auto frame_reader = CreateFrameReader(); auto enc = LibaomAv1EncoderFactory().CreateEncoder(kCbrEncoderSettings, {}); EncodeResults res; - Av1Decoder dec; - enc->Encode(frame_reader->PullFrame(), - {.presentation_timestamp = Timestamp::Millis(0)}, - {Fb().Rate(kCbr).Res(160, 90).S(0).Upd(0).Key().Build(), - Fb().Rate(kCbr).Res(320, 180).S(1).Ref({0}).Upd(1).Build(), - Fb().Rate(kCbr).Res(640, 360).S(2).Ref({1}).Upd(2).Build()}, - res.CallBack()); + enc->Encode( + frame_reader->PullFrame(), + {.presentation_timestamp = Timestamp::Millis(0)}, + ToVec({Fb().Rate(kCbr).Res(160, 90).S(0).Upd(0).Key().Cb(res.Cb()), + Fb().Rate(kCbr).Res(320, 180).S(1).Ref({0}).Upd(1).Cb(res.Cb()), + Fb().Rate(kCbr).Res(640, 360).S(2).Ref({1}).Upd(2).Cb(res.Cb())})); EXPECT_THAT(Resolution(dec.Decode(*res.FrameAt(0))), ResolutionIs(160, 90)); EXPECT_THAT(Resolution(dec.Decode(*res.FrameAt(1))), ResolutionIs(320, 180)); EXPECT_THAT(Resolution(dec.Decode(*res.FrameAt(2))), ResolutionIs(640, 360)); auto tu1_frame = frame_reader->PullFrame(); - enc->Encode(tu1_frame, {.presentation_timestamp = Timestamp::Millis(100)}, - {Fb().Rate(kCbr).Res(160, 90).S(0).Ref({0}).Upd(0).Build(), - Fb().Rate(kCbr).Res(320, 180).S(1).Ref({1, 0}).Upd(1).Build(), - Fb().Rate(kCbr).Res(640, 360).S(2).Ref({2, 1}).Upd(2).Build()}, - res.CallBack()); + enc->Encode( + tu1_frame, {.presentation_timestamp = Timestamp::Millis(100)}, + ToVec({Fb().Rate(kCbr).Res(160, 90).S(0).Ref({0}).Upd(0).Cb(res.Cb()), + Fb().Rate(kCbr).Res(320, 180).S(1).Ref({1, 0}).Upd(1).Cb(res.Cb()), + Fb().Rate(kCbr).Res(640, 360).S(2).Ref({2, 1}).Upd(2).Cb( + res.Cb())})); EXPECT_THAT(Resolution(dec.Decode(*res.FrameAt(3))), ResolutionIs(160, 90)); 
EXPECT_THAT(Resolution(dec.Decode(*res.FrameAt(4))), ResolutionIs(320, 180)); @@ -515,11 +525,12 @@ TEST(LibaomAv1Encoder, L3T1) { EXPECT_THAT(Psnr(tu1_frame, f_tu1), Gt(40)); auto tu2_frame = frame_reader->PullFrame(); - enc->Encode(tu2_frame, {.presentation_timestamp = Timestamp::Millis(200)}, - {Fb().Rate(kCbr).Res(160, 90).S(0).Ref({0}).Upd(0).Build(), - Fb().Rate(kCbr).Res(320, 180).S(1).Ref({1, 0}).Upd(1).Build(), - Fb().Rate(kCbr).Res(640, 360).S(2).Ref({2, 1}).Upd(2).Build()}, - res.CallBack()); + enc->Encode( + tu2_frame, {.presentation_timestamp = Timestamp::Millis(200)}, + ToVec({Fb().Rate(kCbr).Res(160, 90).S(0).Ref({0}).Upd(0).Cb(res.Cb()), + Fb().Rate(kCbr).Res(320, 180).S(1).Ref({1, 0}).Upd(1).Cb(res.Cb()), + Fb().Rate(kCbr).Res(640, 360).S(2).Ref({2, 1}).Upd(2).Cb( + res.Cb())})); EXPECT_THAT(Resolution(dec.Decode(*res.FrameAt(6))), ResolutionIs(160, 90)); EXPECT_THAT(Resolution(dec.Decode(*res.FrameAt(7))), ResolutionIs(320, 180)); @@ -538,12 +549,12 @@ TEST(LibaomAv1Encoder, L3T1_KEY) { Av1Decoder dec_s1; Av1Decoder dec_s2; - enc->Encode(frame_reader->PullFrame(), - {.presentation_timestamp = Timestamp::Millis(0)}, - {Fb().Rate(kCbr).Res(160, 90).S(0).Upd(0).Key().Build(), - Fb().Rate(kCbr).Res(320, 180).S(1).Ref({0}).Upd(1).Build(), - Fb().Rate(kCbr).Res(640, 360).S(2).Ref({1}).Upd(2).Build()}, - res.CallBack()); + enc->Encode( + frame_reader->PullFrame(), + {.presentation_timestamp = Timestamp::Millis(0)}, + ToVec({Fb().Rate(kCbr).Res(160, 90).S(0).Upd(0).Key().Cb(res.Cb()), + Fb().Rate(kCbr).Res(320, 180).S(1).Ref({0}).Upd(1).Cb(res.Cb()), + Fb().Rate(kCbr).Res(640, 360).S(2).Ref({1}).Upd(2).Cb(res.Cb())})); EXPECT_THAT(Resolution(dec_s0.Decode(*res.FrameAt(0))), ResolutionIs(160, 90)); @@ -557,12 +568,12 @@ TEST(LibaomAv1Encoder, L3T1_KEY) { EXPECT_THAT(Resolution(dec_s2.Decode(*res.FrameAt(2))), ResolutionIs(640, 360)); - enc->Encode(frame_reader->PullFrame(), - {.presentation_timestamp = Timestamp::Millis(100)}, - {Fb().Rate(kCbr).Res(160, 
90).S(0).Ref({0}).Upd(0).Build(), - Fb().Rate(kCbr).Res(320, 180).S(1).Ref({1}).Upd(1).Build(), - Fb().Rate(kCbr).Res(640, 360).S(2).Ref({2}).Upd(2).Build()}, - res.CallBack()); + enc->Encode( + frame_reader->PullFrame(), + {.presentation_timestamp = Timestamp::Millis(100)}, + ToVec({Fb().Rate(kCbr).Res(160, 90).S(0).Ref({0}).Upd(0).Cb(res.Cb()), + Fb().Rate(kCbr).Res(320, 180).S(1).Ref({1}).Upd(1).Cb(res.Cb()), + Fb().Rate(kCbr).Res(640, 360).S(2).Ref({2}).Upd(2).Cb(res.Cb())})); EXPECT_THAT(Resolution(dec_s0.Decode(*res.FrameAt(3))), ResolutionIs(160, 90)); @@ -571,12 +582,12 @@ TEST(LibaomAv1Encoder, L3T1_KEY) { EXPECT_THAT(Resolution(dec_s2.Decode(*res.FrameAt(5))), ResolutionIs(640, 360)); - enc->Encode(frame_reader->PullFrame(), - {.presentation_timestamp = Timestamp::Millis(200)}, - {Fb().Rate(kCbr).Res(160, 90).S(0).Ref({0}).Upd(0).Build(), - Fb().Rate(kCbr).Res(320, 180).S(1).Ref({1}).Upd(1).Build(), - Fb().Rate(kCbr).Res(640, 360).S(2).Ref({2}).Upd(2).Build()}, - res.CallBack()); + enc->Encode( + frame_reader->PullFrame(), + {.presentation_timestamp = Timestamp::Millis(200)}, + ToVec({Fb().Rate(kCbr).Res(160, 90).S(0).Ref({0}).Upd(0).Cb(res.Cb()), + Fb().Rate(kCbr).Res(320, 180).S(1).Ref({1}).Upd(1).Cb(res.Cb()), + Fb().Rate(kCbr).Res(640, 360).S(2).Ref({2}).Upd(2).Cb(res.Cb())})); EXPECT_THAT(Resolution(dec_s0.Decode(*res.FrameAt(6))), ResolutionIs(160, 90)); @@ -595,12 +606,12 @@ TEST(LibaomAv1Encoder, S3T1) { Av1Decoder dec_s1; Av1Decoder dec_s2; - enc->Encode(frame_reader->PullFrame(), - {.presentation_timestamp = Timestamp::Millis(0)}, - {Fb().Rate(kCbr).Res(160, 90).S(0).Start().Upd(0).Build(), - Fb().Rate(kCbr).Res(320, 180).S(1).Start().Upd(1).Build(), - Fb().Rate(kCbr).Res(640, 360).S(2).Start().Upd(2).Build()}, - res.CallBack()); + enc->Encode( + frame_reader->PullFrame(), + {.presentation_timestamp = Timestamp::Millis(0)}, + ToVec({Fb().Rate(kCbr).Res(160, 90).S(0).Start().Upd(0).Cb(res.Cb()), + Fb().Rate(kCbr).Res(320, 
180).S(1).Start().Upd(1).Cb(res.Cb()), + Fb().Rate(kCbr).Res(640, 360).S(2).Start().Upd(2).Cb(res.Cb())})); EXPECT_THAT(Resolution(dec_s0.Decode(*res.FrameAt(0))), ResolutionIs(160, 90)); EXPECT_THAT(Resolution(dec_s1.Decode(*res.FrameAt(1))), @@ -608,12 +619,12 @@ TEST(LibaomAv1Encoder, S3T1) { EXPECT_THAT(Resolution(dec_s2.Decode(*res.FrameAt(2))), ResolutionIs(640, 360)); - enc->Encode(frame_reader->PullFrame(), - {.presentation_timestamp = Timestamp::Millis(100)}, - {Fb().Rate(kCbr).Res(160, 90).S(0).Ref({0}).Upd(0).Build(), - Fb().Rate(kCbr).Res(320, 180).S(1).Ref({1}).Upd(1).Build(), - Fb().Rate(kCbr).Res(640, 360).S(2).Ref({2}).Upd(2).Build()}, - res.CallBack()); + enc->Encode( + frame_reader->PullFrame(), + {.presentation_timestamp = Timestamp::Millis(100)}, + ToVec({Fb().Rate(kCbr).Res(160, 90).S(0).Ref({0}).Upd(0).Cb(res.Cb()), + Fb().Rate(kCbr).Res(320, 180).S(1).Ref({1}).Upd(1).Cb(res.Cb()), + Fb().Rate(kCbr).Res(640, 360).S(2).Ref({2}).Upd(2).Cb(res.Cb())})); EXPECT_THAT(Resolution(dec_s0.Decode(*res.FrameAt(3))), ResolutionIs(160, 90)); @@ -622,12 +633,12 @@ TEST(LibaomAv1Encoder, S3T1) { EXPECT_THAT(Resolution(dec_s2.Decode(*res.FrameAt(5))), ResolutionIs(640, 360)); - enc->Encode(frame_reader->PullFrame(), - {.presentation_timestamp = Timestamp::Millis(200)}, - {Fb().Rate(kCbr).Res(160, 90).S(0).Ref({0}).Upd(0).Build(), - Fb().Rate(kCbr).Res(320, 180).S(1).Ref({1}).Upd(1).Build(), - Fb().Rate(kCbr).Res(640, 360).S(2).Ref({2}).Upd(2).Build()}, - res.CallBack()); + enc->Encode( + frame_reader->PullFrame(), + {.presentation_timestamp = Timestamp::Millis(200)}, + ToVec({Fb().Rate(kCbr).Res(160, 90).S(0).Ref({0}).Upd(0).Cb(res.Cb()), + Fb().Rate(kCbr).Res(320, 180).S(1).Ref({1}).Upd(1).Cb(res.Cb()), + Fb().Rate(kCbr).Res(640, 360).S(2).Ref({2}).Upd(2).Cb(res.Cb())})); EXPECT_THAT(Resolution(dec_s0.Decode(*res.FrameAt(6))), ResolutionIs(160, 90)); @@ -653,7 +664,7 @@ TEST(LibaomAv1Encoder, HigherEffortLevelYieldsHigherQualityFrames) { enc->Encode( 
frame_in, {.presentation_timestamp = Timestamp::Millis(0), .effort_level = i}, - {Fb().Rate(kCbr).Res(640, 360).Upd(0).Key().Build()}, res.CallBack()); + ToVec({Fb().Rate(kCbr).Res(640, 360).Upd(0).Key().Cb(res.Cb())})); double psnr = Psnr(frame_in, dec.Decode(*res.FrameAt(0))); EXPECT_THAT(psnr, Gt(psnr_last)); psnr_last = psnr; @@ -689,12 +700,12 @@ TEST(LibaomAv1Encoder, KeyframeAndStartrameAreApproximatelyEqual) { auto frame_in = frame_reader->PullFrame(); enc_key->Encode( frame_in, {.presentation_timestamp = Timestamp::Millis(0)}, - {Fb().Rate(kRate).Res(640, 360).S(sid).Upd(0).Key().Build()}, - res_key.CallBack()); + ToVec({Fb().Rate(kRate).Res(640, 360).S(sid).Upd(0).Key().Cb( + res_key.Cb())})); enc_start->Encode( frame_in, {.presentation_timestamp = Timestamp::Millis(0)}, - {Fb().Rate(kRate).Res(640, 360).S(sid).Start().Upd(0).Build()}, - res_start.CallBack()); + ToVec({Fb().Rate(kRate).Res(640, 360).S(sid).Start().Upd(0).Cb( + res_start.Cb())})); total_size_key += DataSize::Bytes(res_key.FrameAt(0)->bitstream_data->size()); total_size_start += @@ -710,12 +721,12 @@ TEST(LibaomAv1Encoder, KeyframeAndStartrameAreApproximatelyEqual) { frame_in = frame_reader->PullFrame(); enc_key->Encode( frame_in, {.presentation_timestamp = Timestamp::Millis(f * 100)}, - {Fb().Rate(kRate).Res(640, 360).S(sid).Ref({0}).Upd(0).Build()}, - res_key.CallBack()); + ToVec({Fb().Rate(kRate).Res(640, 360).S(sid).Ref({0}).Upd(0).Cb( + res_key.Cb())})); enc_start->Encode( frame_in, {.presentation_timestamp = Timestamp::Millis(f * 100)}, - {Fb().Rate(kRate).Res(640, 360).S(sid).Ref({0}).Upd(0).Build()}, - res_start.CallBack()); + ToVec({Fb().Rate(kRate).Res(640, 360).S(sid).Ref({0}).Upd(0).Cb( + res_start.Cb())})); total_size_key += DataSize::Bytes(res_key.FrameAt(f)->bitstream_data->size()); total_size_start += @@ -751,8 +762,8 @@ TEST(LibaomAv1Encoder, BitrateConsistentAcrossSpatialLayers) { EncodeResults res; enc->Encode(frame_reader->PullFrame(), {.presentation_timestamp = 
Timestamp::Millis(0)}, - {Fb().Rate(kRate).Res(640, 360).S(sid).Upd(0).Key().Build()}, - res.CallBack()); + ToVec({Fb().Rate(kRate).Res(640, 360).S(sid).Upd(0).Key().Cb( + res.Cb())})); total_size += DataSize::Bytes(res.FrameAt(0)->bitstream_data->size()); total_duration += kRate.duration; dec.Decode(*res.FrameAt(0)); @@ -761,8 +772,8 @@ TEST(LibaomAv1Encoder, BitrateConsistentAcrossSpatialLayers) { enc->Encode( frame_reader->PullFrame(), {.presentation_timestamp = Timestamp::Millis(f * 100)}, - {Fb().Rate(kRate).Res(640, 360).S(sid).Ref({0}).Upd(0).Build()}, - res.CallBack()); + ToVec({Fb().Rate(kRate).Res(640, 360).S(sid).Ref({0}).Upd(0).Cb( + res.Cb())})); total_size += DataSize::Bytes(res.FrameAt(f)->bitstream_data->size()); total_duration += kRate.duration; dec.Decode(*res.FrameAt(f)); @@ -791,13 +802,12 @@ TEST(LibaomAv1Encoder, ConstantQp) { enc->Encode(frame_reader->PullFrame(), {.presentation_timestamp = Timestamp::Millis(0)}, - {Fb().Rate(Cqp{.target_qp = kQp}) - .Res(640, 360) - .S(sid) - .Upd(0) - .Key() - .Build()}, - res.CallBack()); + ToVec({Fb().Rate(Cqp{.target_qp = kQp}) + .Res(640, 360) + .S(sid) + .Upd(0) + .Key() + .Cb(res.Cb())})); EXPECT_THAT(res.FrameAt(0)->encoded_qp, Eq(kQp)); total_size += DataSize::Bytes(res.FrameAt(0)->bitstream_data->size()); dec.Decode(*res.FrameAt(0)); @@ -805,13 +815,12 @@ TEST(LibaomAv1Encoder, ConstantQp) { for (int f = 1; f < 10; ++f) { enc->Encode(frame_reader->PullFrame(), {.presentation_timestamp = Timestamp::Millis(f * 100)}, - {Fb().Rate(Cqp{.target_qp = kQp - f}) - .Res(640, 360) - .S(sid) - .Ref({0}) - .Upd(0) - .Build()}, - res.CallBack()); + ToVec({Fb().Rate(Cqp{.target_qp = kQp - f}) + .Res(640, 360) + .S(sid) + .Ref({0}) + .Upd(0) + .Cb(res.Cb())})); EXPECT_THAT(res.FrameAt(f)->encoded_qp, Eq(kQp - f)); dec.Decode(*res.FrameAt(f)); } diff --git a/api/video_codecs/simple_encoder_wrapper.cc b/api/video_codecs/simple_encoder_wrapper.cc index 51adfd5170..c3627e07d1 100644 --- 
a/api/video_codecs/simple_encoder_wrapper.cc +++ b/api/video_codecs/simple_encoder_wrapper.cc @@ -17,6 +17,7 @@ #include #include "absl/algorithm/container.h" +#include "absl/types/variant.h" #include "api/video_codecs/scalability_mode.h" #include "api/video_codecs/scalability_mode_helper.h" #include "api/video_codecs/video_encoder_factory_interface.h" @@ -146,7 +147,6 @@ void SimpleEncoderWrapper::Encode( svc_controller_->NextFrameConfig(force_keyframe); std::vector encode_settings; std::vector frame_infos; - bool include_dependency_structure = false; for (size_t s = 0; s < configs.size(); ++s) { @@ -178,38 +178,39 @@ void SimpleEncoderWrapper::Encode( settings.frame_type = FrameType::kKeyframe; include_dependency_structure = true; } - } - absl::optional dependency_structure; - if (include_dependency_structure) { - dependency_structure = svc_controller_->DependencyStructure(); - } + absl::optional dependency_structure; + if (include_dependency_structure) { + dependency_structure = svc_controller_->DependencyStructure(); + } - VideoEncoderInterface::EncodeResultCallback callback_internal = - [cb = std::move(callback), ds = std::move(dependency_structure), - infos = std::move(frame_infos)]( - const VideoEncoderInterface::EncodeResult& result) mutable { - auto* data = std::get_if(&result); - EncodeResult res; - if (!data || data->spatial_id >= static_cast(infos.size())) { - res.oh_no = true; + settings.result_callback = + [cb = callback, ds = std::move(dependency_structure), + info = std::move(frame_infos[settings.spatial_id])]( + const VideoEncoderInterface::EncodeResult& result) mutable { + auto* data = + absl::get_if(&result); + + EncodeResult res; + if (!data) { + res.oh_no = true; + cb(res); + return; + } + + res.frame_type = data->frame_type; + res.bitstream_data = std::move(data->bitstream_data); + res.generic_frame_info = info; + if (res.frame_type == FrameType::kKeyframe) { + res.dependency_structure = ds; + } cb(res); - return; - } - - res.frame_type = 
data->frame_type; - res.bitstream_data = std::move(data->bitstream_data); - res.generic_frame_info = infos[data->spatial_id]; - if (res.frame_type == FrameType::kKeyframe) { - // Keyframe - res.dependency_structure = ds; - } - cb(res); - }; + }; + } encoder_->Encode(std::move(frame_buffer), {.presentation_timestamp = presentation_timestamp_}, - encode_settings, std::move(callback_internal)); + std::move(encode_settings)); presentation_timestamp_ += 1 / Frequency::Hertz(fps_); } diff --git a/api/video_codecs/simple_encoder_wrapper.h b/api/video_codecs/simple_encoder_wrapper.h index 4d11020924..cc1b94743e 100644 --- a/api/video_codecs/simple_encoder_wrapper.h +++ b/api/video_codecs/simple_encoder_wrapper.h @@ -33,8 +33,7 @@ class SimpleEncoderWrapper { absl::optional dependency_structure; }; - using EncodeResultCallback = - absl::AnyInvocable; + using EncodeResultCallback = std::function; static std::vector SupportedWebrtcSvcModes( const VideoEncoderFactoryInterface::Capabilities::PredictionConstraints& diff --git a/api/video_codecs/simple_encoder_wrapper_unittests.cc b/api/video_codecs/simple_encoder_wrapper_unittests.cc index 83518b6be5..3aa12b32b2 100644 --- a/api/video_codecs/simple_encoder_wrapper_unittests.cc +++ b/api/video_codecs/simple_encoder_wrapper_unittests.cc @@ -125,8 +125,8 @@ TEST(SimpleEncoderWrapper, SupportedSvcModesUpToL3T3KeyWithHScaling) { "S3T1", "S3T1h", "S3T2", "S3T2h", "S3T3", "S3T3h")); } -// TD: The encoder wrapper shouldn't really use an actual encoder implementation -// for testing, but hey, this is just a PoC. +// TD: The encoder wrapper shouldn't really use an actual encoder +// implementation for testing, but hey, this is just a PoC. 
TEST(SimpleEncoderWrapper, EncodeL1T1) { auto encoder = LibaomAv1EncoderFactory().CreateEncoder( {.max_encode_dimensions = {1080, 720}, @@ -171,7 +171,7 @@ TEST(SimpleEncoderWrapper, EncodeL1T1) { }); } -TEST(SimpleEncoderWrapper, DISABLED_EncodeL2T2_KEY) { +TEST(SimpleEncoderWrapper, EncodeL2T2_KEY) { auto encoder = LibaomAv1EncoderFactory().CreateEncoder( {.max_encode_dimensions = {1080, 720}, .encoding_format = {.sub_sampling = EncodingFormat::k420, @@ -231,7 +231,7 @@ TEST(SimpleEncoderWrapper, DISABLED_EncodeL2T2_KEY) { EXPECT_THAT(num_callbacks, Eq(4)); } -TEST(SimpleEncoderWrapper, DISABLED_EncodeL1T3ForceKeyframe) { +TEST(SimpleEncoderWrapper, EncodeL1T3ForceKeyframe) { auto encoder = LibaomAv1EncoderFactory().CreateEncoder( {.max_encode_dimensions = {1080, 720}, .encoding_format = {.sub_sampling = EncodingFormat::k420, diff --git a/api/video_codecs/video_encoder_interface.h b/api/video_codecs/video_encoder_interface.h index 4e0726436c..55ef55861c 100644 --- a/api/video_codecs/video_encoder_interface.h +++ b/api/video_codecs/video_encoder_interface.h @@ -13,7 +13,6 @@ #include #include -// #include #include #include #include @@ -44,6 +43,17 @@ class VideoEncoderInterface { int effort_level = 0; }; + // Results from calling Encode. Called once for each configured frame. + struct EncodingError {}; + struct EncodedData { + rtc::scoped_refptr bitstream_data; + FrameType frame_type; + int encoded_qp; + }; + using EncodeResult = absl::variant; + using EncodeResultCallback = + absl::AnyInvocable; + struct FrameEncodeSettings { struct Cbr { TimeDelta duration; @@ -62,26 +72,13 @@ class VideoEncoderInterface { Resolution resolution; std::vector reference_buffers; absl::optional update_buffer; + + EncodeResultCallback result_callback; }; - // Results from calling Encode. Called once for each configured frame. 
- struct EncodingError {}; - - struct EncodedData { - rtc::scoped_refptr bitstream_data; - FrameType frame_type; - int spatial_id; - int encoded_qp; - }; - - using EncodeResult = std::variant; - using EncodeResultCallback = - absl::AnyInvocable; - virtual void Encode(rtc::scoped_refptr frame_buffer, const TemporalUnitSettings& settings, - const std::vector& frame_settings, - EncodeResultCallback encode_result_callback) = 0; + std::vector frame_settings) = 0; }; } // namespace webrtc