From c184047fef005b86a6dd76f03b0eb5ec01de3c5c Mon Sep 17 00:00:00 2001
From: Danil Chapovalov
Date: Thu, 25 Mar 2021 17:30:10 +0100
Subject: [PATCH] Add fuzzer to validate libvpx vp9 encoder wrapper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also fix the simulcast svc controller to reuse the dropped frame
configuration, the same as the full svc and k-svc controllers do.
This fuzzer was a reminder that the issue was still there.

Bug: webrtc:11999
Change-Id: I74156bd743124723562e99deb48de5b5018a81d0
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/212281
Reviewed-by: Erik Språng
Commit-Queue: Danil Chapovalov
Cr-Commit-Position: refs/heads/master@{#33568}
---
 .../svc/scalability_structure_simulcast.cc    |  11 +-
 test/fuzzers/BUILD.gn                         |  24 +
 test/fuzzers/vp9_encoder_references_fuzzer.cc | 480 ++++++++++++++++++
 3 files changed, 511 insertions(+), 4 deletions(-)
 create mode 100644 test/fuzzers/vp9_encoder_references_fuzzer.cc

diff --git a/modules/video_coding/svc/scalability_structure_simulcast.cc b/modules/video_coding/svc/scalability_structure_simulcast.cc
index 13751daea6..c236066736 100644
--- a/modules/video_coding/svc/scalability_structure_simulcast.cc
+++ b/modules/video_coding/svc/scalability_structure_simulcast.cc
@@ -137,7 +137,7 @@ ScalabilityStructureSimulcast::NextFrameConfig(bool restart) {
       }
       configs.emplace_back();
       ScalableVideoController::LayerFrameConfig& config = configs.back();
-      config.S(sid).T(0);
+      config.Id(current_pattern).S(sid).T(0);
 
       if (can_reference_t0_frame_for_spatial_id_[sid]) {
         config.ReferenceAndUpdate(BufferIndex(sid, /*tid=*/0));
@@ -155,7 +155,10 @@ ScalabilityStructureSimulcast::NextFrameConfig(bool restart) {
       }
       configs.emplace_back();
       ScalableVideoController::LayerFrameConfig& config = configs.back();
-      config.S(sid).T(1).Reference(BufferIndex(sid, /*tid=*/0));
+      config.Id(current_pattern)
+          .S(sid)
+          .T(1)
+          .Reference(BufferIndex(sid, /*tid=*/0));
       // Save frame only if there is a higher temporal layer that may need it.
       if (num_temporal_layers_ > 2) {
         config.Update(BufferIndex(sid, /*tid=*/1));
@@ -171,7 +174,7 @@ ScalabilityStructureSimulcast::NextFrameConfig(bool restart) {
       }
       configs.emplace_back();
       ScalableVideoController::LayerFrameConfig& config = configs.back();
-      config.S(sid).T(2);
+      config.Id(current_pattern).S(sid).T(2);
       if (can_reference_t1_frame_for_spatial_id_[sid]) {
         config.Reference(BufferIndex(sid, /*tid=*/1));
       } else {
@@ -184,12 +187,12 @@ ScalabilityStructureSimulcast::NextFrameConfig(bool restart) {
       break;
   }
 
-  last_pattern_ = current_pattern;
   return configs;
 }
 
 GenericFrameInfo ScalabilityStructureSimulcast::OnEncodeDone(
     const LayerFrameConfig& config) {
+  last_pattern_ = static_cast<FramePattern>(config.Id());
   if (config.TemporalId() == 1) {
     can_reference_t1_frame_for_spatial_id_.set(config.SpatialId());
   }
diff --git a/test/fuzzers/BUILD.gn b/test/fuzzers/BUILD.gn
index e08ba3cd99..ee2a3f7dc7 100644
--- a/test/fuzzers/BUILD.gn
+++ b/test/fuzzers/BUILD.gn
@@ -632,6 +632,30 @@ webrtc_fuzzer_test("vp8_replay_fuzzer") {
   seed_corpus = "corpora/rtpdump-corpus/vp8"
 }
 
+if (rtc_build_libvpx) {
+  webrtc_fuzzer_test("vp9_encoder_references_fuzzer") {
+    sources = [ "vp9_encoder_references_fuzzer.cc" ]
+    deps = [
+      "..:test_support",
+      "../../api:array_view",
+      "../../api/transport:webrtc_key_value_config",
+      "../../api/video:video_frame",
+      "../../api/video_codecs:video_codecs_api",
+      "../../modules/video_coding:frame_dependencies_calculator",
+      "../../modules/video_coding:mock_libvpx_interface",
+      "../../modules/video_coding:webrtc_vp9",
+      "../../rtc_base:safe_compare",
+      rtc_libvpx_dir,
+    ]
+    absl_deps = [
+      "//third_party/abseil-cpp/absl/algorithm:container",
+      "//third_party/abseil-cpp/absl/base:core_headers",
+      "//third_party/abseil-cpp/absl/container:inlined_vector",
+    ]
+    defines = [ "RTC_ENABLE_VP9" ]
+  }
+}
+
 webrtc_fuzzer_test("vp9_replay_fuzzer") {
   sources = [ "vp9_replay_fuzzer.cc" ]
   deps = [
diff --git a/test/fuzzers/vp9_encoder_references_fuzzer.cc b/test/fuzzers/vp9_encoder_references_fuzzer.cc
new file mode 100644
index 0000000000..9300ddf9c9
--- /dev/null
+++ b/test/fuzzers/vp9_encoder_references_fuzzer.cc
@@ -0,0 +1,480 @@
+/*
+ *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdint.h>
+
+#include "absl/algorithm/container.h"
+#include "absl/base/macros.h"
+#include "absl/container/inlined_vector.h"
+#include "api/array_view.h"
+#include "api/transport/webrtc_key_value_config.h"
+#include "api/video/video_frame.h"
+#include "api/video_codecs/video_codec.h"
+#include "api/video_codecs/video_encoder.h"
+#include "modules/video_coding/codecs/interface/mock_libvpx_interface.h"
+#include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h"
+#include "modules/video_coding/frame_dependencies_calculator.h"
+#include "rtc_base/numerics/safe_compare.h"
+#include "test/fuzzers/fuzz_data_helper.h"
+#include "test/gmock.h"
+
+// The fuzzer simulates various svc configurations and the libvpx encoder
+// dropping layer frames. It validates that the vp9 encoder wrapper produces
+// consistent frame references.
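+//
+// The flow, as implemented below: the fuzz input selects an svc configuration
+// and a set of field trials, the libvpx API is replaced by StubLibvpx (which
+// only records what the wrapper configures and emits fake packets), and
+// FrameValidator checks every encoded layer frame for consistent vp9-specific
+// and generic frame references.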
+namespace webrtc {
+namespace {
+
+using test::FuzzDataHelper;
+using ::testing::NiceMock;
+
+class FrameValidator : public EncodedImageCallback {
+ public:
+  ~FrameValidator() override = default;
+
+  Result OnEncodedImage(const EncodedImage& encoded_image,
+                        const CodecSpecificInfo* codec_specific_info) override {
+    RTC_CHECK(codec_specific_info);
+    RTC_CHECK_EQ(codec_specific_info->codecType, kVideoCodecVP9);
+    if (codec_specific_info->codecSpecific.VP9.first_frame_in_picture) {
+      ++picture_id_;
+    }
+    LayerFrame layer_frame;
+    layer_frame.picture_id = picture_id_;
+    layer_frame.spatial_id = encoded_image.SpatialIndex().value_or(0);
+    layer_frame.info = *codec_specific_info;
+    CheckVp9References(layer_frame);
+
+    if (layer_frame.info.generic_frame_info.has_value()) {
+      int64_t frame_id = frames_.size();
+      layer_frame.frame_dependencies =
+          dependencies_calculator_.FromBuffersUsage(
+              frame_id, layer_frame.info.generic_frame_info->encoder_buffers);
+
+      CheckGenericReferences(layer_frame);
+      CheckGenericAndCodecSpecificReferencesAreConsistent(layer_frame);
+    }
+
+    frames_.push_back(std::move(layer_frame));
+    return Result(Result::OK);
+  }
+
+ private:
+  struct LayerFrame {
+    const CodecSpecificInfoVP9& vp9() const { return info.codecSpecific.VP9; }
+    int temporal_id() const {
+      return vp9().temporal_idx == kNoTemporalIdx ? 0 : vp9().temporal_idx;
+    }
+
+    int64_t picture_id;
+    int spatial_id;
+    absl::InlinedVector<int64_t, 5> frame_dependencies;
+    CodecSpecificInfo info;
+  };
+
+  void CheckVp9References(const LayerFrame& layer_frame) {
+    if (layer_frame.vp9().inter_layer_predicted) {
+      RTC_CHECK(!frames_.empty());
+      const LayerFrame& previous_frame = frames_.back();
+      RTC_CHECK(!previous_frame.vp9().non_ref_for_inter_layer_pred);
+      RTC_CHECK_EQ(layer_frame.picture_id, previous_frame.picture_id);
+    }
+    if (!frames_.empty() &&
+        frames_.back().picture_id == layer_frame.picture_id) {
+      RTC_CHECK_GT(layer_frame.spatial_id, frames_.back().spatial_id);
+      // The check below would fail for temporal shift structures. Remove it or
+      // move it to the !flexible_mode section when the vp9 encoder starts
+      // supporting such structures.
+      RTC_CHECK_EQ(layer_frame.vp9().temporal_idx,
+                   frames_.back().vp9().temporal_idx);
+    }
+    if (!layer_frame.vp9().flexible_mode) {
+      if (layer_frame.vp9().gof.num_frames_in_gof > 0) {
+        gof_.CopyGofInfoVP9(layer_frame.vp9().gof);
+      }
+      RTC_CHECK_EQ(gof_.temporal_idx[layer_frame.vp9().gof_idx],
+                   layer_frame.temporal_id());
+    }
+  }
+
+  void CheckGenericReferences(const LayerFrame& layer_frame) const {
+    const GenericFrameInfo& generic_info =
+        *layer_frame.info.generic_frame_info;
+    for (int64_t dependency_frame_id : layer_frame.frame_dependencies) {
+      RTC_CHECK_GE(dependency_frame_id, 0);
+      const LayerFrame& dependency = frames_[dependency_frame_id];
+      RTC_CHECK(dependency.info.generic_frame_info.has_value());
+      RTC_CHECK_GE(generic_info.spatial_id,
+                   dependency.info.generic_frame_info->spatial_id);
+      RTC_CHECK_GE(generic_info.temporal_id,
+                   dependency.info.generic_frame_info->temporal_id);
+    }
+  }
+
+  void CheckGenericAndCodecSpecificReferencesAreConsistent(
+      const LayerFrame& layer_frame) const {
+    const GenericFrameInfo& generic_info =
+        *layer_frame.info.generic_frame_info;
+    RTC_CHECK_EQ(generic_info.spatial_id, layer_frame.spatial_id);
+    RTC_CHECK_EQ(generic_info.temporal_id, layer_frame.temporal_id());
+    auto picture_id_diffs = rtc::MakeArrayView(
+        layer_frame.vp9().p_diff, layer_frame.vp9().num_ref_pics);
+    RTC_CHECK_EQ(
+        layer_frame.frame_dependencies.size(),
+        picture_id_diffs.size() +
+            (layer_frame.vp9().inter_layer_predicted ? 1 : 0));
+    for (int64_t dependency_frame_id : layer_frame.frame_dependencies) {
+      RTC_CHECK_GE(dependency_frame_id, 0);
+      const LayerFrame& dependency = frames_[dependency_frame_id];
+      if (dependency.spatial_id != layer_frame.spatial_id) {
+        RTC_CHECK(layer_frame.vp9().inter_layer_predicted);
+        RTC_CHECK_EQ(layer_frame.picture_id, dependency.picture_id);
+        RTC_CHECK_GT(layer_frame.spatial_id, dependency.spatial_id);
+      } else {
+        RTC_CHECK(layer_frame.vp9().inter_pic_predicted);
+        RTC_CHECK_EQ(layer_frame.spatial_id, dependency.spatial_id);
+        RTC_CHECK(absl::c_linear_search(
+            picture_id_diffs,
+            layer_frame.picture_id - dependency.picture_id));
+      }
+    }
+  }
+
+  GofInfoVP9 gof_;
+  int64_t picture_id_ = 0;
+  FrameDependenciesCalculator dependencies_calculator_;
+  std::vector<LayerFrame> frames_;
+};
+
+class FieldTrials : public WebRtcKeyValueConfig {
+ public:
+  explicit FieldTrials(FuzzDataHelper& config)
+      : flags_(config.ReadOrDefaultValue<uint8_t>(0)) {}
+
+  ~FieldTrials() override = default;
+  std::string Lookup(absl::string_view key) const override {
+    static constexpr absl::string_view kBinaryFieldTrials[] = {
+        "WebRTC-Vp9DependencyDescriptor",
+        "WebRTC-Vp9ExternalRefCtrl",
+        "WebRTC-Vp9IssueKeyFrameOnLayerDeactivation",
+    };
+    for (size_t i = 0; i < ABSL_ARRAYSIZE(kBinaryFieldTrials); ++i) {
+      if (key == kBinaryFieldTrials[i]) {
+        return (flags_ & (1u << i)) ? "Enabled" : "Disabled";
+      }
+    }
+
+    // Ignore the following field trials.
+    if (key == "WebRTC-CongestionWindow" ||
+        key == "WebRTC-UseBaseHeavyVP8TL3RateAllocation" ||
+        key == "WebRTC-SimulcastUpswitchHysteresisPercent" ||
+        key == "WebRTC-SimulcastScreenshareUpswitchHysteresisPercent" ||
+        key == "WebRTC-VideoRateControl" ||
+        key == "WebRTC-VP9-PerformanceFlags" ||
+        key == "WebRTC-VP9VariableFramerateScreenshare" ||
+        key == "WebRTC-VP9QualityScaler") {
+      return "";
+    }
+    // Crash when an unexpected field trial is looked up, to force a decision
+    // whether it should be fuzzed or given a constant value.
+    RTC_CHECK(false) << "Unfuzzed field trial " << key << "\n";
+  }
+
+ private:
+  const uint8_t flags_;
+};
+
+VideoCodec CodecSettings(FuzzDataHelper& rng) {
+  uint16_t config = rng.ReadOrDefaultValue<uint16_t>(0);
+  // Test up to 4 spatial and 4 temporal layers.
+  int num_spatial_layers = 1 + (config & 0b11);
+  int num_temporal_layers = 1 + ((config >> 2) & 0b11);
+
+  VideoCodec codec_settings = {};
+  codec_settings.codecType = kVideoCodecVP9;
+  codec_settings.maxFramerate = 30;
+  codec_settings.width = 320 << (num_spatial_layers - 1);
+  codec_settings.height = 180 << (num_spatial_layers - 1);
+  if (num_spatial_layers > 1) {
+    for (int sid = 0; sid < num_spatial_layers; ++sid) {
+      SpatialLayer& spatial_layer = codec_settings.spatialLayers[sid];
+      codec_settings.width = 320 << sid;
+      codec_settings.height = 180 << sid;
+      spatial_layer.maxFramerate = codec_settings.maxFramerate;
+      spatial_layer.numberOfTemporalLayers = num_temporal_layers;
+    }
+  }
+  codec_settings.VP9()->numberOfSpatialLayers = num_spatial_layers;
+  codec_settings.VP9()->numberOfTemporalLayers = num_temporal_layers;
+  int inter_layer_pred = (config >> 4) & 0b11;
+  // There are only 3 valid InterLayerPredMode values.
+  codec_settings.VP9()->interLayerPred = static_cast<InterLayerPredMode>(
+      inter_layer_pred < 3 ? inter_layer_pred : 0);
+  codec_settings.VP9()->flexibleMode = (config & (1u << 6)) != 0;
+  codec_settings.VP9()->frameDroppingOn = (config & (1u << 7)) != 0;
+  codec_settings.mode = VideoCodecMode::kRealtimeVideo;
+  return codec_settings;
+}
+
+VideoEncoder::Settings EncoderSettings() {
+  return VideoEncoder::Settings(VideoEncoder::Capabilities(false),
+                                /*number_of_cores=*/1,
+                                /*max_payload_size=*/0);
+}
+
+struct LibvpxState {
+  LibvpxState() {
+    pkt.kind = VPX_CODEC_CX_FRAME_PKT;
+    pkt.data.frame.buf = pkt_buffer;
+    pkt.data.frame.sz = ABSL_ARRAYSIZE(pkt_buffer);
+    layer_id.spatial_layer_id = -1;
+  }
+
+  uint8_t pkt_buffer[1000] = {};
+  vpx_codec_enc_cfg_t config = {};
+  vpx_codec_priv_output_cx_pkt_cb_pair_t callback = {};
+  vpx_image_t img = {};
+  vpx_svc_ref_frame_config_t ref_config = {};
+  vpx_svc_layer_id_t layer_id = {};
+  vpx_svc_frame_drop_t frame_drop = {};
+  vpx_codec_cx_pkt pkt = {};
+};
+
+class StubLibvpx : public NiceMock<MockLibvpxInterface> {
+ public:
+  explicit StubLibvpx(LibvpxState* state) : state_(state) {
+    RTC_CHECK(state_);
+  }
+
+  vpx_codec_err_t codec_enc_config_default(vpx_codec_iface_t* iface,
+                                           vpx_codec_enc_cfg_t* cfg,
+                                           unsigned int usage) const override {
+    state_->config = *cfg;
+    return VPX_CODEC_OK;
+  }
+
+  vpx_codec_err_t codec_enc_init(vpx_codec_ctx_t* ctx,
+                                 vpx_codec_iface_t* iface,
+                                 const vpx_codec_enc_cfg_t* cfg,
+                                 vpx_codec_flags_t flags) const override {
+    RTC_CHECK(ctx);
+    ctx->err = VPX_CODEC_OK;
+    return VPX_CODEC_OK;
+  }
+
+  vpx_image_t* img_wrap(vpx_image_t* img,
+                        vpx_img_fmt_t fmt,
+                        unsigned int d_w,
+                        unsigned int d_h,
+                        unsigned int stride_align,
+                        unsigned char* img_data) const override {
+    state_->img.fmt = fmt;
+    state_->img.d_w = d_w;
+    state_->img.d_h = d_h;
+    return &state_->img;
+  }
+
+  vpx_codec_err_t codec_encode(vpx_codec_ctx_t* ctx,
+                               const vpx_image_t* img,
+                               vpx_codec_pts_t pts,
+                               uint64_t duration,
+                               vpx_enc_frame_flags_t flags,
+                               uint64_t deadline) const override {
+    if (flags & VPX_EFLAG_FORCE_KF) {
+      state_->pkt.data.frame.flags = VPX_FRAME_IS_KEY;
+    } else {
+      state_->pkt.data.frame.flags = 0;
+    }
+    state_->pkt.data.frame.duration = duration;
+    return VPX_CODEC_OK;
+  }
+
+  vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx,
+                                vp8e_enc_control_id ctrl_id,
+                                void* param) const override {
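+    // Only the callback registration needs to be remembered; any other
+    // control passed through this generic overload is accepted and ignored.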
+    if (ctrl_id == VP9E_REGISTER_CX_CALLBACK) {
+      state_->callback =
+          *reinterpret_cast<vpx_codec_priv_output_cx_pkt_cb_pair_t*>(param);
+    }
+    return VPX_CODEC_OK;
+  }
+
+  vpx_codec_err_t codec_control(
+      vpx_codec_ctx_t* ctx,
+      vp8e_enc_control_id ctrl_id,
+      vpx_svc_ref_frame_config_t* param) const override {
+    switch (ctrl_id) {
+      case VP9E_SET_SVC_REF_FRAME_CONFIG:
+        state_->ref_config = *param;
+        break;
+      case VP9E_GET_SVC_REF_FRAME_CONFIG:
+        *param = state_->ref_config;
+        break;
+      default:
+        break;
+    }
+    return VPX_CODEC_OK;
+  }
+
+  vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx,
+                                vp8e_enc_control_id ctrl_id,
+                                vpx_svc_layer_id_t* param) const override {
+    switch (ctrl_id) {
+      case VP9E_SET_SVC_LAYER_ID:
+        state_->layer_id = *param;
+        break;
+      case VP9E_GET_SVC_LAYER_ID:
+        *param = state_->layer_id;
+        break;
+      default:
+        break;
+    }
+    return VPX_CODEC_OK;
+  }
+
+  vpx_codec_err_t codec_control(vpx_codec_ctx_t* ctx,
+                                vp8e_enc_control_id ctrl_id,
+                                vpx_svc_frame_drop_t* param) const override {
+    if (ctrl_id == VP9E_SET_SVC_FRAME_DROP_LAYER) {
+      state_->frame_drop = *param;
+    }
+    return VPX_CODEC_OK;
+  }
+
+  vpx_codec_err_t codec_enc_config_set(
+      vpx_codec_ctx_t* ctx,
+      const vpx_codec_enc_cfg_t* cfg) const override {
+    state_->config = *cfg;
+    return VPX_CODEC_OK;
+  }
+
+ private:
+  LibvpxState* const state_;
+};
+
+enum Actions {
+  kEncode,
+  kSetRates,
+};
+
+// When a layer frame is marked for drop, drop all layer frames of that
+// picture with larger spatial ids.
+constexpr bool DropAbove(uint8_t layers_mask, int sid) {
+  uint8_t full_mask = (uint8_t{1} << (sid + 1)) - 1;
+  return (layers_mask & full_mask) != full_mask;
+}
+// inline unittests
+static_assert(DropAbove(0b1011, /*sid=*/0) == false, "");
+static_assert(DropAbove(0b1011, /*sid=*/1) == false, "");
+static_assert(DropAbove(0b1011, /*sid=*/2) == true, "");
+static_assert(DropAbove(0b1011, /*sid=*/3) == true, "");
+
+// When a layer frame is marked for drop, drop all layer frames of that
+// picture with smaller spatial ids.
+constexpr bool DropBelow(uint8_t layers_mask, int sid, int num_layers) {
+  return (layers_mask >> sid) != (1 << (num_layers - sid)) - 1;
+}
+// inline unittests
+static_assert(DropBelow(0b1101, /*sid=*/0, 4) == true, "");
+static_assert(DropBelow(0b1101, /*sid=*/1, 4) == true, "");
+static_assert(DropBelow(0b1101, /*sid=*/2, 4) == false, "");
+static_assert(DropBelow(0b1101, /*sid=*/3, 4) == false, "");
+
+}  // namespace
+
+void FuzzOneInput(const uint8_t* data, size_t size) {
+  FuzzDataHelper helper(rtc::MakeArrayView(data, size));
+
+  FrameValidator validator;
+  FieldTrials field_trials(helper);
+  // Set up callbacks for the fake libvpx.
+  LibvpxState state;
+
+  // Initialize the encoder.
+  LibvpxVp9Encoder encoder(cricket::VideoCodec(),
+                           std::make_unique<StubLibvpx>(&state), field_trials);
+  VideoCodec codec = CodecSettings(helper);
+  if (encoder.InitEncode(&codec, EncoderSettings()) != WEBRTC_VIDEO_CODEC_OK) {
+    return;
+  }
+  RTC_CHECK_EQ(encoder.RegisterEncodeCompleteCallback(&validator),
+               WEBRTC_VIDEO_CODEC_OK);
+  {
+    // Enable all the layers initially. The encoder doesn't support producing
+    // frames when no layers are enabled.
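+    // Every (spatial, temporal) layer pair is given 100 kbps so the wrapper
+    // keeps all configured layers active until a later kSetRates action
+    // changes the allocation.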
+    LibvpxVp9Encoder::RateControlParameters parameters;
+    parameters.framerate_fps = 30.0;
+    for (int sid = 0; sid < codec.VP9()->numberOfSpatialLayers; ++sid) {
+      for (int tid = 0; tid < codec.VP9()->numberOfTemporalLayers; ++tid) {
+        parameters.bitrate.SetBitrate(sid, tid, 100'000);
+      }
+    }
+    encoder.SetRates(parameters);
+  }
+
+  std::vector<VideoFrameType> frame_types(1);
+  VideoFrame fake_image = VideoFrame::Builder()
+                              .set_video_frame_buffer(I420Buffer::Create(
+                                  int{codec.width}, int{codec.height}))
+                              .build();
+
+  // Start producing frames at random.
+  while (helper.CanReadBytes(1)) {
+    uint8_t action = helper.Read<uint8_t>();
+    switch (action & 0b11) {
+      case kEncode: {
+        // Bitmask of the action: SSSS-K00, where
+        // the four S bits indicate which spatial layers should be produced,
+        // and the K bit indicates whether the frame should be a key frame.
+        frame_types[0] = (action & 0b100) ? VideoFrameType::kVideoFrameKey
+                                          : VideoFrameType::kVideoFrameDelta;
+        encoder.Encode(fake_image, &frame_types);
+        uint8_t encode_spatial_layers = (action >> 4);
+        for (size_t sid = 0; sid < state.config.ss_number_layers; ++sid) {
+          bool drop = true;
+          switch (state.frame_drop.framedrop_mode) {
+            case FULL_SUPERFRAME_DROP:
+              drop = encode_spatial_layers == 0;
+              break;
+            case LAYER_DROP:
+              drop = (encode_spatial_layers & (1 << sid)) == 0;
+              break;
+            case CONSTRAINED_LAYER_DROP:
+              drop = DropBelow(encode_spatial_layers, sid,
+                               state.config.ss_number_layers);
+              break;
+            case CONSTRAINED_FROM_ABOVE_DROP:
+              drop = DropAbove(encode_spatial_layers, sid);
+              break;
+          }
+          if (!drop) {
+            state.layer_id.spatial_layer_id = sid;
+            state.callback.output_cx_pkt(&state.pkt, state.callback.user_priv);
+          }
+        }
+      } break;
+      case kSetRates: {
+        // Bitmask of the action: (S2)(S1)(S0)01, where Sx is the number of
+        // temporal layers to enable for spatial layer x. In particular,
+        // Sx = 0 indicates spatial layer x should be disabled.
+        LibvpxVp9Encoder::RateControlParameters parameters;
+        parameters.framerate_fps = 30.0;
+        for (int sid = 0; sid < codec.VP9()->numberOfSpatialLayers; ++sid) {
+          int temporal_layers = (action >> ((1 + sid) * 2)) & 0b11;
+          for (int tid = 0; tid < temporal_layers; ++tid) {
+            parameters.bitrate.SetBitrate(sid, tid, 100'000);
+          }
+        }
+        // Ignore an allocation that turns off all the layers. In such a case
+        // it is up to the upper-layer code not to call Encode.
+        if (parameters.bitrate.get_sum_bps() > 0) {
+          encoder.SetRates(parameters);
+        }
+      } break;
+      default:
+        // Unspecified values are a no-op.
+        break;
+    }
+  }
+}
+}  // namespace webrtc
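
A typical way to build and run the new target locally is the standard
Chromium-style libFuzzer flow. The exact GN arguments and output paths depend
on the local checkout, so treat the commands below as a sketch rather than a
verified recipe:

  gn gen out/fuzz --args='use_libfuzzer=true is_asan=true is_debug=false'
  autoninja -C out/fuzz vp9_encoder_references_fuzzer
  out/fuzz/vp9_encoder_references_fuzzer -runs=100000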