From 10d9d59db16457582b6febea6914ead6ec43f5ec Mon Sep 17 00:00:00 2001 From: Sergey Silkin Date: Thu, 1 Feb 2018 13:25:17 +0100 Subject: [PATCH] Adding simulcast/spatial layering support to VideoProcessor. Encoded frames are preserved and decoded after all layers are encoded. Each spatial layer is decoded with separate decoder. For quality evaluation of lowres layers original input frame is downscaled with bilinear interpolation. Encoded and decoded frames are dumped into separate files. For async codecs encoded frames are passed to decoder in encode callback, as before. Bug: webrtc:8524 Change-Id: Idb0c92c7274c1915cff9a011a2794f1cf4bc8cb1 Reviewed-on: https://webrtc-review.googlesource.com/43381 Commit-Queue: Sergey Silkin Reviewed-by: Rasmus Brandt Cr-Commit-Position: refs/heads/master@{#21844} --- modules/BUILD.gn | 1 + modules/video_coding/BUILD.gn | 1 + .../video_coding/codecs/test/test_config.cc | 38 +- .../video_coding/codecs/test/test_config.h | 4 +- .../codecs/test/test_config_unittest.cc | 33 -- .../codecs/test/videoprocessor.cc | 356 ++++++++++++++---- .../video_coding/codecs/test/videoprocessor.h | 74 ++-- .../test/videoprocessor_integrationtest.cc | 41 +- .../videoprocessor_integrationtest_libvpx.cc | 64 +++- ...processor_integrationtest_parameterized.cc | 12 +- .../codecs/test/videoprocessor_unittest.cc | 20 +- 11 files changed, 438 insertions(+), 206 deletions(-) diff --git a/modules/BUILD.gn b/modules/BUILD.gn index 4c45a628b0..4a77b21561 100644 --- a/modules/BUILD.gn +++ b/modules/BUILD.gn @@ -67,6 +67,7 @@ if (rtc_include_tests) { "../resources/audio_coding/teststereo32kHz.pcm", "../resources/foreman_cif.yuv", "../resources/paris_qcif.yuv", + "../resources/ConferenceMotion_1280_720_50.yuv", ] if (is_ios) { diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn index 721773d3ae..fe5414fa99 100644 --- a/modules/video_coding/BUILD.gn +++ b/modules/video_coding/BUILD.gn @@ -540,6 +540,7 @@ if (rtc_include_tests) { 
"../../test:video_test_common", "../../test:video_test_support", "../rtp_rtcp:rtp_rtcp_format", + "//third_party/libyuv", ] } diff --git a/modules/video_coding/codecs/test/test_config.cc b/modules/video_coding/codecs/test/test_config.cc index 452a591b83..37a6f5c920 100644 --- a/modules/video_coding/codecs/test/test_config.cc +++ b/modules/video_coding/codecs/test/test_config.cc @@ -98,6 +98,10 @@ void TestConfig::SetCodecSettings(VideoCodecType codec_type, // Spatial scalability is only available with VP9. RTC_CHECK(num_spatial_layers < 2 || codec_type == kVideoCodecVP9); + // Simulcast/SVC is only supposed to work with software codecs. + RTC_CHECK((!hw_encoder && !hw_decoder) || + (num_simulcast_streams == 1 && num_spatial_layers == 1)); + // Some base code requires numberOfSimulcastStreams to be set to zero // when simulcast is not used. codec_settings.numberOfSimulcastStreams = @@ -186,36 +190,6 @@ size_t TestConfig::NumberOfSimulcastStreams() const { return codec_settings.numberOfSimulcastStreams; } -size_t TestConfig::TemporalLayerForFrame(size_t frame_idx) const { - size_t tl = 0; - switch (NumberOfTemporalLayers()) { - case 1: - tl = 0; - break; - case 2: - // temporal layer 1: 1 3 - // temporal layer 0: 0 2 4 ... - tl = (frame_idx % 2 == 0) ? 0 : 1; - break; - case 3: - // temporal layer 2: 1 3 5 7 - // temporal layer 1: 2 6 - // temporal layer 0: 0 4 8 ... 
- if (frame_idx % 4 == 0) { - tl = 0; - } else if ((frame_idx + 2) % 4 == 0) { - tl = 1; - } else if ((frame_idx + 1) % 2 == 0) { - tl = 2; - } - break; - default: - RTC_NOTREACHED(); - break; - } - return tl; -} - std::vector TestConfig::FrameTypeForFrame(size_t frame_idx) const { if (keyframe_interval > 0 && (frame_idx % keyframe_interval == 0)) { return {kVideoFrameKey}; @@ -264,5 +238,9 @@ std::string TestConfig::FilenameWithParams() const { std::to_string(codec_settings.startBitrate); } +bool TestConfig::IsAsyncCodec() const { + return hw_encoder || hw_decoder; +} + } // namespace test } // namespace webrtc diff --git a/modules/video_coding/codecs/test/test_config.h b/modules/video_coding/codecs/test/test_config.h index 38edc407af..70cc51c222 100644 --- a/modules/video_coding/codecs/test/test_config.h +++ b/modules/video_coding/codecs/test/test_config.h @@ -53,8 +53,6 @@ struct TestConfig { size_t NumberOfSimulcastStreams() const; - size_t TemporalLayerForFrame(size_t frame_idx) const; - std::vector FrameTypeForFrame(size_t frame_idx) const; std::string ToString() const; @@ -63,6 +61,8 @@ struct TestConfig { std::string FilenameWithParams() const; + bool IsAsyncCodec() const; + // Plain name of YUV file to process without file extension. 
std::string filename; diff --git a/modules/video_coding/codecs/test/test_config_unittest.cc b/modules/video_coding/codecs/test/test_config_unittest.cc index 7978f1106f..2202f919d1 100644 --- a/modules/video_coding/codecs/test/test_config_unittest.cc +++ b/modules/video_coding/codecs/test/test_config_unittest.cc @@ -54,39 +54,6 @@ TEST(TestConfig, NumberOfTemporalLayers_Vp9) { EXPECT_EQ(kNumTemporalLayers, config.NumberOfTemporalLayers()); } -TEST(TestConfig, TemporalLayersForFrame_OneLayer) { - TestConfig config; - webrtc::test::CodecSettings(kVideoCodecVP8, &config.codec_settings); - config.codec_settings.VP8()->numberOfTemporalLayers = 1; - EXPECT_EQ(0u, config.TemporalLayerForFrame(0)); - EXPECT_EQ(0u, config.TemporalLayerForFrame(1)); - EXPECT_EQ(0u, config.TemporalLayerForFrame(2)); -} - -TEST(TestConfig, TemporalLayersForFrame_TwoLayers) { - TestConfig config; - webrtc::test::CodecSettings(kVideoCodecVP8, &config.codec_settings); - config.codec_settings.VP8()->numberOfTemporalLayers = 2; - EXPECT_EQ(0u, config.TemporalLayerForFrame(0)); - EXPECT_EQ(1u, config.TemporalLayerForFrame(1)); - EXPECT_EQ(0u, config.TemporalLayerForFrame(2)); - EXPECT_EQ(1u, config.TemporalLayerForFrame(3)); -} - -TEST(TestConfig, TemporalLayersForFrame_ThreeLayers) { - TestConfig config; - webrtc::test::CodecSettings(kVideoCodecVP8, &config.codec_settings); - config.codec_settings.VP8()->numberOfTemporalLayers = 3; - EXPECT_EQ(0u, config.TemporalLayerForFrame(0)); - EXPECT_EQ(2u, config.TemporalLayerForFrame(1)); - EXPECT_EQ(1u, config.TemporalLayerForFrame(2)); - EXPECT_EQ(2u, config.TemporalLayerForFrame(3)); - EXPECT_EQ(0u, config.TemporalLayerForFrame(4)); - EXPECT_EQ(2u, config.TemporalLayerForFrame(5)); - EXPECT_EQ(1u, config.TemporalLayerForFrame(6)); - EXPECT_EQ(2u, config.TemporalLayerForFrame(7)); -} - TEST(TestConfig, ForcedKeyFrameIntervalOff) { TestConfig config; config.keyframe_interval = 0; diff --git a/modules/video_coding/codecs/test/videoprocessor.cc 
b/modules/video_coding/codecs/test/videoprocessor.cc index 7e1047f458..6c06087820 100644 --- a/modules/video_coding/codecs/test/videoprocessor.cc +++ b/modules/video_coding/codecs/test/videoprocessor.cc @@ -24,6 +24,7 @@ #include "rtc_base/checks.h" #include "rtc_base/timeutils.h" #include "test/gtest.h" +#include "third_party/libyuv/include/libyuv/scale.h" namespace webrtc { namespace test { @@ -96,57 +97,70 @@ void ExtractBufferWithSize(const VideoFrame& image, } // namespace VideoProcessor::VideoProcessor(webrtc::VideoEncoder* encoder, - webrtc::VideoDecoder* decoder, - FrameReader* analysis_frame_reader, + VideoDecoderList* decoders, + FrameReader* input_frame_reader, const TestConfig& config, - Stats* stats, - IvfFileWriter* encoded_frame_writer, - FrameWriter* decoded_frame_writer) + std::vector* stats, + IvfFileWriterList* encoded_frame_writers, + FrameWriterList* decoded_frame_writers) : config_(config), + num_simulcast_or_spatial_layers_( + std::max(config_.NumberOfSimulcastStreams(), + config_.NumberOfSpatialLayers())), encoder_(encoder), - decoder_(decoder), + decoders_(decoders), bitrate_allocator_(CreateBitrateAllocator(&config_)), encode_callback_(this), decode_callback_(this), - analysis_frame_reader_(analysis_frame_reader), - encoded_frame_writer_(encoded_frame_writer), - decoded_frame_writer_(decoded_frame_writer), + input_frame_reader_(input_frame_reader), + encoded_frame_writers_(encoded_frame_writers), + decoded_frame_writers_(decoded_frame_writers), last_inputed_frame_num_(0), last_encoded_frame_num_(0), + last_encoded_simulcast_svc_idx_(0), last_decoded_frame_num_(0), num_encoded_frames_(0), num_decoded_frames_(0), - last_decoded_frame_buffer_(analysis_frame_reader->FrameLength()), stats_(stats) { - RTC_DCHECK(encoder); - RTC_DCHECK(decoder); - RTC_DCHECK(analysis_frame_reader); - RTC_DCHECK(stats); + RTC_CHECK(encoder); + RTC_CHECK(decoders && decoders->size() == num_simulcast_or_spatial_layers_); + RTC_CHECK(input_frame_reader); + 
RTC_CHECK(stats); + RTC_CHECK(!encoded_frame_writers || + encoded_frame_writers->size() == num_simulcast_or_spatial_layers_); + RTC_CHECK(!decoded_frame_writers || + decoded_frame_writers->size() == num_simulcast_or_spatial_layers_); - // Setup required callbacks for the encoder and decoder. + // Setup required callbacks for the encoder and decoder and initialize them. RTC_CHECK_EQ(encoder_->RegisterEncodeCompleteCallback(&encode_callback_), WEBRTC_VIDEO_CODEC_OK); - RTC_CHECK_EQ(decoder_->RegisterDecodeCompleteCallback(&decode_callback_), - WEBRTC_VIDEO_CODEC_OK); - // Initialize the encoder and decoder. RTC_CHECK_EQ(encoder_->InitEncode(&config_.codec_settings, static_cast(config_.NumberOfCores()), config_.max_payload_size_bytes), WEBRTC_VIDEO_CODEC_OK); - RTC_CHECK_EQ(decoder_->InitDecode(&config_.codec_settings, - static_cast(config_.NumberOfCores())), - WEBRTC_VIDEO_CODEC_OK); + + for (auto& decoder : *decoders_) { + RTC_CHECK_EQ(decoder->InitDecode(&config_.codec_settings, + static_cast(config_.NumberOfCores())), + WEBRTC_VIDEO_CODEC_OK); + RTC_CHECK_EQ(decoder->RegisterDecodeCompleteCallback(&decode_callback_), + WEBRTC_VIDEO_CODEC_OK); + } } VideoProcessor::~VideoProcessor() { RTC_DCHECK_CALLED_SEQUENTIALLY(&sequence_checker_); RTC_CHECK_EQ(encoder_->Release(), WEBRTC_VIDEO_CODEC_OK); - RTC_CHECK_EQ(decoder_->Release(), WEBRTC_VIDEO_CODEC_OK); - encoder_->RegisterEncodeCompleteCallback(nullptr); - decoder_->RegisterDecodeCompleteCallback(nullptr); + + for (auto& decoder : *decoders_) { + RTC_CHECK_EQ(decoder->Release(), WEBRTC_VIDEO_CODEC_OK); + decoder->RegisterDecodeCompleteCallback(nullptr); + } + + RTC_CHECK(last_encoded_frames_.empty()); } void VideoProcessor::ProcessFrame() { @@ -155,7 +169,7 @@ void VideoProcessor::ProcessFrame() { // Get frame from file. 
rtc::scoped_refptr buffer( - analysis_frame_reader_->ReadFrame()); + input_frame_reader_->ReadFrame()); RTC_CHECK(buffer) << "Tried to read too many frames from the file."; size_t rtp_timestamp = @@ -170,14 +184,69 @@ void VideoProcessor::ProcessFrame() { std::vector frame_types = config_.FrameTypeForFrame(frame_number); - // Create frame statistics object used for aggregation at end of test run. - FrameStatistic* frame_stat = stats_->AddFrame(rtp_timestamp); + // Create frame statistics object for all simulcast/spatial layers. + for (size_t simulcast_svc_idx = 0; + simulcast_svc_idx < num_simulcast_or_spatial_layers_; + ++simulcast_svc_idx) { + stats_->at(simulcast_svc_idx).AddFrame(rtp_timestamp); + } // For the highest measurement accuracy of the encode time, the start/stop // time recordings should wrap the Encode call as tightly as possible. - frame_stat->encode_start_ns = rtc::TimeNanos(); - frame_stat->encode_return_code = + const int64_t encode_start_ns = rtc::TimeNanos(); + for (size_t simulcast_svc_idx = 0; + simulcast_svc_idx < num_simulcast_or_spatial_layers_; + ++simulcast_svc_idx) { + FrameStatistic* frame_stat = + stats_->at(simulcast_svc_idx).GetFrame(frame_number); + frame_stat->encode_start_ns = encode_start_ns; + } + + const int encode_return_code = encoder_->Encode(*input_frames_[frame_number], nullptr, &frame_types); + + for (size_t simulcast_svc_idx = 0; + simulcast_svc_idx < num_simulcast_or_spatial_layers_; + ++simulcast_svc_idx) { + FrameStatistic* frame_stat = + stats_->at(simulcast_svc_idx).GetFrame(frame_number); + frame_stat->encode_return_code = encode_return_code; + } + + // For async codecs frame decoding is done in frame encode callback. 
+ if (!config_.IsAsyncCodec()) { + for (size_t simulcast_svc_idx = 0; + simulcast_svc_idx < num_simulcast_or_spatial_layers_; + ++simulcast_svc_idx) { + if (last_encoded_frames_.find(simulcast_svc_idx) != + last_encoded_frames_.end()) { + EncodedImage& encoded_image = last_encoded_frames_[simulcast_svc_idx]; + + FrameStatistic* frame_stat = + stats_->at(simulcast_svc_idx).GetFrame(frame_number); + + if (encoded_frame_writers_) { + RTC_CHECK(encoded_frame_writers_->at(simulcast_svc_idx) + ->WriteFrame(encoded_image, + config_.codec_settings.codecType)); + } + + // For the highest measurement accuracy of the decode time, the + // start/stop time recordings should wrap the Decode call as tightly as + // possible. + frame_stat->decode_start_ns = rtc::TimeNanos(); + frame_stat->decode_return_code = + decoders_->at(simulcast_svc_idx) + ->Decode(encoded_image, false, nullptr); + + RTC_CHECK(encoded_image._buffer); + delete[] encoded_image._buffer; + encoded_image._buffer = nullptr; + + last_encoded_frames_.erase(simulcast_svc_idx); + } + } + } } void VideoProcessor::SetRates(size_t bitrate_kbps, size_t framerate_fps) { @@ -192,49 +261,91 @@ void VideoProcessor::SetRates(size_t bitrate_kbps, size_t framerate_fps) { << "Failed to update encoder with new rate " << bitrate_kbps << "."; } -void VideoProcessor::FrameEncoded(webrtc::VideoCodecType codec, - const EncodedImage& encoded_image) { +void VideoProcessor::FrameEncoded( + const webrtc::EncodedImage& encoded_image, + const webrtc::CodecSpecificInfo& codec_specific) { RTC_DCHECK_CALLED_SEQUENTIALLY(&sequence_checker_); // For the highest measurement accuracy of the encode time, the start/stop // time recordings should wrap the Encode call as tightly as possible. 
int64_t encode_stop_ns = rtc::TimeNanos(); + const VideoCodecType codec = codec_specific.codecType; if (config_.encoded_frame_checker) { config_.encoded_frame_checker->CheckEncodedFrame(codec, encoded_image); } - FrameStatistic* frame_stat = - stats_->GetFrameWithTimestamp(encoded_image._timeStamp); + size_t simulcast_svc_idx = 0; + size_t temporal_idx = 0; - // Ensure strict monotonicity. + if (codec == kVideoCodecVP8) { + simulcast_svc_idx = codec_specific.codecSpecific.VP8.simulcastIdx; + temporal_idx = codec_specific.codecSpecific.VP8.temporalIdx; + } else if (codec == kVideoCodecVP9) { + simulcast_svc_idx = codec_specific.codecSpecific.VP9.spatial_idx; + temporal_idx = codec_specific.codecSpecific.VP9.temporal_idx; + } + + if (simulcast_svc_idx == kNoSpatialIdx) { + simulcast_svc_idx = 0; + } + + if (temporal_idx == kNoTemporalIdx) { + temporal_idx = 0; + } + + const size_t frame_wxh = + encoded_image._encodedWidth * encoded_image._encodedHeight; + frame_wxh_to_simulcast_svc_idx_[frame_wxh] = simulcast_svc_idx; + + FrameStatistic* frame_stat = + stats_->at(simulcast_svc_idx) + .GetFrameWithTimestamp(encoded_image._timeStamp); const size_t frame_number = frame_stat->frame_number; - if (num_encoded_frames_ > 0) { - RTC_CHECK_GT(frame_number, last_encoded_frame_num_); + + // Reordering is unexpected. Frames of different layers have the same value + // of frame_number. VP8 multi-res delivers frames starting from hires layer. + RTC_CHECK_GE(frame_number, last_encoded_frame_num_); + + // Ensure SVC spatial layers are delivered in ascending order. + if (config_.NumberOfSpatialLayers() > 1) { + RTC_CHECK(simulcast_svc_idx > last_encoded_simulcast_svc_idx_ || + frame_number != last_encoded_frame_num_ || + num_encoded_frames_ == 0); } last_encoded_frame_num_ = frame_number; + last_encoded_simulcast_svc_idx_ = simulcast_svc_idx; // Update frame statistics. 
+ frame_stat->encoding_successful = true; frame_stat->encode_time_us = GetElapsedTimeMicroseconds(frame_stat->encode_start_ns, encode_stop_ns); - frame_stat->encoding_successful = true; + + // TODO(ssilkin): Implement bitrate allocation for VP9 SVC. For now set + // target for base layers equal to total target to avoid division by zero + // at analysis. + frame_stat->target_bitrate_kbps = + bitrate_allocation_.GetSpatialLayerSum( + codec == kVideoCodecVP9 ? 0 : simulcast_svc_idx) / + 1000; frame_stat->encoded_frame_size_bytes = encoded_image._length; frame_stat->frame_type = encoded_image._frameType; - frame_stat->temporal_layer_idx = config_.TemporalLayerForFrame(frame_number); - frame_stat->qp = encoded_image.qp_; - frame_stat->target_bitrate_kbps = - bitrate_allocation_.GetSpatialLayerSum(0) / 1000; + frame_stat->temporal_layer_idx = temporal_idx; + frame_stat->simulcast_svc_idx = simulcast_svc_idx; frame_stat->max_nalu_size_bytes = GetMaxNaluSizeBytes(encoded_image, config_); + frame_stat->qp = encoded_image.qp_; - // For the highest measurement accuracy of the decode time, the start/stop - // time recordings should wrap the Decode call as tightly as possible. - frame_stat->decode_start_ns = rtc::TimeNanos(); - frame_stat->decode_return_code = - decoder_->Decode(encoded_image, false, nullptr); - - if (encoded_frame_writer_) { - RTC_CHECK(encoded_frame_writer_->WriteFrame(encoded_image, codec)); + if (!config_.IsAsyncCodec()) { + // Store encoded frame. It will be decoded after all layers are encoded. + CopyEncodedImage(encoded_image, codec, frame_number, simulcast_svc_idx); + } else { + const size_t simulcast_idx = + codec == kVideoCodecVP8 ? 
codec_specific.codecSpecific.VP8.simulcastIdx + : 0; + frame_stat->decode_start_ns = rtc::TimeNanos(); + frame_stat->decode_return_code = + decoders_->at(simulcast_idx)->Decode(encoded_image, false, nullptr); } ++num_encoded_frames_; @@ -247,41 +358,49 @@ void VideoProcessor::FrameDecoded(const VideoFrame& decoded_frame) { // time recordings should wrap the Decode call as tightly as possible. int64_t decode_stop_ns = rtc::TimeNanos(); - // Update frame statistics. + RTC_CHECK(frame_wxh_to_simulcast_svc_idx_.find(decoded_frame.size()) != + frame_wxh_to_simulcast_svc_idx_.end()); + const size_t simulcast_svc_idx = + frame_wxh_to_simulcast_svc_idx_[decoded_frame.size()]; + FrameStatistic* frame_stat = - stats_->GetFrameWithTimestamp(decoded_frame.timestamp()); - frame_stat->decoded_width = decoded_frame.width(); - frame_stat->decoded_height = decoded_frame.height(); - frame_stat->decode_time_us = - GetElapsedTimeMicroseconds(frame_stat->decode_start_ns, decode_stop_ns); - frame_stat->decoding_successful = true; - - // Ensure strict monotonicity. + stats_->at(simulcast_svc_idx) + .GetFrameWithTimestamp(decoded_frame.timestamp()); const size_t frame_number = frame_stat->frame_number; - if (num_decoded_frames_ > 0) { - RTC_CHECK_GT(frame_number, last_decoded_frame_num_); - } - // Check if the codecs have resized the frame since previously decoded frame. - if (frame_number > 0) { - if (decoded_frame_writer_ && num_decoded_frames_ > 0) { - // For dropped/lost frames, write out the last decoded frame to make it - // look like a freeze at playback. - const size_t num_dropped_frames = - frame_number - last_decoded_frame_num_ - 1; - for (size_t i = 0; i < num_dropped_frames; i++) { - WriteDecodedFrameToFile(&last_decoded_frame_buffer_); + // Reordering is unexpected. Frames of different layers have the same value + // of frame_number. 
+ RTC_CHECK_GE(frame_number, last_decoded_frame_num_); + + if (decoded_frame_writers_ && num_decoded_frames_ > 0) { + // For dropped frames, write out the last decoded frame to make it look like + // a freeze at playback. + for (size_t num_dropped_frames = 0; num_dropped_frames < frame_number; + ++num_dropped_frames) { + const FrameStatistic* prev_frame_stat = + stats_->at(simulcast_svc_idx) + .GetFrame(frame_number - num_dropped_frames - 1); + if (prev_frame_stat->decoding_successful) { + break; } + WriteDecodedFrameToFile(&last_decoded_frame_buffers_[simulcast_svc_idx], + simulcast_svc_idx); } } + last_decoded_frame_num_ = frame_number; + // Update frame statistics. + frame_stat->decoding_successful = true; + frame_stat->decode_time_us = + GetElapsedTimeMicroseconds(frame_stat->decode_start_ns, decode_stop_ns); + frame_stat->decoded_width = decoded_frame.width(); + frame_stat->decoded_height = decoded_frame.height(); + // Skip quality metrics calculation to not affect CPU usage. if (!config_.measure_cpu) { - frame_stat->psnr = - I420PSNR(input_frames_[frame_number].get(), &decoded_frame); - frame_stat->ssim = - I420SSIM(input_frames_[frame_number].get(), &decoded_frame); + CalculateFrameQuality(*input_frames_[frame_number], decoded_frame, + frame_stat); } // Delay erasing of input frames by one frame. 
The current frame might @@ -291,19 +410,96 @@ void VideoProcessor::FrameDecoded(const VideoFrame& decoded_frame) { input_frames_.erase(input_frames_.begin(), input_frame_erase_to); } - if (decoded_frame_writer_) { + if (decoded_frame_writers_) { ExtractBufferWithSize(decoded_frame, config_.codec_settings.width, config_.codec_settings.height, - &last_decoded_frame_buffer_); - WriteDecodedFrameToFile(&last_decoded_frame_buffer_); + &last_decoded_frame_buffers_[simulcast_svc_idx]); + WriteDecodedFrameToFile(&last_decoded_frame_buffers_[simulcast_svc_idx], + simulcast_svc_idx); } ++num_decoded_frames_; } -void VideoProcessor::WriteDecodedFrameToFile(rtc::Buffer* buffer) { - RTC_DCHECK_EQ(buffer->size(), decoded_frame_writer_->FrameLength()); - RTC_CHECK(decoded_frame_writer_->WriteFrame(buffer->data())); +void VideoProcessor::CopyEncodedImage(const EncodedImage& encoded_image, + const VideoCodecType codec, + size_t frame_number, + size_t simulcast_svc_idx) { + RTC_DCHECK_CALLED_SEQUENTIALLY(&sequence_checker_); + + EncodedImage base_image; + RTC_CHECK_EQ(base_image._length, 0); + + // Each SVC layer is decoded with dedicated decoder. Add data of base layers + // to current coded frame buffer. 
+ if (config_.NumberOfSpatialLayers() > 1 && simulcast_svc_idx > 0) { + RTC_CHECK(last_encoded_frames_.find(simulcast_svc_idx - 1) != + last_encoded_frames_.end()); + base_image = last_encoded_frames_[simulcast_svc_idx - 1]; + } + + const size_t payload_size_bytes = base_image._length + encoded_image._length; + const size_t buffer_size_bytes = + payload_size_bytes + EncodedImage::GetBufferPaddingBytes(codec); + + uint8_t* copied_buffer = new uint8_t[buffer_size_bytes]; + RTC_CHECK(copied_buffer); + + if (base_image._length) { + memcpy(copied_buffer, base_image._buffer, base_image._length); + } + + memcpy(copied_buffer + base_image._length, encoded_image._buffer, + encoded_image._length); + + EncodedImage copied_image = encoded_image; + copied_image = encoded_image; + copied_image._buffer = copied_buffer; + copied_image._length = payload_size_bytes; + copied_image._size = buffer_size_bytes; + + last_encoded_frames_[simulcast_svc_idx] = copied_image; +} + +void VideoProcessor::CalculateFrameQuality(const VideoFrame& ref_frame, + const VideoFrame& dec_frame, + FrameStatistic* frame_stat) { + if (ref_frame.width() == dec_frame.width() || + ref_frame.height() == dec_frame.height()) { + frame_stat->psnr = I420PSNR(&ref_frame, &dec_frame); + frame_stat->ssim = I420SSIM(&ref_frame, &dec_frame); + } else { + RTC_CHECK_GE(ref_frame.width(), dec_frame.width()); + RTC_CHECK_GE(ref_frame.height(), dec_frame.height()); + // Downscale reference frame. Use bilinear interpolation since it is used + // to get lowres inputs for encoder at simulcasting. + // TODO(ssilkin): Sync with VP9 SVC which uses 8-taps polyphase. 
+ rtc::scoped_refptr scaled_buffer = + I420Buffer::Create(dec_frame.width(), dec_frame.height()); + const I420BufferInterface& ref_buffer = + *ref_frame.video_frame_buffer()->ToI420(); + I420Scale(ref_buffer.DataY(), ref_buffer.StrideY(), ref_buffer.DataU(), + ref_buffer.StrideU(), ref_buffer.DataV(), ref_buffer.StrideV(), + ref_buffer.width(), ref_buffer.height(), + scaled_buffer->MutableDataY(), scaled_buffer->StrideY(), + scaled_buffer->MutableDataU(), scaled_buffer->StrideU(), + scaled_buffer->MutableDataV(), scaled_buffer->StrideV(), + scaled_buffer->width(), scaled_buffer->height(), + libyuv::kFilterBilinear); + frame_stat->psnr = + I420PSNR(*scaled_buffer, *dec_frame.video_frame_buffer()->ToI420()); + frame_stat->ssim = + I420SSIM(*scaled_buffer, *dec_frame.video_frame_buffer()->ToI420()); + } +} + +void VideoProcessor::WriteDecodedFrameToFile(rtc::Buffer* buffer, + size_t simulcast_svc_idx) { + RTC_CHECK(simulcast_svc_idx < decoded_frame_writers_->size()); + RTC_DCHECK_EQ(buffer->size(), + decoded_frame_writers_->at(simulcast_svc_idx)->FrameLength()); + RTC_CHECK(decoded_frame_writers_->at(simulcast_svc_idx) + ->WriteFrame(buffer->data())); } } // namespace test diff --git a/modules/video_coding/codecs/test/videoprocessor.h b/modules/video_coding/codecs/test/videoprocessor.h index 5467d41225..23efb8998e 100644 --- a/modules/video_coding/codecs/test/videoprocessor.h +++ b/modules/video_coding/codecs/test/videoprocessor.h @@ -48,19 +48,23 @@ namespace test { // Note this class is not thread safe and is meant for simple testing purposes. 
class VideoProcessor { public: + using VideoDecoderList = std::vector>; + using IvfFileWriterList = std::vector>; + using FrameWriterList = std::vector>; + VideoProcessor(webrtc::VideoEncoder* encoder, - webrtc::VideoDecoder* decoder, - FrameReader* analysis_frame_reader, + VideoDecoderList* decoders, + FrameReader* input_frame_reader, const TestConfig& config, - Stats* stats, - IvfFileWriter* encoded_frame_writer, - FrameWriter* decoded_frame_writer); + std::vector* stats, + IvfFileWriterList* encoded_frame_writers, + FrameWriterList* decoded_frame_writers); ~VideoProcessor(); - // Reads a frame from the analysis frame reader and sends it to the encoder. - // When the encode callback is received, the encoded frame is sent to the - // decoder. The decoded frame is written to disk by the analysis frame writer. - // Objective video quality metrics can thus be calculated after the fact. + // Reads a frame and sends it to the encoder. When the encode callback + // is received, the encoded frame is buffered. After encoding is finished + // buffered frame is sent to decoder. Quality evaluation is done in + // the decode callback. void ProcessFrame(); // Updates the encoder with target rates. Must be called at least once. @@ -88,8 +92,7 @@ class VideoProcessor { return Result(Result::OK, 0); } - video_processor_->FrameEncoded(codec_specific_info->codecType, - encoded_image); + video_processor_->FrameEncoded(encoded_image, *codec_specific_info); return Result(Result::OK, 0); } @@ -107,8 +110,7 @@ class VideoProcessor { } bool Run() override { - video_processor_->FrameEncoded(codec_specific_info_.codecType, - encoded_image_); + video_processor_->FrameEncoded(encoded_image_, codec_specific_info_); return true; } @@ -158,18 +160,29 @@ class VideoProcessor { }; // Invoked by the callback adapter when a frame has completed encoding. 
- void FrameEncoded(webrtc::VideoCodecType codec, - const webrtc::EncodedImage& encodedImage); + void FrameEncoded(const webrtc::EncodedImage& encoded_image, + const webrtc::CodecSpecificInfo& codec_specific); // Invoked by the callback adapter when a frame has completed decoding. void FrameDecoded(const webrtc::VideoFrame& image); - void WriteDecodedFrameToFile(rtc::Buffer* buffer); + void CopyEncodedImage(const EncodedImage& encoded_image, + const VideoCodecType codec, + size_t frame_number, + size_t simulcast_svc_idx); + + void CalculateFrameQuality(const VideoFrame& ref_frame, + const VideoFrame& dec_frame, + FrameStatistic* frame_stat); + + void WriteDecodedFrameToFile(rtc::Buffer* buffer, size_t simulcast_svc_idx); TestConfig config_ RTC_GUARDED_BY(sequence_checker_); + const size_t num_simulcast_or_spatial_layers_; + webrtc::VideoEncoder* const encoder_; - webrtc::VideoDecoder* const decoder_; + VideoDecoderList* const decoders_; const std::unique_ptr bitrate_allocator_; BitrateAllocation bitrate_allocation_ RTC_GUARDED_BY(sequence_checker_); @@ -184,30 +197,41 @@ class VideoProcessor { std::map> input_frames_ RTC_GUARDED_BY(sequence_checker_); - // These (mandatory) file manipulators are used for, e.g., objective PSNR and - // SSIM calculations at the end of a test run. - FrameReader* const analysis_frame_reader_; + FrameReader* const input_frame_reader_; // These (optional) file writers are used to persistently store the encoded // and decoded bitstreams. The purpose is to give the experimenter an option // to subjectively evaluate the quality of the processing. Each frame writer // is enabled by being non-null. - IvfFileWriter* const encoded_frame_writer_; - FrameWriter* const decoded_frame_writer_; + IvfFileWriterList* const encoded_frame_writers_; + FrameWriterList* const decoded_frame_writers_; // Keep track of inputed/encoded/decoded frames, so we can detect frame drops. 
size_t last_inputed_frame_num_ RTC_GUARDED_BY(sequence_checker_); size_t last_encoded_frame_num_ RTC_GUARDED_BY(sequence_checker_); + size_t last_encoded_simulcast_svc_idx_ RTC_GUARDED_BY(sequence_checker_); size_t last_decoded_frame_num_ RTC_GUARDED_BY(sequence_checker_); size_t num_encoded_frames_ RTC_GUARDED_BY(sequence_checker_); size_t num_decoded_frames_ RTC_GUARDED_BY(sequence_checker_); + // Map of frame size (in pixels) to simulcast/spatial layer index. + std::map frame_wxh_to_simulcast_svc_idx_ + RTC_GUARDED_BY(sequence_checker_); + + // Encoder delivers coded frame layer-by-layer. We store coded frames and + // then, after all layers are encoded, decode them. Such separation of + // frame processing on superframe level simplifies encoding/decoding time + // measurement. + std::map last_encoded_frames_ + RTC_GUARDED_BY(sequence_checker_); + // Keep track of the last successfully decoded frame, since we write that - // frame to disk when decoding fails. - rtc::Buffer last_decoded_frame_buffer_ RTC_GUARDED_BY(sequence_checker_); + // frame to disk when frame got dropped or decoding fails. + std::map last_decoded_frame_buffers_ + RTC_GUARDED_BY(sequence_checker_); // Statistics. - Stats* stats_; + std::vector* const stats_; rtc::SequencedTaskChecker sequence_checker_; diff --git a/modules/video_coding/codecs/test/videoprocessor_integrationtest.cc b/modules/video_coding/codecs/test/videoprocessor_integrationtest.cc index 3673618cb2..ad59913165 100644 --- a/modules/video_coding/codecs/test/videoprocessor_integrationtest.cc +++ b/modules/video_coding/codecs/test/videoprocessor_integrationtest.cc @@ -351,16 +351,11 @@ std::vector VideoProcessorIntegrationTest::ExtractLayerStats( } } + // Target bitrate of extracted interval is bitrate of the highest + // spatial and temporal layer. 
target_bitrate_kbps = std::max(target_bitrate_kbps, superframe_stat.target_bitrate_kbps); - if (superframe_stat.encoding_successful) { - RTC_CHECK(superframe_stat.target_bitrate_kbps <= target_bitrate_kbps || - tl_idx == target_temporal_layer_number); - RTC_CHECK(superframe_stat.target_bitrate_kbps == target_bitrate_kbps || - tl_idx < target_temporal_layer_number); - } - layer_stats.push_back(superframe_stat); } } @@ -425,8 +420,14 @@ void VideoProcessorIntegrationTest::CreateEncoderAndDecoder() { const SdpVideoFormat format = CreateSdpVideoFormat(config_); encoder_ = encoder_factory->CreateVideoEncoder(format); - decoders_.push_back(std::unique_ptr( - decoder_factory->CreateVideoDecoder(format))); + + const size_t num_simulcast_or_spatial_layers = std::max( + config_.NumberOfSimulcastStreams(), config_.NumberOfSpatialLayers()); + + for (size_t i = 0; i < num_simulcast_or_spatial_layers; ++i) { + decoders_.push_back(std::unique_ptr( + decoder_factory->CreateVideoDecoder(format))); + } if (config_.sw_fallback_encoder) { encoder_ = rtc::MakeUnique( @@ -506,12 +507,10 @@ void VideoProcessorIntegrationTest::SetUpAndInitObjects( rtc::Event sync_event(false, false); task_queue->PostTask([this, &sync_event]() { processor_ = rtc::MakeUnique( - encoder_.get(), decoders_.at(0).get(), source_frame_reader_.get(), - config_, &stats_.at(0), - encoded_frame_writers_.empty() ? nullptr - : encoded_frame_writers_.at(0).get(), - decoded_frame_writers_.empty() ? nullptr - : decoded_frame_writers_.at(0).get()); + encoder_.get(), &decoders_, source_frame_reader_.get(), config_, + &stats_, + encoded_frame_writers_.empty() ? nullptr : &encoded_frame_writers_, + decoded_frame_writers_.empty() ? 
nullptr : &decoded_frame_writers_); sync_event.Set(); }); sync_event.Wait(rtc::Event::kForever); @@ -673,16 +672,24 @@ void VideoProcessorIntegrationTest::AnalyzeAndPrintStats( const float max_delta_frame_delay_sec = 8 * delta_frame_size_bytes.Max() / 1000 / target_bitrate_kbps; + printf("Frame width : %zu\n", + last_successfully_decoded_frame.decoded_width); + printf("Frame height : %zu\n", + last_successfully_decoded_frame.decoded_height); printf("Target bitrate : %f kbps\n", target_bitrate_kbps); printf("Encoded bitrate : %f kbps\n", encoded_bitrate_kbps); printf("Bitrate mismatch : %f %%\n", bitrate_mismatch_percent); printf("Time to reach target bitrate : %f sec\n", time_to_reach_target_bitrate_sec); printf("Target framerate : %f fps\n", target_framerate_fps); - printf("Encoding framerate : %f fps\n", encoded_framerate_fps); - printf("Decoding framerate : %f fps\n", decoded_framerate_fps); + printf("Encoded framerate : %f fps\n", encoded_framerate_fps); + printf("Decoded framerate : %f fps\n", decoded_framerate_fps); printf("Frame encoding time : %f us\n", encoding_time_us.Mean()); printf("Frame decoding time : %f us\n", decoding_time_us.Mean()); + printf("Encoding framerate : %f fps\n", + 1000000 / encoding_time_us.Mean()); + printf("Decoding framerate : %f fps\n", + 1000000 / decoding_time_us.Mean()); printf("Framerate mismatch percent : %f %%\n", framerate_mismatch_percent); printf("Avg buffer level : %f sec\n", buffer_level_sec.Mean()); diff --git a/modules/video_coding/codecs/test/videoprocessor_integrationtest_libvpx.cc b/modules/video_coding/codecs/test/videoprocessor_integrationtest_libvpx.cc index a2ade2d761..54b6f6cda5 100644 --- a/modules/video_coding/codecs/test/videoprocessor_integrationtest_libvpx.cc +++ b/modules/video_coding/codecs/test/videoprocessor_integrationtest_libvpx.cc @@ -209,10 +209,9 @@ TEST_F(VideoProcessorIntegrationTestLibvpx, HighBitrateVP8) { // Too slow to finish before timeout on iOS. See webrtc:4755. 
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) -#define MAYBE_ProcessNoLossChangeBitRateVP8 \ - DISABLED_ProcessNoLossChangeBitRateVP8 +#define MAYBE_ChangeBitrateVP8 DISABLED_ChangeBitrateVP8 #else -#define MAYBE_ProcessNoLossChangeBitRateVP8 ProcessNoLossChangeBitRateVP8 +#define MAYBE_ChangeBitrateVP8 ChangeBitrateVP8 #endif TEST_F(VideoProcessorIntegrationTestLibvpx, MAYBE_ChangeBitrateVP8) { config_.SetCodecSettings(kVideoCodecVP8, 1, 1, 1, false, true, true, false, @@ -242,11 +241,9 @@ TEST_F(VideoProcessorIntegrationTestLibvpx, MAYBE_ChangeBitrateVP8) { // Too slow to finish before timeout on iOS. See webrtc:4755. #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) -#define MAYBE_ProcessNoLossChangeFrameRateFrameDropVP8 \ - DISABLED_ProcessNoLossChangeFrameRateFrameDropVP8 +#define MAYBE_ChangeFramerateVP8 DISABLED_ChangeFramerateVP8 #else -#define MAYBE_ProcessNoLossChangeFrameRateFrameDropVP8 \ - ProcessNoLossChangeFrameRateFrameDropVP8 +#define MAYBE_ChangeFramerateVP8 ChangeFramerateVP8 #endif TEST_F(VideoProcessorIntegrationTestLibvpx, MAYBE_ChangeFramerateVP8) { config_.SetCodecSettings(kVideoCodecVP8, 1, 1, 1, false, true, true, false, @@ -282,10 +279,9 @@ TEST_F(VideoProcessorIntegrationTestLibvpx, MAYBE_ChangeFramerateVP8) { // Too slow to finish before timeout on iOS. See webrtc:4755. #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) -#define MAYBE_ProcessNoLossTemporalLayersVP8 \ - DISABLED_ProcessNoLossTemporalLayersVP8 +#define MAYBE_TemporalLayersVP8 DISABLED_TemporalLayersVP8 #else -#define MAYBE_ProcessNoLossTemporalLayersVP8 ProcessNoLossTemporalLayersVP8 +#define MAYBE_TemporalLayersVP8 TemporalLayersVP8 #endif TEST_F(VideoProcessorIntegrationTestLibvpx, MAYBE_TemporalLayersVP8) { config_.SetCodecSettings(kVideoCodecVP8, 1, 1, 3, false, true, true, false, @@ -314,5 +310,53 @@ TEST_F(VideoProcessorIntegrationTestLibvpx, MAYBE_TemporalLayersVP8) { kNoVisualizationParams); } +// Might be too slow on mobile platforms. 
+#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) +#define MAYBE_SimulcastVP8 DISABLED_SimulcastVP8 +#else +#define MAYBE_SimulcastVP8 SimulcastVP8 +#endif +TEST_F(VideoProcessorIntegrationTestLibvpx, MAYBE_SimulcastVP8) { + config_.filename = "ConferenceMotion_1280_720_50"; + config_.input_filename = ResourcePath(config_.filename, "yuv"); + config_.num_frames = 100; + config_.SetCodecSettings(kVideoCodecVP8, 3, 1, 3, false, true, true, false, + kResilienceOn, 1280, 720); + + std::vector rate_profiles = {{1500, 30, config_.num_frames}}; + + std::vector rc_thresholds = { + {5, 1, 5, 0.2, 0.3, 0.1, 0, 1}}; + std::vector quality_thresholds = {{34, 32, 0.90, 0.88}}; + + ProcessFramesAndMaybeVerify(rate_profiles, &rc_thresholds, + &quality_thresholds, nullptr, + kNoVisualizationParams); +} + +// Might be too slow on mobile platforms. +#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) +#define MAYBE_SvcVP9 DISABLED_SvcVP9 +#else +#define MAYBE_SvcVP9 SvcVP9 +#endif +TEST_F(VideoProcessorIntegrationTestLibvpx, MAYBE_SvcVP9) { + config_.filename = "ConferenceMotion_1280_720_50"; + config_.input_filename = ResourcePath(config_.filename, "yuv"); + config_.num_frames = 100; + config_.SetCodecSettings(kVideoCodecVP9, 1, 3, 3, false, true, true, false, + kResilienceOn, 1280, 720); + + std::vector rate_profiles = {{1500, 30, config_.num_frames}}; + + std::vector rc_thresholds = { + {5, 1, 5, 0.2, 0.3, 0.1, 0, 1}}; + std::vector quality_thresholds = {{36, 34, 0.93, 0.91}}; + + ProcessFramesAndMaybeVerify(rate_profiles, &rc_thresholds, + &quality_thresholds, nullptr, + kNoVisualizationParams); +} + } // namespace test } // namespace webrtc diff --git a/modules/video_coding/codecs/test/videoprocessor_integrationtest_parameterized.cc b/modules/video_coding/codecs/test/videoprocessor_integrationtest_parameterized.cc index e58ebe5408..0cd6916bcb 100644 --- a/modules/video_coding/codecs/test/videoprocessor_integrationtest_parameterized.cc +++ 
b/modules/video_coding/codecs/test/videoprocessor_integrationtest_parameterized.cc @@ -23,8 +23,9 @@ const VideoCodecType kVideoCodecType[] = {kVideoCodecVP8}; const bool kHwCodec[] = {false}; // Codec settings. +const int kNumSpatialLayers = 1; const int kNumTemporalLayers = 1; -const bool kResilienceOn = kNumTemporalLayers > 1; +const bool kResilienceOn = kNumSpatialLayers > 1 || kNumTemporalLayers > 1; const bool kDenoisingOn = false; const bool kErrorConcealmentOn = false; const bool kSpatialResizeOn = false; @@ -67,7 +68,14 @@ class VideoProcessorIntegrationTestParameterized config_.hw_encoder = hw_codec_; config_.hw_decoder = hw_codec_; config_.num_frames = kNumFrames; - config_.SetCodecSettings(codec_type_, 1, 1, kNumTemporalLayers, + + const size_t num_simulcast_streams = + codec_type_ == kVideoCodecVP8 ? kNumSpatialLayers : 1; + const size_t num_spatial_layers = + codec_type_ == kVideoCodecVP9 ? kNumSpatialLayers : 1; + + config_.SetCodecSettings(codec_type_, num_simulcast_streams, + num_spatial_layers, kNumTemporalLayers, kErrorConcealmentOn, kDenoisingOn, kFrameDropperOn, kSpatialResizeOn, kResilienceOn, width, height); diff --git a/modules/video_coding/codecs/test/videoprocessor_unittest.cc b/modules/video_coding/codecs/test/videoprocessor_unittest.cc index 6f3373366d..f5dcdc8ce0 100644 --- a/modules/video_coding/codecs/test/videoprocessor_unittest.cc +++ b/modules/video_coding/codecs/test/videoprocessor_unittest.cc @@ -45,34 +45,40 @@ class VideoProcessorTest : public testing::Test { config_.codec_settings.width = kWidth; config_.codec_settings.height = kHeight; + stats_.resize(1); + + decoder_mock_ = new MockVideoDecoder(); + decoders_.push_back(std::unique_ptr(decoder_mock_)); + ExpectInit(); EXPECT_CALL(frame_reader_mock_, FrameLength()) .WillRepeatedly(Return(kFrameSize)); video_processor_ = rtc::MakeUnique( - &encoder_mock_, &decoder_mock_, &frame_reader_mock_, config_, &stats_, + &encoder_mock_, &decoders_, &frame_reader_mock_, config_, &stats_, 
nullptr /* encoded_frame_writer */, nullptr /* decoded_frame_writer */); } void ExpectInit() { EXPECT_CALL(encoder_mock_, InitEncode(_, _, _)).Times(1); EXPECT_CALL(encoder_mock_, RegisterEncodeCompleteCallback(_)).Times(1); - EXPECT_CALL(decoder_mock_, InitDecode(_, _)).Times(1); - EXPECT_CALL(decoder_mock_, RegisterDecodeCompleteCallback(_)).Times(1); + EXPECT_CALL(*decoder_mock_, InitDecode(_, _)).Times(1); + EXPECT_CALL(*decoder_mock_, RegisterDecodeCompleteCallback(_)).Times(1); } void ExpectRelease() { EXPECT_CALL(encoder_mock_, Release()).Times(1); EXPECT_CALL(encoder_mock_, RegisterEncodeCompleteCallback(_)).Times(1); - EXPECT_CALL(decoder_mock_, Release()).Times(1); - EXPECT_CALL(decoder_mock_, RegisterDecodeCompleteCallback(_)).Times(1); + EXPECT_CALL(*decoder_mock_, Release()).Times(1); + EXPECT_CALL(*decoder_mock_, RegisterDecodeCompleteCallback(_)).Times(1); } TestConfig config_; MockVideoEncoder encoder_mock_; - MockVideoDecoder decoder_mock_; + MockVideoDecoder* decoder_mock_; + std::vector> decoders_; MockFrameReader frame_reader_mock_; - Stats stats_; + std::vector stats_; std::unique_ptr video_processor_; };