diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn index c75b433cd4..d9e614ff81 100644 --- a/modules/video_coding/BUILD.gn +++ b/modules/video_coding/BUILD.gn @@ -1005,7 +1005,9 @@ if (rtc_include_tests) { "../../api/video:resolution", "../../api/video_codecs:builtin_video_decoder_factory", "../../api/video_codecs:builtin_video_encoder_factory", + "../../modules/video_coding/svc:scalability_mode_util", "../../rtc_base:logging", + "../../rtc_base:stringutils", "../../test:fileutils", "../../test:test_flags", "../../test:test_main", diff --git a/modules/video_coding/codecs/test/video_codec_test.cc b/modules/video_coding/codecs/test/video_codec_test.cc index 08961a312b..055d853b2b 100644 --- a/modules/video_coding/codecs/test/video_codec_test.cc +++ b/modules/video_coding/codecs/test/video_codec_test.cc @@ -23,19 +23,48 @@ #if defined(WEBRTC_ANDROID) #include "modules/video_coding/codecs/test/android_codec_factory_helper.h" #endif +#include "modules/video_coding/svc/scalability_mode_util.h" #include "rtc_base/logging.h" +#include "rtc_base/strings/str_join.h" +#include "rtc_base/strings/string_builder.h" #include "test/gtest.h" #include "test/test_flags.h" #include "test/testsupport/file_utils.h" #include "test/video_codec_tester.h" +ABSL_FLAG(std::string, + video_name, + "FourPeople_1280x720_30", + "Name of input video sequence."); +ABSL_FLAG(std::string, + encoder, + "libaom-av1", + "Encoder: libaom-av1, libvpx-vp9, libvpx-vp8, openh264, hw-vp8, " + "hw-vp9, hw-av1, hw-h264, hw-h265"); +ABSL_FLAG(std::string, + decoder, + "dav1d", + "Decoder: dav1d, libvpx-vp9, libvpx-vp8, ffmpeg-h264, hw-vp8, " + "hw-vp9, hw-av1, hw-h264, hw-h265"); +ABSL_FLAG(std::string, scalability_mode, "L1T1", "Scalability mode."); +ABSL_FLAG(int, width, 1280, "Width."); +ABSL_FLAG(int, height, 720, "Height."); +ABSL_FLAG(std::vector, + bitrate_kbps, + {"1024"}, + "Encode target bitrate per layer (l0t0,l0t1,...l1t0,l1t1 and so on) " + "in kbps."); +ABSL_FLAG(double, + 
framerate_fps, + 30.0, + "Encode target frame rate of the top temporal layer in fps."); +ABSL_FLAG(int, num_frames, 300, "Number of frames to encode and/or decode."); +ABSL_FLAG(std::string, test_name, "", "Test name."); ABSL_FLAG(bool, dump_decoder_input, false, "Dump decoder input."); - ABSL_FLAG(bool, dump_decoder_output, false, "Dump decoder output."); - ABSL_FLAG(bool, dump_encoder_input, false, "Dump encoder input."); - ABSL_FLAG(bool, dump_encoder_output, false, "Dump encoder output."); +ABSL_FLAG(bool, write_csv, false, "Write metrics to a CSV file."); namespace webrtc { namespace test { @@ -55,13 +84,58 @@ struct VideoInfo { Frequency framerate; }; -const VideoInfo kFourPeople_1280x720_30 = { - .name = "FourPeople_1280x720_30", - .resolution = {.width = 1280, .height = 720}, - .framerate = Frequency::Hertz(30)}; +const std::map kRawVideos = { + {"FourPeople_1280x720_30", + {.name = "FourPeople_1280x720_30", + .resolution = {.width = 1280, .height = 720}, + .framerate = Frequency::Hertz(30)}}, + {"vidyo1_1280x720_30", + {.name = "vidyo1_1280x720_30", + .resolution = {.width = 1280, .height = 720}, + .framerate = Frequency::Hertz(30)}}, + {"vidyo4_1280x720_30", + {.name = "vidyo4_1280x720_30", + .resolution = {.width = 1280, .height = 720}, + .framerate = Frequency::Hertz(30)}}, + {"KristenAndSara_1280x720_30", + {.name = "KristenAndSara_1280x720_30", + .resolution = {.width = 1280, .height = 720}, + .framerate = Frequency::Hertz(30)}}, + {"Johnny_1280x720_30", + {.name = "Johnny_1280x720_30", + .resolution = {.width = 1280, .height = 720}, + .framerate = Frequency::Hertz(30)}}}; static constexpr Frequency k90kHz = Frequency::Hertz(90000); +std::string CodecNameToCodecType(std::string name) { + if (name.find("av1") != std::string::npos) { + return "AV1"; + } + if (name.find("vp9") != std::string::npos) { + return "VP9"; + } + if (name.find("vp8") != std::string::npos) { + return "VP8"; + } + if (name.find("h264") != std::string::npos) { + return "H264"; + } 
+ if (name.find("h265") != std::string::npos) { + return "H265"; + } + RTC_CHECK_NOTREACHED(); +} + +// TODO(webrtc:14852): Make Create[Encoder,Decoder]Factory work with codec +// name directly. +std::string CodecNameToCodecImpl(std::string name) { + if (name.find("hw") != std::string::npos) { + return "mediacodec"; + } + return "builtin"; +} + std::unique_ptr CreateEncoderFactory(std::string impl) { if (impl == "builtin") { return CreateBuiltinVideoEncoderFactory(); @@ -86,10 +160,17 @@ std::unique_ptr CreateDecoderFactory(std::string impl) { #endif } +std::string TestName() { + std::string test_name = absl::GetFlag(FLAGS_test_name); + if (!test_name.empty()) { + return test_name; + } + return ::testing::UnitTest::GetInstance()->current_test_info()->name(); +} + std::string TestOutputPath() { std::string output_path = - OutputPath() + - ::testing::UnitTest::GetInstance()->current_test_info()->name(); + (rtc::StringBuilder() << OutputPath() << TestName()).str(); std::string output_dir = DirName(output_path); bool result = CreateDir(output_dir); RTC_CHECK(result) << "Cannot create " << output_dir; @@ -98,7 +179,6 @@ std::string TestOutputPath() { } // namespace std::unique_ptr RunEncodeDecodeTest( - std::string codec_type, std::string codec_impl, const VideoInfo& video_info, const std::map& encoding_settings) { @@ -239,7 +319,7 @@ TEST_P(SpatialQualityTest, SpatialQuality) { {bitrate_kbps}, framerate_fps, num_frames); std::unique_ptr stats = - RunEncodeDecodeTest(codec_type, codec_impl, video_info, frames_settings); + RunEncodeDecodeTest(codec_impl, video_info, frames_settings); VideoCodecStats::Stream stream; if (stats != nullptr) { @@ -252,6 +332,7 @@ TEST_P(SpatialQualityTest, SpatialQuality) { stream.LogMetrics( GetGlobalMetricsLogger(), ::testing::UnitTest::GetInstance()->current_test_info()->name(), + /*prefix=*/"", /*metadata=*/ {{"video_name", video_info.name}, {"codec_type", codec_type}, @@ -267,7 +348,7 @@ INSTANTIATE_TEST_SUITE_P( #else 
Values("builtin"), #endif - Values(kFourPeople_1280x720_30), + Values(kRawVideos.at("FourPeople_1280x720_30")), Values(std::make_tuple(320, 180, 30, 32, 28), std::make_tuple(320, 180, 30, 64, 30), std::make_tuple(320, 180, 30, 128, 33), @@ -337,6 +418,7 @@ TEST_P(BitrateAdaptationTest, BitrateAdaptation) { stream.LogMetrics( GetGlobalMetricsLogger(), ::testing::UnitTest::GetInstance()->current_test_info()->name(), + /*prefix=*/"", /*metadata=*/ {{"codec_type", codec_type}, {"codec_impl", codec_impl}, @@ -345,18 +427,18 @@ TEST_P(BitrateAdaptationTest, BitrateAdaptation) { std::to_string(bitrate_kbps.second)}}); } -INSTANTIATE_TEST_SUITE_P(All, - BitrateAdaptationTest, - Combine(Values("AV1", "VP9", "VP8", "H264", "H265"), +INSTANTIATE_TEST_SUITE_P( + All, + BitrateAdaptationTest, + Combine(Values("AV1", "VP9", "VP8", "H264", "H265"), #if defined(WEBRTC_ANDROID) - Values("builtin", "mediacodec"), + Values("builtin", "mediacodec"), #else - Values("builtin"), + Values("builtin"), #endif - Values(kFourPeople_1280x720_30), - Values(std::pair(1024, 512), - std::pair(512, 1024))), - BitrateAdaptationTest::TestParamsToString); + Values(kRawVideos.at("FourPeople_1280x720_30")), + Values(std::pair(1024, 512), std::pair(512, 1024))), + BitrateAdaptationTest::TestParamsToString); class FramerateAdaptationTest : public ::testing::TestWithParamcurrent_test_info()->name(), + /*prefix=*/"", /*metadata=*/ {{"codec_type", codec_type}, {"codec_impl", codec_impl}, @@ -424,17 +507,72 @@ TEST_P(FramerateAdaptationTest, FramerateAdaptation) { std::to_string(framerate_fps.second)}}); } -INSTANTIATE_TEST_SUITE_P(All, - FramerateAdaptationTest, - Combine(Values("AV1", "VP9", "VP8", "H264", "H265"), +INSTANTIATE_TEST_SUITE_P( + All, + FramerateAdaptationTest, + Combine(Values("AV1", "VP9", "VP8", "H264", "H265"), #if defined(WEBRTC_ANDROID) - Values("builtin", "mediacodec"), + Values("builtin", "mediacodec"), #else - Values("builtin"), + Values("builtin"), #endif - 
- Values(kFourPeople_1280x720_30), - Values(std::pair(30, 15), std::pair(15, 30))), - FramerateAdaptationTest::TestParamsToString); + Values(kRawVideos.at("FourPeople_1280x720_30")), + Values(std::pair(30, 15), std::pair(15, 30))), + FramerateAdaptationTest::TestParamsToString); + +TEST(VideoCodecTest, DISABLED_EncodeDecode) { + std::vector bitrate_str = absl::GetFlag(FLAGS_bitrate_kbps); + std::vector bitrate_kbps; + std::transform(bitrate_str.begin(), bitrate_str.end(), + std::back_inserter(bitrate_kbps), + [](const std::string& str) { return std::stoi(str); }); + + std::map frames_settings = + VideoCodecTester::CreateEncodingSettings( + CodecNameToCodecType(absl::GetFlag(FLAGS_encoder)), + absl::GetFlag(FLAGS_scalability_mode), absl::GetFlag(FLAGS_width), + absl::GetFlag(FLAGS_height), {bitrate_kbps}, + absl::GetFlag(FLAGS_framerate_fps), absl::GetFlag(FLAGS_num_frames)); + + // TODO(webrtc:14852): Pass encoder and decoder names directly, and update + // logged test name (implies losing history in the chromeperf dashboard). + // Sync with changes in Stream::LogMetrics (see TODOs there). + std::unique_ptr stats = RunEncodeDecodeTest( + CodecNameToCodecImpl(absl::GetFlag(FLAGS_encoder)), + kRawVideos.at(absl::GetFlag(FLAGS_video_name)), frames_settings); + ASSERT_NE(nullptr, stats); + + // Log unsliced metrics. + VideoCodecStats::Stream stream = stats->Aggregate(Filter{}); + stream.LogMetrics(GetGlobalMetricsLogger(), TestName(), /*prefix=*/"", + /*metadata=*/{}); + + // Log metrics sliced on spatial and temporal layer. 
+ ScalabilityMode scalability_mode = + *ScalabilityModeFromString(absl::GetFlag(FLAGS_scalability_mode)); + int num_spatial_layers = ScalabilityModeToNumSpatialLayers(scalability_mode); + int num_temporal_layers = + ScalabilityModeToNumTemporalLayers(scalability_mode); + for (int sidx = 0; sidx < num_spatial_layers; ++sidx) { + for (int tidx = 0; tidx < num_temporal_layers; ++tidx) { + std::string metric_name_prefix = + (rtc::StringBuilder() << "s" << sidx << "t" << tidx << "_").str(); + stream = stats->Aggregate( + {.layer_id = {{.spatial_idx = sidx, .temporal_idx = tidx}}}); + stream.LogMetrics(GetGlobalMetricsLogger(), TestName(), + metric_name_prefix, + /*metadata=*/{}); + } + } + + if (absl::GetFlag(FLAGS_write_csv)) { + stats->LogMetrics( + (rtc::StringBuilder() << TestOutputPath() << ".csv").str(), + stats->Slice(Filter{}, /*merge=*/false), /*metadata=*/ + {{"test_name", TestName()}}); + } +} + } // namespace test } // namespace webrtc diff --git a/test/video_codec_tester.cc b/test/video_codec_tester.cc index 26f0a61372..38d97eadbc 100644 --- a/test/video_codec_tester.cc +++ b/test/video_codec_tester.cc @@ -455,10 +455,12 @@ class VideoCodecAnalyzer : public VideoCodecTester::VideoCodecStats { continue; } if (filter.layer_id) { - if ((is_svc && - frame.layer_id.spatial_idx > filter.layer_id->spatial_idx) || - (!is_svc && - frame.layer_id.spatial_idx != filter.layer_id->spatial_idx)) { + if (is_svc && + frame.layer_id.spatial_idx > filter.layer_id->spatial_idx) { + continue; + } + if (!is_svc && + frame.layer_id.spatial_idx != filter.layer_id->spatial_idx) { continue; } if (frame.layer_id.temporal_idx > filter.layer_id->temporal_idx) { @@ -592,6 +594,61 @@ class VideoCodecAnalyzer : public VideoCodecTester::VideoCodecStats { return stream; } + void LogMetrics(absl::string_view csv_path, + std::vector frames, + std::map metadata) const { + RTC_LOG(LS_INFO) << "Write metrics to " << csv_path; + FILE* csv_file = fopen(csv_path.data(), "w"); + const std::string 
delimiter = ";"; + rtc::StringBuilder header; + header + << "timestamp_rtp;spatial_idx;temporal_idx;width;height;frame_size_" + "bytes;keyframe;qp;encode_time_us;decode_time_us;psnr_y_db;psnr_u_" + "db;psnr_v_db;target_bitrate_kbps;target_framerate_fps"; + for (const auto& data : metadata) { + header << ";" << data.first; + } + fwrite(header.str().c_str(), 1, header.size(), csv_file); + + for (const Frame& f : frames) { + rtc::StringBuilder row; + row << "\n" << f.timestamp_rtp; + row << ";" << f.layer_id.spatial_idx; + row << ";" << f.layer_id.temporal_idx; + row << ";" << f.width; + row << ";" << f.height; + row << ";" << f.frame_size.bytes(); + row << ";" << f.keyframe; + row << ";"; + if (f.qp) { + row << *f.qp; + } + row << ";" << f.encode_time.us(); + row << ";" << f.decode_time.us(); + if (f.psnr) { + row << ";" << f.psnr->y; + row << ";" << f.psnr->u; + row << ";" << f.psnr->v; + } else { + row << ";;;"; + } + + const auto& es = encoding_settings_.at(f.timestamp_rtp); + row << ";" + << f.target_bitrate.value_or(GetTargetBitrate(es, f.layer_id)).kbps(); + row << ";" + << f.target_framerate.value_or(GetTargetFramerate(es, f.layer_id)) + .hertz(); + + for (const auto& data : metadata) { + row << ";" << data.second; + } + fwrite(row.str().c_str(), 1, row.size(), csv_file); + } + + fclose(csv_file); + } + void Flush() { task_queue_.WaitForPreviouslyPostedTasks(); } private: @@ -1077,55 +1134,60 @@ SplitBitrateAndUpdateScalabilityMode(std::string codec_type, void VideoCodecStats::Stream::LogMetrics( MetricsLogger* logger, std::string test_case_name, + std::string prefix, std::map metadata) const { - logger->LogMetric("width", test_case_name, width, Unit::kCount, + logger->LogMetric(prefix + "width", test_case_name, width, Unit::kCount, ImprovementDirection::kBiggerIsBetter, metadata); - logger->LogMetric("height", test_case_name, height, Unit::kCount, + logger->LogMetric(prefix + "height", test_case_name, height, Unit::kCount, 
ImprovementDirection::kBiggerIsBetter, metadata); - logger->LogMetric("frame_size_bytes", test_case_name, frame_size_bytes, - Unit::kBytes, ImprovementDirection::kNeitherIsBetter, - metadata); - logger->LogMetric("keyframe", test_case_name, keyframe, Unit::kCount, + logger->LogMetric(prefix + "frame_size_bytes", test_case_name, + frame_size_bytes, Unit::kBytes, + ImprovementDirection::kNeitherIsBetter, metadata); + logger->LogMetric(prefix + "keyframe", test_case_name, keyframe, Unit::kCount, ImprovementDirection::kSmallerIsBetter, metadata); - logger->LogMetric("qp", test_case_name, qp, Unit::kUnitless, + logger->LogMetric(prefix + "qp", test_case_name, qp, Unit::kUnitless, ImprovementDirection::kSmallerIsBetter, metadata); - logger->LogMetric("encode_time_ms", test_case_name, encode_time_ms, + // TODO(webrtc:14852): Change to us or even ns. + logger->LogMetric(prefix + "encode_time_ms", test_case_name, encode_time_ms, Unit::kMilliseconds, ImprovementDirection::kSmallerIsBetter, metadata); - logger->LogMetric("decode_time_ms", test_case_name, decode_time_ms, + logger->LogMetric(prefix + "decode_time_ms", test_case_name, decode_time_ms, Unit::kMilliseconds, ImprovementDirection::kSmallerIsBetter, metadata); // TODO(webrtc:14852): Change to kUnitLess. kKilobitsPerSecond are converted // to bytes per second in Chromeperf dash. - logger->LogMetric("target_bitrate_kbps", test_case_name, target_bitrate_kbps, - Unit::kKilobitsPerSecond, + logger->LogMetric(prefix + "target_bitrate_kbps", test_case_name, + target_bitrate_kbps, Unit::kKilobitsPerSecond, ImprovementDirection::kBiggerIsBetter, metadata); - logger->LogMetric("target_framerate_fps", test_case_name, + logger->LogMetric(prefix + "target_framerate_fps", test_case_name, target_framerate_fps, Unit::kHertz, ImprovementDirection::kBiggerIsBetter, metadata); // TODO(webrtc:14852): Change to kUnitLess. kKilobitsPerSecond are converted // to bytes per second in Chromeperf dash. 
- logger->LogMetric("encoded_bitrate_kbps", test_case_name, + logger->LogMetric(prefix + "encoded_bitrate_kbps", test_case_name, encoded_bitrate_kbps, Unit::kKilobitsPerSecond, ImprovementDirection::kBiggerIsBetter, metadata); - logger->LogMetric("encoded_framerate_fps", test_case_name, + logger->LogMetric(prefix + "encoded_framerate_fps", test_case_name, encoded_framerate_fps, Unit::kHertz, ImprovementDirection::kBiggerIsBetter, metadata); - logger->LogMetric("bitrate_mismatch_pct", test_case_name, + logger->LogMetric(prefix + "bitrate_mismatch_pct", test_case_name, bitrate_mismatch_pct, Unit::kPercent, ImprovementDirection::kNeitherIsBetter, metadata); - logger->LogMetric("framerate_mismatch_pct", test_case_name, + logger->LogMetric(prefix + "framerate_mismatch_pct", test_case_name, framerate_mismatch_pct, Unit::kPercent, ImprovementDirection::kNeitherIsBetter, metadata); - logger->LogMetric("transmission_time_ms", test_case_name, + logger->LogMetric(prefix + "transmission_time_ms", test_case_name, transmission_time_ms, Unit::kMilliseconds, ImprovementDirection::kSmallerIsBetter, metadata); - logger->LogMetric("psnr_y_db", test_case_name, psnr.y, Unit::kUnitless, - ImprovementDirection::kBiggerIsBetter, metadata); - logger->LogMetric("psnr_u_db", test_case_name, psnr.u, Unit::kUnitless, - ImprovementDirection::kBiggerIsBetter, metadata); - logger->LogMetric("psnr_v_db", test_case_name, psnr.v, Unit::kUnitless, - ImprovementDirection::kBiggerIsBetter, metadata); + logger->LogMetric(prefix + "psnr_y_db", test_case_name, psnr.y, + Unit::kUnitless, ImprovementDirection::kBiggerIsBetter, + metadata); + logger->LogMetric(prefix + "psnr_u_db", test_case_name, psnr.u, + Unit::kUnitless, ImprovementDirection::kBiggerIsBetter, + metadata); + logger->LogMetric(prefix + "psnr_v_db", test_case_name, psnr.v, + Unit::kUnitless, ImprovementDirection::kBiggerIsBetter, + metadata); } // TODO(ssilkin): use Frequency and DataRate for framerate and bitrate. 
diff --git a/test/video_codec_tester.h b/test/video_codec_tester.h index dc72645c18..87cc5f76f8 100644 --- a/test/video_codec_tester.h +++ b/test/video_codec_tester.h @@ -68,7 +68,6 @@ class VideoCodecTester { }; struct Frame { - int frame_num = 0; uint32_t timestamp_rtp = 0; LayerId layer_id; bool encoded = false; @@ -118,6 +117,7 @@ class VideoCodecTester { // Logs `Stream` metrics to provided `MetricsLogger`. void LogMetrics(MetricsLogger* logger, std::string test_case_name, + std::string prefix, std::map metadata = {}) const; }; @@ -130,6 +130,12 @@ class VideoCodecTester { // Returns video statistics aggregated for the slice specified by `filter`. virtual Stream Aggregate(Filter filter) const = 0; + + // Write metrics to a CSV file. + virtual void LogMetrics( + absl::string_view csv_path, + std::vector frames, + std::map metadata) const = 0; }; // Pacing settings for codec input.