webrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc
Sergio Garcia Murillo 1389c4b594 Add 444 10 bits support for H264 and VP9
This CL adds support for I410 buffers (4:4:4, 10-bit) and modifies VP9 and H264 so that the input buffer can be converted to it when appropriate.

Bug: webrtc:14818
Change-Id: I2fb3dc9d80c5338944c6df74dd6217a0454180d9
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/290721
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Åsa Persson <asapersson@webrtc.org>
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#39123}
2023-01-17 12:32:26 +00:00

/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
*/
#ifdef RTC_ENABLE_VP9
#include "modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h"
#include <algorithm>
#include "absl/strings/match.h"
#include "api/transport/field_trial_based_config.h"
#include "api/video/color_space.h"
#include "api/video/i010_buffer.h"
#include "common_video/include/video_frame_buffer.h"
#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "third_party/libyuv/include/libyuv/convert.h"
#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"
namespace webrtc {
namespace {
// Helper function for extracting the VP9 color space.
ColorSpace ExtractVP9ColorSpace(vpx_color_space_t space_t,
vpx_color_range_t range_t,
unsigned int bit_depth) {
ColorSpace::PrimaryID primaries = ColorSpace::PrimaryID::kUnspecified;
ColorSpace::TransferID transfer = ColorSpace::TransferID::kUnspecified;
ColorSpace::MatrixID matrix = ColorSpace::MatrixID::kUnspecified;
switch (space_t) {
case VPX_CS_BT_601:
case VPX_CS_SMPTE_170:
primaries = ColorSpace::PrimaryID::kSMPTE170M;
transfer = ColorSpace::TransferID::kSMPTE170M;
matrix = ColorSpace::MatrixID::kSMPTE170M;
break;
case VPX_CS_SMPTE_240:
primaries = ColorSpace::PrimaryID::kSMPTE240M;
transfer = ColorSpace::TransferID::kSMPTE240M;
matrix = ColorSpace::MatrixID::kSMPTE240M;
break;
case VPX_CS_BT_709:
primaries = ColorSpace::PrimaryID::kBT709;
transfer = ColorSpace::TransferID::kBT709;
matrix = ColorSpace::MatrixID::kBT709;
break;
case VPX_CS_BT_2020:
primaries = ColorSpace::PrimaryID::kBT2020;
switch (bit_depth) {
case 8:
transfer = ColorSpace::TransferID::kBT709;
break;
case 10:
transfer = ColorSpace::TransferID::kBT2020_10;
break;
default:
RTC_DCHECK_NOTREACHED();
break;
}
matrix = ColorSpace::MatrixID::kBT2020_NCL;
break;
case VPX_CS_SRGB:
primaries = ColorSpace::PrimaryID::kBT709;
transfer = ColorSpace::TransferID::kIEC61966_2_1;
matrix = ColorSpace::MatrixID::kBT709;
break;
default:
break;
}
ColorSpace::RangeID range = ColorSpace::RangeID::kInvalid;
switch (range_t) {
case VPX_CR_STUDIO_RANGE:
range = ColorSpace::RangeID::kLimited;
break;
case VPX_CR_FULL_RANGE:
range = ColorSpace::RangeID::kFull;
break;
default:
break;
}
return ColorSpace(primaries, transfer, matrix, range);
}
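// Example of the mapping above: a full-range 8-bit BT.709 stream reported by
// libvpx as (VPX_CS_BT_709, VPX_CR_FULL_RANGE, 8) yields
// ColorSpace(PrimaryID::kBT709, TransferID::kBT709, MatrixID::kBT709,
// RangeID::kFull).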
} // namespace
LibvpxVp9Decoder::LibvpxVp9Decoder()
: decode_complete_callback_(nullptr),
inited_(false),
decoder_(nullptr),
key_frame_required_(true) {}
LibvpxVp9Decoder::~LibvpxVp9Decoder() {
inited_ = true; // in order to do the actual release
Release();
int num_buffers_in_use = libvpx_buffer_pool_.GetNumBuffersInUse();
if (num_buffers_in_use > 0) {
// The frame buffers are reference counted and frames are exposed after
// decoding. There may be valid use cases where previous frames are still
// referenced after ~LibvpxVp9Decoder; that is not a leak.
RTC_LOG(LS_INFO) << num_buffers_in_use
<< " Vp9FrameBuffers are still "
"referenced during ~LibvpxVp9Decoder.";
}
}
bool LibvpxVp9Decoder::Configure(const Settings& settings) {
if (Release() < 0) {
return false;
}
if (decoder_ == nullptr) {
decoder_ = new vpx_codec_ctx_t;
memset(decoder_, 0, sizeof(*decoder_));
}
vpx_codec_dec_cfg_t cfg;
memset(&cfg, 0, sizeof(cfg));
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
// We focus on fuzzing WebRTC here, not libvpx itself. Use a single thread
// for fuzzing, because:
// - libvpx's single-threaded VP9 decoder is more fuzzer friendly: it detects
// errors earlier than the multi-threaded version.
// - It keeps peak CPU usage under control (independent of the input).
cfg.threads = 1;
#else
const RenderResolution& resolution = settings.max_render_resolution();
if (!resolution.Valid()) {
// Postpone configuring number of threads until resolution is known.
cfg.threads = 1;
} else {
// We want to use multithreading when decoding high-resolution videos, but
// not too many threads, to avoid overhead when many streams are decoded
// concurrently.
// Target 2 threads for a 1280x720 pixel count, and scale up linearly from
// there - but cap at the physical core count.
// For common resolutions this results in:
// 1 for 360p
// 2 for 720p
// 4 for 1080p
// 8 for 1440p
// 18 for 4K
int num_threads = std::max(
1, 2 * resolution.Width() * resolution.Height() / (1280 * 720));
cfg.threads = std::min(settings.number_of_cores(), num_threads);
}
#endif
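// Worked example of the scaling above (integer division): for 1080p,
// 2 * 1920 * 1080 / (1280 * 720) = 4, so `cfg.threads` becomes
// std::min(settings.number_of_cores(), 4).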
current_settings_ = settings;
vpx_codec_flags_t flags = 0;
if (vpx_codec_dec_init(decoder_, vpx_codec_vp9_dx(), &cfg, flags)) {
return false;
}
if (!libvpx_buffer_pool_.InitializeVpxUsePool(decoder_)) {
return false;
}
inited_ = true;
// Always start with a complete key frame.
key_frame_required_ = true;
if (absl::optional<int> buffer_pool_size = settings.buffer_pool_size()) {
if (!libvpx_buffer_pool_.Resize(*buffer_pool_size)) {
return false;
}
}
vpx_codec_err_t status =
vpx_codec_control(decoder_, VP9D_SET_LOOP_FILTER_OPT, 1);
if (status != VPX_CODEC_OK) {
RTC_LOG(LS_ERROR) << "Failed to enable VP9D_SET_LOOP_FILTER_OPT. "
<< vpx_codec_error(decoder_);
return false;
}
return true;
}
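// Typical usage of this decoder (a sketch, not part of this file; assumes the
// Settings setters from api/video_codecs/video_decoder.h and caller-provided
// `callback` and `encoded_image`):
//
//   LibvpxVp9Decoder decoder;
//   VideoDecoder::Settings settings;
//   settings.set_max_render_resolution({1280, 720});
//   settings.set_number_of_cores(4);
//   decoder.Configure(settings);
//   decoder.RegisterDecodeCompleteCallback(&callback);
//   decoder.Decode(encoded_image, /*missing_frames=*/false,
//                  /*render_time_ms=*/0);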
int LibvpxVp9Decoder::Decode(const EncodedImage& input_image,
bool missing_frames,
int64_t /*render_time_ms*/) {
if (!inited_) {
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
if (decode_complete_callback_ == nullptr) {
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
if (input_image._frameType == VideoFrameType::kVideoFrameKey) {
absl::optional<Vp9UncompressedHeader> frame_info =
ParseUncompressedVp9Header(
rtc::MakeArrayView(input_image.data(), input_image.size()));
if (frame_info) {
RenderResolution frame_resolution(frame_info->frame_width,
frame_info->frame_height);
if (frame_resolution != current_settings_.max_render_resolution()) {
// Resolution has changed, tear down and re-init a new decoder in
// order to get correct sizing.
Release();
current_settings_.set_max_render_resolution(frame_resolution);
if (!Configure(current_settings_)) {
RTC_LOG(LS_WARNING) << "Failed to re-init decoder.";
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
}
} else {
RTC_LOG(LS_WARNING) << "Failed to parse VP9 header from key-frame.";
}
}
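// Illustration: if the stream switches from, say, 640x360 to 1280x720, the
// key frame's uncompressed header reports the new size, and the Release() +
// Configure() above rebuilds the decoder, which also rescales the thread
// count for the new resolution.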
// Always start with a complete key frame.
if (key_frame_required_) {
if (input_image._frameType != VideoFrameType::kVideoFrameKey)
return WEBRTC_VIDEO_CODEC_ERROR;
key_frame_required_ = false;
}
vpx_codec_iter_t iter = nullptr;
vpx_image_t* img;
const uint8_t* buffer = input_image.data();
if (input_image.size() == 0) {
buffer = nullptr; // Triggers full frame concealment.
}
// During decode libvpx may get and release buffers from
// `libvpx_buffer_pool_`. In practice libvpx keeps a few (~3-4) buffers alive
// at a time.
if (vpx_codec_decode(decoder_, buffer,
static_cast<unsigned int>(input_image.size()), 0,
VPX_DL_REALTIME)) {
return WEBRTC_VIDEO_CODEC_ERROR;
}
// `img->fb_priv` contains the image data, a reference counted Vp9FrameBuffer.
// It may be released by libvpx during future vpx_codec_decode or
// vpx_codec_destroy calls.
img = vpx_codec_get_frame(decoder_, &iter);
int qp;
vpx_codec_err_t vpx_ret =
vpx_codec_control(decoder_, VPXD_GET_LAST_QUANTIZER, &qp);
RTC_DCHECK_EQ(vpx_ret, VPX_CODEC_OK);
int ret =
ReturnFrame(img, input_image.Timestamp(), qp, input_image.ColorSpace());
if (ret != 0) {
return ret;
}
return WEBRTC_VIDEO_CODEC_OK;
}
int LibvpxVp9Decoder::ReturnFrame(
const vpx_image_t* img,
uint32_t timestamp,
int qp,
const webrtc::ColorSpace* explicit_color_space) {
if (img == nullptr) {
// Decoder OK and nullptr image => No show frame.
return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
}
// This buffer contains all of `img`'s image data, a reference counted
// Vp9FrameBuffer. (libvpx is done with the buffers after a few
// vpx_codec_decode calls or vpx_codec_destroy).
rtc::scoped_refptr<Vp9FrameBufferPool::Vp9FrameBuffer> img_buffer(
static_cast<Vp9FrameBufferPool::Vp9FrameBuffer*>(img->fb_priv));
// The buffer can be used directly by the VideoFrame (without copy) by
// using a Wrapped*Buffer.
rtc::scoped_refptr<VideoFrameBuffer> img_wrapped_buffer;
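// Note: the no-op lambdas passed to the Wrap*Buffer calls below capture
// `img_buffer` by value, so each wrapped buffer holds a reference to the
// Vp9FrameBuffer; the reference is released when the wrapped buffer (and
// with it the lambda) is destroyed.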
switch (img->fmt) {
case VPX_IMG_FMT_I420:
img_wrapped_buffer = WrapI420Buffer(
img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
img->stride[VPX_PLANE_V],
// WrappedI420Buffer's mechanism for allowing the release of its
// frame buffer is through a callback function. This is where we
// should release `img_buffer`.
[img_buffer] {});
break;
case VPX_IMG_FMT_I422:
img_wrapped_buffer = WrapI422Buffer(
img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
img->stride[VPX_PLANE_V],
// WrappedI422Buffer's mechanism for allowing the release of its
// frame buffer is through a callback function. This is where we
// should release `img_buffer`.
[img_buffer] {});
break;
case VPX_IMG_FMT_I444:
img_wrapped_buffer = WrapI444Buffer(
img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
img->stride[VPX_PLANE_V],
// WrappedI444Buffer's mechanism for allowing the release of its
// frame buffer is through a callback function. This is where we
// should release `img_buffer`.
[img_buffer] {});
break;
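// For the high-bit-depth formats below, libvpx reports strides in bytes,
// while the Wrap*Buffer helpers for 16-bit pixels take strides in uint16_t
// samples - hence the reinterpret_casts and the divisions by 2.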
case VPX_IMG_FMT_I42016:
img_wrapped_buffer = WrapI010Buffer(
img->d_w, img->d_h,
reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_Y]),
img->stride[VPX_PLANE_Y] / 2,
reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_U]),
img->stride[VPX_PLANE_U] / 2,
reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_V]),
img->stride[VPX_PLANE_V] / 2, [img_buffer] {});
break;
case VPX_IMG_FMT_I42216:
img_wrapped_buffer = WrapI210Buffer(
img->d_w, img->d_h,
reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_Y]),
img->stride[VPX_PLANE_Y] / 2,
reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_U]),
img->stride[VPX_PLANE_U] / 2,
reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_V]),
img->stride[VPX_PLANE_V] / 2, [img_buffer] {});
break;
case VPX_IMG_FMT_I44416:
img_wrapped_buffer = WrapI410Buffer(
img->d_w, img->d_h,
reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_Y]),
img->stride[VPX_PLANE_Y] / 2,
reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_U]),
img->stride[VPX_PLANE_U] / 2,
reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_V]),
img->stride[VPX_PLANE_V] / 2, [img_buffer] {});
break;
default:
RTC_LOG(LS_ERROR) << "Unsupported pixel format produced by the decoder: "
<< static_cast<int>(img->fmt);
return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
}
auto builder = VideoFrame::Builder()
.set_video_frame_buffer(img_wrapped_buffer)
.set_timestamp_rtp(timestamp);
if (explicit_color_space) {
builder.set_color_space(*explicit_color_space);
} else {
builder.set_color_space(
ExtractVP9ColorSpace(img->cs, img->range, img->bit_depth));
}
VideoFrame decoded_image = builder.build();
decode_complete_callback_->Decoded(decoded_image, absl::nullopt, qp);
return WEBRTC_VIDEO_CODEC_OK;
}
int LibvpxVp9Decoder::RegisterDecodeCompleteCallback(
DecodedImageCallback* callback) {
decode_complete_callback_ = callback;
return WEBRTC_VIDEO_CODEC_OK;
}
int LibvpxVp9Decoder::Release() {
int ret_val = WEBRTC_VIDEO_CODEC_OK;
if (decoder_ != nullptr) {
if (inited_) {
// When a codec is destroyed libvpx will release any buffers of
// `libvpx_buffer_pool_` it is currently using.
if (vpx_codec_destroy(decoder_)) {
ret_val = WEBRTC_VIDEO_CODEC_MEMORY;
}
}
delete decoder_;
decoder_ = nullptr;
}
// Releases buffers from the pool. Any buffers not in use are deleted; buffers
// still referenced externally are deleted once fully released, instead of
// being returned to the pool.
libvpx_buffer_pool_.ClearPool();
inited_ = false;
return ret_val;
}
VideoDecoder::DecoderInfo LibvpxVp9Decoder::GetDecoderInfo() const {
DecoderInfo info;
info.implementation_name = "libvpx";
info.is_hardware_accelerated = false;
return info;
}
const char* LibvpxVp9Decoder::ImplementationName() const {
return "libvpx";
}
} // namespace webrtc
#endif // RTC_ENABLE_VP9