mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-13 05:40:42 +01:00

Bug: webrtc:7472 Change-Id: Ib1af94cc434d93be254370f0d9b6ebaafe8817d4 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/232902 Reviewed-by: Åsa Persson <asapersson@webrtc.org> Commit-Queue: Danil Chapovalov <danilchap@webrtc.org> Cr-Commit-Position: refs/heads/main@{#35085}
416 lines
14 KiB
C++
416 lines
14 KiB
C++
/*
|
|
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "modules/rtp_rtcp/source/rtp_video_layers_allocation_extension.h"
|
|
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
|
|
#include "absl/algorithm/container.h"
|
|
#include "api/video/video_layers_allocation.h"
|
|
#include "modules/rtp_rtcp/source/byte_io.h"
|
|
#include "rtc_base/checks.h"
|
|
|
|
namespace webrtc {
|
|
|
|
constexpr RTPExtensionType RtpVideoLayersAllocationExtension::kId;
|
|
|
|
namespace {
|
|
|
|
constexpr int kMaxNumRtpStreams = 4;
|
|
|
|
// TODO(bugs.webrtc.org/12000): share Leb128 functions with av1 packetizer.
|
|
// Returns minimum number of bytes required to store `value`.
|
|
int Leb128Size(uint32_t value) {
|
|
int size = 0;
|
|
while (value >= 0x80) {
|
|
++size;
|
|
value >>= 7;
|
|
}
|
|
return size + 1;
|
|
}
|
|
|
|
// Returns number of bytes consumed.
|
|
int WriteLeb128(uint32_t value, uint8_t* buffer) {
|
|
int size = 0;
|
|
while (value >= 0x80) {
|
|
buffer[size] = 0x80 | (value & 0x7F);
|
|
++size;
|
|
value >>= 7;
|
|
}
|
|
buffer[size] = value;
|
|
++size;
|
|
return size;
|
|
}
|
|
|
|
// Reads leb128 encoded value and advance read_at by number of bytes consumed.
|
|
// Sets read_at to nullptr on error.
|
|
uint64_t ReadLeb128(const uint8_t*& read_at, const uint8_t* end) {
|
|
uint64_t value = 0;
|
|
int fill_bits = 0;
|
|
while (read_at != end && fill_bits < 64 - 7) {
|
|
uint8_t leb128_byte = *read_at;
|
|
value |= uint64_t{leb128_byte & 0x7Fu} << fill_bits;
|
|
++read_at;
|
|
fill_bits += 7;
|
|
if ((leb128_byte & 0x80) == 0) {
|
|
return value;
|
|
}
|
|
}
|
|
// Failed to find terminator leb128 byte.
|
|
read_at = nullptr;
|
|
return 0;
|
|
}
|
|
|
|
bool AllocationIsValid(const VideoLayersAllocation& allocation) {
|
|
// Since all multivalue fields are stored in (rtp_stream_id, spatial_id) order
|
|
// assume `allocation.active_spatial_layers` is already sorted. It is simpler
|
|
// to assemble it in the sorted way than to resort during serialization.
|
|
if (!absl::c_is_sorted(
|
|
allocation.active_spatial_layers,
|
|
[](const VideoLayersAllocation::SpatialLayer& lhs,
|
|
const VideoLayersAllocation::SpatialLayer& rhs) {
|
|
return std::make_tuple(lhs.rtp_stream_index, lhs.spatial_id) <
|
|
std::make_tuple(rhs.rtp_stream_index, rhs.spatial_id);
|
|
})) {
|
|
return false;
|
|
}
|
|
|
|
int max_rtp_stream_idx = 0;
|
|
for (const auto& spatial_layer : allocation.active_spatial_layers) {
|
|
if (spatial_layer.rtp_stream_index < 0 ||
|
|
spatial_layer.rtp_stream_index >= 4) {
|
|
return false;
|
|
}
|
|
if (spatial_layer.spatial_id < 0 || spatial_layer.spatial_id >= 4) {
|
|
return false;
|
|
}
|
|
if (spatial_layer.target_bitrate_per_temporal_layer.empty() ||
|
|
spatial_layer.target_bitrate_per_temporal_layer.size() > 4) {
|
|
return false;
|
|
}
|
|
if (max_rtp_stream_idx < spatial_layer.rtp_stream_index) {
|
|
max_rtp_stream_idx = spatial_layer.rtp_stream_index;
|
|
}
|
|
if (allocation.resolution_and_frame_rate_is_valid) {
|
|
// TODO(danilchap): Add check width and height are no more than 0x10000
|
|
// when width and height become larger type and thus would support maximum
|
|
// resolution.
|
|
if (spatial_layer.width <= 0) {
|
|
return false;
|
|
}
|
|
if (spatial_layer.height <= 0) {
|
|
return false;
|
|
}
|
|
if (spatial_layer.frame_rate_fps > 255) {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
if (allocation.rtp_stream_index < 0 ||
|
|
(!allocation.active_spatial_layers.empty() &&
|
|
allocation.rtp_stream_index > max_rtp_stream_idx)) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
struct SpatialLayersBitmasks {
|
|
int max_rtp_stream_id = 0;
|
|
uint8_t spatial_layer_bitmask[kMaxNumRtpStreams] = {};
|
|
bool bitmasks_are_the_same = true;
|
|
};
|
|
|
|
SpatialLayersBitmasks SpatialLayersBitmasksPerRtpStream(
|
|
const VideoLayersAllocation& allocation) {
|
|
RTC_DCHECK(AllocationIsValid(allocation));
|
|
SpatialLayersBitmasks result;
|
|
for (const auto& layer : allocation.active_spatial_layers) {
|
|
result.spatial_layer_bitmask[layer.rtp_stream_index] |=
|
|
(1u << layer.spatial_id);
|
|
if (result.max_rtp_stream_id < layer.rtp_stream_index) {
|
|
result.max_rtp_stream_id = layer.rtp_stream_index;
|
|
}
|
|
}
|
|
for (int i = 1; i <= result.max_rtp_stream_id; ++i) {
|
|
if (result.spatial_layer_bitmask[i] != result.spatial_layer_bitmask[0]) {
|
|
result.bitmasks_are_the_same = false;
|
|
break;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// +-+-+-+-+-+-+-+-+
|
|
// |RID| NS| sl_bm |
|
|
// +-+-+-+-+-+-+-+-+
|
|
// Spatial layer bitmask |sl0_bm |sl1_bm |
|
|
// up to 2 bytes |---------------|
|
|
// when sl_bm == 0 |sl2_bm |sl3_bm |
|
|
// +-+-+-+-+-+-+-+-+
|
|
// Number of temporal |#tl|#tl|#tl|#tl|
|
|
// layers per spatial layer :---------------:
|
|
// up to 4 bytes | ... |
|
|
// +-+-+-+-+-+-+-+-+
|
|
// Target bitrate in kpbs | |
|
|
// per temporal layer : ... :
|
|
// leb128 encoded | |
|
|
// +-+-+-+-+-+-+-+-+
|
|
// Resolution and framerate | |
|
|
// 5 bytes per spatial layer + width-1 for +
|
|
// (optional) | rid=0, sid=0 |
|
|
// +---------------+
|
|
// | |
|
|
// + height-1 for +
|
|
// | rid=0, sid=0 |
|
|
// +---------------+
|
|
// | max framerate |
|
|
// +-+-+-+-+-+-+-+-+
|
|
// : ... :
|
|
// +-+-+-+-+-+-+-+-+
|
|
//
|
|
// RID: RTP stream index this allocation is sent on, numbered from 0. 2 bits.
|
|
// NS: Number of RTP streams - 1. 2 bits, thus allowing up-to 4 RTP streams.
|
|
// sl_bm: BitMask of the active Spatial Layers when same for all RTP streams or
|
|
// 0 otherwise. 4 bits thus allows up to 4 spatial layers per RTP streams.
|
|
// slX_bm: BitMask of the active Spatial Layers for RTP stream with index=X.
|
|
// byte-aligned. When NS < 2, takes ones byte, otherwise uses two bytes.
|
|
// #tl: 2-bit value of number of temporal layers-1, thus allowing up-to 4
|
|
// temporal layer per spatial layer. One per spatial layer per RTP stream.
|
|
// values are stored in (RTP stream id, spatial id) ascending order.
|
|
// zero-padded to byte alignment.
|
|
// Target bitrate in kbps. Values are stored using leb128 encoding.
|
|
// one value per temporal layer. values are stored in
|
|
// (RTP stream id, spatial id, temporal id) ascending order.
|
|
// All bitrates are total required bitrate to receive the corresponding
|
|
// layer, i.e. in simulcast mode they include only corresponding spatial
|
|
// layer, in full-svc all lower spatial layers are included. All lower
|
|
// temporal layers are also included.
|
|
// Resolution and framerate.
|
|
// Optional. Presense is infered from the rtp header extension size.
|
|
// Encoded (width - 1), 16-bit, (height - 1), 16-bit, max frame rate 8-bit
|
|
// per spatial layer per RTP stream.
|
|
// Values are stored in (RTP stream id, spatial id) ascending order.
|
|
//
|
|
// An empty layer allocation (i.e nothing sent on ssrc) is encoded as
|
|
// special case with a single 0 byte.
|
|
|
|
bool RtpVideoLayersAllocationExtension::Write(
|
|
rtc::ArrayView<uint8_t> data,
|
|
const VideoLayersAllocation& allocation) {
|
|
RTC_DCHECK(AllocationIsValid(allocation));
|
|
RTC_DCHECK_GE(data.size(), ValueSize(allocation));
|
|
|
|
if (allocation.active_spatial_layers.empty()) {
|
|
data[0] = 0;
|
|
return true;
|
|
}
|
|
|
|
SpatialLayersBitmasks slb = SpatialLayersBitmasksPerRtpStream(allocation);
|
|
uint8_t* write_at = data.data();
|
|
// First half of the header byte.
|
|
*write_at = (allocation.rtp_stream_index << 6);
|
|
// number of rtp stream - 1 is the same as the maximum rtp_stream_id.
|
|
*write_at |= slb.max_rtp_stream_id << 4;
|
|
if (slb.bitmasks_are_the_same) {
|
|
// Second half of the header byte.
|
|
*write_at |= slb.spatial_layer_bitmask[0];
|
|
} else {
|
|
// spatial layer bitmasks when they are different for different RTP streams.
|
|
*++write_at =
|
|
(slb.spatial_layer_bitmask[0] << 4) | slb.spatial_layer_bitmask[1];
|
|
if (slb.max_rtp_stream_id >= 2) {
|
|
*++write_at =
|
|
(slb.spatial_layer_bitmask[2] << 4) | slb.spatial_layer_bitmask[3];
|
|
}
|
|
}
|
|
++write_at;
|
|
|
|
{ // Number of temporal layers.
|
|
int bit_offset = 8;
|
|
*write_at = 0;
|
|
for (const auto& layer : allocation.active_spatial_layers) {
|
|
if (bit_offset == 0) {
|
|
bit_offset = 6;
|
|
*++write_at = 0;
|
|
} else {
|
|
bit_offset -= 2;
|
|
}
|
|
*write_at |=
|
|
((layer.target_bitrate_per_temporal_layer.size() - 1) << bit_offset);
|
|
}
|
|
++write_at;
|
|
}
|
|
|
|
// Target bitrates.
|
|
for (const auto& spatial_layer : allocation.active_spatial_layers) {
|
|
for (const DataRate& bitrate :
|
|
spatial_layer.target_bitrate_per_temporal_layer) {
|
|
write_at += WriteLeb128(bitrate.kbps(), write_at);
|
|
}
|
|
}
|
|
|
|
if (allocation.resolution_and_frame_rate_is_valid) {
|
|
for (const auto& spatial_layer : allocation.active_spatial_layers) {
|
|
ByteWriter<uint16_t>::WriteBigEndian(write_at, spatial_layer.width - 1);
|
|
write_at += 2;
|
|
ByteWriter<uint16_t>::WriteBigEndian(write_at, spatial_layer.height - 1);
|
|
write_at += 2;
|
|
*write_at = spatial_layer.frame_rate_fps;
|
|
++write_at;
|
|
}
|
|
}
|
|
RTC_DCHECK_EQ(write_at - data.data(), ValueSize(allocation));
|
|
return true;
|
|
}
|
|
|
|
bool RtpVideoLayersAllocationExtension::Parse(
|
|
rtc::ArrayView<const uint8_t> data,
|
|
VideoLayersAllocation* allocation) {
|
|
if (data.empty() || allocation == nullptr) {
|
|
return false;
|
|
}
|
|
|
|
allocation->active_spatial_layers.clear();
|
|
|
|
const uint8_t* read_at = data.data();
|
|
const uint8_t* const end = data.data() + data.size();
|
|
|
|
if (data.size() == 1 && *read_at == 0) {
|
|
allocation->rtp_stream_index = 0;
|
|
allocation->resolution_and_frame_rate_is_valid = true;
|
|
return AllocationIsValid(*allocation);
|
|
}
|
|
|
|
// Header byte.
|
|
allocation->rtp_stream_index = *read_at >> 6;
|
|
int num_rtp_streams = 1 + ((*read_at >> 4) & 0b11);
|
|
uint8_t spatial_layers_bitmasks[kMaxNumRtpStreams];
|
|
spatial_layers_bitmasks[0] = *read_at & 0b1111;
|
|
|
|
if (spatial_layers_bitmasks[0] != 0) {
|
|
for (int i = 1; i < num_rtp_streams; ++i) {
|
|
spatial_layers_bitmasks[i] = spatial_layers_bitmasks[0];
|
|
}
|
|
} else {
|
|
// Spatial layer bitmasks when they are different for different RTP streams.
|
|
if (++read_at == end) {
|
|
return false;
|
|
}
|
|
spatial_layers_bitmasks[0] = *read_at >> 4;
|
|
spatial_layers_bitmasks[1] = *read_at & 0b1111;
|
|
if (num_rtp_streams > 2) {
|
|
if (++read_at == end) {
|
|
return false;
|
|
}
|
|
spatial_layers_bitmasks[2] = *read_at >> 4;
|
|
spatial_layers_bitmasks[3] = *read_at & 0b1111;
|
|
}
|
|
}
|
|
if (++read_at == end) {
|
|
return false;
|
|
}
|
|
|
|
// Read number of temporal layers,
|
|
// Create `allocation->active_spatial_layers` while iterating though it.
|
|
int bit_offset = 8;
|
|
for (int stream_idx = 0; stream_idx < num_rtp_streams; ++stream_idx) {
|
|
for (int sid = 0; sid < VideoLayersAllocation::kMaxSpatialIds; ++sid) {
|
|
if ((spatial_layers_bitmasks[stream_idx] & (1 << sid)) == 0) {
|
|
continue;
|
|
}
|
|
|
|
if (bit_offset == 0) {
|
|
bit_offset = 6;
|
|
if (++read_at == end) {
|
|
return false;
|
|
}
|
|
} else {
|
|
bit_offset -= 2;
|
|
}
|
|
int num_temporal_layers = 1 + ((*read_at >> bit_offset) & 0b11);
|
|
allocation->active_spatial_layers.emplace_back();
|
|
auto& layer = allocation->active_spatial_layers.back();
|
|
layer.rtp_stream_index = stream_idx;
|
|
layer.spatial_id = sid;
|
|
layer.target_bitrate_per_temporal_layer.resize(num_temporal_layers,
|
|
DataRate::Zero());
|
|
}
|
|
}
|
|
if (++read_at == end) {
|
|
return false;
|
|
}
|
|
|
|
// Target bitrates.
|
|
for (auto& layer : allocation->active_spatial_layers) {
|
|
for (DataRate& rate : layer.target_bitrate_per_temporal_layer) {
|
|
uint64_t bitrate_kbps = ReadLeb128(read_at, end);
|
|
// bitrate_kbps might represent larger values than DataRate type,
|
|
// discard unreasonably large values.
|
|
if (read_at == nullptr || bitrate_kbps > 1'000'000) {
|
|
return false;
|
|
}
|
|
rate = DataRate::KilobitsPerSec(bitrate_kbps);
|
|
}
|
|
}
|
|
|
|
if (read_at == end) {
|
|
allocation->resolution_and_frame_rate_is_valid = false;
|
|
return AllocationIsValid(*allocation);
|
|
}
|
|
|
|
if (read_at + 5 * allocation->active_spatial_layers.size() != end) {
|
|
// data is left, but it size is not what can be used for resolutions and
|
|
// framerates.
|
|
return false;
|
|
}
|
|
allocation->resolution_and_frame_rate_is_valid = true;
|
|
for (auto& layer : allocation->active_spatial_layers) {
|
|
layer.width = 1 + ByteReader<uint16_t, 2>::ReadBigEndian(read_at);
|
|
read_at += 2;
|
|
layer.height = 1 + ByteReader<uint16_t, 2>::ReadBigEndian(read_at);
|
|
read_at += 2;
|
|
layer.frame_rate_fps = *read_at;
|
|
++read_at;
|
|
}
|
|
|
|
return AllocationIsValid(*allocation);
|
|
}
|
|
|
|
size_t RtpVideoLayersAllocationExtension::ValueSize(
|
|
const VideoLayersAllocation& allocation) {
|
|
if (allocation.active_spatial_layers.empty()) {
|
|
return 1;
|
|
}
|
|
size_t result = 1; // header
|
|
SpatialLayersBitmasks slb = SpatialLayersBitmasksPerRtpStream(allocation);
|
|
if (!slb.bitmasks_are_the_same) {
|
|
++result;
|
|
if (slb.max_rtp_stream_id >= 2) {
|
|
++result;
|
|
}
|
|
}
|
|
// 2 bits per active spatial layer, rounded up to full byte, i.e.
|
|
// 0.25 byte per active spatial layer.
|
|
result += (allocation.active_spatial_layers.size() + 3) / 4;
|
|
for (const auto& spatial_layer : allocation.active_spatial_layers) {
|
|
for (DataRate value : spatial_layer.target_bitrate_per_temporal_layer) {
|
|
result += Leb128Size(value.kbps());
|
|
}
|
|
}
|
|
if (allocation.resolution_and_frame_rate_is_valid) {
|
|
result += 5 * allocation.active_spatial_layers.size();
|
|
}
|
|
return result;
|
|
}
|
|
|
|
} // namespace webrtc
|