Update to use Opus 1.5

This commit is contained in:
Jim Gustafson 2024-04-05 14:07:50 -07:00 committed by GitHub
parent ed3f2f4c8a
commit a170a82bb0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 50 additions and 177 deletions

4
DEPS
View file

@ -48,9 +48,9 @@ vars = {
}
deps = {
# RingRTC change to use a fork of opus
# RingRTC change to use the upstream xiph opus
'src/ringrtc/opus/src':
'https://github.com/signalapp/opus.git@593419e833acab4d15b4901fe156177fb7315468',
'https://github.com/xiph/opus.git@0e30966b198ad28943799eaf5b3b08100b6f70c3',
# TODO(kjellander): Move this to be Android-only.
'src/base':

View file

@ -622,7 +622,19 @@ AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl(
// After 20 DTX frames (MAX_CONSECUTIVE_DTX) Opus will send a frame
// coding the background noise. Avoid flagging this frame as speech
// (even though there is a probability of the frame being speech).
info.speech = IsPacketSpeech(info.encoded_bytes, encoded->data());
// RingRTC change to detect if an encoded packet contains speech or not.
if (WebRtcOpus_GetInDtx(inst_) == 0) {
info.speech = true;
consecutive_dtx_frames_ = 0;
} else {
// Handle the case where the encoder is now in DTX mode but there might be a speech frame in the packet.
if (consecutive_dtx_frames_ == 0 && info.encoded_bytes > 2) {
info.speech = true;
} else {
info.speech = false;
}
consecutive_dtx_frames_ += 1;
}
info.encoder_type = CodecType::kOpus;
@ -632,177 +644,6 @@ AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl(
return info;
}
// RingRTC change to detect if an encoded packet contains speech or not.
// Generally, if the last frame in the packet is audio, it is speech, unless
// it is a DTX refresh frame. This function follows RFC-6716 to check frames
// in each encoded packet.
//
// Parameters:
//   encoded_bytes - number of bytes the encoder produced for this packet.
//                   A value of 0, 1, or 2 is treated as a DTX-only packet.
//   encoded       - the encoded packet; encoded[0] is read as the TOC byte.
//                   It is only dereferenced when encoded_bytes > 2 and the
//                   configured packet time is greater than 20ms.
//
// Returns true if the packet is considered to carry speech, false otherwise.
// Packets that look malformed or too short to parse are reported as speech.
//
// Side effect: updates consecutive_dtx_frames_, the running count of DTX
// frames. In the branches that test for it, a non-empty frame that arrives
// when the count has reached 20 is treated as a DTX refresh frame rather than
// speech.
bool AudioEncoderOpusImpl::IsPacketSpeech(
int encoded_bytes,
const uint8_t* encoded) {
// Default verdict; only branches that positively identify speech set this.
bool speech = false;
// If the encoder returns 0, 1, or 2 encoded bytes, by definition, the packet
// contains only DTX frame(s). 0 is the special case in which
// opus_interface.cc detected consecutive DTX packets and is instructing
// WebRTC not to send any packet out over the wire.
bool dtx_packet = (encoded_bytes <= 2);
if (config_.frame_size_ms > 20) {
// For packet times greater than 20ms, Opus will encode a group of 20ms
// frames and combine them into a 'packet' with a TOC.
if (dtx_packet) {
// The 'packet' contains only DTX frames.
// Count one DTX frame per 20ms of configured packet time.
consecutive_dtx_frames_ += config_.frame_size_ms / 20;
} else {
// The 'packet' contains at least one non-DTX frame.
// The top five bits of the TOC byte hold the config number; the masks below
// select the configs that use 20ms frames.
if (((encoded[0] & 0x98) == 0x08) || // config values of 1, 5, 9, and 13
((encoded[0] & 0x78) == 0x78) || // config value of 15 (the mask also admits 31, which the next test covers)
((encoded[0] & 0x98) == 0x98)) { // config values of 19, 23, 27, and 31
// The TOC indicates a packet with 20ms frames.
// The low two bits of the TOC byte are the frame count code.
int code = encoded[0] & 0x03;
if (code == 0) {
// Code 0: 1 frame in the packet
// This case is unlikely for DTX.
consecutive_dtx_frames_ = 0;
speech = true;
} else if (code == 1) {
// Code 1: 2 frames in the packet, each with equal compressed size
// If both frames were DTX, we would not reach here.
consecutive_dtx_frames_ = 0;
speech = true;
} else if (code == 2) {
// Code 2: 2 frames in the packet, with different compressed sizes
// The first frame's length follows the TOC in one byte, or in two bytes
// when the first byte is greater than 251 (length = first + second * 4).
int header_bytes = 2;
int size_of_first_frame = encoded[1];
if (size_of_first_frame > 251) {
size_of_first_frame += encoded[2] * 4;
header_bytes = 3;
}
// Whatever remains after the header and the first frame is the second frame.
int size_of_second_frame = encoded_bytes - size_of_first_frame - header_bytes;
if (size_of_first_frame > 0 && size_of_second_frame > 0) {
// The second frame has to be speech.
consecutive_dtx_frames_ = 0;
speech = true;
} else if (size_of_first_frame == 0 && size_of_second_frame > 0) {
// Second frame may or may not be DTX refresh.
// The empty first frame counts as one more DTX frame (hence the + 1) before
// the refresh test is applied to the second frame.
speech = (consecutive_dtx_frames_ + 1) != 20;
consecutive_dtx_frames_ = 0;
} else if (size_of_first_frame > 0 && size_of_second_frame == 0) {
// First frame may or may not be DTX refresh.
// The packet ends on a DTX frame, so speech stays false and the count
// restarts at one.
consecutive_dtx_frames_ = 1;
} else {
// Both frames are size 0/DTX, should not reach here.
// NOTE(review): a negative size_of_second_frame (declared first-frame size
// larger than the packet) also lands in this branch.
consecutive_dtx_frames_ += 2;
}
} else if (code == 3) {
// Code 3: an arbitrary number of frames in the packet
// The byte after the TOC carries the variable-size flag (0x80), the padding
// flag (0x40) and the frame count M (low six bits).
bool variable = (encoded[1] & 0x80) == 0x80;
bool padding = (encoded[1] & 0x40) == 0x40;
int M = encoded[1] & 0x3f;
int padding_header_bytes = 0;
int padding_size = 0;
if (padding) {
// A padding length byte of 0xff means 254 bytes of padding plus the value of
// one further length byte.
// NOTE(review): only a single continuation byte is handled here; confirm
// whether longer chains of 0xff length bytes can occur.
if (encoded[2] == 0xff) {
if (encoded_bytes < 4) {
// The packet should be at least 4 bytes, reset.
consecutive_dtx_frames_ = 0;
return true;
}
padding_size = 254 + encoded[3];
padding_header_bytes = 2;
} else {
padding_size = encoded[2];
padding_header_bytes = 1;
}
}
if (variable) {
// Frames in the packet have a variable size, a mix of audio and DTX.
// offset indexes the first per-frame length byte, which follows the TOC, the
// frame count byte and any padding length bytes.
int offset = 2 + padding_header_bytes;
int frame_header_bytes = 0;
int total_size_of_frames = 0;
// Check the worst-case limits to be sure there is enough encoded
// data to evaluate.
if (encoded_bytes < offset + M * 2) {
// Note: This assumes that actual encoded data is larger than
// the guess of two bytes for each header... Reset.
consecutive_dtx_frames_ = 0;
return true;
}
// The only time we walk the packet header to check for dynamic frame
// sizes. Only expected for packets with at least one DTX frame and
// at least one audio/refresh frame.
// Only the first M - 1 frames carry an explicit length; the last frame's
// size is derived afterwards from the bytes left over.
for (int frame = 0; frame < M - 1; frame++) {
int frame_size = encoded[offset];
if (frame_size > 251) {
// Two-byte length: first byte plus four times the second byte.
frame_size += encoded[++offset] * 4;
frame_header_bytes += 2;
} else {
frame_header_bytes += 1;
}
if (frame_size > 0) {
// Could be speech or a DTX refresh frame. In either case,
// reset the DTX count.
consecutive_dtx_frames_ = 0;
} else {
// DTX frame.
consecutive_dtx_frames_++;
}
total_size_of_frames += frame_size;
offset++;
}
// Then, the last frame size should be:
// the packet length minus the two leading bytes, the padding (length bytes
// and payload) and the length bytes and payloads of the earlier frames.
int frame_M_size = encoded_bytes - 2
- (padding_header_bytes + padding_size)
- (frame_header_bytes + total_size_of_frames);
if (frame_M_size > 0) {
// The packet is ending, could be speech or a DTX refresh frame.
speech = consecutive_dtx_frames_ != 20;
consecutive_dtx_frames_ = 0;
} else if (frame_M_size == 0) {
// The packet is ending on a DTX frame.
consecutive_dtx_frames_++;
} else {
// Badly formatted packet, reset.
consecutive_dtx_frames_ = 0;
return true;
}
} else {
// Frames in the packet have a constant size.
// R is the number of payload bytes left once the two leading bytes and the
// padding are removed.
int R = encoded_bytes - 2 - (padding_header_bytes + padding_size);
if (R > 0) {
// All frames are the same size and larger than zero, so they must
// represent speech.
consecutive_dtx_frames_ = 0;
speech = true;
} else {
// All frames are DTX.
consecutive_dtx_frames_ += M;
}
}
}
} else {
// The TOC indicates a packet with something other than 20ms frames.
// This does not match the supported frame sizing, reset and consider
// the packet to represent speech.
consecutive_dtx_frames_ = 0;
speech = true;
}
}
} else {
// Packet time of 20ms or less: the packet is handled as a single frame. A
// non-DTX packet is speech unless exactly 20 DTX frames have been counted
// before it, in which case it is taken to be the DTX refresh frame.
speech = !dtx_packet && (consecutive_dtx_frames_ != 20);
consecutive_dtx_frames_ = (dtx_packet) ? (consecutive_dtx_frames_ + 1) : (0);
}
return speech;
}
// Returns the number of 10 ms frames in one packet, computed by dividing the
// configured packet duration (config_.frame_size_ms) by 10.
size_t AudioEncoderOpusImpl::Num10msFramesPerPacket() const {
  const auto frames_per_packet =
      rtc::CheckedDivExact(config_.frame_size_ms, 10);
  return static_cast<size_t>(frames_per_packet);
}

View file

@ -8,7 +8,7 @@ import("//testing/test.gni")
# If ARM optimizations shall be used to accelerate performance.
use_opus_arm_optimization =
current_cpu == "arm" ||
(current_cpu == "arm64" && (is_fuchsia || is_ios || is_win))
(current_cpu == "arm64" && (is_fuchsia || is_ios || is_win || is_mac))
# NaCl, unlike Chrome, doesn't target SSE2 minimum, so skip optimizations for
# the sake of simplicity.
@ -30,6 +30,7 @@ config("opus_private_config") {
"OPUS_BUILD",
"OPUS_EXPORT=",
"ENABLE_HARDENING",
"DISABLE_DEBUG_FLOAT",
# Prefer alloca() over variable length arrays which are often inefficient;
# the opus code will automatically handle this correctly per-platform.
@ -76,6 +77,7 @@ config("opus_private_config") {
# Run Time CPU Detections (RTCD) is always enabled for x86.
"OPUS_HAVE_RTCD",
"CPU_INFO_BY_ASM",
"FLOAT_APPROX",
# Chrome always targets SSE2+.
"OPUS_X86_MAY_HAVE_SSE",
@ -88,7 +90,7 @@ config("opus_private_config") {
# At present libopus has no AVX functions so no sources are added for this;
# if you see linker errors on AVX code then this flag is why.
"OPUS_X86_MAY_HAVE_AVX",
"OPUS_X86_MAY_HAVE_AVX2",
]
}
@ -187,11 +189,39 @@ if (use_opus_x86_optimization) {
":opus_config",
]
if (!is_debug) {
configs -= [ "//build/config/compiler:default_optimization" ]
configs += [ "//build/config/compiler:optimize_speed" ]
}
if (!is_win || is_clang) {
cflags = [ "-msse4.1" ]
}
}
# TODO(dalecurtis): If libopus ever adds AVX support, add an opus_avx block.
# Builds the libopus sources that are written against AVX/AVX2 instructions
# (CELT pitch, SILK NSQ del-dec and the SILK float inner product) as their own
# target, with the instruction-set flags declared below.
source_set("opus_avx2") {
sources = [
"src/celt/x86/pitch_avx.c",
"src/silk/x86/NSQ_del_dec_avx2.c",
"src/silk/float/x86/inner_product_FLP_avx2.c",
]
# Swap the chromium_code config for no_chromium_code, then add the opus
# configs defined in this file.
configs -= [ "//build/config/compiler:chromium_code" ]
configs += [ "//build/config/compiler:no_chromium_code" ]
configs += [
":opus_private_config",
":opus_config",
]
# Non-debug builds replace the default optimization config with
# optimize_speed.
if (!is_debug) {
configs -= [ "//build/config/compiler:default_optimization" ]
configs += [ "//build/config/compiler:optimize_speed" ]
}
# The GCC/Clang-style -m flags are only passed when not building for Windows,
# or when the compiler is clang.
if (!is_win || is_clang) {
cflags = [ "-mavx", "-mfma", "-mavx2" ]
}
}
}
# Note: Do not add any defines or include_dirs to this target, those should all
@ -350,6 +380,7 @@ static_library("opus") {
"src/silk/typedef.h",
"src/src/analysis.c",
"src/src/analysis.h",
"src/src/extensions.c",
"src/src/mapping_matrix.c",
"src/src/mapping_matrix.h",
"src/src/mlp.c",
@ -437,6 +468,7 @@ static_library("opus") {
"src/silk/x86/x86_silk_map.c",
]
deps += [ ":opus_sse41" ]
deps += [ ":opus_avx2" ]
}
if (use_opus_arm_optimization) {