mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-12 21:30:45 +01:00
Update to use Opus 1.5
This commit is contained in:
parent
ed3f2f4c8a
commit
a170a82bb0
3 changed files with 50 additions and 177 deletions
4
DEPS
4
DEPS
|
@ -48,9 +48,9 @@ vars = {
|
|||
}
|
||||
|
||||
deps = {
|
||||
# RingRTC change to use a fork of opus
|
||||
# RingRTC change to use the upstream xiph opus
|
||||
'src/ringrtc/opus/src':
|
||||
'https://github.com/signalapp/opus.git@593419e833acab4d15b4901fe156177fb7315468',
|
||||
'https://github.com/xiph/opus.git@0e30966b198ad28943799eaf5b3b08100b6f70c3',
|
||||
|
||||
# TODO(kjellander): Move this to be Android-only.
|
||||
'src/base':
|
||||
|
|
|
@ -622,7 +622,19 @@ AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl(
|
|||
// After 20 DTX frames (MAX_CONSECUTIVE_DTX) Opus will send a frame
|
||||
// coding the background noise. Avoid flagging this frame as speech
|
||||
// (even though there is a probability of the frame being speech).
|
||||
info.speech = IsPacketSpeech(info.encoded_bytes, encoded->data());
|
||||
// RingRTC change to detect if an encoded packet contains speech or not.
|
||||
if (WebRtcOpus_GetInDtx(inst_) == 0) {
|
||||
info.speech = true;
|
||||
consecutive_dtx_frames_ = 0;
|
||||
} else {
|
||||
// Handle the case where the encoder is now in DTX mode but there might be a speech frame in the packet.
|
||||
if (consecutive_dtx_frames_ == 0 && info.encoded_bytes > 2) {
|
||||
info.speech = true;
|
||||
} else {
|
||||
info.speech = false;
|
||||
}
|
||||
consecutive_dtx_frames_ += 1;
|
||||
}
|
||||
|
||||
info.encoder_type = CodecType::kOpus;
|
||||
|
||||
|
@ -632,177 +644,6 @@ AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl(
|
|||
return info;
|
||||
}
|
||||
|
||||
// RingRTC change to detect if an encoded packet contains speech or not.
|
||||
// Generally, if the last frame in the packet is audio, it is speech, unless
|
||||
// it is a DTX refresh frame. This function follows RFC-6716 to check frames
|
||||
// in each encoded packet.
|
||||
bool AudioEncoderOpusImpl::IsPacketSpeech(
|
||||
int encoded_bytes,
|
||||
const uint8_t* encoded) {
|
||||
bool speech = false;
|
||||
|
||||
// If the encoder returns 0, 1, or 2 encoded bytes, by definition, the packet
|
||||
// contains only DTX frame(s). 0 is the special case in which
|
||||
// opus_interface.cc detected consecutive DTX packets and is instructing
|
||||
// WebRTC not to send any packet out over the wire.
|
||||
bool dtx_packet = (encoded_bytes <= 2);
|
||||
|
||||
if (config_.frame_size_ms > 20) {
|
||||
// For packet times greater than 20ms, Opus will encode a group of 20ms
|
||||
// frames and combine them into a 'packet' with a TOC.
|
||||
if (dtx_packet) {
|
||||
// The 'packet' contains only DTX frames.
|
||||
consecutive_dtx_frames_ += config_.frame_size_ms / 20;
|
||||
} else {
|
||||
// The 'packet' contains at least one non-DTX frame.
|
||||
if (((encoded[0] & 0x98) == 0x08) || // config values of 1, 5, 9, and 13
|
||||
((encoded[0] & 0x78) == 0x78) || // config value of 15
|
||||
((encoded[0] & 0x98) == 0x98)) { // config values of 19, 23, 27, and 31
|
||||
// The TOC indicates a packet with 20ms frames.
|
||||
int code = encoded[0] & 0x03;
|
||||
if (code == 0) {
|
||||
// Code 0: 1 frame in the packet
|
||||
// This case is unlikely for DTX.
|
||||
consecutive_dtx_frames_ = 0;
|
||||
speech = true;
|
||||
} else if (code == 1) {
|
||||
// Code 1: 2 frames in the packet, each with equal compressed size
|
||||
// If both frames were DTX, we would not reach here.
|
||||
consecutive_dtx_frames_ = 0;
|
||||
speech = true;
|
||||
} else if (code == 2) {
|
||||
// Code 2: 2 frames in the packet, with different compressed sizes
|
||||
int header_bytes = 2;
|
||||
int size_of_first_frame = encoded[1];
|
||||
if (size_of_first_frame > 251) {
|
||||
size_of_first_frame += encoded[2] * 4;
|
||||
header_bytes = 3;
|
||||
}
|
||||
int size_of_second_frame = encoded_bytes - size_of_first_frame - header_bytes;
|
||||
if (size_of_first_frame > 0 && size_of_second_frame > 0) {
|
||||
// The second frame has to be speech.
|
||||
consecutive_dtx_frames_ = 0;
|
||||
speech = true;
|
||||
} else if (size_of_first_frame == 0 && size_of_second_frame > 0) {
|
||||
// Second frame may or may not be DTX refresh.
|
||||
speech = (consecutive_dtx_frames_ + 1) != 20;
|
||||
consecutive_dtx_frames_ = 0;
|
||||
} else if (size_of_first_frame > 0 && size_of_second_frame == 0) {
|
||||
// First frame may or may not be DTX refresh.
|
||||
consecutive_dtx_frames_ = 1;
|
||||
} else {
|
||||
// Both frames are size 0/DTX, should not reach here.
|
||||
consecutive_dtx_frames_ += 2;
|
||||
}
|
||||
} else if (code == 3) {
|
||||
// Code 3: an arbitrary number of frames in the packet
|
||||
bool variable = (encoded[1] & 0x80) == 0x80;
|
||||
bool padding = (encoded[1] & 0x40) == 0x40;
|
||||
int M = encoded[1] & 0x3f;
|
||||
|
||||
int padding_header_bytes = 0;
|
||||
int padding_size = 0;
|
||||
if (padding) {
|
||||
if (encoded[2] == 0xff) {
|
||||
if (encoded_bytes < 4) {
|
||||
// The packet should be at least 4 bytes, reset.
|
||||
consecutive_dtx_frames_ = 0;
|
||||
return true;
|
||||
}
|
||||
padding_size = 254 + encoded[3];
|
||||
padding_header_bytes = 2;
|
||||
} else {
|
||||
padding_size = encoded[2];
|
||||
padding_header_bytes = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (variable) {
|
||||
// Frames in the packet have a variable size, a mix of audio and DTX.
|
||||
int offset = 2 + padding_header_bytes;
|
||||
int frame_header_bytes = 0;
|
||||
int total_size_of_frames = 0;
|
||||
|
||||
// Check the worst-case limits to be sure there is enough encoded
|
||||
// data to evaluate.
|
||||
if (encoded_bytes < offset + M * 2) {
|
||||
// Note: This assumes that actual encoded data is larger than
|
||||
// the guess of two bytes for each header... Reset.
|
||||
consecutive_dtx_frames_ = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
// The only time we walk the packet header to check for dynamic frame
|
||||
// sizes. Only expected for packets with at least one DTX frame and
|
||||
// at least one audio/refresh frame.
|
||||
for (int frame = 0; frame < M - 1; frame++) {
|
||||
int frame_size = encoded[offset];
|
||||
if (frame_size > 251) {
|
||||
frame_size += encoded[++offset] * 4;
|
||||
frame_header_bytes += 2;
|
||||
} else {
|
||||
frame_header_bytes += 1;
|
||||
}
|
||||
|
||||
if (frame_size > 0) {
|
||||
// Could be speech or a DTX refresh frame. In either case,
|
||||
// reset the DTX count.
|
||||
consecutive_dtx_frames_ = 0;
|
||||
} else {
|
||||
// DTX frame.
|
||||
consecutive_dtx_frames_++;
|
||||
}
|
||||
|
||||
total_size_of_frames += frame_size;
|
||||
offset++;
|
||||
}
|
||||
|
||||
// Then, the last frame size should be:
|
||||
int frame_M_size = encoded_bytes - 2
|
||||
- (padding_header_bytes + padding_size)
|
||||
- (frame_header_bytes + total_size_of_frames);
|
||||
if (frame_M_size > 0) {
|
||||
// The packet is ending, could be speech or a DTX refresh frame.
|
||||
speech = consecutive_dtx_frames_ != 20;
|
||||
consecutive_dtx_frames_ = 0;
|
||||
} else if (frame_M_size == 0) {
|
||||
// The packet is ending on a DTX frame.
|
||||
consecutive_dtx_frames_++;
|
||||
} else {
|
||||
// Badly formatted packet, reset.
|
||||
consecutive_dtx_frames_ = 0;
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
// Frames in the packet have a constant size.
|
||||
int R = encoded_bytes - 2 - (padding_header_bytes + padding_size);
|
||||
if (R > 0) {
|
||||
// All frames are the same size and larger than zero, so they must
|
||||
// represent speech.
|
||||
consecutive_dtx_frames_ = 0;
|
||||
speech = true;
|
||||
} else {
|
||||
// All frames are DTX.
|
||||
consecutive_dtx_frames_ += M;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// The TOC indicates a packet with something other than 20ms frames.
|
||||
// This does not match the supported frame sizing, reset and consider
|
||||
// the packet to represent speech.
|
||||
consecutive_dtx_frames_ = 0;
|
||||
speech = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
speech = !dtx_packet && (consecutive_dtx_frames_ != 20);
|
||||
consecutive_dtx_frames_ = (dtx_packet) ? (consecutive_dtx_frames_ + 1) : (0);
|
||||
}
|
||||
|
||||
return speech;
|
||||
}
|
||||
|
||||
size_t AudioEncoderOpusImpl::Num10msFramesPerPacket() const {
|
||||
return static_cast<size_t>(rtc::CheckedDivExact(config_.frame_size_ms, 10));
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@ import("//testing/test.gni")
|
|||
# If ARM optimizations shall be used to accelerate performance.
|
||||
use_opus_arm_optimization =
|
||||
current_cpu == "arm" ||
|
||||
(current_cpu == "arm64" && (is_fuchsia || is_ios || is_win))
|
||||
(current_cpu == "arm64" && (is_fuchsia || is_ios || is_win || is_mac))
|
||||
|
||||
# NaCl, unlike Chrome, doesn't target SSE2 minimum, so skip optimizations for
|
||||
# the sake of simplicity.
|
||||
|
@ -30,6 +30,7 @@ config("opus_private_config") {
|
|||
"OPUS_BUILD",
|
||||
"OPUS_EXPORT=",
|
||||
"ENABLE_HARDENING",
|
||||
"DISABLE_DEBUG_FLOAT",
|
||||
|
||||
# Prefer alloca() over variable length arrays which are often inefficient;
|
||||
# the opus code will automatically handle this correctly per-platform.
|
||||
|
@ -76,6 +77,7 @@ config("opus_private_config") {
|
|||
# Run Time CPU Detections (RTCD) is always enabled for x86.
|
||||
"OPUS_HAVE_RTCD",
|
||||
"CPU_INFO_BY_ASM",
|
||||
"FLOAT_APPROX",
|
||||
|
||||
# Chrome always targets SSE2+.
|
||||
"OPUS_X86_MAY_HAVE_SSE",
|
||||
|
@ -88,7 +90,7 @@ config("opus_private_config") {
|
|||
|
||||
# At present libopus has no AVX functions so no sources are added for this,
|
||||
# if you see linker errors on AVX code then this flag is why.
|
||||
"OPUS_X86_MAY_HAVE_AVX",
|
||||
"OPUS_X86_MAY_HAVE_AVX2",
|
||||
]
|
||||
}
|
||||
|
||||
|
@ -187,11 +189,39 @@ if (use_opus_x86_optimization) {
|
|||
":opus_config",
|
||||
]
|
||||
|
||||
if (!is_debug) {
|
||||
configs -= [ "//build/config/compiler:default_optimization" ]
|
||||
configs += [ "//build/config/compiler:optimize_speed" ]
|
||||
}
|
||||
|
||||
if (!is_win || is_clang) {
|
||||
cflags = [ "-msse4.1" ]
|
||||
}
|
||||
}
|
||||
# TODO(dalecurtis): If libopus ever adds AVX support, add an opus_avx block.
|
||||
|
||||
source_set("opus_avx2") {
|
||||
sources = [
|
||||
"src/celt/x86/pitch_avx.c",
|
||||
"src/silk/x86/NSQ_del_dec_avx2.c",
|
||||
"src/silk/float/x86/inner_product_FLP_avx2.c",
|
||||
]
|
||||
|
||||
configs -= [ "//build/config/compiler:chromium_code" ]
|
||||
configs += [ "//build/config/compiler:no_chromium_code" ]
|
||||
configs += [
|
||||
":opus_private_config",
|
||||
":opus_config",
|
||||
]
|
||||
|
||||
if (!is_debug) {
|
||||
configs -= [ "//build/config/compiler:default_optimization" ]
|
||||
configs += [ "//build/config/compiler:optimize_speed" ]
|
||||
}
|
||||
|
||||
if (!is_win || is_clang) {
|
||||
cflags = [ "-mavx", "-mfma", "-mavx2" ]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Note: Do not add any defines or include_dirs to this target, those should all
|
||||
|
@ -350,6 +380,7 @@ static_library("opus") {
|
|||
"src/silk/typedef.h",
|
||||
"src/src/analysis.c",
|
||||
"src/src/analysis.h",
|
||||
"src/src/extensions.c",
|
||||
"src/src/mapping_matrix.c",
|
||||
"src/src/mapping_matrix.h",
|
||||
"src/src/mlp.c",
|
||||
|
@ -437,6 +468,7 @@ static_library("opus") {
|
|||
"src/silk/x86/x86_silk_map.c",
|
||||
]
|
||||
deps += [ ":opus_sse41" ]
|
||||
deps += [ ":opus_avx2" ]
|
||||
}
|
||||
|
||||
if (use_opus_arm_optimization) {
|
||||
|
|
Loading…
Reference in a new issue