From a170a82bb00862eea7ff31c7948dc903a567e9c3 Mon Sep 17 00:00:00 2001 From: Jim Gustafson Date: Fri, 5 Apr 2024 14:07:50 -0700 Subject: [PATCH] Update to use Opus 1.5 --- DEPS | 4 +- .../codecs/opus/audio_encoder_opus.cc | 185 ++---------------- ringrtc/opus/BUILD.gn | 38 +++- 3 files changed, 50 insertions(+), 177 deletions(-) diff --git a/DEPS b/DEPS index f3330968ef..d9e653f10a 100644 --- a/DEPS +++ b/DEPS @@ -48,9 +48,9 @@ vars = { } deps = { - # RingRTC change to use a fork of opus + # RingRTC change to use the upstream xiph opus 'src/ringrtc/opus/src': - 'https://github.com/signalapp/opus.git@593419e833acab4d15b4901fe156177fb7315468', + 'https://github.com/xiph/opus.git@0e30966b198ad28943799eaf5b3b08100b6f70c3', # TODO(kjellander): Move this to be Android-only. 'src/base': diff --git a/modules/audio_coding/codecs/opus/audio_encoder_opus.cc b/modules/audio_coding/codecs/opus/audio_encoder_opus.cc index db8275617d..37b5191122 100644 --- a/modules/audio_coding/codecs/opus/audio_encoder_opus.cc +++ b/modules/audio_coding/codecs/opus/audio_encoder_opus.cc @@ -622,7 +622,19 @@ AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl( // After 20 DTX frames (MAX_CONSECUTIVE_DTX) Opus will send a frame // coding the background noise. Avoid flagging this frame as speech // (even though there is a probability of the frame being speech). - info.speech = IsPacketSpeech(info.encoded_bytes, encoded->data()); + // RingRTC change to detect if an encoded packet contains speech or not. + if (WebRtcOpus_GetInDtx(inst_) == 0) { + info.speech = true; + consecutive_dtx_frames_ = 0; + } else { + // Handle the case where the encoder is now in DTX mode but there might be a speech frame in the packet. 
+ if (consecutive_dtx_frames_ == 0 && info.encoded_bytes > 2) { + info.speech = true; + } else { + info.speech = false; + } + consecutive_dtx_frames_ += 1; + } info.encoder_type = CodecType::kOpus; @@ -632,177 +644,6 @@ AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl( return info; } -// RingRTC change to detect if an encoded packet contains speech or not. -// Generally, if the last frame in the packet is audio, it is speech, unless -// it is a DTX refresh frame. This function follows RFC-6716 to check frames -// in each encoded packet. -bool AudioEncoderOpusImpl::IsPacketSpeech( - int encoded_bytes, - const uint8_t* encoded) { - bool speech = false; - - // If the encoder returns 0, 1, or 2 encoded bytes, by definition, the packet - // contains only DTX frame(s). 0 is the special case in which - // opus_interface.cc detected consecutive DTX packets and is instructing - // WebRTC not to send any packet out over the wire. - bool dtx_packet = (encoded_bytes <= 2); - - if (config_.frame_size_ms > 20) { - // For packet times greater than 20ms, Opus will encode a group of 20ms - // frames and combine them into a 'packet' with a TOC. - if (dtx_packet) { - // The 'packet' contains only DTX frames. - consecutive_dtx_frames_ += config_.frame_size_ms / 20; - } else { - // The 'packet' contains at least one non-DTX frame. - if (((encoded[0] & 0x98) == 0x08) || // config values of 1, 5, 9, and 13 - ((encoded[0] & 0x78) == 0x78) || // config value of 15 - ((encoded[0] & 0x98) == 0x98)) { // config values of 19, 23, 27, and 31 - // The TOC indicates a packet with 20ms frames. - int code = encoded[0] & 0x03; - if (code == 0) { - // Code 0: 1 frame in the packet - // This case is unlikely for DTX. - consecutive_dtx_frames_ = 0; - speech = true; - } else if (code == 1) { - // Code 1: 2 frames in the packet, each with equal compressed size - // If both frames were DTX, we would not reach here. 
- consecutive_dtx_frames_ = 0; - speech = true; - } else if (code == 2) { - // Code 2: 2 frames in the packet, with different compressed sizes - int header_bytes = 2; - int size_of_first_frame = encoded[1]; - if (size_of_first_frame > 251) { - size_of_first_frame += encoded[2] * 4; - header_bytes = 3; - } - int size_of_second_frame = encoded_bytes - size_of_first_frame - header_bytes; - if (size_of_first_frame > 0 && size_of_second_frame > 0) { - // The second frame has to be speech. - consecutive_dtx_frames_ = 0; - speech = true; - } else if (size_of_first_frame == 0 && size_of_second_frame > 0) { - // Second frame may or may not be DTX refresh. - speech = (consecutive_dtx_frames_ + 1) != 20; - consecutive_dtx_frames_ = 0; - } else if (size_of_first_frame > 0 && size_of_second_frame == 0) { - // First frame may or may not be DTX refresh. - consecutive_dtx_frames_ = 1; - } else { - // Both frames are size 0/DTX, should not reach here. - consecutive_dtx_frames_ += 2; - } - } else if (code == 3) { - // Code 3: an arbitrary number of frames in the packet - bool variable = (encoded[1] & 0x80) == 0x80; - bool padding = (encoded[1] & 0x40) == 0x40; - int M = encoded[1] & 0x3f; - - int padding_header_bytes = 0; - int padding_size = 0; - if (padding) { - if (encoded[2] == 0xff) { - if (encoded_bytes < 4) { - // The packet should be at least 4 bytes, reset. - consecutive_dtx_frames_ = 0; - return true; - } - padding_size = 254 + encoded[3]; - padding_header_bytes = 2; - } else { - padding_size = encoded[2]; - padding_header_bytes = 1; - } - } - - if (variable) { - // Frames in the packet have a variable size, a mix of audio and DTX. - int offset = 2 + padding_header_bytes; - int frame_header_bytes = 0; - int total_size_of_frames = 0; - - // Check the worst-case limits to be sure there is enough encoded - // data to evaluate. - if (encoded_bytes < offset + M * 2) { - // Note: This assumes that actual encoded data is larger than - // the guess of two bytes for each header... 
Reset. - consecutive_dtx_frames_ = 0; - return true; - } - - // The only time we walk the packet header to check for dynamic frame - // sizes. Only expected for packets with at least one DTX frame and - // at least one audio/refresh frame. - for (int frame = 0; frame < M - 1; frame++) { - int frame_size = encoded[offset]; - if (frame_size > 251) { - frame_size += encoded[++offset] * 4; - frame_header_bytes += 2; - } else { - frame_header_bytes += 1; - } - - if (frame_size > 0) { - // Could be speech or a DTX refresh frame. In either case, - // reset the DTX count. - consecutive_dtx_frames_ = 0; - } else { - // DTX frame. - consecutive_dtx_frames_++; - } - - total_size_of_frames += frame_size; - offset++; - } - - // Then, the last frame size should be: - int frame_M_size = encoded_bytes - 2 - - (padding_header_bytes + padding_size) - - (frame_header_bytes + total_size_of_frames); - if (frame_M_size > 0) { - // The packet is ending, could be speech or a DTX refresh frame. - speech = consecutive_dtx_frames_ != 20; - consecutive_dtx_frames_ = 0; - } else if (frame_M_size == 0) { - // The packet is ending on a DTX frame. - consecutive_dtx_frames_++; - } else { - // Badly formatted packet, reset. - consecutive_dtx_frames_ = 0; - return true; - } - } else { - // Frames in the packet have a constant size. - int R = encoded_bytes - 2 - (padding_header_bytes + padding_size); - if (R > 0) { - // All frames are the same size and larger than zero, so they must - // represent speech. - consecutive_dtx_frames_ = 0; - speech = true; - } else { - // All frames are DTX. - consecutive_dtx_frames_ += M; - } - } - } - } else { - // The TOC indicates a packet with something other than 20ms frames. - // This does not match the supported frame sizing, reset and consider - // the packet to represent speech. - consecutive_dtx_frames_ = 0; - speech = true; - } - } - } else { - speech = !dtx_packet && (consecutive_dtx_frames_ != 20); - consecutive_dtx_frames_ = (dtx_packet) ? 
(consecutive_dtx_frames_ + 1) : (0); - } - - return speech; -} - size_t AudioEncoderOpusImpl::Num10msFramesPerPacket() const { return static_cast<size_t>(rtc::CheckedDivExact(config_.frame_size_ms, 10)); } diff --git a/ringrtc/opus/BUILD.gn b/ringrtc/opus/BUILD.gn index d0ee667956..3b3dc5d746 100644 --- a/ringrtc/opus/BUILD.gn +++ b/ringrtc/opus/BUILD.gn @@ -8,7 +8,7 @@ import("//testing/test.gni") # If ARM optimizations shall be used to accelerate performance. use_opus_arm_optimization = current_cpu == "arm" || - (current_cpu == "arm64" && (is_fuchsia || is_ios || is_win)) + (current_cpu == "arm64" && (is_fuchsia || is_ios || is_win || is_mac)) # NaCl, unlike Chrome, doesn't target SSE2 minimum, so skip optimizations for # the sake of simplicity. @@ -30,6 +30,7 @@ config("opus_private_config") { "OPUS_BUILD", "OPUS_EXPORT=", "ENABLE_HARDENING", + "DISABLE_DEBUG_FLOAT", # Prefer alloca() over variable length arrays which are often inefficient; # the opus code will automatically handle this correctly per-platform. @@ -76,6 +77,7 @@ config("opus_private_config") { # Run Time CPU Detections (RTCD) is always enabled for x86. "OPUS_HAVE_RTCD", "CPU_INFO_BY_ASM", + "FLOAT_APPROX", # Chrome always targets SSE2+. "OPUS_X86_MAY_HAVE_SSE", @@ -88,7 +90,7 @@ config("opus_private_config") { # At present libopus has no AVX functions so no sources are add for this, # if you see linker errors on AVX code the this flag is why. - "OPUS_X86_MAY_HAVE_AVX", + "OPUS_X86_MAY_HAVE_AVX2", ] } @@ -187,11 +189,39 @@ if (use_opus_x86_optimization) { ":opus_config", ] + if (!is_debug) { + configs -= [ "//build/config/compiler:default_optimization" ] + configs += [ "//build/config/compiler:optimize_speed" ] + } + if (!is_win || is_clang) { cflags = [ "-msse4.1" ] } } - # TODO(dalecurtis): If libopus ever adds AVX support, add an opus_avx block. 
+ + source_set("opus_avx2") { + sources = [ + "src/celt/x86/pitch_avx.c", + "src/silk/x86/NSQ_del_dec_avx2.c", + "src/silk/float/x86/inner_product_FLP_avx2.c", + ] + + configs -= [ "//build/config/compiler:chromium_code" ] + configs += [ "//build/config/compiler:no_chromium_code" ] + configs += [ + ":opus_private_config", + ":opus_config", + ] + + if (!is_debug) { + configs -= [ "//build/config/compiler:default_optimization" ] + configs += [ "//build/config/compiler:optimize_speed" ] + } + + if (!is_win || is_clang) { + cflags = [ "-mavx", "-mfma", "-mavx2" ] + } + } } # Note: Do not add any defines or include_dirs to this target, those should all @@ -350,6 +380,7 @@ static_library("opus") { "src/silk/typedef.h", "src/src/analysis.c", "src/src/analysis.h", + "src/src/extensions.c", "src/src/mapping_matrix.c", "src/src/mapping_matrix.h", "src/src/mlp.c", @@ -437,6 +468,7 @@ static_library("opus") { "src/silk/x86/x86_silk_map.c", ] deps += [ ":opus_sse41" ] + deps += [ ":opus_avx2" ] } if (use_opus_arm_optimization) {