webrtc/system_wrappers/source/cpu_features.cc
Zhaoliang Ma 1ca8d87239 Support AVX2/FMA intrinsics in Audio Resampler module
From the test result, using AVX2/FMA is 1.60x faster than SSE on atlas.

Bug: webrtc:11663
Test: common_audio_unittests on atlas and octopus.
Change-Id: Ibd45ea46aa97d5790a24e5116f741592b95f6416
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/176382
Reviewed-by: Per Åhgren <peah@webrtc.org>
Reviewed-by: Henrik Andreassson <henrika@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31810}
2020-07-30 11:39:38 +00:00

105 lines
3 KiB
C++

/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Parts of this file derived from Chromium's base/cpu.cc.
#include "rtc_base/system/arch.h"
#include "system_wrappers/include/cpu_features_wrapper.h"
#if defined(WEBRTC_ARCH_X86_FAMILY) && defined(_MSC_VER)
#include <intrin.h>
#endif
// Feature query that reports every feature as unavailable, which keeps
// callers on the straight C path.
int GetCPUInfoNoASM(CPUFeature feature) {
  static_cast<void>(feature);  // The argument is intentionally ignored.
  return 0;
}
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Reads the Intel Extended Control Register (XCR) selected by |xcr| and
// returns its 64-bit value. Only XCR0 is currently defined by Intel, so |xcr|
// should always be zero.
uint64_t xgetbv(uint32_t xcr) {
#if defined(_MSC_VER)
  return _xgetbv(xcr);
#else
  // The register index goes in through ECX; the result comes back split
  // across EDX (upper 32 bits) and EAX (lower 32 bits).
  uint32_t low_half, high_half;
  __asm__ volatile("xgetbv" : "=a"(low_half), "=d"(high_half) : "c"(xcr));
  uint64_t value = high_half;
  value <<= 32;
  value |= low_half;
  return value;
#endif  // _MSC_VER
}
#ifndef _MSC_VER
// Intrinsic for "cpuid".
#if defined(__pic__) && defined(__i386__)
// CPUID wrapper for 32-bit position-independent builds.
//
// Executes CPUID for leaf |info_type| with the sub-leaf (ECX) set to zero and
// stores EAX, EBX, ECX and EDX in cpu_info[0..3], in that order.
//
// EBX is saved in EDI before the instruction and swapped back afterwards, so
// the EBX result is delivered through EDI and the caller's EBX is left intact.
// NOTE(review): presumably needed because EBX holds the GOT pointer in i386
// PIC code - confirm against the toolchains in use.
static inline void __cpuid(int cpu_info[4], int info_type) {
  __asm__ volatile(
      "mov %%ebx, %%edi\n"
      "cpuid\n"
      "xchg %%edi, %%ebx\n"
      : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]),
        "=d"(cpu_info[3])
      // ECX must be loaded explicitly: leaves with sub-leaves (e.g. leaf 7)
      // read it as an input, so leaving it unset yields undefined results.
      : "a"(info_type), "c"(0));
}
#else
// Executes CPUID for leaf |info_type| with the sub-leaf (ECX) fixed to zero
// and stores EAX, EBX, ECX and EDX in cpu_info[0..3], in that order.
static inline void __cpuid(int cpu_info[4], int info_type) {
  int reg_a, reg_b, reg_c, reg_d;
  __asm__ volatile("cpuid\n"
                   : "=a"(reg_a), "=b"(reg_b), "=c"(reg_c), "=d"(reg_d)
                   : "a"(info_type), "c"(0));
  cpu_info[0] = reg_a;
  cpu_info[1] = reg_b;
  cpu_info[2] = reg_c;
  cpu_info[3] = reg_d;
}
#endif
#endif // _MSC_VER
#endif // WEBRTC_ARCH_X86_FAMILY
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Runtime feature detection for x86.
// Returns 1 when |feature| is reported as usable and 0 otherwise. Features
// other than kSSE2, kSSE3 and kAVX2 always yield 0, and kAVX2 yields 0 unless
// the build defines WEBRTC_ENABLE_AVX2.
static int GetCPUInfo(CPUFeature feature) {
  int regs[4];

  // Leaf 0: EAX carries the highest standard CPUID leaf the CPU supports.
  __cpuid(regs, 0);
  const int max_leaf = regs[0];

  // Leaf 1: feature flags live in ECX (regs[2]) and EDX (regs[3]).
  __cpuid(regs, 1);
  const int leaf1_ecx = regs[2];
  const int leaf1_edx = regs[3];

  if (feature == kSSE2) {
    return (leaf1_edx & 0x04000000) != 0 ? 1 : 0;
  }
  if (feature == kSSE3) {
    return (leaf1_ecx & 0x00000001) != 0 ? 1 : 0;
  }
  if (feature != kAVX2) {
    return 0;
  }

  // Leaf 7 is only queried when the CPU advertises it. Otherwise the EBX slot
  // stays zero, so the AVX2 bit test below fails.
  int leaf7[4] = {-1, 0, 0, 0};
  if (max_leaf >= 7) {
    __cpuid(leaf7, 7);
  }
#if defined(WEBRTC_ENABLE_AVX2)
  // The order matters: xgetbv() is only reached once OSXSAVE is confirmed.
  if ((leaf1_ecx & 0x10000000) == 0) {  // AVX
    return 0;
  }
  if ((leaf1_ecx & 0x04000000) == 0) {  // XSAVE
    return 0;
  }
  if ((leaf1_ecx & 0x08000000) == 0) {  // OSXSAVE
    return 0;
  }
  if ((xgetbv(0) & 0x00000006) != 6) {  // XSAVE enabled by kernel
    return 0;
  }
  return (leaf7[1] & 0x00000020) != 0 ? 1 : 0;
#else
  return 0;
#endif  // WEBRTC_ENABLE_AVX2
}
#else
// Non-x86 fallback: no feature is ever reported, so other platforms default
// to straight C.
static int GetCPUInfo(CPUFeature feature) {
  static_cast<void>(feature);  // The argument is intentionally ignored.
  return 0;
}
#endif
// Feature-query entry points, bound to the implementations in this file.
// WebRtc_GetCPUInfo performs the real detection selected for this build.
WebRtc_CPUInfo WebRtc_GetCPUInfo = &GetCPUInfo;
// WebRtc_GetCPUInfoNoASM always answers "not available".
WebRtc_CPUInfo WebRtc_GetCPUInfoNoASM = &GetCPUInfoNoASM;