mirror of
https://github.com/mollyim/webrtc.git
synced 2025-05-14 14:20:45 +01:00

Now that we have moved WebRTC from src/webrtc to src/, common_types.h and typedefs.h are triggering a cpplint error. The cpplint complaint is:

    Include the directory when naming .h files [build/include] [4]

This CL disables the error, but we still have to remove these two headers from the root directory.

NOPRESUBMIT=true
Bug: webrtc:5876
Change-Id: I08e1b69aadcc4b28ab83bf25e3819d135d41d333
Reviewed-on: https://webrtc-review.googlesource.com/1577
Commit-Queue: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Henrik Kjellander <kjellander@google.com>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#19859}
329 lines
18 KiB
C
329 lines
18 KiB
C
/*
|
|
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include <stddef.h>
|
|
|
|
#include "modules/audio_coding/codecs/isac/fix/source/settings.h"
|
|
#include "typedefs.h" // NOLINT(build/include)
|
|
|
|
// Filter ar_g_Q0[] and ar_f_Q0[] through an AR filter with coefficients
|
|
// cth_Q15[] and sth_Q15[].
|
|
void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0, // Input samples
|
|
int16_t* ar_f_Q0, // Input samples
|
|
int16_t* cth_Q15, // Filter coefficients
|
|
int16_t* sth_Q15, // Filter coefficients
|
|
size_t order_coef) { // order of the filter
|
|
int n = 0;
|
|
|
|
for (n = 0; n < HALF_SUBFRAMELEN - 1; n++) {
|
|
int count = (int)(order_coef - 1);
|
|
int offset;
|
|
#if !defined(MIPS_DSP_R1_LE)
|
|
int16_t* tmp_cth;
|
|
int16_t* tmp_sth;
|
|
int16_t* tmp_arg;
|
|
int32_t max_q16 = 0x7fff;
|
|
int32_t min_q16 = 0xffff8000;
|
|
#endif
|
|
// Declare variables used as temporary registers.
|
|
int32_t r0, r1, r2, t0, t1, t2, t_ar;
|
|
|
|
__asm __volatile (
|
|
".set push \n\t"
|
|
".set noreorder \n\t"
|
|
"bltz %[count], 2f \n\t"
|
|
" lh %[t_ar], 0(%[tmp]) \n\t"
|
|
// Inner loop
|
|
"1: \n\t"
|
|
"sll %[offset], %[count], 1 \n\t"
|
|
#if defined(MIPS_DSP_R1_LE)
|
|
"lhx %[r0], %[offset](%[cth_Q15]) \n\t"
|
|
"lhx %[r1], %[offset](%[sth_Q15]) \n\t"
|
|
"lhx %[r2], %[offset](%[ar_g_Q0]) \n\t"
|
|
#else
|
|
"addu %[tmp_cth], %[cth_Q15], %[offset] \n\t"
|
|
"addu %[tmp_sth], %[sth_Q15], %[offset] \n\t"
|
|
"addu %[tmp_arg], %[ar_g_Q0], %[offset] \n\t"
|
|
"lh %[r0], 0(%[tmp_cth]) \n\t"
|
|
"lh %[r1], 0(%[tmp_sth]) \n\t"
|
|
"lh %[r2], 0(%[tmp_arg]) \n\t"
|
|
#endif
|
|
"mul %[t0], %[r0], %[t_ar] \n\t"
|
|
"mul %[t1], %[r1], %[t_ar] \n\t"
|
|
"mul %[t2], %[r1], %[r2] \n\t"
|
|
"mul %[r0], %[r0], %[r2] \n\t"
|
|
"subu %[t0], %[t0], %[t2] \n\t"
|
|
"addu %[t1], %[t1], %[r0] \n\t"
|
|
#if defined(MIPS_DSP_R1_LE)
|
|
"shra_r.w %[t1], %[t1], 15 \n\t"
|
|
"shra_r.w %[t0], %[t0], 15 \n\t"
|
|
#else
|
|
"addiu %[t1], %[t1], 0x4000 \n\t"
|
|
"sra %[t1], %[t1], 15 \n\t"
|
|
"addiu %[t0], %[t0], 0x4000 \n\t"
|
|
"sra %[t0], %[t0], 15 \n\t"
|
|
#endif
|
|
"addiu %[offset], %[offset], 2 \n\t"
|
|
#if defined(MIPS_DSP_R1_LE)
|
|
"shll_s.w %[t1], %[t1], 16 \n\t"
|
|
"shll_s.w %[t_ar], %[t0], 16 \n\t"
|
|
#else
|
|
"slt %[r0], %[t1], %[max_q16] \n\t"
|
|
"slt %[r1], %[t0], %[max_q16] \n\t"
|
|
"movz %[t1], %[max_q16], %[r0] \n\t"
|
|
"movz %[t0], %[max_q16], %[r1] \n\t"
|
|
#endif
|
|
"addu %[offset], %[offset], %[ar_g_Q0] \n\t"
|
|
#if defined(MIPS_DSP_R1_LE)
|
|
"sra %[t1], %[t1], 16 \n\t"
|
|
"sra %[t_ar], %[t_ar], 16 \n\t"
|
|
#else
|
|
"slt %[r0], %[t1], %[min_q16] \n\t"
|
|
"slt %[r1], %[t0], %[min_q16] \n\t"
|
|
"movn %[t1], %[min_q16], %[r0] \n\t"
|
|
"movn %[t0], %[min_q16], %[r1] \n\t"
|
|
"addu %[t_ar], $zero, %[t0] \n\t"
|
|
#endif
|
|
"sh %[t1], 0(%[offset]) \n\t"
|
|
"bgtz %[count], 1b \n\t"
|
|
" addiu %[count], %[count], -1 \n\t"
|
|
"2: \n\t"
|
|
"sh %[t_ar], 0(%[tmp]) \n\t"
|
|
"sh %[t_ar], 0(%[ar_g_Q0]) \n\t"
|
|
".set pop \n\t"
|
|
: [t_ar] "=&r" (t_ar), [count] "+r" (count), [offset] "=&r" (offset),
|
|
[r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [t0] "=&r" (t0),
|
|
#if !defined(MIPS_DSP_R1_LE)
|
|
[tmp_cth] "=&r" (tmp_cth), [tmp_sth] "=&r" (tmp_sth),
|
|
[tmp_arg] "=&r" (tmp_arg),
|
|
#endif
|
|
[t1] "=&r" (t1), [t2] "=&r" (t2)
|
|
: [tmp] "r" (&ar_f_Q0[n+1]), [cth_Q15] "r" (cth_Q15),
|
|
#if !defined(MIPS_DSP_R1_LE)
|
|
[max_q16] "r" (max_q16), [min_q16] "r" (min_q16),
|
|
#endif
|
|
[sth_Q15] "r" (sth_Q15), [ar_g_Q0] "r" (ar_g_Q0)
|
|
: "memory", "hi", "lo"
|
|
);
|
|
}
|
|
}
|
|
|
|
// MIPS optimization of the inner loop used for function
|
|
// WebRtcIsacfix_NormLatticeFilterMa(). It does:
|
|
//
|
|
// for 0 <= n < HALF_SUBFRAMELEN - 1:
|
|
// *ptr2 = input2 * (*ptr2) + input0 * (*ptr0));
|
|
// *ptr1 = input1 * (*ptr0) + input0 * (*ptr2);
|
|
//
|
|
// Note, function WebRtcIsacfix_FilterMaLoopMIPS and WebRtcIsacfix_FilterMaLoopC
|
|
// are not bit-exact. The accuracy of the MIPS function is same or better.
|
|
void WebRtcIsacfix_FilterMaLoopMIPS(int16_t input0, // Filter coefficient
|
|
int16_t input1, // Filter coefficient
|
|
int32_t input2, // Inverse coeff (1/input1)
|
|
int32_t* ptr0, // Sample buffer
|
|
int32_t* ptr1, // Sample buffer
|
|
int32_t* ptr2) { // Sample buffer
|
|
#if defined(MIPS_DSP_R2_LE)
|
|
// MIPS DSPR2 version. 4 available accumulators allows loop unrolling 4 times.
|
|
// This variant is not bit-exact with WebRtcIsacfix_FilterMaLoopC, since we
|
|
// are exploiting 64-bit accumulators. The accuracy of the MIPS DSPR2 function
|
|
// is same or better.
|
|
int n = (HALF_SUBFRAMELEN - 1) >> 2;
|
|
int m = (HALF_SUBFRAMELEN - 1) & 3;
|
|
|
|
int r0, r1, r2, r3;
|
|
int t0, t1, t2, t3;
|
|
int s0, s1, s2, s3;
|
|
|
|
__asm __volatile (
|
|
".set push \n\t"
|
|
".set noreorder \n\t"
|
|
"1: \n\t"
|
|
"lw %[r0], 0(%[ptr0]) \n\t"
|
|
"lw %[r1], 4(%[ptr0]) \n\t"
|
|
"lw %[r2], 8(%[ptr0]) \n\t"
|
|
"lw %[r3], 12(%[ptr0]) \n\t"
|
|
"mult $ac0, %[r0], %[input0] \n\t"
|
|
"mult $ac1, %[r1], %[input0] \n\t"
|
|
"mult $ac2, %[r2], %[input0] \n\t"
|
|
"mult $ac3, %[r3], %[input0] \n\t"
|
|
"lw %[t0], 0(%[ptr2]) \n\t"
|
|
"extr_rs.w %[s0], $ac0, 15 \n\t"
|
|
"extr_rs.w %[s1], $ac1, 15 \n\t"
|
|
"extr_rs.w %[s2], $ac2, 15 \n\t"
|
|
"extr_rs.w %[s3], $ac3, 15 \n\t"
|
|
"lw %[t1], 4(%[ptr2]) \n\t"
|
|
"lw %[t2], 8(%[ptr2]) \n\t"
|
|
"lw %[t3], 12(%[ptr2]) \n\t"
|
|
"addu %[t0], %[t0], %[s0] \n\t"
|
|
"addu %[t1], %[t1], %[s1] \n\t"
|
|
"addu %[t2], %[t2], %[s2] \n\t"
|
|
"addu %[t3], %[t3], %[s3] \n\t"
|
|
"mult $ac0, %[t0], %[input2] \n\t"
|
|
"mult $ac1, %[t1], %[input2] \n\t"
|
|
"mult $ac2, %[t2], %[input2] \n\t"
|
|
"mult $ac3, %[t3], %[input2] \n\t"
|
|
"addiu %[ptr0], %[ptr0], 16 \n\t"
|
|
"extr_rs.w %[t0], $ac0, 16 \n\t"
|
|
"extr_rs.w %[t1], $ac1, 16 \n\t"
|
|
"extr_rs.w %[t2], $ac2, 16 \n\t"
|
|
"extr_rs.w %[t3], $ac3, 16 \n\t"
|
|
"addiu %[n], %[n], -1 \n\t"
|
|
"mult $ac0, %[r0], %[input1] \n\t"
|
|
"mult $ac1, %[r1], %[input1] \n\t"
|
|
"mult $ac2, %[r2], %[input1] \n\t"
|
|
"mult $ac3, %[r3], %[input1] \n\t"
|
|
"sw %[t0], 0(%[ptr2]) \n\t"
|
|
"extr_rs.w %[s0], $ac0, 15 \n\t"
|
|
"extr_rs.w %[s1], $ac1, 15 \n\t"
|
|
"extr_rs.w %[s2], $ac2, 15 \n\t"
|
|
"extr_rs.w %[s3], $ac3, 15 \n\t"
|
|
"sw %[t1], 4(%[ptr2]) \n\t"
|
|
"sw %[t2], 8(%[ptr2]) \n\t"
|
|
"sw %[t3], 12(%[ptr2]) \n\t"
|
|
"mult $ac0, %[t0], %[input0] \n\t"
|
|
"mult $ac1, %[t1], %[input0] \n\t"
|
|
"mult $ac2, %[t2], %[input0] \n\t"
|
|
"mult $ac3, %[t3], %[input0] \n\t"
|
|
"addiu %[ptr2], %[ptr2], 16 \n\t"
|
|
"extr_rs.w %[t0], $ac0, 15 \n\t"
|
|
"extr_rs.w %[t1], $ac1, 15 \n\t"
|
|
"extr_rs.w %[t2], $ac2, 15 \n\t"
|
|
"extr_rs.w %[t3], $ac3, 15 \n\t"
|
|
"addu %[t0], %[t0], %[s0] \n\t"
|
|
"addu %[t1], %[t1], %[s1] \n\t"
|
|
"addu %[t2], %[t2], %[s2] \n\t"
|
|
"addu %[t3], %[t3], %[s3] \n\t"
|
|
"sw %[t0], 0(%[ptr1]) \n\t"
|
|
"sw %[t1], 4(%[ptr1]) \n\t"
|
|
"sw %[t2], 8(%[ptr1]) \n\t"
|
|
"sw %[t3], 12(%[ptr1]) \n\t"
|
|
"bgtz %[n], 1b \n\t"
|
|
" addiu %[ptr1], %[ptr1], 16 \n\t"
|
|
"beq %[m], %0, 3f \n\t"
|
|
" nop \n\t"
|
|
"2: \n\t"
|
|
"lw %[r0], 0(%[ptr0]) \n\t"
|
|
"lw %[t0], 0(%[ptr2]) \n\t"
|
|
"addiu %[ptr0], %[ptr0], 4 \n\t"
|
|
"mult $ac0, %[r0], %[input0] \n\t"
|
|
"mult $ac1, %[r0], %[input1] \n\t"
|
|
"extr_rs.w %[r1], $ac0, 15 \n\t"
|
|
"extr_rs.w %[t1], $ac1, 15 \n\t"
|
|
"addu %[t0], %[t0], %[r1] \n\t"
|
|
"mult $ac0, %[t0], %[input2] \n\t"
|
|
"extr_rs.w %[t0], $ac0, 16 \n\t"
|
|
"sw %[t0], 0(%[ptr2]) \n\t"
|
|
"mult $ac0, %[t0], %[input0] \n\t"
|
|
"addiu %[ptr2], %[ptr2], 4 \n\t"
|
|
"addiu %[m], %[m], -1 \n\t"
|
|
"extr_rs.w %[t0], $ac0, 15 \n\t"
|
|
"addu %[t0], %[t0], %[t1] \n\t"
|
|
"sw %[t0], 0(%[ptr1]) \n\t"
|
|
"bgtz %[m], 2b \n\t"
|
|
" addiu %[ptr1], %[ptr1], 4 \n\t"
|
|
"3: \n\t"
|
|
".set pop \n\t"
|
|
: [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
|
|
[r3] "=&r" (r3), [t0] "=&r" (t0), [t1] "=&r" (t1),
|
|
[t2] "=&r" (t2), [t3] "=&r" (t3), [s0] "=&r" (s0),
|
|
[s1] "=&r" (s1), [s2] "=&r" (s2), [s3] "=&r" (s3),
|
|
[ptr0] "+r" (ptr0), [ptr1] "+r" (ptr1), [m] "+r" (m),
|
|
[ptr2] "+r" (ptr2), [n] "+r" (n)
|
|
: [input0] "r" (input0), [input1] "r" (input1),
|
|
[input2] "r" (input2)
|
|
: "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi",
|
|
"$ac2lo", "$ac3hi", "$ac3lo"
|
|
);
|
|
#else
|
|
// Non-DSPR2 version of the function. Avoiding the accumulator usage due to
|
|
// large latencies. This variant is bit-exact with C code.
|
|
int n = HALF_SUBFRAMELEN - 1;
|
|
int32_t t16a, t16b;
|
|
int32_t r0, r1, r2, r3, r4;
|
|
|
|
__asm __volatile (
|
|
".set push \n\t"
|
|
".set noreorder \n\t"
|
|
"sra %[t16a], %[input2], 16 \n\t"
|
|
"andi %[t16b], %[input2], 0xFFFF \n\t"
|
|
#if defined(MIPS32R2_LE)
|
|
"seh %[t16b], %[t16b] \n\t"
|
|
"seh %[input0], %[input0] \n\t"
|
|
"seh %[input1], %[input1] \n\t"
|
|
#else
|
|
"sll %[t16b], %[t16b], 16 \n\t"
|
|
"sra %[t16b], %[t16b], 16 \n\t"
|
|
"sll %[input0], %[input0], 16 \n\t"
|
|
"sra %[input0], %[input0], 16 \n\t"
|
|
"sll %[input1], %[input1], 16 \n\t"
|
|
"sra %[input1], %[input1], 16 \n\t"
|
|
#endif
|
|
"addiu %[r0], %[t16a], 1 \n\t"
|
|
"slt %[r1], %[t16b], $zero \n\t"
|
|
"movn %[t16a], %[r0], %[r1] \n\t"
|
|
"1: \n\t"
|
|
"lw %[r0], 0(%[ptr0]) \n\t"
|
|
"lw %[r1], 0(%[ptr2]) \n\t"
|
|
"addiu %[ptr0], %[ptr0], 4 \n\t"
|
|
"sra %[r2], %[r0], 16 \n\t"
|
|
"andi %[r0], %[r0], 0xFFFF \n\t"
|
|
"mul %[r3], %[r2], %[input0] \n\t"
|
|
"mul %[r4], %[r0], %[input0] \n\t"
|
|
"mul %[r2], %[r2], %[input1] \n\t"
|
|
"mul %[r0], %[r0], %[input1] \n\t"
|
|
"addiu %[ptr2], %[ptr2], 4 \n\t"
|
|
"sll %[r3], %[r3], 1 \n\t"
|
|
"sra %[r4], %[r4], 1 \n\t"
|
|
"addiu %[r4], %[r4], 0x2000 \n\t"
|
|
"sra %[r4], %[r4], 14 \n\t"
|
|
"addu %[r3], %[r3], %[r4] \n\t"
|
|
"addu %[r1], %[r1], %[r3] \n\t"
|
|
"sra %[r3], %[r1], 16 \n\t"
|
|
"andi %[r4], %[r1], 0xFFFF \n\t"
|
|
"sra %[r4], %[r4], 1 \n\t"
|
|
"mul %[r1], %[r1], %[t16a] \n\t"
|
|
"mul %[r3], %[r3], %[t16b] \n\t"
|
|
"mul %[r4], %[r4], %[t16b] \n\t"
|
|
"sll %[r2], %[r2], 1 \n\t"
|
|
"sra %[r0], %[r0], 1 \n\t"
|
|
"addiu %[r0], %[r0], 0x2000 \n\t"
|
|
"sra %[r0], %[r0], 14 \n\t"
|
|
"addu %[r0], %[r0], %[r2] \n\t"
|
|
"addiu %[n], %[n], -1 \n\t"
|
|
"addu %[r1], %[r1], %[r3] \n\t"
|
|
"addiu %[r4], %[r4], 0x4000 \n\t"
|
|
"sra %[r4], %[r4], 15 \n\t"
|
|
"addu %[r1], %[r1], %[r4] \n\t"
|
|
"sra %[r2], %[r1], 16 \n\t"
|
|
"andi %[r3], %[r1], 0xFFFF \n\t"
|
|
"mul %[r3], %[r3], %[input0] \n\t"
|
|
"mul %[r2], %[r2], %[input0] \n\t"
|
|
"sw %[r1], -4(%[ptr2]) \n\t"
|
|
"sra %[r3], %[r3], 1 \n\t"
|
|
"addiu %[r3], %[r3], 0x2000 \n\t"
|
|
"sra %[r3], %[r3], 14 \n\t"
|
|
"addu %[r0], %[r0], %[r3] \n\t"
|
|
"sll %[r2], %[r2], 1 \n\t"
|
|
"addu %[r0], %[r0], %[r2] \n\t"
|
|
"sw %[r0], 0(%[ptr1]) \n\t"
|
|
"bgtz %[n], 1b \n\t"
|
|
" addiu %[ptr1], %[ptr1], 4 \n\t"
|
|
".set pop \n\t"
|
|
: [t16a] "=&r" (t16a), [t16b] "=&r" (t16b), [r0] "=&r" (r0),
|
|
[r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
|
|
[r4] "=&r" (r4), [ptr0] "+r" (ptr0), [ptr1] "+r" (ptr1),
|
|
[ptr2] "+r" (ptr2), [n] "+r" (n)
|
|
: [input0] "r" (input0), [input1] "r" (input1),
|
|
[input2] "r" (input2)
|
|
: "hi", "lo", "memory"
|
|
);
|
|
#endif
|
|
}
|