webrtc/modules/audio_coding/codecs/isac/fix/source/lattice.c
Mirko Bonadei 08973eed36 Using fully qualified #include paths in isac code.
WebRTC internal code should always used include paths that starts
from the root of the project and that clearly identify the header file.

This allows 'gn check' to actually keep dependencies under control
because 'gn check' cannot enforce anything if the include path
is not fully qualified (starting from the root of the project).

Bug: webrtc:8815
Change-Id: I23fb4fed0c27a4d98bea360315b959af843587bc
Reviewed-on: https://webrtc-review.googlesource.com/46101
Commit-Queue: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#21847}
2018-02-01 14:57:44 +00:00

322 lines
12 KiB
C

/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* lattice.c
*
* Contains the normalized lattice filter routines (MA and AR) for iSAC codec
*
*/
#include "modules/audio_coding/codecs/isac/fix/source/codec.h"
#include "modules/audio_coding/codecs/isac/fix/source/settings.h"
#include "rtc_base/sanitizer.h"
#define LATTICE_MUL_32_32_RSFT16(a32a, a32b, b32) \
((int32_t)(WEBRTC_SPL_MUL(a32a, b32) + (WEBRTC_SPL_MUL_16_32_RSFT16(a32b, b32))))
/* This macro is FORBIDDEN to use elsewhere than in a function in this file and
its corresponding neon version. It might give unpredictable results, since a
general int32_t*int32_t multiplication results in a 64 bit value.
The result is then shifted just 16 steps to the right, giving need for 48
bits, i.e. in the generel case, it will NOT fit in a int32_t. In the
cases used in here, the int32_t will be enough, since (for a good
reason) the involved multiplicands aren't big enough to overflow a
int32_t after shifting right 16 bits. I have compared the result of a
multiplication between t32 and tmp32, done in two ways:
1) Using (int32_t) (((float)(tmp32))*((float)(tmp32b))/65536.0);
2) Using LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b);
By running 25 files, I haven't found any bigger diff than 64 - this was in the
case when method 1) gave 650235648 and 2) gave 650235712.
*/
/* Function prototype: filtering ar_g_Q0[] and ar_f_Q0[] through an AR filter
with coefficients cth_Q15[] and sth_Q15[].
Implemented for both generic and ARMv7 platforms.
*/
void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0,
int16_t* ar_f_Q0,
int16_t* cth_Q15,
int16_t* sth_Q15,
size_t order_coef);
/* Inner loop used for function WebRtcIsacfix_NormLatticeFilterMa(). It does:
for 0 <= n < HALF_SUBFRAMELEN - 1:
*ptr2 = input2 * (*ptr2) + input0 * (*ptr0));
*ptr1 = input1 * (*ptr0) + input0 * (*ptr2);
Note, function WebRtcIsacfix_FilterMaLoopNeon and WebRtcIsacfix_FilterMaLoopC
are not bit-exact. The accuracy by the ARM Neon function is same or better.
*/
void WebRtcIsacfix_FilterMaLoopC(int16_t input0, // Filter coefficient
int16_t input1, // Filter coefficient
int32_t input2, // Inverse coeff. (1/input1)
int32_t* ptr0, // Sample buffer
int32_t* ptr1, // Sample buffer
int32_t* ptr2) { // Sample buffer
int n = 0;
// Separate the 32-bit variable input2 into two 16-bit integers (high 16 and
// low 16 bits), for using LATTICE_MUL_32_32_RSFT16 in the loop.
int16_t t16a = (int16_t)(input2 >> 16);
int16_t t16b = (int16_t)input2;
if (t16b < 0) t16a++;
// The loop filtering the samples *ptr0, *ptr1, *ptr2 with filter coefficients
// input0, input1, and input2.
for(n = 0; n < HALF_SUBFRAMELEN - 1; n++, ptr0++, ptr1++, ptr2++) {
int32_t tmp32a = 0;
int32_t tmp32b = 0;
// Calculate *ptr2 = input2 * (*ptr2 + input0 * (*ptr0));
tmp32a = WEBRTC_SPL_MUL_16_32_RSFT15(input0, *ptr0); // Q15 * Q15 >> 15 = Q15
tmp32b = *ptr2 + tmp32a; // Q15 + Q15 = Q15
*ptr2 = LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b);
// Calculate *ptr1 = input1 * (*ptr0) + input0 * (*ptr2);
tmp32a = WEBRTC_SPL_MUL_16_32_RSFT15(input1, *ptr0); // Q15*Q15>>15 = Q15
tmp32b = WEBRTC_SPL_MUL_16_32_RSFT15(input0, *ptr2); // Q15*Q15>>15 = Q15
*ptr1 = tmp32a + tmp32b; // Q15 + Q15 = Q15
}
}
/* filter the signal using normalized lattice filter */
/* MA filter */
void WebRtcIsacfix_NormLatticeFilterMa(size_t orderCoef,
int32_t *stateGQ15,
int16_t *lat_inQ0,
int16_t *filt_coefQ15,
int32_t *gain_lo_hiQ17,
int16_t lo_hi,
int16_t *lat_outQ9)
{
int16_t sthQ15[MAX_AR_MODEL_ORDER];
int16_t cthQ15[MAX_AR_MODEL_ORDER];
int u, n;
size_t i, k;
int16_t temp2,temp3;
size_t ord_1 = orderCoef+1;
int32_t inv_cthQ16[MAX_AR_MODEL_ORDER];
int32_t gain32, fQtmp;
int16_t gain16;
int16_t gain_sh;
int32_t tmp32, tmp32b;
int32_t fQ15vec[HALF_SUBFRAMELEN];
int32_t gQ15[MAX_AR_MODEL_ORDER+1][HALF_SUBFRAMELEN];
int16_t sh;
int16_t t16a;
int16_t t16b;
for (u=0;u<SUBFRAMES;u++)
{
int32_t temp1 = u * HALF_SUBFRAMELEN;
/* set the Direct Form coefficients */
temp2 = (int16_t)(u * orderCoef);
temp3 = (int16_t)(2 * u + lo_hi);
/* compute lattice filter coefficients */
memcpy(sthQ15, &filt_coefQ15[temp2], orderCoef * sizeof(int16_t));
WebRtcSpl_SqrtOfOneMinusXSquared(sthQ15, orderCoef, cthQ15);
/* compute the gain */
gain32 = gain_lo_hiQ17[temp3];
gain_sh = WebRtcSpl_NormW32(gain32);
gain32 <<= gain_sh; // Q(17+gain_sh)
for (k=0;k<orderCoef;k++)
{
gain32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[k], gain32); //Q15*Q(17+gain_sh)>>15 = Q(17+gain_sh)
inv_cthQ16[k] = WebRtcSpl_DivW32W16((int32_t)2147483647, cthQ15[k]); // 1/cth[k] in Q31/Q15 = Q16
}
gain16 = (int16_t)(gain32 >> 16); // Q(1+gain_sh).
/* normalized lattice filter */
/*****************************/
/* initial conditions */
for (i=0;i<HALF_SUBFRAMELEN;i++)
{
fQ15vec[i] = lat_inQ0[i + temp1] << 15; // Q15
gQ15[0][i] = lat_inQ0[i + temp1] << 15; // Q15
}
fQtmp = fQ15vec[0];
/* get the state of f&g for the first input, for all orders */
for (i=1;i<ord_1;i++)
{
// Calculate f[i][0] = inv_cth[i-1]*(f[i-1][0] + sth[i-1]*stateG[i-1]);
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(sthQ15[i-1], stateGQ15[i-1]);//Q15*Q15>>15 = Q15
tmp32b= fQtmp + tmp32; //Q15+Q15=Q15
tmp32 = inv_cthQ16[i-1]; //Q16
t16a = (int16_t)(tmp32 >> 16);
t16b = (int16_t)(tmp32 - (t16a << 16));
if (t16b<0) t16a++;
tmp32 = LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b);
fQtmp = tmp32; // Q15
// Calculate g[i][0] = cth[i-1]*stateG[i-1] + sth[i-1]* f[i][0];
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[i-1], stateGQ15[i-1]); //Q15*Q15>>15 = Q15
tmp32b = WEBRTC_SPL_MUL_16_32_RSFT15(sthQ15[i-1], fQtmp); //Q15*Q15>>15 = Q15
tmp32 = tmp32 + tmp32b;//Q15+Q15 = Q15
gQ15[i][0] = tmp32; // Q15
}
/* filtering */
/* save the states */
for(k=0;k<orderCoef;k++)
{
// for 0 <= n < HALF_SUBFRAMELEN - 1:
// f[k+1][n+1] = inv_cth[k]*(f[k][n+1] + sth[k]*g[k][n]);
// g[k+1][n+1] = cth[k]*g[k][n] + sth[k]* f[k+1][n+1];
WebRtcIsacfix_FilterMaLoopFix(sthQ15[k], cthQ15[k], inv_cthQ16[k],
&gQ15[k][0], &gQ15[k+1][1], &fQ15vec[1]);
}
fQ15vec[0] = fQtmp;
for(n=0;n<HALF_SUBFRAMELEN;n++)
{
//gain32 >>= gain_sh; // Q(17+gain_sh) -> Q17
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(gain16, fQ15vec[n]); //Q(1+gain_sh)*Q15>>16 = Q(gain_sh)
sh = 9-gain_sh; //number of needed shifts to reach Q9
t16a = (int16_t) WEBRTC_SPL_SHIFT_W32(tmp32, sh);
lat_outQ9[n + temp1] = t16a;
}
/* save the states */
for (i=0;i<ord_1;i++)
{
stateGQ15[i] = gQ15[i][HALF_SUBFRAMELEN-1];
}
//process next frame
}
return;
}
// Left shift of an int32_t that's allowed to overflow. (It's still undefined
// behavior, so not a good idea; this just makes UBSan ignore the violation, so
// that our old code can continue to do what it's always been doing.)
static inline int32_t RTC_NO_SANITIZE("shift")
OverflowingLShiftS32(int32_t x, int shift) {
return x << shift;
}
/* ----------------AR filter-------------------------*/
/* filter the signal using normalized lattice filter */
void WebRtcIsacfix_NormLatticeFilterAr(size_t orderCoef,
int16_t *stateGQ0,
int32_t *lat_inQ25,
int16_t *filt_coefQ15,
int32_t *gain_lo_hiQ17,
int16_t lo_hi,
int16_t *lat_outQ0)
{
size_t ii, k, i;
int n, u;
int16_t sthQ15[MAX_AR_MODEL_ORDER];
int16_t cthQ15[MAX_AR_MODEL_ORDER];
int32_t tmp32;
int16_t tmpAR;
int16_t ARfQ0vec[HALF_SUBFRAMELEN];
int16_t ARgQ0vec[MAX_AR_MODEL_ORDER+1];
int32_t inv_gain32;
int16_t inv_gain16;
int16_t den16;
int16_t sh;
int16_t temp2,temp3;
size_t ord_1 = orderCoef+1;
for (u=0;u<SUBFRAMES;u++)
{
int32_t temp1 = u * HALF_SUBFRAMELEN;
//set the denominator and numerator of the Direct Form
temp2 = (int16_t)(u * orderCoef);
temp3 = (int16_t)(2 * u + lo_hi);
for (ii=0; ii<orderCoef; ii++) {
sthQ15[ii] = filt_coefQ15[temp2+ii];
}
WebRtcSpl_SqrtOfOneMinusXSquared(sthQ15, orderCoef, cthQ15);
// Originally, this line was assumed to never overflow, since "[s]imulation
// of the 25 files shows that maximum value in the vector gain_lo_hiQ17[]
// is 441344, which means that it is log2((2^31)/441344) = 12.2 shifting
// bits from saturation. Therefore, it should be safe to use Q27 instead of
// Q17." However, a fuzzer test succeeded in provoking an overflow here,
// which we ignore on the theory that only "abnormal" inputs cause
// overflow.
tmp32 = OverflowingLShiftS32(gain_lo_hiQ17[temp3], 10); // Q27
for (k=0;k<orderCoef;k++) {
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[k], tmp32); // Q15*Q27>>15 = Q27
}
sh = WebRtcSpl_NormW32(tmp32); // tmp32 is the gain
den16 = (int16_t) WEBRTC_SPL_SHIFT_W32(tmp32, sh-16); //Q(27+sh-16) = Q(sh+11) (all 16 bits are value bits)
inv_gain32 = WebRtcSpl_DivW32W16((int32_t)2147483647, den16); // 1/gain in Q31/Q(sh+11) = Q(20-sh)
//initial conditions
inv_gain16 = (int16_t)(inv_gain32 >> 2); // 1/gain in Q(20-sh-2) = Q(18-sh)
for (i=0;i<HALF_SUBFRAMELEN;i++)
{
tmp32 = lat_inQ25[i + temp1] * (1 << 1); // Q25->Q26
tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(inv_gain16, tmp32); //lat_in[]*inv_gain in (Q(18-sh)*Q26)>>16 = Q(28-sh)
tmp32 = WEBRTC_SPL_SHIFT_W32(tmp32, -(28-sh)); // lat_in[]*inv_gain in Q0
ARfQ0vec[i] = (int16_t)WebRtcSpl_SatW32ToW16(tmp32); // Q0
}
// Get the state of f & g for the first input, for all orders.
for (i = orderCoef; i > 0; i--)
{
tmp32 = (cthQ15[i - 1] * ARfQ0vec[0] - sthQ15[i - 1] * stateGQ0[i - 1] +
16384) >> 15;
tmpAR = (int16_t)WebRtcSpl_SatW32ToW16(tmp32); // Q0
tmp32 = (sthQ15[i - 1] * ARfQ0vec[0] + cthQ15[i - 1] * stateGQ0[i - 1] +
16384) >> 15;
ARgQ0vec[i] = (int16_t)WebRtcSpl_SatW32ToW16(tmp32); // Q0
ARfQ0vec[0] = tmpAR;
}
ARgQ0vec[0] = ARfQ0vec[0];
// Filter ARgQ0vec[] and ARfQ0vec[] through coefficients cthQ15[] and sthQ15[].
WebRtcIsacfix_FilterArLoop(ARgQ0vec, ARfQ0vec, cthQ15, sthQ15, orderCoef);
for(n=0;n<HALF_SUBFRAMELEN;n++)
{
lat_outQ0[n + temp1] = ARfQ0vec[n];
}
/* cannot use memcpy in the following */
for (i=0;i<ord_1;i++)
{
stateGQ0[i] = ARgQ0vec[i];
}
}
return;
}