webrtc/modules/audio_coding/neteq/merge.h
Henrik Lundin 6dc82e8f8b NetEq: Change NetEq's ramp-up behavior after expansions
NetEq tapers down the audio produced through loss concealment when the
expansion has been going on for some time. When audio packets start
coming in again, a ramp-up takes place. Before this change, this ramp-up
could extend over more than one 10 ms block, which made
keeping track of the scaling factor necessary. With this change, we make
this ramp-up quicker in the rare cases when it lasted more than 10 ms,
so that it always ramps up to 100% within one block. This way, we can
remove the mute_factor_array.

This change breaks bit-exactness, but careful listening could not reveal
an audible difference.

This change is a part of a larger refactoring of NetEq's PLC code.

Bug: webrtc:9180
Change-Id: I4c513ce3ed8d66f9beec2abfb1f0c7ffaac7a21e
Reviewed-on: https://webrtc-review.googlesource.com/77180
Commit-Queue: Henrik Lundin <henrik.lundin@webrtc.org>
Reviewed-by: Minyue Li <minyue@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23342}
2018-05-22 09:38:28 +00:00

99 lines
3.9 KiB
C++

/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_CODING_NETEQ_MERGE_H_
#define MODULES_AUDIO_CODING_NETEQ_MERGE_H_
#include <assert.h>
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "rtc_base/constructormagic.h"
#include "typedefs.h" // NOLINT(build/include)
namespace webrtc {
// Forward declarations.
class Expand;
class SyncBuffer;
// This class handles the transition from expansion to normal operation.
// When a packet is not available for decoding when needed, the expand operation
// is called to generate extrapolation data. If the missing packet arrives,
// i.e., it was just delayed, it can be decoded and appended directly to the
// end of the expanded data (thanks to how the Expand class operates). However,
// if a later packet arrives instead, the loss is a fact, and the new data must
// be stitched together with the end of the expanded data. This stitching is
// what the Merge class does.
//
// The public methods and the destructor are virtual, so the class can be
// subclassed and its behavior overridden. Copying and assignment are disabled
// via RTC_DISALLOW_COPY_AND_ASSIGN at the end of the class.
class Merge {
public:
// Creates a Merge object for audio sampled at |fs_hz| Hz with |num_channels|
// channels, operating on the given |expand| and |sync_buffer| objects.
// NOTE(review): the constructor body is not visible in this header. The
// arguments are presumably stored in |fs_hz_|, |num_channels_|, |expand_| and
// |sync_buffer_|, and the raw pointers are presumably non-owning and must
// outlive this object -- confirm against merge.cc.
Merge(int fs_hz,
size_t num_channels,
Expand* expand,
SyncBuffer* sync_buffer);
virtual ~Merge();
// The main method to produce the audio data. The decoded data is supplied in
// |input|, having |input_length| samples in total for all channels
// (interleaved). The result is written to |output|. The number of channels
// allocated in |output| defines the number of channels that will be used when
// de-interleaving |input|.
// NOTE(review): the meaning of the return value is not documented here;
// presumably it is the number of samples produced -- confirm. |input| is
// taken as a non-const pointer, so the implementation may modify it; verify
// before relying on |input| being unchanged after the call.
virtual size_t Process(int16_t* input, size_t input_length,
AudioMultiVector* output);
// NOTE(review): undocumented in the original. Judging by the name only, this
// presumably returns how many future samples Merge needs to have available
// in order to operate -- confirm the exact meaning and unit in merge.cc.
virtual size_t RequiredFutureSamples();
protected:
// Sample rate in Hz, fixed for the lifetime of the object.
const int fs_hz_;
// Number of audio channels, fixed for the lifetime of the object.
const size_t num_channels_;
private:
// NOTE(review): usage not visible in this header; presumably the highest
// sample rate (in Hz) the implementation is dimensioned for -- confirm.
static const int kMaxSampleRate = 48000;
// Capacity, in samples, of |expanded_downsampled_| (25 ms at the 4 kHz rate
// that Downsample() produces).
static const size_t kExpandDownsampLength = 100;
// Capacity, in samples, of |input_downsampled_| (10 ms at the 4 kHz rate
// that Downsample() produces).
static const size_t kInputDownsampLength = 40;
// NOTE(review): usage not visible in this header; presumably an upper bound
// on the number of correlation lags evaluated by CorrelateAndPeakSearch() --
// confirm.
static const size_t kMaxCorrelationLength = 60;
// Calls |expand_| to get more expansion data to merge with. The data is
// written to |expanded_signal_|. Returns the length of the expanded data,
// while |expand_period| will be the number of samples in one expansion period
// (typically one pitch period). The value of |old_length| will be the number
// of samples that were taken from the |sync_buffer_|.
// NOTE(review): no member named |expanded_signal_| is declared in this class;
// the comment above presumably refers to |expanded_| -- confirm and update.
size_t GetExpandedSignal(size_t* old_length, size_t* expand_period);
// Analyzes |input| and |expanded_signal| and returns muting factor (Q14) to
// be used on the new data. In Q14 fixed-point format, 1.0 is represented as
// 16384.
int16_t SignalScaling(const int16_t* input, size_t input_length,
const int16_t* expanded_signal) const;
// Downsamples |input| (|input_length| samples) and |expanded_signal| to
// 4 kHz sample rate. The downsampled signals are written to
// |input_downsampled_| and |expanded_downsampled_|, respectively.
// |expanded_length| is presumably the number of samples in |expanded_signal|
// (TODO confirm).
void Downsample(const int16_t* input, size_t input_length,
const int16_t* expanded_signal, size_t expanded_length);
// Calculates cross-correlation between |input_downsampled_| and
// |expanded_downsampled_|, and finds the correlation maximum. The maximizing
// lag is returned.
// NOTE(review): the roles and units of |start_position|, |input_length| and
// |expand_period| are not documented here -- see merge.cc.
size_t CorrelateAndPeakSearch(size_t start_position, size_t input_length,
size_t expand_period) const;
const int fs_mult_; // fs_hz_ / 8000.
// NOTE(review): initialization not visible in this header; presumably the
// number of timestamps (samples per channel) handled per call -- confirm.
const size_t timestamps_per_call_;
// Source of expansion data; called by GetExpandedSignal().
Expand* expand_;
// Buffer from which GetExpandedSignal() takes already existing samples.
SyncBuffer* sync_buffer_;
// Output buffers of Downsample(), holding the 4 kHz versions of the expanded
// signal and the input signal, respectively.
int16_t expanded_downsampled_[kExpandDownsampLength];
int16_t input_downsampled_[kInputDownsampLength];
// Multi-channel storage for expansion data; presumably the destination that
// the GetExpandedSignal() comment calls |expanded_signal_| (TODO confirm).
AudioMultiVector expanded_;
// NOTE(review): usage not visible in this header; presumably a scratch buffer
// reused between calls -- confirm.
std::vector<int16_t> temp_data_;
RTC_DISALLOW_COPY_AND_ASSIGN(Merge);
};
} // namespace webrtc
#endif // MODULES_AUDIO_CODING_NETEQ_MERGE_H_