/**
|
* Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
*
|
* See LICENSE for clarification regarding multiple authors
|
*
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
* you may not use this file except in compliance with the License.
|
* You may obtain a copy of the License at
|
*
|
* http://www.apache.org/licenses/LICENSE-2.0
|
*
|
* Unless required by applicable law or agreed to in writing, software
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* See the License for the specific language governing permissions and
|
* limitations under the License.
|
*/
|
|
// The content in this file is copied/modified from
|
// This file is copied/modified from kaldi/src/feat/online-feature.h
|
#ifndef KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
|
#define KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
|
|
#include <cstdint>
|
#include <deque>
|
#include <vector>
|
|
#include "kaldi-native-fbank/csrc/feature-fbank.h"
|
|
namespace knf {
|
|
/// This class serves as a storage for feature vectors with an option to limit
|
/// the memory usage by removing old elements. The deleted frames indices are
|
/// "remembered" so that regardless of the MAX_ITEMS setting, the user always
|
/// provides the indices as if no deletion was being performed.
|
/// This is useful when processing very long recordings which would otherwise
|
/// cause the memory to eventually blow up when the features are not being
|
/// removed.
|
class RecyclingVector {
|
public:
|
/// By default it does not remove any elements.
|
explicit RecyclingVector(int32_t items_to_hold = -1);
|
|
~RecyclingVector() = default;
|
RecyclingVector(const RecyclingVector &) = delete;
|
RecyclingVector &operator=(const RecyclingVector &) = delete;
|
|
// The pointer is owned by RecyclingVector
|
// Users should not free it
|
const float *At(int32_t index) const;
|
|
void PushBack(std::vector<float> item);
|
|
/// This method returns the size as if no "recycling" had happened,
|
/// i.e. equivalent to the number of times the PushBack method has been
|
/// called.
|
int32_t Size() const;
|
|
private:
|
std::deque<std::vector<float>> items_;
|
int32_t items_to_hold_;
|
int32_t first_available_index_;
|
};
|
|
/// This is a templated class for online feature extraction;
|
/// it's templated on a class like MfccComputer or PlpComputer
|
/// that does the basic feature extraction.
|
template <class C>
|
class OnlineGenericBaseFeature {
|
public:
|
// Constructor from options class
|
explicit OnlineGenericBaseFeature(const typename C::Options &opts);
|
|
int32_t Dim() const { return computer_.Dim(); }
|
|
float FrameShiftInSeconds() const {
|
return computer_.GetFrameOptions().frame_shift_ms / 1000.0f;
|
}
|
|
int32_t NumFramesReady() const { return features_.Size(); }
|
|
// Note: IsLastFrame() will only ever return true if you have called
|
// InputFinished() (and this frame is the last frame).
|
bool IsLastFrame(int32_t frame) const {
|
return input_finished_ && frame == NumFramesReady() - 1;
|
}
|
|
const float *GetFrame(int32_t frame) const { return features_.At(frame); }
|
|
// This would be called from the application, when you get
|
// more wave data. Note: the sampling_rate is only provided so
|
// the code can assert that it matches the sampling rate
|
// expected in the options.
|
//
|
// @param sampling_rate The sampling_rate of the input waveform
|
// @param waveform Pointer to a 1-D array of size n
|
// @param n Number of entries in waveform
|
void AcceptWaveform(float sampling_rate, const float *waveform, int32_t n);
|
|
// InputFinished() tells the class you won't be providing any
|
// more waveform. This will help flush out the last frame or two
|
// of features, in the case where snip-edges == false; it also
|
// affects the return value of IsLastFrame().
|
void InputFinished();
|
|
private:
|
// This function computes any additional feature frames that it is possible to
|
// compute from 'waveform_remainder_', which at this point may contain more
|
// than just a remainder-sized quantity (because AcceptWaveform() appends to
|
// waveform_remainder_ before calling this function). It adds these feature
|
// frames to features_, and shifts off any now-unneeded samples of input from
|
// waveform_remainder_ while incrementing waveform_offset_ by the same amount.
|
void ComputeFeatures();
|
|
C computer_; // class that does the MFCC or PLP or filterbank computation
|
|
FeatureWindowFunction window_function_;
|
|
// features_ is the Mfcc or Plp or Fbank features that we have already
|
// computed.
|
|
RecyclingVector features_;
|
|
// True if the user has called "InputFinished()"
|
bool input_finished_;
|
|
// waveform_offset_ is the number of samples of waveform that we have
|
// already discarded, i.e. that were prior to 'waveform_remainder_'.
|
int64_t waveform_offset_;
|
|
// waveform_remainder_ is a short piece of waveform that we may need to keep
|
// after extracting all the whole frames we can (whatever length of feature
|
// will be required for the next phase of computation).
|
// It is a 1-D tensor
|
std::vector<float> waveform_remainder_;
|
};
|
|
using OnlineFbank = OnlineGenericBaseFeature<FbankComputer>;
|
|
} // namespace knf
|
|
#endif // KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
|