| | |
| | | int frame_from_waves = (waves.size() - frame_sample_length_) / frame_shift_sample_length_ + 1; |
| | | int minus_frame = reserve_waveforms_.empty() ? (lfr_m - 1) / 2 : 0; |
| | | int lfr_splice_frame_idxs = OnlineLfrCmvn(vad_feats, input_finished); |
| | | int reserve_frame_idx = lfr_splice_frame_idxs - minus_frame; |
| | | int reserve_frame_idx = std::abs(lfr_splice_frame_idxs - minus_frame); |
| | | reserve_waveforms_.clear(); |
| | | reserve_waveforms_.insert(reserve_waveforms_.begin(), |
| | | waves.begin() + reserve_frame_idx * frame_shift_sample_length_, |
| | |
| | | int FsmnVadOnline::OnlineLfrCmvn(vector<vector<float>> &vad_feats, bool input_finished) { |
| | | vector<vector<float>> out_feats; |
| | | int T = vad_feats.size(); |
| | | int T_lrf = ceil((T - (lfr_m - 1) / 2) / lfr_n); |
| | | int T_lrf = ceil((T - (lfr_m - 1) / 2) / (float)lfr_n); |
| | | int lfr_splice_frame_idxs = T_lrf; |
| | | vector<float> p; |
| | | for (int i = 0; i < T_lrf; i++) { |
| | |
| | | vad_silence_duration_ = vad_silence_duration; |
| | | vad_max_len_ = vad_max_len; |
| | | vad_speech_noise_thres_ = vad_speech_noise_thres; |
| | | |
| | | // 2pass |
| | | audio_handle = make_unique<Audio>(1); |
| | | } |
| | | |
| | | FsmnVadOnline::~FsmnVadOnline() { |