Paraformer streaming inference bugfix
| | |
| | | kwargs["is_final"] = _is_final and i == n -1 |
| | | audio_sample_i = audio_sample[i*chunk_stride_samples:(i+1)*chunk_stride_samples] |
| | | if kwargs["is_final"] and len(audio_sample_i) < 960: |
| | | continue |
| | | |
| | | cache["encoder"]["tail_chunk"] = True |
| | | speech = cache["encoder"]["feats"] |
| | | speech_lengths = torch.tensor([speech.shape[1]], dtype=torch.int64).to(speech.device) |
| | | else: |
| | | # extract fbank feats |
| | | speech, speech_lengths = extract_fbank([audio_sample_i], data_type=kwargs.get("data_type", "sound"), |
| | | frontend=frontend, cache=cache["frontend"], is_final=kwargs["is_final"]) |