| | |
| | | chunk_size = 60000 # ms |
| | | model = AutoModel(model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch", model_revision="v2.0.2") |
| | | |
| | | res = model(input=wav_file, chunk_size=chunk_size, ) |
| | | res = model.generate(input=wav_file, chunk_size=chunk_size, ) |
| | | print(res) |
| | | |
| | | |
| | |
| | | for i in range(total_chunk_num): |
| | | speech_chunk = speech[i*chunk_stride:(i+1)*chunk_stride] |
| | | is_final = i == total_chunk_num - 1 |
| | | res = model(input=speech_chunk, |
| | | res = model.generate(input=speech_chunk, |
| | | cache=cache, |
| | | is_final=is_final, |
| | | chunk_size=chunk_size, |