| | |
| | | batch_size_token=5000, batch_size_token_threshold_s=40, max_single_segment_time=6000) |
| | | print(rec_result) |
| | | ``` |
| | | Where, |
| | | 其中: |
| | | - `batch_size_token`: 表示采用动态batch,batch中总token数为 `batch_size_token`,1 token = 60 ms. |
| | | - `batch_size_token_threshold_s`: 表示音频时长超过 `batch_size_token_threshold_s` 阈值时,batch数设置为1, 单位为s. |
| | | - `max_single_segment_time`: 表示VAD最大切割音频时长, 单位是ms. |
| | |
| | | ): |
| | | |
| | | from funasr.tasks.whisper import ASRTask |
| | | from funasr.utils.whisper_utils.transcribe import transcribe |
| | | from funasr.utils.whisper_utils.audio import pad_or_trim, log_mel_spectrogram |
| | | from funasr.utils.whisper_utils.decoding import DecodingOptions, detect_language, decode |
| | | |
| | | # 1. Build ASR model |
| | | scorers = {} |
| | |
| | | |
| | | """ |
| | | |
| | | from funasr.utils.whisper_utils.transcribe import transcribe |
| | | from funasr.utils.whisper_utils.audio import pad_or_trim, log_mel_spectrogram |
| | | from funasr.utils.whisper_utils.decoding import DecodingOptions, detect_language, decode |
| | | |
| | | speech = speech[0] |
| | | speech = pad_or_trim(speech) |
| | | mel = log_mel_spectrogram(speech).to(self.device) |