| | |
| | | from funasr.text.token_id_converter import TokenIDConverter |
| | | from funasr.torch_utils.device_funcs import to_device |
| | | from funasr.utils.timestamp_tools import ts_prediction_lfr6_standard |
| | | from funasr.utils.whisper_utils.decoding import DecodingOptions, detect_language, decode |
| | | from funasr.utils.whisper_utils.transcribe import transcribe |
| | | from funasr.utils.whisper_utils.audio import pad_or_trim, log_mel_spectrogram |
| | | |
| | | |
| | | class Speech2Text: |
| | | """Speech2Text class |
| | |
| | | **kwargs, |
| | | ): |
| | | |
| | | from funasr.tasks.whisper import ASRTask |
| | | from funasr.utils.whisper_utils.transcribe import transcribe |
| | | from funasr.utils.whisper_utils.audio import pad_or_trim, log_mel_spectrogram |
| | | from funasr.utils.whisper_utils.decoding import DecodingOptions, detect_language, decode |
| | | |
| | | # 1. Build ASR model |
| | | scorers = {} |
| | | from funasr.tasks.whisper import ASRTask |
| | | asr_model, asr_train_args = ASRTask.build_model_from_file( |
| | | asr_train_config, asr_model_file, cmvn_file, device |
| | | ) |