| | |
| | | from funasr.modules.scorers.ctc import CTCPrefixScorer |
| | | from funasr.modules.scorers.length_bonus import LengthBonus |
| | | from funasr.build_utils.build_asr_model import frontend_choices |
| | | from funasr.text.build_tokenizer import build_tokenizer |
| | | from funasr.text.token_id_converter import TokenIDConverter |
| | | from funasr.tokenizer.build_tokenizer import build_tokenizer |
| | | from funasr.tokenizer.token_id_converter import TokenIDConverter |
| | | from funasr.torch_utils.device_funcs import to_device |
| | | from funasr.utils.timestamp_tools import ts_prediction_lfr6_standard |
| | | |
| | |
| | | """Speech2Text class |
| | | |
| | | Examples: |
| | | >>> import soundfile |
| | | >>> import librosa |
| | | >>> speech2text = Speech2Text("asr_config.yml", "asr.pb") |
| | | >>> audio, rate = soundfile.read("speech.wav") |
| | | >>> audio, rate = librosa.load("speech.wav") |
| | | >>> speech2text(audio) |
| | | [(text, token, token_int, hypothesis object), ...] |
| | | |
| | |
| | | """Speech2TextParaformer class |
| | | |
| | | Examples: |
| | | >>> import soundfile |
| | | >>> import librosa |
| | | >>> speech2text = Speech2TextParaformer("asr_config.yml", "asr.pb") |
| | | >>> audio, rate = soundfile.read("speech.wav") |
| | | >>> audio, rate = librosa.load("speech.wav") |
| | | >>> speech2text(audio) |
| | | [(text, token, token_int, hypothesis object), ...] |
| | | |
| | |
| | | """Speech2TextParaformerOnline class |
| | | |
| | | Examples: |
| | | >>> import soundfile |
| | | >>> import librosa |
| | | >>> speech2text = Speech2TextParaformerOnline("asr_config.yml", "asr.pth") |
| | | >>> audio, rate = soundfile.read("speech.wav") |
| | | >>> audio, rate = librosa.load("speech.wav") |
| | | >>> speech2text(audio) |
| | | [(text, token, token_int, hypothesis object), ...] |
| | | |
| | |
| | | """Speech2TextUniASR class |
| | | |
| | | Examples: |
| | | >>> import soundfile |
| | | >>> import librosa |
| | | >>> speech2text = Speech2TextUniASR("asr_config.yml", "asr.pb") |
| | | >>> audio, rate = soundfile.read("speech.wav") |
| | | >>> audio, rate = librosa.load("speech.wav") |
| | | >>> speech2text(audio) |
| | | [(text, token, token_int, hypothesis object), ...] |
| | | |
| | |
| | | """Speech2TextMFCCA class |
| | | |
| | | Examples: |
| | | >>> import soundfile |
| | | >>> import librosa |
| | | >>> speech2text = Speech2TextMFCCA("asr_config.yml", "asr.pb") |
| | | >>> audio, rate = soundfile.read("speech.wav") |
| | | >>> audio, rate = librosa.load("speech.wav") |
| | | >>> speech2text(audio) |
| | | [(text, token, token_int, hypothesis object), ...] |
| | | |
| | |
| | | feats_lengths = to_device(feats_lengths, device=self.device) |
| | | |
| | | enc_out, _, _ = self.asr_model.encoder(feats, feats_lengths) |
| | | |
| | | nbest_hyps = self.beam_search(enc_out[0]) |
| | | |
| | | return nbest_hyps |
| | |
| | | """Speech2TextSAASR class |
| | | |
| | | Examples: |
| | | >>> import soundfile |
| | | >>> import librosa |
| | | >>> speech2text = Speech2TextSAASR("asr_config.yml", "asr.pb") |
| | | >>> audio, rate = soundfile.read("speech.wav") |
| | | >>> audio, rate = librosa.load("speech.wav") |
| | | >>> speech2text(audio) |
| | | [(text, token, token_int, hypothesis object), ...] |
| | | |
| | |
| | | """Speech2Text class |
| | | |
| | | Examples: |
| | | >>> import soundfile |
| | | >>> import librosa |
| | | >>> speech2text = Speech2Text("asr_config.yml", "asr.pb") |
| | | >>> audio, rate = soundfile.read("speech.wav") |
| | | >>> audio, rate = librosa.load("speech.wav") |
| | | >>> speech2text(audio) |
| | | [(text, token, token_int, hypothesis object), ...] |
| | | |