| | |
| | | # use vad, punc, spk or not as you need |
| | | model = AutoModel(model="paraformer-zh", model_revision="v2.0.2", |
| | | vad_model="fsmn-vad", vad_model_revision="v2.0.2", |
| | | punc_model="ct-punc-c", punc_model_revision="v2.0.2", |
| | | punc_model="ct-punc-c", punc_model_revision="v2.0.3", |
| | | # spk_model="cam++", spk_model_revision="v2.0.2", |
| | | ) |
| | | res = model.generate(input=f"{model.model_path}/example/asr_example.wav", |
| | |
| | | # use vad, punc, spk or not as you need |
| | | model = AutoModel(model="paraformer-zh", model_revision="v2.0.2", |
| | | vad_model="fsmn-vad", vad_model_revision="v2.0.2", |
| | | punc_model="ct-punc-c", punc_model_revision="v2.0.2", |
| | | punc_model="ct-punc-c", punc_model_revision="v2.0.3", |
| | | # spk_model="cam++", spk_model_revision="v2.0.2", |
| | | ) |
| | | res = model.generate(input=f"{model.model_path}/example/asr_example.wav", |
| | |
| | | vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch", |
| | | vad_model_revision="v2.0.2", |
| | | punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch", |
| | | punc_model_revision="v2.0.2", |
| | | punc_model_revision="v2.0.3", |
| | | spk_model="damo/speech_campplus_sv_zh-cn_16k-common", |
| | | spk_model_revision="v2.0.2", |
| | | ) |
| | |
| | | vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch" |
| | | vad_model_revision="v2.0.2" |
| | | punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch" |
| | | punc_model_revision="v2.0.2" |
| | | punc_model_revision="v2.0.3" |
| | | spk_model="damo/speech_campplus_sv_zh-cn_16k-common" |
| | | spk_model_revision="v2.0.2" |
| | | |
| | |
| | | vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch", |
| | | vad_model_revision="v2.0.2", |
| | | punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch", |
| | | punc_model_revision="v2.0.2", |
| | | punc_model_revision="v2.0.3", |
| | | spk_model="damo/speech_campplus_sv_zh-cn_16k-common", |
| | | spk_model_revision="v2.0.2" |
| | | ) |
| | |
| | | vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch" |
| | | vad_model_revision="v2.0.2" |
| | | punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch" |
| | | punc_model_revision="v2.0.2" |
| | | punc_model_revision="v2.0.3" |
| | | spk_model="damo/speech_campplus_sv_zh-cn_16k-common" |
| | | spk_model_revision="v2.0.2" |
| | | |
| | |
| | | |
| | | from funasr import AutoModel |
| | | |
| | | model = AutoModel(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", model_revision="v2.0.2") |
| | | model = AutoModel(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", model_revision="v2.0.3", |
| | | # vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch", |
| | | # vad_model_revision="v2.0.2", |
| | | # punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch", |
| | | # punc_model_revision="v2.0.3", |
| | | ) |
| | | |
| | | res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav") |
| | | print(res) |
| | |
| | | vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch", |
| | | vad_model_revision="v2.0.2", |
| | | punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch", |
| | | punc_model_revision="v2.0.2", |
| | | punc_model_revision="v2.0.3", |
| | | spk_model="damo/speech_campplus_sv_zh-cn_16k-common", |
| | | spk_model_revision="v2.0.2", |
| | | ) |
| | |
| | | vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch" |
| | | vad_model_revision="v2.0.2" |
| | | punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch" |
| | | punc_model_revision="v2.0.2" |
| | | punc_model_revision="v2.0.3" |
| | | |
| | | python funasr/bin/inference.py \ |
| | | +model=${model} \ |
| | |
| | | if self.punc_model is not None: |
| | | self.punc_kwargs.update(cfg) |
| | | punc_res = self.inference(result["text"], model=self.punc_model, kwargs=self.punc_kwargs, **cfg) |
| | | result["text_with_punc"] = punc_res[0]["text"] |
| | | result["text"] = punc_res[0]["text"] |
| | | |
| | | # speaker embedding cluster after resorted |
| | | if self.spk_model is not None: |
| | |
| | | self.nbest = kwargs.get("nbest", 1) |
| | | |
| | | meta_data = {} |
| | | if isinstance(data_in, torch.Tensor): # fbank |
| | | if isinstance(data_in, torch.Tensor) and kwargs.get("data_type", "sound") == "fbank": # fbank |
| | | speech, speech_lengths = data_in, data_lengths |
| | | if len(speech.shape) < 3: |
| | | speech = speech[None, :, :] |