| | |
| | | |
| | | |
| | | def inference_paraformer_vad_punc( |
| | | maxlenratio: float, |
| | | minlenratio: float, |
| | | batch_size: int, |
| | | beam_size: int, |
| | | ngpu: int, |
| | | ctc_weight: float, |
| | | lm_weight: float, |
| | | penalty: float, |
| | | log_level: Union[int, str], |
| | | maxlenratio: float=0.0, |
| | | minlenratio: float=0.0, |
| | | batch_size: int=1, |
| | | beam_size: int=1, |
| | | ngpu: int=1, |
| | | ctc_weight: float=0.0, |
| | | lm_weight: float=0.0, |
| | | penalty: float=0.0, |
| | | log_level: Union[int, str]=logging.ERROR, |
| | | # data_path_and_name_and_type, |
| | | asr_train_config: Optional[str], |
| | | asr_model_file: Optional[str], |
| | | asr_train_config: Optional[str]=None, |
| | | asr_model_file: Optional[str]=None, |
| | | cmvn_file: Optional[str] = None, |
| | | lm_train_config: Optional[str] = None, |
| | | lm_file: Optional[str] = None, |
| | |
| | | seed: int = 0, |
| | | ngram_weight: float = 0.9, |
| | | nbest: int = 1, |
| | | num_workers: int = 1, |
| | | num_workers: int = 0, |
| | | vad_infer_config: Optional[str] = None, |
| | | vad_model_file: Optional[str] = None, |
| | | vad_cmvn_file: Optional[str] = None, |
| | |
| | | format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", |
| | | ) |
| | | |
| | | if sv_model_file is None: |
| | | sv_model_file = "{}/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/campplus_cn_common.bin".format(get_cache_dir(None)) |
| | | sv_model_file = asr_model_file.replace("model.pb", "campplus_cn_common.bin") |
| | | |
| | | if param_dict is not None: |
| | | hotword_list_or_file = param_dict.get('hotword') |
| | |
| | | |
| | | |
| | | def inference_paraformer_online( |
| | | maxlenratio: float, |
| | | minlenratio: float, |
| | | batch_size: int, |
| | | beam_size: int, |
| | | ngpu: int, |
| | | ctc_weight: float, |
| | | lm_weight: float, |
| | | penalty: float, |
| | | log_level: Union[int, str], |
| | | maxlenratio: float=0.0, |
| | | minlenratio: float=0.0, |
| | | batch_size: int=1, |
| | | beam_size: int=1, |
| | | ngpu: int=1, |
| | | ctc_weight: float=0.0, |
| | | lm_weight: float=0.0, |
| | | penalty: float=0.0, |
| | | log_level: Union[int, str]=logging.ERROR, |
| | | # data_path_and_name_and_type, |
| | | asr_train_config: Optional[str], |
| | | asr_model_file: Optional[str], |
| | | asr_train_config: Optional[str]=None, |
| | | asr_model_file: Optional[str]=None, |
| | | cmvn_file: Optional[str] = None, |
| | | lm_train_config: Optional[str] = None, |
| | | lm_file: Optional[str] = None, |
| | |
| | | |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | if param_dict: |
| | | language = param_dict.get("language", None) |
| | | task = param_dict.get("task", "transcribe") |
| | | else: |
| | | language = None |
| | | task = "transcribe" |
| | | if batch_size > 1: |
| | | raise NotImplementedError("batch decoding is not implemented") |
| | | if word_lm_train_config is not None: |
| | |
| | | penalty=penalty, |
| | | nbest=nbest, |
| | | streaming=streaming, |
| | | language=language, |
| | | task=task, |
| | | ) |
| | | logging.info("speech2text_kwargs: {}".format(speech2text_kwargs)) |
| | | speech2text = Speech2TextWhisper(**speech2text_kwargs) |