| | |
| | | from funasr.models.decoder.transformer_decoder import TransformerDecoder |
| | | from funasr.models.decoder.contextual_decoder import ContextualParaformerDecoder |
| | | from funasr.models.e2e_asr import ESPnetASRModel |
| | | from funasr.models.e2e_asr_paraformer import Paraformer, ParaformerBert, BiCifParaformer, ContextualParaformer |
| | | from funasr.models.e2e_asr_paraformer import Paraformer, ParaformerOnline, ParaformerBert, BiCifParaformer, ContextualParaformer |
| | | from funasr.models.e2e_tp import TimestampPredictor |
| | | from funasr.models.e2e_asr_mfcca import MFCCA |
| | | from funasr.models.e2e_uni_asr import UniASR |
| | |
| | | asr=ESPnetASRModel, |
| | | uniasr=UniASR, |
| | | paraformer=Paraformer, |
| | | paraformer_online=ParaformerOnline, |
| | | paraformer_bert=ParaformerBert, |
| | | bicif_paraformer=BiCifParaformer, |
| | | contextual_paraformer=ContextualParaformer, |
| | | mfcca=MFCCA, |
| | | timestamp_predictor=TimestampPredictor, |
| | | timestamp_prediction=TimestampPredictor, |
| | | ), |
| | | type_check=AbsESPnetModel, |
| | | default="asr", |
| | |
| | | if "model.ckpt-" in model_name or ".bin" in model_name: |
| | | model_name_pth = os.path.join(model_dir, model_name.replace('.bin', |
| | | '.pb')) if ".bin" in model_name else os.path.join( |
| | | model_dir, "{}.pth".format(model_name)) |
| | | model_dir, "{}.pb".format(model_name)) |
| | | if os.path.exists(model_name_pth): |
| | | logging.info("model_file is load from pth: {}".format(model_name_pth)) |
| | | model_dict = torch.load(model_name_pth, map_location=device) |
| | |
| | | if "model.ckpt-" in model_name or ".bin" in model_name: |
| | | model_name_pth = os.path.join(model_dir, model_name.replace('.bin', |
| | | '.pb')) if ".bin" in model_name else os.path.join( |
| | | model_dir, "{}.pth".format(model_name)) |
| | | model_dir, "{}.pb".format(model_name)) |
| | | if os.path.exists(model_name_pth): |
| | | logging.info("model_file is load from pth: {}".format(model_name_pth)) |
| | | model_dict = torch.load(model_name_pth, map_location=device) |
| | |
| | | token_list = list(args.token_list) |
| | | else: |
| | | raise RuntimeError("token_list must be str or list") |
| | | vocab_size = len(token_list) |
| | | logging.info(f"Vocabulary size: {vocab_size}") |
| | | |
| | | # 1. frontend |
| | | if args.input_size is None: |
| | |
| | | frontend=frontend, |
| | | encoder=encoder, |
| | | predictor=predictor, |
| | | token_list=token_list, |
| | | **args.model_conf, |
| | | ) |
| | | |
| | |
| | | ) -> Tuple[str, ...]: |
| | | retval = ("speech", "text") |
| | | return retval |
| | | |
| | | |
class ASRTaskAligner(ASRTaskParaformer):
    """Task variant for alignment; inherits all behavior from ASRTaskParaformer."""

    @classmethod
    def required_data_names(
        cls, train: bool = True, inference: bool = False
    ) -> Tuple[str, ...]:
        """Return the data keys this task requires.

        Both flags are accepted for interface compatibility but do not
        change the result: a paired (speech, text) input is always needed.
        """
        return ("speech", "text")