python/FunASR-XL.git

			@@ -125,7 +125,7 @@
			bicif_paraformer=BiCifParaformer,
			contextual_paraformer=ContextualParaformer,
			mfcca=MFCCA,
			timestamp_predictor=TimestampPredictor,
			timestamp_prediction=TimestampPredictor,
			),
			type_check=AbsESPnetModel,
			default="asr",
			@@ -411,6 +411,12 @@
			type=str,
			default="13_15",
			help="The range of noise decibel level.",
			)
			parser.add_argument(
			"--batch_interval",
			type=int,
			default=10000,
			help="The batch interval for saving model.",
			)

			for class_choices in cls.class_choices_list:
			@@ -826,7 +832,7 @@
			if "model.ckpt-" in model_name or ".bin" in model_name:
			model_name_pth = os.path.join(model_dir, model_name.replace('.bin',
			'.pb')) if ".bin" in model_name else os.path.join(
			model_dir, "{}.pth".format(model_name))
			model_dir, "{}.pb".format(model_name))
			if os.path.exists(model_name_pth):
			logging.info("model_file is load from pth: {}".format(model_name_pth))
			model_dict = torch.load(model_name_pth, map_location=device)
			@@ -1073,7 +1079,7 @@
			if "model.ckpt-" in model_name or ".bin" in model_name:
			model_name_pth = os.path.join(model_dir, model_name.replace('.bin',
			'.pb')) if ".bin" in model_name else os.path.join(
			model_dir, "{}.pth".format(model_name))
			model_dir, "{}.pb".format(model_name))
			if os.path.exists(model_name_pth):
			logging.info("model_file is load from pth: {}".format(model_name_pth))
			model_dict = torch.load(model_name_pth, map_location=device)
			@@ -1278,8 +1284,6 @@
			token_list = list(args.token_list)
			else:
			raise RuntimeError("token_list must be str or list")
			vocab_size = len(token_list)
			logging.info(f"Vocabulary size: {vocab_size}")

			# 1. frontend
			if args.input_size is None:
			@@ -1316,6 +1320,7 @@
			frontend=frontend,
			encoder=encoder,
			predictor=predictor,
			token_list=token_list,
			**args.model_conf,
			)

			@@ -1332,12 +1337,3 @@
			) -> Tuple[str, ...]:
			retval = ("speech", "text")
			return retval


			class ASRTaskAligner(ASRTaskParaformer):
			    """Subclass of ``ASRTaskParaformer`` that declares its own required data names.

			    Only ``required_data_names`` is overridden in this definition; all
			    other behaviour comes from the parent class.
			    """

			    @classmethod
			    def required_data_names(
			        cls, train: bool = True, inference: bool = False
			    ) -> Tuple[str, ...]:
			        """Return the data names this task requires.

			        Args:
			            train: Accepted but not consulted by this implementation.
			            inference: Accepted but not consulted by this implementation.

			        Returns:
			            Always the tuple ``("speech", "text")``.
			        """
			        # The same pair is returned in every mode, so neither flag is read.
			        return "speech", "text"