python/FunASR-XL.git

			@@ -820,7 +820,7 @@


			def build_preprocess(args, train):
			if args.use_preprocessor:
			if not args.use_preprocessor:
			return None
			if args.task_name in ["asr", "data2vec", "diar", "sv"]:
			retval = CommonPreprocessor(
			@@ -828,7 +828,7 @@
			token_type=args.token_type,
			token_list=args.token_list,
			bpemodel=args.bpemodel,
			non_linguistic_symbols=args.non_linguistic_symbols,
			non_linguistic_symbols=args.non_linguistic_symbols if hasattr(args, "non_linguistic_symbols") else None,
			text_cleaner=args.cleaner,
			g2p_type=args.g2p,
			split_with_space=args.split_with_space if hasattr(args, "split_with_space") else False,
			@@ -855,6 +855,19 @@
			text_name=text_names,
			non_linguistic_symbols=args.non_linguistic_symbols,
			)
			elif args.task_name == "lm":
			retval = LMPreprocessor(
			train=train,
			token_type=args.token_type,
			token_list=args.token_list,
			bpemodel=args.bpemodel,
			text_cleaner=args.cleaner,
			g2p_type=args.g2p,
			text_name="text",
			non_linguistic_symbols=args.non_linguistic_symbols,
			split_with_space=args.split_with_space,
			seg_dict_file=args.seg_dict_file
			)
			elif args.task_name == "vad":
			retval = None
			else: