python/FunASR-XL.git

			@@ -40,18 +40,10 @@
			from funasr.tasks.vad import VADTask
			from funasr.utils.timestamp_tools import time_stamp_lfr6
			from funasr.bin.punctuation_infer import Text2Punc
			from funasr.torch_utils.forward_adaptor import ForwardAdaptor
			from funasr.datasets.preprocessor import CommonPreprocessor
			from funasr.punctuation.text_preprocessor import split_to_mini_sentence

			# ANSI SGR escape sequences: '\033[95m' selects the bright-magenta
			# foreground colour and '\033[0m' resets all text attributes.
			# NOTE(review): presumably wrapped around console/log headers; the
			# usage sites are not visible in this excerpt - confirm.
			header_colors = '\033[95m'
			end_colors = '\033[0m'

			# Module-level language tag, defaulting to 'zh-cn'.
			# NOTE(review): where this value is read or overridden is not visible here.
			global_asr_language: str = 'zh-cn'
			# Module-level sample-rate setting. The annotation allows either a single
			# int or a mapping to ints; the default is a mapping with the keys
			# 'audio_fs' and 'model_fs', both set to 16000.
			# NOTE(review): presumably sampling rates in Hz for the input audio and
			# for the model respectively - confirm against the code that consumes it.
			global_sample_rate: Union[int, Dict[Any, int]] = {
			'audio_fs': 16000,
			'model_fs': 16000
			}

			class Speech2Text:
			"""Speech2Text class
			@@ -548,6 +540,7 @@
			def _forward(data_path_and_name_and_type,
			raw_inputs: Union[np.ndarray, torch.Tensor] = None,
			output_dir_v2: Optional[str] = None,
			fs: dict = None,
			param_dict: dict = None,
			):
			# 3. Build data-iterator
			@@ -558,6 +551,7 @@
			loader = ASRTask.build_streaming_iterator(
			data_path_and_name_and_type,
			dtype=dtype,
			fs=fs,
			batch_size=1,
			key_file=key_file,
			num_workers=num_workers,
			@@ -659,12 +653,8 @@
			punc_id_list = ""
			text_postprocessed_punc = ""

			item = {'key': key, 'value': text_postprocessed_punc_time_stamp, 'text': text_postprocessed,
			'time_stamp': time_stamp_postprocessed, 'punc': punc_id_list, 'token': token}
			if outputs_dict:
			item = {'text_punc': text_postprocessed_punc, 'text': text_postprocessed,
			'punc_id': punc_id_list, 'token': token, 'time_stamp': time_stamp_postprocessed}
			item = {'key': key, 'value': item}
			item = {'key': key, 'value': text_postprocessed_punc, 'text_postprocessed': text_postprocessed,
			'time_stamp': time_stamp_postprocessed, 'token': token}
			asr_result_list.append(item)
			finish_count += 1
			# asr_utils.print_progress(finish_count / file_count)