python/FunASR-XL.git

			@@ -47,6 +47,8 @@
			from funasr.bin.punctuation_infer import Text2Punc
			from funasr.models.e2e_asr_paraformer import BiCifParaformer, ContextualParaformer

			from FunASR.funasr.utils.timestamp_tools import time_stamp_sentence

			header_colors = '\033[95m'
			end_colors = '\033[0m'

			@@ -720,6 +722,7 @@
			text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]

			text_postprocessed_punc = text_postprocessed
			punc_id_list = []
			if len(word_lists) > 0 and text2punc is not None:
			text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)

			@@ -729,6 +732,8 @@
			if time_stamp_postprocessed != "":
			item['time_stamp'] = time_stamp_postprocessed

			item['sentences'] = time_stamp_sentence(punc_id_list, time_stamp_postprocessed, text_postprocessed)

			asr_result_list.append(item)
			finish_count += 1
			# asr_utils.print_progress(finish_count / file_count)

			@@ -54,3 +54,55 @@
			res.append([int(timestamp[0] * 1000), int(timestamp[1] * 1000)])
			return res

			def time_stamp_sentence(punc_id_list, time_stamp_postprocessed, text_postprocessed):
			    """Split recognised words into punctuation-delimited, time-stamped sentences.

			    Args:
			        punc_id_list: One punctuation id per word. Id ``2`` closes the
			            current sentence with ``','`` and id ``3`` closes it with
			            ``'.'``; every other id leaves the sentence open.
			        time_stamp_postprocessed: One ``[start, end]`` pair per word.
			        text_postprocessed: The words as one whitespace-separated string.

			    Returns:
			        A list of dicts with the keys ``'text'``, ``'start'`` and ``'end'``.

			        * Empty list when the text or the time stamps are ``None`` or empty.
			        * A single entry spanning the whole input, whose ``'text'`` is the
			          *list* of words, when ``punc_id_list`` is missing/empty or when
			          the numbers of words, punctuation ids and time stamps disagree.
			        * Otherwise one entry per sentence, whose ``'text'`` is a *string*
			          of the words joined without a separator plus the closing mark.
			          Words after the last closing mark are returned as a final entry
			          without a mark instead of being discarded.
			    """
			    res = []
			    if text_postprocessed is None or time_stamp_postprocessed is None:
			        return res
			    if len(time_stamp_postprocessed) == 0 or len(text_postprocessed) == 0:
			        return res
			    texts = text_postprocessed.split()
			    first_start = time_stamp_postprocessed[0][0]
			    last_end = time_stamp_postprocessed[-1][1]
			    # Without a usable, fully aligned punctuation list the input cannot be
			    # segmented reliably, so report it as one span. Checking the word count
			    # here also prevents an IndexError on texts[i] further down.
			    aligned = (
			        punc_id_list is not None
			        and len(punc_id_list) != 0
			        and len(punc_id_list) == len(time_stamp_postprocessed)
			        and len(punc_id_list) == len(texts)
			    )
			    if not aligned:
			        res.append({
			            'text': texts,
			            "start": first_start,
			            "end": last_end
			        })
			        return res
			    pending_words = []
			    sentence_start = first_start
			    for word, punc_id, stamp in zip(texts, punc_id_list, time_stamp_postprocessed):
			        pending_words.append(word)
			        if punc_id == 2:
			            closing_mark = ','
			        elif punc_id == 3:
			            closing_mark = '.'
			        else:
			            continue
			        res.append({
			            'text': ''.join(pending_words) + closing_mark,
			            "start": sentence_start,
			            "end": stamp[1]
			        })
			        pending_words = []
			        # The next sentence starts where the word that closed this one ends.
			        sentence_start = stamp[1]
			    if pending_words:
			        # Flush words that follow the last closing mark so they are not lost.
			        res.append({
			            'text': ''.join(pending_words),
			            "start": sentence_start,
			            "end": last_end
			        })
			    return res

	funasr/bin/asr_inference_paraformer_vad_punc.py	5 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/utils/timestamp_tools.py	52 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史