python/FunASR-XL.git

			@@ -126,10 +126,10 @@
			end = time_stamp[ts_nums[num]][1]
			ts_lists.append([begin, end])
			else:
			word_lists.append(words[num])
			# The length of time_stamp may not equal the length of words because of the (somewhat improper) threshold set in timestamp_tools.py line 46; e.g., the length of time_stamp can be zero while the length of words is not.
			# Moreover, "word_lists.append(words[num])" is moved into the if clause to keep the lengths of word_lists and ts_lists equal.
			if time_stamp is not None and ts_nums[num]<len(time_stamp) and words[num] != " ":
			word_lists.append(words[num])
			begin = time_stamp[ts_nums[num]][0]
			end = time_stamp[ts_nums[num]][1]
			ts_lists.append([begin, end])
			@@ -302,6 +302,7 @@
			sentence = "".join(word_lists)
			return sentence, real_word_lists


			emo_dict = {
			"<\|HAPPY\|>": "😊",
			"<\|SAD\|>": "😔",
			@@ -365,7 +366,15 @@
			}

			emo_set = {"😊", "😔", "😡", "😰", "🤢", "😮"}
			event_set = {"🎼", "👏", "😀", "😭", "🤧", "😷",}
			event_set = {
			"🎼",
			"👏",
			"😀",
			"😭",
			"🤧",
			"😷",
			}


			def format_str_v2(s):
			sptk_dict = {}
			@@ -386,9 +395,11 @@
			s = s.replace(emoji + " ", emoji)
			return s.strip()


			def rich_transcription_postprocess(s):
			def get_emo(s):
			return s[-1] if s[-1] in emo_set else None

			def get_event(s):
			return s[0] if s[0] in event_set else None

			@@ -410,4 +421,3 @@
			new_s += s_list[i].strip().lstrip()
			new_s = new_s.replace("The.", " ")
			return new_s.strip()

	funasr/utils/postprocess_utils.py	16 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/version.txt	2 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史