python/FunASR-XL.git

			@@ -299,3 +299,113 @@
			real_word_lists.append(ch)
			sentence = "".join(word_lists)
			return sentence, real_word_lists

			# Lookup tables shared by format_str_v2 and rich_transcription_postprocess.
			#
			# NOTE(review): every tag below is kept byte-for-byte, including the `\|`
			# sequences. Python leaves an unrecognised `\|` escape in the string as a
			# backslash followed by `|`, so these keys only match input spelled the
			# same way. The backslashes look like markdown escaping of `|` - confirm
			# against the tag spelling the model really emits.

			# Emotion tag -> emoji that format_str_v2 appends to the end of a segment.
			emo_dict = {
			    "<\|HAPPY\|>": "😊",
			    "<\|SAD\|>": "😔",
			    "<\|ANGRY\|>": "😡",
			    "<\|NEUTRAL\|>": "",
			    "<\|FEARFUL\|>": "😰",
			    "<\|DISGUSTED\|>": "🤢",
			    "<\|SURPRISED\|>": "😮",
			}

			# Event tag -> emoji that format_str_v2 prepends to a segment.
			event_dict = {
			    "<\|BGM\|>": "🎼",
			    "<\|Speech\|>": "",
			    "<\|Applause\|>": "👏",
			    "<\|Laughter\|>": "😀",
			    "<\|Cry\|>": "😭",
			    "<\|Sneeze\|>": "🤧",
			    "<\|Breath\|>": "",
			    # Cough gets its own emoji, in line with emoji_dict and event_set;
			    # it used to repeat the Sneeze emoji.
			    "<\|Cough\|>": "😷",
			}

			# Language tags; rich_transcription_postprocess iterates the keys and
			# replaces each one with the common separator tag.
			lang_dict = {
			    "<\|zh\|>": "<\|lang\|>",
			    "<\|en\|>": "<\|lang\|>",
			    "<\|yue\|>": "<\|lang\|>",
			    "<\|ja\|>": "<\|lang\|>",
			    "<\|ko\|>": "<\|lang\|>",
			    "<\|nospeech\|>": "<\|lang\|>",
			}

			# Every special tag that format_str_v2 counts and strips from the text.
			# Insertion order matters: the combined nospeech/Event_UNK tag is handled
			# before its two component tags.
			emoji_dict = {
			    "<\|nospeech\|><\|Event_UNK\|>": "❓",
			    "<\|zh\|>": "",
			    "<\|en\|>": "",
			    "<\|yue\|>": "",
			    "<\|ja\|>": "",
			    "<\|ko\|>": "",
			    "<\|nospeech\|>": "",
			    "<\|HAPPY\|>": "😊",
			    "<\|SAD\|>": "😔",
			    "<\|ANGRY\|>": "😡",
			    "<\|NEUTRAL\|>": "",
			    "<\|BGM\|>": "🎼",
			    "<\|Speech\|>": "",
			    "<\|Applause\|>": "👏",
			    "<\|Laughter\|>": "😀",
			    "<\|FEARFUL\|>": "😰",
			    "<\|DISGUSTED\|>": "🤢",
			    "<\|SURPRISED\|>": "😮",
			    "<\|Cry\|>": "😭",
			    "<\|EMO_UNKNOWN\|>": "",
			    "<\|Sneeze\|>": "🤧",
			    "<\|Breath\|>": "",
			    "<\|Cough\|>": "😷",
			    "<\|Sing\|>": "",
			    "<\|Speech_Noise\|>": "",
			    "<\|withitn\|>": "",
			    "<\|woitn\|>": "",
			    "<\|GBG\|>": "",
			    "<\|Event_UNK\|>": "",
			}

			# Emoji recognised as a trailing emotion marker / leading event marker.
			emo_set = {"😊", "😔", "😡", "😰", "🤢", "😮"}
			event_set = {"🎼", "👏", "😀", "😭", "🤧", "😷"}

			def format_str_v2(s):
			    """Replace the special tags of one segment with leading/trailing emoji.

			    Every key of ``emoji_dict`` is counted and then removed from ``s``, in
			    dictionary order. One emoji is prepended for each ``event_dict`` tag
			    that occurred, and the emoji of the most frequent ``emo_dict`` tag is
			    appended. Finally, spaces adjacent to any emotion or event emoji are
			    removed.

			    Args:
			        s: Text of a single segment, possibly containing special tags.

			    Returns:
			        The stripped text with event emoji in front and the emotion emoji
			        at the end.
			    """
			    tag_counts = {}
			    # Sequential on purpose: each tag is counted after the earlier tags
			    # have already been removed from the text.
			    for tag in emoji_dict:
			        tag_counts[tag] = s.count(tag)
			        s = s.replace(tag, "")

			    # max() returns the first maximal candidate, so the neutral tag wins
			    # ties and the remaining tags are ranked in emo_dict order.
			    dominant = max(["<\|NEUTRAL\|>", *emo_dict], key=tag_counts.__getitem__)

			    # Events are prepended one after another, so the last detected tag in
			    # event_dict order ends up first in the output.
			    seen_events = [event_dict[tag] for tag in event_dict if tag_counts[tag] > 0]
			    s = "".join(reversed(seen_events)) + s + emo_dict[dominant]

			    for mark in emo_set | event_set:
			        s = s.replace(" " + mark, mark).replace(mark + " ", mark)
			    return s.strip()

			def rich_transcription_postprocess(s):
			    """Convert tagged transcription text into plain text decorated with emoji.

			    The input is split on language tags, each piece is formatted by
			    ``format_str_v2``, and the non-empty pieces are concatenated. While
			    joining, a piece's leading event emoji is dropped when it equals the
			    event tracked from the preceding piece, and the trailing emotion emoji
			    of the text built so far is dropped when the next piece ends with the
			    same one.

			    Args:
			        s: Raw text containing special tags.

			    Returns:
			        The joined text with surrounding whitespace stripped.
			    """

			    def get_emo(text):
			        # Slice rather than index: an empty string gives "" (never in the
			        # set) instead of raising IndexError.
			        tail = text[-1:]
			        return tail if tail in emo_set else None

			    def get_event(text):
			        head = text[:1]
			        return head if head in event_set else None

			    s = s.replace("<\|nospeech\|><\|Event_UNK\|>", "❓")
			    for lang in lang_dict:
			        s = s.replace(lang, "<\|lang\|>")
			    s_list = [format_str_v2(s_i).strip(" ") for s_i in s.split("<\|lang\|>")]

			    new_s = " " + s_list[0]
			    # NOTE(review): new_s starts with a space, so this is always None and
			    # the first piece never suppresses a repeated event on the next one.
			    # Confirm whether get_event(s_list[0]) was intended.
			    cur_ent_event = get_event(new_s)
			    for piece in s_list[1:]:
			        if not piece:
			            continue
			        event = get_event(piece)
			        if event is not None and event == cur_ent_event:
			            piece = piece[1:]
			        # The piece may be empty now (it consisted only of the repeated
			        # event emoji); the helpers return None for it.
			        cur_ent_event = get_event(piece)
			        emo = get_emo(piece)
			        if emo is not None and emo == get_emo(new_s):
			            # Drop the duplicate emotion emoji at the end of the text so
			            # only the one carried by this piece remains.
			            new_s = new_s[:-1]
			        new_s += piece.strip()
			    new_s = new_s.replace("The.", " ")
			    return new_s.strip()

	examples/industrial_data_pretraining/sense_voice/demo.py	7 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/utils/postprocess_utils.py	110 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史

			@@ -5,11 +5,14 @@

			import sys
			from funasr import AutoModel
			from funasr.utils.postprocess_utils import rich_transcription_postprocess

			# Model identifier handed to AutoModel below.
			model_dir = "iic/SenseVoiceSmall"

			# Sample audio URL.
			input_file = "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav"

			# NOTE(review): this immediately overrides the URL above with an absolute
			# /nfs path, which looks machine-specific - confirm it should be committed.
			input_file = "/nfs/beinian.lzr/workspace/models/modelscope_models/SenseVoice/example/rich_2.wav"

			model = AutoModel(
			model=model_dir,
			@@ -22,4 +25,6 @@
			use_itn=False,
			)

			# Show the raw result first. `res` is assigned outside the visible hunk;
			# presumably it is the model's generation output - confirm against the
			# full script.
			print(res)
			# Post-process the "text" field of the first result entry.
			text = rich_transcription_postprocess(res[0]["text"])

			print(text)

			@@ -299,3 +299,113 @@
			real_word_lists.append(ch)
			sentence = "".join(word_lists)
			return sentence, real_word_lists

			# Lookup tables shared by format_str_v2 and rich_transcription_postprocess.
			#
			# NOTE(review): every tag below is kept byte-for-byte, including the `\|`
			# sequences. Python leaves an unrecognised `\|` escape in the string as a
			# backslash followed by `|`, so these keys only match input spelled the
			# same way. The backslashes look like markdown escaping of `|` - confirm
			# against the tag spelling the model really emits.

			# Emotion tag -> emoji that format_str_v2 appends to the end of a segment.
			emo_dict = {
			    "<\|HAPPY\|>": "😊",
			    "<\|SAD\|>": "😔",
			    "<\|ANGRY\|>": "😡",
			    "<\|NEUTRAL\|>": "",
			    "<\|FEARFUL\|>": "😰",
			    "<\|DISGUSTED\|>": "🤢",
			    "<\|SURPRISED\|>": "😮",
			}

			# Event tag -> emoji that format_str_v2 prepends to a segment.
			event_dict = {
			    "<\|BGM\|>": "🎼",
			    "<\|Speech\|>": "",
			    "<\|Applause\|>": "👏",
			    "<\|Laughter\|>": "😀",
			    "<\|Cry\|>": "😭",
			    "<\|Sneeze\|>": "🤧",
			    "<\|Breath\|>": "",
			    # Cough gets its own emoji, in line with emoji_dict and event_set;
			    # it used to repeat the Sneeze emoji.
			    "<\|Cough\|>": "😷",
			}

			# Language tags; rich_transcription_postprocess iterates the keys and
			# replaces each one with the common separator tag.
			lang_dict = {
			    "<\|zh\|>": "<\|lang\|>",
			    "<\|en\|>": "<\|lang\|>",
			    "<\|yue\|>": "<\|lang\|>",
			    "<\|ja\|>": "<\|lang\|>",
			    "<\|ko\|>": "<\|lang\|>",
			    "<\|nospeech\|>": "<\|lang\|>",
			}

			# Every special tag that format_str_v2 counts and strips from the text.
			# Insertion order matters: the combined nospeech/Event_UNK tag is handled
			# before its two component tags.
			emoji_dict = {
			    "<\|nospeech\|><\|Event_UNK\|>": "❓",
			    "<\|zh\|>": "",
			    "<\|en\|>": "",
			    "<\|yue\|>": "",
			    "<\|ja\|>": "",
			    "<\|ko\|>": "",
			    "<\|nospeech\|>": "",
			    "<\|HAPPY\|>": "😊",
			    "<\|SAD\|>": "😔",
			    "<\|ANGRY\|>": "😡",
			    "<\|NEUTRAL\|>": "",
			    "<\|BGM\|>": "🎼",
			    "<\|Speech\|>": "",
			    "<\|Applause\|>": "👏",
			    "<\|Laughter\|>": "😀",
			    "<\|FEARFUL\|>": "😰",
			    "<\|DISGUSTED\|>": "🤢",
			    "<\|SURPRISED\|>": "😮",
			    "<\|Cry\|>": "😭",
			    "<\|EMO_UNKNOWN\|>": "",
			    "<\|Sneeze\|>": "🤧",
			    "<\|Breath\|>": "",
			    "<\|Cough\|>": "😷",
			    "<\|Sing\|>": "",
			    "<\|Speech_Noise\|>": "",
			    "<\|withitn\|>": "",
			    "<\|woitn\|>": "",
			    "<\|GBG\|>": "",
			    "<\|Event_UNK\|>": "",
			}

			# Emoji recognised as a trailing emotion marker / leading event marker.
			emo_set = {"😊", "😔", "😡", "😰", "🤢", "😮"}
			event_set = {"🎼", "👏", "😀", "😭", "🤧", "😷"}

			def format_str_v2(s):
			    """Replace the special tags of one segment with leading/trailing emoji.

			    Every key of ``emoji_dict`` is counted and then removed from ``s``, in
			    dictionary order. One emoji is prepended for each ``event_dict`` tag
			    that occurred, and the emoji of the most frequent ``emo_dict`` tag is
			    appended. Finally, spaces adjacent to any emotion or event emoji are
			    removed.

			    Args:
			        s: Text of a single segment, possibly containing special tags.

			    Returns:
			        The stripped text with event emoji in front and the emotion emoji
			        at the end.
			    """
			    tag_counts = {}
			    # Sequential on purpose: each tag is counted after the earlier tags
			    # have already been removed from the text.
			    for tag in emoji_dict:
			        tag_counts[tag] = s.count(tag)
			        s = s.replace(tag, "")

			    # max() returns the first maximal candidate, so the neutral tag wins
			    # ties and the remaining tags are ranked in emo_dict order.
			    dominant = max(["<\|NEUTRAL\|>", *emo_dict], key=tag_counts.__getitem__)

			    # Events are prepended one after another, so the last detected tag in
			    # event_dict order ends up first in the output.
			    seen_events = [event_dict[tag] for tag in event_dict if tag_counts[tag] > 0]
			    s = "".join(reversed(seen_events)) + s + emo_dict[dominant]

			    for mark in emo_set | event_set:
			        s = s.replace(" " + mark, mark).replace(mark + " ", mark)
			    return s.strip()

			def rich_transcription_postprocess(s):
			    """Convert tagged transcription text into plain text decorated with emoji.

			    The input is split on language tags, each piece is formatted by
			    ``format_str_v2``, and the non-empty pieces are concatenated. While
			    joining, a piece's leading event emoji is dropped when it equals the
			    event tracked from the preceding piece, and the trailing emotion emoji
			    of the text built so far is dropped when the next piece ends with the
			    same one.

			    Args:
			        s: Raw text containing special tags.

			    Returns:
			        The joined text with surrounding whitespace stripped.
			    """

			    def get_emo(text):
			        # Slice rather than index: an empty string gives "" (never in the
			        # set) instead of raising IndexError.
			        tail = text[-1:]
			        return tail if tail in emo_set else None

			    def get_event(text):
			        head = text[:1]
			        return head if head in event_set else None

			    s = s.replace("<\|nospeech\|><\|Event_UNK\|>", "❓")
			    for lang in lang_dict:
			        s = s.replace(lang, "<\|lang\|>")
			    s_list = [format_str_v2(s_i).strip(" ") for s_i in s.split("<\|lang\|>")]

			    new_s = " " + s_list[0]
			    # NOTE(review): new_s starts with a space, so this is always None and
			    # the first piece never suppresses a repeated event on the next one.
			    # Confirm whether get_event(s_list[0]) was intended.
			    cur_ent_event = get_event(new_s)
			    for piece in s_list[1:]:
			        if not piece:
			            continue
			        event = get_event(piece)
			        if event is not None and event == cur_ent_event:
			            piece = piece[1:]
			        # The piece may be empty now (it consisted only of the repeated
			        # event emoji); the helpers return None for it.
			        cur_ent_event = get_event(piece)
			        emo = get_emo(piece)
			        if emo is not None and emo == get_emo(new_s):
			            # Drop the duplicate emotion emoji at the end of the text so
			            # only the one carried by this piece remains.
			            new_s = new_s[:-1]
			        new_s += piece.strip()
			    new_s = new_s.replace("The.", " ")
			    return new_s.strip()