| | |
| | | end = time_stamp[ts_nums[num]][1] |
| | | ts_lists.append([begin, end]) |
| | | else: |
| | | word_lists.append(words[num]) |
| | | # The length of time_stamp may not equal the length of words because of the (somewhat improper) threshold set in timestamp_tools.py line 46; e.g., time_stamp can be empty while words is not. |
| | | # For that reason, "word_lists.append(words[num])" is moved into the if clause, so that word_lists and ts_lists always have the same length. |
| | | if time_stamp is not None and ts_nums[num]<len(time_stamp) and words[num] != " ": |
| | | word_lists.append(words[num]) |
| | | begin = time_stamp[ts_nums[num]][0] |
| | | end = time_stamp[ts_nums[num]][1] |
| | | ts_lists.append([begin, end]) |
| | |
| | | sentence = "".join(word_lists) |
| | | return sentence, real_word_lists |
| | | |
| | | |
| | | emo_dict = { |
| | | "<|HAPPY|>": "😊", |
| | | "<|SAD|>": "😔", |
| | |
| | | } |
| | | |
| | | emo_set = {"😊", "😔", "😡", "😰", "🤢", "😮"} |
| | | event_set = {"🎼", "👏", "😀", "😭", "🤧", "😷",} |
| | | event_set = { |
| | | "🎼", |
| | | "👏", |
| | | "😀", |
| | | "😭", |
| | | "🤧", |
| | | "😷", |
| | | } |
| | | |
| | | |
| | | def format_str_v2(s): |
| | | sptk_dict = {} |
| | |
| | | s = s.replace(emoji + " ", emoji) |
| | | return s.strip() |
| | | |
| | | |
| | | def rich_transcription_postprocess(s): |
| | | def get_emo(s): |
| | | return s[-1] if s[-1] in emo_set else None |
| | | |
| | | def get_event(s): |
| | | return s[0] if s[0] in event_set else None |
| | | |
| | |
| | | new_s += s_list[i].strip().lstrip() |
| | | new_s = new_s.replace("The.", " ") |
| | | return new_s.strip() |
| | | |