| | |
| | | res.append({ |
| | | 'text': text_postprocessed.split(), |
| | | "start": time_stamp_postprocessed[0][0], |
| | | "end": time_stamp_postprocessed[-1][1] |
| | | "end": time_stamp_postprocessed[-1][1], |
| | | 'text_seg': text_postprocessed.split(), |
| | | "ts_list": time_stamp_postprocessed, |
| | | }) |
| | | return res |
| | | if len(punc_id_list) != len(time_stamp_postprocessed): |
| | | print(" warning length mistach!!!!!!") |
| | | sentence_text = '' |
| | | sentence_text = "" |
| | | sentence_text_seg = "" |
| | | ts_list = [] |
| | | sentence_start = time_stamp_postprocessed[0][0] |
| | | sentence_end = time_stamp_postprocessed[0][1] |
| | | texts = text_postprocessed.split() |
| | | punc_stamp_text_list = list(zip_longest(punc_id_list, time_stamp_postprocessed, texts, fillvalue=None)) |
| | | for punc_stamp_text in punc_stamp_text_list: |
| | | punc_id, time_stamp, text = punc_stamp_text |
| | | sentence_text += text if text is not None else '' |
| | | # sentence_text += text if text is not None else '' |
| | | if text is not None: |
| | | if 'a' <= text[0] <= 'z' or 'A' <= text[0] <= 'Z': |
| | | sentence_text += ' ' + text |
| | | elif len(sentence_text) and ('a' <= sentence_text[-1] <= 'z' or 'A' <= sentence_text[-1] <= 'Z'): |
| | | sentence_text += ' ' + text |
| | | else: |
| | | sentence_text += text |
| | | sentence_text_seg += text + ' ' |
| | | ts_list.append(time_stamp) |
| | | |
| | | punc_id = int(punc_id) if punc_id is not None else 1 |
| | | sentence_end = time_stamp[1] if time_stamp is not None else sentence_end |
| | | |
| | |
| | | res.append({ |
| | | 'text': sentence_text, |
| | | "start": sentence_start, |
| | | "end": sentence_end |
| | | "end": sentence_end, |
| | | "text_seg": sentence_text_seg, |
| | | "ts_list": ts_list |
| | | }) |
| | | sentence_text = '' |
| | | sentence_text_seg = '' |
| | | ts_list = [] |
| | | sentence_start = sentence_end |
| | | elif punc_id == 3: |
| | | sentence_text += '.' |
| | | res.append({ |
| | | 'text': sentence_text, |
| | | "start": sentence_start, |
| | | "end": sentence_end |
| | | "end": sentence_end, |
| | | "text_seg": sentence_text_seg, |
| | | "ts_list": ts_list |
| | | }) |
| | | sentence_text = '' |
| | | sentence_text_seg = '' |
| | | ts_list = [] |
| | | sentence_start = sentence_end |
| | | elif punc_id == 4: |
| | | sentence_text += '?' |
| | | res.append({ |
| | | 'text': sentence_text, |
| | | "start": sentence_start, |
| | | "end": sentence_end |
| | | "end": sentence_end, |
| | | "text_seg": sentence_text_seg, |
| | | "ts_list": ts_list |
| | | }) |
| | | sentence_text = '' |
| | | sentence_text_seg = '' |
| | | ts_list = [] |
| | | sentence_start = sentence_end |
| | | return res |
| | | |