游雁
2024-02-21 741d089eb9dd9be7b6e2cabbd40fc0a784eb38f3
funasr/utils/timestamp_tools.py
@@ -98,7 +98,7 @@
    return res_txt, res
def timestamp_sentence(punc_id_list, timestamp_postprocessed, text_postprocessed):
def timestamp_sentence(punc_id_list, timestamp_postprocessed, text_postprocessed, return_raw_text=False):
    punc_list = [',', '。', '?', '、']
    res = []
    if text_postprocessed is None:
@@ -142,15 +142,24 @@
        punc_id = int(punc_id) if punc_id is not None else 1
        sentence_end = timestamp[1] if timestamp is not None else sentence_end
        sentence_text_seg = sentence_text_seg[:-1] if sentence_text_seg[-1] == ' ' else sentence_text_seg
        if punc_id > 1:
            sentence_text += punc_list[punc_id - 2]
            res.append({
                'text': sentence_text,
                "start": sentence_start,
                "end": sentence_end,
                "timestamp": ts_list
            })
            if return_raw_text:
                res.append({
                    'text': sentence_text,
                    "start": sentence_start,
                    "end": sentence_end,
                    "timestamp": ts_list,
                    'raw_text': sentence_text_seg,
                })
            else:
                res.append({
                    'text': sentence_text,
                    "start": sentence_start,
                    "end": sentence_end,
                    "timestamp": ts_list,
                })
            sentence_text = ''
            sentence_text_seg = ''
            ts_list = []