wusong
2024-09-25 d20c030e5b75306dd67e8fe9924d5d94eac1bf30
funasr/utils/timestamp_tools.py
@@ -141,6 +141,8 @@
    )
    for punc_stamp_text in punc_stamp_text_list:
        punc_id, timestamp, text = punc_stamp_text
        if sentence_start is None and timestamp is not None:
            sentence_start = timestamp[0]
        # sentence_text += text if text is not None else ''
        if text is not None:
            if "a" <= text[0] <= "z" or "A" <= text[0] <= "Z":
@@ -156,7 +158,6 @@
        punc_id = int(punc_id) if punc_id is not None else 1
        sentence_end = timestamp[1] if timestamp is not None else sentence_end
        sentence_start = timestamp[0] if timestamp is not None else sentence_start
        sentence_text_seg = (
            sentence_text_seg[:-1] if sentence_text_seg[-1] == " " else sentence_text_seg
        )
@@ -184,6 +185,7 @@
            sentence_text = ""
            sentence_text_seg = ""
            ts_list = []
            sentence_start = None
    return res
@@ -222,6 +224,7 @@
    punc_stamp_text_list = list(
        zip_longest(punc_id_list, timestamp_postprocessed, texts, fillvalue=None)
    )
    is_sentence_start = True
    for punc_stamp_text in punc_stamp_text_list:
        punc_id, timestamp, text = punc_stamp_text
        # sentence_text += text if text is not None else ''
@@ -240,8 +243,11 @@
        punc_id = int(punc_id) if punc_id is not None else 1
        sentence_end = timestamp[1] if timestamp is not None else sentence_end
        sentence_text = sentence_text[1:] if sentence_text[0] == ' ' else sentence_text
        if is_sentence_start:
            sentence_start = timestamp[0] if timestamp is not None else sentence_start
            is_sentence_start = False
        if punc_id > 1:
            is_sentence_start = True
            sentence_text += punc_list[punc_id - 2]
            sentence_text_seg = (
                sentence_text_seg[:-1] if sentence_text_seg[-1] == " " else sentence_text_seg
@@ -268,5 +274,4 @@
            sentence_text = ""
            sentence_text_seg = ""
            ts_list = []
            sentence_start = sentence_end
    return res
    return res