Shi Xian
2024-12-05 22b928dd3ff37ccee57ab2b5c2e4fcda4d33d24d
funasr/utils/timestamp_tools.py
@@ -84,7 +84,8 @@
        timestamp_list.append([_end * TIME_RATE, num_frames * TIME_RATE])
        new_char_list.append("<sil>")
    else:
        timestamp_list[-1][1] = num_frames * TIME_RATE
        if len(timestamp_list)>0:
            timestamp_list[-1][1] = num_frames * TIME_RATE
    if vad_offset:  # add offset time in model with vad
        for i in range(len(timestamp_list)):
            timestamp_list[i][0] = timestamp_list[i][0] + vad_offset / 1000.0
@@ -141,6 +142,8 @@
    )
    for punc_stamp_text in punc_stamp_text_list:
        punc_id, timestamp, text = punc_stamp_text
        if sentence_start is None and timestamp is not None:
            sentence_start = timestamp[0]
        # sentence_text += text if text is not None else ''
        if text is not None:
            if "a" <= text[0] <= "z" or "A" <= text[0] <= "Z":
@@ -156,7 +159,6 @@
        punc_id = int(punc_id) if punc_id is not None else 1
        sentence_end = timestamp[1] if timestamp is not None else sentence_end
        sentence_start = timestamp[0] if timestamp is not None else sentence_start
        sentence_text_seg = (
            sentence_text_seg[:-1] if sentence_text_seg[-1] == " " else sentence_text_seg
        )
@@ -184,6 +186,7 @@
            sentence_text = ""
            sentence_text_seg = ""
            ts_list = []
            sentence_start = None
    return res
@@ -222,6 +225,7 @@
    punc_stamp_text_list = list(
        zip_longest(punc_id_list, timestamp_postprocessed, texts, fillvalue=None)
    )
    is_sentence_start = True
    for punc_stamp_text in punc_stamp_text_list:
        punc_id, timestamp, text = punc_stamp_text
        # sentence_text += text if text is not None else ''
@@ -240,8 +244,11 @@
        punc_id = int(punc_id) if punc_id is not None else 1
        sentence_end = timestamp[1] if timestamp is not None else sentence_end
        sentence_text = sentence_text[1:] if sentence_text[0] == ' ' else sentence_text
        if is_sentence_start:
            sentence_start = timestamp[0] if timestamp is not None else sentence_start
            is_sentence_start = False
        if punc_id > 1:
            is_sentence_start = True
            sentence_text += punc_list[punc_id - 2]
            sentence_text_seg = (
                sentence_text_seg[:-1] if sentence_text_seg[-1] == " " else sentence_text_seg
@@ -268,5 +275,4 @@
            sentence_text = ""
            sentence_text_seg = ""
            ts_list = []
            sentence_start = sentence_end
    return res
    return res