wuhongsheng
2024-07-02 ba0325c004b4d660c769c02c70e9e5a9534ca6ed
修复增加标点后断句起点时间戳bug (#1865)

* 修复断句之间时间戳bug

* 修复增加标点后断句起点时间戳bug
1个文件已修改
7 ■■■■ 已修改文件
funasr/utils/timestamp_tools.py 7 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/utils/timestamp_tools.py
@@ -222,6 +222,7 @@
    punc_stamp_text_list = list(
        zip_longest(punc_id_list, timestamp_postprocessed, texts, fillvalue=None)
    )
    is_sentence_start = True
    for punc_stamp_text in punc_stamp_text_list:
        punc_id, timestamp, text = punc_stamp_text
        # sentence_text += text if text is not None else ''
@@ -240,8 +241,11 @@
        punc_id = int(punc_id) if punc_id is not None else 1
        sentence_end = timestamp[1] if timestamp is not None else sentence_end
        sentence_text = sentence_text[1:] if sentence_text[0] == ' ' else sentence_text
        if is_sentence_start:
            sentence_start = timestamp[0] if timestamp is not None else sentence_start
            is_sentence_start = False
        if punc_id > 1:
            is_sentence_start = True
            sentence_text += punc_list[punc_id - 2]
            sentence_text_seg = (
                sentence_text_seg[:-1] if sentence_text_seg[-1] == " " else sentence_text_seg
@@ -268,5 +272,4 @@
            sentence_text = ""
            sentence_text_seg = ""
            ts_list = []
            sentence_start = sentence_end
    return res