From ba0325c004b4d660c769c02c70e9e5a9534ca6ed Mon Sep 17 00:00:00 2001
From: wuhongsheng <664116298@qq.com>
Date: Tue, 02 Jul 2024 12:23:48 +0800
Subject: [PATCH] 修复增加标点后断句起点时间戳bug (#1865)

---
 funasr/utils/timestamp_tools.py |    7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/funasr/utils/timestamp_tools.py b/funasr/utils/timestamp_tools.py
index 4fc2d15..84a02b2 100644
--- a/funasr/utils/timestamp_tools.py
+++ b/funasr/utils/timestamp_tools.py
@@ -222,6 +222,7 @@
     punc_stamp_text_list = list(
         zip_longest(punc_id_list, timestamp_postprocessed, texts, fillvalue=None)
     )
+    is_sentence_start = True
     for punc_stamp_text in punc_stamp_text_list:
         punc_id, timestamp, text = punc_stamp_text
         # sentence_text += text if text is not None else ''
@@ -240,8 +241,11 @@
         punc_id = int(punc_id) if punc_id is not None else 1
         sentence_end = timestamp[1] if timestamp is not None else sentence_end
         sentence_text = sentence_text[1:] if sentence_text[0] == ' ' else sentence_text
-        
+        if is_sentence_start:
+            sentence_start = timestamp[0] if timestamp is not None else sentence_start
+            is_sentence_start = False
         if punc_id > 1:
+            is_sentence_start = True
             sentence_text += punc_list[punc_id - 2]
             sentence_text_seg = (
                 sentence_text_seg[:-1] if sentence_text_seg[-1] == " " else sentence_text_seg
@@ -268,5 +272,4 @@
             sentence_text = ""
             sentence_text_seg = ""
             ts_list = []
-            sentence_start = sentence_end
     return res
\ No newline at end of file

--
Gitblit v1.9.1