From ba0325c004b4d660c769c02c70e9e5a9534ca6ed Mon Sep 17 00:00:00 2001
From: wuhongsheng <664116298@qq.com>
Date: Tue, 02 Jul 2024 12:23:48 +0800
Subject: [PATCH] 修复增加标点后断句起点时间戳bug (#1865)
---
funasr/utils/timestamp_tools.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/funasr/utils/timestamp_tools.py b/funasr/utils/timestamp_tools.py
index 4fc2d15..84a02b2 100644
--- a/funasr/utils/timestamp_tools.py
+++ b/funasr/utils/timestamp_tools.py
@@ -222,6 +222,7 @@
punc_stamp_text_list = list(
zip_longest(punc_id_list, timestamp_postprocessed, texts, fillvalue=None)
)
+ is_sentence_start = True
for punc_stamp_text in punc_stamp_text_list:
punc_id, timestamp, text = punc_stamp_text
# sentence_text += text if text is not None else ''
@@ -240,8 +241,11 @@
punc_id = int(punc_id) if punc_id is not None else 1
sentence_end = timestamp[1] if timestamp is not None else sentence_end
sentence_text = sentence_text[1:] if sentence_text[0] == ' ' else sentence_text
-
+ if is_sentence_start:
+ sentence_start = timestamp[0] if timestamp is not None else sentence_start
+ is_sentence_start = False
if punc_id > 1:
+ is_sentence_start = True
sentence_text += punc_list[punc_id - 2]
sentence_text_seg = (
sentence_text_seg[:-1] if sentence_text_seg[-1] == " " else sentence_text_seg
@@ -268,5 +272,4 @@
sentence_text = ""
sentence_text_seg = ""
ts_list = []
- sentence_start = sentence_end
return res
\ No newline at end of file
--
Gitblit v1.9.1