From 741d089eb9dd9be7b6e2cabbd40fc0a784eb38f3 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期三, 21 二月 2024 16:28:58 +0800
Subject: [PATCH] Merge branch 'main' of github.com:alibaba-damo-academy/FunASR merge
---
funasr/utils/timestamp_tools.py | 25 +++++++++++++++++--------
1 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/funasr/utils/timestamp_tools.py b/funasr/utils/timestamp_tools.py
index 63f179a..32f0f84 100644
--- a/funasr/utils/timestamp_tools.py
+++ b/funasr/utils/timestamp_tools.py
@@ -98,7 +98,7 @@
return res_txt, res
-def timestamp_sentence(punc_id_list, timestamp_postprocessed, text_postprocessed):
+def timestamp_sentence(punc_id_list, timestamp_postprocessed, text_postprocessed, return_raw_text=False):
punc_list = ['，', '。', '？', '、']
res = []
if text_postprocessed is None:
@@ -142,15 +142,24 @@
punc_id = int(punc_id) if punc_id is not None else 1
sentence_end = timestamp[1] if timestamp is not None else sentence_end
-
+ sentence_text_seg = sentence_text_seg[:-1] if sentence_text_seg[-1] == ' ' else sentence_text_seg
if punc_id > 1:
sentence_text += punc_list[punc_id - 2]
- res.append({
- 'text': sentence_text,
- "start": sentence_start,
- "end": sentence_end,
- "timestamp": ts_list
- })
+ if return_raw_text:
+ res.append({
+ 'text': sentence_text,
+ "start": sentence_start,
+ "end": sentence_end,
+ "timestamp": ts_list,
+ 'raw_text': sentence_text_seg,
+ })
+ else:
+ res.append({
+ 'text': sentence_text,
+ "start": sentence_start,
+ "end": sentence_end,
+ "timestamp": ts_list,
+ })
sentence_text = ''
sentence_text_seg = ''
ts_list = []
--
Gitblit v1.9.1