From 741d089eb9dd9be7b6e2cabbd40fc0a784eb38f3 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期三, 21 二月 2024 16:28:58 +0800
Subject: [PATCH] Merge branch 'main' of github.com:alibaba-damo-academy/FunASR merge

---
 funasr/utils/timestamp_tools.py |   25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/funasr/utils/timestamp_tools.py b/funasr/utils/timestamp_tools.py
index 63f179a..32f0f84 100644
--- a/funasr/utils/timestamp_tools.py
+++ b/funasr/utils/timestamp_tools.py
@@ -98,7 +98,7 @@
     return res_txt, res
 
 
-def timestamp_sentence(punc_id_list, timestamp_postprocessed, text_postprocessed):
+def timestamp_sentence(punc_id_list, timestamp_postprocessed, text_postprocessed, return_raw_text=False):
     punc_list = ['，', '。', '？', '、']
     res = []
     if text_postprocessed is None:
@@ -142,15 +142,24 @@
 
         punc_id = int(punc_id) if punc_id is not None else 1
         sentence_end = timestamp[1] if timestamp is not None else sentence_end
-
+        sentence_text_seg = sentence_text_seg[:-1] if sentence_text_seg[-1] == ' ' else sentence_text_seg
         if punc_id > 1:
             sentence_text += punc_list[punc_id - 2]
-            res.append({
-                'text': sentence_text,
-                "start": sentence_start,
-                "end": sentence_end,
-                "timestamp": ts_list
-            })
+            if return_raw_text:
+                res.append({
+                    'text': sentence_text,
+                    "start": sentence_start,
+                    "end": sentence_end,
+                    "timestamp": ts_list,
+                    'raw_text': sentence_text_seg,
+                })
+            else:
+                res.append({
+                    'text': sentence_text,
+                    "start": sentence_start,
+                    "end": sentence_end,
+                    "timestamp": ts_list,
+                })
             sentence_text = ''
             sentence_text_seg = ''
             ts_list = []

--
Gitblit v1.9.1