From 3ff9f5bf779f79750807d706c08b5b5c5943fca0 Mon Sep 17 00:00:00 2001
From: shixian.shi <shixian.shi@alibaba-inc.com>
Date: Thu, 09 Mar 2023 17:22:00 +0800
Subject: [PATCH] update timestamp inference

---
 funasr/bin/tp_inference.py |    8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/funasr/bin/tp_inference.py b/funasr/bin/tp_inference.py
index 67e82a7..baa7485 100644
--- a/funasr/bin/tp_inference.py
+++ b/funasr/bin/tp_inference.py
@@ -91,7 +91,7 @@
     for char, timestamp in zip(new_char_list, timestamp_list):
         res_str += "{} {} {};".format(char, str(timestamp[0]+0.0005)[:5], str(timestamp[1]+0.0005)[:5])
     res = []
-    for char, timestamp in zip(char_list, timestamp_list):
+    for char, timestamp in zip(new_char_list, timestamp_list):
         if char != '<sil>':
             res.append([int(timestamp[0] * 1000), int(timestamp[1] * 1000)])
     return res_str, res
@@ -114,7 +114,7 @@
         )
         if 'cuda' in device:
             tp_model = tp_model.cuda()
-            
+
         frontend = None
         if tp_train_args.frontend is not None:
             frontend = WavFrontend(cmvn_file=timestamp_cmvn_file, **tp_train_args.frontend_conf)
@@ -304,7 +304,9 @@
                 token = speechtext2timestamp.converter.ids2tokens(batch['text'][batch_id])
                 ts_str, ts_list = time_stamp_lfr6_advance(us_alphas[batch_id], us_cif_peak[batch_id], token)
                 logging.warning(ts_str)
-                tp_result_list.append({'text':"".join([i for i in token if i != '<sil>']), 'timestamp': ts_list})
+                item = {'key': key, 'value': ts_str, 'timestamp':ts_list}
+                # tp_result_list.append({'text':"".join([i for i in token if i != '<sil>']), 'timestamp': ts_list})
+                tp_result_list.append(item)
         return tp_result_list
 
     return _forward

--
Gitblit v1.9.1