From 6dcca251b3a109b719bdc6928d23e42ff97ce97e Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: 星期四, 16 二月 2023 15:26:22 +0800
Subject: [PATCH] Merge pull request #124 from alibaba-damo-academy/dev_lzr

---
 funasr/bin/asr_inference_paraformer_vad_punc.py |   32 ++++++++++++++++++++------------
 1 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/funasr/bin/asr_inference_paraformer_vad_punc.py b/funasr/bin/asr_inference_paraformer_vad_punc.py
index 629ee4f..755cc9c 100644
--- a/funasr/bin/asr_inference_paraformer_vad_punc.py
+++ b/funasr/bin/asr_inference_paraformer_vad_punc.py
@@ -39,8 +39,9 @@
 from funasr.utils import asr_utils, wav_utils, postprocess_utils
 from funasr.models.frontend.wav_frontend import WavFrontend
 from funasr.tasks.vad import VADTask
-from funasr.utils.timestamp_tools import time_stamp_lfr6, time_stamp_lfr6_pl
+from funasr.utils.timestamp_tools import time_stamp_lfr6_pl
 from funasr.bin.punctuation_infer import Text2Punc
+from funasr.models.e2e_asr_paraformer import BiCifParaformer
 
 header_colors = '\033[95m'
 end_colors = '\033[0m'
@@ -281,12 +282,8 @@
                 else:
                     text = None
 
-                if isinstance(self.asr_model, BiCifParaformer):
-                    timestamp = time_stamp_lfr6_pl(us_alphas[i], us_cif_peak[i], copy.copy(token), begin_time, end_time)
-                    results.append((text, token, token_int, timestamp, enc_len_batch_total, lfr_factor))
-                else:
-                    time_stamp = time_stamp_lfr6(alphas[i:i + 1, ], enc_len[i:i + 1, ], copy.copy(token), begin_time, end_time)
-                    results.append((text, token, token_int, time_stamp, enc_len_batch_total, lfr_factor))
+                timestamp = time_stamp_lfr6_pl(us_alphas[i], us_cif_peak[i], copy.copy(token), begin_time, end_time)
+                results.append((text, token, token_int, timestamp, enc_len_batch_total, lfr_factor))
 
         # assert check_return_type(results)
         return results
@@ -569,6 +566,11 @@
             allow_variable_data_keys=allow_variable_data_keys,
             inference=True,
         )
+
+        if param_dict is not None:
+            use_timestamp = param_dict.get('use_timestamp', True)
+        else:
+            use_timestamp = True
     
         finish_count = 0
         file_count = 1
@@ -611,8 +613,11 @@
                 result = result_segments[0]
                 text, token, token_int = result[0], result[1], result[2]
                 time_stamp = None if len(result) < 4 else result[3]
-    
-                postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+
+                if use_timestamp and time_stamp is not None: 
+                    postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+                else:
+                    postprocessed_result = postprocess_utils.sentence_postprocess(token)
                 text_postprocessed = ""
                 time_stamp_postprocessed = ""
                 text_postprocessed_punc = postprocessed_result
@@ -620,9 +625,12 @@
                     text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \
                                                                                postprocessed_result[1], \
                                                                                postprocessed_result[2]
-                    text_postprocessed_punc = text_postprocessed
-                    if len(word_lists) > 0 and text2punc is not None:
-                        text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
+                else:
+                    text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]
+
+                text_postprocessed_punc = text_postprocessed
+                if len(word_lists) > 0 and text2punc is not None:
+                    text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
     
                 item = {'key': key, 'value': text_postprocessed_punc}
                 if text_postprocessed != "":

--
Gitblit v1.9.1