shixian.shi
2023-03-13 5a7ee30783debe2d0cb900f83e534b4d1699a277
funasr/bin/asr_inference_paraformer_vad_punc.py
@@ -44,11 +44,10 @@
from funasr.models.frontend.wav_frontend import WavFrontend
from funasr.tasks.vad import VADTask
from funasr.bin.vad_inference import Speech2VadSegment
from funasr.utils.timestamp_tools import time_stamp_lfr6_pl
from funasr.utils.timestamp_tools import time_stamp_sentence, ts_prediction_lfr6_standard
from funasr.bin.punctuation_infer import Text2Punc
from funasr.models.e2e_asr_paraformer import BiCifParaformer, ContextualParaformer
from funasr.utils.timestamp_tools import time_stamp_sentence
# ANSI SGR escape sequences for terminal text styling.
# '\033[95m' selects the bright-magenta foreground colour and '\033[0m'
# resets all attributes back to the terminal default.
# NOTE(review): presumably wrapped around header text printed to the
# console/log; the usage sites are outside this view -- confirm.
header_colors = '\033[95m'
end_colors = '\033[0m'
@@ -303,7 +302,10 @@
                    text = None
                if isinstance(self.asr_model, BiCifParaformer):
                    timestamp = time_stamp_lfr6_pl(us_alphas[i], us_cif_peak[i], copy.copy(token), begin_time, end_time)
                    _, timestamp = ts_prediction_lfr6_standard(us_alphas[i],
                                                            us_cif_peak[i],
                                                            copy.copy(token),
                                                            vad_offset=begin_time)
                    results.append((text, token, token_int, timestamp, enc_len_batch_total, lfr_factor))
                else:
                    results.append((text, token, token_int, enc_len_batch_total, lfr_factor))