| | |
| | | import logging |
| | | import sys |
| | | import time |
| | | import json |
| | | from pathlib import Path |
| | | from typing import Optional |
| | | from typing import Sequence |
| | |
| | | self.converter = converter |
| | | self.tokenizer = tokenizer |
| | | is_use_lm = lm_weight != 0.0 and lm_file is not None |
| | | if ctc_weight == 0.0 and not is_use_lm: |
| | | if (ctc_weight == 0.0 or asr_model.ctc == None) and not is_use_lm: |
| | | beam_search = None |
| | | self.beam_search = beam_search |
| | | logging.info(f"Beam_search: {self.beam_search}") |
| | |
| | | text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \ |
| | | postprocessed_result[1], \ |
| | | postprocessed_result[2] |
| | | text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20) |
| | | text_postprocessed_punc_time_stamp = "predictions: {} time_stamp: {}".format( |
| | | text_postprocessed_punc, time_stamp_postprocessed) |
| | | if len(word_lists) > 0: |
| | | text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20) |
| | | text_postprocessed_punc_time_stamp = json.dumps({"predictions": text_postprocessed_punc, |
| | | "time_stamp": time_stamp_postprocessed}, |
| | | ensure_ascii=False) |
| | | else: |
| | | text_postprocessed_punc = "" |
| | | punc_id_list = [] |
| | | text_postprocessed_punc_time_stamp = "" |
| | | |
| | | else: |
| | | text_postprocessed = "" |
| | | time_stamp_postprocessed = "" |
| | |
| | | time_stamp_postprocessed)) |
| | | |
| | | logging.info("decoding, feature length total: {}, forward_time total: {:.4f}, rtf avg: {:.4f}". |
| | | format(length_total, forward_time_total, 100 * forward_time_total / (length_total * lfr_factor))) |
| | | format(length_total, forward_time_total, 100 * forward_time_total / (length_total * lfr_factor+1e-6))) |
| | | return asr_result_list |
| | | return _forward |
| | | |