| | |
| | | import logging |
| | | import sys |
| | | import time |
| | | import json |
| | | from pathlib import Path |
| | | from typing import Optional |
| | | from typing import Sequence |
| | |
| | | # logging.info("asr_train_args: {}".format(asr_train_args)) |
| | | asr_model.to(dtype=getattr(torch, dtype)).eval() |
| | | |
| | | ctc = CTCPrefixScorer(ctc=asr_model.ctc, eos=asr_model.eos) |
| | | if asr_model.ctc != None: |
| | | ctc = CTCPrefixScorer(ctc=asr_model.ctc, eos=asr_model.eos) |
| | | scorers.update( |
| | | ctc=ctc |
| | | ) |
| | | token_list = asr_model.token_list |
| | | scorers.update( |
| | | ctc=ctc, |
| | | length_bonus=LengthBonus(len(token_list)), |
| | | ) |
| | | |
| | |
| | | self.converter = converter |
| | | self.tokenizer = tokenizer |
| | | is_use_lm = lm_weight != 0.0 and lm_file is not None |
| | | if ctc_weight == 0.0 and not is_use_lm: |
| | | if (ctc_weight == 0.0 or asr_model.ctc == None) and not is_use_lm: |
| | | beam_search = None |
| | | self.beam_search = beam_search |
| | | logging.info(f"Beam_search: {self.beam_search}") |
| | |
| | | length_total = 0.0 |
| | | finish_count = 0 |
| | | file_count = 1 |
| | | lfr_factor = 6 |
| | | # 7 .Start for-loop |
| | | asr_result_list = [] |
| | | output_path = output_dir_v2 if output_dir_v2 is not None else output_dir |
| | |
| | | results = speech2text(**batch) |
| | | if len(results) < 1: |
| | | hyp = Hypothesis(score=0.0, scores={}, states={}, yseq=[]) |
| | | results = [[" ", ["<space>"], [2], 10, 6]] * nbest |
| | | results = [[" ", ["<space>"], [2], 0, 1, 6]] * nbest |
| | | time_end = time.time() |
| | | forward_time = time_end - time_beg |
| | | lfr_factor = results[0][-1] |
| | |
| | | |
| | | key = keys[0] |
| | | result = result_segments[0] |
| | | text, token, token_int, time_stamp = result |
| | | text, token, token_int = result[0], result[1], result[2] |
| | | time_stamp = None if len(result) < 4 else result[3] |
| | | |
| | | # Create a directory: outdir/{n}best_recog |
| | | if writer is not None: |
| | |
| | | text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \ |
| | | postprocessed_result[1], \ |
| | | postprocessed_result[2] |
| | | text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20) |
| | | text_postprocessed_punc_time_stamp = "predictions: {} time_stamp: {}".format( |
| | | text_postprocessed_punc, time_stamp_postprocessed) |
| | | if len(word_lists) > 0: |
| | | text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20) |
| | | text_postprocessed_punc_time_stamp = json.dumps({"predictions": text_postprocessed_punc, |
| | | "time_stamp": time_stamp_postprocessed}, |
| | | ensure_ascii=False) |
| | | else: |
| | | text_postprocessed_punc = "" |
| | | punc_id_list = [] |
| | | text_postprocessed_punc_time_stamp = "" |
| | | |
| | | else: |
| | | text_postprocessed = postprocessed_result |
| | | time_stamp_postprocessed = None |
| | | word_lists = None |
| | | text_postprocessed_punc_time_stamp = None |
| | | punc_id_list = None |
| | | text_postprocessed = "" |
| | | time_stamp_postprocessed = "" |
| | | word_lists = "" |
| | | text_postprocessed_punc_time_stamp = "" |
| | | punc_id_list = "" |
| | | text_postprocessed_punc = "" |
| | | |
| | | item = {'key': key, 'value': text_postprocessed_punc_time_stamp, 'text': text_postprocessed, |
| | | 'time_stamp': time_stamp_postprocessed, 'punc': punc_id_list, 'token': token} |
| | |
| | | time_stamp_postprocessed)) |
| | | |
| | | logging.info("decoding, feature length total: {}, forward_time total: {:.4f}, rtf avg: {:.4f}". |
| | | format(length_total, forward_time_total, 100 * forward_time_total / (length_total * lfr_factor))) |
| | | format(length_total, forward_time_total, 100 * forward_time_total / (length_total * lfr_factor+1e-6))) |
| | | return asr_result_list |
| | | return _forward |
| | | |