| | |
| | | # -*- encoding: utf-8 -*- |
| | | #!/usr/bin/env python3 |
| | | # -*- encoding: utf-8 -*- |
| | | # Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. |
| | | # MIT License (https://opensource.org/licenses/MIT) |
| | | |
| | |
| | | import logging |
| | | import os |
| | | import sys |
| | | from typing import Union, Dict, Any |
| | | |
| | | from funasr.utils import config_argparse |
| | | from funasr.utils.cli_utils import get_commandline_args |
| | | from funasr.utils.types import str2bool |
| | | from funasr.utils.types import str2triple_str |
| | | from funasr.utils.types import str_or_none |
| | | |
| | | #!/usr/bin/env python3 |
| | | import argparse |
| | | import logging |
| | | import sys |
| | | import time |
| | | import copy |
| | | import os |
| | | import codecs |
| | | import tempfile |
| | | import requests |
| | | from pathlib import Path |
| | | from typing import Dict |
| | | from typing import List |
| | | from typing import Optional |
| | | from typing import Sequence |
| | | from typing import Tuple |
| | | from typing import Union |
| | | from typing import Dict |
| | | from typing import Any |
| | | from typing import List |
| | | import yaml |
| | | |
| | | import numpy as np |
| | | import torch |
| | | import torchaudio |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | from funasr.fileio.datadir_writer import DatadirWriter |
| | | from funasr.modules.beam_search.beam_search import BeamSearch |
| | | # from funasr.modules.beam_search.beam_search import BeamSearchPara as BeamSearch |
| | | |
| | | from funasr.bin.asr_infer import Speech2Text |
| | | from funasr.bin.asr_infer import Speech2TextMFCCA |
| | | from funasr.bin.asr_infer import Speech2TextParaformer, Speech2TextParaformerOnline |
| | | from funasr.bin.asr_infer import Speech2TextSAASR |
| | | from funasr.bin.asr_infer import Speech2TextTransducer |
| | | from funasr.bin.asr_infer import Speech2TextUniASR |
| | | from funasr.bin.punc_infer import Text2Punc |
| | | from funasr.bin.tp_infer import Speech2Timestamp |
| | | from funasr.bin.vad_infer import Speech2VadSegment |
| | | from funasr.fileio.datadir_writer import DatadirWriter |
| | | from funasr.modules.beam_search.beam_search import Hypothesis |
| | | from funasr.modules.scorers.ctc import CTCPrefixScorer |
| | | from funasr.modules.scorers.length_bonus import LengthBonus |
| | | from funasr.modules.subsampling import TooShortUttError |
| | | from funasr.tasks.asr import ASRTask |
| | | from funasr.tasks.lm import LMTask |
| | | from funasr.text.build_tokenizer import build_tokenizer |
| | | from funasr.text.token_id_converter import TokenIDConverter |
| | | from funasr.tasks.vad import VADTask |
| | | from funasr.torch_utils.device_funcs import to_device |
| | | from funasr.torch_utils.set_all_random_seed import set_all_random_seed |
| | | from funasr.utils import asr_utils, postprocess_utils |
| | | from funasr.utils import config_argparse |
| | | from funasr.utils.cli_utils import get_commandline_args |
| | | from funasr.utils.timestamp_tools import time_stamp_sentence, ts_prediction_lfr6_standard |
| | | from funasr.utils.types import str2bool |
| | | from funasr.utils.types import str2triple_str |
| | | from funasr.utils.types import str_or_none |
| | | from funasr.utils import asr_utils, wav_utils, postprocess_utils |
| | | from funasr.models.frontend.wav_frontend import WavFrontend, WavFrontendOnline |
| | | from funasr.models.e2e_asr_paraformer import BiCifParaformer, ContextualParaformer |
| | | from funasr.models.e2e_asr_contextual_paraformer import NeatContextualParaformer |
| | | from funasr.export.models.e2e_asr_paraformer import Paraformer as Paraformer_export |
| | | from funasr.utils.timestamp_tools import ts_prediction_lfr6_standard |
| | | |
| | | |
| | | from funasr.utils.vad_utils import slice_padding_fbank |
| | | from funasr.tasks.vad import VADTask |
| | | from funasr.utils.timestamp_tools import time_stamp_sentence, ts_prediction_lfr6_standard |
| | | from funasr.bin.asr_infer import Speech2Text |
| | | from funasr.bin.asr_infer import Speech2TextParaformer, Speech2TextParaformerOnline |
| | | from funasr.bin.asr_infer import Speech2TextUniASR |
| | | from funasr.bin.asr_infer import Speech2TextMFCCA |
| | | from funasr.bin.vad_infer import Speech2VadSegment |
| | | from funasr.bin.punc_infer import Text2Punc |
| | | from funasr.bin.tp_infer import Speech2Timestamp |
| | | from funasr.bin.asr_infer import Speech2TextTransducer |
| | | from funasr.bin.asr_infer import Speech2TextSAASR |
| | | |
| | | |
| | | def inference_asr( |
| | | maxlenratio: float, |
| | |
| | | beg_idx = 0 |
| | | for j, _ in enumerate(range(0, n)): |
| | | batch_size_token_ms_cum += (sorted_data[j][0][1] - sorted_data[j][0][0]) |
| | | if j < n-1 and (batch_size_token_ms_cum + sorted_data[j+1][0][1] - sorted_data[j+1][0][0])<batch_size_token_ms: |
| | | if j < n - 1 and (batch_size_token_ms_cum + sorted_data[j + 1][0][1] - sorted_data[j + 1][0][ |
| | | 0]) < batch_size_token_ms: |
| | | continue |
| | | batch_size_token_ms_cum = 0 |
| | | end_idx = j + 1 |
| | |
| | | return asr_result_list |
| | | |
| | | return _forward |
| | | |
| | | |
| | | def inference_paraformer_online( |
| | | maxlenratio: float, |
| | |
| | | feats_dims = config["frontend_conf"]["n_mels"] * config["frontend_conf"]["lfr_m"] |
| | | cache_en = {"start_idx": 0, "cif_hidden": torch.zeros((batch_size, 1, enc_output_size)), |
| | | "cif_alphas": torch.zeros((batch_size, 1)), "chunk_size": chunk_size, "last_chunk": False, |
| | | "feats": torch.zeros((batch_size, chunk_size[0] + chunk_size[2], feats_dims)), "tail_chunk": False} |
| | | "feats": torch.zeros((batch_size, chunk_size[0] + chunk_size[2], feats_dims)), |
| | | "tail_chunk": False} |
| | | cache["encoder"] = cache_en |
| | | |
| | | cache_de = {"decode_fsmn": None} |
| | |
| | | |
| | | return _forward |
| | | |
| | | |
| | | def inference_transducer( |
| | | output_dir: str, |
| | | batch_size: int, |
| | |
| | | |
| | | if text is not None: |
| | | ibest_writer["text"][key] = text |
| | | |
| | | |
| | | return _forward |
| | | |
| | |
| | | |
| | | inference_pipeline = inference_launch(**kwargs) |
| | | return inference_pipeline(kwargs["data_path_and_name_and_type"], hotword=kwargs.get("hotword", None)) |
| | | |
| | | |
| | | |
| | | if __name__ == "__main__": |