Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main
122个文件已修改
4个文件已添加
13 文件已重命名
| | |
| | | import resampy |
| | | import soundfile |
| | | from tqdm import tqdm |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.utils.cli_utils import get_commandline_args |
| | | from funasr.fileio.read_text import read_2column_text |
| | |
| | | (3, 4, 5) |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if integers.strip() in ("none", "None", "NONE", "null", "Null", "NULL"): |
| | | return None |
| | | return tuple(map(int, integers.strip().split(","))) |
| | |
| | | import os |
| | | <<<<<<< HEAD |
| | | |
| | | from modelscope.metainfo import Trainers |
| | | from modelscope.trainers import build_trainer |
| | |
| | | batch_bins=params.batch_bins, |
| | | max_epoch=params.max_epoch, |
| | | lr=params.lr) |
| | | ======= |
| | | from modelscope.metainfo import Trainers |
| | | from modelscope.trainers import build_trainer |
| | | from funasr.datasets.ms_dataset import MsDataset |
| | | |
| | | |
| | | def modelscope_finetune(params): |
| | | if not os.path.exists(params["output_dir"]): |
| | | os.makedirs(params["output_dir"], exist_ok=True) |
| | | # dataset split ["train", "validation"] |
| | | ds_dict = MsDataset.load(params["data_dir"]) |
| | | kwargs = dict( |
| | | model=params["model"], |
| | | model_revision=params["model_revision"], |
| | | data_dir=ds_dict, |
| | | dataset_type=params["dataset_type"], |
| | | work_dir=params["output_dir"], |
| | | batch_bins=params["batch_bins"], |
| | | max_epoch=params["max_epoch"], |
| | | lr=params["lr"]) |
| | | >>>>>>> main |
| | | trainer = build_trainer(Trainers.speech_asr_trainer, default_args=kwargs) |
| | | trainer.train() |
| | | |
| | | |
| | | if __name__ == '__main__': |
| | | <<<<<<< HEAD |
| | | params = modelscope_args(model="damo/speech_UniASR_asr_2pass-tr-16k-common-vocab1582-pytorch", data_path="./data") |
| | | params.output_dir = "./checkpoint" # m模型保存路径 |
| | | params.data_path = "./example_data/" # 数据路径 |
| | | params.dataset_type = "small" # 小数据量设置small,若数据量大于1000小时,请使用large |
| | | params.batch_bins = 2000 # batch size,如果dataset_type="small",batch_bins单位为fbank特征帧数,如果dataset_type="large",batch_bins单位为毫秒, |
| | | params.max_epoch = 50 # 最大训练轮数 |
| | | params.max_epoch = 20 # 最大训练轮数 |
| | | params.lr = 0.00005 # 设置学习率 |
| | | |
| | | ======= |
| | | params = {} |
| | | params["output_dir"] = "./checkpoint" |
| | | params["data_dir"] = "./data" |
| | | params["batch_bins"] = 2000 |
| | | params["dataset_type"] = "small" |
| | | params["max_epoch"] = 50 |
| | | params["lr"] = 0.00005 |
| | | params["model"] = "damo/speech_UniASR_asr_2pass-tr-16k-common-vocab1582-pytorch" |
| | | params["model_revision"] = None |
| | | >>>>>>> main |
| | | modelscope_finetune(params) |
| | | modelscope_finetune(params) |
| | |
| | | <<<<<<< HEAD |
| | | import os |
| | | import shutil |
| | | import argparse |
| | | from modelscope.pipelines import pipeline |
| | | from modelscope.utils.constant import Tasks |
| | | |
| | | def modelscope_infer(args): |
| | | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpuid) |
| | | inference_pipeline = pipeline( |
| | | task=Tasks.auto_speech_recognition, |
| | | model=args.model, |
| | | output_dir=args.output_dir, |
| | | batch_size=args.batch_size, |
| | | param_dict={"decoding_model": args.decoding_mode, "hotword": args.hotword_txt} |
| | | ) |
| | | inference_pipeline(audio_in=args.audio_in) |
| | | |
| | | if __name__ == "__main__": |
| | | parser = argparse.ArgumentParser() |
| | | parser.add_argument('--model', type=str, default="damo/speech_UniASR_asr_2pass-tr-16k-common-vocab1582-pytorch") |
| | | parser.add_argument('--audio_in', type=str, default="./data/test/wav.scp") |
| | | parser.add_argument('--output_dir', type=str, default="./results/") |
| | | parser.add_argument('--decoding_mode', type=str, default="normal") |
| | | parser.add_argument('--hotword_txt', type=str, default=None) |
| | | parser.add_argument('--batch_size', type=int, default=64) |
| | | parser.add_argument('--gpuid', type=str, default="0") |
| | | args = parser.parse_args() |
| | | modelscope_infer(args) |
| | | ======= |
| | | from modelscope.pipelines import pipeline |
| | | from modelscope.utils.constant import Tasks |
| | | |
| | |
| | | output_dir=output_dir, |
| | | ) |
| | | rec_result = inference_pipeline(audio_in=audio_in, param_dict={"decoding_model":"offline"}) |
| | | print(rec_result) |
| | | >>>>>>> main |
| | | print(rec_result) |
| | |
| | | import requests |
| | | import torch |
| | | from packaging.version import parse as V |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | from funasr.build_utils.build_model_from_file import build_model_from_file |
| | | from funasr.build_utils.build_model_from_file import build_model_from_file |
| | | from funasr.models.e2e_asr_contextual_paraformer import NeatContextualParaformer |
| | | from funasr.models.e2e_asr_paraformer import BiCifParaformer, ContextualParaformer |
| | | from funasr.models.frontend.wav_frontend import WavFrontend, WavFrontendOnline |
| | |
| | | frontend_conf: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build ASR model |
| | | scorers = {} |
| | |
| | | text, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | |
| | | text = None |
| | | results.append((text, token, token_int, hyp)) |
| | | |
| | | assert check_return_type(results) |
| | | return results |
| | | |
| | | |
| | |
| | | decoding_ind: int = 0, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build ASR model |
| | | scorers = {} |
| | |
| | | text, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | |
| | | vad_offset=begin_time) |
| | | results.append((text, token, token_int, hyp, timestamp, enc_len_batch_total, lfr_factor)) |
| | | |
| | | # assert check_return_type(results) |
| | | return results |
| | | |
| | | def generate_hotwords_list(self, hotword_list_or_file): |
| | |
| | | hotword_list_or_file: str = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build ASR model |
| | | scorers = {} |
| | |
| | | text, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | results = [] |
| | | cache_en = cache["encoder"] |
| | | if speech.shape[1] < 16 * 60 and cache_en["is_final"]: |
| | |
| | | |
| | | results.append(postprocessed_result) |
| | | |
| | | # assert check_return_type(results) |
| | | return results |
| | | |
| | | |
| | |
| | | frontend_conf: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build ASR model |
| | | scorers = {} |
| | |
| | | text, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | |
| | | text = None |
| | | results.append((text, token, token_int, hyp)) |
| | | |
| | | assert check_return_type(results) |
| | | return results |
| | | |
| | | |
| | |
| | | streaming: bool = False, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build ASR model |
| | | scorers = {} |
| | |
| | | text, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | | speech = torch.tensor(speech) |
| | |
| | | text = None |
| | | results.append((text, token, token_int, hyp)) |
| | | |
| | | assert check_return_type(results) |
| | | return results |
| | | |
| | | |
| | |
| | | """Construct a Speech2Text object.""" |
| | | super().__init__() |
| | | |
| | | assert check_argument_types() |
| | | asr_model, asr_train_args = build_model_from_file( |
| | | asr_train_config, asr_model_file, cmvn_file, device |
| | | ) |
| | |
| | | Returns: |
| | | nbest_hypothesis: N-best hypothesis. |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | if isinstance(speech, np.ndarray): |
| | | speech = torch.tensor(speech) |
| | |
| | | Returns: |
| | | nbest_hypothesis: N-best hypothesis. |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | if isinstance(speech, np.ndarray): |
| | | speech = torch.tensor(speech) |
| | |
| | | text = None |
| | | results.append((text, token, token_int, hyp)) |
| | | |
| | | assert check_return_type(results) |
| | | |
| | | return results |
| | | |
| | | @staticmethod |
| | | def from_pretrained( |
| | | model_tag: Optional[str] = None, |
| | | **kwargs: Optional[Any], |
| | | ) -> Speech2Text: |
| | | """Build Speech2Text instance from the pretrained model. |
| | | Args: |
| | | model_tag: Model tag of the pretrained models. |
| | | Return: |
| | | : Speech2Text instance. |
| | | """ |
| | | if model_tag is not None: |
| | | try: |
| | | from espnet_model_zoo.downloader import ModelDownloader |
| | | |
| | | except ImportError: |
| | | logging.error( |
| | | "`espnet_model_zoo` is not installed. " |
| | | "Please install via `pip install -U espnet_model_zoo`." |
| | | ) |
| | | raise |
| | | d = ModelDownloader() |
| | | kwargs.update(**d.download_and_unpack(model_tag)) |
| | | |
| | | return Speech2TextTransducer(**kwargs) |
| | | |
| | | |
| | | class Speech2TextSAASR: |
| | |
| | | frontend_conf: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build ASR model |
| | | scorers = {} |
| | |
| | | text, text_id, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | |
| | | |
| | | results.append((text, text_id, token, token_int, hyp)) |
| | | |
| | | assert check_return_type(results) |
| | | return results |
| | |
| | | import torchaudio |
| | | import soundfile |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.bin.asr_infer import Speech2Text |
| | | from funasr.bin.asr_infer import Speech2TextMFCCA |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | if batch_size > 1: |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | if word_lm_train_config is not None: |
| | | raise NotImplementedError("Word LM is not implemented") |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | if batch_size > 1: |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | if batch_size > 1: |
| | |
| | | right_context: Number of frames in right context AFTER subsampling. |
| | | display_partial_hypotheses: Whether to display partial hypotheses. |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | if batch_size > 1: |
| | | raise NotImplementedError("batch decoding is not implemented") |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | if batch_size > 1: |
| | | raise NotImplementedError("batch decoding is not implemented") |
| | | if word_lm_train_config is not None: |
| | |
| | | import torch |
| | | from scipy.ndimage import median_filter |
| | | from torch.nn import functional as F |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.frontend.wav_frontend import WavFrontendMel23 |
| | | from funasr.tasks.diar import DiarTask |
| | |
| | | device: str = "cpu", |
| | | dtype: str = "float32", |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build Diarization model |
| | | diar_model, diar_train_args = build_model_from_file( |
| | |
| | | diarization results |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | | speech = torch.tensor(speech) |
| | |
| | | results = self.diar_model.estimate_sequential(**batch) |
| | | |
| | | return results |
| | | |
| | | @staticmethod |
| | | def from_pretrained( |
| | | model_tag: Optional[str] = None, |
| | | **kwargs: Optional[Any], |
| | | ): |
| | | """Build Speech2Diarization instance from the pretrained model. |
| | | |
| | | Args: |
| | | model_tag (Optional[str]): Model tag of the pretrained models. |
| | | Currently, the tags of espnet_model_zoo are supported. |
| | | |
| | | Returns: |
| | | Speech2Diarization: Speech2Diarization instance. |
| | | |
| | | """ |
| | | if model_tag is not None: |
| | | try: |
| | | from espnet_model_zoo.downloader import ModelDownloader |
| | | |
| | | except ImportError: |
| | | logging.error( |
| | | "`espnet_model_zoo` is not installed. " |
| | | "Please install via `pip install -U espnet_model_zoo`." |
| | | ) |
| | | raise |
| | | d = ModelDownloader() |
| | | kwargs.update(**d.download_and_unpack(model_tag)) |
| | | |
| | | return Speech2DiarizationEEND(**kwargs) |
| | | |
| | | |
| | | class Speech2DiarizationSOND: |
| | |
| | | smooth_size: int = 83, |
| | | dur_threshold: float = 10, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # TODO: 1. Build Diarization model |
| | | diar_model, diar_train_args = build_model_from_file( |
| | |
| | | diarization results for each speaker |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | | speech = torch.tensor(speech) |
| | |
| | | results, pse_labels = self.post_processing(logits, profile.shape[1], output_format) |
| | | |
| | | return results, pse_labels |
| | | |
| | | @staticmethod |
| | | def from_pretrained( |
| | | model_tag: Optional[str] = None, |
| | | **kwargs: Optional[Any], |
| | | ): |
| | | """Build Speech2Xvector instance from the pretrained model. |
| | | |
| | | Args: |
| | | model_tag (Optional[str]): Model tag of the pretrained models. |
| | | Currently, the tags of espnet_model_zoo are supported. |
| | | |
| | | Returns: |
| | | Speech2Xvector: Speech2Xvector instance. |
| | | |
| | | """ |
| | | if model_tag is not None: |
| | | try: |
| | | from espnet_model_zoo.downloader import ModelDownloader |
| | | |
| | | except ImportError: |
| | | logging.error( |
| | | "`espnet_model_zoo` is not installed. " |
| | | "Please install via `pip install -U espnet_model_zoo`." |
| | | ) |
| | | raise |
| | | d = ModelDownloader() |
| | | kwargs.update(**d.download_and_unpack(model_tag)) |
| | | |
| | | return Speech2DiarizationSOND(**kwargs) |
| | |
| | | import soundfile |
| | | import torch |
| | | from scipy.signal import medfilt |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.bin.diar_infer import Speech2DiarizationSOND, Speech2DiarizationEEND |
| | | from funasr.datasets.iterable_dataset import load_bytes |
| | |
| | | mode: str = "sond", |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | if batch_size > 1: |
| | |
| | | param_dict: Optional[dict] = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | if batch_size > 1: |
| | |
| | | import numpy as np |
| | | import torch |
| | | from torch.nn.parallel import data_parallel |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.build_utils.build_model_from_file import build_model_from_file |
| | | from funasr.build_utils.build_streaming_iterator import build_streaming_iterator |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | |
| | |
| | | from typing import Union |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.bin.punc_infer import Text2Punc, Text2PuncVADRealtime |
| | | from funasr.torch_utils.set_all_random_seed import set_all_random_seed |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | logging.basicConfig( |
| | | level=log_level, |
| | | format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.build_utils.build_model_from_file import build_model_from_file |
| | | from funasr.torch_utils.device_funcs import to_device |
| | |
| | | streaming: bool = False, |
| | | embedding_node: str = "resnet1_dense", |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # TODO: 1. Build SV model |
| | | sv_model, sv_train_args = build_model_from_file( |
| | |
| | | embedding, ref_embedding, similarity_score |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | self.sv_model.eval() |
| | | embedding = self.calculate_embedding(speech) |
| | | ref_emb, score = None, None |
| | |
| | | score = torch.cosine_similarity(embedding, ref_emb) |
| | | |
| | | results = (embedding, ref_emb, score) |
| | | assert check_return_type(results) |
| | | return results |
| | | |
| | | @staticmethod |
| | | def from_pretrained( |
| | | model_tag: Optional[str] = None, |
| | | **kwargs: Optional[Any], |
| | | ): |
| | | """Build Speech2Xvector instance from the pretrained model. |
| | | |
| | | Args: |
| | | model_tag (Optional[str]): Model tag of the pretrained models. |
| | | Currently, the tags of espnet_model_zoo are supported. |
| | | |
| | | Returns: |
| | | Speech2Xvector: Speech2Xvector instance. |
| | | |
| | | """ |
| | | if model_tag is not None: |
| | | try: |
| | | from espnet_model_zoo.downloader import ModelDownloader |
| | | |
| | | except ImportError: |
| | | logging.error( |
| | | "`espnet_model_zoo` is not installed. " |
| | | "Please install via `pip install -U espnet_model_zoo`." |
| | | ) |
| | | raise |
| | | d = ModelDownloader() |
| | | kwargs.update(**d.download_and_unpack(model_tag)) |
| | | |
| | | return Speech2Xvector(**kwargs) |
| | |
| | | import numpy as np |
| | | import torch |
| | | from kaldiio import WriteHelper |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.bin.sv_infer import Speech2Xvector |
| | | from funasr.build_utils.build_streaming_iterator import build_streaming_iterator |
| | |
| | | param_dict: Optional[dict] = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | |
| | |
| | | embedding_node=embedding_node |
| | | ) |
| | | logging.info("speech2xvector_kwargs: {}".format(speech2xvector_kwargs)) |
| | | speech2xvector = Speech2Xvector.from_pretrained( |
| | | model_tag=model_tag, |
| | | **speech2xvector_kwargs, |
| | | ) |
| | | speech2xvector = Speech2Xvector(**speech2xvector_kwargs) |
| | | speech2xvector.sv_model.eval() |
| | | |
| | | def _forward( |
| | |
| | | from typing import List |
| | | from typing import Optional |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.utils.cli_utils import get_commandline_args |
| | | from funasr.text.build_tokenizer import build_tokenizer |
| | |
| | | cleaner: Optional[str], |
| | | g2p: Optional[str], |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | logging.basicConfig( |
| | | level=log_level, |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from funasr.build_utils.build_model_from_file import build_model_from_file |
| | | from funasr.models.frontend.wav_frontend import WavFrontend |
| | | from funasr.text.token_id_converter import TokenIDConverter |
| | |
| | | dtype: str = "float32", |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | # 1. Build ASR model |
| | | tp_model, tp_train_args = build_model_from_file( |
| | | timestamp_infer_config, timestamp_model_file, cmvn_file=None, device=device, task_name="asr", mode="tp" |
| | |
| | | speech_lengths: Union[torch.Tensor, np.ndarray] = None, |
| | | text_lengths: Union[torch.Tensor, np.ndarray] = None |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.bin.tp_infer import Speech2Timestamp |
| | | from funasr.build_utils.build_streaming_iterator import build_streaming_iterator |
| | |
| | | seg_dict_file: Optional[str] = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.build_utils.build_model_from_file import build_model_from_file |
| | | from funasr.models.frontend.wav_frontend import WavFrontend, WavFrontendOnline |
| | |
| | | dtype: str = "float32", |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build vad model |
| | | vad_model, vad_infer_args = build_model_from_file( |
| | |
| | | text, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | |
| | | text, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from funasr.build_utils.build_streaming_iterator import build_streaming_iterator |
| | | from funasr.fileio.datadir_writer import DatadirWriter |
| | | from funasr.torch_utils.set_all_random_seed import set_all_random_seed |
| | |
| | | num_workers: int = 1, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | if batch_size > 1: |
| | | raise NotImplementedError("batch decoding is not implemented") |
| | | |
| | |
| | | num_workers: int = 1, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | logging.basicConfig( |
| | | level=log_level, |
| | |
| | | |
| | | import torch |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.build_utils.build_model import build_model |
| | | from funasr.models.base_model import FunASRModel |
| | |
| | | device: Device type, "cpu", "cuda", or "cuda:N". |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if config_file is None: |
| | | assert model_file is not None, ( |
| | | "The argument 'model_file' must be provided " |
| | |
| | | import numpy as np |
| | | from torch.utils.data import DataLoader |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.datasets.iterable_dataset import IterableESPnetDataset |
| | | from funasr.datasets.small_datasets.collate_fn import CommonCollateFn |
| | |
| | | train: bool = False, |
| | | ) -> DataLoader: |
| | | """Build DataLoader using iterable dataset""" |
| | | assert check_argument_types() |
| | | |
| | | # preprocess |
| | | if preprocess_fn is not None: |
| | |
| | | import logging |
| | | |
| | | import torch |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | | from funasr.layers.global_mvn import GlobalMVN |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | |
| | | import torch |
| | | import torch.nn |
| | | import torch.optim |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.iterators.abs_iter_factory import AbsIterFactory |
| | | from funasr.main_funcs.average_nbest_models import average_nbest_models |
| | |
| | | |
| | | def build_options(self, args: argparse.Namespace) -> TrainerOptions: |
| | | """Build options consumed by train(), eval()""" |
| | | assert check_argument_types() |
| | | return build_dataclass(TrainerOptions, args) |
| | | |
| | | @classmethod |
| | |
| | | |
| | | def run(self) -> None: |
| | | """Perform training. This method performs the main process of training.""" |
| | | assert check_argument_types() |
| | | # NOTE(kamo): Don't check the type more strictly as far trainer_options |
| | | model = self.model |
| | | optimizers = self.optimizers |
| | |
| | | options: TrainerOptions, |
| | | distributed_option: DistributedOption, |
| | | ) -> Tuple[bool, bool]: |
| | | assert check_argument_types() |
| | | |
| | | grad_noise = options.grad_noise |
| | | accum_grad = options.accum_grad |
| | |
| | | options: TrainerOptions, |
| | | distributed_option: DistributedOption, |
| | | ) -> None: |
| | | assert check_argument_types() |
| | | ngpu = options.ngpu |
| | | # no_forward_run = options.no_forward_run |
| | | distributed = distributed_option.distributed |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.modules.nets_utils import pad_list |
| | | |
| | |
| | | not_sequence: Collection[str] = (), |
| | | max_sample_size=None |
| | | ): |
| | | assert check_argument_types() |
| | | self.float_pad_value = float_pad_value |
| | | self.int_pad_value = int_pad_value |
| | | self.not_sequence = set(not_sequence) |
| | |
| | | ) -> Tuple[List[str], Dict[str, torch.Tensor]]: |
| | | """Concatenate ndarray-list to an array and convert to torch.Tensor. |
| | | """ |
| | | assert check_argument_types() |
| | | uttids = [u for u, _ in data] |
| | | data = [d for _, d in data] |
| | | |
| | |
| | | output[key + "_lengths"] = lens |
| | | |
| | | output = (uttids, output) |
| | | assert check_return_type(output) |
| | | return output |
| | | |
| | | def crop_to_max_size(feature, target_size): |
| | |
| | | not_sequence: Collection[str] = (), |
| | | ) -> Tuple[List[str], Dict[str, torch.Tensor]]: |
| | | # mainly for pre-training |
| | | assert check_argument_types() |
| | | uttids = [u for u, _ in data] |
| | | data = [d for _, d in data] |
| | | |
| | |
| | | output[key + "_lengths"] = lens |
| | | |
| | | output = (uttids, output) |
| | | assert check_return_type(output) |
| | | return output |
| | |
| | | import numpy as np |
| | | import torch |
| | | from torch.utils.data.dataset import Dataset |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.fileio.npy_scp import NpyScpReader |
| | | from funasr.fileio.rand_gen_dataset import FloatRandomGenerateDataset |
| | |
| | | |
| | | class AdapterForSoundScpReader(collections.abc.Mapping): |
| | | def __init__(self, loader, dtype=None): |
| | | assert check_argument_types() |
| | | self.loader = loader |
| | | self.dtype = dtype |
| | | self.rate = None |
| | |
| | | max_cache_fd: int = 0, |
| | | dest_sample_rate: int = 16000, |
| | | ): |
| | | assert check_argument_types() |
| | | if len(path_name_type_list) == 0: |
| | | raise ValueError( |
| | | '1 or more elements are required for "path_name_type_list"' |
| | |
| | | return _mes |
| | | |
| | | def __getitem__(self, uid: Union[str, int]) -> Tuple[str, Dict[str, np.ndarray]]: |
| | | assert check_argument_types() |
| | | |
| | | # Change integer-id to string-id |
| | | if isinstance(uid, int): |
| | |
| | | self.cache[uid] = data |
| | | |
| | | retval = uid, data |
| | | assert check_return_type(retval) |
| | | return retval |
| | |
| | | import torchaudio |
| | | import soundfile |
| | | from torch.utils.data.dataset import IterableDataset |
| | | from typeguard import check_argument_types |
| | | import os.path |
| | | |
| | | from funasr.datasets.dataset import ESPnetDataset |
| | |
| | | int_dtype: str = "long", |
| | | key_file: str = None, |
| | | ): |
| | | assert check_argument_types() |
| | | if len(path_name_type_list) == 0: |
| | | raise ValueError( |
| | | '1 or more elements are required for "path_name_type_list"' |
| | |
| | | |
| | | import sentencepiece as spm |
| | | from torch.utils.data import DataLoader |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.datasets.large_datasets.dataset import Dataset |
| | | from funasr.iterators.abs_iter_factory import AbsIterFactory |
| | |
| | | |
| | | class SentencepiecesTokenizer(AbsTokenizer): |
| | | def __init__(self, model: Union[Path, str]): |
| | | assert check_argument_types() |
| | | self.model = str(model) |
| | | self.sp = None |
| | | |
| | |
| | | import numpy as np |
| | | import scipy.signal |
| | | import soundfile |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.text.build_tokenizer import build_tokenizer |
| | | from funasr.text.cleaner import TextCleaner |
| | |
| | | def _speech_process( |
| | | self, data: Dict[str, Union[str, np.ndarray]] |
| | | ) -> Dict[str, Union[str, np.ndarray]]: |
| | | assert check_argument_types() |
| | | if self.speech_name in data: |
| | | if self.train and (self.rirs is not None or self.noises is not None): |
| | | speech = data[self.speech_name] |
| | |
| | | speech = data[self.speech_name] |
| | | ma = np.max(np.abs(speech)) |
| | | data[self.speech_name] = speech * self.speech_volume_normalize / ma |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | def _text_process( |
| | |
| | | tokens = self.tokenizer.text2tokens(text) |
| | | text_ints = self.token_id_converter.tokens2ids(tokens) |
| | | data[self.text_name] = np.array(text_ints, dtype=np.int64) |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | def __call__( |
| | | self, uid: str, data: Dict[str, Union[str, np.ndarray]] |
| | | ) -> Dict[str, np.ndarray]: |
| | | assert check_argument_types() |
| | | |
| | | data = self._speech_process(data) |
| | | data = self._text_process(data) |
| | |
| | | tokens = self.tokenizer.text2tokens(text) |
| | | text_ints = self.token_id_converter.tokens2ids(tokens) |
| | | data[self.text_name] = np.array(text_ints, dtype=np.int64) |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | |
| | |
| | | tokens = self.tokenizer.text2tokens(text) |
| | | text_ints = self.token_id_converter.tokens2ids(tokens) |
| | | data[text_n] = np.array(text_ints, dtype=np.int64) |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | def __call__( |
| | | self, uid: str, data: Dict[str, Union[str, np.ndarray]] |
| | | ) -> Dict[str, np.ndarray]: |
| | | assert check_argument_types() |
| | | |
| | | if self.speech_name in data: |
| | | # Nothing now: candidates: |
| | |
| | | tokens = self.tokenizer[i].text2tokens(text) |
| | | text_ints = self.token_id_converter[i].tokens2ids(tokens) |
| | | data[text_name] = np.array(text_ints, dtype=np.int64) |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | class CodeMixTokenizerCommonPreprocessor(CommonPreprocessor): |
| | |
| | | def __call__( |
| | | self, uid: str, data: Dict[str, Union[list, str, np.ndarray]] |
| | | ) -> Dict[str, Union[list, np.ndarray]]: |
| | | assert check_argument_types() |
| | | # Split words. |
| | | if isinstance(data[self.text_name], str): |
| | | split_text = self.split_words(data[self.text_name]) |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.modules.nets_utils import pad_list |
| | | |
| | |
| | | not_sequence: Collection[str] = (), |
| | | max_sample_size=None |
| | | ): |
| | | assert check_argument_types() |
| | | self.float_pad_value = float_pad_value |
| | | self.int_pad_value = int_pad_value |
| | | self.not_sequence = set(not_sequence) |
| | |
| | | ) -> Tuple[List[str], Dict[str, torch.Tensor]]: |
| | | """Concatenate ndarray-list to an array and convert to torch.Tensor. |
| | | """ |
| | | assert check_argument_types() |
| | | uttids = [u for u, _ in data] |
| | | data = [d for _, d in data] |
| | | |
| | |
| | | output[key + "_lengths"] = lens |
| | | |
| | | output = (uttids, output) |
| | | assert check_return_type(output) |
| | | return output |
| | | |
| | | def crop_to_max_size(feature, target_size): |
| | |
| | | import numpy as np |
| | | import torch |
| | | from torch.utils.data.dataset import Dataset |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.fileio.npy_scp import NpyScpReader |
| | | from funasr.fileio.sound_scp import SoundScpReader |
| | |
| | | |
| | | class AdapterForSoundScpReader(collections.abc.Mapping): |
| | | def __init__(self, loader, dtype=None): |
| | | assert check_argument_types() |
| | | self.loader = loader |
| | | self.dtype = dtype |
| | | self.rate = None |
| | |
| | | speed_perturb: Union[list, tuple] = None, |
| | | mode: str = "train", |
| | | ): |
| | | assert check_argument_types() |
| | | if len(path_name_type_list) == 0: |
| | | raise ValueError( |
| | | '1 or more elements are required for "path_name_type_list"' |
| | |
| | | return _mes |
| | | |
| | | def __getitem__(self, uid: Union[str, int]) -> Tuple[str, Dict[str, np.ndarray]]: |
| | | assert check_argument_types() |
| | | |
| | | # Change integer-id to string-id |
| | | if isinstance(uid, int): |
| | |
| | | data[name] = value |
| | | |
| | | retval = uid, data |
| | | assert check_return_type(retval) |
| | | return retval |
| | |
| | | from typing import Tuple |
| | | from typing import Union |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.read_text import load_num_sequence_text |
| | | from funasr.samplers.abs_sampler import AbsSampler |
| | |
| | | drop_last: bool = False, |
| | | padding: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | assert batch_bins > 0 |
| | | if sort_batch != "ascending" and sort_batch != "descending": |
| | | raise ValueError( |
| | |
| | | import numpy as np |
| | | import scipy.signal |
| | | import soundfile |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.text.build_tokenizer import build_tokenizer |
| | | from funasr.text.cleaner import TextCleaner |
| | |
| | | def _speech_process( |
| | | self, data: Dict[str, Union[str, np.ndarray]] |
| | | ) -> Dict[str, Union[str, np.ndarray]]: |
| | | assert check_argument_types() |
| | | if self.speech_name in data: |
| | | if self.train and (self.rirs is not None or self.noises is not None): |
| | | speech = data[self.speech_name] |
| | |
| | | speech = data[self.speech_name] |
| | | ma = np.max(np.abs(speech)) |
| | | data[self.speech_name] = speech * self.speech_volume_normalize / ma |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | def _text_process( |
| | |
| | | tokens = self.tokenizer.text2tokens(text) |
| | | text_ints = self.token_id_converter.tokens2ids(tokens) |
| | | data[self.text_name] = np.array(text_ints, dtype=np.int64) |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | def __call__( |
| | | self, uid: str, data: Dict[str, Union[str, np.ndarray]] |
| | | ) -> Dict[str, np.ndarray]: |
| | | assert check_argument_types() |
| | | |
| | | data = self._speech_process(data) |
| | | data = self._text_process(data) |
| | |
| | | tokens = self.tokenizer.text2tokens(text) |
| | | text_ints = self.token_id_converter.tokens2ids(tokens) |
| | | data[self.text_name] = np.array(text_ints, dtype=np.int64) |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | |
| | |
| | | tokens = self.tokenizer.text2tokens(text) |
| | | text_ints = self.token_id_converter.tokens2ids(tokens) |
| | | data[text_n] = np.array(text_ints, dtype=np.int64) |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | def __call__( |
| | | self, uid: str, data: Dict[str, Union[str, np.ndarray]] |
| | | ) -> Dict[str, np.ndarray]: |
| | | assert check_argument_types() |
| | | |
| | | if self.speech_name in data: |
| | | # Nothing now: candidates: |
| | |
| | | tokens = self.tokenizer[i].text2tokens(text) |
| | | text_ints = self.token_id_converter[i].tokens2ids(tokens) |
| | | data[text_name] = np.array(text_ints, dtype=np.int64) |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | |
| | |
| | | def __call__( |
| | | self, uid: str, data: Dict[str, Union[list, str, np.ndarray]] |
| | | ) -> Dict[str, Union[list, np.ndarray]]: |
| | | assert check_argument_types() |
| | | # Split words. |
| | | if isinstance(data[self.text_name], str): |
| | | split_text = self.split_words(data[self.text_name]) |
| | |
| | | import json |
| | | from typing import Union, Dict |
| | | from pathlib import Path |
| | | from typeguard import check_argument_types |
| | | |
| | | import os |
| | | import logging |
| | |
| | | calib_num: int = 200, |
| | | model_revision: str = None, |
| | | ): |
| | | assert check_argument_types() |
| | | self.set_all_random_seed(0) |
| | | |
| | | self.cache_dir = cache_dir |
| | |
| | | from typing import Union |
| | | import warnings |
| | | |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | |
| | | class DatadirWriter: |
| | |
| | | """ |
| | | |
| | | def __init__(self, p: Union[Path, str]): |
| | | assert check_argument_types() |
| | | self.path = Path(p) |
| | | self.chilidren = {} |
| | | self.fd = None |
| | |
| | | return self |
| | | |
| | | def __getitem__(self, key: str) -> "DatadirWriter": |
| | | assert check_argument_types() |
| | | if self.fd is not None: |
| | | raise RuntimeError("This writer points out a file") |
| | | |
| | |
| | | self.has_children = True |
| | | |
| | | retval = self.chilidren[key] |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | def __setitem__(self, key: str, value: str): |
| | | assert check_argument_types() |
| | | if self.has_children: |
| | | raise RuntimeError("This writer points out a directory") |
| | | if key in self.keys: |
| | |
| | | from typing import Union |
| | | |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.read_text import read_2column_text |
| | | |
| | |
| | | """ |
| | | |
| | | def __init__(self, outdir: Union[Path, str], scpfile: Union[Path, str]): |
| | | assert check_argument_types() |
| | | self.dir = Path(outdir) |
| | | self.dir.mkdir(parents=True, exist_ok=True) |
| | | scpfile = Path(scpfile) |
| | |
| | | """ |
| | | |
| | | def __init__(self, fname: Union[Path, str]): |
| | | assert check_argument_types() |
| | | self.fname = Path(fname) |
| | | self.data = read_2column_text(fname) |
| | | |
| | |
| | | from typing import Union |
| | | |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.read_text import load_num_sequence_text |
| | | |
| | |
| | | dtype: Union[str, np.dtype] = "float32", |
| | | loader_type: str = "csv_int", |
| | | ): |
| | | assert check_argument_types() |
| | | shape_file = Path(shape_file) |
| | | self.utt2shape = load_num_sequence_text(shape_file, loader_type) |
| | | self.dtype = np.dtype(dtype) |
| | |
| | | dtype: Union[str, np.dtype] = "int64", |
| | | loader_type: str = "csv_int", |
| | | ): |
| | | assert check_argument_types() |
| | | shape_file = Path(shape_file) |
| | | self.utt2shape = load_num_sequence_text(shape_file, loader_type) |
| | | self.dtype = np.dtype(dtype) |
| | |
| | | from typing import List |
| | | from typing import Union |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | |
| | | def read_2column_text(path: Union[Path, str]) -> Dict[str, str]: |
| | |
| | | {'key1': '/some/path/a.wav', 'key2': '/some/path/b.wav'} |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | data = {} |
| | | with Path(path).open("r", encoding="utf-8") as f: |
| | |
| | | >>> d = load_num_sequence_text('text') |
| | | >>> np.testing.assert_array_equal(d["key1"], np.array([1, 2, 3])) |
| | | """ |
| | | assert check_argument_types() |
| | | if loader_type == "text_int": |
| | | delimiter = " " |
| | | dtype = int |
| | |
| | | import numpy as np |
| | | import soundfile |
| | | import librosa |
| | | from typeguard import check_argument_types |
| | | |
| | | import torch |
| | | import torchaudio |
| | |
| | | dest_sample_rate: int = 16000, |
| | | speed_perturb: Union[list, tuple] = None, |
| | | ): |
| | | assert check_argument_types() |
| | | self.fname = fname |
| | | self.dtype = dtype |
| | | self.always_2d = always_2d |
| | |
| | | format="wav", |
| | | dtype=None, |
| | | ): |
| | | assert check_argument_types() |
| | | self.dir = Path(outdir) |
| | | self.dir.mkdir(parents=True, exist_ok=True) |
| | | scpfile = Path(scpfile) |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.iterators.abs_iter_factory import AbsIterFactory |
| | | from funasr.iterators.sequence_iter_factory import SequenceIterFactory |
| | |
| | | collate_fn=None, |
| | | pin_memory: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | assert all(len(x) == 1 for x in batches), "batch-size must be 1" |
| | | |
| | | self.per_sample_iter_factory = SequenceIterFactory( |
| | |
| | | from typing import Iterator |
| | | |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.iterators.abs_iter_factory import AbsIterFactory |
| | | |
| | |
| | | seed: int = 0, |
| | | shuffle: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | self.build_funcs = list(build_funcs) |
| | | self.seed = seed |
| | | self.shuffle = shuffle |
| | |
| | | |
| | | import numpy as np |
| | | from torch.utils.data import DataLoader |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.iterators.abs_iter_factory import AbsIterFactory |
| | | from funasr.samplers.abs_sampler import AbsSampler |
| | |
| | | collate_fn=None, |
| | | pin_memory: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | if not isinstance(batches, AbsSampler): |
| | | self.sampler = RawSampler(batches) |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | |
| | | norm_vars: bool = True, |
| | | eps: float = 1.0e-20, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.norm_means = norm_means |
| | | self.norm_vars = norm_vars |
| | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typing import Optional |
| | | from typing import Tuple |
| | | |
| | |
| | | hop_length: int = 128, |
| | | center: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | |
| | | self.win_length = win_length |
| | |
| | | import math |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typing import Sequence |
| | | from typing import Union |
| | | |
| | |
| | | dim: Union[int, str] = "time", |
| | | replace_with_zero: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | if isinstance(mask_width_range, int): |
| | | mask_width_range = (0, mask_width_range) |
| | | if len(mask_width_range) != 2: |
| | |
| | | dim: Union[int, str] = "time", |
| | | replace_with_zero: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | if isinstance(mask_width_ratio_range, float): |
| | | mask_width_ratio_range = (0.0, mask_width_ratio_range) |
| | | if len(mask_width_ratio_range) != 2: |
| | |
| | | replace_with_zero: bool = True, |
| | | lfr_rate: int = 1, |
| | | ): |
| | | assert check_argument_types() |
| | | if isinstance(mask_width_range, int): |
| | | mask_width_range = (0, mask_width_range) |
| | | if len(mask_width_range) != 2: |
| | |
| | | """Sinc convolutions.""" |
| | | import math |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typing import Union |
| | | |
| | | |
| | |
| | | window_func: Window function on the filter, one of ["hamming", "none"]. |
| | | fs (str, int, float): Sample rate of the input data |
| | | """ |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | window_funcs = { |
| | | "none": self.none_window, |
| | |
| | | torch.Tensor: Filter start frequencíes. |
| | | torch.Tensor: Filter stop frequencies. |
| | | """ |
| | | assert check_argument_types() |
| | | # min and max bandpass edge frequencies |
| | | min_frequency = torch.tensor(30.0) |
| | | max_frequency = torch.tensor(fs * 0.5) |
| | |
| | | torch.Tensor: Filter start frequencíes. |
| | | torch.Tensor: Filter stop frequencíes. |
| | | """ |
| | | assert check_argument_types() |
| | | # min and max BARK center frequencies by approximation |
| | | min_center_frequency = torch.tensor(70.0) |
| | | max_center_frequency = torch.tensor(fs * 0.45) |
| | |
| | | |
| | | import torch |
| | | from torch_complex.tensor import ComplexTensor |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.layers.complex_utils import is_complex |
| | |
| | | normalized: bool = False, |
| | | onesided: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.n_fft = n_fft |
| | | if win_length is None: |
| | |
| | | from typing import Tuple |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | |
| | | norm_vars: bool = False, |
| | | eps: float = 1.0e-20, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.norm_means = norm_means |
| | | self.norm_vars = norm_vars |
| | |
| | | from io import BytesIO |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typing import Collection |
| | | |
| | | from funasr.train.reporter import Reporter |
| | |
| | | nbest: Number of best model files to be averaged |
| | | suffix: A suffix added to the averaged model file name |
| | | """ |
| | | assert check_argument_types() |
| | | if isinstance(nbest, int): |
| | | nbests = [nbest] |
| | | else: |
| | |
| | | import torch |
| | | from torch.nn.parallel import data_parallel |
| | | from torch.utils.data import DataLoader |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.datadir_writer import DatadirWriter |
| | | from funasr.fileio.npy_scp import NpyScpWriter |
| | |
| | | This method is used before executing train(). |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | npy_scp_writers = {} |
| | | for itr, mode in zip([train_iter, valid_iter], ["train", "valid"]): |
| | |
| | | |
| | | import torch |
| | | import torch.nn.functional as F |
| | | from typeguard import check_argument_types |
| | | |
| | | |
| | | class CTC(torch.nn.Module): |
| | |
| | | reduce: bool = True, |
| | | ignore_nan_grad: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | eprojs = encoder_output_size |
| | | self.dropout_rate = dropout_rate |
| | |
| | | if ignore_nan_grad: |
| | | logging.warning("ignore_nan_grad option is not supported for warp_ctc") |
| | | self.ctc_loss = warp_ctc.CTCLoss(size_average=True, reduce=reduce) |
| | | |
| | | elif self.ctc_type == "gtnctc": |
| | | from espnet.nets.pytorch_backend.gtn_ctc import GTNCTCLossFunction |
| | | |
| | | self.ctc_loss = GTNCTCLossFunction.apply |
| | | else: |
| | | raise ValueError( |
| | | f'ctc_type must be "builtin" or "warpctc": {self.ctc_type}' |
| | |
| | | from typing import Tuple |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | | from funasr.models.encoder.abs_encoder import AbsEncoder |
| | |
| | | preencoder: Optional[AbsPreEncoder], |
| | | encoder: AbsEncoder, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | super().__init__() |
| | | |
| | |
| | | |
| | | from funasr.modules.streaming_utils import utils as myutils |
| | | from funasr.models.decoder.transformer_decoder import BaseTransformerDecoder |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.attention import MultiHeadedAttentionSANMDecoder, MultiHeadedAttentionCrossAtt |
| | | from funasr.modules.embedding import PositionalEncoding |
| | |
| | | kernel_size: int = 21, |
| | | sanm_shfit: int = 0, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__( |
| | | vocab_size=vocab_size, |
| | | encoder_output_size=encoder_output_size, |
| | |
| | | import numpy as np |
| | | import torch |
| | | import torch.nn.functional as F |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.modules.nets_utils import to_device |
| | |
| | | att_conf: dict = get_default_kwargs(build_attention_list), |
| | | ): |
| | | # FIXME(kamo): The parts of num_spk should be refactored more more more |
| | | assert check_argument_types() |
| | | if rnn_type not in {"lstm", "gru"}: |
| | | raise ValueError(f"Not supported: rnn_type={rnn_type}") |
| | | |
| | |
| | | from typing import List, Optional, Tuple |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.beam_search.beam_search_transducer import Hypothesis |
| | | from funasr.models.specaug.specaug import SpecAug |
| | |
| | | """Construct a RNNDecoder object.""" |
| | | super().__init__() |
| | | |
| | | assert check_argument_types() |
| | | |
| | | if rnn_type not in ("lstm", "gru"): |
| | | raise ValueError(f"Not supported: rnn_type={rnn_type}") |
| | |
| | | |
| | | from funasr.modules.streaming_utils import utils as myutils |
| | | from funasr.models.decoder.transformer_decoder import BaseTransformerDecoder |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.attention import MultiHeadedAttentionSANMDecoder, MultiHeadedAttentionCrossAtt |
| | | from funasr.modules.embedding import PositionalEncoding |
| | |
| | | tf2torch_tensor_name_prefix_tf: str = "seq2seq/decoder", |
| | | embed_tensor_name_prefix_tf: str = None, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__( |
| | | vocab_size=vocab_size, |
| | | encoder_output_size=encoder_output_size, |
| | |
| | | tf2torch_tensor_name_prefix_torch: str = "decoder", |
| | | tf2torch_tensor_name_prefix_tf: str = "seq2seq/decoder", |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__( |
| | | vocab_size=vocab_size, |
| | | encoder_output_size=encoder_output_size, |
| | |
| | | |
| | | import torch |
| | | from torch import nn |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.decoder.abs_decoder import AbsDecoder |
| | | from funasr.modules.attention import MultiHeadedAttention |
| | |
| | | pos_enc_class=PositionalEncoding, |
| | | normalize_before: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | attention_dim = encoder_output_size |
| | | |
| | |
| | | normalize_before: bool = True, |
| | | concat_after: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__( |
| | | vocab_size=vocab_size, |
| | | encoder_output_size=encoder_output_size, |
| | |
| | | concat_after: bool = False, |
| | | embeds_id: int = -1, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__( |
| | | vocab_size=vocab_size, |
| | | encoder_output_size=encoder_output_size, |
| | |
| | | conv_kernel_length: Sequence[int] = (11, 11, 11, 11, 11, 11), |
| | | conv_usebias: int = False, |
| | | ): |
| | | assert check_argument_types() |
| | | if len(conv_kernel_length) != num_blocks: |
| | | raise ValueError( |
| | | "conv_kernel_length must have equal number of values to num_blocks: " |
| | |
| | | conv_kernel_length: Sequence[int] = (11, 11, 11, 11, 11, 11), |
| | | conv_usebias: int = False, |
| | | ): |
| | | assert check_argument_types() |
| | | if len(conv_kernel_length) != num_blocks: |
| | | raise ValueError( |
| | | "conv_kernel_length must have equal number of values to num_blocks: " |
| | |
| | | conv_kernel_length: Sequence[int] = (11, 11, 11, 11, 11, 11), |
| | | conv_usebias: int = False, |
| | | ): |
| | | assert check_argument_types() |
| | | if len(conv_kernel_length) != num_blocks: |
| | | raise ValueError( |
| | | "conv_kernel_length must have equal number of values to num_blocks: " |
| | |
| | | conv_kernel_length: Sequence[int] = (11, 11, 11, 11, 11, 11), |
| | | conv_usebias: int = False, |
| | | ): |
| | | assert check_argument_types() |
| | | if len(conv_kernel_length) != num_blocks: |
| | | raise ValueError( |
| | | "conv_kernel_length must have equal number of values to num_blocks: " |
| | |
| | | pos_enc_class=PositionalEncoding, |
| | | normalize_before: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | attention_dim = encoder_output_size |
| | | |
| | |
| | | normalize_before: bool = True, |
| | | concat_after: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__( |
| | | vocab_size=vocab_size, |
| | | encoder_output_size=encoder_output_size, |
| | |
| | | from typing import Union |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | | from funasr.losses.label_smoothing_loss import ( |
| | |
| | | preencoder: Optional[AbsPreEncoder] = None, |
| | | postencoder: Optional[AbsPostEncoder] = None, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | import numpy as np |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | | from funasr.models.ctc import CTC |
| | |
| | | preencoder: Optional[AbsPreEncoder] = None, |
| | | postencoder: Optional[AbsPostEncoder] = None, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | from typing import Union |
| | | import logging |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.e2e_asr_common import ErrorCalculator |
| | | from funasr.modules.nets_utils import th_accuracy |
| | |
| | | sym_blank: str = "<blank>", |
| | | preencoder: Optional[AbsPreEncoder] = None, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert rnnt_decoder is None, "Not implemented" |
| | | |
| | |
| | | import torch |
| | | import random |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | | from funasr.losses.label_smoothing_loss import ( |
| | |
| | | postencoder: Optional[AbsPostEncoder] = None, |
| | | use_1st_decoder_loss: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | postencoder: Optional[AbsPostEncoder] = None, |
| | | use_1st_decoder_loss: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | preencoder: Optional[AbsPreEncoder] = None, |
| | | postencoder: Optional[AbsPostEncoder] = None, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | preencoder: Optional[AbsPreEncoder] = None, |
| | | postencoder: Optional[AbsPostEncoder] = None, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | preencoder: Optional[AbsPreEncoder] = None, |
| | | postencoder: Optional[AbsPostEncoder] = None, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | |
| | | import torch |
| | | from packaging.version import parse as V |
| | | from typeguard import check_argument_types |
| | | from funasr.losses.label_smoothing_loss import ( |
| | | LabelSmoothingLoss, # noqa: H301 |
| | | ) |
| | |
| | | ) -> None: |
| | | """Construct an ESPnetASRTransducerModel object.""" |
| | | super().__init__() |
| | | |
| | | assert check_argument_types() |
| | | |
| | | # The following labels ID are reserved: 0 (blank) and vocab_size - 1 (sos/eos) |
| | | self.blank_id = 0 |
| | |
| | | """Construct an ESPnetASRTransducerModel object.""" |
| | | super().__init__() |
| | | |
| | | assert check_argument_types() |
| | | |
| | | # The following labels ID are reserved: 0 (blank) and vocab_size - 1 (sos/eos) |
| | | self.blank_id = 0 |
| | | |
| | |
| | | loss_lm = self._calc_lm_loss(decoder_out, target) |
| | | |
| | | loss_trans = loss_trans_utt + loss_trans_chunk |
| | | loss_ctc = loss_ctc + loss_ctc_chunk |
| | | loss_ctc = loss_ctc + loss_ctc_chunk |
| | | loss_ctc = loss_att + loss_att_chunk |
| | | |
| | | loss = ( |
| | |
| | | ignore_label=self.ignore_id, |
| | | ) |
| | | |
| | | return loss_att, acc_att |
| | | return loss_att, acc_att |
| | |
| | | import numpy as np |
| | | import torch |
| | | import torch.nn as nn |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.frontend.wav_frontend import WavFrontendMel23 |
| | | from funasr.modules.eend_ola.encoder import EENDOLATransformerEncoder |
| | |
| | | mapping_dict=None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | super().__init__() |
| | | self.frontend = frontend |
| | |
| | | import numpy as np |
| | | import torch |
| | | from torch.nn import functional as F |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.nets_utils import to_device |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | |
| | | inter_score_loss_weight: float = 0.0, |
| | | inputs_type: str = "raw", |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | super().__init__() |
| | | |
| | |
| | | |
| | | import torch |
| | | import torch.nn.functional as F |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | | from funasr.losses.label_smoothing_loss import ( |
| | |
| | | sym_blank: str = "<blank>", |
| | | extract_feats_in_collect_stats: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | from typing import Union |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | | from funasr.losses.label_smoothing_loss import ( |
| | |
| | | pooling_layer: torch.nn.Module, |
| | | decoder: AbsDecoder, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | super().__init__() |
| | | # note that eos is the same as sos (equivalent ID) |
| | |
| | | |
| | | import torch |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.encoder.abs_encoder import AbsEncoder |
| | | from funasr.models.frontend.abs_frontend import AbsFrontend |
| | |
| | | predictor_bias: int = 0, |
| | | token_list=None, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | super().__init__() |
| | | # note that eos is the same as sos (equivalent ID) |
| | |
| | | from typing import Union |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.e2e_asr_common import ErrorCalculator |
| | | from funasr.modules.nets_utils import th_accuracy |
| | |
| | | postencoder: Optional[AbsPostEncoder] = None, |
| | | encoder1_encoder2_joint_training: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | |
| | | import torch |
| | | from torch import nn |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.ctc import CTC |
| | | from funasr.modules.attention import ( |
| | |
| | | interctc_use_conditioning: bool = False, |
| | | stochastic_depth_rate: Union[float, List[float]] = 0.0, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = output_size |
| | | |
| | |
| | | """Construct an Encoder object.""" |
| | | super().__init__() |
| | | |
| | | assert check_argument_types() |
| | | |
| | | self.embed = StreamingConvInput( |
| | | input_size, |
| | |
| | | import torch.distributed as dist |
| | | import torch.nn as nn |
| | | import torch.nn.functional as F |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.encoder.abs_encoder import AbsEncoder |
| | | from funasr.modules.data2vec.data_utils import compute_mask_indices |
| | |
| | | # FP16 optimization |
| | | required_seq_len_multiple: int = 2, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | |
| | | # ConvFeatureExtractionModel |
| | |
| | | import torch |
| | | from torch import nn |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.encoder.encoder_layer_mfcca import EncoderLayer |
| | | from funasr.modules.nets_utils import get_activation |
| | |
| | | cnn_module_kernel: int = 31, |
| | | padding_idx: int = -1, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = output_size |
| | | |
| | |
| | | import torch |
| | | import torch.nn as nn |
| | | from torch.nn import functional as F |
| | | from typeguard import check_argument_types |
| | | import numpy as np |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.modules.layer_norm import LayerNorm |
| | |
| | | tf2torch_tensor_name_prefix_torch: str = "speaker_encoder", |
| | | tf2torch_tensor_name_prefix_tf: str = "EAND/speaker_encoder", |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = num_units |
| | | |
| | |
| | | import torch |
| | | import torch.nn as nn |
| | | from torch.nn import functional as F |
| | | from typeguard import check_argument_types |
| | | import numpy as np |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.modules.layer_norm import LayerNorm |
| | |
| | | import torch |
| | | import torch.nn as nn |
| | | from funasr.modules.streaming_utils.chunk_utilis import overlap_chunk |
| | | from typeguard import check_argument_types |
| | | import numpy as np |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.modules.attention import MultiHeadSelfAttention, MultiHeadedAttentionSANM |
| | |
| | | tf2torch_tensor_name_prefix_tf: str = "seq2seq/encoder", |
| | | out_units=None, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = output_size |
| | | |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.modules.rnn.encoders import RNN |
| | |
| | | dropout: float = 0.0, |
| | | subsample: Optional[Sequence[int]] = (2, 2, 1, 1), |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = output_size |
| | | self.rnn_type = rnn_type |
| | |
| | | import torch.nn as nn |
| | | import torch.nn.functional as F |
| | | from funasr.modules.streaming_utils.chunk_utilis import overlap_chunk |
| | | from typeguard import check_argument_types |
| | | import numpy as np |
| | | from funasr.torch_utils.device_funcs import to_device |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | |
| | | tf2torch_tensor_name_prefix_torch: str = "encoder", |
| | | tf2torch_tensor_name_prefix_tf: str = "seq2seq/encoder", |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = output_size |
| | | |
| | |
| | | tf2torch_tensor_name_prefix_torch: str = "encoder", |
| | | tf2torch_tensor_name_prefix_tf: str = "seq2seq/encoder", |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = output_size |
| | | |
| | |
| | | sanm_shfit : int = 0, |
| | | selfattention_layer_type: str = "sanm", |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = output_size |
| | | |
| | |
| | | |
| | | import torch |
| | | from torch import nn |
| | | from typeguard import check_argument_types |
| | | import logging |
| | | |
| | | from funasr.models.ctc import CTC |
| | |
| | | interctc_layer_idx: List[int] = [], |
| | | interctc_use_conditioning: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = output_size |
| | | |
| | |
| | | import numpy as np |
| | | import torch |
| | | from torch_complex.tensor import ComplexTensor |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.layers.log_mel import LogMel |
| | | from funasr.layers.stft import Stft |
| | |
| | | apply_stft: bool = True, |
| | | use_channel: int = None, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | if isinstance(fs, str): |
| | | fs = humanfriendly.parse_size(fs) |
| | |
| | | cmvn_file: str = None, |
| | | mc: bool = True |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | if isinstance(fs, str): |
| | | fs = humanfriendly.parse_size(fs) |
| | |
| | | from funasr.models.frontend.s3prl import S3prlFrontend |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typing import Tuple |
| | | |
| | | |
| | |
| | | self, frontends=None, align_method="linear_projection", proj_dim=100, fs=16000 |
| | | ): |
| | | |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.align_method = ( |
| | | align_method # fusing method : linear_projection only for now |
| | |
| | | |
| | | import humanfriendly |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.frontend.abs_frontend import AbsFrontend |
| | | from funasr.modules.frontends.frontend import Frontend |
| | |
| | | download_dir: str = None, |
| | | multilayer_feature: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | if isinstance(fs, str): |
| | | fs = humanfriendly.parse_size(fs) |
| | |
| | | import torch |
| | | import torchaudio.compliance.kaldi as kaldi |
| | | from torch.nn.utils.rnn import pad_sequence |
| | | from typeguard import check_argument_types |
| | | |
| | | import funasr.models.frontend.eend_ola_feature as eend_ola_feature |
| | | from funasr.models.frontend.abs_frontend import AbsFrontend |
| | |
| | | snip_edges: bool = True, |
| | | upsacle_samples: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.fs = fs |
| | | self.window = window |
| | |
| | | snip_edges: bool = True, |
| | | upsacle_samples: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.fs = fs |
| | | self.window = window |
| | |
| | | lfr_m: int = 1, |
| | | lfr_n: int = 1, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.fs = fs |
| | | self.frame_length = frame_length |
| | |
| | | |
| | | from funasr.models.frontend.abs_frontend import AbsFrontend |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typing import Tuple |
| | | |
| | | |
| | |
| | | padding: Padding (placeholder, currently not implemented). |
| | | fs: Sampling rate (placeholder for compatibility, not used). |
| | | """ |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.fs = fs |
| | | self.win_length = win_length |
| | |
| | | |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.models.postencoder.abs_postencoder import AbsPostEncoder |
| | | from typeguard import check_argument_types |
| | | from typing import Tuple |
| | | |
| | | import copy |
| | |
| | | model_name_or_path: str, |
| | | ): |
| | | """Initialize the module.""" |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | |
| | | if not is_transformers_available: |
| | |
| | | """Linear Projection.""" |
| | | |
| | | from funasr.models.preencoder.abs_preencoder import AbsPreEncoder |
| | | from typeguard import check_argument_types |
| | | from typing import Tuple |
| | | |
| | | import torch |
| | |
| | | output_size: int, |
| | | ): |
| | | """Initialize the module.""" |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | |
| | | self.output_dim = output_size |
| | |
| | | from funasr.layers.sinc_conv import SincConv |
| | | import humanfriendly |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typing import Optional |
| | | from typing import Tuple |
| | | from typing import Union |
| | |
| | | windowing_type: Choice of windowing function. |
| | | scale_type: Choice of filter-bank initialization scale. |
| | | """ |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | if isinstance(fs, str): |
| | | fs = humanfriendly.parse_size(fs) |
| | |
| | | dropout_probability: Dropout probability. |
| | | shape (tuple, list): Shape of input tensors. |
| | | """ |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | if shape is None: |
| | | shape = (0, 2, 1) |
| | |
| | | |
| | | import torch |
| | | import torch.nn as nn |
| | | from typeguard import check_argument_types |
| | | from funasr.train.abs_model import AbsLM |
| | | |
| | | |
| | |
| | | rnn_type: str = "lstm", |
| | | ignore_id: int = 0, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | |
| | | ninp = unit |
| | |
| | | import numpy as np |
| | | import time |
| | | import torch |
| | | from eend.utils.power import create_powerlabel |
| | | from funasr.modules.eend_ola.utils.power import create_powerlabel |
| | | from itertools import combinations |
| | | |
| | | metrics = [ |
| | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | |
| | | class SGD(torch.optim.SGD): |
| | |
| | | weight_decay: float = 0.0, |
| | | nesterov: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__( |
| | | params, |
| | | lr=lr, |
| New file |
| | |
| | | # FunASR离线文件转写服务开发指南 |
| | | |
| | | FunASR提供可一键本地或者云端服务器部署的中文离线文件转写服务,内核为FunASR已开源runtime-SDK。FunASR-runtime结合了达摩院语音实验室在Modelscope社区开源的语音端点检测(VAD)、Paraformer-large语音识别(ASR)、标点检测(PUNC) 等相关能力,可以准确、高效的对音频进行高并发转写。 |
| | | |
| | | 本文档为FunASR离线文件转写服务开发指南。如果您想快速体验离线文件转写服务,请参考FunASR离线文件转写服务一键部署示例([点击此处](./SDK_tutorial.md))。 |
| | | |
| | | ## Docker安装 |
| | | |
| | | 下述步骤为手动安装docker及docker镜像的步骤,如您docker镜像已启动,可以忽略本步骤: |
| | | |
| | | ### docker环境安装 |
| | | ```shell |
| | | # Ubuntu: |
| | | curl -fsSL https://test.docker.com -o test-docker.sh |
| | | sudo sh test-docker.sh |
| | | # Debian: |
| | | curl -fsSL https://get.docker.com -o get-docker.sh |
| | | sudo sh get-docker.sh |
| | | # CentOS: |
| | | curl -fsSL https://get.docker.com | bash -s docker --mirror Aliyun |
| | | # MacOS: |
| | | brew install --cask --appdir=/Applications docker |
| | | ``` |
| | | |
| | | 安装详见:https://alibaba-damo-academy.github.io/FunASR/en/installation/docker.html |
| | | |
| | | ### docker启动 |
| | | |
| | | ```shell |
| | | sudo systemctl start docker |
| | | ``` |
| | | |
| | | ### 镜像拉取及启动 |
| | | |
| | | 通过下述命令拉取并启动FunASR runtime-SDK的docker镜像: |
| | | |
| | | ```shell |
| | | sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.0.1 |
| | | |
| | | sudo docker run -p 10095:10095 -it --privileged=true -v /root:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.0.1 |
| | | ``` |
| | | |
| | | 命令参数介绍: |
| | | ```text |
| | | -p <宿主机端口>:<映射到docker端口> |
| | | 如示例,宿主机(ecs)端口10095映射到docker端口10095上。前提是确保ecs安全规则打开了10095端口。 |
| | | -v <宿主机路径>:<挂载至docker路径> |
| | | 如示例,宿主机路径/root挂载至docker路径/workspace/models |
| | | ``` |
| | | |
| | | |
| | | ## 服务端启动 |
| | | |
| | | docker启动之后,启动 funasr-wss-server服务程序: |
| | | |
| | | funasr-wss-server支持从Modelscope下载模型,需要同时设置模型下载地址(--download-model-dir)及model ID(--model-dir、--vad-dir、--punc-dir),示例如下:
| | | ```shell |
| | | cd /workspace/FunASR/funasr/runtime/websocket/build/bin |
| | | ./funasr-wss-server \ |
| | | --download-model-dir /workspace/models \ |
| | | --model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx \ |
| | | --vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx \ |
| | | --punc-dir damo/punc_ct-transformer_zh-cn-common-vocab272727-onnx \ |
| | | --decoder-thread-num 32 \ |
| | | --io-thread-num 8 \ |
| | | --port 10095 \ |
| | | --certfile ../../../ssl_key/server.crt \ |
| | | --keyfile ../../../ssl_key/server.key |
| | | ``` |
| | | 命令参数介绍: |
| | | ```text |
| | | --download-model-dir #模型下载地址,通过设置model ID从Modelscope下载模型 |
| | | --model-dir # modelscope model ID |
| | | --quantize # True为量化ASR模型,False为非量化ASR模型,默认是True |
| | | --vad-dir # modelscope model ID |
| | | --vad-quant # True为量化VAD模型,False为非量化VAD模型,默认是True |
| | | --punc-dir # modelscope model ID |
| | | --punc-quant # True为量化PUNC模型,False为非量化PUNC模型,默认是True |
| | | --port # 服务端监听的端口号,默认为 10095 |
| | | --decoder-thread-num # 服务端启动的推理线程数,默认为 8 |
| | | --io-thread-num # 服务端启动的IO线程数,默认为 1 |
| | | --certfile <string> # ssl的证书文件,默认为:../../../ssl_key/server.crt |
| | | --keyfile <string> # ssl的密钥文件,默认为:../../../ssl_key/server.key |
| | | ``` |
| | | |
| | | funasr-wss-server同时也支持从本地路径加载模型(本地模型资源准备详见[模型资源准备](#anchor-1)),需要设置模型本地路径(--model-dir、--vad-dir、--punc-dir),示例如下:
| | | ```shell |
| | | cd /workspace/FunASR/funasr/runtime/websocket/build/bin |
| | | |
| | | ./funasr-wss-server \ |
| | | --model-dir /workspace/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx \ |
| | | --vad-dir /workspace/models/damo/speech_fsmn_vad_zh-cn-16k-common-onnx \ |
| | | --punc-dir /workspace/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-onnx \ |
| | | --decoder-thread-num 32 \ |
| | | --io-thread-num 8 \ |
| | | --port 10095 \ |
| | | --certfile ../../../ssl_key/server.crt \ |
| | | --keyfile ../../../ssl_key/server.key |
| | | ``` |
| | | 命令参数介绍: |
| | | ```text |
| | | --model-dir # ASR模型路径,默认为:/workspace/models/asr |
| | | --quantize # True为量化ASR模型,False为非量化ASR模型,默认是True |
| | | --vad-dir # VAD模型路径,默认为:/workspace/models/vad |
| | | --vad-quant # True为量化VAD模型,False为非量化VAD模型,默认是True |
| | | --punc-dir # PUNC模型路径,默认为:/workspace/models/punc |
| | | --punc-quant # True为量化PUNC模型,False为非量化PUNC模型,默认是True |
| | | --port # 服务端监听的端口号,默认为 10095 |
| | | --decoder-thread-num # 服务端启动的推理线程数,默认为 8 |
| | | --io-thread-num # 服务端启动的IO线程数,默认为 1 |
| | | --certfile <string> # ssl的证书文件,默认为:../../../ssl_key/server.crt |
| | | --keyfile <string> # ssl的密钥文件,默认为:../../../ssl_key/server.key |
| | | ``` |
| | | |
| | | ## <a id="anchor-1">模型资源准备</a> |
| | | |
| | | 如果您选择通过funasr-wss-server从Modelscope下载模型,可以跳过本步骤。 |
| | | |
| | | FunASR离线文件转写服务中的vad、asr和punc模型资源均来自Modelscope,模型地址详见下表: |
| | | |
| | | | 模型 | Modelscope链接 | |
| | | |------|------------------------------------------------------------------------------------------------------------------| |
| | | | ASR  | https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary |
| | | | VAD  | https://www.modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary                           |
| | | | PUNC | https://www.modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/summary | |
| | | |
| | | 离线文件转写服务中部署的是量化后的ONNX模型,下面介绍下如何导出ONNX模型及其量化:您可以选择从Modelscope导出ONNX模型、从本地文件导出ONNX模型或者从finetune后的资源导出模型: |
| | | |
| | | ### 从Modelscope导出ONNX模型 |
| | | |
| | | 从Modelscope网站下载对应model name的模型,然后导出量化后的ONNX模型: |
| | | |
| | | ```shell |
| | | python -m funasr.export.export_model --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type onnx --quantize True |
| | | ``` |
| | | |
| | | 命令参数介绍: |
| | | ```text |
| | | --model-name Modelscope上的模型名称,例如damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch |
| | | --export-dir ONNX模型导出地址 |
| | | --type 模型类型,目前支持 ONNX、torch |
| | | --quantize int8模型量化 |
| | | ``` |
| | | |
| | | ### 从本地文件导出ONNX模型 |
| | | |
| | | 设置model name为模型本地路径,导出量化后的ONNX模型: |
| | | |
| | | ```shell |
| | | python -m funasr.export.export_model --model-name /workspace/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type onnx --quantize True |
| | | ``` |
| | | 命令参数介绍: |
| | | ```text |
| | | --model-name 模型本地路径,例如/workspace/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch |
| | | --export-dir ONNX模型导出地址 |
| | | --type 模型类型,目前支持 ONNX、torch |
| | | --quantize int8模型量化 |
| | | ``` |
| | | |
| | | ### 从finetune后的资源导出模型 |
| | | |
| | | 假如您想部署finetune后的模型,可以参考如下步骤: |
| | | |
| | | 将您finetune后需要部署的模型(例如10epoch.pb),重命名为model.pb,并将原modelscope中模型model.pb替换掉,假如替换后的模型路径为/path/to/finetune/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch,通过下述命令把finetune后的模型转成onnx模型: |
| | | |
| | | ```shell |
| | | python -m funasr.export.export_model --model-name /path/to/finetune/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type onnx --quantize True |
| | | ``` |
| | | |
| | | ## 客户端启动 |
| | | |
| | | 在服务器上完成FunASR离线文件转写服务部署以后,可以通过如下的步骤来测试和使用离线文件转写服务。目前FunASR-bin支持多种方式启动客户端,如下是基于python-client、c++-client的命令行实例及自定义客户端Websocket通信协议: |
| | | |
| | | ### python-client |
| | | ```shell |
| | | python wss_client_asr.py --host "127.0.0.1" --port 10095 --mode offline --audio_in "./data/wav.scp" --send_without_sleep --output_dir "./results" |
| | | ``` |
| | | 命令参数介绍: |
| | | ```text |
| | | --host # 服务端ip地址,本机测试可设置为 127.0.0.1 |
| | | --port # 服务端监听端口号 |
| | | --audio_in # 音频输入,输入可以是:wav路径 或者 wav.scp路径(kaldi格式的wav list,wav_id \t wav_path) |
| | | --output_dir # 识别结果输出路径 |
| | | --ssl # 是否使用SSL加密,默认使用 |
| | | --mode # offline模式 |
| | | ``` |
| | | |
| | | ### c++-client: |
| | | ```shell |
| | | ./funasr-wss-client --server-ip 127.0.0.1 --port 10095 --wav-path test.wav --thread-num 1 --is-ssl 1
| | | ``` |
| | | 命令参数介绍: |
| | | ```text |
| | | --server-ip # 服务端ip地址,本机测试可设置为 127.0.0.1 |
| | | --port # 服务端监听端口号 |
| | | --wav-path # 音频输入,输入可以是:wav路径 或者 wav.scp路径(kaldi格式的wav list,wav_id \t wav_path) |
| | | --thread-num # 客户端线程数 |
| | | --is-ssl # 是否使用SSL加密,默认使用 |
| | | ``` |
| | | |
| | | ### 自定义客户端: |
| | | |
| | | 如果您想定义自己的client,websocket通信协议为: |
| | | |
| | | ```text |
| | | # 首次通信 |
| | | {"mode": "offline", "wav_name": wav_name, "is_speaking": True} |
| | | # 发送wav数据 |
| | | bytes数据 |
| | | # 发送结束标志 |
| | | {"is_speaking": False} |
| | | ``` |
| | | |
| | | ## 如何定制服务部署 |
| | | |
| | | FunASR-runtime的代码已开源,如果服务端和客户端不能很好的满足您的需求,您可以根据自己的需求进行进一步的开发: |
| | | ### c++ 客户端: |
| | | |
| | | https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/websocket |
| | | |
| | | ### python 客户端: |
| | | |
| | | https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/websocket |
| | | ### c++ 服务端: |
| | | |
| | | #### VAD |
| | | ```c++ |
| | | // VAD模型的使用分为FsmnVadInit和FsmnVadInfer两个步骤: |
| | | FUNASR_HANDLE vad_hanlde=FsmnVadInit(model_path, thread_num); |
| | | // 其中:model_path 包含"model-dir"、"quantize",thread_num为onnx线程数; |
| | | FUNASR_RESULT result=FsmnVadInfer(vad_hanlde, wav_file.c_str(), NULL, 16000); |
| | | // 其中:vad_hanlde为FsmnVadInit返回值,wav_file为音频路径,sampling_rate为采样率(默认16k)
| | | ``` |
| | | |
| | | 使用示例详见:https://github.com/alibaba-damo-academy/FunASR/blob/main/funasr/runtime/onnxruntime/bin/funasr-onnx-offline-vad.cpp |
| | | |
| | | #### ASR |
| | | ```text |
| | | // ASR模型的使用分为FunOfflineInit和FunOfflineInfer两个步骤: |
| | | FUNASR_HANDLE asr_hanlde=FunOfflineInit(model_path, thread_num); |
| | | // 其中:model_path 包含"model-dir"、"quantize",thread_num为onnx线程数; |
| | | FUNASR_RESULT result=FunOfflineInfer(asr_hanlde, wav_file.c_str(), RASR_NONE, NULL, 16000); |
| | | // 其中:asr_hanlde为FunOfflineInit返回值,wav_file为音频路径,sampling_rate为采样率(默认16k) |
| | | ``` |
| | | |
| | | 使用示例详见:https://github.com/alibaba-damo-academy/FunASR/blob/main/funasr/runtime/onnxruntime/bin/funasr-onnx-offline.cpp |
| | | |
| | | #### PUNC |
| | | ```text |
| | | // PUNC模型的使用分为CTTransformerInit和CTTransformerInfer两个步骤: |
| | | FUNASR_HANDLE punc_hanlde=CTTransformerInit(model_path, thread_num); |
| | | // 其中:model_path 包含"model-dir"、"quantize",thread_num为onnx线程数; |
| | | FUNASR_RESULT result=CTTransformerInfer(punc_hanlde, txt_str.c_str(), RASR_NONE, NULL); |
| | | // 其中:punc_hanlde为CTTransformerInit返回值,txt_str为文本 |
| | | ``` |
| | | 使用示例详见:https://github.com/alibaba-damo-academy/FunASR/blob/main/funasr/runtime/onnxruntime/bin/funasr-onnx-offline-punc.cpp |
| New file |
| | |
| | | # FunASR离线文件转写服务便捷部署教程 |
| | | |
| | | FunASR提供可便捷本地或者云端服务器部署的离线文件转写服务,内核为FunASR已开源runtime-SDK。集成了达摩院语音实验室在Modelscope社区开源的语音端点检测(VAD)、Paraformer-large语音识别(ASR)、标点恢复(PUNC) 等相关能力,可以准确、高效的对音频进行高并发转写。 |
| | | |
| | | ## 环境安装与启动服务 |
| | | |
| | | 环境准备与配置([点击此处](./aliyun_server_tutorial.md)) |
| | | ### 获得脚本工具并一键部署 |
| | | |
| | | 通过以下命令运行一键部署服务,按照提示逐步完成FunASR runtime-SDK服务的部署和运行。目前暂时仅支持Linux环境,其他环境参考文档[高阶开发指南]()。 |
| | | 受限于网络,funasr-runtime-deploy.sh一键部署工具的下载可能不顺利,遇到数秒还未下载进入一键部署工具的情况,请Ctrl + C 终止后再次运行以下命令。 |
| | | |
| | | ```shell |
| | | curl -O https://raw.githubusercontent.com/alibaba-damo-academy/FunASR-APP/main/TransAudio/funasr-runtime-deploy.sh ; sudo bash funasr-runtime-deploy.sh install |
| | | ``` |
| | | |
| | | #### 启动过程配置详解 |
| | | |
| | | ##### 选择FunASR Docker镜像 |
| | | 推荐选择latest使用我们的最新镜像,也可选择历史版本。 |
| | | ```text |
| | | [1/10] |
| | | Please choose the Docker image. |
| | | 1) registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-latest |
| | | 2) registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.0.1 |
| | | Enter your choice: 1 |
| | | You have chosen the Docker image: registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-latest |
| | | ``` |
| | | |
| | | ##### 选择ASR/VAD/PUNC模型 |
| | | |
| | | 你可以选择ModelScope中的模型,也可以选<model_name>自行填入ModelScope中的模型名,将会在Docker运行时自动下载。同时也可以选择<model_path>填入宿主机中的本地模型路径。 |
| | | |
| | | ```text |
| | | [2/10] |
| | | Please input [y/n] to confirm whether to automatically download model_id in ModelScope or use a local model. |
| | | [y] With the model in ModelScope, the model will be automatically downloaded to Docker(/workspace/models). |
| | | [n] Use the models on the localhost, the directory where the model is located will be mapped to Docker. |
| | | Setting confirmation[Y/n]: |
| | | You have chosen to use the model in ModelScope, please set the model ID in the next steps, and the model will be automatically downloaded during the run. |
| | | |
| | | [2.1/10] |
| | | Please select ASR model_id in ModelScope from the list below. |
| | | 1) damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx |
| | | 2) model_name |
| | | 3) model_path |
| | | Enter your choice: 1 |
| | | The model ID is damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx |
| | | The model dir in Docker is /workspace/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx |
| | | |
| | | [2.2/10] |
| | | Please select VAD model_id in ModelScope from the list below. |
| | | 1) damo/speech_fsmn_vad_zh-cn-16k-common-onnx |
| | | 2) model_name |
| | | 3) model_path |
| | | Enter your choice: 1 |
| | | The model ID is damo/speech_fsmn_vad_zh-cn-16k-common-onnx |
| | | The model dir in Docker is /workspace/models/speech_fsmn_vad_zh-cn-16k-common-onnx |
| | | |
| | | [2.3/10] |
| | | Please select PUNC model_id in ModelScope from the list below. |
| | | 1) damo/punc_ct-transformer_zh-cn-common-vocab272727-onnx |
| | | 2) model_name |
| | | 3) model_path |
| | | Enter your choice: 1 |
| | | The model ID is damo/punc_ct-transformer_zh-cn-common-vocab272727-onnx |
| | | The model dir in Docker is /workspace/models/punc_ct-transformer_zh-cn-common-vocab272727-onnx |
| | | ``` |
| | | |
| | | ##### 输入宿主机中FunASR服务可执行程序路径 |
| | | |
| | | 输入FunASR服务可执行程序的宿主机路径,Docker运行时将自动挂载到Docker中运行。默认不输入的情况下将指定Docker中默认的/workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server。 |
| | | |
| | | ```text |
| | | [3/10] |
| | | Please enter the path to the excutor of the FunASR service on the localhost. |
| | | If not set, the default /workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server in Docker is used. |
| | | Setting the path to the excutor of the FunASR service on the localhost: |
| | | Corresponding, the path of FunASR in Docker is /workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server |
| | | ``` |
| | | |
| | | ##### 设置宿主机提供给FunASR的端口 |
| | | 设置提供给Docker的宿主机端口,默认为10095。请保证此端口可用。 |
| | | ```text |
| | | [4/10] |
| | | Please input the opened port in the host used for FunASR server. |
| | | Default: 10095 |
| | | Setting the opened host port [1-65535]: |
| | | The port of the host is 10095 |
| | | ``` |
| | | |
| | | ##### 设置Docker中提供给FunASR的端口 |
| | | 设置Docker中FunASR服务使用的端口,默认为10095,此端口将与step1.4中设置的宿主机端口进行映射。
| | | ```text |
| | | [5/10]
| | | Please input port for docker mapped. |
| | | Default: 10095, the opened port of current host is 10095 |
| | | Setting the port in Docker for FunASR server [1-65535]: |
| | | The port in Docker for FunASR server is 10095 |
| | | ``` |
| | | |
| | | ##### 设置FunASR服务的推理线程数 |
| | | 设置FunASR服务的推理线程数,默认为宿主机核数,同时自动设置服务的IO线程数,为推理线程数的四分之一。 |
| | | ```text |
| | | [6/10] |
| | | Please input thread number for FunASR decoder. |
| | | Default: 1 |
| | | Setting the number of decoder thread: |
| | | |
| | | The number of decoder threads is 1 |
| | | The number of IO threads is 1 |
| | | ``` |
| | | |
| | | ##### 所有设置参数展示及确认 |
| | | |
| | | 展示前面6步设置的参数,确认则将所有参数存储到/var/funasr/config,并开始启动Docker,否则提示用户进行重新设置。 |
| | | |
| | | ```text |
| | | [7/10] |
| | | Show parameters of FunASR server setting and confirm to run ... |
| | | |
| | | The current Docker image is : registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-latest |
| | | The model will be automatically downloaded to the directory : /workspace/models |
| | | The ASR model_id used : damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx |
| | | The ASR model directory corresponds to the directory in Docker : /workspace/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx |
| | | The VAD model_id used : damo/speech_fsmn_vad_zh-cn-16k-common-onnx |
| | | The VAD model directory corresponds to the directory in Docker : /workspace/models/speech_fsmn_vad_zh-cn-16k-common-onnx |
| | | The PUNC model_id used : damo/punc_ct-transformer_zh-cn-common-vocab272727-onnx |
| | | The PUNC model directory corresponds to the directory in Docker: /workspace/models/punc_ct-transformer_zh-cn-common-vocab272727-onnx |
| | | |
| | | The path in the docker of the FunASR service executor : /workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server |
| | | Set the host port used for use by the FunASR service : 10095 |
| | | Set the docker port used by the FunASR service : 10095 |
| | | Set the number of threads used for decoding the FunASR service : 1 |
| | | Set the number of threads used for IO the FunASR service : 1 |
| | | |
| | | Please input [y/n] to confirm the parameters. |
| | | [y] Verify that these parameters are correct and that the service will run. |
| | | [n] The parameters set are incorrect, it will be rolled out, please rerun. |
| | | read confirmation[y/n]: |
| | | |
| | | Will run FunASR server later ... |
| | | Parameters are stored in the file /var/funasr/config |
| | | ``` |
| | | |
| | | ##### 检查Docker服务 |
| | | |
| | | 检查当前宿主机是否安装了Docker服务,若未安装,则安装Docker并启动。 |
| | | |
| | | ```text |
| | | [8/10] |
| | | Start install docker for ubuntu |
| | | Get docker installer: curl -fsSL https://test.docker.com -o test-docker.sh |
| | | Get docker run: sudo sh test-docker.sh |
| | | # Executing docker install script, commit: c2de0811708b6d9015ed1a2c80f02c9b70c8ce7b |
| | | + sh -c apt-get update -qq >/dev/null |
| | | + sh -c DEBIAN_FRONTEND=noninteractive apt-get install -y -qq apt-transport-https ca-certificates curl >/dev/null |
| | | + sh -c install -m 0755 -d /etc/apt/keyrings |
| | | + sh -c curl -fsSL "https://download.docker.com/linux/ubuntu/gpg" | gpg --dearmor --yes -o /etc/apt/keyrings/docker.gpg |
| | | + sh -c chmod a+r /etc/apt/keyrings/docker.gpg |
| | | + sh -c echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu focal test" > /etc/apt/sources.list.d/docker.list |
| | | + sh -c apt-get update -qq >/dev/null |
| | | + sh -c DEBIAN_FRONTEND=noninteractive apt-get install -y -qq docker-ce docker-ce-cli containerd.io docker-compose-plugin docker-ce-rootless-extras docker-buildx-plugin >/dev/null |
| | | + sh -c docker version |
| | | Client: Docker Engine - Community |
| | | Version: 24.0.2 |
| | | |
| | | ... |
| | | ... |
| | | |
| | | Docker install success, start docker server. |
| | | ``` |
| | | |
| | | ##### 下载FunASR Docker镜像 |
| | | |
| | | 下载并更新step1.1中选择的FunASR Docker镜像。 |
| | | |
| | | ```text |
| | | [9/10] |
| | | Pull docker image(registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-latest)... |
| | | funasr-runtime-cpu-0.0.1: Pulling from funasr_repo/funasr |
| | | 7608715873ec: Pull complete |
| | | 3e1014c56f38: Pull complete |
| | | |
| | | ... |
| | | ... |
| | | ``` |
| | | |
| | | ##### 启动FunASR Docker |
| | | |
| | | 启动FunASR Docker,等待step1.2选择的模型下载完成并启动FunASR服务。 |
| | | |
| | | ```text |
| | | [10/10] |
| | | Construct command and run docker ... |
| | | 943d8f02b4e5011b71953a0f6c1c1b9bc5aff63e5a96e7406c83e80943b23474 |
| | | |
| | | Loading models: |
| | | [ASR ][Done ][==================================================][100%][1.10MB/s][v1.2.1] |
| | | [VAD ][Done ][==================================================][100%][7.26MB/s][v1.2.0] |
| | | [PUNC][Done ][==================================================][100%][ 474kB/s][v1.1.7] |
| | | The service has been started. |
| | | If you want to see an example of how to use the client, you can run sudo bash funasr-runtime-deploy.sh -c . |
| | | ``` |
| | | |
| | | #### 启动已经部署过的FunASR服务 |
| | | 一键部署后若出现重启电脑等关闭Docker的动作,可通过如下命令直接启动FunASR服务,启动配置为上次一键部署的设置。 |
| | | |
| | | ```shell |
| | | sudo bash funasr-runtime-deploy.sh start |
| | | ``` |
| | | |
| | | #### 关闭FunASR服务 |
| | | |
| | | ```shell |
| | | sudo bash funasr-runtime-deploy.sh stop |
| | | ``` |
| | | |
| | | #### 重启FunASR服务 |
| | | |
| | | 根据上次一键部署的设置重启启动FunASR服务。 |
| | | ```shell |
| | | sudo bash funasr-runtime-deploy.sh restart |
| | | ``` |
| | | |
| | | #### 替换模型并重启FunASR服务 |
| | | |
| | | 替换正在使用的模型,并重新启动FunASR服务。模型需为ModelScope中的ASR/VAD/PUNC模型。 |
| | | |
| | | ```shell |
| | | sudo bash scripts/funasr-runtime-deploy.sh update model <model ID in ModelScope> |
| | | |
| | | e.g |
| | | sudo bash scripts/funasr-runtime-deploy.sh update model damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx |
| | | ``` |
| | | |
| | | ### 测试与使用离线文件转写服务 |
| | | |
| | | 在服务器上完成FunASR服务部署以后,可以通过如下的步骤来测试和使用离线文件转写服务。目前分别支持Python、C++、Java版本client的的命令行运行,以及可在浏览器可直接体验的html网页版本,更多语言client支持参考文档【FunASR高阶开发指南】。 |
| | | funasr-runtime-deploy.sh运行结束后,可通过命令以交互的形式自动下载测试样例samples到当前目录的funasr_samples中,并设置参数运行: |
| | | |
| | | ```shell |
| | | sudo bash funasr-runtime-deploy.sh client |
| | | ``` |
| | | |
| | | 可选择提供的Python和Linux C++范例程序,以Python范例为例: |
| | | |
| | | ```text |
| | | Will download sample tools for the client to show how speech recognition works. |
| | | Please select the client you want to run. |
| | | 1) Python |
| | | 2) Linux_Cpp |
| | | Enter your choice: 1 |
| | | |
| | | Please enter the IP of server, default(127.0.0.1): |
| | | Please enter the port of server, default(10095): |
| | | Please enter the audio path, default(/root/funasr_samples/audio/asr_example.wav): |
| | | |
| | | Run pip3 install click>=8.0.4 |
| | | Looking in indexes: http://mirrors.cloud.aliyuncs.com/pypi/simple/ |
| | | Requirement already satisfied: click>=8.0.4 in /usr/local/lib/python3.8/dist-packages (8.1.3) |
| | | |
| | | Run pip3 install -r /root/funasr_samples/python/requirements_client.txt |
| | | Looking in indexes: http://mirrors.cloud.aliyuncs.com/pypi/simple/ |
| | | Requirement already satisfied: websockets in /usr/local/lib/python3.8/dist-packages (from -r /root/funasr_samples/python/requirements_client.txt (line 1)) (11.0.3) |
| | | |
| | | Run python3 /root/funasr_samples/python/wss_client_asr.py --host 127.0.0.1 --port 10095 --mode offline --audio_in /root/funasr_samples/audio/asr_example.wav --send_without_sleep --output_dir ./funasr_samples/python |
| | | |
| | | ... |
| | | ... |
| | | |
| | | pid0_0: 欢迎大家来体验达摩院推出的语音识别模型。 |
| | | Exception: sent 1000 (OK); then received 1000 (OK) |
| | | end |
| | | |
| | | If failed, you can try (python3 /root/funasr_samples/python/wss_client_asr.py --host 127.0.0.1 --port 10095 --mode offline --audio_in /root/funasr_samples/audio/asr_example.wav --send_without_sleep --output_dir ./funasr_samples/python) in your Shell. |
| | | |
| | | ``` |
| | | |
| | | #### python-client |
| | | 若想直接运行client进行测试,可参考如下简易说明,以python版本为例: |
| | | |
| | | ```shell |
| | | python3 wss_client_asr.py --host "127.0.0.1" --port 10095 --mode offline --audio_in "../audio/asr_example.wav" --send_without_sleep --output_dir "./results" |
| | | ``` |
| | | |
| | | 命令参数说明: |
| | | ```text |
| | | --host 为FunASR runtime-SDK服务部署机器ip,默认为本机ip(127.0.0.1),如果client与服务不在同一台服务器,需要改为部署机器ip |
| | | --port 10095 部署端口号 |
| | | --mode offline表示离线文件转写 |
| | | --audio_in 需要进行转写的音频文件,支持文件路径,文件列表wav.scp |
| | | --output_dir 识别结果保存路径 |
| | | ``` |
| | | |
| | | #### cpp-client |
| | | |
| | | ```shell |
| | | export LD_LIBRARY_PATH=/root/funasr_samples/cpp/libs:$LD_LIBRARY_PATH |
| | | /root/funasr_samples/cpp/funasr-wss-client --server-ip 127.0.0.1 --port 10095 --wav-path /root/funasr_samples/audio/asr_example.wav |
| | | ``` |
| | | |
| | | 命令参数说明: |
| | | |
| | | ```text |
| | | --server-ip 为FunASR runtime-SDK服务部署机器ip,默认为本机ip(127.0.0.1),如果client与服务不在同一台服务器,需要改为部署机器ip |
| | | --port 10095 部署端口号 |
| | | --wav-path 需要进行转写的音频文件,支持文件路径 |
| | | ``` |
| | | |
| | | ### 视频demo |
| | | |
| | | [点击此处]() |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| File was renamed from docs/runtime/aliyun_server_tutorial.md |
| | |
| | | # 云服务器申请教程 |
| | | |
| | | 我们以阿里云([点此链接](https://www.aliyun.com/))为例,演示如何申请云服务器 |
| | | |
| | | ## 服务器配置 |
| | | |
| | | 用户可以根据自己的业务需求,选择合适的服务器配置,推荐配置为: |
| | | - 配置一(高配):X86架构,32/64核8369CPU,内存8G以上; |
| | | - 配置二:X86架构,32/64核8132CPU,内存8G以上; |
| | | - 配置三:X86架构,32/64核8163CPU,内存8G以上; |
| | | |
| | | 详细性能测试报告:[点此链接](../../funasr/runtime/python/benchmark_onnx_cpp.md) |
| | | |
| | |
| | | ### 登陆个人账号 |
| | | 打开阿里云官网[点此链接](https://www.aliyun.com/),注册并登陆个人账号,如下图标号1所示 |
| | | |
| | | <img src="images/aliyun1.png" width="500"/> |
| | | <img src="images/aliyun1.png" width="900"/> |
| | | |
| | | ### 免费试用 |
| | | |
| | | 点击如上图所示标号2,出现如下界面 |
| | | |
| | | <img src="images/aliyun2.png" width="500"/> |
| | | <img src="images/aliyun2.png" width="900"/> |
| | | |
| | | 再点击标号3,出现如下界面 |
| | | |
| | | <img src="images/aliyun3.png" width="500"/> |
| | | <img src="images/aliyun3.png" width="900"/> |
| | | |
| | | ### 申请ECS实例 |
| | | |
| | | 个人账号可以免费试用1核2GB内存,每月750小时,企业认证后,可以免费试用2核8GB内存 3个月,根据账号情况,点击上图中标号4,出现如下界面: |
| | | |
| | | <img src="images/aliyun4.png" width="500"/> |
| | | <img src="images/aliyun4.png" width="900"/> |
| | | |
| | | 依次按照上图标号5、6、7选择后,点击立即试用,出现如下界面 |
| | | |
| | | <img src="images/aliyun5.png" width="500"/> |
| | | <img src="images/aliyun5.png" width="900"/> |
| | | |
| | | ### 开放服务端口 |
| | | |
| | | 点击安全组(标号9),出现如下界面 |
| | | |
| | | <img src="images/aliyun6.png" width="500"/> |
| | | <img src="images/aliyun6.png" width="900"/> |
| | | |
| | | 再点击标号10,出现如下界面 |
| | | |
| | | <img src="images/aliyun7.png" width="500"/> |
| | | <img src="images/aliyun7.png" width="900"/> |
| | | |
| | | 点击手动添加(标号11),分别按照标号12、13填入内容,后点击保存(标号14),再点击实例(标号15),出现如下界面 |
| | | |
| | | <img src="images/aliyun8.png" width="500"/> |
| | | <img src="images/aliyun8.png" width="900"/> |
| | | |
| | | ### 启动ECS实例 |
| | | |
| | | 点击实例名称(标号16),出现如下页面 |
| | | |
| | | <img src="images/aliyun9.png" width="500"/> |
| | | <img src="images/aliyun9.png" width="900"/> |
| | | |
| | | 点击远程启动(标号17),出现页面后,点击立即登陆,出现如下界面 |
| | | |
| | | <img src="images/aliyun10.png" width="500"/> |
| | | <img src="images/aliyun10.png" width="900"/> |
| | | |
| | | 首次登陆需要点击重置密码(上图中绿色箭头),设置好密码后,输入密码(标号18),点击确认(标号19) |
| | | |
| | | <img src="images/aliyun11.png" width="500"/> |
| | | <img src="images/aliyun11.png" width="900"/> |
| | | |
| | | 首次登陆会遇到上图所示问题,点击标号20,根据文档操作后,重新登陆,登陆成功后出现如下页面 |
| | | |
| | | <img src="images/aliyun12.png" width="500"/> |
| | | <img src="images/aliyun12.png" width="900"/> |
| | | |
| | | 上图表示已经成功申请了云服务器,后续可以根据FunASR runtime-SDK部署文档进行一键部署([点击此处]()) |
| | | |
| | |
| | | from typing import Any, Dict, Iterable, List, NamedTuple, Set, Tuple, Union |
| | | |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | import kaldi_native_fbank as knf |
| | | |
| | | root_dir = Path(__file__).resolve().parent |
| | |
| | | dither: float = 1.0, |
| | | **kwargs, |
| | | ) -> None: |
| | | check_argument_types() |
| | | |
| | | opts = knf.FbankOptions() |
| | | opts.frame_opts.samp_freq = fs |
| | |
| | | import numpy as np |
| | | import yaml |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | import warnings |
| | | |
| | |
| | | class TokenIDConverter(): |
| | | def __init__(self, token_list: Union[List, str], |
| | | ): |
| | | check_argument_types() |
| | | |
| | | self.token_list = token_list |
| | | self.unk_symbol = token_list[-1] |
| | |
| | | space_symbol: str = "<space>", |
| | | remove_non_linguistic_symbols: bool = False, |
| | | ): |
| | | check_argument_types() |
| | | |
| | | self.space_symbol = space_symbol |
| | | self.non_linguistic_symbols = self.load_symbols(symbol_value) |
| | |
| | | import copy |
| | | |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | import kaldi_native_fbank as knf |
| | | |
| | | root_dir = Path(__file__).resolve().parent |
| | |
| | | dither: float = 1.0, |
| | | **kwargs, |
| | | ) -> None: |
| | | check_argument_types() |
| | | |
| | | opts = knf.FbankOptions() |
| | | opts.frame_opts.samp_freq = fs |
| | |
| | | import yaml |
| | | from onnxruntime import (GraphOptimizationLevel, InferenceSession, |
| | | SessionOptions, get_available_providers, get_device) |
| | | from typeguard import check_argument_types |
| | | |
| | | import warnings |
| | | |
| | |
| | | class TokenIDConverter(): |
| | | def __init__(self, token_list: Union[List, str], |
| | | ): |
| | | check_argument_types() |
| | | |
| | | self.token_list = token_list |
| | | self.unk_symbol = token_list[-1] |
| | |
| | | space_symbol: str = "<space>", |
| | | remove_non_linguistic_symbols: bool = False, |
| | | ): |
| | | check_argument_types() |
| | | |
| | | self.space_symbol = space_symbol |
| | | self.non_linguistic_symbols = self.load_symbols(symbol_value) |
| New file |
| | |
| | | # FunASR runtime-SDK |
| | | 中文文档([点击此处](./readme_cn.md)) |
| | | |
| | | FunASR is a speech recognition framework developed by the Speech Lab of DAMO Academy, which integrates industrial-level models in the fields of speech endpoint detection, speech recognition, punctuation segmentation, and more. |
| | | It has attracted many developers to participate in experiencing and developing. To solve the last mile of industrial landing and integrate models into business, we have developed the FunASR runtime-SDK. The SDK supports several service deployments, including: |
| | | |
| | | - File transcription service, Mandarin, CPU version, done |
| | | - File transcription service, Mandarin, GPU version, in progress |
| | | - File transcription service, English, in progress |
| | | - Streaming speech recognition service, in progress |
| | | - and more. |
| | | |
| | | |
| | | ## File Transcription Service, Mandarin (CPU) |
| | | |
| | | Currently, the FunASR runtime-SDK-0.0.1 version supports the deployment of file transcription service, Mandarin (CPU version), with a complete speech recognition chain that can transcribe tens of hours of audio into punctuated text, and supports recognition for more than a hundred concurrent streams. |
| | | |
| | | To meet the needs of different users, we have prepared different tutorials with text and images for both novice and advanced developers. |
| | | |
| | | ### Technical Principles |
| | | |
| | | The technical principles and documentation behind FunASR explain the underlying technology, recognition accuracy, computational efficiency, and core advantages of the framework, including convenience, high precision, high efficiency, and support for long audio chains. For detailed information, please refer to the documentation available by [docs](). |
| | | |
| | | ### Deployment Tutorial |
| | | |
| | | The documentation mainly targets novice users who have no need for modifications or customization. It supports downloading model deployments from modelscope and also supports deploying models that users have fine-tuned. For detailed tutorials, please refer to [docs](). |
| | | |
| | | ### Advanced Development Guide |
| | | |
| | | The documentation mainly targets advanced developers who require modifications and customization of the service. It supports downloading model deployments from modelscope and also supports deploying models that users have fine-tuned. For detailed information, please refer to the documentation available by [docs]() |
| New file |
| | |
| | | # FunASR runtime-SDK |
| | | |
| | | English Version([docs](./readme.md)) |
| | | |
| | | FunASR是由达摩院语音实验室开源的一款语音识别基础框架,集成了语音端点检测、语音识别、标点断句等领域的工业级别模型,吸引了众多开发者参与体验和开发。为了解决工业落地的最后一公里,将模型集成到业务中去,我们开发了FunASR runtime-SDK。 |
| | | SDK 支持以下几种服务部署: |
| | | |
| | | - 中文离线文件转写服务(CPU版本),已完成 |
| | | - 中文离线文件转写服务(GPU版本),进行中 |
| | | - 英文离线转写服务,进行中 |
| | | - 流式语音识别服务,进行中 |
| | | - 。。。 |
| | | |
| | | |
| | | ## 中文离线文件转写服务部署(CPU版本) |
| | | |
| | | 目前FunASR runtime-SDK-0.0.1版本已支持中文语音离线文件服务部署(CPU版本),拥有完整的语音识别链路,可以将几十个小时的音频识别成带标点的文字,而且支持上百路并发同时进行识别。 |
| | | |
| | | 为了支持不同用户的需求,我们分别针对小白与高阶开发者,准备了不同的图文教程: |
| | | |
| | | ### 技术原理揭秘 |
| | | |
| | | 文档介绍了背后技术原理,识别准确率,计算效率等,以及核心优势介绍:便捷、高精度、高效率、长音频链路,详细文档参考([点击此处]()) |
| | | |
| | | ### 便捷部署教程 |
| | | |
| | | 文档主要针对小白用户,无修改定制需求,支持从modelscope中下载模型部署,也支持用户finetune后的模型部署,详细教程参考([点击此处](./docs/SDK_tutorial_cn.md)) |
| | | |
| | | ### 高阶开发指南 |
| | | |
| | | 文档主要针对高阶开发者,需要对服务进行修改与定制,支持从modelscope中下载模型部署,也支持用户finetune后的模型部署,详细文档参考([点击此处](./docs/SDK_advanced_guide_cn.md)) |
| | |
| | | lfr_n: int = 6, |
| | | dither: float = 1.0 |
| | | ) -> None: |
| | | # check_argument_types() |
| | | |
| | | self.fs = fs |
| | | self.window = window |
| | |
| | | from typing import Tuple |
| | | from typing import Union |
| | | |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.samplers.abs_sampler import AbsSampler |
| | | from funasr.samplers.folded_batch_sampler import FoldedBatchSampler |
| | |
| | | padding: Whether sequences are input as a padded tensor or not. |
| | | used for "numel" mode |
| | | """ |
| | | assert check_argument_types() |
| | | if len(shape_files) == 0: |
| | | raise ValueError("No shape file are given") |
| | | |
| | |
| | | |
| | | else: |
| | | raise ValueError(f"Not supported: {type}") |
| | | assert check_return_type(retval) |
| | | return retval |
| | |
| | | from typing import Tuple |
| | | from typing import Union |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.read_text import load_num_sequence_text |
| | | from funasr.fileio.read_text import read_2column_text |
| | |
| | | drop_last: bool = False, |
| | | utt2category_file: str = None, |
| | | ): |
| | | assert check_argument_types() |
| | | assert batch_size > 0 |
| | | if sort_batch != "ascending" and sort_batch != "descending": |
| | | raise ValueError( |
| | |
| | | from typing import Tuple |
| | | from typing import Union |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.read_text import load_num_sequence_text |
| | | from funasr.samplers.abs_sampler import AbsSampler |
| | |
| | | drop_last: bool = False, |
| | | padding: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | assert batch_bins > 0 |
| | | if sort_batch != "ascending" and sort_batch != "descending": |
| | | raise ValueError( |
| | |
| | | from typing import Union |
| | | |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.read_text import load_num_sequence_text |
| | | from funasr.samplers.abs_sampler import AbsSampler |
| | |
| | | drop_last: bool = False, |
| | | padding: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | assert batch_bins > 0 |
| | | if sort_batch != "ascending" and sort_batch != "descending": |
| | | raise ValueError( |
| | |
| | | from typing import Iterator |
| | | from typing import Tuple |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.read_text import load_num_sequence_text |
| | | from funasr.samplers.abs_sampler import AbsSampler |
| | |
| | | sort_batch: str = "ascending", |
| | | drop_last: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | assert batch_size > 0 |
| | | self.batch_size = batch_size |
| | | self.shape_file = shape_file |
| | |
| | | from typing import Iterator |
| | | from typing import Tuple |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.read_text import read_2column_text |
| | | from funasr.samplers.abs_sampler import AbsSampler |
| | |
| | | drop_last: bool = False, |
| | | utt2category_file: str = None, |
| | | ): |
| | | assert check_argument_types() |
| | | assert batch_size > 0 |
| | | self.batch_size = batch_size |
| | | self.key_file = key_file |
| | |
| | | |
| | | import torch |
| | | from torch.optim.lr_scheduler import _LRScheduler |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.schedulers.abs_scheduler import AbsBatchStepScheduler |
| | | |
| | |
| | | warmup_steps: Union[int, float] = 25000, |
| | | last_epoch: int = -1, |
| | | ): |
| | | assert check_argument_types() |
| | | self.model_size = model_size |
| | | self.warmup_steps = warmup_steps |
| | | |
| | |
| | | |
| | | import torch |
| | | from torch.optim.lr_scheduler import _LRScheduler |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.schedulers.abs_scheduler import AbsBatchStepScheduler |
| | | |
| | |
| | | init_lr_scale: float = 0.01, |
| | | final_lr_scale: float = 0.01, |
| | | ): |
| | | assert check_argument_types() |
| | | self.optimizer = optimizer |
| | | self.last_epoch = last_epoch |
| | | self.phase_ratio = phase_ratio |
| | |
| | | |
| | | import torch |
| | | from torch.optim.lr_scheduler import _LRScheduler |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.schedulers.abs_scheduler import AbsBatchStepScheduler |
| | | |
| | |
| | | warmup_steps: Union[int, float] = 25000, |
| | | last_epoch: int = -1, |
| | | ): |
| | | assert check_argument_types() |
| | | self.warmup_steps = warmup_steps |
| | | |
| | | # __init__() must be invoked before setting field |
| | |
| | | import yaml |
| | | from funasr.models.base_model import FunASRModel |
| | | from torch.utils.data import DataLoader |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr import __version__ |
| | | from funasr.datasets.dataset import AbsDataset |
| | |
| | | |
| | | @classmethod |
| | | def get_parser(cls) -> config_argparse.ArgumentParser: |
| | | assert check_argument_types() |
| | | |
| | | class ArgumentDefaultsRawTextHelpFormatter( |
| | | argparse.RawTextHelpFormatter, |
| | |
| | | cls.trainer.add_arguments(parser) |
| | | cls.add_task_arguments(parser) |
| | | |
| | | assert check_return_type(parser) |
| | | return parser |
| | | |
| | | @classmethod |
| | |
| | | return _cls |
| | | |
| | | # This method is used only for --print_config |
| | | assert check_argument_types() |
| | | parser = cls.get_parser() |
| | | args, _ = parser.parse_known_args() |
| | | config = vars(args) |
| | |
| | | |
| | | @classmethod |
| | | def check_required_command_args(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if hasattr(args, "required"): |
| | | for k in vars(args): |
| | | if "-" in k: |
| | |
| | | inference: bool = False, |
| | | ) -> None: |
| | | """Check if the dataset satisfy the requirement of current Task""" |
| | | assert check_argument_types() |
| | | mes = ( |
| | | f"If you intend to use an additional input, modify " |
| | | f'"{cls.__name__}.required_data_names()" or ' |
| | |
| | | |
| | | @classmethod |
| | | def print_config(cls, file=sys.stdout) -> None: |
| | | assert check_argument_types() |
| | | # Shows the config: e.g. python train.py asr --print_config |
| | | config = cls.get_default_config() |
| | | file.write(yaml_no_alias_safe_dump(config, indent=4, sort_keys=False)) |
| | | |
| | | @classmethod |
| | | def main(cls, args: argparse.Namespace = None, cmd: Sequence[str] = None): |
| | | assert check_argument_types() |
| | | print(get_commandline_args(), file=sys.stderr) |
| | | if args is None: |
| | | parser = cls.get_parser() |
| | |
| | | |
| | | @classmethod |
| | | def main_worker(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | |
| | | # 0. Init distributed process |
| | | distributed_option = build_dataclass(DistributedOption, args) |
| | |
| | | - 4 epoch with "--num_iters_per_epoch" == 4 |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | iter_options = cls.build_iter_options(args, distributed_option, mode) |
| | | |
| | | # Overwrite iter_options if any kwargs is given |
| | |
| | | def build_sequence_iter_factory( |
| | | cls, args: argparse.Namespace, iter_options: IteratorOptions, mode: str |
| | | ) -> AbsIterFactory: |
| | | assert check_argument_types() |
| | | |
| | | if hasattr(args, "frontend_conf"): |
| | | if args.frontend_conf is not None and "fs" in args.frontend_conf: |
| | |
| | | iter_options: IteratorOptions, |
| | | mode: str, |
| | | ) -> AbsIterFactory: |
| | | assert check_argument_types() |
| | | |
| | | dataset = ESPnetDataset( |
| | | iter_options.data_path_and_name_and_type, |
| | |
| | | def build_multiple_iter_factory( |
| | | cls, args: argparse.Namespace, distributed_option: DistributedOption, mode: str |
| | | ): |
| | | assert check_argument_types() |
| | | iter_options = cls.build_iter_options(args, distributed_option, mode) |
| | | assert len(iter_options.data_path_and_name_and_type) > 0, len( |
| | | iter_options.data_path_and_name_and_type |
| | |
| | | inference: bool = False, |
| | | ) -> DataLoader: |
| | | """Build DataLoader using iterable dataset""" |
| | | assert check_argument_types() |
| | | # For backward compatibility for pytorch DataLoader |
| | | if collate_fn is not None: |
| | | kwargs = dict(collate_fn=collate_fn) |
| | |
| | | device: Device type, "cpu", "cuda", or "cuda:N". |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if config_file is None: |
| | | assert model_file is not None, ( |
| | | "The argument 'model_file' must be provided " |
| | |
| | | import numpy as np |
| | | import torch |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.datasets.collate_fn import CommonCollateFn |
| | | from funasr.datasets.preprocessor import CommonPreprocessor |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol |
| | | return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) |
| | | |
| | |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | if args.use_preprocessor: |
| | | retval = CommonPreprocessor( |
| | | train=train, |
| | |
| | | ) |
| | | else: |
| | | retval = None |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | |
| | | cls, train: bool = True, inference: bool = False |
| | | ) -> Tuple[str, ...]: |
| | | retval = () |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | | |
| | | |
| | |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | | |
| | | # ~~~~~~~~~ The methods below are mainly used for inference ~~~~~~~~~ |
| | |
| | | device: Device type, "cpu", "cuda", or "cuda:N". |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if config_file is None: |
| | | assert model_file is not None, ( |
| | | "The argument 'model_file' must be provided " |
| | |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | | |
| | | # ~~~~~~~~~ The methods below are mainly used for inference ~~~~~~~~~ |
| | |
| | | device: Device type, "cpu", "cuda", or "cuda:N". |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if config_file is None: |
| | | assert model_file is not None, ( |
| | | "The argument 'model_file' must be provided " |
| | |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | | |
| | | |
| | |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | | |
| | | @classmethod |
| | |
| | | Return: |
| | | model: ASR Transducer model. |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | |
| | | "Initialization part will be reworked in a short future.", |
| | | ) |
| | | |
| | | #assert check_return_type(model) |
| | | |
| | | return model |
| | | |
| | |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.datasets.collate_fn import CommonCollateFn |
| | | from funasr.datasets.preprocessor import CommonPreprocessor |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | return CommonCollateFn(clipping=True) |
| | | |
| | | @classmethod |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | if args.use_preprocessor: |
| | | retval = CommonPreprocessor( |
| | | train=train, |
| | |
| | | ) |
| | | else: |
| | | retval = None |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | |
| | | cls, train: bool = True, inference: bool = False |
| | | ) -> Tuple[str, ...]: |
| | | retval = () |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | |
| | | # 1. frontend |
| | | if args.input_size is None: |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | |
| | | import numpy as np |
| | | import torch |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.datasets.collate_fn import CommonCollateFn |
| | | from funasr.datasets.preprocessor import CommonPreprocessor |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol |
| | | return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) |
| | | |
| | |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | if args.use_preprocessor: |
| | | retval = CommonPreprocessor( |
| | | train=train, |
| | |
| | | ) |
| | | else: |
| | | retval = None |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | |
| | | cls, train: bool = True, inference: bool = False |
| | | ) -> Tuple[str, ...]: |
| | | retval = () |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | | |
| | | # ~~~~~~~~~ The methods below are mainly used for inference ~~~~~~~~~ |
| | |
| | | device: Device type, "cpu", "cuda", or "cuda:N". |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if config_file is None: |
| | | assert model_file is not None, ( |
| | | "The argument 'model_file' must be provided " |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol |
| | | return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) |
| | | |
| | |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | # if args.use_preprocessor: |
| | | # retval = CommonPreprocessor( |
| | | # train=train, |
| | |
| | | # ) |
| | | # else: |
| | | # retval = None |
| | | # assert check_return_type(retval) |
| | | return None |
| | | |
| | | @classmethod |
| | |
| | | cls, train: bool = True, inference: bool = False |
| | | ) -> Tuple[str, ...]: |
| | | retval = () |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | |
| | | # 1. frontend |
| | | if args.input_size is None or args.frontend == "wav_frontend_mel23": |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | | |
| | | # ~~~~~~~~~ The methods below are mainly used for inference ~~~~~~~~~ |
| | |
| | | device: Device type, "cpu", "cuda", or "cuda:N". |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if config_file is None: |
| | | assert model_file is not None, ( |
| | | "The argument 'model_file' must be provided " |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.datasets.collate_fn import CommonCollateFn |
| | | from funasr.datasets.preprocessor import CommonPreprocessor |
| | |
| | | @classmethod |
| | | def add_task_arguments(cls, parser: argparse.ArgumentParser): |
| | | # NOTE(kamo): Use '_' instead of '-' to avoid confusion |
| | | assert check_argument_types() |
| | | group = parser.add_argument_group(description="Task related") |
| | | |
| | | # NOTE(kamo): add_arguments(..., required=True) can't be used |
| | |
| | | for class_choices in cls.class_choices_list: |
| | | class_choices.add_arguments(group) |
| | | |
| | | assert check_return_type(parser) |
| | | return parser |
| | | |
| | | @classmethod |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | return CommonCollateFn(int_pad_value=0) |
| | | |
| | | @classmethod |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | if args.use_preprocessor: |
| | | retval = CommonPreprocessor( |
| | | train=train, |
| | |
| | | ) |
| | | else: |
| | | retval = None |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace) -> LanguageModel: |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.datasets.collate_fn import CommonCollateFn |
| | | from funasr.datasets.preprocessor import PuncTrainTokenizerCommonPreprocessor |
| | |
| | | @classmethod |
| | | def add_task_arguments(cls, parser: argparse.ArgumentParser): |
| | | # NOTE(kamo): Use '_' instead of '-' to avoid confusion |
| | | assert check_argument_types() |
| | | group = parser.add_argument_group(description="Task related") |
| | | |
| | | # NOTE(kamo): add_arguments(..., required=True) can't be used |
| | |
| | | # e.g. --encoder and --encoder_conf |
| | | class_choices.add_arguments(group) |
| | | |
| | | assert check_return_type(parser) |
| | | return parser |
| | | |
| | | @classmethod |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | return CommonCollateFn(int_pad_value=0) |
| | | |
| | | @classmethod |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | token_types = [args.token_type, args.token_type] |
| | | token_lists = [args.token_list, args.punc_list] |
| | | bpemodels = [args.bpemodel, args.bpemodel] |
| | |
| | | ) |
| | | else: |
| | | retval = None |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace) -> PunctuationModel: |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | |
| | | import numpy as np |
| | | import torch |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.datasets.collate_fn import CommonCollateFn |
| | | from funasr.datasets.preprocessor import CommonPreprocessor |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol |
| | | return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) |
| | | |
| | |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | if args.use_preprocessor: |
| | | retval = CommonPreprocessor( |
| | | train=train, |
| | |
| | | ) |
| | | else: |
| | | retval = None |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | |
| | | cls, train: bool = True, inference: bool = False |
| | | ) -> Tuple[str, ...]: |
| | | retval = () |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | |
| | | import numpy as np |
| | | import torch |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.datasets.collate_fn import CommonCollateFn |
| | | from funasr.datasets.preprocessor import CommonPreprocessor |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol |
| | | return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) |
| | | |
| | |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | if args.use_preprocessor: |
| | | retval = CommonPreprocessor( |
| | | train=train, |
| | |
| | | ) |
| | | else: |
| | | retval = None |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | |
| | | retval = () |
| | | if inference: |
| | | retval = ("ref_speech",) |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace) -> ESPnetSVModel: |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | | |
| | | # ~~~~~~~~~ The methods below are mainly used for inference ~~~~~~~~~ |
| | |
| | | device: Device type, "cpu", "cuda", or "cuda:N". |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if config_file is None: |
| | | assert model_file is not None, ( |
| | | "The argument 'model_file' must be provided " |
| | |
| | | import numpy as np |
| | | import torch |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.datasets.collate_fn import CommonCollateFn |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol |
| | | return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) |
| | | |
| | |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | # if args.use_preprocessor: |
| | | # retval = CommonPreprocessor( |
| | | # train=train, |
| | |
| | | # else: |
| | | # retval = None |
| | | retval = None |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | |
| | | cls, train: bool = True, inference: bool = False |
| | | ) -> Tuple[str, ...]: |
| | | retval = () |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | # 4. Encoder |
| | | encoder_class = encoder_choices.get_class(args.encoder) |
| | | encoder = encoder_class(**args.encoder_conf) |
| | |
| | | device: Device type, "cpu", "cuda", or "cuda:N". |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if config_file is None: |
| | | assert model_file is not None, ( |
| | | "The argument 'model_file' must be provided " |
| | |
| | | from typing import Iterable |
| | | from typing import Union |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.text.abs_tokenizer import AbsTokenizer |
| | | from funasr.text.char_tokenizer import CharTokenizer |
| | |
| | | g2p_type: str = None, |
| | | ) -> AbsTokenizer: |
| | | """A helper function to instantiate Tokenizer""" |
| | | assert check_argument_types() |
| | | if token_type == "bpe": |
| | | if bpemodel is None: |
| | | raise ValueError('bpemodel is required if token_type = "bpe"') |
| | |
| | | from typing import Union |
| | | import warnings |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.text.abs_tokenizer import AbsTokenizer |
| | | |
| | |
| | | space_symbol: str = "<space>", |
| | | remove_non_linguistic_symbols: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | self.space_symbol = space_symbol |
| | | if non_linguistic_symbols is None: |
| | | self.non_linguistic_symbols = set() |
| | |
| | | |
| | | from jaconv import jaconv |
| | | import tacotron_cleaner.cleaners |
| | | from typeguard import check_argument_types |
| | | |
| | | try: |
| | | from vietnamese_cleaner import vietnamese_cleaners |
| | |
| | | """ |
| | | |
| | | def __init__(self, cleaner_types: Collection[str] = None): |
| | | assert check_argument_types() |
| | | |
| | | if cleaner_types is None: |
| | | self.cleaner_types = [] |
| | |
| | | |
| | | # import g2p_en |
| | | import jamo |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.text.abs_tokenizer import AbsTokenizer |
| | | |
| | |
| | | space_symbol: str = "<space>", |
| | | remove_non_linguistic_symbols: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | if g2p_type is None: |
| | | self.g2p = split_by_space |
| | | elif g2p_type == "g2p_en": |
| | |
| | | from typing import Union |
| | | |
| | | import sentencepiece as spm |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.text.abs_tokenizer import AbsTokenizer |
| | | |
| | | |
| | | class SentencepiecesTokenizer(AbsTokenizer): |
| | | def __init__(self, model: Union[Path, str]): |
| | | assert check_argument_types() |
| | | self.model = str(model) |
| | | # NOTE(kamo): |
| | | # Don't build SentencePieceProcessor in __init__() |
| | |
| | | from typing import Union |
| | | |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | |
| | | |
| | | class TokenIDConverter: |
| | |
| | | token_list: Union[Path, str, Iterable[str]], |
| | | unk_symbol: str = "<unk>", |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | if isinstance(token_list, (Path, str)): |
| | | token_list = Path(token_list) |
| | |
| | | from typing import Union |
| | | import warnings |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.text.abs_tokenizer import AbsTokenizer |
| | | |
| | |
| | | non_linguistic_symbols: Union[Path, str, Iterable[str]] = None, |
| | | remove_non_linguistic_symbols: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | self.delimiter = delimiter |
| | | |
| | | if not remove_non_linguistic_symbols and non_linguistic_symbols is not None: |
| | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | |
| | | class ForwardAdaptor(torch.nn.Module): |
| | |
| | | """ |
| | | |
| | | def __init__(self, module: torch.nn.Module, name: str): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.module = module |
| | | self.name = name |
| | |
| | | |
| | | import math |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | |
| | | def initialize(model: torch.nn.Module, init: str): |
| | |
| | | model: Target. |
| | | init: Method of initialization. |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | if init == "chainer": |
| | | # 1. lecun_normal_init_parameters |
| | |
| | | |
| | | import torch |
| | | import torch.nn.functional as F |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.torch_utils.device_funcs import force_gatherable |
| | |
| | | |
| | | class LanguageModel(FunASRModel): |
| | | def __init__(self, lm: AbsLM, vocab_size: int, ignore_id: int = 0): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.lm = lm |
| | | self.sos = 1 |
| | |
| | | class PunctuationModel(FunASRModel): |
| | | |
| | | def __init__(self, punc_model: torch.nn.Module, vocab_size: int, ignore_id: int = 0, punc_weight: list = None): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.punc_model = punc_model |
| | | self.punc_weight = torch.Tensor(punc_weight) |
| | |
| | | from typing import Optional |
| | | from typing import Tuple |
| | | |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.utils.nested_dict_action import NestedDictAction |
| | | from funasr.utils.types import str_or_none |
| | |
| | | default: str = None, |
| | | optional: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | self.name = name |
| | | self.base_type = type_check |
| | | self.classes = {k.lower(): v for k, v in classes.items()} |
| | |
| | | return retval |
| | | |
| | | def get_class(self, name: Optional[str]) -> Optional[type]: |
| | | assert check_argument_types() |
| | | if name is None or (self.optional and name.lower() == ("none", "null", "nil")): |
| | | retval = None |
| | | elif name.lower() in self.classes: |
| | | class_obj = self.classes[name] |
| | | assert check_return_type(class_obj) |
| | | retval = class_obj |
| | | else: |
| | | raise ValueError( |
| | |
| | | import humanfriendly |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | Num = Union[float, int, complex, torch.Tensor, np.ndarray] |
| | | |
| | |
| | | |
| | | |
| | | def to_reported_value(v: Num, weight: Num = None) -> "ReportedValue": |
| | | assert check_argument_types() |
| | | if isinstance(v, (torch.Tensor, np.ndarray)): |
| | | if np.prod(v.shape) != 1: |
| | | raise ValueError(f"v must be 0 or 1 dimension: {len(v.shape)}") |
| | |
| | | retval = WeightedAverage(v, weight) |
| | | else: |
| | | retval = Average(v) |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | |
| | | def aggregate(values: Sequence["ReportedValue"]) -> Num: |
| | | assert check_argument_types() |
| | | |
| | | for v in values: |
| | | if not isinstance(v, type(values[0])): |
| | |
| | | |
| | | else: |
| | | raise NotImplementedError(f"type={type(values[0])}") |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | |
| | |
| | | """ |
| | | |
| | | def __init__(self, key: str, epoch: int, total_count: int): |
| | | assert check_argument_types() |
| | | self.key = key |
| | | self.epoch = epoch |
| | | self.start_time = time.perf_counter() |
| | |
| | | stats: Dict[str, Optional[Union[Num, Dict[str, Num]]]], |
| | | weight: Num = None, |
| | | ) -> None: |
| | | assert check_argument_types() |
| | | if self._finished: |
| | | raise RuntimeError("Already finished") |
| | | if len(self._seen_keys_in_the_step) == 0: |
| | |
| | | """ |
| | | |
| | | def __init__(self, epoch: int = 0): |
| | | assert check_argument_types() |
| | | if epoch < 0: |
| | | raise ValueError(f"epoch must be 0 or more: {epoch}") |
| | | self.epoch = epoch |
| | |
| | | import torch |
| | | import torch.nn |
| | | import torch.optim |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.iterators.abs_iter_factory import AbsIterFactory |
| | | from funasr.main_funcs.average_nbest_models import average_nbest_models |
| | |
| | | @classmethod |
| | | def build_options(cls, args: argparse.Namespace) -> TrainerOptions: |
| | | """Build options consumed by train(), eval()""" |
| | | assert check_argument_types() |
| | | return build_dataclass(TrainerOptions, args) |
| | | |
| | | @classmethod |
| | |
| | | distributed_option: DistributedOption, |
| | | ) -> None: |
| | | """Perform training. This method performs the main process of training.""" |
| | | assert check_argument_types() |
| | | # NOTE(kamo): Don't check the type more strictly as far trainer_options |
| | | assert is_dataclass(trainer_options), type(trainer_options) |
| | | assert len(optimizers) == len(schedulers), (len(optimizers), len(schedulers)) |
| | |
| | | options: TrainerOptions, |
| | | distributed_option: DistributedOption, |
| | | ) -> Tuple[bool, bool]: |
| | | assert check_argument_types() |
| | | |
| | | grad_noise = options.grad_noise |
| | | accum_grad = options.accum_grad |
| | |
| | | options: TrainerOptions, |
| | | distributed_option: DistributedOption, |
| | | ) -> None: |
| | | assert check_argument_types() |
| | | ngpu = options.ngpu |
| | | no_forward_run = options.no_forward_run |
| | | distributed = distributed_option.distributed |
| | |
| | | |
| | | from distutils.version import LooseVersion |
| | | from functools import partial |
| | | from typeguard import check_argument_types |
| | | from typing import Optional |
| | | |
| | | import librosa |
| | |
| | | griffin_lim_iters: The number of iterations. |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | self.fs = fs |
| | | self.logmel2linear = ( |
| | | partial( |
| | |
| | | |
| | | import os |
| | | |
| | | from distutils.version import LooseVersion |
| | | from setuptools import find_packages |
| | | from setuptools import setup |
| | | |
| | |
| | | requirements = { |
| | | "install": [ |
| | | "setuptools>=38.5.1", |
| | | # "configargparse>=1.2.1", |
| | | "typeguard==2.13.3", |
| | | "typeguard>=3.0.1", |
| | | "humanfriendly", |
| | | "scipy>=1.4.1", |
| | | # "filelock", |
| | | "librosa", |
| | | "jamo==0.4.1", # For kss |
| | | "jamo", # For kss |
| | | "PyYAML>=5.1.2", |
| | | "soundfile>=0.11.0", |
| | | "soundfile>=0.10.2", |
| | | "h5py>=2.10.0", |
| | | "kaldiio>=2.17.0", |
| | | "torch_complex", |
| | | "nltk>=3.4.5", |
| | | # ASR |
| | | "sentencepiece", |
| | | # "ctc-segmentation<1.8,>=1.6.6", |
| | | # TTS |
| | | # "pyworld>=0.2.10", |
| | | "pypinyin<=0.44.0", |
| | | "pypinyin>=0.44.0", |
| | | "espnet_tts_frontend", |
| | | # ENH |
| | | # "ci_sdr", |
| | | "pytorch_wpe", |
| | | "editdistance>=0.5.2", |
| | | "tensorboard==1.15", |
| | | "tensorboard", |
| | | "g2p", |
| | | # PAI |
| | | "oss2", |
| | | # "kaldi-native-fbank", |
| | | # timestamp |
| | | "edit-distance", |
| | | # textgrid |
| | | "textgrid", |
| | | "protobuf==3.20.0", |
| | | "protobuf", |
| | | ], |
| | | # train: The modules invoked when training only. |
| | | "train": [ |
| | | # "pillow>=6.1.0", |
| | | "editdistance==0.5.2", |
| | | "editdistance", |
| | | "wandb", |
| | | ], |
| | | # recipe: The modules actually are not invoked in the main module of funasr, |
| | | # but are invoked for the python scripts in each recipe |
| | | "recipe": [ |
| | | "espnet_model_zoo", |
| | | # "gdown", |
| | | # "resampy", |
| | | # "pysptk>=0.1.17", |
| | | # "morfessor", # for zeroth-korean |
| | | # "youtube_dl", # for laborotv |
| | | # "nnmnkwii", |
| | | # "museval>=0.2.1", |
| | | # "pystoi>=0.2.2", |
| | | # "mir-eval>=0.6", |
| | | # "fastdtw", |
| | | # "nara_wpe>=0.0.5", |
| | | # "sacrebleu>=1.5.1", |
| | | ], |
| | | # all: The modules should be optionally installled due to some reason. |
| | | # Please consider moving them to "install" occasionally |
| | | # NOTE(kamo): The modules in "train" and "recipe" are appended into "all" |
| | | "all": [ |
| | | # NOTE(kamo): Append modules requiring specific pytorch version or torch>1.3.0 |
| | | "torch_optimizer", |
| | | "fairscale", |
| | | "transformers", |
| | | # "gtn==0.0.0", |
| | | ], |
| | | "setup": [ |
| | | "numpy", |
| | |
| | | "black", |
| | | ], |
| | | "doc": [ |
| | | "Jinja2<3.1", |
| | | "Sphinx==2.1.2", |
| | | "Jinja2", |
| | | "Sphinx", |
| | | "sphinx-rtd-theme>=0.2.4", |
| | | "sphinx-argparse>=0.2.5", |
| | | "commonmark==0.8.1", |
| | | "commonmark", |
| | | "recommonmark>=0.4.0", |
| | | "nbsphinx>=0.4.2", |
| | | "sphinx-markdown-tables>=0.0.12", |
| | | "configargparse>=1.2.1" |
| | | ], |
| | | } |
| | | requirements["all"].extend(requirements["train"] + requirements["recipe"]) |
| | | requirements["all"].extend(requirements["train"]) |
| | | requirements["test"].extend(requirements["train"]) |
| | | |
| | | install_requires = requirements["install"] |
| | |
| | | "License :: OSI Approved :: Apache Software License", |
| | | "Topic :: Software Development :: Libraries :: Python Modules", |
| | | ], |
| | | ) |
| | | ) |