Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main
122个文件已修改
4个文件已添加
13 文件已重命名
| | |
| | | import resampy |
| | | import soundfile |
| | | from tqdm import tqdm |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.utils.cli_utils import get_commandline_args |
| | | from funasr.fileio.read_text import read_2column_text |
| | |
| | | (3, 4, 5) |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if integers.strip() in ("none", "None", "NONE", "null", "Null", "NULL"): |
| | | return None |
| | | return tuple(map(int, integers.strip().split(","))) |
| | |
| | | import os |
| | | <<<<<<< HEAD |
| | | |
| | | from modelscope.metainfo import Trainers |
| | | from modelscope.trainers import build_trainer |
| | |
| | | batch_bins=params.batch_bins, |
| | | max_epoch=params.max_epoch, |
| | | lr=params.lr) |
| | | ======= |
| | | from modelscope.metainfo import Trainers |
| | | from modelscope.trainers import build_trainer |
| | | from funasr.datasets.ms_dataset import MsDataset |
| | | |
| | | |
| | | def modelscope_finetune(params): |
| | | if not os.path.exists(params["output_dir"]): |
| | | os.makedirs(params["output_dir"], exist_ok=True) |
| | | # dataset split ["train", "validation"] |
| | | ds_dict = MsDataset.load(params["data_dir"]) |
| | | kwargs = dict( |
| | | model=params["model"], |
| | | model_revision=params["model_revision"], |
| | | data_dir=ds_dict, |
| | | dataset_type=params["dataset_type"], |
| | | work_dir=params["output_dir"], |
| | | batch_bins=params["batch_bins"], |
| | | max_epoch=params["max_epoch"], |
| | | lr=params["lr"]) |
| | | >>>>>>> main |
| | | trainer = build_trainer(Trainers.speech_asr_trainer, default_args=kwargs) |
| | | trainer.train() |
| | | |
| | | |
| | | if __name__ == '__main__': |
| | | <<<<<<< HEAD |
| | | params = modelscope_args(model="damo/speech_UniASR_asr_2pass-tr-16k-common-vocab1582-pytorch", data_path="./data") |
| | | params.output_dir = "./checkpoint" # m模型保存路径 |
| | | params.data_path = "./example_data/" # 数据路径 |
| | | params.dataset_type = "small" # 小数据量设置small,若数据量大于1000小时,请使用large |
| | | params.batch_bins = 2000 # batch size,如果dataset_type="small",batch_bins单位为fbank特征帧数,如果dataset_type="large",batch_bins单位为毫秒, |
| | | params.max_epoch = 50 # 最大训练轮数 |
| | | params.max_epoch = 20 # 最大训练轮数 |
| | | params.lr = 0.00005 # 设置学习率 |
| | | |
| | | ======= |
| | | params = {} |
| | | params["output_dir"] = "./checkpoint" |
| | | params["data_dir"] = "./data" |
| | | params["batch_bins"] = 2000 |
| | | params["dataset_type"] = "small" |
| | | params["max_epoch"] = 50 |
| | | params["lr"] = 0.00005 |
| | | params["model"] = "damo/speech_UniASR_asr_2pass-tr-16k-common-vocab1582-pytorch" |
| | | params["model_revision"] = None |
| | | >>>>>>> main |
| | | modelscope_finetune(params) |
| | | modelscope_finetune(params) |
| | |
| | | <<<<<<< HEAD |
| | | import os |
| | | import shutil |
| | | import argparse |
| | | from modelscope.pipelines import pipeline |
| | | from modelscope.utils.constant import Tasks |
| | | |
| | | def modelscope_infer(args): |
| | | os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpuid) |
| | | inference_pipeline = pipeline( |
| | | task=Tasks.auto_speech_recognition, |
| | | model=args.model, |
| | | output_dir=args.output_dir, |
| | | batch_size=args.batch_size, |
| | | param_dict={"decoding_model": args.decoding_mode, "hotword": args.hotword_txt} |
| | | ) |
| | | inference_pipeline(audio_in=args.audio_in) |
| | | |
| | | if __name__ == "__main__": |
| | | parser = argparse.ArgumentParser() |
| | | parser.add_argument('--model', type=str, default="damo/speech_UniASR_asr_2pass-tr-16k-common-vocab1582-pytorch") |
| | | parser.add_argument('--audio_in', type=str, default="./data/test/wav.scp") |
| | | parser.add_argument('--output_dir', type=str, default="./results/") |
| | | parser.add_argument('--decoding_mode', type=str, default="normal") |
| | | parser.add_argument('--hotword_txt', type=str, default=None) |
| | | parser.add_argument('--batch_size', type=int, default=64) |
| | | parser.add_argument('--gpuid', type=str, default="0") |
| | | args = parser.parse_args() |
| | | modelscope_infer(args) |
| | | ======= |
| | | from modelscope.pipelines import pipeline |
| | | from modelscope.utils.constant import Tasks |
| | | |
| | |
| | | output_dir=output_dir, |
| | | ) |
| | | rec_result = inference_pipeline(audio_in=audio_in, param_dict={"decoding_model":"offline"}) |
| | | print(rec_result) |
| | | >>>>>>> main |
| | | print(rec_result) |
| | |
| | | import requests |
| | | import torch |
| | | from packaging.version import parse as V |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | from funasr.build_utils.build_model_from_file import build_model_from_file |
| | | from funasr.build_utils.build_model_from_file import build_model_from_file |
| | | from funasr.models.e2e_asr_contextual_paraformer import NeatContextualParaformer |
| | | from funasr.models.e2e_asr_paraformer import BiCifParaformer, ContextualParaformer |
| | | from funasr.models.frontend.wav_frontend import WavFrontend, WavFrontendOnline |
| | |
| | | frontend_conf: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build ASR model |
| | | scorers = {} |
| | |
| | | text, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | |
| | | text = None |
| | | results.append((text, token, token_int, hyp)) |
| | | |
| | | assert check_return_type(results) |
| | | return results |
| | | |
| | | |
| | |
| | | decoding_ind: int = 0, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build ASR model |
| | | scorers = {} |
| | |
| | | text, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | |
| | | vad_offset=begin_time) |
| | | results.append((text, token, token_int, hyp, timestamp, enc_len_batch_total, lfr_factor)) |
| | | |
| | | # assert check_return_type(results) |
| | | return results |
| | | |
| | | def generate_hotwords_list(self, hotword_list_or_file): |
| | |
| | | hotword_list_or_file: str = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build ASR model |
| | | scorers = {} |
| | |
| | | text, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | results = [] |
| | | cache_en = cache["encoder"] |
| | | if speech.shape[1] < 16 * 60 and cache_en["is_final"]: |
| | |
| | | |
| | | results.append(postprocessed_result) |
| | | |
| | | # assert check_return_type(results) |
| | | return results |
| | | |
| | | |
| | |
| | | frontend_conf: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build ASR model |
| | | scorers = {} |
| | |
| | | text, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | |
| | | text = None |
| | | results.append((text, token, token_int, hyp)) |
| | | |
| | | assert check_return_type(results) |
| | | return results |
| | | |
| | | |
| | |
| | | streaming: bool = False, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build ASR model |
| | | scorers = {} |
| | |
| | | text, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | | speech = torch.tensor(speech) |
| | |
| | | text = None |
| | | results.append((text, token, token_int, hyp)) |
| | | |
| | | assert check_return_type(results) |
| | | return results |
| | | |
| | | |
| | |
| | | """Construct a Speech2Text object.""" |
| | | super().__init__() |
| | | |
| | | assert check_argument_types() |
| | | asr_model, asr_train_args = build_model_from_file( |
| | | asr_train_config, asr_model_file, cmvn_file, device |
| | | ) |
| | |
| | | Returns: |
| | | nbest_hypothesis: N-best hypothesis. |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | if isinstance(speech, np.ndarray): |
| | | speech = torch.tensor(speech) |
| | |
| | | Returns: |
| | | nbest_hypothesis: N-best hypothesis. |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | if isinstance(speech, np.ndarray): |
| | | speech = torch.tensor(speech) |
| | |
| | | text = None |
| | | results.append((text, token, token_int, hyp)) |
| | | |
| | | assert check_return_type(results) |
| | | |
| | | return results |
| | | |
| | | @staticmethod |
| | | def from_pretrained( |
| | | model_tag: Optional[str] = None, |
| | | **kwargs: Optional[Any], |
| | | ) -> Speech2Text: |
| | | """Build Speech2Text instance from the pretrained model. |
| | | Args: |
| | | model_tag: Model tag of the pretrained models. |
| | | Return: |
| | | : Speech2Text instance. |
| | | """ |
| | | if model_tag is not None: |
| | | try: |
| | | from espnet_model_zoo.downloader import ModelDownloader |
| | | |
| | | except ImportError: |
| | | logging.error( |
| | | "`espnet_model_zoo` is not installed. " |
| | | "Please install via `pip install -U espnet_model_zoo`." |
| | | ) |
| | | raise |
| | | d = ModelDownloader() |
| | | kwargs.update(**d.download_and_unpack(model_tag)) |
| | | |
| | | return Speech2TextTransducer(**kwargs) |
| | | |
| | | |
| | | class Speech2TextSAASR: |
| | |
| | | frontend_conf: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build ASR model |
| | | scorers = {} |
| | |
| | | text, text_id, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | |
| | | |
| | | results.append((text, text_id, token, token_int, hyp)) |
| | | |
| | | assert check_return_type(results) |
| | | return results |
| | |
| | | import torchaudio |
| | | import soundfile |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.bin.asr_infer import Speech2Text |
| | | from funasr.bin.asr_infer import Speech2TextMFCCA |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | if batch_size > 1: |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | if word_lm_train_config is not None: |
| | | raise NotImplementedError("Word LM is not implemented") |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | if batch_size > 1: |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | if batch_size > 1: |
| | |
| | | right_context: Number of frames in right context AFTER subsampling. |
| | | display_partial_hypotheses: Whether to display partial hypotheses. |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | if batch_size > 1: |
| | | raise NotImplementedError("batch decoding is not implemented") |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | if batch_size > 1: |
| | | raise NotImplementedError("batch decoding is not implemented") |
| | | if word_lm_train_config is not None: |
| | |
| | | import torch |
| | | from scipy.ndimage import median_filter |
| | | from torch.nn import functional as F |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.frontend.wav_frontend import WavFrontendMel23 |
| | | from funasr.tasks.diar import DiarTask |
| | |
| | | device: str = "cpu", |
| | | dtype: str = "float32", |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build Diarization model |
| | | diar_model, diar_train_args = build_model_from_file( |
| | |
| | | diarization results |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | | speech = torch.tensor(speech) |
| | |
| | | results = self.diar_model.estimate_sequential(**batch) |
| | | |
| | | return results |
| | | |
| | | @staticmethod |
| | | def from_pretrained( |
| | | model_tag: Optional[str] = None, |
| | | **kwargs: Optional[Any], |
| | | ): |
| | | """Build Speech2Diarization instance from the pretrained model. |
| | | |
| | | Args: |
| | | model_tag (Optional[str]): Model tag of the pretrained models. |
| | | Currently, the tags of espnet_model_zoo are supported. |
| | | |
| | | Returns: |
| | | Speech2Diarization: Speech2Diarization instance. |
| | | |
| | | """ |
| | | if model_tag is not None: |
| | | try: |
| | | from espnet_model_zoo.downloader import ModelDownloader |
| | | |
| | | except ImportError: |
| | | logging.error( |
| | | "`espnet_model_zoo` is not installed. " |
| | | "Please install via `pip install -U espnet_model_zoo`." |
| | | ) |
| | | raise |
| | | d = ModelDownloader() |
| | | kwargs.update(**d.download_and_unpack(model_tag)) |
| | | |
| | | return Speech2DiarizationEEND(**kwargs) |
| | | |
| | | |
| | | class Speech2DiarizationSOND: |
| | |
| | | smooth_size: int = 83, |
| | | dur_threshold: float = 10, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # TODO: 1. Build Diarization model |
| | | diar_model, diar_train_args = build_model_from_file( |
| | |
| | | diarization results for each speaker |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | | speech = torch.tensor(speech) |
| | |
| | | results, pse_labels = self.post_processing(logits, profile.shape[1], output_format) |
| | | |
| | | return results, pse_labels |
| | | |
| | | @staticmethod |
| | | def from_pretrained( |
| | | model_tag: Optional[str] = None, |
| | | **kwargs: Optional[Any], |
| | | ): |
| | | """Build Speech2Xvector instance from the pretrained model. |
| | | |
| | | Args: |
| | | model_tag (Optional[str]): Model tag of the pretrained models. |
| | | Currently, the tags of espnet_model_zoo are supported. |
| | | |
| | | Returns: |
| | | Speech2Xvector: Speech2Xvector instance. |
| | | |
| | | """ |
| | | if model_tag is not None: |
| | | try: |
| | | from espnet_model_zoo.downloader import ModelDownloader |
| | | |
| | | except ImportError: |
| | | logging.error( |
| | | "`espnet_model_zoo` is not installed. " |
| | | "Please install via `pip install -U espnet_model_zoo`." |
| | | ) |
| | | raise |
| | | d = ModelDownloader() |
| | | kwargs.update(**d.download_and_unpack(model_tag)) |
| | | |
| | | return Speech2DiarizationSOND(**kwargs) |
| | |
| | | import soundfile |
| | | import torch |
| | | from scipy.signal import medfilt |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.bin.diar_infer import Speech2DiarizationSOND, Speech2DiarizationEEND |
| | | from funasr.datasets.iterable_dataset import load_bytes |
| | |
| | | mode: str = "sond", |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | if batch_size > 1: |
| | |
| | | param_dict: Optional[dict] = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | if batch_size > 1: |
| | |
| | | import numpy as np |
| | | import torch |
| | | from torch.nn.parallel import data_parallel |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.build_utils.build_model_from_file import build_model_from_file |
| | | from funasr.build_utils.build_streaming_iterator import build_streaming_iterator |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | |
| | |
| | | from typing import Union |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.bin.punc_infer import Text2Punc, Text2PuncVADRealtime |
| | | from funasr.torch_utils.set_all_random_seed import set_all_random_seed |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | logging.basicConfig( |
| | | level=log_level, |
| | | format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.build_utils.build_model_from_file import build_model_from_file |
| | | from funasr.torch_utils.device_funcs import to_device |
| | |
| | | streaming: bool = False, |
| | | embedding_node: str = "resnet1_dense", |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # TODO: 1. Build SV model |
| | | sv_model, sv_train_args = build_model_from_file( |
| | |
| | | embedding, ref_embedding, similarity_score |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | self.sv_model.eval() |
| | | embedding = self.calculate_embedding(speech) |
| | | ref_emb, score = None, None |
| | |
| | | score = torch.cosine_similarity(embedding, ref_emb) |
| | | |
| | | results = (embedding, ref_emb, score) |
| | | assert check_return_type(results) |
| | | return results |
| | | |
| | | @staticmethod |
| | | def from_pretrained( |
| | | model_tag: Optional[str] = None, |
| | | **kwargs: Optional[Any], |
| | | ): |
| | | """Build Speech2Xvector instance from the pretrained model. |
| | | |
| | | Args: |
| | | model_tag (Optional[str]): Model tag of the pretrained models. |
| | | Currently, the tags of espnet_model_zoo are supported. |
| | | |
| | | Returns: |
| | | Speech2Xvector: Speech2Xvector instance. |
| | | |
| | | """ |
| | | if model_tag is not None: |
| | | try: |
| | | from espnet_model_zoo.downloader import ModelDownloader |
| | | |
| | | except ImportError: |
| | | logging.error( |
| | | "`espnet_model_zoo` is not installed. " |
| | | "Please install via `pip install -U espnet_model_zoo`." |
| | | ) |
| | | raise |
| | | d = ModelDownloader() |
| | | kwargs.update(**d.download_and_unpack(model_tag)) |
| | | |
| | | return Speech2Xvector(**kwargs) |
| | |
| | | import numpy as np |
| | | import torch |
| | | from kaldiio import WriteHelper |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.bin.sv_infer import Speech2Xvector |
| | | from funasr.build_utils.build_streaming_iterator import build_streaming_iterator |
| | |
| | | param_dict: Optional[dict] = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | |
| | |
| | | embedding_node=embedding_node |
| | | ) |
| | | logging.info("speech2xvector_kwargs: {}".format(speech2xvector_kwargs)) |
| | | speech2xvector = Speech2Xvector.from_pretrained( |
| | | model_tag=model_tag, |
| | | **speech2xvector_kwargs, |
| | | ) |
| | | speech2xvector = Speech2Xvector(**speech2xvector_kwargs) |
| | | speech2xvector.sv_model.eval() |
| | | |
| | | def _forward( |
| | |
| | | from typing import List |
| | | from typing import Optional |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.utils.cli_utils import get_commandline_args |
| | | from funasr.text.build_tokenizer import build_tokenizer |
| | |
| | | cleaner: Optional[str], |
| | | g2p: Optional[str], |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | logging.basicConfig( |
| | | level=log_level, |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from funasr.build_utils.build_model_from_file import build_model_from_file |
| | | from funasr.models.frontend.wav_frontend import WavFrontend |
| | | from funasr.text.token_id_converter import TokenIDConverter |
| | |
| | | dtype: str = "float32", |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | # 1. Build ASR model |
| | | tp_model, tp_train_args = build_model_from_file( |
| | | timestamp_infer_config, timestamp_model_file, cmvn_file=None, device=device, task_name="asr", mode="tp" |
| | |
| | | speech_lengths: Union[torch.Tensor, np.ndarray] = None, |
| | | text_lengths: Union[torch.Tensor, np.ndarray] = None |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.bin.tp_infer import Speech2Timestamp |
| | | from funasr.build_utils.build_streaming_iterator import build_streaming_iterator |
| | |
| | | seg_dict_file: Optional[str] = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.build_utils.build_model_from_file import build_model_from_file |
| | | from funasr.models.frontend.wav_frontend import WavFrontend, WavFrontendOnline |
| | |
| | | dtype: str = "float32", |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build vad model |
| | | vad_model, vad_infer_args = build_model_from_file( |
| | |
| | | text, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | |
| | | text, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from funasr.build_utils.build_streaming_iterator import build_streaming_iterator |
| | | from funasr.fileio.datadir_writer import DatadirWriter |
| | | from funasr.torch_utils.set_all_random_seed import set_all_random_seed |
| | |
| | | num_workers: int = 1, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | if batch_size > 1: |
| | | raise NotImplementedError("batch decoding is not implemented") |
| | | |
| | |
| | | num_workers: int = 1, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | logging.basicConfig( |
| | | level=log_level, |
| | |
| | | |
| | | import torch |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.build_utils.build_model import build_model |
| | | from funasr.models.base_model import FunASRModel |
| | |
| | | device: Device type, "cpu", "cuda", or "cuda:N". |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if config_file is None: |
| | | assert model_file is not None, ( |
| | | "The argument 'model_file' must be provided " |
| | |
| | | import numpy as np |
| | | from torch.utils.data import DataLoader |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.datasets.iterable_dataset import IterableESPnetDataset |
| | | from funasr.datasets.small_datasets.collate_fn import CommonCollateFn |
| | |
| | | train: bool = False, |
| | | ) -> DataLoader: |
| | | """Build DataLoader using iterable dataset""" |
| | | assert check_argument_types() |
| | | |
| | | # preprocess |
| | | if preprocess_fn is not None: |
| | |
| | | import logging |
| | | |
| | | import torch |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | | from funasr.layers.global_mvn import GlobalMVN |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | |
| | | import torch |
| | | import torch.nn |
| | | import torch.optim |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.iterators.abs_iter_factory import AbsIterFactory |
| | | from funasr.main_funcs.average_nbest_models import average_nbest_models |
| | |
| | | |
| | | def build_options(self, args: argparse.Namespace) -> TrainerOptions: |
| | | """Build options consumed by train(), eval()""" |
| | | assert check_argument_types() |
| | | return build_dataclass(TrainerOptions, args) |
| | | |
| | | @classmethod |
| | |
| | | |
| | | def run(self) -> None: |
| | | """Perform training. This method performs the main process of training.""" |
| | | assert check_argument_types() |
| | | # NOTE(kamo): Don't check the type more strictly as far trainer_options |
| | | model = self.model |
| | | optimizers = self.optimizers |
| | |
| | | options: TrainerOptions, |
| | | distributed_option: DistributedOption, |
| | | ) -> Tuple[bool, bool]: |
| | | assert check_argument_types() |
| | | |
| | | grad_noise = options.grad_noise |
| | | accum_grad = options.accum_grad |
| | |
| | | options: TrainerOptions, |
| | | distributed_option: DistributedOption, |
| | | ) -> None: |
| | | assert check_argument_types() |
| | | ngpu = options.ngpu |
| | | # no_forward_run = options.no_forward_run |
| | | distributed = distributed_option.distributed |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.modules.nets_utils import pad_list |
| | | |
| | |
| | | not_sequence: Collection[str] = (), |
| | | max_sample_size=None |
| | | ): |
| | | assert check_argument_types() |
| | | self.float_pad_value = float_pad_value |
| | | self.int_pad_value = int_pad_value |
| | | self.not_sequence = set(not_sequence) |
| | |
| | | ) -> Tuple[List[str], Dict[str, torch.Tensor]]: |
| | | """Concatenate ndarray-list to an array and convert to torch.Tensor. |
| | | """ |
| | | assert check_argument_types() |
| | | uttids = [u for u, _ in data] |
| | | data = [d for _, d in data] |
| | | |
| | |
| | | output[key + "_lengths"] = lens |
| | | |
| | | output = (uttids, output) |
| | | assert check_return_type(output) |
| | | return output |
| | | |
| | | def crop_to_max_size(feature, target_size): |
| | |
| | | not_sequence: Collection[str] = (), |
| | | ) -> Tuple[List[str], Dict[str, torch.Tensor]]: |
| | | # mainly for pre-training |
| | | assert check_argument_types() |
| | | uttids = [u for u, _ in data] |
| | | data = [d for _, d in data] |
| | | |
| | |
| | | output[key + "_lengths"] = lens |
| | | |
| | | output = (uttids, output) |
| | | assert check_return_type(output) |
| | | return output |
| | |
| | | import numpy as np |
| | | import torch |
| | | from torch.utils.data.dataset import Dataset |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.fileio.npy_scp import NpyScpReader |
| | | from funasr.fileio.rand_gen_dataset import FloatRandomGenerateDataset |
| | |
| | | |
| | | class AdapterForSoundScpReader(collections.abc.Mapping): |
| | | def __init__(self, loader, dtype=None): |
| | | assert check_argument_types() |
| | | self.loader = loader |
| | | self.dtype = dtype |
| | | self.rate = None |
| | |
| | | max_cache_fd: int = 0, |
| | | dest_sample_rate: int = 16000, |
| | | ): |
| | | assert check_argument_types() |
| | | if len(path_name_type_list) == 0: |
| | | raise ValueError( |
| | | '1 or more elements are required for "path_name_type_list"' |
| | |
| | | return _mes |
| | | |
| | | def __getitem__(self, uid: Union[str, int]) -> Tuple[str, Dict[str, np.ndarray]]: |
| | | assert check_argument_types() |
| | | |
| | | # Change integer-id to string-id |
| | | if isinstance(uid, int): |
| | |
| | | self.cache[uid] = data |
| | | |
| | | retval = uid, data |
| | | assert check_return_type(retval) |
| | | return retval |
| | |
| | | import torchaudio |
| | | import soundfile |
| | | from torch.utils.data.dataset import IterableDataset |
| | | from typeguard import check_argument_types |
| | | import os.path |
| | | |
| | | from funasr.datasets.dataset import ESPnetDataset |
| | |
| | | int_dtype: str = "long", |
| | | key_file: str = None, |
| | | ): |
| | | assert check_argument_types() |
| | | if len(path_name_type_list) == 0: |
| | | raise ValueError( |
| | | '1 or more elements are required for "path_name_type_list"' |
| | |
| | | |
| | | import sentencepiece as spm |
| | | from torch.utils.data import DataLoader |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.datasets.large_datasets.dataset import Dataset |
| | | from funasr.iterators.abs_iter_factory import AbsIterFactory |
| | |
| | | |
| | | class SentencepiecesTokenizer(AbsTokenizer): |
| | | def __init__(self, model: Union[Path, str]): |
| | | assert check_argument_types() |
| | | self.model = str(model) |
| | | self.sp = None |
| | | |
| | |
| | | import numpy as np |
| | | import scipy.signal |
| | | import soundfile |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.text.build_tokenizer import build_tokenizer |
| | | from funasr.text.cleaner import TextCleaner |
| | |
| | | def _speech_process( |
| | | self, data: Dict[str, Union[str, np.ndarray]] |
| | | ) -> Dict[str, Union[str, np.ndarray]]: |
| | | assert check_argument_types() |
| | | if self.speech_name in data: |
| | | if self.train and (self.rirs is not None or self.noises is not None): |
| | | speech = data[self.speech_name] |
| | |
| | | speech = data[self.speech_name] |
| | | ma = np.max(np.abs(speech)) |
| | | data[self.speech_name] = speech * self.speech_volume_normalize / ma |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | def _text_process( |
| | |
| | | tokens = self.tokenizer.text2tokens(text) |
| | | text_ints = self.token_id_converter.tokens2ids(tokens) |
| | | data[self.text_name] = np.array(text_ints, dtype=np.int64) |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | def __call__( |
| | | self, uid: str, data: Dict[str, Union[str, np.ndarray]] |
| | | ) -> Dict[str, np.ndarray]: |
| | | assert check_argument_types() |
| | | |
| | | data = self._speech_process(data) |
| | | data = self._text_process(data) |
| | |
| | | tokens = self.tokenizer.text2tokens(text) |
| | | text_ints = self.token_id_converter.tokens2ids(tokens) |
| | | data[self.text_name] = np.array(text_ints, dtype=np.int64) |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | |
| | |
| | | tokens = self.tokenizer.text2tokens(text) |
| | | text_ints = self.token_id_converter.tokens2ids(tokens) |
| | | data[text_n] = np.array(text_ints, dtype=np.int64) |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | def __call__( |
| | | self, uid: str, data: Dict[str, Union[str, np.ndarray]] |
| | | ) -> Dict[str, np.ndarray]: |
| | | assert check_argument_types() |
| | | |
| | | if self.speech_name in data: |
| | | # Nothing now: candidates: |
| | |
| | | tokens = self.tokenizer[i].text2tokens(text) |
| | | text_ints = self.token_id_converter[i].tokens2ids(tokens) |
| | | data[text_name] = np.array(text_ints, dtype=np.int64) |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | class CodeMixTokenizerCommonPreprocessor(CommonPreprocessor): |
| | |
| | | def __call__( |
| | | self, uid: str, data: Dict[str, Union[list, str, np.ndarray]] |
| | | ) -> Dict[str, Union[list, np.ndarray]]: |
| | | assert check_argument_types() |
| | | # Split words. |
| | | if isinstance(data[self.text_name], str): |
| | | split_text = self.split_words(data[self.text_name]) |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.modules.nets_utils import pad_list |
| | | |
| | |
| | | not_sequence: Collection[str] = (), |
| | | max_sample_size=None |
| | | ): |
| | | assert check_argument_types() |
| | | self.float_pad_value = float_pad_value |
| | | self.int_pad_value = int_pad_value |
| | | self.not_sequence = set(not_sequence) |
| | |
| | | ) -> Tuple[List[str], Dict[str, torch.Tensor]]: |
| | | """Concatenate ndarray-list to an array and convert to torch.Tensor. |
| | | """ |
| | | assert check_argument_types() |
| | | uttids = [u for u, _ in data] |
| | | data = [d for _, d in data] |
| | | |
| | |
| | | output[key + "_lengths"] = lens |
| | | |
| | | output = (uttids, output) |
| | | assert check_return_type(output) |
| | | return output |
| | | |
| | | def crop_to_max_size(feature, target_size): |
| | |
| | | import numpy as np |
| | | import torch |
| | | from torch.utils.data.dataset import Dataset |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.fileio.npy_scp import NpyScpReader |
| | | from funasr.fileio.sound_scp import SoundScpReader |
| | |
| | | |
| | | class AdapterForSoundScpReader(collections.abc.Mapping): |
| | | def __init__(self, loader, dtype=None): |
| | | assert check_argument_types() |
| | | self.loader = loader |
| | | self.dtype = dtype |
| | | self.rate = None |
| | |
| | | speed_perturb: Union[list, tuple] = None, |
| | | mode: str = "train", |
| | | ): |
| | | assert check_argument_types() |
| | | if len(path_name_type_list) == 0: |
| | | raise ValueError( |
| | | '1 or more elements are required for "path_name_type_list"' |
| | |
| | | return _mes |
| | | |
| | | def __getitem__(self, uid: Union[str, int]) -> Tuple[str, Dict[str, np.ndarray]]: |
| | | assert check_argument_types() |
| | | |
| | | # Change integer-id to string-id |
| | | if isinstance(uid, int): |
| | |
| | | data[name] = value |
| | | |
| | | retval = uid, data |
| | | assert check_return_type(retval) |
| | | return retval |
| | |
| | | from typing import Tuple |
| | | from typing import Union |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.read_text import load_num_sequence_text |
| | | from funasr.samplers.abs_sampler import AbsSampler |
| | |
| | | drop_last: bool = False, |
| | | padding: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | assert batch_bins > 0 |
| | | if sort_batch != "ascending" and sort_batch != "descending": |
| | | raise ValueError( |
| | |
| | | import numpy as np |
| | | import scipy.signal |
| | | import soundfile |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.text.build_tokenizer import build_tokenizer |
| | | from funasr.text.cleaner import TextCleaner |
| | |
| | | def _speech_process( |
| | | self, data: Dict[str, Union[str, np.ndarray]] |
| | | ) -> Dict[str, Union[str, np.ndarray]]: |
| | | assert check_argument_types() |
| | | if self.speech_name in data: |
| | | if self.train and (self.rirs is not None or self.noises is not None): |
| | | speech = data[self.speech_name] |
| | |
| | | speech = data[self.speech_name] |
| | | ma = np.max(np.abs(speech)) |
| | | data[self.speech_name] = speech * self.speech_volume_normalize / ma |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | def _text_process( |
| | |
| | | tokens = self.tokenizer.text2tokens(text) |
| | | text_ints = self.token_id_converter.tokens2ids(tokens) |
| | | data[self.text_name] = np.array(text_ints, dtype=np.int64) |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | def __call__( |
| | | self, uid: str, data: Dict[str, Union[str, np.ndarray]] |
| | | ) -> Dict[str, np.ndarray]: |
| | | assert check_argument_types() |
| | | |
| | | data = self._speech_process(data) |
| | | data = self._text_process(data) |
| | |
| | | tokens = self.tokenizer.text2tokens(text) |
| | | text_ints = self.token_id_converter.tokens2ids(tokens) |
| | | data[self.text_name] = np.array(text_ints, dtype=np.int64) |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | |
| | |
| | | tokens = self.tokenizer.text2tokens(text) |
| | | text_ints = self.token_id_converter.tokens2ids(tokens) |
| | | data[text_n] = np.array(text_ints, dtype=np.int64) |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | def __call__( |
| | | self, uid: str, data: Dict[str, Union[str, np.ndarray]] |
| | | ) -> Dict[str, np.ndarray]: |
| | | assert check_argument_types() |
| | | |
| | | if self.speech_name in data: |
| | | # Nothing now: candidates: |
| | |
| | | tokens = self.tokenizer[i].text2tokens(text) |
| | | text_ints = self.token_id_converter[i].tokens2ids(tokens) |
| | | data[text_name] = np.array(text_ints, dtype=np.int64) |
| | | assert check_return_type(data) |
| | | return data |
| | | |
| | | |
| | |
| | | def __call__( |
| | | self, uid: str, data: Dict[str, Union[list, str, np.ndarray]] |
| | | ) -> Dict[str, Union[list, np.ndarray]]: |
| | | assert check_argument_types() |
| | | # Split words. |
| | | if isinstance(data[self.text_name], str): |
| | | split_text = self.split_words(data[self.text_name]) |
| | |
| | | import json |
| | | from typing import Union, Dict |
| | | from pathlib import Path |
| | | from typeguard import check_argument_types |
| | | |
| | | import os |
| | | import logging |
| | |
| | | calib_num: int = 200, |
| | | model_revision: str = None, |
| | | ): |
| | | assert check_argument_types() |
| | | self.set_all_random_seed(0) |
| | | |
| | | self.cache_dir = cache_dir |
| | |
| | | from typing import Union |
| | | import warnings |
| | | |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | |
| | | class DatadirWriter: |
| | |
| | | """ |
| | | |
| | | def __init__(self, p: Union[Path, str]): |
| | | assert check_argument_types() |
| | | self.path = Path(p) |
| | | self.chilidren = {} |
| | | self.fd = None |
| | |
| | | return self |
| | | |
| | | def __getitem__(self, key: str) -> "DatadirWriter": |
| | | assert check_argument_types() |
| | | if self.fd is not None: |
| | | raise RuntimeError("This writer points out a file") |
| | | |
| | |
| | | self.has_children = True |
| | | |
| | | retval = self.chilidren[key] |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | def __setitem__(self, key: str, value: str): |
| | | assert check_argument_types() |
| | | if self.has_children: |
| | | raise RuntimeError("This writer points out a directory") |
| | | if key in self.keys: |
| | |
| | | from typing import Union |
| | | |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.read_text import read_2column_text |
| | | |
| | |
| | | """ |
| | | |
| | | def __init__(self, outdir: Union[Path, str], scpfile: Union[Path, str]): |
| | | assert check_argument_types() |
| | | self.dir = Path(outdir) |
| | | self.dir.mkdir(parents=True, exist_ok=True) |
| | | scpfile = Path(scpfile) |
| | |
| | | """ |
| | | |
| | | def __init__(self, fname: Union[Path, str]): |
| | | assert check_argument_types() |
| | | self.fname = Path(fname) |
| | | self.data = read_2column_text(fname) |
| | | |
| | |
| | | from typing import Union |
| | | |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.read_text import load_num_sequence_text |
| | | |
| | |
| | | dtype: Union[str, np.dtype] = "float32", |
| | | loader_type: str = "csv_int", |
| | | ): |
| | | assert check_argument_types() |
| | | shape_file = Path(shape_file) |
| | | self.utt2shape = load_num_sequence_text(shape_file, loader_type) |
| | | self.dtype = np.dtype(dtype) |
| | |
| | | dtype: Union[str, np.dtype] = "int64", |
| | | loader_type: str = "csv_int", |
| | | ): |
| | | assert check_argument_types() |
| | | shape_file = Path(shape_file) |
| | | self.utt2shape = load_num_sequence_text(shape_file, loader_type) |
| | | self.dtype = np.dtype(dtype) |
| | |
| | | from typing import List |
| | | from typing import Union |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | |
| | | def read_2column_text(path: Union[Path, str]) -> Dict[str, str]: |
| | |
| | | {'key1': '/some/path/a.wav', 'key2': '/some/path/b.wav'} |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | data = {} |
| | | with Path(path).open("r", encoding="utf-8") as f: |
| | |
| | | >>> d = load_num_sequence_text('text') |
| | | >>> np.testing.assert_array_equal(d["key1"], np.array([1, 2, 3])) |
| | | """ |
| | | assert check_argument_types() |
| | | if loader_type == "text_int": |
| | | delimiter = " " |
| | | dtype = int |
| | |
| | | import numpy as np |
| | | import soundfile |
| | | import librosa |
| | | from typeguard import check_argument_types |
| | | |
| | | import torch |
| | | import torchaudio |
| | |
| | | dest_sample_rate: int = 16000, |
| | | speed_perturb: Union[list, tuple] = None, |
| | | ): |
| | | assert check_argument_types() |
| | | self.fname = fname |
| | | self.dtype = dtype |
| | | self.always_2d = always_2d |
| | |
| | | format="wav", |
| | | dtype=None, |
| | | ): |
| | | assert check_argument_types() |
| | | self.dir = Path(outdir) |
| | | self.dir.mkdir(parents=True, exist_ok=True) |
| | | scpfile = Path(scpfile) |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.iterators.abs_iter_factory import AbsIterFactory |
| | | from funasr.iterators.sequence_iter_factory import SequenceIterFactory |
| | |
| | | collate_fn=None, |
| | | pin_memory: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | assert all(len(x) == 1 for x in batches), "batch-size must be 1" |
| | | |
| | | self.per_sample_iter_factory = SequenceIterFactory( |
| | |
| | | from typing import Iterator |
| | | |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.iterators.abs_iter_factory import AbsIterFactory |
| | | |
| | |
| | | seed: int = 0, |
| | | shuffle: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | self.build_funcs = list(build_funcs) |
| | | self.seed = seed |
| | | self.shuffle = shuffle |
| | |
| | | |
| | | import numpy as np |
| | | from torch.utils.data import DataLoader |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.iterators.abs_iter_factory import AbsIterFactory |
| | | from funasr.samplers.abs_sampler import AbsSampler |
| | |
| | | collate_fn=None, |
| | | pin_memory: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | if not isinstance(batches, AbsSampler): |
| | | self.sampler = RawSampler(batches) |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | |
| | | norm_vars: bool = True, |
| | | eps: float = 1.0e-20, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.norm_means = norm_means |
| | | self.norm_vars = norm_vars |
| | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typing import Optional |
| | | from typing import Tuple |
| | | |
| | |
| | | hop_length: int = 128, |
| | | center: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | |
| | | self.win_length = win_length |
| | |
| | | import math |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typing import Sequence |
| | | from typing import Union |
| | | |
| | |
| | | dim: Union[int, str] = "time", |
| | | replace_with_zero: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | if isinstance(mask_width_range, int): |
| | | mask_width_range = (0, mask_width_range) |
| | | if len(mask_width_range) != 2: |
| | |
| | | dim: Union[int, str] = "time", |
| | | replace_with_zero: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | if isinstance(mask_width_ratio_range, float): |
| | | mask_width_ratio_range = (0.0, mask_width_ratio_range) |
| | | if len(mask_width_ratio_range) != 2: |
| | |
| | | replace_with_zero: bool = True, |
| | | lfr_rate: int = 1, |
| | | ): |
| | | assert check_argument_types() |
| | | if isinstance(mask_width_range, int): |
| | | mask_width_range = (0, mask_width_range) |
| | | if len(mask_width_range) != 2: |
| | |
| | | """Sinc convolutions.""" |
| | | import math |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typing import Union |
| | | |
| | | |
| | |
| | | window_func: Window function on the filter, one of ["hamming", "none"]. |
| | | fs (str, int, float): Sample rate of the input data |
| | | """ |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | window_funcs = { |
| | | "none": self.none_window, |
| | |
| | | torch.Tensor: Filter start frequencíes. |
| | | torch.Tensor: Filter stop frequencies. |
| | | """ |
| | | assert check_argument_types() |
| | | # min and max bandpass edge frequencies |
| | | min_frequency = torch.tensor(30.0) |
| | | max_frequency = torch.tensor(fs * 0.5) |
| | |
| | | torch.Tensor: Filter start frequencíes. |
| | | torch.Tensor: Filter stop frequencíes. |
| | | """ |
| | | assert check_argument_types() |
| | | # min and max BARK center frequencies by approximation |
| | | min_center_frequency = torch.tensor(70.0) |
| | | max_center_frequency = torch.tensor(fs * 0.45) |
| | |
| | | |
| | | import torch |
| | | from torch_complex.tensor import ComplexTensor |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.layers.complex_utils import is_complex |
| | |
| | | normalized: bool = False, |
| | | onesided: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.n_fft = n_fft |
| | | if win_length is None: |
| | |
| | | from typing import Tuple |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | |
| | | norm_vars: bool = False, |
| | | eps: float = 1.0e-20, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.norm_means = norm_means |
| | | self.norm_vars = norm_vars |
| | |
| | | from io import BytesIO |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typing import Collection |
| | | |
| | | from funasr.train.reporter import Reporter |
| | |
| | | nbest: Number of best model files to be averaged |
| | | suffix: A suffix added to the averaged model file name |
| | | """ |
| | | assert check_argument_types() |
| | | if isinstance(nbest, int): |
| | | nbests = [nbest] |
| | | else: |
| | |
| | | import torch |
| | | from torch.nn.parallel import data_parallel |
| | | from torch.utils.data import DataLoader |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.datadir_writer import DatadirWriter |
| | | from funasr.fileio.npy_scp import NpyScpWriter |
| | |
| | | This method is used before executing train(). |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | npy_scp_writers = {} |
| | | for itr, mode in zip([train_iter, valid_iter], ["train", "valid"]): |
| | |
| | | |
| | | import torch |
| | | import torch.nn.functional as F |
| | | from typeguard import check_argument_types |
| | | |
| | | |
| | | class CTC(torch.nn.Module): |
| | |
| | | reduce: bool = True, |
| | | ignore_nan_grad: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | eprojs = encoder_output_size |
| | | self.dropout_rate = dropout_rate |
| | |
| | | if ignore_nan_grad: |
| | | logging.warning("ignore_nan_grad option is not supported for warp_ctc") |
| | | self.ctc_loss = warp_ctc.CTCLoss(size_average=True, reduce=reduce) |
| | | |
| | | elif self.ctc_type == "gtnctc": |
| | | from espnet.nets.pytorch_backend.gtn_ctc import GTNCTCLossFunction |
| | | |
| | | self.ctc_loss = GTNCTCLossFunction.apply |
| | | else: |
| | | raise ValueError( |
| | | f'ctc_type must be "builtin" or "warpctc": {self.ctc_type}' |
| | |
| | | from typing import Tuple |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | | from funasr.models.encoder.abs_encoder import AbsEncoder |
| | |
| | | preencoder: Optional[AbsPreEncoder], |
| | | encoder: AbsEncoder, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | super().__init__() |
| | | |
| | |
| | | |
| | | from funasr.modules.streaming_utils import utils as myutils |
| | | from funasr.models.decoder.transformer_decoder import BaseTransformerDecoder |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.attention import MultiHeadedAttentionSANMDecoder, MultiHeadedAttentionCrossAtt |
| | | from funasr.modules.embedding import PositionalEncoding |
| | |
| | | kernel_size: int = 21, |
| | | sanm_shfit: int = 0, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__( |
| | | vocab_size=vocab_size, |
| | | encoder_output_size=encoder_output_size, |
| | |
| | | import numpy as np |
| | | import torch |
| | | import torch.nn.functional as F |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.modules.nets_utils import to_device |
| | |
| | | att_conf: dict = get_default_kwargs(build_attention_list), |
| | | ): |
| | | # FIXME(kamo): The parts of num_spk should be refactored more more more |
| | | assert check_argument_types() |
| | | if rnn_type not in {"lstm", "gru"}: |
| | | raise ValueError(f"Not supported: rnn_type={rnn_type}") |
| | | |
| | |
| | | from typing import List, Optional, Tuple |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.beam_search.beam_search_transducer import Hypothesis |
| | | from funasr.models.specaug.specaug import SpecAug |
| | |
| | | """Construct a RNNDecoder object.""" |
| | | super().__init__() |
| | | |
| | | assert check_argument_types() |
| | | |
| | | if rnn_type not in ("lstm", "gru"): |
| | | raise ValueError(f"Not supported: rnn_type={rnn_type}") |
| | |
| | | |
| | | from funasr.modules.streaming_utils import utils as myutils |
| | | from funasr.models.decoder.transformer_decoder import BaseTransformerDecoder |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.attention import MultiHeadedAttentionSANMDecoder, MultiHeadedAttentionCrossAtt |
| | | from funasr.modules.embedding import PositionalEncoding |
| | |
| | | tf2torch_tensor_name_prefix_tf: str = "seq2seq/decoder", |
| | | embed_tensor_name_prefix_tf: str = None, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__( |
| | | vocab_size=vocab_size, |
| | | encoder_output_size=encoder_output_size, |
| | |
| | | tf2torch_tensor_name_prefix_torch: str = "decoder", |
| | | tf2torch_tensor_name_prefix_tf: str = "seq2seq/decoder", |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__( |
| | | vocab_size=vocab_size, |
| | | encoder_output_size=encoder_output_size, |
| | |
| | | |
| | | import torch |
| | | from torch import nn |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.decoder.abs_decoder import AbsDecoder |
| | | from funasr.modules.attention import MultiHeadedAttention |
| | |
| | | pos_enc_class=PositionalEncoding, |
| | | normalize_before: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | attention_dim = encoder_output_size |
| | | |
| | |
| | | normalize_before: bool = True, |
| | | concat_after: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__( |
| | | vocab_size=vocab_size, |
| | | encoder_output_size=encoder_output_size, |
| | |
| | | concat_after: bool = False, |
| | | embeds_id: int = -1, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__( |
| | | vocab_size=vocab_size, |
| | | encoder_output_size=encoder_output_size, |
| | |
| | | conv_kernel_length: Sequence[int] = (11, 11, 11, 11, 11, 11), |
| | | conv_usebias: int = False, |
| | | ): |
| | | assert check_argument_types() |
| | | if len(conv_kernel_length) != num_blocks: |
| | | raise ValueError( |
| | | "conv_kernel_length must have equal number of values to num_blocks: " |
| | |
| | | conv_kernel_length: Sequence[int] = (11, 11, 11, 11, 11, 11), |
| | | conv_usebias: int = False, |
| | | ): |
| | | assert check_argument_types() |
| | | if len(conv_kernel_length) != num_blocks: |
| | | raise ValueError( |
| | | "conv_kernel_length must have equal number of values to num_blocks: " |
| | |
| | | conv_kernel_length: Sequence[int] = (11, 11, 11, 11, 11, 11), |
| | | conv_usebias: int = False, |
| | | ): |
| | | assert check_argument_types() |
| | | if len(conv_kernel_length) != num_blocks: |
| | | raise ValueError( |
| | | "conv_kernel_length must have equal number of values to num_blocks: " |
| | |
| | | conv_kernel_length: Sequence[int] = (11, 11, 11, 11, 11, 11), |
| | | conv_usebias: int = False, |
| | | ): |
| | | assert check_argument_types() |
| | | if len(conv_kernel_length) != num_blocks: |
| | | raise ValueError( |
| | | "conv_kernel_length must have equal number of values to num_blocks: " |
| | |
| | | pos_enc_class=PositionalEncoding, |
| | | normalize_before: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | attention_dim = encoder_output_size |
| | | |
| | |
| | | normalize_before: bool = True, |
| | | concat_after: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__( |
| | | vocab_size=vocab_size, |
| | | encoder_output_size=encoder_output_size, |
| | |
| | | from typing import Union |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | | from funasr.losses.label_smoothing_loss import ( |
| | |
| | | preencoder: Optional[AbsPreEncoder] = None, |
| | | postencoder: Optional[AbsPostEncoder] = None, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | import numpy as np |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | | from funasr.models.ctc import CTC |
| | |
| | | preencoder: Optional[AbsPreEncoder] = None, |
| | | postencoder: Optional[AbsPostEncoder] = None, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | from typing import Union |
| | | import logging |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.e2e_asr_common import ErrorCalculator |
| | | from funasr.modules.nets_utils import th_accuracy |
| | |
| | | sym_blank: str = "<blank>", |
| | | preencoder: Optional[AbsPreEncoder] = None, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert rnnt_decoder is None, "Not implemented" |
| | | |
| | |
| | | import torch |
| | | import random |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | | from funasr.losses.label_smoothing_loss import ( |
| | |
| | | postencoder: Optional[AbsPostEncoder] = None, |
| | | use_1st_decoder_loss: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | postencoder: Optional[AbsPostEncoder] = None, |
| | | use_1st_decoder_loss: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | preencoder: Optional[AbsPreEncoder] = None, |
| | | postencoder: Optional[AbsPostEncoder] = None, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | preencoder: Optional[AbsPreEncoder] = None, |
| | | postencoder: Optional[AbsPostEncoder] = None, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | preencoder: Optional[AbsPreEncoder] = None, |
| | | postencoder: Optional[AbsPostEncoder] = None, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | |
| | | import torch |
| | | from packaging.version import parse as V |
| | | from typeguard import check_argument_types |
| | | from funasr.losses.label_smoothing_loss import ( |
| | | LabelSmoothingLoss, # noqa: H301 |
| | | ) |
| | |
| | | ) -> None: |
| | | """Construct an ESPnetASRTransducerModel object.""" |
| | | super().__init__() |
| | | |
| | | assert check_argument_types() |
| | | |
| | | # The following labels ID are reserved: 0 (blank) and vocab_size - 1 (sos/eos) |
| | | self.blank_id = 0 |
| | |
| | | """Construct an ESPnetASRTransducerModel object.""" |
| | | super().__init__() |
| | | |
| | | assert check_argument_types() |
| | | |
| | | # The following labels ID are reserved: 0 (blank) and vocab_size - 1 (sos/eos) |
| | | self.blank_id = 0 |
| | | |
| | |
| | | loss_lm = self._calc_lm_loss(decoder_out, target) |
| | | |
| | | loss_trans = loss_trans_utt + loss_trans_chunk |
| | | loss_ctc = loss_ctc + loss_ctc_chunk |
| | | loss_ctc = loss_ctc + loss_ctc_chunk |
| | | loss_ctc = loss_att + loss_att_chunk |
| | | |
| | | loss = ( |
| | |
| | | ignore_label=self.ignore_id, |
| | | ) |
| | | |
| | | return loss_att, acc_att |
| | | return loss_att, acc_att |
| | |
| | | import numpy as np |
| | | import torch |
| | | import torch.nn as nn |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.frontend.wav_frontend import WavFrontendMel23 |
| | | from funasr.modules.eend_ola.encoder import EENDOLATransformerEncoder |
| | |
| | | mapping_dict=None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | super().__init__() |
| | | self.frontend = frontend |
| | |
| | | import numpy as np |
| | | import torch |
| | | from torch.nn import functional as F |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.nets_utils import to_device |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | |
| | | inter_score_loss_weight: float = 0.0, |
| | | inputs_type: str = "raw", |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | super().__init__() |
| | | |
| | |
| | | |
| | | import torch |
| | | import torch.nn.functional as F |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | | from funasr.losses.label_smoothing_loss import ( |
| | |
| | | sym_blank: str = "<blank>", |
| | | extract_feats_in_collect_stats: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | from typing import Union |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | | from funasr.losses.label_smoothing_loss import ( |
| | |
| | | pooling_layer: torch.nn.Module, |
| | | decoder: AbsDecoder, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | super().__init__() |
| | | # note that eos is the same as sos (equivalent ID) |
| | |
| | | |
| | | import torch |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.encoder.abs_encoder import AbsEncoder |
| | | from funasr.models.frontend.abs_frontend import AbsFrontend |
| | |
| | | predictor_bias: int = 0, |
| | | token_list=None, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | super().__init__() |
| | | # note that eos is the same as sos (equivalent ID) |
| | |
| | | from typing import Union |
| | | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.e2e_asr_common import ErrorCalculator |
| | | from funasr.modules.nets_utils import th_accuracy |
| | |
| | | postencoder: Optional[AbsPostEncoder] = None, |
| | | encoder1_encoder2_joint_training: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | assert 0.0 <= ctc_weight <= 1.0, ctc_weight |
| | | assert 0.0 <= interctc_weight < 1.0, interctc_weight |
| | | |
| | |
| | | |
| | | import torch |
| | | from torch import nn |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.ctc import CTC |
| | | from funasr.modules.attention import ( |
| | |
| | | interctc_use_conditioning: bool = False, |
| | | stochastic_depth_rate: Union[float, List[float]] = 0.0, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = output_size |
| | | |
| | |
| | | """Construct an Encoder object.""" |
| | | super().__init__() |
| | | |
| | | assert check_argument_types() |
| | | |
| | | self.embed = StreamingConvInput( |
| | | input_size, |
| | |
| | | import torch.distributed as dist |
| | | import torch.nn as nn |
| | | import torch.nn.functional as F |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.encoder.abs_encoder import AbsEncoder |
| | | from funasr.modules.data2vec.data_utils import compute_mask_indices |
| | |
| | | # FP16 optimization |
| | | required_seq_len_multiple: int = 2, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | |
| | | # ConvFeatureExtractionModel |
| | |
| | | import torch |
| | | from torch import nn |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.encoder.encoder_layer_mfcca import EncoderLayer |
| | | from funasr.modules.nets_utils import get_activation |
| | |
| | | cnn_module_kernel: int = 31, |
| | | padding_idx: int = -1, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = output_size |
| | | |
| | |
| | | import torch |
| | | import torch.nn as nn |
| | | from torch.nn import functional as F |
| | | from typeguard import check_argument_types |
| | | import numpy as np |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.modules.layer_norm import LayerNorm |
| | |
| | | tf2torch_tensor_name_prefix_torch: str = "speaker_encoder", |
| | | tf2torch_tensor_name_prefix_tf: str = "EAND/speaker_encoder", |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = num_units |
| | | |
| | |
| | | import torch |
| | | import torch.nn as nn |
| | | from torch.nn import functional as F |
| | | from typeguard import check_argument_types |
| | | import numpy as np |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.modules.layer_norm import LayerNorm |
| | |
| | | import torch |
| | | import torch.nn as nn |
| | | from funasr.modules.streaming_utils.chunk_utilis import overlap_chunk |
| | | from typeguard import check_argument_types |
| | | import numpy as np |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.modules.attention import MultiHeadSelfAttention, MultiHeadedAttentionSANM |
| | |
| | | tf2torch_tensor_name_prefix_tf: str = "seq2seq/encoder", |
| | | out_units=None, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = output_size |
| | | |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.modules.rnn.encoders import RNN |
| | |
| | | dropout: float = 0.0, |
| | | subsample: Optional[Sequence[int]] = (2, 2, 1, 1), |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = output_size |
| | | self.rnn_type = rnn_type |
| | |
| | | import torch.nn as nn |
| | | import torch.nn.functional as F |
| | | from funasr.modules.streaming_utils.chunk_utilis import overlap_chunk |
| | | from typeguard import check_argument_types |
| | | import numpy as np |
| | | from funasr.torch_utils.device_funcs import to_device |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | |
| | | tf2torch_tensor_name_prefix_torch: str = "encoder", |
| | | tf2torch_tensor_name_prefix_tf: str = "seq2seq/encoder", |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = output_size |
| | | |
| | |
| | | tf2torch_tensor_name_prefix_torch: str = "encoder", |
| | | tf2torch_tensor_name_prefix_tf: str = "seq2seq/encoder", |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = output_size |
| | | |
| | |
| | | sanm_shfit : int = 0, |
| | | selfattention_layer_type: str = "sanm", |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = output_size |
| | | |
| | |
| | | |
| | | import torch |
| | | from torch import nn |
| | | from typeguard import check_argument_types |
| | | import logging |
| | | |
| | | from funasr.models.ctc import CTC |
| | |
| | | interctc_layer_idx: List[int] = [], |
| | | interctc_use_conditioning: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = output_size |
| | | |
| | |
| | | import numpy as np |
| | | import torch |
| | | from torch_complex.tensor import ComplexTensor |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.layers.log_mel import LogMel |
| | | from funasr.layers.stft import Stft |
| | |
| | | apply_stft: bool = True, |
| | | use_channel: int = None, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | if isinstance(fs, str): |
| | | fs = humanfriendly.parse_size(fs) |
| | |
| | | cmvn_file: str = None, |
| | | mc: bool = True |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | if isinstance(fs, str): |
| | | fs = humanfriendly.parse_size(fs) |
| | |
| | | from funasr.models.frontend.s3prl import S3prlFrontend |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typing import Tuple |
| | | |
| | | |
| | |
| | | self, frontends=None, align_method="linear_projection", proj_dim=100, fs=16000 |
| | | ): |
| | | |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.align_method = ( |
| | | align_method # fusing method : linear_projection only for now |
| | |
| | | |
| | | import humanfriendly |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.models.frontend.abs_frontend import AbsFrontend |
| | | from funasr.modules.frontends.frontend import Frontend |
| | |
| | | download_dir: str = None, |
| | | multilayer_feature: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | if isinstance(fs, str): |
| | | fs = humanfriendly.parse_size(fs) |
| | |
| | | import torch |
| | | import torchaudio.compliance.kaldi as kaldi |
| | | from torch.nn.utils.rnn import pad_sequence |
| | | from typeguard import check_argument_types |
| | | |
| | | import funasr.models.frontend.eend_ola_feature as eend_ola_feature |
| | | from funasr.models.frontend.abs_frontend import AbsFrontend |
| | |
| | | snip_edges: bool = True, |
| | | upsacle_samples: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.fs = fs |
| | | self.window = window |
| | |
| | | snip_edges: bool = True, |
| | | upsacle_samples: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.fs = fs |
| | | self.window = window |
| | |
| | | lfr_m: int = 1, |
| | | lfr_n: int = 1, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.fs = fs |
| | | self.frame_length = frame_length |
| | |
| | | |
| | | from funasr.models.frontend.abs_frontend import AbsFrontend |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typing import Tuple |
| | | |
| | | |
| | |
| | | padding: Padding (placeholder, currently not implemented). |
| | | fs: Sampling rate (placeholder for compatibility, not used). |
| | | """ |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.fs = fs |
| | | self.win_length = win_length |
| | |
| | | |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.models.postencoder.abs_postencoder import AbsPostEncoder |
| | | from typeguard import check_argument_types |
| | | from typing import Tuple |
| | | |
| | | import copy |
| | |
| | | model_name_or_path: str, |
| | | ): |
| | | """Initialize the module.""" |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | |
| | | if not is_transformers_available: |
| | |
| | | """Linear Projection.""" |
| | | |
| | | from funasr.models.preencoder.abs_preencoder import AbsPreEncoder |
| | | from typeguard import check_argument_types |
| | | from typing import Tuple |
| | | |
| | | import torch |
| | |
| | | output_size: int, |
| | | ): |
| | | """Initialize the module.""" |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | |
| | | self.output_dim = output_size |
| | |
| | | from funasr.layers.sinc_conv import SincConv |
| | | import humanfriendly |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typing import Optional |
| | | from typing import Tuple |
| | | from typing import Union |
| | |
| | | windowing_type: Choice of windowing function. |
| | | scale_type: Choice of filter-bank initialization scale. |
| | | """ |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | if isinstance(fs, str): |
| | | fs = humanfriendly.parse_size(fs) |
| | |
| | | dropout_probability: Dropout probability. |
| | | shape (tuple, list): Shape of input tensors. |
| | | """ |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | if shape is None: |
| | | shape = (0, 2, 1) |
| | |
| | | |
| | | import torch |
| | | import torch.nn as nn |
| | | from typeguard import check_argument_types |
| | | from funasr.train.abs_model import AbsLM |
| | | |
| | | |
| | |
| | | rnn_type: str = "lstm", |
| | | ignore_id: int = 0, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | |
| | | ninp = unit |
| | |
| | | import numpy as np |
| | | import time |
| | | import torch |
| | | from eend.utils.power import create_powerlabel |
| | | from funasr.modules.eend_ola.utils.power import create_powerlabel |
| | | from itertools import combinations |
| | | |
| | | metrics = [ |
| | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | |
| | | class SGD(torch.optim.SGD): |
| | |
| | | weight_decay: float = 0.0, |
| | | nesterov: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__( |
| | | params, |
| | | lr=lr, |
| New file |
| | |
| | | # FunASR离线文件转写服务开发指南 |
| | | |
| | | FunASR提供可一键本地或者云端服务器部署的中文离线文件转写服务,内核为FunASR已开源runtime-SDK。FunASR-runtime结合了达摩院语音实验室在Modelscope社区开源的语音端点检测(VAD)、Paraformer-large语音识别(ASR)、标点检测(PUNC) 等相关能力,可以准确、高效的对音频进行高并发转写。 |
| | | |
| | | 本文档为FunASR离线文件转写服务开发指南。如果您想快速体验离线文件转写服务,请参考FunASR离线文件转写服务一键部署示例([点击此处](./SDK_tutorial.md))。 |
| | | |
| | | ## Docker安装 |
| | | |
| | | 下述步骤为手动安装docker及docker镜像的步骤,如您docker镜像已启动,可以忽略本步骤: |
| | | |
| | | ### docker环境安装 |
| | | ```shell |
| | | # Ubuntu: |
| | | curl -fsSL https://test.docker.com -o test-docker.sh |
| | | sudo sh test-docker.sh |
| | | # Debian: |
| | | curl -fsSL https://get.docker.com -o get-docker.sh |
| | | sudo sh get-docker.sh |
| | | # CentOS: |
| | | curl -fsSL https://get.docker.com | bash -s docker --mirror Aliyun |
| | | # MacOS: |
| | | brew install --cask --appdir=/Applications docker |
| | | ``` |
| | | |
| | | 安装详见:https://alibaba-damo-academy.github.io/FunASR/en/installation/docker.html |
| | | |
| | | ### docker启动 |
| | | |
| | | ```shell |
| | | sudo systemctl start docker |
| | | ``` |
| | | |
| | | ### 镜像拉取及启动 |
| | | |
| | | 通过下述命令拉取并启动FunASR runtime-SDK的docker镜像: |
| | | |
| | | ```shell |
| | | sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.0.1 |
| | | |
| | | sudo docker run -p 10095:10095 -it --privileged=true -v /root:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.0.1 |
| | | ``` |
| | | |
| | | 命令参数介绍: |
| | | ```text |
| | | -p <宿主机端口>:<映射到docker端口> |
| | | 如示例,宿主机(ecs)端口10095映射到docker端口10095上。前提是确保ecs安全规则打开了10095端口。 |
| | | -v <宿主机路径>:<挂载至docker路径> |
| | | 如示例,宿主机路径/root挂载至docker路径/workspace/models |
| | | ``` |
| | | |
| | | |
| | | ## 服务端启动 |
| | | |
| | | docker启动之后,启动 funasr-wss-server服务程序: |
| | | |
| | | funasr-wss-server支持从Modelscope下载模型,需要同时设置模型下载地址(--download-model-dir)及model ID(--model-dir、--vad-dir、--punc-dir),示例如下:
| | | ```shell |
| | | cd /workspace/FunASR/funasr/runtime/websocket/build/bin |
| | | ./funasr-wss-server \ |
| | | --download-model-dir /workspace/models \ |
| | | --model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx \ |
| | | --vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx \ |
| | | --punc-dir damo/punc_ct-transformer_zh-cn-common-vocab272727-onnx \ |
| | | --decoder-thread-num 32 \ |
| | | --io-thread-num 8 \ |
| | | --port 10095 \ |
| | | --certfile ../../../ssl_key/server.crt \ |
| | | --keyfile ../../../ssl_key/server.key |
| | | ``` |
| | | 命令参数介绍: |
| | | ```text |
| | | --download-model-dir #模型下载地址,通过设置model ID从Modelscope下载模型 |
| | | --model-dir # modelscope model ID |
| | | --quantize # True为量化ASR模型,False为非量化ASR模型,默认是True |
| | | --vad-dir # modelscope model ID |
| | | --vad-quant # True为量化VAD模型,False为非量化VAD模型,默认是True |
| | | --punc-dir # modelscope model ID |
| | | --punc-quant # True为量化PUNC模型,False为非量化PUNC模型,默认是True |
| | | --port # 服务端监听的端口号,默认为 10095 |
| | | --decoder-thread-num # 服务端启动的推理线程数,默认为 8 |
| | | --io-thread-num # 服务端启动的IO线程数,默认为 1 |
| | | --certfile <string> # ssl的证书文件,默认为:../../../ssl_key/server.crt |
| | | --keyfile <string> # ssl的密钥文件,默认为:../../../ssl_key/server.key |
| | | ``` |
| | | |
| | | funasr-wss-server同时也支持从本地路径加载模型(本地模型资源准备详见[模型资源准备](#anchor-1)),需要设置模型本地路径(--model-dir、--vad-dir、--punc-dir),示例如下:
| | | ```shell |
| | | cd /workspace/FunASR/funasr/runtime/websocket/build/bin |
| | | |
| | | ./funasr-wss-server \ |
| | | --model-dir /workspace/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx \ |
| | | --vad-dir /workspace/models/damo/speech_fsmn_vad_zh-cn-16k-common-onnx \ |
| | | --punc-dir /workspace/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-onnx \ |
| | | --decoder-thread-num 32 \ |
| | | --io-thread-num 8 \ |
| | | --port 10095 \ |
| | | --certfile ../../../ssl_key/server.crt \ |
| | | --keyfile ../../../ssl_key/server.key |
| | | ``` |
| | | 命令参数介绍: |
| | | ```text |
| | | --model-dir # ASR模型路径,默认为:/workspace/models/asr |
| | | --quantize # True为量化ASR模型,False为非量化ASR模型,默认是True |
| | | --vad-dir # VAD模型路径,默认为:/workspace/models/vad |
| | | --vad-quant # True为量化VAD模型,False为非量化VAD模型,默认是True |
| | | --punc-dir # PUNC模型路径,默认为:/workspace/models/punc |
| | | --punc-quant # True为量化PUNC模型,False为非量化PUNC模型,默认是True |
| | | --port # 服务端监听的端口号,默认为 10095 |
| | | --decoder-thread-num # 服务端启动的推理线程数,默认为 8 |
| | | --io-thread-num # 服务端启动的IO线程数,默认为 1 |
| | | --certfile <string> # ssl的证书文件,默认为:../../../ssl_key/server.crt |
| | | --keyfile <string> # ssl的密钥文件,默认为:../../../ssl_key/server.key |
| | | ``` |
| | | |
| | | ## <a id="anchor-1">模型资源准备</a> |
| | | |
| | | 如果您选择通过funasr-wss-server从Modelscope下载模型,可以跳过本步骤。 |
| | | |
| | | FunASR离线文件转写服务中的vad、asr和punc模型资源均来自Modelscope,模型地址详见下表: |
| | | |
| | | | 模型 | Modelscope链接 | |
| | | |------|------------------------------------------------------------------------------------------------------------------| |
| | | | ASR  | https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary |
| | | | VAD  | https://www.modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary                           |
| | | | PUNC | https://www.modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/summary | |
| | | |
| | | 离线文件转写服务中部署的是量化后的ONNX模型,下面介绍下如何导出ONNX模型及其量化:您可以选择从Modelscope导出ONNX模型、从本地文件导出ONNX模型或者从finetune后的资源导出模型: |
| | | |
| | | ### 从Modelscope导出ONNX模型 |
| | | |
| | | 从Modelscope网站下载对应model name的模型,然后导出量化后的ONNX模型: |
| | | |
| | | ```shell |
| | | python -m funasr.export.export_model --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type onnx --quantize True |
| | | ``` |
| | | |
| | | 命令参数介绍: |
| | | ```text |
| | | --model-name Modelscope上的模型名称,例如damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch |
| | | --export-dir ONNX模型导出地址 |
| | | --type 模型类型,目前支持 ONNX、torch |
| | | --quantize int8模型量化 |
| | | ``` |
| | | |
| | | ### 从本地文件导出ONNX模型 |
| | | |
| | | 设置model name为模型本地路径,导出量化后的ONNX模型: |
| | | |
| | | ```shell |
| | | python -m funasr.export.export_model --model-name /workspace/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type onnx --quantize True |
| | | ``` |
| | | 命令参数介绍: |
| | | ```text |
| | | --model-name 模型本地路径,例如/workspace/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch |
| | | --export-dir ONNX模型导出地址 |
| | | --type 模型类型,目前支持 ONNX、torch |
| | | --quantize int8模型量化 |
| | | ``` |
| | | |
| | | ### 从finetune后的资源导出模型 |
| | | |
| | | 假如您想部署finetune后的模型,可以参考如下步骤: |
| | | |
| | | 将您finetune后需要部署的模型(例如10epoch.pb),重命名为model.pb,并将原modelscope中模型model.pb替换掉,假如替换后的模型路径为/path/to/finetune/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch,通过下述命令把finetune后的模型转成onnx模型: |
| | | |
| | | ```shell |
| | | python -m funasr.export.export_model --model-name /path/to/finetune/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type onnx --quantize True |
| | | ``` |
| | | |
| | | ## 客户端启动 |
| | | |
| | | 在服务器上完成FunASR离线文件转写服务部署以后,可以通过如下的步骤来测试和使用离线文件转写服务。目前FunASR-bin支持多种方式启动客户端,如下是基于python-client、c++-client的命令行实例及自定义客户端Websocket通信协议: |
| | | |
| | | ### python-client |
| | | ```shell |
| | | python wss_client_asr.py --host "127.0.0.1" --port 10095 --mode offline --audio_in "./data/wav.scp" --send_without_sleep --output_dir "./results" |
| | | ``` |
| | | 命令参数介绍: |
| | | ```text |
| | | --host # 服务端ip地址,本机测试可设置为 127.0.0.1 |
| | | --port # 服务端监听端口号 |
| | | --audio_in # 音频输入,输入可以是:wav路径 或者 wav.scp路径(kaldi格式的wav list,wav_id \t wav_path) |
| | | --output_dir # 识别结果输出路径 |
| | | --ssl # 是否使用SSL加密,默认使用 |
| | | --mode # offline模式 |
| | | ``` |
| | | |
| | | ### c++-client: |
| | | ```shell |
| | | ./funasr-wss-client --server-ip 127.0.0.1 --port 10095 --wav-path test.wav --thread-num 1 --is-ssl 1
| | | ``` |
| | | 命令参数介绍: |
| | | ```text |
| | | --server-ip # 服务端ip地址,本机测试可设置为 127.0.0.1 |
| | | --port # 服务端监听端口号 |
| | | --wav-path # 音频输入,输入可以是:wav路径 或者 wav.scp路径(kaldi格式的wav list,wav_id \t wav_path) |
| | | --thread-num # 客户端线程数 |
| | | --is-ssl # 是否使用SSL加密,默认使用 |
| | | ``` |
| | | |
| | | ### 自定义客户端: |
| | | |
| | | 如果您想定义自己的client,websocket通信协议为: |
| | | |
| | | ```text |
| | | # 首次通信 |
| | | {"mode": "offline", "wav_name": wav_name, "is_speaking": True} |
| | | # 发送wav数据 |
| | | bytes数据 |
| | | # 发送结束标志 |
| | | {"is_speaking": False} |
| | | ``` |
| | | |
| | | ## 如何定制服务部署 |
| | | |
| | | FunASR-runtime的代码已开源,如果服务端和客户端不能很好的满足您的需求,您可以根据自己的需求进行进一步的开发: |
| | | ### c++ 客户端: |
| | | |
| | | https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/websocket |
| | | |
| | | ### python 客户端: |
| | | |
| | | https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/websocket |
| | | ### c++ 服务端: |
| | | |
| | | #### VAD |
| | | ```c++ |
| | | // VAD模型的使用分为FsmnVadInit和FsmnVadInfer两个步骤: |
| | | FUNASR_HANDLE vad_hanlde=FsmnVadInit(model_path, thread_num); |
| | | // 其中:model_path 包含"model-dir"、"quantize",thread_num为onnx线程数; |
| | | FUNASR_RESULT result=FsmnVadInfer(vad_hanlde, wav_file.c_str(), NULL, 16000); |
| | | // 其中:vad_hanlde为FsmnVadInit返回值,wav_file为音频路径,sampling_rate为采样率(默认16k)
| | | ``` |
| | | |
| | | 使用示例详见:https://github.com/alibaba-damo-academy/FunASR/blob/main/funasr/runtime/onnxruntime/bin/funasr-onnx-offline-vad.cpp |
| | | |
| | | #### ASR |
| | | ```text |
| | | // ASR模型的使用分为FunOfflineInit和FunOfflineInfer两个步骤: |
| | | FUNASR_HANDLE asr_hanlde=FunOfflineInit(model_path, thread_num); |
| | | // 其中:model_path 包含"model-dir"、"quantize",thread_num为onnx线程数; |
| | | FUNASR_RESULT result=FunOfflineInfer(asr_hanlde, wav_file.c_str(), RASR_NONE, NULL, 16000); |
| | | // 其中:asr_hanlde为FunOfflineInit返回值,wav_file为音频路径,sampling_rate为采样率(默认16k) |
| | | ``` |
| | | |
| | | 使用示例详见:https://github.com/alibaba-damo-academy/FunASR/blob/main/funasr/runtime/onnxruntime/bin/funasr-onnx-offline.cpp |
| | | |
| | | #### PUNC |
| | | ```text |
| | | // PUNC模型的使用分为CTTransformerInit和CTTransformerInfer两个步骤: |
| | | FUNASR_HANDLE punc_hanlde=CTTransformerInit(model_path, thread_num); |
| | | // 其中:model_path 包含"model-dir"、"quantize",thread_num为onnx线程数; |
| | | FUNASR_RESULT result=CTTransformerInfer(punc_hanlde, txt_str.c_str(), RASR_NONE, NULL); |
| | | // 其中:punc_hanlde为CTTransformerInit返回值,txt_str为文本 |
| | | ``` |
| | | 使用示例详见:https://github.com/alibaba-damo-academy/FunASR/blob/main/funasr/runtime/onnxruntime/bin/funasr-onnx-offline-punc.cpp |
| New file |
| | |
| | | # FunASR离线文件转写服务便捷部署教程 |
| | | |
| | | FunASR提供可便捷本地或者云端服务器部署的离线文件转写服务,内核为FunASR已开源runtime-SDK。集成了达摩院语音实验室在Modelscope社区开源的语音端点检测(VAD)、Paraformer-large语音识别(ASR)、标点恢复(PUNC) 等相关能力,可以准确、高效的对音频进行高并发转写。 |
| | | |
| | | ## 环境安装与启动服务 |
| | | |
| | | 环境准备与配置([点击此处](./aliyun_server_tutorial.md)) |
| | | ### 获得脚本工具并一键部署 |
| | | |
| | | 通过以下命令运行一键部署服务,按照提示逐步完成FunASR runtime-SDK服务的部署和运行。目前暂时仅支持Linux环境,其他环境参考文档[高阶开发指南]()。 |
| | | 受限于网络,funasr-runtime-deploy.sh一键部署工具的下载可能不顺利,遇到数秒还未下载进入一键部署工具的情况,请Ctrl + C 终止后再次运行以下命令。 |
| | | |
| | | ```shell |
| | | curl -O https://raw.githubusercontent.com/alibaba-damo-academy/FunASR-APP/main/TransAudio/funasr-runtime-deploy.sh ; sudo bash funasr-runtime-deploy.sh install |
| | | ``` |
| | | |
| | | #### 启动过程配置详解 |
| | | |
| | | ##### 选择FunASR Docker镜像 |
| | | 推荐选择latest使用我们的最新镜像,也可选择历史版本。 |
| | | ```text |
| | | [1/10] |
| | | Please choose the Docker image. |
| | | 1) registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-latest |
| | | 2) registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.0.1 |
| | | Enter your choice: 1 |
| | | You have chosen the Docker image: registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-latest |
| | | ``` |
| | | |
| | | ##### 选择ASR/VAD/PUNC模型 |
| | | |
| | | 你可以选择ModelScope中的模型,也可以选<model_name>自行填入ModelScope中的模型名,将会在Docker运行时自动下载。同时也可以选择<model_path>填入宿主机中的本地模型路径。 |
| | | |
| | | ```text |
| | | [2/10] |
| | | Please input [y/n] to confirm whether to automatically download model_id in ModelScope or use a local model. |
| | | [y] With the model in ModelScope, the model will be automatically downloaded to Docker(/workspace/models). |
| | | [n] Use the models on the localhost, the directory where the model is located will be mapped to Docker. |
| | | Setting confirmation[Y/n]: |
| | | You have chosen to use the model in ModelScope, please set the model ID in the next steps, and the model will be automatically downloaded during the run. |
| | | |
| | | [2.1/10] |
| | | Please select ASR model_id in ModelScope from the list below. |
| | | 1) damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx |
| | | 2) model_name |
| | | 3) model_path |
| | | Enter your choice: 1 |
| | | The model ID is damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx |
| | | The model dir in Docker is /workspace/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx |
| | | |
| | | [2.2/10] |
| | | Please select VAD model_id in ModelScope from the list below. |
| | | 1) damo/speech_fsmn_vad_zh-cn-16k-common-onnx |
| | | 2) model_name |
| | | 3) model_path |
| | | Enter your choice: 1 |
| | | The model ID is damo/speech_fsmn_vad_zh-cn-16k-common-onnx |
| | | The model dir in Docker is /workspace/models/speech_fsmn_vad_zh-cn-16k-common-onnx |
| | | |
| | | [2.3/10] |
| | | Please select PUNC model_id in ModelScope from the list below. |
| | | 1) damo/punc_ct-transformer_zh-cn-common-vocab272727-onnx |
| | | 2) model_name |
| | | 3) model_path |
| | | Enter your choice: 1 |
| | | The model ID is damo/punc_ct-transformer_zh-cn-common-vocab272727-onnx |
| | | The model dir in Docker is /workspace/models/punc_ct-transformer_zh-cn-common-vocab272727-onnx |
| | | ``` |
| | | |
| | | ##### 输入宿主机中FunASR服务可执行程序路径 |
| | | |
| | | 输入FunASR服务可执行程序的宿主机路径,Docker运行时将自动挂载到Docker中运行。默认不输入的情况下将指定Docker中默认的/workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server。 |
| | | |
| | | ```text |
| | | [3/10] |
| | | Please enter the path to the excutor of the FunASR service on the localhost. |
| | | If not set, the default /workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server in Docker is used. |
| | | Setting the path to the excutor of the FunASR service on the localhost: |
| | | Corresponding, the path of FunASR in Docker is /workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server |
| | | ``` |
| | | |
| | | ##### 设置宿主机提供给FunASR的端口 |
| | | 设置提供给Docker的宿主机端口,默认为10095。请保证此端口可用。 |
| | | ```text |
| | | [4/10] |
| | | Please input the opened port in the host used for FunASR server. |
| | | Default: 10095 |
| | | Setting the opened host port [1-65535]: |
| | | The port of the host is 10095 |
| | | ``` |
| | | |
| | | ##### 设置Docker中提供给FunASR的端口 |
| | | 设置Docker中FunASR服务使用的端口,默认为10095,此端口将与step1.4中设置的宿主机端口进行映射。
| | | ```text |
| | | [5/10]
| | | Please input port for docker mapped. |
| | | Default: 10095, the opened port of current host is 10095 |
| | | Setting the port in Docker for FunASR server [1-65535]: |
| | | The port in Docker for FunASR server is 10095 |
| | | ``` |
| | | |
| | | ##### 设置FunASR服务的推理线程数 |
| | | 设置FunASR服务的推理线程数,默认为宿主机核数,同时自动设置服务的IO线程数,为推理线程数的四分之一。 |
| | | ```text |
| | | [6/10] |
| | | Please input thread number for FunASR decoder. |
| | | Default: 1 |
| | | Setting the number of decoder thread: |
| | | |
| | | The number of decoder threads is 1 |
| | | The number of IO threads is 1 |
| | | ``` |
| | | |
| | | ##### 所有设置参数展示及确认 |
| | | |
| | | 展示前面6步设置的参数,确认则将所有参数存储到/var/funasr/config,并开始启动Docker,否则提示用户进行重新设置。 |
| | | |
| | | ```text |
| | | [7/10] |
| | | Show parameters of FunASR server setting and confirm to run ... |
| | | |
| | | The current Docker image is : registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-latest |
| | | The model will be automatically downloaded to the directory : /workspace/models |
| | | The ASR model_id used : damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx |
| | | The ASR model directory corresponds to the directory in Docker : /workspace/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx |
| | | The VAD model_id used : damo/speech_fsmn_vad_zh-cn-16k-common-onnx |
| | | The VAD model directory corresponds to the directory in Docker : /workspace/models/speech_fsmn_vad_zh-cn-16k-common-onnx |
| | | The PUNC model_id used : damo/punc_ct-transformer_zh-cn-common-vocab272727-onnx |
| | | The PUNC model directory corresponds to the directory in Docker: /workspace/models/punc_ct-transformer_zh-cn-common-vocab272727-onnx |
| | | |
| | | The path in the docker of the FunASR service executor : /workspace/FunASR/funasr/runtime/websocket/build/bin/funasr-wss-server |
| | | Set the host port used for use by the FunASR service : 10095 |
| | | Set the docker port used by the FunASR service : 10095 |
| | | Set the number of threads used for decoding the FunASR service : 1 |
| | | Set the number of threads used for IO the FunASR service : 1 |
| | | |
| | | Please input [y/n] to confirm the parameters. |
| | | [y] Verify that these parameters are correct and that the service will run. |
| | | [n] The parameters set are incorrect, it will be rolled out, please rerun. |
| | | read confirmation[y/n]: |
| | | |
| | | Will run FunASR server later ... |
| | | Parameters are stored in the file /var/funasr/config |
| | | ``` |
| | | |
| | | ##### 检查Docker服务 |
| | | |
| | | 检查当前宿主机是否安装了Docker服务,若未安装,则安装Docker并启动。 |
| | | |
| | | ```text |
| | | [8/10] |
| | | Start install docker for ubuntu |
| | | Get docker installer: curl -fsSL https://test.docker.com -o test-docker.sh |
| | | Get docker run: sudo sh test-docker.sh |
| | | # Executing docker install script, commit: c2de0811708b6d9015ed1a2c80f02c9b70c8ce7b |
| | | + sh -c apt-get update -qq >/dev/null |
| | | + sh -c DEBIAN_FRONTEND=noninteractive apt-get install -y -qq apt-transport-https ca-certificates curl >/dev/null |
| | | + sh -c install -m 0755 -d /etc/apt/keyrings |
| | | + sh -c curl -fsSL "https://download.docker.com/linux/ubuntu/gpg" | gpg --dearmor --yes -o /etc/apt/keyrings/docker.gpg |
| | | + sh -c chmod a+r /etc/apt/keyrings/docker.gpg |
| | | + sh -c echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu focal test" > /etc/apt/sources.list.d/docker.list |
| | | + sh -c apt-get update -qq >/dev/null |
| | | + sh -c DEBIAN_FRONTEND=noninteractive apt-get install -y -qq docker-ce docker-ce-cli containerd.io docker-compose-plugin docker-ce-rootless-extras docker-buildx-plugin >/dev/null |
| | | + sh -c docker version |
| | | Client: Docker Engine - Community |
| | | Version: 24.0.2 |
| | | |
| | | ... |
| | | ... |
| | | |
| | | Docker install success, start docker server. |
| | | ``` |
| | | |
| | | ##### 下载FunASR Docker镜像 |
| | | |
| | | 下载并更新step1.1中选择的FunASR Docker镜像。 |
| | | |
| | | ```text |
| | | [9/10] |
| | | Pull docker image(registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-latest)... |
| | | funasr-runtime-cpu-0.0.1: Pulling from funasr_repo/funasr |
| | | 7608715873ec: Pull complete |
| | | 3e1014c56f38: Pull complete |
| | | |
| | | ... |
| | | ... |
| | | ``` |
| | | |
| | | ##### 启动FunASR Docker |
| | | |
| | | 启动FunASR Docker,等待step1.2选择的模型下载完成并启动FunASR服务。 |
| | | |
| | | ```text |
| | | [10/10] |
| | | Construct command and run docker ... |
| | | 943d8f02b4e5011b71953a0f6c1c1b9bc5aff63e5a96e7406c83e80943b23474 |
| | | |
| | | Loading models: |
| | | [ASR ][Done ][==================================================][100%][1.10MB/s][v1.2.1] |
| | | [VAD ][Done ][==================================================][100%][7.26MB/s][v1.2.0] |
| | | [PUNC][Done ][==================================================][100%][ 474kB/s][v1.1.7] |
| | | The service has been started. |
| | | If you want to see an example of how to use the client, you can run sudo bash funasr-runtime-deploy.sh -c . |
| | | ``` |
| | | |
| | | #### 启动已经部署过的FunASR服务 |
| | | 一键部署后若出现重启电脑等关闭Docker的动作,可通过如下命令直接启动FunASR服务,启动配置为上次一键部署的设置。 |
| | | |
| | | ```shell |
| | | sudo bash funasr-runtime-deploy.sh start |
| | | ``` |
| | | |
| | | #### 关闭FunASR服务 |
| | | |
| | | ```shell |
| | | sudo bash funasr-runtime-deploy.sh stop |
| | | ``` |
| | | |
| | | #### 重启FunASR服务 |
| | | |
| | | 根据上次一键部署的设置重启启动FunASR服务。 |
| | | ```shell |
| | | sudo bash funasr-runtime-deploy.sh restart |
| | | ``` |
| | | |
| | | #### 替换模型并重启FunASR服务 |
| | | |
| | | 替换正在使用的模型,并重新启动FunASR服务。模型需为ModelScope中的ASR/VAD/PUNC模型。 |
| | | |
| | | ```shell |
| | | sudo bash scripts/funasr-runtime-deploy.sh update model <model ID in ModelScope> |
| | | |
| | | e.g |
| | | sudo bash scripts/funasr-runtime-deploy.sh update model damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx |
| | | ``` |
| | | |
| | | ### 测试与使用离线文件转写服务 |
| | | |
| | | 在服务器上完成FunASR服务部署以后,可以通过如下的步骤来测试和使用离线文件转写服务。目前分别支持Python、C++、Java版本client的的命令行运行,以及可在浏览器可直接体验的html网页版本,更多语言client支持参考文档【FunASR高阶开发指南】。 |
| | | funasr-runtime-deploy.sh运行结束后,可通过命令以交互的形式自动下载测试样例samples到当前目录的funasr_samples中,并设置参数运行: |
| | | |
| | | ```shell |
| | | sudo bash funasr-runtime-deploy.sh client |
| | | ``` |
| | | |
| | | 可选择提供的Python和Linux C++范例程序,以Python范例为例: |
| | | |
| | | ```text |
| | | Will download sample tools for the client to show how speech recognition works. |
| | | Please select the client you want to run. |
| | | 1) Python |
| | | 2) Linux_Cpp |
| | | Enter your choice: 1 |
| | | |
| | | Please enter the IP of server, default(127.0.0.1): |
| | | Please enter the port of server, default(10095): |
| | | Please enter the audio path, default(/root/funasr_samples/audio/asr_example.wav): |
| | | |
| | | Run pip3 install click>=8.0.4 |
| | | Looking in indexes: http://mirrors.cloud.aliyuncs.com/pypi/simple/ |
| | | Requirement already satisfied: click>=8.0.4 in /usr/local/lib/python3.8/dist-packages (8.1.3) |
| | | |
| | | Run pip3 install -r /root/funasr_samples/python/requirements_client.txt |
| | | Looking in indexes: http://mirrors.cloud.aliyuncs.com/pypi/simple/ |
| | | Requirement already satisfied: websockets in /usr/local/lib/python3.8/dist-packages (from -r /root/funasr_samples/python/requirements_client.txt (line 1)) (11.0.3) |
| | | |
| | | Run python3 /root/funasr_samples/python/wss_client_asr.py --host 127.0.0.1 --port 10095 --mode offline --audio_in /root/funasr_samples/audio/asr_example.wav --send_without_sleep --output_dir ./funasr_samples/python |
| | | |
| | | ... |
| | | ... |
| | | |
| | | pid0_0: 欢迎大家来体验达摩院推出的语音识别模型。 |
| | | Exception: sent 1000 (OK); then received 1000 (OK) |
| | | end |
| | | |
| | | If failed, you can try (python3 /root/funasr_samples/python/wss_client_asr.py --host 127.0.0.1 --port 10095 --mode offline --audio_in /root/funasr_samples/audio/asr_example.wav --send_without_sleep --output_dir ./funasr_samples/python) in your Shell. |
| | | |
| | | ``` |
| | | |
| | | #### python-client |
| | | 若想直接运行client进行测试,可参考如下简易说明,以python版本为例: |
| | | |
| | | ```shell |
| | | python3 wss_client_asr.py --host "127.0.0.1" --port 10095 --mode offline --audio_in "../audio/asr_example.wav" --send_without_sleep --output_dir "./results" |
| | | ``` |
| | | |
| | | 命令参数说明: |
| | | ```text |
| | | --host 为FunASR runtime-SDK服务部署机器ip,默认为本机ip(127.0.0.1),如果client与服务不在同一台服务器,需要改为部署机器ip |
| | | --port 10095 部署端口号 |
| | | --mode offline表示离线文件转写 |
| | | --audio_in 需要进行转写的音频文件,支持文件路径,文件列表wav.scp |
| | | --output_dir 识别结果保存路径 |
| | | ``` |
| | | |
| | | #### cpp-client |
| | | |
| | | ```shell |
| | | export LD_LIBRARY_PATH=/root/funasr_samples/cpp/libs:$LD_LIBRARY_PATH |
| | | /root/funasr_samples/cpp/funasr-wss-client --server-ip 127.0.0.1 --port 10095 --wav-path /root/funasr_samples/audio/asr_example.wav |
| | | ``` |
| | | |
| | | 命令参数说明: |
| | | |
| | | ```text |
| | | --server-ip 为FunASR runtime-SDK服务部署机器ip,默认为本机ip(127.0.0.1),如果client与服务不在同一台服务器,需要改为部署机器ip |
| | | --port 10095 部署端口号 |
| | | --wav-path 需要进行转写的音频文件,支持文件路径 |
| | | ``` |
| | | |
| | | ### 视频demo |
| | | |
| | | [点击此处]() |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| File was renamed from docs/runtime/aliyun_server_tutorial.md |
| | |
| | | # 云服务器申请教程 |
| | | |
| | | 我们以阿里云([点此链接](https://www.aliyun.com/))为例,演示如何申请云服务器 |
| | | |
| | | ## 服务器配置 |
| | | |
| | | 用户可以根据自己的业务需求,选择合适的服务器配置,推荐配置为: |
| | | - 配置一(高配):X86架构,32/64核8369CPU,内存8G以上; |
| | | - 配置二:X86架构,32/64核8132CPU,内存8G以上; |
| | | - 配置三:X86架构,32/64核8163CPU,内存8G以上; |
| | | |
| | | 详细性能测试报告:[点此链接](../../funasr/runtime/python/benchmark_onnx_cpp.md) |
| | | |
| | |
| | | ### 登陆个人账号 |
| | | 打开阿里云官网[点此链接](https://www.aliyun.com/),注册并登陆个人账号,如下图标号1所示 |
| | | |
| | | <img src="images/aliyun1.png" width="500"/> |
| | | <img src="images/aliyun1.png" width="900"/> |
| | | |
| | | ### 免费试用 |
| | | |
| | | 点击如上图所示标号2,出现如下界面 |
| | | |
| | | <img src="images/aliyun2.png" width="500"/> |
| | | <img src="images/aliyun2.png" width="900"/> |
| | | |
| | | 再点击标号3,出现如下界面 |
| | | |
| | | <img src="images/aliyun3.png" width="500"/> |
| | | <img src="images/aliyun3.png" width="900"/> |
| | | |
| | | ### 申请ECS实例 |
| | | |
| | | 个人账号可以免费试用1核2GB内存,每月750小时,企业认证后,可以免费试用2核8GB内存 3个月,根据账号情况,点击上图中标号4,出现如下界面: |
| | | |
| | | <img src="images/aliyun4.png" width="500"/> |
| | | <img src="images/aliyun4.png" width="900"/> |
| | | |
| | | 依次按照上图标号5、6、7选择后,点击立即试用,出现如下界面 |
| | | |
| | | <img src="images/aliyun5.png" width="500"/> |
| | | <img src="images/aliyun5.png" width="900"/> |
| | | |
| | | ### 开放服务端口 |
| | | |
| | | 点击安全组(标号9),出现如下界面 |
| | | |
| | | <img src="images/aliyun6.png" width="500"/> |
| | | <img src="images/aliyun6.png" width="900"/> |
| | | |
| | | 再点击标号10,出现如下界面 |
| | | |
| | | <img src="images/aliyun7.png" width="500"/> |
| | | <img src="images/aliyun7.png" width="900"/> |
| | | |
| | | 点击手动添加(标号11),分别按照标号12、13填入内容,后点击保存(标号14),再点击实例(标号15),出现如下界面 |
| | | |
| | | <img src="images/aliyun8.png" width="500"/> |
| | | <img src="images/aliyun8.png" width="900"/> |
| | | |
| | | ### 启动ECS实例 |
| | | |
| | | 点击实例名称(标号16),出现如下页面 |
| | | |
| | | <img src="images/aliyun9.png" width="500"/> |
| | | <img src="images/aliyun9.png" width="900"/> |
| | | |
| | | 点击远程启动(标号17),出现页面后,点击立即登陆,出现如下界面 |
| | | |
| | | <img src="images/aliyun10.png" width="500"/> |
| | | <img src="images/aliyun10.png" width="900"/> |
| | | |
| | | 首次登陆需要点击重置密码(上图中绿色箭头),设置好密码后,输入密码(标号18),点击确认(标号19) |
| | | |
| | | <img src="images/aliyun11.png" width="500"/> |
| | | <img src="images/aliyun11.png" width="900"/> |
| | | |
| | | 首次登陆会遇到上图所示问题,点击标号20,根据文档操作后,重新登陆,登陆成功后出现如下页面 |
| | | |
| | | <img src="images/aliyun12.png" width="500"/> |
| | | <img src="images/aliyun12.png" width="900"/> |
| | | |
| | | 上图表示已经成功申请了云服务器,后续可以根据FunASR runtime-SDK部署文档进行一键部署([点击此处]()) |
| | | |
| | |
| | | from typing import Any, Dict, Iterable, List, NamedTuple, Set, Tuple, Union |
| | | |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | import kaldi_native_fbank as knf |
| | | |
| | | root_dir = Path(__file__).resolve().parent |
| | |
| | | dither: float = 1.0, |
| | | **kwargs, |
| | | ) -> None: |
| | | check_argument_types() |
| | | |
| | | opts = knf.FbankOptions() |
| | | opts.frame_opts.samp_freq = fs |
| | |
| | | import numpy as np |
| | | import yaml |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | import warnings |
| | | |
| | |
| | | class TokenIDConverter(): |
| | | def __init__(self, token_list: Union[List, str], |
| | | ): |
| | | check_argument_types() |
| | | |
| | | self.token_list = token_list |
| | | self.unk_symbol = token_list[-1] |
| | |
| | | space_symbol: str = "<space>", |
| | | remove_non_linguistic_symbols: bool = False, |
| | | ): |
| | | check_argument_types() |
| | | |
| | | self.space_symbol = space_symbol |
| | | self.non_linguistic_symbols = self.load_symbols(symbol_value) |
| | |
| | | import copy |
| | | |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | import kaldi_native_fbank as knf |
| | | |
| | | root_dir = Path(__file__).resolve().parent |
| | |
| | | dither: float = 1.0, |
| | | **kwargs, |
| | | ) -> None: |
| | | check_argument_types() |
| | | |
| | | opts = knf.FbankOptions() |
| | | opts.frame_opts.samp_freq = fs |
| | |
| | | import yaml |
| | | from onnxruntime import (GraphOptimizationLevel, InferenceSession, |
| | | SessionOptions, get_available_providers, get_device) |
| | | from typeguard import check_argument_types |
| | | |
| | | import warnings |
| | | |
| | |
| | | class TokenIDConverter(): |
| | | def __init__(self, token_list: Union[List, str], |
| | | ): |
| | | check_argument_types() |
| | | |
| | | self.token_list = token_list |
| | | self.unk_symbol = token_list[-1] |
| | |
| | | space_symbol: str = "<space>", |
| | | remove_non_linguistic_symbols: bool = False, |
| | | ): |
| | | check_argument_types() |
| | | |
| | | self.space_symbol = space_symbol |
| | | self.non_linguistic_symbols = self.load_symbols(symbol_value) |
| New file |
| | |
| | | # FunASR runtime-SDK |
| | | 中文文档([点击此处](./readme_cn.md)) |
| | | |
| | | FunASR is a speech recognition framework developed by the Speech Lab of DAMO Academy, which integrates industrial-level models in the fields of speech endpoint detection, speech recognition, punctuation segmentation, and more. |
| | | It has attracted many developers to participate in experiencing and developing. To solve the last mile of industrial landing and integrate models into business, we have developed the FunASR runtime-SDK. The SDK supports several service deployments, including: |
| | | |
| | | - File transcription service, Mandarin, CPU version, done |
| | | - File transcription service, Mandarin, GPU version, in progress |
| | | - File transcription service, English, in progress |
| | | - Streaming speech recognition service, in progress |
| | | - and more. |
| | | |
| | | |
| | | ## File Transcription Service, Mandarin (CPU) |
| | | |
| | | Currently, the FunASR runtime-SDK-0.0.1 version supports the deployment of file transcription service, Mandarin (CPU version), with a complete speech recognition chain that can transcribe tens of hours of audio into punctuated text, and supports recognition for more than a hundred concurrent streams. |
| | | |
| | | To meet the needs of different users, we have prepared different tutorials with text and images for both novice and advanced developers. |
| | | |
| | | ### Technical Principles |
| | | |
| | | The technical principles and documentation behind FunASR explain the underlying technology, recognition accuracy, computational efficiency, and core advantages of the framework, including convenience, high precision, high efficiency, and support for long audio chains. For detailed information, please refer to the documentation available by [docs](). |
| | | |
| | | ### Deployment Tutorial |
| | | |
| | | The documentation mainly targets novice users who have no need for modifications or customization. It supports downloading model deployments from modelscope and also supports deploying models that users have fine-tuned. For detailed tutorials, please refer to [docs](). |
| | | |
| | | ### Advanced Development Guide |
| | | |
| | | The documentation mainly targets advanced developers who require modifications and customization of the service. It supports downloading model deployments from modelscope and also supports deploying models that users have fine-tuned. For detailed information, please refer to the documentation available by [docs]() |
| New file |
| | |
| | | # FunASR runtime-SDK |
| | | |
| | | English Version([docs](./readme.md)) |
| | | |
| | | FunASR是由达摩院语音实验室开源的一款语音识别基础框架,集成了语音端点检测、语音识别、标点断句等领域的工业级别模型,吸引了众多开发者参与体验和开发。为了解决工业落地的最后一公里,将模型集成到业务中去,我们开发了FunASR runtime-SDK。 |
| | | SDK 支持以下几种服务部署: |
| | | |
| | | - 中文离线文件转写服务(CPU版本),已完成 |
| | | - 中文离线文件转写服务(GPU版本),进行中 |
| | | - 英文离线转写服务,进行中 |
| | | - 流式语音识别服务,进行中 |
| | | - 。。。 |
| | | |
| | | |
| | | ## 中文离线文件转写服务部署(CPU版本) |
| | | |
| | | 目前FunASR runtime-SDK-0.0.1版本已支持中文语音离线文件服务部署(CPU版本),拥有完整的语音识别链路,可以将几十个小时的音频识别成带标点的文字,而且支持上百路并发同时进行识别。 |
| | | |
| | | 为了支持不同用户的需求,我们分别针对小白与高阶开发者,准备了不同的图文教程: |
| | | |
| | | ### 技术原理揭秘 |
| | | |
| | | 文档介绍了背后技术原理,识别准确率,计算效率等,以及核心优势介绍:便捷、高精度、高效率、长音频链路,详细文档参考([点击此处]()) |
| | | |
| | | ### 便捷部署教程 |
| | | |
| | | 文档主要针对小白用户,无修改定制需求,支持从modelscope中下载模型部署,也支持用户finetune后的模型部署,详细教程参考([点击此处](./docs/SDK_tutorial_cn.md)) |
| | | |
| | | ### 高阶开发指南 |
| | | |
| | | 文档主要针对高阶开发者,需要对服务进行修改与定制,支持从modelscope中下载模型部署,也支持用户finetune后的模型部署,详细文档参考([点击此处](./docs/SDK_advanced_guide_cn.md)) |
| | |
| | | lfr_n: int = 6, |
| | | dither: float = 1.0 |
| | | ) -> None: |
| | | # check_argument_types() |
| | | |
| | | self.fs = fs |
| | | self.window = window |
| | |
| | | from typing import Tuple |
| | | from typing import Union |
| | | |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.samplers.abs_sampler import AbsSampler |
| | | from funasr.samplers.folded_batch_sampler import FoldedBatchSampler |
| | |
| | | padding: Whether sequences are input as a padded tensor or not. |
| | | used for "numel" mode |
| | | """ |
| | | assert check_argument_types() |
| | | if len(shape_files) == 0: |
| | | raise ValueError("No shape file are given") |
| | | |
| | |
| | | |
| | | else: |
| | | raise ValueError(f"Not supported: {type}") |
| | | assert check_return_type(retval) |
| | | return retval |
| | |
| | | from typing import Tuple |
| | | from typing import Union |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.read_text import load_num_sequence_text |
| | | from funasr.fileio.read_text import read_2column_text |
| | |
| | | drop_last: bool = False, |
| | | utt2category_file: str = None, |
| | | ): |
| | | assert check_argument_types() |
| | | assert batch_size > 0 |
| | | if sort_batch != "ascending" and sort_batch != "descending": |
| | | raise ValueError( |
| | |
| | | from typing import Tuple |
| | | from typing import Union |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.read_text import load_num_sequence_text |
| | | from funasr.samplers.abs_sampler import AbsSampler |
| | |
| | | drop_last: bool = False, |
| | | padding: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | assert batch_bins > 0 |
| | | if sort_batch != "ascending" and sort_batch != "descending": |
| | | raise ValueError( |
| | |
| | | from typing import Union |
| | | |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.read_text import load_num_sequence_text |
| | | from funasr.samplers.abs_sampler import AbsSampler |
| | |
| | | drop_last: bool = False, |
| | | padding: bool = True, |
| | | ): |
| | | assert check_argument_types() |
| | | assert batch_bins > 0 |
| | | if sort_batch != "ascending" and sort_batch != "descending": |
| | | raise ValueError( |
| | |
| | | from typing import Iterator |
| | | from typing import Tuple |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.read_text import load_num_sequence_text |
| | | from funasr.samplers.abs_sampler import AbsSampler |
| | |
| | | sort_batch: str = "ascending", |
| | | drop_last: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | assert batch_size > 0 |
| | | self.batch_size = batch_size |
| | | self.shape_file = shape_file |
| | |
| | | from typing import Iterator |
| | | from typing import Tuple |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.fileio.read_text import read_2column_text |
| | | from funasr.samplers.abs_sampler import AbsSampler |
| | |
| | | drop_last: bool = False, |
| | | utt2category_file: str = None, |
| | | ): |
| | | assert check_argument_types() |
| | | assert batch_size > 0 |
| | | self.batch_size = batch_size |
| | | self.key_file = key_file |
| | |
| | | |
| | | import torch |
| | | from torch.optim.lr_scheduler import _LRScheduler |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.schedulers.abs_scheduler import AbsBatchStepScheduler |
| | | |
| | |
| | | warmup_steps: Union[int, float] = 25000, |
| | | last_epoch: int = -1, |
| | | ): |
| | | assert check_argument_types() |
| | | self.model_size = model_size |
| | | self.warmup_steps = warmup_steps |
| | | |
| | |
| | | |
| | | import torch |
| | | from torch.optim.lr_scheduler import _LRScheduler |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.schedulers.abs_scheduler import AbsBatchStepScheduler |
| | | |
| | |
| | | init_lr_scale: float = 0.01, |
| | | final_lr_scale: float = 0.01, |
| | | ): |
| | | assert check_argument_types() |
| | | self.optimizer = optimizer |
| | | self.last_epoch = last_epoch |
| | | self.phase_ratio = phase_ratio |
| | |
| | | |
| | | import torch |
| | | from torch.optim.lr_scheduler import _LRScheduler |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.schedulers.abs_scheduler import AbsBatchStepScheduler |
| | | |
| | |
| | | warmup_steps: Union[int, float] = 25000, |
| | | last_epoch: int = -1, |
| | | ): |
| | | assert check_argument_types() |
| | | self.warmup_steps = warmup_steps |
| | | |
| | | # __init__() must be invoked before setting field |
| | |
| | | import yaml |
| | | from funasr.models.base_model import FunASRModel |
| | | from torch.utils.data import DataLoader |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr import __version__ |
| | | from funasr.datasets.dataset import AbsDataset |
| | |
| | | |
| | | @classmethod |
| | | def get_parser(cls) -> config_argparse.ArgumentParser: |
| | | assert check_argument_types() |
| | | |
| | | class ArgumentDefaultsRawTextHelpFormatter( |
| | | argparse.RawTextHelpFormatter, |
| | |
| | | cls.trainer.add_arguments(parser) |
| | | cls.add_task_arguments(parser) |
| | | |
| | | assert check_return_type(parser) |
| | | return parser |
| | | |
| | | @classmethod |
| | |
| | | return _cls |
| | | |
| | | # This method is used only for --print_config |
| | | assert check_argument_types() |
| | | parser = cls.get_parser() |
| | | args, _ = parser.parse_known_args() |
| | | config = vars(args) |
| | |
| | | |
| | | @classmethod |
| | | def check_required_command_args(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if hasattr(args, "required"): |
| | | for k in vars(args): |
| | | if "-" in k: |
| | |
| | | inference: bool = False, |
| | | ) -> None: |
| | | """Check if the dataset satisfy the requirement of current Task""" |
| | | assert check_argument_types() |
| | | mes = ( |
| | | f"If you intend to use an additional input, modify " |
| | | f'"{cls.__name__}.required_data_names()" or ' |
| | |
| | | |
| | | @classmethod |
| | | def print_config(cls, file=sys.stdout) -> None: |
| | | assert check_argument_types() |
| | | # Shows the config: e.g. python train.py asr --print_config |
| | | config = cls.get_default_config() |
| | | file.write(yaml_no_alias_safe_dump(config, indent=4, sort_keys=False)) |
| | | |
| | | @classmethod |
| | | def main(cls, args: argparse.Namespace = None, cmd: Sequence[str] = None): |
| | | assert check_argument_types() |
| | | print(get_commandline_args(), file=sys.stderr) |
| | | if args is None: |
| | | parser = cls.get_parser() |
| | |
| | | |
| | | @classmethod |
| | | def main_worker(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | |
| | | # 0. Init distributed process |
| | | distributed_option = build_dataclass(DistributedOption, args) |
| | |
| | | - 4 epoch with "--num_iters_per_epoch" == 4 |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | iter_options = cls.build_iter_options(args, distributed_option, mode) |
| | | |
| | | # Overwrite iter_options if any kwargs is given |
| | |
| | | def build_sequence_iter_factory( |
| | | cls, args: argparse.Namespace, iter_options: IteratorOptions, mode: str |
| | | ) -> AbsIterFactory: |
| | | assert check_argument_types() |
| | | |
| | | if hasattr(args, "frontend_conf"): |
| | | if args.frontend_conf is not None and "fs" in args.frontend_conf: |
| | |
| | | iter_options: IteratorOptions, |
| | | mode: str, |
| | | ) -> AbsIterFactory: |
| | | assert check_argument_types() |
| | | |
| | | dataset = ESPnetDataset( |
| | | iter_options.data_path_and_name_and_type, |
| | |
| | | def build_multiple_iter_factory( |
| | | cls, args: argparse.Namespace, distributed_option: DistributedOption, mode: str |
| | | ): |
| | | assert check_argument_types() |
| | | iter_options = cls.build_iter_options(args, distributed_option, mode) |
| | | assert len(iter_options.data_path_and_name_and_type) > 0, len( |
| | | iter_options.data_path_and_name_and_type |
| | |
| | | inference: bool = False, |
| | | ) -> DataLoader: |
| | | """Build DataLoader using iterable dataset""" |
| | | assert check_argument_types() |
| | | # For backward compatibility for pytorch DataLoader |
| | | if collate_fn is not None: |
| | | kwargs = dict(collate_fn=collate_fn) |
| | |
| | | device: Device type, "cpu", "cuda", or "cuda:N". |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if config_file is None: |
| | | assert model_file is not None, ( |
| | | "The argument 'model_file' must be provided " |
| | |
| | | import numpy as np |
| | | import torch |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.datasets.collate_fn import CommonCollateFn |
| | | from funasr.datasets.preprocessor import CommonPreprocessor |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol |
| | | return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) |
| | | |
| | |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | if args.use_preprocessor: |
| | | retval = CommonPreprocessor( |
| | | train=train, |
| | |
| | | ) |
| | | else: |
| | | retval = None |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | |
| | | cls, train: bool = True, inference: bool = False |
| | | ) -> Tuple[str, ...]: |
| | | retval = () |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | | |
| | | |
| | |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | | |
| | | # ~~~~~~~~~ The methods below are mainly used for inference ~~~~~~~~~ |
| | |
| | | device: Device type, "cpu", "cuda", or "cuda:N". |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if config_file is None: |
| | | assert model_file is not None, ( |
| | | "The argument 'model_file' must be provided " |
| | |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | | |
| | | # ~~~~~~~~~ The methods below are mainly used for inference ~~~~~~~~~ |
| | |
| | | device: Device type, "cpu", "cuda", or "cuda:N". |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if config_file is None: |
| | | assert model_file is not None, ( |
| | | "The argument 'model_file' must be provided " |
| | |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | | |
| | | |
| | |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | | |
| | | @classmethod |
| | |
| | | Return: |
| | | model: ASR Transducer model. |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | |
| | | "Initialization part will be reworked in a short future.", |
| | | ) |
| | | |
| | | #assert check_return_type(model) |
| | | |
| | | return model |
| | | |
| | |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.datasets.collate_fn import CommonCollateFn |
| | | from funasr.datasets.preprocessor import CommonPreprocessor |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | return CommonCollateFn(clipping=True) |
| | | |
| | | @classmethod |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | if args.use_preprocessor: |
| | | retval = CommonPreprocessor( |
| | | train=train, |
| | |
| | | ) |
| | | else: |
| | | retval = None |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | |
| | | cls, train: bool = True, inference: bool = False |
| | | ) -> Tuple[str, ...]: |
| | | retval = () |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | |
| | | # 1. frontend |
| | | if args.input_size is None: |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | |
| | | import numpy as np |
| | | import torch |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.datasets.collate_fn import CommonCollateFn |
| | | from funasr.datasets.preprocessor import CommonPreprocessor |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol |
| | | return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) |
| | | |
| | |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | if args.use_preprocessor: |
| | | retval = CommonPreprocessor( |
| | | train=train, |
| | |
| | | ) |
| | | else: |
| | | retval = None |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | |
| | | cls, train: bool = True, inference: bool = False |
| | | ) -> Tuple[str, ...]: |
| | | retval = () |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | | |
| | | # ~~~~~~~~~ The methods below are mainly used for inference ~~~~~~~~~ |
| | |
| | | device: Device type, "cpu", "cuda", or "cuda:N". |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if config_file is None: |
| | | assert model_file is not None, ( |
| | | "The argument 'model_file' must be provided " |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol |
| | | return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) |
| | | |
| | |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | # if args.use_preprocessor: |
| | | # retval = CommonPreprocessor( |
| | | # train=train, |
| | |
| | | # ) |
| | | # else: |
| | | # retval = None |
| | | # assert check_return_type(retval) |
| | | return None |
| | | |
| | | @classmethod |
| | |
| | | cls, train: bool = True, inference: bool = False |
| | | ) -> Tuple[str, ...]: |
| | | retval = () |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | |
| | | # 1. frontend |
| | | if args.input_size is None or args.frontend == "wav_frontend_mel23": |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | | |
| | | # ~~~~~~~~~ The methods below are mainly used for inference ~~~~~~~~~ |
| | |
| | | device: Device type, "cpu", "cuda", or "cuda:N". |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if config_file is None: |
| | | assert model_file is not None, ( |
| | | "The argument 'model_file' must be provided " |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.datasets.collate_fn import CommonCollateFn |
| | | from funasr.datasets.preprocessor import CommonPreprocessor |
| | |
| | | @classmethod |
| | | def add_task_arguments(cls, parser: argparse.ArgumentParser): |
| | | # NOTE(kamo): Use '_' instead of '-' to avoid confusion |
| | | assert check_argument_types() |
| | | group = parser.add_argument_group(description="Task related") |
| | | |
| | | # NOTE(kamo): add_arguments(..., required=True) can't be used |
| | |
| | | for class_choices in cls.class_choices_list: |
| | | class_choices.add_arguments(group) |
| | | |
| | | assert check_return_type(parser) |
| | | return parser |
| | | |
| | | @classmethod |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | return CommonCollateFn(int_pad_value=0) |
| | | |
| | | @classmethod |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | if args.use_preprocessor: |
| | | retval = CommonPreprocessor( |
| | | train=train, |
| | |
| | | ) |
| | | else: |
| | | retval = None |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace) -> LanguageModel: |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.datasets.collate_fn import CommonCollateFn |
| | | from funasr.datasets.preprocessor import PuncTrainTokenizerCommonPreprocessor |
| | |
| | | @classmethod |
| | | def add_task_arguments(cls, parser: argparse.ArgumentParser): |
| | | # NOTE(kamo): Use '_' instead of '-' to avoid confusion |
| | | assert check_argument_types() |
| | | group = parser.add_argument_group(description="Task related") |
| | | |
| | | # NOTE(kamo): add_arguments(..., required=True) can't be used |
| | |
| | | # e.g. --encoder and --encoder_conf |
| | | class_choices.add_arguments(group) |
| | | |
| | | assert check_return_type(parser) |
| | | return parser |
| | | |
| | | @classmethod |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | return CommonCollateFn(int_pad_value=0) |
| | | |
| | | @classmethod |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | token_types = [args.token_type, args.token_type] |
| | | token_lists = [args.token_list, args.punc_list] |
| | | bpemodels = [args.bpemodel, args.bpemodel] |
| | |
| | | ) |
| | | else: |
| | | retval = None |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace) -> PunctuationModel: |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | |
| | | import numpy as np |
| | | import torch |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.datasets.collate_fn import CommonCollateFn |
| | | from funasr.datasets.preprocessor import CommonPreprocessor |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol |
| | | return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) |
| | | |
| | |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | if args.use_preprocessor: |
| | | retval = CommonPreprocessor( |
| | | train=train, |
| | |
| | | ) |
| | | else: |
| | | retval = None |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | |
| | | cls, train: bool = True, inference: bool = False |
| | | ) -> Tuple[str, ...]: |
| | | retval = () |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | |
| | | import numpy as np |
| | | import torch |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.datasets.collate_fn import CommonCollateFn |
| | | from funasr.datasets.preprocessor import CommonPreprocessor |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol |
| | | return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) |
| | | |
| | |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | if args.use_preprocessor: |
| | | retval = CommonPreprocessor( |
| | | train=train, |
| | |
| | | ) |
| | | else: |
| | | retval = None |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | |
| | | retval = () |
| | | if inference: |
| | | retval = ("ref_speech",) |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace) -> ESPnetSVModel: |
| | | assert check_argument_types() |
| | | if isinstance(args.token_list, str): |
| | | with open(args.token_list, encoding="utf-8") as f: |
| | | token_list = [line.rstrip() for line in f] |
| | |
| | | if args.init is not None: |
| | | initialize(model, args.init) |
| | | |
| | | assert check_return_type(model) |
| | | return model |
| | | |
| | | # ~~~~~~~~~ The methods below are mainly used for inference ~~~~~~~~~ |
| | |
| | | device: Device type, "cpu", "cuda", or "cuda:N". |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if config_file is None: |
| | | assert model_file is not None, ( |
| | | "The argument 'model_file' must be provided " |
| | |
| | | import numpy as np |
| | | import torch |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.datasets.collate_fn import CommonCollateFn |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | |
| | | [Collection[Tuple[str, Dict[str, np.ndarray]]]], |
| | | Tuple[List[str], Dict[str, torch.Tensor]], |
| | | ]: |
| | | assert check_argument_types() |
| | | # NOTE(kamo): int value = 0 is reserved by CTC-blank symbol |
| | | return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1) |
| | | |
| | |
| | | def build_preprocess_fn( |
| | | cls, args: argparse.Namespace, train: bool |
| | | ) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]: |
| | | assert check_argument_types() |
| | | # if args.use_preprocessor: |
| | | # retval = CommonPreprocessor( |
| | | # train=train, |
| | |
| | | # else: |
| | | # retval = None |
| | | retval = None |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | |
| | | cls, train: bool = True, inference: bool = False |
| | | ) -> Tuple[str, ...]: |
| | | retval = () |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | @classmethod |
| | | def build_model(cls, args: argparse.Namespace): |
| | | assert check_argument_types() |
| | | # 4. Encoder |
| | | encoder_class = encoder_choices.get_class(args.encoder) |
| | | encoder = encoder_class(**args.encoder_conf) |
| | |
| | | device: Device type, "cpu", "cuda", or "cuda:N". |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | if config_file is None: |
| | | assert model_file is not None, ( |
| | | "The argument 'model_file' must be provided " |
| | |
| | | from typing import Iterable |
| | | from typing import Union |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.text.abs_tokenizer import AbsTokenizer |
| | | from funasr.text.char_tokenizer import CharTokenizer |
| | |
| | | g2p_type: str = None, |
| | | ) -> AbsTokenizer: |
| | | """A helper function to instantiate Tokenizer""" |
| | | assert check_argument_types() |
| | | if token_type == "bpe": |
| | | if bpemodel is None: |
| | | raise ValueError('bpemodel is required if token_type = "bpe"') |
| | |
| | | from typing import Union |
| | | import warnings |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.text.abs_tokenizer import AbsTokenizer |
| | | |
| | |
| | | space_symbol: str = "<space>", |
| | | remove_non_linguistic_symbols: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | self.space_symbol = space_symbol |
| | | if non_linguistic_symbols is None: |
| | | self.non_linguistic_symbols = set() |
| | |
| | | |
| | | from jaconv import jaconv |
| | | import tacotron_cleaner.cleaners |
| | | from typeguard import check_argument_types |
| | | |
| | | try: |
| | | from vietnamese_cleaner import vietnamese_cleaners |
| | |
| | | """ |
| | | |
| | | def __init__(self, cleaner_types: Collection[str] = None): |
| | | assert check_argument_types() |
| | | |
| | | if cleaner_types is None: |
| | | self.cleaner_types = [] |
| | |
| | | |
| | | # import g2p_en |
| | | import jamo |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.text.abs_tokenizer import AbsTokenizer |
| | | |
| | |
| | | space_symbol: str = "<space>", |
| | | remove_non_linguistic_symbols: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | if g2p_type is None: |
| | | self.g2p = split_by_space |
| | | elif g2p_type == "g2p_en": |
| | |
| | | from typing import Union |
| | | |
| | | import sentencepiece as spm |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.text.abs_tokenizer import AbsTokenizer |
| | | |
| | | |
| | | class SentencepiecesTokenizer(AbsTokenizer): |
| | | def __init__(self, model: Union[Path, str]): |
| | | assert check_argument_types() |
| | | self.model = str(model) |
| | | # NOTE(kamo): |
| | | # Don't build SentencePieceProcessor in __init__() |
| | |
| | | from typing import Union |
| | | |
| | | import numpy as np |
| | | from typeguard import check_argument_types |
| | | |
| | | |
| | | class TokenIDConverter: |
| | |
| | | token_list: Union[Path, str, Iterable[str]], |
| | | unk_symbol: str = "<unk>", |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | if isinstance(token_list, (Path, str)): |
| | | token_list = Path(token_list) |
| | |
| | | from typing import Union |
| | | import warnings |
| | | |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.text.abs_tokenizer import AbsTokenizer |
| | | |
| | |
| | | non_linguistic_symbols: Union[Path, str, Iterable[str]] = None, |
| | | remove_non_linguistic_symbols: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | self.delimiter = delimiter |
| | | |
| | | if not remove_non_linguistic_symbols and non_linguistic_symbols is not None: |
| | |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | |
| | | class ForwardAdaptor(torch.nn.Module): |
| | |
| | | """ |
| | | |
| | | def __init__(self, module: torch.nn.Module, name: str): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.module = module |
| | | self.name = name |
| | |
| | | |
| | | import math |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | |
| | | def initialize(model: torch.nn.Module, init: str): |
| | |
| | | model: Target. |
| | | init: Method of initialization. |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | if init == "chainer": |
| | | # 1. lecun_normal_init_parameters |
| | |
| | | |
| | | import torch |
| | | import torch.nn.functional as F |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.torch_utils.device_funcs import force_gatherable |
| | |
| | | |
| | | class LanguageModel(FunASRModel): |
| | | def __init__(self, lm: AbsLM, vocab_size: int, ignore_id: int = 0): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.lm = lm |
| | | self.sos = 1 |
| | |
| | | class PunctuationModel(FunASRModel): |
| | | |
| | | def __init__(self, punc_model: torch.nn.Module, vocab_size: int, ignore_id: int = 0, punc_weight: list = None): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self.punc_model = punc_model |
| | | self.punc_weight = torch.Tensor(punc_weight) |
| | |
| | | from typing import Optional |
| | | from typing import Tuple |
| | | |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.utils.nested_dict_action import NestedDictAction |
| | | from funasr.utils.types import str_or_none |
| | |
| | | default: str = None, |
| | | optional: bool = False, |
| | | ): |
| | | assert check_argument_types() |
| | | self.name = name |
| | | self.base_type = type_check |
| | | self.classes = {k.lower(): v for k, v in classes.items()} |
| | |
| | | return retval |
| | | |
| | | def get_class(self, name: Optional[str]) -> Optional[type]: |
| | | assert check_argument_types() |
| | | if name is None or (self.optional and name.lower() == ("none", "null", "nil")): |
| | | retval = None |
| | | elif name.lower() in self.classes: |
| | | class_obj = self.classes[name] |
| | | assert check_return_type(class_obj) |
| | | retval = class_obj |
| | | else: |
| | | raise ValueError( |
| | |
| | | import humanfriendly |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | Num = Union[float, int, complex, torch.Tensor, np.ndarray] |
| | | |
| | |
| | | |
| | | |
| | | def to_reported_value(v: Num, weight: Num = None) -> "ReportedValue": |
| | | assert check_argument_types() |
| | | if isinstance(v, (torch.Tensor, np.ndarray)): |
| | | if np.prod(v.shape) != 1: |
| | | raise ValueError(f"v must be 0 or 1 dimension: {len(v.shape)}") |
| | |
| | | retval = WeightedAverage(v, weight) |
| | | else: |
| | | retval = Average(v) |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | |
| | | def aggregate(values: Sequence["ReportedValue"]) -> Num: |
| | | assert check_argument_types() |
| | | |
| | | for v in values: |
| | | if not isinstance(v, type(values[0])): |
| | |
| | | |
| | | else: |
| | | raise NotImplementedError(f"type={type(values[0])}") |
| | | assert check_return_type(retval) |
| | | return retval |
| | | |
| | | |
| | |
| | | """ |
| | | |
| | | def __init__(self, key: str, epoch: int, total_count: int): |
| | | assert check_argument_types() |
| | | self.key = key |
| | | self.epoch = epoch |
| | | self.start_time = time.perf_counter() |
| | |
| | | stats: Dict[str, Optional[Union[Num, Dict[str, Num]]]], |
| | | weight: Num = None, |
| | | ) -> None: |
| | | assert check_argument_types() |
| | | if self._finished: |
| | | raise RuntimeError("Already finished") |
| | | if len(self._seen_keys_in_the_step) == 0: |
| | |
| | | """ |
| | | |
| | | def __init__(self, epoch: int = 0): |
| | | assert check_argument_types() |
| | | if epoch < 0: |
| | | raise ValueError(f"epoch must be 0 or more: {epoch}") |
| | | self.epoch = epoch |
| | |
| | | import torch |
| | | import torch.nn |
| | | import torch.optim |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.iterators.abs_iter_factory import AbsIterFactory |
| | | from funasr.main_funcs.average_nbest_models import average_nbest_models |
| | |
| | | @classmethod |
| | | def build_options(cls, args: argparse.Namespace) -> TrainerOptions: |
| | | """Build options consumed by train(), eval()""" |
| | | assert check_argument_types() |
| | | return build_dataclass(TrainerOptions, args) |
| | | |
| | | @classmethod |
| | |
| | | distributed_option: DistributedOption, |
| | | ) -> None: |
| | | """Perform training. This method performs the main process of training.""" |
| | | assert check_argument_types() |
| | | # NOTE(kamo): Don't check the type more strictly as far trainer_options |
| | | assert is_dataclass(trainer_options), type(trainer_options) |
| | | assert len(optimizers) == len(schedulers), (len(optimizers), len(schedulers)) |
| | |
| | | options: TrainerOptions, |
| | | distributed_option: DistributedOption, |
| | | ) -> Tuple[bool, bool]: |
| | | assert check_argument_types() |
| | | |
| | | grad_noise = options.grad_noise |
| | | accum_grad = options.accum_grad |
| | |
| | | options: TrainerOptions, |
| | | distributed_option: DistributedOption, |
| | | ) -> None: |
| | | assert check_argument_types() |
| | | ngpu = options.ngpu |
| | | no_forward_run = options.no_forward_run |
| | | distributed = distributed_option.distributed |
| | |
| | | |
| | | from distutils.version import LooseVersion |
| | | from functools import partial |
| | | from typeguard import check_argument_types |
| | | from typing import Optional |
| | | |
| | | import librosa |
| | |
| | | griffin_lim_iters: The number of iterations. |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | self.fs = fs |
| | | self.logmel2linear = ( |
| | | partial( |
| | |
| | | |
| | | import os |
| | | |
| | | from distutils.version import LooseVersion |
| | | from setuptools import find_packages |
| | | from setuptools import setup |
| | | |
| | |
| | | requirements = { |
| | | "install": [ |
| | | "setuptools>=38.5.1", |
| | | # "configargparse>=1.2.1", |
| | | "typeguard==2.13.3", |
| | | "typeguard>=3.0.1", |
| | | "humanfriendly", |
| | | "scipy>=1.4.1", |
| | | # "filelock", |
| | | "librosa", |
| | | "jamo==0.4.1", # For kss |
| | | "jamo", # For kss |
| | | "PyYAML>=5.1.2", |
| | | "soundfile>=0.11.0", |
| | | "soundfile>=0.10.2", |
| | | "h5py>=2.10.0", |
| | | "kaldiio>=2.17.0", |
| | | "torch_complex", |
| | | "nltk>=3.4.5", |
| | | # ASR |
| | | "sentencepiece", |
| | | # "ctc-segmentation<1.8,>=1.6.6", |
| | | # TTS |
| | | # "pyworld>=0.2.10", |
| | | "pypinyin<=0.44.0", |
| | | "pypinyin>=0.44.0", |
| | | "espnet_tts_frontend", |
| | | # ENH |
| | | # "ci_sdr", |
| | | "pytorch_wpe", |
| | | "editdistance>=0.5.2", |
| | | "tensorboard==1.15", |
| | | "tensorboard", |
| | | "g2p", |
| | | # PAI |
| | | "oss2", |
| | | # "kaldi-native-fbank", |
| | | # timestamp |
| | | "edit-distance", |
| | | # textgrid |
| | | "textgrid", |
| | | "protobuf==3.20.0", |
| | | "protobuf", |
| | | ], |
| | | # train: The modules invoked when training only. |
| | | "train": [ |
| | | # "pillow>=6.1.0", |
| | | "editdistance==0.5.2", |
| | | "editdistance", |
| | | "wandb", |
| | | ], |
| | | # recipe: The modules actually are not invoked in the main module of funasr, |
| | | # but are invoked for the python scripts in each recipe |
| | | "recipe": [ |
| | | "espnet_model_zoo", |
| | | # "gdown", |
| | | # "resampy", |
| | | # "pysptk>=0.1.17", |
| | | # "morfessor", # for zeroth-korean |
| | | # "youtube_dl", # for laborotv |
| | | # "nnmnkwii", |
| | | # "museval>=0.2.1", |
| | | # "pystoi>=0.2.2", |
| | | # "mir-eval>=0.6", |
| | | # "fastdtw", |
| | | # "nara_wpe>=0.0.5", |
| | | # "sacrebleu>=1.5.1", |
| | | ], |
| | | # all: The modules should be optionally installled due to some reason. |
| | | # Please consider moving them to "install" occasionally |
| | | # NOTE(kamo): The modules in "train" and "recipe" are appended into "all" |
| | | "all": [ |
| | | # NOTE(kamo): Append modules requiring specific pytorch version or torch>1.3.0 |
| | | "torch_optimizer", |
| | | "fairscale", |
| | | "transformers", |
| | | # "gtn==0.0.0", |
| | | ], |
| | | "setup": [ |
| | | "numpy", |
| | |
| | | "black", |
| | | ], |
| | | "doc": [ |
| | | "Jinja2<3.1", |
| | | "Sphinx==2.1.2", |
| | | "Jinja2", |
| | | "Sphinx", |
| | | "sphinx-rtd-theme>=0.2.4", |
| | | "sphinx-argparse>=0.2.5", |
| | | "commonmark==0.8.1", |
| | | "commonmark", |
| | | "recommonmark>=0.4.0", |
| | | "nbsphinx>=0.4.2", |
| | | "sphinx-markdown-tables>=0.0.12", |
| | | "configargparse>=1.2.1" |
| | | ], |
| | | } |
| | | requirements["all"].extend(requirements["train"] + requirements["recipe"]) |
| | | requirements["all"].extend(requirements["train"]) |
| | | requirements["test"].extend(requirements["train"]) |
| | | |
| | | install_requires = requirements["install"] |
| | |
| | | "License :: OSI Approved :: Apache Software License", |
| | | "Topic :: Software Development :: Libraries :: Python Modules", |
| | | ], |
| | | ) |
| | | ) |