speech_asr
2023-04-19 58fb22cb2b8144b2e29d38327be44f3510ec8bb1
update
4 files modified
1 file added
1 file deleted
165 lines changed. Modified files:
funasr/bin/train.py | 29
funasr/datasets/small_datasets/build_dataloader.py | 84
funasr/datasets/small_datasets/sequence_iter_factory.py | 3
funasr/tasks/abs_task.py | 27
funasr/utils/build_dataloader.py | 9
funasr/utils/build_model.py | 13
funasr/bin/train.py
@@ -6,6 +6,7 @@
 from funasr.torch_utils.set_all_random_seed import set_all_random_seed
 from funasr.utils import config_argparse
+from funasr.utils.build_dataloader import build_dataloader
 from funasr.utils.build_distributed import build_distributed
 from funasr.utils.prepare_data import prepare_data
 from funasr.utils.types import str2bool
@@ -338,14 +339,36 @@
            format=f"[{os.uname()[1].split('.')[0]}]"
                   f" %(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
        )
    logging.info("world size: {}, rank: {}, local_rank: {}".format(distributed_option.dist_world_size,
                                                                   distributed_option.dist_rank,
                                                                   distributed_option.local_rank))
    # prepare files for dataloader
    prepare_data(args, distributed_option)
    # set random seed
    set_all_random_seed(args.seed)
    torch.backends.cudnn.enabled = args.cudnn_enabled
    torch.backends.cudnn.benchmark = args.cudnn_benchmark
    torch.backends.cudnn.deterministic = args.cudnn_deterministic
    train_dataloader, valid_dataloader = build_dataloader(args)
    logging.info("world size: {}, rank: {}, local_rank: {}".format(distributed_option.dist_world_size,
                                                                   distributed_option.dist_rank,
                                                                   distributed_option.local_rank))
    # optimizers = cls.build_optimizers(args, model=model)
    # schedulers = []
    # for i, optim in enumerate(optimizers, 1):
    #     suf = "" if i == 1 else str(i)
    #     name = getattr(args, f"scheduler{suf}")
    #     conf = getattr(args, f"scheduler{suf}_conf")
    #     if name is not None:
    #         cls_ = scheduler_classes.get(name)
    #         if cls_ is None:
    #             raise ValueError(
    #                 f"must be one of {list(scheduler_classes)}: {name}"
    #             )
    #         scheduler = cls_(optim, **conf)
    #     else:
    #         scheduler = None
    #
    #     schedulers.append(scheduler)
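Note: the commented-out block above follows the ESPnet multi-optimizer convention, where a suffix pairs scheduler2/scheduler2_conf with the second optimizer, and so on. Below is a minimal self-contained sketch of that lookup pattern; the scheduler_classes registry and the args fields here are illustrative assumptions, not FunASR's actual registry.

import argparse

import torch

# Hypothetical registry for illustration; FunASR/ESPnet keep a similar
# name -> class mapping that the loop consults.
scheduler_classes = {"steplr": torch.optim.lr_scheduler.StepLR}


def build_schedulers(args, optimizers):
    # Pair each optimizer with the scheduler named by scheduler/scheduler2/...
    schedulers = []
    for i, optim in enumerate(optimizers, 1):
        suf = "" if i == 1 else str(i)  # args.scheduler, args.scheduler2, ...
        name = getattr(args, f"scheduler{suf}")
        conf = getattr(args, f"scheduler{suf}_conf")
        if name is not None:
            cls_ = scheduler_classes.get(name)
            if cls_ is None:
                raise ValueError(f"must be one of {list(scheduler_classes)}: {name}")
            schedulers.append(cls_(optim, **conf))
        else:
            schedulers.append(None)
    return schedulers


model = torch.nn.Linear(4, 2)
optim = torch.optim.Adam(model.parameters())
args = argparse.Namespace(scheduler="steplr", scheduler_conf={"step_size": 1})
print(build_schedulers(args, [optim]))  # [<StepLR object ...>]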
funasr/datasets/small_datasets/build_dataloader.py
File was deleted
funasr/datasets/small_datasets/sequence_iter_factory.py
@@ -27,8 +27,7 @@
 class SequenceIterFactory(AbsIterFactory):
-    """Build iterator for each epoch.
+    """Build iterator for each epoch, modified from ESPnet
     """
funasr/tasks/abs_task.py
@@ -1160,7 +1160,8 @@
                     args.batch_bins = args.batch_bins * args.ngpu
         # filter samples if wav.scp and text are mismatch
-        if (args.train_shape_file is None and args.dataset_type == "small") or args.train_data_file is None and args.dataset_type == "large":
+        if (
+                args.train_shape_file is None and args.dataset_type == "small") or args.train_data_file is None and args.dataset_type == "large":
             if not args.simple_ddp or distributed_option.dist_rank == 0:
                 filter_wav_text(args.data_dir, args.train_set)
                 filter_wav_text(args.data_dir, args.dev_set)
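filter_wav_text reconciles wav.scp against text so both files list the same utterance IDs. FunASR's actual implementation is not shown in this diff; the following is a generic sketch of that kind of filtering over Kaldi-style "<utt_id> <payload>" files, with the helper name and details assumed:

from pathlib import Path


def filter_common_utts(data_dir: str, subset: str) -> None:
    # Keep only utterance IDs present in both wav.scp and text.
    d = Path(data_dir) / subset
    wav = dict(line.split(maxsplit=1)
               for line in (d / "wav.scp").read_text(encoding="utf-8").splitlines())
    txt = dict(line.split(maxsplit=1)
               for line in (d / "text").read_text(encoding="utf-8").splitlines())
    keep = sorted(wav.keys() & txt.keys())
    (d / "wav.scp").write_text("".join(f"{k} {wav[k]}\n" for k in keep), encoding="utf-8")
    (d / "text").write_text("".join(f"{k} {txt[k]}\n" for k in keep), encoding="utf-8")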
@@ -1169,8 +1170,10 @@
         if args.train_shape_file is None and args.dataset_type == "small":
             if not args.simple_ddp or distributed_option.dist_rank == 0:
-                calc_shape(args.data_dir, args.train_set, args.frontend_conf, args.speech_length_min, args.speech_length_max)
-                calc_shape(args.data_dir, args.dev_set, args.frontend_conf, args.speech_length_min, args.speech_length_max)
+                calc_shape(args.data_dir, args.train_set, args.frontend_conf, args.speech_length_min,
+                           args.speech_length_max)
+                calc_shape(args.data_dir, args.dev_set, args.frontend_conf, args.speech_length_min,
+                           args.speech_length_max)
             if args.simple_ddp:
                 dist.barrier()
             args.train_shape_file = [os.path.join(args.data_dir, args.train_set, "speech_shape")]
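The calc_shape calls above follow a common DDP idiom: rank 0 computes the shared shape files once, and dist.barrier() holds the other ranks until the files exist. A generic sketch of the idiom (the prepare callback is a placeholder, not FunASR code):

import torch.distributed as dist


def run_once_then_sync(rank: int, prepare) -> None:
    # Rank 0 materializes shared artifacts (e.g., speech_shape files)...
    if rank == 0:
        prepare()
    # ...then every rank blocks here so no one reads a half-written file.
    if dist.is_available() and dist.is_initialized():
        dist.barrier()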
@@ -1360,15 +1363,21 @@
             if args.dataset_type == "large":
                 from funasr.datasets.large_datasets.build_dataloader import ArkDataLoader
                 train_iter_factory = ArkDataLoader(args.train_data_file, args.token_list, args.dataset_conf,
-                                                   frontend_conf=args.frontend_conf if hasattr(args, "frontend_conf") else None,
-                                                   seg_dict_file=args.seg_dict_file if hasattr(args, "seg_dict_file") else None,
-                                                   punc_dict_file=args.punc_list if hasattr(args, "punc_list") else None,
+                                                   frontend_conf=args.frontend_conf if hasattr(args,
+                                                                                               "frontend_conf") else None,
+                                                   seg_dict_file=args.seg_dict_file if hasattr(args,
+                                                                                               "seg_dict_file") else None,
+                                                   punc_dict_file=args.punc_list if hasattr(args,
+                                                                                            "punc_list") else None,
                                                    bpemodel_file=args.bpemodel if hasattr(args, "bpemodel") else None,
                                                    mode="train")
                 valid_iter_factory = ArkDataLoader(args.valid_data_file, args.token_list, args.dataset_conf,
-                                                   frontend_conf=args.frontend_conf if hasattr(args, "frontend_conf") else None,
-                                                   seg_dict_file=args.seg_dict_file if hasattr(args, "seg_dict_file") else None,
-                                                   punc_dict_file=args.punc_list if hasattr(args, "punc_list") else None,
+                                                   frontend_conf=args.frontend_conf if hasattr(args,
+                                                                                               "frontend_conf") else None,
+                                                   seg_dict_file=args.seg_dict_file if hasattr(args,
+                                                                                               "seg_dict_file") else None,
+                                                   punc_dict_file=args.punc_list if hasattr(args,
+                                                                                            "punc_list") else None,
                                                    bpemodel_file=args.bpemodel if hasattr(args, "bpemodel") else None,
                                                    mode="eval")
             elif args.dataset_type == "small":
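Aside: the reflowed hasattr ternaries above are behavior-equivalent to getattr with a default, which avoids the awkward line wrapping entirely; a sketch, not part of this commit:

# Each line below is equivalent to the corresponding hasattr ternary.
frontend_conf = getattr(args, "frontend_conf", None)
seg_dict_file = getattr(args, "seg_dict_file", None)
punc_dict_file = getattr(args, "punc_list", None)
bpemodel_file = getattr(args, "bpemodel", None)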
funasr/utils/build_dataloader.py
@@ -1,12 +1,15 @@
 from funasr.datasets.large_datasets.build_dataloader import LargeDataLoader
-from funasr.datasets.small_datasets.build_dataloader import build_dataloader
+from funasr.datasets.small_datasets.sequence_iter_factory import SequenceIterFactory
 def build_dataloader(args):
     if args.dataset_type == "small":
-        train_iter_factory = LargeDataLoader(args, mode="train")
-        valid_iter_factory = LargeDataLoader(args, mode="valid")
+        train_iter_factory = SequenceIterFactory(args, mode="train")
+        valid_iter_factory = SequenceIterFactory(args, mode="valid")
     elif args.dataset_type == "large":
         train_iter_factory = LargeDataLoader(args, mode="train")
         valid_iter_factory = LargeDataLoader(args, mode="valid")
     else:
         raise ValueError(f"Not supported dataset_type={args.dataset_type}")
     return train_iter_factory, valid_iter_factory
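With this change, both dataset types go through one entry point, and dispatch depends only on args.dataset_type. A minimal sketch of the error path; real training runs pass a Namespace with far more fields than shown here:

import argparse

from funasr.utils.build_dataloader import build_dataloader

args = argparse.Namespace(dataset_type="medium")
try:
    build_dataloader(args)
except ValueError as err:
    print(err)  # Not supported dataset_type=medium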
funasr/utils/build_model.py
New file
@@ -0,0 +1,13 @@
+import logging
+
+
+def build_model(args):
+    if args.token_list is not None:
+        with open(args.token_list, encoding="utf-8") as f:
+            token_list = [line.rstrip() for line in f]
+            args.token_list = list(token_list)
+            vocab_size = len(token_list)
+            logging.info(f"Vocabulary size: {vocab_size}")