嘉渊
2023-05-10 e422c6197b5bcada0429986500d8d5ca4ffcb3e4
funasr/bin/train.py
@@ -23,7 +23,6 @@
from funasr.utils.prepare_data import prepare_data
from funasr.utils.types import int_or_none
from funasr.utils.types import str2bool
from funasr.utils.types import str2triple_str
from funasr.utils.types import str_or_none
from funasr.utils.yaml_no_alias_safe_dump import yaml_no_alias_safe_dump
@@ -260,6 +259,12 @@
             "training phase. If None is given, it is decided according the number "
             "of training samples automatically .",
    )
    parser.add_argument(
        "--use_tensorboard",
        type=str2bool,
        default=True,
        help="Enable tensorboard logging",
    )
    # pretrained model related
    parser.add_argument(
@@ -310,42 +315,24 @@
        help=f"The keyword arguments for dataset",
    )
    parser.add_argument(
        "--train_data_file",
        "--data_dir",
        type=str,
        default=None,
        help="train_list for large dataset",
        help="root path of data",
    )
    parser.add_argument(
        "--valid_data_file",
        "--train_set",
        type=str,
        default=None,
        help="valid_list for large dataset",
        default="train",
        help="train dataset",
    )
    parser.add_argument(
        "--train_data_path_and_name_and_type",
        type=str2triple_str,
        action="append",
        default=[],
        help="e.g. '--train_data_path_and_name_and_type some/path/a.scp,foo,sound'. ",
    )
    parser.add_argument(
        "--valid_data_path_and_name_and_type",
        type=str2triple_str,
        action="append",
        default=[],
    )
    parser.add_argument(
        "--train_shape_file",
        "--valid_set",
        type=str,
        action="append",
        default=[],
        default="validation",
        help="dev dataset",
    )
    parser.add_argument(
        "--valid_shape_file",
        type=str,
        action="append",
        default=[],
    )
    parser.add_argument(
        "--use_preprocessor",
        type=str2bool,
@@ -514,6 +501,10 @@
    prepare_data(args, distributed_option)
    model = build_model(args)
    model = model.to(
        dtype=getattr(torch, args.train_dtype),
        device="cuda" if args.ngpu > 0 else "cpu",
    )
    optimizers = build_optimizer(args, model=model)
    schedulers = build_scheduler(args, optimizers)
@@ -521,6 +512,7 @@
                                                                   distributed_option.dist_rank,
                                                                   distributed_option.local_rank))
    logging.info(pytorch_cudnn_version())
    logging.info("Args: {}".format(args))
    logging.info(model_summary(model))
    logging.info("Optimizer: {}".format(optimizers))
    logging.info("Scheduler: {}".format(schedulers))