From 94a1deb5fb4220b28ea39da64c307fd78d287862 Mon Sep 17 00:00:00 2001
From: 嘉渊 <wangjiaming.wjm@alibaba-inc.com>
Date: Mon, 24 Apr 2023 16:15:20 +0800
Subject: [PATCH] update
---
funasr/bin/train.py | 88 +++++++++++++++++++++++++++++++++++++++-----
1 file changed, 78 insertions(+), 10 deletions(-)
diff --git a/funasr/bin/train.py b/funasr/bin/train.py
old mode 100644
new mode 100755
index c6f19b6..a7a85f8
--- a/funasr/bin/train.py
+++ b/funasr/bin/train.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
import argparse
import logging
import os
@@ -6,18 +8,21 @@
import torch
+from funasr.build_utils.build_args import build_args
+from funasr.build_utils.build_dataloader import build_dataloader
+from funasr.build_utils.build_distributed import build_distributed
+from funasr.build_utils.build_model import build_model
+from funasr.build_utils.build_optimizer import build_optimizer
+from funasr.build_utils.build_scheduler import build_scheduler
+from funasr.build_utils.build_trainer import build_trainer
+from funasr.text.phoneme_tokenizer import g2p_choices
from funasr.torch_utils.model_summary import model_summary
from funasr.torch_utils.pytorch_version import pytorch_cudnn_version
from funasr.torch_utils.set_all_random_seed import set_all_random_seed
from funasr.utils import config_argparse
-from funasr.utils.build_args import build_args
-from funasr.utils.build_dataloader import build_dataloader
-from funasr.utils.build_distributed import build_distributed
-from funasr.utils.build_model import build_model
-from funasr.utils.build_optimizer import build_optimizer
-from funasr.utils.build_scheduler import build_scheduler
from funasr.utils.prepare_data import prepare_data
from funasr.utils.types import str2bool
+from funasr.utils.types import str_or_none
from funasr.utils.yaml_no_alias_safe_dump import yaml_no_alias_safe_dump
@@ -281,6 +286,55 @@
help="Apply preprocessing to data or not",
)
+ # options related to most tasks
+ parser.add_argument(
+ "--init",
+ type=lambda x: str_or_none(x.lower()),
+ default=None,
+ help="The initialization method",
+ choices=[
+ "chainer",
+ "xavier_uniform",
+ "xavier_normal",
+ "kaiming_uniform",
+ "kaiming_normal",
+ None,
+ ],
+ )
+ parser.add_argument(
+ "--token_list",
+ type=str_or_none,
+ default=None,
+ help="A text mapping int-id to token",
+ )
+ parser.add_argument(
+ "--token_type",
+ type=str,
+ default="bpe",
+ choices=["bpe", "char", "word"],
+ help="",
+ )
+ parser.add_argument(
+ "--bpemodel",
+ type=str_or_none,
+ default=None,
+ help="The model file for sentencepiece",
+ )
+ parser.add_argument(
+ "--cleaner",
+ type=str_or_none,
+ choices=[None, "tacotron", "jaconv", "vietnamese"],
+ default=None,
+ help="Apply text cleaning",
+ )
+ parser.add_argument(
+ "--g2p",
+ type=str_or_none,
+ choices=g2p_choices,
+ default=None,
+ help="Specify g2p method if --token_type=phn",
+ )
+
# pai related
parser.add_argument(
"--use_pai",
@@ -369,16 +423,16 @@
prepare_data(args, distributed_option)
model = build_model(args)
- optimizer = build_optimizer(args, model=model)
- scheduler = build_scheduler(args, optimizer)
+ optimizers = build_optimizer(args, model=model)
+ schedulers = build_scheduler(args, optimizers)
logging.info("world size: {}, rank: {}, local_rank: {}".format(distributed_option.dist_world_size,
distributed_option.dist_rank,
distributed_option.local_rank))
logging.info(pytorch_cudnn_version())
logging.info(model_summary(model))
- logging.info("Optimizer: {}".format(optimizer))
- logging.info("Scheduler: {}".format(scheduler))
+ logging.info("Optimizer: {}".format(optimizers))
+ logging.info("Scheduler: {}".format(schedulers))
# dump args to config.yaml
if not distributed_option.distributed or distributed_option.dist_rank == 0:
@@ -392,4 +446,18 @@
else:
yaml_no_alias_safe_dump(vars(args), f, indent=4, sort_keys=False)
+ # dataloader for training/validation
train_dataloader, valid_dataloader = build_dataloader(args)
+
+ # Trainer, including model, optimizers, etc.
+ trainer = build_trainer(
+ args=args,
+ model=model,
+ optimizers=optimizers,
+ schedulers=schedulers,
+ train_dataloader=train_dataloader,
+ valid_dataloader=valid_dataloader,
+ distributed_option=distributed_option
+ )
+
+ trainer.run()
--
Gitblit v1.9.1