From fcaf910ceb4e07a13bf2d133f46df684b069b3f0 Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Wed, 03 Jan 2024 11:45:22 +0800
Subject: [PATCH] update online docs
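
Adjust the ModelScope finetuning entry in build_trainer.py:

- import g2p_choices from its new location in funasr.tokenizer
- always apply build_args to the parsed arguments
- read an optional bpemodel from the ModelScope configuration and set
  args.bpemodel when the file exists
- preset use_pai, batch_type, oss_bucket and input_size for local
  finetuning
- enable simple DDP when a local rank is given, otherwise force a
  single GPU
- apply batch_bins to the batch_conf for small dataset types as well
- drop the redundant CUDA_VISIBLE_DEVICES and args.distributed
  overrides before build_distributed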
---
funasr/bin/build_trainer.py | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/funasr/bin/build_trainer.py b/funasr/bin/build_trainer.py
index 24811c9..c03bdf3 100644
--- a/funasr/bin/build_trainer.py
+++ b/funasr/bin/build_trainer.py
@@ -18,7 +18,7 @@
 from funasr.build_utils.build_scheduler import build_scheduler
 from funasr.build_utils.build_trainer import build_trainer as build_trainer_modelscope
 from funasr.modules.lora.utils import mark_only_lora_as_trainable
-from funasr.text.phoneme_tokenizer import g2p_choices
+from funasr.tokenizer.phoneme_tokenizer import g2p_choices
 from funasr.torch_utils.load_pretrained_model import load_pretrained_model
 from funasr.torch_utils.model_summary import model_summary
 from funasr.torch_utils.pytorch_version import pytorch_cudnn_version
@@ -529,13 +529,12 @@
                   **kwargs):
     parser = get_parser()
     args, extra_task_params = parser.parse_known_args()
-    if extra_task_params:
-        args = build_args(args, parser, extra_task_params)
+    args = build_args(args, parser, extra_task_params)
     if args.local_rank is not None:
-        args.distributed = True
+        distributed = True
     else:
-        args.distributed = False
+        distributed = False
     args.local_rank = args.local_rank if args.local_rank is not None else 0
     local_rank = args.local_rank
     if "CUDA_VISIBLE_DEVICES" in os.environ.keys():
@@ -549,6 +548,10 @@
     init_param = modelscope_dict['init_model']
     cmvn_file = modelscope_dict['cmvn_file']
     seg_dict_file = modelscope_dict['seg_dict']
+    if 'bpemodel' in modelscope_dict:
+        bpemodel = modelscope_dict['bpemodel']
+    else:
+        bpemodel = None
 
     # overwrite parameters
     with open(config) as f:
@@ -582,12 +585,26 @@
         args.seg_dict_file = seg_dict_file
     else:
         args.seg_dict_file = None
+    if bpemodel is not None and os.path.exists(bpemodel):
+        args.bpemodel = bpemodel
+    else:
+        args.bpemodel = None
     args.data_dir = data_dir
     args.train_set = train_set
     args.dev_set = dev_set
     args.output_dir = output_dir
     args.gpu_id = args.local_rank
     args.config = finetune_config
+    args.use_pai = False
+    args.batch_type = "length"
+    args.oss_bucket = None
+    args.input_size = None
+    if distributed:
+        args.distributed = True
+        args.simple_ddp = True
+    else:
+        args.distributed = False
+        args.ngpu = 1
     if optim is not None:
         args.optim = optim
     if lr is not None:
@@ -605,6 +622,7 @@
     if batch_bins is not None:
         if args.dataset_type == "small":
             args.batch_bins = batch_bins
+            args.dataset_conf["batch_conf"]["batch_size"] = batch_bins
         elif args.dataset_type == "large":
             args.dataset_conf["batch_conf"]["batch_size"] = batch_bins
         else:
@@ -622,8 +640,6 @@
     torch.backends.cudnn.deterministic = args.cudnn_deterministic
 
     # ddp init
-    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
-    args.distributed = args.ngpu > 1 or args.dist_world_size > 1
     distributed_option = build_distributed(args)
 
     # for logging
--
Gitblit v1.9.1