From 33d3d2084403fd34b79c835d2f2fe04f6cd8f738 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期三, 13 九月 2023 09:33:54 +0800
Subject: [PATCH] Merge branch 'main' of github.com:alibaba-damo-academy/FunASR add
---
funasr/bin/lm_train.py | 53 ++++++++++++++++++++++++++++++++++++++++-------------
1 files changed, 40 insertions(+), 13 deletions(-)
diff --git a/funasr/bin/lm_train.py b/funasr/bin/lm_train.py
index faa7a45..22b5f9c 100755
--- a/funasr/bin/lm_train.py
+++ b/funasr/bin/lm_train.py
@@ -1,22 +1,49 @@
#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+# MIT License (https://opensource.org/licenses/MIT)
+
+import os
+
from funasr.tasks.lm import LMTask
-def get_parser():
+# Parse command-line arguments for LM training (LMTask options plus --gpu_id)
+def parse_args():
parser = LMTask.get_parser()
- return parser
+ parser.add_argument(
+ "--gpu_id",
+ type=int,
+ default=0,
+ help="local gpu id.",
+ )
+ args = parser.parse_args()
+ return args
-def main(cmd=None):
- """LM training.
-
- Example:
-
- % python lm_train.py asr --print_config --optim adadelta
- % python lm_train.py --config conf/train_asr.yaml
- """
- LMTask.main(cmd=cmd)
+def main(args=None, cmd=None):
+ # Hand off to LMTask.main, forwarding args and cmd unchanged
+ LMTask.main(args=args, cmd=cmd)
-if __name__ == "__main__":
- main()
+if __name__ == '__main__':
+ args = parse_args()
+
+ # expose only the GPU chosen via --gpu_id to this process (CUDA_VISIBLE_DEVICES)
+ os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
+
+ # DDP settings: run distributed only when more than one GPU is requested
+ if args.ngpu > 1:
+ args.distributed = True
+ else:
+ args.distributed = False
+ assert args.num_worker_count == 1
+
+ # re-compute batch size when dataset type is "small": scale batch_size and batch_bins by ngpu
+ if args.dataset_type == "small" and args.ngpu != 0:
+ if args.batch_size is not None:
+ args.batch_size = args.batch_size * args.ngpu
+ if args.batch_bins is not None:
+ args.batch_bins = args.batch_bins * args.ngpu
+
+ main(args=args)
--
Gitblit v1.9.1