From 9e8a52153d1256061ae3f6930a847f6fcad36656 Mon Sep 17 00:00:00 2001
From: wucong.lyb <wucong.lyb@alibaba-inc.com>
Date: Fri, 10 Feb 2023 10:54:27 +0800
Subject: [PATCH] add language model infer pipeline
---
funasr/bin/lm_train.py | 50 +++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 37 insertions(+), 13 deletions(-)
diff --git a/funasr/bin/lm_train.py b/funasr/bin/lm_train.py
index faa7a45..8641465 100755
--- a/funasr/bin/lm_train.py
+++ b/funasr/bin/lm_train.py
@@ -1,22 +1,46 @@
#!/usr/bin/env python3
+
+import os
+
from funasr.tasks.lm import LMTask
-def get_parser():
+# for LM Training
+def parse_args():
parser = LMTask.get_parser()
- return parser
+ parser.add_argument(
+ "--gpu_id",
+ type=int,
+ default=0,
+ help="local gpu id.",
+ )
+ args = parser.parse_args()
+ return args
-def main(cmd=None):
- """LM training.
-
- Example:
-
- % python lm_train.py asr --print_config --optim adadelta
- % python lm_train.py --config conf/train_asr.yaml
- """
- LMTask.main(cmd=cmd)
+def main(args=None, cmd=None):
+ # for LM Training
+ LMTask.main(args=args, cmd=cmd)
-if __name__ == "__main__":
- main()
+if __name__ == '__main__':
+ args = parse_args()
+
+ # setup local gpu_id
+ os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
+
+ # DDP settings
+ if args.ngpu > 1:
+ args.distributed = True
+ else:
+ args.distributed = False
+ assert args.num_worker_count == 1
+
+ # re-compute batch size: when dataset type is small
+ if args.dataset_type == "small" and args.ngpu != 0:
+ if args.batch_size is not None:
+ args.batch_size = args.batch_size * args.ngpu
+ if args.batch_bins is not None:
+ args.batch_bins = args.batch_bins * args.ngpu
+
+ main(args=args)
--
Gitblit v1.9.1