From 4e2fe544ae37174a3e09dfcdbbdae5abfe711e53 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期三, 05 七月 2023 16:57:21 +0800
Subject: [PATCH] funasr sdk

---
 funasr/bin/build_trainer.py |   39 ++++++++++++++++++++++++++++++++++++---
 1 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/funasr/bin/build_trainer.py b/funasr/bin/build_trainer.py
index c13f91b..891139a 100644
--- a/funasr/bin/build_trainer.py
+++ b/funasr/bin/build_trainer.py
@@ -23,10 +23,16 @@
         from funasr.tasks.asr import ASRTask as ASRTask
     elif mode == "paraformer":
         from funasr.tasks.asr import ASRTaskParaformer as ASRTask
+    elif mode == "paraformer_streaming":
+        from funasr.tasks.asr import ASRTaskParaformer as ASRTask
     elif mode == "paraformer_vad_punc":
         from funasr.tasks.asr import ASRTaskParaformer as ASRTask
     elif mode == "uniasr":
         from funasr.tasks.asr import ASRTaskUniASR as ASRTask
+    elif mode == "mfcca":
+        from funasr.tasks.asr import ASRTaskMFCCA as ASRTask
+    elif mode == "tp":
+        from funasr.tasks.asr import ASRTaskAligner as ASRTask
     else:
         raise ValueError("Unknown mode: {}".format(mode))
     parser = ASRTask.get_parser()
@@ -34,8 +40,23 @@
     return args, ASRTask
 
 
-def build_trainer(modelscope_dict, data_dir, output_dir, train_set="train", dev_set="validation", distributed=False,
-                  dataset_type="small", lr=None, batch_bins=None, max_epoch=None, mate_params=None):
+def build_trainer(modelscope_dict,
+                  data_dir,
+                  output_dir,
+                  train_set="train",
+                  dev_set="validation",
+                  distributed=False,
+                  dataset_type="small",
+                  batch_bins=None,
+                  max_epoch=None,
+                  optim=None,
+                  lr=None,
+                  scheduler=None,
+                  scheduler_conf=None,
+                  specaug=None,
+                  specaug_conf=None,
+                  param_dict=None,
+                  **kwargs):
     mode = modelscope_dict['mode']
     args, ASRTask = parse_args(mode=mode)
     # ddp related
@@ -64,7 +85,9 @@
         finetune_configs = yaml.safe_load(f)
         # set data_types
         if dataset_type == "large":
-            finetune_configs["dataset_conf"]["data_types"] = "sound,text"
+            # finetune_configs["dataset_conf"]["data_types"] = "sound,text"
+            if 'data_types' not in finetune_configs['dataset_conf']:
+                finetune_configs["dataset_conf"]["data_types"] = "sound,text"
     finetune_configs = update_dct(configs, finetune_configs)
     for key, value in finetune_configs.items():
         if hasattr(args, key):
@@ -94,8 +117,18 @@
     args.output_dir = output_dir
     args.gpu_id = args.local_rank
     args.config = finetune_config
+    if optim is not None:
+        args.optim = optim
     if lr is not None:
         args.optim_conf["lr"] = lr
+    if scheduler is not None:
+        args.scheduler = scheduler
+    if scheduler_conf is not None:
+        args.scheduler_conf = scheduler_conf
+    if specaug is not None:
+        args.specaug = specaug
+    if specaug_conf is not None:
+        args.specaug_conf = specaug_conf
     if max_epoch is not None:
         args.max_epoch = max_epoch
     if batch_bins is not None:

--
Gitblit v1.9.1