From fdf74bb85cfe3dd0ce6cbaf51ec8d5b3ca3d2039 Mon Sep 17 00:00:00 2001
From: 仁迷 <haoneng.lhn@alibaba-inc.com>
Date: 星期四, 09 二月 2023 17:18:43 +0800
Subject: [PATCH] update persian model recipe
---
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-offline/finetune.py | 39 ++++++++++++++++++++-------------------
1 files changed, 20 insertions(+), 19 deletions(-)
diff --git a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-offline/finetune.py b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-offline/finetune.py
index 1aef9c6..2ecc229 100644
--- a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-offline/finetune.py
+++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-offline/finetune.py
@@ -1,35 +1,36 @@
import os
+
from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
+
from funasr.datasets.ms_dataset import MsDataset
+from funasr.utils.modelscope_param import modelscope_args
def modelscope_finetune(params):
- if not os.path.exists(params["output_dir"]):
- os.makedirs(params["output_dir"], exist_ok=True)
+ if not os.path.exists(params.output_dir):
+ os.makedirs(params.output_dir, exist_ok=True)
# dataset split ["train", "validation"]
- ds_dict = MsDataset.load(params["data_dir"])
+ ds_dict = MsDataset.load(params.data_path)
kwargs = dict(
- model=params["model"],
- model_revision=params["model_revision"],
+ model=params.model,
data_dir=ds_dict,
- dataset_type=params["dataset_type"],
- work_dir=params["output_dir"],
- batch_bins=params["batch_bins"],
- max_epoch=params["max_epoch"],
- lr=params["lr"])
+ dataset_type=params.dataset_type,
+ work_dir=params.output_dir,
+ batch_bins=params.batch_bins,
+ max_epoch=params.max_epoch,
+ lr=params.lr)
trainer = build_trainer(Trainers.speech_asr_trainer, default_args=kwargs)
trainer.train()
if __name__ == '__main__':
- params = {}
- params["output_dir"] = "./checkpoint"
- params["data_dir"] = "./data"
- params["batch_bins"] = 2000
- params["dataset_type"] = "small"
- params["max_epoch"] = 50
- params["lr"] = 0.00005
- params["model"] = "damo/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-offline"
- params["model_revision"] = None
+ params = modelscope_args(model="damo/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-offline", data_path="./data")
+ params.output_dir = "./checkpoint" # m模型保存路径
+ params.data_path = "./example_data/" # 数据路径
+ params.dataset_type = "small" # 小数据量设置small，若数据量大于1000小时，请使用large
+ params.batch_bins = 2000 # batch size，如果dataset_type="small"，batch_bins单位为fbank特征帧数，如果dataset_type="large"，batch_bins单位为毫秒，
+ params.max_epoch = 20 # 最大训练轮数
+ params.lr = 0.00005 # 设置学习率
+
modelscope_finetune(params)
--
Gitblit v1.9.1