From 94de39dde2e616a01683c518023d0fab72b4e103 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期一, 19 二月 2024 22:21:50 +0800
Subject: [PATCH] aishell example

---
 examples/industrial_data_pretraining/paraformer/finetune.sh |   33 ++++++++++++++++++++++++---------
 1 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/examples/industrial_data_pretraining/paraformer/finetune.sh b/examples/industrial_data_pretraining/paraformer/finetune.sh
index ce1953c..8bdd8da 100644
--- a/examples/industrial_data_pretraining/paraformer/finetune.sh
+++ b/examples/industrial_data_pretraining/paraformer/finetune.sh
@@ -1,12 +1,27 @@
 
-cmd="funasr/bin/train.py"
+## download model
+#local_path_root=../modelscope_models
+#mkdir -p ${local_path_root}
+#local_path=${local_path_root}/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
+#git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git ${local_path}
 
-python $cmd \
-+model="/Users/zhifu/modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \
-+token_list="/Users/zhifu/.cache/modelscope/hub/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/tokens.txt" \
-+train_data_set_list="/Users/zhifu/funasr_github/test_local/aishell2_dev_ios/asr_task_debug_len.jsonl" \
-+output_dir="/Users/zhifu/Downloads/ckpt/funasr2/exp2" \
-+device="cpu"
+## generate jsonl from wav.scp and text.txt
+#python funasr/datasets/audio_datasets/scp2jsonl.py \
+#++scp_file_list='["/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"]' \
+#++data_type_list='["source", "target"]' \
+#++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl
 
-#--config-path "/Users/zhifu/funasr_github/examples/industrial_data_pretraining/paraformer-large/conf" \
-#--config-name "finetune.yaml" \
\ No newline at end of file
+
+# torchrun \
+# --nnodes 1 \
+# --nproc_per_node 1 \
+python funasr/bin/train.py \
++model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \
++model_revision="v2.0.4" \
++train_data_set_list="/Users/zhifu/funasr_github/test_local/aishell2_dev_ios/asr_task_debug_len_10.jsonl" \
++valid_data_set_list="/Users/zhifu/funasr_github/test_local/aishell2_dev_ios/asr_task_debug_len_10.jsonl" \
+++dataset_conf.batch_size=64 \
+++dataset_conf.batch_type="example" \
+++train_conf.max_epoch=2 \
+++dataset_conf.num_workers=4 \
++output_dir="outputs/debug/ckpt/funasr2/exp2"
\ No newline at end of file

--
Gitblit v1.9.1