游雁
2024-02-19 4ebde3c4ac27c15ff39ffbd5aa601035d189497a
examples/industrial_data_pretraining/paraformer/finetune.sh
@@ -6,10 +6,10 @@
#git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git ${local_path}
## Generate the JSONL manifest from wav.scp and text.txt.
# jsonl_data_dir is the directory that holds wav.scp and text.txt and that
# receives audio_datasets.jsonl. Set it in the environment to point at your
# own data; when unset or empty it falls back to the original development path.
jsonl_data_dir="${jsonl_data_dir:-/Users/zhifu/funasr1.0/test_local}"
# NOTE(review): scp_file_list and data_type_list appear to be paired by
# position (wav.scp -> "source", text.txt -> "target") - confirm in scp2jsonl.py.
python funasr/datasets/audio_datasets/scp2jsonl.py \
++scp_file_list="[\"${jsonl_data_dir}/wav.scp\", \"${jsonl_data_dir}/text.txt\"]" \
++data_type_list='["source", "target"]' \
++jsonl_file_out="${jsonl_data_dir}/audio_datasets.jsonl"
#python funasr/datasets/audio_datasets/scp2jsonl.py \
#++scp_file_list='["/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"]' \
#++data_type_list='["source", "target"]' \
#++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl
# torchrun \
@@ -24,5 +24,4 @@
++dataset_conf.batch_type="example" \
++train_conf.max_epoch=2 \
++dataset_conf.num_workers=4 \
+output_dir="outputs/debug/ckpt/funasr2/exp2" \
+debug="true"
+output_dir="outputs/debug/ckpt/funasr2/exp2"