examples/aishell/branchformer/demo_infer.sh
New file @@ -0,0 +1 @@ ../paraformer/demo_infer.sh examples/aishell/branchformer/demo_train_or_finetune.sh
New file @@ -0,0 +1 @@ ../paraformer/demo_train_or_finetune.sh examples/aishell/conformer/demo_infer.sh
New file @@ -0,0 +1 @@ ../paraformer/demo_infer.sh examples/aishell/conformer/demo_train_or_finetune.sh
New file @@ -0,0 +1 @@ ../paraformer/demo_train_or_finetune.sh examples/aishell/e_branchformer/demo_infer.sh
New file @@ -0,0 +1 @@ ../paraformer/demo_infer.sh examples/aishell/e_branchformer/demo_train_or_finetune.sh
New file @@ -0,0 +1 @@ ../paraformer/demo_train_or_finetune.sh examples/aishell/e_branchformer/infer.sh
File was deleted examples/aishell/paraformer/demo_infer.sh
File was renamed from examples/aishell/conformer/infer.sh @@ -1,3 +1,6 @@ # Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. # MIT License (https://opensource.org/licenses/MIT) python -m funasr.bin.inference \ examples/aishell/paraformer/demo_train_or_finetune.sh
New file @@ -0,0 +1,51 @@
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
#  MIT License  (https://opensource.org/licenses/MIT)

# Demo: launch funasr/bin/train.py through torchrun to train or finetune a model.
# Run it from an example directory that has a conf/ sub-directory: the config
# file is looked up in "$(pwd)/conf".

# which gpu to train or finetune
export CUDA_VISIBLE_DEVICES="0,1"
# one worker process per listed device: count the comma-separated ids
gpu_num=$(echo "${CUDA_VISIBLE_DEVICES}" | awk -F "," '{print NF}')

# data dir, which contains: train.jsonl, val.jsonl, tokens.jsonl/tokens.txt, am.mvn
data_dir="/Users/zhifu/funasr1.0/data/list"

## generate jsonl from wav.scp and text.txt
#python -m funasr.datasets.audio_datasets.scp2jsonl \
#++scp_file_list='["/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"]' \
#++data_type_list='["source", "target"]' \
#++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl

train_data="${data_dir}/train.jsonl"
val_data="${data_dir}/val.jsonl"
tokens="${data_dir}/tokens.jsonl"
cmvn_file="${data_dir}/am.mvn"

# exp output dir
output_dir="/Users/zhifu/exp"
log_file="${output_dir}/log.txt"

# absolute path of the current directory; its conf/ holds the yaml below
workspace=$(pwd)
config="paraformer_conformer_12e_6d_2048_256.yaml"

# checkpoint to start from, taken from the output dir
init_param="${output_dir}/model.pt"

mkdir -p "${output_dir}"
echo "log_file: ${log_file}"

# stdout and stderr of the whole run go to ${log_file}
torchrun \
  --nnodes 1 \
  --nproc_per_node "${gpu_num}" \
  ../../../funasr/bin/train.py \
  --config-path "${workspace}/conf" \
  --config-name "${config}" \
  ++train_data_set_list="${train_data}" \
  ++valid_data_set_list="${val_data}" \
  ++tokenizer_conf.token_list="${tokens}" \
  ++frontend_conf.cmvn_file="${cmvn_file}" \
  ++dataset_conf.batch_size=32 \
  ++dataset_conf.batch_type="example" \
  ++dataset_conf.num_workers=4 \
  ++train_conf.max_epoch=20 \
  ++optim_conf.lr=0.0002 \
  ++init_param="${init_param}" \
  ++output_dir="${output_dir}" &> "${log_file}"
examples/aishell/paraformer/infer.sh
File was deleted examples/aishell/transformer/demo_infer.sh
New file @@ -0,0 +1 @@ ../paraformer/demo_infer.sh examples/aishell/transformer/demo_train_or_finetune.sh
New file @@ -0,0 +1 @@ ../paraformer/demo_train_or_finetune.sh examples/aishell/transformer/infer.sh
File was deleted examples/industrial_data_pretraining/paraformer/demo.sh
@@ -1,8 +1,10 @@ # method1, inference from model hub model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" model_revision="v2.0.4" python funasr/bin/inference.py \ python -m funasr.bin.inference \ +model=${model} \ +model_revision=${model_revision} \ +input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav" \ @@ -10,5 +12,18 @@ +device="cpu" \ # method2, inference from local model #python -m funasr.bin.inference \ #--config-path="/Users/zhifu/funasr_github/test_local/funasr_cli_egs" \ #--config-name="config.yaml" \ #++init_param="/Users/zhifu/funasr_github/test_local/funasr_cli_egs/model.pt" \ #++tokenizer_conf.token_list="/Users/zhifu/funasr_github/test_local/funasr_cli_egs/tokens.txt" \ #++frontend_conf.cmvn_file="/Users/zhifu/funasr_github/test_local/funasr_cli_egs/am.mvn" \ #++input="data/wav.scp" \ #++output_dir="./outputs/debug" \ #++device="cuda" \ examples/industrial_data_pretraining/paraformer/finetune.sh
File was deleted examples/industrial_data_pretraining/paraformer/finetune_from_local_model.sh
New file @@ -0,0 +1,58 @@
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
#  MIT License  (https://opensource.org/licenses/MIT)

# Finetune from a local clone of the ModelScope model repository: the clone
# supplies config.yaml, the token list, am.mvn and the initial checkpoint.

# Absolute path of the current directory. The model dir is passed below as the
# Hydra-style --config-path, which is resolved relative to the Python entry
# file rather than the shell's cwd unless it is absolute.
workspace=$(pwd)

# download model
local_path_root="${workspace}/../modelscope_models"
mkdir -p "${local_path_root}"
local_path="${local_path_root}/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
# clone only once: git refuses to clone into an existing non-empty directory,
# so skipping it keeps the script re-runnable
if [ ! -d "${local_path}/.git" ]; then
  git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git "${local_path}"
fi

# which gpu to train or finetune
export CUDA_VISIBLE_DEVICES="0,1"
# one worker process per listed device: count the comma-separated ids
gpu_num=$(echo "${CUDA_VISIBLE_DEVICES}" | awk -F "," '{print NF}')

# data dir, which contains: train.jsonl, val.jsonl
data_dir="/Users/zhifu/funasr1.0/data/list"

## generate jsonl from wav.scp and text.txt
#python -m funasr.datasets.audio_datasets.scp2jsonl \
#++scp_file_list='["/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"]' \
#++data_type_list='["source", "target"]' \
#++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl

train_data="${data_dir}/train.jsonl"
val_data="${data_dir}/val.jsonl"
# NOTE(review): confirm the token list file name shipped in the cloned model repo
tokens="${local_path}/tokens.jsonl"
cmvn_file="${local_path}/am.mvn"

# exp output dir
output_dir="/Users/zhifu/exp"
log_file="${output_dir}/log.txt"

# config.yaml is taken from the cloned model dir: the directory goes to
# --config-path and only the bare file name goes to --config-name
config="config.yaml"

init_param="${local_path}/model.pt"

mkdir -p "${output_dir}"
echo "log_file: ${log_file}"

# stdout and stderr of the whole run go to ${log_file}
torchrun \
  --nnodes 1 \
  --nproc_per_node "${gpu_num}" \
  ../../../funasr/bin/train.py \
  --config-path "${local_path}" \
  --config-name "${config}" \
  ++train_data_set_list="${train_data}" \
  ++valid_data_set_list="${val_data}" \
  ++tokenizer_conf.token_list="${tokens}" \
  ++frontend_conf.cmvn_file="${cmvn_file}" \
  ++dataset_conf.batch_size=32 \
  ++dataset_conf.batch_type="example" \
  ++dataset_conf.num_workers=4 \
  ++train_conf.max_epoch=20 \
  ++optim_conf.lr=0.0002 \
  ++init_param="${init_param}" \
  ++output_dir="${output_dir}" &> "${log_file}"
examples/industrial_data_pretraining/paraformer/finetune_from_model_hub.sh
New file @@ -0,0 +1,42 @@
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
#  MIT License  (https://opensource.org/licenses/MIT)

# Finetune a model referenced by its model-hub id (+model / +model_revision),
# launching funasr/bin/train.py through torchrun.

# Locate train.py from this script's own location (three levels below the repo
# root) so the launch does not depend on the caller's current directory.
script_dir=$(cd "$(dirname "$0")" && pwd)
train_script="${script_dir}/../../../funasr/bin/train.py"

# which gpu to train or finetune
export CUDA_VISIBLE_DEVICES="0,1"
# one worker process per listed device: count the comma-separated ids
gpu_num=$(echo "${CUDA_VISIBLE_DEVICES}" | awk -F "," '{print NF}')

# data dir, which contains: train.jsonl, val.jsonl
data_dir="/Users/zhifu/funasr1.0/data/list"

## generate jsonl from wav.scp and text.txt
#python -m funasr.datasets.audio_datasets.scp2jsonl \
#++scp_file_list='["/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"]' \
#++data_type_list='["source", "target"]' \
#++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl

train_data="${data_dir}/train.jsonl"
val_data="${data_dir}/val.jsonl"

# exp output dir
output_dir="/Users/zhifu/exp"
log_file="${output_dir}/log.txt"

mkdir -p "${output_dir}"
echo "log_file: ${log_file}"

# stdout and stderr of the whole run go to ${log_file}
torchrun \
  --nnodes 1 \
  --nproc_per_node "${gpu_num}" \
  "${train_script}" \
  +model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \
  +model_revision="v2.0.4" \
  ++train_data_set_list="${train_data}" \
  ++valid_data_set_list="${val_data}" \
  ++dataset_conf.batch_size=32 \
  ++dataset_conf.batch_type="example" \
  ++dataset_conf.num_workers=4 \
  ++train_conf.max_epoch=20 \
  ++optim_conf.lr=0.0002 \
  ++output_dir="${output_dir}" &> "${log_file}"
examples/industrial_data_pretraining/paraformer/infer_after_finetune.sh
File was deleted funasr/bin/train.py
@@ -96,6 +96,7 @@ init_param = (init_param,) logging.info("init_param is not None: %s", init_param) for p in init_param: if os.path.exists(p): logging.info(f"Loading pretrained params from {p}") load_pretrained_model( model=model, @@ -106,6 +107,8 @@ excludes=kwargs.get("excludes", None), ) else: logging.info(f"Checkpoint does not exist, init randomly: {p}") else: initialize(model, kwargs.get("init", "kaiming_normal"))