Dev gzf (#1379)
* update train recipe
* v1.0.8
* llm
* update trainer
* update trainer
* update trainer
* train finetune demo
* train finetune demo
6个文件已修改
12个文件已添加
13个文件已重命名
6个文件已删除
| | |
| | | outputs* |
| | | emotion2vec* |
| | | GPT-SoVITS* |
| | | modelscope_models |
| | |
| | | ### Command-line usage |
| | | |
| | | ```shell |
| | | funasr +model=paraformer-zh +vad_model="fsmn-vad" +punc_model="ct-punc" +input=asr_example_zh.wav |
| | | funasr ++model=paraformer-zh ++vad_model="fsmn-vad" ++punc_model="ct-punc" ++input=asr_example_zh.wav |
| | | ``` |
| | | |
| | | Notes: Supports recognition of a single audio file, as well as a file list in Kaldi-style wav.scp format: `wav_id wav_path` |
| | |
| | | ### 可执行命令行 |
| | | |
| | | ```shell |
| | | funasr +model=paraformer-zh +vad_model="fsmn-vad" +punc_model="ct-punc" +input=asr_example_zh.wav |
| | | funasr ++model=paraformer-zh ++vad_model="fsmn-vad" ++punc_model="ct-punc" ++input=asr_example_zh.wav |
| | | ``` |
| | | |
| | | 注:支持单条音频文件识别,也支持文件列表,列表为kaldi风格wav.scp:`wav_id wav_path` |
| New file |
| | |
| | | ../paraformer/demo_infer.sh |
| New file |
| | |
| | | ../paraformer/demo_train_or_finetune.sh |
| New file |
| | |
| | | ../paraformer/demo_infer.sh |
| New file |
| | |
| | | ../paraformer/demo_train_or_finetune.sh |
| New file |
| | |
| | | ../paraformer/demo_infer.sh |
| New file |
| | |
| | | ../paraformer/demo_train_or_finetune.sh |
| File was renamed from examples/aishell/conformer/infer.sh |
| | |
| | | # Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. |
| | | # MIT License (https://opensource.org/licenses/MIT) |
| | | |
| | | |
| | | |
| | | python -m funasr.bin.inference \ |
| New file |
| | |
| | | # Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. |
| | | # MIT License (https://opensource.org/licenses/MIT) |
| | | |
| | | |
| | | # which gpu to train or finetune |
| | | export CUDA_VISIBLE_DEVICES="0,1" |
| | | gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') |
| | | |
| | | # data dir, which contains: train.jsonl, val.jsonl, tokens.json, am.mvn |
| | | data_dir="/Users/zhifu/funasr1.0/data/list" |
| | | |
| | | ## generate jsonl from wav.scp and text.txt |
| | | #python -m funasr.datasets.audio_datasets.scp2jsonl \ |
| | | #++scp_file_list='["/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"]' \ |
| | | #++data_type_list='["source", "target"]' \ |
| | | #++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl |
| | | |
| | | train_data="${data_dir}/train.jsonl" |
| | | val_data="${data_dir}/val.jsonl" |
| | | tokens="${data_dir}/tokens.json" |
| | | cmvn_file="${data_dir}/am.mvn" |
| | | |
| | | # exp output dir |
| | | output_dir="/Users/zhifu/exp" |
| | | log_file="${output_dir}/log.txt" |
| | | |
| | | workspace=`pwd` |
| | | config="paraformer_conformer_12e_6d_2048_256.yaml" |
| | | |
| | | init_param="${output_dir}/model.pt" |
| | | |
| | | mkdir -p ${output_dir} |
| | | echo "log_file: ${log_file}" |
| | | |
| | | torchrun \ |
| | | --nnodes 1 \ |
| | | --nproc_per_node ${gpu_num} \ |
| | | ../../../funasr/bin/train.py \ |
| | | --config-path "${workspace}/conf" \ |
| | | --config-name "${config}" \ |
| | | ++train_data_set_list="${train_data}" \ |
| | | ++valid_data_set_list="${val_data}" \ |
| | | ++tokenizer_conf.token_list="${tokens}" \ |
| | | ++frontend_conf.cmvn_file="${cmvn_file}" \ |
| | | ++dataset_conf.batch_size=32 \ |
| | | ++dataset_conf.batch_type="example" \ |
| | | ++dataset_conf.num_workers=4 \ |
| | | ++train_conf.max_epoch=150 \ |
| | | ++optim_conf.lr=0.0002 \ |
| | | ++init_param="${init_param}" \ |
| | | ++output_dir="${output_dir}" &> ${log_file} |
| New file |
| | |
| | | ../paraformer/demo_infer.sh |
| New file |
| | |
| | | ../paraformer/demo_train_or_finetune.sh |
| | |
| | | # Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. |
| | | # MIT License (https://opensource.org/licenses/MIT) |
| | | |
| | | ## download model |
| | | #local_path_root=../modelscope_models |
| | | #mkdir -p ${local_path_root} |
| | | #local_path=${local_path_root}/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch |
| | | #git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git ${local_path} |
| | | # method1, finetune from model hub |
| | | |
| | | # which gpu to train or finetune |
| | | export CUDA_VISIBLE_DEVICES="0,1" |
| | | gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') |
| | | |
| | | # data dir, which contains: train.jsonl, val.jsonl |
| | | data_dir="/Users/zhifu/funasr1.0/data/list" |
| | | |
| | | ## generate jsonl from wav.scp and text.txt |
| | | #python -m funasr.datasets.audio_datasets.scp2jsonl \ |
| | |
| | | #++data_type_list='["source", "target"]' \ |
| | | #++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl |
| | | |
| | | train_data="${data_dir}/train.jsonl" |
| | | val_data="${data_dir}/val.jsonl" |
| | | |
| | | # torchrun \ |
| | | # --nnodes 1 \ |
| | | # --nproc_per_node 1 \ |
| | | python funasr/bin/train.py \ |
| | | +model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \ |
| | | +model_revision="v2.0.4" \ |
| | | +train_data_set_list="/Users/zhifu/funasr_github/test_local/aishell2_dev_ios/asr_task_debug_len_10.jsonl" \ |
| | | +valid_data_set_list="/Users/zhifu/funasr_github/test_local/aishell2_dev_ios/asr_task_debug_len_10.jsonl" \ |
| | | ++dataset_conf.batch_size=64 \ |
| | | |
| | | # exp output dir |
| | | output_dir="/Users/zhifu/exp" |
| | | log_file="${output_dir}/log.txt" |
| | | |
| | | |
| | | mkdir -p ${output_dir} |
| | | echo "log_file: ${log_file}" |
| | | |
| | | torchrun \ |
| | | --nnodes 1 \ |
| | | --nproc_per_node ${gpu_num} \ |
| | | funasr/bin/train.py \ |
| | | ++model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \ |
| | | ++model_revision="v2.0.4" \ |
| | | ++train_data_set_list="${train_data}" \ |
| | | ++valid_data_set_list="${val_data}" \ |
| | | ++dataset_conf.batch_size=32 \ |
| | | ++dataset_conf.batch_type="example" \ |
| | | ++train_conf.max_epoch=2 \ |
| | | ++dataset_conf.num_workers=4 \ |
| | | +output_dir="outputs/debug/ckpt/funasr2/exp2" |
| | | ++train_conf.max_epoch=20 \ |
| | | ++optim_conf.lr=0.0002 \ |
| | | ++output_dir="${output_dir}" &> ${log_file} |
| New file |
| | |
| | | # Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. |
| | | # MIT License (https://opensource.org/licenses/MIT) |
| | | |
| | | # method2, finetune from local model |
| | | |
| | | workspace=`pwd` |
| | | |
| | | # download model |
| | | local_path_root=${workspace}/modelscope_models |
| | | mkdir -p ${local_path_root} |
| | | local_path=${local_path_root}/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch |
| | | git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git ${local_path} |
| | | |
| | | |
| | | # which gpu to train or finetune |
| | | export CUDA_VISIBLE_DEVICES="0,1" |
| | | gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') |
| | | |
| | | # data dir, which contains: train.jsonl, val.jsonl |
| | | data_dir="/Users/zhifu/funasr1.0/data/list" |
| | | |
| | | ## generate jsonl from wav.scp and text.txt |
| | | #python -m funasr.datasets.audio_datasets.scp2jsonl \ |
| | | #++scp_file_list='["/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"]' \ |
| | | #++data_type_list='["source", "target"]' \ |
| | | #++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl |
| | | |
| | | train_data="${data_dir}/train.jsonl" |
| | | val_data="${data_dir}/val.jsonl" |
| | | |
| | | tokens="${local_path}/tokens.json" |
| | | cmvn_file="${local_path}/am.mvn" |
| | | |
| | | # exp output dir |
| | | output_dir="/Users/zhifu/exp" |
| | | log_file="${output_dir}/log.txt" |
| | | |
| | | config="config.yaml" |
| | | |
| | | init_param="${local_path}/model.pt" |
| | | |
| | | mkdir -p ${output_dir} |
| | | echo "log_file: ${log_file}" |
| | | |
| | | torchrun \ |
| | | --nnodes 1 \ |
| | | --nproc_per_node ${gpu_num} \ |
| | | ../../../funasr/bin/train.py \ |
| | | --config-path "${local_path}" \ |
| | | --config-name "${config}" \ |
| | | ++train_data_set_list="${train_data}" \ |
| | | ++valid_data_set_list="${val_data}" \ |
| | | ++tokenizer_conf.token_list="${tokens}" \ |
| | | ++frontend_conf.cmvn_file="${cmvn_file}" \ |
| | | ++dataset_conf.batch_size=32 \ |
| | | ++dataset_conf.batch_type="example" \ |
| | | ++dataset_conf.num_workers=4 \ |
| | | ++train_conf.max_epoch=20 \ |
| | | ++optim_conf.lr=0.0002 \ |
| | | ++init_param="${init_param}" \ |
| | | ++output_dir="${output_dir}" &> ${log_file} |
| New file |
| | |
| | | # Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. |
| | | # MIT License (https://opensource.org/licenses/MIT) |
| | | |
| | | # method1, inference from model hub |
| | | |
| | | # for more input types, please refer to readme.md |
| | | input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav" |
| | | |
| | | output_dir="./outputs/debug" |
| | | |
| | | model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" |
| | | model_revision="v2.0.4" |
| | | |
| | | device="cuda:0" # "cuda:0" for gpu0, "cuda:1" for gpu1, "cpu" |
| | | |
| | | python -m funasr.bin.inference \ |
| | | ++model=${model} \ |
| | | ++model_revision=${model_revision} \ |
| | | ++input="${input}" \ |
| | | ++output_dir="${output_dir}" \ |
| | | ++device="${device}" \ |
| New file |
| | |
| | | # Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. |
| | | # MIT License (https://opensource.org/licenses/MIT) |
| | | |
| | | # method2, inference from local model |
| | | |
| | | # for more input types, please refer to readme.md |
| | | input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav" |
| | | |
| | | output_dir="./outputs/debug" |
| | | |
| | | workspace=`pwd` |
| | | |
| | | # download model |
| | | local_path_root=${workspace}/modelscope_models |
| | | mkdir -p ${local_path_root} |
| | | local_path=${local_path_root}/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch |
| | | git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git ${local_path} |
| | | |
| | | device="cuda:0" # "cuda:0" for gpu0, "cuda:1" for gpu1, "cpu" |
| | | |
| | | tokens="${local_path}/tokens.json" |
| | | cmvn_file="${local_path}/am.mvn" |
| | | |
| | | config="config.yaml" |
| | | init_param="${local_path}/model.pt" |
| | | |
| | | python -m funasr.bin.inference \ |
| | | --config-path "${local_path}" \ |
| | | --config-name "${config}" \ |
| | | ++init_param="${init_param}" \ |
| | | ++tokenizer_conf.token_list="${tokens}" \ |
| | | ++frontend_conf.cmvn_file="${cmvn_file}" \ |
| | | ++input="${input}" \ |
| | | ++output_dir="${output_dir}" \ |
| | | ++device="${device}" \ |
| | | |
| | | |
| | | |
| | | |
| | |
| | | init_param = (init_param,) |
| | | logging.info("init_param is not None: %s", init_param) |
| | | for p in init_param: |
| | | if os.path.exists(p): |
| | | logging.info(f"Loading pretrained params from {p}") |
| | | load_pretrained_model( |
| | | model=model, |
| | |
| | | excludes=kwargs.get("excludes", None), |
| | | ) |
| | | else: |
| | | logging.info(f"Checkpoint does not exist, init randomly: {p}") |
| | | else: |
| | | initialize(model, kwargs.get("init", "kaiming_normal")) |
| | | |
| | | |
| | |
| | | |
| | | time2 = time.perf_counter() |
| | | time_escaped = (time2 - time1)/3600.0 |
| | | print(f"\nrank: {self.local_rank}, time_escaped_epoch: {time_escaped:.3f} hours, estimated to finish {self.max_epoch} epoch: {(self.max_epoch-epoch)*time_escaped:.3f}\n") |
| | | print(f"\nrank: {self.local_rank}, time_escaped_epoch: {time_escaped:.3f} hours, estimated to finish {self.max_epoch} epoch: {(self.max_epoch-epoch)*time_escaped:.3f} hours\n") |
| | | |
| | | if self.rank == 0: |
| | | average_checkpoints(self.output_dir, self.avg_nbest_model) |
| | |
| | | f"{time_now}, " |
| | | f"rank: {self.local_rank}, " |
| | | f"epoch: {epoch}/{self.max_epoch}, " |
| | | f"step: {batch_idx+1}/{len(self.dataloader_train)}, total: {self.batch_total}, " |
| | | f"step: {batch_idx+1}/{len(self.dataloader_train)}, total step: {self.batch_total}, " |
| | | f"(loss: {loss.detach().cpu().item():.3f}), " |
| | | f"(lr: {lr:.3e}), " |
| | | f"{[(k, round(v.cpu().item(), 3)) for k, v in stats.items()]}, " |