Merge pull request #5 from alibaba-damo-academy/dev
update modelscope details
| | |
| | | train_cmd=utils/run.pl |
| | | |
| | | # general configuration |
| | | feats_dir="." #feature output dictionary, for large data |
| | | feats_dir="../DATA" #feature output dictionary, for large data |
| | | exp_dir="." |
| | | lang=zh |
| | | dumpdir=dump/fbank |
| | |
| | | lfr_n=6 |
| | | |
| | | init_model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch # pre-trained model, download from modelscope during fine-tuning |
| | | model_revision="v1.0.3" # please do not modify the model revision |
| | | cmvn_file=init_model/${init_model_name}/am.mvn |
| | | seg_file=init_model/${init_model_name}/seg_dict |
| | | vocab=init_model/${init_model_name}/tokens.txt |
| | |
| | | test_sets="dev test" |
| | | |
| | | asr_config=conf/train_asr_paraformer_sanm_50e_16d_2048_512_lfr6.yaml |
| | | init_param="init_model/${init_model_name}/${init_model_name}" |
| | | init_param="init_model/${init_model_name}/model.pb" |
| | | |
| | | inference_config=conf/decode_asr_transformer_noctc_1best.yaml |
| | | inference_asr_model=valid.acc.ave_10best.pth |
| | |
| | | . utils/parse_options.sh || exit 1; |
| | | |
| | | # download model from modelscope |
| | | python modelscope_utils/download_model.py --model_name ${init_model_name} |
| | | python modelscope_utils/download_model.py --model_name ${init_model_name} --model_revision ${model_revision} |
| | | |
| | | if [ ! -d ${HOME}/.cache/modelscope/hub/damo/${init_model_name} ]; then |
| | | echo "${HOME}/.cache/modelscope/hub/damo/${init_model_name} must exist" |
| | |
| | | world_size=$gpu_num # run on one machine |
| | | if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then |
| | | # update asr train config.yaml |
| | | python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/asr_train_config.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml |
| | | python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/finetune.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml |
| | | finetune_config=init_model/${init_model_name}/asr_finetune_config.yaml |
| | | |
| | | mkdir -p ${exp_dir}/exp/${model_dir} |
| | |
| | | data_dir= |
| | | exp_dir= |
| | | model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch |
| | | model_revision="v1.0.3" # please do not modify the model revision |
| | | inference_nj=32 |
| | | gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1" |
| | | ngpu=$(echo $gpuid_list | awk -F "," '{print NF}') |
| | |
| | | --exp_dir ${exp_dir}/aishell \ |
| | | --test_sets "${test_sets}" \ |
| | | --model_name ${model_name} \ |
| | | --model_revision ${model_revision} \ |
| | | --inference_nj ${inference_nj} \ |
| | | --gpuid_list ${gpuid_list} \ |
| | | --njob ${njob} \ |
| | |
| | | train_cmd=utils/run.pl |
| | | |
| | | # general configuration |
| | | feats_dir="." #feature output dictionary, for large data |
| | | feats_dir="../DATA" #feature output dictionary, for large data |
| | | exp_dir="." |
| | | lang=zh |
| | | dumpdir=dump/fbank |
| | |
| | | lfr_n=6 |
| | | |
| | | init_model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch # pre-trained model, download from modelscope during fine-tuning |
| | | model_revision="v1.0.3" # please do not modify the model revision |
| | | cmvn_file=init_model/${init_model_name}/am.mvn |
| | | seg_file=init_model/${init_model_name}/seg_dict |
| | | vocab=init_model/${init_model_name}/tokens.txt |
| | |
| | | test_sets="dev_ios test_android test_ios test_mic" |
| | | |
| | | asr_config=conf/train_asr_paraformer_sanm_50e_16d_2048_512_lfr6.yaml |
| | | init_param="init_model/${init_model_name}/${init_model_name}" |
| | | init_param="init_model/${init_model_name}/model.pb" |
| | | |
| | | inference_config=conf/decode_asr_transformer_noctc_1best.yaml |
| | | inference_asr_model=valid.acc.ave_10best.pth |
| | |
| | | . utils/parse_options.sh || exit 1; |
| | | |
| | | # download model from modelscope |
| | | python modelscope_utils/download_model.py --model_name ${init_model_name} |
| | | python modelscope_utils/download_model.py --model_name ${init_model_name} --model_revision ${model_revision} |
| | | |
| | | if [ ! -d ${HOME}/.cache/modelscope/hub/damo/${init_model_name} ]; then |
| | | echo "${HOME}/.cache/modelscope/hub/damo/${init_model_name} must exist" |
| | |
| | | world_size=$gpu_num # run on one machine |
| | | if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then |
| | | # update asr train config.yaml |
| | | python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/asr_train_config.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml |
| | | python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/finetune.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml |
| | | finetune_config=init_model/${init_model_name}/asr_finetune_config.yaml |
| | | |
| | | mkdir -p ${exp_dir}/exp/${model_dir} |
| | |
| | | data_dir= |
| | | exp_dir= |
| | | model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch |
| | | model_revision="v1.0.3" # please do not modify the model revision |
| | | inference_nj=32 |
| | | gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1" |
| | | ngpu=$(echo $gpuid_list | awk -F "," '{print NF}') |
| | |
| | | inference_nj=$njob |
| | | fi |
| | | |
| | | # LM configs |
| | | use_lm=false |
| | | beam_size=1 |
| | | lm_weight=0.0 |
| | |
| | | --exp_dir ${exp_dir}/aishell2 \ |
| | | --test_sets "${test_sets}" \ |
| | | --model_name ${model_name} \ |
| | | --model_revision ${model_revision} \ |
| | | --inference_nj ${inference_nj} \ |
| | | --gpuid_list ${gpuid_list} \ |
| | | --njob ${njob} \ |
| | |
| | | train_cmd=utils/run.pl |
| | | |
| | | # general configuration |
| | | feats_dir="." #feature output dictionary, for large data |
| | | feats_dir="../DATA" #feature output dictionary, for large data |
| | | exp_dir="." |
| | | lang=zh |
| | | dumpdir=dump/fbank |
| | |
| | | lfr_n=6 |
| | | |
| | | init_model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch # pre-trained model, download from modelscope during fine-tuning |
| | | model_revision="v1.0.3" # please do not modify the model revision |
| | | cmvn_file=init_model/${init_model_name}/am.mvn |
| | | seg_file=init_model/${init_model_name}/seg_dict |
| | | vocab=init_model/${init_model_name}/tokens.txt |
| | |
| | | test_sets="dev test" |
| | | |
| | | asr_config=conf/train_asr_paraformer_sanm_50e_16d_2048_512_lfr6.yaml |
| | | init_param="init_model/${init_model_name}/${init_model_name}" |
| | | init_param="init_model/${init_model_name}/model.pb" |
| | | |
| | | inference_config=conf/decode_asr_transformer_noctc_1best.yaml |
| | | inference_asr_model=valid.acc.ave_10best.pth |
| | |
| | | . utils/parse_options.sh || exit 1; |
| | | |
| | | # download model from modelscope |
| | | python modelscope_utils/download_model.py --model_name ${init_model_name} |
| | | python modelscope_utils/download_model.py --model_name ${init_model_name} --model_revision ${model_revision} |
| | | |
| | | if [ ! -d ${HOME}/.cache/modelscope/hub/damo/${init_model_name} ]; then |
| | | echo "${HOME}/.cache/modelscope/hub/damo/${init_model_name} must exist" |
| | |
| | | world_size=$gpu_num # run on one machine |
| | | if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then |
| | | # update asr train config.yaml |
| | | python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/asr_train_config.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml |
| | | python modelscope_utils/update_config.py --modelscope_config init_model/${init_model_name}/finetune.yaml --finetune_config ${asr_config} --output_config init_model/${init_model_name}/asr_finetune_config.yaml |
| | | finetune_config=init_model/${init_model_name}/asr_finetune_config.yaml |
| | | |
| | | mkdir -p ${exp_dir}/exp/${model_dir} |
| | |
| | | set -o pipefail |
| | | |
| | | model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch # pre-trained model, download from modelscope |
| | | model_revision="v1.0.3" # please do not modify the model revision |
| | | data_dir= # wav list, ${data_dir}/wav.scp |
| | | exp_dir="exp" |
| | | gpuid_list="0,1" |
| | |
| | | lm_weight=0.0 |
| | | |
| | | python modelscope_utils/download_model.py \ |
| | | --model_name ${model_name} |
| | | --model_name ${model_name} --model_revision ${model_revision} |
| | | |
| | | if [ -d ${exp_dir} ]; then |
| | | echo "${exp_dir} is already exists. if you want to decode again, please delete ${exp_dir} first." |
| | |
| | | utils/split_scp.pl "${data_dir}/wav.scp" ${split_scps} |
| | | |
| | | if "${use_lm}"; then |
| | | cp ${exp_dir}/${model_name}/decode_asr_transformer.yaml ${exp_dir}/${model_name}/decode_asr_transformer.yaml.back |
| | | cp ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml.back |
| | | sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${exp_dir}/${model_name}/decode_asr_transformer.yaml |
| | | sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml |
| | | sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${exp_dir}/${model_name}/decode_asr_transformer.yaml |
| | | sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml |
| | | cp ${exp_dir}/${model_name}/decoding.yaml ${exp_dir}/${model_name}/decoding.yaml.back |
| | | sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${exp_dir}/${model_name}/decoding.yaml |
| | | sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${exp_dir}/${model_name}/decoding.yaml |
| | | fi |
| | | |
| | | echo "Decoding started... log: '${_logdir}/asr_inference.*.log'" |
| | |
| | | cat ${_logdir}/text.${i} |
| | | done | sort -k1 >${_dir}/text |
| | | |
| | | mv ${exp_dir}/${model_name}/decode_asr_transformer.yaml.back ${exp_dir}/${model_name}/decode_asr_transformer.yaml |
| | | mv ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml.back ${exp_dir}/${model_name}/decode_asr_transformer_wav.yaml |
| | | mv ${exp_dir}/${model_name}/decoding.yaml.back ${exp_dir}/${model_name}/decoding.yaml |
| | | |
| | |
| | | type=str, |
| | | default="speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", |
| | | help="model name in modelscope") |
| | | parser.add_argument("--model_revision", |
| | | type=str, |
| | | default="v1.0.3", |
| | | help="model revision in modelscope") |
| | | args = parser.parse_args() |
| | | |
| | | inference_pipeline = pipeline( |
| | | task=Tasks.auto_speech_recognition, |
| | | model='damo/{}'.format(args.model_name), |
| | | model_revision='v1.0.0') |
| | | model_revision=args.model_revision) |
| | |
| | | data_dir= |
| | | exp_dir= |
| | | model_name= |
| | | model_revision= |
| | | inference_nj=32 |
| | | gpuid_list="0,1,2,3" |
| | | njob=32 |
| | |
| | | |
| | | # download model from modelscope |
| | | python modelscope_utils/download_model.py \ |
| | | --model_name ${model_name} |
| | | --model_name ${model_name} --model_revision ${model_revision} |
| | | |
| | | modelscope_dir=${HOME}/.cache/modelscope/hub/damo/${model_name} |
| | | |
| | |
| | | fi |
| | | |
| | | if "${use_lm}"; then |
| | | cp ${modelscope_dir}/decode_asr_transformer.yaml ${modelscope_dir}/decode_asr_transformer.yaml.back |
| | | cp ${modelscope_dir}/decode_asr_transformer_wav.yaml ${modelscope_dir}/decode_asr_transformer_wav.yaml.back |
| | | sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${modelscope_dir}/decode_asr_transformer.yaml |
| | | sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${modelscope_dir}/decode_asr_transformer_wav.yaml |
| | | sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${modelscope_dir}/decode_asr_transformer.yaml |
| | | sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${modelscope_dir}/decode_asr_transformer_wav.yaml |
| | | cp ${modelscope_dir}/decoding.yaml ${modelscope_dir}/decoding.yaml.back |
| | | sed -i "s#beam_size: [0-9]*#beam_size: `echo $beam_size`#g" ${modelscope_dir}/decoding.yaml |
| | | sed -i "s#lm_weight: 0.[0-9]*#lm_weight: `echo $lm_weight`#g" ${modelscope_dir}/decoding.yaml |
| | | fi |
| | | |
| | | for n in $(seq "${inference_nj}"); do |
| | |
| | | done |
| | | |
| | | if "${use_lm}"; then |
| | | mv ${modelscope_dir}/decode_asr_transformer.yaml.back ${modelscope_dir}/decode_asr_transformer.yaml |
| | | mv ${modelscope_dir}/decode_asr_transformer_wav.yaml.back ${modelscope_dir}/decode_asr_transformer_wav.yaml |
| | | mv ${modelscope_dir}/decoding.yaml.back ${modelscope_dir}/decoding.yaml |
| | | fi |
| | |
| | | data_dir= |
| | | exp_dir= |
| | | model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch |
| | | model_revision="v1.0.3" # please do not modify the model revision |
| | | inference_nj=32 |
| | | gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1" |
| | | ngpu=$(echo $gpuid_list | awk -F "," '{print NF}') |
| | |
| | | --exp_dir ${exp_dir}/speechio \ |
| | | --test_sets "${test_sets}" \ |
| | | --model_name ${model_name} \ |
| | | --model_revision ${model_revision} \ |
| | | --inference_nj ${inference_nj} \ |
| | | --gpuid_list ${gpuid_list} \ |
| | | --njob ${njob} \ |
| | |
| | | data_dir= |
| | | exp_dir= |
| | | model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch |
| | | model_revision="v1.0.3" # please do not modify the model revision |
| | | inference_nj=32 |
| | | gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1" |
| | | ngpu=$(echo $gpuid_list | awk -F "," '{print NF}') |
| | |
| | | --exp_dir ${exp_dir}/wenetspeech \ |
| | | --test_sets "${test_sets}" \ |
| | | --model_name ${model_name} \ |
| | | --model_revision ${model_revision} \ |
| | | --inference_nj ${inference_nj} \ |
| | | --gpuid_list ${gpuid_list} \ |
| | | --njob ${njob} \ |