#!/usr/bin/env bash set -e set -u set -o pipefail ori_data= data_dir= exp_dir= model_name=speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch model_revision="v1.0.4" # please do not modify the model revision inference_nj=32 gpuid_list="0" # set gpus, e.g., gpuid_list="0,1" ngpu=$(echo $gpuid_list | awk -F "," '{print NF}') njob=1 # the number of jobs for each gpu gpu_inference=true # Whether to perform gpu decoding, set false for cpu decoding if ${gpu_inference}; then inference_nj=$[${ngpu}*${njob}] else inference_nj=$njob fi # LM configs use_lm=false beam_size=1 lm_weight=0.0 test_sets="SPEECHIO_ASR_ZH00001 SPEECHIO_ASR_ZH00002 SPEECHIO_ASR_ZH00003 SPEECHIO_ASR_ZH00004 SPEECHIO_ASR_ZH00005 SPEECHIO_ASR_ZH00006 SPEECHIO_ASR_ZH00007 SPEECHIO_ASR_ZH00008 SPEECHIO_ASR_ZH00009 SPEECHIO_ASR_ZH00010 SPEECHIO_ASR_ZH00011 SPEECHIO_ASR_ZH00012 SPEECHIO_ASR_ZH00013 SPEECHIO_ASR_ZH00014 SPEECHIO_ASR_ZH00015" . utils/parse_options.sh for tset_name in ${test_sets}; do test_dir=${data_dir}/speechio/${tset_name} mkdir -p ${test_dir} find ${ori_data}/${tset_name} -iname "*.wav" > ${test_dir}/wav.flist sed -e 's/\.wav//' ${test_dir}/wav.flist | awk -F '/' '{print $NF}' > ${test_dir}/utt.list paste -d' ' ${test_dir}/utt.list ${test_dir}/wav.flist > ${test_dir}/wav.scp cp ${ori_data}/${tset_name}/trans.txt ${test_dir}/text sed -i "s/\t/ /g" ${test_dir}/text done mkdir -p ${exp_dir}/speechio modelscope_utils/modelscope_infer.sh \ --data_dir ${data_dir}/speechio \ --exp_dir ${exp_dir}/speechio \ --test_sets "${test_sets}" \ --model_name ${model_name} \ --model_revision ${model_revision} \ --inference_nj ${inference_nj} \ --gpuid_list ${gpuid_list} \ --njob ${njob} \ --gpu_inference ${gpu_inference} \ --use_lm ${use_lm} \ --beam_size ${beam_size} \ --lm_weight ${lm_weight} # SpeechIO TIOBE textnorm for tset_name in ${test_sets}; do echo "$0 --> Normalizing REF text ..." ./utils/textnorm_zh.py \ --has_key --to_upper \ ${ori_data}/${tset_name}/trans.txt \ ${data_dir}/speechio/${tset_name}/ref.txt cp ${exp_dir}/speechio/${model_name}/decode_asr/${tset_name}/text.proc ${exp_dir}/speechio/${model_name}/decode_asr/${tset_name}/raw_rec.txt sed -i "s###g" ${exp_dir}/speechio/${model_name}/decode_asr/${tset_name}/raw_rec.txt echo "$0 --> Normalizing HYP text ..." ./utils/textnorm_zh.py \ --has_key --to_upper \ ${exp_dir}/speechio/${model_name}/decode_asr/${tset_name}/raw_rec.txt \ ${exp_dir}/speechio/${model_name}/decode_asr/${tset_name}/rec.txt grep -v $'\t$' ${exp_dir}/speechio/${model_name}/decode_asr/${tset_name}/rec.txt > ${exp_dir}/speechio/${model_name}/decode_asr/${tset_name}/rec_non_empty.txt echo "$0 --> computing WER/CER and alignment ..." ./utils/error_rate_zh \ --tokenizer char \ --ref ${data_dir}/speechio/${tset_name}/ref.txt \ --hyp ${exp_dir}/speechio/${model_name}/decode_asr/${tset_name}/rec_non_empty.txt \ ${exp_dir}/speechio/${model_name}/decode_asr/${tset_name}/DETAILS.txt | tee ${exp_dir}/speechio/${model_name}/decode_asr/${tset_name}/RESULTS.txt done