#!/usr/bin/env bash
#
# Decode one or more test sets with a ModelScope ASR model and score the result.
#
# For every set listed in ${test_sets} the script:
#   1. splits ${data_dir}/<set>/wav.scp into ${inference_nj} key lists,
#   2. runs funasr.bin.modelscope_infer on each list through ${decode_cmd},
#   3. merges the per-job hypotheses into <out>/text,
#   4. runs utils/proce_text.py on hypothesis and reference text and scores
#      them with utils/compute_wer.py.
# where <out> is ${exp_dir}/${model_name}/decode_asr/<set>.
#
# When use_lm is true, beam_size / lm_weight are patched into the decoding
# YAML files of the downloaded model for the duration of the run and the
# original files are put back when the script exits.

set -e
set -u
set -o pipefail

# Defaults. NOTE(review): these are presumably overridable from the command
# line through utils/parse_options.sh (sourced below) -- confirm against
# that helper.
data_dir=
exp_dir=
model_name=
inference_nj=32
gpuid_list="0,1,2,3"
njob=32
gpu_inference=true

test_sets="dev test"
decode_cmd=utils/run.pl

# LM configs
use_lm=false
beam_size=1
lm_weight=0.0

. utils/parse_options.sh

# Fail early on missing settings instead of silently building paths such as
# "/dev/wav.scp" or "/<model>/decode_asr/..." from empty values.
for _required in data_dir exp_dir model_name; do
  if [[ -z "${!_required}" ]]; then
    echo "$0: ${_required} must not be empty" >&2
    exit 1
  fi
done
if ! [[ "${inference_nj}" =~ ^[1-9][0-9]*$ ]]; then
  echo "$0: inference_nj must be a positive integer, got '${inference_nj}'" >&2
  exit 1
fi

if "${gpu_inference}"; then
  _ngpu=1
else
  _ngpu=0
fi

# download model from modelscope
python modelscope_utils/download_model.py \
  --model_name "${model_name}"

modelscope_dir="${HOME}/.cache/modelscope/hub/damo/${model_name}"

# Decoding configs that carry beam_size / lm_weight.
_lm_configs=(
  "${modelscope_dir}/decode_asr_transformer.yaml"
  "${modelscope_dir}/decode_asr_transformer_wav.yaml"
)

# Put the original decoding configs back, if a backup exists.
restore_lm_configs() {
  local cfg
  for cfg in "${_lm_configs[@]}"; do
    if [[ -f "${cfg}.back" ]]; then
      mv -- "${cfg}.back" "${cfg}"
    fi
  done
}

if "${use_lm}"; then
  # Restore on every exit path (normal end, "exit 1" below, set -e abort).
  trap restore_lm_configs EXIT
  # Back up and patch exactly once, before looping over the test sets, so the
  # backup always holds the unpatched file.
  for _cfg in "${_lm_configs[@]}"; do
    # An existing .back is kept rather than overwritten.
    # NOTE(review): assumes such a leftover is the pristine copy from an
    # interrupted earlier run -- confirm.
    if [[ ! -f "${_cfg}.back" ]]; then
      cp -- "${_cfg}" "${_cfg}.back"
    fi
    sed -i \
      -e "s#beam_size: [0-9]*#beam_size: ${beam_size}#g" \
      -e "s#lm_weight: [0-9.]*#lm_weight: ${lm_weight}#g" \
      "${_cfg}"
  done
fi

# test_sets is a whitespace-separated list; word splitting is intentional.
for dset in ${test_sets}; do
  _dir="${exp_dir}/${model_name}/decode_asr/${dset}"
  _logdir="${_dir}/logdir"
  _data="${data_dir}/${dset}"

  # Refuse to overwrite an existing decoding directory.
  if [[ -d "${_dir}" ]]; then
    echo "${_dir} is already exists. if you want to decode again, please delete ${_dir} first."
    exit 1
  fi
  mkdir -p "${_dir}" "${_logdir}"

  # Per-job file lists. They are rebuilt for every test set: carrying entries
  # over from the previous set would make split_scp.pl spread this set's
  # wav.scp over the previous set's key files as well.
  split_scps=()
  text_parts=()
  for ((n = 1; n <= inference_nj; n++)); do
    split_scps+=("${_logdir}/keys.${n}.scp")
    text_parts+=("${_logdir}/text.${n}")
  done
  utils/split_scp.pl "${_data}/wav.scp" "${split_scps[@]}"

  echo "Decoding started... log: '${_logdir}/asr_inference.*.log'"
  # decode_cmd may carry its own options, so it is left unquoted on purpose.
  # shellcheck disable=SC2086
  ${decode_cmd} --max-jobs-run "${inference_nj}" JOB=1:"${inference_nj}" "${_logdir}"/asr_inference.JOB.log \
    python -m funasr.bin.modelscope_infer \
      --model_name "${model_name}" \
      --wav_list "${_logdir}/keys.JOB.scp" \
      --output_file "${_logdir}/text.JOB" \
      --gpuid_list "${gpuid_list}" \
      --njob "${njob}" \
      --ngpu "${_ngpu}"

  # Merge the per-job hypotheses into a single file sorted from the first field.
  sort -k1 -- "${text_parts[@]}" >"${_dir}/text"

  # Process hypothesis and reference with the same script, then score.
  python utils/proce_text.py "${_dir}/text" "${_dir}/text.proc"
  python utils/proce_text.py "${_data}/text" "${_data}/text.proc"
  python utils/compute_wer.py "${_data}/text.proc" "${_dir}/text.proc" "${_dir}/text.cer"

  # Keep and print the last three lines of the scoring report.
  tail -n 3 "${_dir}/text.cer" >"${_dir}/text.cer.txt"
  cat "${_dir}/text.cer.txt"
done

# When use_lm is true the original decoding configs are restored by the EXIT
# trap installed above.