From 0dd89f678279c8792248135738b08c0a8be72e54 Mon Sep 17 00:00:00 2001
From: 嘉渊 <wangjiaming.wjm@alibaba-inc.com>
Date: Thu, 27 Apr 2023 17:07:49 +0800
Subject: [PATCH] update
---
egs/aishell/paraformer/run.sh | 115 ++++++++++++++++++++++++++++-----------------------------
1 file changed, 57 insertions(+), 58 deletions(-)
diff --git a/egs/aishell/paraformer/run.sh b/egs/aishell/paraformer/run.sh
index 2237641..23fe42d 100755
--- a/egs/aishell/paraformer/run.sh
+++ b/egs/aishell/paraformer/run.sh
@@ -3,7 +3,7 @@
. ./path.sh || exit 1;
# machines configuration
-CUDA_VISIBLE_DEVICES="0,1"
+CUDA_VISIBLE_DEVICES="2,3"
gpu_num=2
count=1
gpu_inference=true # Whether to perform gpu decoding, set false for cpu decoding
@@ -21,7 +21,7 @@
token_type=char
scp=wav.scp
type=sound
-stage=2
+stage=1
stop_stage=3
# feature configuration
@@ -84,7 +84,6 @@
done
fi
-feat_train_dir=${feats_dir}/${dumpdir}/train; mkdir -p ${feat_train_dir}
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
echo "stage 1: Feature and CMVN Generation"
utils/compute_cmvn.sh --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} ${feats_dir}/data/${train_set}
@@ -146,58 +145,58 @@
wait
fi
-# Testing Stage
-if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
- echo "stage 4: Inference"
- for dset in ${test_sets}; do
- asr_exp=${exp_dir}/exp/${model_dir}
- inference_tag="$(basename "${inference_config}" .yaml)"
- _dir="${asr_exp}/${inference_tag}/${inference_asr_model}/${dset}"
- _logdir="${_dir}/logdir"
- if [ -d ${_dir} ]; then
- echo "${_dir} is already exists. if you want to decode again, please delete this dir first."
- exit 0
- fi
- mkdir -p "${_logdir}"
- _data="${feats_dir}/${dumpdir}/${dset}"
- key_file=${_data}/${scp}
- num_scp_file="$(<${key_file} wc -l)"
- _nj=$([ $inference_nj -le $num_scp_file ] && echo "$inference_nj" || echo "$num_scp_file")
- split_scps=
- for n in $(seq "${_nj}"); do
- split_scps+=" ${_logdir}/keys.${n}.scp"
- done
- # shellcheck disable=SC2086
- utils/split_scp.pl "${key_file}" ${split_scps}
- _opts=
- if [ -n "${inference_config}" ]; then
- _opts+="--config ${inference_config} "
- fi
- ${infer_cmd} --gpu "${_ngpu}" --max-jobs-run "${_nj}" JOB=1:"${_nj}" "${_logdir}"/asr_inference.JOB.log \
- python -m funasr.bin.asr_inference_launch \
- --batch_size 1 \
- --ngpu "${_ngpu}" \
- --njob ${njob} \
- --gpuid_list ${gpuid_list} \
- --data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
- --key_file "${_logdir}"/keys.JOB.scp \
- --asr_train_config "${asr_exp}"/config.yaml \
- --asr_model_file "${asr_exp}"/"${inference_asr_model}" \
- --output_dir "${_logdir}"/output.JOB \
- --mode paraformer \
- ${_opts}
-
- for f in token token_int score text; do
- if [ -f "${_logdir}/output.1/1best_recog/${f}" ]; then
- for i in $(seq "${_nj}"); do
- cat "${_logdir}/output.${i}/1best_recog/${f}"
- done | sort -k1 >"${_dir}/${f}"
- fi
- done
- python utils/proce_text.py ${_dir}/text ${_dir}/text.proc
- python utils/proce_text.py ${_data}/text ${_data}/text.proc
- python utils/compute_wer.py ${_data}/text.proc ${_dir}/text.proc ${_dir}/text.cer
- tail -n 3 ${_dir}/text.cer > ${_dir}/text.cer.txt
- cat ${_dir}/text.cer.txt
- done
-fi
\ No newline at end of file
+## Testing Stage
+#if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+# echo "stage 4: Inference"
+# for dset in ${test_sets}; do
+# asr_exp=${exp_dir}/exp/${model_dir}
+# inference_tag="$(basename "${inference_config}" .yaml)"
+# _dir="${asr_exp}/${inference_tag}/${inference_asr_model}/${dset}"
+# _logdir="${_dir}/logdir"
+# if [ -d ${_dir} ]; then
+# echo "${_dir} is already exists. if you want to decode again, please delete this dir first."
+# exit 0
+# fi
+# mkdir -p "${_logdir}"
+# _data="${feats_dir}/${dumpdir}/${dset}"
+# key_file=${_data}/${scp}
+# num_scp_file="$(<${key_file} wc -l)"
+# _nj=$([ $inference_nj -le $num_scp_file ] && echo "$inference_nj" || echo "$num_scp_file")
+# split_scps=
+# for n in $(seq "${_nj}"); do
+# split_scps+=" ${_logdir}/keys.${n}.scp"
+# done
+# # shellcheck disable=SC2086
+# utils/split_scp.pl "${key_file}" ${split_scps}
+# _opts=
+# if [ -n "${inference_config}" ]; then
+# _opts+="--config ${inference_config} "
+# fi
+# ${infer_cmd} --gpu "${_ngpu}" --max-jobs-run "${_nj}" JOB=1:"${_nj}" "${_logdir}"/asr_inference.JOB.log \
+# python -m funasr.bin.asr_inference_launch \
+# --batch_size 1 \
+# --ngpu "${_ngpu}" \
+# --njob ${njob} \
+# --gpuid_list ${gpuid_list} \
+# --data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
+# --key_file "${_logdir}"/keys.JOB.scp \
+# --asr_train_config "${asr_exp}"/config.yaml \
+# --asr_model_file "${asr_exp}"/"${inference_asr_model}" \
+# --output_dir "${_logdir}"/output.JOB \
+# --mode paraformer \
+# ${_opts}
+#
+# for f in token token_int score text; do
+# if [ -f "${_logdir}/output.1/1best_recog/${f}" ]; then
+# for i in $(seq "${_nj}"); do
+# cat "${_logdir}/output.${i}/1best_recog/${f}"
+# done | sort -k1 >"${_dir}/${f}"
+# fi
+# done
+# python utils/proce_text.py ${_dir}/text ${_dir}/text.proc
+# python utils/proce_text.py ${_data}/text ${_data}/text.proc
+# python utils/compute_wer.py ${_data}/text.proc ${_dir}/text.proc ${_dir}/text.cer
+# tail -n 3 ${_dir}/text.cer > ${_dir}/text.cer.txt
+# cat ${_dir}/text.cer.txt
+# done
+#fi
\ No newline at end of file
--
Gitblit v1.9.1