From d76aea23d9f5daac4df7ee1985d07f7428abc719 Mon Sep 17 00:00:00 2001
From: smohan-speech <smohan@mail.ustc.edu.cn>
Date: Sun, 07 May 2023 02:21:58 +0800
Subject: [PATCH] add speaker-attributed ASR task for alimeeting
---
egs/alimeeting/sa-asr/asr_local.sh | 33 ++++++++++++++++++---------------
1 file changed, 18 insertions(+), 15 deletions(-)
diff --git a/egs/alimeeting/sa-asr/asr_local.sh b/egs/alimeeting/sa-asr/asr_local.sh
index c0359eb..419e341 100755
--- a/egs/alimeeting/sa-asr/asr_local.sh
+++ b/egs/alimeeting/sa-asr/asr_local.sh
@@ -434,14 +434,14 @@
log "Stage 2: Speed perturbation: data/${train_set} -> data/${train_set}_sp"
for factor in ${speed_perturb_factors}; do
if [[ $(bc <<<"${factor} != 1.0") == 1 ]]; then
- scripts/utils/perturb_data_dir_speed.sh "${factor}" "data/${train_set}" "data/${train_set}_sp${factor}"
+ local/perturb_data_dir_speed.sh "${factor}" "data/${train_set}" "data/${train_set}_sp${factor}"
_dirs+="data/${train_set}_sp${factor} "
else
# If speed factor is 1, same as the original
_dirs+="data/${train_set} "
fi
done
- utils/combine_data.sh "data/${train_set}_sp" ${_dirs}
+ local/combine_data.sh "data/${train_set}_sp" ${_dirs}
else
log "Skip stage 2: Speed perturbation"
fi
@@ -473,7 +473,7 @@
_suf=""
fi
fi
- utils/copy_data_dir.sh --validate_opts --non-print data/"${dset}" "${data_feats}${_suf}/${dset}"
+ local/copy_data_dir.sh --validate_opts --non-print data/"${dset}" "${data_feats}${_suf}/${dset}"
cp data/"${dset}"/utt2spk_all_fifo "${data_feats}${_suf}/${dset}/"
@@ -488,7 +488,7 @@
_opts+="--segments data/${dset}/segments "
fi
# shellcheck disable=SC2086
- scripts/audio/format_wav_scp.sh --nj "${nj}" --cmd "${train_cmd}" \
+ local/format_wav_scp.sh --nj "${nj}" --cmd "${train_cmd}" \
--audio-format "${audio_format}" --fs "${fs}" ${_opts} \
"data/${dset}/wav.scp" "${data_feats}${_suf}/${dset}"
@@ -515,7 +515,7 @@
for dset in $rm_dset; do
# Copy data dir
- utils/copy_data_dir.sh --validate_opts --non-print "${data_feats}/org/${dset}" "${data_feats}/${dset}"
+ local/copy_data_dir.sh --validate_opts --non-print "${data_feats}/org/${dset}" "${data_feats}/${dset}"
cp "${data_feats}/org/${dset}/feats_type" "${data_feats}/${dset}/feats_type"
# Remove short utterances
@@ -564,7 +564,7 @@
awk ' { if( NF != 1 ) print $0; } ' >"${data_feats}/${dset}/text"
# fix_data_dir.sh leaves only utts which exist in all files
- utils/fix_data_dir.sh "${data_feats}/${dset}"
+ local/fix_data_dir.sh "${data_feats}/${dset}"
# generate uttid
cut -d ' ' -f 1 "${data_feats}/${dset}/wav.scp" > "${data_feats}/${dset}/uttid"
@@ -1283,6 +1283,7 @@
${_cmd} --gpu "${_ngpu}" --max-jobs-run "${_nj}" JOB=1:"${_nj}" "${_logdir}"/asr_inference.JOB.log \
python -m funasr.bin.asr_inference_launch \
--batch_size 1 \
+ --mc True \
--nbest 1 \
--ngpu "${_ngpu}" \
--njob ${njob_infer} \
@@ -1312,10 +1313,10 @@
_data="${data_feats}/${dset}"
_dir="${asr_exp}/${inference_tag}/${dset}"
- python local/proce_text.py ${_data}/text ${_data}/text.proc
- python local/proce_text.py ${_dir}/text ${_dir}/text.proc
+ python utils/proce_text.py ${_data}/text ${_data}/text.proc
+ python utils/proce_text.py ${_dir}/text ${_dir}/text.proc
- python local/compute_wer.py ${_data}/text.proc ${_dir}/text.proc ${_dir}/text.cer
+ python utils/compute_wer.py ${_data}/text.proc ${_dir}/text.proc ${_dir}/text.cer
tail -n 3 ${_dir}/text.cer > ${_dir}/text.cer.txt
cat ${_dir}/text.cer.txt
@@ -1390,6 +1391,7 @@
${_cmd} --gpu "${_ngpu}" --max-jobs-run "${_nj}" JOB=1:"${_nj}" "${_logdir}"/asr_inference.JOB.log \
python -m funasr.bin.asr_inference_launch \
--batch_size 1 \
+ --mc True \
--nbest 1 \
--ngpu "${_ngpu}" \
--njob ${njob_infer} \
@@ -1421,10 +1423,10 @@
_data="${data_feats}/${dset}"
_dir="${sa_asr_exp}/${sa_asr_inference_tag}.oracle/${dset}"
- python local/proce_text.py ${_data}/text ${_data}/text.proc
- python local/proce_text.py ${_dir}/text ${_dir}/text.proc
+ python utils/proce_text.py ${_data}/text ${_data}/text.proc
+ python utils/proce_text.py ${_dir}/text ${_dir}/text.proc
- python local/compute_wer.py ${_data}/text.proc ${_dir}/text.proc ${_dir}/text.cer
+ python utils/compute_wer.py ${_data}/text.proc ${_dir}/text.proc ${_dir}/text.cer
tail -n 3 ${_dir}/text.cer > ${_dir}/text.cer.txt
cat ${_dir}/text.cer.txt
@@ -1506,6 +1508,7 @@
${_cmd} --gpu "${_ngpu}" --max-jobs-run "${_nj}" JOB=1:"${_nj}" "${_logdir}"/asr_inference.JOB.log \
python -m funasr.bin.asr_inference_launch \
--batch_size 1 \
+ --mc True \
--nbest 1 \
--ngpu "${_ngpu}" \
--njob ${njob_infer} \
@@ -1536,10 +1539,10 @@
_data="${data_feats}/${dset}"
_dir="${sa_asr_exp}/${sa_asr_inference_tag}.cluster/${dset}"
- python local/proce_text.py ${_data}/text ${_data}/text.proc
- python local/proce_text.py ${_dir}/text ${_dir}/text.proc
+ python utils/proce_text.py ${_data}/text ${_data}/text.proc
+ python utils/proce_text.py ${_dir}/text ${_dir}/text.proc
- python local/compute_wer.py ${_data}/text.proc ${_dir}/text.proc ${_dir}/text.cer
+ python utils/compute_wer.py ${_data}/text.proc ${_dir}/text.proc ${_dir}/text.cer
tail -n 3 ${_dir}/text.cer > ${_dir}/text.cer.txt
cat ${_dir}/text.cer.txt
--
Gitblit v1.9.1