From eb3948b4805f58bf03d364e8a8c37c0ef3a854ca Mon Sep 17 00:00:00 2001
From: 嘉渊 <wangjiaming.wjm@alibaba-inc.com>
Date: Tue, 16 May 2023 11:11:44 +0800
Subject: [PATCH] update repo
---
egs/aishell2/paraformerbert/run.sh | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/egs/aishell2/paraformerbert/run.sh b/egs/aishell2/paraformerbert/run.sh
index ef74dd1..26c8063 100755
--- a/egs/aishell2/paraformerbert/run.sh
+++ b/egs/aishell2/paraformerbert/run.sh
@@ -106,8 +106,6 @@
utils/text2token.py -s 1 -n 1 --space "" ${feats_dir}/data/${train_set}/text | cut -f 2- -d" " | tr " " "\n" \
| sort | uniq | grep -a -v -e '^\s*$' | awk '{print $0}' >> ${token_list}
echo "<unk>" >> ${token_list}
- mkdir -p ${feats_dir}/asr_stats_fbank_zh_char/${train_set}
- mkdir -p ${feats_dir}/asr_stats_fbank_zh_char/${valid_set}
fi
# Training Stage
@@ -118,7 +116,8 @@
echo "extract embeddings..."
local/extract_embeds.sh \
--bert_model_name ${bert_model_name} \
- --raw_dataset_path ${feats_dir}
+ --raw_dataset_path ${feats_dir} \
+ --nj $nj
fi
mkdir -p ${exp_dir}/exp/${model_dir}
mkdir -p ${exp_dir}/exp/${model_dir}/log
@@ -173,7 +172,7 @@
exit 0
fi
mkdir -p "${_logdir}"
- _data="${feats_dir}/${dumpdir}/${dset}"
+ _data="${feats_dir}/data/${dset}"
key_file=${_data}/${scp}
num_scp_file="$(<${key_file} wc -l)"
_nj=$([ $inference_nj -le $num_scp_file ] && echo "$inference_nj" || echo "$num_scp_file")
@@ -194,6 +193,7 @@
--njob ${njob} \
--gpuid_list ${gpuid_list} \
--data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
+ --cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \
--key_file "${_logdir}"/keys.JOB.scp \
--asr_train_config "${asr_exp}"/config.yaml \
--asr_model_file "${asr_exp}"/"${inference_asr_model}" \
--
Gitblit v1.9.1