| | |
| | | utils/text2token.py -s 1 -n 1 --space "" ${feats_dir}/data/$train_set/text | cut -f 2- -d" " | tr " " "\n" \ |
| | | | sort | uniq | grep -a -v -e '^\s*$' | awk '{print $0}' >> ${token_list} |
| | | echo "<unk>" >> ${token_list} |
| | | vocab_size=$(cat ${token_list} | wc -l) |
| | | fi |
| | | |
| | | # LM Training Stage |
| | |
| | | if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then |
| | | echo "stage 6: ModelScope Preparation" |
| | | cp ${feats_dir}/data/${train_set}/cmvn/am.mvn ${exp_dir}/exp/${model_dir}/am.mvn |
| | | vocab_size=$(cat ${token_list} | wc -l) |
| | | python utils/gen_modelscope_configuration.py \ |
| | | --am_model_file $inference_asr_model \ |
| | | --mode paraformer \ |