| | |
| | | token_type=bpe |
| | | type=sound |
| | | scp=wav.scp |
| | | stage=3 |
| | | stop_stage=4 |
| | | speed_perturb="0.9 1.0 1.1" |
| | | stage=0 |
| | | stop_stage=5 |
| | | |
| | | # feature configuration |
| | | feats_dim=80 |
| | |
| | | for x in dev-clean dev-other test-clean test-other train-clean-100; do |
| | | local/data_prep.sh ${raw_data}/LibriSpeech/${x} ${feats_dir}/data/${x//-/_} |
| | | done |
| | | mkdir $feats_dir/data/$valid_set |
| | | dev_sets="dev_clean dev_other" |
| | | for file in wav.scp text; do |
| | | ( for f in $dev_sets; do cat $feats_dir/data/$f/$file; done ) | sort -k1 > $feats_dir/data/$valid_set/$file || exit 1; |
| | | done |
| | | fi |
| | | |
| | | if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then |
| | |
| | | echo "<unk>" >> ${token_list} |
| | | fi |
| | | |
| | | # Training Stage |
| | | # LM Training Stage |
| | | world_size=$gpu_num # run on one machine |
| | | if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then |
| | | echo "stage 3: Training" |
| | | echo "stage 3: LM Training" |
| | | fi |
| | | |
| | | # ASR Training Stage |
| | | world_size=$gpu_num # run on one machine |
| | | if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4; then |
| | | echo "stage 4: ASR Training" |
| | | mkdir -p ${exp_dir}/exp/${model_dir} |
| | | mkdir -p ${exp_dir}/exp/${model_dir}/log |
| | | INIT_FILE=${exp_dir}/exp/${model_dir}/ddp_init |
| | |
| | | --data_dir ${feats_dir}/data \ |
| | | --train_set ${train_set} \ |
| | | --valid_set ${valid_set} \ |
| | | --cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \ |
| | | --speed_perturb ${speed_perturb} \ |
| | | --resume true \ |
| | | --output_dir ${exp_dir}/exp/${model_dir} \ |
| | | --config $asr_config \ |
| | | --input_size $feats_dim \ |
| | | --ngpu $gpu_num \ |
| | | --num_worker_count $count \ |
| | | --multiprocessing_distributed true \ |
| | |
| | | fi |
| | | |
| | | # Testing Stage |
| | | if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then |
| | | echo "stage 4: Inference" |
| | | if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then |
| | | echo "stage 5: Inference" |
| | | for dset in ${test_sets}; do |
| | | asr_exp=${exp_dir}/exp/${model_dir} |
| | | inference_tag="$(basename "${inference_config}" .yaml)" |
| | |
| | | exit 0 |
| | | fi |
| | | mkdir -p "${_logdir}" |
| | | _data="${feats_dir}/${dumpdir}/${dset}" |
| | | _data="${feats_dir}/data/${dset}" |
| | | key_file=${_data}/${scp} |
| | | num_scp_file="$(<${key_file} wc -l)" |
| | | _nj=$([ $inference_nj -le $num_scp_file ] && echo "$inference_nj" || echo "$num_scp_file") |