| | |
| | | token_type=bpe |
| | | type=sound |
| | | scp=wav.scp |
| | | speed_perturb="0.9 1.0 1.1" |
| | | stage=3 |
| | | stop_stage=4 |
| | | |
| | |
| | | for x in dev-clean dev-other test-clean test-other train-clean-100; do |
| | | local/data_prep.sh ${raw_data}/LibriSpeech/${x} ${feats_dir}/data/${x//-/_} |
| | | done |
| | | mkdir $feats_dir/data/$valid_set |
| | | dev_sets="dev_clean dev_other" |
| | | for file in wav.scp text; do |
| | | ( for f in $dev_sets; do cat $feats_dir/data/$f/$file; done ) | sort -k1 > $feats_dir/data/$valid_set/$file || exit 1; |
| | | done |
| | | fi |
| | | |
| | | if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then |
| | |
| | | --data_dir ${feats_dir}/data \ |
| | | --train_set ${train_set} \ |
| | | --valid_set ${valid_set} \ |
| | | --cmvn_file ${feats_dir}/data/${train_set}/cmvn/cmvn.mvn \ |
| | | --speed_perturb ${speed_perturb} \ |
| | | --resume true \ |
| | | --output_dir ${exp_dir}/exp/${model_dir} \ |
| | | --config $asr_config \ |
| | | --input_size $feats_dim \ |
| | | --ngpu $gpu_num \ |
| | | --num_worker_count $count \ |
| | | --multiprocessing_distributed true \ |
| | |
| | | exit 0 |
| | | fi |
| | | mkdir -p "${_logdir}" |
| | | _data="${feats_dir}/${dumpdir}/${dset}" |
| | | _data="${feats_dir}/data/${dset}" |
| | | key_file=${_data}/${scp} |
| | | num_scp_file="$(<${key_file} wc -l)" |
| | | _nj=$([ $inference_nj -le $num_scp_file ] && echo "$inference_nj" || echo "$num_scp_file") |