| | |
| | | --output_dir $output_dir \ |
| | | --index JOB |
| | | mkdir -p ${data_dir}/ark_data/dump/simu_data/data/$dataset |
| | | python local/gen_feats_scp.py \ |
| | | --root_path ${data_dir}/ark_data/dump/simu_data/$dataset \ |
| | | --out_path ${data_dir}/ark_data/dump/simu_data/data/$dataset \ |
| | | --split_num $nj |
| | | cat ${data_dir}/ark_data/dump/simu_data/$dataset/feature.scp.* > ${data_dir}/ark_data/dump/simu_data/data/$dataset/feature.scp |
| | | cat ${data_dir}/ark_data/dump/simu_data/$dataset/label.scp.* > ${data_dir}/ark_data/dump/simu_data/data/$dataset/label.scp |
| | | paste -d" " ${data_dir}/ark_data/dump/simu_data/data/$dataset/feature.scp <(cut -f2 -d" " ${data_dir}/ark_data/dump/simu_data/data/$dataset/label.scp) > ${data_dir}/ark_data/dump/simu_data/data/$dataset/feats.scp |
| | | grep "ns2" ${data_dir}/ark_data/dump/simu_data/data/$dataset/feats.scp > ${data_dir}/ark_data/dump/simu_data/data/$dataset/feats_2spkr.scp |
| | | # for chunk_size=2000 |
| | | output_dir=${data_dir}/ark_data/dump/simu_data_chunk2000/$dataset |
| | |
| | | --index JOB \ |
| | | --num_frames 2000 |
| | | mkdir -p ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset |
| | | python local/gen_feats_scp.py \ |
| | | --root_path ${data_dir}/ark_data/dump/simu_data_chunk2000/$dataset \ |
| | | --out_path ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset \ |
| | | --split_num $nj |
| | | grep "ns2" ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset/feats.scp > ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset/feats_2spkr.scp |
| | | cat ${data_dir}/ark_data/dump/simu_data_chunk2000/$dataset/feature.scp.* > ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset/feature.scp |
| | | cat ${data_dir}/ark_data/dump/simu_data_chunk2000/$dataset/label.scp.* > ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset/label.scp |
| | | paste -d" " ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset/feature.scp <(cut -f2 -d" " ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset/label.scp) > ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset/feats.scp |
| | | done |
| | | |
| | | # for callhome data |
| | | for dset in callhome1_spkall callhome2_spkall; do |
| | | find $data_dir/eval/$dset -maxdepth 1 -type f -exec cp {} {}.1 \; |
| | | output_dir=${data_dir}/ark_data/dump/callhome/$dset |
| | | output_dir=${data_dir}/ark_data/dump/callhome_chunk2000/$dset |
| | | mkdir -p $output_dir |
| | | python local/dump_feature.py \ |
| | | --data_dir $data_dir/eval/$dset \ |
| | | --output_dir $output_dir \ |
| | | --index 1 \ |
| | | --num_frames 2000 |
| | | mkdir -p ${data_dir}/ark_data/dump/callhome/data/$dset |
| | | python local/gen_feats_scp.py \ |
| | | --root_path ${data_dir}/ark_data/dump/callhome/$dset \ |
| | | --out_path ${data_dir}/ark_data/dump/callhome/data/$dset \ |
| | | --split_num 1 |
| | | mkdir -p ${data_dir}/ark_data/dump/callhome_chunk2000/data/$dset |
| | | paste -d" " ${data_dir}/ark_data/dump/callhome_chunk2000/$dset/feature.scp.1 <(cut -f2 -d" " ${data_dir}/ark_data/dump/callhome_chunk2000/$dset/label.scp.1) > ${data_dir}/ark_data/dump/callhome_chunk2000/data/$dset/feats.scp |
| | | done |
| | | fi |
| | | |
| | |
| | | python local/model_averaging.py ${exp_dir}/exp/${simu_allspkr_model_dir}/$simu_allspkr_ave_id.pb $models |
| | | fi |
| | | |
| | | # Training on simulated all-speaker data with chunk_size=2000 |
| | | # Training on simulated all-speaker data with chunk_size 2000 |
| | | world_size=$gpu_num |
| | | if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then |
| | | echo "stage 3: Training on simulated all-speaker data with chunk_size=2000" |
| | | echo "stage 3: Training on simulated all-speaker data with chunk_size 2000" |
| | | mkdir -p ${exp_dir}/exp/${simu_allspkr_chunk2000_model_dir} |
| | | mkdir -p ${exp_dir}/exp/${simu_allspkr_chunk2000_model_dir}/log |
| | | INIT_FILE=${exp_dir}/exp/${simu_allspkr_chunk2000_model_dir}/ddp_init |
| | |
| | | wait |
| | | fi |
| | | |
| | | # Training on callhome all-speaker data with chunk_size=2000 |
| | | # Training on callhome all-speaker data with chunk_size 2000 |
| | | world_size=$gpu_num |
| | | callhome_ave_id=avg${callhome_average_start}-${callhome_average_end} |
| | | if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then |
| | | echo "stage 4: Training on callhome all-speaker data with chunk_size=2000" |
| | | echo "stage 4: Training on callhome all-speaker data with chunk_size 2000" |
| | | mkdir -p ${exp_dir}/exp/${callhome_model_dir} |
| | | mkdir -p ${exp_dir}/exp/${callhome_model_dir}/log |
| | | INIT_FILE=${exp_dir}/exp/${callhome_model_dir}/ddp_init |
| | |
| | | --wav_scp_file $data_dir/eval/callhome2_spkall/wav.scp \ |
| | | 1> ${exp_dir}/exp/${callhome_model_dir}/inference/log/infer.log 2>&1 |
| | | md-eval.pl -c 0.25 \ |
| | | -r ${callhome_feats_dir_chunk2000}/${callhome_valid_dataset}/rttm \ |
| | | -r ${data_dir}/eval/${callhome_valid_dataset}/rttm \ |
| | | -s ${exp_dir}/exp/${callhome_model_dir}/inference/rttm > ${exp_dir}/exp/${callhome_model_dir}/inference/result_med11_collar0.25 2>/dev/null || exit |
| | | fi |