| | |
| | | if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then |
| | | log "stage 1:process alimeeting near dir" |
| | | |
| | | find -L $near_raw_dir/audio_dir -iname "*.wav" > $near_dir/wavlist |
| | | find -L $near_raw_dir/audio_dir -iname "*.wav" | sort > $near_dir/wavlist |
| | | awk -F '/' '{print $NF}' $near_dir/wavlist | awk -F '.' '{print $1}' > $near_dir/uttid |
| | | find -L $near_raw_dir/textgrid_dir -iname "*.TextGrid" > $near_dir/textgrid.flist |
| | | find -L $near_raw_dir/textgrid_dir -iname "*.TextGrid" | sort > $near_dir/textgrid.flist |
| | | n1_wav=$(wc -l < $near_dir/wavlist) |
| | | n2_text=$(wc -l < $near_dir/textgrid.flist) |
| | | log near file found $n1_wav wav and $n2_text text. |
| | |
| | | utils/filter_scp.pl -f 1 $near_dir/text $near_dir/utt2spk_all | sort -u > $near_dir/utt2spk |
| | | #sed -e 's/ [a-z,A-Z,_,0-9,-]\+SPK/ SPK/' $near_dir/utt2spk_old >$near_dir/tmp1 |
| | | #sed -e 's/-[a-z,A-Z,0-9]\+$//' $near_dir/tmp1 | sort -u > $near_dir/utt2spk |
| | | utils/utt2spk_to_spk2utt.pl $near_dir/utt2spk > $near_dir/spk2utt |
| | | local/utt2spk_to_spk2utt.pl $near_dir/utt2spk > $near_dir/spk2utt |
| | | utils/filter_scp.pl -f 1 $near_dir/text $near_dir/segments_all | sort -u > $near_dir/segments |
| | | sed -e 's/ $//g' $near_dir/text> $near_dir/tmp1 |
| | | sed -e 's/!//g' $near_dir/tmp1> $near_dir/tmp2 |
| | |
| | | if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then |
| | | log "stage 2:process alimeeting far dir" |
| | | |
| | | find -L $far_raw_dir/audio_dir -iname "*.wav" > $far_dir/wavlist |
| | | find -L $far_raw_dir/audio_dir -iname "*.wav" | sort > $far_dir/wavlist |
| | | awk -F '/' '{print $NF}' $far_dir/wavlist | awk -F '.' '{print $1}' > $far_dir/uttid |
| | | find -L $far_raw_dir/textgrid_dir -iname "*.TextGrid" > $far_dir/textgrid.flist |
| | | find -L $far_raw_dir/textgrid_dir -iname "*.TextGrid" | sort > $far_dir/textgrid.flist |
| | | n1_wav=$(wc -l < $far_dir/wavlist) |
| | | n2_text=$(wc -l < $far_dir/textgrid.flist) |
| | | log far file found $n1_wav wav and $n2_text text. |
| | |
| | | utils/filter_scp.pl -f 1 $far_dir/text $far_dir/utt2spk_all | sort -u > $far_dir/utt2spk |
| | | #sed -e 's/ [a-z,A-Z,_,0-9,-]\+SPK/ SPK/' $far_dir/utt2spk_old >$far_dir/utt2spk |
| | | |
| | | utils/utt2spk_to_spk2utt.pl $far_dir/utt2spk > $far_dir/spk2utt |
| | | local/utt2spk_to_spk2utt.pl $far_dir/utt2spk > $far_dir/spk2utt |
| | | utils/filter_scp.pl -f 1 $far_dir/text $far_dir/segments_all | sort -u > $far_dir/segments |
| | | sed -e 's/SRC/$/g' $far_dir/text> $far_dir/tmp1 |
| | | sed -e 's/ $//g' $far_dir/tmp1> $far_dir/tmp2 |
| | |
| | | |
| | | if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then |
| | | log "stage 3: finali data process" |
| | | |
| | | utils/copy_data_dir.sh $near_dir data/${tgt}_Ali_near |
| | | utils/copy_data_dir.sh $far_dir data/${tgt}_Ali_far |
| | | local/fix_data_dir.sh $near_dir |
| | | local/fix_data_dir.sh $far_dir |
| | | local/copy_data_dir.sh $near_dir data/${tgt}_Ali_near |
| | | local/copy_data_dir.sh $far_dir data/${tgt}_Ali_far |
| | | |
| | | sort $far_dir/utt2spk_all_fifo > data/${tgt}_Ali_far/utt2spk_all_fifo |
| | | sed -i "s/src/$/g" data/${tgt}_Ali_far/utt2spk_all_fifo |
| | |
| | | python local/process_textgrid_to_single_speaker_wav.py --path $far_single_speaker_dir |
| | | |
| | | cp $far_single_speaker_dir/utt2spk $far_single_speaker_dir/text |
| | | utils/utt2spk_to_spk2utt.pl $far_single_speaker_dir/utt2spk > $far_single_speaker_dir/spk2utt |
| | | local/utt2spk_to_spk2utt.pl $far_single_speaker_dir/utt2spk > $far_single_speaker_dir/spk2utt |
| | | |
| | | ./utils/fix_data_dir.sh $far_single_speaker_dir |
| | | utils/copy_data_dir.sh $far_single_speaker_dir data/${tgt}_Ali_far_single_speaker |
| | | ./local/fix_data_dir.sh $far_single_speaker_dir |
| | | local/copy_data_dir.sh $far_single_speaker_dir data/${tgt}_Ali_far_single_speaker |
| | | |
| | | # remove space in text |
| | | for x in ${tgt}_Ali_far_single_speaker; do |