#!/usr/bin/env bash
# Copyright 2020 Jiawen Kang
# Apache 2.0.
#
# This script prepares the CN-Celeb2 dataset.
#
# Usage: make_cnceleb2.sh <in-dir> <out-dir>
#   <in-dir>   directory containing spk.lst (whitespace-separated speaker ids,
#              normally one per line) and data/<speaker-id>/*.wav
#   <out-dir>  directory that receives wav.scp and utt2spk (created if missing)
#
# Lines written:
#   wav.scp : "<speaker-id>-<wav-basename-without-.wav> <path-to-wav>"
#   utt2spk : "<speaker-id>-<wav-basename-without-.wav> <speaker-id>"
#
# Exit status: 0 on success, 1 on bad usage or an unreadable spk.lst; any
# failing command (e.g. mkdir) aborts the script because of `set -e`.

set -euo pipefail

if [[ $# -ne 2 ]]; then
  echo "Usage: make_cnceleb2.sh <in-dir> <out-dir>" >&2
  echo "E.g.: make_cnceleb2.sh /export/corpora/CN-Celeb2 data" >&2
  exit 1
fi

in_dir=$1
out_dir=$2

# Fail early instead of silently producing no output when the list is missing.
spk_list=$in_dir/spk.lst
if [[ ! -r "$spk_list" ]]; then
  echo "$0: cannot read speaker list: $spk_list" >&2
  exit 1
fi

# Prepare the cnceleb2 training data
mkdir -p -- "$out_dir"

WAVFILE=$out_dir/wav.scp
SPKFILE=$out_dir/utt2spk
# Start from a clean slate; -f keeps this quiet when the files do not exist yet.
rm -f -- "$WAVFILE" "$SPKFILE"

# tr turns every run of whitespace (including CR from CRLF files) into a single
# newline, so each whitespace-separated token is one speaker id, without the
# pathname expansion an unquoted $(cat ...) would apply to the ids.
while read -r spkr_id || [[ -n "$spkr_id" ]]; do
  [[ -n "$spkr_id" ]] || continue

  found=0
  for f in "$in_dir/data/$spkr_id"/*.wav; do
    # With no match the glob stays literal; skip it rather than emit a bogus entry.
    [[ -e "$f" ]] || continue

    # Strip the directory part and the .wav suffix without forking basename/sed.
    wav_id=${f##*/}
    wav_id=${wav_id%.wav}

    printf '%s-%s %s\n' "$spkr_id" "$wav_id" "$f" >> "$WAVFILE"
    printf '%s-%s %s\n' "$spkr_id" "$wav_id" "$spkr_id" >> "$SPKFILE"
    found=1
  done

  if [[ $found -eq 0 ]]; then
    echo "$0: warning: no .wav files found for speaker $spkr_id" >&2
  fi
done < <(tr -s '[:space:]' '\n' < "$spk_list")

# utils/fix_data_dir.sh $out_dir