嘉渊
2023-05-12 57b47cb8261923baf056edb88aa3f0521e81a33c
update repo
1个文件已修改
6 ■■■■ 已修改文件
egs/aishell2/conformer/local/prepare_data.sh 6 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs/aishell2/conformer/local/prepare_data.sh
@@ -34,14 +34,14 @@
# validate utt-key list, IC0803W0380 is a bad utterance
awk '{print $1}' $corpus/wav.scp | grep -v 'IC0803W0380' > $tmp/wav_utt.list
awk '{print $1}' $corpus/trans.txt > $tmp/trans_utt.list
tools/filter_scp.pl -f 1 $tmp/wav_utt.list $tmp/trans_utt.list > $tmp/utt.list
utils/filter_scp.pl -f 1 $tmp/wav_utt.list $tmp/trans_utt.list > $tmp/utt.list
# wav.scp
awk -F'\t' -v path_prefix=$corpus '{printf("%s\t%s/%s\n",$1,path_prefix,$2)}' $corpus/wav.scp > $tmp/tmp_wav.scp
tools/filter_scp.pl -f 1 $tmp/utt.list $tmp/tmp_wav.scp | sort -k 1 | uniq > $tmp/wav.scp
utils/filter_scp.pl -f 1 $tmp/utt.list $tmp/tmp_wav.scp | sort -k 1 | uniq > $tmp/wav.scp
# text
tools/filter_scp.pl -f 1 $tmp/utt.list $corpus/trans.txt | sort -k 1 | uniq > $tmp/text
utils/filter_scp.pl -f 1 $tmp/utt.list $corpus/trans.txt | sort -k 1 | uniq > $tmp/text
# copy prepared resources from tmp_dir to target dir
mkdir -p $dir