From 559cc2c6e296bc80917a7408911f671dfcc2b68b Mon Sep 17 00:00:00 2001
From: 嘉渊 <wangjiaming.wjm@alibaba-inc.com>
Date: 星期五, 12 五月 2023 17:25:54 +0800
Subject: [PATCH] update repo
---
egs/aishell2/transformer/utils/subset_data_dir_tr_cv.sh | 30 ++++++++++++++++++++++++++++++
1 files changed, 30 insertions(+), 0 deletions(-)
diff --git a/egs/aishell2/transformer/utils/subset_data_dir_tr_cv.sh b/egs/aishell2/transformer/utils/subset_data_dir_tr_cv.sh
new file mode 100755
index 0000000..e16cebd
--- /dev/null
+++ b/egs/aishell2/transformer/utils/subset_data_dir_tr_cv.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Split a data directory into train and dev subsets using a fixed-seed shuffle.
+# Usage: subset_data_dir_tr_cv.sh [options] <train_data> <out_dir>
+# Reads <train_data>/{wav.scp,text}; writes <out_dir>/{train,dev}/{wav.scp,text}.
+# wav.scp and text are shuffled separately with the same seed, so the split
+# assumes both list the same utterances in the same order -- TODO confirm.
+dev_num_utt=1000  # lines sent to dev; presumably overridable via parse_options.sh
+
+echo "$0 $*"
+. utils/parse_options.sh || exit 1;
+
+[ $# -ge 2 ] || { echo "Usage: $0 [options] <train_data> <out_dir>" >&2; exit 1; }
+train_data=$1
+out_dir=$2
+
+[ -f "${train_data}/wav.scp" ] || { echo "$0: no such file ${train_data}/wav.scp" >&2; exit 1; }
+[ -f "${train_data}/text" ] || { echo "$0: no such file ${train_data}/text" >&2; exit 1; }
+[ "$(wc -l <"${train_data}/wav.scp")" -eq "$(wc -l <"${train_data}/text")" ] || echo "$0: warning: wav.scp and text differ in line count" >&2
+
+mkdir -p "${out_dir}/train" "${out_dir}/dev" || exit 1
+
+# Per list: shuffle with a fixed seed, first dev_num_utt lines -> dev, rest -> train.
+# `tail -n +K` starts at line K, so train never overlaps dev even on short lists.
+utils/shuffle_list.pl --srand 1 "${train_data}/wav.scp" > "${out_dir}/train/wav.scp.shuf" || exit 1
+head -n "${dev_num_utt}" "${out_dir}/train/wav.scp.shuf" > "${out_dir}/dev/wav.scp"
+tail -n "+$((dev_num_utt + 1))" "${out_dir}/train/wav.scp.shuf" > "${out_dir}/train/wav.scp"
+utils/shuffle_list.pl --srand 1 "${train_data}/text" > "${out_dir}/train/text.shuf" || exit 1
+head -n "${dev_num_utt}" "${out_dir}/train/text.shuf" > "${out_dir}/dev/text"
+tail -n "+$((dev_num_utt + 1))" "${out_dir}/train/text.shuf" > "${out_dir}/train/text"
+rm "${out_dir}/train/wav.scp.shuf" "${out_dir}/train/text.shuf"
--
Gitblit v1.9.1