From 0acf868dbb02642348e3addfec8e56974facfc2f Mon Sep 17 00:00:00 2001
From: hnluo <haoneng.lhn@alibaba-inc.com>
Date: Mon, 29 May 2023 10:40:21 +0800
Subject: [PATCH] Merge pull request #558 from alibaba-damo-academy/dev_wjm2

---
 egs/aishell/data2vec_transformer_finetune/run.sh                 |    8 
 egs/wenetspeech/conformer/conf/train_asr_conformer.yaml          |  104 +++++
 egs/aishell/paraformerbert/run.sh                                |    8 
 egs/wenetspeech/conformer/local/wenetspeech_data_prep.sh         |  135 ++++++
 egs/wenetspeech/conformer/local/extract_meta.py                  |  114 +++++
 egs/wenetspeech/conformer/local/path.sh                          |    0 
 egs/aishell2/paraformerbert/run.sh                               |    8 
 egs/aishell/conformer/run.sh                                     |    8 
 egs/aishell/transformer/run.sh                                   |    8 
 egs/aishell/transformer/utils/compute_cmvn.sh                    |    6 
 egs/aishell2/transformerLM/utils                                 |    1 
 egs/wenetspeech/conformer/utils                                  |    1 
 egs/aishell2/data2vec_pretrain/run.sh                            |    2 
 docs/academic_recipe/asr_recipe.md                               |   17 
 egs/wenetspeech/conformer/conf/decode_asr_transformer_5beam.yaml |    6 
 egs/wenetspeech/conformer/local/process_opus.py                  |   88 ++++
 egs/wenetspeech/conformer/run.sh                                 |  223 +++++++++++
 egs/aishell/transformer/utils/compute_cmvn.py                    |    4 
 egs/wenetspeech/conformer/local/data.sh                          |  102 +++++
 egs/aishell2/paraformerbert/utils                                |    2 
 egs/aishell2/conformer/utils                                     |    2 
 egs/aishell2/conformer/run.sh                                    |    8 
 egs/librispeech/conformer/run.sh                                 |    2 
 egs/librispeech_100h/conformer/run.sh                            |    4 
 egs/wenetspeech/conformer/local/text_normalize.pl                |   24 +
 funasr/datasets/large_datasets/dataset.py                        |    6 
 /dev/null                                                        |  246 ------------
 funasr/utils/prepare_data.py                                     |    2 
 egs/aishell/data2vec_paraformer_finetune/run.sh                  |    8 
 egs/aishell2/paraformer/utils                                    |    2 
 egs/aishell2/transformer/run.sh                                  |    8 
 egs/wenetspeech/conformer/path.sh                                |    5 
 egs/aishell/paraformer/run.sh                                    |    8 
 egs/aishell2/transformer/utils                                   |    1 
 docs/academic_recipe/images/loss.png                             |    0 
 egs/aishell2/paraformer/run.sh                                   |    8 
 36 files changed, 873 insertions(+), 306 deletions(-)

diff --git a/docs/academic_recipe/asr_recipe.md b/docs/academic_recipe/asr_recipe.md
index 4e8f072..e5a7f02 100644
--- a/docs/academic_recipe/asr_recipe.md
+++ b/docs/academic_recipe/asr_recipe.md
@@ -12,7 +12,7 @@
 Then you can directly start the recipe as follows:
 ```sh
 conda activate funasr
-. ./run.sh
+. ./run.sh --CUDA_VISIBLE_DEVICES="0,1" --gpu_num=2
 ```
 
 The training log files are saved in `${exp_dir}/exp/${model_dir}/log/train.log.*`, which can be viewed using the following command:
@@ -26,15 +26,18 @@
 ... 1epoch:train:801-850batch:850num_updates: ... loss_ctc=107.890, loss_att=87.832, acc=0.029, loss_pre=1.702 ...
 ```
 
-Also, users can use tensorboard to observe these training information by the following command:
-```sh
-tensorboard --logdir ${exp_dir}/exp/${model_dir}/tensorboard/train
-```
-
 At the end of each epoch, the evaluation metrics are calculated on the validation set, like follows:
 ```text
 ... [valid] loss_ctc=99.914, cer_ctc=1.000, loss_att=80.512, acc=0.029, cer=0.971, wer=1.000, loss_pre=1.952, loss=88.285 ...
 ```
+
+Also, users can use tensorboard to observe this training information with the following command:
+```sh
+tensorboard --logdir ${exp_dir}/exp/${model_dir}/tensorboard/train
+```
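+If training runs on a remote machine, TensorBoard's standard `--port` and `--bind_all` flags can expose the dashboard over the network.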
+Here is an example of the loss curve:
+
+<img src="images/loss.png" width="200"/>
 
 The inference results are saved in `${exp_dir}/exp/${model_dir}/decode_asr_*/$dset`. The main two files are `text.cer` and `text.cer.txt`. `text.cer` saves the comparison between the recognized text and the reference text, like follows:
 ```text
@@ -177,7 +180,7 @@
 encoder: conformer
 encoder_conf:
     output_size: 256    # dimension of attention
-    attention_heads: 4  # number of heads in multi-head attention
+    attention_heads: 4  # the number of heads in multi-head attention
     linear_units: 2048  # the number of units of position-wise feed forward
     num_blocks: 12      # the number of encoder blocks
     dropout_rate: 0.1
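The `encoder_conf` block above fixes the multi-head attention geometry: each of the `attention_heads` heads operates on `output_size / attention_heads` dimensions (256 / 4 = 64 here). A minimal sketch, assuming PyYAML and a hypothetical config path, that reads these fields and checks the divisibility constraint:

```python
import yaml

# Hypothetical path; the recipes pass theirs via $asr_config.
with open("conf/train_asr_conformer.yaml") as f:
    encoder_conf = yaml.safe_load(f)["encoder_conf"]

d_model = encoder_conf["output_size"]      # dimension of attention (256)
n_heads = encoder_conf["attention_heads"]  # number of heads (4)
assert d_model % n_heads == 0, "output_size must divide evenly across heads"
print(f"per-head dimension: {d_model // n_heads}")  # 64
```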
diff --git a/docs/academic_recipe/images/loss.png b/docs/academic_recipe/images/loss.png
new file mode 100644
index 0000000..f559864
--- /dev/null
+++ b/docs/academic_recipe/images/loss.png
Binary files differ
diff --git a/egs/aishell/conformer/run.sh b/egs/aishell/conformer/run.sh
index 633e697..e8643e9 100755
--- a/egs/aishell/conformer/run.sh
+++ b/egs/aishell/conformer/run.sh
@@ -85,14 +85,14 @@
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config $asr_config --scale 1.0
+    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
 fi
 
-token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
+token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
 echo "dictionary: ${token_list}"
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     echo "stage 2: Dictionary Preparation"
-    mkdir -p ${feats_dir}/data/${lang}_token_list/char/
+    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/
 
     echo "make a dictionary"
     echo "<blank>" > ${token_list}
@@ -130,7 +130,7 @@
                 --task_name asr \
                 --gpu_id $gpu_id \
                 --use_preprocessor true \
-                --token_type char \
+                --token_type $token_type \
                 --token_list $token_list \
                 --data_dir ${feats_dir}/data \
                 --train_set ${train_set} \
diff --git a/egs/aishell/data2vec_paraformer_finetune/run.sh b/egs/aishell/data2vec_paraformer_finetune/run.sh
index 694191a..1f96873 100755
--- a/egs/aishell/data2vec_paraformer_finetune/run.sh
+++ b/egs/aishell/data2vec_paraformer_finetune/run.sh
@@ -88,14 +88,14 @@
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config $asr_config --scale 1.0
+    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
 fi
 
-token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
+token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
 echo "dictionary: ${token_list}"
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     echo "stage 2: Dictionary Preparation"
-    mkdir -p ${feats_dir}/data/${lang}_token_list/char/
+    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/
 
     echo "make a dictionary"
     echo "<blank>" > ${token_list}
@@ -134,7 +134,7 @@
                 --task_name asr \
                 --gpu_id $gpu_id \
                 --use_preprocessor true \
-                --token_type char \
+                --token_type $token_type \
                 --token_list $token_list \
                 --data_dir ${feats_dir}/data \
                 --train_set ${train_set} \
diff --git a/egs/aishell/data2vec_transformer_finetune/run.sh b/egs/aishell/data2vec_transformer_finetune/run.sh
index 27ba90c..7b01a5f 100755
--- a/egs/aishell/data2vec_transformer_finetune/run.sh
+++ b/egs/aishell/data2vec_transformer_finetune/run.sh
@@ -88,14 +88,14 @@
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config $asr_config --scale 1.0
+    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
 fi
 
-token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
+token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
 echo "dictionary: ${token_list}"
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     echo "stage 2: Dictionary Preparation"
-    mkdir -p ${feats_dir}/data/${lang}_token_list/char/
+    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/
 
     echo "make a dictionary"
     echo "<blank>" > ${token_list}
@@ -134,7 +134,7 @@
                 --task_name asr \
                 --gpu_id $gpu_id \
                 --use_preprocessor true \
-                --token_type char \
+                --token_type $token_type \
                 --token_list $token_list \
                 --data_dir ${feats_dir}/data \
                 --train_set ${train_set} \
diff --git a/egs/aishell/paraformer/run.sh b/egs/aishell/paraformer/run.sh
index b7e3a82..7d79211 100755
--- a/egs/aishell/paraformer/run.sh
+++ b/egs/aishell/paraformer/run.sh
@@ -85,14 +85,14 @@
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config $asr_config --scale 1.0
+    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
 fi
 
-token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
+token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
 echo "dictionary: ${token_list}"
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     echo "stage 2: Dictionary Preparation"
-    mkdir -p ${feats_dir}/data/${lang}_token_list/char/
+    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/
    
     echo "make a dictionary"
     echo "<blank>" > ${token_list}
@@ -130,7 +130,7 @@
                 --task_name asr \
                 --gpu_id $gpu_id \
                 --use_preprocessor true \
-                --token_type char \
+                --token_type $token_type \
                 --token_list $token_list \
                 --dataset_type small \
                 --data_dir ${feats_dir}/data \
diff --git a/egs/aishell/paraformerbert/run.sh b/egs/aishell/paraformerbert/run.sh
index 8a614e3..efc831f 100755
--- a/egs/aishell/paraformerbert/run.sh
+++ b/egs/aishell/paraformerbert/run.sh
@@ -89,14 +89,14 @@
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config $asr_config --scale 1.0
+    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
 fi
 
-token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
+token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
 echo "dictionary: ${token_list}"
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     echo "stage 2: Dictionary Preparation"
-    mkdir -p ${feats_dir}/data/${lang}_token_list/char/
+    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/
    
     echo "make a dictionary"
     echo "<blank>" > ${token_list}
@@ -141,7 +141,7 @@
                 --task_name asr \
                 --gpu_id $gpu_id \
                 --use_preprocessor true \
-                --token_type char \
+                --token_type $token_type \
                 --token_list $token_list \
                 --data_dir ${feats_dir}/data \
                 --train_set ${train_set} \
diff --git a/egs/aishell/transformer/run.sh b/egs/aishell/transformer/run.sh
index 38343ae..e492fb2 100755
--- a/egs/aishell/transformer/run.sh
+++ b/egs/aishell/transformer/run.sh
@@ -85,14 +85,14 @@
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config $asr_config --scale 1.0
+    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
 fi
 
-token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
+token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
 echo "dictionary: ${token_list}"
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     echo "stage 2: Dictionary Preparation"
-    mkdir -p ${feats_dir}/data/${lang}_token_list/char/
+    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/
 
     echo "make a dictionary"
     echo "<blank>" > ${token_list}
@@ -130,7 +130,7 @@
                 --task_name asr \
                 --gpu_id $gpu_id \
                 --use_preprocessor true \
-                --token_type char \
+                --token_type $token_type \
                 --token_list $token_list \
                 --data_dir ${feats_dir}/data \
                 --train_set ${train_set} \
diff --git a/egs/aishell/transformer/utils/compute_cmvn.py b/egs/aishell/transformer/utils/compute_cmvn.py
index 6c9b445..4986a5a 100755
--- a/egs/aishell/transformer/utils/compute_cmvn.py
+++ b/egs/aishell/transformer/utils/compute_cmvn.py
@@ -27,7 +27,7 @@
         help="the path of wav scps",
     )
     parser.add_argument(
-        "--config",
+        "--config_file",
         type=str,
         help="the config file for computing cmvn",
     )
@@ -89,7 +89,7 @@
     #         var_stats += np.sum(np.square(mat), axis=0)
     #         total_frames += mat.shape[0]
 
-    with open(args.config) as f:
+    with open(args.config_file) as f:
         configs = yaml.safe_load(f)
         frontend_configs = configs.get("frontend_conf", {})
         num_mel_bins = frontend_configs.get("n_mels", 80)
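The rename from `--config` to `--config_file` leaves the script's YAML handling unchanged: it reads `frontend_conf` and falls back to 80 mel bins when `n_mels` is absent. A minimal sketch of that lookup, under the same assumptions as the hunk above:

```python
import yaml

def load_num_mel_bins(config_file: str, default: int = 80) -> int:
    """Read n_mels from frontend_conf, mirroring the --config_file handling above."""
    with open(config_file) as f:
        configs = yaml.safe_load(f)
    return configs.get("frontend_conf", {}).get("n_mels", default)
```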
diff --git a/egs/aishell/transformer/utils/compute_cmvn.sh b/egs/aishell/transformer/utils/compute_cmvn.sh
index ad8813d..4e7f80e 100755
--- a/egs/aishell/transformer/utils/compute_cmvn.sh
+++ b/egs/aishell/transformer/utils/compute_cmvn.sh
@@ -2,11 +2,11 @@
 
 . ./path.sh || exit 1;
 # Begin configuration section.
-fbankdir=$1
+fbankdir=
 nj=32
 cmd=./utils/run.pl
 feats_dim=80
-config=
+config_file=
 scale=1.0
 
 echo "$0 $@"
@@ -29,7 +29,7 @@
     python utils/compute_cmvn.py \
       --dim ${feats_dim} \
       --wav_path $split_dir \
-      --config $config \
+      --config_file $config_file \
       --idx JOB \
 
 python utils/combine_cmvn_file.py --dim ${feats_dim} --cmvn_dir $split_dir --nj $nj --output_dir ${fbankdir}/cmvn
diff --git a/egs/aishell2/conformer/run.sh b/egs/aishell2/conformer/run.sh
index b5e8db1..193c4a3 100755
--- a/egs/aishell2/conformer/run.sh
+++ b/egs/aishell2/conformer/run.sh
@@ -87,14 +87,14 @@
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config $asr_config --scale 1.0
+    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
 fi
 
-token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
+token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
 echo "dictionary: ${token_list}"
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     echo "stage 2: Dictionary Preparation"
-    mkdir -p ${feats_dir}/data/${lang}_token_list/char/
+    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/
    
     echo "make a dictionary"
     echo "<blank>" > ${token_list}
@@ -132,7 +132,7 @@
                 --task_name asr \
                 --gpu_id $gpu_id \
                 --use_preprocessor true \
-                --token_type char \
+                --token_type $token_type \
                 --token_list $token_list \
                 --data_dir ${feats_dir}/data \
                 --train_set ${train_set} \
diff --git a/egs/aishell2/conformer/utils b/egs/aishell2/conformer/utils
index 0853fce..fe070dd 120000
--- a/egs/aishell2/conformer/utils
+++ b/egs/aishell2/conformer/utils
@@ -1 +1 @@
-../transformer/utils/
\ No newline at end of file
+../../aishell/transformer/utils
\ No newline at end of file
diff --git a/egs/aishell2/data2vec_pretrain/run.sh b/egs/aishell2/data2vec_pretrain/run.sh
index e741919..f07deb5 100755
--- a/egs/aishell2/data2vec_pretrain/run.sh
+++ b/egs/aishell2/data2vec_pretrain/run.sh
@@ -66,7 +66,7 @@
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config $asr_config --scale 1.0
+    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
 fi
 
 token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
diff --git a/egs/aishell2/paraformer/run.sh b/egs/aishell2/paraformer/run.sh
index cf3ceb2..4268cf6 100755
--- a/egs/aishell2/paraformer/run.sh
+++ b/egs/aishell2/paraformer/run.sh
@@ -87,14 +87,14 @@
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config $asr_config --scale 1.0
+    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
 fi
 
-token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
+token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
 echo "dictionary: ${token_list}"
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     echo "stage 2: Dictionary Preparation"
-    mkdir -p ${feats_dir}/data/${lang}_token_list/char/
+    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/
 
     echo "make a dictionary"
     echo "<blank>" > ${token_list}
@@ -132,7 +132,7 @@
                 --task_name asr \
                 --gpu_id $gpu_id \
                 --use_preprocessor true \
-                --token_type char \
+                --token_type $token_type \
                 --token_list $token_list \
                 --data_dir ${feats_dir}/data \
                 --train_set ${train_set} \
diff --git a/egs/aishell2/paraformer/utils b/egs/aishell2/paraformer/utils
index 0853fce..fe070dd 120000
--- a/egs/aishell2/paraformer/utils
+++ b/egs/aishell2/paraformer/utils
@@ -1 +1 @@
-../transformer/utils/
\ No newline at end of file
+../../aishell/transformer/utils
\ No newline at end of file
diff --git a/egs/aishell2/paraformerbert/run.sh b/egs/aishell2/paraformerbert/run.sh
index eee2912..548c4db 100755
--- a/egs/aishell2/paraformerbert/run.sh
+++ b/egs/aishell2/paraformerbert/run.sh
@@ -90,14 +90,14 @@
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config $asr_config --scale 1.0
+    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
 fi
 
-token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
+token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
 echo "dictionary: ${token_list}"
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     echo "stage 2: Dictionary Preparation"
-    mkdir -p ${feats_dir}/data/${lang}_token_list/char/
+    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/
 
     echo "make a dictionary"
     echo "<blank>" > ${token_list}
@@ -142,7 +142,7 @@
                 --task_name asr \
                 --gpu_id $gpu_id \
                 --use_preprocessor true \
-                --token_type char \
+                --token_type $token_type \
                 --token_list $token_list \
                 --data_dir ${feats_dir}/data \
                 --train_set ${train_set} \
diff --git a/egs/aishell2/paraformerbert/utils b/egs/aishell2/paraformerbert/utils
index 0853fce..fe070dd 120000
--- a/egs/aishell2/paraformerbert/utils
+++ b/egs/aishell2/paraformerbert/utils
@@ -1 +1 @@
-../transformer/utils/
\ No newline at end of file
+../../aishell/transformer/utils
\ No newline at end of file
diff --git a/egs/aishell2/transformer/run.sh b/egs/aishell2/transformer/run.sh
index 895e403..22c3342 100755
--- a/egs/aishell2/transformer/run.sh
+++ b/egs/aishell2/transformer/run.sh
@@ -87,14 +87,14 @@
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config $asr_config --scale 1.0
+    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
 fi
 
-token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
+token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
 echo "dictionary: ${token_list}"
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     echo "stage 2: Dictionary Preparation"
-    mkdir -p ${feats_dir}/data/${lang}_token_list/char/
+    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/
 
     echo "make a dictionary"
     echo "<blank>" > ${token_list}
@@ -132,7 +132,7 @@
                 --task_name asr \
                 --gpu_id $gpu_id \
                 --use_preprocessor true \
-                --token_type char \
+                --token_type $token_type \
                 --token_list $token_list \
                 --data_dir ${feats_dir}/data \
                 --train_set ${train_set} \
diff --git a/egs/aishell2/transformer/utils b/egs/aishell2/transformer/utils
new file mode 120000
index 0000000..fe070dd
--- /dev/null
+++ b/egs/aishell2/transformer/utils
@@ -0,0 +1 @@
+../../aishell/transformer/utils
\ No newline at end of file
diff --git a/egs/aishell2/transformer/utils/apply_cmvn.py b/egs/aishell2/transformer/utils/apply_cmvn.py
deleted file mode 100755
index b5c5086..0000000
--- a/egs/aishell2/transformer/utils/apply_cmvn.py
+++ /dev/null
@@ -1,79 +0,0 @@
-from kaldiio import ReadHelper
-from kaldiio import WriteHelper
-
-import argparse
-import json
-import math
-import numpy as np
-
-
-def get_parser():
-    parser = argparse.ArgumentParser(
-        description="apply cmvn",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
-    parser.add_argument(
-        "--ark-file",
-        "-a",
-        default=False,
-        required=True,
-        type=str,
-        help="fbank ark file",
-    )
-    parser.add_argument(
-        "--cmvn-file",
-        "-c",
-        default=False,
-        required=True,
-        type=str,
-        help="cmvn file",
-    )
-    parser.add_argument(
-        "--ark-index",
-        "-i",
-        default=1,
-        required=True,
-        type=int,
-        help="ark index",
-    )
-    parser.add_argument(
-        "--output-dir",
-        "-o",
-        default=False,
-        required=True,
-        type=str,
-        help="output dir",
-    )
-    return parser
-
-
-def main():
-    parser = get_parser()
-    args = parser.parse_args()
-
-    ark_file = args.output_dir + "/feats." + str(args.ark_index) + ".ark"
-    scp_file = args.output_dir + "/feats." + str(args.ark_index) + ".scp"
-    ark_writer = WriteHelper('ark,scp:{},{}'.format(ark_file, scp_file))
-
-    with open(args.cmvn_file) as f:
-        cmvn_stats = json.load(f)
-
-    means = cmvn_stats['mean_stats']
-    vars = cmvn_stats['var_stats']
-    total_frames = cmvn_stats['total_frames']
-
-    for i in range(len(means)):
-        means[i] /= total_frames
-        vars[i] = vars[i] / total_frames - means[i] * means[i]
-        if vars[i] < 1.0e-20:
-            vars[i] = 1.0e-20
-        vars[i] = 1.0 / math.sqrt(vars[i])
-
-    with ReadHelper('ark:{}'.format(args.ark_file)) as ark_reader:
-        for key, mat in ark_reader:
-            mat = (mat - means) * vars
-            ark_writer(key, mat)
-
-
-if __name__ == '__main__':
-    main()
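The deleted `apply_cmvn.py` (these per-recipe utility copies now resolve through the `../../aishell/transformer/utils` symlink added above) normalized features by converting accumulated statistics into a mean and a floored inverse standard deviation. A condensed sketch of that conversion:

```python
import math

def stats_to_norm(mean_stats, var_stats, total_frames, floor=1.0e-20):
    """Turn accumulated sums into per-dimension mean and inverse std,
    as the deleted script did before computing (feat - mean) * inv_std."""
    means, inv_stds = [], []
    for m, v in zip(mean_stats, var_stats):
        mean = m / total_frames
        var = max(v / total_frames - mean * mean, floor)
        means.append(mean)
        inv_stds.append(1.0 / math.sqrt(var))
    return means, inv_stds
```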
diff --git a/egs/aishell2/transformer/utils/apply_cmvn.sh b/egs/aishell2/transformer/utils/apply_cmvn.sh
deleted file mode 100755
index f8fd1d1..0000000
--- a/egs/aishell2/transformer/utils/apply_cmvn.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/usr/bin/env bash
-
-. ./path.sh || exit 1;
-# Begin configuration section.
-nj=32
-cmd=./utils/run.pl
-
-echo "$0 $@"
-
-. utils/parse_options.sh || exit 1;
-
-fbankdir=$1
-cmvn_file=$2
-logdir=$3
-output_dir=$4
-
-dump_dir=${output_dir}/ark; mkdir -p ${dump_dir}
-mkdir -p ${logdir}
-
-$cmd JOB=1:$nj $logdir/apply_cmvn.JOB.log \
-    python utils/apply_cmvn.py -a $fbankdir/ark/feats.JOB.ark \
-        -c $cmvn_file -i JOB -o ${dump_dir} \
-        || exit 1;
-
-for n in $(seq $nj); do
-    cat ${dump_dir}/feats.$n.scp || exit 1
-done > ${output_dir}/feats.scp || exit 1
-
-echo "$0: Succeeded apply cmvn"
diff --git a/egs/aishell2/transformer/utils/apply_lfr_and_cmvn.py b/egs/aishell2/transformer/utils/apply_lfr_and_cmvn.py
deleted file mode 100755
index 50d18d1..0000000
--- a/egs/aishell2/transformer/utils/apply_lfr_and_cmvn.py
+++ /dev/null
@@ -1,143 +0,0 @@
-from kaldiio import ReadHelper, WriteHelper
-
-import argparse
-import numpy as np
-
-
-def build_LFR_features(inputs, m=7, n=6):
-    LFR_inputs = []
-    T = inputs.shape[0]
-    T_lfr = int(np.ceil(T / n))
-    left_padding = np.tile(inputs[0], ((m - 1) // 2, 1))
-    inputs = np.vstack((left_padding, inputs))
-    T = T + (m - 1) // 2
-    for i in range(T_lfr):
-        if m <= T - i * n:
-            LFR_inputs.append(np.hstack(inputs[i * n:i * n + m]))
-        else:
-            num_padding = m - (T - i * n)
-            frame = np.hstack(inputs[i * n:])
-            for _ in range(num_padding):
-                frame = np.hstack((frame, inputs[-1]))
-            LFR_inputs.append(frame)
-    return np.vstack(LFR_inputs)
-
-
-def build_CMVN_features(inputs, mvn_file):  # noqa
-    with open(mvn_file, 'r', encoding='utf-8') as f:
-        lines = f.readlines()
-
-    add_shift_list = []
-    rescale_list = []
-    for i in range(len(lines)):
-        line_item = lines[i].split()
-        if line_item[0] == '<AddShift>':
-            line_item = lines[i + 1].split()
-            if line_item[0] == '<LearnRateCoef>':
-                add_shift_line = line_item[3:(len(line_item) - 1)]
-                add_shift_list = list(add_shift_line)
-                continue
-        elif line_item[0] == '<Rescale>':
-            line_item = lines[i + 1].split()
-            if line_item[0] == '<LearnRateCoef>':
-                rescale_line = line_item[3:(len(line_item) - 1)]
-                rescale_list = list(rescale_line)
-                continue
-
-    for j in range(inputs.shape[0]):
-        for k in range(inputs.shape[1]):
-            add_shift_value = add_shift_list[k]
-            rescale_value = rescale_list[k]
-            inputs[j, k] = float(inputs[j, k]) + float(add_shift_value)
-            inputs[j, k] = float(inputs[j, k]) * float(rescale_value)
-
-    return inputs
-
-
-def get_parser():
-    parser = argparse.ArgumentParser(
-        description="apply low_frame_rate and cmvn",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
-    parser.add_argument(
-        "--ark-file",
-        "-a",
-        default=False,
-        required=True,
-        type=str,
-        help="fbank ark file",
-    )
-    parser.add_argument(
-        "--lfr",
-        "-f",
-        default=True,
-        type=str,
-        help="low frame rate",
-    )
-    parser.add_argument(
-        "--lfr-m",
-        "-m",
-        default=7,
-        type=int,
-        help="number of frames to stack",
-    )
-    parser.add_argument(
-        "--lfr-n",
-        "-n",
-        default=6,
-        type=int,
-        help="number of frames to skip",
-    )
-    parser.add_argument(
-        "--cmvn-file",
-        "-c",
-        default=False,
-        required=True,
-        type=str,
-        help="global cmvn file",
-    )
-    parser.add_argument(
-        "--ark-index",
-        "-i",
-        default=1,
-        required=True,
-        type=int,
-        help="ark index",
-    )
-    parser.add_argument(
-        "--output-dir",
-        "-o",
-        default=False,
-        required=True,
-        type=str,
-        help="output dir",
-    )
-    return parser
-
-
-def main():
-    parser = get_parser()
-    args = parser.parse_args()
-
-    dump_ark_file = args.output_dir + "/feats." + str(args.ark_index) + ".ark"
-    dump_scp_file = args.output_dir + "/feats." + str(args.ark_index) + ".scp"
-    shape_file = args.output_dir + "/len." + str(args.ark_index)
-    ark_writer = WriteHelper('ark,scp:{},{}'.format(dump_ark_file, dump_scp_file))
-
-    shape_writer = open(shape_file, 'w')
-    with ReadHelper('ark:{}'.format(args.ark_file)) as ark_reader:
-        for key, mat in ark_reader:
-            if args.lfr:
-                lfr = build_LFR_features(mat, args.lfr_m, args.lfr_n)
-            else:
-                lfr = mat
-            cmvn = build_CMVN_features(lfr, args.cmvn_file)
-            dims = cmvn.shape[1]
-            lens = cmvn.shape[0]
-            shape_writer.write(key + " " + str(lens) + "," + str(dims) + '\n')
-            ark_writer(key, cmvn)
-
-
-if __name__ == '__main__':
-    main()
-
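`build_LFR_features` above implements low frame rate (LFR) processing: every output frame stacks `m` consecutive input frames and the window advances by `n`, cutting the frame rate by a factor of `n` while widening each frame to `m × dim`. An equivalent condensed sketch:

```python
import numpy as np

def lfr(inputs: np.ndarray, m: int = 7, n: int = 6) -> np.ndarray:
    """Stack m frames per output step, hop n frames (see build_LFR_features)."""
    T_lfr = int(np.ceil(inputs.shape[0] / n))
    # Left-pad with copies of the first frame so early windows are full.
    inputs = np.vstack([np.tile(inputs[0], ((m - 1) // 2, 1)), inputs])
    rows = []
    for i in range(T_lfr):
        window = inputs[i * n:i * n + m]
        if window.shape[0] < m:  # right edge: repeat the final frame
            window = np.vstack([window] + [inputs[-1:]] * (m - window.shape[0]))
        rows.append(window.reshape(-1))
    return np.vstack(rows)
```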
diff --git a/egs/aishell2/transformer/utils/apply_lfr_and_cmvn.sh b/egs/aishell2/transformer/utils/apply_lfr_and_cmvn.sh
deleted file mode 100755
index 3119fdb..0000000
--- a/egs/aishell2/transformer/utils/apply_lfr_and_cmvn.sh
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/usr/bin/env bash
-
-
-# Begin configuration section.
-nj=32
-cmd=utils/run.pl
-
-# feature configuration
-lfr=True
-lfr_m=7
-lfr_n=6
-
-echo "$0 $@"
-
-. utils/parse_options.sh || exit 1;
-
-fbankdir=$1
-cmvn_file=$2
-logdir=$3
-output_dir=$4
-
-dump_dir=${output_dir}/ark; mkdir -p ${dump_dir}
-mkdir -p ${logdir}
-
-$cmd JOB=1:$nj $logdir/apply_lfr_and_cmvn.JOB.log \
-    python utils/apply_lfr_and_cmvn.py -a $fbankdir/ark/feats.JOB.ark \
-        -f $lfr -m $lfr_m -n $lfr_n -c $cmvn_file -i JOB -o ${dump_dir} \
-        || exit 1;
-
-for n in $(seq $nj); do
-    cat ${dump_dir}/feats.$n.scp || exit 1
-done > ${output_dir}/feats.scp || exit 1
-
-for n in $(seq $nj); do
-  cat ${dump_dir}/len.$n || exit 1
-done > ${output_dir}/speech_shape || exit 1
-
-echo "$0: Succeeded apply low frame rate and cmvn"
diff --git a/egs/aishell2/transformer/utils/cmvn_converter.py b/egs/aishell2/transformer/utils/cmvn_converter.py
deleted file mode 100644
index d405d12..0000000
--- a/egs/aishell2/transformer/utils/cmvn_converter.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import argparse
-import json
-import numpy as np
-
-
-def get_parser():
-    parser = argparse.ArgumentParser(
-        description="cmvn converter",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
-    parser.add_argument(
-        "--cmvn_json",
-        default=False,
-        required=True,
-        type=str,
-        help="cmvn json file",
-    )
-    parser.add_argument(
-        "--am_mvn",
-        default=False,
-        required=True,
-        type=str,
-        help="am mvn file",
-    )
-    return parser
-
-def main():
-    parser = get_parser()
-    args = parser.parse_args()
-
-    with open(args.cmvn_json, "r") as fin:
-        cmvn_dict = json.load(fin)
-
-    mean_stats = np.array(cmvn_dict["mean_stats"])
-    var_stats = np.array(cmvn_dict["var_stats"])
-    total_frame = np.array(cmvn_dict["total_frames"])
-
-    mean = -1.0 * mean_stats / total_frame
-    var = 1.0 / np.sqrt(var_stats / total_frame - mean * mean)
-    dims = mean.shape[0]
-    with open(args.am_mvn, 'w') as fout:
-        fout.write("<Nnet>" + "\n" + "<Splice> " + str(dims) + " " + str(dims) + '\n' + "[ 0 ]" + "\n" + "<AddShift> " + str(dims) + " " + str(dims) + "\n")
-        mean_str = str(list(mean)).replace(',', '').replace('[', '[ ').replace(']', ' ]')
-        fout.write("<LearnRateCoef> 0 " + mean_str + '\n')
-        fout.write("<Rescale> " + str(dims) + " " + str(dims) + '\n')
-        var_str = str(list(var)).replace(',', '').replace('[', '[ ').replace(']', ' ]')
-        fout.write("<LearnRateCoef> 0 " + var_str + '\n')
-        fout.write("</Nnet>" + '\n')
-
-if __name__ == '__main__':
-    main()
diff --git a/egs/aishell2/transformer/utils/combine_cmvn_file.py b/egs/aishell2/transformer/utils/combine_cmvn_file.py
deleted file mode 100755
index c525973..0000000
--- a/egs/aishell2/transformer/utils/combine_cmvn_file.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import argparse
-import json
-import os
-
-import numpy as np
-
-
-def get_parser():
-    parser = argparse.ArgumentParser(
-        description="combine cmvn file",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
-    parser.add_argument(
-        "--dim",
-        default=80,
-        type=int,
-        help="feature dim",
-    )
-    parser.add_argument(
-        "--cmvn_dir",
-        default=False,
-        required=True,
-        type=str,
-        help="cmvn dir",
-    )
-
-    parser.add_argument(
-        "--nj",
-        default=1,
-        required=True,
-        type=int,
-        help="num of cmvn files",
-    )
-    parser.add_argument(
-        "--output_dir",
-        default=False,
-        required=True,
-        type=str,
-        help="output dir",
-    )
-    return parser
-
-
-def main():
-    parser = get_parser()
-    args = parser.parse_args()
-
-    total_means = np.zeros(args.dim)
-    total_vars = np.zeros(args.dim)
-    total_frames = 0
-
-    cmvn_file = os.path.join(args.output_dir, "cmvn.json")
-
-    for i in range(1, args.nj + 1):
-        with open(os.path.join(args.cmvn_dir, "cmvn.{}.json".format(str(i)))) as fin:
-            cmvn_stats = json.load(fin)
-
-        total_means += np.array(cmvn_stats["mean_stats"])
-        total_vars += np.array(cmvn_stats["var_stats"])
-        total_frames += cmvn_stats["total_frames"]
-
-    cmvn_info = {
-        'mean_stats': list(total_means.tolist()),
-        'var_stats': list(total_vars.tolist()),
-        'total_frames': total_frames
-    }
-    with open(cmvn_file, 'w') as fout:
-        fout.write(json.dumps(cmvn_info))
-
-
-if __name__ == '__main__':
-    main()
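The deleted `combine_cmvn_file.py` worked because CMVN sufficient statistics are additive: per-job sums of features, squared features, and frame counts can simply be added before the final mean/variance conversion. A sketch of that combination step:

```python
import json
import numpy as np

def combine_cmvn(shard_paths, dim=80):
    """Sum per-job CMVN shards; the statistics are additive."""
    mean = np.zeros(dim)
    var = np.zeros(dim)
    frames = 0
    for path in shard_paths:
        with open(path) as f:
            stats = json.load(f)
        mean += np.array(stats["mean_stats"])
        var += np.array(stats["var_stats"])
        frames += stats["total_frames"]
    return {"mean_stats": mean.tolist(), "var_stats": var.tolist(),
            "total_frames": frames}
```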
diff --git a/egs/aishell2/transformer/utils/compute_cmvn.py b/egs/aishell2/transformer/utils/compute_cmvn.py
deleted file mode 100755
index 949cc08..0000000
--- a/egs/aishell2/transformer/utils/compute_cmvn.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import argparse
-import json
-import os
-
-import numpy as np
-import torchaudio
-import torchaudio.compliance.kaldi as kaldi
-
-
-def get_parser():
-    parser = argparse.ArgumentParser(
-        description="computer global cmvn",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
-    parser.add_argument(
-        "--dim",
-        default=80,
-        type=int,
-        help="feature dimension",
-    )
-    parser.add_argument(
-        "--wav_path",
-        default=False,
-        required=True,
-        type=str,
-        help="the path of wav scps",
-    )
-    parser.add_argument(
-        "--idx",
-        default=1,
-        required=True,
-        type=int,
-        help="index",
-    )
-    return parser
-
-
-def compute_fbank(wav_file,
-                  num_mel_bins=80,
-                  frame_length=25,
-                  frame_shift=10,
-                  dither=0.0,
-                  resample_rate=16000,
-                  speed=1.0,
-                  window_type="hamming"):
-    waveform, sample_rate = torchaudio.load(wav_file)
-    if resample_rate != sample_rate:
-        waveform = torchaudio.transforms.Resample(orig_freq=sample_rate,
-                                                  new_freq=resample_rate)(waveform)
-    if speed != 1.0:
-        waveform, _ = torchaudio.sox_effects.apply_effects_tensor(
-            waveform, resample_rate,
-            [['speed', str(speed)], ['rate', str(resample_rate)]]
-        )
-
-    waveform = waveform * (1 << 15)
-    mat = kaldi.fbank(waveform,
-                      num_mel_bins=num_mel_bins,
-                      frame_length=frame_length,
-                      frame_shift=frame_shift,
-                      dither=dither,
-                      energy_floor=0.0,
-                      window_type=window_type,
-                      sample_frequency=resample_rate)
-
-    return mat.numpy()
-
-
-def main():
-    parser = get_parser()
-    args = parser.parse_args()
-
-    wav_scp_file = os.path.join(args.wav_path, "wav.{}.scp".format(args.idx))
-    cmvn_file = os.path.join(args.wav_path, "cmvn.{}.json".format(args.idx))
-
-    mean_stats = np.zeros(args.dim)
-    var_stats = np.zeros(args.dim)
-    total_frames = 0
-
-    # with ReadHelper('ark:{}'.format(ark_file)) as ark_reader:
-    #     for key, mat in ark_reader:
-    #         mean_stats += np.sum(mat, axis=0)
-    #         var_stats += np.sum(np.square(mat), axis=0)
-    #         total_frames += mat.shape[0]
-    with open(wav_scp_file) as f:
-        lines = f.readlines()
-        for line in lines:
-            _, wav_file = line.strip().split()
-            fbank = compute_fbank(wav_file, num_mel_bins=args.dim)
-            mean_stats += np.sum(fbank, axis=0)
-            var_stats += np.sum(np.square(fbank), axis=0)
-            total_frames += fbank.shape[0]
-
-    cmvn_info = {
-        'mean_stats': list(mean_stats.tolist()),
-        'var_stats': list(var_stats.tolist()),
-        'total_frames': total_frames
-    }
-    with open(cmvn_file, 'w') as fout:
-        fout.write(json.dumps(cmvn_info))
-
-
-if __name__ == '__main__':
-    main()
diff --git a/egs/aishell2/transformer/utils/compute_cmvn.sh b/egs/aishell2/transformer/utils/compute_cmvn.sh
deleted file mode 100755
index 75d88a2..0000000
--- a/egs/aishell2/transformer/utils/compute_cmvn.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env bash
-
-. ./path.sh || exit 1;
-# Begin configuration section.
-nj=32
-cmd=./utils/run.pl
-feats_dim=80
-
-echo "$0 $@"
-
-. utils/parse_options.sh || exit 1;
-
-fbankdir=$1
-
-split_dir=${fbankdir}/cmvn/split_${nj};
-mkdir -p $split_dir
-split_scps=""
-for n in $(seq $nj); do
-    split_scps="$split_scps $split_dir/wav.$n.scp"
-done
-utils/split_scp.pl ${fbankdir}/wav.scp $split_scps || exit 1;
-
-logdir=${fbankdir}/cmvn/log
-$cmd JOB=1:$nj $logdir/cmvn.JOB.log \
-    python utils/compute_cmvn.py \
-      --dim ${feats_dim} \
-      --wav_path $split_dir \
-      --idx JOB
-
-python utils/combine_cmvn_file.py --dim ${feats_dim} --cmvn_dir $split_dir --nj $nj --output_dir ${fbankdir}/cmvn
-
-python utils/cmvn_converter.py --cmvn_json ${fbankdir}/cmvn/cmvn.json --am_mvn ${fbankdir}/cmvn/am.mvn
-
-echo "$0: Succeeded compute global cmvn"
diff --git a/egs/aishell2/transformer/utils/compute_fbank.py b/egs/aishell2/transformer/utils/compute_fbank.py
deleted file mode 100755
index 9c3904f..0000000
--- a/egs/aishell2/transformer/utils/compute_fbank.py
+++ /dev/null
@@ -1,171 +0,0 @@
-from kaldiio import WriteHelper
-
-import argparse
-import numpy as np
-import json
-import torch
-import torchaudio
-import torchaudio.compliance.kaldi as kaldi
-
-
-def compute_fbank(wav_file,
-                  num_mel_bins=80,
-                  frame_length=25,
-                  frame_shift=10,
-                  dither=0.0,
-                  resample_rate=16000,
-                  speed=1.0,
-                  window_type="hamming"):
-
-    waveform, sample_rate = torchaudio.load(wav_file)
-    if resample_rate != sample_rate:
-        waveform = torchaudio.transforms.Resample(orig_freq=sample_rate,
-                                                  new_freq=resample_rate)(waveform)
-    if speed != 1.0:
-        waveform, _ = torchaudio.sox_effects.apply_effects_tensor(
-            waveform, resample_rate,
-            [['speed', str(speed)], ['rate', str(resample_rate)]]
-        )
-
-    waveform = waveform * (1 << 15)
-    mat = kaldi.fbank(waveform,
-                      num_mel_bins=num_mel_bins,
-                      frame_length=frame_length,
-                      frame_shift=frame_shift,
-                      dither=dither,
-                      energy_floor=0.0,
-                      window_type=window_type,
-                      sample_frequency=resample_rate)
-
-    return mat.numpy()
-
-
-def get_parser():
-    parser = argparse.ArgumentParser(
-        description="computer features",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
-    parser.add_argument(
-        "--wav-lists",
-        "-w",
-        default=False,
-        required=True,
-        type=str,
-        help="input wav lists",
-    )
-    parser.add_argument(
-        "--text-files",
-        "-t",
-        default=False,
-        required=True,
-        type=str,
-        help="input text files",
-    )
-    parser.add_argument(
-        "--dims",
-        "-d",
-        default=80,
-        type=int,
-        help="feature dims",
-    )
-    parser.add_argument(
-        "--max-lengths",
-        "-m",
-        default=1500,
-        type=int,
-        help="max frame numbers",
-    )
-    parser.add_argument(
-        "--sample-frequency",
-        "-s",
-        default=16000,
-        type=int,
-        help="sample frequency",
-    )
-    parser.add_argument(
-        "--speed-perturb",
-        "-p",
-        default="1.0",
-        type=str,
-        help="speed perturb",
-    )
-    parser.add_argument(
-        "--ark-index",
-        "-a",
-        default=1,
-        required=True,
-        type=int,
-        help="ark index",
-    )
-    parser.add_argument(
-        "--output-dir",
-        "-o",
-        default=False,
-        required=True,
-        type=str,
-        help="output dir",
-    )
-    parser.add_argument(
-        "--window-type",
-        default="hamming",
-        required=False,
-        type=str,
-        help="window type"
-    )
-    return parser
-
-
-def main():
-    parser = get_parser()
-    args = parser.parse_args()
-
-    ark_file = args.output_dir + "/ark/feats." + str(args.ark_index) + ".ark"
-    scp_file = args.output_dir + "/ark/feats." + str(args.ark_index) + ".scp"
-    text_file = args.output_dir + "/txt/text." + str(args.ark_index) + ".txt"  
-    feats_shape_file = args.output_dir + "/ark/len." + str(args.ark_index)
-    text_shape_file = args.output_dir + "/txt/len." + str(args.ark_index)
-
-    ark_writer = WriteHelper('ark,scp:{},{}'.format(ark_file, scp_file))
-    text_writer = open(text_file, 'w')
-    feats_shape_writer = open(feats_shape_file, 'w')
-    text_shape_writer = open(text_shape_file, 'w')
-
-    speed_perturb_list = args.speed_perturb.split(',')
-    
-    for speed in speed_perturb_list:
-        with open(args.wav_lists, 'r', encoding='utf-8') as wavfile:
-            with open(args.text_files, 'r', encoding='utf-8') as textfile:
-                for wav, text in zip(wavfile, textfile): 
-                    s_w = wav.strip().split()
-                    wav_id = s_w[0]
-                    wav_file = s_w[1]
-
-                    s_t = text.strip().split()
-                    text_id = s_t[0]
-                    txt = s_t[1:]
-                    fbank = compute_fbank(wav_file,
-                                          num_mel_bins=args.dims,
-                                          resample_rate=args.sample_frequency,
-                                          speed=float(speed),
-                                          window_type=args.window_type
-                                          )
-                    feats_dims = fbank.shape[1]
-                    feats_lens = fbank.shape[0]
-                    if feats_lens >= args.max_lengths:
-                        continue
-                    txt_lens = len(txt)
-                    if speed == "1.0":
-                        wav_id_sp = wav_id
-                    else: 
-                        wav_id_sp = wav_id + "_sp" + speed
-
-                    feats_shape_writer.write(wav_id_sp + " " + str(feats_lens) + "," + str(feats_dims) + '\n')
-                    text_shape_writer.write(wav_id_sp + " " + str(txt_lens) + '\n')
-
-                    text_writer.write(wav_id_sp + " " + " ".join(txt) + '\n')
-                    ark_writer(wav_id_sp, fbank)
-                    
-
-if __name__ == '__main__':
-    main()
-
diff --git a/egs/aishell2/transformer/utils/compute_fbank.sh b/egs/aishell2/transformer/utils/compute_fbank.sh
deleted file mode 100755
index 8704b31..0000000
--- a/egs/aishell2/transformer/utils/compute_fbank.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/usr/bin/env bash
-
-. ./path.sh || exit 1;
-# Begin configuration section.
-nj=32
-cmd=./utils/run.pl
-
-# feature configuration
-feats_dim=80
-sample_frequency=16000
-speed_perturb="1.0"
-window_type="hamming"
-max_lengths=1500
-
-echo "$0 $@"
-
-. utils/parse_options.sh || exit 1;
-
-data=$1
-logdir=$2
-fbankdir=$3
-
-[ ! -f $data/wav.scp ] && echo "$0: no such file $data/wav.scp" && exit 1;
-[ ! -f $data/text ] && echo "$0: no such file $data/text" && exit 1;
-
-python utils/split_data.py $data $data $nj
-
-ark_dir=${fbankdir}/ark; mkdir -p ${ark_dir}
-text_dir=${fbankdir}/txt; mkdir -p ${text_dir}
-mkdir -p ${logdir}
-
-$cmd JOB=1:$nj $logdir/make_fbank.JOB.log \
-    python utils/compute_fbank.py -w $data/split${nj}/JOB/wav.scp -t $data/split${nj}/JOB/text \
-        -d $feats_dim -s $sample_frequency -m ${max_lengths} -p ${speed_perturb} -a JOB -o ${fbankdir} \
-        --window-type ${window_type} \
-        || exit 1;
-
-for n in $(seq $nj); do
-    cat ${ark_dir}/feats.$n.scp || exit 1
-done > $fbankdir/feats.scp || exit 1
-
-for n in $(seq $nj); do
-    cat ${text_dir}/text.$n.txt || exit 1
-done > $fbankdir/text || exit 1
-
-for n in $(seq $nj); do
-    cat ${ark_dir}/len.$n || exit 1
-done > $fbankdir/speech_shape || exit 1
-
-for n in $(seq $nj); do
-    cat ${text_dir}/len.$n || exit 1
-done > $fbankdir/text_shape || exit 1
-
-echo "$0: Succeeded compute FBANK features"
diff --git a/egs/aishell2/transformer/utils/compute_wer.py b/egs/aishell2/transformer/utils/compute_wer.py
deleted file mode 100755
index 26a9f49..0000000
--- a/egs/aishell2/transformer/utils/compute_wer.py
+++ /dev/null
@@ -1,157 +0,0 @@
-import os
-import numpy as np
-import sys
-
-def compute_wer(ref_file,
-                hyp_file,
-                cer_detail_file):
-    rst = {
-        'Wrd': 0,
-        'Corr': 0,
-        'Ins': 0,
-        'Del': 0,
-        'Sub': 0,
-        'Snt': 0,
-        'Err': 0.0,
-        'S.Err': 0.0,
-        'wrong_words': 0,
-        'wrong_sentences': 0
-    }
-
-    hyp_dict = {}
-    ref_dict = {}
-    with open(hyp_file, 'r') as hyp_reader:
-        for line in hyp_reader:
-            key = line.strip().split()[0]
-            value = line.strip().split()[1:]
-            hyp_dict[key] = value
-    with open(ref_file, 'r') as ref_reader:
-        for line in ref_reader:
-            key = line.strip().split()[0]
-            value = line.strip().split()[1:]
-            ref_dict[key] = value
-
-    cer_detail_writer = open(cer_detail_file, 'w')
-    for hyp_key in hyp_dict:
-        if hyp_key in ref_dict:
-           out_item = compute_wer_by_line(hyp_dict[hyp_key], ref_dict[hyp_key])
-           rst['Wrd'] += out_item['nwords']
-           rst['Corr'] += out_item['cor']
-           rst['wrong_words'] += out_item['wrong']
-           rst['Ins'] += out_item['ins']
-           rst['Del'] += out_item['del']
-           rst['Sub'] += out_item['sub']
-           rst['Snt'] += 1
-           if out_item['wrong'] > 0:
-               rst['wrong_sentences'] += 1
-           cer_detail_writer.write(hyp_key + print_cer_detail(out_item) + '\n')
-           cer_detail_writer.write("ref:" + '\t' + " ".join(list(map(lambda x: x.lower(), ref_dict[hyp_key]))) + '\n')
-           cer_detail_writer.write("hyp:" + '\t' + " ".join(list(map(lambda x: x.lower(), hyp_dict[hyp_key]))) + '\n')
-
-    if rst['Wrd'] > 0:
-        rst['Err'] = round(rst['wrong_words'] * 100 / rst['Wrd'], 2)
-    if rst['Snt'] > 0:
-        rst['S.Err'] = round(rst['wrong_sentences'] * 100 / rst['Snt'], 2)
-
-    cer_detail_writer.write('\n')
-    cer_detail_writer.write("%WER " + str(rst['Err']) + " [ " + str(rst['wrong_words'])+ " / " + str(rst['Wrd']) +
-                            ", " + str(rst['Ins']) + " ins, " + str(rst['Del']) + " del, " + str(rst['Sub']) + " sub ]" + '\n')
-    cer_detail_writer.write("%SER " + str(rst['S.Err']) + " [ " + str(rst['wrong_sentences']) + " / " + str(rst['Snt']) + " ]" + '\n')
-    cer_detail_writer.write("Scored " + str(len(hyp_dict)) + " sentences, " + str(len(hyp_dict) - rst['Snt']) + " not present in hyp." + '\n')
-
-     
-def compute_wer_by_line(hyp,
-                        ref):
-    hyp = list(map(lambda x: x.lower(), hyp))
-    ref = list(map(lambda x: x.lower(), ref))
-
-    len_hyp = len(hyp)
-    len_ref = len(ref)
-
-    cost_matrix = np.zeros((len_hyp + 1, len_ref + 1), dtype=np.int16)
-
-    ops_matrix = np.zeros((len_hyp + 1, len_ref + 1), dtype=np.int8)
-
-    for i in range(len_hyp + 1):
-        cost_matrix[i][0] = i
-    for j in range(len_ref + 1):
-        cost_matrix[0][j] = j
-
-    for i in range(1, len_hyp + 1):
-        for j in range(1, len_ref + 1):
-            if hyp[i - 1] == ref[j - 1]:
-                cost_matrix[i][j] = cost_matrix[i - 1][j - 1]
-            else:
-                substitution = cost_matrix[i - 1][j - 1] + 1
-                insertion = cost_matrix[i - 1][j] + 1
-                deletion = cost_matrix[i][j - 1] + 1
-
-                compare_val = [substitution, insertion, deletion]
-
-                min_val = min(compare_val)
-                operation_idx = compare_val.index(min_val) + 1
-                cost_matrix[i][j] = min_val
-                ops_matrix[i][j] = operation_idx
-
-    match_idx = []
-    i = len_hyp
-    j = len_ref
-    rst = {
-        'nwords': len_ref,
-        'cor': 0,
-        'wrong': 0,
-        'ins': 0,
-        'del': 0,
-        'sub': 0
-    }
-    while i >= 0 or j >= 0:
-        i_idx = max(0, i)
-        j_idx = max(0, j)
-
-        if ops_matrix[i_idx][j_idx] == 0:  # correct
-            if i - 1 >= 0 and j - 1 >= 0:
-                match_idx.append((j - 1, i - 1))
-                rst['cor'] += 1
-
-            i -= 1
-            j -= 1
-
-        elif ops_matrix[i_idx][j_idx] == 2:  # insert
-            i -= 1
-            rst['ins'] += 1
-
-        elif ops_matrix[i_idx][j_idx] == 3:  # delete
-            j -= 1
-            rst['del'] += 1
-
-        elif ops_matrix[i_idx][j_idx] == 1:  # substitute
-            i -= 1
-            j -= 1
-            rst['sub'] += 1
-
-        if i < 0 and j >= 0:
-            rst['del'] += 1
-        elif j < 0 and i >= 0:
-            rst['ins'] += 1
-
-    match_idx.reverse()
-    wrong_cnt = cost_matrix[len_hyp][len_ref]
-    rst['wrong'] = wrong_cnt
-
-    return rst
-
-def print_cer_detail(rst):
-    return ("(" + "nwords=" + str(rst['nwords']) + ",cor=" + str(rst['cor'])
-            + ",ins=" + str(rst['ins']) + ",del=" + str(rst['del']) + ",sub="
-            + str(rst['sub']) + ") corr:" + '{:.2%}'.format(rst['cor']/rst['nwords'])
-            + ",cer:" + '{:.2%}'.format(rst['wrong']/rst['nwords']))
-
-if __name__ == '__main__':
-    if len(sys.argv) != 4:
-        print("usage : python compute-wer.py test.ref test.hyp test.wer")
-        sys.exit(0)
-
-    ref_file = sys.argv[1]
-    hyp_file = sys.argv[2]
-    cer_detail_file = sys.argv[3]
-    compute_wer(ref_file, hyp_file, cer_detail_file)
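The deleted scorer above is a standard Levenshtein alignment: a dynamic-programming cost matrix over (hyp, ref) positions, backtraced to count correct, substituted, inserted, and deleted tokens. Its core reduces to the classic recurrence, sketched here:

```python
def edit_distance(ref, hyp):
    """Minimal Levenshtein distance between token lists (core of the scorer above)."""
    R, H = len(ref), len(hyp)
    d = [[0] * (H + 1) for _ in range(R + 1)]
    for i in range(R + 1):
        d[i][0] = i
    for j in range(H + 1):
        d[0][j] = j
    for i in range(1, R + 1):
        for j in range(1, H + 1):
            cost = 0 if ref[i - 1] == hyp[j - 1] else 1
            d[i][j] = min(d[i - 1][j - 1] + cost,  # substitution / match
                          d[i - 1][j] + 1,         # deletion
                          d[i][j - 1] + 1)         # insertion
    return d[R][H]

# %WER = 100 * edit_distance(ref, hyp) / len(ref)
```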
diff --git a/egs/aishell2/transformer/utils/download_model.py b/egs/aishell2/transformer/utils/download_model.py
deleted file mode 100755
index 70ea179..0000000
--- a/egs/aishell2/transformer/utils/download_model.py
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env python3
-import argparse
-
-from modelscope.pipelines import pipeline
-from modelscope.utils.constant import Tasks
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(
-        description="download model configs",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
-    parser.add_argument("--model_name",
-                        type=str,
-                        default="damo/speech_data2vec_pretrain-zh-cn-aishell2-16k-pytorch",
-                        help="model name in ModelScope")
-    args = parser.parse_args()
-
-    inference_pipeline = pipeline(
-        task=Tasks.auto_speech_recognition,
-        model=args.model_name)
diff --git a/egs/aishell2/transformer/utils/error_rate_zh b/egs/aishell2/transformer/utils/error_rate_zh
deleted file mode 100755
index 6871a07..0000000
--- a/egs/aishell2/transformer/utils/error_rate_zh
+++ /dev/null
@@ -1,370 +0,0 @@
-#!/usr/bin/env python3
-# coding=utf8
-
-# Copyright  2021  Jiayu DU
-
-import sys
-import argparse
-import json
-import logging
-logging.basicConfig(stream=sys.stderr, level=logging.INFO, format='[%(levelname)s] %(message)s')
-
-DEBUG = None
-
-def GetEditType(ref_token, hyp_token):
-    if ref_token == None and hyp_token != None:
-        return 'I'
-    elif ref_token != None and hyp_token == None:
-        return 'D'
-    elif ref_token == hyp_token:
-        return 'C'
-    elif ref_token != hyp_token:
-        return 'S'
-    else:
-        raise RuntimeError
-
-class AlignmentArc:
-    def __init__(self, src, dst, ref, hyp):
-        self.src = src
-        self.dst = dst
-        self.ref = ref
-        self.hyp = hyp
-        self.edit_type = GetEditType(ref, hyp)
-
-def similarity_score_function(ref_token, hyp_token):
-    return 0 if (ref_token == hyp_token) else -1.0
-
-def insertion_score_function(token):
-    return -1.0
-
-def deletion_score_function(token):
-    return -1.0
-
-def EditDistance(
-        ref,
-        hyp, 
-        similarity_score_function = similarity_score_function,
-        insertion_score_function = insertion_score_function,
-        deletion_score_function = deletion_score_function):
-    assert(len(ref) != 0)
-    class DPState:
-        def __init__(self):
-            self.score = -float('inf')
-            # backpointer
-            self.prev_r = None
-            self.prev_h = None
-    
-    def print_search_grid(S, R, H, fstream):
-        print(file=fstream)
-        for r in range(R):
-            for h in range(H):
-                print(F'[{r},{h}]:{S[r][h].score:4.3f}:({S[r][h].prev_r},{S[r][h].prev_h}) ', end='', file=fstream)
-            print(file=fstream)
-
-    R = len(ref) + 1
-    H = len(hyp) + 1
-
-    # Construct DP search space, a (R x H) grid
-    S = [ [] for r in range(R) ]
-    for r in range(R):
-        S[r] = [ DPState() for x in range(H) ]
-
-    # initialize DP search grid origin, S(r = 0, h = 0)
-    S[0][0].score = 0.0
-    S[0][0].prev_r = None
-    S[0][0].prev_h = None
-
-    # initialize REF axis
-    for r in range(1, R):
-        S[r][0].score = S[r-1][0].score + deletion_score_function(ref[r-1])
-        S[r][0].prev_r = r-1
-        S[r][0].prev_h = 0
-
-    # initialize HYP axis
-    for h in range(1, H):
-        S[0][h].score = S[0][h-1].score + insertion_score_function(hyp[h-1])
-        S[0][h].prev_r = 0
-        S[0][h].prev_h = h-1
-
-    best_score = S[0][0].score
-    best_state = (0, 0)
-
-    for r in range(1, R):
-        for h in range(1, H):
-            sub_or_cor_score = similarity_score_function(ref[r-1], hyp[h-1])
-            new_score = S[r-1][h-1].score + sub_or_cor_score
-            if new_score >= S[r][h].score:
-                S[r][h].score = new_score
-                S[r][h].prev_r = r-1
-                S[r][h].prev_h = h-1
-
-            del_score = deletion_score_function(ref[r-1])
-            new_score = S[r-1][h].score + del_score
-            if new_score >= S[r][h].score:
-                S[r][h].score = new_score
-                S[r][h].prev_r = r - 1
-                S[r][h].prev_h = h
-
-            ins_score = insertion_score_function(hyp[h-1])
-            new_score = S[r][h-1].score + ins_score
-            if new_score >= S[r][h].score:
-                S[r][h].score = new_score
-                S[r][h].prev_r = r
-                S[r][h].prev_h = h-1
-
-    best_score = S[R-1][H-1].score
-    best_state = (R-1, H-1)
-
-    if DEBUG:
-        print_search_grid(S, R, H, sys.stderr)
-
-    # Backtracing best alignment path, i.e. a list of arcs
-    # arc = (src, dst, ref, hyp, edit_type)
-    # src/dst = (r, h), where r/h refers to search grid state-id along Ref/Hyp axis
-    best_path = []
-    r, h = best_state[0], best_state[1]
-    prev_r, prev_h = S[r][h].prev_r, S[r][h].prev_h
-    score = S[r][h].score
-    # loop invariant:
-    #   1. (prev_r, prev_h) -> (r, h) is a "forward arc" on best alignment path
-    #   2. score is the value of point(r, h) on DP search grid
-    while prev_r is not None or prev_h is not None:
-        src = (prev_r, prev_h)
-        dst = (r, h)
-        if (r == prev_r + 1 and h == prev_h + 1): # Substitution or correct
-            arc = AlignmentArc(src, dst, ref[prev_r], hyp[prev_h])
-        elif (r == prev_r + 1 and h == prev_h): # Deletion
-            arc = AlignmentArc(src, dst, ref[prev_r], None)
-        elif (r == prev_r and h == prev_h + 1): # Insertion
-            arc = AlignmentArc(src, dst, None, hyp[prev_h])
-        else:
-            raise RuntimeError
-        best_path.append(arc)
-        r, h = prev_r, prev_h
-        prev_r, prev_h = S[r][h].prev_r, S[r][h].prev_h
-        score = S[r][h].score
-    
-    best_path.reverse()
-    return (best_path, best_score)
-
-def PrettyPrintAlignment(alignment, stream = sys.stderr):
-    def get_token_str(token):
-        if token is None:
-            return "*"
-        return token
-    
-    def is_double_width_char(ch):
-        if (ch >= '\u4e00') and (ch <= '\u9fa5'): # codepoint ranges for Chinese chars
-            return True
-        # TODO: support other double-width-char languages such as Japanese and Korean
-        else:
-            return False
-    
-    def display_width(token_str):
-        m = 0
-        for c in token_str:
-            if is_double_width_char(c):
-                m += 2
-            else:
-                m += 1
-        return m
-
-    R = '  REF  : '
-    H = '  HYP  : '
-    E = '  EDIT : '
-    for arc in alignment:
-        r = get_token_str(arc.ref)
-        h = get_token_str(arc.hyp)
-        e = arc.edit_type if arc.edit_type != 'C' else ''
-
-        nr, nh, ne = display_width(r), display_width(h), display_width(e)
-        n = max(nr, nh, ne) + 1
-
-        R += r + ' ' * (n-nr)
-        H += h + ' ' * (n-nh)
-        E += e + ' ' * (n-ne)
-
-    print(R, file=stream)
-    print(H, file=stream)
-    print(E, file=stream)
-
-def CountEdits(alignment):
-    c, s, i, d = 0, 0, 0, 0
-    for arc in alignment:
-        if arc.edit_type == 'C':
-            c += 1
-        elif arc.edit_type == 'S':
-            s += 1
-        elif arc.edit_type == 'I':
-            i += 1
-        elif arc.edit_type == 'D':
-            d += 1
-        else:
-            raise RuntimeError
-    return (c, s, i, d)
-
-def ComputeTokenErrorRate(c, s, i, d):
-    return 100.0 * (s + d + i) / (s + d + c)
-
-def ComputeSentenceErrorRate(num_err_utts, num_utts):
-    assert(num_utts != 0)
-    return 100.0 * num_err_utts / num_utts
-
-
-class EvaluationResult:
-    def __init__(self):
-        self.num_ref_utts = 0
-        self.num_hyp_utts = 0
-        self.num_eval_utts = 0 # seen in both ref & hyp
-        self.num_hyp_without_ref = 0
-
-        self.C = 0
-        self.S = 0
-        self.I = 0
-        self.D = 0
-        self.token_error_rate = 0.0
-
-        self.num_utts_with_error = 0
-        self.sentence_error_rate = 0.0
-    
-    def to_json(self):
-        return json.dumps(self.__dict__)
-    
-    def to_kaldi(self):
-        info = (
-            F'%WER {self.token_error_rate:.2f} [ {self.S + self.D + self.I} / {self.C + self.S + self.D}, {self.I} ins, {self.D} del, {self.S} sub ]\n'
-            F'%SER {self.sentence_error_rate:.2f} [ {self.num_utts_with_error} / {self.num_eval_utts} ]\n'
-        )
-        return info
-    
-    def to_sclite(self):
-        return "TODO"
-    
-    def to_espnet(self):
-        return "TODO"
-    
-    def to_summary(self):
-        #return json.dumps(self.__dict__, indent=4)
-        summary = (
-            '==================== Overall Statistics ====================\n'
-            F'num_ref_utts: {self.num_ref_utts}\n'
-            F'num_hyp_utts: {self.num_hyp_utts}\n'
-            F'num_hyp_without_ref: {self.num_hyp_without_ref}\n'
-            F'num_eval_utts: {self.num_eval_utts}\n'
-            F'sentence_error_rate: {self.sentence_error_rate:.2f}%\n'
-            F'token_error_rate: {self.token_error_rate:.2f}%\n'
-            F'token_stats:\n'
-            F'  - tokens:{self.C + self.S + self.D:>7}\n'
-            F'  - edits: {self.S + self.I + self.D:>7}\n'
-            F'  - cor:   {self.C:>7}\n'
-            F'  - sub:   {self.S:>7}\n'
-            F'  - ins:   {self.I:>7}\n'
-            F'  - del:   {self.D:>7}\n'
-            '============================================================\n'
-        )
-        return summary
-
-
-class Utterance:
-    def __init__(self, uid, text):
-        self.uid = uid
-        self.text = text
-
-
-def LoadUtterances(filepath, format):
-    utts = {}
-    if format == 'text': # utt_id word1 word2 ...
-        with open(filepath, 'r', encoding='utf8') as f:
-            for line in f:
-                line = line.strip()
-                if line:
-                    cols = line.split(maxsplit=1)
-                    assert(len(cols) == 2 or len(cols) == 1)
-                    uid = cols[0]
-                    text = cols[1] if len(cols) == 2 else ''
-                    if utts.get(uid) is not None:
-                        raise RuntimeError(F'Found duplicated utterance id {uid}')
-                    utts[uid] = Utterance(uid, text)
-    else:
-        raise RuntimeError(F'Unsupported text format {format}')
-    return utts
-
-
-def tokenize_text(text, tokenizer):
-    if tokenizer == 'whitespace':
-        return text.split()
-    elif tokenizer == 'char':
-        return [ ch for ch in ''.join(text.split()) ]
-    else:
-        raise RuntimeError(F'ERROR: Unsupported tokenizer {tokenizer}')
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    # optional
-    parser.add_argument('--tokenizer', choices=['whitespace', 'char'], default='whitespace', help='whitespace for WER, char for CER')
-    parser.add_argument('--ref-format', choices=['text'], default='text', help='reference format, first col is utt_id, the rest is text')
-    parser.add_argument('--hyp-format', choices=['text'], default='text', help='hypothesis format, first col is utt_id, the rest is text')
-    # required
-    parser.add_argument('--ref', type=str, required=True, help='input reference file')
-    parser.add_argument('--hyp', type=str, required=True, help='input hypothesis file')
-
-    parser.add_argument('result_file', type=str)
-    args = parser.parse_args()
-    logging.info(args)
-
-    ref_utts = LoadUtterances(args.ref, args.ref_format)
-    hyp_utts = LoadUtterances(args.hyp, args.hyp_format)
-
-    r = EvaluationResult()
-
-    # check valid utterances in hyp that have matched non-empty reference
-    eval_utts = []
-    r.num_hyp_without_ref = 0
-    for uid in sorted(hyp_utts.keys()):
-        if uid in ref_utts:
-            if ref_utts[uid].text.strip(): # non-empty reference
-                eval_utts.append(uid)
-            else:
-                logging.warning(F'Found {uid} with empty reference, skipping...')
-        else:
-            logging.warning(F'Found {uid} without reference, skipping...')
-            r.num_hyp_without_ref += 1
-
-    r.num_hyp_utts = len(hyp_utts)
-    r.num_ref_utts = len(ref_utts)
-    r.num_eval_utts = len(eval_utts)
-
-    with open(args.result_file, 'w+', encoding='utf8') as fo:
-        for uid in eval_utts:
-            ref = ref_utts[uid]
-            hyp = hyp_utts[uid]
-
-            alignment, score = EditDistance(
-                tokenize_text(ref.text, args.tokenizer),
-                tokenize_text(hyp.text, args.tokenizer)
-            )
-            
-            c, s, i, d = CountEdits(alignment)
-            utt_ter = ComputeTokenErrorRate(c, s, i, d)
-
-            # utt-level evaluation result
-            print(F'{{"uid":{uid}, "score":{score}, "ter":{utt_ter:.2f}, "cor":{c}, "sub":{s}, "ins":{i}, "del":{d}}}', file=fo)
-            PrettyPrintAlignment(alignment, fo)
-
-            r.C += c
-            r.S += s
-            r.I += i
-            r.D += d
-
-            if utt_ter > 0:
-                r.num_utts_with_error += 1
-
-        # corpus level evaluation result
-        r.sentence_error_rate = ComputeSentenceErrorRate(r.num_utts_with_error, r.num_eval_utts)
-        r.token_error_rate = ComputeTokenErrorRate(r.C, r.S, r.I, r.D)
-
-        print(r.to_summary(), file=fo)
-
-    print(r.to_json())
-    print(r.to_kaldi())
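For context on what this deleted scorer computed: CER/WER is the edit distance between reference and hypothesis token sequences, normalized by the reference length. A minimal sketch of the same computation, with illustrative function names rather than a FunASR API:

```python
def edit_distance(ref, hyp):
    # Same DP recurrence as EditDistance above, tracking only the cost.
    R, H = len(ref) + 1, len(hyp) + 1
    d = [[0] * H for _ in range(R)]
    for r in range(R):
        d[r][0] = r          # deletions
    for h in range(H):
        d[0][h] = h          # insertions
    for r in range(1, R):
        for h in range(1, H):
            sub = d[r - 1][h - 1] + (ref[r - 1] != hyp[h - 1])
            d[r][h] = min(sub, d[r - 1][h] + 1, d[r][h - 1] + 1)
    return d[R - 1][H - 1]

def token_error_rate(ref_text, hyp_text, tokenizer="char"):
    # char for CER, whitespace for WER, matching tokenize_text above.
    ref = list("".join(ref_text.split())) if tokenizer == "char" else ref_text.split()
    hyp = list("".join(hyp_text.split())) if tokenizer == "char" else hyp_text.split()
    return 100.0 * edit_distance(ref, hyp) / len(ref)

print(token_error_rate("今天天气不错", "今天天气很好"))  # 33.33...
```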
diff --git a/egs/aishell2/transformer/utils/extract_embeds.py b/egs/aishell2/transformer/utils/extract_embeds.py
deleted file mode 100755
index 7b817d8..0000000
--- a/egs/aishell2/transformer/utils/extract_embeds.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from transformers import AutoTokenizer, AutoModel, pipeline
-import numpy as np
-import sys
-import os
-import torch
-from kaldiio import WriteHelper
-import re
-text_file_json = sys.argv[1]
-out_ark = sys.argv[2]
-out_scp = sys.argv[3]
-out_shape = sys.argv[4]
-device = int(sys.argv[5])
-model_path = sys.argv[6]
-
-model = AutoModel.from_pretrained(model_path)
-tokenizer = AutoTokenizer.from_pretrained(model_path)
-extractor = pipeline(task="feature-extraction", model=model, tokenizer=tokenizer, device=device)
-
-with open(text_file_json, 'r') as f:
-    js = f.readlines()
-
-
-f_shape = open(out_shape, "w")
-with WriteHelper('ark,scp:{},{}'.format(out_ark, out_scp)) as writer:
-    with torch.no_grad():
-        for idx, line in enumerate(js):
-            id, tokens = line.strip().split(" ", 1)
-            tokens = re.sub(" ", "", tokens.strip())
-            tokens = ' '.join([j for j in tokens])
-            token_num = len(tokens.split(" "))
-            outputs = extractor(tokens)
-            outputs = np.array(outputs)
-            embeds = outputs[0, 1:-1, :]
-
-            token_num_embeds, dim = embeds.shape
-            if token_num == token_num_embeds:
-                writer(id, embeds)
-                shape_line = "{} {},{}\n".format(id, token_num_embeds, dim)
-                f_shape.write(shape_line)
-            else:
-                print("{}: token/embedding count mismatch: {} vs {}, tokens: {}".format(id, token_num, token_num_embeds, tokens))
-
-
-
-f_shape.close()
-
-
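The script above relies on one invariant: after dropping the [CLS] and [SEP] rows, the number of embedding rows must equal the number of input characters, otherwise the utterance is skipped. A self-contained sketch of that check, with illustrative shapes:

```python
import numpy as np

def strip_special_tokens(outputs):
    # outputs: (1, seq_len, dim) array from a feature-extraction pipeline;
    # drop the [CLS] (first) and [SEP] (last) rows, as the script above does.
    return np.array(outputs)[0, 1:-1, :]

embeds = strip_special_tokens(np.zeros((1, 6, 768)))  # 4 real tokens + CLS/SEP
assert embeds.shape == (4, 768)
```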
diff --git a/egs/aishell2/transformer/utils/filter_scp.pl b/egs/aishell2/transformer/utils/filter_scp.pl
deleted file mode 100755
index 003530d..0000000
--- a/egs/aishell2/transformer/utils/filter_scp.pl
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/usr/bin/env perl
-# Copyright 2010-2012 Microsoft Corporation
-#                     Johns Hopkins University (author: Daniel Povey)
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABILITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# This script takes a list of utterance-ids or any file whose first field
-# of each line is an utterance-id, and filters an scp
-# file (or any file whose "n-th" field is an utterance id), printing
-# out only those lines whose "n-th" field is in id_list. The index of
-# the "n-th" field is 1, by default, but can be changed by using
-# the -f <n> switch
-
-$exclude = 0;
-$field = 1;
-$shifted = 0;
-
-do {
-  $shifted=0;
-  if ($ARGV[0] eq "--exclude") {
-    $exclude = 1;
-    shift @ARGV;
-    $shifted=1;
-  }
-  if ($ARGV[0] eq "-f") {
-    $field = $ARGV[1];
-    shift @ARGV; shift @ARGV;
-    $shifted=1
-  }
-} while ($shifted);
-
-if(@ARGV < 1 || @ARGV > 2) {
-  die "Usage: filter_scp.pl [--exclude] [-f <field-to-filter-on>] id_list [in.scp] > out.scp \n" .
-      "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" .
-      "Note: only the first field of each line in id_list matters.  With --exclude, prints\n" .
-      "only the lines that were *not* in id_list.\n" .
-      "Caution: previously, the -f option was interpreted as a zero-based field index.\n" .
-      "If your older scripts (written before Oct 2014) stopped working and you used the\n" .
-      "-f option, add 1 to the argument.\n" .
-      "See also: scripts/filter_scp.pl .\n";
-}
-
-
-$idlist = shift @ARGV;
-open(F, "<$idlist") || die "Could not open id-list file $idlist";
-while(<F>) {
-  @A = split;
-  @A>=1 || die "Invalid id-list file line $_";
-  $seen{$A[0]} = 1;
-}
-
-if ($field == 1) { # Treat this as special case, since it is common.
-  while(<>) {
-    $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field.";
-    # $1 is what we filter on.
-    if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) {
-      print $_;
-    }
-  }
-} else {
-  while(<>) {
-    @A = split;
-    @A > 0 || die "Invalid scp file line $_";
-    @A >= $field || die "Invalid scp file line $_";
-    if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) {
-      print $_;
-    }
-  }
-}
-
-# tests:
-# the following should print "foo 1"
-# ( echo foo 1; echo bar 2 ) | scripts/filter_scp.pl <(echo foo)
-# the following should print "bar 2".
-# ( echo foo 1; echo bar 2 ) | scripts/filter_scp.pl -f 2 <(echo 2)
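A rough Python equivalent of the common one-field case, for readers who prefer it to Perl (an illustration; the Perl script is the canonical Kaldi-style tool):

```python
import sys

def filter_scp(id_list_path, scp_stream=sys.stdin, exclude=False, field=1):
    # Keep (or, with exclude=True, drop) lines whose field-th column
    # appears as the first column of id_list_path.
    with open(id_list_path) as f:
        seen = {line.split()[0] for line in f if line.strip()}
    for line in scp_stream:
        cols = line.split()
        if len(cols) >= field and ((cols[field - 1] in seen) != exclude):
            sys.stdout.write(line)
```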
diff --git a/egs/aishell2/transformer/utils/fix_data.sh b/egs/aishell2/transformer/utils/fix_data.sh
deleted file mode 100755
index b1a2bb8..0000000
--- a/egs/aishell2/transformer/utils/fix_data.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/env bash
-
-echo "$0 $@"
-data_dir=$1
-
-if [ ! -f ${data_dir}/wav.scp ]; then
-  echo "$0: wav.scp is not found"
-  exit 1;
-fi
-
-if [ ! -f ${data_dir}/text ]; then
-  echo "$0: text is not found"
-  exit 1;
-fi
-
-
-
-mkdir -p ${data_dir}/.backup
-
-awk '{print $1}' ${data_dir}/wav.scp > ${data_dir}/.backup/wav_id
-awk '{print $1}' ${data_dir}/text > ${data_dir}/.backup/text_id
-
-sort ${data_dir}/.backup/wav_id ${data_dir}/.backup/text_id | uniq -d > ${data_dir}/.backup/id
-
-cp ${data_dir}/wav.scp ${data_dir}/.backup/wav.scp
-cp ${data_dir}/text ${data_dir}/.backup/text
-
-mv ${data_dir}/wav.scp ${data_dir}/wav.scp.bak
-mv ${data_dir}/text ${data_dir}/text.bak
-
-utils/filter_scp.pl -f 1 ${data_dir}/.backup/id ${data_dir}/wav.scp.bak | sort -k1,1 -u > ${data_dir}/wav.scp
-utils/filter_scp.pl -f 1 ${data_dir}/.backup/id ${data_dir}/text.bak | sort -k1,1 -u > ${data_dir}/text
-
-rm ${data_dir}/wav.scp.bak
-rm ${data_dir}/text.bak
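In effect the script keeps only utterance ids present in both wav.scp and text; `sort ... | uniq -d` is an id-set intersection under the assumption that ids are unique within each file. The same logic in a compact Python sketch (illustrative, not a recipe utility):

```python
def common_ids(wav_scp_path, text_path):
    # Intersect the first-column ids of the two Kaldi-style files.
    def ids(path):
        with open(path) as f:
            return {line.split()[0] for line in f if line.strip()}
    return ids(wav_scp_path) & ids(text_path)
```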
diff --git a/egs/aishell2/transformer/utils/fix_data_feat.sh b/egs/aishell2/transformer/utils/fix_data_feat.sh
deleted file mode 100755
index 84eea36..0000000
--- a/egs/aishell2/transformer/utils/fix_data_feat.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/env bash
-
-echo "$0 $@"
-data_dir=$1
-
-if [ ! -f ${data_dir}/feats.scp ]; then
-  echo "$0: feats.scp is not found"
-  exit 1;
-fi
-
-if [ ! -f ${data_dir}/text ]; then
-  echo "$0: text is not found"
-  exit 1;
-fi
-
-if [ ! -f ${data_dir}/speech_shape ]; then
-  echo "$0: feature lengths is not found"
-  exit 1;
-fi
-
-if [ ! -f ${data_dir}/text_shape ]; then
-  echo "$0: text lengths is not found"
-  exit 1;
-fi
-
-mkdir -p ${data_dir}/.backup
-
-awk '{print $1}' ${data_dir}/feats.scp > ${data_dir}/.backup/wav_id
-awk '{print $1}' ${data_dir}/text > ${data_dir}/.backup/text_id
-
-sort ${data_dir}/.backup/wav_id ${data_dir}/.backup/text_id | uniq -d > ${data_dir}/.backup/id
-
-cp ${data_dir}/feats.scp ${data_dir}/.backup/feats.scp
-cp ${data_dir}/text ${data_dir}/.backup/text
-cp ${data_dir}/speech_shape ${data_dir}/.backup/speech_shape
-cp ${data_dir}/text_shape ${data_dir}/.backup/text_shape
-
-mv ${data_dir}/feats.scp ${data_dir}/feats.scp.bak
-mv ${data_dir}/text ${data_dir}/text.bak
-mv ${data_dir}/speech_shape ${data_dir}/speech_shape.bak
-mv ${data_dir}/text_shape ${data_dir}/text_shape.bak
-
-utils/filter_scp.pl -f 1 ${data_dir}/.backup/id ${data_dir}/feats.scp.bak | sort -k1,1 -u > ${data_dir}/feats.scp
-utils/filter_scp.pl -f 1 ${data_dir}/.backup/id ${data_dir}/text.bak | sort -k1,1 -u > ${data_dir}/text
-utils/filter_scp.pl -f 1 ${data_dir}/.backup/id ${data_dir}/speech_shape.bak | sort -k1,1 -u > ${data_dir}/speech_shape
-utils/filter_scp.pl -f 1 ${data_dir}/.backup/id ${data_dir}/text_shape.bak | sort -k1,1 -u > ${data_dir}/text_shape
-
-rm ${data_dir}/feats.scp.bak
-rm ${data_dir}/text.bak
-rm ${data_dir}/speech_shape.bak
-rm ${data_dir}/text_shape.bak
-
diff --git a/egs/aishell2/transformer/utils/gen_ark_list.sh b/egs/aishell2/transformer/utils/gen_ark_list.sh
deleted file mode 100755
index aebf356..0000000
--- a/egs/aishell2/transformer/utils/gen_ark_list.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env bash
-
-
-# Begin configuration section.
-nj=32
-cmd=./utils/run.pl
-
-echo "$0 $@"
-
-. utils/parse_options.sh || exit 1;
-
-ark_dir=$1
-txt_dir=$2
-output_dir=$3
-
-[ ! -d ${ark_dir}/ark ] && echo "$0: ark data is required" && exit 1;
-[ ! -d ${txt_dir}/txt ] && echo "$0: txt data is required" && exit 1;
-
-for n in $(seq $nj); do
-  echo "${ark_dir}/ark/feats.$n.ark ${txt_dir}/txt/text.$n.txt" || exit 1
-done > ${output_dir}/ark_txt.scp || exit 1
-
diff --git a/egs/aishell2/transformer/utils/gen_modelscope_configuration.py b/egs/aishell2/transformer/utils/gen_modelscope_configuration.py
deleted file mode 100644
index d340970..0000000
--- a/egs/aishell2/transformer/utils/gen_modelscope_configuration.py
+++ /dev/null
@@ -1,118 +0,0 @@
-import argparse
-import json
-import os
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--task",
-        type=str,
-        default="auto-speech-recognition",
-        help="task name",
-    )
-    parser.add_argument(
-        "--type",
-        type=str,
-        default="generic-asr",
-    )
-    parser.add_argument(
-        "--am_model_name",
-        type=str,
-        default="model.pb",
-        help="model file name",
-    )
-    parser.add_argument(
-        "--mode",
-        type=str,
-        default="paraformer",
-        help="mode for decoding",
-    )
-    parser.add_argument(
-        "--lang",
-        type=str,
-        default="zh-cn",
-        help="language",
-    )
-    parser.add_argument(
-        "--batch_size",
-        type=int,
-        default=1,
-        help="batch size",
-    )
-    parser.add_argument(
-        "--am_model_config",
-        type=str,
-        default="config.yaml",
-        help="config file",
-    )
-    parser.add_argument(
-        "--mvn_file",
-        type=str,
-        default="am.mvn",
-        help="cmvn file",
-    )
-    parser.add_argument(
-        "--model_name",
-        type=str,
-        help="model name",
-    )
-    parser.add_argument(
-        "--pipeline_type",
-        type=str,
-        default="asr-inference",
-        help="pipeline type",
-    )
-    parser.add_argument(
-        "--vocab_size",
-        type=int,
-        help="vocab_size",
-    )
-    parser.add_argument(
-        "--dataset",
-        type=str,
-        help="dataset name",
-    )
-    parser.add_argument(
-        "--output_dir",
-        type=str,
-        help="output path",
-    )
-    parser.add_argument(
-        "--nat",
-        type=str,
-        default="",
-        help="nat",
-    )
-    parser.add_argument(
-        "--tag",
-        type=str,
-        default="exp1",
-        help="model name tag",
-    )
-    args = parser.parse_args()
-
-    model = {
-        "type": args.type,
-        "am_model_name": args.am_model_name,
-        "model_config": {
-            "type": "pytorch",
-            "code_base": "funasr",
-            "mode": args.mode,
-            "lang": args.lang,
-            "batch_size": args.batch_size,
-            "am_model_config": args.am_model_config,
-            "mvn_file": args.mvn_file,
-            "model": "speech_{}_asr{}-{}-16k-{}-vocab{}-pytorch-{}".format(args.model_name, args.nat, args.lang,
-                                                                           args.dataset, args.vocab_size, args.tag),
-        }
-    }
-    pipeline = {"type": args.pipeline_type}
-    json_dict = {
-        "framework": "pytorch",
-        "task": args.task,
-        "model": model,
-        "pipeline": pipeline,
-    }
-
-    with open(os.path.join(args.output_dir, "configuration.json"), "w") as f:
-        json.dump(json_dict, f, indent=4)
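A worked example of the ModelScope model id the script composes; the argument values below are hypothetical:

```python
# Roughly what
#   python gen_modelscope_configuration.py --model_name transformer \
#     --vocab_size 5212 --dataset aishell2 --output_dir exp --tag exp1
# would produce for the "model" field of configuration.json:
model_id = "speech_{}_asr{}-{}-16k-{}-vocab{}-pytorch-{}".format(
    "transformer", "", "zh-cn", "aishell2", 5212, "exp1")
print(model_id)  # speech_transformer_asr-zh-cn-16k-aishell2-vocab5212-pytorch-exp1
```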
diff --git a/egs/aishell2/transformer/utils/parse_options.sh b/egs/aishell2/transformer/utils/parse_options.sh
deleted file mode 100755
index 71fb9e5..0000000
--- a/egs/aishell2/transformer/utils/parse_options.sh
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
-#                 Arnab Ghoshal, Karel Vesely
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABILITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Parse command-line options.
-# To be sourced by another script (as in ". parse_options.sh").
-# Option format is: --option-name arg
-# and shell variable "option_name" gets set to value "arg."
-# The exception is --help, which takes no arguments, but prints the
-# $help_message variable (if defined).
-
-
-###
-### The --config file options have lower priority to command line
-### options, so we need to import them first...
-###
-
-# Now import all the configs specified by command-line, in left-to-right order
-for ((argpos=1; argpos<$#; argpos++)); do
-  if [ "${!argpos}" == "--config" ]; then
-    argpos_plus1=$((argpos+1))
-    config=${!argpos_plus1}
-    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
-    . $config  # source the config file.
-  fi
-done
-
-
-###
-### Now we process the command line options
-###
-while true; do
-  [ -z "${1:-}" ] && break;  # break if there are no arguments
-  case "$1" in
-    # If the enclosing script is called with --help option, print the help
-    # message and exit.  Scripts should put help messages in $help_message
-    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
-      else printf "$help_message\n" 1>&2 ; fi;
-      exit 0 ;;
-    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
-      exit 1 ;;
-    # If the first command-line argument begins with "--" (e.g. --foo-bar),
-    # then work out the variable name as $name, which will equal "foo_bar".
-    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
-      # Next we test whether the variable in question is undefined; if so it's
-      # an invalid option and we die.  Note: $0 evaluates to the name of the
-      # enclosing script.
-      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
-      # is undefined.  We then have to wrap this test inside "eval" because
-      # foo_bar is itself inside a variable ($name).
-      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
-
-      oldval="`eval echo \\$$name`";
-      # Work out whether we seem to be expecting a Boolean argument.
-      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
-        was_bool=true;
-      else
-        was_bool=false;
-      fi
-
-      # Set the variable to the right value-- the escaped quotes make it work if
-      # the option had spaces, like --cmd "queue.pl -sync y"
-      eval $name=\"$2\";
-
-      # Check that Boolean-valued arguments are really Boolean.
-      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
-        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
-        exit 1;
-      fi
-      shift 2;
-      ;;
-  *) break;
-  esac
-done
-
-
-# Check for an empty argument to the --cmd option, which can easily occur as a
-# result of scripting errors.
-[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
-
-
-true; # so this script returns exit code 0.
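The convention this script implements (`--name value` sets shell variable `name`, dashes map to underscores, unknown options abort, boolean values are validated) sketched in Python for clarity; this is an illustration, not part of the recipes:

```python
def parse_options(argv, defaults):
    # defaults: option_name -> default value, as set by the sourcing script.
    opts = dict(defaults)
    i = 0
    while i < len(argv) and argv[i].startswith("--"):
        name = argv[i][2:].replace("-", "_")
        if name not in opts:
            raise SystemExit(f"invalid option {argv[i]}")
        if i + 1 >= len(argv):
            raise SystemExit(f"missing value for {argv[i]}")
        value = argv[i + 1]
        # Boolean-valued options must stay boolean, as in the shell version.
        if opts[name] in ("true", "false") and value not in ("true", "false"):
            raise SystemExit(f'expected "true" or "false": {argv[i]} {value}')
        opts[name] = value
        i += 2
    return opts, argv[i:]

opts, rest = parse_options(["--gpu-num", "2", "data/train"], {"gpu_num": "1"})
assert opts["gpu_num"] == "2" and rest == ["data/train"]
```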
diff --git a/egs/aishell2/transformer/utils/print_args.py b/egs/aishell2/transformer/utils/print_args.py
deleted file mode 100755
index b0c61e5..0000000
--- a/egs/aishell2/transformer/utils/print_args.py
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/bin/env python
-import sys
-
-
-def get_commandline_args(no_executable=True):
-    extra_chars = [
-        " ",
-        ";",
-        "&",
-        "|",
-        "<",
-        ">",
-        "?",
-        "*",
-        "~",
-        "`",
-        '"',
-        "'",
-        "\\",
-        "{",
-        "}",
-        "(",
-        ")",
-    ]
-
-    # Escape the extra characters for shell
-    argv = [
-        arg.replace("'", "'\\''")
-        if all(char not in arg for char in extra_chars)
-        else "'" + arg.replace("'", "'\\''") + "'"
-        for arg in sys.argv
-    ]
-
-    if no_executable:
-        return " ".join(argv[1:])
-    else:
-        return sys.executable + " " + " ".join(argv)
-
-
-def main():
-    print(get_commandline_args())
-
-
-if __name__ == "__main__":
-    main()
diff --git a/egs/aishell2/transformer/utils/proc_conf_oss.py b/egs/aishell2/transformer/utils/proc_conf_oss.py
deleted file mode 100755
index c4a90c5..0000000
--- a/egs/aishell2/transformer/utils/proc_conf_oss.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from pathlib import Path
-
-import torch
-import yaml
-
-
-class NoAliasSafeDumper(yaml.SafeDumper):
-    # Disable anchor/alias in yaml because it looks ugly
-    def ignore_aliases(self, data):
-        return True
-
-
-def yaml_no_alias_safe_dump(data, stream=None, **kwargs):
-    """Safe-dump in yaml with no anchor/alias"""
-    return yaml.dump(
-        data, stream, allow_unicode=True, Dumper=NoAliasSafeDumper, **kwargs
-    )
-
-
-def gen_conf(file, out_dir):
-    conf = torch.load(file)["config"]
-    conf["oss_bucket"] = "null"
-    print(conf)
-    output_dir = Path(out_dir)
-    output_dir.mkdir(parents=True, exist_ok=True)
-    with (output_dir / "config.yaml").open("w", encoding="utf-8") as f:
-        yaml_no_alias_safe_dump(conf, f, indent=4, sort_keys=False)
-
-
-if __name__ == "__main__":
-    import sys
-
-    in_f = sys.argv[1]
-    out_f = sys.argv[2]
-    gen_conf(in_f, out_f)
diff --git a/egs/aishell2/transformer/utils/proce_text.py b/egs/aishell2/transformer/utils/proce_text.py
deleted file mode 100755
index 9e517a4..0000000
--- a/egs/aishell2/transformer/utils/proce_text.py
+++ /dev/null
@@ -1,31 +0,0 @@
-
-import sys
-import re
-
-in_f = sys.argv[1]
-out_f = sys.argv[2]
-
-
-with open(in_f, "r", encoding="utf-8") as f:
-  lines = f.readlines()
-
-with open(out_f, "w", encoding="utf-8") as f:
-  for line in lines:
-    outs = line.strip().split(" ", 1)
-    if len(outs) == 2:
-      idx, text = outs
-      text = re.sub("</s>", "", text)
-      text = re.sub("<s>", "", text)
-      text = re.sub("@@", "", text)
-      text = re.sub("@", "", text)
-      text = re.sub("<unk>", "", text)
-      text = re.sub(" ", "", text)
-      text = text.lower()
-    else:
-      idx = outs[0]
-      text = " "
-
-    text = [x for x in text]
-    text = " ".join(text)
-    out = "{} {}\n".format(idx, text)
-    f.write(out)
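A worked example of the normalization above: decoder markers and BPE artifacts are stripped, then the text is re-split one character at a time (a sketch mirroring the loop, not a FunASR utility):

```python
import re

def normalize(text):
    # Order matters: "@@" must be removed before "@", as in the script above.
    for pat in ("</s>", "<s>", "@@", "@", "<unk>", " "):
        text = re.sub(pat, "", text)
    return " ".join(text.lower())

assert normalize("甚@@ 至 出现 <unk>") == "甚 至 出 现"
```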
diff --git a/egs/aishell2/transformer/utils/run.pl b/egs/aishell2/transformer/utils/run.pl
deleted file mode 100755
index 483f95b..0000000
--- a/egs/aishell2/transformer/utils/run.pl
+++ /dev/null
@@ -1,356 +0,0 @@
-#!/usr/bin/env perl
-use warnings; #sed replacement for -w perl parameter
-# In general, doing
-#  run.pl some.log a b c is like running the command a b c in
-# the bash shell, and putting the standard error and output into some.log.
-# To run parallel jobs (backgrounded on the host machine), you can do (e.g.)
-#  run.pl JOB=1:4 some.JOB.log a b c JOB is like running the command a b c JOB
-# and putting it in some.JOB.log, for each one. [Note: JOB can be any identifier].
-# If any of the jobs fails, this script will fail.
-
-# A typical example is:
-#  run.pl some.log my-prog "--opt=foo bar" foo \|  other-prog baz
-# and run.pl will run something like:
-# ( my-prog '--opt=foo bar' foo |  other-prog baz ) >& some.log
-#
-# Basically it takes the command-line arguments, quotes them
-# as necessary to preserve spaces, and evaluates them with bash.
-# In addition it puts the command line at the top of the log, and
-# the start and end times of the command at the beginning and end.
-# The reason why this is useful is so that we can create a different
-# version of this program that uses a queueing system instead.
-
-#use Data::Dumper;
-
-@ARGV < 2 && die "usage: run.pl log-file command-line arguments...";
-
-#print STDERR "COMMAND-LINE: " .  Dumper(\@ARGV) . "\n";
-$job_pick = 'all';
-$max_jobs_run = -1;
-$jobstart = 1;
-$jobend = 1;
-$ignored_opts = ""; # These will be ignored.
-$using_gpu = 0;     # initialize so "use warnings" stays quiet when --gpu is absent
-
-# First parse an option like JOB=1:4, and any
-# options that would normally be given to
-# queue.pl, which we will just discard.
-
-for (my $x = 1; $x <= 2; $x++) { # This for-loop is to
-  # allow the JOB=1:n option to be interleaved with the
-  # options to qsub.
-  while (@ARGV >= 2 && $ARGV[0] =~ m:^-:) {
-    # parse any options that would normally go to qsub, but which will be ignored here.
-    my $switch = shift @ARGV;
-    if ($switch eq "-V") {
-      $ignored_opts .= "-V ";
-    } elsif ($switch eq "--max-jobs-run" || $switch eq "-tc") {
-      # we do support the option --max-jobs-run n, and its GridEngine form -tc n.
-      # if the option appears multiple times, use the smallest value.
-      if ( $max_jobs_run <= 0 ) {
-          $max_jobs_run =  shift @ARGV;
-      } else {
-        my $new_constraint = shift @ARGV;
-        if ( ($new_constraint < $max_jobs_run) ) {
-          $max_jobs_run = $new_constraint;
-        }
-      }
-      
-      if (! ($max_jobs_run > 0)) {
-        die "run.pl: invalid option --max-jobs-run $max_jobs_run";
-      }
-    } else {
-      my $argument = shift @ARGV;
-      if ($argument =~ m/^--/) {
-        print STDERR "run.pl: WARNING: suspicious argument '$argument' to $switch; starts with '-'\n";
-      }
-      if ($switch eq "-sync" && $argument =~ m/^[yY]/) {
-        $ignored_opts .= "-sync "; # Note: in the
-        # corresponding code in queue.pl it says instead, just "$sync = 1;".
-      } elsif ($switch eq "-pe") { # e.g. -pe smp 5
-        my $argument2 = shift @ARGV;
-        $ignored_opts .= "$switch $argument $argument2 ";
-      } elsif ($switch eq "--gpu") {
-        $using_gpu = $argument;
-      } elsif ($switch eq "--pick") {
-        if($argument =~ m/^(all|failed|incomplete)$/) {
-          $job_pick = $argument;
-        } else {
-          print STDERR "run.pl: ERROR: --pick argument must be one of 'all', 'failed' or 'incomplete'\n";
-        }
-      } else {
-        # Ignore option.
-        $ignored_opts .= "$switch $argument ";
-      }
-    }
-  }
-  if ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) { # e.g. JOB=1:20
-    $jobname = $1;
-    $jobstart = $2;
-    $jobend = $3;
-    if ($jobstart > $jobend) {
-      die "run.pl: invalid job range $ARGV[0]";
-    }
-    if ($jobstart <= 0) {
-      die "run.pl: invalid job range $ARGV[0], start must be strictly positive (this is required for GridEngine compatibility).";
-    }
-    shift;
-  } elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1.
-    $jobname = $1;
-    $jobstart = $2;
-    $jobend = $2;
-    shift;
-  } elsif ($ARGV[0] =~ m/.+\=.*\:.*$/) {
-    print STDERR "run.pl: Warning: suspicious first argument to run.pl: $ARGV[0]\n";
-  }
-}
-
-# Users found this message confusing so we are removing it.
-# if ($ignored_opts ne "") {
-#   print STDERR "run.pl: Warning: ignoring options \"$ignored_opts\"\n";
-# }
-
-if ($max_jobs_run == -1) { # If --max-jobs-run option not set,
-                           # then work out the number of processors if possible,
-                           # and set it based on that.
-  $max_jobs_run = 0;
-  if ($using_gpu) {
-    if (open(P, "nvidia-smi -L |")) {
-      $max_jobs_run++ while (<P>);
-      close(P);
-    }
-    if ($max_jobs_run == 0) {
-      $max_jobs_run = 1;
-      print STDERR "run.pl: Warning: failed to detect number of GPUs from nvidia-smi, using ${max_jobs_run}\n";
-    }
-  } elsif (open(P, "</proc/cpuinfo")) {  # Linux
-    while (<P>) { if (m/^processor/) { $max_jobs_run++; } }
-    if ($max_jobs_run == 0) {
-      print STDERR "run.pl: Warning: failed to detect any processors from /proc/cpuinfo\n";
-      $max_jobs_run = 10;  # reasonable default.
-    }
-    close(P);
-  } elsif (open(P, "sysctl -a |")) {  # BSD/Darwin
-    while (<P>) {
-      if (m/hw\.ncpu\s*[:=]\s*(\d+)/) { # hw.ncpu = 4, or hw.ncpu: 4
-        $max_jobs_run = $1;
-        last;
-      }
-    }
-    close(P);
-    if ($max_jobs_run == 0) {
-      print STDERR "run.pl: Warning: failed to detect any processors from sysctl -a\n";
-      $max_jobs_run = 10;  # reasonable default.
-    }
-  } else {
-    # allow at most 32 jobs at once, on non-UNIX systems; change this code
-    # if you need to change this default.
-    $max_jobs_run = 32;
-  }
-  # The just-computed value of $max_jobs_run is just the number of processors
-  # (or our best guess); and if it happens that the number of jobs we need to
-  # run is just slightly above $max_jobs_run, it will make sense to increase
-  # $max_jobs_run to equal the number of jobs, so we don't have a small number
-  # of leftover jobs.
-  $num_jobs = $jobend - $jobstart + 1;
-  if (!$using_gpu &&
-      $num_jobs > $max_jobs_run && $num_jobs < 1.4 * $max_jobs_run) {
-    $max_jobs_run = $num_jobs;
-  }
-}
-
-sub pick_or_exit {
-  # pick_or_exit ( $logfile ) 
-  # Invoked before each job is started; it helps to run jobs selectively.
-  #
-  # Given the name of the output logfile decides whether the job must be 
-  # executed (by returning from the subroutine) or not (by terminating the
-  # process calling exit)
-  # 
-  # PRE: $job_pick is a global variable set by command line switch --pick
-  #      and indicates which class of jobs must be executed.
-  #
-  # 1) If a failed job is not executed the process exit code will indicate 
-  #    failure, just as if the task was just executed  and failed.
-  #
-  # 2) If a task is incomplete it will be executed. Incomplete may be either
-  #    a job whose log file does not contain the accounting notes in the end,
-  #    or a job whose log file does not exist.
-  #
-  # 3) If the $job_pick is set to 'all' (default behavior) a task will be
-  #    executed regardless of the result of previous attempts.
-  #
-  # This logic could have been implemented in the main execution loop,
-  # but a subroutine preserves the current level of readability of
-  # that part of the code.
-  #
-  # Alexandre Felipe, (o.alexandre.felipe@gmail.com) 14th of August of 2020
-  #
-  if($job_pick eq 'all'){
-    return; # no need to bother with the previous log
-  }
-  open my $fh, "<", $_[0] or return; # job not executed yet
-  my $log_line;
-  my $cur_line;
-  while ($cur_line = <$fh>) {
-    if( $cur_line =~ m/# Ended \(code .*/ ) {
-      $log_line = $cur_line;
-    }
-  }
-  close $fh;
-  if (! defined($log_line)){
-    return; # incomplete
-  }
-  if ( $log_line =~ m/# Ended \(code 0\).*/ ) {
-    exit(0); # complete
-  } elsif ( $log_line =~ m/# Ended \(code \d+(; signal \d+)?\).*/ ){
-    if ($job_pick !~ m/^(failed|all)$/) {
-      exit(1); # failed but not going to run
-    } else {
-      return; # failed
-    }
-  } elsif ( $log_line =~ m/.*\S.*/ ) {
-    return; # incomplete jobs are always run
-  }
-}
-
-
-$logfile = shift @ARGV;
-
-if (defined $jobname && $logfile !~ m/$jobname/ &&
-    $jobend > $jobstart) {
-  print STDERR "run.pl: you are trying to run a parallel job but "
-    . "you are putting the output into just one log file ($logfile)\n";
-  exit(1);
-}
-
-$cmd = "";
-
-foreach $x (@ARGV) {
-    if ($x =~ m/^\S+$/) { $cmd .=  $x . " "; }
-    elsif ($x =~ m:\":) { $cmd .= "'$x' "; }
-    else { $cmd .= "\"$x\" "; }
-}
-
-#$Data::Dumper::Indent=0;
-$ret = 0;
-$numfail = 0;
-%active_pids=();
-
-use POSIX ":sys_wait_h";
-for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
-  if (scalar(keys %active_pids) >= $max_jobs_run) {
-
-    # Lets wait for a change in any child's status
-    # Then we have to work out which child finished
-    $r = waitpid(-1, 0);
-    $code = $?;
-    if ($r < 0 ) { die "run.pl: Error waiting for child process"; } # should never happen.
-    if ( defined $active_pids{$r} ) {
-        $jid=$active_pids{$r};
-        $fail[$jid]=$code;
-        if ($code !=0) { $numfail++;}
-        delete $active_pids{$r};
-        # print STDERR "Finished: $r/$jid " .  Dumper(\%active_pids) . "\n";
-    } else {
-        die "run.pl: Cannot find the PID of the child process that just finished.";
-    }
-
-    # In theory we could do a non-blocking waitpid over all jobs running just
-    # to find out if only one or more jobs finished during the previous waitpid()
-    # However, we just omit this and will reap the next one in the next pass
-    # through the for(;;) cycle
-  }
-  $childpid = fork();
-  if (!defined $childpid) { die "run.pl: Error forking in run.pl (writing to $logfile)"; }
-  if ($childpid == 0) { # We're in the child... this branch
-    # executes the job and returns (possibly with an error status).
-    if (defined $jobname) {
-      $cmd =~ s/$jobname/$jobid/g;
-      $logfile =~ s/$jobname/$jobid/g;
-    }
-    # exit if the job does not need to be executed
-    pick_or_exit( $logfile );
-
-    system("mkdir -p `dirname $logfile` 2>/dev/null");
-    open(F, ">$logfile") || die "run.pl: Error opening log file $logfile";
-    print F "# " . $cmd . "\n";
-    print F "# Started at " . `date`;
-    $starttime = `date +'%s'`;
-    print F "#\n";
-    close(F);
-
-    # Pipe into bash.. make sure we're not using any other shell.
-    open(B, "|bash") || die "run.pl: Error opening shell command";
-    print B "( " . $cmd . ") 2>>$logfile >> $logfile";
-    close(B);                   # If there was an error, exit status is in $?
-    $ret = $?;
-
-    $lowbits = $ret & 127;
-    $highbits = $ret >> 8;
-    if ($lowbits != 0) { $return_str = "code $highbits; signal $lowbits" }
-    else { $return_str = "code $highbits"; }
-
-    $endtime = `date +'%s'`;
-    open(F, ">>$logfile") || die "run.pl: Error opening log file $logfile (again)";
-    $enddate = `date`;
-    chop $enddate;
-    print F "# Accounting: time=" . ($endtime - $starttime) . " threads=1\n";
-    print F "# Ended ($return_str) at " . $enddate . ", elapsed time " . ($endtime-$starttime) . " seconds\n";
-    close(F);
-    exit($ret == 0 ? 0 : 1);
-  } else {
-    $pid[$jobid] = $childpid;
-    $active_pids{$childpid} = $jobid;
-    # print STDERR "Queued: " .  Dumper(\%active_pids) . "\n";
-  }
-}
-
-# Now we have submitted all the jobs, lets wait until all the jobs finish
-foreach $child (keys %active_pids) {
-    $jobid=$active_pids{$child};
-    $r = waitpid($pid[$jobid], 0);
-    $code = $?;
-    if ($r == -1) { die "run.pl: Error waiting for child process"; } # should never happen.
-    if ($r != 0) { $fail[$jobid]=$code; $numfail++ if $code!=0; } # record exit code and count failures
-}
-
-# Some sanity checks:
-# The $fail array should not contain undefined codes
-# The number of non-zeros in that array  should be equal to $numfail
-# We cannot do foreach() here, as the JOB ids do not start at zero
-$failed_jids=0;
-for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
-  $job_return = $fail[$jobid];
-  if (not defined $job_return ) {
-    # print Dumper(\@fail);
-
-    die "run.pl: Sanity check failed: we have indication that some jobs are running " .
-      "even after we waited for all jobs to finish" ;
-  }
-  if ($job_return != 0 ){ $failed_jids++;}
-}
-if ($failed_jids != $numfail) {
-  die "run.pl: Sanity check failed: cannot find out how many jobs failed ($failed_jids x $numfail)."
-}
-if ($numfail > 0) { $ret = 1; }
-
-if ($ret != 0) {
-  $njobs = $jobend - $jobstart + 1;
-  if ($njobs == 1) {
-    if (defined $jobname) {
-      $logfile =~ s/$jobname/$jobstart/; # only one numbered job, so replace name with
-                                         # that job.
-    }
-    print STDERR "run.pl: job failed, log is in $logfile\n";
-    if ($logfile =~ m/JOB/) {
-      print STDERR "run.pl: probably you forgot to put JOB=1:\$nj in your script.";
-    }
-  }
-  else {
-    $logfile =~ s/$jobname/*/g;
-    print STDERR "run.pl: $numfail / $njobs failed, log is in $logfile\n";
-  }
-}
-
-
-exit ($ret);
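The core of run.pl in a few lines of Python: expand `JOB=start:end`, run one instance per job id with its own log, and report the number of failures. This is a simplified sketch; it omits the throttling, option parsing, and `--pick` logic of the script above:

```python
import re
import subprocess

def run_jobs(job_spec, logfile, cmd):
    # job_spec like "JOB=1:4": run cmd once per job id, substituting the
    # job name in both the command string and the log file path.
    name, start, end = re.match(r"(\w+)=(\d+):(\d+)", job_spec).groups()
    procs = []
    for jid in range(int(start), int(end) + 1):
        with open(logfile.replace(name, str(jid)), "w") as log:
            procs.append(subprocess.Popen(cmd.replace(name, str(jid)),
                                          shell=True, stdout=log, stderr=log))
    return sum(p.wait() != 0 for p in procs)  # count of failed jobs

# e.g. run_jobs("JOB=1:4", "exp/log/decode.JOB.log", "echo job JOB")
```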
diff --git a/egs/aishell2/transformer/utils/shuffle_list.pl b/egs/aishell2/transformer/utils/shuffle_list.pl
deleted file mode 100755
index a116200..0000000
--- a/egs/aishell2/transformer/utils/shuffle_list.pl
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env perl
-
-# Copyright 2013  Johns Hopkins University (author: Daniel Povey)
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABILITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-if ($ARGV[0] eq "--srand") {
-  $n = $ARGV[1];
-  $n =~ m/\d+/ || die "Bad argument to --srand option: \"$n\"";
-  srand($ARGV[1]);
-  shift;
-  shift;
-} else {
-  srand(0); # Gives inconsistent behavior if we don't seed.
-}
-
-if (@ARGV > 1 || $ARGV[0] =~ m/^-.+/) { # >1 args, or an option we
-  # don't understand.
-  print "Usage: shuffle_list.pl [--srand N] [input file]  > output\n";
-  print "randomizes the order of lines of input.\n";
-  exit(1);
-}
-
-@lines = ();
-while (<>) {
-  push @lines, [ (rand(), $_)] ;
-}
-
-@lines = sort { $a->[0] <=> $b->[0] } @lines;
-foreach $l (@lines) {
-    print $l->[1];
-}
\ No newline at end of file
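An equivalent of the deleted script in Python, seeded for reproducibility just as the Perl version defaults to srand(0):

```python
import random
import sys

random.seed(0)  # deterministic order, matching the default srand(0) above
lines = sys.stdin.readlines()
random.shuffle(lines)
sys.stdout.writelines(lines)
```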
diff --git a/egs/aishell2/transformer/utils/split_data.py b/egs/aishell2/transformer/utils/split_data.py
deleted file mode 100755
index 060eae6..0000000
--- a/egs/aishell2/transformer/utils/split_data.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import os
-import sys
-import random
-
-
-in_dir = sys.argv[1]
-out_dir = sys.argv[2]
-num_split = sys.argv[3]
-
-
-def split_scp(scp, num):
-    assert len(scp) >= num
-    avg = len(scp) // num
-    out = []
-    begin = 0
-
-    for i in range(num):
-        if i == num - 1:
-            out.append(scp[begin:])
-        else:
-            out.append(scp[begin:begin+avg])
-        begin += avg
-
-    return out
-
-
-assert os.path.exists("{}/wav.scp".format(in_dir))
-assert os.path.exists("{}/text".format(in_dir))
-
-with open("{}/wav.scp".format(in_dir), 'r') as infile:
-    wav_list = infile.readlines()
-
-with open("{}/text".format(in_dir), 'r') as infile:
-    text_list = infile.readlines()
-
-assert len(wav_list) == len(text_list)
-
-x = list(zip(wav_list, text_list))
-random.shuffle(x)
-wav_shuffle_list, text_shuffle_list = zip(*x)
-
-num_split = int(num_split)
-wav_split_list = split_scp(wav_shuffle_list, num_split)
-text_split_list = split_scp(text_shuffle_list, num_split)
-
-for idx, wav_list in enumerate(wav_split_list, 1):
-    path = out_dir + "/split" + str(num_split) + "/" + str(idx)
-    if not os.path.exists(path):
-        os.makedirs(path)
-    with open("{}/wav.scp".format(path), 'w') as wav_writer:
-        for line in wav_list:
-            wav_writer.write(line)
-
-for idx, text_list in enumerate(text_split_list, 1):
-    path = out_dir + "/split" + str(num_split) + "/" + str(idx)
-    if not os.path.exists(path):
-        os.makedirs(path)
-    with open("{}/text".format(path), 'w') as text_writer:
-        for line in text_list:
-            text_writer.write(line)
diff --git a/egs/aishell2/transformer/utils/split_scp.pl b/egs/aishell2/transformer/utils/split_scp.pl
deleted file mode 100755
index 0876dcb..0000000
--- a/egs/aishell2/transformer/utils/split_scp.pl
+++ /dev/null
@@ -1,246 +0,0 @@
-#!/usr/bin/env perl
-
-# Copyright 2010-2011 Microsoft Corporation
-
-# See ../../COPYING for clarification regarding multiple authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABILITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# This program splits up any kind of .scp or archive-type file.
-# If there is no utt2spk option it will work on any text file and
-# will split it up with an approximately equal number of lines in
-# each.
-# With the --utt2spk option it will work on anything that has the
-# utterance-id as the first entry on each line; the utt2spk file is
-# of the form "utterance speaker" (on each line).
-# It splits it into equal size chunks as far as it can.  If you use the utt2spk
-# option it will make sure these chunks coincide with speaker boundaries.  In
-# this case, if there are more chunks than speakers (and in some other
-# circumstances), some of the resulting chunks will be empty and it will print
-# an error message and exit with nonzero status.
-# You will normally call this like:
-# split_scp.pl scp scp.1 scp.2 scp.3 ...
-# or
-# split_scp.pl --utt2spk=utt2spk scp scp.1 scp.2 scp.3 ...
-# Note that you can use this script to split the utt2spk file itself,
-# e.g. split_scp.pl --utt2spk=utt2spk utt2spk utt2spk.1 utt2spk.2 ...
-
-# You can also call the scripts like:
-# split_scp.pl -j 3 0 scp scp.0
-# [note: with this option, it assumes zero-based indexing of the split parts,
-# i.e. the second number must be 0 <= n < num-jobs.]
-
-use warnings;
-
-$num_jobs = 0;
-$job_id = 0;
-$utt2spk_file = "";
-$one_based = 0;
-
-for ($x = 1; $x <= 3 && @ARGV > 0; $x++) {
-    if ($ARGV[0] eq "-j") {
-        shift @ARGV;
-        $num_jobs = shift @ARGV;
-        $job_id = shift @ARGV;
-    }
-    if ($ARGV[0] =~ /--utt2spk=(.+)/) {
-        $utt2spk_file=$1;
-        shift;
-    }
-    if ($ARGV[0] eq '--one-based') {
-        $one_based = 1;
-        shift @ARGV;
-    }
-}
-
-if ($num_jobs != 0 && ($num_jobs < 0 || $job_id - $one_based < 0 ||
-                       $job_id - $one_based >= $num_jobs)) {
-  die "$0: Invalid job number/index values for '-j $num_jobs $job_id" .
-      ($one_based ? " --one-based" : "") . "'\n"
-}
-
-$one_based
-    and $job_id--;
-
-if(($num_jobs == 0 && @ARGV < 2) || ($num_jobs > 0 && (@ARGV < 1 || @ARGV > 2))) {
-    die
-"Usage: split_scp.pl [--utt2spk=<utt2spk_file>] in.scp out1.scp out2.scp ...
-   or: split_scp.pl -j num-jobs job-id [--one-based] [--utt2spk=<utt2spk_file>] in.scp [out.scp]
- ... where 0 <= job-id < num-jobs, or 1 <= job-id <= num-jobs if --one-based.\n";
-}
-
-$error = 0;
-$inscp = shift @ARGV;
-if ($num_jobs == 0) { # without -j option
-    @OUTPUTS = @ARGV;
-} else {
-    for ($j = 0; $j < $num_jobs; $j++) {
-        if ($j == $job_id) {
-            if (@ARGV > 0) { push @OUTPUTS, $ARGV[0]; }
-            else { push @OUTPUTS, "-"; }
-        } else {
-            push @OUTPUTS, "/dev/null";
-        }
-    }
-}
-
-if ($utt2spk_file ne "") {  # We have the --utt2spk option...
-    open($u_fh, '<', $utt2spk_file) || die "$0: Error opening utt2spk file $utt2spk_file: $!\n";
-    while(<$u_fh>) {
-        @A = split;
-        @A == 2 || die "$0: Bad line $_ in utt2spk file $utt2spk_file\n";
-        ($u,$s) = @A;
-        $utt2spk{$u} = $s;
-    }
-    close $u_fh;
-    open($i_fh, '<', $inscp) || die "$0: Error opening input scp file $inscp: $!\n";
-    @spkrs = ();
-    while(<$i_fh>) {
-        @A = split;
-        if(@A == 0) { die "$0: Empty or space-only line in scp file $inscp\n"; }
-        $u = $A[0];
-        $s = $utt2spk{$u};
-        defined $s || die "$0: No utterance $u in utt2spk file $utt2spk_file\n";
-        if(!defined $spk_count{$s}) {
-            push @spkrs, $s;
-            $spk_count{$s} = 0;
-            $spk_data{$s} = [];  # ref to new empty array.
-        }
-        $spk_count{$s}++;
-        push @{$spk_data{$s}}, $_;
-    }
-    # Now split as equally as possible ..
-    # First allocate spks to files by allocating an approximately
-    # equal number of speakers.
-    $numspks = @spkrs;  # number of speakers.
-    $numscps = @OUTPUTS; # number of output files.
-    if ($numspks < $numscps) {
-      die "$0: Refusing to split data because number of speakers $numspks " .
-          "is less than the number of output .scp files $numscps\n";
-    }
-    for($scpidx = 0; $scpidx < $numscps; $scpidx++) {
-        $scparray[$scpidx] = []; # [] is array reference.
-    }
-    for ($spkidx = 0; $spkidx < $numspks; $spkidx++) {
-        $scpidx = int(($spkidx*$numscps) / $numspks);
-        $spk = $spkrs[$spkidx];
-        push @{$scparray[$scpidx]}, $spk;
-        $scpcount[$scpidx] += $spk_count{$spk};
-    }
-
-    # Now will try to reassign beginning + ending speakers
-    # to different scp's and see if it gets more balanced.
-    # Suppose the objective function we're minimizing is sum_i (num utts in scp[i] - average)^2.
-    # We can show that if considering changing just 2 scp's, we minimize
-    # this by minimizing the squared difference in sizes.  This is
-    # equivalent to minimizing the absolute difference in sizes.  This
-    # shows this method is bound to converge.
-
-    $changed = 1;
-    while($changed) {
-        $changed = 0;
-        for($scpidx = 0; $scpidx < $numscps; $scpidx++) {
-            # First try to reassign ending spk of this scp.
-            if($scpidx < $numscps-1) {
-                $sz = @{$scparray[$scpidx]};
-                if($sz > 0) {
-                    $spk = $scparray[$scpidx]->[$sz-1];
-                    $count = $spk_count{$spk};
-                    $nutt1 = $scpcount[$scpidx];
-                    $nutt2 = $scpcount[$scpidx+1];
-                    if( abs( ($nutt2+$count) - ($nutt1-$count))
-                        < abs($nutt2 - $nutt1))  { # Would decrease
-                        # size-diff by reassigning spk...
-                        $scpcount[$scpidx+1] += $count;
-                        $scpcount[$scpidx] -= $count;
-                        pop @{$scparray[$scpidx]};
-                        unshift @{$scparray[$scpidx+1]}, $spk;
-                        $changed = 1;
-                    }
-                }
-            }
-            if($scpidx > 0 && @{$scparray[$scpidx]} > 0) {
-                $spk = $scparray[$scpidx]->[0];
-                $count = $spk_count{$spk};
-                $nutt1 = $scpcount[$scpidx-1];
-                $nutt2 = $scpcount[$scpidx];
-                if( abs( ($nutt2-$count) - ($nutt1+$count))
-                    < abs($nutt2 - $nutt1))  { # Would decrease
-                    # size-diff by reassigning spk...
-                    $scpcount[$scpidx-1] += $count;
-                    $scpcount[$scpidx] -= $count;
-                    shift @{$scparray[$scpidx]};
-                    push @{$scparray[$scpidx-1]}, $spk;
-                    $changed = 1;
-                }
-            }
-        }
-    }
-    # Now print out the files...
-    for($scpidx = 0; $scpidx < $numscps; $scpidx++) {
-        $scpfile = $OUTPUTS[$scpidx];
-        ($scpfile ne '-' ? open($f_fh, '>', $scpfile)
-                         : open($f_fh, '>&', \*STDOUT)) ||
-            die "$0: Could not open scp file $scpfile for writing: $!\n";
-        $count = 0;
-        if(@{$scparray[$scpidx]} == 0) {
-            print STDERR "$0: Error: split_scp.pl producing empty .scp file " .
-                         "$scpfile (too many splits and too few speakers?)\n";
-            $error = 1;
-        } else {
-            foreach $spk ( @{$scparray[$scpidx]} ) {
-                print $f_fh @{$spk_data{$spk}};
-                $count += $spk_count{$spk};
-            }
-            $count == $scpcount[$scpidx] || die "Count mismatch [code error]";
-        }
-        close($f_fh);
-    }
-} else {
-   # This block is the "normal" case where there is no --utt2spk
-   # option and we just break into equal size chunks.
-
-    open($i_fh, '<', $inscp) || die "$0: Error opening input scp file $inscp: $!\n";
-
-    $numscps = @OUTPUTS;  # size of array.
-    @F = ();
-    while(<$i_fh>) {
-        push @F, $_;
-    }
-    $numlines = @F;
-    if($numlines == 0) {
-        print STDERR "$0: error: empty input scp file $inscp\n";
-        $error = 1;
-    }
-    $linesperscp = int( $numlines / $numscps); # the "whole part"..
-    $linesperscp >= 1 || die "$0: You are splitting into too many pieces! [reduce \$nj ($numscps) to be smaller than the number of lines ($numlines) in $inscp]\n";
-    $remainder = $numlines - ($linesperscp * $numscps);
-    ($remainder >= 0 && $remainder < $numlines) || die "bad remainder $remainder";
-    # [just doing int() rounds down].
-    $n = 0;
-    for($scpidx = 0; $scpidx < @OUTPUTS; $scpidx++) {
-        $scpfile = $OUTPUTS[$scpidx];
-        ($scpfile ne '-' ? open($o_fh, '>', $scpfile)
-                         : open($o_fh, '>&', \*STDOUT)) ||
-            die "$0: Could not open scp file $scpfile for writing: $!\n";
-        for($k = 0; $k < $linesperscp + ($scpidx < $remainder ? 1 : 0); $k++) {
-            print $o_fh $F[$n++];
-        }
-        close($o_fh) || die "$0: Error closing scp file $scpfile: $!\n";
-    }
-    $n == $numlines || die "$n != $numlines [code error]";
-}
-
-exit ($error);
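The speaker-balancing heuristic described in the header comments (shrink the deviation between chunk sizes by moving boundary speakers) in a compact Python sketch; only the rightward move is shown, while the script above also tries the reverse direction:

```python
def balance(chunks, counts):
    # chunks: ordered lists of speaker ids; counts: utterances per speaker.
    size = lambda c: sum(counts[s] for s in c)
    changed = True
    while changed:
        changed = False
        for i in range(len(chunks) - 1):
            a, b = chunks[i], chunks[i + 1]
            if not a:
                continue
            moved = counts[a[-1]]
            # Move the last speaker of chunk i rightwards if that reduces
            # the absolute size difference between the two chunks.
            if abs((size(b) + moved) - (size(a) - moved)) < abs(size(b) - size(a)):
                b.insert(0, a.pop())
                changed = True
    return chunks

print(balance([["s1", "s2", "s3"], ["s4"]], {"s1": 5, "s2": 5, "s3": 5, "s4": 5}))
# [['s1', 's2'], ['s3', 's4']]
```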
diff --git a/egs/aishell2/transformer/utils/subset_data_dir_tr_cv.sh b/egs/aishell2/transformer/utils/subset_data_dir_tr_cv.sh
deleted file mode 100755
index e16cebd..0000000
--- a/egs/aishell2/transformer/utils/subset_data_dir_tr_cv.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env bash
-
-dev_num_utt=1000
-
-echo "$0 $@"
-. utils/parse_options.sh || exit 1;
-
-train_data=$1
-out_dir=$2
-
-[ ! -f ${train_data}/wav.scp ] && echo "$0: no such file ${train_data}/wav.scp" && exit 1;
-[ ! -f ${train_data}/text ] && echo "$0: no such file ${train_data}/text" && exit 1;
-
-mkdir -p ${out_dir}/train && mkdir -p ${out_dir}/dev
-
-cp ${train_data}/wav.scp ${out_dir}/train/wav.scp.bak
-cp ${train_data}/text ${out_dir}/train/text.bak
-
-num_utt=$(wc -l <${out_dir}/train/wav.scp.bak)
-
-utils/shuffle_list.pl --srand 1 ${out_dir}/train/wav.scp.bak > ${out_dir}/train/wav.scp.shuf
-head -n ${dev_num_utt} ${out_dir}/train/wav.scp.shuf > ${out_dir}/dev/wav.scp
-tail -n $((${num_utt}-${dev_num_utt})) ${out_dir}/train/wav.scp.shuf > ${out_dir}/train/wav.scp
-
-utils/shuffle_list.pl --srand 1 ${out_dir}/train/text.bak > ${out_dir}/train/text.shuf
-head -n ${dev_num_utt} ${out_dir}/train/text.shuf > ${out_dir}/dev/text
-tail -n $((${num_utt}-${dev_num_utt})) ${out_dir}/train/text.shuf > ${out_dir}/train/text
-
-rm ${out_dir}/train/wav.scp.bak ${out_dir}/train/text.bak
-rm ${out_dir}/train/wav.scp.shuf ${out_dir}/train/text.shuf
diff --git a/egs/aishell2/transformer/utils/text2token.py b/egs/aishell2/transformer/utils/text2token.py
deleted file mode 100755
index 56c3913..0000000
--- a/egs/aishell2/transformer/utils/text2token.py
+++ /dev/null
@@ -1,135 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright 2017 Johns Hopkins University (Shinji Watanabe)
-#  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
-
-
-import argparse
-import codecs
-import re
-import sys
-
-is_python2 = sys.version_info[0] == 2
-
-
-def exist_or_not(i, match_pos):
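-    # Return the (start, end) span from match_pos that covers index i,
-    # or (None, None) when i lies outside every matched span.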
-    start_pos = None
-    end_pos = None
-    for pos in match_pos:
-        if pos[0] <= i < pos[1]:
-            start_pos = pos[0]
-            end_pos = pos[1]
-            break
-
-    return start_pos, end_pos
-
-
-def get_parser():
-    parser = argparse.ArgumentParser(
-        description="convert raw text to tokenized text",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
-    parser.add_argument(
-        "--nchar",
-        "-n",
-        default=1,
-        type=int,
-        help="number of characters to split, i.e., \
-                        aabb -> a a b b with -n 1 and aa bb with -n 2",
-    )
-    parser.add_argument(
-        "--skip-ncols", "-s", default=0, type=int, help="skip first n columns"
-    )
-    parser.add_argument("--space", default="<space>", type=str, help="space symbol")
-    parser.add_argument(
-        "--non-lang-syms",
-        "-l",
-        default=None,
-        type=str,
-        help="list of non-linguistic symobles, e.g., <NOISE> etc.",
-    )
-    parser.add_argument("text", type=str, default=False, nargs="?", help="input text")
-    parser.add_argument(
-        "--trans_type",
-        "-t",
-        type=str,
-        default="char",
-        choices=["char", "phn"],
-        help="""Transcript type. char/phn. e.g., for TIMIT FADG0_SI1279 -
-                        If trans_type is char,
-                        read from SI1279.WRD file -> "bricks are an alternative"
-                        Else if trans_type is phn,
-                        read from SI1279.PHN file -> "sil b r ih sil k s aa r er n aa l
-                        sil t er n ih sil t ih v sil" """,
-    )
-    return parser
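-
-# Example (assumed invocation; "-s 1" passes the utterance id through untouched):
-#   echo "BAC009_W0001 今天天气" | python text2token.py -s 1 -n 1
-#   -> "BAC009_W0001 今 天 天 气"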
-
-
-def main():
-    parser = get_parser()
-    args = parser.parse_args()
-
-    rs = []
-    if args.non_lang_syms is not None:
-        with codecs.open(args.non_lang_syms, "r", encoding="utf-8") as f:
-            nls = [x.rstrip() for x in f.readlines()]
-            rs = [re.compile(re.escape(x)) for x in nls]
-
-    if args.text:
-        f = codecs.open(args.text, encoding="utf-8")
-    else:
-        f = codecs.getreader("utf-8")(sys.stdin if is_python2 else sys.stdin.buffer)
-
-    sys.stdout = codecs.getwriter("utf-8")(
-        sys.stdout if is_python2 else sys.stdout.buffer
-    )
-    line = f.readline()
-    n = args.nchar
-    while line:
-        x = line.split()
-        print(" ".join(x[: args.skip_ncols]), end=" ")
-        a = " ".join(x[args.skip_ncols :])
-
-        # get all matched positions
-        match_pos = []
-        for r in rs:
-            i = 0
-            while i >= 0:
-                m = r.search(a, i)
-                if m:
-                    match_pos.append([m.start(), m.end()])
-                    i = m.end()
-                else:
-                    break
-
-        if args.trans_type == "phn":
-            a = a.split(" ")
-        else:
-            if len(match_pos) > 0:
-                chars = []
-                i = 0
-                while i < len(a):
-                    start_pos, end_pos = exist_or_not(i, match_pos)
-                    if start_pos is not None:
-                        chars.append(a[start_pos:end_pos])
-                        i = end_pos
-                    else:
-                        chars.append(a[i])
-                        i += 1
-                a = chars
-
-            a = [a[j : j + n] for j in range(0, len(a), n)]
-
-        a_flat = []
-        for z in a:
-            a_flat.append("".join(z))
-
-        a_chars = [z.replace(" ", args.space) for z in a_flat]
-        if args.trans_type == "phn":
-            a_chars = [z.replace("sil", args.space) for z in a_chars]
-        print(" ".join(a_chars))
-        line = f.readline()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/egs/aishell2/transformer/utils/text_tokenize.py b/egs/aishell2/transformer/utils/text_tokenize.py
deleted file mode 100755
index 962ea11..0000000
--- a/egs/aishell2/transformer/utils/text_tokenize.py
+++ /dev/null
@@ -1,106 +0,0 @@
-import re
-import argparse
-
-
-def load_dict(seg_file):
-    seg_dict = {}
-    with open(seg_file, 'r') as infile:
-        for line in infile:
-            s = line.strip().split()
-            key = s[0]
-            value = s[1:]
-            seg_dict[key] = " ".join(value)
-    return seg_dict
-
-
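-# Forward maximum matching: at each position greedily take the longest
-# dictionary entry; characters not in the dict pass through as single tokens.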
-def forward_segment(text, dic):
-    word_list = []
-    i = 0
-    while i < len(text):
-        longest_word = text[i]
-        for j in range(i + 1, len(text) + 1):
-            word = text[i:j]
-            if word in dic:
-                if len(word) > len(longest_word):
-                    longest_word = word
-        word_list.append(longest_word)
-        i += len(longest_word)
-    return word_list
-
-
-def tokenize(txt,
-             seg_dict):
-    out_txt = ""
-    pattern = re.compile(r"([\u4E00-\u9FA5A-Za-z0-9])")
-    for word in txt:
-        if pattern.match(word):
-            if word in seg_dict:
-                out_txt += seg_dict[word] + " "
-            else:
-                out_txt += "<unk>" + " "
-        else:
-            continue
-    return out_txt.strip()
-
-
-def get_parser():
-    parser = argparse.ArgumentParser(
-        description="text tokenize",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
-    parser.add_argument(
-        "--text-file",
-        "-t",
-        default=False,
-        required=True,
-        type=str,
-        help="input text",
-    )
-    parser.add_argument(
-        "--seg-file",
-        "-s",
-        default=False,
-        required=True,
-        type=str,
-        help="seg file",
-    )
-    parser.add_argument(
-        "--txt-index",
-        "-i",
-        default=1,
-        required=True,
-        type=int,
-        help="txt index",
-    )
-    parser.add_argument(
-        "--output-dir",
-        "-o",
-        default=False,
-        required=True,
-        type=str,
-        help="output dir",
-    )
-    return parser
-
-
-def main():
-    parser = get_parser()
-    args = parser.parse_args()
-
-    txt_writer = open("{}/text.{}.txt".format(args.output_dir, args.txt_index), 'w')
-    shape_writer = open("{}/len.{}".format(args.output_dir, args.txt_index), 'w')
-    seg_dict = load_dict(args.seg_file)
-    with open(args.text_file, 'r') as infile:
-        for line in infile:
-            s = line.strip().split()
-            text_id = s[0]
-            text_list = forward_segment("".join(s[1:]).lower(), seg_dict)
-            text = tokenize(text_list, seg_dict)
-            lens = len(text.strip().split())
-            txt_writer.write(text_id + " " + text + '\n')
-            shape_writer.write(text_id + " " + str(lens) + '\n')
-
-
-if __name__ == '__main__':
-    main()
-
diff --git a/egs/aishell2/transformer/utils/text_tokenize.sh b/egs/aishell2/transformer/utils/text_tokenize.sh
deleted file mode 100755
index 6b74fef..0000000
--- a/egs/aishell2/transformer/utils/text_tokenize.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/env bash
-
-
-# Begin configuration section.
-nj=32
-cmd=utils/run.pl
-
-echo "$0 $@"
-
-. utils/parse_options.sh || exit 1;
-
-# tokenize configuration
-text_dir=$1
-seg_file=$2
-logdir=$3
-output_dir=$4
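-
-# Example (hypothetical paths), assuming pre-split shards ${text_dir}/txt/text.JOB.txt:
-#   utils/text_tokenize.sh --nj 32 exp/texts data/seg_dict exp/log exp/token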
-
-txt_dir=${output_dir}/txt; mkdir -p ${output_dir}/txt
-mkdir -p ${logdir}
-
-$cmd JOB=1:$nj $logdir/text_tokenize.JOB.log \
-  python utils/text_tokenize.py -t ${text_dir}/txt/text.JOB.txt \
-      -s ${seg_file} -i JOB -o ${txt_dir} \
-      || exit 1;
-
-# concatenate the text files together.
-for n in $(seq $nj); do
-  cat ${txt_dir}/text.$n.txt || exit 1
-done > ${output_dir}/text || exit 1
-
-for n in $(seq $nj); do
-  cat ${txt_dir}/len.$n || exit 1
-done > ${output_dir}/text_shape || exit 1
-
-echo "$0: Succeeded text tokenize"
diff --git a/egs/aishell2/transformer/utils/textnorm_zh.py b/egs/aishell2/transformer/utils/textnorm_zh.py
deleted file mode 100755
index 79feb83..0000000
--- a/egs/aishell2/transformer/utils/textnorm_zh.py
+++ /dev/null
@@ -1,834 +0,0 @@
-#!/usr/bin/env python3
-# coding=utf-8
-
-# Authors:
-#   2019.5 Zhiyang Zhou (https://github.com/Joee1995/chn_text_norm.git)
-#   2019.9 Jiayu DU
-#
-# requirements:
-#   - python 3.X
-# notes: python 2.X WILL fail or produce misleading results
-
-import sys, os, argparse, codecs, string, re
-
-# ================================================================================ #
-#                                    basic constant
-# ================================================================================ #
-CHINESE_DIGIS = u'零一二三四五六七八九'
-BIG_CHINESE_DIGIS_SIMPLIFIED = u'零壹贰叁肆伍陆柒捌玖'
-BIG_CHINESE_DIGIS_TRADITIONAL = u'零壹貳參肆伍陸柒捌玖'
-SMALLER_BIG_CHINESE_UNITS_SIMPLIFIED = u'十百千万'
-SMALLER_BIG_CHINESE_UNITS_TRADITIONAL = u'拾佰仟萬'
-LARGER_CHINESE_NUMERING_UNITS_SIMPLIFIED = u'亿兆京垓秭穰沟涧正载'
-LARGER_CHINESE_NUMERING_UNITS_TRADITIONAL = u'億兆京垓秭穰溝澗正載'
-SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED = u'十百千万'
-SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL = u'拾佰仟萬'
-
-ZERO_ALT = u'〇'
-ONE_ALT = u'幺'
-TWO_ALTS = [u'两', u'兩']
-
-POSITIVE = [u'正', u'正']
-NEGATIVE = [u'负', u'負']
-POINT = [u'点', u'點']
-# PLUS = [u'加', u'加']
-# SIL = [u'杠', u'槓']
-
-FILLER_CHARS = ['呃', '啊']
-ER_WHITELIST = '(儿女|儿子|儿孙|女儿|儿媳|妻儿|' \
-             '胎儿|婴儿|新生儿|婴幼儿|幼儿|少儿|小儿|儿歌|儿童|儿科|托儿所|孤儿|' \
-             '儿戏|儿化|台儿庄|鹿儿岛|正儿八经|吊儿郎当|生儿育女|托儿带女|养儿防老|痴儿呆女|' \
-             '佳儿佳妇|儿怜兽扰|儿无常父|儿不嫌母丑|儿行千里母担忧|儿大不由爷|苏乞儿)'
-
-# 涓枃鏁板瓧绯荤粺绫诲瀷
-NUMBERING_TYPES = ['low', 'mid', 'high']
-
-CURRENCY_NAMES = '(人民币|美元|日元|英镑|欧元|马克|法郎|加拿大元|澳元|港币|先令|芬兰马克|爱尔兰镑|' \
-                 '里拉|荷兰盾|埃斯库多|比塞塔|印尼盾|林吉特|新西兰元|比索|卢布|新加坡元|韩元|泰铢)'
-CURRENCY_UNITS = '((亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|)元|(亿|千万|百万|万|千|百|)块|角|毛|分)'
-COM_QUANTIFIERS = '(匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|' \
-                  '砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|' \
-                  '针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|' \
-                  '毫|厘|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|' \
-                  '盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|旬|' \
-                  '纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块)'
-
-# punctuation information is based on the Zhon project (https://github.com/tsroten/zhon.git)
-CHINESE_PUNC_STOP = '！？｡。'
-CHINESE_PUNC_NON_STOP = '＂＃＄％＆＇（）＊＋，－／：；＜＝＞＠［＼］＾＿｀｛｜｝～｟｠｢｣､、〃《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—''‛""„‟…‧﹏'
-CHINESE_PUNC_LIST = CHINESE_PUNC_STOP + CHINESE_PUNC_NON_STOP
-
-# ================================================================================ #
-#                                    basic class
-# ================================================================================ #
-class ChineseChar(object):
-    """
-    涓枃瀛楃
-    姣忎釜瀛楃瀵瑰簲绠�浣撳拰绻佷綋,
-    e.g. 绠�浣� = '璐�', 绻佷綋 = '璨�'
-    杞崲鏃跺彲杞崲涓虹畝浣撴垨绻佷綋
-    """
-
-    def __init__(self, simplified, traditional):
-        self.simplified = simplified
-        self.traditional = traditional
-        #self.__repr__ = self.__str__
-
-    def __str__(self):
-        return self.simplified or self.traditional or None
-
-    def __repr__(self):
-        return self.__str__()
-
-
-class ChineseNumberUnit(ChineseChar):
-    """
-    涓枃鏁板瓧/鏁颁綅瀛楃
-    姣忎釜瀛楃闄ょ箒绠�浣撳杩樻湁涓�涓澶栫殑澶у啓瀛楃
-    e.g. '闄�' 鍜� '闄�'
-    """
-
-    def __init__(self, power, simplified, traditional, big_s, big_t):
-        super(ChineseNumberUnit, self).__init__(simplified, traditional)
-        self.power = power
-        self.big_s = big_s
-        self.big_t = big_t
-
-    def __str__(self):
-        return '10^{}'.format(self.power)
-
-    @classmethod
-    def create(cls, index, value, numbering_type=NUMBERING_TYPES[1], small_unit=False):
-
-        if small_unit:
-            return ChineseNumberUnit(power=index + 1,
-                                     simplified=value[0], traditional=value[1], big_s=value[1], big_t=value[1])
-        elif numbering_type == NUMBERING_TYPES[0]:
-            return ChineseNumberUnit(power=index + 8,
-                                     simplified=value[0], traditional=value[1], big_s=value[0], big_t=value[1])
-        elif numbering_type == NUMBERING_TYPES[1]:
-            return ChineseNumberUnit(power=(index + 2) * 4,
-                                     simplified=value[0], traditional=value[1], big_s=value[0], big_t=value[1])
-        elif numbering_type == NUMBERING_TYPES[2]:
-            return ChineseNumberUnit(power=pow(2, index + 3),
-                                     simplified=value[0], traditional=value[1], big_s=value[0], big_t=value[1])
-        else:
-            raise ValueError(
-                'Counting type should be in {0} ({1} provided).'.format(NUMBERING_TYPES, numbering_type))
-
-
-class ChineseNumberDigit(ChineseChar):
-    """
-    涓枃鏁板瓧瀛楃
-    """
-
-    def __init__(self, value, simplified, traditional, big_s, big_t, alt_s=None, alt_t=None):
-        super(ChineseNumberDigit, self).__init__(simplified, traditional)
-        self.value = value
-        self.big_s = big_s
-        self.big_t = big_t
-        self.alt_s = alt_s
-        self.alt_t = alt_t
-
-    def __str__(self):
-        return str(self.value)
-
-    @classmethod
-    def create(cls, i, v):
-        return ChineseNumberDigit(i, v[0], v[1], v[2], v[3])
-
-
-class ChineseMath(ChineseChar):
-    """
-    涓枃鏁颁綅瀛楃
-    """
-
-    def __init__(self, simplified, traditional, symbol, expression=None):
-        super(ChineseMath, self).__init__(simplified, traditional)
-        self.symbol = symbol
-        self.expression = expression
-        self.big_s = simplified
-        self.big_t = traditional
-
-
-CC, CNU, CND, CM = ChineseChar, ChineseNumberUnit, ChineseNumberDigit, ChineseMath
-
-
-class NumberSystem(object):
-    """
-    涓枃鏁板瓧绯荤粺
-    """
-    pass
-
-
-class MathSymbol(object):
-    """
-    Math symbols used by the Chinese number system (traditional/simplified), e.g.
-    positive = ['正', '正']
-    negative = ['负', '負']
-    point = ['点', '點']
-    """
-
-    def __init__(self, positive, negative, point):
-        self.positive = positive
-        self.negative = negative
-        self.point = point
-
-    def __iter__(self):
-        for v in self.__dict__.values():
-            yield v
-
-
-# class OtherSymbol(object):
-#     """
-#     Other symbols
-#     """
-#
-#     def __init__(self, sil):
-#         self.sil = sil
-#
-#     def __iter__(self):
-#         for v in self.__dict__.values():
-#             yield v
-
-
-# ================================================================================ #
-#                                    basic utils
-# ================================================================================ #
-def create_system(numbering_type=NUMBERING_TYPES[1]):
-    """
-    Create the number system for the given numbering type (default: mid).
-    NUMBERING_TYPES = ['low', 'mid', 'high'] selects how unit powers grow:
-        low:  '兆' = '亿' * '十' = $10^{9}$,  '京' = '兆' * '十', etc.
-        mid:  '兆' = '亿' * '万' = $10^{12}$, '京' = '兆' * '万', etc.
-        high: '兆' = '亿' * '亿' = $10^{16}$, '京' = '兆' * '兆', etc.
-    Returns the corresponding NumberSystem.
-    """
-
-    # Chinese number units of '亿' and larger
-    all_larger_units = zip(
-        LARGER_CHINESE_NUMERING_UNITS_SIMPLIFIED, LARGER_CHINESE_NUMERING_UNITS_TRADITIONAL)
-    larger_units = [CNU.create(i, v, numbering_type, False)
-                    for i, v in enumerate(all_larger_units)]
-    # Chinese number units of '十, 百, 千, 万'
-    all_smaller_units = zip(
-        SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED, SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL)
-    smaller_units = [CNU.create(i, v, small_unit=True)
-                     for i, v in enumerate(all_smaller_units)]
-    # digits
-    chinese_digis = zip(CHINESE_DIGIS, CHINESE_DIGIS,
-                        BIG_CHINESE_DIGIS_SIMPLIFIED, BIG_CHINESE_DIGIS_TRADITIONAL)
-    digits = [CND.create(i, v) for i, v in enumerate(chinese_digis)]
-    digits[0].alt_s, digits[0].alt_t = ZERO_ALT, ZERO_ALT
-    digits[1].alt_s, digits[1].alt_t = ONE_ALT, ONE_ALT
-    digits[2].alt_s, digits[2].alt_t = TWO_ALTS[0], TWO_ALTS[1]
-
-    # symbols
-    positive_cn = CM(POSITIVE[0], POSITIVE[1], '+', lambda x: x)
-    negative_cn = CM(NEGATIVE[0], NEGATIVE[1], '-', lambda x: -x)
-    point_cn = CM(POINT[0], POINT[1], '.', lambda x,
-                  y: float(str(x) + '.' + str(y)))
-    # sil_cn = CM(SIL[0], SIL[1], '-', lambda x, y: float(str(x) + '-' + str(y)))
-    system = NumberSystem()
-    system.units = smaller_units + larger_units
-    system.digits = digits
-    system.math = MathSymbol(positive_cn, negative_cn, point_cn)
-    # system.symbols = OtherSymbol(sil_cn)
-    return system
-
-
-def chn2num(chinese_string, numbering_type=NUMBERING_TYPES[1]):
-
-    def get_symbol(char, system):
-        for u in system.units:
-            if char in [u.traditional, u.simplified, u.big_s, u.big_t]:
-                return u
-        for d in system.digits:
-            if char in [d.traditional, d.simplified, d.big_s, d.big_t, d.alt_s, d.alt_t]:
-                return d
-        for m in system.math:
-            if char in [m.traditional, m.simplified]:
-                return m
-
-    def string2symbols(chinese_string, system):
-        int_string, dec_string = chinese_string, ''
-        for p in [system.math.point.simplified, system.math.point.traditional]:
-            if p in chinese_string:
-                int_string, dec_string = chinese_string.split(p)
-                break
-        return [get_symbol(c, system) for c in int_string], \
-               [get_symbol(c, system) for c in dec_string]
-
-    def correct_symbols(integer_symbols, system):
-        """
-        Expand elliptical forms, e.g. 一百八 -> 一百八十,
-        一亿一千三百万 -> 一亿 一千万 三百万.
-        """
-
-        if integer_symbols and isinstance(integer_symbols[0], CNU):
-            if integer_symbols[0].power == 1:
-                integer_symbols = [system.digits[1]] + integer_symbols
-
-        if len(integer_symbols) > 1:
-            if isinstance(integer_symbols[-1], CND) and isinstance(integer_symbols[-2], CNU):
-                integer_symbols.append(
-                    CNU(integer_symbols[-2].power - 1, None, None, None, None))
-
-        result = []
-        unit_count = 0
-        for s in integer_symbols:
-            if isinstance(s, CND):
-                result.append(s)
-                unit_count = 0
-            elif isinstance(s, CNU):
-                current_unit = CNU(s.power, None, None, None, None)
-                unit_count += 1
-
-            if unit_count == 1:
-                result.append(current_unit)
-            elif unit_count > 1:
-                for i in range(len(result)):
-                    if isinstance(result[-i - 1], CNU) and result[-i - 1].power < current_unit.power:
-                        result[-i - 1] = CNU(result[-i - 1].power +
-                                             current_unit.power, None, None, None, None)
-        return result
-
-    def compute_value(integer_symbols):
-        """
-        Compute the integer value.
-        When the current unit is larger than the previous one, everything accumulated
-        so far is multiplied by the current unit, e.g. '两千万' = 2000 * 10000, not 2000 + 10000.
-        """
-        value = [0]
-        last_power = 0
-        for s in integer_symbols:
-            if isinstance(s, CND):
-                value[-1] = s.value
-            elif isinstance(s, CNU):
-                value[-1] *= pow(10, s.power)
-                if s.power > last_power:
-                    value[:-1] = list(map(lambda v: v *
-                                                    pow(10, s.power), value[:-1]))
-                    last_power = s.power
-                value.append(0)
-        return sum(value)
-
-    system = create_system(numbering_type)
-    int_part, dec_part = string2symbols(chinese_string, system)
-    int_part = correct_symbols(int_part, system)
-    int_str = str(compute_value(int_part))
-    dec_str = ''.join([str(d.value) for d in dec_part])
-    if dec_part:
-        return '{0}.{1}'.format(int_str, dec_str)
-    else:
-        return int_str
-
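-# Example round-trip (assumed): chn2num('一千三百二十') -> '1320',
-# num2chn('1320') -> '一千三百二十'.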
-
-def num2chn(number_string, numbering_type=NUMBERING_TYPES[1], big=False,
-            traditional=False, alt_zero=False, alt_one=False, alt_two=True,
-            use_zeros=True, use_units=True):
-
-    def get_value(value_string, use_zeros=True):
-
-        striped_string = value_string.lstrip('0')
-
-        # record nothing if all zeros
-        if not striped_string:
-            return []
-
-        # record one digits
-        elif len(striped_string) == 1:
-            if use_zeros and len(value_string) != len(striped_string):
-                return [system.digits[0], system.digits[int(striped_string)]]
-            else:
-                return [system.digits[int(striped_string)]]
-
-        # recursively record multiple digits
-        else:
-            result_unit = next(u for u in reversed(
-                system.units) if u.power < len(striped_string))
-            result_string = value_string[:-result_unit.power]
-            return get_value(result_string) + [result_unit] + get_value(striped_string[-result_unit.power:])
-
-    system = create_system(numbering_type)
-
-    int_dec = number_string.split('.')
-    if len(int_dec) == 1:
-        int_string = int_dec[0]
-        dec_string = ""
-    elif len(int_dec) == 2:
-        int_string = int_dec[0]
-        dec_string = int_dec[1]
-    else:
-        raise ValueError(
-            "invalid input num string with more than one dot: {}".format(number_string))
-
-    if use_units and len(int_string) > 1:
-        result_symbols = get_value(int_string)
-    else:
-        result_symbols = [system.digits[int(c)] for c in int_string]
-    dec_symbols = [system.digits[int(c)] for c in dec_string]
-    if dec_string:
-        result_symbols += [system.math.point] + dec_symbols
-
-    if alt_two:
-        liang = CND(2, system.digits[2].alt_s, system.digits[2].alt_t,
-                    system.digits[2].big_s, system.digits[2].big_t)
-        for i, v in enumerate(result_symbols):
-            if isinstance(v, CND) and v.value == 2:
-                next_symbol = result_symbols[i +
-                                             1] if i < len(result_symbols) - 1 else None
-                previous_symbol = result_symbols[i - 1] if i > 0 else None
-                if isinstance(next_symbol, CNU) and isinstance(previous_symbol, (CNU, type(None))):
-                    if next_symbol.power != 1 and ((previous_symbol is None) or (previous_symbol.power != 1)):
-                        result_symbols[i] = liang
-
-    # if big is True, '两' will not be used and `alt_two` has no impact on output
-    if big:
-        attr_name = 'big_'
-        if traditional:
-            attr_name += 't'
-        else:
-            attr_name += 's'
-    else:
-        if traditional:
-            attr_name = 'traditional'
-        else:
-            attr_name = 'simplified'
-
-    result = ''.join([getattr(s, attr_name) for s in result_symbols])
-
-    # if not use_zeros:
-    #     result = result.strip(getattr(system.digits[0], attr_name))
-
-    if alt_zero:
-        result = result.replace(
-            getattr(system.digits[0], attr_name), system.digits[0].alt_s)
-
-    if alt_one:
-        result = result.replace(
-            getattr(system.digits[1], attr_name), system.digits[1].alt_s)
-
-    for i, p in enumerate(POINT):
-        if result.startswith(p):
-            return CHINESE_DIGIS[0] + result
-
-    # ^10, 11, .., 19
-    if len(result) >= 2 and result[1] in [SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED[0],
-                                          SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL[0]] and \
-            result[0] in [CHINESE_DIGIS[1], BIG_CHINESE_DIGIS_SIMPLIFIED[1], BIG_CHINESE_DIGIS_TRADITIONAL[1]]:
-        result = result[1:]
-
-    return result
-
-
-# ================================================================================ #
-#                          different types of rewriters
-# ================================================================================ #
-class Cardinal:
-    """
-    CARDINAL class
-    """
-
-    def __init__(self, cardinal=None, chntext=None):
-        self.cardinal = cardinal
-        self.chntext = chntext
-
-    def chntext2cardinal(self):
-        return chn2num(self.chntext)
-
-    def cardinal2chntext(self):
-        return num2chn(self.cardinal)
-
-class Digit:
-    """
-    DIGIT class
-    """
-
-    def __init__(self, digit=None, chntext=None):
-        self.digit = digit
-        self.chntext = chntext
-
-    # def chntext2digit(self):
-    #     return chn2num(self.chntext)
-
-    def digit2chntext(self):
-        return num2chn(self.digit, alt_two=False, use_units=False)
-
-
-class TelePhone:
-    """
-    TELEPHONE class
-    """
-
-    def __init__(self, telephone=None, raw_chntext=None, chntext=None):
-        self.telephone = telephone
-        self.raw_chntext = raw_chntext
-        self.chntext = chntext
-
-    # def chntext2telephone(self):
-    #     sil_parts = self.raw_chntext.split('<SIL>')
-    #     self.telephone = '-'.join([
-    #         str(chn2num(p)) for p in sil_parts
-    #     ])
-    #     return self.telephone
-
-    def telephone2chntext(self, fixed=False):
-
-        if fixed:
-            sil_parts = self.telephone.split('-')
-            self.raw_chntext = '<SIL>'.join([
-                num2chn(part, alt_two=False, use_units=False) for part in sil_parts
-            ])
-            self.chntext = self.raw_chntext.replace('<SIL>', '')
-        else:
-            sp_parts = self.telephone.strip('+').split()
-            self.raw_chntext = '<SP>'.join([
-                num2chn(part, alt_two=False, use_units=False) for part in sp_parts
-            ])
-            self.chntext = self.raw_chntext.replace('<SP>', '')
-        return self.chntext
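-
-    # Example (assumed): TelePhone(telephone='0595-23865596').telephone2chntext(fixed=True)
-    # returns '零五九五二三八六五五九六' (each digit read out; <SIL> markers removed).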
-
-
-class Fraction:
-    """
-    FRACTION class
-    """
-
-    def __init__(self, fraction=None, chntext=None):
-        self.fraction = fraction
-        self.chntext = chntext
-
-    def chntext2fraction(self):
-        denominator, numerator = self.chntext.split('分之')
-        return chn2num(numerator) + '/' + chn2num(denominator)
-
-    def fraction2chntext(self):
-        numerator, denominator = self.fraction.split('/')
-        return num2chn(denominator) + '分之' + num2chn(numerator)
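-
-    # Example (assumed): Fraction(fraction='2/3').fraction2chntext() -> '三分之二'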
-
-
-class Date:
-    """
-    DATE class
-    """
-
-    def __init__(self, date=None, chntext=None):
-        self.date = date
-        self.chntext = chntext
-
-    # def chntext2date(self):
-    #     chntext = self.chntext
-    #     try:
-    #         year, other = chntext.strip().split('年', maxsplit=1)
-    #         year = Digit(chntext=year).digit2chntext() + '年'
-    #     except ValueError:
-    #         other = chntext
-    #         year = ''
-    #     if other:
-    #         try:
-    #             month, day = other.strip().split('月', maxsplit=1)
-    #             month = Cardinal(chntext=month).chntext2cardinal() + '月'
-    #         except ValueError:
-    #             day = chntext
-    #             month = ''
-    #         if day:
-    #             day = Cardinal(chntext=day[:-1]).chntext2cardinal() + day[-1]
-    #     else:
-    #         month = ''
-    #         day = ''
-    #     date = year + month + day
-    #     self.date = date
-    #     return self.date
-
-    def date2chntext(self):
-        date = self.date
-        try:
-            year, other = date.strip().split('年', 1)
-            year = Digit(digit=year).digit2chntext() + '年'
-        except ValueError:
-            other = date
-            year = ''
-        if other:
-            try:
-                month, day = other.strip().split('月', 1)
-                month = Cardinal(cardinal=month).cardinal2chntext() + '月'
-            except ValueError:
-                day = date
-                month = ''
-            if day:
-                day = Cardinal(cardinal=day[:-1]).cardinal2chntext() + day[-1]
-        else:
-            month = ''
-            day = ''
-        chntext = year + month + day
-        self.chntext = chntext
-        return self.chntext
-
-
-class Money:
-    """
-    MONEY class
-    """
-
-    def __init__(self, money=None, chntext=None):
-        self.money = money
-        self.chntext = chntext
-
-    # def chntext2money(self):
-    #     return self.money
-
-    def money2chntext(self):
-        money = self.money
-        pattern = re.compile(r'(\d+(\.\d+)?)')
-        matchers = pattern.findall(money)
-        if matchers:
-            for matcher in matchers:
-                money = money.replace(matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext())
-        self.chntext = money
-        return self.chntext
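-
-    # Example (assumed): Money(money='34.5元').money2chntext() -> '三十四点五元'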
-
-
-class Percentage:
-    """
-    PERCENTAGE class
-    """
-
-    def __init__(self, percentage=None, chntext=None):
-        self.percentage = percentage
-        self.chntext = chntext
-
-    def chntext2percentage(self):
-        return chn2num(self.chntext.strip().strip('百分之')) + '%'
-
-    def percentage2chntext(self):
-        return '百分之' + num2chn(self.percentage.strip().strip('%'))
-
-
-def remove_erhua(text, er_whitelist):
-    """
-    Strip the r-coloring suffix '儿' (erhua) except in whitelisted words:
-    他女儿在那边儿 -> 他女儿在那边
-    """
-
-    er_pattern = re.compile(er_whitelist)
-    new_str=''
-    while re.search('儿', text):
-        a = re.search('儿', text).span()
-        remove_er_flag = 0
-
-        if er_pattern.search(text):
-            b = er_pattern.search(text).span()
-            if b[0] <= a[0]:
-                remove_er_flag = 1
-
-        if remove_er_flag == 0 :
-            new_str = new_str + text[0:a[0]]
-            text = text[a[1]:]
-        else:
-            new_str = new_str + text[0:b[1]]
-            text = text[b[1]:]
-
-    text = new_str + text
-    return text
-
-# ================================================================================ #
-#                            NSW Normalizer
-# ================================================================================ #
-class NSWNormalizer:
-    def __init__(self, raw_text):
-        self.raw_text = '^' + raw_text + '$'
-        self.norm_text = ''
-
-    def _particular(self):
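-        # Rewrite letter-'二'-letter back to letter-'2'-letter (e.g. 'O二O' -> 'O2O'),
-        # undoing the cardinal pass for alphanumeric names like O2O/B2C.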
-        text = self.norm_text
-        pattern = re.compile(r"(([a-zA-Z]+)浜�([a-zA-Z]+))")
-        matchers = pattern.findall(text)
-        if matchers:
-            # print('particular')
-            for matcher in matchers:
-                text = text.replace(matcher[0], matcher[1]+'2'+matcher[2], 1)
-        self.norm_text = text
-        return self.norm_text
-
-    def normalize(self):
-        text = self.raw_text
-
-        # Normalize dates
-        pattern = re.compile(r"\D+((([089]\d|(19|20)\d{2})年)?(\d{1,2}月(\d{1,2}[日号])?)?)")
-        matchers = pattern.findall(text)
-        if matchers:
-            #print('date')
-            for matcher in matchers:
-                text = text.replace(matcher[0], Date(date=matcher[0]).date2chntext(), 1)
-
-        # Normalize money
-        pattern = re.compile(r"\D+((\d+(\.\d+)?)[多余几]?" + CURRENCY_UNITS + r"(\d" + CURRENCY_UNITS + r"?)?)")
-        matchers = pattern.findall(text)
-        if matchers:
-            #print('money')
-            for matcher in matchers:
-                text = text.replace(matcher[0], Money(money=matcher[0]).money2chntext(), 1)
-
-        # Normalize landline/mobile phone numbers
-        # Mobile number prefixes
-        # (http://www.jihaoba.com/news/show/13680)
-        #   China Mobile:  139, 138, 137, 136, 135, 134, 159, 158, 157, 150, 151, 152, 188, 187, 182, 183, 184, 178, 198
-        #   China Unicom:  130, 131, 132, 156, 155, 186, 185, 176
-        #   China Telecom: 133, 153, 189, 180, 181, 177
-        pattern = re.compile(r"\D((\+?86 ?)?1([38]\d|5[0-35-9]|7[678]|9[89])\d{8})\D")
-        matchers = pattern.findall(text)
-        if matchers:
-            #print('telephone')
-            for matcher in matchers:
-                text = text.replace(matcher[0], TelePhone(telephone=matcher[0]).telephone2chntext(), 1)
-        # Landline numbers
-        pattern = re.compile(r"\D((0(10|2[1-3]|[3-9]\d{2})-?)?[1-9]\d{6,7})\D")
-        matchers = pattern.findall(text)
-        if matchers:
-            # print('fixed telephone')
-            for matcher in matchers:
-                text = text.replace(matcher[0], TelePhone(telephone=matcher[0]).telephone2chntext(fixed=True), 1)
-
-        # Normalize fractions
-        pattern = re.compile(r"(\d+/\d+)")
-        matchers = pattern.findall(text)
-        if matchers:
-            #print('fraction')
-            for matcher in matchers:
-                text = text.replace(matcher, Fraction(fraction=matcher).fraction2chntext(), 1)
-
-        # Normalize percentages
-        text = text.replace('％', '%')
-        pattern = re.compile(r"(\d+(\.\d+)?%)")
-        matchers = pattern.findall(text)
-        if matchers:
-            #print('percentage')
-            for matcher in matchers:
-                text = text.replace(matcher[0], Percentage(percentage=matcher[0]).percentage2chntext(), 1)
-
-        # Normalize cardinal + quantifier
-        pattern = re.compile(r"(\d+(\.\d+)?)[多余几]?" + COM_QUANTIFIERS)
-        matchers = pattern.findall(text)
-        if matchers:
-            #print('cardinal+quantifier')
-            for matcher in matchers:
-                text = text.replace(matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext(), 1)
-
-        # Normalize digit sequences (serial numbers, IDs)
-        pattern = re.compile(r"(\d{4,32})")
-        matchers = pattern.findall(text)
-        if matchers:
-            #print('digit')
-            for matcher in matchers:
-                text = text.replace(matcher, Digit(digit=matcher).digit2chntext(), 1)
-
-        # Normalize remaining cardinals
-        pattern = re.compile(r"(\d+(\.\d+)?)")
-        matchers = pattern.findall(text)
-        if matchers:
-            #print('cardinal')
-            for matcher in matchers:
-                text = text.replace(matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext(), 1)
-
-        self.norm_text = text
-        self._particular()
-
-        return self.norm_text.lstrip('^').rstrip('$')
-
-
-def nsw_test_case(raw_text):
-    print('I:' + raw_text)
-    print('O:' + NSWNormalizer(raw_text).normalize())
-    print('')
-
-
-def nsw_test():
-    nsw_test_case('固话：0595-23865596或23880880。')
-    nsw_test_case('固话：0595-23865596或23880880。')
-    nsw_test_case('手机：+86 19859213959或15659451527。')
-    nsw_test_case('分数：32477/76391。')
-    nsw_test_case('百分数：80.03%。')
-    nsw_test_case('编号：31520181154418。')
-    nsw_test_case('纯数：2983.07克或12345.60米。')
-    nsw_test_case('日期：1999年2月20日或09年3月15号。')
-    nsw_test_case('金钱：12块5，34.5元，20.1万')
-    nsw_test_case('特殊：O2O或B2C。')
-    nsw_test_case('3456万吨')
-    nsw_test_case('2938个')
-    nsw_test_case('938')
-    nsw_test_case('今天吃了115个小笼包231个馒头')
-    nsw_test_case('有62％的概率')
-
-
-if __name__ == '__main__':
-    #nsw_test()
-
-    p = argparse.ArgumentParser()
-    p.add_argument('ifile', help='input filename, assume utf-8 encoding')
-    p.add_argument('ofile', help='output filename')
-    p.add_argument('--to_upper', action='store_true', help='convert to upper case')
-    p.add_argument('--to_lower', action='store_true', help='convert to lower case')
-    p.add_argument('--has_key', action='store_true', help="input text has Kaldi's key as first field.")
-    p.add_argument('--remove_fillers', type=bool, default=True, help='remove filler chars such as "呃, 啊"')
-    p.add_argument('--remove_erhua', type=bool, default=True, help='remove erhua chars such as "这儿"')
-    p.add_argument('--log_interval', type=int, default=10000, help='log interval in number of processed lines')
-    args = p.parse_args()
-
-    ifile = codecs.open(args.ifile, 'r', 'utf8')
-    ofile = codecs.open(args.ofile, 'w+', 'utf8')
-
-    n = 0
-    for l in ifile:
-        key = ''
-        text = ''
-        if args.has_key:
-            cols = l.split(maxsplit=1)
-            key = cols[0]
-            if len(cols) == 2:
-                text = cols[1].strip()
-            else:
-                text = ''
-        else:
-            text = l.strip()
-
-        # cases
-        if args.to_upper and args.to_lower:
-            sys.stderr.write('text norm: to_upper OR to_lower?')
-            exit(1)
-        if args.to_upper:
-            text = text.upper()
-        if args.to_lower:
-            text = text.lower()
-
-        # Filler chars removal
-        if args.remove_fillers:
-            for ch in FILLER_CHARS:
-                text = text.replace(ch, '')
-
-        if args.remove_erhua:
-            text = remove_erhua(text, ER_WHITELIST)
-
-        # NSW(Non-Standard-Word) normalization
-        text = NSWNormalizer(text).normalize()
-
-        # Punctuations removal
-        old_chars = CHINESE_PUNC_LIST + string.punctuation # includes all CN and EN punctuations
-        new_chars = ' ' * len(old_chars)
-        del_chars = ''
-        text = text.translate(str.maketrans(old_chars, new_chars, del_chars))
-
-        #
-        if args.has_key:
-            ofile.write(key + '\t' + text + '\n')
-        else:
-            ofile.write(text + '\n')
-
-        n += 1
-        if n % args.log_interval == 0:
-            sys.stderr.write("text norm: {} lines done.\n".format(n))
-
-    sys.stderr.write("text norm: {} lines done in total.\n".format(n))
-
-    ifile.close()
-    ofile.close()
diff --git a/egs/aishell2/transformerLM/utils b/egs/aishell2/transformerLM/utils
new file mode 120000
index 0000000..fe070dd
--- /dev/null
+++ b/egs/aishell2/transformerLM/utils
@@ -0,0 +1 @@
+../../aishell/transformer/utils
\ No newline at end of file
diff --git a/egs/aishell2/transformerLM/utils/parse_options.sh b/egs/aishell2/transformerLM/utils/parse_options.sh
deleted file mode 100755
index 71fb9e5..0000000
--- a/egs/aishell2/transformerLM/utils/parse_options.sh
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
-#                 Arnab Ghoshal, Karel Vesely
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABILITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Parse command-line options.
-# To be sourced by another script (as in ". parse_options.sh").
-# Option format is: --option-name arg
-# and shell variable "option_name" gets set to value "arg."
-# The exception is --help, which takes no arguments, but prints the
-# $help_message variable (if defined).
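-#
-# Example (assumed): if the sourcing script defines nj=4 and is invoked as
-#   ./some_script.sh --nj 8 data/train
-# then after ". utils/parse_options.sh" the shell variable nj equals 8.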
-
-
-###
-### The --config file options have lower priority to command line
-### options, so we need to import them first...
-###
-
-# Now import all the configs specified by command-line, in left-to-right order
-for ((argpos=1; argpos<$#; argpos++)); do
-  if [ "${!argpos}" == "--config" ]; then
-    argpos_plus1=$((argpos+1))
-    config=${!argpos_plus1}
-    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
-    . $config  # source the config file.
-  fi
-done
-
-
-###
-### Now we process the command line options
-###
-while true; do
-  [ -z "${1:-}" ] && break;  # break if there are no arguments
-  case "$1" in
-    # If the enclosing script is called with --help option, print the help
-    # message and exit.  Scripts should put help messages in $help_message
-    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
-      else printf "$help_message\n" 1>&2 ; fi;
-      exit 0 ;;
-    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
-      exit 1 ;;
-    # If the first command-line argument begins with "--" (e.g. --foo-bar),
-    # then work out the variable name as $name, which will equal "foo_bar".
-    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
-      # Next we test whether the variable in question is undefined-- if so it's
-      # an invalid option and we die.  Note: $0 evaluates to the name of the
-      # enclosing script.
-      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
-      # is undefined.  We then have to wrap this test inside "eval" because
-      # foo_bar is itself inside a variable ($name).
-      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
-
-      oldval="`eval echo \\$$name`";
-      # Work out whether we seem to be expecting a Boolean argument.
-      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
-        was_bool=true;
-      else
-        was_bool=false;
-      fi
-
-      # Set the variable to the right value-- the escaped quotes make it work if
-      # the option had spaces, like --cmd "queue.pl -sync y"
-      eval $name=\"$2\";
-
-      # Check that Boolean-valued arguments are really Boolean.
-      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
-        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
-        exit 1;
-      fi
-      shift 2;
-      ;;
-  *) break;
-  esac
-done
-
-
-# Check for an empty argument to the --cmd option, which can easily occur as a
-# result of scripting errors.
-[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
-
-
-true; # so this script returns exit code 0.
diff --git a/egs/aishell2/transformerLM/utils/run.pl b/egs/aishell2/transformerLM/utils/run.pl
deleted file mode 100755
index 483f95b..0000000
--- a/egs/aishell2/transformerLM/utils/run.pl
+++ /dev/null
@@ -1,356 +0,0 @@
-#!/usr/bin/env perl
-use warnings; #sed replacement for -w perl parameter
-# In general, doing
-#  run.pl some.log a b c is like running the command a b c in
-# the bash shell, and putting the standard error and output into some.log.
-# To run parallel jobs (backgrounded on the host machine), you can do (e.g.)
-#  run.pl JOB=1:4 some.JOB.log a b c JOB is like running the command a b c JOB
-# and putting it in some.JOB.log, for each one. [Note: JOB can be any identifier].
-# If any of the jobs fails, this script will fail.
-
-# A typical example is:
-#  run.pl some.log my-prog "--opt=foo bar" foo \|  other-prog baz
-# and run.pl will run something like:
-# ( my-prog '--opt=foo bar' foo |  other-prog baz ) >& some.log
-#
-# Basically it takes the command-line arguments, quotes them
-# as necessary to preserve spaces, and evaluates them with bash.
-# In addition it puts the command line at the top of the log, and
-# the start and end times of the command at the beginning and end.
-# The reason why this is useful is so that we can create a different
-# version of this program that uses a queueing system instead.
-
-#use Data::Dumper;
-
-@ARGV < 2 && die "usage: run.pl log-file command-line arguments...";
-
-#print STDERR "COMMAND-LINE: " .  Dumper(\@ARGV) . "\n";
-$job_pick = 'all';
-$max_jobs_run = -1;
-$jobstart = 1;
-$jobend = 1;
-$ignored_opts = ""; # These will be ignored.
-
-# First parse an option like JOB=1:4, and any
-# options that would normally be given to
-# queue.pl, which we will just discard.
-
-for (my $x = 1; $x <= 2; $x++) { # This for-loop is to
-  # allow the JOB=1:n option to be interleaved with the
-  # options to qsub.
-  while (@ARGV >= 2 && $ARGV[0] =~ m:^-:) {
-    # parse any options that would normally go to qsub, but which will be ignored here.
-    my $switch = shift @ARGV;
-    if ($switch eq "-V") {
-      $ignored_opts .= "-V ";
-    } elsif ($switch eq "--max-jobs-run" || $switch eq "-tc") {
-      # we do support the option --max-jobs-run n, and its GridEngine form -tc n.
-      # if the option appears multiple times, the smallest value is used.
-      if ( $max_jobs_run <= 0 ) {
-          $max_jobs_run =  shift @ARGV;
-      } else {
-        my $new_constraint = shift @ARGV;
-        if ( ($new_constraint < $max_jobs_run) ) {
-          $max_jobs_run = $new_constraint;
-        }
-      }
-      
-      if (! ($max_jobs_run > 0)) {
-        die "run.pl: invalid option --max-jobs-run $max_jobs_run";
-      }
-    } else {
-      my $argument = shift @ARGV;
-      if ($argument =~ m/^--/) {
-        print STDERR "run.pl: WARNING: suspicious argument '$argument' to $switch; starts with '-'\n";
-      }
-      if ($switch eq "-sync" && $argument =~ m/^[yY]/) {
-        $ignored_opts .= "-sync "; # Note: in the
-        # corresponding code in queue.pl it says instead, just "$sync = 1;".
-      } elsif ($switch eq "-pe") { # e.g. -pe smp 5
-        my $argument2 = shift @ARGV;
-        $ignored_opts .= "$switch $argument $argument2 ";
-      } elsif ($switch eq "--gpu") {
-        $using_gpu = $argument;
-      } elsif ($switch eq "--pick") {
-        if($argument =~ m/^(all|failed|incomplete)$/) {
-          $job_pick = $argument;
-        } else {
-          print STDERR "run.pl: ERROR: --pick argument must be one of 'all', 'failed' or 'incomplete'"
-        }
-      } else {
-        # Ignore option.
-        $ignored_opts .= "$switch $argument ";
-      }
-    }
-  }
-  if ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) { # e.g. JOB=1:20
-    $jobname = $1;
-    $jobstart = $2;
-    $jobend = $3;
-    if ($jobstart > $jobend) {
-      die "run.pl: invalid job range $ARGV[0]";
-    }
-    if ($jobstart <= 0) {
-      die "run.pl: invalid job range $ARGV[0], start must be strictly positive (this is required for GridEngine compatibility).";
-    }
-    shift;
-  } elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1.
-    $jobname = $1;
-    $jobstart = $2;
-    $jobend = $2;
-    shift;
-  } elsif ($ARGV[0] =~ m/.+\=.*\:.*$/) {
-    print STDERR "run.pl: Warning: suspicious first argument to run.pl: $ARGV[0]\n";
-  }
-}
-
-# Users found this message confusing so we are removing it.
-# if ($ignored_opts ne "") {
-#   print STDERR "run.pl: Warning: ignoring options \"$ignored_opts\"\n";
-# }
-
-if ($max_jobs_run == -1) { # If --max-jobs-run option not set,
-                           # then work out the number of processors if possible,
-                           # and set it based on that.
-  $max_jobs_run = 0;
-  if ($using_gpu) {
-    if (open(P, "nvidia-smi -L |")) {
-      $max_jobs_run++ while (<P>);
-      close(P);
-    }
-    if ($max_jobs_run == 0) {
-      $max_jobs_run = 1;
-      print STDERR "run.pl: Warning: failed to detect number of GPUs from nvidia-smi, using ${max_jobs_run}\n";
-    }
-  } elsif (open(P, "</proc/cpuinfo")) {  # Linux
-    while (<P>) { if (m/^processor/) { $max_jobs_run++; } }
-    if ($max_jobs_run == 0) {
-      print STDERR "run.pl: Warning: failed to detect any processors from /proc/cpuinfo\n";
-      $max_jobs_run = 10;  # reasonable default.
-    }
-    close(P);
-  } elsif (open(P, "sysctl -a |")) {  # BSD/Darwin
-    while (<P>) {
-      if (m/hw\.ncpu\s*[:=]\s*(\d+)/) { # hw.ncpu = 4, or hw.ncpu: 4
-        $max_jobs_run = $1;
-        last;
-      }
-    }
-    close(P);
-    if ($max_jobs_run == 0) {
-      print STDERR "run.pl: Warning: failed to detect any processors from sysctl -a\n";
-      $max_jobs_run = 10;  # reasonable default.
-    }
-  } else {
-    # allow at most 32 jobs at once, on non-UNIX systems; change this code
-    # if you need to change this default.
-    $max_jobs_run = 32;
-  }
-  # The just-computed value of $max_jobs_run is just the number of processors
-  # (or our best guess); and if it happens that the number of jobs we need to
-  # run is just slightly above $max_jobs_run, it will make sense to increase
-  # $max_jobs_run to equal the number of jobs, so we don't have a small number
-  # of leftover jobs.
-  $num_jobs = $jobend - $jobstart + 1;
-  if (!$using_gpu &&
-      $num_jobs > $max_jobs_run && $num_jobs < 1.4 * $max_jobs_run) {
-    $max_jobs_run = $num_jobs;
-  }
-}
-
-sub pick_or_exit {
-  # pick_or_exit ( $logfile ) 
-  # Invoked before each job is started; helps to run jobs selectively.
-  #
-  # Given the name of the output logfile decides whether the job must be 
-  # executed (by returning from the subroutine) or not (by terminating the
-  # process calling exit)
-  # 
-  # PRE: $job_pick is a global variable set by command line switch --pick
-  #      and indicates which class of jobs must be executed.
-  #
-  # 1) If a failed job is not executed the process exit code will indicate 
-  #    failure, just as if the task was just executed  and failed.
-  #
-  # 2) If a task is incomplete it will be executed. Incomplete may be either
-  #    a job whose log file does not contain the accounting notes in the end,
-  #    or a job whose log file does not exist.
-  #
-  # 3) If the $job_pick is set to 'all' (default behavior) a task will be
-  #    executed regardless of the result of previous attempts.
-  #
-  # This logic could have been implemented in the main execution loop,
-  # but a subroutine is used to preserve the current level of readability
-  # of that part of the code.
-  #
-  # Alexandre Felipe, (o.alexandre.felipe@gmail.com) 14th of August of 2020
-  #
-  if($job_pick eq 'all'){
-    return; # no need to bother with the previous log
-  }
-  open my $fh, "<", $_[0] or return; # job not executed yet
-  my $log_line;
-  my $cur_line;
-  while ($cur_line = <$fh>) {
-    if( $cur_line =~ m/# Ended \(code .*/ ) {
-      $log_line = $cur_line;
-    }
-  }
-  close $fh;
-  if (! defined($log_line)){
-    return; # incomplete
-  }
-  if ( $log_line =~ m/# Ended \(code 0\).*/ ) {
-    exit(0); # complete
-  } elsif ( $log_line =~ m/# Ended \(code \d+(; signal \d+)?\).*/ ){
-    if ($job_pick !~ m/^(failed|all)$/) {
-      exit(1); # failed but not going to run
-    } else {
-      return; # failed
-    }
-  } elsif ( $log_line =~ m/.*\S.*/ ) {
-    return; # incomplete jobs are always run
-  }
-}
-
-
-$logfile = shift @ARGV;
-
-if (defined $jobname && $logfile !~ m/$jobname/ &&
-    $jobend > $jobstart) {
-  print STDERR "run.pl: you are trying to run a parallel job but "
-    . "you are putting the output into just one log file ($logfile)\n";
-  exit(1);
-}
-
-$cmd = "";
-
-foreach $x (@ARGV) {
-    if ($x =~ m/^\S+$/) { $cmd .=  $x . " "; }
-    elsif ($x =~ m:\":) { $cmd .= "'$x' "; }
-    else { $cmd .= "\"$x\" "; }
-}
-
-#$Data::Dumper::Indent=0;
-$ret = 0;
-$numfail = 0;
-%active_pids=();
-
-use POSIX ":sys_wait_h";
-for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
-  if (scalar(keys %active_pids) >= $max_jobs_run) {
-
-    # Lets wait for a change in any child's status
-    # Then we have to work out which child finished
-    $r = waitpid(-1, 0);
-    $code = $?;
-    if ($r < 0 ) { die "run.pl: Error waiting for child process"; } # should never happen.
-    if ( defined $active_pids{$r} ) {
-        $jid=$active_pids{$r};
-        $fail[$jid]=$code;
-        if ($code !=0) { $numfail++;}
-        delete $active_pids{$r};
-        # print STDERR "Finished: $r/$jid " .  Dumper(\%active_pids) . "\n";
-    } else {
-        die "run.pl: Cannot find the PID of the child process that just finished.";
-    }
-
-    # In theory we could do a non-blocking waitpid over all jobs running just
-    # to find out if only one or more jobs finished during the previous waitpid()
-    # However, we just omit this and will reap the next one in the next pass
-    # through the for(;;) cycle
-  }
-  $childpid = fork();
-  if (!defined $childpid) { die "run.pl: Error forking in run.pl (writing to $logfile)"; }
-  if ($childpid == 0) { # We're in the child... this branch
-    # executes the job and returns (possibly with an error status).
-    if (defined $jobname) {
-      $cmd =~ s/$jobname/$jobid/g;
-      $logfile =~ s/$jobname/$jobid/g;
-    }
-    # exit if the job does not need to be executed
-    pick_or_exit( $logfile );
-
-    system("mkdir -p `dirname $logfile` 2>/dev/null");
-    open(F, ">$logfile") || die "run.pl: Error opening log file $logfile";
-    print F "# " . $cmd . "\n";
-    print F "# Started at " . `date`;
-    $starttime = `date +'%s'`;
-    print F "#\n";
-    close(F);
-
-    # Pipe into bash.. make sure we're not using any other shell.
-    open(B, "|bash") || die "run.pl: Error opening shell command";
-    print B "( " . $cmd . ") 2>>$logfile >> $logfile";
-    close(B);                   # If there was an error, exit status is in $?
-    $ret = $?;
-
-    $lowbits = $ret & 127;
-    $highbits = $ret >> 8;
-    if ($lowbits != 0) { $return_str = "code $highbits; signal $lowbits" }
-    else { $return_str = "code $highbits"; }
-
-    $endtime = `date +'%s'`;
-    open(F, ">>$logfile") || die "run.pl: Error opening log file $logfile (again)";
-    $enddate = `date`;
-    chop $enddate;
-    print F "# Accounting: time=" . ($endtime - $starttime) . " threads=1\n";
-    print F "# Ended ($return_str) at " . $enddate . ", elapsed time " . ($endtime-$starttime) . " seconds\n";
-    close(F);
-    exit($ret == 0 ? 0 : 1);
-  } else {
-    $pid[$jobid] = $childpid;
-    $active_pids{$childpid} = $jobid;
-    # print STDERR "Queued: " .  Dumper(\%active_pids) . "\n";
-  }
-}
-
-# Now we have submitted all the jobs, lets wait until all the jobs finish
-foreach $child (keys %active_pids) {
-    $jobid=$active_pids{$child};
-    $r = waitpid($pid[$jobid], 0);
-    $code = $?;
-    if ($r == -1) { die "run.pl: Error waiting for child process"; } # should never happen.
-    if ($r != 0) { $fail[$jobid]=$code; $numfail++ if $code!=0; } # record exit code; count failures
-}
-
-# Some sanity checks:
-# The $fail array should not contain undefined codes
-# The number of non-zeros in that array  should be equal to $numfail
-# We cannot do foreach() here, as the JOB ids do not start at zero
-$failed_jids=0;
-for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
-  $job_return = $fail[$jobid];
-  if (not defined $job_return ) {
-    # print Dumper(\@fail);
-
-    die "run.pl: Sanity check failed: we have indication that some jobs are running " .
-      "even after we waited for all jobs to finish" ;
-  }
-  if ($job_return != 0 ){ $failed_jids++;}
-}
-if ($failed_jids != $numfail) {
-  die "run.pl: Sanity check failed: cannot find out how many jobs failed ($failed_jids x $numfail)."
-}
-if ($numfail > 0) { $ret = 1; }
-
-if ($ret != 0) {
-  $njobs = $jobend - $jobstart + 1;
-  if ($njobs == 1) {
-    if (defined $jobname) {
-      $logfile =~ s/$jobname/$jobstart/; # only one numbered job, so replace name with
-                                         # that job.
-    }
-    print STDERR "run.pl: job failed, log is in $logfile\n";
-    if ($logfile =~ m/JOB/) {
-      print STDERR "run.pl: probably you forgot to put JOB=1:\$nj in your script.";
-    }
-  }
-  else {
-    $logfile =~ s/$jobname/*/g;
-    print STDERR "run.pl: $numfail / $njobs failed, log is in $logfile\n";
-  }
-}
-
-
-exit ($ret);
diff --git a/egs/aishell2/transformerLM/utils/split_scp.pl b/egs/aishell2/transformerLM/utils/split_scp.pl
deleted file mode 100755
index 0876dcb..0000000
--- a/egs/aishell2/transformerLM/utils/split_scp.pl
+++ /dev/null
@@ -1,246 +0,0 @@
-#!/usr/bin/env perl
-
-# Copyright 2010-2011 Microsoft Corporation
-
-# See ../../COPYING for clarification regarding multiple authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABILITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# This program splits up any kind of .scp or archive-type file.
-# If there is no utt2spk option it will work on any text file and
-# will split it up with an approximately equal number of lines in
-# each output.
-# With the --utt2spk option it will work on anything that has the
-# utterance-id as the first entry on each line; the utt2spk file is
-# of the form "utterance speaker" (on each line).
-# It splits it into equal size chunks as far as it can.  If you use the utt2spk
-# option it will make sure these chunks coincide with speaker boundaries.  In
-# this case, if there are more chunks than speakers (and in some other
-# circumstances), some of the resulting chunks will be empty and it will print
-# an error message and exit with nonzero status.
-# You will normally call this like:
-# split_scp.pl scp scp.1 scp.2 scp.3 ...
-# or
-# split_scp.pl --utt2spk=utt2spk scp scp.1 scp.2 scp.3 ...
-# Note that you can use this script to split the utt2spk file itself,
-# e.g. split_scp.pl --utt2spk=utt2spk utt2spk utt2spk.1 utt2spk.2 ...
-
-# You can also call the scripts like:
-# split_scp.pl -j 3 0 scp scp.0
-# [note: with this option, it assumes zero-based indexing of the split parts,
-# i.e. the second number must be 0 <= n < num-jobs.]
-
-use warnings;
-
-$num_jobs = 0;
-$job_id = 0;
-$utt2spk_file = "";
-$one_based = 0;
-
-for ($x = 1; $x <= 3 && @ARGV > 0; $x++) {
-    if ($ARGV[0] eq "-j") {
-        shift @ARGV;
-        $num_jobs = shift @ARGV;
-        $job_id = shift @ARGV;
-    }
-    if ($ARGV[0] =~ /--utt2spk=(.+)/) {
-        $utt2spk_file=$1;
-        shift;
-    }
-    if ($ARGV[0] eq '--one-based') {
-        $one_based = 1;
-        shift @ARGV;
-    }
-}
-
-if ($num_jobs != 0 && ($num_jobs < 0 || $job_id - $one_based < 0 ||
-                       $job_id - $one_based >= $num_jobs)) {
-  die "$0: Invalid job number/index values for '-j $num_jobs $job_id" .
-      ($one_based ? " --one-based" : "") . "'\n"
-}
-
-$one_based
-    and $job_id--;
-
-if(($num_jobs == 0 && @ARGV < 2) || ($num_jobs > 0 && (@ARGV < 1 || @ARGV > 2))) {
-    die
-"Usage: split_scp.pl [--utt2spk=<utt2spk_file>] in.scp out1.scp out2.scp ...
-   or: split_scp.pl -j num-jobs job-id [--one-based] [--utt2spk=<utt2spk_file>] in.scp [out.scp]
- ... where 0 <= job-id < num-jobs, or 1 <= job-id <= num-jobs if --one-based.\n";
-}
-
-$error = 0;
-$inscp = shift @ARGV;
-if ($num_jobs == 0) { # without -j option
-    @OUTPUTS = @ARGV;
-} else {
-    for ($j = 0; $j < $num_jobs; $j++) {
-        if ($j == $job_id) {
-            if (@ARGV > 0) { push @OUTPUTS, $ARGV[0]; }
-            else { push @OUTPUTS, "-"; }
-        } else {
-            push @OUTPUTS, "/dev/null";
-        }
-    }
-}
-
-if ($utt2spk_file ne "") {  # We have the --utt2spk option...
-    open($u_fh, '<', $utt2spk_file) || die "$0: Error opening utt2spk file $utt2spk_file: $!\n";
-    while(<$u_fh>) {
-        @A = split;
-        @A == 2 || die "$0: Bad line $_ in utt2spk file $utt2spk_file\n";
-        ($u,$s) = @A;
-        $utt2spk{$u} = $s;
-    }
-    close $u_fh;
-    open($i_fh, '<', $inscp) || die "$0: Error opening input scp file $inscp: $!\n";
-    @spkrs = ();
-    while(<$i_fh>) {
-        @A = split;
-        if(@A == 0) { die "$0: Empty or space-only line in scp file $inscp\n"; }
-        $u = $A[0];
-        $s = $utt2spk{$u};
-        defined $s || die "$0: No utterance $u in utt2spk file $utt2spk_file\n";
-        if(!defined $spk_count{$s}) {
-            push @spkrs, $s;
-            $spk_count{$s} = 0;
-            $spk_data{$s} = [];  # ref to new empty array.
-        }
-        $spk_count{$s}++;
-        push @{$spk_data{$s}}, $_;
-    }
-    # Now split as equally as possible ..
-    # First allocate spks to files by allocating an approximately
-    # equal number of speakers.
-    $numspks = @spkrs;  # number of speakers.
-    $numscps = @OUTPUTS; # number of output files.
-    if ($numspks < $numscps) {
-      die "$0: Refusing to split data because number of speakers $numspks " .
-          "is less than the number of output .scp files $numscps\n";
-    }
-    for($scpidx = 0; $scpidx < $numscps; $scpidx++) {
-        $scparray[$scpidx] = []; # [] is array reference.
-    }
-    for ($spkidx = 0; $spkidx < $numspks; $spkidx++) {
-        $scpidx = int(($spkidx*$numscps) / $numspks);
-        $spk = $spkrs[$spkidx];
-        push @{$scparray[$scpidx]}, $spk;
-        $scpcount[$scpidx] += $spk_count{$spk};
-    }
-
-    # Now will try to reassign beginning + ending speakers
-    # to different scp's and see if it gets more balanced.
-    # Suppose objf we're minimizing is sum_i (num utts in scp[i] - average)^2.
-    # We can show that if considering changing just 2 scp's, we minimize
-    # this by minimizing the squared difference in sizes.  This is
-    # equivalent to minimizing the absolute difference in sizes.  This
-    # shows this method is bound to converge.
-
-    $changed = 1;
-    while($changed) {
-        $changed = 0;
-        for($scpidx = 0; $scpidx < $numscps; $scpidx++) {
-            # First try to reassign ending spk of this scp.
-            if($scpidx < $numscps-1) {
-                $sz = @{$scparray[$scpidx]};
-                if($sz > 0) {
-                    $spk = $scparray[$scpidx]->[$sz-1];
-                    $count = $spk_count{$spk};
-                    $nutt1 = $scpcount[$scpidx];
-                    $nutt2 = $scpcount[$scpidx+1];
-                    if( abs( ($nutt2+$count) - ($nutt1-$count))
-                        < abs($nutt2 - $nutt1))  { # Would decrease
-                        # size-diff by reassigning spk...
-                        $scpcount[$scpidx+1] += $count;
-                        $scpcount[$scpidx] -= $count;
-                        pop @{$scparray[$scpidx]};
-                        unshift @{$scparray[$scpidx+1]}, $spk;
-                        $changed = 1;
-                    }
-                }
-            }
-            if($scpidx > 0 && @{$scparray[$scpidx]} > 0) {
-                $spk = $scparray[$scpidx]->[0];
-                $count = $spk_count{$spk};
-                $nutt1 = $scpcount[$scpidx-1];
-                $nutt2 = $scpcount[$scpidx];
-                if( abs( ($nutt2-$count) - ($nutt1+$count))
-                    < abs($nutt2 - $nutt1))  { # Would decrease
-                    # size-diff by reassigning spk...
-                    $scpcount[$scpidx-1] += $count;
-                    $scpcount[$scpidx] -= $count;
-                    shift @{$scparray[$scpidx]};
-                    push @{$scparray[$scpidx-1]}, $spk;
-                    $changed = 1;
-                }
-            }
-        }
-    }
-    # Now print out the files...
-    for($scpidx = 0; $scpidx < $numscps; $scpidx++) {
-        $scpfile = $OUTPUTS[$scpidx];
-        ($scpfile ne '-' ? open($f_fh, '>', $scpfile)
-                         : open($f_fh, '>&', \*STDOUT)) ||
-            die "$0: Could not open scp file $scpfile for writing: $!\n";
-        $count = 0;
-        if(@{$scparray[$scpidx]} == 0) {
-            print STDERR "$0: Error: split_scp.pl producing empty .scp file " .
-                         "$scpfile (too many splits and too few speakers?)\n";
-            $error = 1;
-        } else {
-            foreach $spk ( @{$scparray[$scpidx]} ) {
-                print $f_fh @{$spk_data{$spk}};
-                $count += $spk_count{$spk};
-            }
-            $count == $scpcount[$scpidx] || die "Count mismatch [code error]";
-        }
-        close($f_fh);
-    }
-} else {
-   # This block is the "normal" case where there is no --utt2spk
-   # option and we just break into equal size chunks.
-
-    open($i_fh, '<', $inscp) || die "$0: Error opening input scp file $inscp: $!\n";
-
-    $numscps = @OUTPUTS;  # size of array.
-    @F = ();
-    while(<$i_fh>) {
-        push @F, $_;
-    }
-    $numlines = @F;
-    if($numlines == 0) {
-        print STDERR "$0: error: empty input scp file $inscp\n";
-        $error = 1;
-    }
-    $linesperscp = int( $numlines / $numscps); # the "whole part"..
-    $linesperscp >= 1 || die "$0: You are splitting into too many pieces! [reduce \$nj ($numscps) to be smaller than the number of lines ($numlines) in $inscp]\n";
-    $remainder = $numlines - ($linesperscp * $numscps);
-    ($remainder >= 0 && $remainder < $numlines) || die "bad remainder $remainder";
-    # [just doing int() rounds down].
-    $n = 0;
-    for($scpidx = 0; $scpidx < @OUTPUTS; $scpidx++) {
-        $scpfile = $OUTPUTS[$scpidx];
-        ($scpfile ne '-' ? open($o_fh, '>', $scpfile)
-                         : open($o_fh, '>&', \*STDOUT)) ||
-            die "$0: Could not open scp file $scpfile for writing: $!\n";
-        for($k = 0; $k < $linesperscp + ($scpidx < $remainder ? 1 : 0); $k++) {
-            print $o_fh $F[$n++];
-        }
-        close($o_fh) || die "$0: Error closing scp file $scpfile: $!\n";
-    }
-    $n == $numlines || die "$n != $numlines [code error]";
-}
-
-exit ($error);
diff --git a/egs/librispeech/conformer/run.sh b/egs/librispeech/conformer/run.sh
index 21a8d33..5296340 100755
--- a/egs/librispeech/conformer/run.sh
+++ b/egs/librispeech/conformer/run.sh
@@ -97,7 +97,7 @@
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config $asr_config --scale 1.0
+    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
 fi
 
 token_list=${feats_dir}/data/lang_char/${train_set}_${bpemode}${nbpe}_units.txt
diff --git a/egs/librispeech_100h/conformer/run.sh b/egs/librispeech_100h/conformer/run.sh
index e85c8eb..41df5a4 100755
--- a/egs/librispeech_100h/conformer/run.sh
+++ b/egs/librispeech_100h/conformer/run.sh
@@ -93,7 +93,7 @@
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: Feature and CMVN Generation"
-    utils/compute_cmvn.sh ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config $asr_config --scale 1.0
+    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
 fi
 
 token_list=${feats_dir}/data/lang_char/${train_set}_${bpemode}${nbpe}_units.txt
@@ -120,7 +120,7 @@
 
 # ASR Training Stage
 world_size=$gpu_num  # run on one machine
-if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4; then
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
     echo "stage 4: ASR Training"
     mkdir -p ${exp_dir}/exp/${model_dir}
     mkdir -p ${exp_dir}/exp/${model_dir}/log
diff --git a/egs/wenetspeech/conformer/conf/decode_asr_transformer_5beam.yaml b/egs/wenetspeech/conformer/conf/decode_asr_transformer_5beam.yaml
new file mode 100644
index 0000000..e35e820
--- /dev/null
+++ b/egs/wenetspeech/conformer/conf/decode_asr_transformer_5beam.yaml
@@ -0,0 +1,6 @@
+beam_size: 5
+penalty: 0.0
+maxlenratio: 0.0
+minlenratio: 0.0
+ctc_weight: 0.5
+lm_weight: 0.7
diff --git a/egs/wenetspeech/conformer/conf/train_asr_conformer.yaml b/egs/wenetspeech/conformer/conf/train_asr_conformer.yaml
new file mode 100644
index 0000000..a9658b8
--- /dev/null
+++ b/egs/wenetspeech/conformer/conf/train_asr_conformer.yaml
@@ -0,0 +1,104 @@
+# network architecture
+# encoder related
+encoder: conformer
+encoder_conf:
+    output_size: 512    # dimension of attention
+    attention_heads: 8
+    linear_units: 2048  # the number of units of position-wise feed forward
+    num_blocks: 12      # the number of encoder blocks
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.0
+    input_layer: conv2d # encoder architecture type
+    normalize_before: true
+    rel_pos_type: latest
+    pos_enc_layer_type: rel_pos
+    selfattention_layer_type: rel_selfattn
+    activation_type: swish
+    macaron_style: true
+    use_cnn_module: true
+    cnn_module_kernel: 15
+
+# decoder related
+decoder: transformer
+decoder_conf:
+    attention_heads: 8
+    linear_units: 2048
+    num_blocks: 6
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    self_attention_dropout_rate: 0.0
+    src_attention_dropout_rate: 0.0
+
+# CTC related
+ctc_conf:
+    ignore_nan_grad: true
+
+# frontend related
+frontend: wav_frontend
+frontend_conf:
+    fs: 16000
+    window: hamming
+    n_mels: 80
+    frame_length: 25
+    frame_shift: 10
+    lfr_m: 1
+    lfr_n: 1
+
+# hybrid CTC/attention
+model_conf:
+    ctc_weight: 0.3
+    lsm_weight: 0.1     # label smoothing option
+    length_normalized_loss: false
+
+# optimization related
+accum_grad: 4
+grad_clip: 5
+patience: none
+max_epoch: 30
+val_scheduler_criterion:
+    - valid
+    - acc
+best_model_criterion:
+-   - valid
+    - acc
+    - max
+keep_nbest_models: 10
+
+optim: adam
+optim_conf:
+   lr: 0.0015
+scheduler: warmuplr
+scheduler_conf:
+   warmup_steps: 30000
+
+specaug: specaug
+specaug_conf:
+    apply_time_warp: true
+    time_warp_window: 5
+    time_warp_mode: bicubic
+    apply_freq_mask: true
+    freq_mask_width_range:
+    - 0
+    - 30
+    num_freq_mask: 2
+    apply_time_mask: true
+    time_mask_width_range:
+    - 0
+    - 40
+    num_time_mask: 2
+
+dataset_conf:
+    data_names: speech,text
+    data_types: sound,text_nospace
+    shuffle: True
+    shuffle_conf:
+        shuffle_size: 2048
+        sort_size: 500
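+    # batch_type "token": batch_size limits the summed sequence length per
+    # batch rather than the number of utterances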
+    batch_conf:
+        batch_type: token
+        batch_size: 32000
+    num_workers: 8
+
+log_interval: 50
+normalize: None
diff --git a/egs/wenetspeech/conformer/local/data.sh b/egs/wenetspeech/conformer/local/data.sh
new file mode 100755
index 0000000..2b0a4be
--- /dev/null
+++ b/egs/wenetspeech/conformer/local/data.sh
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+# Set bash to 'debug' mode: exit on error (-e), on use of undefined variables
+# (-u), and on failures anywhere in a pipeline (-o pipefail).
+set -e
+set -u
+set -o pipefail
+
+log() {
+    local fname=${BASH_SOURCE[1]##*/}
+    echo -e "$(date '+%Y-%m-%dT%H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+SECONDS=0
+
+# general configuration
+nj=10
+stage=2
+stop_stage=100
+set=L
+data_dir="data"
+WENETSPEECH=
+train_cmd=
+
+log "$0 $*"
+. utils/parse_options.sh
+
+if [ ! -d "${WENETSPEECH}/audio" ] || [ ! -f "${WENETSPEECH}/WenetSpeech.json" ]; then
+    echo "Valid WenetSpeech data not found in ${WENETSPEECH}."
+    echo "Please follow the instructions at https://wenet-e2e.github.io/WenetSpeech/"
+    echo "to download and construct the data."
+    exit 1
+fi
+
+train_set=train_"$(echo "${set}" | tr "[:upper:]" "[:lower:]")"
+dev_set=dev
+test_sets="test_net test_meeting"
+
+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
+    log "data preparation"
+    mkdir -p ${data_dir}
+    abs_data_dir=$(readlink -f ${data_dir})
+    log "making Kaldi format data directory in ${abs_data_dir}"
+    local/wenetspeech_data_prep.sh \
+        --train-subset ${set} \
+        --stage 1 \
+        ${WENETSPEECH} \
+        ${abs_data_dir}
+
+    # prepare utt2spk and spk2utt files
+    for x in ${train_set} ${dev_set} ${test_sets}; do
+        dir=${data_dir}/${x}
+        paste -d " " <(cut -f 1 ${dir}/segments) <(cut -f 1 ${dir}/segments) | \
+            sort -u > ${dir}/utt2spk
+        utils/utt2spk_to_spk2utt.pl ${dir}/utt2spk > ${dir}/spk2utt
+    done
+fi
+
+if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
+    log "process the long term opus audio file, may take about 3 hours"
+    for x in ${train_set} ${dev_set} ${test_sets}; do
+        log "process audio for ${data_dir}/${x}"
+        dir=${data_dir}/${x}
+        mkdir -p ${dir}/logs
+
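+        # never launch more jobs than there are segment lines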
+        nutt=$(<${dir}/segments wc -l)
+        nj=$((nj<nutt?nj:nutt))
+
+        split_scps=""
+        for n in $(seq ${nj}); do
+            split_scps="${split_scps} ${dir}/logs/segments.${n}"
+        done
+        utils/split_scp.pl ${dir}/segments ${split_scps}
+
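+        # cut every segment out of its long opus recording, nj shards in parallel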
+        ${train_cmd} "JOB=1:${nj}" "${dir}/logs/process_audio.JOB.log"\
+            python3 local/process_opus.py \
+                ${dir}/wav.scp \
+                ${dir}/logs/segments.JOB   \
+                ${dir}/logs/wav.JOB.scp
+
+        # replace `wav.scp` with the segmented wavs; `segments` is renamed so
+        # that the audio formatting process in stage 3 of `asr.sh` is skipped
+        mv ${dir}/wav.scp ${dir}/wav.scp.org
+        mv ${dir}/segments ${dir}/segments.org
+        for n in $(seq ${nj}); do
+            cat ${dir}/logs/wav.${n}.scp || exit 1;
+        done | sort -u > ${dir}/wav.scp
+    done
+fi
+
+if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
+    log "format text file"
+    for x in ${train_set} ${dev_set} ${test_sets}; do
+        log "format text for ${data_dir}/${x}"
+        dir=${data_dir}/${x}
+        mv ${dir}/text ${dir}/text.org
+        paste -d " " <(cut -f 1 ${dir}/text.org) \
+            <(cut -f 2- ${dir}/text.org | local/text_normalize.pl) | \
+            sort -u > ${dir}/text
+        utils/fix_data_dir.sh ${dir}
+    done
+fi
+
+log "Successfully finished. [elapsed=${SECONDS}s]"
diff --git a/egs/wenetspeech/conformer/local/extract_meta.py b/egs/wenetspeech/conformer/local/extract_meta.py
new file mode 100755
index 0000000..6074162
--- /dev/null
+++ b/egs/wenetspeech/conformer/local/extract_meta.py
@@ -0,0 +1,114 @@
+# Copyright 2021  Xiaomi Corporation (Author: Yongqing Wang)
+#                 Mobvoi Inc(Author: Di Wu, Binbin Zhang)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import json
+import os
+import sys
+
+
+def get_args():
+    parser = argparse.ArgumentParser(
+        description="""
+      This script processes the raw json metadata of WenetSpeech:
+      each long wav is split into segments and
+      data in WeNet format is generated.
+      """
+    )
+    parser.add_argument("input_json", help="""Input json file of WenetSpeech""")
+    parser.add_argument("output_dir", help="""Output dir for prepared data""")
+
+    args = parser.parse_args()
+    return args
+
+
+def meta_analysis(input_json, output_dir):
+    input_dir = os.path.dirname(input_json)
+
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    try:
+        with open(input_json, "r") as injson:
+            json_data = json.load(injson)
+    except Exception:
+        sys.exit(f"Failed to load input json file: {input_json}")
+    else:
+        if json_data["audios"] is not None:
+            with open(f"{output_dir}/text", "w") as utt2text, open(
+                f"{output_dir}/segments", "w"
+            ) as segments, open(f"{output_dir}/utt2dur", "w") as utt2dur, open(
+                f"{output_dir}/wav.scp", "w"
+            ) as wavscp, open(
+                f"{output_dir}/utt2subsets", "w"
+            ) as utt2subsets, open(
+                f"{output_dir}/reco2dur", "w"
+            ) as reco2dur:
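+                # one pass over the long recordings: per-recording info goes to
+                # wav.scp and reco2dur, per-segment info to text, segments,
+                # utt2dur and utt2subsets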
+                for long_audio in json_data["audios"]:
+                    try:
+                        long_audio_path = os.path.realpath(
+                            os.path.join(input_dir, long_audio["path"])
+                        )
+                        aid = long_audio["aid"]
+                        segments_lists = long_audio["segments"]
+                        duration = long_audio["duration"]
+                        assert os.path.exists(long_audio_path)
+                    except AssertionError:
+                        print(
+                            f"Warning: {aid}: audio file "
+                            f"{long_audio_path} does not exist, skipped"
+                        )
+                        continue
+                    except Exception:
+                        print(
+                            "Warning: "
+                            f"{long_audio.get('aid', '<unknown>')}: "
+                            "incomplete metadata entry, skipped"
+                        )
+                        continue
+                    else:
+                        wavscp.write(f"{aid}\t{long_audio_path}\n")
+                        reco2dur.write(f"{aid}\t{duration}\n")
+                        for segment_file in segments_lists:
+                            try:
+                                sid = segment_file["sid"]
+                                start_time = segment_file["begin_time"]
+                                end_time = segment_file["end_time"]
+                                dur = end_time - start_time
+                                text = segment_file["text"]
+                                segment_subsets = segment_file["subsets"]
+                            except Exception:
+                                print(
+                                    f"Warning: malformed segment entry "
+                                    f"{segment_file}, skipped"
+                                )
+                                continue
+                            else:
+                                utt2text.write(f"{sid}\t{text}\n")
+                                segments.write(
+                                    f"{sid}\t{aid}\t{start_time}\t{end_time}\n"
+                                )
+                                utt2dur.write(f"{sid}\t{dur}\n")
+                                segment_sub_names = " ".join(segment_subsets)
+                                utt2subsets.write(f"{sid}\t{segment_sub_names}\n")
+
+
+def main():
+    args = get_args()
+
+    meta_analysis(args.input_json, args.output_dir)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/egs/aishell2/transformer/utils/__init__.py b/egs/wenetspeech/conformer/local/path.sh
similarity index 100%
rename from egs/aishell2/transformer/utils/__init__.py
rename to egs/wenetspeech/conformer/local/path.sh
diff --git a/egs/wenetspeech/conformer/local/process_opus.py b/egs/wenetspeech/conformer/local/process_opus.py
new file mode 100755
index 0000000..581953f
--- /dev/null
+++ b/egs/wenetspeech/conformer/local/process_opus.py
@@ -0,0 +1,88 @@
+# Copyright 2021  NPU, ASLP Group (Author: Qijie Shao)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# process_opus.py: segmentation and downsampling of opus audio
+
+# usage: python3 process_opus.py wav.scp segments output_wav.scp
+
+import os
+import sys
+
+import torchaudio
+
+
+def read_file(wav_scp, segments):
+    wav_scp_dict = {}
+    with open(wav_scp, "r", encoding="UTF-8") as fin:
+        for line_str in fin:
+            wav_id, path = line_str.strip().split()
+            wav_scp_dict[wav_id] = path
+
+    utt_list = []
+    seg_path_list = []
+    start_time_list = []
+    end_time_list = []
+    with open(segments, "r", encoding="UTF-8") as fin:
+        for line_str in fin:
+            arr = line_str.strip().split()
+            assert len(arr) == 4
+            utt_list.append(arr[0])
+            seg_path_list.append(wav_scp_dict[arr[1]])
+            start_time_list.append(float(arr[2]))
+            end_time_list.append(float(arr[3]))
+    return utt_list, seg_path_list, start_time_list, end_time_list
+
+
+# TODO(Qijie): Fix the process logic
+def output(output_wav_scp, utt_list, seg_path_list, start_time_list, end_time_list):
+    num_utts = len(utt_list)
+    step = max(1, int(num_utts * 0.01))  # progress step; max() avoids modulo-by-zero when num_utts < 100
+    with open(output_wav_scp, "w", encoding="UTF-8") as fout:
+        previous_wav_path = ""
+        for i in range(num_utts):
+            utt_id = utt_list[i]
+            current_wav_path = seg_path_list[i]
+            output_dir = (os.path.dirname(current_wav_path)).replace(
+                "audio", "audio_seg"
+            )
+            seg_wav_path = os.path.join(output_dir, utt_id + ".wav")
+
+            os.makedirs(output_dir, exist_ok=True)
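+            # segments from the same long recording are adjacent in the list,
+            # so decode the opus file only when the source path changes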
+            if current_wav_path != previous_wav_path:
+                waveform, sample_rate = torchaudio.load(current_wav_path)
+            previous_wav_path = current_wav_path
+
+            start = int(start_time_list[i] * sample_rate)
+            end = int(end_time_list[i] * sample_rate)
+            target_audio = waveform[:, start:end]
+            torchaudio.save(seg_wav_path, target_audio, sample_rate)
+
+            fout.write("{} {}\n".format(utt_id, seg_wav_path))
+            if i % step == 0:
+                print("seg wav finished: {}%".format(int(i / step)))
+
+
+def main():
+    wav_scp = sys.argv[1]
+    segments = sys.argv[2]
+    output_wav_scp = sys.argv[3]
+
+    utt_list, seg_path_list, start_time_list, end_time_list = read_file(
+        wav_scp, segments
+    )
+    output(output_wav_scp, utt_list, seg_path_list, start_time_list, end_time_list)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/egs/wenetspeech/conformer/local/text_normalize.pl b/egs/wenetspeech/conformer/local/text_normalize.pl
new file mode 100755
index 0000000..55b35e2
--- /dev/null
+++ b/egs/wenetspeech/conformer/local/text_normalize.pl
@@ -0,0 +1,24 @@
+#!/usr/bin/env perl
+use utf8;
+use open qw(:std :utf8);
+use warnings;
+
+while (<STDIN>) {
+    chomp;
+    # remove non-ASCII whitespace (ideographic space U+3000, no-break space U+00A0)
+    if ($_ =~ /\x{3000}/) {$_ =~ s:\x{3000}::g;}
+    if ($_ =~ /\x{00A0}/) {$_ =~ s:\x{00A0}::g;}
+    # uppercase all letters
+    if ($_ =~ /[a-zA-Z]/) {$_ = uc $_;}
+    # add "_" before and after each English word
+    if ($_ =~ /([A-Z]+)\s+([A-Z]+)/) {$_ =~ s/([A-Z]+)\s+([A-Z]+)/$1\_$2/g;}
+    if ($_ =~ /([A-Z]+)\s+([A-Z]+)/) {$_ =~ s/([A-Z]+)\s+([A-Z]+)/$1\_$2/g;}
+    if ($_ =~ m/([A-Z]+)(\p{Han}+)/) {$_ =~ s/([A-Z]+)(\p{Han}+)/$1\_$2/g;}
+    if ($_ =~ m/(\p{Han}+)([A-Z]+)/) {$_ =~ s/(\p{Han}+)([A-Z]+)/$1\_$2/g;}
+    # remove all remaining whitespace
+    if ($_ =~ /\s+/) {$_ =~ s:\s+::g;}
+    # replace "_" with a normal space
+    if ($_ =~ /\_/) {$_ =~ s:\_: :g;}
+
+    print "$_\n";
+}
diff --git a/egs/wenetspeech/conformer/local/wenetspeech_data_prep.sh b/egs/wenetspeech/conformer/local/wenetspeech_data_prep.sh
new file mode 100755
index 0000000..4959328
--- /dev/null
+++ b/egs/wenetspeech/conformer/local/wenetspeech_data_prep.sh
@@ -0,0 +1,135 @@
+#!/usr/bin/env bash
+
+# Copyright 2021  Xiaomi Corporation (Author: Yongqing Wang)
+#                 Seasalt AI, Inc (Author: Guoguo Chen)
+#                 Mobvoi Inc(Author: Di Wu, Binbin Zhang)
+#                 NPU, ASLP Group (Author: Qijie Shao)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+set -o pipefail
+
+stage=1
+prefix=
+train_subset=L
+
+. utils/parse_options.sh || exit 1;
+
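+# filter_by_id <id-list> <input> <output> [field]: keep the lines of <input>
+# whose <field>-th column (default: the 1st) appears in column 1 of <id-list>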
+filter_by_id () {
+  idlist=$1
+  input=$2
+  output=$3
+  field=1
+  if [ $# -eq 4 ]; then
+    field=$4
+  fi
+  cat $input | perl -se '
+    open(F, "<$idlist") || die "Could not open id-list file $idlist";
+    while(<F>) {
+      @A = split;
+      @A>=1 || die "Invalid id-list file line $_";
+      $seen{$A[0]} = 1;
+    }
+    while(<>) {
+      @A = split;
+      @A > 0 || die "Invalid file line $_";
+      @A >= $field || die "Invalid file line $_";
+      if ($seen{$A[$field-1]}) {
+        print $_;
+      }
+    }' -- -idlist="$idlist" -field="$field" > $output ||\
+  (echo "$0: filter_by_id() error: $input" && exit 1) || exit 1;
+}
+
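+# subset_data_dir <utt-list> <src-dir> <dest-dir>: restrict text, segments and
+# utt2dur to the listed utterances, then keep only the wav.scp recordings that
+# those segments still reference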
+subset_data_dir () {
+  utt_list=$1
+  src_dir=$2
+  dest_dir=$3
+  mkdir -p $dest_dir || exit 1;
+  # wav.scp text segments utt2dur
+  filter_by_id $utt_list $src_dir/utt2dur $dest_dir/utt2dur ||\
+    (echo "$0: subset_data_dir() error: $src_dir/utt2dur" && exit 1) || exit 1;
+  filter_by_id $utt_list $src_dir/text $dest_dir/text ||\
+    (echo "$0: subset_data_dir() error: $src_dir/text" && exit 1) || exit 1;
+  filter_by_id $utt_list $src_dir/segments $dest_dir/segments ||\
+    (echo "$0: subset_data_dir() error: $src_dir/segments" && exit 1) || exit 1;
+  awk '{print $2}' $dest_dir/segments | sort | uniq > $dest_dir/reco
+  filter_by_id $dest_dir/reco $src_dir/wav.scp $dest_dir/wav.scp ||\
+    (echo "$0: subset_data_dir() error: $src_dir/wav.scp" && exit 1) || exit 1;
+  rm -f $dest_dir/reco
+}
+
+if [ $# -ne 2 ]; then
+  echo "Usage: $0 [options] <wenetspeech-dataset-dir> <data-dir>"
+  echo " e.g.: $0 --train-subset L /disk1/audio_data/wenetspeech/ data/"
+  echo ""
+  echo "This script takes the WenetSpeech source directory, and prepares the"
+  echo "WeNet format data directory."
+  echo "  --prefix <prefix>                # Prefix for output data directory."
+  echo "  --stage <stage>                  # Processing stage."
+  echo "  --train-subset <L|M|S|W>     # Train subset to be created."
+  exit 1
+fi
+
+wenetspeech_dir=$1
+data_dir=$2
+
+declare -A subsets
+subsets=(
+  [L]="train_l"
+  [M]="train_m"
+  [S]="train_s"
+  [W]="train_w"
+  [DEV]="dev"
+  [TEST_NET]="test_net"
+  [TEST_MEETING]="test_meeting")
+
+prefix=${prefix:+${prefix}_}
+
+corpus_dir=$data_dir/${prefix}corpus/
+if [ $stage -le 1 ]; then
+  echo "$0: Extract meta into $corpus_dir"
+  # Sanity check.
+  [ ! -f $wenetspeech_dir/WenetSpeech.json ] &&\
+    echo "$0: Please download $wenetspeech_dir/WenetSpeech.json!" && exit 1;
+  [ ! -d $wenetspeech_dir/audio ] &&\
+    echo "$0: Please download $wenetspeech_dir/audio!" && exit 1;
+
+  [ ! -d $corpus_dir ] && mkdir -p $corpus_dir
+
+  # Files to be created:
+  # wav.scp text segments utt2dur
+  python3 local/extract_meta.py \
+    $wenetspeech_dir/WenetSpeech.json $corpus_dir || exit 1;
+fi
+
+if [ $stage -le 2 ]; then
+  echo "$0: Split data to train, dev, test_net, and test_meeting"
+  [ ! -f $corpus_dir/utt2subsets ] &&\
+    echo "$0: No such file $corpus_dir/utt2subsets!" && exit 1;
+  for label in $train_subset DEV TEST_NET TEST_MEETING; do
+    if [ ! ${subsets[$label]+set} ]; then
+      echo "$0: Subset $label is not defined in WenetSpeech.json." && exit 1;
+    fi
+    subset=${subsets[$label]}
+    [ ! -d $data_dir/${prefix}$subset ] && mkdir -p $data_dir/${prefix}$subset
+    cat $corpus_dir/utt2subsets | \
+       awk -v s=$label '{for (i=2;i<=NF;i++) if($i==s) print $0;}' \
+       > $corpus_dir/${prefix}${subset}_utt_list|| exit 1;
+    subset_data_dir $corpus_dir/${prefix}${subset}_utt_list \
+      $corpus_dir $data_dir/${prefix}$subset || exit 1;
+  done
+fi
+
+echo "$0: Done"
diff --git a/egs/wenetspeech/conformer/path.sh b/egs/wenetspeech/conformer/path.sh
new file mode 100755
index 0000000..7972642
--- /dev/null
+++ b/egs/wenetspeech/conformer/path.sh
@@ -0,0 +1,5 @@
+export FUNASR_DIR=$PWD/../../..
+
+# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
+export PYTHONIOENCODING=UTF-8
+export PATH=$FUNASR_DIR/funasr/bin:$PATH
diff --git a/egs/wenetspeech/conformer/run.sh b/egs/wenetspeech/conformer/run.sh
new file mode 100644
index 0000000..28a2824
--- /dev/null
+++ b/egs/wenetspeech/conformer/run.sh
@@ -0,0 +1,223 @@
+#!/usr/bin/env bash
+
+. ./path.sh || exit 1;
+
+# machines configuration
+CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+gpu_num=8
+count=1
+gpu_inference=true  # Whether to perform gpu decoding, set false for cpu decoding
+# for gpu decoding, inference_nj=ngpu*njob; for cpu decoding, inference_nj=njob
+njob=5
+train_cmd=utils/run.pl
+infer_cmd=utils/run.pl
+
+# general configuration
+feats_dir="../DATA"   # feature output directory
+exp_dir="."
+lang=zh
+token_type=char
+type=sound
+scp=wav.scp
+speed_perturb="0.9 1.0 1.1"
+stage=3
+stop_stage=5
+
+# feature configuration
+feats_dim=80
+nj=64
+
+# data
+raw_data=/nfs/zhifu.gzf/wenetspeech_proc
+
+# exp tag
+tag="exp1"
+
+. utils/parse_options.sh || exit 1;
+
+# Set bash to 'debug' mode: exit on error (-e), on use of undefined variables
+# (-u), and on failures anywhere in a pipeline (-o pipefail).
+set -e
+set -u
+set -o pipefail
+
+set=L
+train_set=train_l
+valid_set=dev
+test_sets="dev test_net test_meeting"
+
+asr_config=conf/train_asr_conformer.yaml
+model_dir="baseline_$(basename "${asr_config}" .yaml)_${lang}_${token_type}_${tag}"
+
+inference_config=conf/decode_asr_transformer_5beam.yaml
+inference_asr_model=valid.acc.ave_10best.pb
+
+# you can set gpu num for decoding here
+gpuid_list=$CUDA_VISIBLE_DEVICES  # set gpus for decoding, the same as training stage by default
+ngpu=$(echo $gpuid_list | awk -F "," '{print NF}')
+
+if ${gpu_inference}; then
+    inference_nj=$((ngpu * njob))
+    _ngpu=1
+else
+    inference_nj=$njob
+    _ngpu=0
+fi
+
+if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
+    echo "For downloading data, please refer to https://github.com/wenet-e2e/WenetSpeech."
+    exit 0;
+fi
+
+if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
+    echo "stage 0: Data preparation"
+    # Data preparation
+    local/data.sh --set ${set} --nj $nj --data_dir $feats_dir --WENETSPEECH $raw_data --train_cmd $train_cmd
+    mkdir -p $feats_dir/data
+    mv $feats_dir/$train_set $feats_dir/data/$train_set
+    for x in $test_sets; do
+        mv $feats_dir/$x $feats_dir/data/
+    done
+fi
+
+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
+    echo "stage 1: Feature and CMVN Generation"
+    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 0.1
+fi
+
+token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
+echo "dictionary: ${token_list}"
+if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
+    echo "stage 2: Dictionary Preparation"
+    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/
+
+    echo "make a dictionary"
+    echo "<blank>" > ${token_list}
+    echo "<s>" >> ${token_list}
+    echo "</s>" >> ${token_list}
+    utils/text2token.py -s 1 -n 1 --space "" ${feats_dir}/data/$train_set/text | cut -f 2- -d" " | tr " " "\n" \
+        | sort | uniq | grep -a -v -e '^\s*$' | awk '{print $0}' >> ${token_list}
+    echo "<unk>" >> ${token_list}
+fi
+
+# LM Training Stage
+world_size=$gpu_num  # run on one machine
+if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
+    echo "stage 3: LM Training"
+fi
+
+# ASR Training Stage
+world_size=$gpu_num  # run on one machine
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+    echo "stage 4: ASR Training"
+    mkdir -p ${exp_dir}/exp/${model_dir}
+    mkdir -p ${exp_dir}/exp/${model_dir}/log
+    INIT_FILE=${exp_dir}/exp/${model_dir}/ddp_init
+    if [ -f $INIT_FILE ];then
+        rm -f $INIT_FILE
+    fi
+    init_method=file://$(readlink -f $INIT_FILE)
+    echo "$0: init method is $init_method"
+    for ((i = 0; i < $gpu_num; ++i)); do
+        {
+            rank=$i
+            local_rank=$i
+            gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$((i + 1)))
+            train.py \
+                --task_name asr \
+                --gpu_id $gpu_id \
+                --use_preprocessor true \
+                --token_type $token_type \
+                --token_list $token_list \
+                --dataset_type large \
+                --data_dir ${feats_dir}/data \
+                --train_set ${train_set} \
+                --valid_set ${valid_set} \
+                --data_file_names "wav.scp,text" \
+                --cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
+                --speed_perturb ${speed_perturb} \
+                --resume true \
+                --output_dir ${exp_dir}/exp/${model_dir} \
+                --config $asr_config \
+                --ngpu $gpu_num \
+                --num_worker_count $count \
+                --dist_init_method $init_method \
+                --dist_world_size $world_size \
+                --dist_rank $rank \
+                --local_rank $local_rank 1> ${exp_dir}/exp/${model_dir}/log/train.log.$i 2>&1
+        } &
+    done
+    wait
+fi
+
+# Testing Stage
+if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
+    echo "stage 5: Inference"
+    for dset in ${test_sets}; do
+        asr_exp=${exp_dir}/exp/${model_dir}
+        inference_tag="$(basename "${inference_config}" .yaml)"
+        _dir="${asr_exp}/${inference_tag}/${inference_asr_model}/${dset}"
+        _logdir="${_dir}/logdir"
+        if [ -d ${_dir} ]; then
+            echo "${_dir} is already exists. if you want to decode again, please delete this dir first."
+            exit 0
+        fi
+        mkdir -p "${_logdir}"
+        _data="${feats_dir}/data/${dset}"
+        key_file=${_data}/${scp}
+        num_scp_file="$(<${key_file} wc -l)"
+        _nj=$([ $inference_nj -le $num_scp_file ] && echo "$inference_nj" || echo "$num_scp_file")
+        split_scps=
+        for n in $(seq "${_nj}"); do
+            split_scps+=" ${_logdir}/keys.${n}.scp"
+        done
+        # shellcheck disable=SC2086
+        utils/split_scp.pl "${key_file}" ${split_scps}
+        _opts=
+        if [ -n "${inference_config}" ]; then
+            _opts+="--config ${inference_config} "
+        fi
+        ${infer_cmd} --gpu "${_ngpu}" --max-jobs-run "${_nj}" JOB=1:"${_nj}" "${_logdir}"/asr_inference.JOB.log \
+            python -m funasr.bin.asr_inference_launch \
+                --batch_size 1 \
+                --ngpu "${_ngpu}" \
+                --njob ${njob} \
+                --gpuid_list ${gpuid_list} \
+                --data_path_and_name_and_type "${_data}/${scp},speech,${type}" \
+                --cmvn_file ${feats_dir}/data/${train_set}/cmvn/am.mvn \
+                --key_file "${_logdir}"/keys.JOB.scp \
+                --asr_train_config "${asr_exp}"/config.yaml \
+                --asr_model_file "${asr_exp}"/"${inference_asr_model}" \
+                --output_dir "${_logdir}"/output.JOB \
+                --mode asr \
+                ${_opts}
+
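+        # merge the per-job 1best outputs into a single sorted file per field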
+        for f in token token_int score text; do
+            if [ -f "${_logdir}/output.1/1best_recog/${f}" ]; then
+                for i in $(seq "${_nj}"); do
+                    cat "${_logdir}/output.${i}/1best_recog/${f}"
+                done | sort -k1 >"${_dir}/${f}"
+            fi
+        done
+        python utils/proce_text.py ${_dir}/text ${_dir}/text.proc
+        python utils/proce_text.py ${_data}/text ${_data}/text.proc
+        python utils/compute_wer.py ${_data}/text.proc ${_dir}/text.proc ${_dir}/text.cer
+        tail -n 3 ${_dir}/text.cer > ${_dir}/text.cer.txt
+        cat ${_dir}/text.cer.txt
+    done
+fi
+
+# Prepare files for ModelScope fine-tuning and inference
+if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
+    echo "stage 6: ModelScope Preparation"
+    cp ${feats_dir}/data/${train_set}/cmvn/am.mvn ${exp_dir}/exp/${model_dir}/am.mvn
+    vocab_size=$(cat ${token_list} | wc -l)
+    python utils/gen_modelscope_configuration.py \
+        --am_model_name $inference_asr_model \
+        --mode asr \
+        --model_name conformer \
+        --dataset wenetspeech \
+        --output_dir $exp_dir/exp/$model_dir \
+        --vocab_size $vocab_size \
+        --tag $tag
+fi
\ No newline at end of file
diff --git a/egs/wenetspeech/conformer/utils b/egs/wenetspeech/conformer/utils
new file mode 120000
index 0000000..fe070dd
--- /dev/null
+++ b/egs/wenetspeech/conformer/utils
@@ -0,0 +1 @@
+../../aishell/transformer/utils
\ No newline at end of file
diff --git a/funasr/datasets/large_datasets/dataset.py b/funasr/datasets/large_datasets/dataset.py
index 5df61fd..68b63e1 100644
--- a/funasr/datasets/large_datasets/dataset.py
+++ b/funasr/datasets/large_datasets/dataset.py
@@ -148,6 +148,12 @@
                         if "key" not in sample_dict:
                             sample_dict["key"] = segs[0]
                         sample_dict['hw_tag'] = 1
+                    elif data_type == "text_nospace":
+                        text = item
+                        segs = text.strip().split(maxsplit=1)
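+                        # "text_nospace": the transcript has no token delimiters,
+                        # so split it into a list of single characters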
+                        sample_dict[data_name] = [x for x in segs[1]]
+                        if "key" not in sample_dict:
+                            sample_dict["key"] = segs[0]
                     else:
                         text = item
                         segs = text.strip().split()
diff --git a/funasr/utils/prepare_data.py b/funasr/utils/prepare_data.py
index 36795b4..7602740 100644
--- a/funasr/utils/prepare_data.py
+++ b/funasr/utils/prepare_data.py
@@ -185,7 +185,7 @@
         for i in range(nj):
             path = ""
             for file_name in file_names:
-                path = path + os.path.join(split_path, str(i + 1), file_name)
+                path = path + " " + os.path.join(split_path, str(i + 1), file_name)
             f_data.write(path + "\n")
 
 

--
Gitblit v1.9.1