From 0f06fc04c0dcd20c5a3c07976195f9ff3f95fada Mon Sep 17 00:00:00 2001
From: 嘉渊 <wangjiaming.wjm@alibaba-inc.com>
Date: Tue, 25 Apr 2023 15:15:54 +0800
Subject: [PATCH] aishell conformer: switch to train.py, add wav_frontend and dataset_conf

---
 egs/aishell/conformer/conf/train_asr_conformer.yaml |   26 +++++++++++++++++++++-----
 egs/aishell/conformer/run.sh                        |   10 +++++-----
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/egs/aishell/conformer/conf/train_asr_conformer.yaml b/egs/aishell/conformer/conf/train_asr_conformer.yaml
index ddf217e..b3e703c 100644
--- a/egs/aishell/conformer/conf/train_asr_conformer.yaml
+++ b/egs/aishell/conformer/conf/train_asr_conformer.yaml
@@ -29,16 +29,22 @@
     self_attention_dropout_rate: 0.0
     src_attention_dropout_rate: 0.0
 
+# frontend related
+frontend: wav_frontend
+frontend_conf:
+    fs: 16000
+    window: hamming
+    n_mels: 80
+    frame_length: 25
+    frame_shift: 10
+    lfr_m: 1
+    lfr_n: 1
+
 # hybrid CTC/attention
 model_conf:
     ctc_weight: 0.3
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false
-
-# minibatch related
-batch_type: length
-batch_bins: 25000
-num_workers: 16
 
 # optimization related
 accum_grad: 1
@@ -76,5 +82,15 @@
     - 40
     num_time_mask: 2
 
+dataset_conf:
+    shuffle: True
+    shuffle_conf:
+        shuffle_size: 2048
+        sort_size: 500
+    batch_conf:
+        batch_type: token
+        batch_size: 25000
+    num_workers: 8
+
 log_interval: 50
 normalize: None
diff --git a/egs/aishell/conformer/run.sh b/egs/aishell/conformer/run.sh
index 227b3f2..60afbec 100755
--- a/egs/aishell/conformer/run.sh
+++ b/egs/aishell/conformer/run.sh
@@ -3,7 +3,7 @@
 . ./path.sh || exit 1;
 
 # machines configuration
-CUDA_VISIBLE_DEVICES="0,1"
+CUDA_VISIBLE_DEVICES="2,3"
 gpu_num=2
 count=1
 gpu_inference=true  # Whether to perform gpu decoding, set false for cpu decoding
@@ -13,7 +13,7 @@
 infer_cmd=utils/run.pl
 
 # general configuration
-feats_dir="../DATA" #feature output dictionary
+feats_dir="/nfs/wangjiaming.wjm/Funasr_data/aishell-1-fix-cmvn" #feature output directory
 exp_dir="."
 lang=zh
 dumpdir=dump/fbank
@@ -21,7 +21,7 @@
 token_type=char
 scp=feats.scp
 type=kaldi_ark
-stage=0
+stage=3
 stop_stage=4
 
 # feature configuration
@@ -161,7 +161,8 @@
             rank=$i
             local_rank=$i
             gpu_id=$(echo $CUDA_VISIBLE_DEVICES | cut -d',' -f$[$i+1])
-            asr_train.py \
+            train.py \
+                --task_name asr \
                 --gpu_id $gpu_id \
                 --use_preprocessor true \
                 --token_type char \
@@ -177,7 +178,6 @@
                 --resume true \
                 --output_dir ${exp_dir}/exp/${model_dir} \
                 --config $asr_config \
-                --input_size $feats_dim \
                 --ngpu $gpu_num \
                 --num_worker_count $count \
                 --multiprocessing_distributed true \

--
Gitblit v1.9.1