From 97a689d65da434345a641a909f13b78e5690c86b Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: Thu, 18 May 2023 19:35:08 +0800
Subject: [PATCH] Merge pull request #526 from alibaba-damo-academy/dev_infer

---
 egs/librispeech_100h/conformer/conf/train_asr_conformer.yaml |   32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/egs/librispeech/conformer/conf/train_asr_conformer_uttnorm.yaml b/egs/librispeech_100h/conformer/conf/train_asr_conformer.yaml
similarity index 77%
rename from egs/librispeech/conformer/conf/train_asr_conformer_uttnorm.yaml
rename to egs/librispeech_100h/conformer/conf/train_asr_conformer.yaml
index 16b7cc0..bd92bb0 100644
--- a/egs/librispeech/conformer/conf/train_asr_conformer_uttnorm.yaml
+++ b/egs/librispeech_100h/conformer/conf/train_asr_conformer.yaml
@@ -1,8 +1,8 @@
 encoder: conformer
 encoder_conf:
-    output_size: 512
-    attention_heads: 8
-    linear_units: 2048
+    output_size: 256
+    attention_heads: 4
+    linear_units: 1024
     num_blocks: 12
     dropout_rate: 0.1
     positional_dropout_rate: 0.1
@@ -19,7 +19,7 @@
 
 decoder: transformer
 decoder_conf:
-    attention_heads: 8
+    attention_heads: 4
     linear_units: 2048
     num_blocks: 6
     dropout_rate: 0.1
@@ -27,13 +27,25 @@
     self_attention_dropout_rate: 0.1
     src_attention_dropout_rate: 0.1
 
+# frontend related
+frontend: wav_frontend
+frontend_conf:
+    fs: 16000
+    window: hamming
+    n_mels: 80
+    frame_length: 25
+    frame_shift: 10
+    lfr_m: 1
+    lfr_n: 1
+
+# hybrid CTC/attention
 model_conf:
     ctc_weight: 0.3
     lsm_weight: 0.1
     length_normalized_loss: false
 
-accum_grad: 2
-max_epoch: 50
+accum_grad: 1
+max_epoch: 210
 patience: none
 init: none
 best_model_criterion:
@@ -44,11 +56,11 @@
 
 optim: adam
 optim_conf:
-    lr: 0.0025
+    lr: 0.002
     weight_decay: 0.000001
 scheduler: warmuplr
 scheduler_conf:
-    warmup_steps: 40000
+    warmup_steps: 15000
 
 specaug: specaug
 specaug_conf:
@@ -64,7 +76,7 @@
     time_mask_width_ratio_range:
     - 0.
     - 0.05
-    num_time_mask: 10
+    num_time_mask: 5
 
 dataset_conf:
     shuffle: True
@@ -77,4 +89,4 @@
     num_workers: 8
 
 log_interval: 50
-normalize: utterance_mvn
\ No newline at end of file
+normalize: None
\ No newline at end of file

--
Gitblit v1.9.1