From 280593676bda653ee7ec30563918f725c1eb6a20 Mon Sep 17 00:00:00 2001
From: 嘉渊 <wangjiaming.wjm@alibaba-inc.com>
Date: Thu, 11 May 2023 15:05:57 +0800
Subject: [PATCH] update repo
---
 egs/librispeech/conformer/conf/train_asr_conformer_uttnorm.yaml      | 80 ----------------------------------------
 egs/librispeech_100h/conformer/conf/train_asr_conformer.yaml         | 30 ++++++++++----
 egs/librispeech_100h/conformer/conf/train_asr_conformer_uttnorm.yaml | 80 ----------------------------------------
 3 files changed, 21 insertions(+), 169 deletions(-)
diff --git a/egs/librispeech/conformer/conf/train_asr_conformer_uttnorm.yaml b/egs/librispeech/conformer/conf/train_asr_conformer_uttnorm.yaml
deleted file mode 100644
index 16b7cc0..0000000
--- a/egs/librispeech/conformer/conf/train_asr_conformer_uttnorm.yaml
+++ /dev/null
@@ -1,80 +0,0 @@
-encoder: conformer
-encoder_conf:
- output_size: 512
- attention_heads: 8
- linear_units: 2048
- num_blocks: 12
- dropout_rate: 0.1
- positional_dropout_rate: 0.1
- attention_dropout_rate: 0.1
- input_layer: conv2d
- normalize_before: true
- macaron_style: true
- rel_pos_type: latest
- pos_enc_layer_type: rel_pos
- selfattention_layer_type: rel_selfattn
- activation_type: swish
- use_cnn_module: true
- cnn_module_kernel: 31
-
-decoder: transformer
-decoder_conf:
- attention_heads: 8
- linear_units: 2048
- num_blocks: 6
- dropout_rate: 0.1
- positional_dropout_rate: 0.1
- self_attention_dropout_rate: 0.1
- src_attention_dropout_rate: 0.1
-
-model_conf:
- ctc_weight: 0.3
- lsm_weight: 0.1
- length_normalized_loss: false
-
-accum_grad: 2
-max_epoch: 50
-patience: none
-init: none
-best_model_criterion:
-- - valid
- - acc
- - max
-keep_nbest_models: 10
-
-optim: adam
-optim_conf:
- lr: 0.0025
- weight_decay: 0.000001
-scheduler: warmuplr
-scheduler_conf:
- warmup_steps: 40000
-
-specaug: specaug
-specaug_conf:
- apply_time_warp: true
- time_warp_window: 5
- time_warp_mode: bicubic
- apply_freq_mask: true
- freq_mask_width_range:
- - 0
- - 27
- num_freq_mask: 2
- apply_time_mask: true
- time_mask_width_ratio_range:
- - 0.
- - 0.05
- num_time_mask: 10
-
-dataset_conf:
- shuffle: True
- shuffle_conf:
- shuffle_size: 1024
- sort_size: 500
- batch_conf:
- batch_type: token
- batch_size: 10000
- num_workers: 8
-
-log_interval: 50
-normalize: utterance_mvn
\ No newline at end of file
diff --git a/egs/librispeech_100h/conformer/conf/train_asr_conformer.yaml b/egs/librispeech_100h/conformer/conf/train_asr_conformer.yaml
index 68b127f..bd92bb0 100644
--- a/egs/librispeech_100h/conformer/conf/train_asr_conformer.yaml
+++ b/egs/librispeech_100h/conformer/conf/train_asr_conformer.yaml
@@ -1,8 +1,8 @@
encoder: conformer
encoder_conf:
- output_size: 512
- attention_heads: 8
- linear_units: 2048
+ output_size: 256
+ attention_heads: 4
+ linear_units: 1024
num_blocks: 12
dropout_rate: 0.1
positional_dropout_rate: 0.1
@@ -19,7 +19,7 @@
decoder: transformer
decoder_conf:
- attention_heads: 8
+ attention_heads: 4
linear_units: 2048
num_blocks: 6
dropout_rate: 0.1
@@ -27,13 +27,25 @@
self_attention_dropout_rate: 0.1
src_attention_dropout_rate: 0.1
+# frontend related
+frontend: wav_frontend
+frontend_conf:
+ fs: 16000
+ window: hamming
+ n_mels: 80
+ frame_length: 25
+ frame_shift: 10
+ lfr_m: 1
+ lfr_n: 1
+
+# hybrid CTC/attention
model_conf:
ctc_weight: 0.3
lsm_weight: 0.1
length_normalized_loss: false
-accum_grad: 2
-max_epoch: 50
+accum_grad: 1
+max_epoch: 210
patience: none
init: none
best_model_criterion:
@@ -44,11 +56,11 @@
optim: adam
optim_conf:
- lr: 0.0025
+ lr: 0.002
weight_decay: 0.000001
scheduler: warmuplr
scheduler_conf:
- warmup_steps: 40000
+ warmup_steps: 15000
specaug: specaug
specaug_conf:
@@ -64,7 +76,7 @@
time_mask_width_ratio_range:
- 0.
- 0.05
- num_time_mask: 10
+ num_time_mask: 5
dataset_conf:
shuffle: True
diff --git a/egs/librispeech_100h/conformer/conf/train_asr_conformer_uttnorm.yaml b/egs/librispeech_100h/conformer/conf/train_asr_conformer_uttnorm.yaml
deleted file mode 100644
index 16b7cc0..0000000
--- a/egs/librispeech_100h/conformer/conf/train_asr_conformer_uttnorm.yaml
+++ /dev/null
@@ -1,80 +0,0 @@
-encoder: conformer
-encoder_conf:
- output_size: 512
- attention_heads: 8
- linear_units: 2048
- num_blocks: 12
- dropout_rate: 0.1
- positional_dropout_rate: 0.1
- attention_dropout_rate: 0.1
- input_layer: conv2d
- normalize_before: true
- macaron_style: true
- rel_pos_type: latest
- pos_enc_layer_type: rel_pos
- selfattention_layer_type: rel_selfattn
- activation_type: swish
- use_cnn_module: true
- cnn_module_kernel: 31
-
-decoder: transformer
-decoder_conf:
- attention_heads: 8
- linear_units: 2048
- num_blocks: 6
- dropout_rate: 0.1
- positional_dropout_rate: 0.1
- self_attention_dropout_rate: 0.1
- src_attention_dropout_rate: 0.1
-
-model_conf:
- ctc_weight: 0.3
- lsm_weight: 0.1
- length_normalized_loss: false
-
-accum_grad: 2
-max_epoch: 50
-patience: none
-init: none
-best_model_criterion:
-- - valid
- - acc
- - max
-keep_nbest_models: 10
-
-optim: adam
-optim_conf:
- lr: 0.0025
- weight_decay: 0.000001
-scheduler: warmuplr
-scheduler_conf:
- warmup_steps: 40000
-
-specaug: specaug
-specaug_conf:
- apply_time_warp: true
- time_warp_window: 5
- time_warp_mode: bicubic
- apply_freq_mask: true
- freq_mask_width_range:
- - 0
- - 27
- num_freq_mask: 2
- apply_time_mask: true
- time_mask_width_ratio_range:
- - 0.
- - 0.05
- num_time_mask: 10
-
-dataset_conf:
- shuffle: True
- shuffle_conf:
- shuffle_size: 1024
- sort_size: 500
- batch_conf:
- batch_type: token
- batch_size: 10000
- num_workers: 8
-
-log_interval: 50
-normalize: utterance_mvn
\ No newline at end of file
--
Gitblit v1.9.1