From 128caa57c272122d6b51420e93babf510c2b5da2 Mon Sep 17 00:00:00 2001
From: 嘉渊 <wangjiaming.wjm@alibaba-inc.com>
Date: Thu, 13 Jul 2023 11:08:58 +0800
Subject: [PATCH] Remove experimental batch-size config variants; point branchformer/e_branchformer run.sh at the base configs
---
/dev/null | 101 --------------------------------------------------
egs/aishell/e_branchformer/run.sh | 3 -
egs/aishell/branchformer/run.sh | 5 --
egs/aishell/branchformer/conf/train_asr_branchformer.yaml | 2
4 files changed, 3 insertions(+), 108 deletions(-)
diff --git a/egs/aishell/branchformer/conf/train_asr_branchformer.yaml b/egs/aishell/branchformer/conf/train_asr_branchformer.yaml
index b01fd5b..f35c897 100644
--- a/egs/aishell/branchformer/conf/train_asr_branchformer.yaml
+++ b/egs/aishell/branchformer/conf/train_asr_branchformer.yaml
@@ -97,7 +97,7 @@
sort_size: 500
batch_conf:
batch_type: token
- batch_size: 20000
+ batch_size: 10000
num_workers: 8
log_interval: 50
diff --git a/egs/aishell/branchformer/conf/train_asr_branchformer_bs10000.yaml b/egs/aishell/branchformer/conf/train_asr_branchformer_bs10000.yaml
deleted file mode 100644
index f35c897..0000000
--- a/egs/aishell/branchformer/conf/train_asr_branchformer_bs10000.yaml
+++ /dev/null
@@ -1,104 +0,0 @@
-# network architecture
-# encoder related
-encoder: branchformer
-encoder_conf:
- output_size: 256
- use_attn: true
- attention_heads: 4
- attention_layer_type: rel_selfattn
- pos_enc_layer_type: rel_pos
- rel_pos_type: latest
- use_cgmlp: true
- cgmlp_linear_units: 2048
- cgmlp_conv_kernel: 31
- use_linear_after_conv: false
- gate_activation: identity
- merge_method: concat
- cgmlp_weight: 0.5 # used only if merge_method is "fixed_ave"
- attn_branch_drop_rate: 0.0 # used only if merge_method is "learned_ave"
- num_blocks: 24
- dropout_rate: 0.1
- positional_dropout_rate: 0.1
- attention_dropout_rate: 0.1
- input_layer: conv2d
- stochastic_depth_rate: 0.0
-
-# decoder related
-decoder: transformer
-decoder_conf:
- attention_heads: 4
- linear_units: 2048
- num_blocks: 6
- dropout_rate: 0.1
- positional_dropout_rate: 0.1
- self_attention_dropout_rate: 0.
- src_attention_dropout_rate: 0.
-
-# frontend related
-frontend: wav_frontend
-frontend_conf:
- fs: 16000
- window: hamming
- n_mels: 80
- frame_length: 25
- frame_shift: 10
- lfr_m: 1
- lfr_n: 1
-
-# hybrid CTC/attention
-model_conf:
- ctc_weight: 0.3
- lsm_weight: 0.1 # label smoothing option
- length_normalized_loss: false
-
-# optimization related
-accum_grad: 1
-grad_clip: 5
-max_epoch: 180
-val_scheduler_criterion:
- - valid
- - acc
-best_model_criterion:
-- - valid
- - acc
- - max
-keep_nbest_models: 10
-
-optim: adam
-optim_conf:
- lr: 0.001
- weight_decay: 0.000001
-scheduler: warmuplr
-scheduler_conf:
- warmup_steps: 35000
-
-specaug: specaug
-specaug_conf:
- apply_time_warp: true
- time_warp_window: 5
- time_warp_mode: bicubic
- apply_freq_mask: true
- freq_mask_width_range:
- - 0
- - 27
- num_freq_mask: 2
- apply_time_mask: true
- time_mask_width_ratio_range:
- - 0.
- - 0.05
- num_time_mask: 10
-
-dataset_conf:
- data_names: speech,text
- data_types: sound,text
- shuffle: True
- shuffle_conf:
- shuffle_size: 2048
- sort_size: 500
- batch_conf:
- batch_type: token
- batch_size: 10000
- num_workers: 8
-
-log_interval: 50
-normalize: None
\ No newline at end of file
diff --git a/egs/aishell/branchformer/conf/train_asr_branchformer_bs16000.yaml b/egs/aishell/branchformer/conf/train_asr_branchformer_bs16000.yaml
deleted file mode 100644
index 5f889d0..0000000
--- a/egs/aishell/branchformer/conf/train_asr_branchformer_bs16000.yaml
+++ /dev/null
@@ -1,104 +0,0 @@
-# network architecture
-# encoder related
-encoder: branchformer
-encoder_conf:
- output_size: 256
- use_attn: true
- attention_heads: 4
- attention_layer_type: rel_selfattn
- pos_enc_layer_type: rel_pos
- rel_pos_type: latest
- use_cgmlp: true
- cgmlp_linear_units: 2048
- cgmlp_conv_kernel: 31
- use_linear_after_conv: false
- gate_activation: identity
- merge_method: concat
- cgmlp_weight: 0.5 # used only if merge_method is "fixed_ave"
- attn_branch_drop_rate: 0.0 # used only if merge_method is "learned_ave"
- num_blocks: 24
- dropout_rate: 0.1
- positional_dropout_rate: 0.1
- attention_dropout_rate: 0.1
- input_layer: conv2d
- stochastic_depth_rate: 0.0
-
-# decoder related
-decoder: transformer
-decoder_conf:
- attention_heads: 4
- linear_units: 2048
- num_blocks: 6
- dropout_rate: 0.1
- positional_dropout_rate: 0.1
- self_attention_dropout_rate: 0.
- src_attention_dropout_rate: 0.
-
-# frontend related
-frontend: wav_frontend
-frontend_conf:
- fs: 16000
- window: hamming
- n_mels: 80
- frame_length: 25
- frame_shift: 10
- lfr_m: 1
- lfr_n: 1
-
-# hybrid CTC/attention
-model_conf:
- ctc_weight: 0.3
- lsm_weight: 0.1 # label smoothing option
- length_normalized_loss: false
-
-# optimization related
-accum_grad: 1
-grad_clip: 5
-max_epoch: 180
-val_scheduler_criterion:
- - valid
- - acc
-best_model_criterion:
-- - valid
- - acc
- - max
-keep_nbest_models: 10
-
-optim: adam
-optim_conf:
- lr: 0.001
- weight_decay: 0.000001
-scheduler: warmuplr
-scheduler_conf:
- warmup_steps: 35000
-
-specaug: specaug
-specaug_conf:
- apply_time_warp: true
- time_warp_window: 5
- time_warp_mode: bicubic
- apply_freq_mask: true
- freq_mask_width_range:
- - 0
- - 27
- num_freq_mask: 2
- apply_time_mask: true
- time_mask_width_ratio_range:
- - 0.
- - 0.05
- num_time_mask: 10
-
-dataset_conf:
- data_names: speech,text
- data_types: sound,text
- shuffle: True
- shuffle_conf:
- shuffle_size: 2048
- sort_size: 500
- batch_conf:
- batch_type: token
- batch_size: 16000
- num_workers: 8
-
-log_interval: 50
-normalize: None
\ No newline at end of file
diff --git a/egs/aishell/branchformer/conf/train_asr_branchformer_bs16000_gc2.yaml b/egs/aishell/branchformer/conf/train_asr_branchformer_bs16000_gc2.yaml
deleted file mode 100644
index bd5d934..0000000
--- a/egs/aishell/branchformer/conf/train_asr_branchformer_bs16000_gc2.yaml
+++ /dev/null
@@ -1,104 +0,0 @@
-# network architecture
-# encoder related
-encoder: branchformer
-encoder_conf:
- output_size: 256
- use_attn: true
- attention_heads: 4
- attention_layer_type: rel_selfattn
- pos_enc_layer_type: rel_pos
- rel_pos_type: latest
- use_cgmlp: true
- cgmlp_linear_units: 2048
- cgmlp_conv_kernel: 31
- use_linear_after_conv: false
- gate_activation: identity
- merge_method: concat
- cgmlp_weight: 0.5 # used only if merge_method is "fixed_ave"
- attn_branch_drop_rate: 0.0 # used only if merge_method is "learned_ave"
- num_blocks: 24
- dropout_rate: 0.1
- positional_dropout_rate: 0.1
- attention_dropout_rate: 0.1
- input_layer: conv2d
- stochastic_depth_rate: 0.0
-
-# decoder related
-decoder: transformer
-decoder_conf:
- attention_heads: 4
- linear_units: 2048
- num_blocks: 6
- dropout_rate: 0.1
- positional_dropout_rate: 0.1
- self_attention_dropout_rate: 0.
- src_attention_dropout_rate: 0.
-
-# frontend related
-frontend: wav_frontend
-frontend_conf:
- fs: 16000
- window: hamming
- n_mels: 80
- frame_length: 25
- frame_shift: 10
- lfr_m: 1
- lfr_n: 1
-
-# hybrid CTC/attention
-model_conf:
- ctc_weight: 0.3
- lsm_weight: 0.1 # label smoothing option
- length_normalized_loss: false
-
-# optimization related
-accum_grad: 2
-grad_clip: 5
-max_epoch: 180
-val_scheduler_criterion:
- - valid
- - acc
-best_model_criterion:
-- - valid
- - acc
- - max
-keep_nbest_models: 10
-
-optim: adam
-optim_conf:
- lr: 0.001
- weight_decay: 0.000001
-scheduler: warmuplr
-scheduler_conf:
- warmup_steps: 35000
-
-specaug: specaug
-specaug_conf:
- apply_time_warp: true
- time_warp_window: 5
- time_warp_mode: bicubic
- apply_freq_mask: true
- freq_mask_width_range:
- - 0
- - 27
- num_freq_mask: 2
- apply_time_mask: true
- time_mask_width_ratio_range:
- - 0.
- - 0.05
- num_time_mask: 10
-
-dataset_conf:
- data_names: speech,text
- data_types: sound,text
- shuffle: True
- shuffle_conf:
- shuffle_size: 2048
- sort_size: 500
- batch_conf:
- batch_type: token
- batch_size: 16000
- num_workers: 8
-
-log_interval: 50
-normalize: None
\ No newline at end of file
diff --git a/egs/aishell/branchformer/run.sh b/egs/aishell/branchformer/run.sh
index 37336ea..6bb4a0c 100755
--- a/egs/aishell/branchformer/run.sh
+++ b/egs/aishell/branchformer/run.sh
@@ -46,10 +46,7 @@
valid_set=dev
test_sets="dev test"
-#asr_config=conf/train_asr_branchformer.yaml
-#asr_config=conf/train_asr_branchformer_bs16000.yaml
-asr_config=conf/train_asr_branchformer_bs16000_gc2.yaml
-#asr_config=conf/train_asr_branchformer_bs10000.yaml
+asr_config=conf/train_asr_branchformer.yaml
model_dir="baseline_$(basename "${asr_config}" .yaml)_${lang}_${token_type}_${tag}"
inference_config=conf/decode_asr_transformer.yaml
diff --git a/egs/aishell/e_branchformer/conf/train_asr_e_branchformer_bs16000.yaml b/egs/aishell/e_branchformer/conf/train_asr_e_branchformer_bs16000.yaml
deleted file mode 100644
index 6e81f48..0000000
--- a/egs/aishell/e_branchformer/conf/train_asr_e_branchformer_bs16000.yaml
+++ /dev/null
@@ -1,101 +0,0 @@
-# network architecture
-# encoder related
-encoder: e_branchformer
-encoder_conf:
- output_size: 256
- attention_heads: 4
- attention_layer_type: rel_selfattn
- pos_enc_layer_type: rel_pos
- rel_pos_type: latest
- cgmlp_linear_units: 1024
- cgmlp_conv_kernel: 31
- use_linear_after_conv: false
- gate_activation: identity
- num_blocks: 12
- dropout_rate: 0.1
- positional_dropout_rate: 0.1
- attention_dropout_rate: 0.1
- input_layer: conv2d
- layer_drop_rate: 0.0
- linear_units: 1024
- positionwise_layer_type: linear
- use_ffn: true
- macaron_ffn: true
- merge_conv_kernel: 31
-
-# decoder related
-decoder: transformer
-decoder_conf:
- attention_heads: 4
- linear_units: 2048
- num_blocks: 6
- dropout_rate: 0.1
- positional_dropout_rate: 0.1
- self_attention_dropout_rate: 0.
- src_attention_dropout_rate: 0.
-
-# frontend related
-frontend: wav_frontend
-frontend_conf:
- fs: 16000
- window: hamming
- n_mels: 80
- frame_length: 25
- frame_shift: 10
- lfr_m: 1
- lfr_n: 1
-
-# hybrid CTC/attention
-model_conf:
- ctc_weight: 0.3
- lsm_weight: 0.1 # label smoothing option
- length_normalized_loss: false
-
-# optimization related
-accum_grad: 1
-grad_clip: 5
-max_epoch: 180
-best_model_criterion:
-- - valid
- - acc
- - max
-keep_nbest_models: 10
-
-optim: adam
-optim_conf:
- lr: 0.001
- weight_decay: 0.000001
-scheduler: warmuplr
-scheduler_conf:
- warmup_steps: 35000
-
-specaug: specaug
-specaug_conf:
- apply_time_warp: true
- time_warp_window: 5
- time_warp_mode: bicubic
- apply_freq_mask: true
- freq_mask_width_range:
- - 0
- - 27
- num_freq_mask: 2
- apply_time_mask: true
- time_mask_width_ratio_range:
- - 0.
- - 0.05
- num_time_mask: 10
-
-dataset_conf:
- data_names: speech,text
- data_types: sound,text
- shuffle: True
- shuffle_conf:
- shuffle_size: 2048
- sort_size: 500
- batch_conf:
- batch_type: token
- batch_size: 16000
- num_workers: 8
-
-log_interval: 50
-normalize: None
\ No newline at end of file
diff --git a/egs/aishell/e_branchformer/run.sh b/egs/aishell/e_branchformer/run.sh
index 8290ebf..bcba2d7 100755
--- a/egs/aishell/e_branchformer/run.sh
+++ b/egs/aishell/e_branchformer/run.sh
@@ -46,8 +46,7 @@
valid_set=dev
test_sets="dev test"
-#asr_config=conf/train_asr_e_branchformer.yaml
-asr_config=conf/train_asr_e_branchformer_bs16000.yaml
+asr_config=conf/train_asr_e_branchformer.yaml
model_dir="baseline_$(basename "${asr_config}" .yaml)_${lang}_${token_type}_${tag}"
inference_config=conf/decode_asr_transformer.yaml
--
Gitblit v1.9.1