From eb43576ed00902a5c0d5c05f5b50f9eebda3a0e1 Mon Sep 17 00:00:00 2001
From: 嘉渊 <wangjiaming.wjm@alibaba-inc.com>
Date: Mon, 15 May 2023 13:54:46 +0800
Subject: [PATCH] update repo
---
egs/aishell/paraformer/conf/train_asr_paraformer_conformer_12e_6d_2048_256.yaml | 2 +-
egs/aishell/paraformerbert/run.sh | 12 ++++++------
egs/aishell/paraformerbert/conf/train_asr_paraformerbert_conformer_12e_6d_2048_256.yaml | 37 ++++++++++++++++++++++++-------------
3 files changed, 31 insertions(+), 20 deletions(-)
diff --git a/egs/aishell/paraformer/conf/train_asr_paraformer_conformer_12e_6d_2048_256.yaml b/egs/aishell/paraformer/conf/train_asr_paraformer_conformer_12e_6d_2048_256.yaml
index 6073f1f..bac8d04 100644
--- a/egs/aishell/paraformer/conf/train_asr_paraformer_conformer_12e_6d_2048_256.yaml
+++ b/egs/aishell/paraformer/conf/train_asr_paraformer_conformer_12e_6d_2048_256.yaml
@@ -84,7 +84,7 @@
- 40
num_time_mask: 2
-predictor: cif_predictor_v2
+predictor: cif_predictor
predictor_conf:
idim: 256
threshold: 1.0
diff --git a/egs/aishell/paraformerbert/conf/train_asr_paraformerbert_conformer_12e_6d_2048_256.yaml b/egs/aishell/paraformerbert/conf/train_asr_paraformerbert_conformer_12e_6d_2048_256.yaml
index f51a2ea..8f3f067 100644
--- a/egs/aishell/paraformerbert/conf/train_asr_paraformerbert_conformer_12e_6d_2048_256.yaml
+++ b/egs/aishell/paraformerbert/conf/train_asr_paraformerbert_conformer_12e_6d_2048_256.yaml
@@ -29,6 +29,17 @@
self_attention_dropout_rate: 0.0
src_attention_dropout_rate: 0.0
+# frontend related
+frontend: wav_frontend
+frontend_conf:
+ fs: 16000
+ window: hamming
+ n_mels: 80
+ frame_length: 25
+ frame_shift: 10
+ lfr_m: 1
+ lfr_n: 1
+
# hybrid CTC/attention
model: paraformer_bert
model_conf:
@@ -41,19 +52,10 @@
embed_dims: 768
embeds_loss_weight: 2.0
-
-
-# minibatch related
-#batch_type: length
-#batch_bins: 40000
-batch_type: numel
-batch_bins: 2000000
-num_workers: 16
-
# optimization related
-accum_grad: 4
+accum_grad: 1
grad_clip: 5
-max_epoch: 50
+max_epoch: 150
val_scheduler_criterion:
- valid
- acc
@@ -92,8 +94,17 @@
threshold: 1.0
l_order: 1
r_order: 1
+ tail_threshold: 0.45
+dataset_conf:
+ shuffle: True
+ shuffle_conf:
+ shuffle_size: 2048
+ sort_size: 500
+ batch_conf:
+ batch_type: token
+ batch_size: 25000
+ num_workers: 8
log_interval: 50
-normalize: None
-allow_variable_data_keys: true
\ No newline at end of file
+normalize: None
\ No newline at end of file
diff --git a/egs/aishell/paraformerbert/run.sh b/egs/aishell/paraformerbert/run.sh
index e0245f3..b46d4e4 100755
--- a/egs/aishell/paraformerbert/run.sh
+++ b/egs/aishell/paraformerbert/run.sh
@@ -111,12 +111,12 @@
world_size=$gpu_num # run on one machine
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
echo "stage 3: Training"
- if ! "${skip_extract_embed}"; then
- echo "extract embeddings..."
- local/extract_embeds.sh \
- --bert_model_name ${bert_model_name} \
- --raw_dataset_path ${feats_dir}
- fi
+# if ! "${skip_extract_embed}"; then
+# echo "extract embeddings..."
+# local/extract_embeds.sh \
+# --bert_model_name ${bert_model_name} \
+# --raw_dataset_path ${feats_dir}
+# fi
mkdir -p ${exp_dir}/exp/${model_dir}
mkdir -p ${exp_dir}/exp/${model_dir}/log
INIT_FILE=${exp_dir}/exp/${model_dir}/ddp_init
--
Gitblit v1.9.1