From 9b4e9cc8a0311e5243d69b73ed073e7ea441982e Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期三, 27 三月 2024 16:05:29 +0800
Subject: [PATCH] train update

---
 funasr/models/uniasr/template.yaml |   54 +++++++++++++++++++++++++++++++++++++++++-------------
 1 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/funasr/models/uniasr/template.yaml b/funasr/models/uniasr/template.yaml
index f4815c1..e72a2d5 100644
--- a/funasr/models/uniasr/template.yaml
+++ b/funasr/models/uniasr/template.yaml
@@ -18,6 +18,7 @@
     decoder_attention_chunk_type2: chunk
     loss_weight_model1: 0.5
 
+
 # encoder
 encoder: SANMEncoderChunkOpt
 encoder_conf:
@@ -34,11 +35,21 @@
     kernel_size: 11
     sanm_shfit: 0
     selfattention_layer_type: sanm
-    chunk_size: [20, 60]
-    stride: [10, 40]
-    pad_left: [5, 10]
-    encoder_att_look_back_factor: [0, 0]
-    decoder_att_look_back_factor: [0, 0]
+    chunk_size:
+    - 20
+    - 60
+    stride:
+    - 10
+    - 40
+    pad_left:
+    - 5
+    - 10
+    encoder_att_look_back_factor:
+    - 0
+    - 0
+    decoder_att_look_back_factor:
+    - 0
+    - 0
 
 # decoder
 decoder: FsmnDecoderSCAMAOpt
@@ -55,6 +66,7 @@
     kernel_size: 11
     concat_embeds: true
 
+# predictor
 predictor: CifPredictorV2
 predictor_conf:
     idim: 320
@@ -62,6 +74,8 @@
     l_order: 1
     r_order: 1
 
+
+# encoder2
 encoder2: SANMEncoderChunkOpt
 encoder2_conf:
     output_size: 320
@@ -77,12 +91,23 @@
     kernel_size: 21
     sanm_shfit: 0
     selfattention_layer_type: sanm
-    chunk_size: [45, 70]
-    stride: [35, 50]
-    pad_left: [5, 10]
-    encoder_att_look_back_factor: [0, 0]
-    decoder_att_look_back_factor: [0, 0]
+    chunk_size:
+    - 45
+    - 70
+    stride:
+    - 35
+    - 50
+    pad_left:
+    - 5
+    - 10
+    encoder_att_look_back_factor:
+    - 0
+    - 0
+    decoder_att_look_back_factor:
+    - 0
+    - 0
 
+# decoder2
 decoder2: FsmnDecoderSCAMAOpt
 decoder2_conf:
     attention_dim: 320
@@ -108,10 +133,12 @@
 stride_conv_conf:
     kernel_size: 2
     stride: 2
-    pad: [0, 1]
+    pad:
+    - 0
+    - 1
 
 # frontend related
-frontend: WavFrontendOnline
+frontend: WavFrontend
 frontend_conf:
     fs: 16000
     window: hamming
@@ -120,6 +147,7 @@
     frame_shift: 10
     lfr_m: 7
     lfr_n: 6
+    dither: 0.0
 
 specaug: SpecAugLFR
 specaug_conf:
@@ -156,7 +184,7 @@
 dataset: AudioDataset
 dataset_conf:
     index_ds: IndexDSJsonl
-    batch_sampler: DynamicBatchLocalShuffleSampler
+    batch_sampler: BatchSampler
     batch_type: example # example or length
     batch_size: 1 # if batch_type is example, batch_size is the numbers of samples; if length, batch_size is source_token_len+target_token_len;
     max_token_length: 2048 # filter samples if source_token_len+target_token_len > max_token_length,

--
Gitblit v1.9.1