From 00ea1186f96e6732e2edb4fab6c0ed6896e3b352 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Tue, 19 Dec 2023 22:53:18 +0800
Subject: [PATCH] funasr2

---
 funasr/models/neat_contextual_paraformer/template.yaml |   45 +++++++++++++++++++++++++++------------------
 1 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/examples/industrial_data_pretraining/paraformer-large/conf/finetune.yaml b/funasr/models/neat_contextual_paraformer/template.yaml
similarity index 65%
rename from examples/industrial_data_pretraining/paraformer-large/conf/finetune.yaml
rename to funasr/models/neat_contextual_paraformer/template.yaml
index 880aad9..012ecf7 100644
--- a/examples/industrial_data_pretraining/paraformer-large/conf/finetune.yaml
+++ b/funasr/models/neat_contextual_paraformer/template.yaml
@@ -1,6 +1,12 @@
+# This is an example that demonstrates how to configure a model file.
+# You can modify the configuration according to your own requirements.
+
+# To print the registry tables:
+# from funasr.utils.register import registry_tables
+# registry_tables.print()
 
 # network architecture
-model: funasr.cli.models.paraformer:Paraformer
+model: NeatContextualParaformer
 model_conf:
     ctc_weight: 0.0
     lsm_weight: 0.1
@@ -8,9 +14,10 @@
     predictor_weight: 1.0
     predictor_bias: 1
     sampling_ratio: 0.75
+    inner_dim: 512
 
 # encoder
-encoder: sanm
+encoder: SANMEncoder
 encoder_conf:
     output_size: 512
     attention_heads: 4
@@ -26,8 +33,9 @@
     sanm_shfit: 0
     selfattention_layer_type: sanm
 
+
 # decoder
-decoder: paraformer_decoder_sanm
+decoder: ContextualParaformerDecoder
 decoder_conf:
     attention_heads: 4
     linear_units: 2048
@@ -40,7 +48,7 @@
     kernel_size: 11
     sanm_shfit: 0
 
-predictor: cif_predictor_v2
+predictor: CifPredictorV2
 predictor_conf:
     idim: 512
     threshold: 1.0
@@ -49,7 +57,7 @@
     tail_threshold: 0.45
 
 # frontend related
-frontend: wav_frontend
+frontend: WavFrontend
 frontend_conf:
     fs: 16000
     window: hamming
@@ -59,7 +67,7 @@
     lfr_m: 7
     lfr_n: 6
 
-specaug: specaug_lfr
+specaug: SpecAugLFR
 specaug_conf:
     apply_time_warp: false
     time_warp_window: 5
@@ -97,21 +105,22 @@
 scheduler_conf:
    warmup_steps: 30000
 
-
+dataset: AudioDataset
 dataset_conf:
-    data_names: speech,text
-    data_types: sound,text
+    index_ds: IndexDSJsonl
+    batch_sampler: DynamicBatchLocalShuffleSampler
+    batch_type: example # example or length
+    batch_size: 1 # if batch_type is example, batch_size is the number of samples; if length, batch_size is source_token_len+target_token_len
+    max_token_length: 2048 # filter out samples whose source_token_len+target_token_len > max_token_length
+    buffer_size: 500
     shuffle: True
-    shuffle_conf:
-        shuffle_size: 2048
-        sort_size: 500
-    batch_conf:
-        batch_type: example
-        batch_size: 2
-    num_workers: 8
+    num_workers: 0
 
-split_with_space: true
-input_size: 560
+tokenizer: CharTokenizer
+tokenizer_conf:
+  unk_symbol: <unk>
+  split_with_space: true
+
 ctc_conf:
     dropout_rate: 0.0
     ctc_type: builtin

--
Gitblit v1.9.1