From 98e2c546a08917f450d32d63968affd5b975ad2a Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Wed, 16 Oct 2024 15:22:05 +0800
Subject: [PATCH] funasr tables

---
 docs/tutorial/README_zh.md |   21 ++++---
 docs/tutorial/Tables_zh.md |  146 +++++++++++++++++++++++++-----------------------
 2 files changed, 89 insertions(+), 78 deletions(-)

diff --git a/docs/tutorial/README_zh.md b/docs/tutorial/README_zh.md
index afef070..3242275 100644
--- a/docs/tutorial/README_zh.md
+++ b/docs/tutorial/README_zh.md
@@ -442,22 +442,21 @@
 
 ### 鏌ョ湅娉ㄥ唽琛�
 
-```python
+```python
 from funasr.register import tables
 
 tables.print()
 ```
 
-鏀寔鏌ョ湅鎸囧畾绫诲瀷鐨勬敞鍐岃〃锛歚tables.print("model")`
+鏀寔鏌ョ湅鎸囧畾绫诲瀷鐨勬敞鍐岃〃锛歕`tables.print("model")\`
 
-
-### 娉ㄥ唽鏂版ā鍨�
+### 娉ㄥ唽妯″瀷
 
 ```python
 from funasr.register import tables
 
-@tables.register("model_classes", "MinMo_S2T")
-class MinMo_S2T(nn.Module):
+@tables.register("model_classes", "SenseVoiceSmall")
+class SenseVoiceSmall(nn.Module):
   def __init__(*args, **kwargs):
     ...
 
@@ -479,10 +478,14 @@
 
 ```
 
-鐒跺悗鍦╟onfig.yaml涓寚瀹氭柊娉ㄥ唽妯″瀷
+在需要注册的类名前加上 `@tables.register("model_classes","SenseVoiceSmall")`，即可完成注册。被注册的类需要实现以下方法：`__init__`、`forward`、`inference`。
 
-```yaml
-model: MinMo_S2T
+瀹屾暣浠ｇ爜锛歔https://github.com/modelscope/FunASR/blob/main/funasr/models/sense\_voice/model.py#L443](https://github.com/modelscope/FunASR/blob/main/funasr/models/sense_voice/model.py#L443)
+
+娉ㄥ唽瀹屾垚鍚庯紝鍦╟onfig.yaml涓寚瀹氭柊娉ㄥ唽妯″瀷锛屽嵆鍙疄鐜板妯″瀷鐨勫畾涔�
+
+```yaml
+model: SenseVoiceSmall
 model_conf:
   ...
 ```
diff --git a/docs/tutorial/Tables_zh.md b/docs/tutorial/Tables_zh.md
index ec64baf..967c625 100644
--- a/docs/tutorial/Tables_zh.md
+++ b/docs/tutorial/Tables_zh.md
@@ -1,4 +1,4 @@
-# FunASR-1.x.x 娉ㄥ唽妯″瀷鏁欑▼
+# FunASR-1.x.x 注册模型教程
 
 1.0鐗堟湰鐨勮璁″垵琛锋槸銆�**璁╂ā鍨嬮泦鎴愭洿绠�鍗�**銆戯紝鏍稿績feature涓烘敞鍐岃〃涓嶢utoModel锛�
 
@@ -11,7 +11,7 @@
 *   缁熶竴瀛︽湳涓庡伐涓氭ā鍨嬫帹鐞嗚缁冭剼鏈紱
     
 
-![image](https://alidocs.oss-cn-zhangjiakou.aliyuncs.com/a/6Ea1DxkZVte8y0g2/b78f122bd40b485687e5e13faa78ae850521.png)
+![image](https://alidocs.oss-cn-zhangjiakou.aliyuncs.com/a/6Ea1DxkZVte8y0g2/150e0eafd1c34f2dbb9360ccb5db4dc40521.png)
 
 # 蹇�熶笂鎵�
 
@@ -89,14 +89,14 @@
 res = model.generate(input=[str], output_dir=[str])
 ```
 
-*   wav鏂囦欢璺緞,聽渚嬪:聽asr\_example.wav
-    
-*   pcm鏂囦欢璺緞,聽渚嬪:聽asr\_example.pcm锛屾鏃堕渶瑕佹寚瀹氶煶棰戦噰鏍风巼fs锛堥粯璁や负16000锛�
-    
-*   闊抽瀛楄妭鏁版祦锛屼緥濡傦細楹﹀厠椋庣殑瀛楄妭鏁版暟鎹�
-    
-*   wav.scp锛宬aldi聽鏍峰紡鐨劼爓av聽鍒楄〃聽(`wav_id聽\t聽wav_path`),聽渚嬪:
-    
+*   wav文件路径, 例如: asr\_example.wav
+
+*   pcm文件路径, 例如: asr\_example.pcm，此时需要指定音频采样率fs（默认为16000）
+
+*   音频字节数流，例如：麦克风的字节数数据
+
+*   wav.scp，kaldi 样式的 wav 列表 (`wav_id \t wav_path`), 例如:
+
 
 ```plaintext
 asr_example1  ./audios/asr_example1.wav
@@ -121,76 +121,82 @@
 
 ## 妯″瀷璧勬簮鐩綍
 
-![image.png](https://alidocs.oss-cn-zhangjiakou.aliyuncs.com/res/8oLl9y628rBNlapY/img/f16961f1-bdfb-4638-83d5-e4cb13a5a4a4.png)
+![image.png](https://alidocs.oss-cn-zhangjiakou.aliyuncs.com/res/8oLl9y628rBNlapY/img/cab7f215-787f-4407-885a-14dc89ae9e02.png)
+
+**妯″瀷閾炬帴涓猴細**[https://www.modelscope.cn/models/iic/SenseVoiceSmall/files](https://www.modelscope.cn/models/iic/SenseVoiceSmall/files)
 
 **閰嶇疆鏂囦欢**锛歝onfig.yaml
 
 ```yaml
-model: SenseVoiceLarge
-model_conf:
-  lsm_weight: 0.1
-  length_normalized_loss: true
-  activation_checkpoint: true
-  sos: <|startoftranscript|>
-  eos: <|endoftext|>
-  downsample_rate: 4
-  use_padmask: true
-
-encoder: SenseVoiceEncoder
+encoder: SenseVoiceEncoderSmall
 encoder_conf:
-  input_size: 128
-  attention_heads: 20
-  linear_units: 1280
-  num_blocks: 32
-  dropout_rate: 0.1
-  positional_dropout_rate: 0.1
-  attention_dropout_rate: 0.1
-  kernel_size: 31
-  sanm_shfit: 0
-  att_type: self_att_fsmn_sdpa
-  downsample_rate: 4
-  use_padmask: true
-  max_position_embeddings: 2048
-  rope_theta: 10000
-  
-frontend: WhisperFrontend
-frontend_conf:
-  fs: 16000
-  n_mels: 128
-  do_pad_trim: false
-  filters_path: null
+    output_size: 512
+    attention_heads: 4
+    linear_units: 2048
+    num_blocks: 50
+    tp_blocks: 20
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.1
+    input_layer: pe
+    pos_enc_class: SinusoidalPositionEncoder
+    normalize_before: true
+    kernel_size: 11
+    sanm_shfit: 0
+    selfattention_layer_type: sanm
 
-tokenizer: SenseVoiceTokenizer
+
+model: SenseVoiceSmall
+model_conf:
+    length_normalized_loss: true
+    sos: 1
+    eos: 2
+    ignore_id: -1
+
+tokenizer: SentencepiecesTokenizer
 tokenizer_conf:
-  vocab_path: null
-  is_multilingual: true
-  num_languages: 8749
+  bpemodel: null
+  unk_symbol: <unk>
+  split_with_space: true
 
-dataset: SenseVoiceDataset
+frontend: WavFrontend
+frontend_conf:
+    fs: 16000
+    window: hamming
+    n_mels: 80
+    frame_length: 25
+    frame_shift: 10
+    lfr_m: 7
+    lfr_n: 6
+    cmvn_file: null
+
+
+dataset: SenseVoiceCTCDataset
 dataset_conf:
   index_ds: IndexDSJsonl
-  batch_sampler: BatchSampler
+  batch_sampler: EspnetStyleBatchSampler
+  data_split_num: 32
   batch_type: token
-  batch_size: 12000
-  sort_size: 64
+  batch_size: 14000
   max_token_length: 2000
   min_token_length: 60
   max_source_length: 2000
   min_source_length: 60
-  max_target_length: 150
+  max_target_length: 200
   min_target_length: 0
   shuffle: true
   num_workers: 4
   sos: ${model_conf.sos}
   eos: ${model_conf.eos}
   IndexDSJsonl: IndexDSJsonl
+  retry: 20
 
 train_conf:
   accum_grad: 1
   grad_clip: 5
-  max_epoch: 5
-  keep_nbest_models: 200
-  avg_nbest_model: 200
+  max_epoch: 20
+  keep_nbest_models: 10
+  avg_nbest_model: 10
   log_interval: 100
   resume: true
   validate_interval: 10000
@@ -198,11 +204,10 @@
 
 optim: adamw
 optim_conf:
-  lr: 2.5e-05
-
+  lr: 0.00002
 scheduler: warmuplr
 scheduler_conf:
-  warmup_steps: 20000
+  warmup_steps: 25000
 
 ```
 
@@ -222,8 +227,8 @@
   "file_path_metas": {
     "init_param":"model.pt", 
     "config":"config.yaml",
-    "tokenizer_conf": {"vocab_path": "tokens.tiktoken"},
-    "frontend_conf":{"filters_path": "mel_filters.npz"}}
+    "tokenizer_conf": {"bpemodel": "chn_jpn_yue_eng_ko_spectok.bpe.model"},
+    "frontend_conf":{"cmvn_file": "am.mvn"}}
 }
 ```
 
@@ -231,22 +236,21 @@
 
 ### 鏌ョ湅娉ㄥ唽琛�
 
-```python
+```python
 from funasr.register import tables
 
 tables.print()
 ```
 
-鏀寔鏌ョ湅鎸囧畾绫诲瀷鐨勬敞鍐岃〃锛歚tables.print("model")`
+鏀寔鏌ョ湅鎸囧畾绫诲瀷鐨勬敞鍐岃〃锛歕`tables.print("model")\`
 
-
-### 鏂版敞鍐�
+### 娉ㄥ唽妯″瀷
 
 ```python
 from funasr.register import tables
 
-@tables.register("model_classes", "MinMo_S2T")
-class MinMo_S2T(nn.Module):
+@tables.register("model_classes", "SenseVoiceSmall")
+class SenseVoiceSmall(nn.Module):
   def __init__(*args, **kwargs):
     ...
 
@@ -268,10 +272,14 @@
 
 ```
 
-鍦╟onfig.yaml涓寚瀹氭柊娉ㄥ唽妯″瀷
+在需要注册的类名前加上 `@tables.register("model_classes","SenseVoiceSmall")`，即可完成注册。被注册的类需要实现以下方法：`__init__`、`forward`、`inference`。
 
-```yaml
-model: MinMo_S2T
+瀹屾暣浠ｇ爜锛歔https://github.com/modelscope/FunASR/blob/main/funasr/models/sense\_voice/model.py#L443](https://github.com/modelscope/FunASR/blob/main/funasr/models/sense_voice/model.py#L443)
+
+娉ㄥ唽瀹屾垚鍚庯紝鍦╟onfig.yaml涓寚瀹氭柊娉ㄥ唽妯″瀷锛屽嵆鍙疄鐜板妯″瀷鐨勫畾涔�
+
+```yaml
+model: SenseVoiceSmall
 model_conf:
   ...
 ```

--
Gitblit v1.9.1