From 2aa7d9182226f1304f58330cc82376f8aae3ee22 Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: 星期二, 29 十月 2024 16:49:18 +0800
Subject: [PATCH] Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main

---
 docs/tutorial/Tables_zh.md |  156 +++++++++++++++++++++++++++++-----------------------
 1 files changed, 87 insertions(+), 69 deletions(-)

diff --git a/docs/tutorial/Tables_zh.md b/docs/tutorial/Tables_zh.md
index ec64baf..9f616cf 100644
--- a/docs/tutorial/Tables_zh.md
+++ b/docs/tutorial/Tables_zh.md
@@ -1,4 +1,4 @@
-# FunASR-1.x.x 注册模型教程
+# FunASR-1.x.x 注册模型教程
 
 1.0版本的设计初衷是【**让模型集成更简单**】，核心feature为注册表与AutoModel：
 
@@ -11,7 +11,7 @@
 *   统一学术与工业模型推理训练脚本；
     
 
-![image](https://alidocs.oss-cn-zhangjiakou.aliyuncs.com/a/6Ea1DxkZVte8y0g2/b78f122bd40b485687e5e13faa78ae850521.png)
+![image](https://alidocs.oss-cn-zhangjiakou.aliyuncs.com/a/6Ea1DxkZVte8y0g2/150e0eafd1c34f2dbb9360ccb5db4dc40521.png)
 
 # 快速上手
 
@@ -89,14 +89,14 @@
 res = model.generate(input=[str], output_dir=[str])
 ```
 
-*   wav文件路径, 例如: asr\_example.wav
-    
-*   pcm文件路径, 例如: asr\_example.pcm，此时需要指定音频采样率fs（默认为16000）
-    
-*   音频字节数流，例如：麦克风的字节数数据
-    
-*   wav.scp，kaldi 样式的 wav 列表 (`wav_id \t wav_path`), 例如:
-    
+*   *   wav文件路径, 例如: asr\_example.wav
+        
+    *   pcm文件路径, 例如: asr\_example.pcm，此时需要指定音频采样率fs（默认为16000）
+        
+    *   音频字节数流，例如：麦克风的字节数数据
+        
+    *   wav.scp，kaldi 样式的 wav 列表 (`wav_id \t wav_path`), 例如:
+        
 
 ```plaintext
 asr_example1  ./audios/asr_example1.wav
@@ -121,76 +121,82 @@
 
 ## 模型资源目录
 
-![image.png](https://alidocs.oss-cn-zhangjiakou.aliyuncs.com/res/8oLl9y628rBNlapY/img/f16961f1-bdfb-4638-83d5-e4cb13a5a4a4.png)
+![image.png](https://alidocs.oss-cn-zhangjiakou.aliyuncs.com/res/8oLl9y628rBNlapY/img/cab7f215-787f-4407-885a-14dc89ae9e02.png)
+
+**模型链接为：**[https://www.modelscope.cn/models/iic/SenseVoiceSmall/files](https://www.modelscope.cn/models/iic/SenseVoiceSmall/files)
 
 **配置文件**：config.yaml
 
 ```yaml
-model: SenseVoiceLarge
-model_conf:
-  lsm_weight: 0.1
-  length_normalized_loss: true
-  activation_checkpoint: true
-  sos: <|startoftranscript|>
-  eos: <|endoftext|>
-  downsample_rate: 4
-  use_padmask: true
-
-encoder: SenseVoiceEncoder
+encoder: SenseVoiceEncoderSmall
 encoder_conf:
-  input_size: 128
-  attention_heads: 20
-  linear_units: 1280
-  num_blocks: 32
-  dropout_rate: 0.1
-  positional_dropout_rate: 0.1
-  attention_dropout_rate: 0.1
-  kernel_size: 31
-  sanm_shfit: 0
-  att_type: self_att_fsmn_sdpa
-  downsample_rate: 4
-  use_padmask: true
-  max_position_embeddings: 2048
-  rope_theta: 10000
-  
-frontend: WhisperFrontend
-frontend_conf:
-  fs: 16000
-  n_mels: 128
-  do_pad_trim: false
-  filters_path: null
+    output_size: 512
+    attention_heads: 4
+    linear_units: 2048
+    num_blocks: 50
+    tp_blocks: 20
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.1
+    input_layer: pe
+    pos_enc_class: SinusoidalPositionEncoder
+    normalize_before: true
+    kernel_size: 11
+    sanm_shfit: 0
+    selfattention_layer_type: sanm
 
-tokenizer: SenseVoiceTokenizer
+
+model: SenseVoiceSmall
+model_conf:
+    length_normalized_loss: true
+    sos: 1
+    eos: 2
+    ignore_id: -1
+
+tokenizer: SentencepiecesTokenizer
 tokenizer_conf:
-  vocab_path: null
-  is_multilingual: true
-  num_languages: 8749
+  bpemodel: null
+  unk_symbol: <unk>
+  split_with_space: true
 
-dataset: SenseVoiceDataset
+frontend: WavFrontend
+frontend_conf:
+    fs: 16000
+    window: hamming
+    n_mels: 80
+    frame_length: 25
+    frame_shift: 10
+    lfr_m: 7
+    lfr_n: 6
+    cmvn_file: null
+
+
+dataset: SenseVoiceCTCDataset
 dataset_conf:
   index_ds: IndexDSJsonl
-  batch_sampler: BatchSampler
+  batch_sampler: EspnetStyleBatchSampler
+  data_split_num: 32
   batch_type: token
-  batch_size: 12000
-  sort_size: 64
+  batch_size: 14000
   max_token_length: 2000
   min_token_length: 60
   max_source_length: 2000
   min_source_length: 60
-  max_target_length: 150
+  max_target_length: 200
   min_target_length: 0
   shuffle: true
   num_workers: 4
   sos: ${model_conf.sos}
   eos: ${model_conf.eos}
   IndexDSJsonl: IndexDSJsonl
+  retry: 20
 
 train_conf:
   accum_grad: 1
   grad_clip: 5
-  max_epoch: 5
-  keep_nbest_models: 200
-  avg_nbest_model: 200
+  max_epoch: 20
+  keep_nbest_models: 10
+  avg_nbest_model: 10
   log_interval: 100
   resume: true
   validate_interval: 10000
@@ -198,11 +204,10 @@
 
 optim: adamw
 optim_conf:
-  lr: 2.5e-05
-
+  lr: 0.00002
 scheduler: warmuplr
 scheduler_conf:
-  warmup_steps: 20000
+  warmup_steps: 25000
 
 ```
 
@@ -222,31 +227,32 @@
   "file_path_metas": {
     "init_param":"model.pt", 
     "config":"config.yaml",
-    "tokenizer_conf": {"vocab_path": "tokens.tiktoken"},
-    "frontend_conf":{"filters_path": "mel_filters.npz"}}
+    "tokenizer_conf": {"bpemodel": "chn_jpn_yue_eng_ko_spectok.bpe.model"},
+    "frontend_conf":{"cmvn_file": "am.mvn"}}
 }
 ```
+
+内容可以复用，直接拷贝即可，需要注意字段 `file_path_metas` 所有内容会自动拼接模型资源路径，并且会覆盖 `config.yaml` 中相同字段的路径。
 
 ## 注册表
 
 ### 查看注册表
 
-```python
+```python
 from funasr.register import tables
 
 tables.print()
 ```
 
-鏀寔鏌ョ湅鎸囧畾绫诲瀷鐨勬敞鍐岃〃锛歚tables.print("model")`
+鏀寔鏌ョ湅鎸囧畾绫诲瀷鐨勬敞鍐岃〃锛屼緥濡傚彧鐪嬫敞鍐岀殑`model`绫伙細`tables.print("model")`
 
-
-### 新注册
+### 注册模型
 
 ```python
 from funasr.register import tables
 
-@tables.register("model_classes", "MinMo_S2T")
-class MinMo_S2T(nn.Module):
+@tables.register("model_classes", "SenseVoiceSmall")
+class SenseVoiceSmall(nn.Module):
   def __init__(*args, **kwargs):
     ...
 
@@ -268,14 +274,26 @@
 
 ```
 
-在config.yaml中指定新注册模型
+在需要注册的类名前加上 `@tables.register("model_classes","SenseVoiceSmall")`，即可完成注册，类需要实现有：__init__，forward，inference方法。
 
-```yaml
-model: MinMo_S2T
+完整代码：[https://github.com/modelscope/FunASR/blob/main/funasr/models/sense\_voice/model.py#L443](https://github.com/modelscope/FunASR/blob/main/funasr/models/sense_voice/model.py#L443)
+
+注册完成后，在config.yaml中指定新注册模型，即可实现对模型的定义
+
+```yaml
+model: SenseVoiceSmall
 model_conf:
   ...
 ```
 
+## 注册失败
+
+如果出现找不到注册模型或者注册函数，`assert model_class is not None, f'{kwargs["model"]} is not registered'`。模型注册的原理是，import 模型文件，可以通过import来查看具体注册失败原因，例如，上述模型文件为，funasr/models/sense_voice/model.py：
+
+```python
+from funasr.models.sense_voice.model import *
+```
+
 ## 注册原则
 
 *   Model：模型之间互相独立，每一个模型，都需要在funasr/models/下面新建一个模型目录，不要采用类的继承方法！！！不要从其他模型目录中import，所有需要用到的都单独放到自己的模型目录中！！！不要修改现有的模型代码！！！

--
Gitblit v1.9.1