From 28ccfbfc51068a663a80764e14074df5edf2b5ba Mon Sep 17 00:00:00 2001
From: kongdeqiang <kongdeqiang960204@163.com>
Date: 星期五, 13 三月 2026 17:41:41 +0800
Subject: [PATCH] 提交

---
 docs/tutorial/README_zh.md |  109 ++++++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 96 insertions(+), 13 deletions(-)

diff --git a/docs/tutorial/README_zh.md b/docs/tutorial/README_zh.md
index 4e9bb3f..8960fd0 100644
--- a/docs/tutorial/README_zh.md
+++ b/docs/tutorial/README_zh.md
@@ -7,6 +7,7 @@
  <a href="#妯″瀷鎺ㄧ悊"> 妯″瀷鎺ㄧ悊 </a>   
 锝�<a href="#妯″瀷璁粌涓庢祴璇�"> 妯″瀷璁粌涓庢祴璇� </a>
 锝�<a href="#妯″瀷瀵煎嚭涓庢祴璇�"> 妯″瀷瀵煎嚭涓庢祴璇� </a>
+锝�<a href="#鏂版ā鍨嬫敞鍐屾暀绋�"> 鏂版ā鍨嬫敞鍐屾暀绋� </a>
 </h4>
 </div>
 
@@ -38,7 +39,7 @@
 model = AutoModel(model=[str], device=[str], ncpu=[int], output_dir=[str], batch_size=[int], hub=[str], **kwargs)
 ```
 - `model`(str): [妯″瀷浠撳簱](https://github.com/alibaba-damo-academy/FunASR/tree/main/model_zoo) 涓殑妯″瀷鍚嶇О锛屾垨鏈湴纾佺洏涓殑妯″瀷璺緞
-- `device`(str): `cuda:0`锛堥粯璁pu0锛夛紝浣跨敤 GPU 杩涜鎺ㄧ悊锛屾寚瀹氥�傚鏋滀负`cpu`锛屽垯浣跨敤 CPU 杩涜鎺ㄧ悊
+- `device`(str): `cuda:0`(默认gpu0),指定使用 GPU 进行推理。如果为`cpu`,则使用 CPU 进行推理。`mps`:Mac 电脑 M 系列芯片使用 mps 进行推理。`xpu`:使用英特尔 GPU 进行推理。
 - `ncpu`(int): `4` 锛堥粯璁わ級锛岃缃敤浜� CPU 鍐呴儴鎿嶄綔骞惰鎬х殑绾跨▼鏁�
 - `output_dir`(str): `None` 锛堥粯璁わ級锛屽鏋滆缃紝杈撳嚭缁撴灉鐨勮緭鍑鸿矾寰�
 - `batch_size`(int): `1` 锛堥粯璁わ級锛岃В鐮佹椂鐨勬壒澶勭悊锛屾牱鏈釜鏁�
@@ -131,7 +132,7 @@
 
 model = AutoModel(model="fsmn-vad")
 
-wav_file = f"{model.model_path}/example/asr_example.wav"
+wav_file = f"{model.model_path}/example/vad_example.wav"
 res = model.generate(input=wav_file)
 print(res)
 ```
@@ -212,7 +213,7 @@
 ### 璇︾粏鍙傛暟浠嬬粛
 
 ```shell
-funasr/bin/train.py \
+funasr/bin/train_ds.py \
 ++model="${model_name_or_model_dir}" \
 ++train_data_set_list="${train_data}" \
 ++valid_data_set_list="${val_data}" \
@@ -225,12 +226,12 @@
 ++train_conf.validate_interval=2000 \
 ++train_conf.save_checkpoint_interval=2000 \
 ++train_conf.keep_nbest_models=20 \
-++train_conf.avg_nbest_model=5 \
+++train_conf.avg_nbest_model=10 \
 ++optim_conf.lr=0.0002 \
 ++output_dir="${output_dir}" &> ${log_file}
 ```
 
-- `model`锛坰tr锛夛細妯″瀷鍚嶅瓧锛堟ā鍨嬩粨搴撲腑鐨処D锛夛紝姝ゆ椂鑴氭湰浼氳嚜鍔ㄤ笅杞芥ā鍨嬪埌鏈锛涙垨鑰呮湰鍦板凡缁忎笅杞藉ソ鐨勬ā鍨嬭矾寰勩��
+- `model`锛坰tr锛夛細妯″瀷鍚嶅瓧锛堟ā鍨嬩粨搴撲腑鐨処D锛夛紝姝ゆ椂鑴氭湰浼氳嚜鍔ㄤ笅杞芥ā鍨嬪埌鏈湴锛涙垨鑰呮湰鍦板凡缁忎笅杞藉ソ鐨勬ā鍨嬭矾寰勩��
 - `train_data_set_list`锛坰tr锛夛細璁粌鏁版嵁璺緞锛岄粯璁や负jsonl鏍煎紡锛屽叿浣撳弬鑰冿紙[渚嬪瓙](https://github.com/alibaba-damo-academy/FunASR/blob/main/data/list)锛夈��
 - `valid_data_set_list`锛坰tr锛夛細楠岃瘉鏁版嵁璺緞锛岄粯璁や负jsonl鏍煎紡锛屽叿浣撳弬鑰冿紙[渚嬪瓙](https://github.com/alibaba-damo-academy/FunASR/blob/main/data/list)锛夈��
 - `dataset_conf.batch_type`锛坰tr锛夛細`example`锛堥粯璁わ級锛宐atch鐨勭被鍨嬨�俙example`琛ㄧず鎸夌収鍥哄畾鏁扮洰batch_size涓牱鏈粍batch锛沗length` or `token` 琛ㄧず鍔ㄦ�佺粍batch锛宐atch鎬婚暱搴︽垨鑰卼oken鏁颁负batch_size銆�
@@ -242,7 +243,7 @@
 - `train_conf.save_checkpoint_interval`锛坕nt锛夛細`5000`锛堥粯璁わ級锛岃缁冧腑妯″瀷淇濆瓨闂撮殧step鏁般��
 - `train_conf.avg_keep_nbest_models_type`锛坰tr锛夛細`acc`锛堥粯璁わ級锛屼繚鐣檔best鐨勬爣鍑嗕负acc锛堣秺澶ц秺濂斤級銆俙loss`琛ㄧず锛屼繚鐣檔best鐨勬爣鍑嗕负loss锛堣秺灏忚秺濂斤級銆�
 - `train_conf.keep_nbest_models`锛坕nt锛夛細`500`锛堥粯璁わ級锛屼繚鐣欐渶澶у灏戜釜妯″瀷鍙傛暟锛岄厤鍚� `avg_keep_nbest_models_type` 鎸夌収楠岃瘉闆� acc/loss 淇濈暀鏈�浣崇殑n涓ā鍨嬶紝鍏朵粬鍒犻櫎锛岃妭绾﹀瓨鍌ㄧ┖闂淬��
-- `train_conf.avg_nbest_model`锛坕nt锛夛細`5`锛堥粯璁わ級锛屼繚鐣欐渶澶у灏戜釜妯″瀷鍙傛暟锛岄厤鍚� `avg_keep_nbest_models_type` 鎸夌収楠岃瘉闆� acc/loss 瀵规渶浣崇殑n涓ā鍨嬪钩鍧囥��
+- `train_conf.avg_nbest_model`锛坕nt锛夛細`10`锛堥粯璁わ級锛屼繚鐣欐渶澶у灏戜釜妯″瀷鍙傛暟锛岄厤鍚� `avg_keep_nbest_models_type` 鎸夌収楠岃瘉闆� acc/loss 瀵规渶浣崇殑n涓ā鍨嬪钩鍧囥��
 - `train_conf.accum_grad`锛坕nt锛夛細`1`锛堥粯璁わ級锛屾搴︾疮绉姛鑳姐��
 - `train_conf.grad_clip`锛坒loat锛夛細`10.0`锛堥粯璁わ級锛屾搴︽埅鏂姛鑳姐��
 - `train_conf.use_fp16`锛坆ool锛夛細`False`锛堥粯璁わ級锛屽紑鍚痜p16璁粌锛屽姞蹇缁冮�熷害銆�
@@ -257,7 +258,7 @@
 gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 
 torchrun --nnodes 1 --nproc_per_node ${gpu_num} \
-../../../funasr/bin/train.py ${train_args}
+../../../funasr/bin/train_ds.py ${train_args}
 ```
 --nnodes 琛ㄧず鍙備笌鐨勮妭鐐规�绘暟锛�--nproc_per_node 琛ㄧず姣忎釜鑺傜偣涓婅繍琛岀殑杩涚▼鏁�
 
@@ -268,16 +269,16 @@
 export CUDA_VISIBLE_DEVICES="0,1"
 gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 
-torchrun --nnodes 2 --node_rank 0 --nproc_per_node ${gpu_num} --master_addr=192.168.1.1 --master_port=12345 \
-../../../funasr/bin/train.py ${train_args}
+torchrun --nnodes 2 --node_rank 0 --nproc_per_node ${gpu_num} --master_addr 192.168.1.1 --master_port 12345 \
+../../../funasr/bin/train_ds.py ${train_args}
 ```
 鍦ㄤ粠鑺傜偣涓婏紙鍋囪IP涓�192.168.1.2锛夛紝浣犻渶瑕佺‘淇滿ASTER_ADDR鍜孧ASTER_PORT鐜鍙橀噺涓庝富鑺傜偣璁剧疆鐨勪竴鑷达紝骞惰繍琛屽悓鏍风殑鍛戒护锛�
 ```shell
 export CUDA_VISIBLE_DEVICES="0,1"
 gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 
-torchrun --nnodes 2 --node_rank 1 --nproc_per_node ${gpu_num} --master_addr=192.168.1.1 --master_port=12345 \
-../../../funasr/bin/train.py ${train_args}
+torchrun --nnodes 2 --node_rank 1 --nproc_per_node ${gpu_num} --master_addr 192.168.1.1 --master_port 12345 \
+../../../funasr/bin/train_ds.py ${train_args}
 ```
 
 --nnodes 琛ㄧず鍙備笌鐨勮妭鐐规�绘暟锛�--node_rank 琛ㄧず褰撳墠鑺傜偣id锛�--nproc_per_node 琛ㄧず姣忎釜鑺傜偣涓婅繍琛岀殑杩涚▼鏁帮紙閫氬父涓篻pu涓暟锛�
@@ -330,6 +331,25 @@
 ++jsonl_file_in="../../../data/list/train.jsonl"
 ```
 
+#### 澶ф暟鎹缁�
+濡傛灉鏁版嵁閲忓緢澶э紝渚嬪5涓囧皬鏃朵互涓婏紝杩欐椂鍊欏鏄撻亣鍒板唴瀛樹笉瓒崇殑闂锛岀壒鍒槸澶歡pu瀹為獙锛岃繖鏃跺�欓渶瑕佸jsonl鏂囦欢杩涜鍒囧垎鎴恠lice锛岀劧鍚庡啓鍒皌xt閲岄潰锛屼竴琛屼竴涓猻lice锛岀劧鍚庤缃甡data_split_num`锛屼緥濡傦細
+```shell
+train_data="/root/data/list/data.list"
+
+funasr/bin/train_ds.py \
+++train_data_set_list="${train_data}" \
+++dataset_conf.data_split_num=256
+```
+鍏朵腑锛�
+`data.list`锛氫负绾枃鏈紝鍐呭鏄垏鍓插悗鐨刯sonl鏂囦欢锛屼緥濡傦紝`data.list`鐨勫唴瀹逛负锛�
+```bash
+data/list/train.0.jsonl
+data/list/train.1.jsonl
+...
+```
+`data_split_num`锛氳〃绀哄垏鍒唖lice鍒嗙粍涓暟锛屼緥濡傦紝data.list涓叡512琛岋紝data_split_num=256锛岃〃绀哄垎鎴�256缁勶紝姣忕粍鏈�2涓猨sonl鏂囦欢锛岃繖鏍锋瘡娆″彧load 2涓猨sonl鏁版嵁杩涜璁粌锛屼粠鑰岄檷浣庤缁冭繃绋嬩腑鍐呭瓨浣跨敤銆傛敞鎰忔槸鎸夌収椤哄簭鍒嗙粍銆�
+如果数据量非常大,并且数据类型差异比较大,建议在切分时进行数据均衡。
+
 #### 鏌ョ湅璁粌鏃ュ織
 
 ##### 鏌ョ湅瀹為獙log
@@ -359,7 +379,7 @@
 
 #### 鏈塩onfiguration.json
 
-鍋囧畾锛岃缁冩ā鍨嬭矾寰勪负锛�./model_dir锛屽鏋滄敼鐩綍涓嬫湁鐢熸垚configuration.json锛屽彧闇�瑕佸皢 [涓婅堪妯″瀷鎺ㄧ悊鏂规硶](https://github.com/alibaba-damo-academy/FunASR/blob/main/examples/README_zh.md#%E6%A8%A1%E5%9E%8B%E6%8E%A8%E7%90%86) 涓ā鍨嬪悕瀛椾慨鏀逛负妯″瀷璺緞鍗冲彲
+鍋囧畾锛岃缁冩ā鍨嬭矾寰勪负锛�./model_dir锛屽鏋滆鐩綍涓嬫湁鐢熸垚configuration.json锛屽彧闇�瑕佸皢 [涓婅堪妯″瀷鎺ㄧ悊鏂规硶](https://github.com/alibaba-damo-academy/FunASR/blob/main/examples/README_zh.md#%E6%A8%A1%E5%9E%8B%E6%8E%A8%E7%90%86) 涓ā鍨嬪悕瀛椾慨鏀逛负妯″瀷璺緞鍗冲彲
 
 渚嬪锛�
 
@@ -420,6 +440,12 @@
 res = model.export(quantize=False)
 ```
 
+### 优化ONNX
+```shell
+# pip3 install -U onnxslim
+onnxslim model.onnx model.onnx
+```
+
 ### 娴嬭瘯ONNX
 ```python
 # pip3 install -U funasr-onnx
@@ -433,4 +459,61 @@
 print(result)
 ```
 
-鏇村渚嬪瓙璇峰弬鑰� [鏍蜂緥](https://github.com/alibaba-damo-academy/FunASR/tree/main/runtime/python/onnxruntime)
\ No newline at end of file
+鏇村渚嬪瓙璇峰弬鑰� [鏍蜂緥](https://github.com/alibaba-damo-academy/FunASR/tree/main/runtime/python/onnxruntime)
+
+<a name="鏂版ā鍨嬫敞鍐屾暀绋�"></a>
+## 鏂版ā鍨嬫敞鍐屾暀绋�
+
+
+### 鏌ョ湅娉ㄥ唽琛�
+
+```python
+from funasr.register import tables
+
+tables.print()
+```
+
+鏀寔鏌ョ湅鎸囧畾绫诲瀷鐨勬敞鍐岃〃锛歕`tables.print("model")\`
+
+### 娉ㄥ唽妯″瀷
+
+```python
+from torch import nn
+
+from funasr.register import tables
+
+@tables.register("model_classes", "SenseVoiceSmall")
+class SenseVoiceSmall(nn.Module):
+  def __init__(self, *args, **kwargs):
+    ...
+
+  def forward(self, **kwargs):
+    ...
+
+  def inference(
+      self,
+      data_in,
+      data_lengths=None,
+      key: list = None,
+      tokenizer=None,
+      frontend=None,
+      **kwargs,
+  ):
+    ...
+
+```
+
+在需要注册的类名前加上 `@tables.register("model_classes","SenseVoiceSmall")`,即可完成注册。类需要实现 `__init__`、`forward`、`inference` 方法。
+
+瀹屾暣浠g爜锛歔https://github.com/modelscope/FunASR/blob/main/funasr/models/sense\_voice/model.py#L443](https://github.com/modelscope/FunASR/blob/main/funasr/models/sense_voice/model.py#L443)
+
+娉ㄥ唽瀹屾垚鍚庯紝鍦╟onfig.yaml涓寚瀹氭柊娉ㄥ唽妯″瀷锛屽嵆鍙疄鐜板妯″瀷鐨勫畾涔�
+
+```yaml
+model: SenseVoiceSmall
+model_conf:
+  ...
+```
+
+
+[鍏充簬娉ㄥ唽鏇村璇︾粏鏁欑▼鏂囨。](https://github.com/modelscope/FunASR/blob/main/docs/tutorial/Tables_zh.md)

--
Gitblit v1.9.1