From 5f25e809c5acb3a24d9eca942e7540f5ddf6d361 Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: 星期二, 05 十一月 2024 16:33:27 +0800
Subject: [PATCH] Update version.txt
---
examples/README_zh.md | 45 +++++++++++++++++++++++++++++++++++++++++----
1 files changed, 41 insertions(+), 4 deletions(-)
diff --git a/examples/README_zh.md b/examples/README_zh.md
index b016f9e..db5e276 100644
--- a/examples/README_zh.md
+++ b/examples/README_zh.md
@@ -70,6 +70,43 @@
#### 闈炲疄鏃惰闊宠瘑鍒�
+##### SenseVoice
+```python
+from funasr import AutoModel
+from funasr.utils.postprocess_utils import rich_transcription_postprocess
+
+model_dir = "iic/SenseVoiceSmall"
+
+model = AutoModel(
+ model=model_dir,
+ vad_model="fsmn-vad",
+ vad_kwargs={"max_single_segment_time": 30000},
+ device="cuda:0",
+)
+
+# en
+res = model.generate(
+ input=f"{model.model_path}/example/en.mp3",
+ cache={},
+ language="auto", # "zn", "en", "yue", "ja", "ko", "nospeech"
+ use_itn=True,
+ batch_size_s=60,
+ merge_vad=True, #
+ merge_length_s=15,
+)
+text = rich_transcription_postprocess(res[0]["text"])
+print(text)
+```
+鍙傛暟璇存槑锛�
+- `model_dir`锛氭ā鍨嬪悕绉帮紝鎴栨湰鍦扮鐩樹腑鐨勬ā鍨嬭矾寰勩��
+- `vad_model`锛氳〃绀哄紑鍚疺AD锛孷AD鐨勪綔鐢ㄦ槸灏嗛暱闊抽鍒囧壊鎴愮煭闊抽锛屾鏃舵帹鐞嗚�楁椂鍖呮嫭浜哣AD涓嶴enseVoice鎬昏�楁椂锛屼负閾捐矾鑰楁椂锛屽鏋滈渶瑕佸崟鐙祴璇昐enseVoice妯″瀷鑰楁椂锛屽彲浠ュ叧闂璙AD妯″瀷銆�
+- `vad_kwargs`锛氳〃绀篤AD妯″瀷閰嶇疆,`max_single_segment_time`: 琛ㄧず`vad_model`鏈�澶у垏鍓查煶棰戞椂闀�, 鍗曚綅鏄绉抦s銆�
+- `use_itn`锛氳緭鍑虹粨鏋滀腑鏄惁鍖呭惈鏍囩偣涓庨�嗘枃鏈鍒欏寲銆�
+- `batch_size_s` 琛ㄧず閲囩敤鍔ㄦ�乥atch锛宐atch涓�婚煶棰戞椂闀匡紝鍗曚綅涓虹s銆�
+- `merge_vad`锛氭槸鍚﹀皢 vad 妯″瀷鍒囧壊鐨勭煭闊抽纰庣墖鍚堟垚锛屽悎骞跺悗闀垮害涓篳merge_length_s`锛屽崟浣嶄负绉抯銆�
+- `ban_emo_unk`锛氱鐢╡mo_unk鏍囩锛岀鐢ㄥ悗鎵�鏈夌殑鍙ュ瓙閮戒細琚祴涓庢儏鎰熸爣绛俱��
+
+##### Paraformer
```python
from funasr import AutoModel
# paraformer-zh is a multi-functional asr model
@@ -230,7 +267,7 @@
++output_dir="${output_dir}" &> ${log_file}
```
-- `model`锛坰tr锛夛細妯″瀷鍚嶅瓧锛堟ā鍨嬩粨搴撲腑鐨処D锛夛紝姝ゆ椂鑴氭湰浼氳嚜鍔ㄤ笅杞芥ā鍨嬪埌鏈锛涙垨鑰呮湰鍦板凡缁忎笅杞藉ソ鐨勬ā鍨嬭矾寰勩��
+- `model`锛坰tr锛夛細妯″瀷鍚嶅瓧锛堟ā鍨嬩粨搴撲腑鐨処D锛夛紝姝ゆ椂鑴氭湰浼氳嚜鍔ㄤ笅杞芥ā鍨嬪埌鏈湴锛涙垨鑰呮湰鍦板凡缁忎笅杞藉ソ鐨勬ā鍨嬭矾寰勩��
- `train_data_set_list`锛坰tr锛夛細璁粌鏁版嵁璺緞锛岄粯璁や负jsonl鏍煎紡锛屽叿浣撳弬鑰冿紙[渚嬪瓙](https://github.com/alibaba-damo-academy/FunASR/blob/main/data/list)锛夈��
- `valid_data_set_list`锛坰tr锛夛細楠岃瘉鏁版嵁璺緞锛岄粯璁や负jsonl鏍煎紡锛屽叿浣撳弬鑰冿紙[渚嬪瓙](https://github.com/alibaba-damo-academy/FunASR/blob/main/data/list)锛夈��
- `dataset_conf.batch_type`锛坰tr锛夛細`example`锛堥粯璁わ級锛宐atch鐨勭被鍨嬨�俙example`琛ㄧず鎸夌収鍥哄畾鏁扮洰batch_size涓牱鏈粍batch锛沗length` or `token` 琛ㄧず鍔ㄦ�佺粍batch锛宐atch鎬婚暱搴︽垨鑰卼oken鏁颁负batch_size銆�
@@ -256,10 +293,10 @@
export CUDA_VISIBLE_DEVICES="0,1"
gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-torchrun --nnodes 1 --nproc_per_node ${gpu_num} \
+torchrun --nnodes 1 --nproc_per_node ${gpu_num} --master_port 12345 \
../../../funasr/bin/train.py ${train_args}
```
---nnodes 琛ㄧず鍙備笌鐨勮妭鐐规�绘暟锛�--nproc_per_node 琛ㄧず姣忎釜鑺傜偣涓婅繍琛岀殑杩涚▼鏁�
+--nnodes 琛ㄧず鍙備笌鐨勮妭鐐规�绘暟锛�--nproc_per_node 琛ㄧず姣忎釜鑺傜偣涓婅繍琛岀殑杩涚▼鏁帮紝--master_port 琛ㄧず绔彛鍙�
##### 澶氭満澶歡pu璁粌
@@ -280,7 +317,7 @@
../../../funasr/bin/train.py ${train_args}
```
---nnodes 琛ㄧず鍙備笌鐨勮妭鐐规�绘暟锛�--node_rank 琛ㄧず褰撳墠鑺傜偣id锛�--nproc_per_node 琛ㄧず姣忎釜鑺傜偣涓婅繍琛岀殑杩涚▼鏁帮紙閫氬父涓篻pu涓暟锛�
+--nnodes 琛ㄧず鍙備笌鐨勮妭鐐规�绘暟锛�--node_rank 琛ㄧず褰撳墠鑺傜偣id锛�--nproc_per_node 琛ㄧず姣忎釜鑺傜偣涓婅繍琛岀殑杩涚▼鏁帮紙閫氬父涓篻pu涓暟锛夛紝--master_port 琛ㄧず绔彛鍙�
#### 鍑嗗鏁版嵁
--
Gitblit v1.9.1