From 9c0735b7dfb1eaa1d8f186fe99f7d29e04a1178b Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期五, 22 三月 2024 19:09:49 +0800
Subject: [PATCH] update

---
 examples/README_zh.md |  131 ++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 128 insertions(+), 3 deletions(-)

diff --git a/examples/README_zh.md b/examples/README_zh.md
index 0e3846d..59d1303 100644
--- a/examples/README_zh.md
+++ b/examples/README_zh.md
@@ -16,17 +16,18 @@
 print(res)
 ```
 
-### 详细用法介绍
+### 接口说明
 
 #### AutoModel 定义
 ```python
-model = AutoModel(model=[str], device=[str], ncpu=[int], output_dir=[str], batch_size=[int], **kwargs)
+model = AutoModel(model=[str], device=[str], ncpu=[int], output_dir=[str], batch_size=[int], hub=[str], **kwargs)
 ```
 - `model`(str): [模型仓库](https://github.com/alibaba-damo-academy/FunASR/tree/main/model_zoo) 中的模型名称，或本地磁盘中的模型路径
 - `device`(str): `cuda:0`（默认gpu0），使用 GPU 进行推理，指定。如果为`cpu`，则使用 CPU 进行推理
 - `ncpu`(int): `4` （默认），设置用于 CPU 内部操作并行性的线程数
 - `output_dir`(str): `None` （默认），如果设置，输出结果的输出路径
-- `batch_size`(int): `1` （默认），解码时的批处理大小
+- `batch_size`(int): `1` （默认），解码时的批处理，样本个数
+- `hub`(str)：`ms`（默认），从modelscope下载模型。如果为`hf`，从huggingface下载模型。
 - `**kwargs`(dict): 所有在`config.yaml`中参数，均可以直接在此处指定，例如，vad模型中最大切割长度 `max_single_segment_time=6000` （毫秒）。
 
 #### AutoModel 推理
@@ -57,6 +58,130 @@
 - `type`(str)：`onnx`(默认)，导出onnx格式。`torch`导出libtorch格式。
 - `quantize`(bool)：`False`（默认），是否做量化。
 
+### 更多用法介绍
+
+
+#### 非实时语音识别
+```python
+from funasr import AutoModel
+# paraformer-zh is a multi-functional ASR model;
+# enable the VAD, punctuation and speaker models only as needed.
+model = AutoModel(model="paraformer-zh",
+                  vad_model="fsmn-vad",
+                  vad_kwargs={"max_single_segment_time": 60000},  # max VAD segment length, in ms
+                  punc_model="ct-punc",
+                  # spk_model="cam++"
+                  )
+wav_file = f"{model.model_path}/example/asr_example.wav"
+res = model.generate(input=wav_file, batch_size_s=300, batch_size_threshold_s=60, hotword='魔搭')
+print(res)
+```
+注意：
+- 通常模型输入限制时长30s以下，组合`vad_model`后，支持任意时长音频输入，不局限于paraformer模型，所有音频输入模型均可以。
+- `model`相关的参数可以直接在`AutoModel`定义中直接指定；与`vad_model`相关参数可以通过`vad_kwargs`来指定，类型为dict；类似的有`punc_kwargs`，`spk_kwargs`；
+- `max_single_segment_time`: 表示`vad_model`最大切割音频时长, 单位是毫秒ms.
+- `batch_size_s` 表示采用动态batch，batch中总音频时长，单位为秒s。
+- `batch_size_threshold_s`: 表示`vad_model`切割后音频片段时长超过 `batch_size_threshold_s`阈值时，将batch_size数设置为1, 单位为秒s.
+
+建议：当您输入为长音频，遇到OOM问题时，因为显存占用与音频时长呈平方关系增加，分为3种情况：
+- a)推理起始阶段，显存主要取决于`batch_size_s`，适当减小该值，可以减少显存占用；
+- b)推理中间阶段，遇到VAD切割的长音频片段，总token数小于`batch_size_s`，仍然出现OOM，可以适当减小`batch_size_threshold_s`，超过阈值，强制batch为1; 
+- c)推理快结束阶段，遇到VAD切割的长音频片段，总token数小于`batch_size_s`，且超过阈值`batch_size_threshold_s`，强制batch为1，仍然出现OOM，可以适当减小`max_single_segment_time`，使得VAD切割音频时长变短。
+
+#### 实时语音识别
+
+```python
+import os
+
+import soundfile
+from funasr import AutoModel
+
+chunk_size = [0, 10, 5]  # [0, 10, 5] -> 600ms chunks, [0, 8, 4] -> 480ms chunks
+encoder_chunk_look_back = 4  # number of chunks to look back for encoder self-attention
+decoder_chunk_look_back = 1  # number of encoder chunks to look back for decoder cross-attention
+
+model = AutoModel(model="paraformer-zh-streaming")
+
+wav_file = os.path.join(model.model_path, "example/asr_example.wav")
+speech, sample_rate = soundfile.read(wav_file)
+chunk_stride = chunk_size[1] * 960  # samples per chunk: 10 * 960 = 9600 (600ms at 16kHz)
+
+cache = {}
+total_chunk_num = (len(speech) - 1) // chunk_stride + 1  # ceil(len(speech) / chunk_stride)
+for i in range(total_chunk_num):
+    speech_chunk = speech[i*chunk_stride:(i+1)*chunk_stride]
+    is_final = i == total_chunk_num - 1  # the last chunk must flush the remaining output
+    res = model.generate(input=speech_chunk, cache=cache, is_final=is_final, chunk_size=chunk_size, encoder_chunk_look_back=encoder_chunk_look_back, decoder_chunk_look_back=decoder_chunk_look_back)
+    print(res)
+```
+
+娉細`chunk_size`涓烘祦寮忓欢鏃堕厤缃紝`[0,10,5]`琛ㄧず涓婂睆瀹炴椂鍑哄瓧绮掑害涓篳10*60=600ms`锛屾湭鏉ヤ俊鎭负`5*60=300ms`銆傛瘡娆℃帹鐞嗚緭鍏ヤ负`600ms`锛堥噰鏍风偣鏁颁负`16000*0.6=960`锛夛紝杈撳嚭涓哄搴旀枃瀛楋紝鏈�鍚庝竴涓闊崇墖娈佃緭鍏ラ渶瑕佽缃甡is_final=True`鏉ュ己鍒惰緭鍑烘渶鍚庝竴涓瓧銆�
+
+#### 璇煶绔偣妫�娴嬶紙闈炲疄鏃讹級
+```python
+from funasr import AutoModel
+
+# Offline voice activity detection over one complete audio file.
+vad = AutoModel(model="fsmn-vad")
+example_wav = "{}/example/asr_example.wav".format(vad.model_path)
+result = vad.generate(input=example_wav)
+print(result)
+```
+娉細VAD妯″瀷杈撳嚭鏍煎紡涓猴細`[[beg1, end1], [beg2, end2], .., [begN, endN]]`锛屽叾涓璥begN/endN`琛ㄧず绗琡N`涓湁鏁堥煶棰戠墖娈电殑璧峰鐐�/缁撴潫鐐癸紝
+鍗曚綅涓烘绉掋��
+
+#### 璇煶绔偣妫�娴嬶紙瀹炴椂锛�
+```python
+import soundfile
+from funasr import AutoModel
+
+chunk_size = 200  # chunk duration in ms
+model = AutoModel(model="fsmn-vad")
+
+wav_file = f"{model.model_path}/example/vad_example.wav"
+speech, sample_rate = soundfile.read(wav_file)
+chunk_stride = int(chunk_size * sample_rate / 1000)  # samples per chunk
+
+cache = {}
+total_chunk_num = (len(speech) - 1) // chunk_stride + 1  # ceil(len(speech) / chunk_stride)
+for i in range(total_chunk_num):
+    speech_chunk = speech[i*chunk_stride:(i+1)*chunk_stride]
+    is_final = i == total_chunk_num - 1
+    res = model.generate(input=speech_chunk, cache=cache, is_final=is_final, chunk_size=chunk_size)
+    # Print only the chunks for which the model reported a non-empty "value".
+    if len(res[0]["value"]):
+        print(res)
+```
+娉細娴佸紡VAD妯″瀷杈撳嚭鏍煎紡涓�4绉嶆儏鍐碉細
+- `[[beg1, end1], [beg2, end2], .., [begN, endN]]`锛氬悓涓婄绾縑AD杈撳嚭缁撴灉銆�
+- `[[beg, -1]]`锛氳〃绀哄彧妫�娴嬪埌璧峰鐐广��
+- `[[-1, end]]`锛氳〃绀哄彧妫�娴嬪埌缁撴潫鐐广��
+- `[]`锛氳〃绀烘棦娌℃湁妫�娴嬪埌璧峰鐐癸紝涔熸病鏈夋娴嬪埌缁撴潫鐐�
+杈撳嚭缁撴灉鍗曚綅涓烘绉掞紝浠庤捣濮嬬偣寮�濮嬬殑缁濆鏃堕棿銆�
+
+#### 标点恢复
+```python
+from funasr import AutoModel
+
+model = AutoModel(model="ct-punc")
+# The input is unpunctuated text (mixed Chinese and English).
+res = model.generate(input="那今天的会就到这里吧 happy new year 明年见")
+print(res)
+```
+
+#### 时间戳预测
+```python
+from funasr import AutoModel
+
+# Timestamp prediction from an audio file plus a text file.
+aligner = AutoModel(model="fa-zh")
+example_dir = "{}/example".format(aligner.model_path)
+audio_and_text = (example_dir + "/asr_example.wav", example_dir + "/text.txt")
+result = aligner.generate(input=audio_and_text, data_type=("sound", "text"))
+print(result)
+```
+鏇村锛圼绀轰緥](https://github.com/alibaba-damo-academy/FunASR/tree/main/examples/industrial_data_pretraining)锛�
+
 
 ## 寰皟
 

--
Gitblit v1.9.1