From a05e753d11d9c36983ec4e58c421dbcf86d1dcd4 Mon Sep 17 00:00:00 2001
From: Xian Shi <40013335+R1ckShi@users.noreply.github.com>
Date: Tue, 17 Oct 2023 16:47:27 +0800
Subject: [PATCH] Merge branch 'main' into dev_onnx

---
 egs_modelscope/asr/TEMPLATE/README_zh.md |   51 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/egs_modelscope/asr/TEMPLATE/README_zh.md b/egs_modelscope/asr/TEMPLATE/README_zh.md
index 81e0271..47656b3 100644
--- a/egs_modelscope/asr/TEMPLATE/README_zh.md
+++ b/egs_modelscope/asr/TEMPLATE/README_zh.md
@@ -27,15 +27,18 @@
 inference_pipeline = pipeline(
     task=Tasks.auto_speech_recognition,
     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
-    model_revision='v1.0.6',
+    model_revision='v1.0.7',
     update_model=False,
     mode='paraformer_streaming'
     )
 import soundfile
 speech, sample_rate = soundfile.read("example/asr_example.wav")
 
-chunk_size = [5, 10, 5] #[5, 10, 5] 600ms, [8, 8, 4] 480ms
-param_dict = {"cache": dict(), "is_final": False, "chunk_size": chunk_size}
+chunk_size = [0, 10, 5] #[0, 10, 5] 600ms, [0, 8, 4] 480ms
+encoder_chunk_look_back = 4 #number of chunks to lookback for encoder self-attention
+decoder_chunk_look_back = 1 #number of encoder chunks to lookback for decoder cross-attention
+param_dict = {"cache": dict(), "is_final": False, "chunk_size": chunk_size,
+              "encoder_chunk_look_back": encoder_chunk_look_back, "decoder_chunk_look_back": decoder_chunk_look_back}
 chunk_stride = chunk_size[1] * 960 # 600ms、480ms
 # first chunk, 600ms
 speech_chunk = speech[0:chunk_stride]
@@ -55,7 +58,7 @@
 inference_pipeline = pipeline(
     task=Tasks.auto_speech_recognition,
     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
-    model_revision='v1.0.6',
+    model_revision='v1.0.7',
     update_model=False,
     mode="paraformer_fake_streaming"
 )
@@ -64,6 +67,23 @@
 print(rec_result)
 ```
 演示代码完整版本,请参考[demo](https://github.com/alibaba-damo-academy/FunASR/discussions/241)
+
+#### [Paraformer-contextual Model](https://www.modelscope.cn/models/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404/summary)
+```python
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+
+param_dict = dict()
+# param_dict['hotword'] = "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/hotword.txt"
+param_dict['hotword']="邓郁松 王颖春 王晔君"
+inference_pipeline = pipeline(
+    task=Tasks.auto_speech_recognition,
+    model="damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404",
+    param_dict=param_dict)
+
+rec_result = inference_pipeline(audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_hotword.wav')
+print(rec_result)
+```
 
 #### [UniASR 模型](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/summary)
 UniASR 模型有三种解码模式(fast、normal、offline),更多模型细节请参考[文档](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/summary)
@@ -80,6 +100,29 @@
 fast 和 normal 的解码模式是假流式解码,可用于评估识别准确性。
 演示的完整代码,请参见 [demo](https://github.com/alibaba-damo-academy/FunASR/discussions/151)
 
+#### [Paraformer-Spk model](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/summary)
+返回识别结果的同时返回每个子句的说话人分类结果。关于说话人日志模型的详情请见[CAM++](https://modelscope.cn/models/damo/speech_campplus_speaker-diarization_common/summary)。
+
+```python
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+
+if __name__ == '__main__':
+    audio_in = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_speaker_demo.wav'
+    output_dir = "./results"
+    inference_pipeline = pipeline(
+        task=Tasks.auto_speech_recognition,
+        model='damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn',
+        model_revision='v0.0.2',
+        vad_model='damo/speech_fsmn_vad_zh-cn-16k-common-pytorch',
+        punc_model='damo/punc_ct-transformer_cn-en-common-vocab471067-large',
+        output_dir=output_dir,
+    )
+    rec_result = inference_pipeline(audio_in=audio_in, batch_size_token=5000, batch_size_token_threshold_s=40, max_single_segment_time=6000)
+    print(rec_result)
+```
+
+
 #### [RNN-T-online 模型]()
 Undo
 

--
Gitblit v1.9.1