From 9366bd9bcfa093f33e8636e76e668cef6fc2b581 Mon Sep 17 00:00:00 2001
From: yhliang <68215459+yhliang-aslp@users.noreply.github.com>
Date: 星期三, 27 九月 2023 10:07:50 +0800
Subject: [PATCH] update dev_lyh (#982)

---
 funasr/runtime/python/websocket/funasr_wss_client.py                                                              |   11 +++++
 egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/finetune.py       |    2 
 funasr/runtime/python/websocket/funasr_client_api.py                                                              |    5 +-
 egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py           |    2 
 funasr/runtime/docs/SDK_advanced_guide_offline_zh.md                                                              |   20 ++++++---
 funasr/runtime/docs/SDK_advanced_guide_online_zh.md                                                               |    7 ++-
 egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/infer.py          |    2 
 egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online_v2.py |    7 ++-
 funasr/runtime/python/websocket/funasr_wss_server.py                                                              |    8 +++-
 funasr/runtime/docs/SDK_tutorial_zh.md                                                                            |   10 +++--
 egs_modelscope/asr/TEMPLATE/README.md                                                                             |   11 +++--
 egs_modelscope/asr/TEMPLATE/README_zh.md                                                                          |   11 +++--
 funasr/runtime/docs/SDK_tutorial_online_zh.md                                                                     |   19 ++++++---
 egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py    |    2 
 14 files changed, 78 insertions(+), 39 deletions(-)

diff --git a/egs_modelscope/asr/TEMPLATE/README.md b/egs_modelscope/asr/TEMPLATE/README.md
index a8cb486..4cf6a7e 100644
--- a/egs_modelscope/asr/TEMPLATE/README.md
+++ b/egs_modelscope/asr/TEMPLATE/README.md
@@ -27,15 +27,18 @@
 inference_pipeline = pipeline(
     task=Tasks.auto_speech_recognition,
     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
-    model_revision='v1.0.6',
+    model_revision='v1.0.7',
     update_model=False,
     mode='paraformer_streaming'
     )
 import soundfile
 speech, sample_rate = soundfile.read("example/asr_example.wav")
 
-chunk_size = [5, 10, 5] #[5, 10, 5] 600ms, [8, 8, 4] 480ms
-param_dict = {"cache": dict(), "is_final": False, "chunk_size": chunk_size}
+chunk_size = [0, 10, 5] #[0, 10, 5] 600ms, [0, 8, 4] 480ms
+encoder_chunk_look_back = 4 #number of chunks to lookback for encoder self-attention
+decoder_chunk_look_back = 1 #number of encoder chunks to lookback for decoder cross-attention
+param_dict = {"cache": dict(), "is_final": False, "chunk_size": chunk_size,
+              "encoder_chunk_look_back": encoder_chunk_look_back, "decoder_chunk_look_back": decoder_chunk_look_back}
 chunk_stride = chunk_size[1] * 960 # 600ms銆�480ms
 # first chunk, 600ms
 speech_chunk = speech[0:chunk_stride]
@@ -55,7 +58,7 @@
 inference_pipeline = pipeline(
     task=Tasks.auto_speech_recognition,
     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
-    model_revision='v1.0.6',
+    model_revision='v1.0.7',
     update_model=False,
     mode="paraformer_fake_streaming"
 )
diff --git a/egs_modelscope/asr/TEMPLATE/README_zh.md b/egs_modelscope/asr/TEMPLATE/README_zh.md
index 81e0271..6db310e 100644
--- a/egs_modelscope/asr/TEMPLATE/README_zh.md
+++ b/egs_modelscope/asr/TEMPLATE/README_zh.md
@@ -27,15 +27,18 @@
 inference_pipeline = pipeline(
     task=Tasks.auto_speech_recognition,
     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
-    model_revision='v1.0.6',
+    model_revision='v1.0.7',
     update_model=False,
     mode='paraformer_streaming'
     )
 import soundfile
 speech, sample_rate = soundfile.read("example/asr_example.wav")
 
-chunk_size = [5, 10, 5] #[5, 10, 5] 600ms, [8, 8, 4] 480ms
-param_dict = {"cache": dict(), "is_final": False, "chunk_size": chunk_size}
+chunk_size = [0, 10, 5] #[0, 10, 5] 600ms, [0, 8, 4] 480ms
+encoder_chunk_look_back = 4 #number of chunks to lookback for encoder self-attention
+decoder_chunk_look_back = 1 #number of encoder chunks to lookback for decoder cross-attention
+param_dict = {"cache": dict(), "is_final": False, "chunk_size": chunk_size,
+              "encoder_chunk_look_back": encoder_chunk_look_back, "decoder_chunk_look_back": decoder_chunk_look_back}
 chunk_stride = chunk_size[1] * 960 # 600ms銆�480ms
 # first chunk, 600ms
 speech_chunk = speech[0:chunk_stride]
@@ -55,7 +58,7 @@
 inference_pipeline = pipeline(
     task=Tasks.auto_speech_recognition,
     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
-    model_revision='v1.0.6',
+    model_revision='v1.0.7',
     update_model=False,
     mode="paraformer_fake_streaming"
 )
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
index bef3849..a3d5222 100644
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
@@ -4,7 +4,7 @@
 inference_pipeline = pipeline(
     task=Tasks.auto_speech_recognition,
     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
-    model_revision='v1.0.6',
+    model_revision='v1.0.7',
     update_model=False,
     mode="paraformer_fake_streaming"
 )
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
index c1f4afe..d9b5b2d 100644
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
@@ -14,7 +14,7 @@
 inference_pipeline = pipeline(
     task=Tasks.auto_speech_recognition,
     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
-    model_revision='v1.0.6',
+    model_revision='v1.0.7',
     update_model=False,
     mode="paraformer_streaming"
 )
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online_v2.py b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online_v2.py
index 65881cf..1f2500d 100644
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online_v2.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online_v2.py
@@ -24,9 +24,12 @@
 speech_length = speech.shape[0]
 
 sample_offset = 0
-chunk_size = [0, 8, 4] #[5, 10, 5] 600ms, [8, 8, 4] 480ms
+chunk_size = [0, 10, 5] #[0, 10, 5] 600ms, [0, 8, 4] 480ms
+encoder_chunk_look_back = 4 #number of chunks to lookback for encoder self-attention
+decoder_chunk_look_back = 1 #number of encoder chunks to lookback for decoder cross-attention
 stride_size =  chunk_size[1] * 960
-param_dict = {"cache": dict(), "is_final": False, "chunk_size": chunk_size, "encoder_chunk_look_back": 4, "decoder_chunk_look_back": 1}
+param_dict = {"cache": dict(), "is_final": False, "chunk_size": chunk_size, 
+              "encoder_chunk_look_back": encoder_chunk_look_back, "decoder_chunk_look_back": decoder_chunk_look_back}
 final_result = ""
 
 for sample_offset in range(0, speech_length, min(stride_size, speech_length - sample_offset)):
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/finetune.py b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/finetune.py
index 30034aa..fa046d8 100644
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/finetune.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/finetune.py
@@ -14,7 +14,7 @@
     ds_dict = MsDataset.load(params.data_path)
     kwargs = dict(
         model=params.model,
-        model_revision='v1.0.6',
+        model_revision='v1.0.7',
         update_model=False,
         data_dir=ds_dict,
         dataset_type=params.dataset_type,
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/infer.py b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/infer.py
index 241ebef..0537dda 100644
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/infer.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/infer.py
@@ -11,7 +11,7 @@
         model=args.model,
         output_dir=args.output_dir,
         batch_size=args.batch_size,
-        model_revision='v1.0.6',
+        model_revision='v1.0.7',
         update_model=False,
         mode="paraformer_fake_streaming",
         param_dict={"decoding_model": args.decoding_mode, "hotword": args.hotword_txt}
diff --git a/funasr/runtime/docs/SDK_advanced_guide_offline_zh.md b/funasr/runtime/docs/SDK_advanced_guide_offline_zh.md
index 5894873..09f4cdc 100644
--- a/funasr/runtime/docs/SDK_advanced_guide_offline_zh.md
+++ b/funasr/runtime/docs/SDK_advanced_guide_offline_zh.md
@@ -22,9 +22,12 @@
 閫氳繃涓嬭堪鍛戒护鎷夊彇骞跺惎鍔‵unASR runtime-SDK鐨刣ocker闀滃儚锛�
 
 ```shell
-sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.2.2
+sudo docker pull \
+  registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.2.2
 mkdir -p ./funasr-runtime-resources/models
-sudo docker run -p 10095:10095 -it --privileged=true -v ./funasr-runtime-resources/models:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.2.2
+sudo docker run -p 10095:10095 -it --privileged=true \
+  -v ./funasr-runtime-resources/models:/workspace/models \
+  registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.2.2
 ```
 濡傛灉鎮ㄦ病鏈夊畨瑁卍ocker锛屽彲鍙傝�僛Docker瀹夎](#Docker瀹夎)
 
@@ -100,18 +103,20 @@
 鑻ユ兂鐩存帴杩愯client杩涜娴嬭瘯锛屽彲鍙傝�冨涓嬬畝鏄撹鏄庯紝浠ython鐗堟湰涓轰緥锛�
 
 ```shell
-python3 wss_client_asr.py --host "127.0.0.1" --port 10095 --mode offline --audio_in "../audio/asr_example.wav" --output_dir "./results"
+python3 funasr_wss_client.py --host "127.0.0.1" --port 10095 --mode offline \
+        --audio_in "../audio/asr_example.wav" --output_dir "./results"
 ```
 
 鍛戒护鍙傛暟璇存槑锛�
 ```text
---host 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛岄渶瑕佹敼涓洪儴缃叉満鍣╥p
+--host 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛�
+       闇�瑕佹敼涓洪儴缃叉満鍣╥p
 --port 10095 閮ㄧ讲绔彛鍙�
 --mode offline琛ㄧず绂荤嚎鏂囦欢杞啓
 --audio_in 闇�瑕佽繘琛岃浆鍐欑殑闊抽鏂囦欢锛屾敮鎸佹枃浠惰矾寰勶紝鏂囦欢鍒楄〃wav.scp
 --thread_num 璁剧疆骞跺彂鍙戦�佺嚎绋嬫暟锛岄粯璁や负1
 --ssl 璁剧疆鏄惁寮�鍚痵sl璇佷功鏍¢獙锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
---hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (could be: 闃块噷宸村反 杈炬懇闄�)
+--hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓�(闃块噷宸村反 杈炬懇闄�)
 --use_itn 璁剧疆鏄惁浣跨敤itn锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
 ```
 
@@ -124,10 +129,11 @@
 鍛戒护鍙傛暟璇存槑锛�
 
 ```text
---server-ip 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛岄渶瑕佹敼涓洪儴缃叉満鍣╥p
+--server-ip 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛�
+            闇�瑕佹敼涓洪儴缃叉満鍣╥p
 --port 10095 閮ㄧ讲绔彛鍙�
 --wav-path 闇�瑕佽繘琛岃浆鍐欑殑闊抽鏂囦欢锛屾敮鎸佹枃浠惰矾寰�
---hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (could be: 闃块噷宸村反 杈炬懇闄�)
+--hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (闃块噷宸村反 杈炬懇闄�)
 --use-itn 璁剧疆鏄惁浣跨敤itn锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
 ```
 
diff --git a/funasr/runtime/docs/SDK_advanced_guide_online_zh.md b/funasr/runtime/docs/SDK_advanced_guide_online_zh.md
index d61f34b..52f325f 100644
--- a/funasr/runtime/docs/SDK_advanced_guide_online_zh.md
+++ b/funasr/runtime/docs/SDK_advanced_guide_online_zh.md
@@ -11,9 +11,12 @@
 閫氳繃涓嬭堪鍛戒护鎷夊彇骞跺惎鍔‵unASR杞欢鍖呯殑docker闀滃儚锛�
 
 ```shell
-sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.2
+sudo docker pull \
+  registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.2
 mkdir -p ./funasr-runtime-resources/models
-sudo docker run -p 10095:10095 -it --privileged=true -v ./funasr-runtime-resources/models:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.2
+sudo docker run -p 10095:10095 -it --privileged=true \
+  -v ./funasr-runtime-resources/models:/workspace/models \
+  registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.2
 ```
 濡傛灉鎮ㄦ病鏈夊畨瑁卍ocker锛屽彲鍙傝�僛Docker瀹夎](https://alibaba-damo-academy.github.io/FunASR/en/installation/docker_zh.html)
 
diff --git a/funasr/runtime/docs/SDK_tutorial_online_zh.md b/funasr/runtime/docs/SDK_tutorial_online_zh.md
index afcca4b..f0d447a 100644
--- a/funasr/runtime/docs/SDK_tutorial_online_zh.md
+++ b/funasr/runtime/docs/SDK_tutorial_online_zh.md
@@ -67,34 +67,39 @@
 
 鍛戒护鍙傛暟璇存槑锛�
 ```text
---host 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛岄渶瑕佹敼涓洪儴缃叉満鍣╥p
+--host 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛�
+       闇�瑕佹敼涓洪儴缃叉満鍣╥p
 --port 10095 閮ㄧ讲绔彛鍙�
---mode锛歚offline`琛ㄧず鎺ㄧ悊妯″紡涓轰竴鍙ヨ瘽璇嗗埆锛沗online`琛ㄧず鎺ㄧ悊妯″紡涓哄疄鏃惰闊宠瘑鍒紱`2pass`琛ㄧず涓哄疄鏃惰闊宠瘑鍒紝骞朵笖璇磋瘽鍙ュ熬閲囩敤绂荤嚎妯″瀷杩涜绾犻敊銆�
+--mode锛歚offline`琛ㄧず鎺ㄧ悊妯″紡涓轰竴鍙ヨ瘽璇嗗埆锛沗online`琛ㄧず鎺ㄧ悊妯″紡涓哄疄鏃惰闊宠瘑鍒紱`2pass`琛ㄧず涓哄疄鏃惰闊宠瘑鍒紝
+       骞朵笖璇磋瘽鍙ュ熬閲囩敤绂荤嚎妯″瀷杩涜绾犻敊銆�
 --chunk_size锛氳〃绀烘祦寮忔ā鍨媗atency閰嶇疆`[5,10,5]`锛岃〃绀哄綋鍓嶉煶棰戣В鐮佺墖娈典负600ms锛屽苟涓斿洖鐪�300ms锛屽彸鐪�300ms銆�
 --audio_in 闇�瑕佽繘琛岃浆鍐欑殑闊抽鏂囦欢锛屾敮鎸佹枃浠惰矾寰勶紝鏂囦欢鍒楄〃wav.scp
 --thread_num 璁剧疆骞跺彂鍙戦�佺嚎绋嬫暟锛岄粯璁や负1
 --ssl 璁剧疆鏄惁寮�鍚痵sl璇佷功鏍¢獙锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴+
---hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (could be: 闃块噷宸村反 杈炬懇闄�)
+--hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (闃块噷宸村反 杈炬懇闄�)
 --use_itn 璁剧疆鏄惁浣跨敤itn锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
 ```
 
 ### cpp-client
 杩涘叆samples/cpp鐩綍鍚庯紝鍙互鐢╟pp杩涜娴嬭瘯锛屾寚浠ゅ涓嬶細
 ```shell
-./funasr-wss-client-2pass --server-ip 127.0.0.1 --port 10095 --mode 2pass --wav-path ../audio/asr_example.wav
+./funasr-wss-client-2pass --server-ip 127.0.0.1 --port 10095 --mode 2pass \
+   --wav-path ../audio/asr_example.wav
 ```
 
 鍛戒护鍙傛暟璇存槑锛�
 
 ```text
---server-ip 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛岄渶瑕佹敼涓洪儴缃叉満鍣╥p
+--server-ip 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛�
+            闇�瑕佹敼涓洪儴缃叉満鍣╥p
 --port 10095 閮ㄧ讲绔彛鍙�
---mode锛歚offline`琛ㄧず鎺ㄧ悊妯″紡涓轰竴鍙ヨ瘽璇嗗埆锛沗online`琛ㄧず鎺ㄧ悊妯″紡涓哄疄鏃惰闊宠瘑鍒紱`2pass`琛ㄧず涓哄疄鏃惰闊宠瘑鍒紝骞朵笖璇磋瘽鍙ュ熬閲囩敤绂荤嚎妯″瀷杩涜绾犻敊銆�
+--mode锛歚offline`琛ㄧず鎺ㄧ悊妯″紡涓轰竴鍙ヨ瘽璇嗗埆锛沗online`琛ㄧず鎺ㄧ悊妯″紡涓哄疄鏃惰闊宠瘑鍒紱`2pass`琛ㄧず涓哄疄鏃惰闊宠瘑鍒紝
+        骞朵笖璇磋瘽鍙ュ熬閲囩敤绂荤嚎妯″瀷杩涜绾犻敊銆�
 --chunk-size锛氳〃绀烘祦寮忔ā鍨媗atency閰嶇疆`[5,10,5]`锛岃〃绀哄綋鍓嶉煶棰戣В鐮佺墖娈典负600ms锛屽苟涓斿洖鐪�300ms锛屽彸鐪�300ms銆�
 --wav-path 闇�瑕佽繘琛岃浆鍐欑殑闊抽鏂囦欢锛屾敮鎸佹枃浠惰矾寰�
 --thread-num 璁剧疆骞跺彂鍙戦�佺嚎绋嬫暟锛岄粯璁や负1
 --is-ssl 璁剧疆鏄惁寮�鍚痵sl璇佷功鏍¢獙锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
---hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (could be: 闃块噷宸村反 杈炬懇闄�)
+--hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (闃块噷宸村反 杈炬懇闄�)
 --use-itn 璁剧疆鏄惁浣跨敤itn锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
 ```
 
diff --git a/funasr/runtime/docs/SDK_tutorial_zh.md b/funasr/runtime/docs/SDK_tutorial_zh.md
index cdad39b..2c08957 100644
--- a/funasr/runtime/docs/SDK_tutorial_zh.md
+++ b/funasr/runtime/docs/SDK_tutorial_zh.md
@@ -69,13 +69,14 @@
 
 鍛戒护鍙傛暟璇存槑锛�
 ```text
---host 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛岄渶瑕佹敼涓洪儴缃叉満鍣╥p
+--host 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛�
+        闇�瑕佹敼涓洪儴缃叉満鍣╥p
 --port 10095 閮ㄧ讲绔彛鍙�
 --mode offline琛ㄧず绂荤嚎鏂囦欢杞啓
 --audio_in 闇�瑕佽繘琛岃浆鍐欑殑闊抽鏂囦欢锛屾敮鎸佹枃浠惰矾寰勶紝鏂囦欢鍒楄〃wav.scp
 --thread_num 璁剧疆骞跺彂鍙戦�佺嚎绋嬫暟锛岄粯璁や负1
 --ssl 璁剧疆鏄惁寮�鍚痵sl璇佷功鏍¢獙锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
---hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (could be: 闃块噷宸村反 杈炬懇闄�)
+--hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (闃块噷宸村反 杈炬懇闄�)
 --use_itn 璁剧疆鏄惁浣跨敤itn锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
 ```
 
@@ -88,12 +89,13 @@
 鍛戒护鍙傛暟璇存槑锛�
 
 ```text
---server-ip 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛岄渶瑕佹敼涓洪儴缃叉満鍣╥p
+--server-ip 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛�
+            闇�瑕佹敼涓洪儴缃叉満鍣╥p
 --port 10095 閮ㄧ讲绔彛鍙�
 --wav-path 闇�瑕佽繘琛岃浆鍐欑殑闊抽鏂囦欢锛屾敮鎸佹枃浠惰矾寰�
 --thread_num 璁剧疆骞跺彂鍙戦�佺嚎绋嬫暟锛岄粯璁や负1
 --ssl 璁剧疆鏄惁寮�鍚痵sl璇佷功鏍¢獙锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
---hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (could be: 闃块噷宸村反 杈炬懇闄�)
+--hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (闃块噷宸村反 杈炬懇闄�)
 --use-itn 璁剧疆鏄惁浣跨敤itn锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
 ```
 
diff --git a/funasr/runtime/python/websocket/funasr_client_api.py b/funasr/runtime/python/websocket/funasr_client_api.py
index aa573c0..d0992ff 100644
--- a/funasr/runtime/python/websocket/funasr_client_api.py
+++ b/funasr/runtime/python/websocket/funasr_client_api.py
@@ -51,7 +51,8 @@
         stride = int(60 *  chunk_size[1]/  chunk_interval / 1000 * 16000 * 2)
         chunk_num = (len(audio_bytes) - 1) // stride + 1
        
-        message = json.dumps({"mode":  mode, "chunk_size":  chunk_size, "chunk_interval":  chunk_interval,
+        message = json.dumps({"mode": args.mode, "chunk_size": args.chunk_size, "encoder_chunk_look_back": 4,
+                              "decoder_chunk_look_back": 1, "chunk_interval": args.chunk_interval, 
                               "wav_name": wav_name, "is_speaking": True})
  
         self.websocket.send(message)
@@ -131,4 +132,4 @@
     print("text",text)
  
     
-            
\ No newline at end of file
+            
diff --git a/funasr/runtime/python/websocket/funasr_wss_client.py b/funasr/runtime/python/websocket/funasr_wss_client.py
index bed0081..f4f35bb 100644
--- a/funasr/runtime/python/websocket/funasr_wss_client.py
+++ b/funasr/runtime/python/websocket/funasr_wss_client.py
@@ -29,6 +29,14 @@
                     type=str,
                     default="5, 10, 5",
                     help="chunk")
+parser.add_argument("--encoder_chunk_look_back",
+                    type=int,
+                    default=4,
+                    help="number of chunks to lookback for encoder self-attention")
+parser.add_argument("--decoder_chunk_look_back",
+                    type=int,
+                    default=1,
+                    help="number of encoder chunks to lookback for decoder cross-attention")
 parser.add_argument("--chunk_interval",
                     type=int,
                     default=10,
@@ -99,7 +107,8 @@
                     input=True,
                     frames_per_buffer=CHUNK)
 
-    message = json.dumps({"mode": args.mode, "chunk_size": args.chunk_size, "chunk_interval": args.chunk_interval,
+    message = json.dumps({"mode": args.mode, "chunk_size": args.chunk_size, "encoder_chunk_look_back": args.encoder_chunk_look_back,
+                          "decoder_chunk_look_back": args.decoder_chunk_look_back, "chunk_interval": args.chunk_interval, 
                           "wav_name": "microphone", "is_speaking": True})
     #voices.put(message)
     await websocket.send(message)
diff --git a/funasr/runtime/python/websocket/funasr_wss_server.py b/funasr/runtime/python/websocket/funasr_wss_server.py
index 57f7584..b74f225 100644
--- a/funasr/runtime/python/websocket/funasr_wss_server.py
+++ b/funasr/runtime/python/websocket/funasr_wss_server.py
@@ -103,8 +103,8 @@
     model=args.asr_model_online,
     ngpu=args.ngpu,
     ncpu=args.ncpu,
-    model_revision='v1.0.4',
-    update_model='v1.0.4',
+    model_revision='v1.0.7',
+    update_model='v1.0.7',
     mode='paraformer_streaming')
 
 print("model loaded! only support one client at the same time now!!!!")
@@ -159,6 +159,10 @@
                     websocket.wav_name = messagejson.get("wav_name")
                 if "chunk_size" in messagejson:
                     websocket.param_dict_asr_online["chunk_size"] = messagejson["chunk_size"]
+                if "encoder_chunk_look_back" in messagejson:
+                    websocket.param_dict_asr_online["encoder_chunk_look_back"] = messagejson["encoder_chunk_look_back"]
+                if "decoder_chunk_look_back" in messagejson:
+                    websocket.param_dict_asr_online["decoder_chunk_look_back"] = messagejson["decoder_chunk_look_back"]
                 if "mode" in messagejson:
                     websocket.mode = messagejson["mode"]
             if len(frames_asr_online) > 0 or len(frames_asr) > 0 or not isinstance(message, str):

--
Gitblit v1.9.1