From 9366bd9bcfa093f33e8636e76e668cef6fc2b581 Mon Sep 17 00:00:00 2001
From: yhliang <68215459+yhliang-aslp@users.noreply.github.com>
Date: 星期三, 27 九月 2023 10:07:50 +0800
Subject: [PATCH] update dev_lyh (#982)
---
funasr/runtime/python/websocket/funasr_wss_client.py | 11 +++++
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/finetune.py | 2
funasr/runtime/python/websocket/funasr_client_api.py | 5 +-
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py | 2
funasr/runtime/docs/SDK_advanced_guide_offline_zh.md | 20 ++++++---
funasr/runtime/docs/SDK_advanced_guide_online_zh.md | 7 ++-
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/infer.py | 2
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online_v2.py | 7 ++-
funasr/runtime/python/websocket/funasr_wss_server.py | 8 +++-
funasr/runtime/docs/SDK_tutorial_zh.md | 10 +++--
egs_modelscope/asr/TEMPLATE/README.md | 11 +++--
egs_modelscope/asr/TEMPLATE/README_zh.md | 11 +++--
funasr/runtime/docs/SDK_tutorial_online_zh.md | 19 ++++++---
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py | 2
14 files changed, 78 insertions(+), 39 deletions(-)
diff --git a/egs_modelscope/asr/TEMPLATE/README.md b/egs_modelscope/asr/TEMPLATE/README.md
index a8cb486..4cf6a7e 100644
--- a/egs_modelscope/asr/TEMPLATE/README.md
+++ b/egs_modelscope/asr/TEMPLATE/README.md
@@ -27,15 +27,18 @@
inference_pipeline = pipeline(
task=Tasks.auto_speech_recognition,
model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
- model_revision='v1.0.6',
+ model_revision='v1.0.7',
update_model=False,
mode='paraformer_streaming'
)
import soundfile
speech, sample_rate = soundfile.read("example/asr_example.wav")
-chunk_size = [5, 10, 5] #[5, 10, 5] 600ms, [8, 8, 4] 480ms
-param_dict = {"cache": dict(), "is_final": False, "chunk_size": chunk_size}
+chunk_size = [0, 10, 5] #[0, 10, 5] 600ms, [0, 8, 4] 480ms
+encoder_chunk_look_back = 4 #number of chunks to lookback for encoder self-attention
+decoder_chunk_look_back = 1 #number of encoder chunks to lookback for decoder cross-attention
+param_dict = {"cache": dict(), "is_final": False, "chunk_size": chunk_size,
+ "encoder_chunk_look_back": encoder_chunk_look_back, "decoder_chunk_look_back": decoder_chunk_look_back}
chunk_stride = chunk_size[1] * 960 # 600ms銆�480ms
# first chunk, 600ms
speech_chunk = speech[0:chunk_stride]
@@ -55,7 +58,7 @@
inference_pipeline = pipeline(
task=Tasks.auto_speech_recognition,
model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
- model_revision='v1.0.6',
+ model_revision='v1.0.7',
update_model=False,
mode="paraformer_fake_streaming"
)
diff --git a/egs_modelscope/asr/TEMPLATE/README_zh.md b/egs_modelscope/asr/TEMPLATE/README_zh.md
index 81e0271..6db310e 100644
--- a/egs_modelscope/asr/TEMPLATE/README_zh.md
+++ b/egs_modelscope/asr/TEMPLATE/README_zh.md
@@ -27,15 +27,18 @@
inference_pipeline = pipeline(
task=Tasks.auto_speech_recognition,
model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
- model_revision='v1.0.6',
+ model_revision='v1.0.7',
update_model=False,
mode='paraformer_streaming'
)
import soundfile
speech, sample_rate = soundfile.read("example/asr_example.wav")
-chunk_size = [5, 10, 5] #[5, 10, 5] 600ms, [8, 8, 4] 480ms
-param_dict = {"cache": dict(), "is_final": False, "chunk_size": chunk_size}
+chunk_size = [0, 10, 5] #[0, 10, 5] 600ms, [0, 8, 4] 480ms
+encoder_chunk_look_back = 4 #number of chunks to lookback for encoder self-attention
+decoder_chunk_look_back = 1 #number of encoder chunks to lookback for decoder cross-attention
+param_dict = {"cache": dict(), "is_final": False, "chunk_size": chunk_size,
+ "encoder_chunk_look_back": encoder_chunk_look_back, "decoder_chunk_look_back": decoder_chunk_look_back}
chunk_stride = chunk_size[1] * 960 # 600ms銆�480ms
# first chunk, 600ms
speech_chunk = speech[0:chunk_stride]
@@ -55,7 +58,7 @@
inference_pipeline = pipeline(
task=Tasks.auto_speech_recognition,
model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
- model_revision='v1.0.6',
+ model_revision='v1.0.7',
update_model=False,
mode="paraformer_fake_streaming"
)
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
index bef3849..a3d5222 100644
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
@@ -4,7 +4,7 @@
inference_pipeline = pipeline(
task=Tasks.auto_speech_recognition,
model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
- model_revision='v1.0.6',
+ model_revision='v1.0.7',
update_model=False,
mode="paraformer_fake_streaming"
)
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
index c1f4afe..d9b5b2d 100644
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
@@ -14,7 +14,7 @@
inference_pipeline = pipeline(
task=Tasks.auto_speech_recognition,
model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
- model_revision='v1.0.6',
+ model_revision='v1.0.7',
update_model=False,
mode="paraformer_streaming"
)
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online_v2.py b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online_v2.py
index 65881cf..1f2500d 100644
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online_v2.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online_v2.py
@@ -24,9 +24,12 @@
speech_length = speech.shape[0]
sample_offset = 0
-chunk_size = [0, 8, 4] #[5, 10, 5] 600ms, [8, 8, 4] 480ms
+chunk_size = [0, 10, 5] #[0, 10, 5] 600ms, [0, 8, 4] 480ms
+encoder_chunk_look_back = 4 #number of chunks to lookback for encoder self-attention
+decoder_chunk_look_back = 1 #number of encoder chunks to lookback for decoder cross-attention
stride_size = chunk_size[1] * 960
-param_dict = {"cache": dict(), "is_final": False, "chunk_size": chunk_size, "encoder_chunk_look_back": 4, "decoder_chunk_look_back": 1}
+param_dict = {"cache": dict(), "is_final": False, "chunk_size": chunk_size,
+ "encoder_chunk_look_back": encoder_chunk_look_back, "decoder_chunk_look_back": decoder_chunk_look_back}
final_result = ""
for sample_offset in range(0, speech_length, min(stride_size, speech_length - sample_offset)):
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/finetune.py b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/finetune.py
index 30034aa..fa046d8 100644
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/finetune.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/finetune.py
@@ -14,7 +14,7 @@
ds_dict = MsDataset.load(params.data_path)
kwargs = dict(
model=params.model,
- model_revision='v1.0.6',
+ model_revision='v1.0.7',
update_model=False,
data_dir=ds_dict,
dataset_type=params.dataset_type,
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/infer.py b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/infer.py
index 241ebef..0537dda 100644
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/infer.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/infer.py
@@ -11,7 +11,7 @@
model=args.model,
output_dir=args.output_dir,
batch_size=args.batch_size,
- model_revision='v1.0.6',
+ model_revision='v1.0.7',
update_model=False,
mode="paraformer_fake_streaming",
param_dict={"decoding_model": args.decoding_mode, "hotword": args.hotword_txt}
diff --git a/funasr/runtime/docs/SDK_advanced_guide_offline_zh.md b/funasr/runtime/docs/SDK_advanced_guide_offline_zh.md
index 5894873..09f4cdc 100644
--- a/funasr/runtime/docs/SDK_advanced_guide_offline_zh.md
+++ b/funasr/runtime/docs/SDK_advanced_guide_offline_zh.md
@@ -22,9 +22,12 @@
閫氳繃涓嬭堪鍛戒护鎷夊彇骞跺惎鍔‵unASR runtime-SDK鐨刣ocker闀滃儚锛�
```shell
-sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.2.2
+sudo docker pull \
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.2.2
mkdir -p ./funasr-runtime-resources/models
-sudo docker run -p 10095:10095 -it --privileged=true -v ./funasr-runtime-resources/models:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.2.2
+sudo docker run -p 10095:10095 -it --privileged=true \
+ -v ./funasr-runtime-resources/models:/workspace/models \
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.2.2
```
濡傛灉鎮ㄦ病鏈夊畨瑁卍ocker锛屽彲鍙傝�僛Docker瀹夎](#Docker瀹夎)
@@ -100,18 +103,20 @@
鑻ユ兂鐩存帴杩愯client杩涜娴嬭瘯锛屽彲鍙傝�冨涓嬬畝鏄撹鏄庯紝浠ython鐗堟湰涓轰緥锛�
```shell
-python3 wss_client_asr.py --host "127.0.0.1" --port 10095 --mode offline --audio_in "../audio/asr_example.wav" --output_dir "./results"
+python3 funasr_wss_client.py --host "127.0.0.1" --port 10095 --mode offline \
+ --audio_in "../audio/asr_example.wav" --output_dir "./results"
```
鍛戒护鍙傛暟璇存槑锛�
```text
---host 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛岄渶瑕佹敼涓洪儴缃叉満鍣╥p
+--host 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛�
+ 闇�瑕佹敼涓洪儴缃叉満鍣╥p
--port 10095 閮ㄧ讲绔彛鍙�
--mode offline琛ㄧず绂荤嚎鏂囦欢杞啓
--audio_in 闇�瑕佽繘琛岃浆鍐欑殑闊抽鏂囦欢锛屾敮鎸佹枃浠惰矾寰勶紝鏂囦欢鍒楄〃wav.scp
--thread_num 璁剧疆骞跺彂鍙戦�佺嚎绋嬫暟锛岄粯璁や负1
--ssl 璁剧疆鏄惁寮�鍚痵sl璇佷功鏍¢獙锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
---hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (could be: 闃块噷宸村反 杈炬懇闄�)
+--hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓�(闃块噷宸村反 杈炬懇闄�)
--use_itn 璁剧疆鏄惁浣跨敤itn锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
```
@@ -124,10 +129,11 @@
鍛戒护鍙傛暟璇存槑锛�
```text
---server-ip 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛岄渶瑕佹敼涓洪儴缃叉満鍣╥p
+--server-ip 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛�
+ 闇�瑕佹敼涓洪儴缃叉満鍣╥p
--port 10095 閮ㄧ讲绔彛鍙�
--wav-path 闇�瑕佽繘琛岃浆鍐欑殑闊抽鏂囦欢锛屾敮鎸佹枃浠惰矾寰�
---hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (could be: 闃块噷宸村反 杈炬懇闄�)
+--hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (闃块噷宸村反 杈炬懇闄�)
--use-itn 璁剧疆鏄惁浣跨敤itn锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
```
diff --git a/funasr/runtime/docs/SDK_advanced_guide_online_zh.md b/funasr/runtime/docs/SDK_advanced_guide_online_zh.md
index d61f34b..52f325f 100644
--- a/funasr/runtime/docs/SDK_advanced_guide_online_zh.md
+++ b/funasr/runtime/docs/SDK_advanced_guide_online_zh.md
@@ -11,9 +11,12 @@
閫氳繃涓嬭堪鍛戒护鎷夊彇骞跺惎鍔‵unASR杞欢鍖呯殑docker闀滃儚锛�
```shell
-sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.2
+sudo docker pull \
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.2
mkdir -p ./funasr-runtime-resources/models
-sudo docker run -p 10095:10095 -it --privileged=true -v ./funasr-runtime-resources/models:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.2
+sudo docker run -p 10095:10095 -it --privileged=true \
+ -v ./funasr-runtime-resources/models:/workspace/models \
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.2
```
濡傛灉鎮ㄦ病鏈夊畨瑁卍ocker锛屽彲鍙傝�僛Docker瀹夎](https://alibaba-damo-academy.github.io/FunASR/en/installation/docker_zh.html)
diff --git a/funasr/runtime/docs/SDK_tutorial_online_zh.md b/funasr/runtime/docs/SDK_tutorial_online_zh.md
index afcca4b..f0d447a 100644
--- a/funasr/runtime/docs/SDK_tutorial_online_zh.md
+++ b/funasr/runtime/docs/SDK_tutorial_online_zh.md
@@ -67,34 +67,39 @@
鍛戒护鍙傛暟璇存槑锛�
```text
---host 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛岄渶瑕佹敼涓洪儴缃叉満鍣╥p
+--host 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛�
+ 闇�瑕佹敼涓洪儴缃叉満鍣╥p
--port 10095 閮ㄧ讲绔彛鍙�
---mode锛歚offline`琛ㄧず鎺ㄧ悊妯″紡涓轰竴鍙ヨ瘽璇嗗埆锛沗online`琛ㄧず鎺ㄧ悊妯″紡涓哄疄鏃惰闊宠瘑鍒紱`2pass`琛ㄧず涓哄疄鏃惰闊宠瘑鍒紝骞朵笖璇磋瘽鍙ュ熬閲囩敤绂荤嚎妯″瀷杩涜绾犻敊銆�
+--mode锛歚offline`琛ㄧず鎺ㄧ悊妯″紡涓轰竴鍙ヨ瘽璇嗗埆锛沗online`琛ㄧず鎺ㄧ悊妯″紡涓哄疄鏃惰闊宠瘑鍒紱`2pass`琛ㄧず涓哄疄鏃惰闊宠瘑鍒紝
+ 骞朵笖璇磋瘽鍙ュ熬閲囩敤绂荤嚎妯″瀷杩涜绾犻敊銆�
--chunk_size锛氳〃绀烘祦寮忔ā鍨媗atency閰嶇疆`[5,10,5]`锛岃〃绀哄綋鍓嶉煶棰戣В鐮佺墖娈典负600ms锛屽苟涓斿洖鐪�300ms锛屽彸鐪�300ms銆�
--audio_in 闇�瑕佽繘琛岃浆鍐欑殑闊抽鏂囦欢锛屾敮鎸佹枃浠惰矾寰勶紝鏂囦欢鍒楄〃wav.scp
--thread_num 璁剧疆骞跺彂鍙戦�佺嚎绋嬫暟锛岄粯璁や负1
--ssl 璁剧疆鏄惁寮�鍚痵sl璇佷功鏍¢獙锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴+
---hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (could be: 闃块噷宸村反 杈炬懇闄�)
+--hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (闃块噷宸村反 杈炬懇闄�)
--use_itn 璁剧疆鏄惁浣跨敤itn锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
```
### cpp-client
杩涘叆samples/cpp鐩綍鍚庯紝鍙互鐢╟pp杩涜娴嬭瘯锛屾寚浠ゅ涓嬶細
```shell
-./funasr-wss-client-2pass --server-ip 127.0.0.1 --port 10095 --mode 2pass --wav-path ../audio/asr_example.wav
+./funasr-wss-client-2pass --server-ip 127.0.0.1 --port 10095 --mode 2pass \
+ --wav-path ../audio/asr_example.wav
```
鍛戒护鍙傛暟璇存槑锛�
```text
---server-ip 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛岄渶瑕佹敼涓洪儴缃叉満鍣╥p
+--server-ip 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛�
+ 闇�瑕佹敼涓洪儴缃叉満鍣╥p
--port 10095 閮ㄧ讲绔彛鍙�
---mode锛歚offline`琛ㄧず鎺ㄧ悊妯″紡涓轰竴鍙ヨ瘽璇嗗埆锛沗online`琛ㄧず鎺ㄧ悊妯″紡涓哄疄鏃惰闊宠瘑鍒紱`2pass`琛ㄧず涓哄疄鏃惰闊宠瘑鍒紝骞朵笖璇磋瘽鍙ュ熬閲囩敤绂荤嚎妯″瀷杩涜绾犻敊銆�
+--mode锛歚offline`琛ㄧず鎺ㄧ悊妯″紡涓轰竴鍙ヨ瘽璇嗗埆锛沗online`琛ㄧず鎺ㄧ悊妯″紡涓哄疄鏃惰闊宠瘑鍒紱`2pass`琛ㄧず涓哄疄鏃惰闊宠瘑鍒紝
+ 骞朵笖璇磋瘽鍙ュ熬閲囩敤绂荤嚎妯″瀷杩涜绾犻敊銆�
--chunk-size锛氳〃绀烘祦寮忔ā鍨媗atency閰嶇疆`[5,10,5]`锛岃〃绀哄綋鍓嶉煶棰戣В鐮佺墖娈典负600ms锛屽苟涓斿洖鐪�300ms锛屽彸鐪�300ms銆�
--wav-path 闇�瑕佽繘琛岃浆鍐欑殑闊抽鏂囦欢锛屾敮鎸佹枃浠惰矾寰�
--thread-num 璁剧疆骞跺彂鍙戦�佺嚎绋嬫暟锛岄粯璁や负1
--is-ssl 璁剧疆鏄惁寮�鍚痵sl璇佷功鏍¢獙锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
---hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (could be: 闃块噷宸村反 杈炬懇闄�)
+--hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (闃块噷宸村反 杈炬懇闄�)
--use-itn 璁剧疆鏄惁浣跨敤itn锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
```
diff --git a/funasr/runtime/docs/SDK_tutorial_zh.md b/funasr/runtime/docs/SDK_tutorial_zh.md
index cdad39b..2c08957 100644
--- a/funasr/runtime/docs/SDK_tutorial_zh.md
+++ b/funasr/runtime/docs/SDK_tutorial_zh.md
@@ -69,13 +69,14 @@
鍛戒护鍙傛暟璇存槑锛�
```text
---host 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛岄渶瑕佹敼涓洪儴缃叉満鍣╥p
+--host 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛�
+ 闇�瑕佹敼涓洪儴缃叉満鍣╥p
--port 10095 閮ㄧ讲绔彛鍙�
--mode offline琛ㄧず绂荤嚎鏂囦欢杞啓
--audio_in 闇�瑕佽繘琛岃浆鍐欑殑闊抽鏂囦欢锛屾敮鎸佹枃浠惰矾寰勶紝鏂囦欢鍒楄〃wav.scp
--thread_num 璁剧疆骞跺彂鍙戦�佺嚎绋嬫暟锛岄粯璁や负1
--ssl 璁剧疆鏄惁寮�鍚痵sl璇佷功鏍¢獙锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
---hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (could be: 闃块噷宸村反 杈炬懇闄�)
+--hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (闃块噷宸村反 杈炬懇闄�)
--use_itn 璁剧疆鏄惁浣跨敤itn锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
```
@@ -88,12 +89,13 @@
鍛戒护鍙傛暟璇存槑锛�
```text
---server-ip 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛岄渶瑕佹敼涓洪儴缃叉満鍣╥p
+--server-ip 涓篎unASR runtime-SDK鏈嶅姟閮ㄧ讲鏈哄櫒ip锛岄粯璁や负鏈満ip锛�127.0.0.1锛夛紝濡傛灉client涓庢湇鍔′笉鍦ㄥ悓涓�鍙版湇鍔″櫒锛�
+ 闇�瑕佹敼涓洪儴缃叉満鍣╥p
--port 10095 閮ㄧ讲绔彛鍙�
--wav-path 闇�瑕佽繘琛岃浆鍐欑殑闊抽鏂囦欢锛屾敮鎸佹枃浠惰矾寰�
--thread_num 璁剧疆骞跺彂鍙戦�佺嚎绋嬫暟锛岄粯璁や负1
--ssl 璁剧疆鏄惁寮�鍚痵sl璇佷功鏍¢獙锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
---hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (could be: 闃块噷宸村反 杈炬懇闄�)
+--hotword 濡傛灉妯″瀷涓虹儹璇嶆ā鍨嬶紝鍙互璁剧疆鐑瘝: *.txt(姣忚涓�涓儹璇�) 鎴栬�呯┖鏍煎垎闅旂殑鐑瘝瀛楃涓� (闃块噷宸村反 杈炬懇闄�)
--use-itn 璁剧疆鏄惁浣跨敤itn锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
```
diff --git a/funasr/runtime/python/websocket/funasr_client_api.py b/funasr/runtime/python/websocket/funasr_client_api.py
index aa573c0..d0992ff 100644
--- a/funasr/runtime/python/websocket/funasr_client_api.py
+++ b/funasr/runtime/python/websocket/funasr_client_api.py
@@ -51,7 +51,8 @@
stride = int(60 * chunk_size[1]/ chunk_interval / 1000 * 16000 * 2)
chunk_num = (len(audio_bytes) - 1) // stride + 1
- message = json.dumps({"mode": mode, "chunk_size": chunk_size, "chunk_interval": chunk_interval,
+ message = json.dumps({"mode": args.mode, "chunk_size": args.chunk_size, "encoder_chunk_look_back": 4,
+ "decoder_chunk_look_back": 1, "chunk_interval": args.chunk_interval,
"wav_name": wav_name, "is_speaking": True})
self.websocket.send(message)
@@ -131,4 +132,4 @@
print("text",text)
-
\ No newline at end of file
+
diff --git a/funasr/runtime/python/websocket/funasr_wss_client.py b/funasr/runtime/python/websocket/funasr_wss_client.py
index bed0081..f4f35bb 100644
--- a/funasr/runtime/python/websocket/funasr_wss_client.py
+++ b/funasr/runtime/python/websocket/funasr_wss_client.py
@@ -29,6 +29,14 @@
type=str,
default="5, 10, 5",
help="chunk")
+parser.add_argument("--encoder_chunk_look_back",
+ type=int,
+ default=4,
+ help="number of chunks to lookback for encoder self-attention")
+parser.add_argument("--decoder_chunk_look_back",
+ type=int,
+ default=1,
+ help="number of encoder chunks to lookback for decoder cross-attention")
parser.add_argument("--chunk_interval",
type=int,
default=10,
@@ -99,7 +107,8 @@
input=True,
frames_per_buffer=CHUNK)
- message = json.dumps({"mode": args.mode, "chunk_size": args.chunk_size, "chunk_interval": args.chunk_interval,
+ message = json.dumps({"mode": args.mode, "chunk_size": args.chunk_size, "encoder_chunk_look_back": args.encoder_chunk_look_back,
+ "decoder_chunk_look_back": args.decoder_chunk_look_back, "chunk_interval": args.chunk_interval,
"wav_name": "microphone", "is_speaking": True})
#voices.put(message)
await websocket.send(message)
diff --git a/funasr/runtime/python/websocket/funasr_wss_server.py b/funasr/runtime/python/websocket/funasr_wss_server.py
index 57f7584..b74f225 100644
--- a/funasr/runtime/python/websocket/funasr_wss_server.py
+++ b/funasr/runtime/python/websocket/funasr_wss_server.py
@@ -103,8 +103,8 @@
model=args.asr_model_online,
ngpu=args.ngpu,
ncpu=args.ncpu,
- model_revision='v1.0.4',
- update_model='v1.0.4',
+ model_revision='v1.0.7',
+ update_model='v1.0.7',
mode='paraformer_streaming')
print("model loaded! only support one client at the same time now!!!!")
@@ -159,6 +159,10 @@
websocket.wav_name = messagejson.get("wav_name")
if "chunk_size" in messagejson:
websocket.param_dict_asr_online["chunk_size"] = messagejson["chunk_size"]
+ if "encoder_chunk_look_back" in messagejson:
+ websocket.param_dict_asr_online["encoder_chunk_look_back"] = messagejson["encoder_chunk_look_back"]
+ if "decoder_chunk_look_back" in messagejson:
+ websocket.param_dict_asr_online["decoder_chunk_look_back"] = messagejson["decoder_chunk_look_back"]
if "mode" in messagejson:
websocket.mode = messagejson["mode"]
if len(frames_asr_online) > 0 or len(frames_asr) > 0 or not isinstance(message, str):
--
Gitblit v1.9.1