From d0718e276250e2f6e32eb3404d838dc3b1f1de7f Mon Sep 17 00:00:00 2001
From: wucong.lyb <wucong.lyb@alibaba-inc.com>
Date: 星期四, 29 六月 2023 17:13:24 +0800
Subject: [PATCH] update SDK_advanced.md
---
funasr/runtime/SDK_advanced.md | 214 +++++++++++++++++++++++++++++++++++++++++++++++++++++
funasr/runtime/readme.md | 2
2 files changed, 215 insertions(+), 1 deletions(-)
diff --git a/funasr/runtime/SDK_advanced.md b/funasr/runtime/SDK_advanced.md
new file mode 100644
index 0000000..d4e7464
--- /dev/null
+++ b/funasr/runtime/SDK_advanced.md
@@ -0,0 +1,214 @@
+#FunASR绂荤嚎鏂囦欢杞啓鏈嶅姟寮�鍙戞寚鍗�
+FunASR鎻愪緵鍙竴閿湰鍦版垨鑰呬簯绔湇鍔″櫒閮ㄧ讲鐨勪腑鏂囩绾挎枃浠惰浆鍐欐湇鍔★紝鍐呮牳涓篎unASR宸插紑婧恟untime-SDK銆侳unASR-runtime缁撳悎浜嗚揪鎽╅櫌璇煶瀹為獙瀹ゅ湪Modelscope绀惧尯寮�婧愮殑璇煶绔偣妫�娴�(VAD)銆丳araformer-large璇煶璇嗗埆(ASR)銆佹爣鐐规娴�(PUNC) 绛夌浉鍏宠兘鍔涳紝鍙互鍑嗙‘銆侀珮鏁堢殑瀵归煶棰戣繘琛岄珮骞跺彂杞啓銆�
+
+鏈枃妗d负FunASR绂荤嚎鏂囦欢杞啓鏈嶅姟寮�鍙戞寚鍗椼�傚鏋滄偍鎯冲揩閫熶綋楠岀绾挎枃浠惰浆鍐欐湇鍔★紝璇峰弬鑰僃unASR绂荤嚎鏂囦欢杞啓鏈嶅姟涓�閿儴缃茬ず渚嬶紙[鐐瑰嚮姝ゅ](./SDK_tutorial.md)锛夈��
+##Docker瀹夎
+涓嬭堪姝ラ涓烘墜鍔ㄥ畨瑁卍ocker鍙奷ocker闀滃儚鐨勬楠わ紝濡傛偍docker闀滃儚宸插惎鍔紝鍙互蹇界暐鏈楠わ細
+### docker鐜瀹夎
+```shell
+# Ubuntu锛�
+curl -fsSL https://test.docker.com -o test-docker.sh
+sudo sh test-docker.sh
+# Debian锛�
+curl -fsSL https://get.docker.com -o get-docker.sh
+sudo sh get-docker.sh
+# CentOS锛�
+curl -fsSL https://get.docker.com | bash -s docker --mirror Aliyun
+# MacOS锛�
+brew install --cask --appdir=/Applications docker
+```
+瀹夎璇﹁锛歨ttps://alibaba-damo-academy.github.io/FunASR/en/installation/docker.html
+### docker鍚姩
+```shell
+sudo systemctl start docker
+```
+### 闀滃儚鎷夊彇鍙婂惎鍔�
+閫氳繃涓嬭堪鍛戒护鎷夊彇骞跺惎鍔‵unASR runtime-SDK鐨刣ocker闀滃儚锛�
+```shell
+sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.0.1
+
+sudo docker run -p 10095:10095 -it --privileged=true -v /root:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.0.1
+```
+```text
+-p <瀹夸富鏈虹鍙�>:<鏄犲皠鍒癲ocker绔彛>
+濡傜ず渚嬶紝瀹夸富鏈�(ecs)绔彛10095鏄犲皠鍒癲ocker绔彛10095涓娿�傚墠鎻愭槸纭繚ecs瀹夊叏瑙勫垯鎵撳紑浜�10095绔彛銆�
+-v <瀹夸富鏈鸿矾寰�>:<鎸傝浇鑷砫ocker璺緞>
+濡傜ず渚嬶紝瀹夸富鏈鸿矾寰�/root鎸傝浇鑷砫ocker璺緞/workspace/models
+```
+
+
+##鏈嶅姟绔惎鍔�
+闀滃儚鍚姩涔嬪悗锛屽惎鍔� funasr-wss-server鏈嶅姟绋嬪簭锛�
+
+funasr-wss-server鏀寔浠嶮odelscope涓嬭浇妯″瀷锛岄渶瑕佽缃悓鏃惰缃ā鍨嬩笅杞藉湴鍧�锛�--download-model-dir锛夊強model ID锛�--model-dir銆�--vad-dir銆�--punc-dir锛�,绀轰緥濡備笅锛�
+```shell
+cd /workspace/FunASR/funasr/runtime/websocket/build/bin
+
+./funasr-wss-server \
+ --download-model-dir /workspace/models \
+ --model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx \
+ --vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx \
+ --punc-dir damo/punc_ct-transformer_zh-cn-common-vocab272727-onnx \
+ --decoder-thread-num 32 \
+ --io-thread-num 8 \
+ --port 10095 \
+ --certfile ../../../ssl_key/server.crt \
+ --keyfile ../../../ssl_key/server.key
+ ```
+```text
+--download-model-dir #妯″瀷涓嬭浇鍦板潃锛岄�氳繃璁剧疆model ID浠嶮odelscope涓嬭浇妯″瀷
+--model-dir # modelscope model ID
+--quantize # True涓洪噺鍖朅SR妯″瀷锛孎alse涓洪潪閲忓寲ASR妯″瀷锛岄粯璁ゆ槸True
+--vad-dir # modelscope model ID
+--vad-quant # True涓洪噺鍖朧AD妯″瀷锛孎alse涓洪潪閲忓寲VAD妯″瀷锛岄粯璁ゆ槸True
+--punc-dir # modelscope model ID
+--punc-quant # True涓洪噺鍖朠UNC妯″瀷锛孎alse涓洪潪閲忓寲PUNC妯″瀷锛岄粯璁ゆ槸True
+--port # 鏈嶅姟绔洃鍚殑绔彛鍙凤紝榛樿涓� 8889
+--decoder-thread-num # 鏈嶅姟绔惎鍔ㄧ殑鎺ㄧ悊绾跨▼鏁帮紝榛樿涓� 8
+--io-thread-num # 鏈嶅姟绔惎鍔ㄧ殑IO绾跨▼鏁帮紝榛樿涓� 1
+--certfile <string> # ssl鐨勮瘉涔︽枃浠讹紝榛樿涓猴細../../../ssl_key/server.crt
+--keyfile <string> # ssl鐨勫瘑閽ユ枃浠讹紝榛樿涓猴細../../../ssl_key/server.key
+```
+funasr-wss-server鍚屾椂涔熸敮鎸佷粠鏈湴璺緞鍔犺浇妯″瀷锛堟湰鍦版ā鍨嬭祫婧愬噯澶囪瑙乕妯″瀷璧勬簮鍑嗗](#anchor-1)锛夛紝闇�瑕佽缃缃ā鍨嬫湰鍦拌矾寰勶紙--download-model-dir锛夌ず渚嬪涓嬶細
+```shell
+cd /workspace/FunASR/funasr/runtime/websocket/build/bin
+
+./funasr-wss-server \
+ --model-dir /workspace/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx \
+ --vad-dir /workspace/models/damo/speech_fsmn_vad_zh-cn-16k-common-onnx \
+ --punc-dir /workspace/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-onnx \
+ --decoder-thread-num 32 \
+ --io-thread-num 8 \
+ --port 10095 \
+ --certfile ../../../ssl_key/server.crt \
+ --keyfile ../../../ssl_key/server.key
+ ```
+```text
+--model-dir # ASR妯″瀷璺緞D锛岄粯璁や负锛�/workspace/models/asr
+--quantize # True涓洪噺鍖朅SR妯″瀷锛孎alse涓洪潪閲忓寲ASR妯″瀷锛岄粯璁ゆ槸True
+--vad-dir # VAD妯″瀷璺緞锛岄粯璁や负锛�/workspace/models/vad
+--vad-quant # True涓洪噺鍖朧AD妯″瀷锛孎alse涓洪潪閲忓寲VAD妯″瀷锛岄粯璁ゆ槸True
+--punc-dir # PUNC妯″瀷璺緞锛岄粯璁や负锛�/workspace/models/punc
+--punc-quant # True涓洪噺鍖朠UNC妯″瀷锛孎alse涓洪潪閲忓寲PUNC妯″瀷锛岄粯璁ゆ槸True
+--port # 鏈嶅姟绔洃鍚殑绔彛鍙凤紝榛樿涓� 8889
+--decoder-thread-num # 鏈嶅姟绔惎鍔ㄧ殑鎺ㄧ悊绾跨▼鏁帮紝榛樿涓� 8
+--io-thread-num # 鏈嶅姟绔惎鍔ㄧ殑IO绾跨▼鏁帮紝榛樿涓� 1
+--certfile <string> # ssl鐨勮瘉涔︽枃浠讹紝榛樿涓猴細../../../ssl_key/server.crt
+--keyfile <string> # ssl鐨勫瘑閽ユ枃浠讹紝榛樿涓猴細../../../ssl_key/server.key
+```
+
+## <a id="anchor-1">妯″瀷璧勬簮鍑嗗</a>
+濡傛灉鎮ㄩ�夋嫨閫氳繃funasr-wss-server浠嶮odelscope涓嬭浇妯″瀷锛屽彲浠ヨ烦杩囨湰姝ラ銆�
+
+FunASR绂荤嚎鏂囦欢杞啓鏈嶅姟涓殑vad銆乤sr鍜宲unc妯″瀷璧勬簮鍧囨潵鑷狹odelscope锛屾ā鍨嬪湴鍧�璇﹁涓嬭〃锛�
+
+| 妯″瀷 | Modelscope閾炬帴 |
+|------|------------------------------------------------------------------------------------------------------------------|
+| VAD | https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary |
+| ASR | https://www.modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary |
+| PUNC | https://www.modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/summary |
+
+绂荤嚎鏂囦欢杞啓鏈嶅姟涓儴缃茬殑鏄噺鍖栧悗鐨凮NNX妯″瀷锛屼笅闈粙缁嶄笅濡備綍瀵煎嚭ONNX妯″瀷鍙婂叾閲忓寲锛氭偍鍙互閫夋嫨浠嶮odelscope瀵煎嚭ONNX妯″瀷銆佷粠鏈湴鏂囦欢瀵煎嚭ONNX妯″瀷鎴栬�呬粠finetune鍚庣殑璧勬簮瀵煎嚭妯″瀷锛�
+###浠嶮odelscope瀵煎嚭ONNX妯″瀷
+浠嶮odelscope缃戠珯涓嬭浇瀵瑰簲model name鐨勬ā鍨嬶紝鐒跺悗瀵煎嚭閲忓寲鍚庣殑ONNX妯″瀷锛�
+```shell
+python -m funasr.export.export_model --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type onnx --quantize True
+```
+```text
+--model-name Modelscope涓婄殑妯″瀷鍚嶇О锛屼緥濡俤amo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
+--export-dir ONNX妯″瀷瀵煎嚭鍦板潃
+--type 妯″瀷绫诲瀷锛岀洰鍓嶆敮鎸� ONNX銆乼orch
+--quantize int8妯″瀷閲忓寲
+```
+
+###浠庢湰鍦版枃浠跺鍑篛NNX妯″瀷
+璁剧疆model name涓烘ā鍨嬫湰鍦拌矾寰勶紝瀵煎嚭閲忓寲鍚庣殑ONNX妯″瀷锛�
+```shell
+python -m funasr.export.export_model --model-name /workspace/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type onnx --quantize True
+```
+```text
+--model-name 妯″瀷鏈湴璺緞锛屼緥濡�/workspace/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
+--export-dir ONNX妯″瀷瀵煎嚭鍦板潃
+--type 妯″瀷绫诲瀷锛岀洰鍓嶆敮鎸� ONNX銆乼orch
+--quantize int8妯″瀷閲忓寲
+```
+###浠巉inetune鍚庣殑璧勬簮瀵煎嚭妯″瀷
+鍋囧鎮ㄦ兂閮ㄧ讲finetune鍚庣殑妯″瀷锛屽彲浠ュ弬鑰冨涓嬫楠わ細
+
+灏嗘偍finetune鍚庨渶瑕侀儴缃茬殑妯″瀷锛堜緥濡�10epoch.pb锛夛紝閲嶅懡鍚嶄负model.pb锛屽苟灏嗗師modelscope涓ā鍨媘odel.pb鏇挎崲鎺夛紝鍋囧鏇挎崲鍚庣殑妯″瀷璺緞涓�/path/to/finetune/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch锛岄�氳繃涓嬭堪鍛戒护鎶奻inetune鍚庣殑妯″瀷杞垚onnx妯″瀷锛�
+```shell
+python -m funasr.export.export_model --model-name /path/to/finetune/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type onnx --quantize True
+```
+
+##瀹㈡埛绔惎鍔�
+鍦ㄦ湇鍔″櫒涓婂畬鎴怓unASR绂荤嚎鏂囦欢杞啓鏈嶅姟閮ㄧ讲浠ュ悗锛屽彲浠ラ�氳繃濡備笅鐨勬楠ゆ潵娴嬭瘯鍜屼娇鐢ㄧ绾挎枃浠惰浆鍐欐湇鍔°�傜洰鍓岶unASR-bin鏀寔澶氱鏂瑰紡鍚姩瀹㈡埛绔紝濡備笅鏄熀浜巔ython-client銆乧++-client鐨勫懡浠よ瀹炰緥鍙婅嚜瀹氫箟瀹㈡埛绔疻ebsocket閫氫俊鍗忚锛�
+###python-client
+```shell
+python wss_client_asr.py --host "127.0.0.1" --port 10095 --mode offline --audio_in "./data/wav.scp" --send_without_sleep --output_dir "./results"
+```
+```text
+--host # 鏈嶅姟绔痠p鍦板潃锛屾湰鏈烘祴璇曞彲璁剧疆涓� 127.0.0.1
+--port # 鏈嶅姟绔洃鍚鍙e彿
+--audio_in # 闊抽杈撳叆锛岃緭鍏ュ彲浠ユ槸锛歸av璺緞 鎴栬�� wav.scp璺緞锛坘aldi鏍煎紡鐨剋av list锛寃av_id \t wav_path锛�
+--output_dir # 璇嗗埆缁撴灉杈撳嚭璺緞
+--ssl # 鏄惁浣跨敤SSL鍔犲瘑锛岄粯璁や娇鐢�
+--mode # offline妯″紡
+```
+
+###c++-client锛�
+```shell
+. /funasr-wss-client --server-ip 127.0.0.1 --port 10095 --wav-path test.wav --thread-num 1 --is-ssl 1
+```
+```text
+--server-ip # 鏈嶅姟绔痠p鍦板潃锛屾湰鏈烘祴璇曞彲璁剧疆涓� 127.0.0.1
+--port # 鏈嶅姟绔洃鍚鍙e彿
+--wav-path # 闊抽杈撳叆锛岃緭鍏ュ彲浠ユ槸锛歸av璺緞 鎴栬�� wav.scp璺緞锛坘aldi鏍煎紡鐨剋av list锛寃av_id \t wav_path锛�
+--thread-num # 瀹㈡埛绔嚎绋嬫暟
+--is-ssl # 鏄惁浣跨敤SSL鍔犲瘑锛岄粯璁や娇鐢�
+```
+
+###鑷畾涔夊鎴风锛�
+濡傛灉鎮ㄦ兂瀹氫箟鑷繁鐨刢lient锛寃ebsocket閫氫俊鍗忚涓猴細
+```text
+# 棣栨閫氫俊
+{"mode": "offline", "wav_name": wav_name, "is_speaking": True}
+# 鍙戦�亀av鏁版嵁
+bytes鏁版嵁
+# 鍙戦�佺粨鏉熸爣蹇�
+{"is_speaking": False}
+```
+
+##濡備綍瀹氬埗鏈嶅姟閮ㄧ讲
+FunASR-runtime鐨勪唬鐮佸凡寮�婧愶紝濡傛灉鏈嶅姟绔拰瀹㈡埛绔笉鑳藉緢濂界殑婊¤冻鎮ㄧ殑闇�姹傦紝鎮ㄥ彲浠ユ牴鎹嚜宸辩殑闇�姹傝繘琛岃繘涓�姝ョ殑寮�鍙戯細
+###c++ 瀹㈡埛绔細
+https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/websocket
+###python 瀹㈡埛绔細
+https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/websocket
+###c++ 鏈嶅姟绔細
+####VAD
+```c++
+// VAD妯″瀷鐨勪娇鐢ㄥ垎涓篎smnVadInit鍜孎smnVadInfer涓や釜姝ラ锛�
+FUNASR_HANDLE vad_hanlde=FsmnVadInit(model_path, thread_num);
+// 鍏朵腑锛歮odel_path 鍖呭惈"model-dir"銆�"quantize"锛宼hread_num涓簅nnx绾跨▼鏁帮紱
+FUNASR_RESULT result=FsmnVadInfer(vad_hanlde, wav_file.c_str(), NULL, 16000);
+// 鍏朵腑锛歷ad_hanlde涓篎unOfflineInit杩斿洖鍊硷紝wav_file涓洪煶棰戣矾寰勶紝sampling_rate涓洪噰鏍风巼(榛樿16k)
+```
+浣跨敤绀轰緥璇﹁锛歨ttps://github.com/alibaba-damo-academy/FunASR/blob/main/funasr/runtime/onnxruntime/bin/funasr-onnx-offline-vad.cpp
+####ASR
+```text
+// ASR妯″瀷鐨勪娇鐢ㄥ垎涓篎unOfflineInit鍜孎unOfflineInfer涓や釜姝ラ锛�
+FUNASR_HANDLE asr_hanlde=FunOfflineInit(model_path, thread_num);
+// 鍏朵腑锛歮odel_path 鍖呭惈"model-dir"銆�"quantize"锛宼hread_num涓簅nnx绾跨▼鏁帮紱
+FUNASR_RESULT result=FunOfflineInfer(asr_hanlde, wav_file.c_str(), RASR_NONE, NULL, 16000);
+// 鍏朵腑锛歛sr_hanlde涓篎unOfflineInit杩斿洖鍊硷紝wav_file涓洪煶棰戣矾寰勶紝sampling_rate涓洪噰鏍风巼(榛樿16k)
+```
+浣跨敤绀轰緥璇﹁锛歨ttps://github.com/alibaba-damo-academy/FunASR/blob/main/funasr/runtime/onnxruntime/bin/funasr-onnx-offline.cpp
+####PUNC
+```text
+// PUNC妯″瀷鐨勪娇鐢ㄥ垎涓篊TTransformerInit鍜孋TTransformerInfer涓や釜姝ラ锛�
+FUNASR_HANDLE punc_hanlde=CTTransformerInit(model_path, thread_num);
+// 鍏朵腑锛歮odel_path 鍖呭惈"model-dir"銆�"quantize"锛宼hread_num涓簅nnx绾跨▼鏁帮紱
+FUNASR_RESULT result=CTTransformerInfer(punc_hanlde, txt_str.c_str(), RASR_NONE, NULL);
+// 鍏朵腑锛歱unc_hanlde涓篊TTransformerInit杩斿洖鍊硷紝txt_str涓烘枃鏈�
+```
+浣跨敤绀轰緥璇﹁锛歨ttps://github.com/alibaba-damo-academy/FunASR/blob/main/funasr/runtime/onnxruntime/bin/funasr-onnx-offline-punc.cpp
\ No newline at end of file
diff --git a/funasr/runtime/readme.md b/funasr/runtime/readme.md
index 52bee72..11095a7 100644
--- a/funasr/runtime/readme.md
+++ b/funasr/runtime/readme.md
@@ -7,4 +7,4 @@
璇ユ暀绋嬩富瑕侀拡瀵瑰皬鐧界敤鎴凤紝鏃犱慨鏀瑰畾鍒堕渶姹傦紝鏀寔浠巑odelscope涓笅杞芥ā鍨嬮儴缃诧紝涔熸敮鎸佺敤鎴穎inetune鍚庣殑妯″瀷閮ㄧ讲锛岃缁嗘暀绋嬪弬鑰冿紙[鐐瑰嚮姝ゅ](./SDK_tutorial.md)锛�
## Runtime-SDK 楂橀樁寮�鍙戞寚鍗�
-璇ユ暀绋嬩富瑕侀拡瀵归珮闃跺紑鍙戣�咃紝闇�瑕佸鏈嶅姟杩涜淇敼涓庡畾鍒讹紝鏀寔浠巑odelscope涓笅杞芥ā鍨嬮儴缃诧紝涔熸敮鎸佺敤鎴穎inetune鍚庣殑妯″瀷閮ㄧ讲锛岃缁嗘暀绋嬪弬鑰冿紙[鐐瑰嚮姝ゅ]()锛�
+璇ユ暀绋嬩富瑕侀拡瀵归珮闃跺紑鍙戣�咃紝闇�瑕佸鏈嶅姟杩涜淇敼涓庡畾鍒讹紝鏀寔浠巑odelscope涓笅杞芥ā鍨嬮儴缃诧紝涔熸敮鎸佺敤鎴穎inetune鍚庣殑妯″瀷閮ㄧ讲锛岃缁嗘暀绋嬪弬鑰冿紙[鐐瑰嚮姝ゅ](./SDK_advanced.md)锛�
--
Gitblit v1.9.1