From 000ec7e6309c024f31effccb3fb0cf2ebec97cc8 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期二, 08 八月 2023 19:59:01 +0800
Subject: [PATCH] Merge branch 'main' of github.com:alibaba-damo-academy/FunASR add

---
 funasr/runtime/docs/SDK_advanced_guide_online.md     |   75 +++++++++++++++++++++++++++++++++++++
 funasr/runtime/websocket/funasr-wss-server-2pass.cpp |    6 +-
 README.md                                            |    6 +++
 3 files changed, 83 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 24e62e7..5b62d3d 100644
--- a/README.md
+++ b/README.md
@@ -87,6 +87,12 @@
   year={2023},
   booktitle={INTERSPEECH},
 }
+@inproceedings{wang2023told,
+  author={Jiaming Wang and Zhihao Du and Shiliang Zhang},
+  title={{TOLD:} {A} Novel Two-Stage Overlap-Aware Framework for Speaker Diarization},
+  year={2023},
+  booktitle={ICASSP},
+}
 @inproceedings{gao22b_interspeech,
   author={Zhifu Gao and ShiLiang Zhang and Ian McLoughlin and Zhijie Yan},
   title={{Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition}},
diff --git a/funasr/runtime/docs/SDK_advanced_guide_online.md b/funasr/runtime/docs/SDK_advanced_guide_online.md
index 4e3e5ad..7e478cb 100644
--- a/funasr/runtime/docs/SDK_advanced_guide_online.md
+++ b/funasr/runtime/docs/SDK_advanced_guide_online.md
@@ -1,7 +1,80 @@
  # Advanced Development Guide (File transcription service)
  
 FunASR provides a Chinese online transcription service that can be deployed locally or on a cloud server with just one click. The core of the service is the FunASR runtime SDK, which has been open-sourced. FunASR-runtime combines various capabilities such as speech endpoint detection (VAD), offline large-scale speech recognition (ASR) using Paraformer-large, online large-scale speech recognition (ASR) using Paraformer-large, and punctuation detection (PUNC), which have all been open-sourced by the speech laboratory of DAMO Academy on the Modelscope community. 
-This document serves as a development guide for the FunASR online transcription service. If you wish to quickly experience the online transcription service, please refer to the one-click deployment example for the FunASR online transcription service ([docs](./SDK_tutorial_online.md)).
+This document serves as a development guide for the FunASR online transcription service. If you wish to quickly experience the online transcription service, please refer to the one-click deployment example for the FunASR online transcription service ([Quick Start](#quick-start)).
+
+### 镜像启动
+
+通过下述命令拉取并启动FunASR软件包的docker镜像：
+
+```shell
+sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.0
+mkdir -p ./funasr-runtime-resources/models
+sudo docker run -p 10095:10095 -it --privileged=true -v ./funasr-runtime-resources/models:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.0
+```
+如果您没有安装docker，可参考[Docker安装](https://alibaba-damo-academy.github.io/FunASR/en/installation/docker_zh.html)
+
+### 服务端启动
+
+docker鍚姩涔嬪悗锛屽惎鍔� funasr-wss-server-2pass鏈嶅姟绋嬪簭锛�
+```shell
+cd FunASR/funasr/runtime
+./run_server_2pass.sh \
+  --download-model-dir /workspace/models \
+  --vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx \
+  --model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx  \
+  --online-model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online-onnx  \
+  --punc-dir damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727-onnx
+```
+鏈嶅姟绔缁嗗弬鏁颁粙缁嶅彲鍙傝�僛鏈嶅姟绔弬鏁颁粙缁峕(#鏈嶅姟绔弬鏁颁粙缁�)
+### 客户端测试与使用
+
+下载客户端测试工具目录samples
+```shell
+wget https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/sample/funasr_samples.tar.gz
+```
+我们以Python语言客户端为例，进行说明，支持音频格式（.wav, .pcm），以及多文件列表wav.scp输入，其他版本客户端请参考文档（[点击此处](#客户端用法详解)），定制服务部署请参考[如何定制服务部署](#如何定制服务部署)
+```shell
+python3 wss_client_asr.py --host "127.0.0.1" --port 10095 --mode 2pass
+```
+
+
+## Quick Start
+
+### Server Startup
+
+Pull and run the Docker image:
+
+```shell
+sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.0
+mkdir -p ./funasr-runtime-resources/models
+sudo docker run -p 10095:10095 -it --privileged=true -v ./funasr-runtime-resources/models:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.0
+```
+
+Start the funasr-wss-server-2pass service:
+```shell
+cd FunASR/funasr/runtime
+./run_server_2pass.sh \
+  --download-model-dir /workspace/models \
+  --vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx \
+  --model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx  \
+  --online-model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online-onnx  \
+  --punc-dir damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727-onnx
+```
+
+
+
+### Client Testing and Usage
+
+After running the above installation instructions, the client testing tool directory samples will be downloaded in the default installation directory /root/funasr-runtime-resources ([download click](https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/sample/funasr_samples.tar.gz)).
+We use the Python client as an example. It supports multiple audio input formats (.wav, .pcm, .mp3, etc.), video input (.mp4, etc.), and multi-file list input via wav.scp. For other client versions, please refer to the [documentation](#Detailed-Description-of-Client-Usage).
+
+```shell
+python3 funasr_wss_client.py --host "127.0.0.1" --port 10095 --mode 2pass --audio_in "../audio/asr_example.wav"
+```
+
+
+
 
 ## Installation of Docker
 
diff --git a/funasr/runtime/websocket/funasr-wss-server-2pass.cpp b/funasr/runtime/websocket/funasr-wss-server-2pass.cpp
index 22b4587..835cd8e 100644
--- a/funasr/runtime/websocket/funasr-wss-server-2pass.cpp
+++ b/funasr/runtime/websocket/funasr-wss-server-2pass.cpp
@@ -154,7 +154,7 @@
       std::string python_cmd =
           "python -m funasr.utils.runtime_sdk_download_tool --type onnx --quantize True ";
 
-      if (vad_dir.isSet() && !s_vad_path.empty()) {
+      if (!s_vad_path.empty()) {
         std::string python_cmd_vad;
         std::string down_vad_path;
         std::string down_vad_model;
@@ -200,7 +200,7 @@
         LOG(INFO) << "VAD model is not set, use default.";
       }
 
-      if (offline_model_dir.isSet() && !s_offline_asr_path.empty()) {
+      if (!s_offline_asr_path.empty()) {
         std::string python_cmd_asr;
         std::string down_asr_path;
         std::string down_asr_model;
@@ -288,7 +288,7 @@
         LOG(INFO) << "ASR online model is not set, use default.";
       }
 
-      if (punc_dir.isSet() && !s_punc_path.empty()) {
+      if (!s_punc_path.empty()) {
         std::string python_cmd_punc;
         std::string down_punc_path;
         std::string down_punc_model;

--
Gitblit v1.9.1