From 94de39dde2e616a01683c518023d0fab72b4e103 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Mon, 19 Feb 2024 22:21:50 +0800
Subject: [PATCH] aishell example

---
 runtime/docs/SDK_advanced_guide_online.md |   32 ++++++++++++++++++--------------
 1 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/runtime/docs/SDK_advanced_guide_online.md b/runtime/docs/SDK_advanced_guide_online.md
index 506c7fe..ba3b92b 100644
--- a/runtime/docs/SDK_advanced_guide_online.md
+++ b/runtime/docs/SDK_advanced_guide_online.md
@@ -1,11 +1,15 @@
 # Real-time Speech Transcription Service Development Guide
+([简体中文](SDK_advanced_guide_online_zh.md)|English)
 
-FunASR provides a real-time speech transcription service that can be easily deployed on local or cloud servers, with the FunASR runtime-SDK as the core. It integrates the speech endpoint detection (VAD), Paraformer-large non-streaming speech recognition (ASR), Paraformer-large streaming speech recognition (ASR), punctuation (PUNC), and other related capabilities open-sourced by the speech laboratory of DAMO Academy on the Modelscope community. The software package can perform real-time speech-to-text transcription, and can also accurately transcribe text at the end of sentences for high-precision output. The output text contains punctuation and supports high-concurrency multi-channel requests.
+[//]: # (FunASR provides a real-time speech transcription service that can be easily deployed on local or cloud servers, with the FunASR runtime-SDK as the core. It integrates the speech endpoint detection &#40;VAD&#41;, Paraformer-large non-streaming speech recognition &#40;ASR&#41;, Paraformer-large streaming speech recognition &#40;ASR&#41;, punctuation &#40;PUNC&#41;, and other related capabilities open-sourced by the speech laboratory of DAMO Academy on the Modelscope community. The software package can perform real-time speech-to-text transcription, and can also accurately transcribe text at the end of sentences for high-precision output. The output text contains punctuation and supports high-concurrency multi-channel requests.)
+The FunASR real-time speech recognition software package integrates real-time versions of the speech endpoint detection model, the speech recognition model, the punctuation prediction model, and so on. By using multiple models collaboratively, it can perform real-time speech-to-text conversion, as well as high-precision transcription correction at the end of a sentence, with punctuation included in the output text. It supports multiple concurrent requests. Depending on the user's scenario, it supports three service modes: real-time speech recognition service (online), non-real-time single-sentence transcription (offline), and real-time and non-real-time integrated collaboration (2pass). The software package provides client libraries in various programming languages such as HTML, Python, C++, Java, and C#, allowing users to use and further develop the software.
 
 <img src="images/online_structure.png"  width="900"/>
 
 | TIME       | INFO                                                                                | IMAGE VERSION                       | IMAGE ID     |
 |------------|-------------------------------------------------------------------------------------|-------------------------------------|--------------|
+| 2024.01.25 | Optimization of the client-side | funasr-runtime-sdk-online-cpu-0.1.7  | 2aa23805572e      |
+| 2024.01.03 | The 2pass-offline mode supports Ngram language model decoding and WFST hotwords, while also addressing known crash issues and memory leak problems | funasr-runtime-sdk-online-cpu-0.1.6  | f99925110d27      |
 | 2023.11.09 | fix bug: without online results                                                     | funasr-runtime-sdk-online-cpu-0.1.5 | b16584b6d38b      |
 | 2023.11.08 | supporting server-side loading of hotwords, adaptation to runtime structure changes | funasr-runtime-sdk-online-cpu-0.1.4 | 691974017c38 |
 | 2023.09.19 | supporting hotwords, timestamps, and ITN model in 2pass mode                        | funasr-runtime-sdk-online-cpu-0.1.2 | 7222c5319bcf |
@@ -24,9 +28,9 @@
 ### Pull Docker Image
 Use the following command to pull and start the FunASR software package docker image:
 ```shell
-sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.5
+sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.8
 mkdir -p ./funasr-runtime-resources/models
-sudo docker run -p 10096:10095 -it --privileged=true -v $PWD/funasr-runtime-resources/models:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.5
+sudo docker run -p 10096:10095 -it --privileged=true -v $PWD/funasr-runtime-resources/models:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.8
 ```
 
 ### Launching the Server
@@ -40,7 +44,8 @@
   --model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx  \
   --online-model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online-onnx  \
   --punc-dir damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727-onnx \
-  --itn-dir thuduj12/fst_itn_zh > log.out 2>&1 &
+  --lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst \
+  --itn-dir thuduj12/fst_itn_zh > log.txt 2>&1 &
 
 # If you want to close ssl，please add：--certfile 0
 ```
@@ -78,17 +83,15 @@
 ```shell
 cd /workspace/FunASR/runtime
 nohup bash run_server_2pass.sh \
-  --model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx \
+  --model-dir damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx \
   --online-model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online-onnx \
   --vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx \
   --punc-dir damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727-onnx \
+  --lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst \
   --itn-dir thuduj12/fst_itn_zh \
-  --decoder-thread-num 32 \
-  --io-thread-num  8 \
-  --port 10095 \
   --certfile  ../../../ssl_key/server.crt \
   --keyfile ../../../ssl_key/server.key \
-  --hotword ../../hotwords.txt > log.out 2>&1 &
+  --hotword ../../hotwords.txt > log.txt 2>&1 &
 
 # If you want to close ssl，please add：--certfile 0
 # If you want to deploy the timestamp or nn hotword model, please set --model-dir to the corresponding model:
@@ -102,15 +105,16 @@
 --download-model-dir: Model download address, download models from Modelscope by setting the model ID.
 --model-dir: modelscope model ID or local model path.
 --online-model-dir modelscope model ID
---quantize: True for quantized ASR model, False for non-quantized ASR model. Default is True.
 --vad-dir: modelscope model ID or local model path.
---vad-quant: True for quantized VAD model, False for non-quantized VAD model. Default is True.
 --punc-dir: modelscope model ID or local model path.
---punc-quant: True for quantized PUNC model, False for non-quantized PUNC model. Default is True.
+--lm-dir: modelscope model ID or local model path.
 --itn-dir modelscope model ID or local model path.
 --port: Port number that the server listens on. Default is 10095.
---decoder-thread-num: Number of inference threads that the server starts. Default is 8.
---io-thread-num: Number of IO threads that the server starts. Default is 1.
+--decoder-thread-num: The number of thread pools on the server side that can handle concurrent requests.
+                      The script will automatically configure parameters decoder-thread-num and io-thread-num based on the server's thread count.
+--io-thread-num: Number of IO threads that the server starts.
+--model-thread-num: The number of internal threads for each recognition route to control the parallelism of the ONNX model. 
+        The default value is 1. It is recommended that decoder-thread-num * model-thread-num equals the total number of threads.
 --certfile <string>: SSL certificate file. Default is ../../../ssl_key/server.crt. If you want to close ssl，set 0
 --keyfile <string>: SSL key file. Default is ../../../ssl_key/server.key. 
 --hotword: Hotword file path, one line for each hotword(e.g.:阿里巴巴 20), if the client provides hot words, then combined with the hot words provided by the client.

--
Gitblit v1.9.1