From 1d1ef01b4e23630a99a3be7e9d1dce9550a793e9 Mon Sep 17 00:00:00 2001 From: yhliang <68215459+yhliang-aslp@users.noreply.github.com> Date: 星期四, 11 五月 2023 16:26:24 +0800 Subject: [PATCH] Merge branch 'main' into dev_smohan --- funasr/runtime/python/websocket/README.md | 107 ++++++++++++++++++++++++++++++++++++++++++----------- 1 files changed, 85 insertions(+), 22 deletions(-) diff --git a/funasr/runtime/python/websocket/README.md b/funasr/runtime/python/websocket/README.md index ee7dca0..7ca5730 100644 --- a/funasr/runtime/python/websocket/README.md +++ b/funasr/runtime/python/websocket/README.md @@ -1,6 +1,6 @@ # Service with websocket-python -This is a demo using funasr pipeline with websocket python-api. +This is a demo using funasr pipeline with websocket python-api. It supports offline, online, and unified offline/online 2pass speech recognition. ## For the Server @@ -22,24 +22,49 @@ ### Start server #### ASR offline server - -[//]: # (```shell) - -[//]: # (python ws_server_online.py --host "0.0.0.0" --port 10095 --asr_model "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch") - -[//]: # (```) -#### ASR streaming server +##### API-reference ```shell -python ws_server_online.py --host "0.0.0.0" --port 10095 --asr_model_online "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online" +python ws_server_offline.py \ +--port [port id] \ +--asr_model [asr model_name] \ +--punc_model [punc model_name] \ +--ngpu [0 or 1] \ +--ncpu [1 or 4] +``` +##### Usage examples +```shell +python ws_server_offline.py --port 10095 --asr_model "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" +``` + +#### ASR streaming server +##### API-reference +```shell +python ws_server_online.py \ +--port [port id] \ +--asr_model_online [asr model_name] \ +--ngpu [0 or 1] \ +--ncpu [1 or 4] +``` +##### Usage examples +```shell +python ws_server_online.py --port 10095 --asr_model_online
"damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online" ``` #### ASR offline/online 2pass server - -[//]: # (```shell) - -[//]: # (python ws_server_online.py --host "0.0.0.0" --port 10095 --asr_model "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch") - -[//]: # (```) +##### API-reference +```shell +python ws_server_2pass.py \ +--port [port id] \ +--asr_model [asr model_name] \ +--asr_model_online [asr model_name] \ +--punc_model [punc model_name] \ +--ngpu [0 or 1] \ +--ncpu [1 or 4] +``` +##### Usage examples +```shell +python ws_server_2pass.py --port 10095 --asr_model "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" --asr_model_online "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online" +``` ## For the client @@ -51,17 +76,55 @@ ``` ### Start client -#### Recording from mircrophone +#### API-reference ```shell -# --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms -python ws_client.py --host "127.0.0.1" --port 10095 --chunk_size "5,10,5" +python ws_client.py \ +--host [ip_address] \ +--port [port id] \ +--chunk_size ["5,10,5"=600ms, "8,8,4"=480ms] \ +--chunk_interval [duration of each sent chunk = chunk_size/chunk_interval] \ +--words_max_print [max number of words to print] \ +--audio_in [if set, loading from wav.scp, else recording from microphone] \ +--output_dir [if set, write the results to output_dir] \ +--send_without_sleep [only set for offline] ``` -#### Loadding from wav.scp(kaldi style) +#### Usage examples +##### ASR offline client +Recording from microphone ```shell -# --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms -python ws_client.py --host "127.0.0.1" --port 10095 --chunk_size "5,10,5" --audio_in "./data/wav.scp" +# --chunk_interval, "10": 600/10=60ms, "5": 600/5=120ms, "20": 600/20=30ms +python ws_client.py --host "0.0.0.0" --port 10095 --chunk_interval 10 --words_max_print 100 +``` +Loading from wav.scp (kaldi style) +```shell +# --chunk_interval, "10": 600/10=60ms,
"5": 600/5=120ms, "20": 600/20=30ms +python ws_client.py --host "0.0.0.0" --port 10095 --chunk_interval 10 --words_max_print 100 --audio_in "./data/wav.scp" --send_without_sleep --output_dir "./results" ``` +##### ASR streaming client +Recording from microphone +```shell +# --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms +python ws_client.py --host "0.0.0.0" --port 10095 --chunk_size "5,10,5" --words_max_print 100 +``` +Loading from wav.scp (kaldi style) +```shell +# --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms +python ws_client.py --host "0.0.0.0" --port 10095 --chunk_size "5,10,5" --audio_in "./data/wav.scp" --words_max_print 100 --output_dir "./results" +``` + +##### ASR offline/online 2pass client +Recording from microphone +```shell +# --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms +python ws_client.py --host "0.0.0.0" --port 10095 --chunk_size "8,8,4" --words_max_print 10000 +``` +Loading from wav.scp (kaldi style) +```shell +# --chunk_size, "5,10,5"=600ms, "8,8,4"=480ms +python ws_client.py --host "0.0.0.0" --port 10095 --chunk_size "8,8,4" --audio_in "./data/wav.scp" --words_max_print 10000 --output_dir "./results" +``` ## Acknowledge 1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR). -2. We acknowledge [cgisky1980](https://github.com/cgisky1980/FunASR) for contributing the websocket service. +2. We acknowledge [zhaoming](https://github.com/zhaomingwork/FunASR/tree/fix_bug_for_python_websocket) for contributing the websocket service. +3. We acknowledge [cgisky1980](https://github.com/cgisky1980/FunASR) for contributing the websocket service for the offline model. -- Gitblit v1.9.1