From eee6af2ece605035b0a0835eb9dbed5ae872c755 Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Fri, 02 Jun 2023 22:08:10 +0800
Subject: [PATCH] Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main
---
fun_text_processing/inverse_text_normalization/id/taggers/cardinal.py | 15 +++----
egs_modelscope/tp/TEMPLATE/README.md | 2
funasr/runtime/html5/static/wsconnecter.js | 7 +++
funasr/runtime/python/websocket/wss_srv_asr.py | 27 ++++++++++++-
egs_modelscope/asr/TEMPLATE/README.md | 21 ++++++++++
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py | 1
egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py | 1
egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py | 3 +
fun_text_processing/inverse_text_normalization/id/id_unit_test.tsv | 16 ++++----
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py | 3 +
funasr/runtime/html5/static/main.js | 4 +
README.md | 1
12 files changed, 74 insertions(+), 27 deletions(-)
diff --git a/README.md b/README.md
index c31d616..7c289e0 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,6 @@
| [**Highlights**](#highlights)
| [**Installation**](#installation)
| [**Docs**](https://alibaba-damo-academy.github.io/FunASR/en/index.html)
-| [**Tutorial_CN**](https://github.com/alibaba-damo-academy/FunASR/wiki#funasr%E7%94%A8%E6%88%B7%E6%89%8B%E5%86%8C)
| [**Papers**](https://github.com/alibaba-damo-academy/FunASR#citations)
| [**Runtime**](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime)
| [**Model Zoo**](https://github.com/alibaba-damo-academy/FunASR/blob/main/docs/model_zoo/modelscope_models.md)
diff --git a/egs_modelscope/asr/TEMPLATE/README.md b/egs_modelscope/asr/TEMPLATE/README.md
index 2c54333..b938e34 100644
--- a/egs_modelscope/asr/TEMPLATE/README.md
+++ b/egs_modelscope/asr/TEMPLATE/README.md
@@ -20,11 +20,13 @@
print(rec_result)
```
#### [Paraformer-online Model](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary)
+##### Streaming Decoding
```python
inference_pipeline = pipeline(
task=Tasks.auto_speech_recognition,
model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
- model_revision='v1.0.6',
+ model_revision='v1.0.4',
+ update_model='v1.0.4',
mode='paraformer_streaming'
)
import soundfile
@@ -42,6 +44,23 @@
rec_result = inference_pipeline(audio_in=speech_chunk, param_dict=param_dict)
print(rec_result)
```
+
+##### Fake Streaming Decoding
+```python
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+
+inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
+ model_revision='v1.0.6',
+ update_model='v1.0.6',
+ mode="paraformer_fake_streaming"
+)
+audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
+rec_result = inference_pipeline(audio_in=audio_in)
+print(rec_result)
+```
Full code of demo, please ref to [demo](https://github.com/alibaba-damo-academy/FunASR/discussions/241)
#### [UniASR Model](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/summary)
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
index 283f529..5fa98e5 100644
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
@@ -5,6 +5,7 @@
task=Tasks.auto_speech_recognition,
model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
model_revision='v1.0.6',
+ update_model='v1.0.6',
mode="paraformer_fake_streaming"
)
audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
index 9d49d7d..77f7939 100644
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
@@ -14,7 +14,8 @@
inference_pipeline = pipeline(
task=Tasks.auto_speech_recognition,
model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
- model_revision='v1.0.6',
+ model_revision='v1.0.4',
+ update_model='v1.0.4',
mode="paraformer_streaming"
)
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
index 5fa417b..869ec0f 100644
--- a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
@@ -5,6 +5,7 @@
task=Tasks.auto_speech_recognition,
model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online',
model_revision='v1.0.6',
+ update_model='v1.0.6',
mode="paraformer_fake_streaming"
)
audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
index d1dd441..45a2f96 100644
--- a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
@@ -14,7 +14,8 @@
inference_pipeline = pipeline(
task=Tasks.auto_speech_recognition,
model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online',
- model_revision='v1.0.6',
+ model_revision='v1.0.4',
+ update_model='v1.0.4',
mode="paraformer_streaming"
)
diff --git a/egs_modelscope/tp/TEMPLATE/README.md b/egs_modelscope/tp/TEMPLATE/README.md
index 7cc8508..3c7129f 100644
--- a/egs_modelscope/tp/TEMPLATE/README.md
+++ b/egs_modelscope/tp/TEMPLATE/README.md
@@ -11,7 +11,7 @@
inference_pipeline = pipeline(
task=Tasks.speech_timestamp,
model='damo/speech_timestamp_prediction-v1-16k-offline',
- output_dir=None)
+ model_revision='v1.1.0')
rec_result = inference_pipeline(
audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_timestamps.wav',
diff --git a/fun_text_processing/inverse_text_normalization/id/id_unit_test.tsv b/fun_text_processing/inverse_text_normalization/id/id_unit_test.tsv
index 662bf04..49eda5f 100644
--- a/fun_text_processing/inverse_text_normalization/id/id_unit_test.tsv
+++ b/fun_text_processing/inverse_text_normalization/id/id_unit_test.tsv
@@ -1,10 +1,10 @@
dua ribu dua puluh dua 2022
-tiga ribu 300
+tiga ribu 3000
sembilan ribu sembilan ratus sembilan puluh sembilan 9999
-seribu satu 100001
-ribu 100
+seribu satu 1001
+ribu 1000
seribu 1000
-seribu dua ratus delapan puluh sembilan 10289
+seribu dua ratus delapan puluh sembilan 1289
ribu dua ratus delapan puluh sembilan 1289
nol satu dua tiga empat lima enam tujuh delapan sembilan 01 2345-6789
empat belas 14
@@ -22,8 +22,8 @@
seratus dua puluh tiga 123
ratus dua puluh tiga 123
dua puluh empat maret 24 maret
-ribu tujuh puluh enam 10076
-seribu tujuh puluh enam 100076
-ribu tujuh puluh enam rupiah 10076 rupiah
+ribu tujuh puluh enam 1076
+seribu tujuh puluh enam 1076
+ribu tujuh puluh enam rupiah 1076 rupiah
tujuh puluh enam 76
-ditambah enam dua dua satu enam lima tiga sembilan nol enam nol lima +62 21 6539-0605
\ No newline at end of file
+ditambah enam dua dua satu enam lima tiga sembilan nol enam nol lima +62 21 6539-0605
diff --git a/fun_text_processing/inverse_text_normalization/id/taggers/cardinal.py b/fun_text_processing/inverse_text_normalization/id/taggers/cardinal.py
index 539acbc..d2f1a77 100644
--- a/fun_text_processing/inverse_text_normalization/id/taggers/cardinal.py
+++ b/fun_text_processing/inverse_text_normalization/id/taggers/cardinal.py
@@ -26,11 +26,10 @@
graph_teen = pynini.string_file(get_abs_path("data/numbers/teen.tsv"))
graph_hundreds = pynini.string_file(get_abs_path("data/numbers/hundreds.tsv"))
graph_thousand = pynini.string_file(get_abs_path("data/numbers/thousand.tsv"))
-
- graph_cents = pynini.cross("seratus", "100") | pynini.cross("ratus", "100") | pynini.union(graph_hundreds, pynutil.insert("0"))
+
graph_hundred = pynini.cross("ratus", "") | pynini.cross("seratus", "")
- graph_hundred_component = pynini.union(graph_digit + delete_space + graph_hundred, pynutil.insert("00"))
+ graph_hundred_component = pynini.union(graph_digit + delete_space + graph_hundred, pynutil.insert("0"))
graph_hundred_component += delete_space
graph_hundred_component += pynini.union(
graph_teen | pynutil.insert("00"),
@@ -44,8 +43,8 @@
(graph_ties | pynutil.insert("0")) + delete_space + (
graph_digit | pynutil.insert("0")),
)
- graph_hundred_component = graph_hundred_component | graph_cents | graph_one_hundred_component
-
+ graph_hundred_component = graph_hundred_component | graph_one_hundred_component
+
graph_hundred_component_at_least_one_none_zero_digit = graph_hundred_component @ (
pynini.closure(DAMO_DIGIT) + (DAMO_DIGIT - "0") + pynini.closure(DAMO_DIGIT)
)
@@ -54,14 +53,12 @@
)
graph_thousand = pynini.cross("ribu", "") | pynini.cross("seribu", "")
graph_one_thousand_component = pynini.union(pynini.cross("ribu", "1") | pynini.cross("seribu", "1"))
- graph_thousand_cents = pynini.cross("seribu", "10") | pynini.cross("ribu","10") | pynini.union(graph_thousand, pynutil.insert(""))
+
graph_thousands = pynini.union(
graph_hundred_component_at_least_one_none_zero_digit + delete_space + (pynutil.delete("ribu") | pynutil.delete("seribu")),
pynutil.insert("000", weight=0.1),
)
- graph_thousand_component = pynini.union(graph_digit + delete_space + graph_thousand, pynutil.insert("000"))
- graph_thousand_component += delete_space
- graph_thousands = graph_thousands | graph_thousand_cents | graph_thousand_component | graph_one_thousand_component
+ graph_thousands = graph_thousands | (pynutil.insert("00") + graph_one_thousand_component)
graph_million = pynini.union(
graph_hundred_component_at_least_one_none_zero_digit + delete_space + (pynutil.delete("juta") | pynutil.delete("sejuta")),
diff --git a/funasr/runtime/html5/static/main.js b/funasr/runtime/html5/static/main.js
index be57df1..9317778 100644
--- a/funasr/runtime/html5/static/main.js
+++ b/funasr/runtime/html5/static/main.js
@@ -145,7 +145,9 @@
isRec = false;
info_div.innerHTML="请等候...";
btnStop.disabled = true;
- setTimeout(function(){btnStart.disabled = false;info_div.innerHTML="请点击开始";}, 3000 );
+ setTimeout(function(){
+ console.log("call stop ws!");
+ wsconnecter.wsStop();btnStart.disabled = false;info_div.innerHTML="请点击开始";}, 3000 );
rec.stop(function(blob,duration){
console.log(blob);
diff --git a/funasr/runtime/html5/static/wsconnecter.js b/funasr/runtime/html5/static/wsconnecter.js
index dfa8235..676a94a 100644
--- a/funasr/runtime/html5/static/wsconnecter.js
+++ b/funasr/runtime/html5/static/wsconnecter.js
@@ -28,7 +28,11 @@
if ( 'WebSocket' in window ) {
speechSokt = new WebSocket( Uri ); // 定义socket连接对象
speechSokt.onopen = function(e){onOpen(e);}; // 定义响应函数
- speechSokt.onclose = function(e){onClose(e);};
+ speechSokt.onclose = function(e){
+ console.log("onclose ws!");
+ speechSokt.close();
+ onClose(e);
+ };
speechSokt.onmessage = function(e){onMessage(e);};
speechSokt.onerror = function(e){onError(e);};
return 1;
@@ -42,6 +46,7 @@
// 定义停止与发送函数
this.wsStop = function () {
if(speechSokt != undefined) {
+ console.log("stop ws!");
speechSokt.close();
}
};
diff --git a/funasr/runtime/python/websocket/wss_srv_asr.py b/funasr/runtime/python/websocket/wss_srv_asr.py
index 6460fbf..948619b 100644
--- a/funasr/runtime/python/websocket/wss_srv_asr.py
+++ b/funasr/runtime/python/websocket/wss_srv_asr.py
@@ -58,16 +58,36 @@
model=args.asr_model_online,
ngpu=args.ngpu,
ncpu=args.ncpu,
- model_revision='v1.0.6',
+ model_revision='v1.0.4',
+ update_model='v1.0.4',
mode='paraformer_streaming')
-print("model loaded")
+print("model loaded! only support one client at the same time now!!!!")
+async def ws_reset(websocket):
+    """Reset one client's streaming state, flush the pipelines, and close it.
+
+    Replaces the per-connection online-ASR and VAD parameter dicts with
+    fresh ones flagged ``is_final``, runs both pipelines once on a buffer
+    of zeros, then closes the websocket.
+
+    Args:
+        websocket: the client connection whose ``param_dict_*`` attributes
+            are overwritten and which is closed at the end.
+    """
+    print("ws reset now, total num is ",len(websocket_users))
+    websocket.param_dict_asr_online = {"cache": dict()}
+    websocket.param_dict_vad = {'in_cache': dict(), "is_final": True}
+    websocket.param_dict_asr_online["is_final"]=True
+    # 16000 zero-valued int16 samples (presumably one second of silence at
+    # 16 kHz -- confirm against the model's sample rate). tobytes() produces
+    # the same 32000 zero bytes as joining the samples one by one, in a
+    # single call.
+    audio_in=np.zeros(int(16000),dtype=np.int16).tobytes()
+    # NOTE(review): feeding a final chunk of zeros looks intended to flush
+    # any state held inside the shared pipelines -- confirm.
+    inference_pipeline_vad(audio_in=audio_in, param_dict=websocket.param_dict_vad)
+    inference_pipeline_asr_online(audio_in=audio_in, param_dict=websocket.param_dict_asr_online)
+    await websocket.close()
+
+
+async def clear_websocket():
+    """Reset and close every tracked client, then empty ``websocket_users``."""
+    # Iterate over a snapshot: ws_reset() awaits, and while this coroutine
+    # is suspended a connection handler may remove its own socket from
+    # websocket_users (ws_serve does so on ConnectionClosed). Mutating the
+    # live set mid-iteration raises
+    # "RuntimeError: Set changed size during iteration".
+    for websocket in list(websocket_users):
+        await ws_reset(websocket)
+    websocket_users.clear()
+
+
+
async def ws_serve(websocket, path):
frames = []
frames_asr = []
frames_asr_online = []
global websocket_users
+ await clear_websocket()
websocket_users.add(websocket)
websocket.param_dict_asr = {}
websocket.param_dict_asr_online = {"cache": dict()}
@@ -139,7 +159,8 @@
except websockets.ConnectionClosed:
- print("ConnectionClosed...", websocket_users)
+ print("ConnectionClosed...", websocket_users,flush=True)
+ await ws_reset(websocket)
websocket_users.remove(websocket)
except websockets.InvalidState:
print("InvalidState...")
--
Gitblit v1.9.1