雾聪
2023-06-02 eee6af2ece605035b0a0835eb9dbed5ae872c755
Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main
12个文件已修改
101 ■■■■ 已修改文件
README.md 1 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/TEMPLATE/README.md 21 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py 1 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py 3 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py 1 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py 3 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/tp/TEMPLATE/README.md 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
fun_text_processing/inverse_text_normalization/id/id_unit_test.tsv 16 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
fun_text_processing/inverse_text_normalization/id/taggers/cardinal.py 15 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/html5/static/main.js 4 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/html5/static/wsconnecter.js 7 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/python/websocket/wss_srv_asr.py 27 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
README.md
@@ -13,7 +13,6 @@
| [**Highlights**](#highlights)
| [**Installation**](#installation)
| [**Docs**](https://alibaba-damo-academy.github.io/FunASR/en/index.html)
| [**Tutorial_CN**](https://github.com/alibaba-damo-academy/FunASR/wiki#funasr%E7%94%A8%E6%88%B7%E6%89%8B%E5%86%8C)
| [**Papers**](https://github.com/alibaba-damo-academy/FunASR#citations)
| [**Runtime**](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime)
| [**Model Zoo**](https://github.com/alibaba-damo-academy/FunASR/blob/main/docs/model_zoo/modelscope_models.md)
egs_modelscope/asr/TEMPLATE/README.md
@@ -20,11 +20,13 @@
print(rec_result)
```
#### [Paraformer-online Model](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary)
##### Streaming Decoding
```python
inference_pipeline = pipeline(
    task=Tasks.auto_speech_recognition,
    model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
    model_revision='v1.0.6',
    model_revision='v1.0.4',
    update_model='v1.0.4',
    mode='paraformer_streaming'
    )
import soundfile
@@ -42,6 +44,23 @@
rec_result = inference_pipeline(audio_in=speech_chunk, param_dict=param_dict)
print(rec_result)
```
##### Fake Streaming Decoding
```python
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
inference_pipeline = pipeline(
    task=Tasks.auto_speech_recognition,
    model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
    model_revision='v1.0.6',
    update_model='v1.0.6',
    mode="paraformer_fake_streaming"
)
audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
rec_result = inference_pipeline(audio_in=audio_in)
print(rec_result)
```
For the full demo code, please refer to the [demo](https://github.com/alibaba-damo-academy/FunASR/discussions/241)
#### [UniASR Model](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/summary)
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
@@ -5,6 +5,7 @@
    task=Tasks.auto_speech_recognition,
    model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
    model_revision='v1.0.6',
    update_model='v1.0.6',
    mode="paraformer_fake_streaming"
)
audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
@@ -14,7 +14,8 @@
inference_pipeline = pipeline(
    task=Tasks.auto_speech_recognition,
    model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online',
    model_revision='v1.0.6',
    model_revision='v1.0.4',
    update_model='v1.0.4',
    mode="paraformer_streaming"
)
egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
@@ -5,6 +5,7 @@
    task=Tasks.auto_speech_recognition,
    model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online',
    model_revision='v1.0.6',
    update_model='v1.0.6',
    mode="paraformer_fake_streaming"
)
audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'
egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo_online.py
@@ -14,7 +14,8 @@
inference_pipeline = pipeline(
    task=Tasks.auto_speech_recognition,
    model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online',
    model_revision='v1.0.6',
    model_revision='v1.0.4',
    update_model='v1.0.4',
    mode="paraformer_streaming"
)
egs_modelscope/tp/TEMPLATE/README.md
@@ -11,7 +11,7 @@
inference_pipeline = pipeline(
    task=Tasks.speech_timestamp,
    model='damo/speech_timestamp_prediction-v1-16k-offline',
    output_dir=None)
    model_revision='v1.1.0')
rec_result = inference_pipeline(
    audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_timestamps.wav',
fun_text_processing/inverse_text_normalization/id/id_unit_test.tsv
@@ -1,10 +1,10 @@
dua ribu dua puluh dua    2022
tiga ribu    300
tiga ribu    3000
sembilan ribu sembilan ratus sembilan puluh sembilan    9999
seribu satu    100001
ribu    100
seribu satu    1001
ribu    1000
seribu    1000
seribu dua ratus delapan puluh sembilan    10289
seribu dua ratus delapan puluh sembilan    1289
ribu dua ratus delapan puluh sembilan    1289
nol satu dua tiga empat lima enam tujuh delapan sembilan    01 2345-6789
empat belas    14
@@ -22,8 +22,8 @@
seratus dua puluh tiga    123
ratus dua puluh tiga    123
dua puluh empat maret     24 maret
ribu tujuh puluh enam    10076
seribu tujuh puluh enam    100076
ribu tujuh puluh enam rupiah    10076 rupiah
ribu tujuh puluh enam    1076
seribu tujuh puluh enam    1076
ribu tujuh puluh enam rupiah    1076 rupiah
tujuh puluh enam    76
ditambah enam dua dua satu enam lima tiga sembilan nol enam nol lima    +62 21 6539-0605
ditambah enam dua dua satu enam lima tiga sembilan nol enam nol lima    +62 21 6539-0605
fun_text_processing/inverse_text_normalization/id/taggers/cardinal.py
@@ -26,11 +26,10 @@
        graph_teen = pynini.string_file(get_abs_path("data/numbers/teen.tsv"))
        graph_hundreds = pynini.string_file(get_abs_path("data/numbers/hundreds.tsv"))
        graph_thousand = pynini.string_file(get_abs_path("data/numbers/thousand.tsv"))
        graph_cents = pynini.cross("seratus", "100") | pynini.cross("ratus", "100") | pynini.union(graph_hundreds, pynutil.insert("0"))
        graph_hundred = pynini.cross("ratus", "") | pynini.cross("seratus", "")
        graph_hundred_component = pynini.union(graph_digit + delete_space + graph_hundred, pynutil.insert("00"))
        graph_hundred_component = pynini.union(graph_digit + delete_space + graph_hundred, pynutil.insert("0"))
        graph_hundred_component += delete_space
        graph_hundred_component += pynini.union(
            graph_teen | pynutil.insert("00"),
@@ -44,8 +43,8 @@
                (graph_ties | pynutil.insert("0")) + delete_space + (
                            graph_digit | pynutil.insert("0")),
        )
        graph_hundred_component = graph_hundred_component | graph_cents | graph_one_hundred_component
        graph_hundred_component = graph_hundred_component | graph_one_hundred_component
        graph_hundred_component_at_least_one_none_zero_digit = graph_hundred_component @ (
            pynini.closure(DAMO_DIGIT) + (DAMO_DIGIT - "0") + pynini.closure(DAMO_DIGIT)
        )
@@ -54,14 +53,12 @@
        )
        graph_thousand = pynini.cross("ribu", "") | pynini.cross("seribu", "")
        graph_one_thousand_component = pynini.union(pynini.cross("ribu", "1") | pynini.cross("seribu", "1"))
        graph_thousand_cents = pynini.cross("seribu", "10") | pynini.cross("ribu","10") | pynini.union(graph_thousand, pynutil.insert(""))
        graph_thousands = pynini.union(
            graph_hundred_component_at_least_one_none_zero_digit + delete_space + (pynutil.delete("ribu") | pynutil.delete("seribu")),
            pynutil.insert("000", weight=0.1),
        )
        graph_thousand_component = pynini.union(graph_digit + delete_space + graph_thousand, pynutil.insert("000"))
        graph_thousand_component += delete_space
        graph_thousands = graph_thousands | graph_thousand_cents | graph_thousand_component | graph_one_thousand_component
        graph_thousands = graph_thousands | (pynutil.insert("00") + graph_one_thousand_component)
        graph_million = pynini.union(
            graph_hundred_component_at_least_one_none_zero_digit + delete_space + (pynutil.delete("juta") | pynutil.delete("sejuta")),
funasr/runtime/html5/static/main.js
@@ -145,7 +145,9 @@
    isRec = false;
    info_div.innerHTML="请等候...";
    btnStop.disabled = true;
    setTimeout(function(){btnStart.disabled = false;info_div.innerHTML="请点击开始";}, 3000 );
    setTimeout(function(){
        console.log("call stop ws!");
        wsconnecter.wsStop();btnStart.disabled = false;info_div.innerHTML="请点击开始";}, 3000 );
    rec.stop(function(blob,duration){
  
        console.log(blob);
funasr/runtime/html5/static/wsconnecter.js
@@ -28,7 +28,11 @@
        if ( 'WebSocket' in window ) {
            speechSokt = new WebSocket( Uri ); // 定义socket连接对象
            speechSokt.onopen = function(e){onOpen(e);}; // 定义响应函数
            speechSokt.onclose = function(e){onClose(e);};
            speechSokt.onclose = function(e){
                console.log("onclose ws!");
                speechSokt.close();
                onClose(e);
                };
            speechSokt.onmessage = function(e){onMessage(e);};
            speechSokt.onerror = function(e){onError(e);};
            return 1;
@@ -42,6 +46,7 @@
    // 定义停止与发送函数
    this.wsStop = function () {
        if(speechSokt != undefined) {
            console.log("stop ws!");
            speechSokt.close();
        }
    };
funasr/runtime/python/websocket/wss_srv_asr.py
@@ -58,16 +58,36 @@
    model=args.asr_model_online,
    ngpu=args.ngpu,
    ncpu=args.ncpu,
    model_revision='v1.0.6',
    model_revision='v1.0.4',
    update_model='v1.0.4',
    mode='paraformer_streaming')
print("model loaded")
print("model loaded! only support one client at the same time now!!!!")
async def ws_reset(websocket):
    """Reset the per-connection decoding state of ``websocket`` and close it.

    Fresh parameter dicts (both flagged ``is_final``) are attached to the
    connection, a buffer of 16000 zero-valued int16 samples is pushed through
    the VAD and online-ASR pipelines with those dicts, and the connection is
    then closed.
    """
    print("ws reset now, total num is ",len(websocket_users))

    asr_online_params = {"cache": dict(), "is_final": True}
    vad_params = {'in_cache': dict(), "is_final": True}
    websocket.param_dict_asr_online = asr_online_params
    websocket.param_dict_vad = vad_params

    # Zero-filled int16 samples joined into a single bytes object.
    silence = b''.join(np.zeros(16000, dtype=np.int16))
    inference_pipeline_vad(audio_in=silence, param_dict=vad_params)
    inference_pipeline_asr_online(audio_in=silence, param_dict=asr_online_params)

    await websocket.close()
async def clear_websocket():
    """Reset and close every tracked connection, then empty the registry.

    Each connection in ``websocket_users`` is passed to ``ws_reset``; once all
    of them have been processed the collection is cleared.
    """
    # Iterate over a snapshot: ws_reset awaits, and while this coroutine is
    # suspended another handler may add to or remove from websocket_users.
    # Mutating the live collection mid-iteration raises RuntimeError.
    for websocket in list(websocket_users):
        await ws_reset(websocket)
    websocket_users.clear()
async def ws_serve(websocket, path):
    frames = []
    frames_asr = []
    frames_asr_online = []
    global websocket_users
    await clear_websocket()
    websocket_users.add(websocket)
    websocket.param_dict_asr = {}
    websocket.param_dict_asr_online = {"cache": dict()}
@@ -139,7 +159,8 @@
     
    except websockets.ConnectionClosed:
        print("ConnectionClosed...", websocket_users)
        print("ConnectionClosed...", websocket_users,flush=True)
        await ws_reset(websocket)
        websocket_users.remove(websocket)
    except websockets.InvalidState:
        print("InvalidState...")