Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main
| | |
| | | | [**Highlights**](#highlights) |
| | | | [**Installation**](#installation) |
| | | | [**Docs**](https://alibaba-damo-academy.github.io/FunASR/en/index.html) |
| | | | [**Tutorial_CN**](https://github.com/alibaba-damo-academy/FunASR/wiki#funasr%E7%94%A8%E6%88%B7%E6%89%8B%E5%86%8C) |
| | | | [**Papers**](https://github.com/alibaba-damo-academy/FunASR#citations) |
| | | | [**Runtime**](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime) |
| | | | [**Model Zoo**](https://github.com/alibaba-damo-academy/FunASR/blob/main/docs/model_zoo/modelscope_models.md) |
| | |
| | | print(rec_result) |
| | | ``` |
| | | #### [Paraformer-online Model](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) |
| | | ##### Streaming Decoding |
| | | ```python |
| | | inference_pipeline = pipeline( |
| | | task=Tasks.auto_speech_recognition, |
| | | model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online', |
| | | model_revision='v1.0.6', |
| | | model_revision='v1.0.4', |
| | | update_model='v1.0.4', |
| | | mode='paraformer_streaming' |
| | | ) |
| | | import soundfile |
| | |
| | | rec_result = inference_pipeline(audio_in=speech_chunk, param_dict=param_dict) |
| | | print(rec_result) |
| | | ``` |
| | | |
| | | ##### Fake Streaming Decoding |
| | | ```python |
| | | from modelscope.pipelines import pipeline |
| | | from modelscope.utils.constant import Tasks |
| | | |
| | | inference_pipeline = pipeline( |
| | | task=Tasks.auto_speech_recognition, |
| | | model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online', |
| | | model_revision='v1.0.6', |
| | | update_model='v1.0.6', |
| | | mode="paraformer_fake_streaming" |
| | | ) |
| | | audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav' |
| | | rec_result = inference_pipeline(audio_in=audio_in) |
| | | print(rec_result) |
| | | ``` |
| | | Full code of demo, please ref to [demo](https://github.com/alibaba-damo-academy/FunASR/discussions/241) |
| | | |
| | | #### [UniASR Model](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/summary) |
| | |
| | | task=Tasks.auto_speech_recognition, |
| | | model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online', |
| | | model_revision='v1.0.6', |
| | | update_model='v1.0.6', |
| | | mode="paraformer_fake_streaming" |
| | | ) |
| | | audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav' |
| | |
| | | inference_pipeline = pipeline( |
| | | task=Tasks.auto_speech_recognition, |
| | | model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online', |
| | | model_revision='v1.0.6', |
| | | model_revision='v1.0.4', |
| | | update_model='v1.0.4', |
| | | mode="paraformer_streaming" |
| | | ) |
| | | |
| | |
| | | task=Tasks.auto_speech_recognition, |
| | | model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online', |
| | | model_revision='v1.0.6', |
| | | update_model='v1.0.6', |
| | | mode="paraformer_fake_streaming" |
| | | ) |
| | | audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav' |
| | |
| | | inference_pipeline = pipeline( |
| | | task=Tasks.auto_speech_recognition, |
| | | model='damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online', |
| | | model_revision='v1.0.6', |
| | | model_revision='v1.0.4', |
| | | update_model='v1.0.4', |
| | | mode="paraformer_streaming" |
| | | ) |
| | | |
| | |
| | | inference_pipeline = pipeline( |
| | | task=Tasks.speech_timestamp, |
| | | model='damo/speech_timestamp_prediction-v1-16k-offline', |
| | | output_dir=None) |
| | | model_revision='v1.1.0') |
| | | |
| | | rec_result = inference_pipeline( |
| | | audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_timestamps.wav', |
| | |
| | | dua ribu dua puluh dua 2022 |
| | | tiga ribu 300 |
| | | tiga ribu 3000 |
| | | sembilan ribu sembilan ratus sembilan puluh sembilan 9999 |
| | | seribu satu 100001 |
| | | ribu 100 |
| | | seribu satu 1001 |
| | | ribu 1000 |
| | | seribu 1000 |
| | | seribu dua ratus delapan puluh sembilan 10289 |
| | | seribu dua ratus delapan puluh sembilan 1289 |
| | | ribu dua ratus delapan puluh sembilan 1289 |
| | | nol satu dua tiga empat lima enam tujuh delapan sembilan 01 2345-6789 |
| | | empat belas 14 |
| | |
| | | seratus dua puluh tiga 123 |
| | | ratus dua puluh tiga 123 |
| | | dua puluh empat maret 24 maret |
| | | ribu tujuh puluh enam 10076 |
| | | seribu tujuh puluh enam 100076 |
| | | ribu tujuh puluh enam rupiah 10076 rupiah |
| | | ribu tujuh puluh enam 1076 |
| | | seribu tujuh puluh enam 1076 |
| | | ribu tujuh puluh enam rupiah 1076 rupiah |
| | | tujuh puluh enam 76 |
| | | ditambah enam dua dua satu enam lima tiga sembilan nol enam nol lima +62 21 6539-0605 |
| | | ditambah enam dua dua satu enam lima tiga sembilan nol enam nol lima +62 21 6539-0605 |
| | |
| | | graph_teen = pynini.string_file(get_abs_path("data/numbers/teen.tsv")) |
| | | graph_hundreds = pynini.string_file(get_abs_path("data/numbers/hundreds.tsv")) |
| | | graph_thousand = pynini.string_file(get_abs_path("data/numbers/thousand.tsv")) |
| | | |
| | | graph_cents = pynini.cross("seratus", "100") | pynini.cross("ratus", "100") | pynini.union(graph_hundreds, pynutil.insert("0")) |
| | | |
| | | graph_hundred = pynini.cross("ratus", "") | pynini.cross("seratus", "") |
| | | |
| | | graph_hundred_component = pynini.union(graph_digit + delete_space + graph_hundred, pynutil.insert("00")) |
| | | graph_hundred_component = pynini.union(graph_digit + delete_space + graph_hundred, pynutil.insert("0")) |
| | | graph_hundred_component += delete_space |
| | | graph_hundred_component += pynini.union( |
| | | graph_teen | pynutil.insert("00"), |
| | |
| | | (graph_ties | pynutil.insert("0")) + delete_space + ( |
| | | graph_digit | pynutil.insert("0")), |
| | | ) |
| | | graph_hundred_component = graph_hundred_component | graph_cents | graph_one_hundred_component |
| | | |
| | | graph_hundred_component = graph_hundred_component | graph_one_hundred_component |
| | | |
| | | graph_hundred_component_at_least_one_none_zero_digit = graph_hundred_component @ ( |
| | | pynini.closure(DAMO_DIGIT) + (DAMO_DIGIT - "0") + pynini.closure(DAMO_DIGIT) |
| | | ) |
| | |
| | | ) |
| | | graph_thousand = pynini.cross("ribu", "") | pynini.cross("seribu", "") |
| | | graph_one_thousand_component = pynini.union(pynini.cross("ribu", "1") | pynini.cross("seribu", "1")) |
| | | graph_thousand_cents = pynini.cross("seribu", "10") | pynini.cross("ribu","10") | pynini.union(graph_thousand, pynutil.insert("")) |
| | | |
| | | graph_thousands = pynini.union( |
| | | graph_hundred_component_at_least_one_none_zero_digit + delete_space + (pynutil.delete("ribu") | pynutil.delete("seribu")), |
| | | pynutil.insert("000", weight=0.1), |
| | | ) |
| | | graph_thousand_component = pynini.union(graph_digit + delete_space + graph_thousand, pynutil.insert("000")) |
| | | graph_thousand_component += delete_space |
| | | graph_thousands = graph_thousands | graph_thousand_cents | graph_thousand_component | graph_one_thousand_component |
| | | graph_thousands = graph_thousands | (pynutil.insert("00") + graph_one_thousand_component) |
| | | |
| | | graph_million = pynini.union( |
| | | graph_hundred_component_at_least_one_none_zero_digit + delete_space + (pynutil.delete("juta") | pynutil.delete("sejuta")), |
| | |
| | | isRec = false;
|
| | | info_div.innerHTML="请等候...";
|
| | | btnStop.disabled = true;
|
| | | setTimeout(function(){btnStart.disabled = false;info_div.innerHTML="请点击开始";}, 3000 );
|
| | | setTimeout(function(){
|
| | | console.log("call stop ws!");
|
| | | wsconnecter.wsStop();btnStart.disabled = false;info_div.innerHTML="请点击开始";}, 3000 );
|
| | | rec.stop(function(blob,duration){
|
| | |
|
| | | console.log(blob);
|
| | |
| | | if ( 'WebSocket' in window ) {
|
| | | speechSokt = new WebSocket( Uri ); // 定义socket连接对象
|
| | | speechSokt.onopen = function(e){onOpen(e);}; // 定义响应函数
|
| | | speechSokt.onclose = function(e){onClose(e);};
|
| | | speechSokt.onclose = function(e){
|
| | | console.log("onclose ws!");
|
| | | speechSokt.close();
|
| | | onClose(e);
|
| | | };
|
| | | speechSokt.onmessage = function(e){onMessage(e);};
|
| | | speechSokt.onerror = function(e){onError(e);};
|
| | | return 1;
|
| | |
| | | // 定义停止与发送函数
|
| | | this.wsStop = function () {
|
| | | if(speechSokt != undefined) {
|
| | | console.log("stop ws!");
|
| | | speechSokt.close();
|
| | | }
|
| | | };
|
| | |
| | | model=args.asr_model_online, |
| | | ngpu=args.ngpu, |
| | | ncpu=args.ncpu, |
| | | model_revision='v1.0.6', |
| | | model_revision='v1.0.4', |
| | | update_model='v1.0.4', |
| | | mode='paraformer_streaming') |
| | | |
| | | print("model loaded") |
| | | print("model loaded! only support one client at the same time now!!!!") |
| | | |
| | | async def ws_reset(websocket): |
| | | print("ws reset now, total num is ",len(websocket_users)) |
| | | websocket.param_dict_asr_online = {"cache": dict()} |
| | | websocket.param_dict_vad = {'in_cache': dict(), "is_final": True} |
| | | websocket.param_dict_asr_online["is_final"]=True |
| | | audio_in=b''.join(np.zeros(int(16000),dtype=np.int16)) |
| | | inference_pipeline_vad(audio_in=audio_in, param_dict=websocket.param_dict_vad) |
| | | inference_pipeline_asr_online(audio_in=audio_in, param_dict=websocket.param_dict_asr_online) |
| | | await websocket.close() |
| | | |
| | | |
| | | async def clear_websocket(): |
| | | for websocket in websocket_users: |
| | | await ws_reset(websocket) |
| | | websocket_users.clear() |
| | | |
| | | |
| | | |
| | | async def ws_serve(websocket, path): |
| | | frames = [] |
| | | frames_asr = [] |
| | | frames_asr_online = [] |
| | | global websocket_users |
| | | await clear_websocket() |
| | | websocket_users.add(websocket) |
| | | websocket.param_dict_asr = {} |
| | | websocket.param_dict_asr_online = {"cache": dict()} |
| | |
| | | |
| | | |
| | | except websockets.ConnectionClosed: |
| | | print("ConnectionClosed...", websocket_users) |
| | | print("ConnectionClosed...", websocket_users,flush=True) |
| | | await ws_reset(websocket) |
| | | websocket_users.remove(websocket) |
| | | except websockets.InvalidState: |
| | | print("InvalidState...") |