From 81fb78286f6e6893ef5a319bfb2ba21d340476d3 Mon Sep 17 00:00:00 2001 From: 游雁 <zhifu.gzf@alibaba-inc.com> Date: 星期五, 22 三月 2024 20:13:05 +0800 Subject: [PATCH] update --- examples/aishell/conformer/conf/conformer_12e_6d_2048_256.yaml | 7 - examples/aishell/transformer/conf/transformer_12e_6d_2048_256.yaml | 7 - docs/tutorial/README_zh.md | 70 ++++++++++++++-- examples/industrial_data_pretraining/paraformer/README_zh.md | 70 ++++++++++++++-- examples/README_zh.md | 70 ++++++++++++++-- 5 files changed, 177 insertions(+), 47 deletions(-) diff --git a/docs/tutorial/README_zh.md b/docs/tutorial/README_zh.md index 59d1303..563fe7c 100644 --- a/docs/tutorial/README_zh.md +++ b/docs/tutorial/README_zh.md @@ -2,11 +2,26 @@ FunASR寮�婧愪簡澶ч噺鍦ㄥ伐涓氭暟鎹笂棰勮缁冩ā鍨嬶紝鎮ㄥ彲浠ュ湪 [妯″瀷璁稿彲鍗忚](https://github.com/alibaba-damo-academy/FunASR/blob/main/MODEL_LICENSE)涓嬭嚜鐢变娇鐢ㄣ�佸鍒躲�佷慨鏀瑰拰鍒嗕韩FunASR妯″瀷锛屼笅闈㈠垪涓句唬琛ㄦ�х殑妯″瀷锛屾洿澶氭ā鍨嬭鍙傝�� [妯″瀷浠撳簱](https://github.com/alibaba-damo-academy/FunASR/tree/main/model_zoo)銆� +<div align="center"> +<h4> + <a href="#妯″瀷鎺ㄧ悊"> 妯″瀷鎺ㄧ悊 </a> +锝�<a href="#妯″瀷璁粌涓庢祴璇�"> 妯″瀷璁粌涓庢祴璇� </a> +锝�<a href="#妯″瀷瀵煎嚭涓庢祴璇�"> 妯″瀷瀵煎嚭涓庢祴璇� </a> +</h4> +</div> -## 鎺ㄧ悊 +<a name="妯″瀷鎺ㄧ悊"></a> +## 妯″瀷鎺ㄧ悊 ### 蹇�熶娇鐢� -#### [Paraformer 妯″瀷](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) + +鍛戒护琛屾柟寮忚皟鐢細 +```shell +funasr ++model=paraformer-zh ++vad_model="fsmn-vad" ++punc_model="ct-punc" ++input=asr_example_zh.wav +``` + +python浠g爜璋冪敤锛堟帹鑽愶級 + ```python from funasr import AutoModel @@ -50,13 +65,6 @@ - `output_dir`: None 锛堥粯璁わ級锛屽鏋滆缃紝杈撳嚭缁撴灉鐨勮緭鍑鸿矾寰� - `**kwargs`(dict): 涓庢ā鍨嬬浉鍏崇殑鎺ㄧ悊鍙傛暟锛屼緥濡傦紝`beam_size=10`锛宍decoding_ctc_weight=0.1`銆� -### onnx涓巐ibtorch瀵煎嚭 - -```python -res = model.export(type="onnx", quantize=True) -``` -- `type`(str)锛歚onnx`(榛樿)锛屽鍑簅nnx鏍煎紡銆俙torch`瀵煎嚭libtorch鏍煎紡銆� -- `quantize`(bool)锛歚False`锛堥粯璁わ級锛屾槸鍚﹀仛閲忓寲銆� ### 鏇村鐢ㄦ硶浠嬬粛 @@ -182,10 +190,18 @@ ``` 
鏇村锛圼绀轰緥](https://github.com/alibaba-damo-academy/FunASR/tree/main/examples/industrial_data_pretraining)锛� - -## 寰皟 +<a name="鏍稿績鍔熻兘"></a> +## 妯″瀷璁粌涓庢祴璇� ### 蹇�熷紑濮� + +鍛戒护琛屾墽琛岋紙鐢ㄤ簬蹇�熸祴璇曪紝涓嶆帹鑽愶級锛� +```shell +funasr-train ++model=paraformer-zh ++train_data_set_list=data/list/train.jsonl ++valid_data_set_list=data/list/val.jsonl ++output_dir="./outputs" &> log.txt & +``` + +python浠g爜鎵ц锛堝彲浠ュ鏈哄鍗★紝鎺ㄨ崘锛� + ```shell cd examples/industrial_data_pretraining/paraformer bash finetune.sh @@ -335,3 +351,35 @@ tensorboard --logdir /xxxx/FunASR/examples/industrial_data_pretraining/paraformer/outputs/log/tensorboard ``` 娴忚鍣ㄤ腑鎵撳紑锛歨ttp://localhost:6006/ + + +<a name="妯″瀷瀵煎嚭涓庢祴璇�"></a> +## 妯″瀷瀵煎嚭涓庢祴璇� +### 浠庡懡浠よ瀵煎嚭 +```shell +funasr-export ++model=paraformer ++quantize=false +``` + +### 浠嶱ython瀵煎嚭 +```python +from funasr import AutoModel + +model = AutoModel(model="paraformer") + +res = model.export(quantize=False) +``` + +### 娴嬭瘯ONNX +```python +# pip3 install -U funasr-onnx +from funasr_onnx import Paraformer +model_dir = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" +model = Paraformer(model_dir, batch_size=1, quantize=True) + +wav_path = ['~/.cache/modelscope/hub/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav'] + +result = model(wav_path) +print(result) +``` + +鏇村渚嬪瓙璇峰弬鑰� [鏍蜂緥](runtime/python/onnxruntime) \ No newline at end of file diff --git a/examples/README_zh.md b/examples/README_zh.md index 59d1303..563fe7c 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -2,11 +2,26 @@ FunASR寮�婧愪簡澶ч噺鍦ㄥ伐涓氭暟鎹笂棰勮缁冩ā鍨嬶紝鎮ㄥ彲浠ュ湪 [妯″瀷璁稿彲鍗忚](https://github.com/alibaba-damo-academy/FunASR/blob/main/MODEL_LICENSE)涓嬭嚜鐢变娇鐢ㄣ�佸鍒躲�佷慨鏀瑰拰鍒嗕韩FunASR妯″瀷锛屼笅闈㈠垪涓句唬琛ㄦ�х殑妯″瀷锛屾洿澶氭ā鍨嬭鍙傝�� [妯″瀷浠撳簱](https://github.com/alibaba-damo-academy/FunASR/tree/main/model_zoo)銆� +<div align="center"> +<h4> + <a href="#妯″瀷鎺ㄧ悊"> 妯″瀷鎺ㄧ悊 </a> +锝�<a href="#妯″瀷璁粌涓庢祴璇�"> 妯″瀷璁粌涓庢祴璇� </a> +锝�<a href="#妯″瀷瀵煎嚭涓庢祴璇�"> 妯″瀷瀵煎嚭涓庢祴璇� </a> +</h4> +</div> 
-## 鎺ㄧ悊 +<a name="妯″瀷鎺ㄧ悊"></a> +## 妯″瀷鎺ㄧ悊 ### 蹇�熶娇鐢� -#### [Paraformer 妯″瀷](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) + +鍛戒护琛屾柟寮忚皟鐢細 +```shell +funasr ++model=paraformer-zh ++vad_model="fsmn-vad" ++punc_model="ct-punc" ++input=asr_example_zh.wav +``` + +python浠g爜璋冪敤锛堟帹鑽愶級 + ```python from funasr import AutoModel @@ -50,13 +65,6 @@ - `output_dir`: None 锛堥粯璁わ級锛屽鏋滆缃紝杈撳嚭缁撴灉鐨勮緭鍑鸿矾寰� - `**kwargs`(dict): 涓庢ā鍨嬬浉鍏崇殑鎺ㄧ悊鍙傛暟锛屼緥濡傦紝`beam_size=10`锛宍decoding_ctc_weight=0.1`銆� -### onnx涓巐ibtorch瀵煎嚭 - -```python -res = model.export(type="onnx", quantize=True) -``` -- `type`(str)锛歚onnx`(榛樿)锛屽鍑簅nnx鏍煎紡銆俙torch`瀵煎嚭libtorch鏍煎紡銆� -- `quantize`(bool)锛歚False`锛堥粯璁わ級锛屾槸鍚﹀仛閲忓寲銆� ### 鏇村鐢ㄦ硶浠嬬粛 @@ -182,10 +190,18 @@ ``` 鏇村锛圼绀轰緥](https://github.com/alibaba-damo-academy/FunASR/tree/main/examples/industrial_data_pretraining)锛� - -## 寰皟 +<a name="鏍稿績鍔熻兘"></a> +## 妯″瀷璁粌涓庢祴璇� ### 蹇�熷紑濮� + +鍛戒护琛屾墽琛岋紙鐢ㄤ簬蹇�熸祴璇曪紝涓嶆帹鑽愶級锛� +```shell +funasr-train ++model=paraformer-zh ++train_data_set_list=data/list/train.jsonl ++valid_data_set_list=data/list/val.jsonl ++output_dir="./outputs" &> log.txt & +``` + +python浠g爜鎵ц锛堝彲浠ュ鏈哄鍗★紝鎺ㄨ崘锛� + ```shell cd examples/industrial_data_pretraining/paraformer bash finetune.sh @@ -335,3 +351,35 @@ tensorboard --logdir /xxxx/FunASR/examples/industrial_data_pretraining/paraformer/outputs/log/tensorboard ``` 娴忚鍣ㄤ腑鎵撳紑锛歨ttp://localhost:6006/ + + +<a name="妯″瀷瀵煎嚭涓庢祴璇�"></a> +## 妯″瀷瀵煎嚭涓庢祴璇� +### 浠庡懡浠よ瀵煎嚭 +```shell +funasr-export ++model=paraformer ++quantize=false +``` + +### 浠嶱ython瀵煎嚭 +```python +from funasr import AutoModel + +model = AutoModel(model="paraformer") + +res = model.export(quantize=False) +``` + +### 娴嬭瘯ONNX +```python +# pip3 install -U funasr-onnx +from funasr_onnx import Paraformer +model_dir = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" +model = Paraformer(model_dir, batch_size=1, quantize=True) + +wav_path = 
['~/.cache/modelscope/hub/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav'] + +result = model(wav_path) +print(result) +``` + +鏇村渚嬪瓙璇峰弬鑰� [鏍蜂緥](runtime/python/onnxruntime) \ No newline at end of file diff --git a/examples/aishell/conformer/conf/conformer_12e_6d_2048_256.yaml b/examples/aishell/conformer/conf/conformer_12e_6d_2048_256.yaml index 12eeb3e..747c0f6 100644 --- a/examples/aishell/conformer/conf/conformer_12e_6d_2048_256.yaml +++ b/examples/aishell/conformer/conf/conformer_12e_6d_2048_256.yaml @@ -74,13 +74,6 @@ accum_grad: 1 grad_clip: 5 max_epoch: 150 - val_scheduler_criterion: - - valid - - acc - best_model_criterion: - - - valid - - acc - - max keep_nbest_models: 10 log_interval: 50 diff --git a/examples/aishell/transformer/conf/transformer_12e_6d_2048_256.yaml b/examples/aishell/transformer/conf/transformer_12e_6d_2048_256.yaml index 51936c1..395ea44 100644 --- a/examples/aishell/transformer/conf/transformer_12e_6d_2048_256.yaml +++ b/examples/aishell/transformer/conf/transformer_12e_6d_2048_256.yaml @@ -68,13 +68,6 @@ accum_grad: 1 grad_clip: 5 max_epoch: 150 - val_scheduler_criterion: - - valid - - acc - best_model_criterion: - - - valid - - acc - - max keep_nbest_models: 10 log_interval: 50 diff --git a/examples/industrial_data_pretraining/paraformer/README_zh.md b/examples/industrial_data_pretraining/paraformer/README_zh.md index 59d1303..563fe7c 100644 --- a/examples/industrial_data_pretraining/paraformer/README_zh.md +++ b/examples/industrial_data_pretraining/paraformer/README_zh.md @@ -2,11 +2,26 @@ FunASR寮�婧愪簡澶ч噺鍦ㄥ伐涓氭暟鎹笂棰勮缁冩ā鍨嬶紝鎮ㄥ彲浠ュ湪 [妯″瀷璁稿彲鍗忚](https://github.com/alibaba-damo-academy/FunASR/blob/main/MODEL_LICENSE)涓嬭嚜鐢变娇鐢ㄣ�佸鍒躲�佷慨鏀瑰拰鍒嗕韩FunASR妯″瀷锛屼笅闈㈠垪涓句唬琛ㄦ�х殑妯″瀷锛屾洿澶氭ā鍨嬭鍙傝�� [妯″瀷浠撳簱](https://github.com/alibaba-damo-academy/FunASR/tree/main/model_zoo)銆� +<div align="center"> +<h4> + <a href="#妯″瀷鎺ㄧ悊"> 妯″瀷鎺ㄧ悊 </a> +锝�<a href="#妯″瀷璁粌涓庢祴璇�"> 妯″瀷璁粌涓庢祴璇� </a> +锝�<a href="#妯″瀷瀵煎嚭涓庢祴璇�"> 妯″瀷瀵煎嚭涓庢祴璇� </a> 
+</h4> +</div> -## 鎺ㄧ悊 +<a name="妯″瀷鎺ㄧ悊"></a> +## 妯″瀷鎺ㄧ悊 ### 蹇�熶娇鐢� -#### [Paraformer 妯″瀷](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) + +鍛戒护琛屾柟寮忚皟鐢細 +```shell +funasr ++model=paraformer-zh ++vad_model="fsmn-vad" ++punc_model="ct-punc" ++input=asr_example_zh.wav +``` + +python浠g爜璋冪敤锛堟帹鑽愶級 + ```python from funasr import AutoModel @@ -50,13 +65,6 @@ - `output_dir`: None 锛堥粯璁わ級锛屽鏋滆缃紝杈撳嚭缁撴灉鐨勮緭鍑鸿矾寰� - `**kwargs`(dict): 涓庢ā鍨嬬浉鍏崇殑鎺ㄧ悊鍙傛暟锛屼緥濡傦紝`beam_size=10`锛宍decoding_ctc_weight=0.1`銆� -### onnx涓巐ibtorch瀵煎嚭 - -```python -res = model.export(type="onnx", quantize=True) -``` -- `type`(str)锛歚onnx`(榛樿)锛屽鍑簅nnx鏍煎紡銆俙torch`瀵煎嚭libtorch鏍煎紡銆� -- `quantize`(bool)锛歚False`锛堥粯璁わ級锛屾槸鍚﹀仛閲忓寲銆� ### 鏇村鐢ㄦ硶浠嬬粛 @@ -182,10 +190,18 @@ ``` 鏇村锛圼绀轰緥](https://github.com/alibaba-damo-academy/FunASR/tree/main/examples/industrial_data_pretraining)锛� - -## 寰皟 +<a name="鏍稿績鍔熻兘"></a> +## 妯″瀷璁粌涓庢祴璇� ### 蹇�熷紑濮� + +鍛戒护琛屾墽琛岋紙鐢ㄤ簬蹇�熸祴璇曪紝涓嶆帹鑽愶級锛� +```shell +funasr-train ++model=paraformer-zh ++train_data_set_list=data/list/train.jsonl ++valid_data_set_list=data/list/val.jsonl ++output_dir="./outputs" &> log.txt & +``` + +python浠g爜鎵ц锛堝彲浠ュ鏈哄鍗★紝鎺ㄨ崘锛� + ```shell cd examples/industrial_data_pretraining/paraformer bash finetune.sh @@ -335,3 +351,35 @@ tensorboard --logdir /xxxx/FunASR/examples/industrial_data_pretraining/paraformer/outputs/log/tensorboard ``` 娴忚鍣ㄤ腑鎵撳紑锛歨ttp://localhost:6006/ + + +<a name="妯″瀷瀵煎嚭涓庢祴璇�"></a> +## 妯″瀷瀵煎嚭涓庢祴璇� +### 浠庡懡浠よ瀵煎嚭 +```shell +funasr-export ++model=paraformer ++quantize=false +``` + +### 浠嶱ython瀵煎嚭 +```python +from funasr import AutoModel + +model = AutoModel(model="paraformer") + +res = model.export(quantize=False) +``` + +### 娴嬭瘯ONNX +```python +# pip3 install -U funasr-onnx +from funasr_onnx import Paraformer +model_dir = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" +model = Paraformer(model_dir, batch_size=1, quantize=True) + +wav_path = 
['~/.cache/modelscope/hub/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav'] + +result = model(wav_path) +print(result) +``` + +更多例子请参考 [样例](runtime/python/onnxruntime) \ No newline at end of file -- Gitblit v1.9.1