From c9905b9be06e0a10806b895485916a9543ea5970 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Fri, 1 Mar 2024 15:08:30 +0800
Subject: [PATCH] whisper

---
 examples/industrial_data_pretraining/whisper/demo.py |    2 +-
 funasr/download/name_maps_from_hub.py                |    1 +
 README_zh.md                                         |   21 +++++++++++----------
 README.md                                            |    1 +
 4 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 91b9eda..295cdc9 100644
--- a/README.md
+++ b/README.md
@@ -79,6 +79,7 @@
 |                                   fsmn-vad <br> ( [⭐](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [🤗](https://huggingface.co/funasr/fsmn-vad) )                                   |              voice activity detection              | 5000 hours, Mandarin and English |    0.4M    | 
 |                                     fa-zh <br> ( [⭐](https://modelscope.cn/models/damo/speech_timestamp_prediction-v1-16k-offline/summary) [🤗](https://huggingface.co/funasr/fa-zh) )                                     |                timestamp prediction                |       5000 hours, Mandarin       |    38M     | 
 |                                       cam++ <br> ( [⭐](https://modelscope.cn/models/iic/speech_campplus_sv_zh-cn_16k-common/summary) [🤗](https://huggingface.co/funasr/campplus) )                                        |        speaker verification/diarization            |            5000 hours            |    7.2M    | 
+|                                                 whisper-large-v2 <br> ([⭐](https://www.modelscope.cn/models/iic/speech_whisper-large_asr_multilingual/summary)  [🤗]() )                                                   | speech recognition, with timestamps, non-streaming |          multilingual            |     1G     |
 
 
 
diff --git a/README_zh.md b/README_zh.md
index 086b2bc..73c4315 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -71,16 +71,17 @@
 （注：⭐ 表示ModelScope模型仓库链接，🤗 表示Huggingface模型仓库链接）
 
 
-|                                         模型名字                                                                                                                 |        任务详情        |     训练数据     | 参数量  |
-|:------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------:|:------------:|:----:|
-| paraformer-zh <br> ([⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)  [🤗](https://huggingface.co/funasr/paraformer-tp) ) |  语音识别，带时间戳输出，非实时   |  60000小时，中文  | 220M |
-|   paraformer-zh-streaming <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗](https://huggingface.co/funasr/paraformer-zh-streaming) )   |      语音识别，实时       |  60000小时，中文  | 220M |
-|      paraformer-en <br> ( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗](https://huggingface.co/funasr/paraformer-en) )      |      语音识别，非实时      |  50000小时，英文  | 220M |
-|                  conformer-en <br> ( [⭐](https://modelscope.cn/models/damo/speech_conformer_asr-en-16k-vocab4199-pytorch/summary) [🤗](https://huggingface.co/funasr/conformer-en) )                   |      语音识别，非实时      |  50000小时，英文  | 220M |
-|                  ct-punc <br> ( [⭐](https://modelscope.cn/models/damo/punc_ct-transformer_cn-en-common-vocab471067-large/summary) [🤗](https://huggingface.co/funasr/ct-punc) )                   |        标点恢复        |  100M，中文与英文  | 1.1G | 
-|                       fsmn-vad <br> ( [⭐](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [🤗](https://huggingface.co/funasr/fsmn-vad) )                       |     语音端点检测，实时      | 5000小时，中文与英文 | 0.4M | 
-|                       fa-zh <br> ( [⭐](https://modelscope.cn/models/damo/speech_timestamp_prediction-v1-16k-offline/summary) [🤗](https://huggingface.co/funasr/fa-zh) )                        |      字级别时间戳预测      |  50000小时，中文  | 38M  |
-|                           cam++ <br> ( [⭐](https://modelscope.cn/models/iic/speech_campplus_sv_zh-cn_16k-common/summary) [🤗](https://huggingface.co/funasr/campplus) )                            |      说话人确认/分割      |   5000小时     |    7.2M    | 
+|                                         模型名字                                                                                                                 |      任务详情       |     训练数据     | 参数量  |
+|:------------------------------------------------------------------------------------------------------------------------------------------------------------:|:---------------:|:------------:|:----:|
+| paraformer-zh <br> ([⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)  [🤗](https://huggingface.co/funasr/paraformer-tp) ) | 语音识别，带时间戳输出，非实时 |  60000小时，中文  | 220M |
+|   paraformer-zh-streaming <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗](https://huggingface.co/funasr/paraformer-zh-streaming) )   |     语音识别，实时     |  60000小时，中文  | 220M |
+|      paraformer-en <br> ( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗](https://huggingface.co/funasr/paraformer-en) )      |    语音识别，非实时     |  50000小时，英文  | 220M |
+|                  conformer-en <br> ( [⭐](https://modelscope.cn/models/damo/speech_conformer_asr-en-16k-vocab4199-pytorch/summary) [🤗](https://huggingface.co/funasr/conformer-en) )                   |    语音识别，非实时     |  50000小时，英文  | 220M |
+|                  ct-punc <br> ( [⭐](https://modelscope.cn/models/damo/punc_ct-transformer_cn-en-common-vocab471067-large/summary) [🤗](https://huggingface.co/funasr/ct-punc) )                   |      标点恢复       |  100M，中文与英文  | 1.1G | 
+|                       fsmn-vad <br> ( [⭐](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [🤗](https://huggingface.co/funasr/fsmn-vad) )                       |    语音端点检测，实时    | 5000小时，中文与英文 | 0.4M | 
+|                       fa-zh <br> ( [⭐](https://modelscope.cn/models/damo/speech_timestamp_prediction-v1-16k-offline/summary) [🤗](https://huggingface.co/funasr/fa-zh) )                        |    字级别时间戳预测     |  50000小时，中文  | 38M  |
+|                           cam++ <br> ( [⭐](https://modelscope.cn/models/iic/speech_campplus_sv_zh-cn_16k-common/summary) [🤗](https://huggingface.co/funasr/campplus) )                            |    说话人确认/分割     |    5000小时    | 7.2M | 
+| whisper-large-v2 <br> ([⭐](https://www.modelscope.cn/models/iic/speech_whisper-large_asr_multilingual/summary)  [🤗]() ) | 语音识别，带时间戳输出，非实时 |     多语言      |  1G  |
 
 
 <a name="快速开始"></a>
diff --git a/examples/industrial_data_pretraining/whisper/demo.py b/examples/industrial_data_pretraining/whisper/demo.py
index 071b49b..f010ea2 100644
--- a/examples/industrial_data_pretraining/whisper/demo.py
+++ b/examples/industrial_data_pretraining/whisper/demo.py
@@ -9,5 +9,5 @@
                   model_revision="v2.0.4",
                   )
 
-res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav")
+res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav", language=None)
 print(res)
diff --git a/funasr/download/name_maps_from_hub.py b/funasr/download/name_maps_from_hub.py
index bdcba35..fe493a7 100644
--- a/funasr/download/name_maps_from_hub.py
+++ b/funasr/download/name_maps_from_hub.py
@@ -8,6 +8,7 @@
     "ct-punc-c": "damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
     "fa-zh": "damo/speech_timestamp_prediction-v1-16k-offline",
     "cam++": "damo/speech_campplus_sv_zh-cn_16k-common",
+    "whisper-large-v2": "iic/speech_whisper-large_asr_multilingual",
 }
 
 name_maps_hf = {

--
Gitblit v1.9.1