From 2f27b165559cd53afab52047309ebe4ac838ebb8 Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: 星期二, 14 五月 2024 09:54:08 +0800
Subject: [PATCH] Add files via upload

---
 README.md |   19 ++++++++++---------
 1 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 38fd686..8b093bc 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,7 @@
 | [**News**](https://github.com/alibaba-damo-academy/FunASR#whats-new) 
 | [**Installation**](#installation)
 | [**Quick Start**](#quick-start)
+| [**Tutorial**](https://github.com/alibaba-damo-academy/FunASR/blob/main/docs/tutorial/README.md)
 | [**Runtime**](./runtime/readme.md)
 | [**Model Zoo**](#model-zoo)
 | [**Contact**](#contact)
@@ -67,7 +68,7 @@
 ```
 
 ## Model Zoo
-FunASR has open-sourced a large number of pre-trained models on industrial data. You are free to use, copy, modify, and share FunASR models under the [Model License Agreement](./MODEL_LICENSE). Below are some representative models, for more models please refer to the [Model Zoo]().
+FunASR has open-sourced a large number of pre-trained models on industrial data. You are free to use, copy, modify, and share FunASR models under the [Model License Agreement](./MODEL_LICENSE). Below are some representative models, for more models please refer to the [Model Zoo](./model_zoo).
 
 (Note: 猸� represents the ModelScope model zoo, 馃 represents the Huggingface model zoo, 馃崁 represents the OpenAI model zoo)
 
@@ -82,8 +83,8 @@
 |                                   fsmn-vad <br> ( [猸怾(https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [馃](https://huggingface.co/funasr/fsmn-vad) )                                   |               voice activity detection                | 5000 hours, Mandarin and English |    0.4M    | 
 |                                     fa-zh <br> ( [猸怾(https://modelscope.cn/models/damo/speech_timestamp_prediction-v1-16k-offline/summary) [馃](https://huggingface.co/funasr/fa-zh) )                                     |                 timestamp prediction                  |       5000 hours, Mandarin       |    38M     | 
 |                                       cam++ <br> ( [猸怾(https://modelscope.cn/models/iic/speech_campplus_sv_zh-cn_16k-common/summary) [馃](https://huggingface.co/funasr/campplus) )                                        |           speaker verification/diarization            |            5000 hours            |    7.2M    | 
-|                                                  Whisper-large-v2 <br> ([猸怾(https://www.modelscope.cn/models/iic/speech_whisper-large_asr_multilingual/summary)  [馃崁](https://github.com/openai/whisper) )                                                  |  speech recognition, with timestamps, non-streaming   |          multilingual            |    1.5G    |
-|                                                Whisper-large-v3 <br> ([猸怾(https://www.modelscope.cn/models/iic/Whisper-large-v3/summary)  [馃崁](https://github.com/openai/whisper) )                                                 |  speech recognition, with timestamps, non-streaming   |          multilingual            |    1.5G    |
+|                                                  Whisper-large-v2 <br> ([猸怾(https://www.modelscope.cn/models/iic/speech_whisper-large_asr_multilingual/summary)  [馃崁](https://github.com/openai/whisper) )                                                  |  speech recognition, with timestamps, non-streaming   |          multilingual      |    1550 M    |
+|                                                Whisper-large-v3 <br> ([猸怾(https://www.modelscope.cn/models/iic/Whisper-large-v3/summary)  [馃崁](https://github.com/openai/whisper) )                                                 |  speech recognition, with timestamps, non-streaming   |          multilingual            |    1550 M    |
 |                                         Qwen-Audio <br> ([猸怾(examples/industrial_data_pretraining/qwen_audio/demo.py)  [馃](https://huggingface.co/Qwen/Qwen-Audio) )                                         |      audio-text multimodal models (pretraining)       |     multilingual      |  8B  |
 |                   Qwen-Audio-Chat <br> ([猸怾(examples/industrial_data_pretraining/qwen_audio/demo_chat.py)  [馃](https://huggingface.co/Qwen/Qwen-Audio-Chat) )                                                |          audio-text multimodal models (chat)          |     multilingual      |  8B  |
 
@@ -97,7 +98,7 @@
 <a name="quick-start"></a>
 ## Quick Start
 
-Below is a quick start tutorial. Test audio files ([Mandarin](https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/vad_example.wav), [English]()).
+Below is a quick start tutorial. Test audio files ([Mandarin](https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/vad_example.wav), [English](https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_en.wav)).
 
 ### Command-line usage
 
@@ -112,7 +113,7 @@
 from funasr import AutoModel
 # paraformer-zh is a multi-functional asr model
 # use vad, punc, spk or not as you need
-model = AutoModel(model="paraformer-zh",  vad_model="fsmn-vad",  punc_model="ct-punc-c", 
+model = AutoModel(model="paraformer-zh",  vad_model="fsmn-vad",  punc_model="ct-punc", 
                   # spk_model="cam++", 
                   )
 res = model.generate(input=f"{model.model_path}/example/asr_example.wav", 
@@ -154,7 +155,7 @@
 from funasr import AutoModel
 
 model = AutoModel(model="fsmn-vad")
-wav_file = f"{model.model_path}/example/asr_example.wav"
+wav_file = f"{model.model_path}/example/vad_example.wav"
 res = model.generate(input=wav_file)
 print(res)
 ```
@@ -207,8 +208,8 @@
 res = model.generate(input=(wav_file, text_file), data_type=("sound", "text"))
 print(res)
 ```
-
-More examples ref to [docs](https://github.com/alibaba-damo-academy/FunASR/tree/main/examples/industrial_data_pretraining)
+More usages ref to [docs](docs/tutorial/README_zh.md), 
+more examples ref to [demo](https://github.com/alibaba-damo-academy/FunASR/tree/main/examples/industrial_data_pretraining)
 
 
 ## Export ONNX
@@ -227,7 +228,7 @@
 res = model.export(quantize=False)
 ```
 
-### Text ONNX
+### Test ONNX
 ```python
 # pip3 install -U funasr-onnx
 from funasr_onnx import Paraformer

--
Gitblit v1.9.1