From 2f27b165559cd53afab52047309ebe4ac838ebb8 Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: 星期二, 14 五月 2024 09:54:08 +0800
Subject: [PATCH] Add files via upload
---
README.md | 19 ++++++++++---------
1 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/README.md b/README.md
index 38fd686..8b093bc 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,7 @@
| [**News**](https://github.com/alibaba-damo-academy/FunASR#whats-new)
| [**Installation**](#installation)
| [**Quick Start**](#quick-start)
+| [**Tutorial**](https://github.com/alibaba-damo-academy/FunASR/blob/main/docs/tutorial/README.md)
| [**Runtime**](./runtime/readme.md)
| [**Model Zoo**](#model-zoo)
| [**Contact**](#contact)
@@ -67,7 +68,7 @@
```
## Model Zoo
-FunASR has open-sourced a large number of pre-trained models on industrial data. You are free to use, copy, modify, and share FunASR models under the [Model License Agreement](./MODEL_LICENSE). Below are some representative models, for more models please refer to the [Model Zoo]().
+FunASR has open-sourced a large number of pre-trained models on industrial data. You are free to use, copy, modify, and share FunASR models under the [Model License Agreement](./MODEL_LICENSE). Below are some representative models, for more models please refer to the [Model Zoo](./model_zoo).
(Note: 猸� represents the ModelScope model zoo, 馃 represents the Huggingface model zoo, 馃崁 represents the OpenAI model zoo)
@@ -82,8 +83,8 @@
| fsmn-vad <br> ( [猸怾(https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [馃](https://huggingface.co/funasr/fsmn-vad) ) | voice activity detection | 5000 hours, Mandarin and English | 0.4M |
| fa-zh <br> ( [猸怾(https://modelscope.cn/models/damo/speech_timestamp_prediction-v1-16k-offline/summary) [馃](https://huggingface.co/funasr/fa-zh) ) | timestamp prediction | 5000 hours, Mandarin | 38M |
| cam++ <br> ( [猸怾(https://modelscope.cn/models/iic/speech_campplus_sv_zh-cn_16k-common/summary) [馃](https://huggingface.co/funasr/campplus) ) | speaker verification/diarization | 5000 hours | 7.2M |
-| Whisper-large-v2 <br> ([猸怾(https://www.modelscope.cn/models/iic/speech_whisper-large_asr_multilingual/summary) [馃崁](https://github.com/openai/whisper) ) | speech recognition, with timestamps, non-streaming | multilingual | 1.5G |
-| Whisper-large-v3 <br> ([猸怾(https://www.modelscope.cn/models/iic/Whisper-large-v3/summary) [馃崁](https://github.com/openai/whisper) ) | speech recognition, with timestamps, non-streaming | multilingual | 1.5G |
+| Whisper-large-v2 <br> ([猸怾(https://www.modelscope.cn/models/iic/speech_whisper-large_asr_multilingual/summary) [馃崁](https://github.com/openai/whisper) ) | speech recognition, with timestamps, non-streaming | multilingual | 1550 M |
+| Whisper-large-v3 <br> ([猸怾(https://www.modelscope.cn/models/iic/Whisper-large-v3/summary) [馃崁](https://github.com/openai/whisper) ) | speech recognition, with timestamps, non-streaming | multilingual | 1550 M |
| Qwen-Audio <br> ([猸怾(examples/industrial_data_pretraining/qwen_audio/demo.py) [馃](https://huggingface.co/Qwen/Qwen-Audio) ) | audio-text multimodal models (pretraining) | multilingual | 8B |
| Qwen-Audio-Chat <br> ([猸怾(examples/industrial_data_pretraining/qwen_audio/demo_chat.py) [馃](https://huggingface.co/Qwen/Qwen-Audio-Chat) ) | audio-text multimodal models (chat) | multilingual | 8B |
@@ -97,7 +98,7 @@
<a name="quick-start"></a>
## Quick Start
-Below is a quick start tutorial. Test audio files ([Mandarin](https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/vad_example.wav), [English]()).
+Below is a quick start tutorial. Test audio files ([Mandarin](https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/vad_example.wav), [English](https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_en.wav)).
### Command-line usage
@@ -112,7 +113,7 @@
from funasr import AutoModel
# paraformer-zh is a multi-functional asr model
# use vad, punc, spk or not as you need
-model = AutoModel(model="paraformer-zh", vad_model="fsmn-vad", punc_model="ct-punc-c",
+model = AutoModel(model="paraformer-zh", vad_model="fsmn-vad", punc_model="ct-punc",
# spk_model="cam++",
)
res = model.generate(input=f"{model.model_path}/example/asr_example.wav",
@@ -154,7 +155,7 @@
from funasr import AutoModel
model = AutoModel(model="fsmn-vad")
-wav_file = f"{model.model_path}/example/asr_example.wav"
+wav_file = f"{model.model_path}/example/vad_example.wav"
res = model.generate(input=wav_file)
print(res)
```
@@ -207,8 +208,8 @@
res = model.generate(input=(wav_file, text_file), data_type=("sound", "text"))
print(res)
```
-
-More examples ref to [docs](https://github.com/alibaba-damo-academy/FunASR/tree/main/examples/industrial_data_pretraining)
+More usages ref to [docs](docs/tutorial/README_zh.md),
+more examples ref to [demo](https://github.com/alibaba-damo-academy/FunASR/tree/main/examples/industrial_data_pretraining)
## Export ONNX
@@ -227,7 +228,7 @@
res = model.export(quantize=False)
```
-### Text ONNX
+### Test ONNX
```python
# pip3 install -U funasr-onnx
from funasr_onnx import Paraformer
--
Gitblit v1.9.1