From f577bb5e72b0a8ce4b7c947e0661e15deb4078ea Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期四, 30 五月 2024 14:55:32 +0800
Subject: [PATCH] docs
---
setup.py | 2
funasr/download/name_maps_from_hub.py | 22 ++++++++++
README_zh.md | 21 +++++++++-
README.md | 17 ++++++++
4 files changed, 58 insertions(+), 4 deletions(-)
diff --git a/README.md b/README.md
index 81fb1b3..3bbbf2a 100644
--- a/README.md
+++ b/README.md
@@ -157,6 +157,8 @@
```
Note: `chunk_size` is the configuration for streaming latency.` [0,10,5]` indicates that the real-time display granularity is `10*60=600ms`, and the lookahead information is `5*60=300ms`. Each inference input is `600ms` (sample points are `16000*0.6=960`), and the output is the corresponding text. For the last speech segment input, `is_final=True` needs to be set to force the output of the last word.
+<details><summary>More Examples</summary>
+
### Voice Activity Detection (Non-Streaming)
```python
from funasr import AutoModel
@@ -215,9 +217,24 @@
res = model.generate(input=(wav_file, text_file), data_type=("sound", "text"))
print(res)
```
+
+
+### Speech Emotion Recognition
+```python
+from funasr import AutoModel
+
+model = AutoModel(model="emotion2vec_plus_large")
+
+wav_file = f"{model.model_path}/example/test.wav"
+
+res = model.generate(wav_file, output_dir="./outputs", granularity="utterance", extract_embedding=False)
+print(res)
+```
+
More usages ref to [docs](docs/tutorial/README_zh.md),
more examples ref to [demo](https://github.com/alibaba-damo-academy/FunASR/tree/main/examples/industrial_data_pretraining)
+</details>
## Export ONNX
diff --git a/README_zh.md b/README_zh.md
index b6e2566..0428db3 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -68,10 +68,10 @@
git clone https://github.com/alibaba/FunASR.git && cd FunASR
pip3 install -e ./
```
-如果需要使用工业预训练模型，安装modelscope（可选）
+如果需要使用工业预训练模型，安装modelscope与huggingface_hub（可选）
```shell
-pip3 install -U modelscope
+pip3 install -U modelscope huggingface_hub
```
## 妯″瀷浠撳簱
@@ -153,6 +153,8 @@
娉細`chunk_size`涓烘祦寮忓欢鏃堕厤缃紝`[0,10,5]`琛ㄧず涓婂睆瀹炴椂鍑哄瓧绮掑害涓篳10*60=600ms`锛屾湭鏉ヤ俊鎭负`5*60=300ms`銆傛瘡娆℃帹鐞嗚緭鍏ヤ负`600ms`锛堥噰鏍风偣鏁颁负`16000*0.6=960`锛夛紝杈撳嚭涓哄搴旀枃瀛楋紝鏈�鍚庝竴涓闊崇墖娈佃緭鍏ラ渶瑕佽缃甡is_final=True`鏉ュ己鍒惰緭鍑烘渶鍚庝竴涓瓧銆�
+<details><summary>更多例子</summary>
+
### 璇煶绔偣妫�娴嬶紙闈炲疄鏃讹級
```python
from funasr import AutoModel
@@ -216,9 +218,24 @@
res = model.generate(input=(wav_file, text_file), data_type=("sound", "text"))
print(res)
```
+
+### 情感识别
+```python
+from funasr import AutoModel
+
+model = AutoModel(model="emotion2vec_plus_large")
+
+wav_file = f"{model.model_path}/example/test.wav"
+
+res = model.generate(wav_file, output_dir="./outputs", granularity="utterance", extract_embedding=False)
+print(res)
+```
+
鏇磋缁嗭紙[鏁欑▼鏂囨。](docs/tutorial/README_zh.md)锛夛紝
鏇村锛圼妯″瀷绀轰緥](https://github.com/alibaba-damo-academy/FunASR/tree/main/examples/industrial_data_pretraining)锛�
+</details>
+
## 瀵煎嚭ONNX
### 浠庡懡浠よ瀵煎嚭
```shell
diff --git a/funasr/download/name_maps_from_hub.py b/funasr/download/name_maps_from_hub.py
index 3bb25a7..54ec61f 100644
--- a/funasr/download/name_maps_from_hub.py
+++ b/funasr/download/name_maps_from_hub.py
@@ -12,10 +12,30 @@
"Whisper-large-v2": "iic/speech_whisper-large_asr_multilingual",
"Whisper-large-v3": "iic/Whisper-large-v3",
"Qwen-Audio": "Qwen/Qwen-Audio",
+ "emotion2vec_plus_large": "iic/emotion2vec_plus_large",
+ "emotion2vec_plus_base": "iic/emotion2vec_plus_base",
+ "emotion2vec_plus_seed": "iic/emotion2vec_plus_seed",
}
name_maps_hf = {
- "": "",
+ "paraformer": "funasr/paraformer-zh",
+ "paraformer-zh": "funasr/paraformer-zh",
+ "paraformer-en": "funasr/paraformer-zh",  # FIXME(review): English alias maps to the -zh repo — looks like a copy-paste error; confirm the intended Hugging Face target (e.g. "funasr/paraformer-en")
+ "paraformer-zh-streaming": "funasr/paraformer-zh-streaming",
+ "fsmn-vad": "funasr/fsmn-vad",
+ "ct-punc": "funasr/ct-punc",
+ "ct-punc-c": "iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
+ "fa-zh": "funasr/fa-zh",
+ "cam++": "funasr/campplus",
+ "Whisper-large-v2": "iic/speech_whisper-large_asr_multilingual",
+ "Whisper-large-v3": "iic/Whisper-large-v3",
+ "Qwen-Audio": "Qwen/Qwen-Audio",
+ "emotion2vec_plus_large": "emotion2vec/emotion2vec_plus_large",
+ "iic/emotion2vec_plus_large": "emotion2vec/emotion2vec_plus_large",
+ "emotion2vec_plus_base": "emotion2vec/emotion2vec_plus_base",
+ "iic/emotion2vec_plus_base": "emotion2vec/emotion2vec_plus_base",
+ "emotion2vec_plus_seed": "emotion2vec/emotion2vec_plus_seed",
+ "iic/emotion2vec_plus_seed": "emotion2vec/emotion2vec_plus_seed",
}
name_maps_openai = {
diff --git a/setup.py b/setup.py
index dffe594..3b40f03 100644
--- a/setup.py
+++ b/setup.py
@@ -39,7 +39,7 @@
"jaconv",
"hydra-core>=1.3.2",
"tensorboardX",
- "rotary_embedding_torch",
+ # "rotary_embedding_torch",
"openai-whisper",
],
# train: The modules invoked when training only.
--
Gitblit v1.9.1