From f577bb5e72b0a8ce4b7c947e0661e15deb4078ea Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期四, 30 五月 2024 14:55:32 +0800
Subject: [PATCH] docs
---
setup.py | 2
funasr/download/name_maps_from_hub.py | 22 ++++++++++
README_zh.md | 21 +++++++++-
README.md | 17 ++++++++
4 files changed, 58 insertions(+), 4 deletions(-)
diff --git a/README.md b/README.md
index 81fb1b3..3bbbf2a 100644
--- a/README.md
+++ b/README.md
@@ -157,6 +157,8 @@
```
Note: `chunk_size` is the configuration for streaming latency.` [0,10,5]` indicates that the real-time display granularity is `10*60=600ms`, and the lookahead information is `5*60=300ms`. Each inference input is `600ms` (sample points are `16000*0.6=960`), and the output is the corresponding text. For the last speech segment input, `is_final=True` needs to be set to force the output of the last word.
+<details><summary>More Examples</summary>
+
### Voice Activity Detection (Non-Streaming)
```python
from funasr import AutoModel
@@ -215,9 +217,24 @@
res = model.generate(input=(wav_file, text_file), data_type=("sound", "text"))
print(res)
```
+
+
+### Speech Emotion Recognition
+```python
+from funasr import AutoModel
+
+model = AutoModel(model="emotion2vec_plus_large")
+
+wav_file = f"{model.model_path}/example/test.wav"
+
+res = model.generate(wav_file, output_dir="./outputs", granularity="utterance", extract_embedding=False)
+print(res)
+```
+
More usages ref to [docs](docs/tutorial/README_zh.md),
more examples ref to [demo](https://github.com/alibaba-damo-academy/FunASR/tree/main/examples/industrial_data_pretraining)
+</details>
## Export ONNX
diff --git a/README_zh.md b/README_zh.md
index b6e2566..0428db3 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -68,10 +68,10 @@
git clone https://github.com/alibaba/FunASR.git && cd FunASR
pip3 install -e ./
```
-如果需要使用工业预训练模型，安装modelscope（可选）
+如果需要使用工业预训练模型，安装modelscope与huggingface_hub（可选）
```shell
-pip3 install -U modelscope
+pip3 install -U modelscope huggingface_hub
```
## 妯″瀷浠撳簱
@@ -153,6 +153,8 @@
娉細`chunk_size`涓烘祦寮忓欢鏃堕厤缃紝`[0,10,5]`琛ㄧず涓婂睆瀹炴椂鍑哄瓧绮掑害涓篳10*60=600ms`锛屾湭鏉ヤ俊鎭负`5*60=300ms`銆傛瘡娆℃帹鐞嗚緭鍏ヤ负`600ms`锛堥噰鏍风偣鏁颁负`16000*0.6=960`锛夛紝杈撳嚭涓哄搴旀枃瀛楋紝鏈�鍚庝竴涓闊崇墖娈佃緭鍏ラ渶瑕佽缃甡is_final=True`鏉ュ己鍒惰緭鍑烘渶鍚庝竴涓瓧銆�
+<details><summary>更多例子</summary>
+
### 璇煶绔偣妫�娴嬶紙闈炲疄鏃讹級
```python
from funasr import AutoModel
@@ -216,9 +218,24 @@
res = model.generate(input=(wav_file, text_file), data_type=("sound", "text"))
print(res)
```
+
+### 情感识别
+```python
+from funasr import AutoModel
+
+model = AutoModel(model="emotion2vec_plus_large")
+
+wav_file = f"{model.model_path}/example/test.wav"
+
+res = model.generate(wav_file, output_dir="./outputs", granularity="utterance", extract_embedding=False)
+print(res)
+```
+
鏇磋缁嗭紙[鏁欑▼鏂囨。](docs/tutorial/README_zh.md)锛夛紝
鏇村锛圼妯″瀷绀轰緥](https://github.com/alibaba-damo-academy/FunASR/tree/main/examples/industrial_data_pretraining)锛�
+</details>
+
## 瀵煎嚭ONNX
### 浠庡懡浠よ瀵煎嚭
```shell
diff --git a/funasr/download/name_maps_from_hub.py b/funasr/download/name_maps_from_hub.py
index 3bb25a7..54ec61f 100644
--- a/funasr/download/name_maps_from_hub.py
+++ b/funasr/download/name_maps_from_hub.py
@@ -12,10 +12,30 @@
"Whisper-large-v2": "iic/speech_whisper-large_asr_multilingual",
"Whisper-large-v3": "iic/Whisper-large-v3",
"Qwen-Audio": "Qwen/Qwen-Audio",
+ "emotion2vec_plus_large": "iic/emotion2vec_plus_large",
+ "emotion2vec_plus_base": "iic/emotion2vec_plus_base",
+ "emotion2vec_plus_seed": "iic/emotion2vec_plus_seed",
}
name_maps_hf = {
- "": "",
+ "paraformer": "funasr/paraformer-zh",
+ "paraformer-zh": "funasr/paraformer-zh",
+ "paraformer-en": "funasr/paraformer-zh",  # FIXME(review): English alias maps to the -zh repo — looks like a copy-paste error; confirm the intended Hugging Face target (e.g. "funasr/paraformer-en")
+ "paraformer-zh-streaming": "funasr/paraformer-zh-streaming",
+ "fsmn-vad": "funasr/fsmn-vad",
+ "ct-punc": "funasr/ct-punc",
+ "ct-punc-c": "iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
+ "fa-zh": "funasr/fa-zh",
+ "cam++": "funasr/campplus",
+ "Whisper-large-v2": "iic/speech_whisper-large_asr_multilingual",
+ "Whisper-large-v3": "iic/Whisper-large-v3",
+ "Qwen-Audio": "Qwen/Qwen-Audio",
+ "emotion2vec_plus_large": "emotion2vec/emotion2vec_plus_large",
+ "iic/emotion2vec_plus_large": "emotion2vec/emotion2vec_plus_large",
+ "emotion2vec_plus_base": "emotion2vec/emotion2vec_plus_base",
+ "iic/emotion2vec_plus_base": "emotion2vec/emotion2vec_plus_base",
+ "emotion2vec_plus_seed": "emotion2vec/emotion2vec_plus_seed",
+ "iic/emotion2vec_plus_seed": "emotion2vec/emotion2vec_plus_seed",
}
name_maps_openai = {
diff --git a/setup.py b/setup.py
index dffe594..3b40f03 100644
--- a/setup.py
+++ b/setup.py
@@ -39,7 +39,7 @@
"jaconv",
"hydra-core>=1.3.2",
"tensorboardX",
- "rotary_embedding_torch",
+ # "rotary_embedding_torch",
"openai-whisper",
],
# train: The modules invoked when training only.
--
Gitblit v1.9.1