From d1efd59af963a25314dbbe254d298ed441695ca1 Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: 星期四, 25 一月 2024 14:32:54 +0800
Subject: [PATCH] update docker lists
---
README.md | 22 ++++++++++++----------
1 files changed, 12 insertions(+), 10 deletions(-)
diff --git a/README.md b/README.md
index 0094dc4..1ff7d95 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,7 @@
<a name="whats-new"></a>
## What's new:
+- 2024/01/25: Offline File Transcription Service 4.2, Offline File Transcription Service of English 1.3 released，optimized the VAD (Voice Activity Detection) data processing method, significantly reducing peak memory usage, memory leak optimization; Real-time Transcription Service 1.7 released，optimized the client-side，([docs](runtime/readme.md))
- 2024/01/09: The Funasr SDK for Windows version 2.0 has been released, featuring support for The offline file transcription service (CPU) of Mandarin 4.1, The offline file transcription service (CPU) of English 1.2, The real-time transcription service (CPU) of Mandarin 1.6. For more details, please refer to the official documentation or release notes([FunASR-Runtime-Windows](https://www.modelscope.cn/models/damo/funasr-runtime-win-cpu-x64/summary))
- 2024/01/03: File Transcription Service 4.0 released, Added support for 8k models, optimized timestamp mismatch issues and added sentence-level timestamps, improved the effectiveness of English word FST hotwords, supported automated configuration of thread parameters, and fixed known crash issues as well as memory leak problems, refer to ([docs](runtime/readme.md#file-transcription-service-mandarin-cpu)).
- 2024/01/03: Real-time Transcription Service 1.6 released，The 2pass-offline mode supports Ngram language model decoding and WFST hotwords, while also addressing known crash issues and memory leak problems, ([docs](runtime/readme.md#the-real-time-transcription-service-mandarin-cpu))
@@ -91,12 +92,13 @@
from funasr import AutoModel
# paraformer-zh is a multi-functional asr model
# use vad, punc, spk or not as you need
-model = AutoModel(model="paraformer-zh", model_revision="v2.0.2", \
- vad_model="fsmn-vad", vad_model_revision="v2.0.2", \
- punc_model="ct-punc-c", punc_model_revision="v2.0.2", \
- spk_model="cam++", spk_model_revision="v2.0.2")
+model = AutoModel(model="paraformer-zh", model_revision="v2.0.4",
+ vad_model="fsmn-vad", vad_model_revision="v2.0.4",
+ punc_model="ct-punc-c", punc_model_revision="v2.0.4",
+ # spk_model="cam++", spk_model_revision="v2.0.2",
+ )
res = model.generate(input=f"{model.model_path}/example/asr_example.wav",
- batch_size=64,
+ batch_size_s=300,
hotword='魔搭')
print(res)
```
@@ -110,7 +112,7 @@
encoder_chunk_look_back = 4 #number of chunks to lookback for encoder self-attention
decoder_chunk_look_back = 1 #number of encoder chunks to lookback for decoder cross-attention
-model = AutoModel(model="paraformer-zh-streaming", model_revision="v2.0.2")
+model = AutoModel(model="paraformer-zh-streaming", model_revision="v2.0.4")
import soundfile
import os
@@ -133,7 +135,7 @@
```python
from funasr import AutoModel
-model = AutoModel(model="fsmn-vad", model_revision="v2.0.2")
+model = AutoModel(model="fsmn-vad", model_revision="v2.0.4")
wav_file = f"{model.model_path}/example/asr_example.wav"
res = model.generate(input=wav_file)
print(res)
@@ -143,7 +145,7 @@
from funasr import AutoModel
chunk_size = 200 # ms
-model = AutoModel(model="fsmn-vad", model_revision="v2.0.2")
+model = AutoModel(model="fsmn-vad", model_revision="v2.0.4")
import soundfile
@@ -164,7 +166,7 @@
```python
from funasr import AutoModel
-model = AutoModel(model="ct-punc", model_revision="v2.0.2")
+model = AutoModel(model="ct-punc", model_revision="v2.0.4")
res = model.generate(input="那今天的会就到这里吧 happy new year 明年见")
print(res)
```
@@ -172,7 +174,7 @@
```python
from funasr import AutoModel
-model = AutoModel(model="fa-zh", model_revision="v2.0.2")
+model = AutoModel(model="fa-zh", model_revision="v2.0.4")
wav_file = f"{model.model_path}/example/asr_example.wav"
text_file = f"{model.model_path}/example/text.txt"
res = model.generate(input=(wav_file, text_file), data_type=("sound", "text"))
--
Gitblit v1.9.1