From 1159adbca076fa1a33bf4292ec5043e536285c5c Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: Mon, 22 Jan 2024 11:55:36 +0800
Subject: [PATCH] funasr1.0 update (#1278)
---
funasr/models/paraformer/model.py | 2 +-
examples/industrial_data_pretraining/paraformer-zh-spk/demo.py | 2 +-
examples/industrial_data_pretraining/paraformer-zh-spk/infer.sh | 2 +-
examples/industrial_data_pretraining/paraformer/demo.py | 7 ++++++-
funasr/auto/auto_model.py | 2 +-
examples/industrial_data_pretraining/bicif_paraformer/demo.py | 2 +-
README_zh.md | 2 +-
examples/industrial_data_pretraining/bicif_paraformer/infer.sh | 2 +-
examples/industrial_data_pretraining/seaco_paraformer/infer.sh | 2 +-
examples/industrial_data_pretraining/seaco_paraformer/demo.py | 2 +-
README.md | 2 +-
11 files changed, 16 insertions(+), 11 deletions(-)
diff --git a/README.md b/README.md
index c9b9e89..7500dd4 100644
--- a/README.md
+++ b/README.md
@@ -93,7 +93,7 @@
# use vad, punc, spk or not as you need
model = AutoModel(model="paraformer-zh", model_revision="v2.0.2",
vad_model="fsmn-vad", vad_model_revision="v2.0.2",
- punc_model="ct-punc-c", punc_model_revision="v2.0.2",
+ punc_model="ct-punc-c", punc_model_revision="v2.0.3",
# spk_model="cam++", spk_model_revision="v2.0.2",
)
res = model.generate(input=f"{model.model_path}/example/asr_example.wav",
diff --git a/README_zh.md b/README_zh.md
index 9cd1897..b19e7c2 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -89,7 +89,7 @@
# use vad, punc, spk or not as you need
model = AutoModel(model="paraformer-zh", model_revision="v2.0.2",
vad_model="fsmn-vad", vad_model_revision="v2.0.2",
- punc_model="ct-punc-c", punc_model_revision="v2.0.2",
+ punc_model="ct-punc-c", punc_model_revision="v2.0.3",
# spk_model="cam++", spk_model_revision="v2.0.2",
)
res = model.generate(input=f"{model.model_path}/example/asr_example.wav",
diff --git a/examples/industrial_data_pretraining/bicif_paraformer/demo.py b/examples/industrial_data_pretraining/bicif_paraformer/demo.py
index a06b308..f1b1496 100644
--- a/examples/industrial_data_pretraining/bicif_paraformer/demo.py
+++ b/examples/industrial_data_pretraining/bicif_paraformer/demo.py
@@ -10,7 +10,7 @@
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
vad_model_revision="v2.0.2",
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
- punc_model_revision="v2.0.2",
+ punc_model_revision="v2.0.3",
spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
spk_model_revision="v2.0.2",
)
diff --git a/examples/industrial_data_pretraining/bicif_paraformer/infer.sh b/examples/industrial_data_pretraining/bicif_paraformer/infer.sh
index 09e1c83..55efdf2 100644
--- a/examples/industrial_data_pretraining/bicif_paraformer/infer.sh
+++ b/examples/industrial_data_pretraining/bicif_paraformer/infer.sh
@@ -4,7 +4,7 @@
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
vad_model_revision="v2.0.2"
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
-punc_model_revision="v2.0.2"
+punc_model_revision="v2.0.3"
spk_model="damo/speech_campplus_sv_zh-cn_16k-common"
spk_model_revision="v2.0.2"
diff --git a/examples/industrial_data_pretraining/paraformer-zh-spk/demo.py b/examples/industrial_data_pretraining/paraformer-zh-spk/demo.py
index b4453e9..e17a831 100644
--- a/examples/industrial_data_pretraining/paraformer-zh-spk/demo.py
+++ b/examples/industrial_data_pretraining/paraformer-zh-spk/demo.py
@@ -10,7 +10,7 @@
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
vad_model_revision="v2.0.2",
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
- punc_model_revision="v2.0.2",
+ punc_model_revision="v2.0.3",
spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
spk_model_revision="v2.0.2"
)
diff --git a/examples/industrial_data_pretraining/paraformer-zh-spk/infer.sh b/examples/industrial_data_pretraining/paraformer-zh-spk/infer.sh
index 98a325d..b8610cb 100644
--- a/examples/industrial_data_pretraining/paraformer-zh-spk/infer.sh
+++ b/examples/industrial_data_pretraining/paraformer-zh-spk/infer.sh
@@ -4,7 +4,7 @@
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
vad_model_revision="v2.0.2"
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
-punc_model_revision="v2.0.2"
+punc_model_revision="v2.0.3"
spk_model="damo/speech_campplus_sv_zh-cn_16k-common"
spk_model_revision="v2.0.2"
diff --git a/examples/industrial_data_pretraining/paraformer/demo.py b/examples/industrial_data_pretraining/paraformer/demo.py
index 78af3aa..724191d 100644
--- a/examples/industrial_data_pretraining/paraformer/demo.py
+++ b/examples/industrial_data_pretraining/paraformer/demo.py
@@ -5,7 +5,12 @@
from funasr import AutoModel
-model = AutoModel(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", model_revision="v2.0.2")
+model = AutoModel(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", model_revision="v2.0.3",
+ # vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
+ # vad_model_revision="v2.0.2",
+ # punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
+ # punc_model_revision="v2.0.3",
+ )
res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav")
print(res)
diff --git a/examples/industrial_data_pretraining/seaco_paraformer/demo.py b/examples/industrial_data_pretraining/seaco_paraformer/demo.py
index 19ad1c9..a202956 100644
--- a/examples/industrial_data_pretraining/seaco_paraformer/demo.py
+++ b/examples/industrial_data_pretraining/seaco_paraformer/demo.py
@@ -10,7 +10,7 @@
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
vad_model_revision="v2.0.2",
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
- punc_model_revision="v2.0.2",
+ punc_model_revision="v2.0.3",
spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
spk_model_revision="v2.0.2",
)
diff --git a/examples/industrial_data_pretraining/seaco_paraformer/infer.sh b/examples/industrial_data_pretraining/seaco_paraformer/infer.sh
index 61029e1..f335684 100644
--- a/examples/industrial_data_pretraining/seaco_paraformer/infer.sh
+++ b/examples/industrial_data_pretraining/seaco_paraformer/infer.sh
@@ -4,7 +4,7 @@
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
vad_model_revision="v2.0.2"
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
-punc_model_revision="v2.0.2"
+punc_model_revision="v2.0.3"
python funasr/bin/inference.py \
+model=${model} \
diff --git a/funasr/auto/auto_model.py b/funasr/auto/auto_model.py
index 0538f66..ca6189d 100644
--- a/funasr/auto/auto_model.py
+++ b/funasr/auto/auto_model.py
@@ -391,7 +391,7 @@
if self.punc_model is not None:
self.punc_kwargs.update(cfg)
punc_res = self.inference(result["text"], model=self.punc_model, kwargs=self.punc_kwargs, **cfg)
- result["text_with_punc"] = punc_res[0]["text"]
+ result["text"] = punc_res[0]["text"]
# speaker embedding cluster after resorted
if self.spk_model is not None:
diff --git a/funasr/models/paraformer/model.py b/funasr/models/paraformer/model.py
index 468d23f..0c4f14a 100644
--- a/funasr/models/paraformer/model.py
+++ b/funasr/models/paraformer/model.py
@@ -451,7 +451,7 @@
self.nbest = kwargs.get("nbest", 1)
meta_data = {}
- if isinstance(data_in, torch.Tensor): # fbank
+ if isinstance(data_in, torch.Tensor) and kwargs.get("data_type", "sound") == "fbank": # fbank
speech, speech_lengths = data_in, data_lengths
if len(speech.shape) < 3:
speech = speech[None, :, :]
--
Gitblit v1.9.1