From d29f201e3201bde6a984e436888a2aae877e449f Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Tue, 19 Mar 2024 12:04:50 +0800
Subject: [PATCH] vad conf: update max_single_segment_time in demos; default fs to 16000 when frontend has no fs

---
 examples/industrial_data_pretraining/emotion2vec/demo.py         |    2 +-
 examples/industrial_data_pretraining/paraformer/demo.py          |    2 +-
 funasr/auto/auto_model.py                                        |    3 ++-
 examples/industrial_data_pretraining/whisper/demo.py             |    2 +-
 examples/industrial_data_pretraining/whisper/demo_from_openai.py |    2 +-
 5 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/examples/industrial_data_pretraining/emotion2vec/demo.py b/examples/industrial_data_pretraining/emotion2vec/demo.py
index b274bd9..940b9f9 100644
--- a/examples/industrial_data_pretraining/emotion2vec/demo.py
+++ b/examples/industrial_data_pretraining/emotion2vec/demo.py
@@ -9,7 +9,7 @@
 model = AutoModel(model="iic/emotion2vec_base_finetuned", model_revision="v2.0.4",
                   # vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
                   # vad_model_revision="v2.0.4",
-                  # vad_kwargs={"max_single_segment_time": 10},
+                  # vad_kwargs={"max_single_segment_time": 1000},
                   )
 
 wav_file = f"{model.model_path}/example/test.wav"
diff --git a/examples/industrial_data_pretraining/paraformer/demo.py b/examples/industrial_data_pretraining/paraformer/demo.py
index 499791f..a39a526 100644
--- a/examples/industrial_data_pretraining/paraformer/demo.py
+++ b/examples/industrial_data_pretraining/paraformer/demo.py
@@ -9,7 +9,7 @@
                   model_revision="v2.0.4",
                   vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
                   vad_model_revision="v2.0.4",
-                  vad_kwargs={"max_single_segment_time": 60},
+                  vad_kwargs={"max_single_segment_time": 60000},
                   punc_model="iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
                   punc_model_revision="v2.0.4",
                   # spk_model="iic/speech_campplus_sv_zh-cn_16k-common",
diff --git a/examples/industrial_data_pretraining/whisper/demo.py b/examples/industrial_data_pretraining/whisper/demo.py
index a073f68..e1e1aad 100644
--- a/examples/industrial_data_pretraining/whisper/demo.py
+++ b/examples/industrial_data_pretraining/whisper/demo.py
@@ -10,7 +10,7 @@
 model = AutoModel(model="iic/Whisper-large-v3",
                   model_revision="v2.0.5",
                   vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
-				  vad_kwargs={"max_single_segment_time": 30},
+				  vad_kwargs={"max_single_segment_time": 30000},
                   )
 
 res = model.generate(
diff --git a/examples/industrial_data_pretraining/whisper/demo_from_openai.py b/examples/industrial_data_pretraining/whisper/demo_from_openai.py
index 9a22764..a257bc8 100644
--- a/examples/industrial_data_pretraining/whisper/demo_from_openai.py
+++ b/examples/industrial_data_pretraining/whisper/demo_from_openai.py
@@ -12,7 +12,7 @@
 # model = AutoModel(model="Whisper-large-v2", hub="openai")
 model = AutoModel(model="Whisper-large-v3",
                   vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
-                  vad_kwargs={"max_single_segment_time": 30},
+                  vad_kwargs={"max_single_segment_time": 30000},
 				  hub="openai",
                   )
 
diff --git a/funasr/auto/auto_model.py b/funasr/auto/auto_model.py
index bba44e7..bd786d0 100644
--- a/funasr/auto/auto_model.py
+++ b/funasr/auto/auto_model.py
@@ -312,7 +312,8 @@
             key = res[i]["key"]
             vadsegments = res[i]["value"]
             input_i = data_list[i]
-            speech = load_audio_text_image_video(input_i, fs=kwargs["frontend"].fs, audio_fs=kwargs.get("fs", 16000))
+            fs = kwargs["frontend"].fs if hasattr(kwargs["frontend"], "fs") else 16000
+            speech = load_audio_text_image_video(input_i, fs=fs, audio_fs=kwargs.get("fs", 16000))
             speech_lengths = len(speech)
             n = len(vadsegments)
             data_with_index = [(vadsegments[i], i) for i in range(n)]

--
Gitblit v1.9.1