From 39de3adfbc12bc491f6da9eb9ffdc5122a3f623d Mon Sep 17 00:00:00 2001
From: 语帆 <yf352572@alibaba-inc.com>
Date: Wed, 28 Feb 2024 16:39:15 +0800
Subject: [PATCH] Add temporary pdb breakpoints around model.inference in auto_model.py
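Drop into the debugger immediately before and after the model.inference()
call in funasr/auto/auto_model.py so the prepared batch and the raw
results/meta_data can be inspected interactively; the remaining hunks only
touch blank lines.

For reference, a minimal invocation that should reach the traced path,
assuming the usual AutoModel entry point (the model name and wav path below
are placeholders):

    from funasr import AutoModel

    model = AutoModel(model="paraformer-zh")   # any registered model
    res = model.generate(input="example.wav")  # stops at the breakpoints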
---
funasr/auto/auto_model.py | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
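Note: the hard-coded pdb.set_trace() calls stop every run, including
non-interactive ones. A sketch of a less intrusive variant, using the
built-in breakpoint() hook (Python 3.7+) behind a hypothetical FUNASR_DEBUG
environment flag; breakpoint() can additionally be disabled globally with
PYTHONBREAKPOINT=0:

    import os

    # Opt-in only: runs straight through unless FUNASR_DEBUG is set.
    if os.environ.get("FUNASR_DEBUG"):
        breakpoint()  # honors PYTHONBREAKPOINT; defaults to pdb.set_trace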
diff --git a/funasr/auto/auto_model.py b/funasr/auto/auto_model.py
index e5faa2a..ba7dcab 100644
--- a/funasr/auto/auto_model.py
+++ b/funasr/auto/auto_model.py
@@ -23,7 +23,7 @@
from funasr.models.campplus.cluster_backend import ClusterBackend
except:
print("If you want to use the speaker diarization, please `pip install hdbscan`")
-
+import pdb  # temporary debugging aid for the set_trace calls below
def prepare_data_iterator(data_in, input_len=None, data_type=None, key=None):
"""
@@ -141,7 +141,7 @@
kwargs = download_model(**kwargs)
set_all_random_seed(kwargs.get("seed", 0))
-
+
device = kwargs.get("device", "cuda")
if not torch.cuda.is_available() or kwargs.get("ngpu", 1) == 0:
device = "cpu"
@@ -161,19 +161,18 @@
vocab_size = len(tokenizer.token_list)
else:
vocab_size = -1
-
# build frontend
frontend = kwargs.get("frontend", None)
+
if frontend is not None:
frontend_class = tables.frontend_classes.get(frontend)
frontend = frontend_class(**kwargs["frontend_conf"])
kwargs["frontend"] = frontend
kwargs["input_size"] = frontend.output_size()
-
+
# build model
model_class = tables.model_classes.get(kwargs["model"])
model = model_class(**kwargs, **kwargs["model_conf"], vocab_size=vocab_size)
-
model.to(device)
# init_param
@@ -215,7 +214,7 @@
# batch_size = 1
key_list, data_list = prepare_data_iterator(input, input_len=input_len, data_type=kwargs.get("data_type", None), key=key)
-
+
speed_stats = {}
asr_result_list = []
num_samples = len(data_list)
@@ -228,15 +227,18 @@
data_batch = data_list[beg_idx:end_idx]
key_batch = key_list[beg_idx:end_idx]
batch = {"data_in": data_batch, "key": key_batch}
+
if (end_idx - beg_idx) == 1 and kwargs.get("data_type", None) == "fbank": # fbank
batch["data_in"] = data_batch[0]
batch["data_lengths"] = input_len
time1 = time.perf_counter()
with torch.no_grad():
+ pdb.set_trace()  # break before inference: inspect batch and kwargs
results, meta_data = model.inference(**batch, **kwargs)
time2 = time.perf_counter()
+ pdb.set_trace()  # break after inference: inspect results and meta_data
asr_result_list.extend(results)
# batch_data_time = time_per_frame_s * data_batch_i["speech_lengths"].sum().item()
--
Gitblit v1.9.1