From 5130d2406df1aa567d13eec49eea8f9e392c6790 Mon Sep 17 00:00:00 2001
From: 语帆 <yf352572@alibaba-inc.com>
Date: Fri, 23 Feb 2024 17:01:41 +0800
Subject: [PATCH] test
---
funasr/models/lcbnet/model.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/funasr/models/lcbnet/model.py b/funasr/models/lcbnet/model.py
index 563ff26..54fba1c 100644
--- a/funasr/models/lcbnet/model.py
+++ b/funasr/models/lcbnet/model.py
@@ -89,8 +89,8 @@
text_encoder = text_encoder_class(input_size=vocab_size, **text_encoder_conf)
fusion_encoder_class = tables.encoder_classes.get(fusion_encoder)
fusion_encoder = fusion_encoder_class(**fusion_encoder_conf)
- bias_predictor_class = tables.encoder_classes.get_class(bias_predictor)
- bias_predictor = bias_predictor_class(bias_predictor_conf)
+ bias_predictor_class = tables.encoder_classes.get(bias_predictor)
+ bias_predictor = bias_predictor_class(**bias_predictor_conf)
if decoder is not None:
decoder_class = tables.decoder_classes.get(decoder)
@@ -414,7 +414,7 @@
self.init_beam_search(**kwargs)
self.nbest = kwargs.get("nbest", 1)
pdb.set_trace()
-
+
meta_data = {}
if isinstance(data_in, torch.Tensor) and kwargs.get("data_type", "sound") == "fbank": # fbank
speech, speech_lengths = data_in, data_lengths
@@ -425,6 +425,7 @@
else:
# extract fbank feats
time1 = time.perf_counter()
+ pdb.set_trace()
audio_sample_list = load_audio_text_image_video(data_in, fs=frontend.fs, audio_fs=kwargs.get("fs", 16000),
data_type=kwargs.get("data_type", "sound"),
tokenizer=tokenizer)
--
Gitblit v1.9.1