From 9377eed41e5ed37cede49c7c8dc8225050b884f6 Mon Sep 17 00:00:00 2001
From: 维石 <shixian.shi@alibaba-inc.com>
Date: Mon, 17 Jun 2024 20:20:00 +0800
Subject: [PATCH] Remove leftover pdb debugging statements and imports
---
funasr/models/llm_asr_nar/model.py | 2 --
funasr/models/llm_asr/model.py | 2 --
runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py | 5 ++---
funasr/models/mfcca/mfcca_encoder.py | 2 --
funasr/datasets/llm_datasets_qwenaudio/datasets.py | 2 --
funasr/models/paraformer_streaming/model.py | 4 ----
funasr/models/lcbnet/model.py | 2 --
funasr/models/transformer/model.py | 2 --
funasr/models/bicif_paraformer/cif_predictor.py | 2 +-
funasr/models/contextual_paraformer/decoder.py | 1 -
funasr/frontends/default.py | 1 -
funasr/datasets/sense_voice_datasets/datasets.py | 2 --
funasr/models/sense_voice/model.py | 8 --------
funasr/datasets/llm_datasets/datasets.py | 2 --
funasr/models/whisper_lid/model.py | 2 --
funasr/models/whisper/model.py | 2 --
funasr/datasets/llm_datasets_vicuna/datasets.py | 2 --
17 files changed, 3 insertions(+), 40 deletions(-)
diff --git a/funasr/datasets/llm_datasets/datasets.py b/funasr/datasets/llm_datasets/datasets.py
index b660554..61caded 100644
--- a/funasr/datasets/llm_datasets/datasets.py
+++ b/funasr/datasets/llm_datasets/datasets.py
@@ -64,8 +64,6 @@
def __getitem__(self, index):
item = self.index_ds[index]
- # import pdb;
- # pdb.set_trace()
source = item["source"]
data_src = load_audio_text_image_video(source, fs=self.fs)
if self.preprocessor_speech:
diff --git a/funasr/datasets/llm_datasets_qwenaudio/datasets.py b/funasr/datasets/llm_datasets_qwenaudio/datasets.py
index b56e624..569665a 100644
--- a/funasr/datasets/llm_datasets_qwenaudio/datasets.py
+++ b/funasr/datasets/llm_datasets_qwenaudio/datasets.py
@@ -66,8 +66,6 @@
def __getitem__(self, index):
item = self.index_ds[index]
- # import pdb;
- # pdb.set_trace()
source = item["source"]
data_src = load_audio_text_image_video(source, fs=self.fs)
if self.preprocessor_speech:
diff --git a/funasr/datasets/llm_datasets_vicuna/datasets.py b/funasr/datasets/llm_datasets_vicuna/datasets.py
index 04fa514..cde29a9 100644
--- a/funasr/datasets/llm_datasets_vicuna/datasets.py
+++ b/funasr/datasets/llm_datasets_vicuna/datasets.py
@@ -66,8 +66,6 @@
def __getitem__(self, index):
item = self.index_ds[index]
- # import pdb;
- # pdb.set_trace()
source = item["source"]
data_src = load_audio_text_image_video(source, fs=self.fs)
if self.preprocessor_speech:
diff --git a/funasr/datasets/sense_voice_datasets/datasets.py b/funasr/datasets/sense_voice_datasets/datasets.py
index 690a1c5..0117182 100644
--- a/funasr/datasets/sense_voice_datasets/datasets.py
+++ b/funasr/datasets/sense_voice_datasets/datasets.py
@@ -71,8 +71,6 @@
return len(self.index_ds)
def __getitem__(self, index):
- # import pdb;
- # pdb.set_trace()
output = None
for idx in range(self.retry):
diff --git a/funasr/frontends/default.py b/funasr/frontends/default.py
index 462da86..68bd6fb 100644
--- a/funasr/frontends/default.py
+++ b/funasr/frontends/default.py
@@ -235,7 +235,6 @@
self, input: torch.Tensor, input_lengths: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
# 1. Domain-conversion: e.g. Stft: time -> time-freq
- # import pdb;pdb.set_trace()
if self.stft is not None:
input_stft, feats_lens = self._compute_stft(input, input_lengths)
else:
diff --git a/funasr/models/bicif_paraformer/cif_predictor.py b/funasr/models/bicif_paraformer/cif_predictor.py
index 3739c76..ca98cdc 100644
--- a/funasr/models/bicif_paraformer/cif_predictor.py
+++ b/funasr/models/bicif_paraformer/cif_predictor.py
@@ -198,7 +198,7 @@
output2 = self.upsample_cnn(_output)
output2 = output2.transpose(1, 2)
output2, _ = self.self_attn(output2, mask)
- # import pdb; pdb.set_trace()
+
alphas2 = torch.sigmoid(self.cif_output2(output2))
alphas2 = torch.nn.functional.relu(alphas2 * self.smooth_factor2 - self.noise_threshold2)
# repeat the mask in T demension to match the upsampled length
diff --git a/funasr/models/contextual_paraformer/decoder.py b/funasr/models/contextual_paraformer/decoder.py
index 0b30c99..ba2ce9a 100644
--- a/funasr/models/contextual_paraformer/decoder.py
+++ b/funasr/models/contextual_paraformer/decoder.py
@@ -424,7 +424,6 @@
# contextual_mask = myutils.sequence_mask(contextual_length, device=memory.device)[:, None, :]
contextual_mask = self.make_pad_mask(contextual_length)
contextual_mask, _ = self.prepare_mask(contextual_mask)
- # import pdb; pdb.set_trace()
contextual_mask = contextual_mask.transpose(2, 1).unsqueeze(1)
cx, tgt_mask, _, _, _ = self.bias_decoder(
x_self_attn, tgt_mask, bias_embed, memory_mask=contextual_mask
diff --git a/funasr/models/lcbnet/model.py b/funasr/models/lcbnet/model.py
index d3df25a..7b2038e 100644
--- a/funasr/models/lcbnet/model.py
+++ b/funasr/models/lcbnet/model.py
@@ -23,8 +23,6 @@
from funasr.utils.datadir_writer import DatadirWriter
from funasr.register import tables
-import pdb
-
@tables.register("model_classes", "LCBNet")
class LCBNet(nn.Module):
diff --git a/funasr/models/llm_asr/model.py b/funasr/models/llm_asr/model.py
index 4345f69..399dfe7 100644
--- a/funasr/models/llm_asr/model.py
+++ b/funasr/models/llm_asr/model.py
@@ -164,8 +164,6 @@
text: (Batch, Length)
text_lengths: (Batch,)
"""
- # import pdb;
- # pdb.set_trace()
if len(text_lengths.size()) > 1:
text_lengths = text_lengths[:, 0]
if len(speech_lengths.size()) > 1:
diff --git a/funasr/models/llm_asr_nar/model.py b/funasr/models/llm_asr_nar/model.py
index 8c0c3ff..192c199 100644
--- a/funasr/models/llm_asr_nar/model.py
+++ b/funasr/models/llm_asr_nar/model.py
@@ -166,8 +166,6 @@
text: (Batch, Length)
text_lengths: (Batch,)
"""
- # import pdb;
- # pdb.set_trace()
if len(text_lengths.size()) > 1:
text_lengths = text_lengths[:, 0]
if len(speech_lengths.size()) > 1:
diff --git a/funasr/models/mfcca/mfcca_encoder.py b/funasr/models/mfcca/mfcca_encoder.py
index 19a6df9..a0bb58e 100644
--- a/funasr/models/mfcca/mfcca_encoder.py
+++ b/funasr/models/mfcca/mfcca_encoder.py
@@ -34,7 +34,6 @@
from funasr.models.transformer.utils.subsampling import TooShortUttError
from funasr.models.transformer.utils.subsampling import check_short_utt
from funasr.models.encoder.abs_encoder import AbsEncoder
-import pdb
import math
@@ -363,7 +362,6 @@
t_leng = xs_pad.size(1)
d_dim = xs_pad.size(2)
xs_pad = xs_pad.reshape(-1, channel_size, t_leng, d_dim)
- # pdb.set_trace()
if channel_size < 8:
repeat_num = math.ceil(8 / channel_size)
xs_pad = xs_pad.repeat(1, repeat_num, 1, 1)[:, 0:8, :, :]
diff --git a/funasr/models/paraformer_streaming/model.py b/funasr/models/paraformer_streaming/model.py
index f287614..16021ce 100644
--- a/funasr/models/paraformer_streaming/model.py
+++ b/funasr/models/paraformer_streaming/model.py
@@ -50,8 +50,6 @@
super().__init__(*args, **kwargs)
- # import pdb;
- # pdb.set_trace()
self.sampling_ratio = kwargs.get("sampling_ratio", 0.2)
self.scama_mask = None
@@ -83,8 +81,6 @@
text: (Batch, Length)
text_lengths: (Batch,)
"""
- # import pdb;
- # pdb.set_trace()
decoding_ind = kwargs.get("decoding_ind")
if len(text_lengths.size()) > 1:
text_lengths = text_lengths[:, 0]
diff --git a/funasr/models/sense_voice/model.py b/funasr/models/sense_voice/model.py
index 127d5a0..f2ea05f 100644
--- a/funasr/models/sense_voice/model.py
+++ b/funasr/models/sense_voice/model.py
@@ -73,8 +73,6 @@
):
target_mask = kwargs.get("target_mask", None)
- # import pdb;
- # pdb.set_trace()
if len(text_lengths.size()) > 1:
text_lengths = text_lengths[:, 0]
if len(speech_lengths.size()) > 1:
@@ -303,8 +301,6 @@
):
target_mask = kwargs.get("target_mask", None)
- # import pdb;
- # pdb.set_trace()
if len(text_lengths.size()) > 1:
text_lengths = text_lengths[:, 0]
if len(speech_lengths.size()) > 1:
@@ -648,8 +644,6 @@
):
target_mask = kwargs.get("target_mask", None)
- # import pdb;
- # pdb.set_trace()
if len(text_lengths.size()) > 1:
text_lengths = text_lengths[:, 0]
if len(speech_lengths.size()) > 1:
@@ -1052,8 +1046,6 @@
):
target_mask = kwargs.get("target_mask", None)
- # import pdb;
- # pdb.set_trace()
if len(text_lengths.size()) > 1:
text_lengths = text_lengths[:, 0]
if len(speech_lengths.size()) > 1:
diff --git a/funasr/models/transformer/model.py b/funasr/models/transformer/model.py
index 0d5ed23..adfd525 100644
--- a/funasr/models/transformer/model.py
+++ b/funasr/models/transformer/model.py
@@ -145,8 +145,6 @@
text: (Batch, Length)
text_lengths: (Batch,)
"""
- # import pdb;
- # pdb.set_trace()
if len(text_lengths.size()) > 1:
text_lengths = text_lengths[:, 0]
if len(speech_lengths.size()) > 1:
diff --git a/funasr/models/whisper/model.py b/funasr/models/whisper/model.py
index 4710b9c..a332100 100644
--- a/funasr/models/whisper/model.py
+++ b/funasr/models/whisper/model.py
@@ -114,8 +114,6 @@
result = whisper.decode(self.model, speech, language='english')
# result = whisper.transcribe(self.model, speech)
-
- import pdb; pdb.set_trace()
results = []
result_i = {"key": key[0], "text": result.text}
diff --git a/funasr/models/whisper_lid/model.py b/funasr/models/whisper_lid/model.py
index 0701f61..02cd373 100644
--- a/funasr/models/whisper_lid/model.py
+++ b/funasr/models/whisper_lid/model.py
@@ -140,8 +140,6 @@
text: (Batch, Length)
text_lengths: (Batch,)
"""
- # import pdb;
- # pdb.set_trace()
if len(text_lengths.size()) > 1:
text_lengths = text_lengths[:, 0]
if len(speech_lengths.size()) > 1:
diff --git a/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py b/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py
index 871674e..1c58d93 100644
--- a/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py
+++ b/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py
@@ -328,7 +328,6 @@
) -> List:
# make hotword list
hotwords, hotwords_length = self.proc_hotword(hotwords)
- # import pdb; pdb.set_trace()
[bias_embed] = self.eb_infer(hotwords, hotwords_length)
# index from bias_embed
bias_embed = bias_embed.transpose(1, 0, 2)
@@ -376,10 +375,10 @@
return np.array(hotwords)
hotword_int = [word_map(i) for i in hotwords]
- # import pdb; pdb.set_trace()
+
hotword_int.append(np.array([1]))
hotwords = pad_list(hotword_int, pad_value=0, max_len=10)
- # import pdb; pdb.set_trace()
+
return hotwords, hotwords_length
def bb_infer(
--
Gitblit v1.9.1