From ba3a3bf4e67e861b833092d05d7c3842ea670cbc Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: 星期二, 28 五月 2024 14:53:18 +0800
Subject: [PATCH] Add files via upload
---
funasr/models/ct_transformer_streaming/model.py | 133 ++++++++++++++------------------------------
1 files changed, 43 insertions(+), 90 deletions(-)
diff --git a/funasr/models/ct_transformer_streaming/model.py b/funasr/models/ct_transformer_streaming/model.py
index a9b2efb..a1afc7a 100644
--- a/funasr/models/ct_transformer_streaming/model.py
+++ b/funasr/models/ct_transformer_streaming/model.py
@@ -31,6 +31,7 @@
CT-Transformer: Controllable time-delay transformer for real-time punctuation prediction and disfluency detection
https://arxiv.org/pdf/2003.01309.pdf
"""
+
def __init__(
self,
*args,
@@ -38,8 +39,9 @@
):
super().__init__(*args, **kwargs)
-
- def punc_forward(self, text: torch.Tensor, text_lengths: torch.Tensor, vad_indexes: torch.Tensor, **kwargs):
+ def punc_forward(
+ self, text: torch.Tensor, text_lengths: torch.Tensor, vad_indexes: torch.Tensor, **kwargs
+ ):
"""Compute loss value from buffer sequences.
Args:
@@ -55,23 +57,23 @@
def with_vad(self):
return True
-
- def inference(self,
- data_in,
- data_lengths=None,
- key: list = None,
- tokenizer=None,
- frontend=None,
- cache: dict = {},
- **kwargs,
- ):
+
+ def inference(
+ self,
+ data_in,
+ data_lengths=None,
+ key: list = None,
+ tokenizer=None,
+ frontend=None,
+ cache: dict = {},
+ **kwargs,
+ ):
assert len(data_in) == 1
-
+
if len(cache) == 0:
cache["pre_text"] = []
text = load_audio_text_image_video(data_in, data_type=kwargs.get("kwargs", "text"))[0]
text = "".join(cache["pre_text"]) + " " + text
-
split_size = kwargs.get("split_size", 20)
@@ -82,7 +84,7 @@
mini_sentences_id = split_to_mini_sentence(tokens_int, split_size)
assert len(mini_sentences) == len(mini_sentences_id)
cache_sent = []
- cache_sent_id = torch.from_numpy(np.array([], dtype='int32'))
+ cache_sent_id = torch.from_numpy(np.array([], dtype="int32"))
skip_num = 0
sentence_punc_list = []
sentence_words_list = []
@@ -97,8 +99,8 @@
mini_sentence_id = np.concatenate((cache_sent_id, mini_sentence_id), axis=0)
data = {
"text": torch.unsqueeze(torch.from_numpy(mini_sentence_id), 0),
- "text_lengths": torch.from_numpy(np.array([len(mini_sentence_id)], dtype='int32')),
- "vad_indexes": torch.from_numpy(np.array([len(cache["pre_text"])], dtype='int32')),
+ "text_lengths": torch.from_numpy(np.array([len(mini_sentence_id)], dtype="int32")),
+ "vad_indexes": torch.from_numpy(np.array([len(cache["pre_text"])], dtype="int32")),
}
data = to_device(data, kwargs["device"])
# y, _ = self.wrapped_model(**data)
@@ -114,20 +116,27 @@
sentenceEnd = -1
last_comma_index = -1
for i in range(len(punctuations) - 2, 1, -1):
- if self.punc_list[punctuations[i]] == "。" or self.punc_list[punctuations[i]] == "？":
+ if (
+ self.punc_list[punctuations[i]] == "。"
+ or self.punc_list[punctuations[i]] == "？"
+ ):
sentenceEnd = i
break
if last_comma_index < 0 and self.punc_list[punctuations[i]] == "，":
last_comma_index = i
- if sentenceEnd < 0 and len(mini_sentence) > cache_pop_trigger_limit and last_comma_index >= 0:
+ if (
+ sentenceEnd < 0
+ and len(mini_sentence) > cache_pop_trigger_limit
+ and last_comma_index >= 0
+ ):
# The sentence it too long, cut off at a comma.
sentenceEnd = last_comma_index
punctuations[sentenceEnd] = self.sentence_end_id
- cache_sent = mini_sentence[sentenceEnd + 1:]
- cache_sent_id = mini_sentence_id[sentenceEnd + 1:]
- mini_sentence = mini_sentence[0:sentenceEnd + 1]
- punctuations = punctuations[0:sentenceEnd + 1]
+ cache_sent = mini_sentence[sentenceEnd + 1 :]
+ cache_sent_id = mini_sentence_id[sentenceEnd + 1 :]
+ mini_sentence = mini_sentence[0 : sentenceEnd + 1]
+ punctuations = punctuations[0 : sentenceEnd + 1]
# if len(punctuations) == 0:
# continue
@@ -141,7 +150,10 @@
sentence_punc_list_out = []
for i in range(0, len(sentence_words_list)):
if i > 0:
- if len(sentence_words_list[i][0].encode()) == 1 and len(sentence_words_list[i - 1][-1].encode()) == 1:
+ if (
+ len(sentence_words_list[i][0].encode()) == 1
+ and len(sentence_words_list[i - 1][-1].encode()) == 1
+ ):
sentence_words_list[i] = " " + sentence_words_list[i]
if skip_num < len(cache["pre_text"]):
skip_num += 1
@@ -158,7 +170,7 @@
if sentence_punc_list[i] == "。" or sentence_punc_list[i] == "？":
sentenceEnd = i
break
- cache["pre_text"] = sentence_words_list[sentenceEnd + 1:]
+ cache["pre_text"] = sentence_words_list[sentenceEnd + 1 :]
if sentence_out[-1] in self.punc_list:
sentence_out = sentence_out[:-1]
sentence_punc_list_out[-1] = "_"
@@ -167,74 +179,15 @@
punc_array = punctuations
else:
punc_array = torch.cat([punc_array, punctuations], dim=0)
-
+
result_i = {"key": key[0], "text": sentence_out, "punc_array": punc_array}
results.append(result_i)
-
+
return results, meta_data
- def export(
- self,
- **kwargs,
- ):
-
- is_onnx = kwargs.get("type", "onnx") == "onnx"
- encoder_class = tables.encoder_classes.get(kwargs["encoder"] + "Export")
- self.encoder = encoder_class(self.encoder, onnx=is_onnx)
-
- self.forward = self._export_forward
-
- return self
+ def export(self, **kwargs):
- def _export_forward(self, inputs: torch.Tensor,
- text_lengths: torch.Tensor,
- vad_indexes: torch.Tensor,
- sub_masks: torch.Tensor,
- ):
- """Compute loss value from buffer sequences.
+ from .export_meta import export_rebuild_model
- Args:
- input (torch.Tensor): Input ids. (batch, len)
- hidden (torch.Tensor): Target ids. (batch, len)
-
- """
- x = self.embed(inputs)
- # mask = self._target_mask(input)
- h, _ = self.encoder(x, text_lengths, vad_indexes, sub_masks)
- y = self.decoder(h)
- return y
-
- def export_dummy_inputs(self):
- length = 120
- text_indexes = torch.randint(0, self.embed.num_embeddings, (1, length)).type(torch.int32)
- text_lengths = torch.tensor([length], dtype=torch.int32)
- vad_mask = torch.ones(length, length, dtype=torch.float32)[None, None, :, :]
- sub_masks = torch.ones(length, length, dtype=torch.float32)
- sub_masks = torch.tril(sub_masks).type(torch.float32)
- return (text_indexes, text_lengths, vad_mask, sub_masks[None, None, :, :])
-
- def export_input_names(self):
- return ['inputs', 'text_lengths', 'vad_masks', 'sub_masks']
-
- def export_output_names(self):
- return ['logits']
-
- def export_dynamic_axes(self):
- return {
- 'inputs': {
- 1: 'feats_length'
- },
- 'vad_masks': {
- 2: 'feats_length1',
- 3: 'feats_length2'
- },
- 'sub_masks': {
- 2: 'feats_length1',
- 3: 'feats_length2'
- },
- 'logits': {
- 1: 'logits_length'
- },
- }
- def export_name(self):
- return "model.onnx"
+ models = export_rebuild_model(model=self, **kwargs)
+ return models
--
Gitblit v1.9.1