From ba3a3bf4e67e861b833092d05d7c3842ea670cbc Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: 星期二, 28 五月 2024 14:53:18 +0800
Subject: [PATCH] Add files via upload
---
funasr/models/ct_transformer_streaming/model.py | 106 ++++++++++++++++++++++++++++++++--------------------
1 files changed, 65 insertions(+), 41 deletions(-)
diff --git a/funasr/models/ct_transformer_streaming/model.py b/funasr/models/ct_transformer_streaming/model.py
index 5254d15..a1afc7a 100644
--- a/funasr/models/ct_transformer_streaming/model.py
+++ b/funasr/models/ct_transformer_streaming/model.py
@@ -1,20 +1,28 @@
-from typing import Any
-from typing import List
-from typing import Tuple
-from typing import Optional
-import numpy as np
-import torch.nn.functional as F
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+# MIT License (https://opensource.org/licenses/MIT)
-from funasr.models.transformer.utils.nets_utils import make_pad_mask
-from funasr.train_utils.device_funcs import force_gatherable
-from funasr.train_utils.device_funcs import to_device
import torch
-import torch.nn as nn
-from funasr.models.ct_transformer.utils import split_to_mini_sentence, split_words
-from funasr.utils.load_utils import load_audio_text_image_video
-from funasr.models.ct_transformer.model import CTTransformer
+import numpy as np
+from contextlib import contextmanager
+from distutils.version import LooseVersion
from funasr.register import tables
+from funasr.train_utils.device_funcs import to_device
+from funasr.models.ct_transformer.model import CTTransformer
+from funasr.utils.load_utils import load_audio_text_image_video
+from funasr.models.ct_transformer.utils import split_to_mini_sentence, split_words
+
+
+if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"):
+ from torch.cuda.amp import autocast
+else:
+ # Nothing to do if torch<1.6.0
+ @contextmanager
+ def autocast(enabled=True):
+ yield
+
@tables.register("model_classes", "CTTransformerStreaming")
class CTTransformerStreaming(CTTransformer):
@@ -23,6 +31,7 @@
CT-Transformer: Controllable time-delay transformer for real-time punctuation prediction and disfluency detection
https://arxiv.org/pdf/2003.01309.pdf
"""
+
def __init__(
self,
*args,
@@ -30,8 +39,9 @@
):
super().__init__(*args, **kwargs)
-
- def punc_forward(self, text: torch.Tensor, text_lengths: torch.Tensor, vad_indexes: torch.Tensor, **kwargs):
+ def punc_forward(
+ self, text: torch.Tensor, text_lengths: torch.Tensor, vad_indexes: torch.Tensor, **kwargs
+ ):
"""Compute loss value from buffer sequences.
Args:
@@ -48,24 +58,22 @@
def with_vad(self):
return True
-
-
- def generate(self,
- data_in,
- data_lengths=None,
- key: list = None,
- tokenizer=None,
- frontend=None,
- cache: dict = {},
- **kwargs,
- ):
+ def inference(
+ self,
+ data_in,
+ data_lengths=None,
+ key: list = None,
+ tokenizer=None,
+ frontend=None,
+ cache: dict = {},
+ **kwargs,
+ ):
assert len(data_in) == 1
-
+
if len(cache) == 0:
cache["pre_text"] = []
text = load_audio_text_image_video(data_in, data_type=kwargs.get("kwargs", "text"))[0]
text = "".join(cache["pre_text"]) + " " + text
-
split_size = kwargs.get("split_size", 20)
@@ -76,7 +84,7 @@
mini_sentences_id = split_to_mini_sentence(tokens_int, split_size)
assert len(mini_sentences) == len(mini_sentences_id)
cache_sent = []
- cache_sent_id = torch.from_numpy(np.array([], dtype='int32'))
+ cache_sent_id = torch.from_numpy(np.array([], dtype="int32"))
skip_num = 0
sentence_punc_list = []
sentence_words_list = []
@@ -91,8 +99,8 @@
mini_sentence_id = np.concatenate((cache_sent_id, mini_sentence_id), axis=0)
data = {
"text": torch.unsqueeze(torch.from_numpy(mini_sentence_id), 0),
- "text_lengths": torch.from_numpy(np.array([len(mini_sentence_id)], dtype='int32')),
- "vad_indexes": torch.from_numpy(np.array([len(cache["pre_text"])], dtype='int32')),
+ "text_lengths": torch.from_numpy(np.array([len(mini_sentence_id)], dtype="int32")),
+ "vad_indexes": torch.from_numpy(np.array([len(cache["pre_text"])], dtype="int32")),
}
data = to_device(data, kwargs["device"])
# y, _ = self.wrapped_model(**data)
@@ -108,20 +116,27 @@
sentenceEnd = -1
last_comma_index = -1
for i in range(len(punctuations) - 2, 1, -1):
- if self.punc_list[punctuations[i]] == "銆�" or self.punc_list[punctuations[i]] == "锛�":
+ if (
+ self.punc_list[punctuations[i]] == "銆�"
+ or self.punc_list[punctuations[i]] == "锛�"
+ ):
sentenceEnd = i
break
if last_comma_index < 0 and self.punc_list[punctuations[i]] == "锛�":
last_comma_index = i
- if sentenceEnd < 0 and len(mini_sentence) > cache_pop_trigger_limit and last_comma_index >= 0:
+ if (
+ sentenceEnd < 0
+ and len(mini_sentence) > cache_pop_trigger_limit
+ and last_comma_index >= 0
+ ):
# The sentence is too long, cut off at a comma.
sentenceEnd = last_comma_index
punctuations[sentenceEnd] = self.sentence_end_id
- cache_sent = mini_sentence[sentenceEnd + 1:]
- cache_sent_id = mini_sentence_id[sentenceEnd + 1:]
- mini_sentence = mini_sentence[0:sentenceEnd + 1]
- punctuations = punctuations[0:sentenceEnd + 1]
+ cache_sent = mini_sentence[sentenceEnd + 1 :]
+ cache_sent_id = mini_sentence_id[sentenceEnd + 1 :]
+ mini_sentence = mini_sentence[0 : sentenceEnd + 1]
+ punctuations = punctuations[0 : sentenceEnd + 1]
# if len(punctuations) == 0:
# continue
@@ -135,7 +150,10 @@
sentence_punc_list_out = []
for i in range(0, len(sentence_words_list)):
if i > 0:
- if len(sentence_words_list[i][0].encode()) == 1 and len(sentence_words_list[i - 1][-1].encode()) == 1:
+ if (
+ len(sentence_words_list[i][0].encode()) == 1
+ and len(sentence_words_list[i - 1][-1].encode()) == 1
+ ):
sentence_words_list[i] = " " + sentence_words_list[i]
if skip_num < len(cache["pre_text"]):
skip_num += 1
@@ -152,7 +170,7 @@
if sentence_punc_list[i] == "銆�" or sentence_punc_list[i] == "锛�":
sentenceEnd = i
break
- cache["pre_text"] = sentence_words_list[sentenceEnd + 1:]
+ cache["pre_text"] = sentence_words_list[sentenceEnd + 1 :]
if sentence_out[-1] in self.punc_list:
sentence_out = sentence_out[:-1]
sentence_punc_list_out[-1] = "_"
@@ -161,9 +179,15 @@
punc_array = punctuations
else:
punc_array = torch.cat([punc_array, punctuations], dim=0)
-
+
result_i = {"key": key[0], "text": sentence_out, "punc_array": punc_array}
results.append(result_i)
-
+
return results, meta_data
+ def export(self, **kwargs):
+
+ from .export_meta import export_rebuild_model
+
+ models = export_rebuild_model(model=self, **kwargs)
+ return models
--
Gitblit v1.9.1