From 5b0047bf58804686ab9390d78e090cd39110bf8e Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Tue, 07 Feb 2023 17:45:59 +0800
Subject: [PATCH] export model: add onnx flag to switch padding-mask implementation
---
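Notes: the onnx kwarg threaded through the export wrappers below selects the
padding-mask module at construction time: MakePadMask (traceable by
torch.onnx.export) when onnx=True, the new sequence_mask module for eager-mode
use otherwise. A minimal sketch of the intended call site, assuming the export
wrapper class in e2e_asr_paraformer.py is named Paraformer (its definition
falls outside these hunks' context lines):

    import torch
    from funasr.export.models.e2e_asr_paraformer import Paraformer  # name assumed, not shown in this diff

    # model: a trained funasr Paraformer instance loaded elsewhere
    export_model = Paraformer(model, max_seq_len=512, feats_dim=560, onnx=True)
    speech, speech_lengths = export_model.get_dummy_inputs()
    torch.onnx.export(
        export_model,
        (speech, speech_lengths),
        "model.onnx",
        input_names=export_model.get_input_names(),
    )
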
 funasr/export/models/decoder/sanm_decoder.py | 12 ++++++++----
 funasr/export/models/e2e_asr_paraformer.py   | 23 ++++++++++++++---------
 funasr/export/models/encoder/sanm_encoder.py |  8 +++++++-
 funasr/export/test_onnx.py                   |  2 +-
 funasr/export/utils/torch_function.py        | 12 ++++++++++++
5 files changed, 42 insertions(+), 15 deletions(-)
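
The test_onnx.py hunk replaces a plain Python list with an explicit numpy
array because onnxruntime validates each input's dtype against the exported
graph. A self-contained sketch of the corrected feed ("model.onnx" is an
illustrative path, not produced by this patch):

    import numpy as np
    import onnxruntime

    sess = onnxruntime.InferenceSession("model.onnx")
    feats_length = 100
    feed = {
        'speech': np.zeros((1, feats_length, 560), dtype=np.float32),
        # the integer width must match the graph input, which follows the
        # dtype of the dummy speech_lengths used at export time (int32 here)
        'speech_lengths': np.array([feats_length], dtype=np.int32),
    }
    outputs = sess.run(None, input_feed=feed)  # None fetches every output
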
diff --git a/funasr/export/models/decoder/sanm_decoder.py b/funasr/export/models/decoder/sanm_decoder.py
index ca2563b..9084b7f 100644
--- a/funasr/export/models/decoder/sanm_decoder.py
+++ b/funasr/export/models/decoder/sanm_decoder.py
@@ -4,9 +4,8 @@
import torch.nn as nn
-# from espnet.nets.pytorch_backend.transformer.mask import subsequent_mask
-
from funasr.export.utils.torch_function import MakePadMask
+from funasr.export.utils.torch_function import sequence_mask
from funasr.modules.attention import MultiHeadedAttentionSANMDecoder
from funasr.export.models.modules.multihead_att import MultiHeadedAttentionSANMDecoder as MultiHeadedAttentionSANMDecoder_export
@@ -20,11 +19,15 @@
class ParaformerSANMDecoder(nn.Module):
def __init__(self, model,
max_seq_len=512,
- model_name='decoder'):
+ model_name='decoder',
+ onnx: bool = True,):
super().__init__()
# self.embed = model.embed #Embedding(model.embed, max_seq_len)
self.model = model
- self.make_pad_mask = MakePadMask(max_seq_len, flip=False)
+ if onnx:
+ self.make_pad_mask = MakePadMask(max_seq_len, flip=False)
+ else:
+ self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
for i, d in enumerate(self.model.decoders):
if isinstance(d.feed_forward, PositionwiseFeedForwardDecoderSANM):
@@ -51,6 +54,7 @@
self.output_layer = model.output_layer
self.after_norm = model.after_norm
self.model_name = model_name
+
def prepare_mask(self, mask):
mask_3d_btd = mask[:, :, None]
diff --git a/funasr/export/models/e2e_asr_paraformer.py b/funasr/export/models/e2e_asr_paraformer.py
index 162837a..dd87213 100644
--- a/funasr/export/models/e2e_asr_paraformer.py
+++ b/funasr/export/models/e2e_asr_paraformer.py
@@ -5,7 +5,7 @@
import torch.nn as nn
from funasr.export.utils.torch_function import MakePadMask
-from funasr.train.abs_espnet_model import AbsESPnetModel
+from funasr.export.utils.torch_function import sequence_mask
from funasr.models.encoder.sanm_encoder import SANMEncoder
from funasr.export.models.encoder.sanm_encoder import SANMEncoder as SANMEncoder_export
from funasr.models.predictor.cif import CifPredictorV2
@@ -29,19 +29,24 @@
**kwargs,
):
super().__init__()
+ onnx = False
+ if "onnx" in kwargs:
+ onnx = kwargs["onnx"]
if isinstance(model.encoder, SANMEncoder):
- self.encoder = SANMEncoder_export(model.encoder)
+ self.encoder = SANMEncoder_export(model.encoder, onnx=onnx)
if isinstance(model.predictor, CifPredictorV2):
self.predictor = CifPredictorV2_export(model.predictor)
if isinstance(model.decoder, ParaformerSANMDecoder):
- self.decoder = ParaformerSANMDecoder_export(model.decoder)
- self.make_pad_mask = MakePadMask(max_seq_len, flip=False)
+ self.decoder = ParaformerSANMDecoder_export(model.decoder, onnx=onnx)
+
self.feats_dim = feats_dim
self.model_name = model_name
- self.onnx = False
- if "onnx" in kwargs:
- self.onnx = kwargs["onnx"]
-
+
+ if onnx:
+ self.make_pad_mask = MakePadMask(max_seq_len, flip=False)
+ else:
+ self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
+
def forward(
self,
speech: torch.Tensor,
@@ -66,7 +71,7 @@
def get_dummy_inputs(self):
speech = torch.randn(2, 30, self.feats_dim)
- speech_lengths = torch.tensor([6, 30]).long()
+ speech_lengths = torch.tensor([6, 30], dtype=torch.int32)
return (speech, speech_lengths)
def get_input_names(self):
diff --git a/funasr/export/models/encoder/sanm_encoder.py b/funasr/export/models/encoder/sanm_encoder.py
index ee45732..a3c9100 100644
--- a/funasr/export/models/encoder/sanm_encoder.py
+++ b/funasr/export/models/encoder/sanm_encoder.py
@@ -2,6 +2,7 @@
import torch.nn as nn
from funasr.export.utils.torch_function import MakePadMask
+from funasr.export.utils.torch_function import sequence_mask
from funasr.modules.attention import MultiHeadedAttentionSANM
from funasr.export.models.modules.multihead_att import MultiHeadedAttentionSANM as MultiHeadedAttentionSANM_export
from funasr.export.models.modules.encoder_layer import EncoderLayerSANM as EncoderLayerSANM_export
@@ -15,13 +16,18 @@
max_seq_len=512,
feats_dim=560,
model_name='encoder',
+ onnx: bool = True,
):
super().__init__()
self.embed = model.embed
self.model = model
- self.make_pad_mask = MakePadMask(max_seq_len, flip=False)
self.feats_dim = feats_dim
+ if onnx:
+ self.make_pad_mask = MakePadMask(max_seq_len, flip=False)
+ else:
+ self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
+
if hasattr(model, 'encoders0'):
for i, d in enumerate(self.model.encoders0):
if isinstance(d.self_attn, MultiHeadedAttentionSANM):
diff --git a/funasr/export/test_onnx.py b/funasr/export/test_onnx.py
index 16d88e8..d1b4209 100644
--- a/funasr/export/test_onnx.py
+++ b/funasr/export/test_onnx.py
@@ -9,7 +9,7 @@
output_name = [nd.name for nd in sess.get_outputs()]
def _get_feed_dict(feats_length):
- return {'speech': np.zeros((1, feats_length, 560), dtype=np.float32), 'speech_lengths': [feats_length,]}
+        return {'speech': np.zeros((1, feats_length, 560), dtype=np.float32), 'speech_lengths': np.array([feats_length, ], dtype=np.int32)}  # dtype must match the int32 dummy input used at export
def _run(feed_dict):
output = sess.run(output_name, input_feed=feed_dict)
diff --git a/funasr/export/utils/torch_function.py b/funasr/export/utils/torch_function.py
index e8e5e1a..a078a7e 100644
--- a/funasr/export/utils/torch_function.py
+++ b/funasr/export/utils/torch_function.py
@@ -44,6 +44,18 @@
else:
return mask
+class sequence_mask(nn.Module):
+    def __init__(self, max_seq_len=512, flip=True):
+        super().__init__()  # max_seq_len and flip mirror MakePadMask's interface but are unused here
+
+    def forward(self, lengths, max_seq_len=None, dtype=torch.float32, device=None):
+        if max_seq_len is None:
+            max_seq_len = lengths.max()  # size the mask to the longest sequence in the batch
+        row_vector = torch.arange(0, max_seq_len, 1).to(lengths.device)
+        matrix = torch.unsqueeze(lengths, dim=-1)
+        mask = row_vector < matrix  # (batch, max_seq_len): True where the frame index is inside the sequence
+
+        return mask.type(dtype).to(device) if device is not None else mask.type(dtype)
def normalize(input: torch.Tensor, p: float = 2.0, dim: int = 1, out: Optional[torch.Tensor] = None) -> torch.Tensor:
if out is None:
--
Gitblit v1.9.1
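
Reviewer note: a standalone sketch of what the new sequence_mask module
computes, with the class body copied from the torch_function.py hunk above so
it runs without funasr installed:

    import torch
    import torch.nn as nn

    class sequence_mask(nn.Module):
        def __init__(self, max_seq_len=512, flip=True):
            super().__init__()  # both arguments are unused; kept for MakePadMask parity

        def forward(self, lengths, max_seq_len=None, dtype=torch.float32, device=None):
            if max_seq_len is None:
                max_seq_len = lengths.max()
            row_vector = torch.arange(0, max_seq_len, 1).to(lengths.device)
            matrix = torch.unsqueeze(lengths, dim=-1)
            mask = row_vector < matrix
            return mask.type(dtype).to(device) if device is not None else mask.type(dtype)

    make_mask = sequence_mask(512, flip=False)
    mask = make_mask(torch.tensor([6, 30]))
    print(mask.shape)     # torch.Size([2, 30]): padded out to the longest length
    print(mask[0].sum())  # tensor(6.): ones over the first 6 frames, zeros after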