From 15c4709beb4b588db2135fc1133cd6955b5ef819 Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: Mon, 11 Mar 2024 22:04:03 +0800
Subject: [PATCH] onnx (#1473)
---
funasr/models/paraformer/model.py | 10 +---
runtime/python/onnxruntime/funasr_onnx/vad_bin.py | 16 ++++----
funasr/auto/auto_model.py | 8 ++++
funasr/models/paraformer_streaming/model.py | 10 +---
funasr/models/bicif_paraformer/model.py | 7 +--
funasr/models/ct_transformer_streaming/encoder.py | 9 +---
funasr/models/paraformer/decoder.py | 24 ++++-------
funasr/models/sanm/encoder.py | 10 ++---
8 files changed, 40 insertions(+), 54 deletions(-)
diff --git a/funasr/auto/auto_model.py b/funasr/auto/auto_model.py
index edcede5..a18224f 100644
--- a/funasr/auto/auto_model.py
+++ b/funasr/auto/auto_model.py
@@ -494,11 +494,19 @@
export_dir = export_utils.export_onnx(
model=model,
data_in=data_list,
+ quantize=quantize,
+ fallback_num=fallback_num,
+ calib_num=calib_num,
+ opset_version=opset_version,
**kwargs)
else:
export_dir = export_utils.export_torchscripts(
model=model,
data_in=data_list,
+ quantize=quantize,
+ fallback_num=fallback_num,
+ calib_num=calib_num,
+ opset_version=opset_version,
**kwargs)
return export_dir
\ No newline at end of file
diff --git a/funasr/models/bicif_paraformer/model.py b/funasr/models/bicif_paraformer/model.py
index b93f93a..9849c8c 100644
--- a/funasr/models/bicif_paraformer/model.py
+++ b/funasr/models/bicif_paraformer/model.py
@@ -359,13 +359,10 @@
decoder_class = tables.decoder_classes.get(kwargs["decoder"] + "Export")
self.decoder = decoder_class(self.decoder, onnx=is_onnx)
- from funasr.utils.torch_function import MakePadMask
from funasr.utils.torch_function import sequence_mask
+
+ self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
- if is_onnx:
- self.make_pad_mask = MakePadMask(max_seq_len, flip=False)
- else:
- self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
self.forward = self.export_forward
diff --git a/funasr/models/ct_transformer_streaming/encoder.py b/funasr/models/ct_transformer_streaming/encoder.py
index badf5f6..bf0b8b2 100644
--- a/funasr/models/ct_transformer_streaming/encoder.py
+++ b/funasr/models/ct_transformer_streaming/encoder.py
@@ -416,13 +416,10 @@
self.model = model
self._output_size = model._output_size
- from funasr.utils.torch_function import MakePadMask
from funasr.utils.torch_function import sequence_mask
-
- if onnx:
- self.make_pad_mask = MakePadMask(max_seq_len, flip=False)
- else:
- self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
+
+
+ self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
from funasr.models.sanm.attention import MultiHeadedAttentionSANMExport
diff --git a/funasr/models/paraformer/decoder.py b/funasr/models/paraformer/decoder.py
index 59c6e1d..7c370ba 100644
--- a/funasr/models/paraformer/decoder.py
+++ b/funasr/models/paraformer/decoder.py
@@ -628,14 +628,12 @@
):
super().__init__()
# self.embed = model.embed #Embedding(model.embed, max_seq_len)
- from funasr.utils.torch_function import MakePadMask
+
from funasr.utils.torch_function import sequence_mask
self.model = model
- if onnx:
- self.make_pad_mask = MakePadMask(max_seq_len, flip=False)
- else:
- self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
+
+ self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
from funasr.models.sanm.attention import MultiHeadedAttentionSANMDecoderExport
from funasr.models.sanm.attention import MultiHeadedAttentionCrossAttExport
@@ -763,14 +761,12 @@
super().__init__()
# self.embed = model.embed #Embedding(model.embed, max_seq_len)
self.model = model
- from funasr.utils.torch_function import MakePadMask
+
from funasr.utils.torch_function import sequence_mask
self.model = model
- if onnx:
- self.make_pad_mask = MakePadMask(max_seq_len, flip=False)
- else:
- self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
+
+ self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
from funasr.models.sanm.attention import MultiHeadedAttentionSANMDecoderExport
from funasr.models.sanm.attention import MultiHeadedAttentionCrossAttExport
@@ -1036,14 +1032,12 @@
# self.embed = model.embed #Embedding(model.embed, max_seq_len)
self.model = model
- from funasr.utils.torch_function import MakePadMask
+
from funasr.utils.torch_function import sequence_mask
self.model = model
- if onnx:
- self.make_pad_mask = MakePadMask(max_seq_len, flip=False)
- else:
- self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
+
+ self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
from funasr.models.transformer.decoder import DecoderLayerExport
diff --git a/funasr/models/paraformer/model.py b/funasr/models/paraformer/model.py
index 2e2a36e..41a1bf7 100644
--- a/funasr/models/paraformer/model.py
+++ b/funasr/models/paraformer/model.py
@@ -566,15 +566,11 @@
decoder_class = tables.decoder_classes.get(kwargs["decoder"]+"Export")
self.decoder = decoder_class(self.decoder, onnx=is_onnx)
- from funasr.utils.torch_function import MakePadMask
from funasr.utils.torch_function import sequence_mask
-
-
- if is_onnx:
- self.make_pad_mask = MakePadMask(max_seq_len, flip=False)
- else:
- self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
+
+ self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
+
self.forward = self.export_forward
return self
diff --git a/funasr/models/paraformer_streaming/model.py b/funasr/models/paraformer_streaming/model.py
index 518fe93..33ec976 100644
--- a/funasr/models/paraformer_streaming/model.py
+++ b/funasr/models/paraformer_streaming/model.py
@@ -579,14 +579,10 @@
decoder_class = tables.decoder_classes.get(kwargs["decoder"] + "Export")
self.decoder = decoder_class(self.decoder, onnx=is_onnx)
- from funasr.utils.torch_function import MakePadMask
from funasr.utils.torch_function import sequence_mask
-
- if is_onnx:
- self.make_pad_mask = MakePadMask(max_seq_len, flip=False)
- else:
- self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
-
+
+
+ self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
import copy
import types
diff --git a/funasr/models/sanm/encoder.py b/funasr/models/sanm/encoder.py
index f0a3722..f574818 100644
--- a/funasr/models/sanm/encoder.py
+++ b/funasr/models/sanm/encoder.py
@@ -503,13 +503,11 @@
self.feats_dim = feats_dim
self._output_size = model._output_size
- from funasr.utils.torch_function import MakePadMask
+
from funasr.utils.torch_function import sequence_mask
-
- if onnx:
- self.make_pad_mask = MakePadMask(max_seq_len, flip=False)
- else:
- self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
+
+
+ self.make_pad_mask = sequence_mask(max_seq_len, flip=False)
from funasr.models.sanm.attention import MultiHeadedAttentionSANMExport
if hasattr(model, 'encoders0'):
diff --git a/runtime/python/onnxruntime/funasr_onnx/vad_bin.py b/runtime/python/onnxruntime/funasr_onnx/vad_bin.py
index 384f377..6b3a1bc 100644
--- a/runtime/python/onnxruntime/funasr_onnx/vad_bin.py
+++ b/runtime/python/onnxruntime/funasr_onnx/vad_bin.py
@@ -63,8 +63,8 @@
model = AutoModel(model=model_dir)
model_dir = model.export(type="onnx", quantize=quantize)
- config_file = os.path.join(model_dir, 'vad.yaml')
- cmvn_file = os.path.join(model_dir, 'vad.mvn')
+ config_file = os.path.join(model_dir, 'config.yaml')
+ cmvn_file = os.path.join(model_dir, 'am.mvn')
config = read_yaml(config_file)
self.frontend = WavFrontend(
@@ -73,8 +73,8 @@
)
self.ort_infer = OrtInferSession(model_file, device_id, intra_op_num_threads=intra_op_num_threads)
self.batch_size = batch_size
- self.vad_scorer = E2EVadModel(config["vad_post_conf"])
- self.max_end_sil = max_end_sil if max_end_sil is not None else config["vad_post_conf"]["max_end_silence_time"]
+ self.vad_scorer = E2EVadModel(config["model_conf"])
+ self.max_end_sil = max_end_sil if max_end_sil is not None else config["model_conf"]["max_end_silence_time"]
self.encoder_conf = config["encoder_conf"]
def prepare_cache(self, in_cache: list = []):
@@ -228,8 +228,8 @@
model = AutoModel(model=model_dir)
model_dir = model.export(type="onnx", quantize=quantize)
- config_file = os.path.join(model_dir, 'vad.yaml')
- cmvn_file = os.path.join(model_dir, 'vad.mvn')
+ config_file = os.path.join(model_dir, 'config.yaml')
+ cmvn_file = os.path.join(model_dir, 'am.mvn')
config = read_yaml(config_file)
self.frontend = WavFrontendOnline(
@@ -238,8 +238,8 @@
)
self.ort_infer = OrtInferSession(model_file, device_id, intra_op_num_threads=intra_op_num_threads)
self.batch_size = batch_size
- self.vad_scorer = E2EVadModel(config["vad_post_conf"])
- self.max_end_sil = max_end_sil if max_end_sil is not None else config["vad_post_conf"]["max_end_silence_time"]
+ self.vad_scorer = E2EVadModel(config["model_conf"])
+ self.max_end_sil = max_end_sil if max_end_sil is not None else config["model_conf"]["max_end_silence_time"]
self.encoder_conf = config["encoder_conf"]
def prepare_cache(self, in_cache: list = []):
--
Gitblit v1.9.1