From 88c4f4a25df3c171dc0d07efc400f73e6a09e165 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Tue, 07 Feb 2023 21:43:30 +0800
Subject: [PATCH] export model
---
funasr/export/models/e2e_asr_paraformer.py | 11 ++++++++++-
funasr/export/test_onnx.py | 4 ++--
funasr/export/export_model.py | 10 +++++-----
3 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/funasr/export/export_model.py b/funasr/export/export_model.py
index e5a2320..9f5cb0e 100644
--- a/funasr/export/export_model.py
+++ b/funasr/export/export_model.py
@@ -42,10 +42,10 @@
self.export_config,
)
self._export_onnx(model, verbose, export_dir)
- # if self.onnx:
- # self._export_onnx(model, verbose, export_dir)
- # else:
- # self._export_torchscripts(model, verbose, export_dir)
+ if self.onnx:
+ self._export_onnx(model, verbose, export_dir)
+ else:
+ self._export_torchscripts(model, verbose, export_dir)
logging.info("output dir: {}".format(export_dir))
@@ -54,7 +54,7 @@
if enc_size:
dummy_input = model.get_dummy_inputs(enc_size)
else:
- dummy_input = model.get_dummy_inputs()
+ dummy_input = model.get_dummy_inputs_txt()
# model_script = torch.jit.script(model)
model_script = torch.jit.trace(model, dummy_input)
diff --git a/funasr/export/models/e2e_asr_paraformer.py b/funasr/export/models/e2e_asr_paraformer.py
index dd87213..8388f4f 100644
--- a/funasr/export/models/e2e_asr_paraformer.py
+++ b/funasr/export/models/e2e_asr_paraformer.py
@@ -63,8 +63,9 @@
decoder_out, _ = self.decoder(enc, enc_len, pre_acoustic_embeds, pre_token_length)
decoder_out = torch.log_softmax(decoder_out, dim=-1)
+ sample_ids = decoder_out.argmax(dim=-1)
- return decoder_out, pre_token_length
+ return decoder_out, sample_ids
# def get_output_size(self):
# return self.model.encoders[0].size
@@ -74,6 +75,14 @@
speech_lengths = torch.tensor([6, 30], dtype=torch.int32)
return (speech, speech_lengths)
+ def get_dummy_inputs_txt(self, txt_file: str = "/mnt/workspace/data_fbank/0207/12345.wav.fea.txt"):
+ import numpy as np
+ fbank = np.loadtxt(txt_file)
+ fbank_lengths = np.array([fbank.shape[0], ], dtype=np.int32)
+ speech = torch.from_numpy(fbank[None, :, :].astype(np.float32))
+ speech_lengths = torch.from_numpy(fbank_lengths.astype(np.int32))
+ return (speech, speech_lengths)
+
def get_input_names(self):
return ['speech', 'speech_lengths']
diff --git a/funasr/export/test_onnx.py b/funasr/export/test_onnx.py
index 91b128e..c62137e 100644
--- a/funasr/export/test_onnx.py
+++ b/funasr/export/test_onnx.py
@@ -3,13 +3,13 @@
if __name__ == '__main__':
- onnx_path = "/root/cache/export/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/torchscripts/model.onnx"
+ onnx_path = "/Users/zhifu/Downloads/model.onnx"
sess = onnxruntime.InferenceSession(onnx_path)
input_name = [nd.name for nd in sess.get_inputs()]
output_name = [nd.name for nd in sess.get_outputs()]
def _get_feed_dict(feats_length):
- return {'speech': np.zeros((1, feats_length, 560), dtype=np.float32), 'speech_lengths': np.array([feats_length,], dtype=np.int32)}
+ return {'speech': np.zeros((1, feats_length, 560), dtype=np.float32), 'speech_lengths': np.array([feats_length,], dtype=np.int64)}
def _run(feed_dict):
output = sess.run(output_name, input_feed=feed_dict)
--
Gitblit v1.9.1