| | |
| | | self.export_config, |
| | | ) |
| | | self._export_onnx(model, verbose, export_dir) |
| | | # if self.onnx: |
| | | # self._export_onnx(model, verbose, export_dir) |
| | | # else: |
| | | # self._export_torchscripts(model, verbose, export_dir) |
| | | if self.onnx: |
| | | self._export_onnx(model, verbose, export_dir) |
| | | else: |
| | | self._export_torchscripts(model, verbose, export_dir) |
| | | |
| | | logging.info("output dir: {}".format(export_dir)) |
| | | |
| | |
| | | if enc_size: |
| | | dummy_input = model.get_dummy_inputs(enc_size) |
| | | else: |
| | | dummy_input = model.get_dummy_inputs() |
| | | dummy_input = model.get_dummy_inputs_txt() |
| | | |
| | | # model_script = torch.jit.script(model) |
| | | model_script = torch.jit.trace(model, dummy_input) |
| | |
| | | |
| | | decoder_out, _ = self.decoder(enc, enc_len, pre_acoustic_embeds, pre_token_length) |
| | | decoder_out = torch.log_softmax(decoder_out, dim=-1) |
| | | sample_ids = decoder_out.argmax(dim=-1) |
| | | |
| | | return decoder_out, pre_token_length |
| | | return decoder_out, sample_ids |
| | | |
| | | # def get_output_size(self): |
| | | # return self.model.encoders[0].size |
| | |
| | | speech_lengths = torch.tensor([6, 30], dtype=torch.int32) |
| | | return (speech, speech_lengths) |
| | | |
def get_dummy_inputs_txt(self, txt_file: str = "/mnt/workspace/data_fbank/0207/12345.wav.fea.txt"):
    """Build a one-utterance dummy input from a whitespace-separated text file.

    The file is parsed with ``numpy.loadtxt``; each row is treated as one
    frame and each column as one feature value.

    Args:
        txt_file: Path to the feature text file. The default points at a
            machine-specific location, so callers will normally pass their
            own path.

    Returns:
        A ``(speech, speech_lengths)`` tuple: ``speech`` is a float32 tensor
        of shape ``(1, rows, cols)`` and ``speech_lengths`` is an int32
        tensor holding the single value ``rows``.
    """
    import numpy as np

    # ndmin=2 keeps a one-row (or one-column) file two-dimensional; without
    # it loadtxt squeezes to 1-D and the 3-D indexing below raises IndexError.
    fbank = np.loadtxt(txt_file, ndmin=2)
    num_frames = fbank.shape[0]

    # Add the leading batch axis expected alongside the per-utterance length.
    speech = torch.from_numpy(fbank[np.newaxis, :, :].astype(np.float32))
    speech_lengths = torch.from_numpy(np.array([num_frames], dtype=np.int32))
    return (speech, speech_lengths)
| | | |
def get_input_names(self):
    """Return the model's input names: the features first, then their lengths."""
    input_names = ['speech', 'speech_lengths']
    return input_names
| | | |
| | |
| | | |
| | | |
| | | if __name__ == '__main__': |
| | | onnx_path = "/root/cache/export/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/torchscripts/model.onnx" |
| | | onnx_path = "/Users/zhifu/Downloads/model.onnx" |
| | | sess = onnxruntime.InferenceSession(onnx_path) |
| | | input_name = [nd.name for nd in sess.get_inputs()] |
| | | output_name = [nd.name for nd in sess.get_outputs()] |
| | | |
| | | def _get_feed_dict(feats_length): |
| | | return {'speech': np.zeros((1, feats_length, 560), dtype=np.float32), 'speech_lengths': np.array([feats_length,], dtype=np.int32)} |
| | | return {'speech': np.zeros((1, feats_length, 560), dtype=np.float32), 'speech_lengths': np.array([feats_length,], dtype=np.int64)} |
| | | |
| | | def _run(feed_dict): |
| | | output = sess.run(output_name, input_feed=feed_dict) |