From cfc4d402093060fe087424b0a6be4e2b2546eae8 Mon Sep 17 00:00:00 2001
From: wanchen.swc <wanchen.swc@alibaba-inc.com>
Date: Thu, 30 Mar 2023 18:15:15 +0800
Subject: [PATCH] [Export] support gpu inference
---
funasr/export/export_model.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/funasr/export/export_model.py b/funasr/export/export_model.py
index b827f16..d3d119c 100644
--- a/funasr/export/export_model.py
+++ b/funasr/export/export_model.py
@@ -10,15 +10,16 @@
from funasr.export.models import get_model
import numpy as np
import random
-
+from funasr.utils.types import str2bool
# torch_version = float(".".join(torch.__version__.split(".")[:2]))
# assert torch_version > 1.9
-class ASRModelExportParaformer:
+class ModelExport:
def __init__(
self,
cache_dir: Union[Path, str] = None,
onnx: bool = True,
+ device: str = "cpu",
quant: bool = True,
fallback_num: int = 0,
audio_in: str = None,
@@ -36,6 +37,7 @@
)
print("output dir: {}".format(self.cache_dir))
self.onnx = onnx
+ self.device = device
self.quant = quant
self.fallback_num = fallback_num
self.frontend = None
@@ -74,8 +76,9 @@
# using dummy inputs for a example
if self.audio_in is not None:
feats, feats_len = self.load_feats(self.audio_in)
- for feat, len in zip(feats, feats_len):
- m(feat, len)
+ for i, (feat, len) in enumerate(zip(feats, feats_len)):
+ with torch.no_grad():
+ m(feat, len)
else:
dummy_input = model.get_dummy_inputs()
m(*dummy_input)
@@ -110,6 +113,10 @@
dummy_input = model.get_dummy_inputs(enc_size)
else:
dummy_input = model.get_dummy_inputs()
+
+ if self.device == 'cuda':
+ model = model.cuda()
+ dummy_input = tuple([i.cuda() for i in dummy_input])
# model_script = torch.jit.script(model)
model_script = torch.jit.trace(model, dummy_input)
@@ -233,15 +240,17 @@
parser.add_argument('--model-name', type=str, required=True)
parser.add_argument('--export-dir', type=str, required=True)
parser.add_argument('--type', type=str, default='onnx', help='["onnx", "torch"]')
- parser.add_argument('--quantize', action='store_true', help='export quantized model')
+ parser.add_argument('--device', type=str, default='cpu', help='["cpu", "cuda"]')
+ parser.add_argument('--quantize', type=str2bool, default=False, help='export quantized model')
parser.add_argument('--fallback-num', type=int, default=0, help='amp fallback number')
parser.add_argument('--audio_in', type=str, default=None, help='["wav", "wav.scp"]')
parser.add_argument('--calib_num', type=int, default=200, help='calib max num')
args = parser.parse_args()
- export_model = ASRModelExportParaformer(
+ export_model = ModelExport(
cache_dir=args.export_dir,
onnx=args.type == 'onnx',
+ device=args.device,
quant=args.quantize,
fallback_num=args.fallback_num,
audio_in=args.audio_in,
--
Gitblit v1.9.1