From 525f5d77564f016acdd03ff71197f7a4a9177840 Mon Sep 17 00:00:00 2001
From: wanchen.swc <wanchen.swc@alibaba-inc.com>
Date: Fri, 10 Mar 2023 17:08:04 +0800
Subject: [PATCH] [Quantization] onnx quantization

---
 funasr/export/export_model.py |    8 ++++++++
 1 file changed, 8 insertions(+), 0 deletions(-)

diff --git a/funasr/export/export_model.py b/funasr/export/export_model.py
index 1c677c9..7370c3c 100644
--- a/funasr/export/export_model.py
+++ b/funasr/export/export_model.py
@@ -153,11 +153,19 @@
 
         if self.quant:
             from onnxruntime.quantization import QuantType, quantize_dynamic
+            import onnx
             quant_model_path = os.path.join(path, f'{model.model_name}_quant.onnx')
+            onnx_model = onnx.load(model_path)
+            nodes = [n.name for n in onnx_model.graph.node]
+            nodes_to_exclude = [m for m in nodes if 'output' in m]
             quantize_dynamic(
                 model_input=model_path,
                 model_output=quant_model_path,
+                op_types_to_quantize=['MatMul'],
+                per_channel=True,
+                reduce_range=False,
                 weight_type=QuantType.QUInt8,
+                nodes_to_exclude=nodes_to_exclude,
             )
 
 

--
Gitblit v1.9.1