From be98c46a7bcacb74dfabe40f9da63d75863b4d0f Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期五, 17 三月 2023 22:30:54 +0800
Subject: [PATCH] rtf benchmark
---
funasr/runtime/python/benchmark_libtorch.md | 28 +++++-----------------------
funasr/runtime/python/benchmark_onnx.md | 17 +++++++++++++++++
funasr/export/models/modules/multihead_att.py | 6 ++++--
3 files changed, 26 insertions(+), 25 deletions(-)
diff --git a/funasr/export/models/modules/multihead_att.py b/funasr/export/models/modules/multihead_att.py
index 0a56676..1983db8 100644
--- a/funasr/export/models/modules/multihead_att.py
+++ b/funasr/export/models/modules/multihead_att.py
@@ -75,8 +75,10 @@
return x, cache
-import torch.fx
-torch.fx.wrap('preprocess_for_attn')
+torch_version = tuple(int(part) for part in torch.__version__.split(".")[:2])  # (major, minor) as ints; float("1.10") == 1.1 would wrongly sort below 1.8
+if torch_version >= (1, 8):  # torch.fx is only imported on releases that pass this gate
+    import torch.fx
+    torch.fx.wrap('preprocess_for_attn')  # registered by name so FX symbolic tracing records it as a leaf call instead of tracing into it
class MultiHeadedAttentionSANMDecoder(nn.Module):
diff --git a/funasr/runtime/python/benchmark_libtorch.md b/funasr/runtime/python/benchmark_libtorch.md
index 43f3b0e..6c068fe 100644
--- a/funasr/runtime/python/benchmark_libtorch.md
+++ b/funasr/runtime/python/benchmark_libtorch.md
@@ -26,35 +26,17 @@
## [Paraformer-large](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)
-### Intel(R) Xeon(R) Platinum 8369B CPU @ 2.90GHz 16core-32processor with avx512_vnni
-
-[//]: # (| concurrent-tasks | processing time(s) | RTF | Speedup Rate |)
-
-[//]: # (|:----------------:|:------------------:|:------:|:------------:|)
-
-[//]: # (| 1 (torch fp32) | 3522 | 0.0976 | 10.3 |)
-
-[//]: # (| 1 (torch int8) | 1746 | 0.0484 | 20.7 |)
-
-[//]: # (| 32 (torch fp32) | 236 | 0.0066 | 152.7 |)
-
-[//]: # (| 32 (torch int8) | 114 | 0.0032 | 317.4 |)
-
-[//]: # (| 64 (torch fp32) | 235 | 0.0065 | 153.7 |)
-
-[//]: # (| 64 (torch int8) | 113 | 0.0031 | 319.2 |)
-
### Intel(R) Xeon(R) Platinum 8269CY CPU @ 2.50GHz 16core-32processor with avx512_vnni
| concurrent-tasks | processing time(s) | RTF | Speedup Rate |
|:----------------:|:------------------:|:------:|:------------:|
-| 1 (torch fp32) | 3522 | 0.0976 | 10.3 |
+| 1 (torch fp32) | 3522 | 0.0976 | 10.3 |
| 1 (torch int8) | 1746 | 0.0484 | 20.7 |
-| 32 (torch fp32) | 236 | 0.0066 | 152.7 |
-| 32 (torch int8) | 114 | 0.0032 | 317.4 |
-| 64 (torch fp32) | 235 | 0.0065 | 153.7 |
-| 64 (torch int8) | 113 | 0.0031 | 319.2 |
+| 32 (torch fp32) | 236 | 0.0066 | 152.7 |
+| 32 (torch int8) | 114 | 0.0032 | 317.4 |
+| 64 (torch fp32) | 235 | 0.0065 | 153.7 |
+| 64 (torch int8) | 113 | 0.0031 | 319.2 |
[//]: # (### Intel(R) Xeon(R) Platinum 8163 CPU @ 2.50GHz 32core-64processor without avx512_vnni)
diff --git a/funasr/runtime/python/benchmark_onnx.md b/funasr/runtime/python/benchmark_onnx.md
index 35c7068..fe938ee 100644
--- a/funasr/runtime/python/benchmark_onnx.md
+++ b/funasr/runtime/python/benchmark_onnx.md
@@ -72,3 +72,20 @@
| 96 (onnx int8) | 108 | 0.0030 | 334.1 |
## [Paraformer](https://modelscope.cn/models/damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8358-tensorflow1/summary)
+
+### Intel(R) Xeon(R) Platinum 8369B CPU @ 2.90GHz 16core-32processor with avx512_vnni
+
+| concurrent-tasks | processing time(s) | RTF | Speedup Rate |
+|:----------------:|:------------------:|:------:|:------------:|
+| 16 (onnx fp32) | 91 | 0.0025 | 395.2 |
+| 16 (onnx int8) | 78 | 0.0022 | 463.0 |
+| 32 (onnx fp32) | 60 | 0.0017 | 598.8 |
+| 32 (onnx int8) | 40 | 0.0011 | 892.9 |
+| 64 (onnx fp32) | 55 | 0.0015 | 653.6 |
+| 64 (onnx int8) | 31 | 0.0009 | 1162.8 |
+| 96 (onnx fp32) | 57 | 0.0016 | 632.9 |
+| 96 (onnx int8) | 33 | 0.0009 | 1098.9 |
+
+[//]: # (| 1 (onnx fp32) | 2806 | 0.0777 | 12.9 |)
+
+[//]: # (| 1 (onnx int8) | 1611 | 0.0446 | 22.4 |)
\ No newline at end of file
--
Gitblit v1.9.1