游雁
2023-03-17 be98c46a7bcacb74dfabe40f9da63d75863b4d0f
rtf benchmark
3个文件已修改
51 ■■■■ 已修改文件
funasr/export/models/modules/multihead_att.py 6 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/python/benchmark_libtorch.md 28 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/python/benchmark_onnx.md 17 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/export/models/modules/multihead_att.py
@@ -75,8 +75,10 @@
    return x, cache
# torch.fx (used to mark preprocess_for_attn as a leaf function that the
# symbolic tracer must not trace into) only exists from torch 1.8 onwards,
# so guard the wrap() call on the installed version.
torch_version = float(".".join(torch.__version__.split(".")[:2]))
# NOTE: the float form above is kept for any external readers, but it
# mis-orders two-digit minors (float("1.10") == 1.1 < 1.8), so the actual
# gate compares an integer (major, minor) tuple instead. The "+" split
# drops local build suffixes such as "2.0.0+cu118".
_torch_major_minor = tuple(
    int(part) for part in torch.__version__.split("+")[0].split(".")[:2]
)
if _torch_major_minor >= (1, 8):
    import torch.fx
    torch.fx.wrap('preprocess_for_attn')
class MultiHeadedAttentionSANMDecoder(nn.Module):
funasr/runtime/python/benchmark_libtorch.md
@@ -26,35 +26,17 @@
## [Paraformer-large](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) 
### Intel(R) Xeon(R) Platinum 8369B CPU @ 2.90GHz   16core-32processor    with avx512_vnni
[//]: # (| concurrent-tasks | processing time(s) |  RTF   | Speedup Rate |)
[//]: # (|:----------------:|:------------------:|:------:|:------------:|)
[//]: # (|  1 (torch fp32)  |        3522        | 0.0976 |     10.3     |)
[//]: # (|  1 (torch int8)  |        1746        | 0.0484 |     20.7     |)
[//]: # (|  32 (torch fp32)  |        236         | 0.0066 |    152.7     |)
[//]: # (|  32 (torch int8)  |        114         | 0.0032 |    317.4     |)
[//]: # (|  64 (torch fp32)  |        235         | 0.0065 |    153.7     |)
[//]: # (|  64 (torch int8)  |        113         | 0.0031 |    319.2     |)
### Intel(R) Xeon(R) Platinum 8269CY CPU @ 2.50GHz   16core-32processor    with avx512_vnni
| concurrent-tasks | processing time(s) |  RTF   | Speedup Rate |
|:----------------:|:------------------:|:------:|:------------:|
|  1 (torch fp32)  |        3522        | 0.0976 |     10.3     |
|  1 (torch int8)  |        1746        | 0.0484 |     20.7     |
| 32 (torch fp32)  |        236         | 0.0066 |    152.7     |
| 32 (torch int8)  |        114         | 0.0032 |    317.4     |
| 64 (torch fp32)  |        235         | 0.0065 |    153.7     |
| 64 (torch int8)  |        113         | 0.0031 |    319.2     |
[//]: # (### Intel(R) Xeon(R) Platinum 8163 CPU @ 2.50GHz    32core-64processor   without avx512_vnni)
funasr/runtime/python/benchmark_onnx.md
@@ -72,3 +72,20 @@
|  96 (onnx int8)  |        108         | 0.0030 |    334.1     |
## [Paraformer](https://modelscope.cn/models/damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8358-tensorflow1/summary)
### Intel(R) Xeon(R) Platinum 8369B CPU @ 2.90GHz   16core-32processor    with avx512_vnni
| concurrent-tasks | processing time(s) |  RTF   | Speedup Rate |
|:----------------:|:------------------:|:------:|:------------:|
|  16 (onnx fp32)  |         91         | 0.0025 |    395.2     |
|  16 (onnx int8)  |         78         | 0.0022 |    463.0     |
|  32 (onnx fp32)  |         60         | 0.0017 |    598.8     |
|  32 (onnx int8)  |         40         | 0.0011 |    892.9     |
|  64 (onnx fp32)  |         55         | 0.0015 |    653.6     |
|  64 (onnx int8)  |         31         | 0.0009 |    1162.8    |
|  96 (onnx fp32)  |         57         | 0.0016 |    632.9     |
|  96 (onnx int8)  |         33         | 0.0009 |    1098.9    |
[//]: # (|  1 (onnx fp32)   |        2806        | 0.0777 |     12.9     |)
[//]: # (|  1 (onnx int8)   |        1611        | 0.0446 |     22.4     |)