游雁
2023-03-17 be98c46a7bcacb74dfabe40f9da63d75863b4d0f
rtf benchmark
3个文件已修改
51 ■■■■ 已修改文件
funasr/export/models/modules/multihead_att.py 6 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/python/benchmark_libtorch.md 28 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/python/benchmark_onnx.md 17 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/export/models/modules/multihead_att.py
@@ -75,8 +75,10 @@
    return x, cache
# torch.fx (used to mark preprocess_for_attn as a leaf function that the
# symbolic tracer must not trace into) only exists from torch 1.8 onwards,
# so guard the wrap() call on the installed version.
torch_version = float(".".join(torch.__version__.split(".")[:2]))
# NOTE: the float form above is kept for any external readers, but it
# mis-orders two-digit minors (float("1.10") == 1.1 < 1.8), so the actual
# gate compares an integer (major, minor) tuple instead. The "+" split
# drops local build suffixes such as "2.0.0+cu118".
_torch_major_minor = tuple(
    int(part) for part in torch.__version__.split("+")[0].split(".")[:2]
)
if _torch_major_minor >= (1, 8):
    import torch.fx
    torch.fx.wrap('preprocess_for_attn')
class MultiHeadedAttentionSANMDecoder(nn.Module):
funasr/runtime/python/benchmark_libtorch.md
@@ -26,35 +26,17 @@
## [Paraformer-large](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) 
### Intel(R) Xeon(R) Platinum 8369B CPU @ 2.90GHz   16core-32processor    with avx512_vnni
[//]: # (| concurrent-tasks | processing time(s) |  RTF   | Speedup Rate |)
[//]: # (|:----------------:|:------------------:|:------:|:------------:|)
[//]: # (|  1 (torch fp32)  |        3522        | 0.0976 |     10.3     |)
[//]: # (|  1 (torch int8)  |        1746        | 0.0484 |     20.7     |)
[//]: # (|  32 (torch fp32)  |        236         | 0.0066 |    152.7     |)
[//]: # (|  32 (torch int8)  |        114         | 0.0032 |    317.4     |)
[//]: # (|  64 (torch fp32)  |        235         | 0.0065 |    153.7     |)
[//]: # (|  64 (torch int8)  |        113         | 0.0031 |    319.2     |)
### Intel(R) Xeon(R) Platinum 8269CY CPU @ 2.50GHz   16core-32processor    with avx512_vnni
| concurrent-tasks | processing time(s) |  RTF   | Speedup Rate |
|:----------------:|:------------------:|:------:|:------------:|
|  1 (torch fp32)  |        3522        | 0.0976 |     10.3     |
|  1 (torch int8)  |        1746        | 0.0484 |     20.7     |
| 32 (torch fp32)  |        236         | 0.0066 |    152.7     |
| 32 (torch int8)  |        114         | 0.0032 |    317.4     |
| 64 (torch fp32)  |        235         | 0.0065 |    153.7     |
| 64 (torch int8)  |        113         | 0.0031 |    319.2     |
[//]: # (### Intel(R) Xeon(R) Platinum 8163 CPU @ 2.50GHz    32core-64processor   without avx512_vnni)
funasr/runtime/python/benchmark_onnx.md
@@ -72,3 +72,20 @@
|  96 (onnx int8)  |        108         | 0.0030 |    334.1     |
## [Paraformer](https://modelscope.cn/models/damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8358-tensorflow1/summary)
### Intel(R) Xeon(R) Platinum 8369B CPU @ 2.90GHz   16core-32processor    with avx512_vnni
| concurrent-tasks | processing time(s) |  RTF   | Speedup Rate |
|:----------------:|:------------------:|:------:|:------------:|
|  16 (onnx fp32)  |         91         | 0.0025 |    395.2     |
|  16 (onnx int8)  |         78         | 0.0022 |    463.0     |
|  32 (onnx fp32)  |         60         | 0.0017 |    598.8     |
|  32 (onnx int8)  |         40         | 0.0011 |    892.9     |
|  64 (onnx fp32)  |         55         | 0.0015 |    653.6     |
|  64 (onnx int8)  |         31         | 0.0009 |    1162.8    |
|  96 (onnx fp32)  |         57         | 0.0016 |    632.9     |
|  96 (onnx int8)  |         33         | 0.0009 |    1098.9    |
[//]: # (|  1 (onnx fp32)   |        2806        | 0.0777 |     12.9     |)
[//]: # (|  1 (onnx int8)   |        1611        | 0.0446 |     22.4     |)