From b454a1054fadbff0ee963944ff42f66b98317582 Mon Sep 17 00:00:00 2001
From: Yabin Li <wucong.lyb@alibaba-inc.com>
Date: Tue, 08 Aug 2023 11:17:43 +0800
Subject: [PATCH] update online runtime, including vad-online, paraformer-online, punc-online,2pass (#815)
---
funasr/runtime/python/onnxruntime/funasr_onnx/utils/frontend.py | 22 ++++++++++++++++++++++
1 files changed, 22 insertions(+), 0 deletions(-)
diff --git a/funasr/runtime/python/onnxruntime/funasr_onnx/utils/frontend.py b/funasr/runtime/python/onnxruntime/funasr_onnx/utils/frontend.py
index ded04b6..295e7b5 100644
--- a/funasr/runtime/python/onnxruntime/funasr_onnx/utils/frontend.py
+++ b/funasr/runtime/python/onnxruntime/funasr_onnx/utils/frontend.py
@@ -349,6 +349,28 @@
return array
+class SinusoidalPositionEncoderOnline():
+    '''Streaming sinusoidal positional encoding.
+
+    Adds sin/cos position features to successive chunks of a sequence.
+    The absolute position of a chunk is supplied through ``start_idx``,
+    so the encoder itself keeps no state between calls.
+    '''
+
+    def encode(self, positions: np.ndarray = None, depth: int = None, dtype: np.dtype = np.float32):
+        '''Build the sin/cos encoding for the given positions.
+
+        Args:
+            positions: array of position indices. It is flattened, so the
+                result always has a leading axis of size 1.
+            depth: size of the feature axis. The first half of the channels
+                holds sines and the second half cosines, so an even value
+                greater than 2 is expected.
+            dtype: dtype used for the computation and for the result.
+
+        Returns:
+            Array of shape ``(1, positions.size, depth)`` (for even depth).
+        '''
+        positions = positions.astype(dtype)
+        log_timescale_increment = np.log(np.array([10000], dtype=dtype)) / (depth / 2 - 1)
+        inv_timescales = np.exp(np.arange(depth / 2).astype(dtype) * (-log_timescale_increment))
+        # Broadcast (1, num_positions, 1) against (1, 1, depth / 2). Both
+        # operands are flattened here, so no intermediate reshape by the
+        # batch size of `positions` is needed (it could only fail).
+        scaled_time = np.reshape(positions, [1, -1, 1]) * np.reshape(inv_timescales, [1, 1, -1])
+        encoding = np.concatenate((np.sin(scaled_time), np.cos(scaled_time)), axis=2)
+        return encoding.astype(dtype)
+
+    def forward(self, x, start_idx=0):
+        '''Add positional encodings to one chunk.
+
+        Args:
+            x: array of shape ``(batch, timesteps, input_dim)``.
+            start_idx: number of frames that precede this chunk; the first
+                frame of ``x`` is given position ``start_idx + 1``.
+
+        Returns:
+            ``x`` plus the encoding, broadcast over the batch axis.
+        '''
+        _, timesteps, input_dim = x.shape
+        # Encode only the positions covered by this chunk instead of
+        # rebuilding the table from position 1 on every call.
+        positions = np.arange(start_idx + 1, start_idx + timesteps + 1)[None, :]
+        position_encoding = self.encode(positions, input_dim, x.dtype)
+
+        return x + position_encoding
+
+
def test():
path = "/nfs/zhifu.gzf/export/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav"
import librosa
--
Gitblit v1.9.1