From 5f088a67cd1b18a8260746971f32a6569e0cf2c6 Mon Sep 17 00:00:00 2001
From: haoneng.lhn <haoneng.lhn@alibaba-inc.com>
Date: Wed, 13 Sep 2023 20:02:54 +0800
Subject: [PATCH] add paraformer online opt infer code
---
funasr/modules/attention.py | 32 ++++++++++++++++++++++++++++++++
1 files changed, 32 insertions(+), 0 deletions(-)
diff --git a/funasr/modules/attention.py b/funasr/modules/attention.py
index ab59493..f5430e1 100644
--- a/funasr/modules/attention.py
+++ b/funasr/modules/attention.py
@@ -456,6 +456,38 @@
att_outs = self.forward_attention(v_h, scores, mask, mask_att_chunk_encoder)
return att_outs + fsmn_memory
+ def forward_chunk(self, x, cache=None, chunk_size=None, look_back=0):
+ """Compute attention for one streaming chunk, reusing cached keys/values.
+
+ Args:
+ x: Chunk input; passed unchanged to ``self.forward_qkv``.
+ cache (dict or None): Holds "k" and "v" from earlier chunks; updated in place.
+ chunk_size: Indexable; only ``chunk_size[1]`` is read. None disables caching.
+ look_back (int): Cache keeps the last ``look_back * chunk_size[1]`` entries
+ along dim 2; caching is skipped unless ``look_back > 0``.
+
+ Returns:
+ tuple: ``(att_outs + fsmn_memory, cache)``; ``cache`` is a new dict when None
+ was passed and caching is active, otherwise the object that was passed in.
+ """
+ q_h, k_h, v_h, v = self.forward_qkv(x)
+ if chunk_size is not None and look_back > 0:  # caching needs both settings
+ if cache is not None:
+ k_h = torch.cat((cache["k"], k_h), dim=2)  # prepend cached keys along dim 2
+ v_h = torch.cat((cache["v"], v_h), dim=2)  # same for cached values
+ cache["k"] = k_h[:, :, -(look_back * chunk_size[1]):, :]  # keep only the trailing look-back window
+ cache["v"] = v_h[:, :, -(look_back * chunk_size[1]):, :]
+ else:
+ cache_tmp = {"k": k_h[:, :, -(look_back * chunk_size[1]):, :],  # no cache yet: seed it from this chunk
+ "v": v_h[:, :, -(look_back * chunk_size[1]):, :]}
+ cache = cache_tmp
+ fsmn_memory = self.forward_fsmn(v, None)  # second argument is None; presumably the mask - TODO confirm
+ q_h = q_h * self.d_k ** (-0.5)  # scale queries by d_k ** -0.5 before the dot product
+ scores = torch.matmul(q_h, k_h.transpose(-2, -1))
+ att_outs = self.forward_attention(v_h, scores, None)  # mask argument is None here
+ return att_outs + fsmn_memory, cache
+
+
class MultiHeadedAttentionSANMwithMask(MultiHeadedAttentionSANM):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
--
Gitblit v1.9.1