From dcb92f13eddbf3032ce363b35f13f80afa8f94d1 Mon Sep 17 00:00:00 2001
From: haoneng.lhn <haoneng.lhn@alibaba-inc.com>
Date: 星期四, 14 九月 2023 16:46:30 +0800
Subject: [PATCH] add paraformer online opt infer code
---
funasr/modules/attention.py | 29 +++++++++++++++++++++++++++++
1 files changed, 29 insertions(+), 0 deletions(-)
diff --git a/funasr/modules/attention.py b/funasr/modules/attention.py
index 157a2c5..b007d58 100644
--- a/funasr/modules/attention.py
+++ b/funasr/modules/attention.py
@@ -705,6 +705,35 @@
scores = torch.matmul(q_h, k_h.transpose(-2, -1))
return self.forward_attention(v_h, scores, memory_mask)
+ def forward_chunk(self, x, memory, cache=None, chunk_size=None, look_back=0):
+ """Compute scaled dot-product attention for one chunk, reusing cached keys/values.
+
+ Args:
+ x: Input handed to ``self.forward_qkv`` together with ``memory``.
+ memory: Second input to ``self.forward_qkv`` (presumably the key/value source - TODO confirm).
+ cache (dict, optional): Holds "k" and "v" tensors from earlier calls; updated in place.
+ chunk_size: Indexable or None; ``look_back * chunk_size[1]`` is the kept cache length.
+ look_back (int): Multiplier for the kept cache length; caching is skipped unless > 0.
+
+ Returns:
+ tuple: (result of ``self.forward_attention`` called with mask None, cache).
+
+ """
+ q_h, k_h, v_h = self.forward_qkv(x, memory)
+ if chunk_size is not None and look_back > 0:  # caching needs both settings
+ if cache is not None:
+ k_h = torch.cat((cache["k"], k_h), dim=2)  # prepend cached keys along dim 2
+ v_h = torch.cat((cache["v"], v_h), dim=2)  # prepend cached values along dim 2
+ cache["k"] = k_h[:, :, -(look_back * chunk_size[1]):, :]  # keep only the tail of dim 2
+ cache["v"] = v_h[:, :, -(look_back * chunk_size[1]):, :]
+ else:  # no cache supplied: build one from this call's keys/values
+ cache_tmp = {"k": k_h[:, :, -(look_back * chunk_size[1]):, :],
+ "v": v_h[:, :, -(look_back * chunk_size[1]):, :]}
+ cache = cache_tmp
+ q_h = q_h * self.d_k ** (-0.5)  # scale queries by 1/sqrt(d_k)
+ scores = torch.matmul(q_h, k_h.transpose(-2, -1))
+ return self.forward_attention(v_h, scores, None), cache  # no mask is passed
+
class MultiHeadSelfAttention(nn.Module):
"""Multi-Head Attention layer.
--
Gitblit v1.9.1