From 54931dd4e1a099d7d6f144c4e12e5453deb3aa26 Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: 星期三, 28 六月 2023 10:41:57 +0800
Subject: [PATCH] Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main
---
funasr/modules/embedding.py | 32 +++++++++++++++++++++-----------
1 files changed, 21 insertions(+), 11 deletions(-)
diff --git a/funasr/modules/embedding.py b/funasr/modules/embedding.py
index e0070de..aaac80a 100644
--- a/funasr/modules/embedding.py
+++ b/funasr/modules/embedding.py
@@ -407,22 +407,32 @@
return x + position_encoding
- def forward_chunk(self, x, cache=None):
- start_idx = 0
- pad_left = 0
- pad_right = 0
+class StreamSinusoidalPositionEncoder(torch.nn.Module):
+    '''
+    Sinusoidal position encoder for chunk-wise input; forward() reads and advances the offset in cache["start_idx"].
+    '''
+    def __init__(self, d_model=80, dropout_rate=0.1):
+        super().__init__()  # was misspelled ``__int__`` and never ran; d_model / dropout_rate are accepted but unused
+
+    def encode(self, positions: torch.Tensor = None, depth: int = None, dtype: torch.dtype = torch.float32):
+        '''Return sin/cos encodings shaped [1, positions.numel(), depth] (depth assumed even) for the given positions.'''
+        positions = positions.type(dtype)
+        log_timescale_increment = torch.log(torch.tensor([10000], dtype=dtype)) / (depth / 2 - 1)
+        inv_timescales = torch.exp(torch.arange(depth / 2).type(dtype) * (-log_timescale_increment))
+        # inv_timescales is flattened below; the old [batch_size, -1] reshape raised when batch did not divide depth / 2.
+        scaled_time = torch.reshape(positions, [1, -1, 1]) * torch.reshape(inv_timescales, [1, 1, -1])
+        encoding = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=2)
+        return encoding.type(dtype)
+
+    def forward(self, x, cache=None):
         batch_size, timesteps, input_dim = x.size()
+        start_idx = 0  # without a cache, x is encoded as if it starts at position 1
         if cache is not None:
             start_idx = cache["start_idx"]
-            pad_left = cache["left"]
-            pad_right = cache["right"]
+            cache["start_idx"] += timesteps  # in-place update: the next chunk continues after this one
         positions = torch.arange(1, timesteps+start_idx+1)[None, :]
         position_encoding = self.encode(positions, input_dim, x.dtype).to(x.device)
-        outputs = x + position_encoding[:, start_idx: start_idx + timesteps]
-        outputs = outputs.transpose(1,2)
-        outputs = F.pad(outputs, (pad_left, pad_right))
-        outputs = outputs.transpose(1,2)
-        return outputs
+        return x + position_encoding[:, start_idx: start_idx + timesteps]
class StreamingRelPositionalEncoding(torch.nn.Module):
"""Relative positional encoding.
--
Gitblit v1.9.1