From 4ace5a95b052d338947fc88809a440ccd55cf6b4 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期四, 16 十一月 2023 16:39:52 +0800
Subject: [PATCH] funasr pages

---
 funasr/modules/embedding.py |  114 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 100 insertions(+), 14 deletions(-)

diff --git a/funasr/modules/embedding.py b/funasr/modules/embedding.py
index 4b292a7..1995bbe 100644
--- a/funasr/modules/embedding.py
+++ b/funasr/modules/embedding.py
@@ -9,6 +9,7 @@
 import math
 import torch
 import torch.nn.functional as F
+from torch import einsum
 
 def _pre_hook(
     state_dict,
@@ -393,8 +394,9 @@
     def encode(self, positions: torch.Tensor = None, depth: int = None, dtype: torch.dtype = torch.float32):
         batch_size = positions.size(0)
         positions = positions.type(dtype)
-        log_timescale_increment = torch.log(torch.tensor([10000], dtype=dtype)) / (depth / 2 - 1)
-        inv_timescales = torch.exp(torch.arange(depth / 2).type(dtype) * (-log_timescale_increment))
+        device = positions.device
+        log_timescale_increment = torch.log(torch.tensor([10000], dtype=dtype, device=device)) / (depth / 2 - 1)
+        inv_timescales = torch.exp(torch.arange(depth / 2, device=device).type(dtype) * (-log_timescale_increment))
         inv_timescales = torch.reshape(inv_timescales, [batch_size, -1])
         scaled_time = torch.reshape(positions, [1, -1, 1]) * torch.reshape(inv_timescales, [1, 1, -1])
         encoding = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=2)
@@ -402,7 +404,7 @@
 
     def forward(self, x):
         batch_size, timesteps, input_dim = x.size()
-        positions = torch.arange(1, timesteps+1)[None, :]
+        positions = torch.arange(1, timesteps+1, device=x.device)[None, :]
         position_encoding = self.encode(positions, input_dim, x.dtype).to(x.device)
 
         return x + position_encoding
@@ -425,19 +427,103 @@
         return encoding.type(dtype)
 
     def forward(self, x, cache=None):
-        start_idx = 0
-        pad_left = 0
-        pad_right = 0
         batch_size, timesteps, input_dim = x.size()
+        start_idx = 0
         if cache is not None:
             start_idx = cache["start_idx"]
-            pad_left = cache["left"]
-            pad_right = cache["right"]
+            cache["start_idx"] += timesteps
         positions = torch.arange(1, timesteps+start_idx+1)[None, :]
         position_encoding = self.encode(positions, input_dim, x.dtype).to(x.device)
-        outputs = x + position_encoding[:, start_idx: start_idx + timesteps]
-        outputs = outputs.transpose(1, 2)
-        outputs = F.pad(outputs, (pad_left, pad_right))
-        outputs = outputs.transpose(1, 2)
-        return outputs
-       
+        return x + position_encoding[:, start_idx: start_idx + timesteps]
+
+class StreamingRelPositionalEncoding(torch.nn.Module):
+    """Relative positional encoding.
+    Args:
+        size: Module size.
+        max_len: Maximum input length.
+        dropout_rate: Dropout rate.
+    """
+
+    def __init__(
+        self, size: int, dropout_rate: float = 0.0, max_len: int = 5000
+    ) -> None:
+        """Construct a RelativePositionalEncoding object."""
+        super().__init__()
+
+        self.size = size
+
+        self.pe = None
+        self.dropout = torch.nn.Dropout(p=dropout_rate)
+
+        self.extend_pe(torch.tensor(0.0).expand(1, max_len))
+        self._register_load_state_dict_pre_hook(_pre_hook)
+
+    def extend_pe(self, x: torch.Tensor, left_context: int = 0) -> None:
+        """Reset positional encoding.
+        Args:
+            x: Input sequences. (B, T, ?)
+            left_context: Number of frames in left context.
+        """
+        time1 = x.size(1) + left_context
+
+        if self.pe is not None:
+            if self.pe.size(1) >= time1 * 2 - 1:
+                if self.pe.dtype != x.dtype or self.pe.device != x.device:
+                    self.pe = self.pe.to(device=x.device, dtype=x.dtype)
+                return
+
+        pe_positive = torch.zeros(time1, self.size)
+        pe_negative = torch.zeros(time1, self.size)
+
+        position = torch.arange(0, time1, dtype=torch.float32).unsqueeze(1)
+        div_term = torch.exp(
+            torch.arange(0, self.size, 2, dtype=torch.float32)
+            * -(math.log(10000.0) / self.size)
+        )
+
+        pe_positive[:, 0::2] = torch.sin(position * div_term)
+        pe_positive[:, 1::2] = torch.cos(position * div_term)
+        pe_positive = torch.flip(pe_positive, [0]).unsqueeze(0)
+
+        pe_negative[:, 0::2] = torch.sin(-1 * position * div_term)
+        pe_negative[:, 1::2] = torch.cos(-1 * position * div_term)
+        pe_negative = pe_negative[1:].unsqueeze(0)
+
+        self.pe = torch.cat([pe_positive, pe_negative], dim=1).to(
+            dtype=x.dtype, device=x.device
+        )
+
+    def forward(self, x: torch.Tensor, left_context: int = 0) -> torch.Tensor:
+        """Compute positional encoding.
+        Args:
+            x: Input sequences. (B, T, ?)
+            left_context: Number of frames in left context.
+        Returns:
+            pos_enc: Positional embedding sequences. (B, 2 * (T - 1), ?)
+        """
+        self.extend_pe(x, left_context=left_context)
+
+        time1 = x.size(1) + left_context
+
+        pos_enc = self.pe[
+            :, self.pe.size(1) // 2 - time1 + 1 : self.pe.size(1) // 2 + x.size(1)
+        ]
+        pos_enc = self.dropout(pos_enc)
+
+        return pos_enc
+
+
+class ScaledSinuEmbedding(torch.nn.Module):
+    def __init__(self, dim):
+        super().__init__()
+        self.scale = torch.nn.Parameter(torch.ones(1,))
+        inv_freq = 1. / (10000 ** (torch.arange(0, dim, 2).float() / dim))
+        self.register_buffer('inv_freq', inv_freq)
+
+    def forward(self, x):
+        n, device = x.shape[1], x.device
+        t = torch.arange(n, device = device).type_as(self.inv_freq)
+        sinu = einsum('i , j -> i j', t, self.inv_freq)
+        emb = torch.cat((sinu.sin(), sinu.cos()), dim = -1)
+        return emb * self.scale
+

--
Gitblit v1.9.1