From 8f26a9acc2461ce0c77eacc3d36d3cef3457f520 Mon Sep 17 00:00:00 2001
From: speech_asr <wangjiaming.wjm@alibaba-inc.com>
Date: Wed, 29 Mar 2023 15:49:46 +0800
Subject: [PATCH] Merge branch 'dev_wjm' of https://github.com/alibaba/FunASR into dev_wjm

---
 funasr/modules/embedding.py |   13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/funasr/modules/embedding.py b/funasr/modules/embedding.py
index e4f9bff..79ca0b2 100644
--- a/funasr/modules/embedding.py
+++ b/funasr/modules/embedding.py
@@ -8,7 +8,7 @@
 
 import math
 import torch
-
+import torch.nn.functional as F
 
 def _pre_hook(
     state_dict,
@@ -409,9 +409,18 @@
 
     def forward_chunk(self, x, cache=None):
         start_idx = 0
+        pad_left = 0
+        pad_right = 0
         batch_size, timesteps, input_dim = x.size()
         if cache is not None:
             start_idx = cache["start_idx"]
+            pad_left = cache["left"]
+            pad_right = cache["right"]
         positions = torch.arange(1, timesteps+start_idx+1)[None, :]
         position_encoding = self.encode(positions, input_dim, x.dtype).to(x.device)
-        return x + position_encoding[:, start_idx: start_idx + timesteps]
+        outputs = x + position_encoding[:, start_idx: start_idx + timesteps]
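+        # F.pad with a 2-tuple pads only the last dimension, so move time to the end, pad, and move it back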
+        outputs = outputs.transpose(1, 2)
+        outputs = F.pad(outputs, (pad_left, pad_right))
+        outputs = outputs.transpose(1, 2)
+        return outputs

--
Gitblit v1.9.1
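
A minimal standalone sketch of the chunk-wise flow the second hunk implements, assuming the cache dict carries the keys the new code reads ("start_idx", "left", "right"); forward_chunk_sketch and the zero-valued stand-in for self.encode() are illustrative, not FunASR API:

    import torch
    import torch.nn.functional as F

    def forward_chunk_sketch(x, cache=None):
        # Mirror of the patched method: offset the positional table by
        # cache["start_idx"] so positions continue across chunks, then
        # zero-pad the time axis by cache["left"]/cache["right"] frames.
        start_idx, pad_left, pad_right = 0, 0, 0
        if cache is not None:
            start_idx = cache["start_idx"]
            pad_left = cache["left"]
            pad_right = cache["right"]
        batch_size, timesteps, input_dim = x.size()
        # Stand-in for self.encode(); any (1, T, D) positional table works.
        position_encoding = torch.zeros(1, start_idx + timesteps, input_dim)
        outputs = x + position_encoding[:, start_idx: start_idx + timesteps]
        outputs = outputs.transpose(1, 2)                # (B, T, D) -> (B, D, T)
        outputs = F.pad(outputs, (pad_left, pad_right))  # pad last dim = time
        return outputs.transpose(1, 2)                   # back to (B, T', D)

    chunk = torch.randn(2, 8, 4)
    out = forward_chunk_sketch(chunk, cache={"start_idx": 16, "left": 3, "right": 1})
    print(out.shape)  # torch.Size([2, 12, 4]): 8 timesteps + 3 left + 1 right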