From 08ee9e6aacc2e306211d393f6e8ce3a7f3620102 Mon Sep 17 00:00:00 2001
From: yhliang <68215459+yhliang-aslp@users.noreply.github.com>
Date: Thu, 10 Aug 2023 20:46:21 +0800
Subject: [PATCH] Add modular SA-ASR recipe for M2MeT2.0 (#831)

---
 funasr/export/models/e2e_vad.py |   17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/funasr/export/models/e2e_vad.py b/funasr/export/models/e2e_vad.py
index 0653e06..d3e8f30 100644
--- a/funasr/export/models/e2e_vad.py
+++ b/funasr/export/models/e2e_vad.py
@@ -11,7 +11,7 @@
 class E2EVadModel(nn.Module):
     def __init__(self, model,
                 max_seq_len=512,
-                feats_dim=560,
+                feats_dim=400,
                 model_name='model',
                 **kwargs,):
         super(E2EVadModel, self).__init__()
@@ -24,19 +24,10 @@
             raise "unsupported encoder"
         
 
-    def forward(self, feats: torch.Tensor,
-                       in_cache0: torch.Tensor,
-                       in_cache1: torch.Tensor,
-                       in_cache2: torch.Tensor,
-                       in_cache3: torch.Tensor,
-                       ):
+    def forward(self, feats: torch.Tensor, *args, ):
 
-        scores, cache0, cache1, cache2, cache3 = self.encoder(feats,
-                                                              in_cache0,
-                                                              in_cache1,
-                                                              in_cache2,
-                                                              in_cache3)  # return B * T * D
-        return scores, cache0, cache1, cache2, cache3
+        scores, out_caches = self.encoder(feats, *args)
+        return scores, out_caches
 
     def get_dummy_inputs(self, frame=30):
         speech = torch.randn(1, frame, self.feats_dim)

--
Gitblit v1.9.1