From 08ee9e6aacc2e306211d393f6e8ce3a7f3620102 Mon Sep 17 00:00:00 2001
From: yhliang <68215459+yhliang-aslp@users.noreply.github.com>
Date: Thu, 10 Aug 2023 20:46:21 +0800
Subject: [PATCH] Add modular SA-ASR recipe for M2MeT2.0 (#831)
---
funasr/export/models/e2e_vad.py | 17 ++++-------------
1 file changed, 4 insertions(+), 13 deletions(-)
diff --git a/funasr/export/models/e2e_vad.py b/funasr/export/models/e2e_vad.py
index 0653e06..d3e8f30 100644
--- a/funasr/export/models/e2e_vad.py
+++ b/funasr/export/models/e2e_vad.py
@@ -11,7 +11,7 @@
class E2EVadModel(nn.Module):
def __init__(self, model,
max_seq_len=512,
- feats_dim=560,
+ feats_dim=400,
model_name='model',
**kwargs,):
super(E2EVadModel, self).__init__()
@@ -24,19 +24,10 @@
raise "unsupported encoder"
- def forward(self, feats: torch.Tensor,
- in_cache0: torch.Tensor,
- in_cache1: torch.Tensor,
- in_cache2: torch.Tensor,
- in_cache3: torch.Tensor,
- ):
+ def forward(self, feats: torch.Tensor, *args, ):
- scores, cache0, cache1, cache2, cache3 = self.encoder(feats,
- in_cache0,
- in_cache1,
- in_cache2,
- in_cache3) # return B * T * D
- return scores, cache0, cache1, cache2, cache3
+ scores, out_caches = self.encoder(feats, *args)
+ return scores, out_caches
def get_dummy_inputs(self, frame=30):
speech = torch.randn(1, frame, self.feats_dim)
--
Gitblit v1.9.1