From f2413d66f54c95d50b02f0ebdf614fee2bf4cc8c Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Wed, 17 May 2023 18:28:59 +0800
Subject: [PATCH] websocket online vad endpoint
---
funasr/export/models/CT_Transformer.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/funasr/export/models/CT_Transformer.py b/funasr/export/models/CT_Transformer.py
index ea6ff4f..2319c4a 100644
--- a/funasr/export/models/CT_Transformer.py
+++ b/funasr/export/models/CT_Transformer.py
@@ -10,7 +10,7 @@
class CT_Transformer(nn.Module):
"""
- Author: Speech Lab, Alibaba Group, China
+ Author: Speech Lab of DAMO Academy, Alibaba Group
CT-Transformer: Controllable time-delay transformer for real-time punctuation prediction and disfluency detection
https://arxiv.org/pdf/2003.01309.pdf
"""
@@ -53,7 +53,7 @@
def get_dummy_inputs(self):
length = 120
- text_indexes = torch.randint(0, self.embed.num_embeddings, (2, length))
+ text_indexes = torch.randint(0, self.embed.num_embeddings, (2, length)).type(torch.int32)
text_lengths = torch.tensor([length-20, length], dtype=torch.int32)
return (text_indexes, text_lengths)
@@ -81,7 +81,7 @@
class CT_Transformer_VadRealtime(nn.Module):
"""
- Author: Speech Lab, Alibaba Group, China
+ Author: Speech Lab of DAMO Academy, Alibaba Group
CT-Transformer: Controllable time-delay transformer for real-time punctuation prediction and disfluency detection
https://arxiv.org/pdf/2003.01309.pdf
"""
@@ -130,7 +130,7 @@
def get_dummy_inputs(self):
length = 120
- text_indexes = torch.randint(0, self.embed.num_embeddings, (1, length))
+ text_indexes = torch.randint(0, self.embed.num_embeddings, (1, length)).type(torch.int32)
text_lengths = torch.tensor([length], dtype=torch.int32)
vad_mask = torch.ones(length, length, dtype=torch.float32)[None, None, :, :]
sub_masks = torch.ones(length, length, dtype=torch.float32)
--
Gitblit v1.9.1