From ee06cb9c6870d9e1579015aabfe1a84a61a5c087 Mon Sep 17 00:00:00 2001
From: 九耳 <mengzhe.cmz@alibaba-inc.com>
Date: Tue, 28 Feb 2023 18:11:12 +0800
Subject: [PATCH] punctuation: add training code, support large datasets
---
funasr/datasets/large_datasets/utils/padding.py | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/funasr/datasets/large_datasets/utils/padding.py b/funasr/datasets/large_datasets/utils/padding.py
index e814b1c..e0feac6 100644
--- a/funasr/datasets/large_datasets/utils/padding.py
+++ b/funasr/datasets/large_datasets/utils/padding.py
@@ -6,9 +6,8 @@
def padding(data, float_pad_value=0.0, int_pad_value=-1):
assert isinstance(data, list)
assert "key" in data[0]
- assert "speech" in data[0]
- assert "text" in data[0]
-
+ assert "speech" in data[0] or "text" in data[0]
+
keys = [x["key"] for x in data]
batch = {}
--
Gitblit v1.9.1