From ee06cb9c6870d9e1579015aabfe1a84a61a5c087 Mon Sep 17 00:00:00 2001
From: 九耳 <mengzhe.cmz@alibaba-inc.com>
Date: Tue, 28 Feb 2023 18:11:12 +0800
Subject: [PATCH] punctuation: add training code, support large dataset

---
 funasr/datasets/large_datasets/utils/padding.py |    5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/funasr/datasets/large_datasets/utils/padding.py b/funasr/datasets/large_datasets/utils/padding.py
index e814b1c..e0feac6 100644
--- a/funasr/datasets/large_datasets/utils/padding.py
+++ b/funasr/datasets/large_datasets/utils/padding.py
@@ -6,9 +6,8 @@
 def padding(data, float_pad_value=0.0, int_pad_value=-1):
     assert isinstance(data, list)
     assert "key" in data[0]
-    assert "speech" in data[0]
-    assert "text" in data[0]
-
+    assert "speech" in data[0] or "text" in data[0]
+    
     keys = [x["key"] for x in data]
 
     batch = {}

--
Gitblit v1.9.1