From d653df71cb5e6dd5acc321ead66d31c6ab036527 Mon Sep 17 00:00:00 2001
From: jmwang66 <wangjiaming.wjm@alibaba-inc.com>
Date: Fri, 23 Dec 2022 10:40:23 +0800
Subject: [PATCH] Update text postprocessing: keep punctuation, allow apostrophes in isAllAlpha
---
funasr/utils/postprocess_utils.py | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
diff --git a/funasr/utils/postprocess_utils.py b/funasr/utils/postprocess_utils.py
index 72080ae..fe8bd54 100644
--- a/funasr/utils/postprocess_utils.py
+++ b/funasr/utils/postprocess_utils.py
@@ -12,10 +12,8 @@
def isAllChinese(word: Union[List[Any], str]):
word_lists = []
- table = str.maketrans('', '', string.punctuation)
for i in word:
- cur = i.translate(table)
- cur = cur.replace(' ', '')
+ cur = i.replace(' ', '')
cur = cur.replace('</s>', '')
cur = cur.replace('<s>', '')
word_lists.append(cur)
@@ -31,10 +29,8 @@
def isAllAlpha(word: Union[List[Any], str]):
word_lists = []
- table = str.maketrans('', '', string.punctuation)
for i in word:
- cur = i.translate(table)
- cur = cur.replace(' ', '')
+ cur = i.replace(' ', '')
cur = cur.replace('</s>', '')
cur = cur.replace('<s>', '')
word_lists.append(cur)
@@ -43,7 +39,7 @@
return False
for ch in word_lists:
- if ch.isalpha() is False:
+ if ch.isalpha() is False and ch != "'":
return False
elif ch.isalpha() is True and isChinese(ch) is True:
return False
--
Gitblit v1.9.1