From 8dab6d184a034ca86eafa644ea0d2100aadfe27d Mon Sep 17 00:00:00 2001
From: jmwang66 <wangjiaming.wjm@alibaba-inc.com>
Date: Tue, 09 May 2023 10:58:33 +0800
Subject: [PATCH] Merge pull request #473 from alibaba-damo-academy/dev_smohan
---
funasr/utils/postprocess_utils.py | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/funasr/utils/postprocess_utils.py b/funasr/utils/postprocess_utils.py
index 2475548..f4efea6 100644
--- a/funasr/utils/postprocess_utils.py
+++ b/funasr/utils/postprocess_utils.py
@@ -106,17 +106,18 @@
if num in abbr_begin:
if time_stamp is not None:
begin = time_stamp[ts_nums[num]][0]
- word_lists.append(words[num].upper())
+ abbr_word = words[num].upper()
num += 1
while num < words_size:
if num in abbr_end:
- word_lists.append(words[num].upper())
+ abbr_word += words[num].upper()
last_num = num
break
else:
if words[num].encode('utf-8').isalpha():
- word_lists.append(words[num].upper())
+ abbr_word += words[num].upper()
num += 1
+ word_lists.append(abbr_word)
if time_stamp is not None:
end = time_stamp[ts_nums[num]][1]
ts_lists.append([begin, end])
@@ -224,7 +225,7 @@
ts_lists.append([begin, end])
begin = end
else:
- raise ValueError('invalid character: {}'.format(ch))
+ word_lists.append(ch)
if time_stamp is not None:
word_lists, ts_lists = abbr_dispose(word_lists, ts_lists)
@@ -241,4 +242,4 @@
if ch != ' ':
real_word_lists.append(ch)
sentence = ''.join(word_lists).strip()
- return sentence, real_word_lists
+ return sentence, real_word_lists
\ No newline at end of file
--
Gitblit v1.9.1