From dcc4f728cdb83a48250825288bbb92b7a0d2848b Mon Sep 17 00:00:00 2001
From: haoneng.lhn <haoneng.lhn@alibaba-inc.com>
Date: Fri, 19 May 2023 13:19:55 +0800
Subject: [PATCH] update paraformer online text postprocess
---
funasr/bin/asr_infer.py | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/funasr/bin/asr_infer.py b/funasr/bin/asr_infer.py
index acb5fd8..fc311c8 100644
--- a/funasr/bin/asr_infer.py
+++ b/funasr/bin/asr_infer.py
@@ -828,9 +828,16 @@
# Change integer-ids to tokens
token = self.converter.ids2tokens(token_int)
- token = " ".join(token)
-
- results.append(token)
+ postprocessed_result = ""
+ for item in token:
+ if item.endswith('@@'):
+ postprocessed_result += item[:-2]
+ elif re.match('^[a-zA-Z]+$', item):
+ postprocessed_result += item + " "
+ else:
+ postprocessed_result += item
+
+ results.append(postprocessed_result)
# assert check_return_type(results)
return results
--
Gitblit v1.9.1