From dcc4f728cdb83a48250825288bbb92b7a0d2848b Mon Sep 17 00:00:00 2001
From: haoneng.lhn <haoneng.lhn@alibaba-inc.com>
Date: Fri, 19 May 2023 13:19:55 +0800
Subject: [PATCH] update paraformer online text postprocess

---
 funasr/bin/asr_infer.py |   13 ++++++++++---
 1 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/funasr/bin/asr_infer.py b/funasr/bin/asr_infer.py
index acb5fd8..fc311c8 100644
--- a/funasr/bin/asr_infer.py
+++ b/funasr/bin/asr_infer.py
@@ -828,9 +828,16 @@
 
                 # Change integer-ids to tokens
                 token = self.converter.ids2tokens(token_int)
-                token = " ".join(token)
-
-                results.append(token)
+                postprocessed_result = ""
+                for item in token:
+                    if item.endswith('@@'):
+                        postprocessed_result += item[:-2]
+                    elif re.match('^[a-zA-Z]+$', item):
+                        postprocessed_result += item + " "
+                    else:
+                        postprocessed_result += item
+                        
+                results.append(postprocessed_result)
 
         # assert check_return_type(results)
         return results

--
Gitblit v1.9.1