From c4e37cb6c179d35030c211004e6875a016eb7972 Mon Sep 17 00:00:00 2001
From: hnluo <haoneng.lhn@alibaba-inc.com>
Date: 星期五, 19 五月 2023 13:38:52 +0800
Subject: [PATCH] Merge pull request #532 from alibaba-damo-academy/dev_lhn

---
 funasr/bin/asr_infer.py                                                                                 |   14 +++++++++++---
 egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py |    2 +-
 egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py       |    2 +-
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
index b566454..bc511bb 100644
--- a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
@@ -34,6 +34,6 @@
     rec_result = inference_pipeline(audio_in=speech[sample_offset: sample_offset + stride_size],
                                     param_dict=param_dict)
     if len(rec_result) != 0:
-        final_result += rec_result['text'] + " "
+        final_result += rec_result['text']
         print(rec_result)
 print(final_result)
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/infer.py b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
similarity index 96%
rename from egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/infer.py
rename to egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
index 6672bbf..abe6640 100644
--- a/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/infer.py
+++ b/egs_modelscope/asr/paraformer/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/demo.py
@@ -34,6 +34,6 @@
     rec_result = inference_pipeline(audio_in=speech[sample_offset: sample_offset + stride_size],
                                     param_dict=param_dict)
     if len(rec_result) != 0:
-        final_result += rec_result['text'] + " "
+        final_result += rec_result['text']
         print(rec_result)
 print(final_result.strip())
diff --git a/funasr/bin/asr_infer.py b/funasr/bin/asr_infer.py
index acb5fd8..9da7ef7 100644
--- a/funasr/bin/asr_infer.py
+++ b/funasr/bin/asr_infer.py
@@ -9,6 +9,7 @@
 import time
 import copy
 import os
+import re
 import codecs
 import tempfile
 import requests
@@ -828,9 +829,16 @@
 
                 # Change integer-ids to tokens
                 token = self.converter.ids2tokens(token_int)
-                token = " ".join(token)
-
-                results.append(token)
+                postprocessed_result = ""
+                for item in token:
+                    if item.endswith('@@'):
+                        postprocessed_result += item[:-2]
+                    elif re.match('^[a-zA-Z]+$', item):
+                        postprocessed_result += item + " "
+                    else:
+                        postprocessed_result += item
+                        
+                results.append(postprocessed_result)
 
         # assert check_return_type(results)
         return results

--
Gitblit v1.9.1