From 4984724f6a580fdefc4dcec430e69fd75635d25b Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: 星期二, 10 十月 2023 16:12:40 +0800
Subject: [PATCH] support en-bpe model

---
 funasr/runtime/onnxruntime/src/ct-transformer.cpp |   24 ++++++++++++++++++++++--
 1 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/funasr/runtime/onnxruntime/src/ct-transformer.cpp b/funasr/runtime/onnxruntime/src/ct-transformer.cpp
index a6c75fb..64a70da 100644
--- a/funasr/runtime/onnxruntime/src/ct-transformer.cpp
+++ b/funasr/runtime/onnxruntime/src/ct-transformer.cpp
@@ -46,7 +46,7 @@
 {
 }
 
-string CTTransformer::AddPunc(const char* sz_input)
+string CTTransformer::AddPunc(const char* sz_input, std::string language)
 {
     string strResult;
     vector<string> strOut;
@@ -139,8 +139,28 @@
             }
         }
     }
-    for (auto& item : NewSentenceOut)
+
+    for (auto& item : NewSentenceOut){
         strResult += item;
+    }
+    
+    if(language == "en-bpe"){
+        std::vector<std::string> chineseSymbols;
+        chineseSymbols.push_back("，");
+        chineseSymbols.push_back("。");
+        chineseSymbols.push_back("、");
+        chineseSymbols.push_back("？");
+
+        std::string englishSymbols = ",.,?";
+        for (size_t i = 0; i < chineseSymbols.size(); i++) {
+            size_t pos = 0;
+            while ((pos = strResult.find(chineseSymbols[i], pos)) != std::string::npos) {
+                strResult.replace(pos, 3, 1, englishSymbols[i]);
+                pos++;
+            }
+        }
+    }
+
     return strResult;
 }
 

--
Gitblit v1.9.1