From 1d7bbbffb6a024a33859b48a7a656d0455dc0be1 Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: 星期一, 16 十月 2023 11:47:59 +0800
Subject: [PATCH] Update README.md

---
 funasr/runtime/onnxruntime/src/vocab.cpp |   24 ++++++++++++++++++++++++
 1 files changed, 24 insertions(+), 0 deletions(-)

diff --git a/funasr/runtime/onnxruntime/src/vocab.cpp b/funasr/runtime/onnxruntime/src/vocab.cpp
index 95174c7..2babc40 100644
--- a/funasr/runtime/onnxruntime/src/vocab.cpp
+++ b/funasr/runtime/onnxruntime/src/vocab.cpp
@@ -75,6 +75,21 @@
     return false;
 }
 
+// Upper-cases the leading 'i' of the exact lower-case words "i", "i'm",
+// "i've" and "i'll" (case-sensitive, whole-string match). Any other input,
+// including the empty string, is returned unchanged.
+string Vocab::WordFormat(std::string word)
+{
+    const bool needs_capital = word == "i" || word == "i'm" ||
+                               word == "i've" || word == "i'll";
+    if (!needs_capital) {
+        return word;
+    }
+    // Every matched form begins with 'i', so only the first byte changes.
+    word[0] = 'I';
+    return word;
+}
+
 string Vocab::Vector2StringV2(vector<int> in, std::string language)
 {
     int i;
@@ -94,6 +109,7 @@
             size_t found = word.find(unicodeChar);
             if(found != std::string::npos){
                 if (combine != ""){
+                    combine = WordFormat(combine);
                     if (words.size() != 0){
                         combine = " " + combine;
                     }
@@ -163,6 +179,14 @@
         }
     }
 
+    if (language == "en-bpe" and combine != ""){
+        combine = WordFormat(combine);
+        if (words.size() != 0){
+            combine = " " + combine;
+        }
+        words.push_back(combine);
+    }
+
     stringstream ss;
     for (auto it = words.begin(); it != words.end(); it++) {
         ss << *it;

--
Gitblit v1.9.1