From 4984724f6a580fdefc4dcec430e69fd75635d25b Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Tue, 10 Oct 2023 16:12:40 +0800
Subject: [PATCH] support en-bpe model
---
funasr/runtime/onnxruntime/src/vocab.cpp | 17 +++++++++++++++++
1 files changed, 17 insertions(+), 0 deletions(-)
diff --git a/funasr/runtime/onnxruntime/src/vocab.cpp b/funasr/runtime/onnxruntime/src/vocab.cpp
index 3f51911..2babc40 100644
--- a/funasr/runtime/onnxruntime/src/vocab.cpp
+++ b/funasr/runtime/onnxruntime/src/vocab.cpp
@@ -75,6 +75,21 @@
return false;
}
+// Capitalizes the English pronoun "i" and its contractions "i'm", "i've"
+// and "i'll"; every other word is handed back exactly as it was received.
+string Vocab::WordFormat(std::string word)
+{
+ // Lower-case spelling paired with the capitalized form to emit instead.
+ static const char* const kPronounForms[][2] = {
+  {"i", "I"}, {"i'm", "I'm"}, {"i've", "I've"}, {"i'll", "I'll"}};
+ for (const auto& form : kPronounForms) {
+  if (word == form[0]) {
+   return form[1];
+  }
+ }
+ return word;
+}
+
string Vocab::Vector2StringV2(vector<int> in, std::string language)
{
int i;
@@ -94,6 +109,7 @@
size_t found = word.find(unicodeChar);
if(found != std::string::npos){
if (combine != ""){
+ combine = WordFormat(combine);
if (words.size() != 0){
combine = " " + combine;
}
@@ -164,6 +180,7 @@
}
if (language == "en-bpe" and combine != ""){
+ combine = WordFormat(combine);
if (words.size() != 0){
combine = " " + combine;
}
--
Gitblit v1.9.1