From c2dee5e3c29eba79e591d9e9caebaef15ea4e56b Mon Sep 17 00:00:00 2001
From: hnluo <haoneng.lhn@alibaba-inc.com>
Date: 星期四, 29 六月 2023 11:09:28 +0800
Subject: [PATCH] Merge pull request #687 from alibaba-damo-academy/dev_lhn
---
funasr/runtime/onnxruntime/src/vocab.cpp | 48 ++++++++++++++++--------------------------------
1 files changed, 16 insertions(+), 32 deletions(-)
diff --git a/funasr/runtime/onnxruntime/src/vocab.cpp b/funasr/runtime/onnxruntime/src/vocab.cpp
index f71ff46..65af8b6 100644
--- a/funasr/runtime/onnxruntime/src/vocab.cpp
+++ b/funasr/runtime/onnxruntime/src/vocab.cpp
@@ -1,5 +1,6 @@
#include "vocab.h"
-#include "yaml-cpp/yaml.h"
+#include <yaml-cpp/yaml.h>
+#include <glog/logging.h>
#include <fstream>
#include <iostream>
@@ -9,89 +10,80 @@
using namespace std;
+namespace funasr {
Vocab::Vocab(const char *filename)
{
ifstream in(filename);
- loadVocabFromYaml(filename);
+ LoadVocabFromYaml(filename); // Fill `vocab` from the YAML file at `filename`. NOTE(review): the ifstream `in` opened above is never read in the visible code - confirm it can be dropped.
}
Vocab::~Vocab()
{
}
-void Vocab::loadVocabFromYaml(const char* filename){
+void Vocab::LoadVocabFromYaml(const char* filename){ // Appends every entry of the "token_list" sequence in the YAML file at `filename` to `vocab`; if the file cannot be loaded, logs the failure and terminates the process via exit(-1).
YAML::Node config;
try{
config = YAML::LoadFile(filename);
- }catch(...){
- printf("error loading file, yaml file error or not exist.\n");
+ }catch(exception const &e){
+ LOG(ERROR) << "Error loading file, yaml file error or not exist: " << e.what(); // ERROR severity (not INFO) so the cause of the exit below is reported as a failure; e.what() adds the loader's own diagnostic.
exit(-1);
}
-
YAML::Node myList = config["token_list"];
for (YAML::const_iterator it = myList.begin(); it != myList.end(); ++it) {
vocab.push_back(it->as<string>());
}
}
-string Vocab::vector2string(vector<int> in)
+string Vocab::Vector2String(vector<int> in) // Concatenates vocab[id] for each id in `in`, in order, with no separator, and returns the result. NOTE(review): ids index `vocab` without a bounds check, and the local `i` is unused - confirm callers only pass valid ids.
{
int i;
stringstream ss;
for (auto it = in.begin(); it != in.end(); it++) {
ss << vocab[*it];
}
-
return ss.str();
}
-int str2int(string str)
+int Str2Int(string str) // Decodes the first three bytes of `str` as one 3-byte UTF-8 sequence (lead byte 1110xxxx followed by two 10xxxxxx bytes) and returns the resulting code point; returns 0 when those bytes do not have that form.
{
const char *ch_array = str.c_str();
if (((ch_array[0] & 0xf0) != 0xe0) || ((ch_array[1] & 0xc0) != 0x80) ||
((ch_array[2] & 0xc0) != 0x80))
return 0;
-
int val = ((ch_array[0] & 0x0f) << 12) | ((ch_array[1] & 0x3f) << 6) |
(ch_array[2] & 0x3f);
return val;
}
-bool Vocab::isChinese(string ch)
+bool Vocab::IsChinese(string ch) // Returns true only when `ch` is exactly 3 bytes long and decodes to a code point in [19968, 40959] (U+4E00..U+9FFF); false otherwise.
{
if (ch.size() != 3) {
return false;
}
-
- int unicode = str2int(ch);
+ int unicode = Str2Int(ch); // Str2Int yields 0 for bytes that are not a 3-byte UTF-8 sequence, which fails the range test below.
if (unicode >= 19968 && unicode <= 40959) {
return true;
}
-
return false;
}
-string Vocab::vector2stringV2(vector<int> in)
+string Vocab::Vector2StringV2(vector<int> in) // Turns token ids into text: looks up vocab[id] for each id, skips "<s>", "</s>" and "<unk>", accumulates tokens containing "@@" into `combine` with their last two characters removed, then handles Chinese and English words separately before joining `words` into the returned string.
{
int i;
list<string> words;
-
int is_pre_english = false;
int pre_english_len = 0;
-
int is_combining = false;
string combine = "";
for (auto it = in.begin(); it != in.end(); it++) {
string word = vocab[*it];
-
// step1 space character skips
if (word == "<s>" || word == "</s>" || word == "<unk>")
continue;
-
// step2 combie phoneme to full word
{
int sub_word = !(word.find("@@") == string::npos);
-
// process word start and middle part
if (sub_word) {
combine += word.erase(word.length() - 2);
@@ -109,15 +101,13 @@
// step3 process english word deal with space , turn abbreviation to upper case
{
-
// input word is chinese, not need process
- if (isChinese(word)) {
+ if (IsChinese(word)) { // Chinese words are appended to `words` unchanged and clear the previous-word-was-English flag.
words.push_back(word);
is_pre_english = false;
}
// input word is english word
else {
-
// pre word is chinese
if (!is_pre_english) {
word[0] = word[0] - 32;
@@ -125,10 +115,8 @@
pre_english_len = word.size();
}
-
// pre word is english word
else {
-
// single letter turn to upper case
if (word.size() == 1) {
word[0] = word[0] - 32;
@@ -147,16 +135,10 @@
pre_english_len = word.size();
}
}
-
is_pre_english = true;
-
}
}
}
-
- // for (auto it = words.begin(); it != words.end(); it++) {
- // cout << *it << endl;
- // }
stringstream ss;
for (auto it = words.begin(); it != words.end(); it++) {
@@ -166,7 +148,9 @@
return ss.str();
}
-int Vocab::size()
+int Vocab::Size() // Returns the number of entries currently held in `vocab`, converted to int.
{
return vocab.size();
}
+
+} // namespace funasr
\ No newline at end of file
--
Gitblit v1.9.1