From c2e4e3c2e9be855277d9f4fa9cd0544892ff829a Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Wed, 30 Aug 2023 09:57:30 +0800
Subject: [PATCH] Merge branch 'main' of github.com:alibaba-damo-academy/FunASR add

---
 funasr/runtime/onnxruntime/src/vocab.cpp |   41 ++++++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/funasr/runtime/onnxruntime/src/vocab.cpp b/funasr/runtime/onnxruntime/src/vocab.cpp
index ba041b8..c29156f 100644
--- a/funasr/runtime/onnxruntime/src/vocab.cpp
+++ b/funasr/runtime/onnxruntime/src/vocab.cpp
@@ -1,5 +1,6 @@
 #include "vocab.h"
-#include "yaml-cpp/yaml.h"
+#include <yaml-cpp/yaml.h>
+#include <glog/logging.h>
 
 #include <fstream>
 #include <iostream>
@@ -9,6 +10,7 @@
 
 using namespace std;
 
+namespace funasr {
 Vocab::Vocab(const char *filename)
 {
     ifstream in(filename);
@@ -22,24 +24,32 @@
     YAML::Node config;
     try{
         config = YAML::LoadFile(filename);
-    }catch(...){
-        printf("error loading file, yaml file error or not exist.\n");
+    }catch(exception const &e){
+        LOG(INFO) << "Error loading file, yaml file error or not exist.";
         exit(-1);
     }
     YAML::Node myList = config["token_list"];
+    int i = 0;
     for (YAML::const_iterator it = myList.begin(); it != myList.end(); ++it) {
         vocab.push_back(it->as<string>());
+        token_id[it->as<string>()] = i;
+        i ++;
     }
 }
 
-string Vocab::Vector2String(vector<int> in)
-{
-    int i;
-    stringstream ss;
-    for (auto it = in.begin(); it != in.end(); it++) {
-        ss << vocab[*it];
+int Vocab::GetIdByToken(const std::string &token) {
+    if (token_id.count(token)) {
+        return token_id[token];
     }
-    return ss.str();
+    return 0;
+}
+
+void Vocab::Vector2String(vector<int> in, std::vector<std::string> &preds)
+{
+    for (auto it = in.begin(); it != in.end(); it++) {
+        string word = vocab[*it];
+        preds.emplace_back(word);
+    }
 }
 
 int Str2Int(string str)
@@ -108,17 +118,16 @@
             else {
                 // pre word is chinese
                 if (!is_pre_english) {
-                    word[0] = word[0] - 32;
+                    // word[0] = word[0] - 32;
                     words.push_back(word);
                     pre_english_len = word.size();
-
                 }
                 // pre word is english word
                 else {
                     // single letter turn to upper case
-                    if (word.size() == 1) {
-                        word[0] = word[0] - 32;
-                    }
+                    // if (word.size() == 1) {
+                    //     word[0] = word[0] - 32;
+                    // }
 
                     if (pre_english_len > 1) {
                         words.push_back(" ");
@@ -150,3 +159,5 @@
 {
     return vocab.size();
 }
+
+} // namespace funasr

--
Gitblit v1.9.1