From 00cfc36b9a1ad4d114434eb7770c1e67940d4862 Mon Sep 17 00:00:00 2001
From: Yabin Li <wucong.lyb@alibaba-inc.com>
Date: 星期一, 13 五月 2024 16:52:41 +0800
Subject: [PATCH] c++ runtime adapt to 1.0 (#1724)
---
runtime/onnxruntime/src/tokenizer.cpp | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 55 insertions(+), 0 deletions(-)
diff --git a/runtime/onnxruntime/src/tokenizer.cpp b/runtime/onnxruntime/src/tokenizer.cpp
index 7618282..06d64d8 100644
--- a/runtime/onnxruntime/src/tokenizer.cpp
+++ b/runtime/onnxruntime/src/tokenizer.cpp
@@ -127,6 +127,61 @@
return m_ready;
}
+// Loads punctuation labels from a YAML config and the token vocabulary from a JSON file.
+// Reads optional `seg_jieba` and `model_conf.punc_list` from sz_yamlfile; every element of
+// token_file is appended to m_id2token and mapped to its position in m_token2id. Returns false
+// on an unopenable token file or a parse/conversion error; a bad YAML file still calls exit(-1).
+bool CTokenizer::OpenYaml(const char* sz_yamlfile, const char* token_file)
+{
+    YAML::Node m_Config;
+    try {
+        m_Config = YAML::LoadFile(sz_yamlfile);
+    } catch (std::exception const& e) {
+        LOG(ERROR) << "Error loading file, yaml file error or not exist. " << e.what();
+        exit(-1);
+    }
+    try {
+        YAML::Node conf_seg_jieba = m_Config["seg_jieba"];
+        if (conf_seg_jieba.IsDefined()) {
+            seg_jieba = conf_seg_jieba.as<bool>();
+        }
+
+        auto Puncs = m_Config["model_conf"]["punc_list"];
+        if (Puncs.IsSequence()) {
+            for (size_t i = 0; i < Puncs.size(); ++i) {
+                if (!Puncs[i].IsScalar()) {
+                    continue;
+                }
+                // Convert once; the id stays the entry's position in punc_list, as before.
+                const std::string punc = Puncs[i].as<std::string>();
+                m_id2punc.push_back(punc);
+                m_punc2id.insert(std::make_pair(punc, static_cast<int>(i)));
+            }
+        }
+
+        std::ifstream file(token_file);
+        if (!file.is_open()) {
+            LOG(ERROR) << "Error loading token file, token file error or not exist.";
+            return false;
+        }
+        nlohmann::json json_array;
+        file >> json_array;  // throws on malformed JSON; handled by the catch below
+
+        int next_id = 0;
+        for (const auto& element : json_array) {
+            const std::string token = element.get<std::string>();
+            m_id2token.push_back(token);
+            m_token2id[token] = next_id++;
+        }
+    } catch (std::exception const& e) {
+        // YAML conversion and JSON parse/type errors are reported as failure, not propagated.
+        LOG(ERROR) << "Read error! " << e.what();
+        return false;
+    }
+    m_ready = true;
+    return m_ready;
+}
+
vector<string> CTokenizer::Id2String(vector<int> input)
{
vector<string> result;
--
Gitblit v1.9.1