From 28ccfbfc51068a663a80764e14074df5edf2b5ba Mon Sep 17 00:00:00 2001
From: kongdeqiang <kongdeqiang960204@163.com>
Date: 星期五, 13 三月 2026 17:41:41 +0800
Subject: [PATCH] 提交

---
 runtime/onnxruntime/src/tokenizer.cpp |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/runtime/onnxruntime/src/tokenizer.cpp b/runtime/onnxruntime/src/tokenizer.cpp
index f56601a..06d64d8 100644
--- a/runtime/onnxruntime/src/tokenizer.cpp
+++ b/runtime/onnxruntime/src/tokenizer.cpp
@@ -53,8 +53,8 @@
 
         SetJiebaRes(jieba_dict_trie_, jieba_model_);
     }else {
-        jieba_dict_trie_ = NULL;
-        jieba_model_ = NULL;
+        jieba_dict_trie_ = nullptr;
+        jieba_model_ = nullptr;
     }
 }
 
@@ -127,6 +127,61 @@
 	return m_ready;
 }
 
+bool CTokenizer::OpenYaml(const char* sz_yamlfile, const char* token_file)
+{
+	YAML::Node m_Config;
+	try{
+		m_Config = YAML::LoadFile(sz_yamlfile);
+	}catch(exception const &e){
+        LOG(INFO) << "Error loading file, yaml file error or not exist.";
+        exit(-1);
+    }
+
+	try
+	{
+		YAML::Node conf_seg_jieba = m_Config["seg_jieba"];
+        if (conf_seg_jieba.IsDefined()){
+            seg_jieba = conf_seg_jieba.as<bool>();
+        }
+
+		auto Puncs = m_Config["model_conf"]["punc_list"];
+		if (Puncs.IsSequence())
+		{
+			for (size_t i = 0; i < Puncs.size(); ++i)
+			{
+				if (Puncs[i].IsScalar())
+				{ 
+					m_id2punc.push_back(Puncs[i].as<string>());
+					m_punc2id.insert(make_pair<string, int>(Puncs[i].as<string>(), i));
+				}
+			}
+		}
+
+		nlohmann::json json_array;
+		std::ifstream file(token_file);
+		if (file.is_open()) {
+			file >> json_array;
+			file.close();
+		} else {
+			LOG(INFO) << "Error loading token file, token file error or not exist.";
+			return  false;
+		}
+
+		int i = 0;
+		for (const auto& element : json_array) {
+			m_id2token.push_back(element);
+			m_token2id[element] = i;
+			i++;
+		}
+	}
+	catch (YAML::BadFile& e) {
+		LOG(ERROR) << "Read error!";
+		return  false;
+	}
+	m_ready = true;
+	return m_ready;
+}
+
 vector<string> CTokenizer::Id2String(vector<int> input)
 {
 	vector<string> result;

--
Gitblit v1.9.1