From 6b0b94bdcdb40ca42e2b65dc7ff85b88876feada Mon Sep 17 00:00:00 2001
From: Xian Shi <40013335+R1ckShi@users.noreply.github.com>
Date: Tue, 17 Oct 2023 16:44:48 +0800
Subject: [PATCH] Update README.md

---
 funasr/runtime/onnxruntime/src/tokenizer.h |   22 +++++++++++++++++++++-
 1 files changed, 21 insertions(+), 1 deletions(-)

diff --git a/funasr/runtime/onnxruntime/src/tokenizer.h b/funasr/runtime/onnxruntime/src/tokenizer.h
index 7326db8..149161b 100644
--- a/funasr/runtime/onnxruntime/src/tokenizer.h
+++ b/funasr/runtime/onnxruntime/src/tokenizer.h
@@ -1,6 +1,15 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License  (https://opensource.org/licenses/MIT)
+*/
+
 #pragma once
 #include <yaml-cpp/yaml.h>
+#include "cppjieba/DictTrie.hpp"
+#include "cppjieba/HMMModel.hpp"
+#include "cppjieba/Jieba.hpp"
 
+namespace funasr {
 class CTokenizer {
 private:
 
@@ -8,10 +17,15 @@
 	vector<string>   m_id2token,m_id2punc;
 	map<string, int>  m_token2id,m_punc2id;
 
+	cppjieba::DictTrie *jieba_dict_trie_;
+    cppjieba::HMMModel *jieba_model_;
+	cppjieba::Jieba jieba_processor_;
+
 public:
 
 	CTokenizer(const char* sz_yamlfile);
 	CTokenizer();
+	~CTokenizer();
 	bool OpenYaml(const char* sz_yamlfile);
 	void ReadYaml(const YAML::Node& node);
 	vector<string> Id2String(vector<int> input);
@@ -21,7 +35,13 @@
 	string Id2Punc(int n_punc_id);
 	vector<int> Punc2Ids(vector<string> input);
 	vector<string> SplitChineseString(const string& str_info);
+	vector<string> SplitChineseJieba(const string& str_info);
 	void StrSplit(const string& str, const char split, vector<string>& res);
 	void Tokenize(const char* str_info, vector<string>& str_out, vector<int>& id_out);
-
+	bool IsPunc(string& Punc);
+	bool seg_jieba = false;
+	void SetJiebaRes(cppjieba::DictTrie *dict, cppjieba::HMMModel *hmm);
+	void JiebaInit(std::string punc_config);
 };
+
+} // namespace funasr

--
Gitblit v1.9.1