From 3a4281f4959534b1bf5d01acf0085f4f8e6f2ec8 Mon Sep 17 00:00:00 2001
From: wuhongsheng <664116298@qq.com>
Date: Fri, 05 Jul 2024 00:55:32 +0800
Subject: [PATCH] 优化speakid和语句匹配逻辑,部分解决speakid不从0递增问题 (#1870)
---
runtime/onnxruntime/src/tokenizer.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/runtime/onnxruntime/src/tokenizer.h b/runtime/onnxruntime/src/tokenizer.h
index 149161b..81aea7e 100644
--- a/runtime/onnxruntime/src/tokenizer.h
+++ b/runtime/onnxruntime/src/tokenizer.h
@@ -8,6 +8,7 @@
#include "cppjieba/DictTrie.hpp"
#include "cppjieba/HMMModel.hpp"
#include "cppjieba/Jieba.hpp"
+#include "nlohmann/json.hpp"
namespace funasr {
class CTokenizer {
@@ -17,8 +18,8 @@
vector<string> m_id2token,m_id2punc;
map<string, int> m_token2id,m_punc2id;
- cppjieba::DictTrie *jieba_dict_trie_;
- cppjieba::HMMModel *jieba_model_;
+ cppjieba::DictTrie *jieba_dict_trie_=nullptr;
+ cppjieba::HMMModel *jieba_model_=nullptr;
cppjieba::Jieba jieba_processor_;
public:
@@ -27,6 +28,7 @@
CTokenizer();
~CTokenizer();
bool OpenYaml(const char* sz_yamlfile);
+ bool OpenYaml(const char* sz_yamlfile, const char* token_file);
void ReadYaml(const YAML::Node& node);
vector<string> Id2String(vector<int> input);
vector<int> String2Ids(vector<string> input);
--
Gitblit v1.9.1