From 1596f6f414f6f41da66506debb1dff19fffeb3ec Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Mon, 24 Jun 2024 11:55:17 +0800
Subject: [PATCH] fixbug hotwords
---
runtime/onnxruntime/src/vocab.h | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/runtime/onnxruntime/src/vocab.h b/runtime/onnxruntime/src/vocab.h
index 8834b97..36fabf4 100644
--- a/runtime/onnxruntime/src/vocab.h
+++ b/runtime/onnxruntime/src/vocab.h
@@ -6,6 +6,7 @@
#include <string>
#include <vector>
#include <map>
+#include "nlohmann/json.hpp"
using namespace std;
namespace funasr {
@@ -13,11 +14,15 @@
private:
vector<string> vocab;
std::map<string, int> token_id;
+ std::map<string, string> lex_map;
bool IsEnglish(string ch);
void LoadVocabFromYaml(const char* filename);
+ void LoadVocabFromJson(const char* filename);
+ void LoadLex(const char* filename);
public:
Vocab(const char *filename);
+ Vocab(const char *filename, const char *lex_file);
~Vocab();
int Size() const;
bool IsChinese(string ch);
@@ -26,7 +31,8 @@
string Vector2StringV2(vector<int> in, std::string language="");
string Id2String(int id) const;
string WordFormat(std::string word);
- int GetIdByToken(const std::string &token);
+ int GetIdByToken(const std::string &token) const;
+ string Word2Lex(const std::string &word) const;
};
} // namespace funasr
--
Gitblit v1.9.1