From 3c83d64c84602de055f503af7d4e2761c829ec2e Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Tue, 12 Dec 2023 11:11:02 +0800
Subject: [PATCH] fst: support eng hotword
---
runtime/onnxruntime/src/vocab.h | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/runtime/onnxruntime/src/vocab.h b/runtime/onnxruntime/src/vocab.h
index 8834b97..19e3648 100644
--- a/runtime/onnxruntime/src/vocab.h
+++ b/runtime/onnxruntime/src/vocab.h
@@ -13,11 +13,14 @@
private:
vector<string> vocab;
std::map<string, int> token_id;
+ std::map<string, string> lex_map;
bool IsEnglish(string ch);
void LoadVocabFromYaml(const char* filename);
+ void LoadLex(const char* filename);
public:
Vocab(const char *filename);
+ Vocab(const char *filename, const char *lex_file);
~Vocab();
int Size() const;
bool IsChinese(string ch);
@@ -26,7 +29,8 @@
string Vector2StringV2(vector<int> in, std::string language="");
string Id2String(int id) const;
string WordFormat(std::string word);
- int GetIdByToken(const std::string &token);
+ int GetIdByToken(const std::string &token) const;
+ string Word2Lex(const std::string &word) const;
};
} // namespace funasr
--
Gitblit v1.9.1