| | |
| | | #include "cppjieba/DictTrie.hpp" |
| | | #include "cppjieba/HMMModel.hpp" |
| | | #include "cppjieba/Jieba.hpp" |
| | | #include "nlohmann/json.hpp" |
| | | |
| | | namespace funasr { |
| | | class CTokenizer { |
| | |
| | | vector<string> m_id2token,m_id2punc; |
| | | map<string, int> m_token2id,m_punc2id; |
| | | |
| | | // Raw pointers to the cppjieba dictionary trie and HMM model. |
| | | // Each is declared exactly once and default-initialized to nullptr so it |
| | | // holds a well-defined value before any assignment. |
| | | // NOTE(review): ownership is not visible in this header; presumably these |
| | | // are released in ~CTokenizer() -- confirm against the implementation. |
| | | cppjieba::DictTrie *jieba_dict_trie_=nullptr; |
| | | cppjieba::HMMModel *jieba_model_=nullptr; |
| | | cppjieba::Jieba jieba_processor_; |
| | | |
| | | public: |
| | |
| | | CTokenizer(); |
| | | ~CTokenizer(); |
| | | bool OpenYaml(const char* sz_yamlfile); |
| | | bool OpenYaml(const char* sz_yamlfile, const char* token_file); |
| | | void ReadYaml(const YAML::Node& node); |
| | | vector<string> Id2String(vector<int> input); |
| | | vector<int> String2Ids(vector<string> input); |