/** * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. * MIT License (https://opensource.org/licenses/MIT) */ #pragma once #include class CTokenizer { private: bool m_ready = false; vector m_id2token,m_id2punc; map m_token2id,m_punc2id; public: CTokenizer(const char* sz_yamlfile); CTokenizer(); bool OpenYaml(const char* sz_yamlfile); void ReadYaml(const YAML::Node& node); vector Id2String(vector input); vector String2Ids(vector input); int String2Id(string input); vector Id2Punc(vector input); string Id2Punc(int n_punc_id); vector Punc2Ids(vector input); vector SplitChineseString(const string& str_info); void StrSplit(const string& str, const char split, vector& res); void Tokenize(const char* str_info, vector& str_out, vector& id_out); };